diff --git a/Dockerfile b/Dockerfile index 03f322cb8..057f347c8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -52,7 +52,8 @@ RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \ apt install -y nginx unzip curl wget git vim less && \ apt install -y ghostscript && \ apt install -y pandoc && \ - apt install -y texlive + apt install -y texlive && \ + apt install -y fonts-freefont-ttf fonts-noto-cjk # Install uv RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/,target=/deps \ diff --git a/agent/component/docs_generator.py b/agent/component/docs_generator.py new file mode 100644 index 000000000..9c2442958 --- /dev/null +++ b/agent/component/docs_generator.py @@ -0,0 +1,1570 @@ +import json +import os +import re +import base64 +from datetime import datetime +from abc import ABC +from io import BytesIO +from typing import Optional +from functools import partial +from reportlab.lib.pagesizes import A4 +from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle +from reportlab.lib.units import inch +from reportlab.lib.enums import TA_LEFT, TA_CENTER, TA_JUSTIFY +from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Image, TableStyle, LongTable +from reportlab.lib import colors +from reportlab.pdfbase import pdfmetrics +from reportlab.pdfbase.ttfonts import TTFont +from reportlab.pdfbase.cidfonts import UnicodeCIDFont + +from agent.component.base import ComponentParamBase +from api.utils.api_utils import timeout +from .message import Message + + +class PDFGeneratorParam(ComponentParamBase): + """ + Define the PDF Generator component parameters. 
+ """ + + def __init__(self): + super().__init__() + # Output format + self.output_format = "pdf" # pdf, docx, txt + + # Content inputs + self.content = "" + self.title = "" + self.subtitle = "" + self.header_text = "" + self.footer_text = "" + + # Images + self.logo_image = "" # base64 or file path + self.logo_position = "left" # left, center, right + self.logo_width = 2.0 # inches + self.logo_height = 1.0 # inches + + # Styling + self.font_family = "Helvetica" # Helvetica, Times-Roman, Courier + self.font_size = 12 + self.title_font_size = 24 + self.heading1_font_size = 18 + self.heading2_font_size = 16 + self.heading3_font_size = 14 + self.text_color = "#000000" + self.title_color = "#000000" + + # Page settings + self.page_size = "A4" + self.orientation = "portrait" # portrait, landscape + self.margin_top = 1.0 # inches + self.margin_bottom = 1.0 + self.margin_left = 1.0 + self.margin_right = 1.0 + self.line_spacing = 1.2 + + # Output settings + self.filename = "" + self.output_directory = "/tmp/pdf_outputs" + self.add_page_numbers = True + self.add_timestamp = True + + # Advanced features + self.watermark_text = "" + self.enable_toc = False + + self.outputs = { + "file_path": {"value": "", "type": "string"}, + "pdf_base64": {"value": "", "type": "string"}, + "download": {"value": "", "type": "string"}, + "success": {"value": False, "type": "boolean"} + } + + def check(self): + self.check_empty(self.content, "[PDFGenerator] Content") + self.check_valid_value(self.output_format, "[PDFGenerator] Output format", ["pdf", "docx", "txt"]) + self.check_valid_value(self.logo_position, "[PDFGenerator] Logo position", ["left", "center", "right"]) + self.check_valid_value(self.font_family, "[PDFGenerator] Font family", + ["Helvetica", "Times-Roman", "Courier", "Helvetica-Bold", "Times-Bold"]) + self.check_valid_value(self.page_size, "[PDFGenerator] Page size", ["A4", "Letter"]) + self.check_valid_value(self.orientation, "[PDFGenerator] Orientation", ["portrait", 
"landscape"]) + self.check_positive_number(self.font_size, "[PDFGenerator] Font size") + self.check_positive_number(self.margin_top, "[PDFGenerator] Margin top") + + +class PDFGenerator(Message, ABC): + component_name = "PDFGenerator" + + # Track if Unicode fonts have been registered + _unicode_fonts_registered = False + _unicode_font_name = None + _unicode_font_bold_name = None + + @classmethod + def _reset_font_cache(cls): + """Reset font registration cache - useful for testing""" + cls._unicode_fonts_registered = False + cls._unicode_font_name = None + cls._unicode_font_bold_name = None + + @classmethod + def _register_unicode_fonts(cls): + """Register Unicode-compatible fonts for multi-language support. + + Uses CID fonts (STSong-Light) for reliable CJK rendering as TTF fonts + have issues with glyph mapping in some ReportLab versions. + """ + # If already registered successfully, return True + if cls._unicode_fonts_registered and cls._unicode_font_name is not None: + return True + + # Reset and try again if previous registration failed + cls._unicode_fonts_registered = True + cls._unicode_font_name = None + cls._unicode_font_bold_name = None + + # Use CID fonts for reliable CJK support + # These are built into ReportLab and work reliably across all platforms + cid_fonts = [ + 'STSong-Light', # Simplified Chinese + 'HeiseiMin-W3', # Japanese + 'HYSMyeongJo-Medium', # Korean + ] + + for cid_font in cid_fonts: + try: + pdfmetrics.registerFont(UnicodeCIDFont(cid_font)) + cls._unicode_font_name = cid_font + cls._unicode_font_bold_name = cid_font # CID fonts don't have bold variants + print(f"Registered CID font: {cid_font}") + break + except Exception as e: + print(f"Failed to register CID font {cid_font}: {e}") + continue + + # If CID fonts fail, try TTF fonts as fallback + if not cls._unicode_font_name: + font_paths = [ + '/usr/share/fonts/truetype/freefont/FreeSans.ttf', + '/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf', + ] + + for font_path in font_paths: + 
if os.path.exists(font_path): + try: + pdfmetrics.registerFont(TTFont('UnicodeFont', font_path)) + cls._unicode_font_name = 'UnicodeFont' + cls._unicode_font_bold_name = 'UnicodeFont' + print(f"Registered TTF font from: {font_path}") + + # Register font family + from reportlab.pdfbase.pdfmetrics import registerFontFamily + registerFontFamily('UnicodeFont', normal='UnicodeFont', bold='UnicodeFont') + break + except Exception as e: + print(f"Failed to register TTF font {font_path}: {e}") + continue + + return cls._unicode_font_name is not None + + @staticmethod + def _needs_unicode_font(text: str) -> bool: + """Check if text contains CJK or other complex scripts that need special fonts. + + Standard PDF fonts (Helvetica, Times, Courier) support: + - Basic Latin, Extended Latin, Cyrillic, Greek + + CID fonts are needed for: + - CJK (Chinese, Japanese, Korean) + - Arabic, Hebrew (RTL scripts) + - Thai, Hindi, and other Indic scripts + """ + if not text: + return False + + for char in text: + code = ord(char) + + # CJK Unified Ideographs and related ranges + if 0x4E00 <= code <= 0x9FFF: # CJK Unified Ideographs + return True + if 0x3400 <= code <= 0x4DBF: # CJK Extension A + return True + if 0x3000 <= code <= 0x303F: # CJK Symbols and Punctuation + return True + if 0x3040 <= code <= 0x309F: # Hiragana + return True + if 0x30A0 <= code <= 0x30FF: # Katakana + return True + if 0xAC00 <= code <= 0xD7AF: # Hangul Syllables + return True + if 0x1100 <= code <= 0x11FF: # Hangul Jamo + return True + + # Arabic and Hebrew (RTL scripts) + if 0x0600 <= code <= 0x06FF: # Arabic + return True + if 0x0590 <= code <= 0x05FF: # Hebrew + return True + + # Indic scripts + if 0x0900 <= code <= 0x097F: # Devanagari (Hindi) + return True + if 0x0E00 <= code <= 0x0E7F: # Thai + return True + + return False + + def _get_font_for_content(self, content: str) -> tuple: + """Get appropriate font based on content, returns (regular_font, bold_font)""" + if self._needs_unicode_font(content): + if 
self._register_unicode_fonts() and self._unicode_font_name: + return (self._unicode_font_name, self._unicode_font_bold_name or self._unicode_font_name) + else: + print("Warning: Content contains non-Latin characters but no Unicode font available") + + # Fall back to configured font + return (self._param.font_family, self._get_bold_font_name()) + + def _get_active_font(self) -> str: + """Get the currently active font (Unicode or configured)""" + return getattr(self, '_active_font', self._param.font_family) + + def _get_active_bold_font(self) -> str: + """Get the currently active bold font (Unicode or configured)""" + return getattr(self, '_active_bold_font', self._get_bold_font_name()) + + def _get_bold_font_name(self) -> str: + """Get the correct bold variant of the current font family""" + font_map = { + 'Helvetica': 'Helvetica-Bold', + 'Times-Roman': 'Times-Bold', + 'Courier': 'Courier-Bold', + } + font_family = getattr(self._param, 'font_family', 'Helvetica') + if 'Bold' in font_family: + return font_family + return font_map.get(font_family, 'Helvetica-Bold') + + def get_input_form(self) -> dict[str, dict]: + return { + "content": { + "name": "Content", + "type": "text" + }, + "title": { + "name": "Title", + "type": "line" + }, + "subtitle": { + "name": "Subtitle", + "type": "line" + } + } + + @timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10*60))) + def _invoke(self, **kwargs): + import traceback + + try: + # Get content from parameters (which may contain variable references) + content = self._param.content or "" + title = self._param.title or "" + subtitle = self._param.subtitle or "" + + # Log PDF generation start + print(f"Starting PDF generation for title: {title}, content length: {len(content)} chars") + + # Resolve variable references in content using canvas + if content and self._canvas.is_reff(content): + # Extract the variable reference and get its value + import re + matches = re.findall(self.variable_ref_patt, content, flags=re.DOTALL) + for 
match in matches: + try: + var_value = self._canvas.get_variable_value(match) + if var_value: + # Handle partial (streaming) content + if isinstance(var_value, partial): + resolved_content = "" + for chunk in var_value(): + resolved_content += chunk + content = content.replace("{" + match + "}", resolved_content) + else: + content = content.replace("{" + match + "}", str(var_value)) + except Exception as e: + print(f"Error resolving variable {match}: {str(e)}") + content = content.replace("{" + match + "}", f"[ERROR: {str(e)}]") + + # Also process with get_kwargs for any remaining variables + if content: + try: + content, _ = self.get_kwargs(content, kwargs) + except Exception as e: + print(f"Error processing content with get_kwargs: {str(e)}") + + # Process template variables in title + if title and self._canvas.is_reff(title): + try: + matches = re.findall(self.variable_ref_patt, title, flags=re.DOTALL) + for match in matches: + var_value = self._canvas.get_variable_value(match) + if var_value: + title = title.replace("{" + match + "}", str(var_value)) + except Exception as e: + print(f"Error processing title variables: {str(e)}") + + if title: + try: + title, _ = self.get_kwargs(title, kwargs) + except Exception: + pass + + # Process template variables in subtitle + if subtitle and self._canvas.is_reff(subtitle): + try: + matches = re.findall(self.variable_ref_patt, subtitle, flags=re.DOTALL) + for match in matches: + var_value = self._canvas.get_variable_value(match) + if var_value: + subtitle = subtitle.replace("{" + match + "}", str(var_value)) + except Exception as e: + print(f"Error processing subtitle variables: {str(e)}") + + if subtitle: + try: + subtitle, _ = self.get_kwargs(subtitle, kwargs) + except Exception: + pass + + # If content is still empty, check if it was passed directly + if not content: + content = kwargs.get("content", "") + + # Generate document based on format + try: + output_format = self._param.output_format or "pdf" + + if 
output_format == "pdf": + file_path, doc_base64 = self._generate_pdf(content, title, subtitle) + mime_type = "application/pdf" + elif output_format == "docx": + file_path, doc_base64 = self._generate_docx(content, title, subtitle) + mime_type = "application/vnd.openxmlformats-officedocument.wordprocessingml.document" + elif output_format == "txt": + file_path, doc_base64 = self._generate_txt(content, title, subtitle) + mime_type = "text/plain" + else: + raise Exception(f"Unsupported output format: {output_format}") + + filename = os.path.basename(file_path) + + # Verify the file was created and has content + if not os.path.exists(file_path): + raise Exception(f"Document file was not created: {file_path}") + + file_size = os.path.getsize(file_path) + if file_size == 0: + raise Exception(f"Document file is empty: {file_path}") + + print(f"Successfully generated {output_format.upper()}: {file_path} (Size: {file_size} bytes)") + + # Set outputs + self.set_output("file_path", file_path) + self.set_output("pdf_base64", doc_base64) # Keep same output name for compatibility + self.set_output("success", True) + + # Create download info object + download_info = { + "filename": filename, + "path": file_path, + "base64": doc_base64, + "mime_type": mime_type, + "size": file_size + } + # Output download info as JSON string so it can be used in Message block + download_json = json.dumps(download_info) + self.set_output("download", download_json) + + return download_info + + except Exception as e: + error_msg = f"Error in _generate_pdf: {str(e)}\n{traceback.format_exc()}" + print(error_msg) + self.set_output("success", False) + self.set_output("_ERROR", f"PDF generation failed: {str(e)}") + raise + + except Exception as e: + error_msg = f"Error in PDFGenerator._invoke: {str(e)}\n{traceback.format_exc()}" + print(error_msg) + self.set_output("success", False) + self.set_output("_ERROR", f"PDF generation failed: {str(e)}") + raise + + def _generate_pdf(self, content: str, title: str 
= "", subtitle: str = "") -> tuple[str, str]: + """Generate PDF from markdown-style content with improved error handling and concurrency support""" + import uuid + import traceback + + # Create output directory if it doesn't exist + os.makedirs(self._param.output_directory, exist_ok=True) + + # Initialize variables that need cleanup + buffer = None + temp_file_path = None + file_path = None + + try: + # Generate a unique filename to prevent conflicts + if self._param.filename: + base_name = os.path.splitext(self._param.filename)[0] + filename = f"{base_name}_{uuid.uuid4().hex[:8]}.pdf" + else: + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + filename = f"document_{timestamp}_{uuid.uuid4().hex[:8]}.pdf" + + file_path = os.path.join(self._param.output_directory, filename) + temp_file_path = f"{file_path}.tmp" + + # Setup page size + page_size = A4 + if self._param.orientation == "landscape": + page_size = (A4[1], A4[0]) + + # Create PDF buffer and document + buffer = BytesIO() + doc = SimpleDocTemplate( + buffer, + pagesize=page_size, + topMargin=self._param.margin_top * inch, + bottomMargin=self._param.margin_bottom * inch, + leftMargin=self._param.margin_left * inch, + rightMargin=self._param.margin_right * inch + ) + + # Build story (content elements) + story = [] + # Combine all text content for Unicode font detection + all_text = f"{title} {subtitle} {content}" + + # IMPORTANT: Register Unicode fonts BEFORE creating any styles or Paragraphs + # This ensures the font family is available for ReportLab's HTML parser + if self._needs_unicode_font(all_text): + self._register_unicode_fonts() + + styles = self._create_styles(all_text) + + # Add logo if provided + if self._param.logo_image: + logo = self._add_logo() + if logo: + story.append(logo) + story.append(Spacer(1, 0.3 * inch)) + + # Add title + if title: + title_para = Paragraph(self._escape_html(title), styles['PDFTitle']) + story.append(title_para) + story.append(Spacer(1, 0.2 * inch)) + + # Add 
subtitle + if subtitle: + subtitle_para = Paragraph(self._escape_html(subtitle), styles['PDFSubtitle']) + story.append(subtitle_para) + story.append(Spacer(1, 0.3 * inch)) + + # Add timestamp if enabled + if self._param.add_timestamp: + timestamp_text = f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}" + timestamp_para = Paragraph(timestamp_text, styles['Italic']) + story.append(timestamp_para) + story.append(Spacer(1, 0.2 * inch)) + + # Parse and add content + content_elements = self._parse_markdown_content(content, styles) + story.extend(content_elements) + + # Build PDF + doc.build(story, onFirstPage=self._add_page_decorations, onLaterPages=self._add_page_decorations) + + # Get PDF bytes + pdf_bytes = buffer.getvalue() + + # Write to temporary file first + with open(temp_file_path, 'wb') as f: + f.write(pdf_bytes) + + # Atomic rename to final filename (works across different filesystems) + if os.path.exists(file_path): + os.remove(file_path) + os.rename(temp_file_path, file_path) + + # Verify the file was created and has content + if not os.path.exists(file_path): + raise Exception(f"Failed to create output file: {file_path}") + + file_size = os.path.getsize(file_path) + if file_size == 0: + raise Exception(f"Generated PDF is empty: {file_path}") + + # Convert to base64 + pdf_base64 = base64.b64encode(pdf_bytes).decode('utf-8') + + return file_path, pdf_base64 + + except Exception as e: + # Clean up any temporary files on error + if temp_file_path and os.path.exists(temp_file_path): + try: + os.remove(temp_file_path) + except Exception as cleanup_error: + print(f"Error cleaning up temporary file: {cleanup_error}") + + error_msg = f"Error generating PDF: {str(e)}\n{traceback.format_exc()}" + print(error_msg) + raise Exception(f"PDF generation failed: {str(e)}") + + finally: + # Ensure buffer is always closed + if buffer is not None: + try: + buffer.close() + except Exception as close_error: + print(f"Error closing buffer: {close_error}") + + def 
_create_styles(self, content: str = ""): + """Create custom paragraph styles with Unicode font support if needed""" + # Check if content contains CJK characters that need special fonts + needs_cjk = self._needs_unicode_font(content) + + if needs_cjk: + # Use CID fonts for CJK content + if self._register_unicode_fonts() and self._unicode_font_name: + regular_font = self._unicode_font_name + bold_font = self._unicode_font_bold_name or self._unicode_font_name + print(f"Using CID font for CJK content: {regular_font}") + else: + # Fall back to configured font if CID fonts unavailable + regular_font = self._param.font_family + bold_font = self._get_bold_font_name() + print(f"Warning: CJK content detected but no CID font available, using {regular_font}") + else: + # Use user-selected font for Latin-only content + regular_font = self._param.font_family + bold_font = self._get_bold_font_name() + print(f"Using configured font: {regular_font}") + + # Store active fonts as instance variables for use in other methods + self._active_font = regular_font + self._active_bold_font = bold_font + + # Get fresh style sheet + styles = getSampleStyleSheet() + + # Helper function to get the correct bold font name + def get_bold_font(font_family): + """Get the correct bold variant of a font family""" + # If using Unicode font, return the Unicode bold + if font_family in ('UnicodeFont', self._unicode_font_name): + return bold_font + font_map = { + 'Helvetica': 'Helvetica-Bold', + 'Times-Roman': 'Times-Bold', + 'Courier': 'Courier-Bold', + } + if 'Bold' in font_family: + return font_family + return font_map.get(font_family, 'Helvetica-Bold') + + # Use detected font instead of configured font for non-Latin content + active_font = regular_font + active_bold_font = bold_font + + # Helper function to add or update style + def add_or_update_style(name, **kwargs): + if name in styles: + # Update existing style + style = styles[name] + for key, value in kwargs.items(): + setattr(style, key, value) 
+ else: + # Add new style + styles.add(ParagraphStyle(name=name, **kwargs)) + + # IMPORTANT: Update base styles to use Unicode font for non-Latin content + # This ensures ALL text uses the correct font, not just our custom styles + add_or_update_style('Normal', fontName=active_font) + add_or_update_style('BodyText', fontName=active_font) + add_or_update_style('Bullet', fontName=active_font) + add_or_update_style('Heading1', fontName=active_bold_font) + add_or_update_style('Heading2', fontName=active_bold_font) + add_or_update_style('Heading3', fontName=active_bold_font) + add_or_update_style('Title', fontName=active_bold_font) + + # Title style + add_or_update_style( + 'PDFTitle', + parent=styles['Heading1'], + fontSize=self._param.title_font_size, + textColor=colors.HexColor(self._param.title_color), + fontName=active_bold_font, + alignment=TA_CENTER, + spaceAfter=12 + ) + + # Subtitle style + add_or_update_style( + 'PDFSubtitle', + parent=styles['Heading2'], + fontSize=self._param.heading2_font_size, + textColor=colors.HexColor(self._param.text_color), + fontName=active_font, + alignment=TA_CENTER, + spaceAfter=12 + ) + + # Custom heading styles + add_or_update_style( + 'CustomHeading1', + parent=styles['Heading1'], + fontSize=self._param.heading1_font_size, + fontName=active_bold_font, + textColor=colors.HexColor(self._param.text_color), + spaceAfter=12, + spaceBefore=12 + ) + + add_or_update_style( + 'CustomHeading2', + parent=styles['Heading2'], + fontSize=self._param.heading2_font_size, + fontName=active_bold_font, + textColor=colors.HexColor(self._param.text_color), + spaceAfter=10, + spaceBefore=10 + ) + + add_or_update_style( + 'CustomHeading3', + parent=styles['Heading3'], + fontSize=self._param.heading3_font_size, + fontName=active_bold_font, + textColor=colors.HexColor(self._param.text_color), + spaceAfter=8, + spaceBefore=8 + ) + + # Body text style + add_or_update_style( + 'CustomBody', + parent=styles['BodyText'], + fontSize=self._param.font_size, + 
fontName=active_font, + textColor=colors.HexColor(self._param.text_color), + leading=self._param.font_size * self._param.line_spacing, + alignment=TA_JUSTIFY + ) + + # Bullet style + add_or_update_style( + 'CustomBullet', + parent=styles['BodyText'], + fontSize=self._param.font_size, + fontName=active_font, + textColor=colors.HexColor(self._param.text_color), + leftIndent=20, + bulletIndent=10 + ) + + # Code style (keep Courier for code blocks) + add_or_update_style( + 'PDFCode', + parent=styles.get('Code', styles['Normal']), + fontSize=self._param.font_size - 1, + fontName='Courier', + textColor=colors.HexColor('#333333'), + backColor=colors.HexColor('#f5f5f5'), + leftIndent=20, + rightIndent=20 + ) + + # Italic style + add_or_update_style( + 'Italic', + parent=styles['Normal'], + fontSize=self._param.font_size, + fontName=active_font, + textColor=colors.HexColor(self._param.text_color) + ) + + return styles + + def _parse_markdown_content(self, content: str, styles): + """Parse markdown-style content and convert to PDF elements""" + elements = [] + lines = content.split('\n') + + i = 0 + while i < len(lines): + line = lines[i].strip() + + # Skip empty lines + if not line: + elements.append(Spacer(1, 0.1 * inch)) + i += 1 + continue + + # Horizontal rule + if line == '---' or line == '___': + elements.append(Spacer(1, 0.1 * inch)) + elements.append(self._create_horizontal_line()) + elements.append(Spacer(1, 0.1 * inch)) + i += 1 + continue + + # Heading 1 + if line.startswith('# ') and not line.startswith('## '): + text = line[2:].strip() + elements.append(Paragraph(self._format_inline(text), styles['CustomHeading1'])) + i += 1 + continue + + # Heading 2 + if line.startswith('## ') and not line.startswith('### '): + text = line[3:].strip() + elements.append(Paragraph(self._format_inline(text), styles['CustomHeading2'])) + i += 1 + continue + + # Heading 3 + if line.startswith('### '): + text = line[4:].strip() + elements.append(Paragraph(self._format_inline(text), 
styles['CustomHeading3'])) + i += 1 + continue + + # Bullet list + if line.startswith('- ') or line.startswith('* '): + bullet_items = [] + while i < len(lines) and (lines[i].strip().startswith('- ') or lines[i].strip().startswith('* ')): + item_text = lines[i].strip()[2:].strip() + formatted = self._format_inline(item_text) + bullet_items.append(f"• {formatted}") + i += 1 + for item in bullet_items: + elements.append(Paragraph(item, styles['CustomBullet'])) + continue + + # Numbered list + if re.match(r'^\d+\.\s', line): + numbered_items = [] + counter = 1 + while i < len(lines) and re.match(r'^\d+\.\s', lines[i].strip()): + item_text = re.sub(r'^\d+\.\s', '', lines[i].strip()) + numbered_items.append(f"{counter}. {self._format_inline(item_text)}") + counter += 1 + i += 1 + for item in numbered_items: + elements.append(Paragraph(item, styles['CustomBullet'])) + continue + + # Table detection (markdown table must start with |) + if line.startswith('|') and '|' in line: + table_lines = [] + # Collect all consecutive lines that look like table rows + while i < len(lines) and lines[i].strip() and '|' in lines[i]: + table_lines.append(lines[i].strip()) + i += 1 + + # Only process if we have at least 2 lines (header + separator or header + data) + if len(table_lines) >= 2: + table_elements = self._create_table(table_lines) + if table_elements: + # _create_table now returns a list of elements + elements.extend(table_elements) + elements.append(Spacer(1, 0.2 * inch)) + continue + else: + # Not a valid table, treat as regular text + i -= len(table_lines) # Reset position + + # Code block + if line.startswith('```'): + code_lines = [] + i += 1 + while i < len(lines) and not lines[i].strip().startswith('```'): + code_lines.append(lines[i]) + i += 1 + if i < len(lines): + i += 1 + code_text = '\n'.join(code_lines) + elements.append(Paragraph(self._escape_html(code_text), styles['PDFCode'])) + elements.append(Spacer(1, 0.1 * inch)) + continue + + # Regular paragraph + 
paragraph_lines = [line] + i += 1 + while i < len(lines) and lines[i].strip() and not self._is_special_line(lines[i]): + paragraph_lines.append(lines[i].strip()) + i += 1 + + paragraph_text = ' '.join(paragraph_lines) + formatted_text = self._format_inline(paragraph_text) + elements.append(Paragraph(formatted_text, styles['CustomBody'])) + elements.append(Spacer(1, 0.1 * inch)) + + return elements + + def _is_special_line(self, line: str) -> bool: + """Check if line is a special markdown element""" + line = line.strip() + return (line.startswith('#') or + line.startswith('- ') or + line.startswith('* ') or + re.match(r'^\d+\.\s', line) or + line in ['---', '___'] or + line.startswith('```') or + '|' in line) + + def _format_inline(self, text: str) -> str: + """Format inline markdown (bold, italic, code)""" + # First, escape the existing HTML to not conflict with our tags. + text = self._escape_html(text) + + # IMPORTANT: Process inline code FIRST to protect underscores inside code blocks + # Use a placeholder to protect code blocks from italic/bold processing + code_blocks = [] + def save_code(match): + code_blocks.append(match.group(1)) + return f"__CODE_BLOCK_{len(code_blocks)-1}__" + + text = re.sub(r'`(.+?)`', save_code, text) + + # Then, apply markdown formatting. + # The order is important: from most specific to least specific. 
+ + # Bold and italic combined: ***text*** or ___text___ + text = re.sub(r'\*\*\*(.+?)\*\*\*', r'\1', text) + text = re.sub(r'___(.+?)___', r'\1', text) + + # Bold: **text** or __text__ + text = re.sub(r'\*\*(.+?)\*\*', r'\1', text) + text = re.sub(r'__([^_]+?)__', r'\1', text) # More restrictive to avoid matching placeholders + + # Italic: *text* or _text_ (but not underscores in words like variable_name) + text = re.sub(r'\*([^*]+?)\*', r'\1', text) + # Only match _text_ when surrounded by spaces or at start/end, not mid-word underscores + text = re.sub(r'(?\1', text) + + # Restore code blocks with proper formatting + for i, code in enumerate(code_blocks): + text = text.replace(f"__CODE_BLOCK_{i}__", f'{code}') + + return text + + def _escape_html(self, text: str) -> str: + """Escape HTML special characters and clean up markdown. + + Args: + text: Input text that may contain HTML or markdown + + Returns: + str: Cleaned and escaped text + """ + if not text: + return "" + + # Ensure we're working with a string + text = str(text) + + # Remove HTML form elements and tags + text = re.sub(r']*>', '', text, flags=re.IGNORECASE) # Remove input tags + text = re.sub(r']*>.*?', '', text, flags=re.IGNORECASE | re.DOTALL) # Remove textarea + text = re.sub(r']*>.*?', '', text, flags=re.IGNORECASE | re.DOTALL) # Remove select + text = re.sub(r']*>.*?', '', text, flags=re.IGNORECASE | re.DOTALL) # Remove buttons + text = re.sub(r']*>.*?', '', text, flags=re.IGNORECASE | re.DOTALL) # Remove forms + + # Remove other common HTML tags (but preserve content) + text = re.sub(r']*>', '', text, flags=re.IGNORECASE) + text = re.sub(r'', '', text, flags=re.IGNORECASE) + text = re.sub(r']*>', '', text, flags=re.IGNORECASE) + text = re.sub(r'', '', text, flags=re.IGNORECASE) + text = re.sub(r']*>', '', text, flags=re.IGNORECASE) + text = re.sub(r'

', '\n', text, flags=re.IGNORECASE) + + # First, handle common markdown table artifacts + text = re.sub(r'^[|\-\s:]+$', '', text, flags=re.MULTILINE) # Remove separator lines + text = re.sub(r'^\s*\|\s*|\s*\|\s*$', '', text) # Remove leading/trailing pipes + text = re.sub(r'\s*\|\s*', ' | ', text) # Normalize pipes + + # Remove markdown links, but keep other formatting characters for _format_inline + text = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', text) # Remove markdown links + + # Escape HTML special characters + text = text.replace('&', '&') + text = text.replace('<', '<') + text = text.replace('>', '>') + + # Clean up excessive whitespace + text = re.sub(r'\n\s*\n\s*\n+', '\n\n', text) # Multiple blank lines to double + text = re.sub(r' +', ' ', text) # Multiple spaces to single + + return text.strip() + + def _get_cell_style(self, row_idx: int, is_header: bool = False, font_size: int = None) -> 'ParagraphStyle': + """Get the appropriate style for a table cell.""" + styles = getSampleStyleSheet() + + # Helper function to get the correct bold font name + def get_bold_font(font_family): + font_map = { + 'Helvetica': 'Helvetica-Bold', + 'Times-Roman': 'Times-Bold', + 'Courier': 'Courier-Bold', + } + if 'Bold' in font_family: + return font_family + return font_map.get(font_family, 'Helvetica-Bold') + + if is_header: + return ParagraphStyle( + 'TableHeader', + parent=styles['Normal'], + fontSize=self._param.font_size, + fontName=self._get_active_bold_font(), + textColor=colors.whitesmoke, + alignment=TA_CENTER, + leading=self._param.font_size * 1.2, + wordWrap='CJK' + ) + else: + font_size = font_size or (self._param.font_size - 1) + return ParagraphStyle( + 'TableCell', + parent=styles['Normal'], + fontSize=font_size, + fontName=self._get_active_font(), + textColor=colors.black, + alignment=TA_LEFT, + leading=font_size * 1.15, + wordWrap='CJK' + ) + + def _convert_table_to_definition_list(self, data: list[list[str]]) -> list: + """Convert a table to a definition 
list format for better handling of large content. + + This method handles both simple and complex tables, including those with nested content. + It ensures that large cell content is properly wrapped and paginated. + """ + elements = [] + styles = getSampleStyleSheet() + + # Base styles + base_font_size = getattr(self._param, 'font_size', 10) + + # Body style + body_style = ParagraphStyle( + 'TableBody', + parent=styles['Normal'], + fontSize=base_font_size, + fontName=self._get_active_font(), + textColor=colors.HexColor(getattr(self._param, 'text_color', '#000000')), + spaceAfter=6, + leading=base_font_size * 1.2 + ) + + # Label style (for field names) + label_style = ParagraphStyle( + 'LabelStyle', + parent=body_style, + fontName=self._get_active_bold_font(), + textColor=colors.HexColor('#2c3e50'), + fontSize=base_font_size, + spaceAfter=4, + leftIndent=0, + leading=base_font_size * 1.3 + ) + + # Value style (for cell content) - clean, no borders + value_style = ParagraphStyle( + 'ValueStyle', + parent=body_style, + leftIndent=15, + rightIndent=0, + spaceAfter=8, + spaceBefore=2, + fontSize=base_font_size, + textColor=colors.HexColor('#333333'), + alignment=TA_JUSTIFY, + leading=base_font_size * 1.4, + # No borders or background - clean text only + ) + + try: + # If we have no data, return empty list + if not data or not any(data): + return elements + + # Get column headers or generate them + headers = [] + if data and len(data) > 0: + headers = [str(h).strip() for h in data[0]] + + # If no headers or empty headers, generate them + if not any(headers): + headers = [f"Column {i+1}" for i in range(len(data[0]) if data and len(data) > 0 else 0)] + + # Process each data row (skip header if it exists) + start_row = 1 if len(data) > 1 and any(data[0]) else 0 + + for row_idx in range(start_row, len(data)): + row = data[row_idx] if row_idx < len(data) else [] + if not row: + continue + + # Create a container for the row + row_elements = [] + + # Process each cell in the 
row + for col_idx in range(len(headers)): + if col_idx >= len(headers): + continue + + # Get cell content + cell_text = str(row[col_idx]).strip() if col_idx < len(row) and row[col_idx] is not None else "" + + # Skip empty cells + if not cell_text or cell_text.isspace(): + continue + + # Clean up markdown artifacts for regular text content + cell_text = str(cell_text) # Ensure it's a string + + # Remove markdown table formatting + cell_text = re.sub(r'^[|\-\s:]+$', '', cell_text, flags=re.MULTILINE) # Remove separator lines + cell_text = re.sub(r'^\s*\|\s*|\s*\|\s*$', '', cell_text) # Remove leading/trailing pipes + cell_text = re.sub(r'\s*\|\s*', ' | ', cell_text) # Normalize pipes + cell_text = re.sub(r'\s+', ' ', cell_text).strip() # Normalize whitespace + + # Remove any remaining markdown formatting + cell_text = re.sub(r'`(.*?)`', r'\1', cell_text) # Remove code ticks + cell_text = re.sub(r'\*\*(.*?)\*\*', r'\1', cell_text) # Remove bold + cell_text = re.sub(r'\*(.*?)\*', r'\1', cell_text) # Remove italic + + # Clean up any HTML entities or special characters + cell_text = self._escape_html(cell_text) + + # If content still looks like a table, convert it to plain text + if '|' in cell_text and ('--' in cell_text or any(cell_text.count('|') > 2 for line in cell_text.split('\n') if line.strip())): + # Convert to a simple text format + lines = [line.strip() for line in cell_text.split('\n') if line.strip()] + cell_text = ' | '.join(lines[:5]) # Join first 5 lines with pipe + if len(lines) > 5: + cell_text += '...' 
+ + # Process long content with better wrapping + max_chars_per_line = 100 # Reduced for better readability + max_paragraphs = 3 # Maximum number of paragraphs to show initially + + # Split into paragraphs + paragraphs = [p for p in cell_text.split('\n\n') if p.strip()] + + # If content is too long, truncate with "show more" indicator + if len(paragraphs) > max_paragraphs or any(len(p) > max_chars_per_line * 3 for p in paragraphs): + wrapped_paragraphs = [] + + for i, para in enumerate(paragraphs[:max_paragraphs]): + if len(para) > max_chars_per_line * 3: + # Split long paragraphs + words = para.split() + current_line = [] + current_length = 0 + + for word in words: + if current_line and current_length + len(word) + 1 > max_chars_per_line: + wrapped_paragraphs.append(' '.join(current_line)) + current_line = [word] + current_length = len(word) + else: + current_line.append(word) + current_length += len(word) + (1 if current_line else 0) + + if current_line: + wrapped_paragraphs.append(' '.join(current_line)) + else: + wrapped_paragraphs.append(para) + + # Add "show more" indicator if there are more paragraphs + if len(paragraphs) > max_paragraphs: + wrapped_paragraphs.append(f"... 
and {len(paragraphs) - max_paragraphs} more paragraphs") + + cell_text = '\n\n'.join(wrapped_paragraphs) + + # Add label and content with clean formatting (no borders) + label_para = Paragraph(f"{self._escape_html(headers[col_idx])}:", label_style) + value_para = Paragraph(self._escape_html(cell_text), value_style) + + # Add elements with proper spacing + row_elements.append(label_para) + row_elements.append(Spacer(1, 0.03 * 72)) # Tiny space between label and value + row_elements.append(value_para) + + # Add spacing between rows + if row_elements and row_idx < len(data) - 1: + # Add a subtle horizontal line as separator + row_elements.append(Spacer(1, 0.1 * 72)) + row_elements.append(self._create_horizontal_line(width=0.5, color='#e0e0e0')) + row_elements.append(Spacer(1, 0.15 * 72)) + + elements.extend(row_elements) + + # Add some space after the table + if elements: + elements.append(Spacer(1, 0.3 * 72)) # 0.3 inches in points + + except Exception as e: + # Fallback to simple text representation if something goes wrong + error_style = ParagraphStyle( + 'ErrorStyle', + parent=styles['Normal'], + fontSize=base_font_size - 1, + textColor=colors.red, + backColor=colors.HexColor('#fff0f0'), + borderWidth=1, + borderColor=colors.red, + borderPadding=5 + ) + + error_msg = [ + Paragraph("Error processing table:", error_style), + Paragraph(str(e), error_style), + Spacer(1, 0.2 * 72) + ] + + # Add a simplified version of the table + try: + for row in data[:10]: # Limit to first 10 rows to avoid huge error output + error_msg.append(Paragraph(" | ".join(str(cell) for cell in row), body_style)) + if len(data) > 10: + error_msg.append(Paragraph(f"... and {len(data) - 10} more rows", body_style)) + except Exception: + pass + + elements.extend(error_msg) + + return elements + + def _create_table(self, table_lines: list[str]) -> Optional[list]: + """Create a table from markdown table syntax with robust error handling. 
+ + This method handles simple tables and falls back to a list format for complex cases. + + Returns: + A list of flowables (could be a table or alternative representation) + Returns None if the table cannot be created. + """ + if not table_lines or len(table_lines) < 2: + return None + + try: + # Parse table data + data = [] + max_columns = 0 + + for line in table_lines: + # Skip separator lines (e.g., |---|---|) + if re.match(r'^\|[\s\-:]+\|$', line): + continue + + # Handle empty lines within tables + if not line.strip(): + continue + + # Split by | and clean up cells + cells = [] + in_quotes = False + current_cell = "" + + # Custom split to handle escaped pipes and quoted content + for char in line[1:]: # Skip initial | + if char == '|' and not in_quotes: + cells.append(current_cell.strip()) + current_cell = "" + elif char == '"': + in_quotes = not in_quotes + current_cell += char + elif char == '\\' and not in_quotes: + # Handle escaped characters + pass + else: + current_cell += char + + # Add the last cell + if current_cell.strip() or len(cells) > 0: + cells.append(current_cell.strip()) + + # Remove empty first/last elements if they're empty (from leading/trailing |) + if cells and not cells[0]: + cells = cells[1:] + if cells and not cells[-1]: + cells = cells[:-1] + + if cells: + data.append(cells) + max_columns = max(max_columns, len(cells)) + + if not data or max_columns == 0: + return None + + # Ensure all rows have the same number of columns + for row in data: + while len(row) < max_columns: + row.append('') + + # Calculate available width for table + from reportlab.lib.pagesizes import A4 + page_width = A4[0] if self._param.orientation == 'portrait' else A4[1] + available_width = page_width - (self._param.margin_left + self._param.margin_right) * inch + + # Check if we should use definition list format + max_cell_length = max((len(str(cell)) for row in data for cell in row), default=0) + total_rows = len(data) + + # Use definition list format if: + # - 
Any cell is too large (> 300 chars), OR + # - More than 6 columns, OR + # - More than 20 rows, OR + # - Contains nested tables or complex structures + has_nested_tables = any('|' in cell and '---' in cell for row in data for cell in row) + has_complex_cells = any(len(str(cell)) > 150 for row in data for cell in row) + + should_use_list_format = ( + max_cell_length > 300 or + max_columns > 6 or + total_rows > 20 or + has_nested_tables or + has_complex_cells + ) + + if should_use_list_format: + return self._convert_table_to_definition_list(data) + + # Process cells for normal table + processed_data = [] + for row_idx, row in enumerate(data): + processed_row = [] + for cell_idx, cell in enumerate(row): + cell_text = str(cell).strip() if cell is not None else "" + + # Handle empty cells + if not cell_text: + processed_row.append("") + continue + + # Clean up markdown table artifacts + cell_text = re.sub(r'\\\|', '|', cell_text) # Unescape pipes + cell_text = re.sub(r'\\n', '\n', cell_text) # Handle explicit newlines + + # Check for nested tables + if '|' in cell_text and '---' in cell_text: + # This cell contains a nested table + nested_lines = [line.strip() for line in cell_text.split('\n') if line.strip()] + nested_table = self._create_table(nested_lines) + if nested_table: + processed_row.append(nested_table[0]) # Add the nested table + continue + + # Process as regular text + font_size = self._param.font_size - 1 if row_idx > 0 else self._param.font_size + try: + style = self._get_cell_style(row_idx, is_header=(row_idx == 0), font_size=font_size) + escaped_text = self._escape_html(cell_text) + processed_row.append(Paragraph(escaped_text, style)) + except Exception: + processed_row.append(self._escape_html(cell_text)) + + processed_data.append(processed_row) + + # Calculate column widths + min_col_width = 0.5 * inch + max_cols = int(available_width / min_col_width) + + if max_columns > max_cols: + return self._convert_table_to_definition_list(data) + + col_width = 
max(min_col_width, available_width / max_columns) + col_widths = [col_width] * max_columns + + # Create the table + try: + table = LongTable(processed_data, colWidths=col_widths, repeatRows=1) + + # Define table style + table_style = [ + ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor('#2c3e50')), # Darker header + ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke), + ('ALIGN', (0, 0), (-1, 0), 'CENTER'), + ('FONTNAME', (0, 0), (-1, 0), self._get_active_bold_font()), + ('FONTSIZE', (0, 0), (-1, -1), self._param.font_size - 1), + ('BOTTOMPADDING', (0, 0), (-1, 0), 12), + ('BACKGROUND', (0, 1), (-1, -1), colors.HexColor('#f8f9fa')), # Lighter background + ('GRID', (0, 0), (-1, -1), 0.5, colors.HexColor('#dee2e6')), # Lighter grid + ('VALIGN', (0, 0), (-1, -1), 'TOP'), + ('TOPPADDING', (0, 0), (-1, -1), 8), + ('BOTTOMPADDING', (0, 0), (-1, -1), 8), + ('LEFTPADDING', (0, 0), (-1, -1), 8), + ('RIGHTPADDING', (0, 0), (-1, -1), 8), + ] + + # Add zebra striping for better readability + for i in range(1, len(processed_data)): + if i % 2 == 0: + table_style.append(('BACKGROUND', (0, i), (-1, i), colors.HexColor('#f1f3f5'))) + + table.setStyle(TableStyle(table_style)) + + # Add a small spacer after the table + return [table, Spacer(1, 0.2 * inch)] + + except Exception as table_error: + print(f"Error creating table: {table_error}") + return self._convert_table_to_definition_list(data) + + except Exception as e: + print(f"Error processing table: {e}") + # Return a simple text representation of the table + try: + text_content = [] + for row in data: + text_content.append(" | ".join(str(cell) for cell in row)) + return [Paragraph("
".join(text_content), self._get_cell_style(0))] + except Exception: + return None + + def _create_horizontal_line(self, width: float = 1, color: str = None): + """Create a horizontal line with customizable width and color + + Args: + width: Line thickness in points (default: 1) + color: Hex color string (default: grey) + + Returns: + HRFlowable: Horizontal line element + """ + from reportlab.platypus import HRFlowable + line_color = colors.HexColor(color) if color else colors.grey + return HRFlowable(width="100%", thickness=width, color=line_color, spaceBefore=0, spaceAfter=0) + + def _add_logo(self) -> Optional[Image]: + """Add logo image to PDF""" + try: + # Check if it's base64 or file path + if self._param.logo_image.startswith('data:image'): + # Extract base64 data + base64_data = self._param.logo_image.split(',')[1] + image_data = base64.b64decode(base64_data) + img = Image(BytesIO(image_data)) + elif os.path.exists(self._param.logo_image): + img = Image(self._param.logo_image) + else: + return None + + # Set size + img.drawWidth = self._param.logo_width * inch + img.drawHeight = self._param.logo_height * inch + + # Set alignment + if self._param.logo_position == 'center': + img.hAlign = 'CENTER' + elif self._param.logo_position == 'right': + img.hAlign = 'RIGHT' + else: + img.hAlign = 'LEFT' + + return img + except Exception as e: + print(f"Error adding logo: {e}") + return None + + def _add_page_decorations(self, canvas, doc): + """Add header, footer, page numbers, watermark""" + canvas.saveState() + + # Get active font for decorations + active_font = self._get_active_font() + + # Add watermark + if self._param.watermark_text: + canvas.setFont(active_font, 60) + canvas.setFillColorRGB(0.9, 0.9, 0.9, alpha=0.3) + canvas.saveState() + canvas.translate(doc.pagesize[0] / 2, doc.pagesize[1] / 2) + canvas.rotate(45) + canvas.drawCentredString(0, 0, self._param.watermark_text) + canvas.restoreState() + + # Add header + if self._param.header_text: + 
canvas.setFont(active_font, 9) + canvas.setFillColorRGB(0.5, 0.5, 0.5) + canvas.drawString(doc.leftMargin, doc.pagesize[1] - 0.5 * inch, self._param.header_text) + + # Add footer + if self._param.footer_text: + canvas.setFont(active_font, 9) + canvas.setFillColorRGB(0.5, 0.5, 0.5) + canvas.drawString(doc.leftMargin, 0.5 * inch, self._param.footer_text) + + # Add page numbers + if self._param.add_page_numbers: + page_num = canvas.getPageNumber() + text = f"Page {page_num}" + canvas.setFont(active_font, 9) + canvas.setFillColorRGB(0.5, 0.5, 0.5) + canvas.drawRightString(doc.pagesize[0] - doc.rightMargin, 0.5 * inch, text) + + canvas.restoreState() + + def thoughts(self) -> str: + return "Generating PDF document with formatted content..." + + def _generate_docx(self, content: str, title: str = "", subtitle: str = "") -> tuple[str, str]: + """Generate DOCX from markdown-style content""" + import uuid + from docx import Document + from docx.shared import Pt + from docx.enum.text import WD_ALIGN_PARAGRAPH + + # Create output directory if it doesn't exist + os.makedirs(self._param.output_directory, exist_ok=True) + + try: + # Generate filename + if self._param.filename: + base_name = os.path.splitext(self._param.filename)[0] + filename = f"{base_name}_{uuid.uuid4().hex[:8]}.docx" + else: + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + filename = f"document_{timestamp}_{uuid.uuid4().hex[:8]}.docx" + + file_path = os.path.join(self._param.output_directory, filename) + + # Create document + doc = Document() + + # Add title + if title: + title_para = doc.add_heading(title, level=0) + title_para.alignment = WD_ALIGN_PARAGRAPH.CENTER + + # Add subtitle + if subtitle: + subtitle_para = doc.add_heading(subtitle, level=1) + subtitle_para.alignment = WD_ALIGN_PARAGRAPH.CENTER + + # Add timestamp if enabled + if self._param.add_timestamp: + timestamp_text = f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}" + ts_para = doc.add_paragraph(timestamp_text) + 
ts_para.runs[0].italic = True + ts_para.runs[0].font.size = Pt(9) + + # Parse and add content + lines = content.split('\n') + i = 0 + while i < len(lines): + line = lines[i].strip() + + if not line: + i += 1 + continue + + # Headings + if line.startswith('# ') and not line.startswith('## '): + doc.add_heading(line[2:].strip(), level=1) + elif line.startswith('## ') and not line.startswith('### '): + doc.add_heading(line[3:].strip(), level=2) + elif line.startswith('### '): + doc.add_heading(line[4:].strip(), level=3) + # Bullet list + elif line.startswith('- ') or line.startswith('* '): + doc.add_paragraph(line[2:].strip(), style='List Bullet') + # Numbered list + elif re.match(r'^\d+\.\s', line): + text = re.sub(r'^\d+\.\s', '', line) + doc.add_paragraph(text, style='List Number') + # Regular paragraph + else: + para = doc.add_paragraph(line) + para.runs[0].font.size = Pt(self._param.font_size) + + i += 1 + + # Save document + doc.save(file_path) + + # Read and encode to base64 + with open(file_path, 'rb') as f: + doc_bytes = f.read() + doc_base64 = base64.b64encode(doc_bytes).decode('utf-8') + + return file_path, doc_base64 + + except Exception as e: + raise Exception(f"DOCX generation failed: {str(e)}") + + def _generate_txt(self, content: str, title: str = "", subtitle: str = "") -> tuple[str, str]: + """Generate TXT from markdown-style content""" + import uuid + + # Create output directory if it doesn't exist + os.makedirs(self._param.output_directory, exist_ok=True) + + try: + # Generate filename + if self._param.filename: + base_name = os.path.splitext(self._param.filename)[0] + filename = f"{base_name}_{uuid.uuid4().hex[:8]}.txt" + else: + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + filename = f"document_{timestamp}_{uuid.uuid4().hex[:8]}.txt" + + file_path = os.path.join(self._param.output_directory, filename) + + # Build text content + text_content = [] + + if title: + text_content.append(title.upper()) + text_content.append("=" * len(title)) + 
text_content.append("") + + if subtitle: + text_content.append(subtitle) + text_content.append("-" * len(subtitle)) + text_content.append("") + + if self._param.add_timestamp: + timestamp_text = f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}" + text_content.append(timestamp_text) + text_content.append("") + + # Add content (keep markdown formatting for readability) + text_content.append(content) + + # Join and save + final_text = '\n'.join(text_content) + + with open(file_path, 'w', encoding='utf-8') as f: + f.write(final_text) + + # Encode to base64 + txt_base64 = base64.b64encode(final_text.encode('utf-8')).decode('utf-8') + + return file_path, txt_base64 + + except Exception as e: + raise Exception(f"TXT generation failed: {str(e)}") diff --git a/docs/guides/agent/agent_component_reference/docs_generator.md b/docs/guides/agent/agent_component_reference/docs_generator.md new file mode 100644 index 000000000..2fc0700f7 --- /dev/null +++ b/docs/guides/agent/agent_component_reference/docs_generator.md @@ -0,0 +1,241 @@ +--- +sidebar_position: 35 +slug: /docs_generator +--- + +# Docs Generator component + +A component that generates downloadable PDF, DOCX, or TXT documents from markdown-style content with full Unicode support. + +--- + +The **Docs Generator** component enables you to create professional documents directly within your agent workflow. It accepts markdown-formatted text and converts it into downloadable files, making it ideal for generating reports, summaries, or any structured document output. 
+ +## Key features + +- **Multiple output formats**: PDF, DOCX, and TXT +- **Full Unicode support**: Automatic font switching for CJK (Chinese, Japanese, Korean), Arabic, Hebrew, and other non-Latin scripts +- **Rich formatting**: Headers, lists, tables, code blocks, and more +- **Customizable styling**: Fonts, margins, page size, and orientation +- **Document extras**: Logo, watermark, page numbers, and timestamps +- **Direct download**: Generates a download button for the chat interface + +## Prerequisites + +- Content to be converted into a document (typically from an **Agent** or other text-generating component). + +## Examples + +You can pair an **Agent** component with the **Docs Generator** to create dynamic documents based on user queries. The **Agent** generates the content, and the **Docs Generator** converts it into a downloadable file. Connect the output to a **Message** component to display the download button in the chat. + +A typical workflow looks like: + +``` +Begin → Agent → Docs Generator → Message +``` + +In the **Message** component, reference the `download` output variable from the **Docs Generator** to display a download button in the chat interface. + +## Configurations + +### Content + +The main text content to include in the document. Supports markdown formatting: + +- **Bold**: `**text**` or `__text__` +- **Italic**: `*text*` or `_text_` +- **Inline code**: `` `code` `` +- **Headings**: `# Heading 1`, `## Heading 2`, `### Heading 3` +- **Bullet lists**: `- item` or `* item` +- **Numbered lists**: `1. item` +- **Tables**: `| Column 1 | Column 2 |` +- **Horizontal lines**: `---` +- **Code blocks**: ` ``` code ``` ` + +:::tip NOTE +Click **(x)** or type `/` to insert variables from upstream components. +::: + +### Title + +Optional. The document title displayed at the top of the generated file. + +### Subtitle + +Optional. A subtitle displayed below the title. 
+ +### Output format + +The file format for the generated document: + +- **PDF** (default): Portable Document Format with full styling support. +- **DOCX**: Microsoft Word format. +- **TXT**: Plain text format. + +### Logo image + +Optional. A logo image to display at the top of the document. You can either: + +- Upload an image file using the file picker +- Paste a server-side image path or a base64-encoded data URI (`data:image/...;base64,...`) + +### Logo position + +The horizontal position of the logo: + +- **left** (default) +- **center** +- **right** + +### Logo dimensions + +- **Logo width**: Width in inches (default: `2.0`) +- **Logo height**: Height in inches (default: `1.0`) + +### Font family + +The font used throughout the document: + +- **Helvetica** (default) +- **Times-Roman** +- **Courier** +- **Helvetica-Bold** +- **Times-Bold** + +### Font size + +The base font size in points. Defaults to `12`. + +### Title font size + +The font size for the document title. Defaults to `24`. + +### Page size + +The paper size for the document: + +- **A4** (default) +- **Letter** + +### Orientation + +The page orientation: + +- **Portrait** (default) +- **Landscape** + +### Margins + +Page margins in inches: + +- **Margin top**: Defaults to `1.0` +- **Margin bottom**: Defaults to `1.0` +- **Margin left**: Defaults to `1.0` +- **Margin right**: Defaults to `1.0` + +### Filename + +Optional. Custom filename for the generated document. If left empty, a filename is auto-generated with a timestamp. + +### Output directory + +The server directory where generated documents are saved. Defaults to `/tmp/pdf_outputs`. + +### Add page numbers + +When enabled, page numbers are added to the footer of each page. Defaults to `true`. + +### Add timestamp + +When enabled, a generation timestamp is added to the document. In DOCX and TXT output it appears directly below the title and subtitle. Defaults to `true`. + +### Watermark text + +Optional. Text to display as a diagonal watermark across each page. Useful for marking documents as "Draft", "Confidential", etc. 
+ +## Output + +The **Docs Generator** component provides the following output variables: + +| Variable name | Type | Description | +| ------------- | --------- | --------------------------------------------------------------------------- | +| `file_path` | `string` | The server path where the generated document is saved. | +| `pdf_base64` | `string` | The document content encoded in base64 format. | +| `download` | `string` | JSON containing download information for the chat interface. | +| `success` | `boolean` | Indicates whether the document was generated successfully. | + +### Displaying the download button + +To display a download button in the chat, add a **Message** component after the **Docs Generator** and reference the `download` variable: + +1. Connect the **Docs Generator** output to a **Message** component. +2. In the **Message** component's content field, type `/` and select `{Docs Generator_0@download}`. +3. When the agent runs, a download button will appear in the chat, allowing users to download the generated document. + +The download button automatically handles: +- File type detection (PDF, DOCX, TXT) +- Proper MIME type for browser downloads +- Base64 decoding for direct file delivery + +## Unicode and multi-language support + +The **Docs Generator** includes intelligent font handling for international content: + +### How it works + +1. **Content analysis**: The component scans the text for non-Latin characters. +2. **Automatic font switching**: When CJK or other complex scripts are detected, the system automatically switches to a compatible CID font (STSong-Light for Chinese, HeiseiMin-W3 for Japanese, HYSMyeongJo-Medium for Korean). +3. **Latin content**: For documents containing only Latin characters (including extended Latin, Cyrillic, and Greek), the user-selected font family is used. 
+ +### Supported scripts + +| Script | Unicode Range | Font Used | +| ------ | ------------- | --------- | +| Chinese (CJK) | U+4E00–U+9FFF | STSong-Light | +| Japanese (Hiragana/Katakana) | U+3040–U+30FF | HeiseiMin-W3 | +| Korean (Hangul) | U+AC00–U+D7AF | HYSMyeongJo-Medium | +| Arabic | U+0600–U+06FF | CID font fallback | +| Hebrew | U+0590–U+05FF | CID font fallback | +| Devanagari (Hindi) | U+0900–U+097F | CID font fallback | +| Thai | U+0E00–U+0E7F | CID font fallback | + +### Font installation + +For full multi-language support in self-hosted deployments, ensure Unicode fonts are installed: + +**Linux (Debian/Ubuntu):** +```bash +apt-get install fonts-freefont-ttf fonts-noto-cjk +``` + +**Docker:** The official RAGFlow Docker image includes these fonts. For custom images, add the font packages to your Dockerfile: +```dockerfile +RUN apt-get update && apt-get install -y fonts-freefont-ttf fonts-noto-cjk +``` + +:::tip NOTE +CID fonts (STSong-Light, HeiseiMin-W3, etc.) are built into ReportLab and do not require additional installation. They are used automatically when CJK content is detected. +::: + +## Troubleshooting + +### Characters appear as boxes or question marks + +This indicates missing font support. Ensure: +1. The content contains supported Unicode characters. +2. For self-hosted deployments, Unicode fonts are installed on the server. +3. The document is being viewed in a PDF reader that supports embedded fonts. + +### Download button not appearing + +Ensure: +1. The **Message** component is connected after the **Docs Generator**. +2. The `download` variable is correctly referenced using `/` (which appears as `{Docs Generator_0@download}` when copied). +3. The document generation completed successfully (check `success` output). + +### Large tables not rendering correctly + +For tables with many columns or large cell content: +- The component automatically converts wide tables to a definition list format for better readability. 
+- Consider splitting large tables into multiple smaller tables. +- Use landscape orientation for wide tables. diff --git a/pyproject.toml b/pyproject.toml index cb714db6c..d4770ab21 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -154,8 +154,10 @@ dependencies = [ "exceptiongroup>=1.3.0,<2.0.0", "ffmpeg-python>=0.2.0", "imageio-ffmpeg>=0.6.0", + "reportlab>=4.4.1", + "jinja2>=3.1.0", "boxsdk>=10.1.0", - "aiosmtplib>=5.0.0", + "aiosmtplib>=5.0.0" ] [dependency-groups] diff --git a/uv.lock b/uv.lock index d8d837f17..9ba76da22 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 3 +revision = 2 requires-python = ">=3.12, <3.15" resolution-markers = [ "python_full_version >= '3.14' and sys_platform == 'darwin'", @@ -3279,7 +3279,7 @@ wheels = [ [[package]] name = "jupyter-client" -version = "8.6.3" +version = "8.7.0" source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } dependencies = [ { name = "jupyter-core" }, @@ -3288,9 +3288,9 @@ dependencies = [ { name = "tornado" }, { name = "traitlets" }, ] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/71/22/bf9f12fdaeae18019a468b68952a60fe6dbab5d67cd2a103cac7659b41ca/jupyter_client-8.6.3.tar.gz", hash = "sha256:35b3a0947c4a6e9d589eb97d7d4cd5e90f910ee73101611f01283732bd6d9419", size = 342019, upload-time = "2024-09-17T10:44:17.613Z" } +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a6/27/d10de45e8ad4ce872372c4a3a37b7b35b6b064f6f023a5c14ffcced4d59d/jupyter_client-8.7.0.tar.gz", hash = "sha256:3357212d9cbe01209e59190f67a3a7e1f387a4f4e88d1e0433ad84d7b262531d", size = 344691, upload-time = "2025-12-09T18:37:01.953Z" } wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/11/85/b0394e0b6fcccd2c1eeefc230978a6f8cb0c5df1e4cd3e7625735a0d7d1e/jupyter_client-8.6.3-py3-none-any.whl", hash = "sha256:e8a19cc986cc45905ac3362915f410f3af85424b4c0905e94fa5f2cb08e8f23f", size = 106105, upload-time = "2024-09-17T10:44:15.218Z" }, + { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/bb/f5/fddaec430367be9d62a7ed125530e133bfd4a1c0350fe221149ee0f2b526/jupyter_client-8.7.0-py3-none-any.whl", hash = "sha256:3671a94fd25e62f5f2f554f5e95389c2294d89822378a5f2dd24353e1494a9e0", size = 106215, upload-time = "2025-12-09T18:37:00.024Z" }, ] [[package]] @@ -5979,6 +5979,7 @@ dependencies = [ { name = "infinity-emb" }, { name = "infinity-sdk" }, { name = "itsdangerous" }, + { name = "jinja2" }, { name = "jira" }, { name = "json-repair" }, { name = "langfuse" }, @@ -6036,6 +6037,7 @@ dependencies = [ { name = "ranx" }, { name = "readability-lxml" }, { name = "replicate" }, + { name = "reportlab" }, { name = "requests" }, { name = "roman-numbers" }, { name = "ruamel-base" }, @@ -6148,6 +6150,7 @@ requires-dist = [ { name = "infinity-emb", specifier = ">=0.0.66,<0.0.67" }, { name = "infinity-sdk", specifier = "==0.6.11" }, { name = "itsdangerous", specifier = "==2.1.2" }, + { name = "jinja2", specifier = ">=3.1.0" }, { name = "jira", specifier = "==3.10.5" }, { name = "json-repair", specifier = "==0.35.0" }, { name = "langfuse", specifier = ">=2.60.0" }, @@ -6205,6 +6208,7 @@ requires-dist = [ { name = "ranx", specifier = "==0.3.20" }, { name = "readability-lxml", specifier = ">=0.8.4,<1.0.0" }, { name = "replicate", specifier = "==0.31.0" }, + { name = "reportlab", specifier = ">=4.4.1" }, { name = "requests", specifier = ">=2.32.3,<3.0.0" }, { name = "roman-numbers", specifier = "==1.0.2" }, { name = "ruamel-base", specifier = "==1.0.0" }, @@ -7409,21 +7413,21 @@ wheels = [ [[package]] name = "tornado" -version = "6.5.2" +version = "6.5.3" source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/09/ce/1eb500eae19f4648281bb2186927bb062d2438c2e5093d1360391afd2f90/tornado-6.5.2.tar.gz", hash = "sha256:ab53c8f9a0fa351e2c0741284e06c7a45da86afb544133201c5cc8578eb076a0", size = 510821, upload-time = "2025-08-08T18:27:00.78Z" } +sdist = { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/7f/2e/3d22d478f27cb4b41edd4db7f10cd7846d0a28ea443342de3dba97035166/tornado-6.5.3.tar.gz", hash = "sha256:16abdeb0211796ffc73765bc0a20119712d68afeeaf93d1a3f2edf6b3aee8d5a", size = 513348, upload-time = "2025-12-11T04:16:42.225Z" } wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f6/48/6a7529df2c9cc12efd2e8f5dd219516184d703b34c06786809670df5b3bd/tornado-6.5.2-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:2436822940d37cde62771cff8774f4f00b3c8024fe482e16ca8387b8a2724db6", size = 442563, upload-time = "2025-08-08T18:26:42.945Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f2/b5/9b575a0ed3e50b00c40b08cbce82eb618229091d09f6d14bce80fc01cb0b/tornado-6.5.2-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:583a52c7aa94ee046854ba81d9ebb6c81ec0fd30386d96f7640c96dad45a03ef", size = 440729, upload-time = "2025-08-08T18:26:44.473Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/1b/4e/619174f52b120efcf23633c817fd3fed867c30bff785e2cd5a53a70e483c/tornado-6.5.2-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b0fe179f28d597deab2842b86ed4060deec7388f1fd9c1b4a41adf8af058907e", size = 444295, upload-time = "2025-08-08T18:26:46.021Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/95/fa/87b41709552bbd393c85dd18e4e3499dcd8983f66e7972926db8d96aa065/tornado-6.5.2-cp39-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b186e85d1e3536d69583d2298423744740986018e393d0321df7340e71898882", size = 443644, upload-time = "2025-08-08T18:26:47.625Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f9/41/fb15f06e33d7430ca89420283a8762a4e6b8025b800ea51796ab5e6d9559/tornado-6.5.2-cp39-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e792706668c87709709c18b353da1f7662317b563ff69f00bab83595940c7108", size = 443878, upload-time = "2025-08-08T18:26:50.599Z" }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/11/92/fe6d57da897776ad2e01e279170ea8ae726755b045fe5ac73b75357a5a3f/tornado-6.5.2-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:06ceb1300fd70cb20e43b1ad8aaee0266e69e7ced38fa910ad2e03285009ce7c", size = 444549, upload-time = "2025-08-08T18:26:51.864Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/9b/02/c8f4f6c9204526daf3d760f4aa555a7a33ad0e60843eac025ccfd6ff4a93/tornado-6.5.2-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:74db443e0f5251be86cbf37929f84d8c20c27a355dd452a5cfa2aada0d001ec4", size = 443973, upload-time = "2025-08-08T18:26:53.625Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ae/2d/f5f5707b655ce2317190183868cd0f6822a1121b4baeae509ceb9590d0bd/tornado-6.5.2-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:b5e735ab2889d7ed33b32a459cac490eda71a1ba6857b0118de476ab6c366c04", size = 443954, upload-time = "2025-08-08T18:26:55.072Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e8/59/593bd0f40f7355806bf6573b47b8c22f8e1374c9b6fd03114bd6b7a3dcfd/tornado-6.5.2-cp39-abi3-win32.whl", hash = "sha256:c6f29e94d9b37a95013bb669616352ddb82e3bfe8326fccee50583caebc8a5f0", size = 445023, upload-time = "2025-08-08T18:26:56.677Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c7/2a/f609b420c2f564a748a2d80ebfb2ee02a73ca80223af712fca591386cafb/tornado-6.5.2-cp39-abi3-win_amd64.whl", hash = "sha256:e56a5af51cc30dd2cae649429af65ca2f6571da29504a07995175df14c18f35f", size = 445427, upload-time = "2025-08-08T18:26:57.91Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/5e/4f/e1f65e8f8c76d73658b33d33b81eed4322fb5085350e4328d5c956f0c8f9/tornado-6.5.2-cp39-abi3-win_arm64.whl", hash = "sha256:d6c33dc3672e3a1f3618eb63b7ef4683a7688e7b9e6e8f0d9aa5726360a004af", size = 444456, upload-time = "2025-08-08T18:26:59.207Z" }, + { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/d3/e9/bf22f66e1d5d112c0617974b5ce86666683b32c09b355dfcd59f8d5c8ef6/tornado-6.5.3-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:2dd7d7e8d3e4635447a8afd4987951e3d4e8d1fb9ad1908c54c4002aabab0520", size = 443860, upload-time = "2025-12-11T04:16:26.638Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ca/9c/594b631f0b8dc5977080c7093d1e96f1377c10552577d2c31bb0208c9362/tornado-6.5.3-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:5977a396f83496657779f59a48c38096ef01edfe4f42f1c0634b791dde8165d0", size = 442118, upload-time = "2025-12-11T04:16:28.32Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/78/f6/685b869f5b5b9d9547571be838c6106172082751696355b60fc32a4988ed/tornado-6.5.3-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f72ac800be2ac73ddc1504f7aa21069a4137e8d70c387172c063d363d04f2208", size = 445700, upload-time = "2025-12-11T04:16:29.64Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/91/4c/f0d19edf24912b7f21ae5e941f7798d132ad4d9b71441c1e70917a297265/tornado-6.5.3-cp39-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c43c4fc4f5419c6561cfb8b884a8f6db7b142787d47821e1a0e1296253458265", size = 445041, upload-time = "2025-12-11T04:16:30.799Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/eb/2b/e02da94f4a4aef2bb3b923c838ef284a77548a5f06bac2a8682b36b4eead/tornado-6.5.3-cp39-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:de8b3fed4b3afb65d542d7702ac8767b567e240f6a43020be8eaef59328f117b", size = 445270, upload-time = "2025-12-11T04:16:32.316Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/58/e2/7a7535d23133443552719dba526dacbb7415f980157da9f14950ddb88ad6/tornado-6.5.3-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:dbc4b4c32245b952566e17a20d5c1648fbed0e16aec3fc7e19f3974b36e0e47c", size = 445957, upload-time = 
"2025-12-11T04:16:33.913Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a0/1f/9ff92eca81ff17a86286ec440dcd5eab0400326eb81761aa9a4eecb1ffb9/tornado-6.5.3-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:db238e8a174b4bfd0d0238b8cfcff1c14aebb4e2fcdafbf0ea5da3b81caceb4c", size = 445371, upload-time = "2025-12-11T04:16:35.093Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/70/b1/1d03ae4526a393b0b839472a844397337f03c7f3a1e6b5c82241f0e18281/tornado-6.5.3-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:892595c100cd9b53a768cbfc109dfc55dec884afe2de5290611a566078d9692d", size = 445348, upload-time = "2025-12-11T04:16:36.679Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/4b/7d/7c181feadc8941f418d0d26c3790ee34ffa4bd0a294bc5201d44ebd19c1e/tornado-6.5.3-cp39-abi3-win32.whl", hash = "sha256:88141456525fe291e47bbe1ba3ffb7982549329f09b4299a56813923af2bd197", size = 446433, upload-time = "2025-12-11T04:16:38.332Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/34/98/4f7f938606e21d0baea8c6c39a7c8e95bdf8e50b0595b1bb6f0de2af7a6e/tornado-6.5.3-cp39-abi3-win_amd64.whl", hash = "sha256:ba4b513d221cc7f795a532c1e296f36bcf6a60e54b15efd3f092889458c69af1", size = 446842, upload-time = "2025-12-11T04:16:39.867Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7a/27/0e3fca4c4edf33fb6ee079e784c63961cd816971a45e5e4cacebe794158d/tornado-6.5.3-cp39-abi3-win_arm64.whl", hash = "sha256:278c54d262911365075dd45e0b6314308c74badd6ff9a54490e7daccdd5ed0ea", size = 445863, upload-time = "2025-12-11T04:16:41.099Z" }, ] [[package]] diff --git a/web/src/components/message-item/index.tsx b/web/src/components/message-item/index.tsx index e42a73f96..13fdaab7c 100644 --- a/web/src/components/message-item/index.tsx +++ b/web/src/components/message-item/index.tsx @@ -14,6 +14,11 @@ import { cn } from '@/lib/utils'; import MarkdownContent from '../markdown-content'; import { ReferenceDocumentList } from '../next-message-item/reference-document-list'; import { 
UploadedMessageFiles } from '../next-message-item/uploaded-message-files'; +import { + PDFDownloadButton, + extractPDFDownloadInfo, + removePDFDownloadInfo, +} from '../pdf-download-button'; import { RAGFlowAvatar } from '../ragflow-avatar'; import { useTheme } from '../theme-provider'; import { AssistantGroupButton, UserGroupButton } from './group-button'; @@ -61,6 +66,20 @@ const MessageItem = ({ return reference?.doc_aggs ?? []; }, [reference?.doc_aggs]); + // Extract PDF download info from message content + const pdfDownloadInfo = useMemo( + () => extractPDFDownloadInfo(item.content), + [item.content], + ); + + // If we have PDF download info, extract the remaining text + const messageContent = useMemo(() => { + if (!pdfDownloadInfo) return item.content; + + // Remove the JSON part from the content to avoid showing it + return removePDFDownloadInfo(item.content, pdfDownloadInfo); + }, [item.content, pdfDownloadInfo]); + const handleRegenerateMessage = useCallback(() => { regenerateMessage?.(item); }, [regenerateMessage, item]); @@ -122,23 +141,34 @@ const MessageItem = ({ > )} -
- -
+ {/* Show PDF download button if download info is present */} + {pdfDownloadInfo && ( + + )} + + {/* Show message content if there's any text besides the download */} + {messageContent && ( +
+ +
+ )} {isAssistant && referenceDocumentList.length > 0 && ( extractPDFDownloadInfo(item.content), + [item.content], + ); + + // If we have PDF download info, extract the remaining text + const messageContent = useMemo(() => { + if (!pdfDownloadInfo) return item.content; + + // Remove the JSON part from the content to avoid showing it + return removePDFDownloadInfo(item.content, pdfDownloadInfo); + }, [item.content, pdfDownloadInfo]); + const handleRegenerateMessage = useCallback(() => { regenerateMessage?.(item); }, [regenerateMessage, item]); @@ -219,28 +239,39 @@ function MessageItem({ /> )} -
- {item.data ? ( - children - ) : sendLoading && isEmpty(item.content) ? ( - <>{!isShare && 'running...'} - ) : ( - - )} -
+ {/* Show PDF download button if download info is present */} + {pdfDownloadInfo && ( + + )} + + {/* Show message content if there's any text besides the download */} + {messageContent && ( +
+ {item.data ? ( + children + ) : sendLoading && isEmpty(messageContent) ? ( + <>{!isShare && 'running...'} + ) : ( + + )} +
+ )} {isAssistant && referenceDocuments.length > 0 && ( + )} {/* {isAssistant && item.attachment && item.attachment.doc_id && (
diff --git a/web/src/components/pdf-download-button/index.tsx b/web/src/components/pdf-download-button/index.tsx new file mode 100644 index 000000000..9d7511274 --- /dev/null +++ b/web/src/components/pdf-download-button/index.tsx @@ -0,0 +1,196 @@ +import { Button } from '@/components/ui/button'; +import { Download, FileText } from 'lucide-react'; +import { useCallback } from 'react'; + +interface DocumentDownloadInfo { + filename: string; + base64: string; + mime_type: string; +} + +interface DocumentDownloadButtonProps { + downloadInfo: DocumentDownloadInfo; + className?: string; +} + +export function PDFDownloadButton({ + downloadInfo, + className, +}: DocumentDownloadButtonProps) { + const handleDownload = useCallback(() => { + try { + // Convert base64 to blob + const byteCharacters = atob(downloadInfo.base64); + const byteNumbers = new Array(byteCharacters.length); + for (let i = 0; i < byteCharacters.length; i++) { + byteNumbers[i] = byteCharacters.charCodeAt(i); + } + const byteArray = new Uint8Array(byteNumbers); + const blob = new Blob([byteArray], { type: downloadInfo.mime_type }); + + // Create download link + const url = window.URL.createObjectURL(blob); + const link = document.createElement('a'); + link.href = url; + link.download = downloadInfo.filename; + document.body.appendChild(link); + link.click(); + + // Cleanup + document.body.removeChild(link); + window.URL.revokeObjectURL(url); + } catch (error) { + console.error('Error downloading document:', error); + } + }, [downloadInfo]); + + // Determine document type from mime_type or filename + const getDocumentType = () => { + if (downloadInfo.mime_type === 'application/pdf') return 'PDF Document'; + if ( + downloadInfo.mime_type === + 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' + ) + return 'Word Document'; + if (downloadInfo.mime_type === 'text/plain') return 'Text Document'; + + // Fallback to file extension + const ext = 
downloadInfo.filename.split('.').pop()?.toUpperCase(); + if (ext === 'PDF') return 'PDF Document'; + if (ext === 'DOCX') return 'Word Document'; + if (ext === 'TXT') return 'Text Document'; + + return 'Document'; + }; + + return ( +
+
+
+ +
+
+
+
+ {downloadInfo.filename} +
+
{getDocumentType()}
+
+ +
+ ); +} + +// Helper function to detect if content contains document download info +export function extractPDFDownloadInfo( + content: string, +): DocumentDownloadInfo | null { + try { + // Try to parse as JSON first (for pure JSON content) + const parsed = JSON.parse(content); + if (parsed && parsed.filename && parsed.base64 && parsed.mime_type) { + // Accept PDF, DOCX, and TXT formats + const validMimeTypes = [ + 'application/pdf', + 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', + 'text/plain', + ]; + if (validMimeTypes.includes(parsed.mime_type)) { + return parsed as DocumentDownloadInfo; + } + } + } catch { + // If direct parsing fails, try to extract JSON object from mixed content + // Look for a JSON object that contains the required fields + // This regex finds a balanced JSON object by counting braces + const startPattern = /\{[^{}]*"filename"[^{}]*:/g; + let match; + + while ((match = startPattern.exec(content)) !== null) { + const startIndex = match.index; + let braceCount = 0; + let endIndex = startIndex; + + // Find the matching closing brace + for (let i = startIndex; i < content.length; i++) { + if (content[i] === '{') braceCount++; + if (content[i] === '}') braceCount--; + + if (braceCount === 0) { + endIndex = i + 1; + break; + } + } + + if (endIndex > startIndex) { + try { + const jsonStr = content.substring(startIndex, endIndex); + const parsed = JSON.parse(jsonStr); + if (parsed && parsed.filename && parsed.base64 && parsed.mime_type) { + // Accept PDF, DOCX, and TXT formats + const validMimeTypes = [ + 'application/pdf', + 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', + 'text/plain', + ]; + if (validMimeTypes.includes(parsed.mime_type)) { + return parsed as DocumentDownloadInfo; + } + } + } catch { + // This wasn't valid JSON, continue searching + } + } + } + } + return null; +} + +// Helper function to remove document download info from content +export function removePDFDownloadInfo( + 
content: string, + downloadInfo: DocumentDownloadInfo, +): string { + try { + // First, check if the entire content is just the JSON (most common case) + try { + const parsed = JSON.parse(content); + if ( + parsed && + parsed.filename === downloadInfo.filename && + parsed.base64 === downloadInfo.base64 + ) { + // The entire content is just the download JSON, return empty + return ''; + } + } catch { + // Content is not pure JSON, continue with removal + } + + // Try to remove the JSON string from content + const jsonStr = JSON.stringify(downloadInfo); + let cleaned = content.replace(jsonStr, '').trim(); + + // Also try with pretty-printed JSON (with indentation) + const prettyJsonStr = JSON.stringify(downloadInfo, null, 2); + cleaned = cleaned.replace(prettyJsonStr, '').trim(); + + // Also try to find and remove JSON object pattern from mixed content + // This handles cases where the JSON might have different formatting + const startPattern = /\{[^{}]*"filename"[^{}]*"base64"[^{}]*\}/g; + cleaned = cleaned.replace(startPattern, '').trim(); + + return cleaned; + } catch { + return content; + } +} diff --git a/web/src/constants/agent.tsx b/web/src/constants/agent.tsx index 2f51e24f6..efe3076d4 100644 --- a/web/src/constants/agent.tsx +++ b/web/src/constants/agent.tsx @@ -101,6 +101,7 @@ export enum Operator { UserFillUp = 'UserFillUp', StringTransform = 'StringTransform', SearXNG = 'SearXNG', + PDFGenerator = 'PDFGenerator', Placeholder = 'Placeholder', DataOperations = 'DataOperations', ListOperations = 'ListOperations', diff --git a/web/src/locales/de.ts b/web/src/locales/de.ts index 82fc60832..69b659586 100644 --- a/web/src/locales/de.ts +++ b/web/src/locales/de.ts @@ -878,6 +878,27 @@ export default { searXNG: 'SearXNG', searXNGDescription: 'Eine Komponente, die auf https://searxng.org/ sucht und Ihnen ermöglicht, die Anzahl der Suchergebnisse mit TopN anzugeben. 
Sie ergänzt die vorhandenen Wissensdatenbanken.', + pdfGenerator: 'Dokumentengenerator', + pDFGenerator: 'Dokumentengenerator', + pdfGeneratorDescription: `Eine Komponente, die Dokumente (PDF, DOCX, TXT) aus markdown-formatierten Inhalten mit anpassbarem Stil, Bildern und Tabellen generiert. Unterstützt: **fett**, *kursiv*, # Überschriften, - Listen, Tabellen mit | Syntax.`, + pDFGeneratorDescription: `Eine Komponente, die Dokumente (PDF, DOCX, TXT) aus markdown-formatierten Inhalten mit anpassbarem Stil, Bildern und Tabellen generiert. Unterstützt: **fett**, *kursiv*, # Überschriften, - Listen, Tabellen mit | Syntax.`, + subtitle: 'Untertitel', + logoImage: 'Logo-Bild', + logoPosition: 'Logo-Position', + logoWidth: 'Logo-Breite', + logoHeight: 'Logo-Höhe', + fontFamily: 'Schriftfamilie', + fontSize: 'Schriftgröße', + titleFontSize: 'Titel-Schriftgröße', + pageSize: 'Seitengröße', + orientation: 'Ausrichtung', + marginTop: 'Oberer Rand', + marginBottom: 'Unterer Rand', + filename: 'Dateiname', + outputDirectory: 'Ausgabeverzeichnis', + addPageNumbers: 'Seitenzahlen hinzufügen', + addTimestamp: 'Zeitstempel hinzufügen', + watermarkText: 'Wasserzeichentext', channel: 'Kanal', channelTip: 'Führt eine Textsuche oder Nachrichtensuche für die Eingabe der Komponente durch', diff --git a/web/src/locales/en.ts b/web/src/locales/en.ts index c879afbbd..fd9977257 100644 --- a/web/src/locales/en.ts +++ b/web/src/locales/en.ts @@ -1307,6 +1307,27 @@ Example: Virtual Hosted Style`, searXNG: 'SearXNG', searXNGDescription: 'A component that searches via your provided SearXNG instance URL. Specify TopN and the instance URL.', + pdfGenerator: 'Docs Generator', + pDFGenerator: 'Docs Generator', + pdfGeneratorDescription: `A component that generates documents (PDF, DOCX, TXT) from markdown-formatted content with customizable styling, images, and tables. 
Supports: **bold**, *italic*, # headings, - lists, tables with | syntax.`, + pDFGeneratorDescription: `A component that generates documents (PDF, DOCX, TXT) from markdown-formatted content with customizable styling, images, and tables. Supports: **bold**, *italic*, # headings, - lists, tables with | syntax.`, + subtitle: 'Subtitle', + logoImage: 'Logo Image', + logoPosition: 'Logo Position', + logoWidth: 'Logo Width', + logoHeight: 'Logo Height', + fontFamily: 'Font Family', + fontSize: 'Font Size', + titleFontSize: 'Title Font Size', + pageSize: 'Page Size', + orientation: 'Orientation', + marginTop: 'Margin Top', + marginBottom: 'Margin Bottom', + filename: 'Filename', + outputDirectory: 'Output Directory', + addPageNumbers: 'Add Page Numbers', + addTimestamp: 'Add Timestamp', + watermarkText: 'Watermark Text', channel: 'Channel', channelTip: `Perform text search or news search on the component's input`, text: 'Text', @@ -1690,7 +1711,6 @@ This delimiter is used to split the input text into several text pieces echo of datatype: 'MINE type of the HTTP request', insertVariableTip: `Enter / Insert variables`, historyversion: 'Version history', - filename: 'File name', version: { created: 'Created', details: 'Version details', diff --git a/web/src/locales/es.ts b/web/src/locales/es.ts index d45291674..a666adb59 100644 --- a/web/src/locales/es.ts +++ b/web/src/locales/es.ts @@ -578,15 +578,31 @@ export default { 'Este componente se usa para obtener resultados de búsqueda de www.baidu.com. Típicamente, actúa como un complemento a las bases de conocimiento. Top N especifica el número de resultados de búsqueda que necesitas ajustar.', duckDuckGo: 'DuckDuckGo', duckDuckGoDescription: - 'Un componente que recupera resultados de búsqueda de duckduckgo.com, con TopN especificando el número de resultados de búsqueda. 
Complementa las bases de conocimiento existentes.', + 'Un componente que busca en duckduckgo.com, permitiéndote especificar el número de resultados de búsqueda usando TopN. Supplementa las bases de conocimiento existentes.', searXNG: 'SearXNG', searXNGDescription: - 'Un componente que realiza búsquedas mediante la URL de la instancia de SearXNG que usted proporcione. Especifique TopN y la URL de la instancia.', - channel: 'Canal', - channelTip: - 'Realizar búsqueda de texto o búsqueda de noticias en la entrada del componente.', - text: 'Texto', - news: 'Noticias', + 'Un componente que busca a través de la URL de la instancia SearXNG que proporcionas. Especifica TopN y la URL de la instancia.', + pdfGenerator: 'Generador de Documentos', + pDFGenerator: 'Generador de Documentos', + pdfGeneratorDescription: `Un componente que genera documentos (PDF, DOCX, TXT) desde contenido formateado en markdown con estilo personalizable, imágenes y tablas. Soporta: **negrita**, *cursiva*, # encabezados, - listas, tablas con sintaxis |.`, + pDFGeneratorDescription: `Un componente que genera documentos (PDF, DOCX, TXT) desde contenido formateado en markdown con estilo personalizable, imágenes y tablas. 
Soporta: **negrita**, *cursiva*, # encabezados, - listas, tablas con sintaxis |.`, + subtitle: 'Subtítulo', + logoImage: 'Imagen Logo', + logoPosition: 'Posición Logo', + logoWidth: 'Ancho Logo', + logoHeight: 'Alto Logo', + fontFamily: 'Familia Fuente', + fontSize: 'Tamaño Fuente', + titleFontSize: 'Tamaño Fuente Título', + pageSize: 'Tamaño Página', + orientation: 'Orientación', + marginTop: 'Margen Superior', + marginBottom: 'Margen Inferior', + filename: 'Nombre Archivo', + outputDirectory: 'Directorio Salida', + addPageNumbers: 'Agregar Números Página', + addTimestamp: 'Agregar Timestamp', + watermarkText: 'Texto Marca Agua', messageHistoryWindowSize: 'Tamaño de la ventana del historial de mensajes', messageHistoryWindowSizeTip: diff --git a/web/src/locales/fr.ts b/web/src/locales/fr.ts index 40f15055c..99249a1f6 100644 --- a/web/src/locales/fr.ts +++ b/web/src/locales/fr.ts @@ -788,15 +788,31 @@ export default { 'Un composant qui recherche sur baidu.com, utilisant TopN pour spécifier le nombre de résultats. Il complète les bases de connaissances existantes.', duckDuckGo: 'DuckDuckGo', duckDuckGoDescription: - 'Un composant qui recherche sur duckduckgo.com, vous permettant de spécifier le nombre de résultats avec TopN. Il complète les bases de connaissances existantes.', + 'Un composant qui recherche sur duckduckgo.com, vous permettant de spécifier le nombre de résultats de recherche avec TopN. Il complète les bases de connaissances existantes.', searXNG: 'SearXNG', searXNGDescription: "Un composant qui effectue des recherches via la URL de l'instance de SearXNG que vous fournissez. 
Spécifiez TopN et l'URL de l'instance.", - channel: 'Canal', - channelTip: - "Effectuer une recherche de texte ou d'actualités sur l'entrée du composant", - text: 'Texte', - news: 'Actualités', + pdfGenerator: 'Générateur de Documents', + pDFGenerator: 'Générateur de Documents', + pdfGeneratorDescription: `Un composant qui génère des documents (PDF, DOCX, TXT) à partir de contenu formaté en markdown avec un style personnalisable, des images et des tableaux. Prend en charge : **gras**, *italique*, # titres, - listes, tableaux avec syntaxe |.`, + pDFGeneratorDescription: `Un composant qui génère des documents (PDF, DOCX, TXT) à partir de contenu formaté en markdown avec un style personnalisable, des images et des tableaux. Prend en charge : **gras**, *italique*, # titres, - listes, tableaux avec syntaxe |.`, + subtitle: 'Sous-titre', + logoImage: 'Image Logo', + logoPosition: 'Position Logo', + logoWidth: 'Largeur Logo', + logoHeight: 'Hauteur Logo', + fontFamily: 'Famille Police', + fontSize: 'Taille Police', + titleFontSize: 'Taille Police Titre', + pageSize: 'Taille Page', + orientation: 'Orientation', + marginTop: 'Marge Supérieure', + marginBottom: 'Marge Inférieure', + filename: 'Nom Fichier', + outputDirectory: 'Répertoire Sortie', + addPageNumbers: 'Ajouter Numéros Page', + addTimestamp: 'Ajouter Timestamp', + watermarkText: 'Texte Filigrane', messageHistoryWindowSize: "Taille de la fenêtre d'historique des messages", messageHistoryWindowSizeTip: @@ -1173,7 +1189,6 @@ export default { datatype: 'Type MIME de la requête HTTP', insertVariableTip: `Entrer / Insérer des variables`, historyversion: 'Historique des versions', - filename: 'Nom du fichier', version: { created: 'Créé', details: 'Détails de la version', diff --git a/web/src/locales/id.ts b/web/src/locales/id.ts index 95bd80896..f529f8524 100644 --- a/web/src/locales/id.ts +++ b/web/src/locales/id.ts @@ -770,6 +770,27 @@ export default { searXNG: 'SearXNG', searXNGDescription: 'Komponen yang melakukan 
pencarian menggunakan URL instance SearXNG yang Anda berikan. Spesifikasikan TopN dan URL instance.', + pdfGenerator: 'Pembuat Dokumen', + pDFGenerator: 'Pembuat Dokumen', + pdfGeneratorDescription: `Komponen yang menghasilkan dokumen (PDF, DOCX, TXT) dari konten berformat markdown dengan gaya yang dapat disesuaikan, gambar, dan tabel. Mendukung: **tebal**, *miring*, # judul, - daftar, tabel dengan sintaks |.`, + pDFGeneratorDescription: `Komponen yang menghasilkan dokumen (PDF, DOCX, TXT) dari konten berformat markdown dengan gaya yang dapat disesuaikan, gambar, dan tabel. Mendukung: **tebal**, *miring*, # judul, - daftar, tabel dengan sintaks |.`, + subtitle: 'Subjudul', + logoImage: 'Gambar Logo', + logoPosition: 'Posisi Logo', + logoWidth: 'Lebar Logo', + logoHeight: 'Tinggi Logo', + fontFamily: 'Keluarga Font', + fontSize: 'Ukuran Font', + titleFontSize: 'Ukuran Font Judul', + pageSize: 'Ukuran Halaman', + orientation: 'Orientasi', + marginTop: 'Margin Atas', + marginBottom: 'Margin Bawah', + filename: 'Nama File', + outputDirectory: 'Direktori Output', + addPageNumbers: 'Tambahkan Nomor Halaman', + addTimestamp: 'Tambahkan Timestamp', + watermarkText: 'Teks Watermark', channel: 'Saluran', channelTip: `Lakukan pencarian teks atau pencarian berita pada input komponen`, text: 'Teks', diff --git a/web/src/locales/it.ts b/web/src/locales/it.ts index 505d188f9..7e99681ae 100644 --- a/web/src/locales/it.ts +++ b/web/src/locales/it.ts @@ -930,6 +930,30 @@ Quanto sopra è il contenuto che devi riassumere.`, duckDuckGo: 'DuckDuckGo', duckDuckGoDescription: 'Un componente che cerca da duckduckgo.com, permettendo di specificare il numero di risultati di ricerca usando TopN.', + searXNG: 'SearXNG', + searXNGDescription: + "Un componente che cerca tramite l'URL dell'istanza SearXNG fornita. 
Specifica TopN e l'URL dell'istanza.", + pdfGenerator: 'Generatore Documenti', + pDFGenerator: 'Generatore Documenti', + pdfGeneratorDescription: `Un componente che genera documenti (PDF, DOCX, TXT) da contenuti formattati in markdown con stile personalizzabile, immagini e tabelle. Supporta: **grassetto**, *corsivo*, # titoli, - elenchi, tabelle con sintassi |.`, + pDFGeneratorDescription: `Un componente che genera documenti (PDF, DOCX, TXT) da contenuti formattati in markdown con stile personalizzabile, immagini e tabelle. Supporta: **grassetto**, *corsivo*, # titoli, - elenchi, tabelle con sintassi |.`, + subtitle: 'Sottotitolo', + logoImage: 'Immagine Logo', + logoPosition: 'Posizione Logo', + logoWidth: 'Larghezza Logo', + logoHeight: 'Altezza Logo', + fontFamily: 'Famiglia Font', + fontSize: 'Dimensione Font', + titleFontSize: 'Dimensione Font Titolo', + pageSize: 'Dimensione Pagina', + orientation: 'Orientamento', + marginTop: 'Margine Superiore', + marginBottom: 'Margine Inferiore', + filename: 'Nome File', + outputDirectory: 'Directory Output', + addPageNumbers: 'Aggiungi Numeri Pagina', + addTimestamp: 'Aggiungi Timestamp', + watermarkText: 'Testo Filigrana', channel: 'Canale', channelTip: `Esegui ricerca testo o notizie sull'input del componente`, text: 'Testo', diff --git a/web/src/locales/ja.ts b/web/src/locales/ja.ts index cfcf0a981..7d139a27b 100644 --- a/web/src/locales/ja.ts +++ b/web/src/locales/ja.ts @@ -795,11 +795,27 @@ export default { searXNG: 'SearXNG', searXNGDescription: 'SearXNGのインスタンスURLを提供して検索を行うコンポーネント。TopNとインスタンスURLを指定してください。', - channel: 'チャンネル', - channelTip: `コンポーネントの入力に対してテキスト検索またはニュース検索を実行します`, - text: 'テキスト', - news: 'ニュース', - messageHistoryWindowSize: 'メッセージウィンドウサイズ', + pdfGenerator: 'ドキュメント生成', + pDFGenerator: 'ドキュメント生成', + pdfGeneratorDescription: `マークダウン形式のコンテンツからドキュメント(PDF、DOCX、TXT)を生成するコンポーネント。カスタムスタイル、画像、テーブルをサポート。サポート:**太字**、*斜体*、# 見出し、- リスト、| 構文のテーブル。`, + pDFGeneratorDescription: 
`マークダウン形式のコンテンツからドキュメント(PDF、DOCX、TXT)を生成するコンポーネント。カスタムスタイル、画像、テーブルをサポート。サポート:**太字**、*斜体*、# 見出し、- リスト、| 構文のテーブル。`, + subtitle: 'サブタイトル', + logoImage: 'ロゴ画像', + logoPosition: 'ロゴ位置', + logoWidth: 'ロゴ幅', + logoHeight: 'ロゴ高さ', + fontFamily: 'フォントファミリー', + fontSize: 'フォントサイズ', + titleFontSize: 'タイトルフォントサイズ', + pageSize: 'ページサイズ', + orientation: '向き', + marginTop: '上余白', + marginBottom: '下余白', + filename: 'ファイル名', + outputDirectory: '出力ディレクトリ', + addPageNumbers: 'ページ番号を追加', + addTimestamp: 'タイムスタンプを追加', + watermarkText: '透かしテキスト', messageHistoryWindowSizeTip: 'LLMに表示される会話履歴のウィンドウサイズ。大きいほど良いですが、LLMの最大トークン制限に注意してください。', wikipedia: 'Wikipedia', diff --git a/web/src/locales/pt-br.ts b/web/src/locales/pt-br.ts index 3095b5e42..8593cf268 100644 --- a/web/src/locales/pt-br.ts +++ b/web/src/locales/pt-br.ts @@ -737,11 +737,27 @@ export default { searXNG: 'SearXNG', searXNGDescription: 'Um componente que realiza buscas via URL da instância SearXNG que você fornece. Especifique TopN e URL da instância.', - channel: 'Canal', - channelTip: `Realize uma busca por texto ou por notícias na entrada do componente`, - text: 'Texto', - news: 'Notícias', - messageHistoryWindowSize: 'Tamanho da janela de mensagens', + pdfGenerator: 'Gerador de Documentos', + pDFGenerator: 'Gerador de Documentos', + pdfGeneratorDescription: `Um componente que gera documentos (PDF, DOCX, TXT) de conteúdo formatado em markdown com estilo personalizável, imagens e tabelas. Suporta: **negrito**, *itálico*, # títulos, - listas, tabelas com sintaxe |.`, + pDFGeneratorDescription: `Um componente que gera documentos (PDF, DOCX, TXT) de conteúdo formatado em markdown com estilo personalizável, imagens e tabelas. 
Suporta: **negrito**, *itálico*, # títulos, - listas, tabelas com sintaxe |.`, + subtitle: 'Subtítulo', + logoImage: 'Imagem Logo', + logoPosition: 'Posição Logo', + logoWidth: 'Largura Logo', + logoHeight: 'Altura Logo', + fontFamily: 'Família Fonte', + fontSize: 'Tamanho Fonte', + titleFontSize: 'Tamanho Fonte Título', + pageSize: 'Tamanho Página', + orientation: 'Orientação', + marginTop: 'Margem Superior', + marginBottom: 'Margem Inferior', + filename: 'Nome Arquivo', + outputDirectory: 'Diretório Saída', + addPageNumbers: 'Adicionar Números Página', + addTimestamp: 'Adicionar Timestamp', + watermarkText: "Texto Marca D'água", messageHistoryWindowSizeTip: 'O tamanho da janela do histórico de conversa visível para o LLM. Quanto maior, melhor, mas fique atento ao limite máximo de tokens do LLM.', wikipedia: 'Wikipedia', diff --git a/web/src/locales/ru.ts b/web/src/locales/ru.ts index 6c6141abc..433a4ffab 100644 --- a/web/src/locales/ru.ts +++ b/web/src/locales/ru.ts @@ -1223,6 +1223,27 @@ export default { searXNG: 'SearXNG', searXNGDescription: 'Компонент, который выполняет поиск через ваш предоставленный URL экземпляра SearXNG. Укажите TopN и URL экземпляра.', + pdfGenerator: 'Генератор документов', + pDFGenerator: 'Генератор документов', + pdfGeneratorDescription: `Компонент, который генерирует документы (PDF, DOCX, TXT) из содержимого в формате markdown с настраиваемым стилем, изображениями и таблицами. Поддерживает: **жирный**, *курсив*, # заголовки, - списки, таблицы с синтаксисом |.`, + pDFGeneratorDescription: `Компонент, который генерирует документы (PDF, DOCX, TXT) из содержимого в формате markdown с настраиваемым стилем, изображениями и таблицами. 
Поддерживает: **жирный**, *курсив*, # заголовки, - списки, таблицы с синтаксисом |.`, + subtitle: 'Подзаголовок', + logoImage: 'Изображение логотипа', + logoPosition: 'Позиция логотипа', + logoWidth: 'Ширина логотипа', + logoHeight: 'Высота логотипа', + fontFamily: 'Семейство шрифтов', + fontSize: 'Размер шрифта', + titleFontSize: 'Размер шрифта заголовка', + pageSize: 'Размер страницы', + orientation: 'Ориентация', + marginTop: 'Верхний отступ', + marginBottom: 'Нижний отступ', + filename: 'Имя файла', + outputDirectory: 'Выходной каталог', + addPageNumbers: 'Добавить номера страниц', + addTimestamp: 'Добавить временную метку', + watermarkText: 'Текст водяного знака', channel: 'Канал', channelTip: `Выполняет текстовый поиск или поиск новостей на входе компонента`, text: 'Текст', @@ -1604,7 +1625,6 @@ export default { datatype: 'MIME тип HTTP запроса', insertVariableTip: `Введите / Вставьте переменные`, historyversion: 'История версий', - filename: 'Имя файла', version: { created: 'Создано', details: 'Детали версии', diff --git a/web/src/locales/vi.ts b/web/src/locales/vi.ts index b240b4d69..a506babf9 100644 --- a/web/src/locales/vi.ts +++ b/web/src/locales/vi.ts @@ -821,15 +821,31 @@ export default { baiduDescription: `Thành phần này được sử dụng để lấy kết quả tìm kiếm từ www.baidu.com. Thông thường, nó hoạt động như một phần bổ sung cho các cơ sở kiến thức. Top N chỉ định số lượng kết quả tìm kiếm bạn cần điều chỉnh.`, duckDuckGo: 'DuckDuckGo', duckDuckGoDescription: - 'Một thành phần truy xuất kết quả tìm kiếm từ duckduckgo.com, với TopN xác định số lượng kết quả tìm kiếm. Nó bổ sung cho các cơ sở kiến thức hiện có.', + 'Một thành phần tìm kiếm trên duckduckgo.com, cho phép bạn chỉ định số lượng kết quả tìm kiếm sử dụng TopN. Nó bổ sung cho các cơ sở kiến thức hiện có.', searXNG: 'SearXNG', searXNGDescription: - 'Một thành phần truy xuất kết quả tìm kiếm từ searxng.com, với TopN xác định số lượng kết quả tìm kiếm. 
Nó bổ sung cho các cơ sở kiến thức hiện có.', - channel: 'Kênh', - channelTip: `Thực hiện tìm kiếm văn bản hoặc tìm kiếm tin tức trên đầu vào của thành phần`, - text: 'Văn bản', - news: 'Tin tức', - messageHistoryWindowSize: 'Cửa sổ lịch sử tin nhắn', + 'Một thành phần tìm kiếm thông qua URL phiên bản SearXNG bạn cung cấp. Chỉ định TopN và URL phiên bản.', + pdfGenerator: 'Trình tạo Tài liệu', + pDFGenerator: 'Trình tạo Tài liệu', + pdfGeneratorDescription: `Một thành phần tạo tài liệu (PDF, DOCX, TXT) từ nội dung định dạng markdown với kiểu tùy chỉnh, hình ảnh và bảng. Hỗ trợ: **in đậm**, *in nghiêng*, # tiêu đề, - danh sách, bảng với cú pháp |.`, + pDFGeneratorDescription: `Một thành phần tạo tài liệu (PDF, DOCX, TXT) từ nội dung định dạng markdown với kiểu tùy chỉnh, hình ảnh và bảng. Hỗ trợ: **in đậm**, *in nghiêng*, # tiêu đề, - danh sách, bảng với cú pháp |.`, + subtitle: 'Phụ đề', + logoImage: 'Hình ảnh Logo', + logoPosition: 'Vị trí Logo', + logoWidth: 'Chiều rộng Logo', + logoHeight: 'Chiều cao Logo', + fontFamily: 'Họ phông chữ', + fontSize: 'Kích thước phông chữ', + titleFontSize: 'Kích thước phông chữ tiêu đề', + pageSize: 'Kích thước trang', + orientation: 'Hướng', + marginTop: 'Lề trên', + marginBottom: 'Lề dưới', + filename: 'Tên tệp', + outputDirectory: 'Thư mục đầu ra', + addPageNumbers: 'Thêm số trang', + addTimestamp: 'Thêm dấu thời gian', + watermarkText: 'Văn bản watermark', messageHistoryWindowSizeTip: 'Kích thước cửa sổ lịch sử cuộc trò chuyện hiển thị với LLM. 
Càng lớn càng tốt, nhưng hãy chú ý đến giới hạn tối đa số token của LLM.', wikipedia: 'Wikipedia', diff --git a/web/src/locales/zh-traditional.ts b/web/src/locales/zh-traditional.ts index c8657dc9b..67207c546 100644 --- a/web/src/locales/zh-traditional.ts +++ b/web/src/locales/zh-traditional.ts @@ -849,15 +849,31 @@ export default { baiduDescription: `此組件用於取得www.baidu.com的搜尋結果,一般作為知識庫的補充,Top N指定需要採納的搜尋結果數。`, duckDuckGo: 'DuckDuckGo', duckDuckGoDescription: - '此元件用於從 www.duckduckgo.com 取得搜尋結果。通常,它作為知識庫的補充。 Top N 指定您需要採用的搜尋結果數。', + '此組件用於從 www.duckduckgo.com 取得搜尋結果,通常充當知識庫的補充。Top N 指定搜尋結果的數量。', searXNG: 'SearXNG', searXNGDescription: - '該組件通過您提供的 SearXNG 實例地址進行搜索。請設置 Top N 和實例 URL。', - channel: '頻道', - channelTip: '針對該組件的輸入進行文字搜尋或新聞搜索', - text: '文字', - news: '新聞', - messageHistoryWindowSize: '歷史訊息視窗大小', + '此組件透過您提供的 SearXNG 實例 URL 進行搜尋。請設定 Top N 和實例 URL。', + pdfGenerator: '文檔生成器', + pDFGenerator: '文檔生成器', + pdfGeneratorDescription: `該組件從 markdown 格式的內容生成文檔(PDF、DOCX、TXT),支援自定義樣式、圖片和表格。支援:**粗體**、*斜體*、# 標題、- 列表、使用 | 語法的表格。`, + pDFGeneratorDescription: `該組件從 markdown 格式的內容生成文檔(PDF、DOCX、TXT),支援自定義樣式、圖片和表格。支援:**粗體**、*斜體*、# 標題、- 列表、使用 | 語法的表格。`, + subtitle: '副標題', + logoImage: '標誌圖片', + logoPosition: '標誌位置', + logoWidth: '標誌寬度', + logoHeight: '標誌高度', + fontFamily: '字體系列', + fontSize: '字體大小', + titleFontSize: '標題字體大小', + pageSize: '頁面大小', + orientation: '方向', + marginTop: '上邊距', + marginBottom: '下邊距', + filename: '檔名', + outputDirectory: '輸出目錄', + addPageNumbers: '添加頁碼', + addTimestamp: '添加時間戳', + watermarkText: '浮水印文字', messageHistoryWindowSizeTip: 'LLM 需要查看的對話歷史視窗大小。越大越好,但要注意 LLM 的最大 Token 數。', wikipedia: '維基百科', diff --git a/web/src/locales/zh.ts b/web/src/locales/zh.ts index dba89ea92..50b2faae3 100644 --- a/web/src/locales/zh.ts +++ b/web/src/locales/zh.ts @@ -1187,6 +1187,27 @@ General:实体和关系提取提示来自 GitHub - microsoft/graphrag:基于 searXNG: 'SearXNG', searXNGDescription: '该组件通过您提供的 SearXNG 实例地址进行搜索。请设置 Top N 和实例 URL。', + pdfGenerator: '文档生成器', + pDFGenerator: '文档生成器', + 
pdfGeneratorDescription: `该组件从 markdown 格式的内容生成文档(PDF、DOCX、TXT),支持自定义样式、图片和表格。支持:**粗体**、*斜体*、# 标题、- 列表、使用 | 语法的表格。`, + pDFGeneratorDescription: `该组件从 markdown 格式的内容生成文档(PDF、DOCX、TXT),支持自定义样式、图片和表格。支持:**粗体**、*斜体*、# 标题、- 列表、使用 | 语法的表格。`, + subtitle: '副标题', + logoImage: '标志图片', + logoPosition: '标志位置', + logoWidth: '标志宽度', + logoHeight: '标志高度', + fontFamily: '字体系列', + fontSize: '字体大小', + titleFontSize: '标题字体大小', + pageSize: '页面大小', + orientation: '方向', + marginTop: '上边距', + marginBottom: '下边距', + filename: '文件名', + outputDirectory: '输出目录', + addPageNumbers: '添加页码', + addTimestamp: '添加时间戳', + watermarkText: '水印文本', channel: '频道', channelTip: '针对该组件的输入进行文本搜索或新闻搜索', text: '文本', diff --git a/web/src/pages/agent/canvas/node/dropdown/accordion-operators.tsx b/web/src/pages/agent/canvas/node/dropdown/accordion-operators.tsx index 5aa5c2873..e48d8ee5e 100644 --- a/web/src/pages/agent/canvas/node/dropdown/accordion-operators.tsx +++ b/web/src/pages/agent/canvas/node/dropdown/accordion-operators.tsx @@ -122,6 +122,7 @@ export function AccordionOperators({ Operator.Invoke, Operator.WenCai, Operator.SearXNG, + Operator.PDFGenerator, ]} isCustomDropdown={isCustomDropdown} mousePosition={mousePosition} diff --git a/web/src/pages/agent/constant/index.tsx b/web/src/pages/agent/constant/index.tsx index 5c25b7fe0..f1c4bab1b 100644 --- a/web/src/pages/agent/constant/index.tsx +++ b/web/src/pages/agent/constant/index.tsx @@ -932,6 +932,71 @@ export enum AgentVariableType { Conversation = 'conversation', } +// PDF Generator enums +export enum PDFGeneratorFontFamily { + Helvetica = 'Helvetica', + TimesRoman = 'Times-Roman', + Courier = 'Courier', + HelveticaBold = 'Helvetica-Bold', + TimesBold = 'Times-Bold', +} + +export enum PDFGeneratorLogoPosition { + Left = 'left', + Center = 'center', + Right = 'right', +} + +export enum PDFGeneratorPageSize { + A4 = 'A4', + Letter = 'Letter', +} + +export enum PDFGeneratorOrientation { + Portrait = 'portrait', + Landscape = 'landscape', +} + +export 
const initialPDFGeneratorValues = { + output_format: 'pdf', + content: '', + title: '', + subtitle: '', + header_text: '', + footer_text: '', + logo_image: '', + logo_position: PDFGeneratorLogoPosition.Left, + logo_width: 2.0, + logo_height: 1.0, + font_family: PDFGeneratorFontFamily.Helvetica, + font_size: 12, + title_font_size: 24, + heading1_font_size: 18, + heading2_font_size: 16, + heading3_font_size: 14, + text_color: '#000000', + title_color: '#000000', + page_size: PDFGeneratorPageSize.A4, + orientation: PDFGeneratorOrientation.Portrait, + margin_top: 1.0, + margin_bottom: 1.0, + margin_left: 1.0, + margin_right: 1.0, + line_spacing: 1.2, + filename: '', + output_directory: '/tmp/pdf_outputs', + add_page_numbers: true, + add_timestamp: true, + watermark_text: '', + enable_toc: false, + outputs: { + file_path: { type: 'string', value: '' }, + pdf_base64: { type: 'string', value: '' }, + download: { type: 'string', value: '' }, + success: { type: 'boolean', value: false }, + }, +}; + export enum WebhookMethod { Post = 'POST', Get = 'GET', diff --git a/web/src/pages/agent/form-sheet/form-config-map.tsx b/web/src/pages/agent/form-sheet/form-config-map.tsx index 9bda1f674..a552412f1 100644 --- a/web/src/pages/agent/form-sheet/form-config-map.tsx +++ b/web/src/pages/agent/form-sheet/form-config-map.tsx @@ -22,6 +22,7 @@ import ListOperationsForm from '../form/list-operations-form'; import LoopForm from '../form/loop-form'; import MessageForm from '../form/message-form'; import ParserForm from '../form/parser-form'; +import PDFGeneratorForm from '../form/pdf-generator-form'; import PubMedForm from '../form/pubmed-form'; import RetrievalForm from '../form/retrieval-form/next'; import RewriteQuestionForm from '../form/rewrite-question-form'; @@ -110,6 +111,9 @@ export const FormConfigMap = { [Operator.SearXNG]: { component: SearXNGForm, }, + [Operator.PDFGenerator]: { + component: PDFGeneratorForm, + }, [Operator.Note]: { component: () => <>, }, diff --git 
a/web/src/pages/agent/form/pdf-generator-form/index.tsx b/web/src/pages/agent/form/pdf-generator-form/index.tsx new file mode 100644 index 000000000..110bb6369 --- /dev/null +++ b/web/src/pages/agent/form/pdf-generator-form/index.tsx @@ -0,0 +1,535 @@ +import { FormContainer } from '@/components/form-container'; +import { + Form, + FormControl, + FormDescription, + FormField, + FormItem, + FormLabel, + FormMessage, +} from '@/components/ui/form'; +import { Input } from '@/components/ui/input'; +import { RAGFlowSelect } from '@/components/ui/select'; +import { Switch } from '@/components/ui/switch'; +import { zodResolver } from '@hookform/resolvers/zod'; +import { t } from 'i18next'; +import { memo, useMemo } from 'react'; +import { useForm } from 'react-hook-form'; +import { z } from 'zod'; +import { + PDFGeneratorFontFamily, + PDFGeneratorLogoPosition, + PDFGeneratorOrientation, + PDFGeneratorPageSize, +} from '../../constant'; +import { INextOperatorForm } from '../../interface'; +import { FormWrapper } from '../components/form-wrapper'; +import { Output, transferOutputs } from '../components/output'; +import { PromptEditor } from '../components/prompt-editor'; +import { useValues } from './use-values'; +import { useWatchFormChange } from './use-watch-form-change'; + +function PDFGeneratorForm({ node }: INextOperatorForm) { + const values = useValues(node); + + const FormSchema = z.object({ + output_format: z.string().default('pdf'), + content: z.string().min(1, 'Content is required'), + title: z.string().optional(), + subtitle: z.string().optional(), + header_text: z.string().optional(), + footer_text: z.string().optional(), + logo_image: z.string().optional(), + logo_position: z.string(), + logo_width: z.number(), + logo_height: z.number(), + font_family: z.string(), + font_size: z.number(), + title_font_size: z.number(), + heading1_font_size: z.number(), + heading2_font_size: z.number(), + heading3_font_size: z.number(), + text_color: z.string(), + 
title_color: z.string(), + page_size: z.string(), + orientation: z.string(), + margin_top: z.number(), + margin_bottom: z.number(), + margin_left: z.number(), + margin_right: z.number(), + line_spacing: z.number(), + filename: z.string().optional(), + output_directory: z.string(), + add_page_numbers: z.boolean(), + add_timestamp: z.boolean(), + watermark_text: z.string().optional(), + enable_toc: z.boolean(), + outputs: z + .object({ + file_path: z.object({ type: z.string() }), + pdf_base64: z.object({ type: z.string() }), + success: z.object({ type: z.string() }), + }) + .optional(), + }); + + const form = useForm>({ + defaultValues: values, + resolver: zodResolver(FormSchema), + }); + + const outputList = useMemo(() => { + return transferOutputs(values.outputs); + }, [values.outputs]); + + useWatchFormChange(node?.id, form); + + return ( +
+ + + {/* Output Format Selection */} + ( + + Output Format + + + + + Choose the output document format + + + + )} + /> + + {/* Content Section */} + ( + + {t('flow.content')} + + + + +
+
+ Markdown support: **bold**, *italic*, + `code`, # Heading 1, ## Heading 2 +
+
+ Lists: - bullet or 1. numbered +
+
+ Tables: | Column 1 | Column 2 | (use | to + separate columns, <br> or \n for line breaks in + cells) +
+
+ Other: --- for horizontal line, ``` for + code blocks +
+
+
+ +
+ )} + /> + + {/* Title & Subtitle */} + ( + + {t('flow.title')} + + + + + + )} + /> + + ( + + {t('flow.subtitle')} + + + + + + )} + /> + + {/* Logo Settings */} + ( + + {t('flow.logoImage')} + +
+ { + const file = e.target.files?.[0]; + if (file) { + const reader = new FileReader(); + reader.onloadend = () => { + field.onChange(reader.result as string); + }; + reader.readAsDataURL(file); + } + }} + className="cursor-pointer" + /> + +
+
+ + Upload an image file or paste a file path/URL/base64 + + +
+ )} + /> + + ( + + {t('flow.logoPosition')} + + ({ label: val, value: val }), + )} + > + + + + )} + /> + +
+ ( + + {t('flow.logoWidth')} (inches) + + + field.onChange(parseFloat(e.target.value)) + } + /> + + + + )} + /> + + ( + + {t('flow.logoHeight')} (inches) + + + field.onChange(parseFloat(e.target.value)) + } + /> + + + + )} + /> +
+ + {/* Font Settings */} + ( + + {t('flow.fontFamily')} + + ({ label: val, value: val }), + )} + > + + + + )} + /> + +
+ ( + + {t('flow.fontSize')} + + field.onChange(parseInt(e.target.value))} + /> + + + + )} + /> + + ( + + {t('flow.titleFontSize')} + + field.onChange(parseInt(e.target.value))} + /> + + + + )} + /> +
+ + {/* Page Settings */} + ( + + {t('flow.pageSize')} + + ({ + label: val, + value: val, + }))} + > + + + + )} + /> + + ( + + {t('flow.orientation')} + + ({ label: val, value: val }), + )} + > + + + + )} + /> + + {/* Margins */} +
+ ( + + {t('flow.marginTop')} (inches) + + + field.onChange(parseFloat(e.target.value)) + } + /> + + + + )} + /> + + ( + + {t('flow.marginBottom')} (inches) + + + field.onChange(parseFloat(e.target.value)) + } + /> + + + + )} + /> +
+ + {/* Output Settings */} + ( + + {t('flow.filename')} + + + + + + )} + /> + + ( + + {t('flow.outputDirectory')} + + + + + + )} + /> + + {/* Additional Options */} + ( + +
+ {t('flow.addPageNumbers')} + + Add page numbers to the document + +
+ + + +
+ )} + /> + + ( + +
+ {t('flow.addTimestamp')} + + Add generation timestamp to the document + +
+ + + +
+ )} + /> + + ( + + {t('flow.watermarkText')} + + + + + + )} + /> + +
} + /> +
+
+
+ +
+
+ ); +} + +export default memo(PDFGeneratorForm); diff --git a/web/src/pages/agent/form/pdf-generator-form/use-values.ts b/web/src/pages/agent/form/pdf-generator-form/use-values.ts new file mode 100644 index 000000000..1ecd82908 --- /dev/null +++ b/web/src/pages/agent/form/pdf-generator-form/use-values.ts @@ -0,0 +1,11 @@ +import { useMemo } from 'react'; +import { Node } from 'reactflow'; +import { initialPDFGeneratorValues } from '../../constant'; + +export const useValues = (node?: Node) => { + const values = useMemo(() => { + return node?.data.form ?? initialPDFGeneratorValues; + }, [node?.data.form]); + + return values; +}; diff --git a/web/src/pages/agent/form/pdf-generator-form/use-watch-form-change.ts b/web/src/pages/agent/form/pdf-generator-form/use-watch-form-change.ts new file mode 100644 index 000000000..f8f4de3db --- /dev/null +++ b/web/src/pages/agent/form/pdf-generator-form/use-watch-form-change.ts @@ -0,0 +1,19 @@ +import { useEffect } from 'react'; +import { UseFormReturn } from 'react-hook-form'; +import useGraphStore from '../../store'; + +export const useWatchFormChange = ( + nodeId: string | undefined, + form: UseFormReturn, +) => { + const updateNodeForm = useGraphStore((state) => state.updateNodeForm); + + useEffect(() => { + const { unsubscribe } = form.watch((value) => { + if (nodeId) { + updateNodeForm(nodeId, value); + } + }); + return () => unsubscribe(); + }, [form, nodeId, updateNodeForm]); +}; diff --git a/web/src/pages/agent/operator-icon.tsx b/web/src/pages/agent/operator-icon.tsx index b390507a4..c1a799971 100644 --- a/web/src/pages/agent/operator-icon.tsx +++ b/web/src/pages/agent/operator-icon.tsx @@ -16,6 +16,7 @@ import { IconFontFill } from '@/components/icon-font'; import { cn } from '@/lib/utils'; import { FileCode, + FileText, HousePlus, Infinity as InfinityIcon, LogOut, @@ -67,6 +68,7 @@ export const LucideIconMap = { [Operator.DataOperations]: FileCode, [Operator.Loop]: InfinityIcon, [Operator.ExitLoop]: LogOut, + 
[Operator.PDFGenerator]: FileText, }; const Empty = () => { diff --git a/web/src/pages/agents/hooks/use-selelct-filters.ts b/web/src/pages/agents/hooks/use-selelct-filters.ts index e1ea755ee..aa4f4f4dd 100644 --- a/web/src/pages/agents/hooks/use-selelct-filters.ts +++ b/web/src/pages/agents/hooks/use-selelct-filters.ts @@ -7,11 +7,15 @@ export function useSelectFilters() { const { data } = useFetchAgentList({}); const canvasCategory = useMemo(() => { - return groupListByType(data.canvas, 'canvas_category', 'canvas_category'); - }, [data.canvas]); + return groupListByType( + data?.canvas ?? [], + 'canvas_category', + 'canvas_category', + ); + }, [data?.canvas]); const filters: FilterCollection[] = [ - buildOwnersFilter(data.canvas), + buildOwnersFilter(data?.canvas ?? []), { field: 'canvasCategory', list: canvasCategory,