Feat: extract message output to file (#11251)

### What problem does this PR solve?

Feat: extract message output to file

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
2026-01-31 23:55:06 +08:00 · 2025-11-14 19:52:11 +08:00
parent cd55f6c1b8
commit 68e3b33ae4
6 changed files with 104 additions and 2 deletions
--- a/agent/canvas.py
+++ b/agent/canvas.py
@ -408,6 +408,10 @@ class Canvas(Graph):
                    else:
                        yield decorate("message", {"content": cpn_obj.output("content")})
                        cite = re.search(r"\[ID:[ 0-9]+\]",  cpn_obj.output("content"))
+
+                    if isinstance(cpn_obj.output("attachment"), tuple):
+                        yield decorate("message", {"attachment": cpn_obj.output("attachment")})
+                        
                    yield decorate("message_end", {"reference": self.get_reference() if cite else None})

                    while partials:
--- a/agent/component/message.py
+++ b/agent/component/message.py
@ -17,6 +17,9 @@ import json
 import os
 import random
 import re
+import pypandoc
+import logging
+import tempfile
 from functools import partial
 from typing import Any

@ -24,7 +27,8 @@ from agent.component.base import ComponentBase, ComponentParamBase
 from jinja2 import Template as Jinja2Template

 from common.connection_utils import timeout
-
+from common.misc_utils import get_uuid
+from common import settings

 class MessageParam(ComponentParamBase):
    """
@ -34,6 +38,7 @@ class MessageParam(ComponentParamBase):
        super().__init__()
        self.content = []
        self.stream = True
+        self.output_format = None  # default output format
        self.outputs = {
            "content": {
                "type": "str"
@ -133,6 +138,7 @@ class Message(ComponentBase):
            yield rand_cnt[s: ]

        self.set_output("content", all_content)
+        self._convert_content(all_content)

    def _is_jinjia2(self, content:str) -> bool:
        patt = [
@ -164,6 +170,68 @@ class Message(ComponentBase):
            content = re.sub(n, v, content)

        self.set_output("content", content)
+        self._convert_content(content)

    def thoughts(self) -> str:
        return ""
+
+    def _convert_content(self, content):
+        """Export the message content as a file attachment in the configured format.
+
+        The content is treated as markdown and converted with pypandoc to
+        ``self._param.output_format`` (one of ``markdown``, ``html``, ``pdf``,
+        ``docx``; any other value falls back to ``markdown``). The resulting
+        bytes are stored through ``settings.STORAGE_IMPL`` under the canvas
+        tenant id and a freshly generated document id, and the ``attachment``
+        output is set to a dict with ``doc_id``, ``format`` and ``file_name``.
+
+        When no output format is configured (the parameter defaults to
+        ``None``) nothing is converted and no attachment is produced.
+
+        Conversion and upload are best-effort: any failure is logged with its
+        traceback and swallowed so that the message itself is still delivered.
+
+        Args:
+            content: Markdown text to convert. A non-``str`` value is handed to
+                ``pypandoc.convert_file`` as the source file.
+
+        NOTE(review): agent/canvas.py only forwards the ``attachment`` output
+        when it is a ``tuple``, while a ``dict`` is stored here -- confirm the
+        two sides agree.
+        """
+        # Export is opt-in: without a configured format there is nothing to do.
+        # (Calling .lower() on the default None used to raise AttributeError.)
+        if not self._param.output_format:
+            return
+
+        # Normalise once so that validation, the pandoc target, the temp-file
+        # suffix and the attachment metadata all use the same spelling.
+        output_format = str(self._param.output_format).lower()
+        if output_format not in {"markdown", "html", "pdf", "docx"}:
+            output_format = "markdown"
+        self._param.output_format = output_format
+
+        doc_id = get_uuid()
+
+        # Text is converted in memory; anything else is treated as a source file.
+        convert = pypandoc.convert_text if isinstance(content, str) else pypandoc.convert_file
+
+        try:
+            if output_format in {"markdown", "html"}:
+                converted = convert(content, to=output_format, format="markdown")
+                binary_content = converted.encode("utf-8")
+
+            else:  # pdf, docx
+                # These formats are written by pandoc to an output file, so
+                # reserve a temp path, let pandoc fill it, then read it back.
+                with tempfile.NamedTemporaryFile(suffix=f".{output_format}", delete=False) as tmp:
+                    tmp_name = tmp.name
+
+                try:
+                    convert(
+                        content,
+                        to=output_format,
+                        format="markdown",
+                        outputfile=tmp_name,
+                    )
+
+                    with open(tmp_name, "rb") as f:
+                        binary_content = f.read()
+
+                finally:
+                    # delete=False above means cleanup is our responsibility.
+                    if os.path.exists(tmp_name):
+                        os.remove(tmp_name)
+
+            settings.STORAGE_IMPL.put(self._canvas._tenant_id, doc_id, binary_content)
+            self.set_output("attachment", {
+                "doc_id": doc_id,
+                "format": output_format,
+                "file_name": f"{doc_id[:8]}.{output_format}"})
+
+            logging.info(f"Converted content uploaded as {doc_id} (format={output_format})")
+
+        except Exception as e:
+            # Deliberately broad: a failed export must not break message delivery.
+            logging.exception(f"Error converting content to {output_format}: {e}")