{ "id": 24, "title": { "en": "Chunk Summary", "zh": "总结切片" }, "description": { "en": "This template uses an LLM to generate chunk summaries for building text and vector indexes. During retrieval, summaries enhance matching, and the original chunks are returned as results.", "zh": "此模板利用大模型生成切片摘要,并据此建立全文索引与向量。检索时以摘要提升匹配效果,最终召回对应的原文切片。" }, "canvas_type": "Ingestion Pipeline", "canvas_category": "dataflow_canvas", "dsl": { "components": { "File": { "obj": { "component_name": "File", "params": {} }, "downstream": [ "Parser:HipSignsRhyme" ], "upstream": [] }, "Parser:HipSignsRhyme": { "obj": { "component_name": "Parser", "params": { "outputs": { "html": { "type": "string", "value": "" }, "json": { "type": "Array", "value": [] }, "markdown": { "type": "string", "value": "" }, "text": { "type": "string", "value": "" } }, "setups": { "pdf": { "output_format": "json", "suffix": [ "pdf" ], "parse_method": "DeepDOC" }, "spreadsheet": { "output_format": "html", "suffix": [ "xls", "xlsx", "csv" ] }, "image": { "output_format": "text", "suffix": [ "jpg", "jpeg", "png", "gif" ], "parse_method": "ocr" }, "email": { "output_format": "text", "suffix": [ "eml", "msg" ], "fields": [ "from", "to", "cc", "bcc", "date", "subject", "body", "attachments" ] }, "text&markdown": { "output_format": "text", "suffix": [ "md", "markdown", "mdx", "txt" ] }, "word": { "output_format": "json", "suffix": [ "doc", "docx" ] }, "slides": { "output_format": "json", "suffix": [ "pptx" ] } } } }, "downstream": [ "Splitter:LateExpertsFeel" ], "upstream": [ "File" ] }, "Splitter:LateExpertsFeel": { "obj": { "component_name": "Splitter", "params": { "chunk_token_size": 512, "delimiters": [ "\n" ], "outputs": { "chunks": { "type": "Array", "value": [] } }, "overlapped_percent": 0 } }, "downstream": [ "Extractor:YummyGhostsType" ], "upstream": [ "Parser:HipSignsRhyme" ] }, "Tokenizer:EightRocketsAppear": { "obj": { "component_name": "Tokenizer", "params": { "fields": "summary", "filename_embd_weight": 0.1, "outputs": {}, "search_method": [ "embedding", "full_text" ] } }, "downstream": [], "upstream": [ "Extractor:YummyGhostsType" ] }, "Extractor:YummyGhostsType": { "obj": { "component_name": "Extractor", "params": { "field_name": "summary", "frequencyPenaltyEnabled": false, "frequency_penalty": 0.7, "llm_id": "deepseek-chat@DeepSeek", "maxTokensEnabled": false, "max_tokens": 256, "outputs": { "chunks": { "type": "Array", "value": [] } }, "presencePenaltyEnabled": false, "presence_penalty": 0.4, "prompts": [ { "content": "Text to Summarize:\n\n\n{Splitter:LateExpertsFeel@chunks}", "role": "user" } ], "sys_prompt": "Act as a precise summarizer. Your task is to create a summary of the provided content that is both concise and faithful to the original.\n\nKey Instructions:\n1. Accuracy: Strictly base the summary on the information given. Do not introduce any new facts, conclusions, or interpretations that are not explicitly stated.\n2. Language: Write the summary in the same language as the source text.\n3. Objectivity: Present the key points without bias, preserving the original intent and tone of the content. Do not editorialize.\n4. Conciseness: Focus on the most important ideas, omitting minor details and fluff.", "temperature": 0.1, "temperatureEnabled": false, "topPEnabled": false, "top_p": 0.3 } }, "downstream": [ "Tokenizer:EightRocketsAppear" ], "upstream": [ "Splitter:LateExpertsFeel" ] } }, "globals": {}, "graph": { "nodes": [ { "data": { "label": "File", "name": "File" }, "id": "File", "measured": { "height": 48, "width": 200 }, "position": { "x": 50, "y": 200 }, "sourcePosition": "left", "targetPosition": "right", "type": "beginNode" }, { "data": { "form": { "outputs": { "html": { "type": "string", "value": "" }, "json": { "type": "Array", "value": [] }, "markdown": { "type": "string", "value": "" }, "text": { "type": "string", "value": "" } }, "setups": [ { "fileFormat": "pdf", "output_format": "json", "parse_method": "DeepDOC" }, { "fileFormat": "spreadsheet", "output_format": "html" }, { "fileFormat": "image", "output_format": "text", "parse_method": "ocr" }, { "fields": [ "from", "to", "cc", "bcc", "date", "subject", "body", "attachments" ], "fileFormat": "email", "output_format": "text" }, { "fileFormat": "text&markdown", "output_format": "text" }, { "fileFormat": "word", "output_format": "json" }, { "fileFormat": "slides", "output_format": "json" } ] }, "label": "Parser", "name": "Parser" }, "dragging": false, "id": "Parser:HipSignsRhyme", "measured": { "height": 412, "width": 200 }, "position": { "x": 316.99524094206413, "y": 195.39629819663406 }, "selected": false, "sourcePosition": "right", "targetPosition": "left", "type": "parserNode" }, { "data": { "form": { "chunk_token_size": 512, "delimiters": [ { "value": "\n" } ], "outputs": { "chunks": { "type": "Array", "value": [] } }, "overlapped_percent": 0 }, "label": "Splitter", "name": "Token Splitter" }, "dragging": false, "id": "Splitter:LateExpertsFeel", "measured": { "height": 80, "width": 200 }, "position": { "x": 600.5891036507014, "y": 197.6804920892271 }, "selected": false, "sourcePosition": "right", "targetPosition": "left", "type": "splitterNode" }, { "data": { "form": { "fields": "summary", "filename_embd_weight": 0.1, "outputs": {}, "search_method": [ "embedding", "full_text" ] }, "label": "Tokenizer", "name": "Tokenizer" }, "dragging": false, "id": "Tokenizer:EightRocketsAppear", "measured": { "height": 120, "width": 200 }, "position": { "x": 1136.0745258879847, "y": 202.22674640530906 }, "selected": false, "sourcePosition": "right", "targetPosition": "left", "type": "tokenizerNode" }, { "data": { "form": { "field_name": "summary", "frequencyPenaltyEnabled": false, "frequency_penalty": 0.7, "llm_id": "deepseek-chat@DeepSeek", "maxTokensEnabled": false, "max_tokens": 256, "outputs": { "chunks": { "type": "Array", "value": [] } }, "presencePenaltyEnabled": false, "presence_penalty": 0.4, "prompts": "Text to Summarize:\n\n\n{Splitter:LateExpertsFeel@chunks}", "sys_prompt": "Act as a precise summarizer. Your task is to create a summary of the provided content that is both concise and faithful to the original.\n\nKey Instructions:\n1. Accuracy: Strictly base the summary on the information given. Do not introduce any new facts, conclusions, or interpretations that are not explicitly stated.\n2. Language: Write the summary in the same language as the source text.\n3. Objectivity: Present the key points without bias, preserving the original intent and tone of the content. Do not editorialize.\n4. Conciseness: Focus on the most important ideas, omitting minor details and fluff.", "temperature": 0.1, "temperatureEnabled": false, "topPEnabled": false, "top_p": 0.3 }, "label": "Extractor", "name": "Transformer" }, "dragging": false, "id": "Extractor:YummyGhostsType", "measured": { "height": 84, "width": 200 }, "position": { "x": 870.1728208672672, "y": 201.4516837225608 }, "selected": false, "sourcePosition": "right", "targetPosition": "left", "type": "contextNode" }, { "id": "Note:MightyPandasWatch", "type": "noteNode", "position": { "x": 1128.1996486833773, "y": 342.4601052720091 }, "data": { "label": "Note", "name": "Index summary", "form": { "text": "Using summary to build both text and vector indexes." } }, "sourcePosition": "right", "targetPosition": "left", "dragHandle": ".note-drag-handle", "measured": { "width": 249, "height": 128 }, "selected": false, "dragging": false } ], "edges": [ { "data": { "isHovered": false }, "id": "xy-edge__Filestart-Parser:HipSignsRhymeend", "source": "File", "sourceHandle": "start", "target": "Parser:HipSignsRhyme", "targetHandle": "end" }, { "data": { "isHovered": false }, "id": "xy-edge__Parser:HipSignsRhymestart-Splitter:LateExpertsFeelend", "source": "Parser:HipSignsRhyme", "sourceHandle": "start", "target": "Splitter:LateExpertsFeel", "targetHandle": "end" }, { "data": { "isHovered": false }, "id": "xy-edge__Splitter:LateExpertsFeelstart-Extractor:YummyGhostsTypeend", "source": "Splitter:LateExpertsFeel", "sourceHandle": "start", "target": "Extractor:YummyGhostsType", "targetHandle": "end" }, { "data": { "isHovered": false }, "id": "xy-edge__Extractor:YummyGhostsTypestart-Tokenizer:EightRocketsAppearend", "markerEnd": "logo", "source": "Extractor:YummyGhostsType", "sourceHandle": "start", "style": { "stroke": "rgba(91, 93, 106, 1)", "strokeWidth": 1 }, "target": "Tokenizer:EightRocketsAppear", "targetHandle": "end", "type": "buttonEdge", "zIndex": 1001 } ] }, "history": [], "messages": [], "path": [], "retrieval": [] }, "avatar": "" }