Files
ragflow/agent/templates/chunk_summary.json
LeonTung 67529825e2 Feat: Contribute ingestion pipeline templates (#10551)
### Type of change

- [x] Other (please describe): contribute agent templates
2025-10-14 21:29:42 +08:00

493 lines
25 KiB
JSON

{
"id": 24,
"title": {
"en": "Chunk Summary",
"zh": "总结切片"
},
"description": {
"en": "This template uses an LLM to generate a summary for each chunk, then builds the full-text and vector indexes from those summaries. During retrieval, the summaries improve matching, and the original chunks are returned as results.",
"zh": "此模板利用大模型生成切片摘要,并据此建立全文索引与向量。检索时以摘要提升匹配效果,最终召回对应的原文切片。"
},
"canvas_type": "Ingestion Pipeline",
"canvas_category": "dataflow_canvas",
"dsl": {
"components": {
"File": {
"obj": {
"component_name": "File",
"params": {}
},
"downstream": [
"Parser:HipSignsRhyme"
],
"upstream": []
},
"Parser:HipSignsRhyme": {
"obj": {
"component_name": "Parser",
"params": {
"outputs": {
"html": {
"type": "string",
"value": ""
},
"json": {
"type": "Array<object>",
"value": []
},
"markdown": {
"type": "string",
"value": ""
},
"text": {
"type": "string",
"value": ""
}
},
"setups": {
"pdf": {
"output_format": "json",
"suffix": [
"pdf"
],
"parse_method": "DeepDOC"
},
"spreadsheet": {
"output_format": "html",
"suffix": [
"xls",
"xlsx",
"csv"
]
},
"image": {
"output_format": "text",
"suffix": [
"jpg",
"jpeg",
"png",
"gif"
],
"parse_method": "ocr"
},
"email": {
"output_format": "text",
"suffix": [
"eml",
"msg"
],
"fields": [
"from",
"to",
"cc",
"bcc",
"date",
"subject",
"body",
"attachments"
]
},
"text&markdown": {
"output_format": "text",
"suffix": [
"md",
"markdown",
"mdx",
"txt"
]
},
"word": {
"output_format": "json",
"suffix": [
"doc",
"docx"
]
},
"slides": {
"output_format": "json",
"suffix": [
"pptx"
]
}
}
}
},
"downstream": [
"Splitter:LateExpertsFeel"
],
"upstream": [
"File"
]
},
"Splitter:LateExpertsFeel": {
"obj": {
"component_name": "Splitter",
"params": {
"chunk_token_size": 512,
"delimiters": [
"\n"
],
"outputs": {
"chunks": {
"type": "Array<Object>",
"value": []
}
},
"overlapped_percent": 0
}
},
"downstream": [
"Extractor:YummyGhostsType"
],
"upstream": [
"Parser:HipSignsRhyme"
]
},
"Tokenizer:EightRocketsAppear": {
"obj": {
"component_name": "Tokenizer",
"params": {
"fields": "summary",
"filename_embd_weight": 0.1,
"outputs": {},
"search_method": [
"embedding",
"full_text"
]
}
},
"downstream": [],
"upstream": [
"Extractor:YummyGhostsType"
]
},
"Extractor:YummyGhostsType": {
"obj": {
"component_name": "Extractor",
"params": {
"field_name": "summary",
"frequencyPenaltyEnabled": false,
"frequency_penalty": 0.7,
"llm_id": "deepseek-chat@DeepSeek",
"maxTokensEnabled": false,
"max_tokens": 256,
"outputs": {
"chunks": {
"type": "Array<Object>",
"value": []
}
},
"presencePenaltyEnabled": false,
"presence_penalty": 0.4,
"prompts": [
{
"content": "Text to Summarize:\n\n\n{Splitter:LateExpertsFeel@chunks}",
"role": "user"
}
],
"sys_prompt": "Act as a precise summarizer. Your task is to create a summary of the provided content that is both concise and faithful to the original.\n\nKey Instructions:\n1. Accuracy: Strictly base the summary on the information given. Do not introduce any new facts, conclusions, or interpretations that are not explicitly stated.\n2. Language: Write the summary in the same language as the source text.\n3. Objectivity: Present the key points without bias, preserving the original intent and tone of the content. Do not editorialize.\n4. Conciseness: Focus on the most important ideas, omitting minor details and fluff.",
"temperature": 0.1,
"temperatureEnabled": false,
"topPEnabled": false,
"top_p": 0.3
}
},
"downstream": [
"Tokenizer:EightRocketsAppear"
],
"upstream": [
"Splitter:LateExpertsFeel"
]
}
},
"globals": {},
"graph": {
"nodes": [
{
"data": {
"label": "File",
"name": "File"
},
"id": "File",
"measured": {
"height": 48,
"width": 200
},
"position": {
"x": 50,
"y": 200
},
"sourcePosition": "left",
"targetPosition": "right",
"type": "beginNode"
},
{
"data": {
"form": {
"outputs": {
"html": {
"type": "string",
"value": ""
},
"json": {
"type": "Array<object>",
"value": []
},
"markdown": {
"type": "string",
"value": ""
},
"text": {
"type": "string",
"value": ""
}
},
"setups": [
{
"fileFormat": "pdf",
"output_format": "json",
"parse_method": "DeepDOC"
},
{
"fileFormat": "spreadsheet",
"output_format": "html"
},
{
"fileFormat": "image",
"output_format": "text",
"parse_method": "ocr"
},
{
"fields": [
"from",
"to",
"cc",
"bcc",
"date",
"subject",
"body",
"attachments"
],
"fileFormat": "email",
"output_format": "text"
},
{
"fileFormat": "text&markdown",
"output_format": "text"
},
{
"fileFormat": "word",
"output_format": "json"
},
{
"fileFormat": "slides",
"output_format": "json"
}
]
},
"label": "Parser",
"name": "Parser"
},
"dragging": false,
"id": "Parser:HipSignsRhyme",
"measured": {
"height": 412,
"width": 200
},
"position": {
"x": 316.99524094206413,
"y": 195.39629819663406
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "parserNode"
},
{
"data": {
"form": {
"chunk_token_size": 512,
"delimiters": [
{
"value": "\n"
}
],
"outputs": {
"chunks": {
"type": "Array<Object>",
"value": []
}
},
"overlapped_percent": 0
},
"label": "Splitter",
"name": "Token Splitter"
},
"dragging": false,
"id": "Splitter:LateExpertsFeel",
"measured": {
"height": 80,
"width": 200
},
"position": {
"x": 600.5891036507014,
"y": 197.6804920892271
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "splitterNode"
},
{
"data": {
"form": {
"fields": "summary",
"filename_embd_weight": 0.1,
"outputs": {},
"search_method": [
"embedding",
"full_text"
]
},
"label": "Tokenizer",
"name": "Tokenizer"
},
"dragging": false,
"id": "Tokenizer:EightRocketsAppear",
"measured": {
"height": 120,
"width": 200
},
"position": {
"x": 1136.0745258879847,
"y": 202.22674640530906
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "tokenizerNode"
},
{
"data": {
"form": {
"field_name": "summary",
"frequencyPenaltyEnabled": false,
"frequency_penalty": 0.7,
"llm_id": "deepseek-chat@DeepSeek",
"maxTokensEnabled": false,
"max_tokens": 256,
"outputs": {
"chunks": {
"type": "Array<Object>",
"value": []
}
},
"presencePenaltyEnabled": false,
"presence_penalty": 0.4,
"prompts": "Text to Summarize:\n\n\n{Splitter:LateExpertsFeel@chunks}",
"sys_prompt": "Act as a precise summarizer. Your task is to create a summary of the provided content that is both concise and faithful to the original.\n\nKey Instructions:\n1. Accuracy: Strictly base the summary on the information given. Do not introduce any new facts, conclusions, or interpretations that are not explicitly stated.\n2. Language: Write the summary in the same language as the source text.\n3. Objectivity: Present the key points without bias, preserving the original intent and tone of the content. Do not editorialize.\n4. Conciseness: Focus on the most important ideas, omitting minor details and fluff.",
"temperature": 0.1,
"temperatureEnabled": false,
"topPEnabled": false,
"top_p": 0.3
},
"label": "Extractor",
"name": "Transformer"
},
"dragging": false,
"id": "Extractor:YummyGhostsType",
"measured": {
"height": 84,
"width": 200
},
"position": {
"x": 870.1728208672672,
"y": 201.4516837225608
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "contextNode"
},
{
"id": "Note:MightyPandasWatch",
"type": "noteNode",
"position": {
"x": 1128.1996486833773,
"y": 342.4601052720091
},
"data": {
"label": "Note",
"name": "Index summary",
"form": {
"text": "Uses each chunk's summary to build both the full-text and vector indexes."
}
},
"sourcePosition": "right",
"targetPosition": "left",
"dragHandle": ".note-drag-handle",
"measured": {
"width": 249,
"height": 128
},
"selected": false,
"dragging": false
}
],
"edges": [
{
"data": {
"isHovered": false
},
"id": "xy-edge__Filestart-Parser:HipSignsRhymeend",
"source": "File",
"sourceHandle": "start",
"target": "Parser:HipSignsRhyme",
"targetHandle": "end"
},
{
"data": {
"isHovered": false
},
"id": "xy-edge__Parser:HipSignsRhymestart-Splitter:LateExpertsFeelend",
"source": "Parser:HipSignsRhyme",
"sourceHandle": "start",
"target": "Splitter:LateExpertsFeel",
"targetHandle": "end"
},
{
"data": {
"isHovered": false
},
"id": "xy-edge__Splitter:LateExpertsFeelstart-Extractor:YummyGhostsTypeend",
"source": "Splitter:LateExpertsFeel",
"sourceHandle": "start",
"target": "Extractor:YummyGhostsType",
"targetHandle": "end"
},
{
"data": {
"isHovered": false
},
"id": "xy-edge__Extractor:YummyGhostsTypestart-Tokenizer:EightRocketsAppearend",
"markerEnd": "logo",
"source": "Extractor:YummyGhostsType",
"sourceHandle": "start",
"style": {
"stroke": "rgba(91, 93, 106, 1)",
"strokeWidth": 1
},
"target": "Tokenizer:EightRocketsAppear",
"targetHandle": "end",
"type": "buttonEdge",
"zIndex": 1001
}
]
},
"history": [],
"messages": [],
"path": [],
"retrieval": []
},
"avatar": ""
}