{ "id": 25, "title": { "en": "Title Chunker", "de": "Titelbasierte Segmentierung", "zh": "标题切片" }, "description": { "en": "This template slices the parsed file based on its title structure. It is ideal for documents with well-defined headings, such as product manuals, legal contracts, research reports, and academic papers.", "de": "Diese Vorlage segmentiert die geparste Datei basierend auf ihrer Titelstruktur. Sie eignet sich ideal für Dokumente mit klar definierten Überschriften, wie Produkthandbücher, Verträge, Forschungsberichte und wissenschaftliche Arbeiten.", "zh": "此模板将解析后的文件按标题结构进行切片,适用于具有清晰标题层级的文档类型,如产品手册、合同法规、研究报告和学术论文等。" }, "canvas_type": "Ingestion Pipeline", "canvas_category": "dataflow_canvas", "dsl": { "components": { "File": { "obj": { "component_name": "File", "params": {} }, "downstream": [ "Parser:HipSignsRhyme" ], "upstream": [] }, "Parser:HipSignsRhyme": { "obj": { "component_name": "Parser", "params": { "outputs": { "html": { "type": "string", "value": "" }, "json": { "type": "Array", "value": [] }, "markdown": { "type": "string", "value": "" }, "text": { "type": "string", "value": "" } }, "setups": { "pdf": { "output_format": "json", "suffix": [ "pdf" ], "parse_method": "DeepDOC" }, "text&markdown": { "output_format": "text", "suffix": [ "md", "markdown", "mdx", "txt" ] }, "word": { "output_format": "json", "suffix": [ "doc", "docx" ] } } } }, "downstream": [ "HierarchicalMerger:BusyPoetsSearch" ], "upstream": [ "File" ] }, "Tokenizer:NeatRadiosEnd": { "obj": { "component_name": "Tokenizer", "params": { "fields": "text", "filename_embd_weight": 0.1, "outputs": {}, "search_method": [ "embedding", "full_text" ] } }, "downstream": [], "upstream": [ "HierarchicalMerger:BusyPoetsSearch" ] }, "HierarchicalMerger:BusyPoetsSearch": { "obj": { "component_name": "HierarchicalMerger", "params": { "hierarchy": 3, "levels": [ [ "^#[^#]" ], [ "^##[^#]" ], [ "^###[^#]" ], [ "^####[^#]" ] ], "outputs": { "chunks": { "type": "Array", "value": [] } } } }, 
"downstream": [ "Tokenizer:NeatRadiosEnd" ], "upstream": [ "Parser:HipSignsRhyme" ] } }, "globals": {}, "graph": { "nodes": [ { "data": { "label": "File", "name": "File" }, "id": "File", "measured": { "height": 48, "width": 200 }, "position": { "x": 50, "y": 200 }, "sourcePosition": "left", "targetPosition": "right", "type": "beginNode" }, { "data": { "form": { "outputs": { "html": { "type": "string", "value": "" }, "json": { "type": "Array", "value": [] }, "markdown": { "type": "string", "value": "" }, "text": { "type": "string", "value": "" } }, "setups": [ { "fileFormat": "pdf", "output_format": "json", "parse_method": "DeepDOC" }, { "fileFormat": "text&markdown", "output_format": "text" }, { "fileFormat": "word", "output_format": "json" } ] }, "label": "Parser", "name": "Parser" }, "dragging": false, "id": "Parser:HipSignsRhyme", "measured": { "height": 204, "width": 200 }, "position": { "x": 316.99524094206413, "y": 195.39629819663406 }, "selected": false, "sourcePosition": "right", "targetPosition": "left", "type": "parserNode" }, { "data": { "form": { "fields": "text", "filename_embd_weight": 0.1, "outputs": {}, "search_method": [ "embedding", "full_text" ] }, "label": "Tokenizer", "name": "Indexer" }, "dragging": false, "id": "Tokenizer:NeatRadiosEnd", "measured": { "height": 120, "width": 200 }, "position": { "x": 855.3572909622682, "y": 199.08562542263914 }, "selected": false, "sourcePosition": "right", "targetPosition": "left", "type": "tokenizerNode" }, { "data": { "form": { "hierarchy": "3", "levels": [ { "expressions": [ { "expression": "^#[^#]" } ] }, { "expressions": [ { "expression": "^##[^#]" } ] }, { "expressions": [ { "expression": "^###[^#]" } ] }, { "expressions": [ { "expression": "^####[^#]" } ] } ], "outputs": { "chunks": { "type": "Array", "value": [] } } }, "label": "HierarchicalMerger", "name": "Title Chunker" }, "dragging": false, "id": "HierarchicalMerger:BusyPoetsSearch", "measured": { "height": 80, "width": 200 }, "position": { "x": 
587.0312356829183, "y": 197.9169308584236 }, "selected": false, "sourcePosition": "right", "targetPosition": "left", "type": "splitterNode" }, { "data": { "form": { "text": "It is ideal for documents with well-defined headings, such as product manuals, legal contracts, research reports, and academic papers." }, "label": "Note", "name": "Chunk by Title" }, "dragHandle": ".note-drag-handle", "dragging": false, "height": 159, "id": "Note:KhakiBerriesPick", "measured": { "height": 159, "width": 323 }, "position": { "x": 623.9675370532708, "y": 369.74281927307146 }, "resizing": false, "selected": false, "sourcePosition": "right", "targetPosition": "left", "type": "noteNode", "width": 323 } ], "edges": [ { "id": "xy-edge__Filestart-Parser:HipSignsRhymeend", "source": "File", "sourceHandle": "start", "target": "Parser:HipSignsRhyme", "targetHandle": "end" }, { "id": "xy-edge__Parser:HipSignsRhymestart-HierarchicalMerger:BusyPoetsSearchend", "source": "Parser:HipSignsRhyme", "sourceHandle": "start", "target": "HierarchicalMerger:BusyPoetsSearch", "targetHandle": "end", "data": { "isHovered": false } }, { "data": { "isHovered": false }, "id": "xy-edge__HierarchicalMerger:BusyPoetsSearchstart-Tokenizer:NeatRadiosEndend", "markerEnd": "logo", "source": "HierarchicalMerger:BusyPoetsSearch", "sourceHandle": "start", "style": { "stroke": "rgba(91, 93, 106, 1)", "strokeWidth": 1 }, "target": "Tokenizer:NeatRadiosEnd", "targetHandle": "end", "type": "buttonEdge", "zIndex": 1001 } ] }, "history": [], "messages": [], "path": [], "retrieval": [] }, "avatar": "" }