{ "id": 25, "title": { "en": "Title Chunker", "de": "Titelbasierte Segmentierung", "zh": "标题切片" }, "description": { "en": "This template slices the parsed file based on its title structure. It is ideal for documents with well-defined headings, such as product manuals, legal contracts, research reports, and academic papers.", "de": "Diese Vorlage segmentiert die geparste Datei basierend auf ihrer Titelstruktur. Sie eignet sich ideal für Dokumente mit klar definierten Überschriften, wie Produkthandbücher, Verträge, Forschungsberichte und wissenschaftliche Arbeiten.", "zh": "此模板将解析后的文件按标题结构进行切片,适用于具有清晰标题层级的文档类型,如产品手册、合同法规、研究报告和学术论文等。" }, "canvas_type": "Ingestion Pipeline", "canvas_category": "dataflow_canvas", "dsl": { "components": { "File": { "obj": { "component_name": "File", "params": {} }, "downstream": [ "Parser:HipSignsRhyme" ], "upstream": [] }, "Parser:HipSignsRhyme": { "obj": { "component_name": "Parser", "params": { "outputs": { "html": { "type": "string", "value": "" }, "json": { "type": "Array", "value": [] }, "markdown": { "type": "string", "value": "" }, "text": { "type": "string", "value": "" } }, "setups": { "pdf": { "output_format": "json", "suffix": [ "pdf" ], "parse_method": "DeepDOC" }, "text&markdown": { "output_format": "text", "suffix": [ "md", "markdown", "mdx", "txt" ] }, "word": { "output_format": "json", "suffix": [ "doc", "docx" ] } } } }, "downstream": [ "HierarchicalMerger:BusyPoetsSearch" ], "upstream": [ "File" ] }, "Tokenizer:NeatRadiosEnd": { "obj": { "component_name": "Tokenizer", "params": { "fields": "text", "filename_embd_weight": 0.1, "outputs": {}, "search_method": [ "embedding", "full_text" ] } }, "downstream": [], "upstream": [ "HierarchicalMerger:BusyPoetsSearch" ] }, "HierarchicalMerger:BusyPoetsSearch": { "obj": { "component_name": "HierarchicalMerger", "params": { "hierarchy": 3, "levels": [ [ "^#[^#]" ], [ "^##[^#]" ], [ "^###[^#]" ], [ "^####[^#]" ] ], "outputs": { "chunks": { "type": "Array", "value": [] } } } }, 
"downstream": [ "Tokenizer:NeatRadiosEnd" ], "upstream": [ "Parser:HipSignsRhyme" ] } }, "globals": {}, "graph": { "nodes": [ { "data": { "label": "File", "name": "File" }, "id": "File", "measured": { "height": 48, "width": 200 }, "position": { "x": 50, "y": 200 }, "sourcePosition": "left", "targetPosition": "right", "type": "beginNode" }, { "data": { "form": { "outputs": { "html": { "type": "string", "value": "" }, "json": { "type": "Array", "value": [] }, "markdown": { "type": "string", "value": "" }, "text": { "type": "string", "value": "" } }, "setups": [ { "fileFormat": "pdf", "output_format": "json", "parse_method": "DeepDOC" }, { "fileFormat": "text&markdown", "output_format": "text" }, { "fileFormat": "word", "output_format": "json" } ] }, "label": "Parser", "name": "Parser" }, "dragging": false, "id": "Parser:HipSignsRhyme", "measured": { "height": 204, "width": 200 }, "position": { "x": 316.99524094206413, "y": 195.39629819663406 }, "selected": false, "sourcePosition": "right", "targetPosition": "left", "type": "parserNode" }, { "data": { "form": { "fields": "text", "filename_embd_weight": 0.1, "outputs": {}, "search_method": [ "embedding", "full_text" ] }, "label": "Tokenizer", "name": "Indexer" }, "dragging": false, "id": "Tokenizer:NeatRadiosEnd", "measured": { "height": 120, "width": 200 }, "position": { "x": 855.3572909622682, "y": 199.08562542263914 }, "selected": false, "sourcePosition": "right", "targetPosition": "left", "type": "tokenizerNode" }, { "data": { "form": { "hierarchy": "3", "levels": [ { "expressions": [ { "expression": "^#[^#]" } ] }, { "expressions": [ { "expression": "^##[^#]" } ] }, { "expressions": [ { "expression": "^###[^#]" } ] }, { "expressions": [ { "expression": "^####[^#]" } ] } ], "outputs": { "chunks": { "type": "Array", "value": [] } } }, "label": "HierarchicalMerger", "name": "Title Chunker" }, "dragging": false, "id": "HierarchicalMerger:BusyPoetsSearch", "measured": { "height": 80, "width": 200 }, "position": { "x": 
587.0312356829183, "y": 197.9169308584236 }, "selected": false, "sourcePosition": "right", "targetPosition": "left", "type": "splitterNode" }, { "data": { "form": { "text": "It is ideal for documents with well-defined headings, such as product manuals, legal contracts, research reports, and academic papers." }, "label": "Note", "name": "Chunk by Title" }, "dragHandle": ".note-drag-handle", "dragging": false, "height": 159, "id": "Note:KhakiBerriesPick", "measured": { "height": 159, "width": 323 }, "position": { "x": 623.9675370532708, "y": 369.74281927307146 }, "resizing": false, "selected": false, "sourcePosition": "right", "targetPosition": "left", "type": "noteNode", "width": 323 } ], "edges": [ { "id": "xy-edge__Filestart-Parser:HipSignsRhymeend", "source": "File", "sourceHandle": "start", "target": "Parser:HipSignsRhyme", "targetHandle": "end" }, { "id": "xy-edge__Parser:HipSignsRhymestart-HierarchicalMerger:BusyPoetsSearchend", "source": "Parser:HipSignsRhyme", "sourceHandle": "start", "target": "HierarchicalMerger:BusyPoetsSearch", "targetHandle": "end", "data": { "isHovered": false } }, { "data": { "isHovered": false }, "id": "xy-edge__HierarchicalMerger:BusyPoetsSearchstart-Tokenizer:NeatRadiosEndend", "markerEnd": "logo", "source": "HierarchicalMerger:BusyPoetsSearch", "sourceHandle": "start", "style": { "stroke": "rgba(91, 93, 106, 1)", "strokeWidth": 1 }, "target": "Tokenizer:NeatRadiosEnd", "targetHandle": "end", "type": "buttonEdge", "zIndex": 1001 } ] }, "history": [], "messages": [], "path": [], "retrieval": [] }, "avatar": 
"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAADAAAAAwCAYAAABXAvmHAAAACXBIWXMAABYlAAAWJQFJUiTwAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAABMaSURBVHgBbVprjF3Vdf72Puc+5s7Lnhm/x/ZgCHWAFLdVKEqogJY2ikIJEUSBhiZFIaVRKZj+KFHa1G7apBS1KlGpkh99EEUgykMiKlFpFCUIyS1pSTFCQLADHmNjjz3jec99nXP27rfW3ufOtZOxru/cc8/Zez2+9a3HHoMLfv7skjuuy/Li4wbmZn6c8t7rdWMMnHOw1kKuyWf56f9dfuSe8rO8y/fyjFyXn/7fy+/llSRJeNbyWW5pZQ2+jOzvcbiW1A67wv3FwelHp/vl7e18cOr3NrTQOcCb9/cLVQpcblIUxfrDZl0B2UffRSlVQp5z5ylXGqEnbFSg/935gsIn4bmoPC8i4T+qw//xMJ8URRZ7CqjwvvVDY5J9pcVE0J4l5Tbjoxg+COc8wtdiUW6a2AuEkZco7HrrqTAXeKp8uai0tWKsILzjq5Kk+rvhtcRENbw/nAPXixK6Yst1DvChfaUAsplYqTSzRxGFFeFsUEldbPW65b1BIYuAOLm5vGZU6XK9fkiWgpfXre69rlgqcKMXfbhB1ynkszH7qkgP6DpfnLptiiIc+xlXRryLRZwzUSgfrBE3UGGCsYKgIkwvNoJFXQ+O8nLhnd7xbh3Fcm+aprqn/B7ihEpH+NJMVM6qBxROGhv8nNrr6R97oMR4vwLBwtS4KGXyKoKJiyo+uai8yz9ZmDsHTIoBihA/Vr9yEXrBa/3wRLAvCnlAl0j5lqvysk4JOlnDYj3WqCJcbm5Oucw+9LmzDFoIbHxF9lPLCkxUEB/CIeDZB6bgPy6G7rJHa1UepZUq0BvTmkc3p7I0XrfNbQuiV7DM640hoFI3PYsHAnJBaBNgozKlia7psG7kQgnGfTylCL3ADewQAtCaCq3iVFi57n0ZxxSKgeW4YLaWYHm+wMQu4PIbEuy5po6pq6sY2lxFbUiYhJtXJOC76sW800a3VaC1lGH+WAevfx+YfsngnVcdOh1gYrMoI1okEQnB43mWhz0FlqqZwInXvZ9KSyT6qK08ldBcOYOpYgOG1SCBj8kKFcyeLDC2HfitL6b41Ts3YnCsyuclaMUTEqxpQIawTyYWr/P5nJasoz7kUB8BxnYWeN+vZ7ylzRubOPGywXNfK/Dysx5jVKQ2ZHpxkiZJdEjwgigpwSxBzyD+Hd+fUKzC2vQeEOsrZBKDueMeOy73+My/jGPylwaVZfIOnxNLwcYArfHBNCakAM3CdSKfFxSgUHgqMn0WmAti2Sbh1ETW9njij7p44Z8Mtl7MjQuD3NEDgkFCz0aCSAQRjBfzJ7tu8/38LIuLNW0P38TumlGP7P+Pcey+aphYLoKwIqCuWGbeCoUSxqgqjeqLMIKwmI15hEIrNjTKhXWohFKjC5jPm6gNLmPpdII//UAbI42UMPS9xBYBQcNW4POuGGZd+BC8gS4Fe0nF4MxJh6tur+Dvz+3C9l+s00KF4tBqLhAz2kiGRtlIc7FxCGGWR5bxwUO6vHjIqjVBkoCv8zKvoaoGSNJBenUbGmMpHjlXR23MIWtCvR1ShlcP5HmHy9DQJfbLNG5iGqd3cOJoji+9MIpbH96C9mre5yWjTlcLK3UmgszoCeFNocGcG7XVo/QhBchDHhBgm1TjAvSUsXUuMUAv1PhkVVJeVHgcnZUBHHi1hsYWmqMLJQUJYMn6gXByiYP1pJQkAcc2yXH6bY+HT2zC7g+O0OqZJI31hCWL9FQxIT0pZYQrmhlc4H7hdNqL75n+rt4RZeQVadkrxGp8Mg0wdMryvDSK9nJQIpfMzF0phvyvDAlFZn+1KfydOJx6exUPvTNAOqxrginpUwJTaBa+QAB/zF8avEmkuBjLNigCTUBiUXmGQUxKV
Y+YbogPUUrjqIjJi+ukkn8SVdRWGXOrA/jq0RSnT9gASGI/JUTSpBo8UP6kaQ0nj5/CAz8YwshWWi4vgj1NSaX0DuPTVEJG1LRvArVJUJbcDR/hHmsmX8a5QFUh5VRgR3ZSr/iuKufFMLoW73Oh9lRMpKM0bB33fzfFwntSQlQDCRRFTHpRifbaKq65NcHe62twmVjcoxfWQia1HN3/ZoJ7ja6uZqqQjym+lFxVtSYKG12EsqI1amWtVK1Rz3ifa5wInuV3Lx6PPtPiORQQvD6GvR9hsvwVq/obk4ZCslf703ULi0v43GMJceeDlWghNZaEaIMlwWND8Ic3onP8MnS+OwE7UESRgyAmVnaa9MI+uk6onxxClZwHWq1KVoSSRcgLwWCI0AtwsiGe4nveGcPvfstg4WQwqhBOWtJnc6GN2/+S6bnDJCG1h9zilkk0tUCRtEz97Xk8Pd3G8OvL+MgHh9Tlap+kiAFMjs9imS0QkDRQkYDshAAVtDDJoc79/nqNXmzAjo7x+Qz1O5cgpKV0K0nLB8MZG+GictYwfkkVu65sY+1cTPbhC2BluYXr/jBRo5sB1i4NWm6IbFOZ0x5AIr+5N8dH3RFcu4MiTS2jLIWz13IUR8gUL5HTc6+ZVZKUm8/R+ocM2ROb0f7HDXAzRulVY0NMVyO77RyDHaoBfQSgHtLE63WdkrYlfopsENfur6C1EAwfPZDgihtaqDQqrCgrWLyXbDTWgFmqo3LTGmrXnIVrT6DxKSaZD+/hQh0kO7h4h0ueoIBP8769m+iwFtIhMsQVThMP2ty8VdUaCBsp1/wqksk8JLm61yDUGHL1YEUtF6R65B0nqMQa8wLvSd/Hmollp2ed5fIBXPnJJTx2l0ddiEcy8dKcwyfvJKF1+fACrfpTJpbLKrDDQzDkYVTP8Muz3HAUlUlCB51o5Rqtx70Z3KgSTiMENStU1yT9DZP5zwSaLLot3joA985GZPMrsFe1lNkMS2tTSbRg1PpLvHaOBnl8mEmaMBwkfCfG0P7PWaQ3riK9lFDM2IvVath2aYHOWgzi1lqGi65OlGmKFaJqwCvmpW5xJ+vIn98KP78RrecXsPild9H+9jA6j04qBTYfYj0yxCTVIqZrxPvRceSPjCP7cYLOE23kSRN+cQEFGc4WTFJHqfFZ0uM4PdIlVbeoXB7YSj43H+Q64/x+iJ9ZelvmjfTGK+AObaMsbHZMoeX2jl9mvHZDjkGNTcXoFqkMCix9jiuPtAiZDvKVReQnV5AdGoQ7shHFu6TQt7jpm6S9I07rFzfIjZjn3fwZZEvzsFwLW1kWMJvaBkOSybForwlHI1+ch1tqo5ghxCq55kLDfZyGboLuczTGFqv5xxF+tVt4f5fB/cpRXk+QvUg4VmU/i0t+zbC8EQ8wG6YSTLSe9Jm+0QrU2WqH9q5FBWbm4Igiu0XqfRfoT8KnWEb3++eo5CotTAaiIvnyIpVaQcJEiPdzHSEFF5KTN02kt88i2dcOI5p2F4WYX5NhB8Xb9KRlXHQpzy8Q95u4z5UrVIJ7Dg+gOEuSyGSa0ozVrQmdh7R1RlxT5YXrFlH83wjSDXGKQLkGPk+B9tAb38tC+8fOyjpudsZg4sU65m+it5hDLC2bvr/A0KcyureKwU/XsPoAE15DWIV7NBh0O0KKwTDTVJUlA+PDJbEIIft4ZvmCcPT/W0XruYwC0rugTG+cRn6Ke5/LVTHJF1IBSAeNTsvp5MGteIwfaGHmY4Mx6TAWWBvVrpqhcTfpgwVd6+n2bG0O1VzqGEKCyhta1Fa5wYzIRwt1aZBkBe3DVHa8huomQmZMatY1LZ+zmSY6nO7U5hhJZ1mHfrTNzy0tVQRCtcmthNgy8qECFRF4fB5VNlF2AzSTv/tKjsqADhbYrGRJgIYMpyQ1DBKXnSxMJ5h4Cr/C9zmt31Uxuth3GaCLzBcp7006yn6av4tQWkhPI6ySWyojGUzY5mzIzKJ0MsHnzzaVJs0EK
ZuUbHc4rY/ABqbzkzkM3gts+HNS9LWDKN4cw+Bt/J6VsWTs4y8TahVSbMYFOq0EK7MOg6NpUEA5N9dJgF8KDbSrrdGSpL+V4ZBsBIPLRtO8tWSrLgMwJ94W6fRXKSyhVv8AP9cC3+fzC8wRG7D85RFSLK9/aE55X/KFXWshf9dj+L4aZm9hgtxtUB1vYPFuqVAp8KAQAjD72Rzj3yIdM1n+9MccJmyXYk/qbGaEk69IH+C0bExHwzApW1lCZdRg9cuXYvUru6kA+b8IjY2tMXxcNXQGjULHJfnCImG3htW/IX4fMRpi9YtZGHZYPmcM8MU5ZCcW0ZleQTrFpMkmqXN6Bu2ZU8jfY098kUXjrgbzRY5sgcw1wudG2wyNDtpvNVH/zdB9vPF8V9vMnHuS5XIMb/T40TMDSOqxq9oiKZva89UkPeYn1tB9nYsRw55VanduHt1zM+i8tSLRiIGbqsinGYz0QLG8yoXPoZhd1e5q8H4q80YHnRmuwXqrOMWAn2a9VR2Af69AdqqF7hnCrE64tXOMfmEQm57agOoV5HnXQpEwe+9rYvxpzpFuJ4ngFJ56IMGGTVYJJfVW23b86Nk67vrXeQqQYPR+BvJVzMZ7ibGBFNkiLXySw6h6Fdm79FLKYGMKrjJJOTYXw5/eSGWXsPJkUz1XmRrEwC0VrcEqmxrYfsig9b22xk5lNwP3QyPaju48MsEYypU65bNrGe17k51VjH+NuUTgIx2ddGhNBn97Ecff3IFT73SxeZsJY8t7dn/Cy3zl7HsGX32xicnLCRG2lORBrD7J4Fqp090G9RsYuCkZaI3ZsUFGkdGbVJorYT5qBya08gzkRmxnQrdhhqOznKrT6tUwi0pTZmUWKgFsE633VRiUg0YTSEEqPu3g5hW6tdEE909uRqPO/JE77Y1TmdNI1pzYZvHsQzX88dMdjlGUXTH0GVIgs7OXEqAttQvdNsDsm8vMk4tnTvldq8XOLJlEMtxGhBrRxQTldH6jVW457BWl8kJrYR9LZc0vvtRAUjQfKGaVIArOhqrDBZ55YAK+SdarlCP+MJlRHvekukPfSTB7rBYsJZVhu07XyVDT9pr28J1Z3zj0mvqbNi3ClcWMDqpCH1kJgwgbuiw5vNB3W1HYGCvUnMR2knRsyELFe3xmVntmR0Ml9N7xlzfh3/+WcbDBhi6tPLu4e/dv+3I2L8YsWN9849gSOsuxK4oNj5YVYSxRdvKhrkec8dv13tqqpcNMSEoOJ7WHvvrH7OGl4xcd+BZxkFaeIwSDWXqiubwV93CUObmHeMnXzxaw3tAEDJL2sTTfwTNfGUGN5XARRx9hcp2tN8cWsdU0ejpTCu/j8ZPTd6tDrzDxliZmVV9MHrxR3ltctxuE13lmGsUxITdYozmgm+/CfuaFyT0IBaI9/yTIlick2pny95Ex4Nt/1cUP/nmCg9jQcSHCpsQ14kymHK+XjZQJg1VVSC0kQy8bsjNC4RuGYOWYMHpOJ0s2Ch8hadjWrs7vwhcmOti2KxyIlOdz60dS8fzgvPEib9zGbuvv7l7DoSfHGPmpWlmtKzjtwcWFXjXOs1x5nGTCJFvrKHlOsRbGgWH86OKcqZxkmHh0EkoQySXV4XG8yqHBvbta2Dkl4/Vg5DDm9+cdT6nk/SePckPOVL2bDz702Sa++fvDqG8YZeYLxz/heChbh1KRx+MkH87ElP6CYOKR0u4hiONJjonHqAFwMW5yJIMkDTeJB1nrf/OODmFDGOcXHGn1/Zzngf6jVR2J88Ht2y0OkVbv2EoWeG0CtZEG+wYXBDExEJM45NVhfpxOqLfitDlu6uMYMdBuPIlh4EoZUx0awOyJSXzj1gYh08UCx/gbNzmNof7J4XnHX/Ga+fzOjx2jAlMXnpP1/yTc9PSMxIfFJ+6p4Dfu7HByF4a2RbfQDkkSlMjv4sFGSEZGPSOFn01lEhEn1aTOrF3F6TfqePmpKv7rcYf50w6bthht3vvZr1+W/
jPr+PmwuXvqpocZTPdpIpEpQZ+25an6uubkjhWDRTb+W5mdJy9OcNm1wJ4rqdxmh6Ex1u41E6bIjqc8rJvaqyzaWJedOcoDErakM0cSHPsf/n7SabW7YQJ6jlYeHQHnH7/2jz4vPKLl50fNH0zeeB3d+sPeGXHfue3Pw14/A8hkrLPKcrwjR0m836WKnCSOyKUMlya8SkapsdlJGUfVmnRiYVyox6fW9M4lfCwngggmkFyk6PKwvPxrgbC/v0i/vWfq5od5NnBfUSYqH2ecfVr/vL9x6FkG0q8k8ShUYJ7G46UQJjaumejELfxuje0dbocgNnF+2r9vaPZ1vBgR0Qf1rz84/fh+BRu3O0gMH05NEvNgSfP+PDf2w6o/qGTzXIdUIQY4CeIJYhZO2TW7h3O03Mv1XGdCPl5DzOQuFnQlZNEL9aKnzPofkrjDdVQPanzKfy8t/qT94Y17/43pm1Mse3UYnOK8aD8/Ftb/aOPCv4HQQ27JpElpufL0PemdQ2sxF1mqnFgrw8bEmJh+9oFO8kIO0Cz99QHbvvPg9BPrf+zR/7N/6uap3JmD9N2VXHKfFnsuTJh7R1F97/0e6o+Tdag5hY4QRJpUNCsnNhyb2pgvUAQFbYn7+Hyyvs40vfYdDoyffXD6iRf69/p/CbMWUUVYM2EAAAAASUVORK5CYII=" }