mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
### What problem does this PR solve? #9869 ### Type of change - [x] New Feature (non-breaking change which adds functionality) --------- Signed-off-by: dependabot[bot] <support@github.com> Signed-off-by: jinhai <haijin.chn@gmail.com> Signed-off-by: Jin Hai <haijin.chn@gmail.com> Co-authored-by: chanx <1243304602@qq.com> Co-authored-by: balibabu <cike8899@users.noreply.github.com> Co-authored-by: Lynn <lynn_inf@hotmail.com> Co-authored-by: 纷繁下的无奈 <zhileihuang@126.com> Co-authored-by: huangzl <huangzl@shinemo.com> Co-authored-by: writinwaters <93570324+writinwaters@users.noreply.github.com> Co-authored-by: Wilmer <33392318@qq.com> Co-authored-by: Adrian Weidig <adrianweidig@gmx.net> Co-authored-by: Zhichang Yu <yuzhichang@gmail.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: Yongteng Lei <yongtengrey@outlook.com> Co-authored-by: Liu An <asiro@qq.com> Co-authored-by: buua436 <66937541+buua436@users.noreply.github.com> Co-authored-by: BadwomanCraZY <511528396@qq.com> Co-authored-by: cucusenok <31804608+cucusenok@users.noreply.github.com> Co-authored-by: Russell Valentine <russ@coldstonelabs.org> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Billy Bao <newyorkupperbay@gmail.com> Co-authored-by: Zhedong Cen <cenzhedong2@126.com> Co-authored-by: TensorNull <129579691+TensorNull@users.noreply.github.com> Co-authored-by: TensorNull <tensor.null@gmail.com> Co-authored-by: TeslaZY <TeslaZY@outlook.com> Co-authored-by: Ajay <160579663+aybanda@users.noreply.github.com> Co-authored-by: AB <aj@Ajays-MacBook-Air.local> Co-authored-by: 天海蒼灆 <huangaoqin@tecpie.com> Co-authored-by: He Wang <wanghechn@qq.com> Co-authored-by: Atsushi Hatakeyama <atu729@icloud.com> Co-authored-by: Jin Hai <haijin.chn@gmail.com> Co-authored-by: Mohamed Mathari <155896313+melmathari@users.noreply.github.com> Co-authored-by: Mohamed Mathari <nocodeventure@Mac-mini-van-Mohamed.fritz.box> Co-authored-by: Stephen 
Hu <stephenhu@seismic.com> Co-authored-by: Shaun Zhang <zhangwfjh@users.noreply.github.com> Co-authored-by: zhimeng123 <60221886+zhimeng123@users.noreply.github.com> Co-authored-by: mxc <mxc@example.com> Co-authored-by: Dominik Novotný <50611433+SgtMarmite@users.noreply.github.com> Co-authored-by: EVGENY M <168018528+rjohny55@users.noreply.github.com> Co-authored-by: mcoder6425 <mcoder64@gmail.com> Co-authored-by: lemsn <lemsn@msn.com> Co-authored-by: lemsn <lemsn@126.com> Co-authored-by: Adrian Gora <47756404+adagora@users.noreply.github.com> Co-authored-by: Womsxd <45663319+Womsxd@users.noreply.github.com> Co-authored-by: FatMii <39074672+FatMii@users.noreply.github.com>
140 lines
3.6 KiB
JSON
140 lines
3.6 KiB
JSON
{
|
|
"components": {
|
|
"File": {
|
|
"obj":{
|
|
"component_name": "File",
|
|
"params": {
|
|
}
|
|
},
|
|
"downstream": ["Parser:0"],
|
|
"upstream": []
|
|
},
|
|
"Parser:0": {
|
|
"obj": {
|
|
"component_name": "Parser",
|
|
"params": {
|
|
"setups": {
|
|
"pdf": {
|
|
"parse_method": "deepdoc",
|
|
"vlm_name": "",
|
|
"lang": "Chinese",
|
|
"suffix": [
|
|
"pdf"
|
|
],
|
|
"output_format": "json"
|
|
},
|
|
"spreadsheet": {
|
|
"suffix": [
|
|
"xls",
|
|
"xlsx",
|
|
"csv"
|
|
],
|
|
"output_format": "html"
|
|
},
|
|
"word": {
|
|
"suffix": [
|
|
"doc",
|
|
"docx"
|
|
],
|
|
"output_format": "json"
|
|
},
|
|
"slides": {
|
|
"parse_method": "presentation",
|
|
"suffix": [
|
|
"pptx"
|
|
],
|
|
"output_format": "json"
|
|
},
|
|
"markdown": {
|
|
"suffix": [
|
|
"md",
|
|
"markdown"
|
|
],
|
|
"output_format": "json"
|
|
},
|
|
"text": {
|
|
"suffix": ["txt"],
|
|
"output_format": "json"
|
|
},
|
|
"image": {
|
|
"parse_method": "vlm",
|
|
"llm_id":"glm-4.5v",
|
|
"lang": "Chinese",
|
|
"suffix": [
|
|
"jpg",
|
|
"jpeg",
|
|
"png",
|
|
"gif"
|
|
],
|
|
"output_format": "text"
|
|
},
|
|
"audio": {
|
|
"suffix": [
|
|
"da",
|
|
"wave",
|
|
"wav",
|
|
"mp3",
|
|
"aac",
|
|
"flac",
|
|
"ogg",
|
|
"aiff",
|
|
"au",
|
|
"midi",
|
|
"wma",
|
|
"realaudio",
|
|
"vqf",
|
|
"oggvorbis",
|
|
"ape"
|
|
],
|
|
"lang": "Chinese",
|
|
"llm_id": "SenseVoiceSmall",
|
|
"output_format": "json"
|
|
},
|
|
"email": {
|
|
"suffix": [
|
|
"msg"
|
|
],
|
|
"fields": [
|
|
"from",
|
|
"to",
|
|
"cc",
|
|
"bcc",
|
|
"date",
|
|
"subject",
|
|
"body",
|
|
"attachments"
|
|
],
|
|
"output_format": "json"
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"downstream": ["Splitter:0"],
|
|
"upstream": ["File"]
|
|
},
|
|
"Splitter:0": {
|
|
"obj": {
|
|
"component_name": "Splitter",
|
|
"params": {
|
|
"chunk_token_size": 512,
|
|
"delimiters": ["\n"],
|
|
"overlapped_percent": 0
|
|
}
|
|
},
|
|
"downstream": ["Tokenizer:0"],
|
|
"upstream": ["Parser:0"]
|
|
},
|
|
"Tokenizer:0": {
|
|
"obj": {
|
|
"component_name": "Tokenizer",
|
|
"params": {
|
|
}
|
|
},
|
|
"downstream": [],
|
|
"upstream": ["Splitter:0"]
|
|
}
|
|
},
|
|
"path": []
|
|
}
|
|
|