mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Feat: Use data pipeline to visualize the parsing configuration of the knowledge base (#10423)
### What problem does this PR solve? #9869 ### Type of change - [x] New Feature (non-breaking change which adds functionality) --------- Signed-off-by: dependabot[bot] <support@github.com> Signed-off-by: jinhai <haijin.chn@gmail.com> Signed-off-by: Jin Hai <haijin.chn@gmail.com> Co-authored-by: chanx <1243304602@qq.com> Co-authored-by: balibabu <cike8899@users.noreply.github.com> Co-authored-by: Lynn <lynn_inf@hotmail.com> Co-authored-by: 纷繁下的无奈 <zhileihuang@126.com> Co-authored-by: huangzl <huangzl@shinemo.com> Co-authored-by: writinwaters <93570324+writinwaters@users.noreply.github.com> Co-authored-by: Wilmer <33392318@qq.com> Co-authored-by: Adrian Weidig <adrianweidig@gmx.net> Co-authored-by: Zhichang Yu <yuzhichang@gmail.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: Yongteng Lei <yongtengrey@outlook.com> Co-authored-by: Liu An <asiro@qq.com> Co-authored-by: buua436 <66937541+buua436@users.noreply.github.com> Co-authored-by: BadwomanCraZY <511528396@qq.com> Co-authored-by: cucusenok <31804608+cucusenok@users.noreply.github.com> Co-authored-by: Russell Valentine <russ@coldstonelabs.org> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Billy Bao <newyorkupperbay@gmail.com> Co-authored-by: Zhedong Cen <cenzhedong2@126.com> Co-authored-by: TensorNull <129579691+TensorNull@users.noreply.github.com> Co-authored-by: TensorNull <tensor.null@gmail.com> Co-authored-by: TeslaZY <TeslaZY@outlook.com> Co-authored-by: Ajay <160579663+aybanda@users.noreply.github.com> Co-authored-by: AB <aj@Ajays-MacBook-Air.local> Co-authored-by: 天海蒼灆 <huangaoqin@tecpie.com> Co-authored-by: He Wang <wanghechn@qq.com> Co-authored-by: Atsushi Hatakeyama <atu729@icloud.com> Co-authored-by: Jin Hai <haijin.chn@gmail.com> Co-authored-by: Mohamed Mathari <155896313+melmathari@users.noreply.github.com> Co-authored-by: Mohamed Mathari <nocodeventure@Mac-mini-van-Mohamed.fritz.box> Co-authored-by: Stephen 
Hu <stephenhu@seismic.com> Co-authored-by: Shaun Zhang <zhangwfjh@users.noreply.github.com> Co-authored-by: zhimeng123 <60221886+zhimeng123@users.noreply.github.com> Co-authored-by: mxc <mxc@example.com> Co-authored-by: Dominik Novotný <50611433+SgtMarmite@users.noreply.github.com> Co-authored-by: EVGENY M <168018528+rjohny55@users.noreply.github.com> Co-authored-by: mcoder6425 <mcoder64@gmail.com> Co-authored-by: lemsn <lemsn@msn.com> Co-authored-by: lemsn <lemsn@126.com> Co-authored-by: Adrian Gora <47756404+adagora@users.noreply.github.com> Co-authored-by: Womsxd <45663319+Womsxd@users.noreply.github.com> Co-authored-by: FatMii <39074672+FatMii@users.noreply.github.com>
This commit is contained in:
@ -94,12 +94,20 @@ export default {
|
||||
noMoreData: '没有更多数据了',
|
||||
},
|
||||
knowledgeDetails: {
|
||||
fileSize: '文件大小',
|
||||
fileType: '文件类型',
|
||||
uploadedBy: '创建者',
|
||||
notGenerated: '未生成',
|
||||
generatedOn: '生成于',
|
||||
subbarFiles: '文件列表',
|
||||
generate: '生成',
|
||||
raptor: 'Raptor',
|
||||
knowledgeGraph: '知识图谱',
|
||||
processingType: '处理类型',
|
||||
dataPipeline: '数据管道',
|
||||
operations: '操作',
|
||||
taskId: '任务ID',
|
||||
duration: '耗时',
|
||||
details: '详情',
|
||||
status: '状态',
|
||||
task: '任务',
|
||||
startDate: '开始时间',
|
||||
@ -111,7 +119,7 @@ export default {
|
||||
success: '成功',
|
||||
failed: '失败',
|
||||
completed: '已完成',
|
||||
processLog: '处理进度日志',
|
||||
datasetLog: '知识库日志',
|
||||
created: '创建于',
|
||||
learnMore: '了解更多',
|
||||
general: '通用',
|
||||
@ -130,7 +138,7 @@ export default {
|
||||
name: '名称',
|
||||
namePlaceholder: '请输入名称',
|
||||
doc: '文档',
|
||||
datasetDescription: '😉 解析成功后才能问答哦。',
|
||||
datasetDescription: '解析成功后才能问答哦。',
|
||||
addFile: '新增文件',
|
||||
searchFiles: '搜索文件',
|
||||
localFiles: '本地文件',
|
||||
@ -246,12 +254,29 @@ export default {
|
||||
theDocumentBeingParsedCannotBeDeleted: '正在解析的文档不能被删除',
|
||||
},
|
||||
knowledgeConfiguration: {
|
||||
deleteGenerateModalContent: `
|
||||
<p>删除生成的 <strong class='text-text-primary'>{{type}}</strong> 结果
|
||||
将从此数据集中移除所有派生实体和关系。
|
||||
您的原始文件将保持不变。</p>
|
||||
<br/>
|
||||
是否要继续?
|
||||
`,
|
||||
extractRaptor: '从文档中提取Raptor',
|
||||
extractKnowledgeGraph: '从文档中提取知识图谱',
|
||||
filterPlaceholder: '请输入',
|
||||
fileFilterTip: '',
|
||||
fileFilter: '正则匹配表达式',
|
||||
setDefaultTip: '',
|
||||
setDefault: '设置默认',
|
||||
eidtLinkDataPipeline: '编辑数据流',
|
||||
linkPipelineSetTip: '管理与此数据集的数据管道链接',
|
||||
default: '默认',
|
||||
dataPipeline: '数据流',
|
||||
linkDataPipeline: '关联数据流',
|
||||
enableAutoGenerate: '是否启用自动生成',
|
||||
teamPlaceholder: '请选择团队',
|
||||
dataFlowPlaceholder: '请选择数据流',
|
||||
buildItFromScratch: '从零开始构建',
|
||||
useRAPTORToEnhanceRetrieval: '使用 RAPTOR 提升检索效果',
|
||||
extractKnowledgeGraph: '知识图谱提取',
|
||||
dataFlow: '数据流',
|
||||
parseType: '切片方法',
|
||||
manualSetup: '手动设置',
|
||||
@ -1471,6 +1496,11 @@ General:实体和关系提取提示来自 GitHub - microsoft/graphrag:基于
|
||||
sqlStatementTip:
|
||||
'在此处编写您的 SQL 查询。您可以使用变量、原始 SQL,或使用变量语法混合使用两者。',
|
||||
frameworkPrompts: '框架',
|
||||
release: '发布',
|
||||
createFromBlank: '从空白创建',
|
||||
createFromTemplate: '从模板创建',
|
||||
importJsonFile: '导入 JSON 文件',
|
||||
chooseAgentType: '选择智能体类型',
|
||||
},
|
||||
footer: {
|
||||
profile: 'All rights reserved @ React',
|
||||
@ -1494,6 +1524,17 @@ General:实体和关系提取提示来自 GitHub - microsoft/graphrag:基于
|
||||
okText: '确认',
|
||||
cancelText: '取消',
|
||||
},
|
||||
mcp: {
|
||||
export: '导出',
|
||||
import: '导入',
|
||||
url: 'URL',
|
||||
serverType: '服务器类型',
|
||||
addMCP: '添加 MCP',
|
||||
editMCP: '编辑 MCP',
|
||||
toolsAvailable: '可用的工具',
|
||||
mcpServers: 'MCP 服务器',
|
||||
customizeTheListOfMcpServers: '自定义 MCP 服务器列表',
|
||||
},
|
||||
search: {
|
||||
searchApps: '搜索',
|
||||
createSearch: '创建查询',
|
||||
@ -1540,14 +1581,127 @@ General:实体和关系提取提示来自 GitHub - microsoft/graphrag:基于
|
||||
parseSummaryTip: '解析器: deepdoc',
|
||||
rerunFromCurrentStep: '从当前步骤重新运行',
|
||||
rerunFromCurrentStepTip: '已修改,点击重新运行。',
|
||||
confirmRerun: '确认重新运行流程',
|
||||
confirmRerunModalContent: `
|
||||
<p class="text-sm text-text-disabled font-medium mb-2">
|
||||
您即将从 <strong class="text-text-primary">{{step}}</strong> 步骤开始重新运行该过程
|
||||
</p>
|
||||
<p class="text-sm mb-3 text-text-secondary">这将:</p>
|
||||
<ul class="list-disc list-inside space-y-1 text-sm text-text-secondary">
|
||||
<li>从当前步骤开始覆盖现有结果</li>
|
||||
<li>创建新的日志条目进行跟踪</li>
|
||||
<li>之前的步骤将保持不变</li>
|
||||
</ul>`,
|
||||
changeStepModalTitle: '切换步骤警告',
|
||||
changeStepModalContent: `
|
||||
<p>您目前正在编辑此阶段的结果。</p>
|
||||
<p>如果您切换到后续阶段,您的更改将会丢失。</p>
|
||||
<p>要保留这些更改,请点击“重新运行”以重新运行当前阶段。</p> `,
|
||||
changeStepModalConfirmText: '继续切换',
|
||||
changeStepModalCancelText: '取消',
|
||||
unlinkPipelineModalTitle: '解绑数据流',
|
||||
unlinkPipelineModalContent: `
|
||||
<p>一旦取消链接,该数据集将不再连接到当前数据管道。</p>
|
||||
<p>正在解析的文件将继续解析,直到完成。</p>
|
||||
<p>尚未解析的文件将不再被处理。</p> <br/>
|
||||
<p>你确定要继续吗?</p> `,
|
||||
unlinkPipelineModalConfirmText: '解绑',
|
||||
},
|
||||
dataflow: {
|
||||
parser: '解析器',
|
||||
parserDescription: '解析器',
|
||||
chunker: '分块器',
|
||||
chunkerDescription: '分块器',
|
||||
parserDescription: '从文件中提取原始文本和结构以供下游处理。',
|
||||
tokenizer: '分词器',
|
||||
tokenizerDescription: '分词器',
|
||||
tokenizerDescription:
|
||||
'根据所选的搜索方法,将文本转换为所需的数据结构(例如,用于嵌入搜索的向量嵌入)。',
|
||||
splitter: '分词器拆分器',
|
||||
splitterDescription:
|
||||
'根据分词器长度将文本拆分成块,并带有可选的分隔符和重叠。',
|
||||
hierarchicalMergerDescription:
|
||||
'使用正则表达式规则按标题层次结构将文档拆分成多个部分,以实现更精细的控制。',
|
||||
hierarchicalMerger: '标题拆分器',
|
||||
extractor: '提取器',
|
||||
extractorDescription:
|
||||
'使用 LLM 从文档块(例如摘要、分类等)中提取结构化见解。',
|
||||
outputFormat: '输出格式',
|
||||
lang: '语言',
|
||||
fileFormats: '文件格式',
|
||||
fields: '字段',
|
||||
addParser: '增加解析器',
|
||||
hierarchy: '层次结构',
|
||||
regularExpressions: '正则表达式',
|
||||
overlappedPercent: '重叠百分比',
|
||||
searchMethod: '搜索方法',
|
||||
filenameEmbdWeight: '文件名嵌入权重',
|
||||
begin: '文件',
|
||||
parserMethod: '解析方法',
|
||||
systemPrompt: '系统提示词',
|
||||
systemPromptPlaceholder:
|
||||
'请输入用于图像分析的系统提示词,若为空则使用系统缺省值',
|
||||
exportJson: '导出 JSON',
|
||||
viewResult: '查看结果',
|
||||
running: '运行中',
|
||||
summary: '增强上下文',
|
||||
keywords: '关键词',
|
||||
questions: '问题',
|
||||
metadata: '元数据',
|
||||
fieldName: '结果目的地',
|
||||
prompts: {
|
||||
system: {
|
||||
keywords: `角色
|
||||
你是一名文本分析员。
|
||||
|
||||
任务
|
||||
从给定的文本内容中提取最重要的关键词/短语。
|
||||
|
||||
要求
|
||||
- 总结文本内容,并给出最重要的5个关键词/短语。
|
||||
- 关键词必须与给定的文本内容使用相同的语言。
|
||||
- 关键词之间用英文逗号分隔。
|
||||
- 仅输出关键词。`,
|
||||
questions: `角色
|
||||
你是一名文本分析员。
|
||||
|
||||
任务
|
||||
针对给定的文本内容提出3个问题。
|
||||
|
||||
要求
|
||||
- 理解并总结文本内容,并提出最重要的3个问题。
|
||||
- 问题的含义不应重叠。
|
||||
- 问题应尽可能涵盖文本的主要内容。
|
||||
- 问题必须与给定的文本内容使用相同的语言。
|
||||
- 每行一个问题。
|
||||
- 仅输出问题。`,
|
||||
summary: `扮演一个精准的摘要者。你的任务是为提供的内容创建一个简洁且忠实于原文的摘要。
|
||||
|
||||
关键说明:
|
||||
1. 准确性:摘要必须严格基于所提供的信息。请勿引入任何未明确说明的新事实、结论或解释。
|
||||
2. 语言:摘要必须使用与原文相同的语言。
|
||||
3. 客观性:不带偏见地呈现要点,保留内容的原始意图和语气。请勿进行编辑。
|
||||
4. 简洁性:专注于最重要的思想,省略细节和多余的内容。`,
|
||||
metadata: `从给定内容中提取重要的结构化信息。仅输出有效的 JSON 字符串,不包含任何附加文本。如果未找到重要的结构化信息,则输出一个空的 JSON 对象:{}。
|
||||
|
||||
重要的结构化信息可能包括:姓名、日期、地点、事件、关键事实、数字数据或其他可提取实体。`,
|
||||
},
|
||||
user: {
|
||||
keywords: `文本内容
|
||||
[在此处插入文本]`,
|
||||
questions: `文本内容
|
||||
[在此处插入文本]`,
|
||||
summary: `要总结的文本:
|
||||
[在此处插入文本]`,
|
||||
metadata: `内容:[在此处插入内容]`,
|
||||
},
|
||||
},
|
||||
cancel: '取消',
|
||||
filenameEmbeddingWeight: '文件名嵌入权重',
|
||||
switchPromptMessage: '提示词将发生变化,请确认是否放弃已有提示词?',
|
||||
},
|
||||
datasetOverview: {
|
||||
downloadTip: '正在从数据源下载文件。',
|
||||
processingTip: '正在由数据流处理文件。',
|
||||
totalFiles: '文件总数',
|
||||
downloading: '正在下载',
|
||||
processing: '正在处理',
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user