diff --git a/web/src/locales/en.ts b/web/src/locales/en.ts index d0ce59023..c5e5de483 100644 --- a/web/src/locales/en.ts +++ b/web/src/locales/en.ts @@ -1605,6 +1605,119 @@ This delimiter is used to split the input text into several text pieces echo of ceateAgent: 'Agent flow', createPipeline: 'Ingestion pipeline', chooseAgentType: 'Choose Agent Type', + parser: 'Parser', + parserDescription: + 'Extracts raw text and structure from files for downstream processing.', + tokenizer: 'Indexer', + tokenizerRequired: 'Please add the Indexer node first', + tokenizerDescription: + 'Transforms text into the required data structure (e.g., vector embeddings for Embedding Search) depending on the chosen search method.', + splitter: 'Token', + splitterDescription: + 'Split text into chunks by token length with optional delimiters and overlap.', + hierarchicalMergerDescription: + 'Split documents into sections by title hierarchy with regex rules for finer control.', + hierarchicalMerger: 'Title', + extractor: 'Transformer', + extractorDescription: + 'Use an LLM to extract structured insights from document chunks—such as summaries, classifications, etc.', + outputFormat: 'Output format', + fileFormats: 'File format', + fileFormatOptions: { + pdf: 'PDF', + spreadsheet: 'Spreadsheet', + image: 'Image', + email: 'Email', + 'text&markdown': 'Text & Markup', + word: 'Word', + slides: 'PPT', + audio: 'Audio', + }, + fields: 'Field', + addParser: 'Add Parser', + hierarchy: 'Hierarchy', + regularExpressions: 'Regular Expressions', + overlappedPercent: 'Overlapped percent (%)', + searchMethod: 'Search method', + searchMethodTip: `Defines how the content can be searched — by full-text, embedding, or both. +The Indexer will store the content in the corresponding data structures for the selected methods.`, + // file: 'File', + parserMethod: 'Parsing method', + // systemPrompt: 'System Prompt', + systemPromptPlaceholder: + 'Enter system prompt for image analysis, if empty the system default value will be used', + exportJson: 'Export JSON', + viewResult: 'View result', + running: 'Running', + summary: 'Summary', + keywords: 'Keywords', + questions: 'Questions', + metadata: 'Metadata', + fieldName: 'Result destination', + prompts: { + system: { + keywords: `Role +You are a text analyzer. + +Task +Extract the most important keywords/phrases of a given piece of text content. + +Requirements +- Summarize the text content, and give the top 5 important keywords/phrases. +- The keywords MUST be in the same language as the given piece of text content. +- The keywords are delimited by ENGLISH COMMA. +- Output keywords ONLY.`, + questions: `Role +You are a text analyzer. + +Task +Propose 3 questions about a given piece of text content. + +Requirements +- Understand and summarize the text content, and propose the top 3 important questions. +- The questions SHOULD NOT have overlapping meanings. +- The questions SHOULD cover the main content of the text as much as possible. +- The questions MUST be in the same language as the given piece of text content. +- One question per line. +- Output questions ONLY.`, + summary: `Act as a precise summarizer. Your task is to create a summary of the provided content that is both concise and faithful to the original. + +Key Instructions: +1. Accuracy: Strictly base the summary on the information given. Do not introduce any new facts, conclusions, or interpretations that are not explicitly stated. +2. Language: Write the summary in the same language as the source text. +3. Objectivity: Present the key points without bias, preserving the original intent and tone of the content. Do not editorialize. +4. Conciseness: Focus on the most important ideas, omitting minor details and fluff.`, + metadata: `Extract important structured information from the given content. Output ONLY a valid JSON string with no additional text. If no important structured information is found, output an empty JSON object: {}. + +Important structured information may include: names, dates, locations, events, key facts, numerical data, or other extractable entities.`, + }, + user: { + keywords: `Text Content +[Insert text here]`, + questions: `Text Content +[Insert text here]`, + summary: `Text to Summarize: +[Insert text here]`, + metadata: `Content: [INSERT CONTENT HERE]`, + }, + }, + cancel: 'Cancel', + swicthPromptMessage: + 'The prompt word will change. Please confirm whether to abandon the existing prompt word?', + tokenizerSearchMethodOptions: { + full_text: 'Full-text', + embedding: 'Embedding', + }, + filenameEmbeddingWeight: 'Filename embedding weight', + tokenizerFieldsOptions: { + text: 'Processed Text', + keywords: 'Keywords', + questions: 'Questions', + summary: 'Augmented Context', + }, + imageParseMethodOptions: { + ocr: 'OCR', + }, }, llmTools: { bad_calculator: { @@ -1705,125 +1818,6 @@ This delimiter is used to split the input text into several text pieces echo of

Are you sure you want to proceed?

`, unlinkPipelineModalConfirmText: 'Unlink', }, - dataflow: { - parser: 'Parser', - parserDescription: - 'Extracts raw text and structure from files for downstream processing.', - tokenizer: 'Indexer', - tokenizerRequired: 'Please add the Indexer node first', - tokenizerDescription: - 'Transforms text into the required data structure (e.g., vector embeddings for Embedding Search) depending on the chosen search method.', - splitter: 'Token', - splitterDescription: - 'Split text into chunks by token length with optional delimiters and overlap.', - hierarchicalMergerDescription: - 'Split documents into sections by title hierarchy with regex rules for finer control.', - hierarchicalMerger: 'Title', - extractor: 'Transformer', - extractorDescription: - 'Use an LLM to extract structured insights from document chunks—such as summaries, classifications, etc.', - outputFormat: 'Output format', - lang: 'Language', - fileFormats: 'File format', - fileFormatOptions: { - pdf: 'PDF', - spreadsheet: 'Spreadsheet', - image: 'Image', - email: 'Email', - 'text&markdown': 'Text & Markup', - word: 'Word', - slides: 'PPT', - audio: 'Audio', - }, - fields: 'Field', - addParser: 'Add Parser', - hierarchy: 'Hierarchy', - regularExpressions: 'Regular Expressions', - overlappedPercent: 'Overlapped percent (%)', - searchMethod: 'Search method', - searchMethodTip: `Defines how the content can be searched — by full-text, embedding, or both. -The Indexer will store the content in the corresponding data structures for the selected methods.`, - begin: 'File', - parserMethod: 'Parsing method', - systemPrompt: 'System Prompt', - systemPromptPlaceholder: - 'Enter system prompt for image analysis, if empty the system default value will be used', - exportJson: 'Export JSON', - viewResult: 'View result', - running: 'Running', - summary: 'Summary', - keywords: 'Keywords', - questions: 'Questions', - metadata: 'Metadata', - fieldName: 'Result destination', - prompts: { - system: { - keywords: `Role -You are a text analyzer. - -Task -Extract the most important keywords/phrases of a given piece of text content. - -Requirements -- Summarize the text content, and give the top 5 important keywords/phrases. -- The keywords MUST be in the same language as the given piece of text content. -- The keywords are delimited by ENGLISH COMMA. -- Output keywords ONLY.`, - questions: `Role -You are a text analyzer. - -Task -Propose 3 questions about a given piece of text content. - -Requirements -- Understand and summarize the text content, and propose the top 3 important questions. -- The questions SHOULD NOT have overlapping meanings. -- The questions SHOULD cover the main content of the text as much as possible. -- The questions MUST be in the same language as the given piece of text content. -- One question per line. -- Output questions ONLY.`, - summary: `Act as a precise summarizer. Your task is to create a summary of the provided content that is both concise and faithful to the original. - -Key Instructions: -1. Accuracy: Strictly base the summary on the information given. Do not introduce any new facts, conclusions, or interpretations that are not explicitly stated. -2. Language: Write the summary in the same language as the source text. -3. Objectivity: Present the key points without bias, preserving the original intent and tone of the content. Do not editorialize. -4. Conciseness: Focus on the most important ideas, omitting minor details and fluff.`, - metadata: `Extract important structured information from the given content. Output ONLY a valid JSON string with no additional text. If no important structured information is found, output an empty JSON object: {}. - -Important structured information may include: names, dates, locations, events, key facts, numerical data, or other extractable entities.`, - }, - user: { - keywords: `Text Content -[Insert text here]`, - questions: `Text Content -[Insert text here]`, - summary: `Text to Summarize: -[Insert text here]`, - metadata: `Content: [INSERT CONTENT HERE]`, - }, - }, - cancel: 'Cancel', - swicthPromptMessage: - 'The prompt word will change. Please confirm whether to abandon the existing prompt word?', - tokenizerSearchMethodOptions: { - full_text: 'Full-text', - embedding: 'Embedding', - }, - filenameEmbeddingWeight: 'Filename embedding weight', - tokenizerFieldsOptions: { - text: 'Processed Text', - keywords: 'Keywords', - questions: 'Questions', - summary: 'Augmented Context', - }, - imageParseMethodOptions: { - ocr: 'OCR', - }, - note: 'Note', - noteDescription: 'Note', - notePlaceholder: 'Please enter a note', - }, datasetOverview: { downloadTip: 'Files being downloaded from data sources. ', processingTip: 'Files being processed by Ingestion pipeline.', diff --git a/web/src/locales/zh.ts b/web/src/locales/zh.ts index 8c5bbf939..e5c117521 100644 --- a/web/src/locales/zh.ts +++ b/web/src/locales/zh.ts @@ -1511,6 +1511,93 @@ General:实体和关系提取提示来自 GitHub - microsoft/graphrag:基于 createFromTemplate: '从模板创建', importJsonFile: '导入 JSON 文件', chooseAgentType: '选择智能体类型', + parser: '解析器', + parserDescription: '从文件中提取原始文本和结构以供下游处理。', + tokenizer: '分词器', + tokenizerRequired: '请先添加Tokenizer节点', + tokenizerDescription: + '根据所选的搜索方法,将文本转换为所需的数据结构(例如,用于嵌入搜索的向量嵌入)。', + splitter: '按字符分割', + splitterDescription: + '根据分词器长度将文本拆分成块,并带有可选的分隔符和重叠。', + hierarchicalMergerDescription: + '使用正则表达式规则按标题层次结构将文档拆分成多个部分,以实现更精细的控制。', + hierarchicalMerger: '按标题分割', + extractor: '提取器', + extractorDescription: + '使用 LLM 从文档块(例如摘要、分类等)中提取结构化见解。', + outputFormat: '输出格式', + fileFormats: '文件格式', + fields: '字段', + addParser: '增加解析器', + hierarchy: '层次结构', + regularExpressions: '正则表达式', + overlappedPercent: '重叠百分比(%)', + searchMethod: '搜索方法', + searchMethodTip: `决定该数据集启用的搜索方式,可选择全文、向量,或两者兼有。 +Tokenizer 会根据所选方式将内容存储为对应的数据结构。`, + filenameEmbdWeight: '文件名嵌入权重', + parserMethod: '解析方法', + systemPromptPlaceholder: + '请输入用于图像分析的系统提示词,若为空则使用系统缺省值', + exportJson: '导出 JSON', + viewResult: '查看结果', + running: '运行中', + summary: '增强上下文', + keywords: '关键词', + questions: '问题', + metadata: '元数据', + fieldName: '结果目的地', + prompts: { + system: { + keywords: `角色 +你是一名文本分析员。 + +任务 +从给定的文本内容中提取最重要的关键词/短语。 + +要求 +- 总结文本内容,并给出最重要的5个关键词/短语。 +- 关键词必须与给定的文本内容使用相同的语言。 +- 关键词之间用英文逗号分隔。 +- 仅输出关键词。`, + questions: `角色 +你是一名文本分析员。 + +任务 +针对给定的文本内容提出3个问题。 + +要求 +- 理解并总结文本内容,并提出最重要的3个问题。 +- 问题的含义不应重叠。 +- 问题应尽可能涵盖文本的主要内容。 +- 问题必须与给定的文本内容使用相同的语言。 +- 每行一个问题。 +- 仅输出问题。`, + summary: `扮演一个精准的摘要者。你的任务是为提供的内容创建一个简洁且忠实于原文的摘要。 + +关键说明: +1. 准确性:摘要必须严格基于所提供的信息。请勿引入任何未明确说明的新事实、结论或解释。 +2. 语言:摘要必须使用与原文相同的语言。 +3. 客观性:不带偏见地呈现要点,保留内容的原始意图和语气。请勿进行编辑。 +4. 简洁性:专注于最重要的思想,省略细节和多余的内容。`, + metadata: `从给定内容中提取重要的结构化信息。仅输出有效的 JSON 字符串,不包含任何附加文本。如果未找到重要的结构化信息,则输出一个空的 JSON 对象:{}。 + +重要的结构化信息可能包括:姓名、日期、地点、事件、关键事实、数字数据或其他可提取实体。`, + }, + user: { + keywords: `文本内容 +[在此处插入文本]`, + questions: `文本内容 +[在此处插入文本]`, + summary: `要总结的文本: +[在此处插入文本]`, + metadata: `内容:[在此处插入内容]`, + }, + }, + cancel: '取消', + filenameEmbeddingWeight: '文件名嵌入权重', + switchPromptMessage: '提示词将发生变化,请确认是否放弃已有提示词?', }, footer: { profile: 'All rights reserved @ React', @@ -1618,101 +1705,6 @@ General:实体和关系提取提示来自 GitHub - microsoft/graphrag:基于

你确定要继续吗?

`, unlinkPipelineModalConfirmText: '解绑', }, - dataflow: { - parser: '解析器', - parserDescription: '从文件中提取原始文本和结构以供下游处理。', - tokenizer: '分词器', - tokenizerRequired: '请先添加Tokenizer节点', - tokenizerDescription: - '根据所选的搜索方法,将文本转换为所需的数据结构(例如,用于嵌入搜索的向量嵌入)。', - splitter: '按字符分割', - splitterDescription: - '根据分词器长度将文本拆分成块,并带有可选的分隔符和重叠。', - hierarchicalMergerDescription: - '使用正则表达式规则按标题层次结构将文档拆分成多个部分,以实现更精细的控制。', - hierarchicalMerger: '按标题分割', - extractor: '提取器', - extractorDescription: - '使用 LLM 从文档块(例如摘要、分类等)中提取结构化见解。', - outputFormat: '输出格式', - lang: '语言', - fileFormats: '文件格式', - fields: '字段', - addParser: '增加解析器', - hierarchy: '层次结构', - regularExpressions: '正则表达式', - overlappedPercent: '重叠百分比(%)', - searchMethod: '搜索方法', - searchMethodTip: `决定该数据集启用的搜索方式,可选择全文、向量,或两者兼有。 -Tokenizer 会根据所选方式将内容存储为对应的数据结构。`, - filenameEmbdWeight: '文件名嵌入权重', - begin: '文件', - parserMethod: '解析方法', - systemPrompt: '系统提示词', - systemPromptPlaceholder: - '请输入用于图像分析的系统提示词,若为空则使用系统缺省值', - exportJson: '导出 JSON', - viewResult: '查看结果', - running: '运行中', - summary: '增强上下文', - keywords: '关键词', - questions: '问题', - metadata: '元数据', - fieldName: '结果目的地', - prompts: { - system: { - keywords: `角色 -你是一名文本分析员。 - -任务 -从给定的文本内容中提取最重要的关键词/短语。 - -要求 -- 总结文本内容,并给出最重要的5个关键词/短语。 -- 关键词必须与给定的文本内容使用相同的语言。 -- 关键词之间用英文逗号分隔。 -- 仅输出关键词。`, - questions: `角色 -你是一名文本分析员。 - -任务 -针对给定的文本内容提出3个问题。 - -要求 -- 理解并总结文本内容,并提出最重要的3个问题。 -- 问题的含义不应重叠。 -- 问题应尽可能涵盖文本的主要内容。 -- 问题必须与给定的文本内容使用相同的语言。 -- 每行一个问题。 -- 仅输出问题。`, - summary: `扮演一个精准的摘要者。你的任务是为提供的内容创建一个简洁且忠实于原文的摘要。 - -关键说明: -1. 准确性:摘要必须严格基于所提供的信息。请勿引入任何未明确说明的新事实、结论或解释。 -2. 语言:摘要必须使用与原文相同的语言。 -3. 客观性:不带偏见地呈现要点,保留内容的原始意图和语气。请勿进行编辑。 -4. 简洁性:专注于最重要的思想,省略细节和多余的内容。`, - metadata: `从给定内容中提取重要的结构化信息。仅输出有效的 JSON 字符串,不包含任何附加文本。如果未找到重要的结构化信息,则输出一个空的 JSON 对象:{}。 - -重要的结构化信息可能包括:姓名、日期、地点、事件、关键事实、数字数据或其他可提取实体。`, - }, - user: { - keywords: `文本内容 -[在此处插入文本]`, - questions: `文本内容 -[在此处插入文本]`, - summary: `要总结的文本: -[在此处插入文本]`, - metadata: `内容:[在此处插入内容]`, - }, - }, - cancel: '取消', - filenameEmbeddingWeight: '文件名嵌入权重', - switchPromptMessage: '提示词将发生变化,请确认是否放弃已有提示词?', - note: '注释', - noteDescription: '注释', - notePlaceholder: '请输入注释', - }, datasetOverview: { downloadTip: '正在从数据源下载文件。', processingTip: '正在由pipeline处理文件。', diff --git a/web/src/pages/agent/canvas/node/file-node.tsx b/web/src/pages/agent/canvas/node/file-node.tsx index 41e0b2507..d868d70fa 100644 --- a/web/src/pages/agent/canvas/node/file-node.tsx +++ b/web/src/pages/agent/canvas/node/file-node.tsx @@ -36,7 +36,7 @@ function InnerFileNode({ data, id, selected }: NodeProps) {
- {t(`dataflow.begin`)} + {t(`flow.begin`)}
diff --git a/web/src/pages/agent/canvas/node/parser-node.tsx b/web/src/pages/agent/canvas/node/parser-node.tsx index 15539d0b8..d66c79c45 100644 --- a/web/src/pages/agent/canvas/node/parser-node.tsx +++ b/web/src/pages/agent/canvas/node/parser-node.tsx @@ -46,7 +46,7 @@ function ParserNode({ className="flex flex-col text-text-primary gap-1" > Parser {idx + 1} - {t(`dataflow.fileFormatOptions.${x.fileFormat}`)} + {t(`flow.fileFormatOptions.${x.fileFormat}`)} )} diff --git a/web/src/pages/agent/canvas/node/tokenizer-node.tsx b/web/src/pages/agent/canvas/node/tokenizer-node.tsx index 20b261bf4..830ababdd 100644 --- a/web/src/pages/agent/canvas/node/tokenizer-node.tsx +++ b/web/src/pages/agent/canvas/node/tokenizer-node.tsx @@ -38,12 +38,10 @@ function TokenizerNode({ > - - {t('dataflow.searchMethod')} - + {t('flow.searchMethod')}
    {data.form?.search_method.map((x) => ( -
  • {t(`dataflow.tokenizerSearchMethodOptions.${x}`)}
  • +
  • {t(`flow.tokenizerSearchMethodOptions.${x}`)}
  • ))}
diff --git a/web/src/pages/agent/form/extractor-form/index.tsx b/web/src/pages/agent/form/extractor-form/index.tsx index c178e8a46..391d8c09e 100644 --- a/web/src/pages/agent/form/extractor-form/index.tsx +++ b/web/src/pages/agent/form/extractor-form/index.tsx @@ -47,7 +47,7 @@ const ExtractorForm = ({ node }: INextOperatorForm) => { const promptOptions = useBuildNodeOutputOptions(node?.id); - const options = buildOptions(ContextGeneratorFieldName, t, 'dataflow'); + const options = buildOptions(ContextGeneratorFieldName, t, 'flow'); const { handleFieldNameChange, @@ -63,7 +63,7 @@ const ExtractorForm = ({ node }: INextOperatorForm) => {
- + {(field) => ( { @@ -93,7 +93,7 @@ const ExtractorForm = ({ node }: INextOperatorForm) => { {visible && ( ) { const setPromptValue = useCallback( (field: keyof ExtractorFormSchemaType, key: string, value: string) => { - form.setValue(field, t(`dataflow.prompts.${key}.${value}`), { + form.setValue(field, t(`flow.prompts.${key}.${value}`), { shouldDirty: true, shouldValidate: true, }); diff --git a/web/src/pages/agent/form/hierarchical-merger-form/index.tsx b/web/src/pages/agent/form/hierarchical-merger-form/index.tsx index 623530792..0083b92a4 100644 --- a/web/src/pages/agent/form/hierarchical-merger-form/index.tsx +++ b/web/src/pages/agent/form/hierarchical-merger-form/index.tsx @@ -98,7 +98,7 @@ export function RegularExpressions({ - {t('dataflow.regularExpressions')} + {t('flow.regularExpressions')}
{fields.map((field, index) => ( @@ -158,7 +158,7 @@ const HierarchicalMergerForm = ({ node }: INextOperatorForm) => { return ( - + {fields.map((field, index) => ( diff --git a/web/src/pages/agent/form/parser-form/common-form-fields.tsx b/web/src/pages/agent/form/parser-form/common-form-fields.tsx index a44b22a83..d26e51819 100644 --- a/web/src/pages/agent/form/parser-form/common-form-fields.tsx +++ b/web/src/pages/agent/form/parser-form/common-form-fields.tsx @@ -50,7 +50,7 @@ export function OutputFormatFormField({ return ( ); } @@ -92,7 +92,7 @@ export function LanguageFormField({ prefix }: CommonProps) { return ( {(field) => ( {(field) => ( -