Feat: Move the pipeline translation field to flow #9869 (#10697)

### What problem does this PR solve?

Feat: Move the ingestion-pipeline translation strings from the `dataflow` namespace into the `flow` namespace of the locale files (#9869).

### Type of change


- [X] New Feature (non-breaking change which adds functionality)
This commit is contained in:
balibabu
2025-10-21 15:23:37 +08:00
committed by GitHub
parent 41a647fe32
commit 544c9990e3
18 changed files with 234 additions and 250 deletions

View File

@ -1605,6 +1605,119 @@ This delimiter is used to split the input text into several text pieces echo of
ceateAgent: 'Agent flow',
createPipeline: 'Ingestion pipeline',
chooseAgentType: 'Choose Agent Type',
// Ingestion-pipeline node labels, each paired with a `*Description` string.
parser: 'Parser',
parserDescription:
'Extracts raw text and structure from files for downstream processing.',
// The `tokenizer*` keys are worded as "Indexer" in the English UI text.
tokenizer: 'Indexer',
tokenizerRequired: 'Please add the Indexer node first',
tokenizerDescription:
'Transforms text into the required data structure (e.g., vector embeddings for Embedding Search) depending on the chosen search method.',
// `splitter` and `hierarchicalMerger` carry short labels ('Token', 'Title');
// their `*Description` strings spell out the actual splitting strategy.
splitter: 'Token',
splitterDescription:
'Split text into chunks by token length with optional delimiters and overlap.',
hierarchicalMergerDescription:
'Split documents into sections by title hierarchy with regex rules for finer control.',
hierarchicalMerger: 'Title',
// The `extractor*` keys are worded as "Transformer" in the English UI text.
extractor: 'Transformer',
extractorDescription:
'Use an LLM to extract structured insights from document chunks—such as summaries, classifications, etc.',
outputFormat: 'Output format',
fileFormats: 'File format',
// Display labels for the file-format choices, keyed by format id.
// NOTE(review): the key `text&markdown` is labelled 'Text & Markup' (not
// 'Markdown') — confirm the wording is intentional.
fileFormatOptions: {
pdf: 'PDF',
spreadsheet: 'Spreadsheet',
image: 'Image',
email: 'Email',
'text&markdown': 'Text & Markup',
word: 'Word',
slides: 'PPT',
audio: 'Audio',
},
// Labels, tips and placeholders for the ingestion-pipeline forms.
fields: 'Field',
addParser: 'Add Parser',
hierarchy: 'Hierarchy',
regularExpressions: 'Regular Expressions',
overlappedPercent: 'Overlapped percent (%)',
searchMethod: 'Search method',
// Two-line tip: the line break inside the template literal is part of the text.
searchMethodTip: `Defines how the content can be searched — by full-text, embedding, or both.
The Indexer will store the content in the corresponding data structures for the selected methods.`,
// NOTE(review): `file` and `systemPrompt` are left commented out in this
// block — presumably the enclosing object already defines them; confirm
// before re-enabling, since duplicate keys in one object literal would clash.
// file: 'File',
parserMethod: 'Parsing method',
// systemPrompt: 'System Prompt',
systemPromptPlaceholder:
'Enter system prompt for image analysis, if empty the system default value will be used',
exportJson: 'Export JSON',
viewResult: 'View result',
running: 'Running',
// Names of the four result kinds; the same four names key the `prompts`
// templates defined next to these labels.
summary: 'Summary',
keywords: 'Keywords',
questions: 'Questions',
metadata: 'Metadata',
fieldName: 'Result destination',
// Prompt templates for the four result kinds (keywords, questions, summary,
// metadata). `system` holds the instruction text and `user` holds the matching
// user-message skeleton with a bracketed placeholder for the content.
// The templates are multi-line template literals: every line break inside the
// backticks is part of the prompt, so do not re-indent or re-wrap them.
prompts: {
// Instruction prompts, one per result kind.
system: {
keywords: `Role
You are a text analyzer.
Task
Extract the most important keywords/phrases of a given piece of text content.
Requirements
- Summarize the text content, and give the top 5 important keywords/phrases.
- The keywords MUST be in the same language as the given piece of text content.
- The keywords are delimited by ENGLISH COMMA.
- Output keywords ONLY.`,
questions: `Role
You are a text analyzer.
Task
Propose 3 questions about a given piece of text content.
Requirements
- Understand and summarize the text content, and propose the top 3 important questions.
- The questions SHOULD NOT have overlapping meanings.
- The questions SHOULD cover the main content of the text as much as possible.
- The questions MUST be in the same language as the given piece of text content.
- One question per line.
- Output questions ONLY.`,
summary: `Act as a precise summarizer. Your task is to create a summary of the provided content that is both concise and faithful to the original.
Key Instructions:
1. Accuracy: Strictly base the summary on the information given. Do not introduce any new facts, conclusions, or interpretations that are not explicitly stated.
2. Language: Write the summary in the same language as the source text.
3. Objectivity: Present the key points without bias, preserving the original intent and tone of the content. Do not editorialize.
4. Conciseness: Focus on the most important ideas, omitting minor details and fluff.`,
metadata: `Extract important structured information from the given content. Output ONLY a valid JSON string with no additional text. If no important structured information is found, output an empty JSON object: {}.
Important structured information may include: names, dates, locations, events, key facts, numerical data, or other extractable entities.`,
},
// User-message skeletons; the bracketed text marks where the content goes.
user: {
keywords: `Text Content
[Insert text here]`,
questions: `Text Content
[Insert text here]`,
summary: `Text to Summarize:
[Insert text here]`,
metadata: `Content: [INSERT CONTENT HERE]`,
},
},
cancel: 'Cancel',
// Confirmation message asking whether to abandon the existing prompt.
// `swicthPromptMessage` is a misspelling that is kept so existing lookups keep
// working; the zh locale defines this entry as `switchPromptMessage`, so the
// correctly spelled key is provided as well to keep both locales resolvable.
// NOTE(review): confirm which spelling the UI looks up, then retire the other.
swicthPromptMessage:
'The prompt word will change. Please confirm whether to abandon the existing prompt word?',
switchPromptMessage:
'The prompt word will change. Please confirm whether to abandon the existing prompt word?',
// Display labels for the search-method choices, keyed by method id.
tokenizerSearchMethodOptions: {
full_text: 'Full-text',
embedding: 'Embedding',
},
filenameEmbeddingWeight: 'Filename embedding weight',
// Display labels for the selectable fields, keyed by field id.
// NOTE(review): `summary` is labelled 'Augmented Context' here, while the
// standalone `summary` label in this same object reads 'Summary' — confirm
// the two wordings are meant to differ.
tokenizerFieldsOptions: {
text: 'Processed Text',
keywords: 'Keywords',
questions: 'Questions',
summary: 'Augmented Context',
},
// Display labels for the image parse methods; only `ocr` is defined here.
imageParseMethodOptions: {
ocr: 'OCR',
},
},
llmTools: {
bad_calculator: {
@ -1705,125 +1818,6 @@ This delimiter is used to split the input text into several text pieces echo of
<p>Are you sure you want to proceed?</p> `,
unlinkPipelineModalConfirmText: 'Unlink',
},
dataflow: {
parser: 'Parser',
parserDescription:
'Extracts raw text and structure from files for downstream processing.',
tokenizer: 'Indexer',
tokenizerRequired: 'Please add the Indexer node first',
tokenizerDescription:
'Transforms text into the required data structure (e.g., vector embeddings for Embedding Search) depending on the chosen search method.',
splitter: 'Token',
splitterDescription:
'Split text into chunks by token length with optional delimiters and overlap.',
hierarchicalMergerDescription:
'Split documents into sections by title hierarchy with regex rules for finer control.',
hierarchicalMerger: 'Title',
extractor: 'Transformer',
extractorDescription:
'Use an LLM to extract structured insights from document chunks—such as summaries, classifications, etc.',
outputFormat: 'Output format',
lang: 'Language',
fileFormats: 'File format',
fileFormatOptions: {
pdf: 'PDF',
spreadsheet: 'Spreadsheet',
image: 'Image',
email: 'Email',
'text&markdown': 'Text & Markup',
word: 'Word',
slides: 'PPT',
audio: 'Audio',
},
fields: 'Field',
addParser: 'Add Parser',
hierarchy: 'Hierarchy',
regularExpressions: 'Regular Expressions',
overlappedPercent: 'Overlapped percent (%)',
searchMethod: 'Search method',
searchMethodTip: `Defines how the content can be searched — by full-text, embedding, or both.
The Indexer will store the content in the corresponding data structures for the selected methods.`,
begin: 'File',
parserMethod: 'Parsing method',
systemPrompt: 'System Prompt',
systemPromptPlaceholder:
'Enter system prompt for image analysis, if empty the system default value will be used',
exportJson: 'Export JSON',
viewResult: 'View result',
running: 'Running',
summary: 'Summary',
keywords: 'Keywords',
questions: 'Questions',
metadata: 'Metadata',
fieldName: 'Result destination',
prompts: {
system: {
keywords: `Role
You are a text analyzer.
Task
Extract the most important keywords/phrases of a given piece of text content.
Requirements
- Summarize the text content, and give the top 5 important keywords/phrases.
- The keywords MUST be in the same language as the given piece of text content.
- The keywords are delimited by ENGLISH COMMA.
- Output keywords ONLY.`,
questions: `Role
You are a text analyzer.
Task
Propose 3 questions about a given piece of text content.
Requirements
- Understand and summarize the text content, and propose the top 3 important questions.
- The questions SHOULD NOT have overlapping meanings.
- The questions SHOULD cover the main content of the text as much as possible.
- The questions MUST be in the same language as the given piece of text content.
- One question per line.
- Output questions ONLY.`,
summary: `Act as a precise summarizer. Your task is to create a summary of the provided content that is both concise and faithful to the original.
Key Instructions:
1. Accuracy: Strictly base the summary on the information given. Do not introduce any new facts, conclusions, or interpretations that are not explicitly stated.
2. Language: Write the summary in the same language as the source text.
3. Objectivity: Present the key points without bias, preserving the original intent and tone of the content. Do not editorialize.
4. Conciseness: Focus on the most important ideas, omitting minor details and fluff.`,
metadata: `Extract important structured information from the given content. Output ONLY a valid JSON string with no additional text. If no important structured information is found, output an empty JSON object: {}.
Important structured information may include: names, dates, locations, events, key facts, numerical data, or other extractable entities.`,
},
user: {
keywords: `Text Content
[Insert text here]`,
questions: `Text Content
[Insert text here]`,
summary: `Text to Summarize:
[Insert text here]`,
metadata: `Content: [INSERT CONTENT HERE]`,
},
},
cancel: 'Cancel',
swicthPromptMessage:
'The prompt word will change. Please confirm whether to abandon the existing prompt word?',
tokenizerSearchMethodOptions: {
full_text: 'Full-text',
embedding: 'Embedding',
},
filenameEmbeddingWeight: 'Filename embedding weight',
tokenizerFieldsOptions: {
text: 'Processed Text',
keywords: 'Keywords',
questions: 'Questions',
summary: 'Augmented Context',
},
imageParseMethodOptions: {
ocr: 'OCR',
},
note: 'Note',
noteDescription: 'Note',
notePlaceholder: 'Please enter a note',
},
datasetOverview: {
downloadTip: 'Files being downloaded from data sources. ',
processingTip: 'Files being processed by Ingestion pipeline.',

View File

@ -1511,6 +1511,93 @@ General实体和关系提取提示来自 GitHub - microsoft/graphrag基于
createFromTemplate: '从模板创建',
importJsonFile: '导入 JSON 文件',
chooseAgentType: '选择智能体类型',
// Chinese ingestion-pipeline node labels, each paired with a `*Description` string.
parser: '解析器',
parserDescription: '从文件中提取原始文本和结构以供下游处理。',
// NOTE(review): the English locale words the `tokenizer*` entries as
// "Indexer"; here the label is '分词器' and the messages say "Tokenizer" —
// confirm the intended terminology.
tokenizer: '分词器',
tokenizerRequired: '请先添加Tokenizer节点',
tokenizerDescription:
'根据所选的搜索方法,将文本转换为所需的数据结构(例如,用于嵌入搜索的向量嵌入)。',
// NOTE(review): the English label for `splitter` is 'Token' and its
// description speaks of token length; the label here says "by character" —
// confirm the wording.
splitter: '按字符分割',
splitterDescription:
'根据分词器长度将文本拆分成块,并带有可选的分隔符和重叠。',
hierarchicalMergerDescription:
'使用正则表达式规则按标题层次结构将文档拆分成多个部分,以实现更精细的控制。',
hierarchicalMerger: '按标题分割',
extractor: '提取器',
extractorDescription:
'使用 LLM 从文档块(例如摘要、分类等)中提取结构化见解。',
outputFormat: '输出格式',
fileFormats: '文件格式',
fields: '字段',
addParser: '增加解析器',
hierarchy: '层次结构',
regularExpressions: '正则表达式',
// Chinese counterpart of the English 'Overlapped percent (%)'; the closing
// parenthesis after the percent sign was missing from the string.
overlappedPercent: '重叠百分比(%)',
// Chinese labels, tips and placeholders for the ingestion-pipeline forms.
searchMethod: '搜索方法',
// Two-line tip: the line break inside the template literal is part of the text.
searchMethodTip: `决定该数据集启用的搜索方式,可选择全文、向量,或两者兼有。
Tokenizer 会根据所选方式将内容存储为对应的数据结构。`,
// NOTE(review): `filenameEmbdWeight` carries the same text as the
// `filenameEmbeddingWeight` key defined later in this object, and the English
// locale only defines `filenameEmbeddingWeight` — possibly a stale key;
// confirm nothing still looks it up before removing it.
filenameEmbdWeight: '文件名嵌入权重',
parserMethod: '解析方法',
systemPromptPlaceholder:
'请输入用于图像分析的系统提示词,若为空则使用系统缺省值',
exportJson: '导出 JSON',
viewResult: '查看结果',
running: '运行中',
// NOTE(review): `summary` reads "augmented context" here, whereas the English
// `summary` label is 'Summary' — confirm the intended wording.
summary: '增强上下文',
keywords: '关键词',
questions: '问题',
metadata: '元数据',
fieldName: '结果目的地',
// Chinese prompt templates for the four result kinds (keywords, questions,
// summary, metadata). `system` holds the instruction text and `user` holds the
// matching user-message skeleton with a bracketed placeholder for the content.
// The templates are multi-line template literals: every line break inside the
// backticks is part of the prompt, so do not re-indent or re-wrap them.
prompts: {
// Instruction prompts, one per result kind.
system: {
keywords: `角色
你是一名文本分析员。
任务
从给定的文本内容中提取最重要的关键词/短语。
要求
- 总结文本内容并给出最重要的5个关键词/短语。
- 关键词必须与给定的文本内容使用相同的语言。
- 关键词之间用英文逗号分隔。
- 仅输出关键词。`,
questions: `角色
你是一名文本分析员。
任务
针对给定的文本内容提出3个问题。
要求
- 理解并总结文本内容并提出最重要的3个问题。
- 问题的含义不应重叠。
- 问题应尽可能涵盖文本的主要内容。
- 问题必须与给定的文本内容使用相同的语言。
- 每行一个问题。
- 仅输出问题。`,
summary: `扮演一个精准的摘要者。你的任务是为提供的内容创建一个简洁且忠实于原文的摘要。
关键说明:
1. 准确性:摘要必须严格基于所提供的信息。请勿引入任何未明确说明的新事实、结论或解释。
2. 语言:摘要必须使用与原文相同的语言。
3. 客观性:不带偏见地呈现要点,保留内容的原始意图和语气。请勿进行编辑。
4. 简洁性:专注于最重要的思想,省略细节和多余的内容。`,
metadata: `从给定内容中提取重要的结构化信息。仅输出有效的 JSON 字符串,不包含任何附加文本。如果未找到重要的结构化信息,则输出一个空的 JSON 对象:{}。
重要的结构化信息可能包括:姓名、日期、地点、事件、关键事实、数字数据或其他可提取实体。`,
},
// User-message skeletons; the bracketed text marks where the content goes.
user: {
keywords: `文本内容
[在此处插入文本]`,
questions: `文本内容
[在此处插入文本]`,
summary: `要总结的文本:
[在此处插入文本]`,
metadata: `内容:[在此处插入内容]`,
},
},
cancel: '取消',
filenameEmbeddingWeight: '文件名嵌入权重',
// Confirmation message asking whether to abandon the existing prompt.
// NOTE(review): the English locale in this change spells this key
// `swicthPromptMessage` — confirm both locales expose the spelling the UI
// actually looks up, otherwise one of them will miss this entry.
switchPromptMessage: '提示词将发生变化,请确认是否放弃已有提示词?',
},
footer: {
profile: 'All rights reserved @ React',
@ -1618,101 +1705,6 @@ General实体和关系提取提示来自 GitHub - microsoft/graphrag基于
<p>你确定要继续吗?</p> `,
unlinkPipelineModalConfirmText: '解绑',
},
dataflow: {
parser: '解析器',
parserDescription: '从文件中提取原始文本和结构以供下游处理。',
tokenizer: '分词器',
tokenizerRequired: '请先添加Tokenizer节点',
tokenizerDescription:
'根据所选的搜索方法,将文本转换为所需的数据结构(例如,用于嵌入搜索的向量嵌入)。',
splitter: '按字符分割',
splitterDescription:
'根据分词器长度将文本拆分成块,并带有可选的分隔符和重叠。',
hierarchicalMergerDescription:
'使用正则表达式规则按标题层次结构将文档拆分成多个部分,以实现更精细的控制。',
hierarchicalMerger: '按标题分割',
extractor: '提取器',
extractorDescription:
'使用 LLM 从文档块(例如摘要、分类等)中提取结构化见解。',
outputFormat: '输出格式',
lang: '语言',
fileFormats: '文件格式',
fields: '字段',
addParser: '增加解析器',
hierarchy: '层次结构',
regularExpressions: '正则表达式',
// Chinese counterpart of the English 'Overlapped percent (%)'; the closing
// parenthesis after the percent sign was missing from the string.
overlappedPercent: '重叠百分比(%)',
searchMethod: '搜索方法',
searchMethodTip: `决定该数据集启用的搜索方式,可选择全文、向量,或两者兼有。
Tokenizer 会根据所选方式将内容存储为对应的数据结构。`,
filenameEmbdWeight: '文件名嵌入权重',
begin: '文件',
parserMethod: '解析方法',
systemPrompt: '系统提示词',
systemPromptPlaceholder:
'请输入用于图像分析的系统提示词,若为空则使用系统缺省值',
exportJson: '导出 JSON',
viewResult: '查看结果',
running: '运行中',
summary: '增强上下文',
keywords: '关键词',
questions: '问题',
metadata: '元数据',
fieldName: '结果目的地',
prompts: {
system: {
keywords: `角色
你是一名文本分析员。
任务
从给定的文本内容中提取最重要的关键词/短语。
要求
- 总结文本内容并给出最重要的5个关键词/短语。
- 关键词必须与给定的文本内容使用相同的语言。
- 关键词之间用英文逗号分隔。
- 仅输出关键词。`,
questions: `角色
你是一名文本分析员。
任务
针对给定的文本内容提出3个问题。
要求
- 理解并总结文本内容并提出最重要的3个问题。
- 问题的含义不应重叠。
- 问题应尽可能涵盖文本的主要内容。
- 问题必须与给定的文本内容使用相同的语言。
- 每行一个问题。
- 仅输出问题。`,
summary: `扮演一个精准的摘要者。你的任务是为提供的内容创建一个简洁且忠实于原文的摘要。
关键说明:
1. 准确性:摘要必须严格基于所提供的信息。请勿引入任何未明确说明的新事实、结论或解释。
2. 语言:摘要必须使用与原文相同的语言。
3. 客观性:不带偏见地呈现要点,保留内容的原始意图和语气。请勿进行编辑。
4. 简洁性:专注于最重要的思想,省略细节和多余的内容。`,
metadata: `从给定内容中提取重要的结构化信息。仅输出有效的 JSON 字符串,不包含任何附加文本。如果未找到重要的结构化信息,则输出一个空的 JSON 对象:{}。
重要的结构化信息可能包括:姓名、日期、地点、事件、关键事实、数字数据或其他可提取实体。`,
},
user: {
keywords: `文本内容
[在此处插入文本]`,
questions: `文本内容
[在此处插入文本]`,
summary: `要总结的文本:
[在此处插入文本]`,
metadata: `内容:[在此处插入内容]`,
},
},
cancel: '取消',
filenameEmbeddingWeight: '文件名嵌入权重',
switchPromptMessage: '提示词将发生变化,请确认是否放弃已有提示词?',
note: '注释',
noteDescription: '注释',
notePlaceholder: '请输入注释',
},
datasetOverview: {
downloadTip: '正在从数据源下载文件。',
processingTip: '正在由pipeline处理文件。',