Feature: Added data source functionality #10703 (#11046)

### What problem does this PR solve?

Feature: Added data source functionality

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
chanx
2025-11-06 11:53:46 +08:00
committed by GitHub
parent 15c75bbf15
commit f581a1c4e5
31 changed files with 2526 additions and 16 deletions

View File

@ -274,6 +274,9 @@ export default {
reRankModelWaring: 'Re-rank model is very time consuming.',
},
knowledgeConfiguration: {
dataSource: 'Data Source',
linkSourceSetTip: 'Manage data source linkage with this dataset',
linkDataSource: 'Link Data Source',
tocExtraction: 'TOC Enhance',
tocExtractionTip:
" For existing chunks, generate a hierarchical table of contents (one directory per file). During queries, when Directory Enhancement is activated, the system will use a large model to determine which directory items are relevant to the user's question, thereby identifying the relevant chunks.",
@ -680,6 +683,19 @@ This auto-tagging feature enhances retrieval by adding another layer of domain-s
tocEnhanceTip: ` During the parsing of the document, table of contents information was generated (see the 'Enable Table of Contents Extraction' option in the General method). This allows the large model to return table of contents items relevant to the user's query, thereby using these items to retrieve related chunks and apply weighting to these chunks during the sorting process. This approach is derived from mimicking the behavioral logic of how humans search for knowledge in books.`,
},
setting: {
errorMsg: 'Error message',
newDocs: 'New Docs',
timeStarted: 'Time started',
log: 'Log',
s3Description:
'Connect to your AWS S3 bucket to import and sync stored files.',
discordDescription:
'Link your Discord server to access and analyze chat data.',
notionDescription:
'Sync pages and databases from Notion for knowledge retrieval.',
availableSourcesDescription: 'Select a data source to add',
availableSources: 'Available Sources',
datasourceDescription: 'Manage your data source and connections',
save: 'Save',
search: 'Search',
availableModels: 'Available models',
@ -697,6 +713,7 @@ This auto-tagging feature enhances retrieval by adding another layer of domain-s
'Please enter your current password to change your password.',
model: 'Model providers',
systemModelDescription: 'Please complete these settings before beginning',
dataSources: 'Data Sources',
team: 'Team',
system: 'System',
logout: 'Log out',
@ -1837,12 +1854,16 @@ Important structured information may include: names, dates, locations, events, k
changeStepModalConfirmText: 'Switch Anyway',
changeStepModalCancelText: 'Cancel',
unlinkPipelineModalTitle: 'Unlink Ingestion pipeline',
unlinkPipelineModalConfirmText: 'Unlink',
unlinkPipelineModalContent: `
<p>Once unlinked, this Dataset will no longer be connected to the current Ingestion pipeline.</p>
<p>Files that are already being parsed will continue until completion</p>
<p>Files that are not yet parsed will no longer be processed</p> <br/>
<p>Are you sure you want to proceed?</p> `,
unlinkPipelineModalConfirmText: 'Unlink',
unlinkSourceModalTitle: 'Unlink data source',
unlinkSourceModalContent: `
<p>Are you sure you want to unlink this data source?</p>`,
unlinkSourceModalConfirmText: 'Unlink',
},
datasetOverview: {
downloadTip: 'Files being downloaded from data sources. ',

View File

@ -260,6 +260,9 @@ export default {
theDocumentBeingParsedCannotBeDeleted: '正在解析的文档不能被删除',
},
knowledgeConfiguration: {
dataSource: '数据源',
linkSourceSetTip: '管理与此数据集的数据源链接',
linkDataSource: '链接数据源',
tocExtractionTip:
'对于已有的chunk生成层级结构的目录信息每个文件一个目录。在查询时激活`目录增强`后系统会用大模型去判断用户问题和哪些目录项相关从而找到相关的chunk。',
deleteGenerateModalContent: `
@ -671,6 +674,16 @@ General实体和关系提取提示来自 GitHub - microsoft/graphrag基于
tocEnhanceTip: `解析文档时生成了目录信息见General方法的启用目录抽取让大模型返回和用户问题相关的目录项从而利用目录项拿到相关chunk对这些chunk在排序中进行加权。这种方法来源于模仿人类查询书本中知识的行为逻辑`,
},
setting: {
errorMsg: '错误信息',
newDocs: '新文档',
timeStarted: '开始时间',
log: '日志',
s3Description: '连接你的 AWS S3 存储桶以导入和同步文件。',
discordDescription: '连接你的 Discord 服务器以访问和分析聊天数据。',
notionDescription: '同步 Notion 页面与数据库,用于知识检索。',
availableSourcesDescription: '选择要添加的数据源',
availableSources: '可用数据源',
datasourceDescription: '管理您的数据源和连接',
save: '保存',
search: '搜索',
availableModels: '可选模型',
@ -688,6 +701,7 @@ General实体和关系提取提示来自 GitHub - microsoft/graphrag基于
passwordDescription: '请输入您当前的密码以更改您的密码。',
model: '模型提供商',
systemModelDescription: '请在开始之前完成这些设置',
dataSources: '数据源',
team: '团队',
system: '系统',
logout: '登出',
@ -1731,6 +1745,10 @@ Tokenizer 会根据所选方式将内容存储为对应的数据结构。`,
<p>尚未解析的文件将不再被处理。</p> <br/>
<p>你确定要继续吗?</p> `,
unlinkPipelineModalConfirmText: '解绑',
unlinkSourceModalTitle: '取消链接数据源',
unlinkSourceModalContent: `
<p>您确定要取消链接此数据源吗?</p>`,
unlinkSourceModalConfirmText: '取消链接',
},
datasetOverview: {
downloadTip: '正在从数据源下载文件。',