mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-23 06:46:40 +08:00
Feat: Separate connectors from s3 (#12045)
### What problem does this PR solve?

Feat: Separate connectors from s3 #12008

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

Overview:

<img width="1500" alt="image" src="https://github.com/user-attachments/assets/d54fea7a-7294-4ec0-ab6c-9753b3f03a72" />

Oracle:

<img width="350" alt="image" src="https://github.com/user-attachments/assets/bca140c1-33d8-4950-afdc-153407eedc46" />
This commit is contained in:
@ -124,6 +124,10 @@ class FileSource(StrEnum):
|
||||
MOODLE = "moodle"
|
||||
DROPBOX = "dropbox"
|
||||
BOX = "box"
|
||||
R2 = "r2"
|
||||
OCI_STORAGE = "oci_storage"
|
||||
GOOGLE_CLOUD_STORAGE = "google_cloud_storage"
|
||||
|
||||
|
||||
class PipelineTaskType(StrEnum):
|
||||
PARSE = "Parse"
|
||||
|
||||
@ -161,23 +161,59 @@ class SyncBase:
|
||||
def _get_source_prefix(self):
|
||||
return ""
|
||||
|
||||
|
||||
class S3(SyncBase):
|
||||
SOURCE_NAME: str = FileSource.S3
|
||||
class _BlobLikeBase(SyncBase):
|
||||
DEFAULT_BUCKET_TYPE: str = "s3"
|
||||
|
||||
async def _generate(self, task: dict):
|
||||
self.connector = BlobStorageConnector(bucket_type=self.conf.get("bucket_type", "s3"), bucket_name=self.conf["bucket_name"], prefix=self.conf.get("prefix", ""))
|
||||
bucket_type = self.conf.get("bucket_type", self.DEFAULT_BUCKET_TYPE)
|
||||
|
||||
self.connector = BlobStorageConnector(
|
||||
bucket_type=bucket_type,
|
||||
bucket_name=self.conf["bucket_name"],
|
||||
prefix=self.conf.get("prefix", ""),
|
||||
)
|
||||
self.connector.load_credentials(self.conf["credentials"])
|
||||
|
||||
document_batch_generator = (
|
||||
self.connector.load_from_state()
|
||||
if task["reindex"] == "1" or not task["poll_range_start"]
|
||||
else self.connector.poll_source(task["poll_range_start"].timestamp(), datetime.now(timezone.utc).timestamp())
|
||||
else self.connector.poll_source(
|
||||
task["poll_range_start"].timestamp(),
|
||||
datetime.now(timezone.utc).timestamp(),
|
||||
)
|
||||
)
|
||||
|
||||
begin_info = "totally" if task["reindex"] == "1" or not task["poll_range_start"] else "from {}".format(task["poll_range_start"])
|
||||
logging.info("Connect to {}: {}(prefix/{}) {}".format(self.conf.get("bucket_type", "s3"), self.conf["bucket_name"], self.conf.get("prefix", ""), begin_info))
|
||||
begin_info = (
|
||||
"totally"
|
||||
if task["reindex"] == "1" or not task["poll_range_start"]
|
||||
else "from {}".format(task["poll_range_start"])
|
||||
)
|
||||
|
||||
logging.info(
|
||||
"Connect to {}: {}(prefix/{}) {}".format(
|
||||
bucket_type,
|
||||
self.conf["bucket_name"],
|
||||
self.conf.get("prefix", ""),
|
||||
begin_info,
|
||||
)
|
||||
)
|
||||
return document_batch_generator
|
||||
|
||||
class S3(_BlobLikeBase):
|
||||
SOURCE_NAME: str = FileSource.S3
|
||||
DEFAULT_BUCKET_TYPE: str = "s3"
|
||||
|
||||
class R2(_BlobLikeBase):
|
||||
SOURCE_NAME: str = FileSource.R2
|
||||
DEFAULT_BUCKET_TYPE: str = "r2"
|
||||
|
||||
class OCI_STORAGE(_BlobLikeBase):
|
||||
SOURCE_NAME: str = FileSource.OCI_STORAGE
|
||||
DEFAULT_BUCKET_TYPE: str = "oci_storage"
|
||||
|
||||
class GOOGLE_CLOUD_STORAGE(_BlobLikeBase):
|
||||
SOURCE_NAME: str = FileSource.GOOGLE_CLOUD_STORAGE
|
||||
DEFAULT_BUCKET_TYPE: str = "google_cloud_storage"
|
||||
|
||||
class Confluence(SyncBase):
|
||||
SOURCE_NAME: str = FileSource.CONFLUENCE
|
||||
@ -705,6 +741,9 @@ class BOX(SyncBase):
|
||||
|
||||
func_factory = {
|
||||
FileSource.S3: S3,
|
||||
FileSource.R2: R2,
|
||||
FileSource.OCI_STORAGE: OCI_STORAGE,
|
||||
FileSource.GOOGLE_CLOUD_STORAGE: GOOGLE_CLOUD_STORAGE,
|
||||
FileSource.NOTION: Notion,
|
||||
FileSource.DISCORD: Discord,
|
||||
FileSource.CONFLUENCE: Confluence,
|
||||
|
||||
1
web/src/assets/svg/data-source/google-cloud-storage.svg
Normal file
1
web/src/assets/svg/data-source/google-cloud-storage.svg
Normal file
@ -0,0 +1 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" width="24px" height="24px" viewBox="0 0 24 24"><defs><style>.cls-1{fill:#aecbfa;}.cls-2{fill:#669df6;}.cls-3{fill:#4285f4;}.cls-4{fill:#fff;}</style></defs><title>Icon_24px_CloudStorage_Color</title><g data-name="Product Icons"><rect class="cls-1" x="2" y="4" width="20" height="7"/><rect class="cls-2" x="20" y="4" width="2" height="7"/><polygon class="cls-3" points="22 4 20 4 20 11 22 4"/><rect class="cls-2" x="2" y="4" width="2" height="7"/><rect class="cls-4" x="6" y="7" width="6" height="1"/><rect class="cls-4" x="15" y="6" width="3" height="3" rx="1.5"/><rect class="cls-1" x="2" y="13" width="20" height="7"/><rect class="cls-2" x="20" y="13" width="2" height="7"/><polygon class="cls-3" points="22 13 20 13 20 20 22 13"/><rect class="cls-2" x="2" y="13" width="2" height="7"/><rect class="cls-4" x="6" y="16" width="6" height="1"/><rect class="cls-4" x="15" y="15" width="3" height="3" rx="1.5"/></g></svg>
|
||||
|
After Width: | Height: | Size: 958 B |
1
web/src/assets/svg/data-source/oracle-storage.svg
Normal file
1
web/src/assets/svg/data-source/oracle-storage.svg
Normal file
@ -0,0 +1 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 1333.31 843.16" shape-rendering="geometricPrecision" text-rendering="geometricPrecision" image-rendering="optimizeQuality" fill-rule="evenodd" clip-rule="evenodd"><path d="M421.65 843.16C188.89 843.16 0 654.74 0 421.91 0 189.09 188.89 0 421.65 0h490.08c232.83 0 421.58 189.09 421.58 421.91 0 232.83-188.75 421.25-421.58 421.25H421.65zm479.18-148.72c150.8 0 272.94-121.79 272.94-272.53 0-150.73-122.14-273.2-272.94-273.2H432.48c-150.73 0-272.94 122.47-272.94 273.2 0 150.74 122.2 272.53 272.94 272.53h468.35z" fill="#c74634" fill-rule="nonzero"/></svg>
|
||||
|
After Width: | Height: | Size: 604 B |
5
web/src/assets/svg/data-source/r2.svg
Normal file
5
web/src/assets/svg/data-source/r2.svg
Normal file
@ -0,0 +1,5 @@
|
||||
<?xml version="1.0" encoding="utf-8"?><svg version="1.1" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px" viewBox="0 0 122.88 55.57" style="enable-background:new 0 0 122.88 55.57" xml:space="preserve"><style type="text/css"><![CDATA[
|
||||
.st0{fill:#F48120;}
|
||||
.st1{fill:#FAAD3F;}
|
||||
.st2{fill:#FFFFFF;}
|
||||
]]></style><g><polygon class="st2" points="112.65,33.03 97.2,24.17 94.54,23.01 31.33,23.45 31.33,55.53 112.65,55.53 112.65,33.03"/><path class="st0" d="M84.52,52.58c0.76-2.59,0.47-4.97-0.79-6.73c-1.15-1.62-3.1-2.56-5.44-2.67L33.96,42.6 c-0.29,0-0.54-0.14-0.68-0.36c-0.14-0.21-0.18-0.5-0.11-0.79c0.14-0.43,0.58-0.76,1.04-0.79l44.73-0.58 c5.29-0.25,11.06-4.54,13.07-9.8l2.56-6.66c0.11-0.29,0.14-0.58,0.07-0.86C91.76,9.72,80.13,0,66.23,0 c-12.82,0-23.7,8.28-27.59,19.77c-2.52-1.87-5.73-2.88-9.18-2.56c-6.16,0.61-11.09,5.55-11.7,11.7c-0.14,1.58-0.04,3.13,0.32,4.57 C8.03,33.78,0,41.99,0,52.11c0,0.9,0.07,1.8,0.18,2.7c0.07,0.43,0.43,0.76,0.86,0.76h81.82c0.47,0,0.9-0.32,1.04-0.79L84.52,52.58 L84.52,52.58z"/><path class="st1" d="M98.64,24.09c-0.4,0-0.83,0-1.22,0.04c-0.29,0-0.54,0.22-0.65,0.5l-1.73,6.01c-0.76,2.59-0.47,4.97,0.79,6.73 c1.15,1.62,3.1,2.56,5.44,2.67l9.44,0.58c0.29,0,0.54,0.14,0.68,0.36c0.14,0.22,0.18,0.54,0.11,0.79 c-0.14,0.43-0.58,0.76-1.04,0.79l-9.83,0.58c-5.33,0.25-11.06,4.54-13.07,9.79l-0.72,1.84c-0.14,0.36,0.11,0.72,0.5,0.72h33.78 c0.4,0,0.76-0.25,0.86-0.65c0.58-2.09,0.9-4.29,0.9-6.55C122.88,34.97,112,24.09,98.64,24.09L98.64,24.09z"/></g></svg>
|
||||
|
After Width: | Height: | Size: 1.5 KiB |
@ -849,6 +849,12 @@ Example: Virtual Hosted Style`,
|
||||
'Integrate your Confluence workspace to search documentation.',
|
||||
s3Description:
|
||||
'Connect to your AWS S3 bucket to import and sync stored files.',
|
||||
google_cloud_storageDescription:
|
||||
'Connect your Google Cloud Storage bucket to import and sync files.',
|
||||
r2Description:
|
||||
'Connect your Cloudflare R2 bucket to import and sync files.',
|
||||
oci_storageDescription:
|
||||
'Connect your Oracle Cloud Object Storage bucket to import and sync files.',
|
||||
discordDescription:
|
||||
'Link your Discord server to access and analyze chat data.',
|
||||
notionDescription:
|
||||
@ -873,6 +879,7 @@ Example: Virtual Hosted Style`,
|
||||
'Upload the OAuth JSON generated from Google Console. If it only contains client credentials, run the browser-based verification once to mint long-lived refresh tokens.',
|
||||
dropboxDescription:
|
||||
'Connect your Dropbox to sync files and folders from a chosen account.',
|
||||
boxDescription: 'Connect your Box drive to sync files and folders.',
|
||||
dropboxAccessTokenTip:
|
||||
'Generate a long-lived access token in the Dropbox App Console with files.metadata.read, files.content.read, and sharing.read scopes.',
|
||||
moodleDescription:
|
||||
|
||||
@ -732,10 +732,18 @@ export default {
|
||||
'Интегрируйте ваше рабочее пространство Confluence для поиска документации.',
|
||||
s3Description:
|
||||
'Подключитесь к вашему AWS S3 бакету для импорта и синхронизации хранимых файлов.',
|
||||
oci_storageDescription:
|
||||
'Подключите бакет Oracle Cloud Object Storage для импорта и синхронизации файлов.',
|
||||
r2Description:
|
||||
'Подключите ваш бакет Cloudflare R2 для импорта и синхронизации файлов.',
|
||||
google_cloud_storageDescription:
|
||||
'Подключите бакет Google Cloud Storage для импорта и синхронизации файлов.',
|
||||
discordDescription:
|
||||
'Свяжите ваш Discord сервер для доступа и анализа данных чата.',
|
||||
notionDescription:
|
||||
'Синхронизируйте страницы и базы данных из Notion для извлечения знаний.',
|
||||
boxDescription:
|
||||
'Подключите ваш диск Box для синхронизации файлов и папок.',
|
||||
google_driveDescription:
|
||||
'Подключите ваш Google Drive через OAuth и синхронизируйте определенные папки или диски.',
|
||||
gmailDescription:
|
||||
|
||||
@ -732,8 +732,12 @@ General:实体和关系提取提示来自 GitHub - microsoft/graphrag:基于
|
||||
log: '日志',
|
||||
confluenceDescription: '连接你的 Confluence 工作区以搜索文档内容。',
|
||||
s3Description: ' 连接你的 AWS S3 存储桶以导入和同步文件。',
|
||||
google_cloud_storageDescription:
|
||||
'连接你的 Google Cloud Storage 存储桶以导入和同步文件。',
|
||||
discordDescription: ' 连接你的 Discord 服务器以访问和分析聊天数据。',
|
||||
notionDescription: ' 同步 Notion 页面与数据库,用于知识检索。',
|
||||
oci_storageDescription:
|
||||
'连接你的 Oracle Cloud Object Storage 存储桶以导入和同步文件。',
|
||||
google_driveDescription:
|
||||
'通过 OAuth 连接 Google Drive,并同步指定的文件夹或云端硬盘。',
|
||||
gmailDescription: '通过 OAuth 连接 Gmail,用于同步邮件。',
|
||||
@ -749,6 +753,8 @@ General:实体和关系提取提示来自 GitHub - microsoft/graphrag:基于
|
||||
gmailTokenTip:
|
||||
'请上传由 Google Console 生成的 OAuth JSON。如果仅包含 client credentials,请通过浏览器授权一次以获取长期有效的刷新 Token。',
|
||||
dropboxDescription: '连接 Dropbox,同步指定账号下的文件与文件夹。',
|
||||
boxDescription: '连接你的 Box 云盘以同步文件和文件夹。',
|
||||
r2Description: '连接你的 Cloudflare R2 存储桶以导入和同步文件。',
|
||||
dropboxAccessTokenTip:
|
||||
'请在 Dropbox App Console 生成 Access Token,并勾选 files.metadata.read、files.content.read、sharing.read 等必要权限。',
|
||||
jiraDescription: '接入 Jira 工作区,持续同步Issues、评论与附件。',
|
||||
|
||||
@ -20,6 +20,9 @@ export enum DataSourceKey {
|
||||
WEBDAV = 'webdav',
|
||||
BOX = 'box',
|
||||
DROPBOX = 'dropbox',
|
||||
R2 = 'r2',
|
||||
OCI_STORAGE = 'oci_storage',
|
||||
GOOGLE_CLOUD_STORAGE = 'google_cloud_storage',
|
||||
// SHAREPOINT = 'sharepoint',
|
||||
// SLACK = 'slack',
|
||||
// TEAMS = 'teams',
|
||||
@ -27,6 +30,23 @@ export enum DataSourceKey {
|
||||
|
||||
export const generateDataSourceInfo = (t: TFunction) => {
|
||||
return {
|
||||
[DataSourceKey.GOOGLE_CLOUD_STORAGE]: {
|
||||
name: 'Google Cloud Storage',
|
||||
description: t(
|
||||
`setting.${DataSourceKey.GOOGLE_CLOUD_STORAGE}Description`,
|
||||
),
|
||||
icon: <SvgIcon name={'data-source/google-cloud-storage'} width={38} />,
|
||||
},
|
||||
[DataSourceKey.OCI_STORAGE]: {
|
||||
name: 'Oracle Storage',
|
||||
description: t(`setting.${DataSourceKey.OCI_STORAGE}Description`),
|
||||
icon: <SvgIcon name={'data-source/oracle-storage'} width={38} />,
|
||||
},
|
||||
[DataSourceKey.R2]: {
|
||||
name: 'R2',
|
||||
description: t(`setting.${DataSourceKey.R2}Description`),
|
||||
icon: <SvgIcon name={'data-source/r2'} width={38} />,
|
||||
},
|
||||
[DataSourceKey.S3]: {
|
||||
name: 'S3',
|
||||
description: t(`setting.${DataSourceKey.S3}Description`),
|
||||
@ -122,8 +142,85 @@ export const DataSourceFormBaseFields = [
|
||||
})),
|
||||
},
|
||||
];
|
||||
|
||||
export const DataSourceFormFields = {
|
||||
[DataSourceKey.GOOGLE_CLOUD_STORAGE]: [
|
||||
{
|
||||
label: 'GCS Access Key ID',
|
||||
name: 'config.credentials.access_key_id',
|
||||
type: FormFieldType.Text,
|
||||
required: true,
|
||||
},
|
||||
{
|
||||
label: 'GCS Secret Access Key',
|
||||
name: 'config.credentials.secret_access_key',
|
||||
type: FormFieldType.Password,
|
||||
required: true,
|
||||
},
|
||||
{
|
||||
label: 'Bucket Name',
|
||||
name: 'config.bucket_name',
|
||||
type: FormFieldType.Text,
|
||||
required: true,
|
||||
},
|
||||
],
|
||||
[DataSourceKey.OCI_STORAGE]: [
|
||||
{
|
||||
label: 'OCI Namespace',
|
||||
name: 'config.credentials.namespace',
|
||||
type: FormFieldType.Text,
|
||||
required: true,
|
||||
},
|
||||
{
|
||||
label: 'OCI Region',
|
||||
name: 'config.credentials.region',
|
||||
type: FormFieldType.Text,
|
||||
required: true,
|
||||
},
|
||||
{
|
||||
label: 'OCI Access Key ID',
|
||||
name: 'config.credentials.access_key_id',
|
||||
type: FormFieldType.Text,
|
||||
required: true,
|
||||
},
|
||||
{
|
||||
label: 'OCI Secret Access Key',
|
||||
name: 'config.credentials.secret_access_key',
|
||||
type: FormFieldType.Password,
|
||||
required: true,
|
||||
},
|
||||
{
|
||||
label: 'Bucket Name',
|
||||
name: 'config.bucket_name',
|
||||
type: FormFieldType.Text,
|
||||
required: true,
|
||||
},
|
||||
],
|
||||
[DataSourceKey.R2]: [
|
||||
{
|
||||
label: 'R2 Account ID',
|
||||
name: 'config.credentials.account_id',
|
||||
type: FormFieldType.Text,
|
||||
required: true,
|
||||
},
|
||||
{
|
||||
label: 'R2 Access Key ID',
|
||||
name: 'config.credentials.r2_access_key_id',
|
||||
type: FormFieldType.Text,
|
||||
required: true,
|
||||
},
|
||||
{
|
||||
label: 'R2 Secret Access Key',
|
||||
name: 'config.credentials.r2_secret_access_key',
|
||||
type: FormFieldType.Password,
|
||||
required: true,
|
||||
},
|
||||
{
|
||||
label: 'Bucket Name',
|
||||
name: 'config.bucket_name',
|
||||
type: FormFieldType.Text,
|
||||
required: true,
|
||||
},
|
||||
],
|
||||
[DataSourceKey.S3]: [
|
||||
{
|
||||
label: 'AWS Access Key ID',
|
||||
@ -149,9 +246,6 @@ export const DataSourceFormFields = {
|
||||
type: FormFieldType.Select,
|
||||
options: [
|
||||
{ label: 'S3', value: 's3' },
|
||||
{ label: 'R2', value: 'r2' },
|
||||
{ label: 'Google Cloud Storage', value: 'google_cloud_storage' },
|
||||
{ label: 'OCI Storage', value: 'oci_storage' },
|
||||
{ label: 'S3 Compatible', value: 's3_compatible' },
|
||||
],
|
||||
required: true,
|
||||
@ -304,7 +398,6 @@ export const DataSourceFormFields = {
|
||||
<GoogleDriveTokenField
|
||||
value={fieldProps.value}
|
||||
onChange={fieldProps.onChange}
|
||||
placeholder='{ "token": "...", "refresh_token": "...", ... }'
|
||||
/>
|
||||
),
|
||||
tooltip: t('setting.google_driveTokenTip'),
|
||||
@ -399,7 +492,6 @@ export const DataSourceFormFields = {
|
||||
<GmailTokenField
|
||||
value={fieldProps.value}
|
||||
onChange={fieldProps.onChange}
|
||||
placeholder='{ "token": "...", "refresh_token": "...", ... }'
|
||||
/>
|
||||
),
|
||||
tooltip: t('setting.gmailTokenTip'),
|
||||
@ -613,6 +705,18 @@ export const DataSourceFormDefaultValues = {
|
||||
},
|
||||
},
|
||||
},
|
||||
[DataSourceKey.R2]: {
|
||||
name: '',
|
||||
source: DataSourceKey.R2,
|
||||
config: {
|
||||
bucket_name: '',
|
||||
credentials: {
|
||||
account_id: '',
|
||||
r2_access_key_id: '',
|
||||
r2_secret_access_key: '',
|
||||
},
|
||||
},
|
||||
},
|
||||
[DataSourceKey.NOTION]: {
|
||||
name: '',
|
||||
source: DataSourceKey.NOTION,
|
||||
@ -678,6 +782,30 @@ export const DataSourceFormDefaultValues = {
|
||||
},
|
||||
},
|
||||
},
|
||||
[DataSourceKey.GOOGLE_CLOUD_STORAGE]: {
|
||||
name: '',
|
||||
source: DataSourceKey.GOOGLE_CLOUD_STORAGE,
|
||||
config: {
|
||||
bucket_name: '',
|
||||
credentials: {
|
||||
access_key_id: '',
|
||||
secret_access_key: '',
|
||||
},
|
||||
},
|
||||
},
|
||||
[DataSourceKey.OCI_STORAGE]: {
|
||||
name: '',
|
||||
source: DataSourceKey.OCI_STORAGE,
|
||||
config: {
|
||||
bucket_name: '',
|
||||
credentials: {
|
||||
namespace: '',
|
||||
region: '',
|
||||
access_key_id: '',
|
||||
secret_access_key: '',
|
||||
},
|
||||
},
|
||||
},
|
||||
[DataSourceKey.MOODLE]: {
|
||||
name: '',
|
||||
source: DataSourceKey.MOODLE,
|
||||
|
||||
Reference in New Issue
Block a user