diff --git a/common/constants.py b/common/constants.py index 498ca284e..490a5a4e3 100644 --- a/common/constants.py +++ b/common/constants.py @@ -124,7 +124,11 @@ class FileSource(StrEnum): MOODLE = "moodle" DROPBOX = "dropbox" BOX = "box" + R2 = "r2" + OCI_STORAGE = "oci_storage" + GOOGLE_CLOUD_STORAGE = "google_cloud_storage" + class PipelineTaskType(StrEnum): PARSE = "Parse" DOWNLOAD = "Download" diff --git a/common/data_source/blob_connector.py b/common/data_source/blob_connector.py index c4b4fba11..acb08f6dd 100644 --- a/common/data_source/blob_connector.py +++ b/common/data_source/blob_connector.py @@ -56,7 +56,7 @@ class BlobStorageConnector(LoadConnector, PollConnector): # Validate credentials if self.bucket_type == BlobType.R2: - if not all( + if not all( credentials.get(key) for key in ["r2_access_key_id", "r2_secret_access_key", "account_id"] ): diff --git a/rag/svr/sync_data_source.py b/rag/svr/sync_data_source.py index 2dd296719..5674b73d0 100644 --- a/rag/svr/sync_data_source.py +++ b/rag/svr/sync_data_source.py @@ -161,23 +161,59 @@ class SyncBase: def _get_source_prefix(self): return "" - -class S3(SyncBase): - SOURCE_NAME: str = FileSource.S3 +class _BlobLikeBase(SyncBase): + DEFAULT_BUCKET_TYPE: str = "s3" async def _generate(self, task: dict): - self.connector = BlobStorageConnector(bucket_type=self.conf.get("bucket_type", "s3"), bucket_name=self.conf["bucket_name"], prefix=self.conf.get("prefix", "")) + bucket_type = self.conf.get("bucket_type", self.DEFAULT_BUCKET_TYPE) + + self.connector = BlobStorageConnector( + bucket_type=bucket_type, + bucket_name=self.conf["bucket_name"], + prefix=self.conf.get("prefix", ""), + ) self.connector.load_credentials(self.conf["credentials"]) + document_batch_generator = ( self.connector.load_from_state() if task["reindex"] == "1" or not task["poll_range_start"] - else self.connector.poll_source(task["poll_range_start"].timestamp(), datetime.now(timezone.utc).timestamp()) + else self.connector.poll_source( + task["poll_range_start"].timestamp(), + datetime.now(timezone.utc).timestamp(), + ) ) - begin_info = "totally" if task["reindex"] == "1" or not task["poll_range_start"] else "from {}".format(task["poll_range_start"]) - logging.info("Connect to {}: {}(prefix/{}) {}".format(self.conf.get("bucket_type", "s3"), self.conf["bucket_name"], self.conf.get("prefix", ""), begin_info)) + begin_info = ( + "totally" + if task["reindex"] == "1" or not task["poll_range_start"] + else "from {}".format(task["poll_range_start"]) + ) + + logging.info( + "Connect to {}: {}(prefix/{}) {}".format( + bucket_type, + self.conf["bucket_name"], + self.conf.get("prefix", ""), + begin_info, + ) + ) return document_batch_generator +class S3(_BlobLikeBase): + SOURCE_NAME: str = FileSource.S3 + DEFAULT_BUCKET_TYPE: str = "s3" + +class R2(_BlobLikeBase): + SOURCE_NAME: str = FileSource.R2 + DEFAULT_BUCKET_TYPE: str = "r2" + +class OCI_STORAGE(_BlobLikeBase): + SOURCE_NAME: str = FileSource.OCI_STORAGE + DEFAULT_BUCKET_TYPE: str = "oci_storage" + +class GOOGLE_CLOUD_STORAGE(_BlobLikeBase): + SOURCE_NAME: str = FileSource.GOOGLE_CLOUD_STORAGE + DEFAULT_BUCKET_TYPE: str = "google_cloud_storage" class Confluence(SyncBase): SOURCE_NAME: str = FileSource.CONFLUENCE @@ -705,6 +741,9 @@ class BOX(SyncBase): func_factory = { FileSource.S3: S3, + FileSource.R2: R2, + FileSource.OCI_STORAGE: OCI_STORAGE, + FileSource.GOOGLE_CLOUD_STORAGE: GOOGLE_CLOUD_STORAGE, FileSource.NOTION: Notion, FileSource.DISCORD: Discord, FileSource.CONFLUENCE: Confluence, diff --git a/web/src/assets/svg/data-source/google-cloud-storage.svg b/web/src/assets/svg/data-source/google-cloud-storage.svg new file mode 100644 index 000000000..59db35d96 --- /dev/null +++ b/web/src/assets/svg/data-source/google-cloud-storage.svg @@ -0,0 +1 @@ +Icon_24px_CloudStorage_Color \ No newline at end of file diff --git a/web/src/assets/svg/data-source/oracle-storage.svg b/web/src/assets/svg/data-source/oracle-storage.svg new file mode 100644 index 000000000..90768f8bc --- /dev/null +++ b/web/src/assets/svg/data-source/oracle-storage.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/web/src/assets/svg/data-source/r2.svg b/web/src/assets/svg/data-source/r2.svg new file mode 100644 index 000000000..d31b48fb0 --- /dev/null +++ b/web/src/assets/svg/data-source/r2.svg @@ -0,0 +1,5 @@ + \ No newline at end of file diff --git a/web/src/locales/en.ts b/web/src/locales/en.ts index 9c353b367..97a94a137 100644 --- a/web/src/locales/en.ts +++ b/web/src/locales/en.ts @@ -849,6 +849,12 @@ Example: Virtual Hosted Style`, 'Integrate your Confluence workspace to search documentation.', s3Description: 'Connect to your AWS S3 bucket to import and sync stored files.', + google_cloud_storageDescription: + 'Connect your Google Cloud Storage bucket to import and sync files.', + r2Description: + 'Connect your Cloudflare R2 bucket to import and sync files.', + oci_storageDescription: + 'Connect your Oracle Cloud Object Storage bucket to import and sync files.', discordDescription: 'Link your Discord server to access and analyze chat data.', notionDescription: @@ -873,6 +879,7 @@ Example: Virtual Hosted Style`, 'Upload the OAuth JSON generated from Google Console. If it only contains client credentials, run the browser-based verification once to mint long-lived refresh tokens.', dropboxDescription: 'Connect your Dropbox to sync files and folders from a chosen account.', + boxDescription: 'Connect your Box drive to sync files and folders.', dropboxAccessTokenTip: 'Generate a long-lived access token in the Dropbox App Console with files.metadata.read, files.content.read, and sharing.read scopes.', moodleDescription: diff --git a/web/src/locales/ru.ts b/web/src/locales/ru.ts index 433a4ffab..cf10bf73c 100644 --- a/web/src/locales/ru.ts +++ b/web/src/locales/ru.ts @@ -732,10 +732,18 @@ export default { 'Интегрируйте ваше рабочее пространство Confluence для поиска документации.', s3Description: 'Подключитесь к вашему AWS S3 бакету для импорта и синхронизации хранимых файлов.', + oci_storageDescription: + 'Подключите бакет Oracle Cloud Object Storage для импорта и синхронизации файлов.', + r2Description: + 'Подключите ваш бакет Cloudflare R2 для импорта и синхронизации файлов.', + google_cloud_storageDescription: + 'Подключите бакет Google Cloud Storage для импорта и синхронизации файлов.', discordDescription: 'Свяжите ваш Discord сервер для доступа и анализа данных чата.', notionDescription: 'Синхронизируйте страницы и базы данных из Notion для извлечения знаний.', + boxDescription: + 'Подключите ваш диск Box для синхронизации файлов и папок.', google_driveDescription: 'Подключите ваш Google Drive через OAuth и синхронизируйте определенные папки или диски.', gmailDescription: diff --git a/web/src/locales/zh.ts b/web/src/locales/zh.ts index 21791f872..399425391 100644 --- a/web/src/locales/zh.ts +++ b/web/src/locales/zh.ts @@ -732,8 +732,12 @@ General:实体和关系提取提示来自 GitHub - microsoft/graphrag:基于 log: '日志', confluenceDescription: '连接你的 Confluence 工作区以搜索文档内容。', s3Description: ' 连接你的 AWS S3 存储桶以导入和同步文件。', + google_cloud_storageDescription: + '连接你的 Google Cloud Storage 存储桶以导入和同步文件。', discordDescription: ' 连接你的 Discord 服务器以访问和分析聊天数据。', notionDescription: ' 同步 Notion 页面与数据库,用于知识检索。', + oci_storageDescription: + '连接你的 Oracle Cloud Object Storage 存储桶以导入和同步文件。', google_driveDescription: '通过 OAuth 连接 Google Drive,并同步指定的文件夹或云端硬盘。', gmailDescription: '通过 OAuth 连接 Gmail,用于同步邮件。', @@ -749,6 +753,8 @@ General:实体和关系提取提示来自 GitHub - microsoft/graphrag:基于 gmailTokenTip: '请上传由 Google Console 生成的 OAuth JSON。如果仅包含 client credentials,请通过浏览器授权一次以获取长期有效的刷新 Token。', dropboxDescription: '连接 Dropbox,同步指定账号下的文件与文件夹。', + boxDescription: '连接你的 Box 云盘以同步文件和文件夹。', + r2Description: '连接你的 Cloudflare R2 存储桶以导入和同步文件。', dropboxAccessTokenTip: '请在 Dropbox App Console 生成 Access Token,并勾选 files.metadata.read、files.content.read、sharing.read 等必要权限。', jiraDescription: '接入 Jira 工作区,持续同步Issues、评论与附件。', diff --git a/web/src/pages/user-setting/data-source/contant.tsx b/web/src/pages/user-setting/data-source/contant.tsx index 483afc432..eec3a7fc9 100644 --- a/web/src/pages/user-setting/data-source/contant.tsx +++ b/web/src/pages/user-setting/data-source/contant.tsx @@ -20,6 +20,9 @@ export enum DataSourceKey { WEBDAV = 'webdav', BOX = 'box', DROPBOX = 'dropbox', + R2 = 'r2', + OCI_STORAGE = 'oci_storage', + GOOGLE_CLOUD_STORAGE = 'google_cloud_storage', // SHAREPOINT = 'sharepoint', // SLACK = 'slack', // TEAMS = 'teams', @@ -27,6 +30,23 @@ export enum DataSourceKey { export const generateDataSourceInfo = (t: TFunction) => { return { + [DataSourceKey.GOOGLE_CLOUD_STORAGE]: { + name: 'Google Cloud Storage', + description: t( + `setting.${DataSourceKey.GOOGLE_CLOUD_STORAGE}Description`, + ), + icon: , + }, + [DataSourceKey.OCI_STORAGE]: { + name: 'Oracle Storage', + description: t(`setting.${DataSourceKey.OCI_STORAGE}Description`), + icon: , + }, + [DataSourceKey.R2]: { + name: 'R2', + description: t(`setting.${DataSourceKey.R2}Description`), + icon: , + }, [DataSourceKey.S3]: { name: 'S3', description: t(`setting.${DataSourceKey.S3}Description`), @@ -122,8 +142,85 @@ export const DataSourceFormBaseFields = [ })), }, ]; - export const DataSourceFormFields = { + [DataSourceKey.GOOGLE_CLOUD_STORAGE]: [ + { + label: 'GCS Access Key ID', + name: 'config.credentials.access_key_id', + type: FormFieldType.Text, + required: true, + }, + { + label: 'GCS Secret Access Key', + name: 'config.credentials.secret_access_key', + type: FormFieldType.Password, + required: true, + }, + { + label: 'Bucket Name', + name: 'config.bucket_name', + type: FormFieldType.Text, + required: true, + }, + ], + [DataSourceKey.OCI_STORAGE]: [ + { + label: 'OCI Namespace', + name: 'config.credentials.namespace', + type: FormFieldType.Text, + required: true, + }, + { + label: 'OCI Region', + name: 'config.credentials.region', + type: FormFieldType.Text, + required: true, + }, + { + label: 'OCI Access Key ID', + name: 'config.credentials.access_key_id', + type: FormFieldType.Text, + required: true, + }, + { + label: 'OCI Secret Access Key', + name: 'config.credentials.secret_access_key', + type: FormFieldType.Password, + required: true, + }, + { + label: 'Bucket Name', + name: 'config.bucket_name', + type: FormFieldType.Text, + required: true, + }, + ], + [DataSourceKey.R2]: [ + { + label: 'R2 Account ID', + name: 'config.credentials.account_id', + type: FormFieldType.Text, + required: true, + }, + { + label: 'R2 Access Key ID', + name: 'config.credentials.r2_access_key_id', + type: FormFieldType.Text, + required: true, + }, + { + label: 'R2 Secret Access Key', + name: 'config.credentials.r2_secret_access_key', + type: FormFieldType.Password, + required: true, + }, + { + label: 'Bucket Name', + name: 'config.bucket_name', + type: FormFieldType.Text, + required: true, + }, + ], [DataSourceKey.S3]: [ { label: 'AWS Access Key ID', @@ -149,9 +246,6 @@ export const DataSourceFormFields = { type: FormFieldType.Select, options: [ { label: 'S3', value: 's3' }, - { label: 'R2', value: 'r2' }, - { label: 'Google Cloud Storage', value: 'google_cloud_storage' }, - { label: 'OCI Storage', value: 'oci_storage' }, { label: 'S3 Compatible', value: 's3_compatible' }, ], required: true, @@ -304,7 +398,6 @@ export const DataSourceFormFields = { ), tooltip: t('setting.google_driveTokenTip'), @@ -399,7 +492,6 @@ export const DataSourceFormFields = { ), tooltip: t('setting.gmailTokenTip'), @@ -613,6 +705,18 @@ export const DataSourceFormDefaultValues = { }, }, }, + [DataSourceKey.R2]: { + name: '', + source: DataSourceKey.R2, + config: { + bucket_name: '', + credentials: { + account_id: '', + r2_access_key_id: '', + r2_secret_access_key: '', + }, + }, + }, [DataSourceKey.NOTION]: { name: '', source: DataSourceKey.NOTION, @@ -678,6 +782,30 @@ export const DataSourceFormDefaultValues = { }, }, }, + [DataSourceKey.GOOGLE_CLOUD_STORAGE]: { + name: '', + source: DataSourceKey.GOOGLE_CLOUD_STORAGE, + config: { + bucket_name: '', + credentials: { + access_key_id: '', + secret_access_key: '', + }, + }, + }, + [DataSourceKey.OCI_STORAGE]: { + name: '', + source: DataSourceKey.OCI_STORAGE, + config: { + bucket_name: '', + credentials: { + namespace: '', + region: '', + access_key_id: '', + secret_access_key: '', + }, + }, + }, [DataSourceKey.MOODLE]: { name: '', source: DataSourceKey.MOODLE,