diff --git a/common/constants.py b/common/constants.py
index 498ca284e..490a5a4e3 100644
--- a/common/constants.py
+++ b/common/constants.py
@@ -124,7 +124,11 @@ class FileSource(StrEnum):
MOODLE = "moodle"
DROPBOX = "dropbox"
BOX = "box"
+ R2 = "r2"
+ OCI_STORAGE = "oci_storage"
+ GOOGLE_CLOUD_STORAGE = "google_cloud_storage"
+
class PipelineTaskType(StrEnum):
PARSE = "Parse"
DOWNLOAD = "Download"
diff --git a/common/data_source/blob_connector.py b/common/data_source/blob_connector.py
index c4b4fba11..acb08f6dd 100644
--- a/common/data_source/blob_connector.py
+++ b/common/data_source/blob_connector.py
@@ -56,7 +56,7 @@ class BlobStorageConnector(LoadConnector, PollConnector):
# Validate credentials
if self.bucket_type == BlobType.R2:
- if not all(
+ if not all(
credentials.get(key)
for key in ["r2_access_key_id", "r2_secret_access_key", "account_id"]
):
diff --git a/rag/svr/sync_data_source.py b/rag/svr/sync_data_source.py
index 2dd296719..5674b73d0 100644
--- a/rag/svr/sync_data_source.py
+++ b/rag/svr/sync_data_source.py
@@ -161,23 +161,59 @@ class SyncBase:
def _get_source_prefix(self):
return ""
-
-class S3(SyncBase):
- SOURCE_NAME: str = FileSource.S3
+class _BlobLikeBase(SyncBase):
+ DEFAULT_BUCKET_TYPE: str = "s3"
async def _generate(self, task: dict):
- self.connector = BlobStorageConnector(bucket_type=self.conf.get("bucket_type", "s3"), bucket_name=self.conf["bucket_name"], prefix=self.conf.get("prefix", ""))
+ bucket_type = self.conf.get("bucket_type", self.DEFAULT_BUCKET_TYPE)
+
+ self.connector = BlobStorageConnector(
+ bucket_type=bucket_type,
+ bucket_name=self.conf["bucket_name"],
+ prefix=self.conf.get("prefix", ""),
+ )
self.connector.load_credentials(self.conf["credentials"])
+
document_batch_generator = (
self.connector.load_from_state()
if task["reindex"] == "1" or not task["poll_range_start"]
- else self.connector.poll_source(task["poll_range_start"].timestamp(), datetime.now(timezone.utc).timestamp())
+ else self.connector.poll_source(
+ task["poll_range_start"].timestamp(),
+ datetime.now(timezone.utc).timestamp(),
+ )
)
- begin_info = "totally" if task["reindex"] == "1" or not task["poll_range_start"] else "from {}".format(task["poll_range_start"])
- logging.info("Connect to {}: {}(prefix/{}) {}".format(self.conf.get("bucket_type", "s3"), self.conf["bucket_name"], self.conf.get("prefix", ""), begin_info))
+ begin_info = (
+ "totally"
+ if task["reindex"] == "1" or not task["poll_range_start"]
+ else "from {}".format(task["poll_range_start"])
+ )
+
+ logging.info(
+ "Connect to {}: {}(prefix/{}) {}".format(
+ bucket_type,
+ self.conf["bucket_name"],
+ self.conf.get("prefix", ""),
+ begin_info,
+ )
+ )
return document_batch_generator
+class S3(_BlobLikeBase):
+ SOURCE_NAME: str = FileSource.S3
+ DEFAULT_BUCKET_TYPE: str = "s3"
+
+class R2(_BlobLikeBase):
+ SOURCE_NAME: str = FileSource.R2
+ DEFAULT_BUCKET_TYPE: str = "r2"
+
+class OCI_STORAGE(_BlobLikeBase):
+ SOURCE_NAME: str = FileSource.OCI_STORAGE
+ DEFAULT_BUCKET_TYPE: str = "oci_storage"
+
+class GOOGLE_CLOUD_STORAGE(_BlobLikeBase):
+ SOURCE_NAME: str = FileSource.GOOGLE_CLOUD_STORAGE
+ DEFAULT_BUCKET_TYPE: str = "google_cloud_storage"
class Confluence(SyncBase):
SOURCE_NAME: str = FileSource.CONFLUENCE
@@ -705,6 +741,9 @@ class BOX(SyncBase):
func_factory = {
FileSource.S3: S3,
+ FileSource.R2: R2,
+ FileSource.OCI_STORAGE: OCI_STORAGE,
+ FileSource.GOOGLE_CLOUD_STORAGE: GOOGLE_CLOUD_STORAGE,
FileSource.NOTION: Notion,
FileSource.DISCORD: Discord,
FileSource.CONFLUENCE: Confluence,
diff --git a/web/src/assets/svg/data-source/google-cloud-storage.svg b/web/src/assets/svg/data-source/google-cloud-storage.svg
new file mode 100644
index 000000000..59db35d96
--- /dev/null
+++ b/web/src/assets/svg/data-source/google-cloud-storage.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/web/src/assets/svg/data-source/oracle-storage.svg b/web/src/assets/svg/data-source/oracle-storage.svg
new file mode 100644
index 000000000..90768f8bc
--- /dev/null
+++ b/web/src/assets/svg/data-source/oracle-storage.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/web/src/assets/svg/data-source/r2.svg b/web/src/assets/svg/data-source/r2.svg
new file mode 100644
index 000000000..d31b48fb0
--- /dev/null
+++ b/web/src/assets/svg/data-source/r2.svg
@@ -0,0 +1,5 @@
+
\ No newline at end of file
diff --git a/web/src/locales/en.ts b/web/src/locales/en.ts
index 9c353b367..97a94a137 100644
--- a/web/src/locales/en.ts
+++ b/web/src/locales/en.ts
@@ -849,6 +849,12 @@ Example: Virtual Hosted Style`,
'Integrate your Confluence workspace to search documentation.',
s3Description:
'Connect to your AWS S3 bucket to import and sync stored files.',
+ google_cloud_storageDescription:
+ 'Connect your Google Cloud Storage bucket to import and sync files.',
+ r2Description:
+ 'Connect your Cloudflare R2 bucket to import and sync files.',
+ oci_storageDescription:
+ 'Connect your Oracle Cloud Object Storage bucket to import and sync files.',
discordDescription:
'Link your Discord server to access and analyze chat data.',
notionDescription:
@@ -873,6 +879,7 @@ Example: Virtual Hosted Style`,
'Upload the OAuth JSON generated from Google Console. If it only contains client credentials, run the browser-based verification once to mint long-lived refresh tokens.',
dropboxDescription:
'Connect your Dropbox to sync files and folders from a chosen account.',
+ boxDescription: 'Connect your Box drive to sync files and folders.',
dropboxAccessTokenTip:
'Generate a long-lived access token in the Dropbox App Console with files.metadata.read, files.content.read, and sharing.read scopes.',
moodleDescription:
diff --git a/web/src/locales/ru.ts b/web/src/locales/ru.ts
index 433a4ffab..cf10bf73c 100644
--- a/web/src/locales/ru.ts
+++ b/web/src/locales/ru.ts
@@ -732,10 +732,18 @@ export default {
'Интегрируйте ваше рабочее пространство Confluence для поиска документации.',
s3Description:
'Подключитесь к вашему AWS S3 бакету для импорта и синхронизации хранимых файлов.',
+ oci_storageDescription:
+ 'Подключите бакет Oracle Cloud Object Storage для импорта и синхронизации файлов.',
+ r2Description:
+ 'Подключите ваш бакет Cloudflare R2 для импорта и синхронизации файлов.',
+ google_cloud_storageDescription:
+ 'Подключите бакет Google Cloud Storage для импорта и синхронизации файлов.',
discordDescription:
'Свяжите ваш Discord сервер для доступа и анализа данных чата.',
notionDescription:
'Синхронизируйте страницы и базы данных из Notion для извлечения знаний.',
+ boxDescription:
+ 'Подключите ваш диск Box для синхронизации файлов и папок.',
google_driveDescription:
'Подключите ваш Google Drive через OAuth и синхронизируйте определенные папки или диски.',
gmailDescription:
diff --git a/web/src/locales/zh.ts b/web/src/locales/zh.ts
index 21791f872..399425391 100644
--- a/web/src/locales/zh.ts
+++ b/web/src/locales/zh.ts
@@ -732,8 +732,12 @@ General:实体和关系提取提示来自 GitHub - microsoft/graphrag:基于
log: '日志',
confluenceDescription: '连接你的 Confluence 工作区以搜索文档内容。',
s3Description: ' 连接你的 AWS S3 存储桶以导入和同步文件。',
+ google_cloud_storageDescription:
+ '连接你的 Google Cloud Storage 存储桶以导入和同步文件。',
discordDescription: ' 连接你的 Discord 服务器以访问和分析聊天数据。',
notionDescription: ' 同步 Notion 页面与数据库,用于知识检索。',
+ oci_storageDescription:
+ '连接你的 Oracle Cloud Object Storage 存储桶以导入和同步文件。',
google_driveDescription:
'通过 OAuth 连接 Google Drive,并同步指定的文件夹或云端硬盘。',
gmailDescription: '通过 OAuth 连接 Gmail,用于同步邮件。',
@@ -749,6 +753,8 @@ General:实体和关系提取提示来自 GitHub - microsoft/graphrag:基于
gmailTokenTip:
'请上传由 Google Console 生成的 OAuth JSON。如果仅包含 client credentials,请通过浏览器授权一次以获取长期有效的刷新 Token。',
dropboxDescription: '连接 Dropbox,同步指定账号下的文件与文件夹。',
+ boxDescription: '连接你的 Box 云盘以同步文件和文件夹。',
+ r2Description: '连接你的 Cloudflare R2 存储桶以导入和同步文件。',
dropboxAccessTokenTip:
'请在 Dropbox App Console 生成 Access Token,并勾选 files.metadata.read、files.content.read、sharing.read 等必要权限。',
jiraDescription: '接入 Jira 工作区,持续同步Issues、评论与附件。',
diff --git a/web/src/pages/user-setting/data-source/contant.tsx b/web/src/pages/user-setting/data-source/contant.tsx
index 483afc432..eec3a7fc9 100644
--- a/web/src/pages/user-setting/data-source/contant.tsx
+++ b/web/src/pages/user-setting/data-source/contant.tsx
@@ -20,6 +20,9 @@ export enum DataSourceKey {
WEBDAV = 'webdav',
BOX = 'box',
DROPBOX = 'dropbox',
+ R2 = 'r2',
+ OCI_STORAGE = 'oci_storage',
+ GOOGLE_CLOUD_STORAGE = 'google_cloud_storage',
// SHAREPOINT = 'sharepoint',
// SLACK = 'slack',
// TEAMS = 'teams',
@@ -27,6 +30,23 @@ export enum DataSourceKey {
export const generateDataSourceInfo = (t: TFunction) => {
return {
+ [DataSourceKey.GOOGLE_CLOUD_STORAGE]: {
+ name: 'Google Cloud Storage',
+ description: t(
+ `setting.${DataSourceKey.GOOGLE_CLOUD_STORAGE}Description`,
+ ),
+ icon: ,
+ },
+ [DataSourceKey.OCI_STORAGE]: {
+ name: 'Oracle Storage',
+ description: t(`setting.${DataSourceKey.OCI_STORAGE}Description`),
+ icon: ,
+ },
+ [DataSourceKey.R2]: {
+ name: 'R2',
+ description: t(`setting.${DataSourceKey.R2}Description`),
+ icon: ,
+ },
[DataSourceKey.S3]: {
name: 'S3',
description: t(`setting.${DataSourceKey.S3}Description`),
@@ -122,8 +142,85 @@ export const DataSourceFormBaseFields = [
})),
},
];
-
export const DataSourceFormFields = {
+ [DataSourceKey.GOOGLE_CLOUD_STORAGE]: [
+ {
+ label: 'GCS Access Key ID',
+ name: 'config.credentials.access_key_id',
+ type: FormFieldType.Text,
+ required: true,
+ },
+ {
+ label: 'GCS Secret Access Key',
+ name: 'config.credentials.secret_access_key',
+ type: FormFieldType.Password,
+ required: true,
+ },
+ {
+ label: 'Bucket Name',
+ name: 'config.bucket_name',
+ type: FormFieldType.Text,
+ required: true,
+ },
+ ],
+ [DataSourceKey.OCI_STORAGE]: [
+ {
+ label: 'OCI Namespace',
+ name: 'config.credentials.namespace',
+ type: FormFieldType.Text,
+ required: true,
+ },
+ {
+ label: 'OCI Region',
+ name: 'config.credentials.region',
+ type: FormFieldType.Text,
+ required: true,
+ },
+ {
+ label: 'OCI Access Key ID',
+ name: 'config.credentials.access_key_id',
+ type: FormFieldType.Text,
+ required: true,
+ },
+ {
+ label: 'OCI Secret Access Key',
+ name: 'config.credentials.secret_access_key',
+ type: FormFieldType.Password,
+ required: true,
+ },
+ {
+ label: 'Bucket Name',
+ name: 'config.bucket_name',
+ type: FormFieldType.Text,
+ required: true,
+ },
+ ],
+ [DataSourceKey.R2]: [
+ {
+ label: 'R2 Account ID',
+ name: 'config.credentials.account_id',
+ type: FormFieldType.Text,
+ required: true,
+ },
+ {
+ label: 'R2 Access Key ID',
+ name: 'config.credentials.r2_access_key_id',
+ type: FormFieldType.Text,
+ required: true,
+ },
+ {
+ label: 'R2 Secret Access Key',
+ name: 'config.credentials.r2_secret_access_key',
+ type: FormFieldType.Password,
+ required: true,
+ },
+ {
+ label: 'Bucket Name',
+ name: 'config.bucket_name',
+ type: FormFieldType.Text,
+ required: true,
+ },
+ ],
[DataSourceKey.S3]: [
{
label: 'AWS Access Key ID',
@@ -149,9 +246,6 @@ export const DataSourceFormFields = {
type: FormFieldType.Select,
options: [
{ label: 'S3', value: 's3' },
- { label: 'R2', value: 'r2' },
- { label: 'Google Cloud Storage', value: 'google_cloud_storage' },
- { label: 'OCI Storage', value: 'oci_storage' },
{ label: 'S3 Compatible', value: 's3_compatible' },
],
required: true,
@@ -304,7 +398,6 @@ export const DataSourceFormFields = {
),
tooltip: t('setting.google_driveTokenTip'),
@@ -399,7 +492,6 @@ export const DataSourceFormFields = {
),
tooltip: t('setting.gmailTokenTip'),
@@ -613,6 +705,18 @@ export const DataSourceFormDefaultValues = {
},
},
},
+ [DataSourceKey.R2]: {
+ name: '',
+ source: DataSourceKey.R2,
+ config: {
+ bucket_name: '',
+ credentials: {
+ account_id: '',
+ r2_access_key_id: '',
+ r2_secret_access_key: '',
+ },
+ },
+ },
[DataSourceKey.NOTION]: {
name: '',
source: DataSourceKey.NOTION,
@@ -678,6 +782,30 @@ export const DataSourceFormDefaultValues = {
},
},
},
+ [DataSourceKey.GOOGLE_CLOUD_STORAGE]: {
+ name: '',
+ source: DataSourceKey.GOOGLE_CLOUD_STORAGE,
+ config: {
+ bucket_name: '',
+ credentials: {
+ access_key_id: '',
+ secret_access_key: '',
+ },
+ },
+ },
+ [DataSourceKey.OCI_STORAGE]: {
+ name: '',
+ source: DataSourceKey.OCI_STORAGE,
+ config: {
+ bucket_name: '',
+ credentials: {
+ namespace: '',
+ region: '',
+ access_key_id: '',
+ secret_access_key: '',
+ },
+ },
+ },
[DataSourceKey.MOODLE]: {
name: '',
source: DataSourceKey.MOODLE,