diff --git a/common/constants.py b/common/constants.py
index 1c3404786..d9e75f66a 100644
--- a/common/constants.py
+++ b/common/constants.py
@@ -119,6 +119,7 @@ class FileSource(StrEnum):
SLACK = "slack"
TEAMS = "teams"
MOODLE = "moodle"
+ DROPBOX = "dropbox"
class PipelineTaskType(StrEnum):
diff --git a/common/data_source/config.py b/common/data_source/config.py
index 0c038c6d7..751d1f33c 100644
--- a/common/data_source/config.py
+++ b/common/data_source/config.py
@@ -50,6 +50,7 @@ class DocumentSource(str, Enum):
DISCORD = "discord"
MOODLE = "moodle"
S3_COMPATIBLE = "s3_compatible"
+ DROPBOX = "dropbox"
class FileOrigin(str, Enum):
diff --git a/common/data_source/dropbox_connector.py b/common/data_source/dropbox_connector.py
index fd349baa1..0a0a3c2de 100644
--- a/common/data_source/dropbox_connector.py
+++ b/common/data_source/dropbox_connector.py
@@ -1,13 +1,24 @@
"""Dropbox connector"""
+import logging
+from datetime import timezone
from typing import Any
from dropbox import Dropbox
from dropbox.exceptions import ApiError, AuthError
+from dropbox.files import FileMetadata, FolderMetadata
-from common.data_source.config import INDEX_BATCH_SIZE
-from common.data_source.exceptions import ConnectorValidationError, InsufficientPermissionsError, ConnectorMissingCredentialError
+from common.data_source.config import INDEX_BATCH_SIZE, DocumentSource
+from common.data_source.exceptions import (
+ ConnectorMissingCredentialError,
+ ConnectorValidationError,
+ InsufficientPermissionsError,
+)
from common.data_source.interfaces import LoadConnector, PollConnector, SecondsSinceUnixEpoch
+from common.data_source.models import Document, GenerateDocumentsOutput
+from common.data_source.utils import get_file_ext
+
+logger = logging.getLogger(__name__)
class DropboxConnector(LoadConnector, PollConnector):
@@ -19,29 +30,29 @@ class DropboxConnector(LoadConnector, PollConnector):
def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
"""Load Dropbox credentials"""
- try:
- access_token = credentials.get("dropbox_access_token")
- if not access_token:
- raise ConnectorMissingCredentialError("Dropbox access token is required")
-
- self.dropbox_client = Dropbox(access_token)
- return None
- except Exception as e:
- raise ConnectorMissingCredentialError(f"Dropbox: {e}")
+ access_token = credentials.get("dropbox_access_token")  # None when the key is absent
+ if not access_token:  # rejects a missing key as well as an empty token
+ raise ConnectorMissingCredentialError("Dropbox access token is required")
+
+ self.dropbox_client = Dropbox(access_token)  # the token is first exercised by validate_connector_settings()
+ return None
def validate_connector_settings(self) -> None:
"""Validate Dropbox connector settings"""
- if not self.dropbox_client:
+ if self.dropbox_client is None:  # no client has been created (see load_credentials)
raise ConnectorMissingCredentialError("Dropbox")
-
+
try:
- # Test connection by getting current account info
- self.dropbox_client.users_get_current_account()
- except (AuthError, ApiError) as e:
- if "invalid_access_token" in str(e).lower():
- raise InsufficientPermissionsError("Invalid Dropbox access token")
- else:
- raise ConnectorValidationError(f"Dropbox validation error: {e}")
+ self.dropbox_client.files_list_folder(path="", limit=1)  # probe call: list at most one entry of path ""
+ except AuthError as e:  # credentials rejected by Dropbox
+ logger.exception("[Dropbox]: Failed to validate Dropbox credentials")
+ raise ConnectorValidationError(f"Dropbox credential is invalid: {e}") from e  # `from e` keeps the SDK error as __cause__
+ except ApiError as e:
+ if e.error is not None and "insufficient_permissions" in str(e.error).lower():  # match on the textual form of the API error
+ raise InsufficientPermissionsError("Your Dropbox token does not have sufficient permissions.") from e
+ raise ConnectorValidationError(f"Unexpected Dropbox error during validation: {e.user_message_text or e}") from e
+ except Exception as e:  # any other failure is still surfaced as a validation error
+ raise ConnectorValidationError(f"Unexpected error during Dropbox settings validation: {e}") from e
def _download_file(self, path: str) -> bytes:
"""Download a single file from Dropbox."""
@@ -54,26 +65,105 @@ class DropboxConnector(LoadConnector, PollConnector):
"""Create a shared link for a file in Dropbox."""
if self.dropbox_client is None:
raise ConnectorMissingCredentialError("Dropbox")
-
+
try:
- # Try to get existing shared links first
shared_links = self.dropbox_client.sharing_list_shared_links(path=path)
if shared_links.links:
return shared_links.links[0].url
-
- # Create a new shared link
- link_settings = self.dropbox_client.sharing_create_shared_link_with_settings(path)
- return link_settings.url
- except Exception:
- # Fallback to basic link format
- return f"https://www.dropbox.com/home{path}"
- def poll_source(self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch) -> Any:
+ link_metadata = self.dropbox_client.sharing_create_shared_link_with_settings(path)
+ return link_metadata.url
+ except ApiError as err:
+ logger.exception(f"[Dropbox]: Failed to create a shared link for {path}: {err}")
+ return ""
+
+    def _yield_files_recursive(
+        self,
+        path: str,
+        start: SecondsSinceUnixEpoch | None,
+        end: SecondsSinceUnixEpoch | None,
+    ) -> GenerateDocumentsOutput:
+        """Yield batches of documents for the files under a Dropbox folder.
+
+        The folder is listed page by page, asking for ``self.batch_size``
+        entries per page. Sub-folders found on a page are walked recursively
+        as they are encountered, so their batches are yielded before the
+        batch of the page that contains them.
+
+        Args:
+            path: Folder path handed to ``files_list_folder``.
+            start: Files modified at or before this time (seconds since the
+                Unix epoch) are skipped; ``None`` disables the lower bound.
+            end: Files modified after this time (seconds since the Unix
+                epoch) are skipped; ``None`` disables the upper bound.
+
+        Yields:
+            Non-empty lists of ``Document``, at most one list per listing page.
+
+        Raises:
+            ConnectorMissingCredentialError: If no Dropbox client is set.
+        """
+        if self.dropbox_client is None:
+            raise ConnectorMissingCredentialError("Dropbox")
+
+        result = self.dropbox_client.files_list_folder(
+            path,
+            limit=self.batch_size,
+            recursive=False,
+            include_non_downloadable_files=False,
+        )
+
+        while True:
+            batch: list[Document] = []
+            for entry in result.entries:
+                if isinstance(entry, FolderMetadata):
+                    yield from self._yield_files_recursive(entry.path_lower, start, end)
+                    continue
+                if not isinstance(entry, FileMetadata):
+                    continue
+
+                # Filter on server_modified rather than client_modified: Dropbox
+                # stores client_modified exactly as the uploading client sent
+                # it, so it cannot be trusted to detect changes between polls.
+                modified_time = entry.server_modified
+                if modified_time.tzinfo is None:
+                    # Naive timestamps are interpreted as UTC.
+                    modified_time = modified_time.replace(tzinfo=timezone.utc)
+                else:
+                    modified_time = modified_time.astimezone(timezone.utc)
+
+                # Keep only files inside the half-open window (start, end].
+                time_as_seconds = modified_time.timestamp()
+                if start is not None and time_as_seconds <= start:
+                    continue
+                if end is not None and time_as_seconds > end:
+                    continue
+
+                try:
+                    downloaded_file = self._download_file(entry.path_display)
+                except Exception:
+                    # Best effort: a file that fails to download is logged and
+                    # skipped so the rest of the folder is still processed.
+                    logger.exception("[Dropbox]: Error downloading file %s", entry.path_display)
+                    continue
+
+                # Prefer the size reported by Dropbox; fall back to the blob length.
+                reported_size = getattr(entry, "size", None)
+                batch.append(
+                    Document(
+                        id=f"dropbox:{entry.id}",
+                        blob=downloaded_file,
+                        source=DocumentSource.DROPBOX,
+                        semantic_identifier=entry.name,
+                        extension=get_file_ext(entry.name),
+                        doc_updated_at=modified_time,
+                        size_bytes=reported_size if reported_size is not None else len(downloaded_file),
+                    )
+                )
+
+            if batch:
+                yield batch
+
+            if not result.has_more:
+                break
+
+            # Fetch the next page of the same listing.
+            result = self.dropbox_client.files_list_folder_continue(result.cursor)
+
+ def poll_source(self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch) -> GenerateDocumentsOutput:  # generator: yields batches of documents
"""Poll Dropbox for recent file changes"""
- # Simplified implementation - in production this would handle actual polling
- return []
+ if self.dropbox_client is None:  # evaluated on first iteration, because this method is a generator
+ raise ConnectorMissingCredentialError("Dropbox")
- def load_from_state(self) -> Any:
+ for batch in self._yield_files_recursive("", start, end):  # walk from path "", keeping files modified in (start, end]
+ yield batch
+
+ def load_from_state(self) -> GenerateDocumentsOutput:  # full load: no time window is applied
"""Load files from Dropbox state"""
- # Simplified implementation
- return []
\ No newline at end of file
+ return self._yield_files_recursive("", None, None)  # start=None and end=None disable both time bounds
+
+
+if __name__ == "__main__":
+    # Manual smoke test: authenticate with the token taken from the
+    # DROPBOX_ACCESS_TOKEN environment variable, validate the connector and
+    # print a summary of the first batch of documents.
+    import os
+
+    logging.basicConfig(level=logging.DEBUG)
+
+    connector = DropboxConnector()
+    token = os.environ.get("DROPBOX_ACCESS_TOKEN")
+    connector.load_credentials({"dropbox_access_token": token})
+    connector.validate_connector_settings()
+
+    # Only the first batch is pulled from the generator.
+    head = next(connector.load_from_state(), None)
+    if head is None:
+        print("No documents available in Dropbox.")
+    else:
+        print(f"Loaded {len(head)} documents in first batch.")
+        for document in head:
+            print(f"- {document.semantic_identifier} ({document.size_bytes} bytes)")
diff --git a/rag/svr/sync_data_source.py b/rag/svr/sync_data_source.py
index b29ad15de..bc9412205 100644
--- a/rag/svr/sync_data_source.py
+++ b/rag/svr/sync_data_source.py
@@ -37,7 +37,7 @@ from api.db.services.connector_service import ConnectorService, SyncLogsService
from api.db.services.knowledgebase_service import KnowledgebaseService
from common import settings
from common.config_utils import show_configs
-from common.data_source import BlobStorageConnector, NotionConnector, DiscordConnector, GoogleDriveConnector, MoodleConnector, JiraConnector
+from common.data_source import BlobStorageConnector, NotionConnector, DiscordConnector, GoogleDriveConnector, MoodleConnector, JiraConnector, DropboxConnector
from common.constants import FileSource, TaskStatus
from common.data_source.config import INDEX_BATCH_SIZE
from common.data_source.confluence_connector import ConfluenceConnector
@@ -211,6 +211,27 @@ class Gmail(SyncBase):
pass
+class Dropbox(SyncBase):
+    """Sync task that pulls documents from Dropbox through ``DropboxConnector``."""
+
+    SOURCE_NAME: str = FileSource.DROPBOX
+
+    async def _generate(self, task: dict):
+        """Build the connector and return the document generator for ``task``.
+
+        A full load is used when ``task["reindex"]`` equals ``"1"`` or when
+        ``task["poll_range_start"]`` is falsy; otherwise the connector is
+        polled from ``poll_range_start`` up to the current UTC time.
+        """
+        batch_size = self.conf.get("batch_size", INDEX_BATCH_SIZE)
+        self.connector = DropboxConnector(batch_size=batch_size)
+        self.connector.load_credentials(self.conf["credentials"])
+
+        full_sync = task["reindex"] == "1" or not task["poll_range_start"]
+        if full_sync:
+            document_generator = self.connector.load_from_state()
+            begin_info = "totally"
+        else:
+            since = task["poll_range_start"]
+            window_start = since.timestamp()
+            window_end = datetime.now(timezone.utc).timestamp()
+            document_generator = self.connector.poll_source(window_start, window_end)
+            begin_info = f"from {since}"
+
+        logging.info(f"[Dropbox] Connect to Dropbox {begin_info}")
+        return document_generator
+
+
class GoogleDrive(SyncBase):
SOURCE_NAME: str = FileSource.GOOGLE_DRIVE
@@ -454,7 +475,8 @@ func_factory = {
FileSource.SHAREPOINT: SharePoint,
FileSource.SLACK: Slack,
FileSource.TEAMS: Teams,
- FileSource.MOODLE: Moodle
+ FileSource.MOODLE: Moodle,
+ FileSource.DROPBOX: Dropbox,
}
diff --git a/web/src/assets/svg/data-source/dropbox.svg b/web/src/assets/svg/data-source/dropbox.svg
new file mode 100644
index 000000000..2890b48af
--- /dev/null
+++ b/web/src/assets/svg/data-source/dropbox.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/web/src/locales/en.ts b/web/src/locales/en.ts
index 44eff8144..233a0d1fc 100644
--- a/web/src/locales/en.ts
+++ b/web/src/locales/en.ts
@@ -742,6 +742,10 @@ Example: https://fsn1.your-objectstorage.com`,
'Comma-separated emails whose "My Drive" contents should be indexed (include the primary admin).',
google_driveSharedFoldersTip:
'Comma-separated Google Drive folder links to crawl.',
+ dropboxDescription:
+ 'Connect your Dropbox to sync files and folders from a chosen account.',
+ dropboxAccessTokenTip:
+ 'Generate a long-lived access token in the Dropbox App Console with files.metadata.read, files.content.read, and sharing.read scopes.',
moodleDescription:
'Connect to your Moodle LMS to sync course content, forums, and resources.',
moodleUrlTip:
diff --git a/web/src/locales/zh.ts b/web/src/locales/zh.ts
index d2f4b1d16..46db2c2c3 100644
--- a/web/src/locales/zh.ts
+++ b/web/src/locales/zh.ts
@@ -722,6 +722,9 @@ General:实体和关系提取提示来自 GitHub - microsoft/graphrag:基于
'需要索引其 “我的云端硬盘” 的邮箱,多个邮箱用逗号分隔(建议包含管理员)。',
google_driveSharedFoldersTip:
'需要同步的 Google Drive 文件夹链接,多个链接用逗号分隔。',
+ dropboxDescription: '连接 Dropbox,同步指定账号下的文件与文件夹。',
+ dropboxAccessTokenTip:
+ '请在 Dropbox App Console 生成 Access Token,并勾选 files.metadata.read、files.content.read、sharing.read 等必要权限。',
jiraDescription: '接入 Jira 工作区,持续同步Issues、评论与附件。',
jiraBaseUrlTip:
'Jira 的 Base URL,例如:https://your-domain.atlassian.net。',
diff --git a/web/src/pages/user-setting/data-source/contant.tsx b/web/src/pages/user-setting/data-source/contant.tsx
index cc45ad869..a39614177 100644
--- a/web/src/pages/user-setting/data-source/contant.tsx
+++ b/web/src/pages/user-setting/data-source/contant.tsx
@@ -12,6 +12,7 @@ export enum DataSourceKey {
MOODLE = 'moodle',
// GMAIL = 'gmail',
JIRA = 'jira',
+ DROPBOX = 'dropbox',
// SHAREPOINT = 'sharepoint',
// SLACK = 'slack',
// TEAMS = 'teams',
@@ -53,6 +54,11 @@ export const DataSourceInfo = {
description: t(`setting.${DataSourceKey.JIRA}Description`),
icon: ,
},
+ [DataSourceKey.DROPBOX]: {
+ name: 'Dropbox',
+ description: t(`setting.${DataSourceKey.DROPBOX}Description`),
+ icon: ,
+ },
};
export const DataSourceFormBaseFields = [
@@ -408,6 +414,22 @@ export const DataSourceFormFields = {
tooltip: t('setting.jiraPasswordTip'),
},
],
+ [DataSourceKey.DROPBOX]: [
+ {
+ label: 'Access Token',
+ name: 'config.credentials.dropbox_access_token',
+ type: FormFieldType.Password,
+ required: true,
+ tooltip: t('setting.dropboxAccessTokenTip'),
+ },
+ {
+ label: 'Batch Size',
+ name: 'config.batch_size',
+ type: FormFieldType.Number,
+ required: false,
+ placeholder: 'Defaults to 2',
+ },
+ ],
};
export const DataSourceFormDefaultValues = {
@@ -508,4 +530,14 @@ export const DataSourceFormDefaultValues = {
},
},
},
+ [DataSourceKey.DROPBOX]: {
+ name: '',
+ source: DataSourceKey.DROPBOX,
+ config: {
+ batch_size: 2,
+ credentials: {
+ dropbox_access_token: '',
+ },
+ },
+ },
};
diff --git a/web/src/pages/user-setting/data-source/index.tsx b/web/src/pages/user-setting/data-source/index.tsx
index 2ba7cecd0..6fc3bf9e0 100644
--- a/web/src/pages/user-setting/data-source/index.tsx
+++ b/web/src/pages/user-setting/data-source/index.tsx
@@ -56,6 +56,12 @@ const dataSourceTemplates = [
description: DataSourceInfo[DataSourceKey.JIRA].description,
icon: DataSourceInfo[DataSourceKey.JIRA].icon,
},
+ {
+ id: DataSourceKey.DROPBOX,
+ name: DataSourceInfo[DataSourceKey.DROPBOX].name,
+ description: DataSourceInfo[DataSourceKey.DROPBOX].description,
+ icon: DataSourceInfo[DataSourceKey.DROPBOX].icon,
+ },
];
const DataSource = () => {
const { t } = useTranslation();