mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-02-02 08:35:08 +08:00
Feat: Support multiple data sources synchronizations (#10954)
### What problem does this PR solve?

#10953

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
121
common/data_source/sharepoint_connector.py
Normal file
121
common/data_source/sharepoint_connector.py
Normal file
@ -0,0 +1,121 @@
|
||||
"""SharePoint connector"""
|
||||
|
||||
from typing import Any
|
||||
import msal
|
||||
from office365.graph_client import GraphClient
|
||||
from office365.runtime.client_request import ClientRequestException
|
||||
from office365.sharepoint.client_context import ClientContext
|
||||
|
||||
from common.data_source.config import INDEX_BATCH_SIZE
|
||||
from common.data_source.exceptions import ConnectorValidationError, ConnectorMissingCredentialError
|
||||
from common.data_source.interfaces import (
|
||||
CheckpointedConnectorWithPermSync,
|
||||
SecondsSinceUnixEpoch,
|
||||
SlimConnectorWithPermSync
|
||||
)
|
||||
from common.data_source.models import (
|
||||
ConnectorCheckpoint
|
||||
)
|
||||
|
||||
|
||||
class SharePointConnector(CheckpointedConnectorWithPermSync, SlimConnectorWithPermSync):
    """SharePoint connector for accessing SharePoint sites and documents.

    Authentication uses the MSAL client-credentials flow. The resulting access
    token is handed to both a Microsoft Graph client and a SharePoint client
    context. The document loading methods are placeholders that currently
    return empty results.
    """

    def __init__(self, batch_size: int = INDEX_BATCH_SIZE) -> None:
        """Create a connector with no clients attached yet.

        Args:
            batch_size: Stored on the instance as ``self.batch_size``; the
                methods of this class do not read it yet.
        """
        self.batch_size = batch_size
        # Both clients stay None until load_credentials() succeeds.
        self.sharepoint_client: ClientContext | None = None
        self.graph_client: GraphClient | None = None

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        """Acquire an access token and build the Graph and SharePoint clients.

        Args:
            credentials: Mapping that must provide non-empty ``tenant_id``,
                ``client_id``, ``client_secret`` and ``site_url`` values.

        Returns:
            Always ``None``.

        Raises:
            ConnectorMissingCredentialError: If a required value is missing or
                empty, if the token response carries no ``access_token``, or
                if any other error occurs while setting up the clients (the
                original error is attached as ``__cause__``).
        """
        try:
            tenant_id = credentials.get("tenant_id")
            client_id = credentials.get("client_id")
            client_secret = credentials.get("client_secret")
            site_url = credentials.get("site_url")

            if not all([tenant_id, client_id, client_secret, site_url]):
                raise ConnectorMissingCredentialError("SharePoint credentials are incomplete")

            # Create MSAL confidential client
            app = msal.ConfidentialClientApplication(
                client_id=client_id,
                client_credential=client_secret,
                authority=f"https://login.microsoftonline.com/{tenant_id}",
            )

            # Get access token
            result = app.acquire_token_for_client(scopes=["https://graph.microsoft.com/.default"])

            if "access_token" not in result:
                raise ConnectorMissingCredentialError("Failed to acquire SharePoint access token")

            access_token = result["access_token"]

            # NOTE(review): the token is requested for the Microsoft Graph scope
            # but is also passed to the SharePoint REST client context below.
            # Confirm that the SharePoint endpoint accepts this token, and that
            # the pinned office365 version accepts a raw token string here
            # rather than a token callback.
            graph_client = GraphClient(access_token)
            sharepoint_client = ClientContext(site_url).with_access_token(access_token)
        except ConnectorMissingCredentialError:
            # Already the right type and message; do not wrap it a second time.
            raise
        except Exception as e:
            raise ConnectorMissingCredentialError(f"SharePoint: {e}") from e

        # Publish both clients together so a failure above never leaves the
        # connector with only one of them set.
        self.graph_client = graph_client
        self.sharepoint_client = sharepoint_client
        return None

    def validate_connector_settings(self) -> None:
        """Check that the clients exist and that the SharePoint site is reachable.

        Raises:
            ConnectorMissingCredentialError: If either client has not been
                created yet.
            ConnectorValidationError: If the site request fails or returns a
                falsy result. A failure whose text contains ``401`` or ``403``
                is reported as a credentials/permissions problem.
        """
        if not self.sharepoint_client or not self.graph_client:
            raise ConnectorMissingCredentialError("SharePoint")

        try:
            # Test connection by getting site info
            site = self.sharepoint_client.site.get().execute_query()
        except ClientRequestException as e:
            error_text = str(e)
            if "401" in error_text or "403" in error_text:
                raise ConnectorValidationError("Invalid credentials or insufficient permissions") from e
            raise ConnectorValidationError(f"SharePoint validation error: {e}") from e

        if not site:
            raise ConnectorValidationError("Failed to access SharePoint site")

    def poll_source(self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch) -> Any:
        """Poll SharePoint for recent documents.

        Placeholder: ``start`` and ``end`` are ignored and an empty list is
        returned.
        """
        # Simplified implementation - in production this would handle actual polling
        return []

    def load_from_checkpoint(
        self,
        start: SecondsSinceUnixEpoch,
        end: SecondsSinceUnixEpoch,
        checkpoint: ConnectorCheckpoint,
    ) -> Any:
        """Load documents from a checkpoint.

        Placeholder: all arguments are ignored and an empty list is returned.
        """
        # Simplified implementation
        return []

    def load_from_checkpoint_with_perm_sync(
        self,
        start: SecondsSinceUnixEpoch,
        end: SecondsSinceUnixEpoch,
        checkpoint: ConnectorCheckpoint,
    ) -> Any:
        """Load documents from a checkpoint with permission sync.

        Placeholder: all arguments are ignored and an empty list is returned.
        """
        # Simplified implementation
        return []

    def build_dummy_checkpoint(self) -> ConnectorCheckpoint:
        """Return a ``ConnectorCheckpoint`` built with no arguments."""
        return ConnectorCheckpoint()

    def validate_checkpoint_json(self, checkpoint_json: str) -> ConnectorCheckpoint:
        """Validate checkpoint JSON.

        Placeholder: ``checkpoint_json`` is not parsed; a ``ConnectorCheckpoint``
        built with no arguments is returned.
        """
        # Simplified implementation
        return ConnectorCheckpoint()

    def retrieve_all_slim_docs_perm_sync(
        self,
        start: SecondsSinceUnixEpoch | None = None,
        end: SecondsSinceUnixEpoch | None = None,
        callback: Any = None,
    ) -> Any:
        """Retrieve all simplified documents with permission sync.

        Placeholder: all arguments are ignored and an empty list is returned.
        """
        # Simplified implementation
        return []
|
||||
Reference in New Issue
Block a user