From 96810b7d976791fe31529ae0c891a8c66175a686 Mon Sep 17 00:00:00 2001 From: Magicbook1108 Date: Wed, 31 Dec 2025 19:00:00 +0800 Subject: [PATCH] Fix: webdav connector (#12380) ### What problem does this PR solve? Fixes the WebDAV connector (#11422): settings validation now lists the remote directory with `ls(..., detail=True)` (PROPFIND) instead of calling `exists()`, failures are classified by HTTP status code rather than by matching substrings of the error message, the `exists()` connection probe is removed from credential loading, and the WebDAV sync task wraps its document batch generator in an async generator. ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- common/data_source/__init__.py | 2 - common/data_source/webdav_connector.py | 79 +++++++++++++++----------- rag/svr/sync_data_source.py | 9 ++- 3 files changed, 54 insertions(+), 36 deletions(-) diff --git a/common/data_source/__init__.py b/common/data_source/__init__.py index 9fed196ab..97ce3f18e 100644 --- a/common/data_source/__init__.py +++ b/common/data_source/__init__.py @@ -34,7 +34,6 @@ from .google_drive.connector import GoogleDriveConnector from .jira.connector import JiraConnector from .sharepoint_connector import SharePointConnector from .teams_connector import TeamsConnector -from .webdav_connector import WebDAVConnector from .moodle_connector import MoodleConnector from .airtable_connector import AirtableConnector from .asana_connector import AsanaConnector @@ -62,7 +61,6 @@ __all__ = [ "JiraConnector", "SharePointConnector", "TeamsConnector", - "WebDAVConnector", "MoodleConnector", "BlobType", "DocumentSource", diff --git a/common/data_source/webdav_connector.py b/common/data_source/webdav_connector.py index f8e615789..ec06a64e1 100644 --- a/common/data_source/webdav_connector.py +++ b/common/data_source/webdav_connector.py @@ -82,10 +82,6 @@ class WebDAVConnector(LoadConnector, PollConnector): base_url=self.base_url, auth=(username, password) ) - - # Test connection - self.client.exists(self.remote_path) - except Exception as e: logging.error(f"Failed to connect to WebDAV server: {e}") raise ConnectorMissingCredentialError( @@ -308,60 +304,79 @@ class WebDAVConnector(LoadConnector, PollConnector): yield batch def validate_connector_settings(self) -> None: - """Validate WebDAV connector settings - - Raises: - ConnectorMissingCredentialError: If 
credentials are not loaded - ConnectorValidationError: If settings are invalid + """Validate WebDAV connector settings. + + Validation should exercise the same code-paths used by the connector + (directory listing / PROPFIND), avoiding exists() which may probe with + methods that differ across servers. """ if self.client is None: - raise ConnectorMissingCredentialError( - "WebDAV credentials not loaded." - ) + raise ConnectorMissingCredentialError("WebDAV credentials not loaded.") if not self.base_url: - raise ConnectorValidationError( - "No base URL was provided in connector settings." - ) + raise ConnectorValidationError("No base URL was provided in connector settings.") + + # Normalize directory path: for collections, many servers behave better with trailing '/' + test_path = self.remote_path or "/" + if not test_path.startswith("/"): + test_path = f"/{test_path}" + if test_path != "/" and not test_path.endswith("/"): + test_path = f"{test_path}/" try: - if not self.client.exists(self.remote_path): - raise ConnectorValidationError( - f"Remote path '{self.remote_path}' does not exist on WebDAV server." - ) + # Use the same behavior as real sync: list directory with details (PROPFIND) + self.client.ls(test_path, detail=True) except Exception as e: - error_message = str(e) - - if "401" in error_message or "unauthorized" in error_message.lower(): - raise CredentialExpiredError( - "WebDAV credentials appear invalid or expired." 
- ) - - if "403" in error_message or "forbidden" in error_message.lower(): + # Prefer structured status codes if present on the exception/response + status = None + for attr in ("status_code", "code"): + v = getattr(e, attr, None) + if isinstance(v, int): + status = v + break + if status is None: + resp = getattr(e, "response", None) + v = getattr(resp, "status_code", None) + if isinstance(v, int): + status = v + + # If we can classify by status code, do it + if status == 401: + raise CredentialExpiredError("WebDAV credentials appear invalid or expired.") + if status == 403: raise InsufficientPermissionsError( f"Insufficient permissions to access path '{self.remote_path}' on WebDAV server." ) - - if "404" in error_message or "not found" in error_message.lower(): + if status == 404: raise ConnectorValidationError( f"Remote path '{self.remote_path}' does not exist on WebDAV server." ) + # Fallback: avoid brittle substring matching that caused false positives. + # Provide the original exception for diagnosis. 
raise ConnectorValidationError( - f"Unexpected WebDAV client error: {e}" + f"WebDAV validation failed for path '{test_path}': {repr(e)}" ) + if __name__ == "__main__": credentials_dict = { "username": os.environ.get("WEBDAV_USERNAME"), "password": os.environ.get("WEBDAV_PASSWORD"), } + credentials_dict = { + "username": "user", + "password": "pass", + } + + + connector = WebDAVConnector( - base_url=os.environ.get("WEBDAV_URL") or "https://webdav.example.com", - remote_path=os.environ.get("WEBDAV_PATH") or "/", + base_url="http://172.17.0.1:8080/", + remote_path="/", ) try: diff --git a/rag/svr/sync_data_source.py b/rag/svr/sync_data_source.py index 81478ab9b..c1aa3e3e9 100644 --- a/rag/svr/sync_data_source.py +++ b/rag/svr/sync_data_source.py @@ -46,7 +46,6 @@ from common.data_source import ( MoodleConnector, JiraConnector, DropboxConnector, - WebDAVConnector, AirtableConnector, AsanaConnector, ImapConnector, @@ -55,6 +54,7 @@ from common.data_source import ( from common.constants import FileSource, TaskStatus from common.data_source.config import INDEX_BATCH_SIZE from common.data_source.models import ConnectorFailure +from common.data_source.webdav_connector import WebDAVConnector from common.data_source.confluence_connector import ConfluenceConnector from common.data_source.gmail_connector import GmailConnector from common.data_source.box_connector import BoxConnector @@ -696,7 +696,12 @@ class WebDAV(SyncBase): self.conf.get("remote_path", "/"), begin_info )) - return document_batch_generator + + async def async_wrapper(): + for document_batch in document_batch_generator: + yield document_batch + + return async_wrapper() class Moodle(SyncBase):