Fix: webdav connector (#12380)

### What problem does this PR solve?

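Fixes the WebDAV connector (#11422). Connector validation now lists the remote directory (PROPFIND) instead of calling `exists()`, and validation errors are classified by HTTP status code rather than by substring matching on the error message.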

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
Magicbook1108
2025-12-31 19:00:00 +08:00
committed by GitHub
parent 365f9b01ae
commit 96810b7d97
3 changed files with 54 additions and 36 deletions

View File

@ -34,7 +34,6 @@ from .google_drive.connector import GoogleDriveConnector
from .jira.connector import JiraConnector from .jira.connector import JiraConnector
from .sharepoint_connector import SharePointConnector from .sharepoint_connector import SharePointConnector
from .teams_connector import TeamsConnector from .teams_connector import TeamsConnector
from .webdav_connector import WebDAVConnector
from .moodle_connector import MoodleConnector from .moodle_connector import MoodleConnector
from .airtable_connector import AirtableConnector from .airtable_connector import AirtableConnector
from .asana_connector import AsanaConnector from .asana_connector import AsanaConnector
@ -62,7 +61,6 @@ __all__ = [
"JiraConnector", "JiraConnector",
"SharePointConnector", "SharePointConnector",
"TeamsConnector", "TeamsConnector",
"WebDAVConnector",
"MoodleConnector", "MoodleConnector",
"BlobType", "BlobType",
"DocumentSource", "DocumentSource",

View File

@ -82,10 +82,6 @@ class WebDAVConnector(LoadConnector, PollConnector):
base_url=self.base_url, base_url=self.base_url,
auth=(username, password) auth=(username, password)
) )
# Test connection
self.client.exists(self.remote_path)
except Exception as e: except Exception as e:
logging.error(f"Failed to connect to WebDAV server: {e}") logging.error(f"Failed to connect to WebDAV server: {e}")
raise ConnectorMissingCredentialError( raise ConnectorMissingCredentialError(
@ -308,60 +304,79 @@ class WebDAVConnector(LoadConnector, PollConnector):
yield batch yield batch
def validate_connector_settings(self) -> None: def validate_connector_settings(self) -> None:
"""Validate WebDAV connector settings """Validate WebDAV connector settings.
Raises: Validation should exercise the same code-paths used by the connector
ConnectorMissingCredentialError: If credentials are not loaded (directory listing / PROPFIND), avoiding exists() which may probe with
ConnectorValidationError: If settings are invalid methods that differ across servers.
""" """
if self.client is None: if self.client is None:
raise ConnectorMissingCredentialError( raise ConnectorMissingCredentialError("WebDAV credentials not loaded.")
"WebDAV credentials not loaded."
)
if not self.base_url: if not self.base_url:
raise ConnectorValidationError( raise ConnectorValidationError("No base URL was provided in connector settings.")
"No base URL was provided in connector settings."
) # Normalize directory path: for collections, many servers behave better with trailing '/'
test_path = self.remote_path or "/"
if not test_path.startswith("/"):
test_path = f"/{test_path}"
if test_path != "/" and not test_path.endswith("/"):
test_path = f"{test_path}/"
try: try:
if not self.client.exists(self.remote_path): # Use the same behavior as real sync: list directory with details (PROPFIND)
raise ConnectorValidationError( self.client.ls(test_path, detail=True)
f"Remote path '{self.remote_path}' does not exist on WebDAV server."
)
except Exception as e: except Exception as e:
error_message = str(e) # Prefer structured status codes if present on the exception/response
status = None
if "401" in error_message or "unauthorized" in error_message.lower(): for attr in ("status_code", "code"):
raise CredentialExpiredError( v = getattr(e, attr, None)
"WebDAV credentials appear invalid or expired." if isinstance(v, int):
) status = v
break
if "403" in error_message or "forbidden" in error_message.lower(): if status is None:
resp = getattr(e, "response", None)
v = getattr(resp, "status_code", None)
if isinstance(v, int):
status = v
# If we can classify by status code, do it
if status == 401:
raise CredentialExpiredError("WebDAV credentials appear invalid or expired.")
if status == 403:
raise InsufficientPermissionsError( raise InsufficientPermissionsError(
f"Insufficient permissions to access path '{self.remote_path}' on WebDAV server." f"Insufficient permissions to access path '{self.remote_path}' on WebDAV server."
) )
if status == 404:
if "404" in error_message or "not found" in error_message.lower():
raise ConnectorValidationError( raise ConnectorValidationError(
f"Remote path '{self.remote_path}' does not exist on WebDAV server." f"Remote path '{self.remote_path}' does not exist on WebDAV server."
) )
# Fallback: avoid brittle substring matching that caused false positives.
# Provide the original exception for diagnosis.
raise ConnectorValidationError( raise ConnectorValidationError(
f"Unexpected WebDAV client error: {e}" f"WebDAV validation failed for path '{test_path}': {repr(e)}"
) )
if __name__ == "__main__": if __name__ == "__main__":
credentials_dict = { credentials_dict = {
"username": os.environ.get("WEBDAV_USERNAME"), "username": os.environ.get("WEBDAV_USERNAME"),
"password": os.environ.get("WEBDAV_PASSWORD"), "password": os.environ.get("WEBDAV_PASSWORD"),
} }
credentials_dict = {
"username": "user",
"password": "pass",
}
connector = WebDAVConnector( connector = WebDAVConnector(
base_url=os.environ.get("WEBDAV_URL") or "https://webdav.example.com", base_url="http://172.17.0.1:8080/",
remote_path=os.environ.get("WEBDAV_PATH") or "/", remote_path="/",
) )
try: try:

View File

@ -46,7 +46,6 @@ from common.data_source import (
MoodleConnector, MoodleConnector,
JiraConnector, JiraConnector,
DropboxConnector, DropboxConnector,
WebDAVConnector,
AirtableConnector, AirtableConnector,
AsanaConnector, AsanaConnector,
ImapConnector, ImapConnector,
@ -55,6 +54,7 @@ from common.data_source import (
from common.constants import FileSource, TaskStatus from common.constants import FileSource, TaskStatus
from common.data_source.config import INDEX_BATCH_SIZE from common.data_source.config import INDEX_BATCH_SIZE
from common.data_source.models import ConnectorFailure from common.data_source.models import ConnectorFailure
from common.data_source.webdav_connector import WebDAVConnector
from common.data_source.confluence_connector import ConfluenceConnector from common.data_source.confluence_connector import ConfluenceConnector
from common.data_source.gmail_connector import GmailConnector from common.data_source.gmail_connector import GmailConnector
from common.data_source.box_connector import BoxConnector from common.data_source.box_connector import BoxConnector
@ -696,7 +696,12 @@ class WebDAV(SyncBase):
self.conf.get("remote_path", "/"), self.conf.get("remote_path", "/"),
begin_info begin_info
)) ))
return document_batch_generator
async def async_wrapper():
for document_batch in document_batch_generator:
yield document_batch
return async_wrapper()
class Moodle(SyncBase): class Moodle(SyncBase):