mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-29 16:05:35 +08:00
Merge branch 'main' into alert-autofix-59
This commit is contained in:
@ -186,7 +186,7 @@ class OnyxConfluence:
|
||||
# between the db and redis everywhere the credentials might be updated
|
||||
new_credential_str = json.dumps(new_credentials)
|
||||
self.redis_client.set(
|
||||
self.credential_key, new_credential_str, nx=True, ex=self.CREDENTIAL_TTL
|
||||
self.credential_key, new_credential_str, exp=self.CREDENTIAL_TTL
|
||||
)
|
||||
self._credentials_provider.set_credentials(new_credentials)
|
||||
|
||||
@ -1599,8 +1599,8 @@ class ConfluenceConnector(
|
||||
semantic_identifier=semantic_identifier,
|
||||
extension=".html", # Confluence pages are HTML
|
||||
blob=page_content.encode("utf-8"), # Encode page content as bytes
|
||||
size_bytes=len(page_content.encode("utf-8")), # Calculate size in bytes
|
||||
doc_updated_at=datetime_from_string(page["version"]["when"]),
|
||||
size_bytes=len(page_content.encode("utf-8")), # Calculate size in bytes
|
||||
primary_owners=primary_owners if primary_owners else None,
|
||||
metadata=metadata if metadata else None,
|
||||
)
|
||||
|
||||
@ -94,6 +94,7 @@ class Document(BaseModel):
|
||||
blob: bytes
|
||||
doc_updated_at: datetime
|
||||
size_bytes: int
|
||||
primary_owners: list
|
||||
metadata: Optional[dict[str, Any]] = None
|
||||
|
||||
|
||||
|
||||
@ -167,7 +167,6 @@ def get_latest_message_time(thread: ThreadType) -> datetime:
|
||||
|
||||
|
||||
def _build_doc_id(channel_id: str, thread_ts: str) -> str:
|
||||
"""构建文档ID"""
|
||||
return f"{channel_id}__{thread_ts}"
|
||||
|
||||
|
||||
@ -179,7 +178,6 @@ def thread_to_doc(
|
||||
user_cache: dict[str, BasicExpertInfo | None],
|
||||
channel_access: Any | None,
|
||||
) -> Document:
|
||||
"""将线程转换为文档"""
|
||||
channel_id = channel["id"]
|
||||
|
||||
initial_sender_expert_info = expert_info_from_slack_id(
|
||||
@ -237,7 +235,6 @@ def filter_channels(
|
||||
channels_to_connect: list[str] | None,
|
||||
regex_enabled: bool,
|
||||
) -> list[ChannelType]:
|
||||
"""过滤频道"""
|
||||
if not channels_to_connect:
|
||||
return all_channels
|
||||
|
||||
@ -381,7 +378,6 @@ def _process_message(
|
||||
[MessageType], SlackMessageFilterReason | None
|
||||
] = default_msg_filter,
|
||||
) -> ProcessedSlackMessage:
|
||||
"""处理消息"""
|
||||
thread_ts = message.get("thread_ts")
|
||||
thread_or_message_ts = thread_ts or message["ts"]
|
||||
try:
|
||||
@ -536,7 +532,6 @@ class SlackConnector(
|
||||
end: SecondsSinceUnixEpoch | None = None,
|
||||
callback: Any = None,
|
||||
) -> GenerateSlimDocumentOutput:
|
||||
"""获取所有简化文档(带权限同步)"""
|
||||
if self.client is None:
|
||||
raise ConnectorMissingCredentialError("Slack")
|
||||
|
||||
|
||||
@ -16,7 +16,7 @@ import logging
|
||||
import os
|
||||
import time
|
||||
from typing import Any, Dict, Optional
|
||||
from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse
|
||||
from urllib.parse import urlparse, urlunparse
|
||||
|
||||
from common import settings
|
||||
import httpx
|
||||
@ -58,21 +58,34 @@ def _get_delay(backoff_factor: float, attempt: int) -> float:
|
||||
_SENSITIVE_QUERY_KEYS = {"client_secret", "secret", "code", "access_token", "refresh_token", "password", "token", "app_secret"}
|
||||
|
||||
def _redact_sensitive_url_params(url: str) -> str:
|
||||
"""
|
||||
Return a version of the URL that is safe to log.
|
||||
|
||||
We intentionally drop query parameters and userinfo to avoid leaking
|
||||
credentials or tokens via logs. Only scheme, host, port and path
|
||||
are preserved.
|
||||
"""
|
||||
try:
|
||||
parsed = urlparse(url)
|
||||
if not parsed.query:
|
||||
return url
|
||||
clean_query = []
|
||||
for k, v in parse_qsl(parsed.query, keep_blank_values=True):
|
||||
if k.lower() in _SENSITIVE_QUERY_KEYS:
|
||||
clean_query.append((k, "***REDACTED***"))
|
||||
else:
|
||||
clean_query.append((k, v))
|
||||
new_query = urlencode(clean_query, doseq=True)
|
||||
redacted_url = urlunparse(parsed._replace(query=new_query))
|
||||
return redacted_url
|
||||
# Remove any potential userinfo (username:password@)
|
||||
netloc = parsed.hostname or ""
|
||||
if parsed.port:
|
||||
netloc = f"{netloc}:{parsed.port}"
|
||||
# Reconstruct URL without query, params, fragment, or userinfo.
|
||||
safe_url = urlunparse(
|
||||
(
|
||||
parsed.scheme,
|
||||
netloc,
|
||||
parsed.path,
|
||||
"", # params
|
||||
"", # query
|
||||
"", # fragment
|
||||
)
|
||||
)
|
||||
return safe_url
|
||||
except Exception:
|
||||
return url
|
||||
# If parsing fails, fall back to omitting the URL entirely.
|
||||
return "<redacted-url>"
|
||||
|
||||
def _is_sensitive_url(url: str) -> bool:
|
||||
"""Return True if URL is one of the configured OAuth endpoints."""
|
||||
@ -151,9 +164,15 @@ async def async_request(
|
||||
except httpx.RequestError as exc:
|
||||
last_exc = exc
|
||||
if attempt >= retries:
|
||||
# Do not log the full URL here to avoid leaking sensitive data.
|
||||
if not _is_sensitive_url(url):
|
||||
log_url = _redact_sensitive_url_params(url)
|
||||
logger.warning(f"async_request exhausted retries for {method}")
|
||||
raise
|
||||
delay = _get_delay(backoff_factor, attempt)
|
||||
if not _is_sensitive_url(url):
|
||||
log_url = _redact_sensitive_url_params(url)
|
||||
logger.warning(
|
||||
f"async_request exhausted retries for {method}; last error: {exc}"
|
||||
f"async_request attempt {attempt + 1}/{retries + 1} failed for {method}; retrying in {delay:.2f}s"
|
||||
)
|
||||
raise
|
||||
delay = _get_delay(backoff_factor, attempt)
|
||||
|
||||
Reference in New Issue
Block a user