Merge branch 'main' into alert-autofix-59

Kevin Hu
2025-12-22 13:33:47 +08:00
committed by GitHub
8 changed files with 63 additions and 41 deletions

View File

@@ -186,7 +186,7 @@ class OnyxConfluence:
# between the db and redis everywhere the credentials might be updated
new_credential_str = json.dumps(new_credentials)
self.redis_client.set(
self.credential_key, new_credential_str, nx=True, ex=self.CREDENTIAL_TTL
self.credential_key, new_credential_str, ex=self.CREDENTIAL_TTL
)
self._credentials_provider.set_credentials(new_credentials)
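
For context on the change above: redis-py's set() takes ex= for a TTL in seconds and nx=True for "only set if the key is absent". Dropping nx means a freshly rotated credential always overwrites the cached value, so Redis never lags behind the database. A minimal sketch, assuming a plain redis.Redis client and hypothetical key and TTL values:

import json

import redis  # any redis-py compatible client

r = redis.Redis()
CREDENTIAL_TTL = 3600  # hypothetical TTL in seconds
new_credentials = {"token": "example"}  # placeholder payload

# Overwrite unconditionally and let the key expire after CREDENTIAL_TTL seconds.
r.set("confluence_credential_key", json.dumps(new_credentials), ex=CREDENTIAL_TTL)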
@@ -1599,8 +1599,8 @@ class ConfluenceConnector(
semantic_identifier=semantic_identifier,
extension=".html", # Confluence pages are HTML
blob=page_content.encode("utf-8"), # Encode page content as bytes
size_bytes=len(page_content.encode("utf-8")), # Calculate size in bytes
doc_updated_at=datetime_from_string(page["version"]["when"]),
size_bytes=len(page_content.encode("utf-8")), # Calculate size in bytes
primary_owners=primary_owners if primary_owners else None,
metadata=metadata if metadata else None,
)
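
One small observation about the construction above: page_content is encoded twice, once for blob and once for size_bytes. A tiny sketch of an equivalent single-encode variant (page_content here is a placeholder; in the connector it is the HTML built from the Confluence page):

page_content = "<p>example</p>"  # placeholder page HTML
encoded_page = page_content.encode("utf-8")  # encode once
blob = encoded_page                          # bytes stored on the Document
size_bytes = len(encoded_page)               # size of exactly those bytes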

View File

@@ -94,6 +94,7 @@ class Document(BaseModel):
blob: bytes
doc_updated_at: datetime
size_bytes: int
primary_owners: Optional[list[Any]] = None
metadata: Optional[dict[str, Any]] = None
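
A minimal, self-contained sketch of the model after this hunk, with primary_owners allowed to be None since callers pass primary_owners if primary_owners else None; only the fields visible above are included, and the list element type is an assumption:

from datetime import datetime, timezone
from typing import Any, Optional

from pydantic import BaseModel

class Document(BaseModel):
    blob: bytes
    doc_updated_at: datetime
    size_bytes: int
    primary_owners: Optional[list[Any]] = None  # None when no owners are known
    metadata: Optional[dict[str, Any]] = None

doc = Document(
    blob=b"<p>example</p>",
    doc_updated_at=datetime.now(timezone.utc),
    size_bytes=len(b"<p>example</p>"),
)  # primary_owners and metadata fall back to None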

View File

@@ -167,7 +167,6 @@ def get_latest_message_time(thread: ThreadType) -> datetime:
def _build_doc_id(channel_id: str, thread_ts: str) -> str:
"""构建文档ID"""
return f"{channel_id}__{thread_ts}"
@@ -179,7 +178,6 @@ def thread_to_doc(
user_cache: dict[str, BasicExpertInfo | None],
channel_access: Any | None,
) -> Document:
"""将线程转换为文档"""
channel_id = channel["id"]
initial_sender_expert_info = expert_info_from_slack_id(
@@ -237,7 +235,6 @@ def filter_channels(
channels_to_connect: list[str] | None,
regex_enabled: bool,
) -> list[ChannelType]:
"""过滤频道"""
if not channels_to_connect:
return all_channels
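
The rest of filter_channels is not shown beyond the empty-filter shortcut, so the following is only a hedged sketch of what name-based filtering with regex_enabled could look like; apart from the parameters in the signature, every name below is an assumption:

import re
from typing import Any

def filter_channels_sketch(
    all_channels: list[dict[str, Any]],
    channels_to_connect: list[str] | None,
    regex_enabled: bool,
) -> list[dict[str, Any]]:
    if not channels_to_connect:
        return all_channels
    if regex_enabled:
        # Treat each configured entry as a regular expression against the channel name.
        patterns = [re.compile(p) for p in channels_to_connect]
        return [
            channel
            for channel in all_channels
            if any(p.match(channel["name"]) for p in patterns)
        ]
    # Otherwise match channel names exactly.
    wanted = set(channels_to_connect)
    return [channel for channel in all_channels if channel["name"] in wanted]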
@@ -381,7 +378,6 @@ def _process_message(
[MessageType], SlackMessageFilterReason | None
] = default_msg_filter,
) -> ProcessedSlackMessage:
"""处理消息"""
thread_ts = message.get("thread_ts")
thread_or_message_ts = thread_ts or message["ts"]
try:
@@ -536,7 +532,6 @@ class SlackConnector(
end: SecondsSinceUnixEpoch | None = None,
callback: Any = None,
) -> GenerateSlimDocumentOutput:
"""获取所有简化文档(带权限同步)"""
if self.client is None:
raise ConnectorMissingCredentialError("Slack")

View File

@@ -16,7 +16,7 @@ import logging
import os
import time
from typing import Any, Dict, Optional
from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse
from urllib.parse import urlparse, urlunparse
from common import settings
import httpx
@@ -58,21 +58,34 @@ def _get_delay(backoff_factor: float, attempt: int) -> float:
_SENSITIVE_QUERY_KEYS = {"client_secret", "secret", "code", "access_token", "refresh_token", "password", "token", "app_secret"}
def _redact_sensitive_url_params(url: str) -> str:
"""
Return a version of the URL that is safe to log.
We intentionally drop query parameters and userinfo to avoid leaking
credentials or tokens via logs. Only scheme, host, port and path
are preserved.
"""
try:
parsed = urlparse(url)
if not parsed.query:
return url
clean_query = []
for k, v in parse_qsl(parsed.query, keep_blank_values=True):
if k.lower() in _SENSITIVE_QUERY_KEYS:
clean_query.append((k, "***REDACTED***"))
else:
clean_query.append((k, v))
new_query = urlencode(clean_query, doseq=True)
redacted_url = urlunparse(parsed._replace(query=new_query))
return redacted_url
# Remove any potential userinfo (username:password@)
netloc = parsed.hostname or ""
if parsed.port:
netloc = f"{netloc}:{parsed.port}"
# Reconstruct URL without query, params, fragment, or userinfo.
safe_url = urlunparse(
(
parsed.scheme,
netloc,
parsed.path,
"", # params
"", # query
"", # fragment
)
)
return safe_url
except Exception:
return url
# If parsing fails, fall back to omitting the URL entirely.
return "<redacted-url>"
def _is_sensitive_url(url: str) -> bool:
"""Return True if URL is one of the configured OAuth endpoints."""
@@ -151,9 +164,15 @@ async def async_request(
except httpx.RequestError as exc:
last_exc = exc
if attempt >= retries:
# Do not log the full URL here to avoid leaking sensitive data.
if not _is_sensitive_url(url):
log_url = _redact_sensitive_url_params(url)
logger.warning(f"async_request exhausted retries for {method}")
raise
delay = _get_delay(backoff_factor, attempt)
if not _is_sensitive_url(url):
log_url = _redact_sensitive_url_params(url)
logger.warning(
f"async_request exhausted retries for {method}; last error: {exc}"
f"async_request attempt {attempt + 1}/{retries + 1} failed for {method}; retrying in {delay:.2f}s"
)
raise
delay = _get_delay(backoff_factor, attempt)
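
Putting the hunk above together, a minimal self-contained sketch of the retry loop pattern: exponential backoff between attempts, a warning per failed attempt, and a final re-raise once retries are exhausted; the URL is deliberately kept out of the log messages, matching the redaction intent earlier in the file. _get_delay's exact formula is not shown, so the backoff below is an assumption:

import asyncio
import logging

import httpx

logger = logging.getLogger(__name__)

async def request_with_retries(
    method: str,
    url: str,
    retries: int = 3,
    backoff_factor: float = 0.5,
) -> httpx.Response:
    async with httpx.AsyncClient() as client:
        for attempt in range(retries + 1):
            try:
                return await client.request(method, url)
            except httpx.RequestError:
                if attempt >= retries:
                    logger.warning(f"request exhausted retries for {method}")
                    raise
                delay = backoff_factor * (2 ** attempt)  # assumed exponential backoff
                logger.warning(
                    f"attempt {attempt + 1}/{retries + 1} failed for {method}; "
                    f"retrying in {delay:.2f}s"
                )
                await asyncio.sleep(delay)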