diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 5341d83ae..a5bdc1735 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -1,4 +1,6 @@ name: tests +permissions: + contents: read on: push: diff --git a/admin/client/admin_client.py b/admin/client/admin_client.py index 4b210d2b5..8cad14bab 100644 --- a/admin/client/admin_client.py +++ b/admin/client/admin_client.py @@ -351,7 +351,7 @@ class AdminCLI(Cmd): def verify_admin(self, arguments: dict, single_command: bool): self.host = arguments['host'] self.port = arguments['port'] - print(f"Attempt to access ip: {self.host}, port: {self.port}") + print("Attempt to access server for admin login") url = f"http://{self.host}:{self.port}/api/v1/admin/login" attempt_count = 3 @@ -390,7 +390,7 @@ class AdminCLI(Cmd): print(f"Bad response,status: {response.status_code}, password is wrong") except Exception as e: print(str(e)) - print(f"Can't access {self.host}, port: {self.port}") + print("Can't access server for admin login (connection failed)") def _format_service_detail_table(self, data): if isinstance(data, list): @@ -674,7 +674,7 @@ class AdminCLI(Cmd): user_name: str = user_name_tree.children[0].strip("'\"") password_tree: Tree = command['password'] password: str = password_tree.children[0].strip("'\"") - print(f"Alter user: {user_name}, password: {password}") + print(f"Alter user: {user_name}, password: ******") url = f'http://{self.host}:{self.port}/api/v1/admin/users/{user_name}/password' response = self.session.put(url, json={'new_password': encrypt(password)}) res_json = response.json() @@ -689,7 +689,7 @@ class AdminCLI(Cmd): password_tree: Tree = command['password'] password: str = password_tree.children[0].strip("'\"") role: str = command['role'] - print(f"Create user: {user_name}, password: {password}, role: {role}") + print(f"Create user: {user_name}, password: ******, role: {role}") url = f'http://{self.host}:{self.port}/api/v1/admin/users' response = self.session.post( url, @@ -951,7 +951,7 @@ def main(): args = cli.parse_connection_args(sys.argv) if 'error' in args: - print(f"Error: {args['error']}") + print("Error: Invalid connection arguments") return if 'command' in args: @@ -960,7 +960,7 @@ def main(): return if cli.verify_admin(args, single_command=True): command: str = args['command'] - print(f"Run single command: {command}") + # print(f"Run single command: {command}") cli.run_single_command(command) else: if cli.verify_admin(args, single_command=False): diff --git a/admin/server/auth.py b/admin/server/auth.py index 6c8bc2cb8..486b9a4fb 100644 --- a/admin/server/auth.py +++ b/admin/server/auth.py @@ -176,11 +176,11 @@ def login_verify(f): "message": "Access denied", "data": None }), 200 - except Exception as e: - error_msg = str(e) + except Exception: + logging.exception("An error occurred during admin login verification.") return jsonify({ "code": 500, - "message": error_msg + "message": "An internal server error occurred." }), 200 return f(*args, **kwargs) diff --git a/api/apps/canvas_app.py b/api/apps/canvas_app.py index fe32dca0b..ed8c8c7a0 100644 --- a/api/apps/canvas_app.py +++ b/api/apps/canvas_app.py @@ -342,7 +342,15 @@ async def test_db_connect(): f"UID={req['username']};" f"PWD={req['password']};" ) - logging.info(conn_str) + redacted_conn_str = ( + f"DATABASE={req['database']};" + f"HOSTNAME={req['host']};" + f"PORT={req['port']};" + f"PROTOCOL=TCPIP;" + f"UID={req['username']};" + f"PWD=****;" + ) + logging.info(redacted_conn_str) conn = ibm_db.connect(conn_str, "", "") stmt = ibm_db.exec_immediate(conn, "SELECT 1 FROM sysibm.sysdummy1") ibm_db.fetch_assoc(stmt) diff --git a/api/db/init_data.py b/api/db/init_data.py index d4873d332..7454965eb 100644 --- a/api/db/init_data.py +++ b/api/db/init_data.py @@ -73,7 +73,7 @@ def init_superuser(nickname=DEFAULT_SUPERUSER_NICKNAME, email=DEFAULT_SUPERUSER_ UserTenantService.insert(**usr_tenant) TenantLLMService.insert_many(tenant_llm) logging.info( - f"Super user initialized. email: {email}, password: {password}. Changing the password after login is strongly recommended.") + f"Super user initialized. email: {email},A default password has been set; changing the password after login is strongly recommended.") chat_mdl = LLMBundle(tenant["id"], LLMType.CHAT, tenant["llm_id"]) msg = chat_mdl.chat(system="", history=[ diff --git a/api/db/joint_services/user_account_service.py b/api/db/joint_services/user_account_service.py index 34ceee648..48937653e 100644 --- a/api/db/joint_services/user_account_service.py +++ b/api/db/joint_services/user_account_service.py @@ -273,7 +273,7 @@ def delete_user_data(user_id: str) -> dict: except Exception as e: logging.exception(e) - return {"success": False, "message": f"Error: {str(e)}. Already done:\n{done_msg}"} + return {"success": False, "message": "An internal error occurred during user deletion. Some operations may have completed.","details": done_msg} def delete_user_agents(user_id: str) -> dict: diff --git a/api/db/services/llm_service.py b/api/db/services/llm_service.py index 86356a7a7..e4bf64aac 100644 --- a/api/db/services/llm_service.py +++ b/api/db/services/llm_service.py @@ -109,7 +109,7 @@ class LLMBundle(LLM4Tenant): llm_name = getattr(self, "llm_name", None) if not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, used_tokens, llm_name): - logging.error("LLMBundle.encode can't update token usage for {}/EMBEDDING used_tokens: {}".format(self.tenant_id, used_tokens)) + logging.error("LLMBundle.encode can't update token usage for /EMBEDDING used_tokens: {}".format(used_tokens)) if self.langfuse: generation.update(usage_details={"total_tokens": used_tokens}) @@ -124,7 +124,7 @@ class LLMBundle(LLM4Tenant): emd, used_tokens = self.mdl.encode_queries(query) llm_name = getattr(self, "llm_name", None) if not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, used_tokens, llm_name): - logging.error("LLMBundle.encode_queries can't update token usage for {}/EMBEDDING used_tokens: {}".format(self.tenant_id, used_tokens)) + logging.error("LLMBundle.encode_queries can't update token usage for /EMBEDDING used_tokens: {}".format(used_tokens)) if self.langfuse: generation.update(usage_details={"total_tokens": used_tokens}) diff --git a/common/data_source/confluence_connector.py b/common/data_source/confluence_connector.py index a057d0694..aff225703 100644 --- a/common/data_source/confluence_connector.py +++ b/common/data_source/confluence_connector.py @@ -1110,7 +1110,10 @@ def _make_attachment_link( ) -> str | None: download_link = "" - if "api.atlassian.com" in confluence_client.url: + from urllib.parse import urlparse + netloc =urlparse(confluence_client.url).hostname + if netloc == "api.atlassian.com" or (netloc and netloc.endswith(".api.atlassian.com")): + # if "api.atlassian.com" in confluence_client.url: # https://developer.atlassian.com/cloud/confluence/rest/v1/api-group-content---attachments/#api-wiki-rest-api-content-id-child-attachment-attachmentid-download-get if not parent_content_id: logging.warning( diff --git a/common/data_source/jira/connector.py b/common/data_source/jira/connector.py index 06a0a9069..2a93aaf51 100644 --- a/common/data_source/jira/connector.py +++ b/common/data_source/jira/connector.py @@ -135,7 +135,7 @@ class JiraConnector(CheckpointedConnectorWithPermSync, SlimConnectorWithPermSync except ValueError as exc: raise ConnectorValidationError(str(exc)) from exc else: - logger.warning(f"[Jira] Scoped token requested but Jira base URL {self.jira_base_url} does not appear to be an Atlassian Cloud domain; scoped token ignored.") + logger.warning("[Jira] Scoped token requested but Jira base URL does not appear to be an Atlassian Cloud domain; scoped token ignored.") user_email = credentials.get("jira_user_email") or credentials.get("username") api_token = credentials.get("jira_api_token") or credentials.get("token") or credentials.get("api_token") @@ -245,7 +245,7 @@ class JiraConnector(CheckpointedConnectorWithPermSync, SlimConnectorWithPermSync while True: attempt += 1 jql = self._build_jql(attempt_start, end) - logger.info(f"[Jira] Executing Jira JQL attempt {attempt} (start={attempt_start}, end={end}, buffered_retry={retried_with_buffer}): {jql}") + logger.info(f"[Jira] Executing Jira JQL attempt {attempt} (start={attempt_start}, end={end}, buffered_retry={retried_with_buffer})") try: return (yield from self._load_from_checkpoint_internal(jql, checkpoint, start_filter=start)) except Exception as exc: @@ -927,9 +927,6 @@ def main(config: dict[str, Any] | None = None) -> None: base_url = config.get("base_url") credentials = config.get("credentials", {}) - print(f"[Jira] {config=}", flush=True) - print(f"[Jira] {credentials=}", flush=True) - if not base_url: raise RuntimeError("Jira base URL must be provided via config or CLI arguments.") if not (credentials.get("jira_api_token") or (credentials.get("jira_user_email") and credentials.get("jira_password"))): diff --git a/common/http_client.py b/common/http_client.py index 91ac0cadc..5c57f8638 100644 --- a/common/http_client.py +++ b/common/http_client.py @@ -16,6 +16,7 @@ import logging import os import time from typing import Any, Dict, Optional +from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse import httpx @@ -52,6 +53,27 @@ def _get_delay(backoff_factor: float, attempt: int) -> float: return backoff_factor * (2**attempt) +# List of sensitive parameters to redact from URLs before logging +_SENSITIVE_QUERY_KEYS = {"client_secret", "secret", "code", "access_token", "refresh_token", "password", "token", "app_secret"} + +def _redact_sensitive_url_params(url: str) -> str: + try: + parsed = urlparse(url) + if not parsed.query: + return url + clean_query = [] + for k, v in parse_qsl(parsed.query, keep_blank_values=True): + if k.lower() in _SENSITIVE_QUERY_KEYS: + clean_query.append((k, "***REDACTED***")) + else: + clean_query.append((k, v)) + new_query = urlencode(clean_query, doseq=True) + redacted_url = urlunparse(parsed._replace(query=new_query)) + return redacted_url + except Exception: + return url + + async def async_request( method: str, url: str, @@ -94,19 +116,19 @@ async def async_request( ) duration = time.monotonic() - start logger.debug( - f"async_request {method} {url} -> {response.status_code} in {duration:.3f}s" + f"async_request {method} {_redact_sensitive_url_params(url)} -> {response.status_code} in {duration:.3f}s" ) return response except httpx.RequestError as exc: last_exc = exc if attempt >= retries: logger.warning( - f"async_request exhausted retries for {method} {url}: {exc}" + f"async_request exhausted retries for {method} {_redact_sensitive_url_params(url)}: {exc}" ) raise delay = _get_delay(backoff_factor, attempt) logger.warning( - f"async_request attempt {attempt + 1}/{retries + 1} failed for {method} {url}: {exc}; retrying in {delay:.2f}s" + f"async_request attempt {attempt + 1}/{retries + 1} failed for {method} {_redact_sensitive_url_params(url)}: {exc}; retrying in {delay:.2f}s" ) await asyncio.sleep(delay) raise last_exc # pragma: no cover diff --git a/rag/utils/opendal_conn.py b/rag/utils/opendal_conn.py index c6cebf9ca..a260daebc 100644 --- a/rag/utils/opendal_conn.py +++ b/rag/utils/opendal_conn.py @@ -41,7 +41,13 @@ def get_opendal_config(): scheme = opendal_config.get("scheme") config_data = opendal_config.get("config", {}) kwargs = {"scheme": scheme, **config_data} - logging.info("Loaded OpenDAL configuration from yaml: %s", kwargs) + redacted_kwargs = kwargs.copy() + if 'password' in redacted_kwargs: + redacted_kwargs['password'] = '***REDACTED***' + if 'connection_string' in redacted_kwargs and 'password' in redacted_kwargs: + import re + redacted_kwargs['connection_string'] = re.sub(r':[^@]+@', ':***REDACTED***@', redacted_kwargs['connection_string']) + logging.info("Loaded OpenDAL configuration from yaml: %s", redacted_kwargs) return kwargs except Exception as e: logging.error("Failed to load OpenDAL configuration from yaml: %s", str(e))