Refa: remove sensitive information (#11873)

### What problem does this PR solve?

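Change:
Remove sensitive information (passwords, connection strings, tokens, internal error details) from log output, console output, and API error messages.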

### Type of change

- [x] Refactoring
This commit is contained in:
buua436
2025-12-10 19:08:45 +08:00
committed by GitHub
parent ab4b62031f
commit 3cb72377d7
11 changed files with 62 additions and 24 deletions

View File

@ -1,4 +1,6 @@
name: tests name: tests
permissions:
contents: read
on: on:
push: push:

View File

@ -351,7 +351,7 @@ class AdminCLI(Cmd):
def verify_admin(self, arguments: dict, single_command: bool): def verify_admin(self, arguments: dict, single_command: bool):
self.host = arguments['host'] self.host = arguments['host']
self.port = arguments['port'] self.port = arguments['port']
print(f"Attempt to access ip: {self.host}, port: {self.port}") print("Attempt to access server for admin login")
url = f"http://{self.host}:{self.port}/api/v1/admin/login" url = f"http://{self.host}:{self.port}/api/v1/admin/login"
attempt_count = 3 attempt_count = 3
@ -390,7 +390,7 @@ class AdminCLI(Cmd):
print(f"Bad responsestatus: {response.status_code}, password is wrong") print(f"Bad responsestatus: {response.status_code}, password is wrong")
except Exception as e: except Exception as e:
print(str(e)) print(str(e))
print(f"Can't access {self.host}, port: {self.port}") print("Can't access server for admin login (connection failed)")
def _format_service_detail_table(self, data): def _format_service_detail_table(self, data):
if isinstance(data, list): if isinstance(data, list):
@ -674,7 +674,7 @@ class AdminCLI(Cmd):
user_name: str = user_name_tree.children[0].strip("'\"") user_name: str = user_name_tree.children[0].strip("'\"")
password_tree: Tree = command['password'] password_tree: Tree = command['password']
password: str = password_tree.children[0].strip("'\"") password: str = password_tree.children[0].strip("'\"")
print(f"Alter user: {user_name}, password: {password}") print(f"Alter user: {user_name}, password: ******")
url = f'http://{self.host}:{self.port}/api/v1/admin/users/{user_name}/password' url = f'http://{self.host}:{self.port}/api/v1/admin/users/{user_name}/password'
response = self.session.put(url, json={'new_password': encrypt(password)}) response = self.session.put(url, json={'new_password': encrypt(password)})
res_json = response.json() res_json = response.json()
@ -689,7 +689,7 @@ class AdminCLI(Cmd):
password_tree: Tree = command['password'] password_tree: Tree = command['password']
password: str = password_tree.children[0].strip("'\"") password: str = password_tree.children[0].strip("'\"")
role: str = command['role'] role: str = command['role']
print(f"Create user: {user_name}, password: {password}, role: {role}") print(f"Create user: {user_name}, password: ******, role: {role}")
url = f'http://{self.host}:{self.port}/api/v1/admin/users' url = f'http://{self.host}:{self.port}/api/v1/admin/users'
response = self.session.post( response = self.session.post(
url, url,
@ -951,7 +951,7 @@ def main():
args = cli.parse_connection_args(sys.argv) args = cli.parse_connection_args(sys.argv)
if 'error' in args: if 'error' in args:
print(f"Error: {args['error']}") print("Error: Invalid connection arguments")
return return
if 'command' in args: if 'command' in args:
@ -960,7 +960,7 @@ def main():
return return
if cli.verify_admin(args, single_command=True): if cli.verify_admin(args, single_command=True):
command: str = args['command'] command: str = args['command']
print(f"Run single command: {command}") # print(f"Run single command: {command}")
cli.run_single_command(command) cli.run_single_command(command)
else: else:
if cli.verify_admin(args, single_command=False): if cli.verify_admin(args, single_command=False):

View File

@ -176,11 +176,11 @@ def login_verify(f):
"message": "Access denied", "message": "Access denied",
"data": None "data": None
}), 200 }), 200
except Exception as e: except Exception:
error_msg = str(e) logging.exception("An error occurred during admin login verification.")
return jsonify({ return jsonify({
"code": 500, "code": 500,
"message": error_msg "message": "An internal server error occurred."
}), 200 }), 200
return f(*args, **kwargs) return f(*args, **kwargs)

View File

@ -342,7 +342,15 @@ async def test_db_connect():
f"UID={req['username']};" f"UID={req['username']};"
f"PWD={req['password']};" f"PWD={req['password']};"
) )
logging.info(conn_str) redacted_conn_str = (
f"DATABASE={req['database']};"
f"HOSTNAME={req['host']};"
f"PORT={req['port']};"
f"PROTOCOL=TCPIP;"
f"UID={req['username']};"
f"PWD=****;"
)
logging.info(redacted_conn_str)
conn = ibm_db.connect(conn_str, "", "") conn = ibm_db.connect(conn_str, "", "")
stmt = ibm_db.exec_immediate(conn, "SELECT 1 FROM sysibm.sysdummy1") stmt = ibm_db.exec_immediate(conn, "SELECT 1 FROM sysibm.sysdummy1")
ibm_db.fetch_assoc(stmt) ibm_db.fetch_assoc(stmt)

View File

@ -73,7 +73,7 @@ def init_superuser(nickname=DEFAULT_SUPERUSER_NICKNAME, email=DEFAULT_SUPERUSER_
UserTenantService.insert(**usr_tenant) UserTenantService.insert(**usr_tenant)
TenantLLMService.insert_many(tenant_llm) TenantLLMService.insert_many(tenant_llm)
logging.info( logging.info(
f"Super user initialized. email: {email}, password: {password}. Changing the password after login is strongly recommended.") f"Super user initialized. email: {email},A default password has been set; changing the password after login is strongly recommended.")
chat_mdl = LLMBundle(tenant["id"], LLMType.CHAT, tenant["llm_id"]) chat_mdl = LLMBundle(tenant["id"], LLMType.CHAT, tenant["llm_id"])
msg = chat_mdl.chat(system="", history=[ msg = chat_mdl.chat(system="", history=[

View File

@ -273,7 +273,7 @@ def delete_user_data(user_id: str) -> dict:
except Exception as e: except Exception as e:
logging.exception(e) logging.exception(e)
return {"success": False, "message": f"Error: {str(e)}. Already done:\n{done_msg}"} return {"success": False, "message": "An internal error occurred during user deletion. Some operations may have completed.","details": done_msg}
def delete_user_agents(user_id: str) -> dict: def delete_user_agents(user_id: str) -> dict:

View File

@ -109,7 +109,7 @@ class LLMBundle(LLM4Tenant):
llm_name = getattr(self, "llm_name", None) llm_name = getattr(self, "llm_name", None)
if not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, used_tokens, llm_name): if not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, used_tokens, llm_name):
logging.error("LLMBundle.encode can't update token usage for {}/EMBEDDING used_tokens: {}".format(self.tenant_id, used_tokens)) logging.error("LLMBundle.encode can't update token usage for <tenant redacted>/EMBEDDING used_tokens: {}".format(used_tokens))
if self.langfuse: if self.langfuse:
generation.update(usage_details={"total_tokens": used_tokens}) generation.update(usage_details={"total_tokens": used_tokens})
@ -124,7 +124,7 @@ class LLMBundle(LLM4Tenant):
emd, used_tokens = self.mdl.encode_queries(query) emd, used_tokens = self.mdl.encode_queries(query)
llm_name = getattr(self, "llm_name", None) llm_name = getattr(self, "llm_name", None)
if not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, used_tokens, llm_name): if not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, used_tokens, llm_name):
logging.error("LLMBundle.encode_queries can't update token usage for {}/EMBEDDING used_tokens: {}".format(self.tenant_id, used_tokens)) logging.error("LLMBundle.encode_queries can't update token usage for <tenant redacted>/EMBEDDING used_tokens: {}".format(used_tokens))
if self.langfuse: if self.langfuse:
generation.update(usage_details={"total_tokens": used_tokens}) generation.update(usage_details={"total_tokens": used_tokens})

View File

@ -1110,7 +1110,10 @@ def _make_attachment_link(
) -> str | None: ) -> str | None:
download_link = "" download_link = ""
if "api.atlassian.com" in confluence_client.url: from urllib.parse import urlparse
netloc =urlparse(confluence_client.url).hostname
if netloc == "api.atlassian.com" or (netloc and netloc.endswith(".api.atlassian.com")):
# if "api.atlassian.com" in confluence_client.url:
# https://developer.atlassian.com/cloud/confluence/rest/v1/api-group-content---attachments/#api-wiki-rest-api-content-id-child-attachment-attachmentid-download-get # https://developer.atlassian.com/cloud/confluence/rest/v1/api-group-content---attachments/#api-wiki-rest-api-content-id-child-attachment-attachmentid-download-get
if not parent_content_id: if not parent_content_id:
logging.warning( logging.warning(

View File

@ -135,7 +135,7 @@ class JiraConnector(CheckpointedConnectorWithPermSync, SlimConnectorWithPermSync
except ValueError as exc: except ValueError as exc:
raise ConnectorValidationError(str(exc)) from exc raise ConnectorValidationError(str(exc)) from exc
else: else:
logger.warning(f"[Jira] Scoped token requested but Jira base URL {self.jira_base_url} does not appear to be an Atlassian Cloud domain; scoped token ignored.") logger.warning("[Jira] Scoped token requested but Jira base URL does not appear to be an Atlassian Cloud domain; scoped token ignored.")
user_email = credentials.get("jira_user_email") or credentials.get("username") user_email = credentials.get("jira_user_email") or credentials.get("username")
api_token = credentials.get("jira_api_token") or credentials.get("token") or credentials.get("api_token") api_token = credentials.get("jira_api_token") or credentials.get("token") or credentials.get("api_token")
@ -245,7 +245,7 @@ class JiraConnector(CheckpointedConnectorWithPermSync, SlimConnectorWithPermSync
while True: while True:
attempt += 1 attempt += 1
jql = self._build_jql(attempt_start, end) jql = self._build_jql(attempt_start, end)
logger.info(f"[Jira] Executing Jira JQL attempt {attempt} (start={attempt_start}, end={end}, buffered_retry={retried_with_buffer}): {jql}") logger.info(f"[Jira] Executing Jira JQL attempt {attempt} (start={attempt_start}, end={end}, buffered_retry={retried_with_buffer})")
try: try:
return (yield from self._load_from_checkpoint_internal(jql, checkpoint, start_filter=start)) return (yield from self._load_from_checkpoint_internal(jql, checkpoint, start_filter=start))
except Exception as exc: except Exception as exc:
@ -927,9 +927,6 @@ def main(config: dict[str, Any] | None = None) -> None:
base_url = config.get("base_url") base_url = config.get("base_url")
credentials = config.get("credentials", {}) credentials = config.get("credentials", {})
print(f"[Jira] {config=}", flush=True)
print(f"[Jira] {credentials=}", flush=True)
if not base_url: if not base_url:
raise RuntimeError("Jira base URL must be provided via config or CLI arguments.") raise RuntimeError("Jira base URL must be provided via config or CLI arguments.")
if not (credentials.get("jira_api_token") or (credentials.get("jira_user_email") and credentials.get("jira_password"))): if not (credentials.get("jira_api_token") or (credentials.get("jira_user_email") and credentials.get("jira_password"))):

View File

@ -16,6 +16,7 @@ import logging
import os import os
import time import time
from typing import Any, Dict, Optional from typing import Any, Dict, Optional
from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse
import httpx import httpx
@ -52,6 +53,27 @@ def _get_delay(backoff_factor: float, attempt: int) -> float:
return backoff_factor * (2**attempt) return backoff_factor * (2**attempt)
# List of sensitive parameters to redact from URLs before logging
_SENSITIVE_QUERY_KEYS = {"client_secret", "secret", "code", "access_token", "refresh_token", "password", "token", "app_secret"}
def _redact_sensitive_url_params(url: str) -> str:
try:
parsed = urlparse(url)
if not parsed.query:
return url
clean_query = []
for k, v in parse_qsl(parsed.query, keep_blank_values=True):
if k.lower() in _SENSITIVE_QUERY_KEYS:
clean_query.append((k, "***REDACTED***"))
else:
clean_query.append((k, v))
new_query = urlencode(clean_query, doseq=True)
redacted_url = urlunparse(parsed._replace(query=new_query))
return redacted_url
except Exception:
return url
async def async_request( async def async_request(
method: str, method: str,
url: str, url: str,
@ -94,19 +116,19 @@ async def async_request(
) )
duration = time.monotonic() - start duration = time.monotonic() - start
logger.debug( logger.debug(
f"async_request {method} {url} -> {response.status_code} in {duration:.3f}s" f"async_request {method} {_redact_sensitive_url_params(url)} -> {response.status_code} in {duration:.3f}s"
) )
return response return response
except httpx.RequestError as exc: except httpx.RequestError as exc:
last_exc = exc last_exc = exc
if attempt >= retries: if attempt >= retries:
logger.warning( logger.warning(
f"async_request exhausted retries for {method} {url}: {exc}" f"async_request exhausted retries for {method} {_redact_sensitive_url_params(url)}: {exc}"
) )
raise raise
delay = _get_delay(backoff_factor, attempt) delay = _get_delay(backoff_factor, attempt)
logger.warning( logger.warning(
f"async_request attempt {attempt + 1}/{retries + 1} failed for {method} {url}: {exc}; retrying in {delay:.2f}s" f"async_request attempt {attempt + 1}/{retries + 1} failed for {method} {_redact_sensitive_url_params(url)}: {exc}; retrying in {delay:.2f}s"
) )
await asyncio.sleep(delay) await asyncio.sleep(delay)
raise last_exc # pragma: no cover raise last_exc # pragma: no cover

View File

@ -41,7 +41,13 @@ def get_opendal_config():
scheme = opendal_config.get("scheme") scheme = opendal_config.get("scheme")
config_data = opendal_config.get("config", {}) config_data = opendal_config.get("config", {})
kwargs = {"scheme": scheme, **config_data} kwargs = {"scheme": scheme, **config_data}
logging.info("Loaded OpenDAL configuration from yaml: %s", kwargs) redacted_kwargs = kwargs.copy()
if 'password' in redacted_kwargs:
redacted_kwargs['password'] = '***REDACTED***'
if 'connection_string' in redacted_kwargs and 'password' in redacted_kwargs:
import re
redacted_kwargs['connection_string'] = re.sub(r':[^@]+@', ':***REDACTED***@', redacted_kwargs['connection_string'])
logging.info("Loaded OpenDAL configuration from yaml: %s", redacted_kwargs)
return kwargs return kwargs
except Exception as e: except Exception as e:
logging.error("Failed to load OpenDAL configuration from yaml: %s", str(e)) logging.error("Failed to load OpenDAL configuration from yaml: %s", str(e))