mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-17 19:22:55 +08:00
Refa:remove sensitive information (#11873)
### What problem does this PR solve? change: remove sensitive information ### Type of change - [x] Refactoring
This commit is contained in:
2
.github/workflows/tests.yml
vendored
2
.github/workflows/tests.yml
vendored
@ -1,4 +1,6 @@
|
||||
name: tests
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
on:
|
||||
push:
|
||||
|
||||
@ -351,7 +351,7 @@ class AdminCLI(Cmd):
|
||||
def verify_admin(self, arguments: dict, single_command: bool):
|
||||
self.host = arguments['host']
|
||||
self.port = arguments['port']
|
||||
print(f"Attempt to access ip: {self.host}, port: {self.port}")
|
||||
print("Attempt to access server for admin login")
|
||||
url = f"http://{self.host}:{self.port}/api/v1/admin/login"
|
||||
|
||||
attempt_count = 3
|
||||
@ -390,7 +390,7 @@ class AdminCLI(Cmd):
|
||||
print(f"Bad response,status: {response.status_code}, password is wrong")
|
||||
except Exception as e:
|
||||
print(str(e))
|
||||
print(f"Can't access {self.host}, port: {self.port}")
|
||||
print("Can't access server for admin login (connection failed)")
|
||||
|
||||
def _format_service_detail_table(self, data):
|
||||
if isinstance(data, list):
|
||||
@ -674,7 +674,7 @@ class AdminCLI(Cmd):
|
||||
user_name: str = user_name_tree.children[0].strip("'\"")
|
||||
password_tree: Tree = command['password']
|
||||
password: str = password_tree.children[0].strip("'\"")
|
||||
print(f"Alter user: {user_name}, password: {password}")
|
||||
print(f"Alter user: {user_name}, password: ******")
|
||||
url = f'http://{self.host}:{self.port}/api/v1/admin/users/{user_name}/password'
|
||||
response = self.session.put(url, json={'new_password': encrypt(password)})
|
||||
res_json = response.json()
|
||||
@ -689,7 +689,7 @@ class AdminCLI(Cmd):
|
||||
password_tree: Tree = command['password']
|
||||
password: str = password_tree.children[0].strip("'\"")
|
||||
role: str = command['role']
|
||||
print(f"Create user: {user_name}, password: {password}, role: {role}")
|
||||
print(f"Create user: {user_name}, password: ******, role: {role}")
|
||||
url = f'http://{self.host}:{self.port}/api/v1/admin/users'
|
||||
response = self.session.post(
|
||||
url,
|
||||
@ -951,7 +951,7 @@ def main():
|
||||
|
||||
args = cli.parse_connection_args(sys.argv)
|
||||
if 'error' in args:
|
||||
print(f"Error: {args['error']}")
|
||||
print("Error: Invalid connection arguments")
|
||||
return
|
||||
|
||||
if 'command' in args:
|
||||
@ -960,7 +960,7 @@ def main():
|
||||
return
|
||||
if cli.verify_admin(args, single_command=True):
|
||||
command: str = args['command']
|
||||
print(f"Run single command: {command}")
|
||||
# print(f"Run single command: {command}")
|
||||
cli.run_single_command(command)
|
||||
else:
|
||||
if cli.verify_admin(args, single_command=False):
|
||||
|
||||
@ -176,11 +176,11 @@ def login_verify(f):
|
||||
"message": "Access denied",
|
||||
"data": None
|
||||
}), 200
|
||||
except Exception as e:
|
||||
error_msg = str(e)
|
||||
except Exception:
|
||||
logging.exception("An error occurred during admin login verification.")
|
||||
return jsonify({
|
||||
"code": 500,
|
||||
"message": error_msg
|
||||
"message": "An internal server error occurred."
|
||||
}), 200
|
||||
|
||||
return f(*args, **kwargs)
|
||||
|
||||
@ -342,7 +342,15 @@ async def test_db_connect():
|
||||
f"UID={req['username']};"
|
||||
f"PWD={req['password']};"
|
||||
)
|
||||
logging.info(conn_str)
|
||||
redacted_conn_str = (
|
||||
f"DATABASE={req['database']};"
|
||||
f"HOSTNAME={req['host']};"
|
||||
f"PORT={req['port']};"
|
||||
f"PROTOCOL=TCPIP;"
|
||||
f"UID={req['username']};"
|
||||
f"PWD=****;"
|
||||
)
|
||||
logging.info(redacted_conn_str)
|
||||
conn = ibm_db.connect(conn_str, "", "")
|
||||
stmt = ibm_db.exec_immediate(conn, "SELECT 1 FROM sysibm.sysdummy1")
|
||||
ibm_db.fetch_assoc(stmt)
|
||||
|
||||
@ -73,7 +73,7 @@ def init_superuser(nickname=DEFAULT_SUPERUSER_NICKNAME, email=DEFAULT_SUPERUSER_
|
||||
UserTenantService.insert(**usr_tenant)
|
||||
TenantLLMService.insert_many(tenant_llm)
|
||||
logging.info(
|
||||
f"Super user initialized. email: {email}, password: {password}. Changing the password after login is strongly recommended.")
|
||||
f"Super user initialized. email: {email},A default password has been set; changing the password after login is strongly recommended.")
|
||||
|
||||
chat_mdl = LLMBundle(tenant["id"], LLMType.CHAT, tenant["llm_id"])
|
||||
msg = chat_mdl.chat(system="", history=[
|
||||
|
||||
@ -273,7 +273,7 @@ def delete_user_data(user_id: str) -> dict:
|
||||
|
||||
except Exception as e:
|
||||
logging.exception(e)
|
||||
return {"success": False, "message": f"Error: {str(e)}. Already done:\n{done_msg}"}
|
||||
return {"success": False, "message": "An internal error occurred during user deletion. Some operations may have completed.","details": done_msg}
|
||||
|
||||
|
||||
def delete_user_agents(user_id: str) -> dict:
|
||||
|
||||
@ -109,7 +109,7 @@ class LLMBundle(LLM4Tenant):
|
||||
|
||||
llm_name = getattr(self, "llm_name", None)
|
||||
if not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, used_tokens, llm_name):
|
||||
logging.error("LLMBundle.encode can't update token usage for {}/EMBEDDING used_tokens: {}".format(self.tenant_id, used_tokens))
|
||||
logging.error("LLMBundle.encode can't update token usage for <tenant redacted>/EMBEDDING used_tokens: {}".format(used_tokens))
|
||||
|
||||
if self.langfuse:
|
||||
generation.update(usage_details={"total_tokens": used_tokens})
|
||||
@ -124,7 +124,7 @@ class LLMBundle(LLM4Tenant):
|
||||
emd, used_tokens = self.mdl.encode_queries(query)
|
||||
llm_name = getattr(self, "llm_name", None)
|
||||
if not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, used_tokens, llm_name):
|
||||
logging.error("LLMBundle.encode_queries can't update token usage for {}/EMBEDDING used_tokens: {}".format(self.tenant_id, used_tokens))
|
||||
logging.error("LLMBundle.encode_queries can't update token usage for <tenant redacted>/EMBEDDING used_tokens: {}".format(used_tokens))
|
||||
|
||||
if self.langfuse:
|
||||
generation.update(usage_details={"total_tokens": used_tokens})
|
||||
|
||||
@ -1110,7 +1110,10 @@ def _make_attachment_link(
|
||||
) -> str | None:
|
||||
download_link = ""
|
||||
|
||||
if "api.atlassian.com" in confluence_client.url:
|
||||
from urllib.parse import urlparse
|
||||
netloc =urlparse(confluence_client.url).hostname
|
||||
if netloc == "api.atlassian.com" or (netloc and netloc.endswith(".api.atlassian.com")):
|
||||
# if "api.atlassian.com" in confluence_client.url:
|
||||
# https://developer.atlassian.com/cloud/confluence/rest/v1/api-group-content---attachments/#api-wiki-rest-api-content-id-child-attachment-attachmentid-download-get
|
||||
if not parent_content_id:
|
||||
logging.warning(
|
||||
|
||||
@ -135,7 +135,7 @@ class JiraConnector(CheckpointedConnectorWithPermSync, SlimConnectorWithPermSync
|
||||
except ValueError as exc:
|
||||
raise ConnectorValidationError(str(exc)) from exc
|
||||
else:
|
||||
logger.warning(f"[Jira] Scoped token requested but Jira base URL {self.jira_base_url} does not appear to be an Atlassian Cloud domain; scoped token ignored.")
|
||||
logger.warning("[Jira] Scoped token requested but Jira base URL does not appear to be an Atlassian Cloud domain; scoped token ignored.")
|
||||
|
||||
user_email = credentials.get("jira_user_email") or credentials.get("username")
|
||||
api_token = credentials.get("jira_api_token") or credentials.get("token") or credentials.get("api_token")
|
||||
@ -245,7 +245,7 @@ class JiraConnector(CheckpointedConnectorWithPermSync, SlimConnectorWithPermSync
|
||||
while True:
|
||||
attempt += 1
|
||||
jql = self._build_jql(attempt_start, end)
|
||||
logger.info(f"[Jira] Executing Jira JQL attempt {attempt} (start={attempt_start}, end={end}, buffered_retry={retried_with_buffer}): {jql}")
|
||||
logger.info(f"[Jira] Executing Jira JQL attempt {attempt} (start={attempt_start}, end={end}, buffered_retry={retried_with_buffer})")
|
||||
try:
|
||||
return (yield from self._load_from_checkpoint_internal(jql, checkpoint, start_filter=start))
|
||||
except Exception as exc:
|
||||
@ -927,9 +927,6 @@ def main(config: dict[str, Any] | None = None) -> None:
|
||||
base_url = config.get("base_url")
|
||||
credentials = config.get("credentials", {})
|
||||
|
||||
print(f"[Jira] {config=}", flush=True)
|
||||
print(f"[Jira] {credentials=}", flush=True)
|
||||
|
||||
if not base_url:
|
||||
raise RuntimeError("Jira base URL must be provided via config or CLI arguments.")
|
||||
if not (credentials.get("jira_api_token") or (credentials.get("jira_user_email") and credentials.get("jira_password"))):
|
||||
|
||||
@ -16,6 +16,7 @@ import logging
|
||||
import os
|
||||
import time
|
||||
from typing import Any, Dict, Optional
|
||||
from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse
|
||||
|
||||
import httpx
|
||||
|
||||
@ -52,6 +53,27 @@ def _get_delay(backoff_factor: float, attempt: int) -> float:
|
||||
return backoff_factor * (2**attempt)
|
||||
|
||||
|
||||
# List of sensitive parameters to redact from URLs before logging
|
||||
_SENSITIVE_QUERY_KEYS = {"client_secret", "secret", "code", "access_token", "refresh_token", "password", "token", "app_secret"}
|
||||
|
||||
def _redact_sensitive_url_params(url: str) -> str:
|
||||
try:
|
||||
parsed = urlparse(url)
|
||||
if not parsed.query:
|
||||
return url
|
||||
clean_query = []
|
||||
for k, v in parse_qsl(parsed.query, keep_blank_values=True):
|
||||
if k.lower() in _SENSITIVE_QUERY_KEYS:
|
||||
clean_query.append((k, "***REDACTED***"))
|
||||
else:
|
||||
clean_query.append((k, v))
|
||||
new_query = urlencode(clean_query, doseq=True)
|
||||
redacted_url = urlunparse(parsed._replace(query=new_query))
|
||||
return redacted_url
|
||||
except Exception:
|
||||
return url
|
||||
|
||||
|
||||
async def async_request(
|
||||
method: str,
|
||||
url: str,
|
||||
@ -94,19 +116,19 @@ async def async_request(
|
||||
)
|
||||
duration = time.monotonic() - start
|
||||
logger.debug(
|
||||
f"async_request {method} {url} -> {response.status_code} in {duration:.3f}s"
|
||||
f"async_request {method} {_redact_sensitive_url_params(url)} -> {response.status_code} in {duration:.3f}s"
|
||||
)
|
||||
return response
|
||||
except httpx.RequestError as exc:
|
||||
last_exc = exc
|
||||
if attempt >= retries:
|
||||
logger.warning(
|
||||
f"async_request exhausted retries for {method} {url}: {exc}"
|
||||
f"async_request exhausted retries for {method} {_redact_sensitive_url_params(url)}: {exc}"
|
||||
)
|
||||
raise
|
||||
delay = _get_delay(backoff_factor, attempt)
|
||||
logger.warning(
|
||||
f"async_request attempt {attempt + 1}/{retries + 1} failed for {method} {url}: {exc}; retrying in {delay:.2f}s"
|
||||
f"async_request attempt {attempt + 1}/{retries + 1} failed for {method} {_redact_sensitive_url_params(url)}: {exc}; retrying in {delay:.2f}s"
|
||||
)
|
||||
await asyncio.sleep(delay)
|
||||
raise last_exc # pragma: no cover
|
||||
|
||||
@ -41,7 +41,13 @@ def get_opendal_config():
|
||||
scheme = opendal_config.get("scheme")
|
||||
config_data = opendal_config.get("config", {})
|
||||
kwargs = {"scheme": scheme, **config_data}
|
||||
logging.info("Loaded OpenDAL configuration from yaml: %s", kwargs)
|
||||
redacted_kwargs = kwargs.copy()
|
||||
if 'password' in redacted_kwargs:
|
||||
redacted_kwargs['password'] = '***REDACTED***'
|
||||
if 'connection_string' in redacted_kwargs and 'password' in redacted_kwargs:
|
||||
import re
|
||||
redacted_kwargs['connection_string'] = re.sub(r':[^@]+@', ':***REDACTED***@', redacted_kwargs['connection_string'])
|
||||
logging.info("Loaded OpenDAL configuration from yaml: %s", redacted_kwargs)
|
||||
return kwargs
|
||||
except Exception as e:
|
||||
logging.error("Failed to load OpenDAL configuration from yaml: %s", str(e))
|
||||
|
||||
Reference in New Issue
Block a user