mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-18 03:26:42 +08:00
Refa:remove sensitive information (#11873)
### What problem does this PR solve? change: remove sensitive information ### Type of change - [x] Refactoring
This commit is contained in:
2
.github/workflows/tests.yml
vendored
2
.github/workflows/tests.yml
vendored
@ -1,4 +1,6 @@
|
|||||||
name: tests
|
name: tests
|
||||||
|
permissions:
|
||||||
|
contents: read
|
||||||
|
|
||||||
on:
|
on:
|
||||||
push:
|
push:
|
||||||
|
|||||||
@ -351,7 +351,7 @@ class AdminCLI(Cmd):
|
|||||||
def verify_admin(self, arguments: dict, single_command: bool):
|
def verify_admin(self, arguments: dict, single_command: bool):
|
||||||
self.host = arguments['host']
|
self.host = arguments['host']
|
||||||
self.port = arguments['port']
|
self.port = arguments['port']
|
||||||
print(f"Attempt to access ip: {self.host}, port: {self.port}")
|
print("Attempt to access server for admin login")
|
||||||
url = f"http://{self.host}:{self.port}/api/v1/admin/login"
|
url = f"http://{self.host}:{self.port}/api/v1/admin/login"
|
||||||
|
|
||||||
attempt_count = 3
|
attempt_count = 3
|
||||||
@ -390,7 +390,7 @@ class AdminCLI(Cmd):
|
|||||||
print(f"Bad response,status: {response.status_code}, password is wrong")
|
print(f"Bad response,status: {response.status_code}, password is wrong")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(str(e))
|
print(str(e))
|
||||||
print(f"Can't access {self.host}, port: {self.port}")
|
print("Can't access server for admin login (connection failed)")
|
||||||
|
|
||||||
def _format_service_detail_table(self, data):
|
def _format_service_detail_table(self, data):
|
||||||
if isinstance(data, list):
|
if isinstance(data, list):
|
||||||
@ -674,7 +674,7 @@ class AdminCLI(Cmd):
|
|||||||
user_name: str = user_name_tree.children[0].strip("'\"")
|
user_name: str = user_name_tree.children[0].strip("'\"")
|
||||||
password_tree: Tree = command['password']
|
password_tree: Tree = command['password']
|
||||||
password: str = password_tree.children[0].strip("'\"")
|
password: str = password_tree.children[0].strip("'\"")
|
||||||
print(f"Alter user: {user_name}, password: {password}")
|
print(f"Alter user: {user_name}, password: ******")
|
||||||
url = f'http://{self.host}:{self.port}/api/v1/admin/users/{user_name}/password'
|
url = f'http://{self.host}:{self.port}/api/v1/admin/users/{user_name}/password'
|
||||||
response = self.session.put(url, json={'new_password': encrypt(password)})
|
response = self.session.put(url, json={'new_password': encrypt(password)})
|
||||||
res_json = response.json()
|
res_json = response.json()
|
||||||
@ -689,7 +689,7 @@ class AdminCLI(Cmd):
|
|||||||
password_tree: Tree = command['password']
|
password_tree: Tree = command['password']
|
||||||
password: str = password_tree.children[0].strip("'\"")
|
password: str = password_tree.children[0].strip("'\"")
|
||||||
role: str = command['role']
|
role: str = command['role']
|
||||||
print(f"Create user: {user_name}, password: {password}, role: {role}")
|
print(f"Create user: {user_name}, password: ******, role: {role}")
|
||||||
url = f'http://{self.host}:{self.port}/api/v1/admin/users'
|
url = f'http://{self.host}:{self.port}/api/v1/admin/users'
|
||||||
response = self.session.post(
|
response = self.session.post(
|
||||||
url,
|
url,
|
||||||
@ -951,7 +951,7 @@ def main():
|
|||||||
|
|
||||||
args = cli.parse_connection_args(sys.argv)
|
args = cli.parse_connection_args(sys.argv)
|
||||||
if 'error' in args:
|
if 'error' in args:
|
||||||
print(f"Error: {args['error']}")
|
print("Error: Invalid connection arguments")
|
||||||
return
|
return
|
||||||
|
|
||||||
if 'command' in args:
|
if 'command' in args:
|
||||||
@ -960,7 +960,7 @@ def main():
|
|||||||
return
|
return
|
||||||
if cli.verify_admin(args, single_command=True):
|
if cli.verify_admin(args, single_command=True):
|
||||||
command: str = args['command']
|
command: str = args['command']
|
||||||
print(f"Run single command: {command}")
|
# print(f"Run single command: {command}")
|
||||||
cli.run_single_command(command)
|
cli.run_single_command(command)
|
||||||
else:
|
else:
|
||||||
if cli.verify_admin(args, single_command=False):
|
if cli.verify_admin(args, single_command=False):
|
||||||
|
|||||||
@ -176,11 +176,11 @@ def login_verify(f):
|
|||||||
"message": "Access denied",
|
"message": "Access denied",
|
||||||
"data": None
|
"data": None
|
||||||
}), 200
|
}), 200
|
||||||
except Exception as e:
|
except Exception:
|
||||||
error_msg = str(e)
|
logging.exception("An error occurred during admin login verification.")
|
||||||
return jsonify({
|
return jsonify({
|
||||||
"code": 500,
|
"code": 500,
|
||||||
"message": error_msg
|
"message": "An internal server error occurred."
|
||||||
}), 200
|
}), 200
|
||||||
|
|
||||||
return f(*args, **kwargs)
|
return f(*args, **kwargs)
|
||||||
|
|||||||
@ -342,7 +342,15 @@ async def test_db_connect():
|
|||||||
f"UID={req['username']};"
|
f"UID={req['username']};"
|
||||||
f"PWD={req['password']};"
|
f"PWD={req['password']};"
|
||||||
)
|
)
|
||||||
logging.info(conn_str)
|
redacted_conn_str = (
|
||||||
|
f"DATABASE={req['database']};"
|
||||||
|
f"HOSTNAME={req['host']};"
|
||||||
|
f"PORT={req['port']};"
|
||||||
|
f"PROTOCOL=TCPIP;"
|
||||||
|
f"UID={req['username']};"
|
||||||
|
f"PWD=****;"
|
||||||
|
)
|
||||||
|
logging.info(redacted_conn_str)
|
||||||
conn = ibm_db.connect(conn_str, "", "")
|
conn = ibm_db.connect(conn_str, "", "")
|
||||||
stmt = ibm_db.exec_immediate(conn, "SELECT 1 FROM sysibm.sysdummy1")
|
stmt = ibm_db.exec_immediate(conn, "SELECT 1 FROM sysibm.sysdummy1")
|
||||||
ibm_db.fetch_assoc(stmt)
|
ibm_db.fetch_assoc(stmt)
|
||||||
|
|||||||
@ -73,7 +73,7 @@ def init_superuser(nickname=DEFAULT_SUPERUSER_NICKNAME, email=DEFAULT_SUPERUSER_
|
|||||||
UserTenantService.insert(**usr_tenant)
|
UserTenantService.insert(**usr_tenant)
|
||||||
TenantLLMService.insert_many(tenant_llm)
|
TenantLLMService.insert_many(tenant_llm)
|
||||||
logging.info(
|
logging.info(
|
||||||
f"Super user initialized. email: {email}, password: {password}. Changing the password after login is strongly recommended.")
|
f"Super user initialized. email: {email},A default password has been set; changing the password after login is strongly recommended.")
|
||||||
|
|
||||||
chat_mdl = LLMBundle(tenant["id"], LLMType.CHAT, tenant["llm_id"])
|
chat_mdl = LLMBundle(tenant["id"], LLMType.CHAT, tenant["llm_id"])
|
||||||
msg = chat_mdl.chat(system="", history=[
|
msg = chat_mdl.chat(system="", history=[
|
||||||
|
|||||||
@ -273,7 +273,7 @@ def delete_user_data(user_id: str) -> dict:
|
|||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.exception(e)
|
logging.exception(e)
|
||||||
return {"success": False, "message": f"Error: {str(e)}. Already done:\n{done_msg}"}
|
return {"success": False, "message": "An internal error occurred during user deletion. Some operations may have completed.","details": done_msg}
|
||||||
|
|
||||||
|
|
||||||
def delete_user_agents(user_id: str) -> dict:
|
def delete_user_agents(user_id: str) -> dict:
|
||||||
|
|||||||
@ -109,7 +109,7 @@ class LLMBundle(LLM4Tenant):
|
|||||||
|
|
||||||
llm_name = getattr(self, "llm_name", None)
|
llm_name = getattr(self, "llm_name", None)
|
||||||
if not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, used_tokens, llm_name):
|
if not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, used_tokens, llm_name):
|
||||||
logging.error("LLMBundle.encode can't update token usage for {}/EMBEDDING used_tokens: {}".format(self.tenant_id, used_tokens))
|
logging.error("LLMBundle.encode can't update token usage for <tenant redacted>/EMBEDDING used_tokens: {}".format(used_tokens))
|
||||||
|
|
||||||
if self.langfuse:
|
if self.langfuse:
|
||||||
generation.update(usage_details={"total_tokens": used_tokens})
|
generation.update(usage_details={"total_tokens": used_tokens})
|
||||||
@ -124,7 +124,7 @@ class LLMBundle(LLM4Tenant):
|
|||||||
emd, used_tokens = self.mdl.encode_queries(query)
|
emd, used_tokens = self.mdl.encode_queries(query)
|
||||||
llm_name = getattr(self, "llm_name", None)
|
llm_name = getattr(self, "llm_name", None)
|
||||||
if not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, used_tokens, llm_name):
|
if not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, used_tokens, llm_name):
|
||||||
logging.error("LLMBundle.encode_queries can't update token usage for {}/EMBEDDING used_tokens: {}".format(self.tenant_id, used_tokens))
|
logging.error("LLMBundle.encode_queries can't update token usage for <tenant redacted>/EMBEDDING used_tokens: {}".format(used_tokens))
|
||||||
|
|
||||||
if self.langfuse:
|
if self.langfuse:
|
||||||
generation.update(usage_details={"total_tokens": used_tokens})
|
generation.update(usage_details={"total_tokens": used_tokens})
|
||||||
|
|||||||
@ -1110,7 +1110,10 @@ def _make_attachment_link(
|
|||||||
) -> str | None:
|
) -> str | None:
|
||||||
download_link = ""
|
download_link = ""
|
||||||
|
|
||||||
if "api.atlassian.com" in confluence_client.url:
|
from urllib.parse import urlparse
|
||||||
|
netloc =urlparse(confluence_client.url).hostname
|
||||||
|
if netloc == "api.atlassian.com" or (netloc and netloc.endswith(".api.atlassian.com")):
|
||||||
|
# if "api.atlassian.com" in confluence_client.url:
|
||||||
# https://developer.atlassian.com/cloud/confluence/rest/v1/api-group-content---attachments/#api-wiki-rest-api-content-id-child-attachment-attachmentid-download-get
|
# https://developer.atlassian.com/cloud/confluence/rest/v1/api-group-content---attachments/#api-wiki-rest-api-content-id-child-attachment-attachmentid-download-get
|
||||||
if not parent_content_id:
|
if not parent_content_id:
|
||||||
logging.warning(
|
logging.warning(
|
||||||
|
|||||||
@ -135,7 +135,7 @@ class JiraConnector(CheckpointedConnectorWithPermSync, SlimConnectorWithPermSync
|
|||||||
except ValueError as exc:
|
except ValueError as exc:
|
||||||
raise ConnectorValidationError(str(exc)) from exc
|
raise ConnectorValidationError(str(exc)) from exc
|
||||||
else:
|
else:
|
||||||
logger.warning(f"[Jira] Scoped token requested but Jira base URL {self.jira_base_url} does not appear to be an Atlassian Cloud domain; scoped token ignored.")
|
logger.warning("[Jira] Scoped token requested but Jira base URL does not appear to be an Atlassian Cloud domain; scoped token ignored.")
|
||||||
|
|
||||||
user_email = credentials.get("jira_user_email") or credentials.get("username")
|
user_email = credentials.get("jira_user_email") or credentials.get("username")
|
||||||
api_token = credentials.get("jira_api_token") or credentials.get("token") or credentials.get("api_token")
|
api_token = credentials.get("jira_api_token") or credentials.get("token") or credentials.get("api_token")
|
||||||
@ -245,7 +245,7 @@ class JiraConnector(CheckpointedConnectorWithPermSync, SlimConnectorWithPermSync
|
|||||||
while True:
|
while True:
|
||||||
attempt += 1
|
attempt += 1
|
||||||
jql = self._build_jql(attempt_start, end)
|
jql = self._build_jql(attempt_start, end)
|
||||||
logger.info(f"[Jira] Executing Jira JQL attempt {attempt} (start={attempt_start}, end={end}, buffered_retry={retried_with_buffer}): {jql}")
|
logger.info(f"[Jira] Executing Jira JQL attempt {attempt} (start={attempt_start}, end={end}, buffered_retry={retried_with_buffer})")
|
||||||
try:
|
try:
|
||||||
return (yield from self._load_from_checkpoint_internal(jql, checkpoint, start_filter=start))
|
return (yield from self._load_from_checkpoint_internal(jql, checkpoint, start_filter=start))
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
@ -927,9 +927,6 @@ def main(config: dict[str, Any] | None = None) -> None:
|
|||||||
base_url = config.get("base_url")
|
base_url = config.get("base_url")
|
||||||
credentials = config.get("credentials", {})
|
credentials = config.get("credentials", {})
|
||||||
|
|
||||||
print(f"[Jira] {config=}", flush=True)
|
|
||||||
print(f"[Jira] {credentials=}", flush=True)
|
|
||||||
|
|
||||||
if not base_url:
|
if not base_url:
|
||||||
raise RuntimeError("Jira base URL must be provided via config or CLI arguments.")
|
raise RuntimeError("Jira base URL must be provided via config or CLI arguments.")
|
||||||
if not (credentials.get("jira_api_token") or (credentials.get("jira_user_email") and credentials.get("jira_password"))):
|
if not (credentials.get("jira_api_token") or (credentials.get("jira_user_email") and credentials.get("jira_password"))):
|
||||||
|
|||||||
@ -16,6 +16,7 @@ import logging
|
|||||||
import os
|
import os
|
||||||
import time
|
import time
|
||||||
from typing import Any, Dict, Optional
|
from typing import Any, Dict, Optional
|
||||||
|
from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse
|
||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
|
|
||||||
@ -52,6 +53,27 @@ def _get_delay(backoff_factor: float, attempt: int) -> float:
|
|||||||
return backoff_factor * (2**attempt)
|
return backoff_factor * (2**attempt)
|
||||||
|
|
||||||
|
|
||||||
|
# List of sensitive parameters to redact from URLs before logging
|
||||||
|
_SENSITIVE_QUERY_KEYS = {"client_secret", "secret", "code", "access_token", "refresh_token", "password", "token", "app_secret"}
|
||||||
|
|
||||||
|
def _redact_sensitive_url_params(url: str) -> str:
|
||||||
|
try:
|
||||||
|
parsed = urlparse(url)
|
||||||
|
if not parsed.query:
|
||||||
|
return url
|
||||||
|
clean_query = []
|
||||||
|
for k, v in parse_qsl(parsed.query, keep_blank_values=True):
|
||||||
|
if k.lower() in _SENSITIVE_QUERY_KEYS:
|
||||||
|
clean_query.append((k, "***REDACTED***"))
|
||||||
|
else:
|
||||||
|
clean_query.append((k, v))
|
||||||
|
new_query = urlencode(clean_query, doseq=True)
|
||||||
|
redacted_url = urlunparse(parsed._replace(query=new_query))
|
||||||
|
return redacted_url
|
||||||
|
except Exception:
|
||||||
|
return url
|
||||||
|
|
||||||
|
|
||||||
async def async_request(
|
async def async_request(
|
||||||
method: str,
|
method: str,
|
||||||
url: str,
|
url: str,
|
||||||
@ -94,19 +116,19 @@ async def async_request(
|
|||||||
)
|
)
|
||||||
duration = time.monotonic() - start
|
duration = time.monotonic() - start
|
||||||
logger.debug(
|
logger.debug(
|
||||||
f"async_request {method} {url} -> {response.status_code} in {duration:.3f}s"
|
f"async_request {method} {_redact_sensitive_url_params(url)} -> {response.status_code} in {duration:.3f}s"
|
||||||
)
|
)
|
||||||
return response
|
return response
|
||||||
except httpx.RequestError as exc:
|
except httpx.RequestError as exc:
|
||||||
last_exc = exc
|
last_exc = exc
|
||||||
if attempt >= retries:
|
if attempt >= retries:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
f"async_request exhausted retries for {method} {url}: {exc}"
|
f"async_request exhausted retries for {method} {_redact_sensitive_url_params(url)}: {exc}"
|
||||||
)
|
)
|
||||||
raise
|
raise
|
||||||
delay = _get_delay(backoff_factor, attempt)
|
delay = _get_delay(backoff_factor, attempt)
|
||||||
logger.warning(
|
logger.warning(
|
||||||
f"async_request attempt {attempt + 1}/{retries + 1} failed for {method} {url}: {exc}; retrying in {delay:.2f}s"
|
f"async_request attempt {attempt + 1}/{retries + 1} failed for {method} {_redact_sensitive_url_params(url)}: {exc}; retrying in {delay:.2f}s"
|
||||||
)
|
)
|
||||||
await asyncio.sleep(delay)
|
await asyncio.sleep(delay)
|
||||||
raise last_exc # pragma: no cover
|
raise last_exc # pragma: no cover
|
||||||
|
|||||||
@ -41,7 +41,13 @@ def get_opendal_config():
|
|||||||
scheme = opendal_config.get("scheme")
|
scheme = opendal_config.get("scheme")
|
||||||
config_data = opendal_config.get("config", {})
|
config_data = opendal_config.get("config", {})
|
||||||
kwargs = {"scheme": scheme, **config_data}
|
kwargs = {"scheme": scheme, **config_data}
|
||||||
logging.info("Loaded OpenDAL configuration from yaml: %s", kwargs)
|
redacted_kwargs = kwargs.copy()
|
||||||
|
if 'password' in redacted_kwargs:
|
||||||
|
redacted_kwargs['password'] = '***REDACTED***'
|
||||||
|
if 'connection_string' in redacted_kwargs and 'password' in redacted_kwargs:
|
||||||
|
import re
|
||||||
|
redacted_kwargs['connection_string'] = re.sub(r':[^@]+@', ':***REDACTED***@', redacted_kwargs['connection_string'])
|
||||||
|
logging.info("Loaded OpenDAL configuration from yaml: %s", redacted_kwargs)
|
||||||
return kwargs
|
return kwargs
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error("Failed to load OpenDAL configuration from yaml: %s", str(e))
|
logging.error("Failed to load OpenDAL configuration from yaml: %s", str(e))
|
||||||
|
|||||||
Reference in New Issue
Block a user