From 061d8f78e53179181f10b482b669c2feb08a654f Mon Sep 17 00:00:00 2001
From: Lynn
Date: Mon, 3 Nov 2025 11:01:24 +0800
Subject: [PATCH] Feat: location rule for http (#10901)

### What problem does this PR solve?

Location rule for http.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
---
NOTE(review): line breaks and indentation were lost in the received copy and
are reconstructed here from the diff syntax; confirm the leading whitespace
against the target tree before applying. In the parser.py hunk the added
lines now read from the `m` parameter of `_add_content` instead of the
enclosing `msg`, and three comment/docstring lines were added, so the
post-image blob id on the parser.py `index` line is stale.

 docker/nginx/ragflow.conf |  5 +++++
 rag/flow/parser/parser.py | 28 ++++++++++++++++++++++------
 2 files changed, 27 insertions(+), 6 deletions(-)

diff --git a/docker/nginx/ragflow.conf b/docker/nginx/ragflow.conf
index 8f58c9a30..a06098276 100644
--- a/docker/nginx/ragflow.conf
+++ b/docker/nginx/ragflow.conf
@@ -10,6 +10,11 @@ server {
     gzip_vary on;
     gzip_disable "MSIE [1-6]\.";
 
+    location ~ ^/api/v1/admin {
+        proxy_pass http://localhost:9381;
+        include proxy.conf;
+    }
+
     location ~ ^/(v1|api) {
         proxy_pass http://localhost:9380;
         include proxy.conf;
diff --git a/rag/flow/parser/parser.py b/rag/flow/parser/parser.py
index 6e55a629e..d67253d88 100644
--- a/rag/flow/parser/parser.py
+++ b/rag/flow/parser/parser.py
@@ -464,14 +464,30 @@ class Parser(ProcessBase):
             if "body" in target_fields:
                 body_text, body_html = [], []
                 def _add_content(m, content_type):
+                    def _decode_payload(payload, charset, target_list):
+                        """Decode payload bytes and append the resulting text to target_list."""
+                        try:
+                            target_list.append(payload.decode(charset))
+                        except (UnicodeDecodeError, LookupError):
+                            # latin1 accepts any byte sequence, so this loop always ends via break.
+                            for enc in ["utf-8", "gb2312", "gbk", "gb18030", "latin1"]:
+                                try:
+                                    target_list.append(payload.decode(enc))
+                                    break
+                                except UnicodeDecodeError:
+                                    continue
+                            else:
+                                target_list.append(payload.decode("utf-8", errors="ignore"))
+
+                    # Decode the part being visited (m), so nested parts of a multipart mail are read.
                     if content_type == "text/plain":
-                        body_text.append(
-                            m.get_payload(decode=True).decode(m.get_content_charset())
-                        )
+                        payload = m.get_payload(decode=True)
+                        charset = m.get_content_charset() or "utf-8"
+                        _decode_payload(payload, charset, body_text)
                     elif content_type == "text/html":
-                        body_html.append(
-                            m.get_payload(decode=True).decode(m.get_content_charset())
-                        )
+                        payload = m.get_payload(decode=True)
+                        charset = m.get_content_charset() or "utf-8"
+                        _decode_payload(payload, charset, body_html)
                     elif "multipart" in content_type:
                         if m.is_multipart():
                             for part in m.iter_parts():