Feat: location rule for http (#10901)

### What problem does this PR solve?

Location rule for http.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
Lynn
2025-11-03 11:01:24 +08:00
committed by GitHub
parent 7ec587fa9e
commit 061d8f78e5
2 changed files with 24 additions and 6 deletions

View File

@ -464,14 +464,27 @@ class Parser(ProcessBase):
if "body" in target_fields:
body_text, body_html = [], []
def _add_content(m, content_type):
def _decode_payload(payload, charset, target_list):
try:
target_list.append(payload.decode(charset))
except (UnicodeDecodeError, LookupError):
for enc in ["utf-8", "gb2312", "gbk", "gb18030", "latin1"]:
try:
target_list.append(payload.decode(enc))
break
except UnicodeDecodeError:
continue
else:
target_list.append(payload.decode("utf-8", errors="ignore"))
if content_type == "text/plain":
body_text.append(
m.get_payload(decode=True).decode(m.get_content_charset())
)
payload = msg.get_payload(decode=True)
charset = msg.get_content_charset() or "utf-8"
_decode_payload(payload, charset, body_text)
elif content_type == "text/html":
body_html.append(
m.get_payload(decode=True).decode(m.get_content_charset())
)
payload = msg.get_payload(decode=True)
charset = msg.get_content_charset() or "utf-8"
_decode_payload(payload, charset, body_html)
elif "multipart" in content_type:
if m.is_multipart():
for part in m.iter_parts():