mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Feat: parse email (#10181)
### What problem does this PR solve? - Dataflow supports email. - Fix the old email parser. - Add a new dependency to parse .msg files. ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) - [x] New Feature (non-breaking change which adds functionality) - [x] Other (please describe): add a new dependency.
This commit is contained in:
@ -496,7 +496,7 @@ class FileService(CommonService):
|
|||||||
return ParserType.AUDIO.value
|
return ParserType.AUDIO.value
|
||||||
if re.search(r"\.(ppt|pptx|pages)$", filename):
|
if re.search(r"\.(ppt|pptx|pages)$", filename):
|
||||||
return ParserType.PRESENTATION.value
|
return ParserType.PRESENTATION.value
|
||||||
if re.search(r"\.(eml)$", filename):
|
if re.search(r"\.(msg|eml)$", filename):
|
||||||
return ParserType.EMAIL.value
|
return ParserType.EMAIL.value
|
||||||
return default
|
return default
|
||||||
|
|
||||||
|
|||||||
@ -155,7 +155,7 @@ def filename_type(filename):
|
|||||||
if re.match(r".*\.pdf$", filename):
|
if re.match(r".*\.pdf$", filename):
|
||||||
return FileType.PDF.value
|
return FileType.PDF.value
|
||||||
|
|
||||||
if re.match(r".*\.(eml|doc|docx|ppt|pptx|yml|xml|htm|json|jsonl|ldjson|csv|txt|ini|xls|xlsx|wps|rtf|hlp|pages|numbers|key|md|py|js|java|c|cpp|h|php|go|ts|sh|cs|kt|html|sql)$", filename):
|
if re.match(r".*\.(msg|eml|doc|docx|ppt|pptx|yml|xml|htm|json|jsonl|ldjson|csv|txt|ini|xls|xlsx|wps|rtf|hlp|pages|numbers|key|md|py|js|java|c|cpp|h|php|go|ts|sh|cs|kt|html|sql)$", filename):
|
||||||
return FileType.DOC.value
|
return FileType.DOC.value
|
||||||
|
|
||||||
if re.match(r".*\.(wav|flac|ape|alac|wavpack|wv|mp3|aac|ogg|vorbis|opus)$", filename):
|
if re.match(r".*\.(wav|flac|ape|alac|wavpack|wv|mp3|aac|ogg|vorbis|opus)$", filename):
|
||||||
|
|||||||
@ -34,6 +34,7 @@ dependencies = [
|
|||||||
"elastic-transport==8.12.0",
|
"elastic-transport==8.12.0",
|
||||||
"elasticsearch==8.12.1",
|
"elasticsearch==8.12.1",
|
||||||
"elasticsearch-dsl==8.12.0",
|
"elasticsearch-dsl==8.12.0",
|
||||||
|
"extract-msg>=0.39.0",
|
||||||
"filelock==3.15.4",
|
"filelock==3.15.4",
|
||||||
"flask==3.0.3",
|
"flask==3.0.3",
|
||||||
"flask-cors==5.0.0",
|
"flask-cors==5.0.0",
|
||||||
|
|||||||
@ -78,7 +78,7 @@ def chunk(
|
|||||||
_add_content(msg, msg.get_content_type())
|
_add_content(msg, msg.get_content_type())
|
||||||
|
|
||||||
sections = TxtParser.parser_txt("\n".join(text_txt)) + [
|
sections = TxtParser.parser_txt("\n".join(text_txt)) + [
|
||||||
(line, "") for line in HtmlParser.parser_txt("\n".join(html_txt)) if line
|
(line, "") for line in HtmlParser.parser_txt("\n".join(html_txt), chunk_token_num=parser_config["chunk_token_num"]) if line
|
||||||
]
|
]
|
||||||
|
|
||||||
st = timer()
|
st = timer()
|
||||||
|
|||||||
@ -13,7 +13,9 @@
|
|||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
import io
|
import io
|
||||||
|
import json
|
||||||
import logging
|
import logging
|
||||||
|
import os
|
||||||
import random
|
import random
|
||||||
from functools import partial
|
from functools import partial
|
||||||
|
|
||||||
@ -57,7 +59,10 @@ class ParserParam(ProcessParamBase):
|
|||||||
"image": [
|
"image": [
|
||||||
"text"
|
"text"
|
||||||
],
|
],
|
||||||
"email": [],
|
"email": [
|
||||||
|
"text",
|
||||||
|
"json"
|
||||||
|
],
|
||||||
"text": [
|
"text": [
|
||||||
"text",
|
"text",
|
||||||
"json"
|
"json"
|
||||||
@ -112,7 +117,11 @@ class ParserParam(ProcessParamBase):
|
|||||||
"output_format": "json",
|
"output_format": "json",
|
||||||
},
|
},
|
||||||
"email": {
|
"email": {
|
||||||
"fields": []
|
"suffix": [
|
||||||
|
"eml", "msg"
|
||||||
|
],
|
||||||
|
"fields": ["from", "to", "cc", "bcc", "date", "subject", "body", "attachments", "metadata"],
|
||||||
|
"output_format": "json",
|
||||||
},
|
},
|
||||||
"text": {
|
"text": {
|
||||||
"suffix": [
|
"suffix": [
|
||||||
@ -194,6 +203,11 @@ class ParserParam(ProcessParamBase):
|
|||||||
audio_language = audio_config.get("lang", "")
|
audio_language = audio_config.get("lang", "")
|
||||||
self.check_empty(audio_language, "Language")
|
self.check_empty(audio_language, "Language")
|
||||||
|
|
||||||
|
email_config = self.setups.get("email", "")
|
||||||
|
if email_config:
|
||||||
|
email_output_format = email_config.get("output_format", "")
|
||||||
|
self.check_valid_value(email_output_format, "Email output format abnormal.", self.allowed_output_format["email"])
|
||||||
|
|
||||||
def get_input_form(self) -> dict[str, dict]:
|
def get_input_form(self) -> dict[str, dict]:
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
@ -384,6 +398,124 @@ class Parser(ProcessBase):
|
|||||||
|
|
||||||
self.set_output("text", txt)
|
self.set_output("text", txt)
|
||||||
|
|
||||||
|
def _email(self, from_upstream: ParserFromUpstream):
    """Parse an email blob (``.eml`` or Outlook ``.msg``) and publish the result.

    The fields to extract and the output format are read from
    ``self._param.setups["email"]`` (keys ``fields`` and ``output_format``).
    A file whose extension is ``.eml`` (compared case-insensitively) is parsed
    with the standard-library ``email`` package; any other file is handed to
    ``extract_msg``.

    Args:
        from_upstream: Object exposing ``blob`` (the raw file bytes) and
            ``name`` (the file name, used only to choose the parser).

    Side effects:
        Reports progress through ``self.callback`` and stores the parsed email
        through ``self.set_output`` under ``"json"`` (a one-element list
        holding the field dict) when ``output_format`` is ``"json"``, or under
        ``"text"`` (a flattened string) otherwise.
    """
    self.callback(random.randint(1, 5) / 100.0, "Start to work on an email.")

    blob = from_upstream.blob
    name = from_upstream.name

    conf = self._param.setups["email"]
    target_fields = conf["fields"]
    # Headers that are either emitted as top-level fields (when requested) or
    # dropped; every other header is collected under "metadata".
    basic_headers = ("from", "to", "cc", "bcc", "date", "subject")

    def _decode_part(part):
        """Return the decoded text of a single non-multipart MIME part."""
        raw = part.get_payload(decode=True) or b""
        # A part may omit the charset parameter, in which case
        # get_content_charset() returns None; fall back to UTF-8.
        charset = part.get_content_charset() or "utf-8"
        try:
            return raw.decode(charset)
        except (LookupError, UnicodeDecodeError):
            # Unknown or wrongly declared charset: keep what can be read
            # instead of failing the whole document.
            return raw.decode("utf-8", errors="replace")

    def _parse_eml():
        """Extract the configured fields from an RFC 822 (.eml) message."""
        from email import policy
        from email.parser import BytesParser

        msg = BytesParser(policy=policy.default).parse(io.BytesIO(blob))
        content = {"metadata": {}}

        for header, value in msg.items():
            key = header.lower()
            # policy.default yields header objects (str subclasses); store
            # plain strings so the result holds only built-in types.
            if key in target_fields:
                content[key] = str(value)
            elif key not in basic_headers:
                content["metadata"][key] = str(value)

        if "body" in target_fields:
            body_text, body_html = [], []

            def _collect(part):
                ctype = part.get_content_type()
                if ctype == "text/plain":
                    body_text.append(_decode_part(part))
                elif ctype == "text/html":
                    body_html.append(_decode_part(part))
                elif "multipart" in ctype and part.is_multipart():
                    for child in part.iter_parts():
                        _collect(child)

            _collect(msg)
            content["text"] = body_text
            content["text_html"] = body_html

        if "attachments" in target_fields:
            # get_content_disposition() returns the lower-cased disposition
            # without its parameters, so "attachment ; filename=..." matches.
            content["attachments"] = [
                {
                    "filename": part.get_filename(),
                    "payload": part.get_payload(decode=True),
                }
                for part in msg.iter_attachments()
                if part.get_content_disposition() == "attachment"
            ]
        return content

    def _parse_msg():
        """Extract the configured fields from an Outlook (.msg) message."""
        import extract_msg

        logging.info("Parsing %s as an Outlook .msg file.", name)
        msg = extract_msg.Message(blob)
        try:
            basic_content = {
                "from": msg.sender,
                "to": msg.to,
                "cc": msg.cc,
                "bcc": msg.bcc,
                "date": msg.date,
                "subject": msg.subject,
            }
            content = {k: v for k, v in basic_content.items() if k in target_fields}
            content["metadata"] = {
                "message_id": msg.messageId,
                "in_reply_to": msg.inReplyTo,
            }
            # NOTE(review): msg.date and msg.htmlBody may not be plain str
            # depending on the extract_msg version -- confirm that downstream
            # consumers can serialise them.
            if "body" in target_fields:
                content["text"] = msg.body  # usually empty. try text_html instead
                content["text_html"] = msg.htmlBody
            if "attachments" in target_fields:
                # Attachment data is read here, before the message is closed.
                content["attachments"] = [
                    {"filename": att.name, "payload": att.data}  # payload is binary
                    for att in msg.attachments
                ]
        finally:
            # Release whatever the parsed message keeps open.
            msg.close()
        return content

    _, ext = os.path.splitext(name)
    # Compare case-insensitively so "MAIL.EML" is not mistaken for a .msg file.
    if ext.lower() == ".eml":
        email_content = _parse_eml()
    else:
        email_content = _parse_msg()

    if conf["output_format"] == "json":
        self.set_output("json", [email_content])
        return

    # Text output: one "key:value" line per string or dict field; list fields
    # are expanded item by item.
    pieces = []
    for k, v in email_content.items():
        if isinstance(v, str):
            # basic info
            pieces.append(f'{k}:{v}' + "\n")
        elif isinstance(v, dict):
            # metadata
            pieces.append(f'{k}:{json.dumps(v)}' + "\n")
        elif isinstance(v, list):
            # attachments or body fragments
            for fb in v:
                if isinstance(fb, dict):
                    # attachments
                    pieces.append(f'{fb["filename"]}:{fb["payload"]}' + "\n")
                else:
                    # str, usually plain text
                    pieces.append(fb)
    self.set_output("text", "".join(pieces))
|
||||||
|
|
||||||
async def _invoke(self, **kwargs):
|
async def _invoke(self, **kwargs):
|
||||||
function_map = {
|
function_map = {
|
||||||
"pdf": self._pdf,
|
"pdf": self._pdf,
|
||||||
@ -394,6 +526,7 @@ class Parser(ProcessBase):
|
|||||||
"text": self._text,
|
"text": self._text,
|
||||||
"image": self._image,
|
"image": self._image,
|
||||||
"audio": self._audio,
|
"audio": self._audio,
|
||||||
|
"email": self._email,
|
||||||
}
|
}
|
||||||
try:
|
try:
|
||||||
from_upstream = ParserFromUpstream.model_validate(kwargs)
|
from_upstream = ParserFromUpstream.model_validate(kwargs)
|
||||||
|
|||||||
@ -89,6 +89,22 @@
|
|||||||
"lang": "Chinese",
|
"lang": "Chinese",
|
||||||
"llm_id": "SenseVoiceSmall",
|
"llm_id": "SenseVoiceSmall",
|
||||||
"output_format": "json"
|
"output_format": "json"
|
||||||
|
},
|
||||||
|
"email": {
|
||||||
|
"suffix": [
|
||||||
|
"msg"
|
||||||
|
],
|
||||||
|
"fields": [
|
||||||
|
"from",
|
||||||
|
"to",
|
||||||
|
"cc",
|
||||||
|
"bcc",
|
||||||
|
"date",
|
||||||
|
"subject",
|
||||||
|
"body",
|
||||||
|
"attachments"
|
||||||
|
],
|
||||||
|
"output_format": "json"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
155
uv.lock
generated
155
uv.lock
generated
@ -1,5 +1,5 @@
|
|||||||
version = 1
|
version = 1
|
||||||
revision = 1
|
revision = 3
|
||||||
requires-python = ">=3.10, <3.13"
|
requires-python = ">=3.10, <3.13"
|
||||||
resolution-markers = [
|
resolution-markers = [
|
||||||
"python_full_version >= '3.12' and sys_platform == 'darwin'",
|
"python_full_version >= '3.12' and sys_platform == 'darwin'",
|
||||||
@ -861,6 +861,15 @@ wheels = [
|
|||||||
{ url = "https://mirrors.aliyun.com/pypi/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6" },
|
{ url = "https://mirrors.aliyun.com/pypi/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6" },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "colorclass"
|
||||||
|
version = "2.2.2"
|
||||||
|
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
|
||||||
|
sdist = { url = "https://mirrors.aliyun.com/pypi/packages/d7/1a/31ff00a33569a3b59d65bbdc445c73e12f92ad28195b7ace299f68b9af70/colorclass-2.2.2.tar.gz", hash = "sha256:6d4fe287766166a98ca7bc6f6312daf04a0481b1eda43e7173484051c0ab4366" }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://mirrors.aliyun.com/pypi/packages/30/b6/daf3e2976932da4ed3579cff7a30a53d22ea9323ee4f0d8e43be60454897/colorclass-2.2.2-py2.py3-none-any.whl", hash = "sha256:6f10c273a0ef7a1150b1120b6095cbdd68e5cf36dfd5d0fc957a2500bbf99a55" },
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "coloredlogs"
|
name = "coloredlogs"
|
||||||
version = "15.0.1"
|
version = "15.0.1"
|
||||||
@ -873,6 +882,15 @@ wheels = [
|
|||||||
{ url = "https://mirrors.aliyun.com/pypi/packages/a7/06/3d6badcf13db419e25b07041d9c7b4a2c331d3f4e7134445ec5df57714cd/coloredlogs-15.0.1-py2.py3-none-any.whl", hash = "sha256:612ee75c546f53e92e70049c9dbfcc18c935a2b9a53b66085ce9ef6a6e5c0934" },
|
{ url = "https://mirrors.aliyun.com/pypi/packages/a7/06/3d6badcf13db419e25b07041d9c7b4a2c331d3f4e7134445ec5df57714cd/coloredlogs-15.0.1-py2.py3-none-any.whl", hash = "sha256:612ee75c546f53e92e70049c9dbfcc18c935a2b9a53b66085ce9ef6a6e5c0934" },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "compressed-rtf"
|
||||||
|
version = "1.0.7"
|
||||||
|
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
|
||||||
|
sdist = { url = "https://mirrors.aliyun.com/pypi/packages/b7/0c/929a4e8ef9d7143f54d77dadb5f370cc7b98534b1bd6e1124d0abe8efb24/compressed_rtf-1.0.7.tar.gz", hash = "sha256:7c30859334839f3cdc7d10796af5b434bb326b9df7cb5a65e95a8eacb2951b0e" }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://mirrors.aliyun.com/pypi/packages/07/1d/62f5bf92e12335eb63517f42671ed78512d48bbc69e02a942dd7b90f03f0/compressed_rtf-1.0.7-py3-none-any.whl", hash = "sha256:b7904921d78c67a0a4b7fff9fb361a00ae2b447b6edca010ce321cd98fa0fcc0" },
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "contourpy"
|
name = "contourpy"
|
||||||
version = "1.3.2"
|
version = "1.3.2"
|
||||||
@ -1322,6 +1340,23 @@ wheels = [
|
|||||||
{ url = "https://mirrors.aliyun.com/pypi/packages/fc/da/8376678b4a9ae0f9418d93df9c9cf851dced49c95ceb38daac6651e38f7a/duckduckgo_search-7.5.5-py3-none-any.whl", hash = "sha256:c71a0661aa436f215d9a05d653af424affb58825ab3e79f3b788053cbdee9ebc" },
|
{ url = "https://mirrors.aliyun.com/pypi/packages/fc/da/8376678b4a9ae0f9418d93df9c9cf851dced49c95ceb38daac6651e38f7a/duckduckgo_search-7.5.5-py3-none-any.whl", hash = "sha256:c71a0661aa436f215d9a05d653af424affb58825ab3e79f3b788053cbdee9ebc" },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "easygui"
|
||||||
|
version = "0.98.3"
|
||||||
|
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
|
||||||
|
sdist = { url = "https://mirrors.aliyun.com/pypi/packages/cc/ad/e35f7a30272d322be09dc98592d2f55d27cc933a7fde8baccbbeb2bd9409/easygui-0.98.3.tar.gz", hash = "sha256:d653ff79ee1f42f63b5a090f2f98ce02335d86ad8963b3ce2661805cafe99a04" }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://mirrors.aliyun.com/pypi/packages/8e/a7/b276ff776533b423710a285c8168b52551cb2ab0855443131fdc7fd8c16f/easygui-0.98.3-py2.py3-none-any.whl", hash = "sha256:33498710c68b5376b459cd3fc48d1d1f33822139eb3ed01defbc0528326da3ba" },
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "ebcdic"
|
||||||
|
version = "1.1.1"
|
||||||
|
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://mirrors.aliyun.com/pypi/packages/0d/2f/633031205333bee5f9f93761af8268746aa75f38754823aabb8570eb245b/ebcdic-1.1.1-py2.py3-none-any.whl", hash = "sha256:33b4cb729bc2d0bf46cc1847b0e5946897cb8d3f53520c5b9aa5fa98d7e735f1" },
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "editdistance"
|
name = "editdistance"
|
||||||
version = "0.8.1"
|
version = "0.8.1"
|
||||||
@ -1435,6 +1470,24 @@ wheels = [
|
|||||||
{ url = "https://mirrors.aliyun.com/pypi/packages/36/f4/c6e662dade71f56cd2f3735141b265c3c79293c109549c1e6933b0651ffc/exceptiongroup-1.3.0-py3-none-any.whl", hash = "sha256:4d111e6e0c13d0644cad6ddaa7ed0261a0b36971f6d23e7ec9b4b9097da78a10" },
|
{ url = "https://mirrors.aliyun.com/pypi/packages/36/f4/c6e662dade71f56cd2f3735141b265c3c79293c109549c1e6933b0651ffc/exceptiongroup-1.3.0-py3-none-any.whl", hash = "sha256:4d111e6e0c13d0644cad6ddaa7ed0261a0b36971f6d23e7ec9b4b9097da78a10" },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "extract-msg"
|
||||||
|
version = "0.55.0"
|
||||||
|
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
|
||||||
|
dependencies = [
|
||||||
|
{ name = "beautifulsoup4" },
|
||||||
|
{ name = "compressed-rtf" },
|
||||||
|
{ name = "ebcdic" },
|
||||||
|
{ name = "olefile" },
|
||||||
|
{ name = "red-black-tree-mod" },
|
||||||
|
{ name = "rtfde" },
|
||||||
|
{ name = "tzlocal" },
|
||||||
|
]
|
||||||
|
sdist = { url = "https://mirrors.aliyun.com/pypi/packages/5e/65/c70afb3b119a44b3ee36b029485dc15326cf3a7c50da19a1ecbbf949c5d1/extract_msg-0.55.0.tar.gz", hash = "sha256:cf08283498c3dfcc7f894dad1579f52e3ced9fb76b865c2355cbe757af8a54e1" }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://mirrors.aliyun.com/pypi/packages/53/81/87d5241036046ea17c5c8db228f4c9e04e07e53b627015d4496a99449aaf/extract_msg-0.55.0-py3-none-any.whl", hash = "sha256:baf0cdee9a8d267b70c366bc57ceb03dbfa1e7ab2dca6824169a7fe623f0917c" },
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "fake-http-header"
|
name = "fake-http-header"
|
||||||
version = "0.3.5"
|
version = "0.3.5"
|
||||||
@ -2893,6 +2946,15 @@ wheels = [
|
|||||||
{ url = "https://mirrors.aliyun.com/pypi/packages/92/b0/8f08df3f0fa584c4132937690c6dd33e0a116f963ecf2b35567f614e0ca7/langfuse-3.2.1-py3-none-any.whl", hash = "sha256:07a84e8c1eed6ac8e149bdda1431fd866e4aee741b66124316336fb2bc7e6a32" },
|
{ url = "https://mirrors.aliyun.com/pypi/packages/92/b0/8f08df3f0fa584c4132937690c6dd33e0a116f963ecf2b35567f614e0ca7/langfuse-3.2.1-py3-none-any.whl", hash = "sha256:07a84e8c1eed6ac8e149bdda1431fd866e4aee741b66124316336fb2bc7e6a32" },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "lark"
|
||||||
|
version = "1.1.9"
|
||||||
|
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
|
||||||
|
sdist = { url = "https://mirrors.aliyun.com/pypi/packages/2c/e1/804b6196b3fbdd0f8ba785fc62837b034782a891d6f663eea2f30ca23cfa/lark-1.1.9.tar.gz", hash = "sha256:15fa5236490824c2c4aba0e22d2d6d823575dcaf4cdd1848e34b6ad836240fba" }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://mirrors.aliyun.com/pypi/packages/e7/9c/eef7c591e6dc952f3636cfe0df712c0f9916cedf317810a3bb53ccb65cdd/lark-1.1.9-py3-none-any.whl", hash = "sha256:a0dd3a87289f8ccbb325901e4222e723e7d745dbfc1803eaf5f3d2ace19cf2db" },
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "litellm"
|
name = "litellm"
|
||||||
version = "1.75.5.post1"
|
version = "1.75.5.post1"
|
||||||
@ -3377,6 +3439,19 @@ wheels = [
|
|||||||
{ url = "https://mirrors.aliyun.com/pypi/packages/b1/ef/27dd35a7049c9a4f4211c6cd6a8c9db0a50647546f003a5867827ec45391/msgspec-0.19.0-cp312-cp312-win_amd64.whl", hash = "sha256:067f0de1c33cfa0b6a8206562efdf6be5985b988b53dd244a8e06f993f27c8c0" },
|
{ url = "https://mirrors.aliyun.com/pypi/packages/b1/ef/27dd35a7049c9a4f4211c6cd6a8c9db0a50647546f003a5867827ec45391/msgspec-0.19.0-cp312-cp312-win_amd64.whl", hash = "sha256:067f0de1c33cfa0b6a8206562efdf6be5985b988b53dd244a8e06f993f27c8c0" },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "msoffcrypto-tool"
|
||||||
|
version = "5.4.2"
|
||||||
|
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
|
||||||
|
dependencies = [
|
||||||
|
{ name = "cryptography" },
|
||||||
|
{ name = "olefile" },
|
||||||
|
]
|
||||||
|
sdist = { url = "https://mirrors.aliyun.com/pypi/packages/d2/b7/0fd6573157e0ec60c0c470e732ab3322fba4d2834fd24e1088d670522a01/msoffcrypto_tool-5.4.2.tar.gz", hash = "sha256:44b545adba0407564a0cc3d6dde6ca36b7c0fdf352b85bca51618fa1d4817370" }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://mirrors.aliyun.com/pypi/packages/03/54/7f6d3d9acad083dae8c22d9ab483b657359a1bf56fee1d7af88794677707/msoffcrypto_tool-5.4.2-py3-none-any.whl", hash = "sha256:274fe2181702d1e5a107ec1b68a4c9fea997a44972ae1cc9ae0cb4f6a50fef0e" },
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "multidict"
|
name = "multidict"
|
||||||
version = "6.6.3"
|
version = "6.6.3"
|
||||||
@ -3726,6 +3801,32 @@ wheels = [
|
|||||||
{ url = "https://mirrors.aliyun.com/pypi/packages/9e/4e/0d0c945463719429b7bd21dece907ad0bde437a2ff12b9b12fee94722ab0/nvidia_nvtx_cu12-12.6.77-py3-none-manylinux2014_x86_64.whl", hash = "sha256:6574241a3ec5fdc9334353ab8c479fe75841dbe8f4532a8fc97ce63503330ba1" },
|
{ url = "https://mirrors.aliyun.com/pypi/packages/9e/4e/0d0c945463719429b7bd21dece907ad0bde437a2ff12b9b12fee94722ab0/nvidia_nvtx_cu12-12.6.77-py3-none-manylinux2014_x86_64.whl", hash = "sha256:6574241a3ec5fdc9334353ab8c479fe75841dbe8f4532a8fc97ce63503330ba1" },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "olefile"
|
||||||
|
version = "0.47"
|
||||||
|
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
|
||||||
|
sdist = { url = "https://mirrors.aliyun.com/pypi/packages/69/1b/077b508e3e500e1629d366249c3ccb32f95e50258b231705c09e3c7a4366/olefile-0.47.zip", hash = "sha256:599383381a0bf3dfbd932ca0ca6515acd174ed48870cbf7fee123d698c192c1c" }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://mirrors.aliyun.com/pypi/packages/17/d3/b64c356a907242d719fc668b71befd73324e47ab46c8ebbbede252c154b2/olefile-0.47-py2.py3-none-any.whl", hash = "sha256:543c7da2a7adadf21214938bb79c83ea12b473a4b6ee4ad4bf854e7715e13d1f" },
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "oletools"
|
||||||
|
version = "0.60.2"
|
||||||
|
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
|
||||||
|
dependencies = [
|
||||||
|
{ name = "colorclass" },
|
||||||
|
{ name = "easygui" },
|
||||||
|
{ name = "msoffcrypto-tool", marker = "(platform_python_implementation != 'PyPy' and sys_platform == 'darwin') or (platform_python_implementation != 'PyPy' and sys_platform == 'win32') or (sys_platform != 'darwin' and sys_platform != 'win32')" },
|
||||||
|
{ name = "olefile" },
|
||||||
|
{ name = "pcodedmp" },
|
||||||
|
{ name = "pyparsing" },
|
||||||
|
]
|
||||||
|
sdist = { url = "https://mirrors.aliyun.com/pypi/packages/5c/2f/037f40e44706d542b94a2312ccc33ee2701ebfc9a83b46b55263d49ce55a/oletools-0.60.2.zip", hash = "sha256:ad452099f4695ffd8855113f453348200d195ee9fa341a09e197d66ee7e0b2c3" }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://mirrors.aliyun.com/pypi/packages/ac/ff/05257b7183279b80ecec6333744de23f48f0faeeba46c93e6d13ce835515/oletools-0.60.2-py2.py3-none-any.whl", hash = "sha256:72ad8bd748fd0c4e7b5b4733af770d11543ebb2bf2697455f99f975fcd50cc96" },
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "ollama"
|
name = "ollama"
|
||||||
version = "0.2.1"
|
version = "0.2.1"
|
||||||
@ -4188,6 +4289,19 @@ wheels = [
|
|||||||
{ url = "https://mirrors.aliyun.com/pypi/packages/87/2b/b50d3d08ea0fc419c183a84210571eba005328efa62b6b98bc28e9ead32a/patsy-1.0.1-py2.py3-none-any.whl", hash = "sha256:751fb38f9e97e62312e921a1954b81e1bb2bcda4f5eeabaf94db251ee791509c" },
|
{ url = "https://mirrors.aliyun.com/pypi/packages/87/2b/b50d3d08ea0fc419c183a84210571eba005328efa62b6b98bc28e9ead32a/patsy-1.0.1-py2.py3-none-any.whl", hash = "sha256:751fb38f9e97e62312e921a1954b81e1bb2bcda4f5eeabaf94db251ee791509c" },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pcodedmp"
|
||||||
|
version = "1.2.6"
|
||||||
|
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
|
||||||
|
dependencies = [
|
||||||
|
{ name = "oletools" },
|
||||||
|
{ name = "win-unicode-console", marker = "platform_python_implementation != 'PyPy' and sys_platform == 'win32'" },
|
||||||
|
]
|
||||||
|
sdist = { url = "https://mirrors.aliyun.com/pypi/packages/3d/20/6d461e29135f474408d0d7f95b2456a9ba245560768ee51b788af10f7429/pcodedmp-1.2.6.tar.gz", hash = "sha256:025f8c809a126f45a082ffa820893e6a8d990d9d7ddb68694b5a9f0a6dbcd955" }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://mirrors.aliyun.com/pypi/packages/ba/72/b380fb5c89d89c3afafac8cf02a71a45f4f4a4f35531ca949a34683962d1/pcodedmp-1.2.6-py2.py3-none-any.whl", hash = "sha256:4441f7c0ab4cbda27bd4668db3b14f36261d86e5059ce06c0828602cbe1c4278" },
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "pdfminer-six"
|
name = "pdfminer-six"
|
||||||
version = "20221105"
|
version = "20221105"
|
||||||
@ -5300,6 +5414,7 @@ dependencies = [
|
|||||||
{ name = "elastic-transport" },
|
{ name = "elastic-transport" },
|
||||||
{ name = "elasticsearch" },
|
{ name = "elasticsearch" },
|
||||||
{ name = "elasticsearch-dsl" },
|
{ name = "elasticsearch-dsl" },
|
||||||
|
{ name = "extract-msg" },
|
||||||
{ name = "filelock" },
|
{ name = "filelock" },
|
||||||
{ name = "flasgger" },
|
{ name = "flasgger" },
|
||||||
{ name = "flask" },
|
{ name = "flask" },
|
||||||
@ -5452,6 +5567,7 @@ requires-dist = [
|
|||||||
{ name = "elastic-transport", specifier = "==8.12.0" },
|
{ name = "elastic-transport", specifier = "==8.12.0" },
|
||||||
{ name = "elasticsearch", specifier = "==8.12.1" },
|
{ name = "elasticsearch", specifier = "==8.12.1" },
|
||||||
{ name = "elasticsearch-dsl", specifier = "==8.12.0" },
|
{ name = "elasticsearch-dsl", specifier = "==8.12.0" },
|
||||||
|
{ name = "extract-msg", specifier = ">=0.39.0" },
|
||||||
{ name = "fastembed", marker = "(platform_machine != 'x86_64' and extra == 'full') or (sys_platform == 'darwin' and extra == 'full')", specifier = ">=0.3.6,<0.4.0" },
|
{ name = "fastembed", marker = "(platform_machine != 'x86_64' and extra == 'full') or (sys_platform == 'darwin' and extra == 'full')", specifier = ">=0.3.6,<0.4.0" },
|
||||||
{ name = "fastembed-gpu", marker = "platform_machine == 'x86_64' and sys_platform != 'darwin' and extra == 'full'", specifier = ">=0.3.6,<0.4.0" },
|
{ name = "fastembed-gpu", marker = "platform_machine == 'x86_64' and sys_platform != 'darwin' and extra == 'full'", specifier = ">=0.3.6,<0.4.0" },
|
||||||
{ name = "filelock", specifier = "==3.15.4" },
|
{ name = "filelock", specifier = "==3.15.4" },
|
||||||
@ -5630,6 +5746,12 @@ wheels = [
|
|||||||
{ url = "https://mirrors.aliyun.com/pypi/packages/c2/5a/2f2e7fc026d5e64b5408aa3fbe0296a6407b8481196cae4daacacb3a3ae0/readerwriterlock-1.0.9-py3-none-any.whl", hash = "sha256:8c4b704e60d15991462081a27ef46762fea49b478aa4426644f2146754759ca7" },
|
{ url = "https://mirrors.aliyun.com/pypi/packages/c2/5a/2f2e7fc026d5e64b5408aa3fbe0296a6407b8481196cae4daacacb3a3ae0/readerwriterlock-1.0.9-py3-none-any.whl", hash = "sha256:8c4b704e60d15991462081a27ef46762fea49b478aa4426644f2146754759ca7" },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "red-black-tree-mod"
|
||||||
|
version = "1.22"
|
||||||
|
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
|
||||||
|
sdist = { url = "https://mirrors.aliyun.com/pypi/packages/48/75/bfa342a2ebfc9623b701f1c6995b9906fd6dd2cedf6bce777d09e23303ac/red-black-tree-mod-1.22.tar.gz", hash = "sha256:38e3652903a2bf96379c27c2082ca0b7b905158662dd7ef0c97f4fd93a9aa908" }
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "referencing"
|
name = "referencing"
|
||||||
version = "0.36.2"
|
version = "0.36.2"
|
||||||
@ -5883,6 +6005,19 @@ wheels = [
|
|||||||
{ url = "https://mirrors.aliyun.com/pypi/packages/64/8d/0133e4eb4beed9e425d9a98ed6e081a55d195481b7632472be1af08d2f6b/rsa-4.9.1-py3-none-any.whl", hash = "sha256:68635866661c6836b8d39430f97a996acbd61bfa49406748ea243539fe239762" },
|
{ url = "https://mirrors.aliyun.com/pypi/packages/64/8d/0133e4eb4beed9e425d9a98ed6e081a55d195481b7632472be1af08d2f6b/rsa-4.9.1-py3-none-any.whl", hash = "sha256:68635866661c6836b8d39430f97a996acbd61bfa49406748ea243539fe239762" },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rtfde"
|
||||||
|
version = "0.1.2.1"
|
||||||
|
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
|
||||||
|
dependencies = [
|
||||||
|
{ name = "lark" },
|
||||||
|
{ name = "oletools" },
|
||||||
|
]
|
||||||
|
sdist = { url = "https://mirrors.aliyun.com/pypi/packages/66/f1/3fafc33cd80cc605509ced36dbbb74c3c365d5859b0b57b6500e4a8ca8a5/rtfde-0.1.2.1.tar.gz", hash = "sha256:ea2653fb163ef1e9fdd1b0849bef88b0ba82537f860d4aca5b2c49f556efaaaa" }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://mirrors.aliyun.com/pypi/packages/b6/dd/641e9cf68d4242aaf7ce9653498009d8925080b6664993988bd50468932a/rtfde-0.1.2.1-py3-none-any.whl", hash = "sha256:c44dfa923a435c54cdbdd0e0f5352a4075542af317af061f82f2d4f032271645" },
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "ruamel-base"
|
name = "ruamel-base"
|
||||||
version = "1.0.0"
|
version = "1.0.0"
|
||||||
@ -6890,6 +7025,18 @@ wheels = [
|
|||||||
{ url = "https://mirrors.aliyun.com/pypi/packages/5c/23/c7abc0ca0a1526a0774eca151daeb8de62ec457e77262b66b359c3c7679e/tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8" },
|
{ url = "https://mirrors.aliyun.com/pypi/packages/5c/23/c7abc0ca0a1526a0774eca151daeb8de62ec457e77262b66b359c3c7679e/tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8" },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "tzlocal"
|
||||||
|
version = "5.3.1"
|
||||||
|
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
|
||||||
|
dependencies = [
|
||||||
|
{ name = "tzdata", marker = "sys_platform == 'win32'" },
|
||||||
|
]
|
||||||
|
sdist = { url = "https://mirrors.aliyun.com/pypi/packages/8b/2e/c14812d3d4d9cd1773c6be938f89e5735a1f11a9f184ac3639b93cef35d5/tzlocal-5.3.1.tar.gz", hash = "sha256:cceffc7edecefea1f595541dbd6e990cb1ea3d19bf01b2809f362a03dd7921fd" }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://mirrors.aliyun.com/pypi/packages/c2/14/e2a54fabd4f08cd7af1c07030603c3356b74da07f7cc056e600436edfa17/tzlocal-5.3.1-py3-none-any.whl", hash = "sha256:eb1a66c3ef5847adf7a834f1be0800581b683b5608e74f86ecbcef8ab91bb85d" },
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "umap-learn"
|
name = "umap-learn"
|
||||||
version = "0.5.6"
|
version = "0.5.6"
|
||||||
@ -7134,6 +7281,12 @@ dependencies = [
|
|||||||
]
|
]
|
||||||
sdist = { url = "https://mirrors.aliyun.com/pypi/packages/67/35/25e68fbc99e672127cc6fbb14b8ec1ba3dfef035bf1e4c90f78f24a80b7d/wikipedia-1.4.0.tar.gz", hash = "sha256:db0fad1829fdd441b1852306e9856398204dc0786d2996dd2e0c8bb8e26133b2" }
|
sdist = { url = "https://mirrors.aliyun.com/pypi/packages/67/35/25e68fbc99e672127cc6fbb14b8ec1ba3dfef035bf1e4c90f78f24a80b7d/wikipedia-1.4.0.tar.gz", hash = "sha256:db0fad1829fdd441b1852306e9856398204dc0786d2996dd2e0c8bb8e26133b2" }
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "win-unicode-console"
|
||||||
|
version = "0.5"
|
||||||
|
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
|
||||||
|
sdist = { url = "https://mirrors.aliyun.com/pypi/packages/89/8d/7aad74930380c8972ab282304a2ff45f3d4927108bb6693cabcc9fc6a099/win_unicode_console-0.5.zip", hash = "sha256:d4142d4d56d46f449d6f00536a73625a871cba040f0bc1a2e305a04578f07d1e" }
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "win32-setctime"
|
name = "win32-setctime"
|
||||||
version = "1.2.0"
|
version = "1.2.0"
|
||||||
|
|||||||
Reference in New Issue
Block a user