From 028c2d83e91272f62d434014d06f979fb0b67c05 Mon Sep 17 00:00:00 2001 From: Lynn Date: Mon, 22 Sep 2025 09:29:38 +0800 Subject: [PATCH] Feat: parse email (#10181) ### What problem does this PR solve? - Dataflow supports email. - Fix the old email parser. - Add a new dependency to parse .msg files. ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) - [x] New Feature (non-breaking change which adds functionality) - [x] Other (please describe): add a new dependency. --- api/db/services/file_service.py | 2 +- api/utils/file_utils.py | 2 +- pyproject.toml | 1 + rag/app/email.py | 2 +- rag/flow/parser/parser.py | 137 +++++++++++++++- .../tests/dsl_examples/general_pdf_all.json | 16 ++ uv.lock | 155 +++++++++++++++++- 7 files changed, 309 insertions(+), 6 deletions(-) diff --git a/api/db/services/file_service.py b/api/db/services/file_service.py index 68d720aa4..f2a7e5dc2 100644 --- a/api/db/services/file_service.py +++ b/api/db/services/file_service.py @@ -496,7 +496,7 @@ class FileService(CommonService): return ParserType.AUDIO.value if re.search(r"\.(ppt|pptx|pages)$", filename): return ParserType.PRESENTATION.value - if re.search(r"\.(eml)$", filename): + if re.search(r"\.(msg|eml)$", filename): return ParserType.EMAIL.value return default diff --git a/api/utils/file_utils.py b/api/utils/file_utils.py index c349453bb..63e96fb78 100644 --- a/api/utils/file_utils.py +++ b/api/utils/file_utils.py @@ -155,7 +155,7 @@ def filename_type(filename): if re.match(r".*\.pdf$", filename): return FileType.PDF.value - if re.match(r".*\.(eml|doc|docx|ppt|pptx|yml|xml|htm|json|jsonl|ldjson|csv|txt|ini|xls|xlsx|wps|rtf|hlp|pages|numbers|key|md|py|js|java|c|cpp|h|php|go|ts|sh|cs|kt|html|sql)$", filename): + if re.match(r".*\.(msg|eml|doc|docx|ppt|pptx|yml|xml|htm|json|jsonl|ldjson|csv|txt|ini|xls|xlsx|wps|rtf|hlp|pages|numbers|key|md|py|js|java|c|cpp|h|php|go|ts|sh|cs|kt|html|sql)$", filename): return FileType.DOC.value if 
re.match(r".*\.(wav|flac|ape|alac|wavpack|wv|mp3|aac|ogg|vorbis|opus)$", filename): diff --git a/pyproject.toml b/pyproject.toml index c82956d77..b88a56349 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,6 +34,7 @@ dependencies = [ "elastic-transport==8.12.0", "elasticsearch==8.12.1", "elasticsearch-dsl==8.12.0", + "extract-msg>=0.39.0", "filelock==3.15.4", "flask==3.0.3", "flask-cors==5.0.0", diff --git a/rag/app/email.py b/rag/app/email.py index d8520e43d..1affe4f25 100644 --- a/rag/app/email.py +++ b/rag/app/email.py @@ -78,7 +78,7 @@ def chunk( _add_content(msg, msg.get_content_type()) sections = TxtParser.parser_txt("\n".join(text_txt)) + [ - (line, "") for line in HtmlParser.parser_txt("\n".join(html_txt)) if line + (line, "") for line in HtmlParser.parser_txt("\n".join(html_txt), chunk_token_num=parser_config["chunk_token_num"]) if line ] st = timer() diff --git a/rag/flow/parser/parser.py b/rag/flow/parser/parser.py index 57e52215c..9feac02d6 100644 --- a/rag/flow/parser/parser.py +++ b/rag/flow/parser/parser.py @@ -13,7 +13,9 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import io +import json import logging +import os import random from functools import partial @@ -57,7 +59,10 @@ class ParserParam(ProcessParamBase): "image": [ "text" ], - "email": [], + "email": [ + "text", + "json" + ], "text": [ "text", "json" @@ -112,7 +117,11 @@ class ParserParam(ProcessParamBase): "output_format": "json", }, "email": { - "fields": [] + "suffix": [ + "eml", "msg" + ], + "fields": ["from", "to", "cc", "bcc", "date", "subject", "body", "attachments", "metadata"], + "output_format": "json", }, "text": { "suffix": [ @@ -194,6 +203,11 @@ class ParserParam(ProcessParamBase): audio_language = audio_config.get("lang", "") self.check_empty(audio_language, "Language") + email_config = self.setups.get("email", "") + if email_config: + email_output_format = email_config.get("output_format", "") + self.check_valid_value(email_output_format, "Email output format abnormal.", self.allowed_output_format["email"]) + def get_input_form(self) -> dict[str, dict]: return {} @@ -384,6 +398,124 @@ class Parser(ProcessBase): self.set_output("text", txt) + def _email(self, from_upstream: ParserFromUpstream): + self.callback(random.randint(1, 5) / 100.0, "Start to work on an email.") + + blob = from_upstream.blob + name = from_upstream.name + + email_content = {} + conf = self._param.setups["email"] + target_fields = conf["fields"] + + _, ext = os.path.splitext(name) + if ext == ".eml": + # handle eml file + from email import policy + from email.parser import BytesParser + + msg = BytesParser(policy=policy.default).parse(io.BytesIO(blob)) + email_content['metadata'] = {} + # handle header info + for header, value in msg.items(): + # get fields like from, to, cc, bcc, date, subject + if header.lower() in target_fields: + email_content[header.lower()] = value + # get metadata + elif header.lower() not in ["from", "to", "cc", "bcc", "date", "subject"]: + email_content["metadata"][header.lower()] = value + # get body + if "body" in target_fields: + body_text, body_html = 
[], [] + def _add_content(m, content_type): + if content_type == "text/plain": + body_text.append( + m.get_payload(decode=True).decode(m.get_content_charset()) + ) + elif content_type == "text/html": + body_html.append( + m.get_payload(decode=True).decode(m.get_content_charset()) + ) + elif "multipart" in content_type: + if m.is_multipart(): + for part in m.iter_parts(): + _add_content(part, part.get_content_type()) + + _add_content(msg, msg.get_content_type()) + + email_content["text"] = body_text + email_content["text_html"] = body_html + # get attachment + if "attachments" in target_fields: + attachments = [] + for part in msg.iter_attachments(): + content_disposition = part.get("Content-Disposition") + if content_disposition: + dispositions = content_disposition.strip().split(";") + if dispositions[0].lower() == "attachment": + filename = part.get_filename() + payload = part.get_payload(decode=True) + attachments.append({ + "filename": filename, + "payload": payload, + }) + email_content["attachments"] = attachments + else: + # handle msg file + import extract_msg + print("handle a msg file.") + msg = extract_msg.Message(blob) + # handle header info + basic_content = { + "from": msg.sender, + "to": msg.to, + "cc": msg.cc, + "bcc": msg.bcc, + "date": msg.date, + "subject": msg.subject, + } + email_content.update({k: v for k, v in basic_content.items() if k in target_fields}) + # get metadata + email_content['metadata'] = { + 'message_id': msg.messageId, + 'in_reply_to': msg.inReplyTo, + } + # get body + if "body" in target_fields: + email_content["text"] = msg.body # usually empty. 
try text_html instead + email_content["text_html"] = msg.htmlBody + # get attachments + if "attachments" in target_fields: + attachments = [] + for t in msg.attachments: + attachments.append({ + "filename": t.name, + "payload": t.data # binary + }) + email_content["attachments"] = attachments + + if conf["output_format"] == "json": + self.set_output("json", [email_content]) + else: + content_txt = '' + for k, v in email_content.items(): + if isinstance(v, str): + # basic info + content_txt += f'{k}:{v}' + "\n" + elif isinstance(v, dict): + # metadata + content_txt += f'{k}:{json.dumps(v)}' + "\n" + elif isinstance(v, list): + # attachments or others + for fb in v: + if isinstance(fb, dict): + # attachments + content_txt += f'{fb["filename"]}:{fb["payload"]}' + "\n" + else: + # str, usually plain text + content_txt += fb + self.set_output("text", content_txt) + async def _invoke(self, **kwargs): function_map = { "pdf": self._pdf, @@ -394,6 +526,7 @@ class Parser(ProcessBase): "text": self._text, "image": self._image, "audio": self._audio, + "email": self._email, } try: from_upstream = ParserFromUpstream.model_validate(kwargs) diff --git a/rag/flow/tests/dsl_examples/general_pdf_all.json b/rag/flow/tests/dsl_examples/general_pdf_all.json index 2a6973dd8..40f796af6 100644 --- a/rag/flow/tests/dsl_examples/general_pdf_all.json +++ b/rag/flow/tests/dsl_examples/general_pdf_all.json @@ -89,6 +89,22 @@ "lang": "Chinese", "llm_id": "SenseVoiceSmall", "output_format": "json" + }, + "email": { + "suffix": [ + "msg" + ], + "fields": [ + "from", + "to", + "cc", + "bcc", + "date", + "subject", + "body", + "attachments" + ], + "output_format": "json" } } } diff --git a/uv.lock b/uv.lock index f9d903a47..943912822 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 1 +revision = 3 requires-python = ">=3.10, <3.13" resolution-markers = [ "python_full_version >= '3.12' and sys_platform == 'darwin'", @@ -861,6 +861,15 @@ wheels = [ { url = 
"https://mirrors.aliyun.com/pypi/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6" }, ] +[[package]] +name = "colorclass" +version = "2.2.2" +source = { registry = "https://mirrors.aliyun.com/pypi/simple" } +sdist = { url = "https://mirrors.aliyun.com/pypi/packages/d7/1a/31ff00a33569a3b59d65bbdc445c73e12f92ad28195b7ace299f68b9af70/colorclass-2.2.2.tar.gz", hash = "sha256:6d4fe287766166a98ca7bc6f6312daf04a0481b1eda43e7173484051c0ab4366" } +wheels = [ + { url = "https://mirrors.aliyun.com/pypi/packages/30/b6/daf3e2976932da4ed3579cff7a30a53d22ea9323ee4f0d8e43be60454897/colorclass-2.2.2-py2.py3-none-any.whl", hash = "sha256:6f10c273a0ef7a1150b1120b6095cbdd68e5cf36dfd5d0fc957a2500bbf99a55" }, +] + [[package]] name = "coloredlogs" version = "15.0.1" @@ -873,6 +882,15 @@ wheels = [ { url = "https://mirrors.aliyun.com/pypi/packages/a7/06/3d6badcf13db419e25b07041d9c7b4a2c331d3f4e7134445ec5df57714cd/coloredlogs-15.0.1-py2.py3-none-any.whl", hash = "sha256:612ee75c546f53e92e70049c9dbfcc18c935a2b9a53b66085ce9ef6a6e5c0934" }, ] +[[package]] +name = "compressed-rtf" +version = "1.0.7" +source = { registry = "https://mirrors.aliyun.com/pypi/simple" } +sdist = { url = "https://mirrors.aliyun.com/pypi/packages/b7/0c/929a4e8ef9d7143f54d77dadb5f370cc7b98534b1bd6e1124d0abe8efb24/compressed_rtf-1.0.7.tar.gz", hash = "sha256:7c30859334839f3cdc7d10796af5b434bb326b9df7cb5a65e95a8eacb2951b0e" } +wheels = [ + { url = "https://mirrors.aliyun.com/pypi/packages/07/1d/62f5bf92e12335eb63517f42671ed78512d48bbc69e02a942dd7b90f03f0/compressed_rtf-1.0.7-py3-none-any.whl", hash = "sha256:b7904921d78c67a0a4b7fff9fb361a00ae2b447b6edca010ce321cd98fa0fcc0" }, +] + [[package]] name = "contourpy" version = "1.3.2" @@ -1322,6 +1340,23 @@ wheels = [ { url = 
"https://mirrors.aliyun.com/pypi/packages/fc/da/8376678b4a9ae0f9418d93df9c9cf851dced49c95ceb38daac6651e38f7a/duckduckgo_search-7.5.5-py3-none-any.whl", hash = "sha256:c71a0661aa436f215d9a05d653af424affb58825ab3e79f3b788053cbdee9ebc" }, ] +[[package]] +name = "easygui" +version = "0.98.3" +source = { registry = "https://mirrors.aliyun.com/pypi/simple" } +sdist = { url = "https://mirrors.aliyun.com/pypi/packages/cc/ad/e35f7a30272d322be09dc98592d2f55d27cc933a7fde8baccbbeb2bd9409/easygui-0.98.3.tar.gz", hash = "sha256:d653ff79ee1f42f63b5a090f2f98ce02335d86ad8963b3ce2661805cafe99a04" } +wheels = [ + { url = "https://mirrors.aliyun.com/pypi/packages/8e/a7/b276ff776533b423710a285c8168b52551cb2ab0855443131fdc7fd8c16f/easygui-0.98.3-py2.py3-none-any.whl", hash = "sha256:33498710c68b5376b459cd3fc48d1d1f33822139eb3ed01defbc0528326da3ba" }, +] + +[[package]] +name = "ebcdic" +version = "1.1.1" +source = { registry = "https://mirrors.aliyun.com/pypi/simple" } +wheels = [ + { url = "https://mirrors.aliyun.com/pypi/packages/0d/2f/633031205333bee5f9f93761af8268746aa75f38754823aabb8570eb245b/ebcdic-1.1.1-py2.py3-none-any.whl", hash = "sha256:33b4cb729bc2d0bf46cc1847b0e5946897cb8d3f53520c5b9aa5fa98d7e735f1" }, +] + [[package]] name = "editdistance" version = "0.8.1" @@ -1435,6 +1470,24 @@ wheels = [ { url = "https://mirrors.aliyun.com/pypi/packages/36/f4/c6e662dade71f56cd2f3735141b265c3c79293c109549c1e6933b0651ffc/exceptiongroup-1.3.0-py3-none-any.whl", hash = "sha256:4d111e6e0c13d0644cad6ddaa7ed0261a0b36971f6d23e7ec9b4b9097da78a10" }, ] +[[package]] +name = "extract-msg" +version = "0.55.0" +source = { registry = "https://mirrors.aliyun.com/pypi/simple" } +dependencies = [ + { name = "beautifulsoup4" }, + { name = "compressed-rtf" }, + { name = "ebcdic" }, + { name = "olefile" }, + { name = "red-black-tree-mod" }, + { name = "rtfde" }, + { name = "tzlocal" }, +] +sdist = { url = 
"https://mirrors.aliyun.com/pypi/packages/5e/65/c70afb3b119a44b3ee36b029485dc15326cf3a7c50da19a1ecbbf949c5d1/extract_msg-0.55.0.tar.gz", hash = "sha256:cf08283498c3dfcc7f894dad1579f52e3ced9fb76b865c2355cbe757af8a54e1" } +wheels = [ + { url = "https://mirrors.aliyun.com/pypi/packages/53/81/87d5241036046ea17c5c8db228f4c9e04e07e53b627015d4496a99449aaf/extract_msg-0.55.0-py3-none-any.whl", hash = "sha256:baf0cdee9a8d267b70c366bc57ceb03dbfa1e7ab2dca6824169a7fe623f0917c" }, +] + [[package]] name = "fake-http-header" version = "0.3.5" @@ -2893,6 +2946,15 @@ wheels = [ { url = "https://mirrors.aliyun.com/pypi/packages/92/b0/8f08df3f0fa584c4132937690c6dd33e0a116f963ecf2b35567f614e0ca7/langfuse-3.2.1-py3-none-any.whl", hash = "sha256:07a84e8c1eed6ac8e149bdda1431fd866e4aee741b66124316336fb2bc7e6a32" }, ] +[[package]] +name = "lark" +version = "1.1.9" +source = { registry = "https://mirrors.aliyun.com/pypi/simple" } +sdist = { url = "https://mirrors.aliyun.com/pypi/packages/2c/e1/804b6196b3fbdd0f8ba785fc62837b034782a891d6f663eea2f30ca23cfa/lark-1.1.9.tar.gz", hash = "sha256:15fa5236490824c2c4aba0e22d2d6d823575dcaf4cdd1848e34b6ad836240fba" } +wheels = [ + { url = "https://mirrors.aliyun.com/pypi/packages/e7/9c/eef7c591e6dc952f3636cfe0df712c0f9916cedf317810a3bb53ccb65cdd/lark-1.1.9-py3-none-any.whl", hash = "sha256:a0dd3a87289f8ccbb325901e4222e723e7d745dbfc1803eaf5f3d2ace19cf2db" }, +] + [[package]] name = "litellm" version = "1.75.5.post1" @@ -3377,6 +3439,19 @@ wheels = [ { url = "https://mirrors.aliyun.com/pypi/packages/b1/ef/27dd35a7049c9a4f4211c6cd6a8c9db0a50647546f003a5867827ec45391/msgspec-0.19.0-cp312-cp312-win_amd64.whl", hash = "sha256:067f0de1c33cfa0b6a8206562efdf6be5985b988b53dd244a8e06f993f27c8c0" }, ] +[[package]] +name = "msoffcrypto-tool" +version = "5.4.2" +source = { registry = "https://mirrors.aliyun.com/pypi/simple" } +dependencies = [ + { name = "cryptography" }, + { name = "olefile" }, +] +sdist = { url = 
"https://mirrors.aliyun.com/pypi/packages/d2/b7/0fd6573157e0ec60c0c470e732ab3322fba4d2834fd24e1088d670522a01/msoffcrypto_tool-5.4.2.tar.gz", hash = "sha256:44b545adba0407564a0cc3d6dde6ca36b7c0fdf352b85bca51618fa1d4817370" } +wheels = [ + { url = "https://mirrors.aliyun.com/pypi/packages/03/54/7f6d3d9acad083dae8c22d9ab483b657359a1bf56fee1d7af88794677707/msoffcrypto_tool-5.4.2-py3-none-any.whl", hash = "sha256:274fe2181702d1e5a107ec1b68a4c9fea997a44972ae1cc9ae0cb4f6a50fef0e" }, +] + [[package]] name = "multidict" version = "6.6.3" @@ -3726,6 +3801,32 @@ wheels = [ { url = "https://mirrors.aliyun.com/pypi/packages/9e/4e/0d0c945463719429b7bd21dece907ad0bde437a2ff12b9b12fee94722ab0/nvidia_nvtx_cu12-12.6.77-py3-none-manylinux2014_x86_64.whl", hash = "sha256:6574241a3ec5fdc9334353ab8c479fe75841dbe8f4532a8fc97ce63503330ba1" }, ] +[[package]] +name = "olefile" +version = "0.47" +source = { registry = "https://mirrors.aliyun.com/pypi/simple" } +sdist = { url = "https://mirrors.aliyun.com/pypi/packages/69/1b/077b508e3e500e1629d366249c3ccb32f95e50258b231705c09e3c7a4366/olefile-0.47.zip", hash = "sha256:599383381a0bf3dfbd932ca0ca6515acd174ed48870cbf7fee123d698c192c1c" } +wheels = [ + { url = "https://mirrors.aliyun.com/pypi/packages/17/d3/b64c356a907242d719fc668b71befd73324e47ab46c8ebbbede252c154b2/olefile-0.47-py2.py3-none-any.whl", hash = "sha256:543c7da2a7adadf21214938bb79c83ea12b473a4b6ee4ad4bf854e7715e13d1f" }, +] + +[[package]] +name = "oletools" +version = "0.60.2" +source = { registry = "https://mirrors.aliyun.com/pypi/simple" } +dependencies = [ + { name = "colorclass" }, + { name = "easygui" }, + { name = "msoffcrypto-tool", marker = "(platform_python_implementation != 'PyPy' and sys_platform == 'darwin') or (platform_python_implementation != 'PyPy' and sys_platform == 'win32') or (sys_platform != 'darwin' and sys_platform != 'win32')" }, + { name = "olefile" }, + { name = "pcodedmp" }, + { name = "pyparsing" }, +] +sdist = { url = 
"https://mirrors.aliyun.com/pypi/packages/5c/2f/037f40e44706d542b94a2312ccc33ee2701ebfc9a83b46b55263d49ce55a/oletools-0.60.2.zip", hash = "sha256:ad452099f4695ffd8855113f453348200d195ee9fa341a09e197d66ee7e0b2c3" } +wheels = [ + { url = "https://mirrors.aliyun.com/pypi/packages/ac/ff/05257b7183279b80ecec6333744de23f48f0faeeba46c93e6d13ce835515/oletools-0.60.2-py2.py3-none-any.whl", hash = "sha256:72ad8bd748fd0c4e7b5b4733af770d11543ebb2bf2697455f99f975fcd50cc96" }, +] + [[package]] name = "ollama" version = "0.2.1" @@ -4188,6 +4289,19 @@ wheels = [ { url = "https://mirrors.aliyun.com/pypi/packages/87/2b/b50d3d08ea0fc419c183a84210571eba005328efa62b6b98bc28e9ead32a/patsy-1.0.1-py2.py3-none-any.whl", hash = "sha256:751fb38f9e97e62312e921a1954b81e1bb2bcda4f5eeabaf94db251ee791509c" }, ] +[[package]] +name = "pcodedmp" +version = "1.2.6" +source = { registry = "https://mirrors.aliyun.com/pypi/simple" } +dependencies = [ + { name = "oletools" }, + { name = "win-unicode-console", marker = "platform_python_implementation != 'PyPy' and sys_platform == 'win32'" }, +] +sdist = { url = "https://mirrors.aliyun.com/pypi/packages/3d/20/6d461e29135f474408d0d7f95b2456a9ba245560768ee51b788af10f7429/pcodedmp-1.2.6.tar.gz", hash = "sha256:025f8c809a126f45a082ffa820893e6a8d990d9d7ddb68694b5a9f0a6dbcd955" } +wheels = [ + { url = "https://mirrors.aliyun.com/pypi/packages/ba/72/b380fb5c89d89c3afafac8cf02a71a45f4f4a4f35531ca949a34683962d1/pcodedmp-1.2.6-py2.py3-none-any.whl", hash = "sha256:4441f7c0ab4cbda27bd4668db3b14f36261d86e5059ce06c0828602cbe1c4278" }, +] + [[package]] name = "pdfminer-six" version = "20221105" @@ -5300,6 +5414,7 @@ dependencies = [ { name = "elastic-transport" }, { name = "elasticsearch" }, { name = "elasticsearch-dsl" }, + { name = "extract-msg" }, { name = "filelock" }, { name = "flasgger" }, { name = "flask" }, @@ -5452,6 +5567,7 @@ requires-dist = [ { name = "elastic-transport", specifier = "==8.12.0" }, { name = "elasticsearch", specifier = "==8.12.1" }, { name = 
"elasticsearch-dsl", specifier = "==8.12.0" }, + { name = "extract-msg", specifier = ">=0.39.0" }, { name = "fastembed", marker = "(platform_machine != 'x86_64' and extra == 'full') or (sys_platform == 'darwin' and extra == 'full')", specifier = ">=0.3.6,<0.4.0" }, { name = "fastembed-gpu", marker = "platform_machine == 'x86_64' and sys_platform != 'darwin' and extra == 'full'", specifier = ">=0.3.6,<0.4.0" }, { name = "filelock", specifier = "==3.15.4" }, @@ -5630,6 +5746,12 @@ wheels = [ { url = "https://mirrors.aliyun.com/pypi/packages/c2/5a/2f2e7fc026d5e64b5408aa3fbe0296a6407b8481196cae4daacacb3a3ae0/readerwriterlock-1.0.9-py3-none-any.whl", hash = "sha256:8c4b704e60d15991462081a27ef46762fea49b478aa4426644f2146754759ca7" }, ] +[[package]] +name = "red-black-tree-mod" +version = "1.22" +source = { registry = "https://mirrors.aliyun.com/pypi/simple" } +sdist = { url = "https://mirrors.aliyun.com/pypi/packages/48/75/bfa342a2ebfc9623b701f1c6995b9906fd6dd2cedf6bce777d09e23303ac/red-black-tree-mod-1.22.tar.gz", hash = "sha256:38e3652903a2bf96379c27c2082ca0b7b905158662dd7ef0c97f4fd93a9aa908" } + [[package]] name = "referencing" version = "0.36.2" @@ -5883,6 +6005,19 @@ wheels = [ { url = "https://mirrors.aliyun.com/pypi/packages/64/8d/0133e4eb4beed9e425d9a98ed6e081a55d195481b7632472be1af08d2f6b/rsa-4.9.1-py3-none-any.whl", hash = "sha256:68635866661c6836b8d39430f97a996acbd61bfa49406748ea243539fe239762" }, ] +[[package]] +name = "rtfde" +version = "0.1.2.1" +source = { registry = "https://mirrors.aliyun.com/pypi/simple" } +dependencies = [ + { name = "lark" }, + { name = "oletools" }, +] +sdist = { url = "https://mirrors.aliyun.com/pypi/packages/66/f1/3fafc33cd80cc605509ced36dbbb74c3c365d5859b0b57b6500e4a8ca8a5/rtfde-0.1.2.1.tar.gz", hash = "sha256:ea2653fb163ef1e9fdd1b0849bef88b0ba82537f860d4aca5b2c49f556efaaaa" } +wheels = [ + { url = 
"https://mirrors.aliyun.com/pypi/packages/b6/dd/641e9cf68d4242aaf7ce9653498009d8925080b6664993988bd50468932a/rtfde-0.1.2.1-py3-none-any.whl", hash = "sha256:c44dfa923a435c54cdbdd0e0f5352a4075542af317af061f82f2d4f032271645" }, +] + [[package]] name = "ruamel-base" version = "1.0.0" @@ -6890,6 +7025,18 @@ wheels = [ { url = "https://mirrors.aliyun.com/pypi/packages/5c/23/c7abc0ca0a1526a0774eca151daeb8de62ec457e77262b66b359c3c7679e/tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8" }, ] +[[package]] +name = "tzlocal" +version = "5.3.1" +source = { registry = "https://mirrors.aliyun.com/pypi/simple" } +dependencies = [ + { name = "tzdata", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://mirrors.aliyun.com/pypi/packages/8b/2e/c14812d3d4d9cd1773c6be938f89e5735a1f11a9f184ac3639b93cef35d5/tzlocal-5.3.1.tar.gz", hash = "sha256:cceffc7edecefea1f595541dbd6e990cb1ea3d19bf01b2809f362a03dd7921fd" } +wheels = [ + { url = "https://mirrors.aliyun.com/pypi/packages/c2/14/e2a54fabd4f08cd7af1c07030603c3356b74da07f7cc056e600436edfa17/tzlocal-5.3.1-py3-none-any.whl", hash = "sha256:eb1a66c3ef5847adf7a834f1be0800581b683b5608e74f86ecbcef8ab91bb85d" }, +] + [[package]] name = "umap-learn" version = "0.5.6" @@ -7134,6 +7281,12 @@ dependencies = [ ] sdist = { url = "https://mirrors.aliyun.com/pypi/packages/67/35/25e68fbc99e672127cc6fbb14b8ec1ba3dfef035bf1e4c90f78f24a80b7d/wikipedia-1.4.0.tar.gz", hash = "sha256:db0fad1829fdd441b1852306e9856398204dc0786d2996dd2e0c8bb8e26133b2" } +[[package]] +name = "win-unicode-console" +version = "0.5" +source = { registry = "https://mirrors.aliyun.com/pypi/simple" } +sdist = { url = "https://mirrors.aliyun.com/pypi/packages/89/8d/7aad74930380c8972ab282304a2ff45f3d4927108bb6693cabcc9fc6a099/win_unicode_console-0.5.zip", hash = "sha256:d4142d4d56d46f449d6f00536a73625a871cba040f0bc1a2e305a04578f07d1e" } + [[package]] name = "win32-setctime" version = "1.2.0"