Add support for parsing EML files (#1768)

### What problem does this PR solve?

Add a parser for EML (email) files.
#1363

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

---------

Co-authored-by: Zhedong Cen <cenzhedong2@126.com>
Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>
This commit is contained in:
黄腾
2024-08-06 16:42:14 +08:00
committed by GitHub
parent b67484e77d
commit ede733e130
12 changed files with 178 additions and 28 deletions

View File

@@ -39,7 +39,7 @@ from api.utils import get_uuid
from api.utils.api_utils import construct_json_result, construct_error_response
from api.utils.api_utils import construct_result, validate_request
from api.utils.file_utils import filename_type, thumbnail
from rag.app import book, laws, manual, naive, one, paper, presentation, qa, resume, table, picture, audio
from rag.app import book, laws, manual, naive, one, paper, presentation, qa, resume, table, picture, audio, email
from rag.nlp import search
from rag.utils.es_conn import ELASTICSEARCH
from rag.utils.minio_conn import MINIO
@@ -652,6 +652,8 @@ def doc_parse(binary, doc_name, parser_name, tenant_id, doc_id):
table.chunk(doc_name, binary=binary, callback=partial(doc_parse_callback, doc_id))
case "audio":
audio.chunk(doc_name, binary=binary, callback=partial(doc_parse_callback, doc_id))
case "email":
email.chunk(doc_name, binary=binary, callback=partial(doc_parse_callback, doc_id))
case _:
return False