fix bug about fetching knowledge graph (#3394)

### What problem does this PR solve?


### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
Kevin Hu
2024-11-14 12:29:15 +08:00
committed by GitHub
parent 400fc3f5e9
commit 4caf932808
6 changed files with 120 additions and 62 deletions

View File

@ -15,6 +15,8 @@
#
import re
import os
from concurrent.futures import ThreadPoolExecutor
from flask_login import current_user
from peewee import fn
@ -385,6 +387,41 @@ class FileService(CommonService):
return err, files
@staticmethod
def parse_docs(file_objs, user_id):
from rag.app import presentation, picture, naive, audio, email
def dummy(prog=None, msg=""):
pass
FACTORY = {
ParserType.PRESENTATION.value: presentation,
ParserType.PICTURE.value: picture,
ParserType.AUDIO.value: audio,
ParserType.EMAIL.value: email
}
parser_config = {"chunk_token_num": 16096, "delimiter": "\n!?;。;!?", "layout_recognize": False}
exe = ThreadPoolExecutor(max_workers=12)
threads = []
for file in file_objs:
kwargs = {
"lang": "English",
"callback": dummy,
"parser_config": parser_config,
"from_page": 0,
"to_page": 100000,
"tenant_id": user_id
}
filetype = filename_type(file.filename)
blob = file.read()
threads.append(exe.submit(FACTORY.get(FileService.get_parser(filetype, file.filename, ""), naive).chunk, file.filename, blob, **kwargs))
res = []
for th in threads:
res.append("\n".join([ck["content_with_weight"] for ck in th.result()]))
return "\n\n".join(res)
@staticmethod
def get_parser(doc_type, filename, default):
if doc_type == FileType.VISUAL: