mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
fix bug about fetching knowledge graph (#3394)
### What problem does this PR solve? ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
@ -301,16 +301,13 @@ def retrieval_test():
|
||||
@login_required
|
||||
def knowledge_graph():
|
||||
doc_id = request.args["doc_id"]
|
||||
e, doc = DocumentService.get_by_id(doc_id)
|
||||
if not e:
|
||||
return get_data_error_result(message="Document not found!")
|
||||
tenant_id = DocumentService.get_tenant_id(doc_id)
|
||||
kb_ids = KnowledgebaseService.get_kb_ids(tenant_id)
|
||||
req = {
|
||||
"doc_ids":[doc_id],
|
||||
"knowledge_graph_kwd": ["graph", "mind_map"]
|
||||
}
|
||||
sres = retrievaler.search(req, search.index_name(tenant_id), kb_ids, doc.kb_id)
|
||||
sres = retrievaler.search(req, search.index_name(tenant_id), kb_ids)
|
||||
obj = {"graph": {}, "mind_map": {}}
|
||||
for id in sres.ids[:2]:
|
||||
ty = sres.field[id]["knowledge_graph_kwd"]
|
||||
|
||||
@ -524,7 +524,7 @@ def upload_and_parse():
|
||||
@manager.route('/parse', methods=['POST'])
|
||||
@login_required
|
||||
def parse():
|
||||
url = request.json.get("url")
|
||||
url = request.json.get("url") if request.json else ""
|
||||
if url:
|
||||
if not is_valid_url(url):
|
||||
return get_json_result(
|
||||
@ -537,7 +537,7 @@ def parse():
|
||||
options.add_argument('--disable-dev-shm-usage')
|
||||
driver = Chrome(options=options)
|
||||
driver.get(url)
|
||||
sections = RAGFlowHtmlParser()("", binary=driver.page_source)
|
||||
sections = RAGFlowHtmlParser().parser_txt(driver.page_source)
|
||||
return get_json_result(data="\n".join(sections))
|
||||
|
||||
if 'file' not in request.files:
|
||||
|
||||
@ -15,6 +15,8 @@
|
||||
#
|
||||
import re
|
||||
import os
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
|
||||
from flask_login import current_user
|
||||
from peewee import fn
|
||||
|
||||
@ -385,6 +387,41 @@ class FileService(CommonService):
|
||||
|
||||
return err, files
|
||||
|
||||
@staticmethod
|
||||
def parse_docs(file_objs, user_id):
|
||||
from rag.app import presentation, picture, naive, audio, email
|
||||
|
||||
def dummy(prog=None, msg=""):
|
||||
pass
|
||||
|
||||
FACTORY = {
|
||||
ParserType.PRESENTATION.value: presentation,
|
||||
ParserType.PICTURE.value: picture,
|
||||
ParserType.AUDIO.value: audio,
|
||||
ParserType.EMAIL.value: email
|
||||
}
|
||||
parser_config = {"chunk_token_num": 16096, "delimiter": "\n!?;。;!?", "layout_recognize": False}
|
||||
exe = ThreadPoolExecutor(max_workers=12)
|
||||
threads = []
|
||||
for file in file_objs:
|
||||
kwargs = {
|
||||
"lang": "English",
|
||||
"callback": dummy,
|
||||
"parser_config": parser_config,
|
||||
"from_page": 0,
|
||||
"to_page": 100000,
|
||||
"tenant_id": user_id
|
||||
}
|
||||
filetype = filename_type(file.filename)
|
||||
blob = file.read()
|
||||
threads.append(exe.submit(FACTORY.get(FileService.get_parser(filetype, file.filename, ""), naive).chunk, file.filename, blob, **kwargs))
|
||||
|
||||
res = []
|
||||
for th in threads:
|
||||
res.append("\n".join([ck["content_with_weight"] for ck in th.result()]))
|
||||
|
||||
return "\n\n".join(res)
|
||||
|
||||
@staticmethod
|
||||
def get_parser(doc_type, filename, default):
|
||||
if doc_type == FileType.VISUAL:
|
||||
|
||||
@ -73,7 +73,7 @@ class KnowledgebaseService(CommonService):
|
||||
cls.model.id,
|
||||
]
|
||||
kbs = cls.model.select(*fields).where(cls.model.tenant_id == tenant_id)
|
||||
kb_ids = [kb["id"] for kb in kbs]
|
||||
kb_ids = [kb.id for kb in kbs]
|
||||
return kb_ids
|
||||
|
||||
@classmethod
|
||||
|
||||
Reference in New Issue
Block a user