Add local LLM implementation (#119)

This commit is contained in:
KevinHuSh
2024-03-12 11:57:08 +08:00
committed by GitHub
parent 0452a6db73
commit f1f09df901
17 changed files with 196 additions and 25 deletions

View File

@ -22,6 +22,7 @@ from api.db.db_models import Task
from api.db.db_utils import bulk_insert_into_db
from api.db.services.task_service import TaskService
from deepdoc.parser import PdfParser
from deepdoc.parser.excel_parser import HuExcelParser
from rag.settings import cron_logger
from rag.utils import MINIO
from rag.utils import findMaxTm
@ -88,6 +89,13 @@ def dispatch():
task["from_page"] = p
task["to_page"] = min(p + 5, e)
tsks.append(task)
elif r["parser_id"] == "table":
rn = HuExcelParser.row_number(r["name"], MINIO.get(r["kb_id"], r["location"]))
for i in range(0, rn, 1000):
task = new_task()
task["from_page"] = i
task["to_page"] = min(i + 1000, rn)
tsks.append(task)
else:
tsks.append(new_task())

View File

@ -184,7 +184,7 @@ def embedding(docs, mdl, parser_config={}, callback=None):
if len(cnts_) == 0: cnts_ = vts
else: cnts_ = np.concatenate((cnts_, vts), axis=0)
tk_count += c
callback(msg="")
callback(prog=0.7+0.2*(i+1)/len(cnts), msg="")
cnts = cnts_
title_w = float(parser_config.get("filename_embd_weight", 0.1))