Fix bug when fetching files from MinIO (#574)

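### What problem does this PR solve?

`dispatch()` and `build(row)` fetched file content from MinIO using the row's `kb_id` and `location` as the bucket and object name. They now resolve the bucket and object name with `File2DocumentService.get_minio_address(doc_id=...)` before fetching, and `dispatch()` builds its Redis cache key from the same resolved pair.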

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
KevinHuSh
2024-04-28 09:57:40 +08:00
committed by GitHub
parent f1c98aad6b
commit 944776f207
6 changed files with 36 additions and 11 deletions

View File

@@ -20,6 +20,8 @@ import random
from datetime import datetime
from api.db.db_models import Task
from api.db.db_utils import bulk_insert_into_db
from api.db.services.file2document_service import File2DocumentService
from api.db.services.file_service import FileService
from api.db.services.task_service import TaskService
from deepdoc.parser import PdfParser
from deepdoc.parser.excel_parser import HuExcelParser
@@ -87,10 +89,11 @@ def dispatch():
tsks = []
try:
file_bin = MINIO.get(r["kb_id"], r["location"])
bucket, name = File2DocumentService.get_minio_address(doc_id=r["id"])
file_bin = MINIO.get(bucket, name)
if REDIS_CONN.is_alive():
try:
REDIS_CONN.set("{}/{}".format(r["kb_id"], r["location"]), file_bin, 12*60)
REDIS_CONN.set("{}/{}".format(bucket, name), file_bin, 12*60)
except Exception as e:
cron_logger.warning("Put into redis[EXCEPTION]:" + str(e))

View File

@@ -24,6 +24,8 @@ import sys
import time
import traceback
from functools import partial
from api.db.services.file2document_service import File2DocumentService
from rag.utils import MINIO
from api.db.db_models import close_connection
from rag.settings import database_logger
@@ -135,7 +137,8 @@ def build(row):
pool = Pool(processes=1)
try:
st = timer()
thr = pool.apply_async(get_minio_binary, args=(row["kb_id"], row["location"]))
bucket, name = File2DocumentService.get_minio_address(doc_id=row["doc_id"])
thr = pool.apply_async(get_minio_binary, args=(bucket, name))
binary = thr.get(timeout=90)
pool.terminate()
cron_logger.info(