### What problem does this PR solve?

### Type of change
- [x] Refactoring
This commit is contained in:
Kevin Hu
2024-12-31 14:31:31 +08:00
committed by GitHub
parent b52b0f68fc
commit 4ba4f622a5
6 changed files with 11 additions and 15 deletions

View File

@ -92,10 +92,12 @@ DONE_TASKS = 0
FAILED_TASKS = 0
CURRENT_TASK = None
class TaskCanceledException(Exception):
    """Exception that carries a message in its ``msg`` attribute.

    NOTE(review): the name suggests it signals a canceled task; the raise
    and catch sites are not visible in this view -- confirm against callers.

    Args:
        msg: Message describing why the exception was raised. Stored on
            ``self.msg`` and also forwarded to ``Exception`` so that
            ``str(exc)`` and ``exc.args`` carry it.
    """

    def __init__(self, msg):
        # Forward the message to the base class: without this, constructing
        # the exception with a keyword argument (``msg=...``) leaves ``args``
        # empty and ``str(exc)`` returns "", hiding the message in logs.
        super().__init__(msg)
        self.msg = msg
def set_progress(task_id, from_page=0, to_page=-1, prog=None, msg="Processing..."):
global PAYLOAD
if prog is not None and prog < 0:
@ -250,7 +252,7 @@ def build_chunks(task, progress_callback):
STORAGE_IMPL.put(task["kb_id"], d["id"], output_buffer.getvalue())
el += timer() - st
except Exception:
logging.exception("Saving image of chunk {}/{}/{} got exception".format(task["location"], task["name"], d["_id"]))
logging.exception("Saving image of chunk {}/{}/{} got exception".format(task["location"], task["name"], d["id"]))
raise
d["img_id"] = "{}-{}".format(task["kb_id"], d["id"])
@ -312,6 +314,8 @@ def embedding(docs, mdl, parser_config=None, callback=None):
if not c:
c = d["content_with_weight"]
c = re.sub(r"</?(table|td|caption|tr|th)( [^<>]{0,12})?>", " ", c)
if not c:
c = "None"
cnts.append(c)
tk_count = 0
@ -394,8 +398,6 @@ def run_raptor(row, chat_mdl, embd_mdl, callback=None):
return res, tk_count, vector_size
def do_handle_task(task):
task_id = task["id"]
task_from_page = task["from_page"]