Mirror of https://github.com/infiniflow/ragflow.git, synced 2025-12-08 12:32:30 +08:00
fix task canceling bug (#98)
@@ -28,7 +28,7 @@ class Pdf(PdfParser):
                              from_page,
                              to_page,
                              callback)
-        callback("OCR finished")
+        callback(msg="OCR finished")

         from timeit import default_timer as timer
         start = timer()
@@ -57,7 +57,7 @@ class Pdf(PdfParser):
                              to_page,
                              callback
                              )
-        callback("OCR finished")
+        callback(msg="OCR finished")

         from timeit import default_timer as timer
         start = timer()
@@ -135,6 +135,6 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, lang="Chinese", ca

 if __name__ == "__main__":
     import sys
-    def dummy(a, b):
+    def dummy(prog=None, msg=""):
         pass
     chunk(sys.argv[1], callback=dummy)
@@ -22,7 +22,7 @@ class Pdf(PdfParser):
                              to_page,
                              callback
                              )
-        callback("OCR finished.")
+        callback(msg="OCR finished.")

         from timeit import default_timer as timer
         start = timer()
@@ -29,7 +29,7 @@ class Pdf(PdfParser):
                              to_page,
                              callback
                              )
-        callback("OCR finished")
+        callback(msg="OCR finished")

         from timeit import default_timer as timer
         start = timer()
@@ -36,7 +36,7 @@ class Pdf(PdfParser):
                              to_page,
                              callback
                              )
-        callback("OCR finished.")
+        callback(msg="OCR finished.")

         from timeit import default_timer as timer
         start = timer()
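Note: the Pdf parser hunks above all make the same change: the progress callback is now called with an explicit msg= keyword instead of a positional string, matching the (prog, msg) parameters of the updated dummy callback. A minimal sketch of such a callback, assuming prog is a completion fraction (that interpretation is an assumption; only the parameter names come from the diff):

def progress_callback(prog=None, msg=""):
    # prog: optional completion fraction in [0, 1]; msg: status text
    if prog is not None:
        print(f"[{prog:.0%}] {msg}")
    else:
        print(msg)

# A positional call like progress_callback("OCR finished") would bind the string
# to prog, which is why the hunks switch to the keyword form:
progress_callback(msg="OCR finished")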
@ -305,8 +305,15 @@ class Dealer:
|
||||
"similarity": sim[i],
|
||||
"vector_similarity": vsim[i],
|
||||
"term_similarity": tsim[i],
|
||||
"vector": self.trans2floats(sres.field[id].get("q_%d_vec" % dim, "\t".join(["0"] * dim)))
|
||||
"vector": self.trans2floats(sres.field[id].get("q_%d_vec" % dim, "\t".join(["0"] * dim))),
|
||||
"positions": sres.field[id].get("position_int", "").split("\t")
|
||||
}
|
||||
if len(d["positions"]) % 5 == 0:
|
||||
poss = []
|
||||
for i in range(0, len(d["positions"]), 5):
|
||||
poss.append([float(d["positions"][i]), float(d["positions"][i + 1]), float(d["positions"][i + 2]),
|
||||
float(d["positions"][i + 3]), float(d["positions"][i + 4])])
|
||||
d["positions"] = poss
|
||||
ranks["chunks"].append(d)
|
||||
if dnm not in ranks["doc_aggs"]:
|
||||
ranks["doc_aggs"][dnm] = {"doc_id": did, "count": 0}
|
||||
|
||||
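Note: the Dealer hunk attaches the chunk's position_int field to every returned result and regroups its flat, tab-separated values into lists of five numbers. A minimal sketch of that regrouping, assuming each group of five describes one position (the meaning of the five values is an assumption; the stride of five comes from the hunk):

def parse_positions(position_int: str):
    # Mirror the hunk: only regroup when the flat list splits evenly into fives;
    # otherwise return it untouched, as the diff does.
    flat = position_int.split("\t")
    if len(flat) % 5 != 0:
        return flat
    return [[float(v) for v in flat[i:i + 5]] for i in range(0, len(flat), 5)]

print(parse_positions("1\t10\t200\t30\t40\t2\t15\t210\t35\t45"))
# [[1.0, 10.0, 200.0, 30.0, 40.0], [2.0, 15.0, 210.0, 35.0, 45.0]]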
@@ -25,6 +25,7 @@ import traceback
 from functools import partial
+from timeit import default_timer as timer

 import numpy as np
 from elasticsearch_dsl import Q

 from api.db.services.task_service import TaskService
@@ -177,10 +178,11 @@ def embedding(docs, mdl, parser_config={}, callback=None):
         tts, c = mdl.encode(tts)
         tk_count += c

-    cnts_ = []
+    cnts_ = np.array([])
     for i in range(0, len(cnts), 32):
         vts, c = mdl.encode(cnts[i: i+32])
-        cnts_.extend(vts)
+        if len(cnts_) == 0: cnts_ = vts
+        else: cnts_ = np.concatenate((cnts_, vts), axis=0)
         tk_count += c
         callback(msg="")
     cnts = cnts_
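Note: the embedding hunk replaces the list accumulator with NumPy concatenation, so the batched embeddings end up as one 2-D array rather than a Python list of row vectors. A self-contained sketch of the same pattern, where encode() is a hypothetical stand-in for mdl.encode():

import numpy as np

def encode(texts):
    # hypothetical stand-in for mdl.encode(): returns (vectors, token_count)
    return np.random.rand(len(texts), 8), sum(len(t) for t in texts)

cnts = [f"chunk {i}" for i in range(100)]
cnts_, tk_count = np.array([]), 0
for i in range(0, len(cnts), 32):
    vts, c = encode(cnts[i: i + 32])
    # np.concatenate needs matching dimensions, hence the explicit first-batch branch
    cnts_ = vts if len(cnts_) == 0 else np.concatenate((cnts_, vts), axis=0)
    tk_count += c
print(cnts_.shape)  # (100, 8)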