Mirror of https://github.com/infiniflow/ragflow.git, synced 2025-12-08 20:42:30 +08:00
Add Dockerfile for CUDA environment; refine table search strategy (#123)
@@ -34,7 +34,7 @@ class HuExcelParser:
             total = 0
             for sheetname in wb.sheetnames:
                 ws = wb[sheetname]
-                total += len(ws.rows)
+                total += len(list(ws.rows))
             return total

         if fnm.split(".")[-1].lower() in ["csv", "txt"]:
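Why the one-line change: in openpyxl, Worksheet.rows is a generator, so len(ws.rows) raises TypeError; materializing it with list() makes the count work. A minimal sketch of the fixed counting logic, assuming openpyxl is installed (count_rows is a hypothetical name):

    from io import BytesIO
    from openpyxl import load_workbook

    def count_rows(binary):
        wb = load_workbook(BytesIO(binary))
        total = 0
        for sheetname in wb.sheetnames:
            ws = wb[sheetname]
            # ws.rows is a generator; list() materializes it so len() works
            total += len(list(ws.rows))
        return total

    # usage: count_rows(open("book.xlsx", "rb").read())  -- placeholder file

ws.max_row would avoid building the intermediate list, at the cost of trusting the sheet's recorded dimensions.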
@@ -655,14 +655,14 @@ class HuParser:
             #if min(tv, fv) > 2000:
             # i += 1
             # continue
-            if tv < fv:
+            if tv < fv and tk:
                 tables[tk].insert(0, c)
                 logging.debug(
                     "TABLE:" +
                     self.boxes[i]["text"] +
                     "; Cap: " +
                     tk)
-            else:
+            elif fk:
                 figures[fk].insert(0, c)
                 logging.debug(
                     "FIGURE:" +
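The strategy refinement here: the old branch indexed tables[tk] unconditionally and fell through to figures[fk] otherwise, so a box whose matched caption key was empty raised a KeyError. Guarding with "and tk" / "elif fk" attaches a box only when the caption key actually exists. A toy, runnable demo of the guarded dispatch; reading the hunk, tv/fv appear to be closeness scores to the nearest table/figure caption and tk/fk the caption keys, which is an assumption about the surrounding code (attach is a hypothetical helper):

    # tables/figures are keyed by caption text
    tables = {"Table 1": []}
    figures = {"Figure 1": []}

    def attach(c, tv, fv, tk, fk):
        if tv < fv and tk:        # nearer to a table, and it has a caption
            tables[tk].insert(0, c)
        elif fk:                  # otherwise a captioned figure
            figures[fk].insert(0, c)
        # neither captioned: drop the box instead of raising KeyError

    attach("cell-box", tv=1.0, fv=2.0, tk="Table 1", fk="")
    attach("stray-box", tv=1.0, fv=2.0, tk="", fk="")  # old code: KeyError on tables[""]
    print(tables)  # {'Table 1': ['cell-box']}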
@@ -31,7 +31,7 @@ class HuPptParser(object):

         if shape.shape_type == 6:
             texts = []
-            for p in shape.shapes:
+            for p in sorted(shape.shapes, key=lambda x: (x.top//10, x.left)):
                 t = self.__extract(p)
                 if t: texts.append(t)
             return "\n".join(texts)
@@ -46,7 +46,7 @@ class HuPptParser(object):
             if i < from_page: continue
             if i >= to_page:break
             texts = []
-            for shape in slide.shapes:
+            for shape in sorted(slide.shapes, key=lambda x: (x.top//10, x.left)):
                 txt = self.__extract(shape)
                 if txt: texts.append(txt)
             txts.append("\n".join(texts))
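Both HuPptParser hunks make the same change: raw iteration over shape.shapes / slide.shapes follows the XML order inside the .pptx, not visual order, so sorting on (x.top//10, x.left) approximates top-to-bottom, left-to-right reading order. Integer-dividing top by 10 buckets shapes with nearly equal vertical offsets into one visual row, and left then orders shapes within that row. A self-contained sketch of the same bucketed sort, using a toy stand-in for pptx shapes:

    from collections import namedtuple

    Shape = namedtuple("Shape", "top left text")  # toy stand-in, not python-pptx

    shapes = [
        Shape(top=105, left=300, text="B"),
        Shape(top=102, left=10, text="A"),   # same visual row as B, further left
        Shape(top=400, left=10, text="C"),
    ]

    # top // 10 collapses near-equal vertical positions into one bucket,
    # so shapes on the same line sort left-to-right before the next line starts
    ordered = sorted(shapes, key=lambda x: (x.top // 10, x.left))
    print([s.text for s in ordered])  # ['A', 'B', 'C']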
@@ -64,10 +64,15 @@ def load_model(model_dir, nm):
         raise ValueError("not find model file path {}".format(
             model_file_path))

+    options = ort.SessionOptions()
+    options.enable_cpu_mem_arena = False
+    options.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL
+    options.intra_op_num_threads = 2
+    options.inter_op_num_threads = 2
     if ort.get_device() == "GPU":
-        sess = ort.InferenceSession(model_file_path, providers=['CUDAExecutionProvider'])
+        sess = ort.InferenceSession(model_file_path, options=options, providers=['CUDAExecutionProvider'])
     else:
-        sess = ort.InferenceSession(model_file_path, providers=['CPUExecutionProvider'])
+        sess = ort.InferenceSession(model_file_path, options=options, providers=['CPUExecutionProvider'])
     return sess, sess.get_inputs()[0]


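The new SessionOptions block bounds ONNX Runtime's resource use: two intra-op and two inter-op threads with sequential execution cap CPU parallelism, and enable_cpu_mem_arena = False stops the CPU arena allocator from holding its high-water-mark allocations for the life of the session. A self-contained sketch of the same session setup (make_session is a hypothetical name; the model path is a placeholder):

    import onnxruntime as ort

    def make_session(model_file_path):
        options = ort.SessionOptions()
        options.enable_cpu_mem_arena = False   # don't retain peak allocations
        options.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL
        options.intra_op_num_threads = 2       # cap per-operator threads
        options.inter_op_num_threads = 2       # cap cross-operator threads
        providers = (['CUDAExecutionProvider'] if ort.get_device() == "GPU"
                     else ['CPUExecutionProvider'])
        # sess_options is the second positional parameter of InferenceSession
        return ort.InferenceSession(model_file_path, options, providers=providers)

    # usage (placeholder path):
    # sess = make_session("/path/to/model.onnx")
    # first_input = sess.get_inputs()[0]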
@@ -325,7 +330,13 @@ class TextRecognizer(object):

             input_dict = {}
             input_dict[self.input_tensor.name] = norm_img_batch
-            outputs = self.predictor.run(None, input_dict)
+            for i in range(100000):
+                try:
+                    outputs = self.predictor.run(None, input_dict)
+                    break
+                except Exception as e:
+                    if i >= 3: raise e
+                    time.sleep(5)
             preds = outputs[0]
             rec_result = self.postprocess_op(preds)
             for rno in range(len(rec_result)):
@@ -430,7 +441,13 @@ class TextDetector(object):
         img = img.copy()
         input_dict = {}
         input_dict[self.input_tensor.name] = img
-        outputs = self.predictor.run(None, input_dict)
+        for i in range(100000):
+            try:
+                outputs = self.predictor.run(None, input_dict)
+                break
+            except Exception as e:
+                if i >= 3: raise e
+                time.sleep(5)

         post_result = self.postprocess_op({"maps": outputs[0]}, shape_list)
         dt_boxes = post_result[0]['points']
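Both OCR hunks wrap predictor.run in the same retry: on an exception the loop sleeps 5 s and tries again, re-raising after the fourth failure. The range(100000) bound is effectively "until break or raise", since i >= 3 re-raises long before the range runs out, and outputs is guaranteed bound whenever the loop exits via break. A generic sketch of the pattern as a hypothetical run_with_retry helper (the transient-failure motivation, e.g. a momentarily busy GPU, is an assumption about intent):

    import time

    def run_with_retry(predictor, input_dict, max_retries=3, delay=5):
        # retry transient inference failures; re-raise once retries are spent
        for i in range(max_retries + 1):
            try:
                return predictor.run(None, input_dict)
            except Exception:
                if i >= max_retries:
                    raise
                time.sleep(delay)

    # outputs = run_with_retry(predictor, {"x": batch})  # up to 4 attempts total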
@@ -42,7 +42,9 @@ class Recognizer(object):
             raise ValueError("not find model file path {}".format(
                 model_file_path))
         if ort.get_device() == "GPU":
-            self.ort_sess = ort.InferenceSession(model_file_path, providers=['CUDAExecutionProvider'])
+            options = ort.SessionOptions()
+            options.enable_cpu_mem_arena = False
+            self.ort_sess = ort.InferenceSession(model_file_path, options=options, providers=[('CUDAExecutionProvider')])
         else:
             self.ort_sess = ort.InferenceSession(model_file_path, providers=['CPUExecutionProvider'])
         self.input_names = [node.name for node in self.ort_sess.get_inputs()]
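The Recognizer hunk applies the same arena fix to the CUDA session as load_model above (see the session sketch after that hunk): with enable_cpu_mem_arena left on, ONNX Runtime's CPU allocator keeps its peak allocation alive for the session's lifetime, which reads as a memory leak when input shapes vary from batch to batch.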