Add dockerfile for CUDA environment. Refine table search strategy. (#123)

This commit is contained in:
KevinHuSh
2024-03-14 19:45:29 +08:00
committed by GitHub
parent 937048e5fb
commit 675a9f8d9a
18 changed files with 259 additions and 84 deletions

View File

@@ -34,7 +34,7 @@ class HuExcelParser:
total = 0
for sheetname in wb.sheetnames:
ws = wb[sheetname]
total += len(ws.rows)
total += len(list(ws.rows))
return total
if fnm.split(".")[-1].lower() in ["csv", "txt"]:

View File

@@ -655,14 +655,14 @@ class HuParser:
#if min(tv, fv) > 2000:
# i += 1
# continue
if tv < fv:
if tv < fv and tk:
tables[tk].insert(0, c)
logging.debug(
"TABLE:" +
self.boxes[i]["text"] +
"; Cap: " +
tk)
else:
elif fk:
figures[fk].insert(0, c)
logging.debug(
"FIGURE:" +

View File

@@ -31,7 +31,7 @@ class HuPptParser(object):
if shape.shape_type == 6:
texts = []
for p in shape.shapes:
for p in sorted(shape.shapes, key=lambda x: (x.top//10, x.left)):
t = self.__extract(p)
if t: texts.append(t)
return "\n".join(texts)
@@ -46,7 +46,7 @@ class HuPptParser(object):
if i < from_page: continue
if i >= to_page:break
texts = []
for shape in slide.shapes:
for shape in sorted(slide.shapes, key=lambda x: (x.top//10, x.left)):
txt = self.__extract(shape)
if txt: texts.append(txt)
txts.append("\n".join(texts))

View File

@@ -64,10 +64,15 @@ def load_model(model_dir, nm):
raise ValueError("not find model file path {}".format(
model_file_path))
options = ort.SessionOptions()
options.enable_cpu_mem_arena = False
options.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL
options.intra_op_num_threads = 2
options.inter_op_num_threads = 2
if ort.get_device() == "GPU":
sess = ort.InferenceSession(model_file_path, providers=['CUDAExecutionProvider'])
sess = ort.InferenceSession(model_file_path, options=options, providers=['CUDAExecutionProvider'])
else:
sess = ort.InferenceSession(model_file_path, providers=['CPUExecutionProvider'])
sess = ort.InferenceSession(model_file_path, options=options, providers=['CPUExecutionProvider'])
return sess, sess.get_inputs()[0]
@@ -325,7 +330,13 @@ class TextRecognizer(object):
input_dict = {}
input_dict[self.input_tensor.name] = norm_img_batch
outputs = self.predictor.run(None, input_dict)
for i in range(100000):
try:
outputs = self.predictor.run(None, input_dict)
break
except Exception as e:
if i >= 3: raise e
time.sleep(5)
preds = outputs[0]
rec_result = self.postprocess_op(preds)
for rno in range(len(rec_result)):
@@ -430,7 +441,13 @@ class TextDetector(object):
img = img.copy()
input_dict = {}
input_dict[self.input_tensor.name] = img
outputs = self.predictor.run(None, input_dict)
for i in range(100000):
try:
outputs = self.predictor.run(None, input_dict)
break
except Exception as e:
if i >= 3: raise e
time.sleep(5)
post_result = self.postprocess_op({"maps": outputs[0]}, shape_list)
dt_boxes = post_result[0]['points']

View File

@@ -42,7 +42,9 @@ class Recognizer(object):
raise ValueError("not find model file path {}".format(
model_file_path))
if ort.get_device() == "GPU":
self.ort_sess = ort.InferenceSession(model_file_path, providers=['CUDAExecutionProvider'])
options = ort.SessionOptions()
options.enable_cpu_mem_arena = False
self.ort_sess = ort.InferenceSession(model_file_path, options=options, providers=[('CUDAExecutionProvider')])
else:
self.ort_sess = ort.InferenceSession(model_file_path, providers=['CPUExecutionProvider'])
self.input_names = [node.name for node in self.ort_sess.get_inputs()]