Add Dockerfile for CUDA environment. Refine table search strategy (#123)

This commit is contained in:
KevinHuSh
2024-03-14 19:45:29 +08:00
committed by GitHub
parent 937048e5fb
commit 675a9f8d9a
18 changed files with 259 additions and 84 deletions

View File

@ -34,7 +34,7 @@ class HuExcelParser:
total = 0
for sheetname in wb.sheetnames:
ws = wb[sheetname]
total += len(ws.rows)
total += len(list(ws.rows))
return total
if fnm.split(".")[-1].lower() in ["csv", "txt"]:

View File

@ -655,14 +655,14 @@ class HuParser:
#if min(tv, fv) > 2000:
# i += 1
# continue
if tv < fv:
if tv < fv and tk:
tables[tk].insert(0, c)
logging.debug(
"TABLE:" +
self.boxes[i]["text"] +
"; Cap: " +
tk)
else:
elif fk:
figures[fk].insert(0, c)
logging.debug(
"FIGURE:" +

View File

@ -31,7 +31,7 @@ class HuPptParser(object):
if shape.shape_type == 6:
texts = []
for p in shape.shapes:
for p in sorted(shape.shapes, key=lambda x: (x.top//10, x.left)):
t = self.__extract(p)
if t: texts.append(t)
return "\n".join(texts)
@ -46,7 +46,7 @@ class HuPptParser(object):
if i < from_page: continue
if i >= to_page:break
texts = []
for shape in slide.shapes:
for shape in sorted(slide.shapes, key=lambda x: (x.top//10, x.left)):
txt = self.__extract(shape)
if txt: texts.append(txt)
txts.append("\n".join(texts))