mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 12:32:30 +08:00
Add Dockerfile for CUDA environment. Refine table search strategy. (#123)
This commit is contained in:
@ -34,7 +34,7 @@ class HuExcelParser:
|
||||
total = 0
|
||||
for sheetname in wb.sheetnames:
|
||||
ws = wb[sheetname]
|
||||
total += len(ws.rows)
|
||||
total += len(list(ws.rows))
|
||||
return total
|
||||
|
||||
if fnm.split(".")[-1].lower() in ["csv", "txt"]:
|
||||
|
||||
@ -655,14 +655,14 @@ class HuParser:
|
||||
#if min(tv, fv) > 2000:
|
||||
# i += 1
|
||||
# continue
|
||||
if tv < fv:
|
||||
if tv < fv and tk:
|
||||
tables[tk].insert(0, c)
|
||||
logging.debug(
|
||||
"TABLE:" +
|
||||
self.boxes[i]["text"] +
|
||||
"; Cap: " +
|
||||
tk)
|
||||
else:
|
||||
elif fk:
|
||||
figures[fk].insert(0, c)
|
||||
logging.debug(
|
||||
"FIGURE:" +
|
||||
|
||||
@ -31,7 +31,7 @@ class HuPptParser(object):
|
||||
|
||||
if shape.shape_type == 6:
|
||||
texts = []
|
||||
for p in shape.shapes:
|
||||
for p in sorted(shape.shapes, key=lambda x: (x.top//10, x.left)):
|
||||
t = self.__extract(p)
|
||||
if t: texts.append(t)
|
||||
return "\n".join(texts)
|
||||
@ -46,7 +46,7 @@ class HuPptParser(object):
|
||||
if i < from_page: continue
|
||||
if i >= to_page:break
|
||||
texts = []
|
||||
for shape in slide.shapes:
|
||||
for shape in sorted(slide.shapes, key=lambda x: (x.top//10, x.left)):
|
||||
txt = self.__extract(shape)
|
||||
if txt: texts.append(txt)
|
||||
txts.append("\n".join(texts))
|
||||
|
||||
Reference in New Issue
Block a user