mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Fix "no tc element at grid_offset" (#9375)
### What problem does this PR solve?
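Fix the "no `tc` element at grid_offset" error: when python-docx raises while reading the cells of a DOCX table row, log a warning and skip the rest of that row instead of failing the whole task.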
Stack trace:
```
Traceback (most recent call last):
File "/ragflow/rag/svr/task_executor.py", line 620, in handle_task
await do_handle_task(task)
File "/ragflow/rag/svr/task_executor.py", line 553, in do_handle_task
chunks = await build_chunks(task, progress_callback)
File "/ragflow/rag/svr/task_executor.py", line 257, in build_chunks
cks = await trio.to_thread.run_sync(lambda: chunker.chunk(task["name"], binary=binary, from_page=task["from_page"],
File "/ragflow/.venv/lib/python3.10/site-packages/trio/_threads.py", line 447, in to_thread_run_sync
return msg_from_thread.unwrap()
File "/ragflow/.venv/lib/python3.10/site-packages/outcome/_impl.py", line 213, in unwrap
raise captured_error
File "/ragflow/.venv/lib/python3.10/site-packages/trio/_threads.py", line 373, in do_release_then_return_result
return result.unwrap()
File "/ragflow/.venv/lib/python3.10/site-packages/outcome/_impl.py", line 213, in unwrap
raise captured_error
File "/ragflow/.venv/lib/python3.10/site-packages/trio/_threads.py", line 392, in worker_fn
ret = context.run(sync_fn, *args)
File "/ragflow/rag/svr/task_executor.py", line 257, in <lambda>
cks = await trio.to_thread.run_sync(lambda: chunker.chunk(task["name"], binary=binary, from_page=task["from_page"],
File "/ragflow/rag/app/naive.py", line 384, in chunk
sections, tables = Docx()(filename, binary)
File "/ragflow/rag/app/naive.py", line 230, in __call__
while i < len(r.cells):
File "/ragflow/.venv/lib/python3.10/site-packages/docx/table.py", line 438, in cells
return tuple(_iter_row_cells())
File "/ragflow/.venv/lib/python3.10/site-packages/docx/table.py", line 436, in _iter_row_cells
yield from iter_tc_cells(tc)
File "/ragflow/.venv/lib/python3.10/site-packages/docx/table.py", line 424, in iter_tc_cells
yield from iter_tc_cells(tc._tc_above) # pyright: ignore[reportPrivateUsage]
File "/ragflow/.venv/lib/python3.10/site-packages/docx/oxml/table.py", line 741, in _tc_above
return self._tr_above.tc_at_grid_offset(self.grid_offset)
File "/ragflow/.venv/lib/python3.10/site-packages/docx/oxml/table.py", line 98, in tc_at_grid_offset
raise ValueError(f"no `tc` element at grid_offset={grid_offset}")
ValueError: no `tc` element at grid_offset=10
```
### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
@ -226,17 +226,20 @@ class Docx(DocxParser):
|
|||||||
for r in tb.rows:
|
for r in tb.rows:
|
||||||
html += "<tr>"
|
html += "<tr>"
|
||||||
i = 0
|
i = 0
|
||||||
while i < len(r.cells):
|
try:
|
||||||
span = 1
|
while i < len(r.cells):
|
||||||
c = r.cells[i]
|
span = 1
|
||||||
for j in range(i + 1, len(r.cells)):
|
c = r.cells[i]
|
||||||
if c.text == r.cells[j].text:
|
for j in range(i + 1, len(r.cells)):
|
||||||
span += 1
|
if c.text == r.cells[j].text:
|
||||||
i = j
|
span += 1
|
||||||
else:
|
i = j
|
||||||
break
|
else:
|
||||||
i += 1
|
break
|
||||||
html += f"<td>{c.text}</td>" if span == 1 else f"<td colspan='{span}'>{c.text}</td>"
|
i += 1
|
||||||
|
html += f"<td>{c.text}</td>" if span == 1 else f"<td colspan='{span}'>{c.text}</td>"
|
||||||
|
except Exception as e:
|
||||||
|
logging.warning(f"Error parsing table, ignore: {e}")
|
||||||
html += "</tr>"
|
html += "</tr>"
|
||||||
html += "</table>"
|
html += "</table>"
|
||||||
tbls.append(((None, html), ""))
|
tbls.append(((None, html), ""))
|
||||||
|
|||||||
Reference in New Issue
Block a user