From 7f08ba47d7b0a39a1002d42b8162d4c60a21e664 Mon Sep 17 00:00:00 2001 From: Jay Xu Date: Mon, 11 Aug 2025 17:13:10 +0800 Subject: [PATCH] Fix "no `tc` element at grid_offset" (#9375) ### What problem does this PR solve? Fix the python-docx error "no `tc` element at grid_offset" raised while reading a table row's cells in the DOCX parser (`rag/app/naive.py`): the exception is now caught and logged as a warning, and the affected row's cells are skipped instead of failing the whole task. Stack trace: ``` Traceback (most recent call last): File "/ragflow/rag/svr/task_executor.py", line 620, in handle_task await do_handle_task(task) File "/ragflow/rag/svr/task_executor.py", line 553, in do_handle_task chunks = await build_chunks(task, progress_callback) File "/ragflow/rag/svr/task_executor.py", line 257, in build_chunks cks = await trio.to_thread.run_sync(lambda: chunker.chunk(task["name"], binary=binary, from_page=task["from_page"], File "/ragflow/.venv/lib/python3.10/site-packages/trio/_threads.py", line 447, in to_thread_run_sync return msg_from_thread.unwrap() File "/ragflow/.venv/lib/python3.10/site-packages/outcome/_impl.py", line 213, in unwrap raise captured_error File "/ragflow/.venv/lib/python3.10/site-packages/trio/_threads.py", line 373, in do_release_then_return_result return result.unwrap() File "/ragflow/.venv/lib/python3.10/site-packages/outcome/_impl.py", line 213, in unwrap raise captured_error File "/ragflow/.venv/lib/python3.10/site-packages/trio/_threads.py", line 392, in worker_fn ret = context.run(sync_fn, *args) File "/ragflow/rag/svr/task_executor.py", line 257, in <lambda> cks = await trio.to_thread.run_sync(lambda: chunker.chunk(task["name"], binary=binary, from_page=task["from_page"], File "/ragflow/rag/app/naive.py", line 384, in chunk sections, tables = Docx()(filename, binary) File "/ragflow/rag/app/naive.py", line 230, in __call__ while i < len(r.cells): File "/ragflow/.venv/lib/python3.10/site-packages/docx/table.py", line 438, in cells return tuple(_iter_row_cells()) File "/ragflow/.venv/lib/python3.10/site-packages/docx/table.py", line 436, in _iter_row_cells yield from iter_tc_cells(tc) File 
"/ragflow/.venv/lib/python3.10/site-packages/docx/table.py", line 424, in iter_tc_cells yield from iter_tc_cells(tc._tc_above) # pyright: ignore[reportPrivateUsage] File "/ragflow/.venv/lib/python3.10/site-packages/docx/oxml/table.py", line 741, in _tc_above return self._tr_above.tc_at_grid_offset(self.grid_offset) File "/ragflow/.venv/lib/python3.10/site-packages/docx/oxml/table.py", line 98, in tc_at_grid_offset raise ValueError(f"no `tc` element at grid_offset={grid_offset}") ValueError: no `tc` element at grid_offset=10 ``` ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- rag/app/naive.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/rag/app/naive.py b/rag/app/naive.py index 06d0f3369..1a6c0b92b 100644 --- a/rag/app/naive.py +++ b/rag/app/naive.py @@ -226,17 +226,20 @@ class Docx(DocxParser): for r in tb.rows: html += "" i = 0 - while i < len(r.cells): - span = 1 - c = r.cells[i] - for j in range(i + 1, len(r.cells)): - if c.text == r.cells[j].text: - span += 1 - i = j - else: - break - i += 1 - html += f"{c.text}" if span == 1 else f"{c.text}" + try: + while i < len(r.cells): + span = 1 + c = r.cells[i] + for j in range(i + 1, len(r.cells)): + if c.text == r.cells[j].text: + span += 1 + i = j + else: + break + i += 1 + html += f"{c.text}" if span == 1 else f"{c.text}" + except Exception as e: + logging.warning(f"Error parsing table, ignore: {e}") html += "" html += "" tbls.append(((None, html), ""))