mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-25 08:06:48 +08:00
Fix: table tag on chunks. (#12126)
### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
@@ -348,7 +348,8 @@ def tokenize_table(tbls, doc, eng, batch_size=10):
|
||||
d["doc_type_kwd"] = "table"
|
||||
if img:
|
||||
d["image"] = img
|
||||
d["doc_type_kwd"] = "image"
|
||||
if d["content_with_weight"].find("<tr>") < 0:
|
||||
d["doc_type_kwd"] = "image"
|
||||
if poss:
|
||||
add_positions(d, poss)
|
||||
res.append(d)
|
||||
@@ -361,7 +362,8 @@ def tokenize_table(tbls, doc, eng, batch_size=10):
|
||||
d["doc_type_kwd"] = "table"
|
||||
if img:
|
||||
d["image"] = img
|
||||
d["doc_type_kwd"] = "image"
|
||||
if d["content_with_weight"].find("<tr>") < 0:
|
||||
d["doc_type_kwd"] = "image"
|
||||
add_positions(d, poss)
|
||||
res.append(d)
|
||||
return res
|
||||
|
||||
@@ -395,8 +395,8 @@ async def build_chunks(task, progress_callback):
|
||||
await asyncio.gather(*tasks, return_exceptions=True)
|
||||
raise
|
||||
metadata = {}
|
||||
for ck in cks:
|
||||
metadata = update_metadata_to(metadata, ck["metadata_obj"])
|
||||
for doc in docs:
|
||||
metadata = update_metadata_to(metadata, doc["metadata_obj"])
|
||||
del ck["metadata_obj"]
|
||||
if metadata:
|
||||
e, doc = DocumentService.get_by_id(task["doc_id"])
|
||||
|
||||
Reference in New Issue
Block a user