mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Editing a chunk shall update it instead of inserting it (#3709)
### What problem does this PR solve?

Editing a chunk shall update it instead of inserting it. Close #3679

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
@ -99,11 +99,11 @@ class Pdf(PdfParser):
|
||||
i += 1
|
||||
txt = b["text"].lower().strip()
|
||||
if re.match("(abstract|摘要)", txt):
|
||||
if len(txt.split(" ")) > 32 or len(txt) > 64:
|
||||
if len(txt.split()) > 32 or len(txt) > 64:
|
||||
abstr = txt + self._line_tag(b, zoomin)
|
||||
break
|
||||
txt = self.boxes[i]["text"].lower().strip()
|
||||
if len(txt.split(" ")) > 32 or len(txt) > 64:
|
||||
if len(txt.split()) > 32 or len(txt) > 64:
|
||||
abstr = txt + self._line_tag(self.boxes[i], zoomin)
|
||||
i += 1
|
||||
break
|
||||
|
||||
@ -33,7 +33,7 @@ def chunk(filename, binary, tenant_id, lang, callback=None, **kwargs):
|
||||
txt = "\n".join([t[0] for _, t in bxs if t[0]])
|
||||
eng = lang.lower() == "english"
|
||||
callback(0.4, "Finish OCR: (%s ...)" % txt[:12])
|
||||
if (eng and len(txt.split(" ")) > 32) or len(txt) > 32:
|
||||
if (eng and len(txt.split()) > 32) or len(txt) > 32:
|
||||
tokenize(doc, txt, eng)
|
||||
callback(0.8, "OCR results is too long to use CV LLM.")
|
||||
return [doc]
|
||||
|
||||
Reference in New Issue
Block a user