mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Rename page_num_list, top_list, position_list (#3940)
### What problem does this PR solve?

Rename `page_num_list`, `top_list`, `position_list` to `page_num_int`, `top_int`, `position_int`.

### Type of change

- [x] Refactoring
This commit is contained in:
@@ -20,7 +20,6 @@ from rag.nlp import tokenize, is_english
|
||||
from rag.nlp import rag_tokenizer
|
||||
from deepdoc.parser import PdfParser, PptParser, PlainParser
|
||||
from PyPDF2 import PdfReader as pdf2_read
|
||||
import json
|
||||
|
||||
|
||||
class Ppt(PptParser):
|
||||
@@ -109,9 +108,9 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
||||
d = copy.deepcopy(doc)
|
||||
pn += from_page
|
||||
d["image"] = img
|
||||
d["page_num_list"] = json.dumps([pn + 1])
|
||||
d["top_list"] = json.dumps([0])
|
||||
d["position_list"] = json.dumps([(pn + 1, 0, img.size[0], 0, img.size[1])])
|
||||
d["page_num_int"] = [pn + 1]
|
||||
d["top_int"] = [0]
|
||||
d["position_int"] = [(pn + 1, 0, img.size[0], 0, img.size[1])]
|
||||
tokenize(d, txt, eng)
|
||||
res.append(d)
|
||||
return res
|
||||
@@ -125,10 +124,9 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
||||
pn += from_page
|
||||
if img:
|
||||
d["image"] = img
|
||||
d["page_num_list"] = json.dumps([pn + 1])
|
||||
d["top_list"] = json.dumps([0])
|
||||
d["position_list"] = json.dumps([
|
||||
(pn + 1, 0, img.size[0] if img else 0, 0, img.size[1] if img else 0)])
|
||||
d["page_num_int"] = [pn + 1]
|
||||
d["top_int"] = [0]
|
||||
d["position_int"] = [(pn + 1, 0, img.size[0] if img else 0, 0, img.size[1] if img else 0)]
|
||||
tokenize(d, txt, eng)
|
||||
res.append(d)
|
||||
return res
|
||||
|
||||
Reference in New Issue
Block a user