mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 12:32:30 +08:00
Add 'One' chunk method (#137)
This commit is contained in:
@@ -84,6 +84,7 @@ def dispatch():
|
||||
pages = PdfParser.total_page_number(r["name"], MINIO.get(r["kb_id"], r["location"]))
|
||||
page_size = 5
|
||||
if r["parser_id"] == "paper": page_size = 12
|
||||
if r["parser_id"] == "one": page_size = 1000000000
|
||||
for s,e in r["parser_config"].get("pages", [(0,100000)]):
|
||||
e = min(e, pages)
|
||||
for p in range(s, e, page_size):
|
||||
|
||||
@@ -39,7 +39,7 @@ from rag.nlp import search
|
||||
from io import BytesIO
|
||||
import pandas as pd
|
||||
|
||||
from rag.app import laws, paper, presentation, manual, qa, table, book, resume, picture, naive
|
||||
from rag.app import laws, paper, presentation, manual, qa, table, book, resume, picture, naive, one
|
||||
|
||||
from api.db import LLMType, ParserType
|
||||
from api.db.services.document_service import DocumentService
|
||||
@@ -60,6 +60,7 @@ FACTORY = {
|
||||
ParserType.TABLE.value: table,
|
||||
ParserType.RESUME.value: resume,
|
||||
ParserType.PICTURE.value: picture,
|
||||
ParserType.ONE.value: one,
|
||||
}
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user