mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-02-01 08:05:07 +08:00
Fix: overlap cannot be properly applied (#12828)
### What problem does this PR solve?

Overlap cannot be properly applied.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
@ -23,6 +23,7 @@ from rag.utils.base64_image import id2image, image2id
|
||||
from deepdoc.parser.pdf_parser import RAGFlowPdfParser
|
||||
from rag.flow.base import ProcessBase, ProcessParamBase
|
||||
from rag.flow.splitter.schema import SplitterFromUpstream
|
||||
from common.float_utils import normalize_overlapped_percent
|
||||
from rag.nlp import attach_media_context, naive_merge, naive_merge_with_images
|
||||
from common import settings
|
||||
|
||||
@ -68,6 +69,7 @@ class Splitter(ProcessBase):
|
||||
|
||||
self.set_output("output_format", "chunks")
|
||||
self.callback(random.randint(1, 5) / 100.0, "Start to split into chunks.")
|
||||
overlapped_percent = normalize_overlapped_percent(self._param.overlapped_percent)
|
||||
if from_upstream.output_format in ["markdown", "text", "html"]:
|
||||
if from_upstream.output_format == "markdown":
|
||||
payload = from_upstream.markdown_result
|
||||
@ -83,7 +85,7 @@ class Splitter(ProcessBase):
|
||||
payload,
|
||||
self._param.chunk_token_size,
|
||||
deli,
|
||||
self._param.overlapped_percent,
|
||||
overlapped_percent,
|
||||
)
|
||||
if custom_pattern:
|
||||
docs = []
|
||||
@ -129,7 +131,7 @@ class Splitter(ProcessBase):
|
||||
section_images,
|
||||
self._param.chunk_token_size,
|
||||
deli,
|
||||
self._param.overlapped_percent,
|
||||
overlapped_percent,
|
||||
)
|
||||
cks = [
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user