Fix: potential negative max_tokens in RAPTOR (#10701)

### What problem does this PR solve? Fix potential negative max_tokens in RAPTOR. #10235. ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue)
Feat: Move the pipeline translation field to flow #9869 (#10697)
2026-01-04 03:25:30 +08:00 · 2025-10-21 15:49:51 +08:00 · 2025-10-21 15:23:37 +08:00 · 2025-10-21 13:55:46 +08:00 · 2025-10-21 13:02:29 +08:00 · 2025-10-21 13:02:01 +08:00
48 changed files with 924 additions and 462 deletions
--- a/agent/tools/retrieval.py
+++ b/agent/tools/retrieval.py
@ -18,12 +18,14 @@ import re
 from abc import ABC
 from agent.tools.base import ToolParamBase, ToolBase, ToolMeta
 from api.db import LLMType
+from api.db.services.document_service import DocumentService
+from api.db.services.dialog_service import meta_filter
 from api.db.services.knowledgebase_service import KnowledgebaseService
 from api.db.services.llm_service import LLMBundle
 from api import settings
 from api.utils.api_utils import timeout
 from rag.app.tag import label_question
-from rag.prompts.generator import cross_languages, kb_prompt
+from rag.prompts.generator import cross_languages, kb_prompt, gen_meta_filter


 class RetrievalParam(ToolParamBase):
@ -58,6 +60,7 @@ class RetrievalParam(ToolParamBase):
        self.use_kg = False
        self.cross_languages = []
        self.toc_enhance = False
+        self.meta_data_filter={}

    def check(self):
        self.check_decimal_float(self.similarity_threshold, "[Retrieval] Similarity threshold")
@ -117,6 +120,21 @@ class Retrieval(ToolBase, ABC):
        vars = self.get_input_elements_from_text(kwargs["query"])
        vars = {k:o["value"] for k,o in vars.items()}
        query = self.string_format(kwargs["query"], vars)
+        
+        doc_ids=[]
+        if self._param.meta_data_filter!={}:
+            metas = DocumentService.get_meta_by_kbs(kb_ids)
+            if self._param.meta_data_filter.get("method") == "auto":
+                chat_mdl = LLMBundle(self._canvas.get_tenant_id(), LLMType.CHAT)
+                filters = gen_meta_filter(chat_mdl, metas, query)
+                doc_ids.extend(meta_filter(metas, filters))
+                if not doc_ids:
+                    doc_ids = None
+            elif self._param.meta_data_filter.get("method") == "manual":
+                doc_ids.extend(meta_filter(metas, self._param.meta_data_filter["manual"]))
+                if not doc_ids:
+                    doc_ids = None
+
        if self._param.cross_languages:
            query = cross_languages(kbs[0].tenant_id, None, query, self._param.cross_languages)

@ -131,6 +149,7 @@ class Retrieval(ToolBase, ABC):
                self._param.top_n,
                self._param.similarity_threshold,
                1 - self._param.keywords_similarity_weight,
+                doc_ids=doc_ids,
                aggs=False,
                rerank_mdl=rerank_mdl,
                rank_feature=label_question(query, kbs),
--- a/api/apps/document_app.py
+++ b/api/apps/document_app.py
@ -45,7 +45,7 @@ from api.utils.api_utils import (
 from api.utils.file_utils import filename_type, get_project_base_directory, thumbnail
 from api.utils.web_utils import CONTENT_TYPE_MAP, html2pdf, is_valid_url
 from deepdoc.parser.html_parser import RAGFlowHtmlParser
-from rag.nlp import search
+from rag.nlp import search, rag_tokenizer
 from rag.utils.storage_factory import STORAGE_IMPL


@ -524,6 +524,21 @@ def rename():
            e, file = FileService.get_by_id(informs[0].file_id)
            FileService.update_by_id(file.id, {"name": req["name"]})

+        tenant_id = DocumentService.get_tenant_id(req["doc_id"])
+        title_tks = rag_tokenizer.tokenize(req["name"])
+        es_body = {
+            "docnm_kwd": req["name"],
+            "title_tks": title_tks,
+            "title_sm_tks": rag_tokenizer.fine_grained_tokenize(title_tks),
+        }
+        if settings.docStoreConn.indexExist(search.index_name(tenant_id), doc.kb_id):
+            settings.docStoreConn.update(
+                {"doc_id": req["doc_id"]},
+                es_body,
+                search.index_name(tenant_id),
+                doc.kb_id,
+            )
+
        return get_json_result(data=True)
    except Exception as e:
        return server_error_response(e)
--- a/api/apps/sdk/doc.py
+++ b/api/apps/sdk/doc.py
@ -470,6 +470,20 @@ def list_docs(dataset_id, tenant_id):
        required: false
        default: 0
        description: Unix timestamp for filtering documents created before this time. 0 means no filter.
+      - in: query
+        name: suffix
+        type: array
+        items:
+          type: string
+        required: false
+        description: Filter by file suffix (e.g., ["pdf", "txt", "docx"]).
+      - in: query
+        name: run
+        type: array
+        items:
+          type: string
+        required: false
+        description: Filter by document run status. Supports both numeric ("0", "1", "2", "3", "4") and text formats ("UNSTART", "RUNNING", "CANCEL", "DONE", "FAIL").
      - in: header
        name: Authorization
        type: string
@ -512,63 +526,62 @@ def list_docs(dataset_id, tenant_id):
                    description: Processing status.
    """
    if not KnowledgebaseService.accessible(kb_id=dataset_id, user_id=tenant_id):
-        return get_error_data_result(message=f"You don't own the dataset {dataset_id}. ")
-    id = request.args.get("id")
-    name = request.args.get("name")
+      return get_error_data_result(message=f"You don't own the dataset {dataset_id}. ")

-    if id and not DocumentService.query(id=id, kb_id=dataset_id):
-        return get_error_data_result(message=f"You don't own the document {id}.")
+    q = request.args
+    document_id = q.get("id")  
+    name        = q.get("name")
+
+    if document_id and not DocumentService.query(id=document_id, kb_id=dataset_id):
+        return get_error_data_result(message=f"You don't own the document {document_id}.")
    if name and not DocumentService.query(name=name, kb_id=dataset_id):
        return get_error_data_result(message=f"You don't own the document {name}.")

-    page = int(request.args.get("page", 1))
-    keywords = request.args.get("keywords", "")
-    page_size = int(request.args.get("page_size", 30))
-    orderby = request.args.get("orderby", "create_time")
-    if request.args.get("desc") == "False":
-        desc = False
-    else:
-        desc = True
-    docs, tol = DocumentService.get_list(dataset_id, page, page_size, orderby, desc, keywords, id, name)
+    page        = int(q.get("page", 1))
+    page_size   = int(q.get("page_size", 30))  
+    orderby     = q.get("orderby", "create_time")
+    desc        = str(q.get("desc", "true")).strip().lower() != "false"
+    keywords    = q.get("keywords", "")

-    create_time_from = int(request.args.get("create_time_from", 0))
-    create_time_to = int(request.args.get("create_time_to", 0))
+    # filters - align with OpenAPI parameter names
+    suffix               = q.getlist("suffix") 
+    run_status           = q.getlist("run")   
+    create_time_from     = int(q.get("create_time_from", 0))  
+    create_time_to       = int(q.get("create_time_to", 0))    

+    # map run status (accept text or numeric) - align with API parameter
+    run_status_text_to_numeric = {"UNSTART": "0", "RUNNING": "1", "CANCEL": "2", "DONE": "3", "FAIL": "4"}
+    run_status_converted = [run_status_text_to_numeric.get(v, v) for v in run_status]
+
+    docs, total = DocumentService.get_list(
+        dataset_id, page, page_size, orderby, desc, keywords, document_id, name, suffix, run_status_converted
+    )
+
+    # time range filter (0 means no bound)
    if create_time_from or create_time_to:
-        filtered_docs = []
-        for doc in docs:
-            doc_create_time = doc.get("create_time", 0)
-            if (create_time_from == 0 or doc_create_time >= create_time_from) and (create_time_to == 0 or doc_create_time <= create_time_to):
-                filtered_docs.append(doc)
-        docs = filtered_docs
+        docs = [
+            d for d in docs
+            if (create_time_from == 0 or d.get("create_time", 0) >= create_time_from)
+            and (create_time_to == 0 or d.get("create_time", 0) <= create_time_to)
+        ]

-    # rename key's name
-    renamed_doc_list = []
+    # rename keys + map run status back to text for output
    key_mapping = {
        "chunk_num": "chunk_count",
-        "kb_id": "dataset_id",
+        "kb_id": "dataset_id", 
        "token_num": "token_count",
        "parser_id": "chunk_method",
    }
-    run_mapping = {
-        "0": "UNSTART",
-        "1": "RUNNING",
-        "2": "CANCEL",
-        "3": "DONE",
-        "4": "FAIL",
-    }
-    for doc in docs:
-        renamed_doc = {}
-        for key, value in doc.items():
-            if key == "run":
-                renamed_doc["run"] = run_mapping.get(str(value))
-            new_key = key_mapping.get(key, key)
-            renamed_doc[new_key] = value
-            if key == "run":
-                renamed_doc["run"] = run_mapping.get(value)
-        renamed_doc_list.append(renamed_doc)
-    return get_result(data={"total": tol, "docs": renamed_doc_list})
+    run_status_numeric_to_text = {"0": "UNSTART", "1": "RUNNING", "2": "CANCEL", "3": "DONE", "4": "FAIL"}

+    output_docs = []
+    for d in docs:
+        renamed_doc = {key_mapping.get(k, k): v for k, v in d.items()}
+        if "run" in d:
+            renamed_doc["run"] = run_status_numeric_to_text.get(str(d["run"]), d["run"])
+        output_docs.append(renamed_doc)
+
+    return get_result(data={"total": total, "docs": output_docs})

@manager.route("/datasets/<dataset_id>/documents", methods=["DELETE"])  # noqa: F821
@token_required
--- a/api/db/services/document_service.py
+++ b/api/db/services/document_service.py
@ -79,7 +79,7 @@ class DocumentService(CommonService):
    @classmethod
    @DB.connection_context()
    def get_list(cls, kb_id, page_number, items_per_page,
-                 orderby, desc, keywords, id, name):
+                 orderby, desc, keywords, id, name, suffix=None, run = None):
        fields = cls.get_cls_model_fields()
        docs = cls.model.select(*[*fields, UserCanvas.title]).join(File2Document, on = (File2Document.document_id == cls.model.id))\
            .join(File, on = (File.id == File2Document.file_id))\
@ -96,6 +96,10 @@ class DocumentService(CommonService):
            docs = docs.where(
                fn.LOWER(cls.model.name).contains(keywords.lower())
            )
+        if suffix:
+            docs = docs.where(cls.model.suffix.in_(suffix))
+        if run:
+            docs = docs.where(cls.model.run.in_(run))
        if desc:
            docs = docs.order_by(cls.model.getter_by(orderby).desc())
        else:
--- a/deepdoc/parser/excel_parser.py
+++ b/deepdoc/parser/excel_parser.py
@ -54,8 +54,8 @@ class RAGFlowExcelParser:
            try:
                file_like_object.seek(0)
                try:
-                    df = pd.read_excel(file_like_object)
-                    return RAGFlowExcelParser._dataframe_to_workbook(df)
+                    dfs = pd.read_excel(file_like_object, sheet_name=None)
+                    return RAGFlowExcelParser._dataframe_to_workbook(dfs)
                except Exception as ex:
                    logging.info(f"pandas with default engine load error: {ex}, try calamine instead")
                    file_like_object.seek(0)
@ -75,6 +75,10 @@ class RAGFlowExcelParser:

    @staticmethod
    def _dataframe_to_workbook(df):
+        # pd.read_excel(..., sheet_name=None) returns a dict of
+        # {sheet name: DataFrame}, even when the file has a single sheet,
+        # so every dict size has to be handled here.
+        if isinstance(df, dict):
+            if len(df) > 1:
+                # Several sheets: build one worksheet per DataFrame.
+                return RAGFlowExcelParser._dataframes_to_workbook(df)
+            if len(df) == 1:
+                # Single sheet: unwrap it so the single-DataFrame path below
+                # receives a DataFrame instead of a dict.
+                df = next(iter(df.values()))
+
        df = RAGFlowExcelParser._clean_dataframe(df)
        wb = Workbook()
        ws = wb.active
@ -88,6 +92,22 @@ class RAGFlowExcelParser:
                ws.cell(row=row_num, column=col_num, value=value)

        return wb
+    
+    @staticmethod
+    def _dataframes_to_workbook(dfs: dict):
+        """Build a workbook containing one worksheet per entry of ``dfs``.
+
+        Args:
+            dfs: Mapping of sheet title to DataFrame. Each DataFrame is passed
+                through ``_clean_dataframe`` before being written.
+
+        Returns:
+            A ``Workbook`` whose sheets follow the iteration order of ``dfs``;
+            row 1 of every sheet holds the column names, data starts at row 2.
+        """
+        workbook = Workbook()
+        # Drop the sheet the workbook starts with so only the named sheets remain.
+        workbook.remove(workbook.active)
+
+        for title, frame in dfs.items():
+            cleaned = RAGFlowExcelParser._clean_dataframe(frame)
+            sheet = workbook.create_sheet(title=title)
+            # Header row.
+            for col_idx, header in enumerate(cleaned.columns, start=1):
+                sheet.cell(row=1, column=col_idx, value=header)
+            # Data rows, written below the header.
+            for row_idx, values in enumerate(cleaned.values, start=2):
+                for col_idx, cell_value in enumerate(values, start=1):
+                    sheet.cell(row=row_idx, column=col_idx, value=cell_value)
+        return workbook

    def html(self, fnm, chunk_rows=256):
        from html import escape
--- a/deepdoc/parser/figure_parser.py
+++ b/deepdoc/parser/figure_parser.py
@ -17,6 +17,8 @@ from concurrent.futures import ThreadPoolExecutor, as_completed

 from PIL import Image

+from api.db import LLMType
+from api.db.services.llm_service import LLMBundle
 from api.utils.api_utils import timeout
 from rag.app.picture import vision_llm_chunk as picture_vision_llm_chunk
 from rag.prompts.generator import vision_llm_figure_describe_prompt
@ -32,6 +34,43 @@ def vision_figure_parser_figure_data_wrapper(figures_data_without_positions):
        if isinstance(figure_data[1], Image.Image)
    ]

+def vision_figure_parser_docx_wrapper(sections,tbls,callback=None,**kwargs):
+    """Describe DOCX figures with the tenant's IMAGE2TEXT model and add them to ``tbls``.
+
+    Args:
+        sections: Parsed DOCX sections; passed to
+            ``vision_figure_parser_figure_data_wrapper`` to collect figure data.
+        tbls: List of already extracted table items. It is extended in place
+            with the items returned by ``VisionFigureParser``.
+        callback: Optional progress reporter called as
+            ``callback(progress, message)``. ``None`` disables reporting.
+        **kwargs: Must contain ``tenant_id``; forwarded unchanged to
+            ``VisionFigureParser``.
+
+    Returns:
+        ``tbls``. It is returned untouched when no vision model can be built
+        or when the figure parser raises.
+    """
+    def report(progress, message):
+        # ``callback`` defaults to None, so it must never be called unguarded:
+        # doing so inside the ``try`` below used to raise a TypeError that was
+        # swallowed and silently disabled the vision model.
+        if callback is not None:
+            callback(progress, message)
+
+    try:
+        vision_model = LLMBundle(kwargs["tenant_id"], LLMType.IMAGE2TEXT)
+    except Exception:
+        # Best effort: without a usable vision model the tables are returned as is.
+        vision_model = None
+    if not vision_model:
+        return tbls
+
+    report(0.7, "Visual model detected. Attempting to enhance figure extraction...")
+    figures_data = vision_figure_parser_figure_data_wrapper(sections)
+    try:
+        docx_vision_parser = VisionFigureParser(vision_model=vision_model, figures_data=figures_data, **kwargs)
+        boosted_figures = docx_vision_parser(callback=callback)
+        tbls.extend(boosted_figures)
+    except Exception as e:
+        report(0.8, f"Visual model error: {e}. Skipping figure parsing enhancement.")
+    return tbls
+
+def vision_figure_parser_pdf_wrapper(tbls,callback=None,**kwargs):
+    """Replace the figure items in ``tbls`` with vision-model-described figures.
+
+    Args:
+        tbls: Items produced by the PDF parser. An item counts as a figure when
+            ``item[0][0]`` is a PIL image and ``item[0][1]`` is a list.
+        callback: Optional progress reporter called as
+            ``callback(progress, message)``. ``None`` disables reporting.
+        **kwargs: Must contain ``tenant_id``; forwarded unchanged to
+            ``VisionFigureParser``.
+
+    Returns:
+        On success, a new list holding the non-figure items followed by the
+        items returned by ``VisionFigureParser``. When no vision model can be
+        built or the figure parser raises, the original ``tbls`` is returned.
+    """
+    def report(progress, message):
+        # ``callback`` defaults to None, so it must never be called unguarded:
+        # doing so inside the ``try`` below used to raise a TypeError that was
+        # swallowed and silently disabled the vision model.
+        if callback is not None:
+            callback(progress, message)
+
+    def is_figure_item(item):
+        return (
+            isinstance(item[0][0], Image.Image) and
+            isinstance(item[0][1], list)
+        )
+
+    try:
+        vision_model = LLMBundle(kwargs["tenant_id"], LLMType.IMAGE2TEXT)
+    except Exception:
+        # Best effort: without a usable vision model the tables are returned as is.
+        vision_model = None
+    if not vision_model:
+        return tbls
+
+    report(0.7, "Visual model detected. Attempting to enhance figure extraction...")
+    figures_data = [item for item in tbls if is_figure_item(item)]
+    try:
+        pdf_vision_parser = VisionFigureParser(vision_model=vision_model, figures_data=figures_data, **kwargs)
+        boosted_figures = pdf_vision_parser(callback=callback)
+        # Drop the raw figures only after the parser succeeded, so a failure
+        # keeps the original items.
+        tbls = [item for item in tbls if not is_figure_item(item)]
+        tbls.extend(boosted_figures)
+    except Exception as e:
+        report(0.8, f"Visual model error: {e}. Skipping figure parsing enhancement.")
+    return tbls

 shared_executor = ThreadPoolExecutor(max_workers=10)

--- a/docs/guides/agent/agent_component_reference/chunker_token.md
+++ b/docs/guides/agent/agent_component_reference/chunker_token.md
@ -0,0 +1,17 @@
+---
+sidebar_position: 32
+slug: /chunker_token_component
+---
+
+# Parser component
+
+A component that sets the parsing rules for your dataset.
+
+---
+
+A **Parser** component defines how various file types should be parsed, including parsing methods for PDFs, fields to parse for emails, and OCR methods for images.
+
+
+## Scenario
+
+A **Parser** component is auto-populated on the ingestion pipeline canvas and required in all ingestion pipeline workflows.
--- a/docs/references/http_api_reference.md
+++ b/docs/references/http_api_reference.md
@ -1198,23 +1198,24 @@ Failure:

 ### List documents

-**GET** `/api/v1/datasets/{dataset_id}/documents?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&keywords={keywords}&id={document_id}&name={document_name}&create_time_from={timestamp}&create_time_to={timestamp}`
+**GET** `/api/v1/datasets/{dataset_id}/documents?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&keywords={keywords}&id={document_id}&name={document_name}&create_time_from={timestamp}&create_time_to={timestamp}&suffix={file_suffix}&run={run_status}`

 Lists documents in a specified dataset.

 #### Request

 - Method: GET
- URL: `/api/v1/datasets/{dataset_id}/documents?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&keywords={keywords}&id={document_id}&name={document_name}&create_time_from={timestamp}&create_time_to={timestamp}`
+- URL: `/api/v1/datasets/{dataset_id}/documents?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&keywords={keywords}&id={document_id}&name={document_name}&create_time_from={timestamp}&create_time_to={timestamp}&suffix={file_suffix}&run={run_status}`
 - Headers:
  - `'content-Type: application/json'`
  - `'Authorization: Bearer <YOUR_API_KEY>'`

-##### Request example
+##### Request examples

+**A basic request with pagination:**
 ```bash
 curl --request GET \
-     --url http://{address}/api/v1/datasets/{dataset_id}/documents?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&keywords={keywords}&id={document_id}&name={document_name}&create_time_from={timestamp}&create_time_to={timestamp} \
+     --url http://{address}/api/v1/datasets/{dataset_id}/documents?page=1&page_size=10 \
     --header 'Authorization: Bearer <YOUR_API_KEY>'
 ```

@ -1236,10 +1237,34 @@ curl --request GET \
  Indicates whether the retrieved documents should be sorted in descending order. Defaults to `true`.
 - `id`: (*Filter parameter*), `string`  
  The ID of the document to retrieve.
- `create_time_from`: (*Filter parameter*), `integer`
+- `create_time_from`: (*Filter parameter*), `integer`  
  Unix timestamp for filtering documents created after this time. 0 means no filter. Defaults to `0`.
- `create_time_to`: (*Filter parameter*), `integer`
+- `create_time_to`: (*Filter parameter*), `integer`  
  Unix timestamp for filtering documents created before this time. 0 means no filter. Defaults to `0`.
+- `suffix`: (*Filter parameter*), `array[string]`  
+  Filter by file suffix. Supports multiple values by repeating the parameter, e.g., `suffix=pdf&suffix=txt&suffix=docx`. Defaults to all suffixes.
+- `run`: (*Filter parameter*), `array[string]`  
+  Filter by document processing status. Supports numeric, text, and mixed formats:  
+  - Numeric format: `["0", "1", "2", "3", "4"]`
+  - Text format: `["UNSTART", "RUNNING", "CANCEL", "DONE", "FAIL"]` (upper case)
+  - Mixed format: `["UNSTART", "1", "DONE"]` (mixing numeric and text formats)
+  - Status mapping:
+    - `0` / `UNSTART`: Document not yet processed
+    - `1` / `RUNNING`: Document is currently being processed
+    - `2` / `CANCEL`: Document processing was cancelled
+    - `3` / `DONE`: Document processing completed successfully
+    - `4` / `FAIL`: Document processing failed  
+  Defaults to all statuses.
+
+##### Usage examples
+
+**A request with multiple filtering parameters**
+
+```bash
+curl --request GET \
+     --url 'http://{address}/api/v1/datasets/{dataset_id}/documents?suffix=pdf&run=DONE&page=1&page_size=10' \
+     --header 'Authorization: Bearer <YOUR_API_KEY>'
+```

 #### Response

@ -1270,7 +1295,7 @@ Success:
                "process_duration": 0.0,
                "progress": 0.0,
                "progress_msg": "",
-                "run": "0",
+                "run": "UNSTART",
                "size": 7,
                "source_type": "local",
                "status": "1",
--- a/rag/app/book.py
+++ b/rag/app/book.py
@ -20,11 +20,14 @@ import re
 from io import BytesIO

 from deepdoc.parser.utils import get_text
+from rag.app import naive
 from rag.nlp import bullets_category, is_english,remove_contents_table, \
    hierarchical_merge, make_colon_as_title, naive_merge, random_choices, tokenize_table, \
    tokenize_chunks
 from rag.nlp import rag_tokenizer
-from deepdoc.parser import PdfParser, DocxParser, PlainParser, HtmlParser
+from deepdoc.parser import PdfParser, PlainParser, HtmlParser
+from deepdoc.parser.figure_parser import vision_figure_parser_pdf_wrapper,vision_figure_parser_docx_wrapper
+from PIL import Image


 class Pdf(PdfParser):
@ -81,13 +84,15 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
    sections, tbls = [], []
    if re.search(r"\.docx$", filename, re.IGNORECASE):
        callback(0.1, "Start to parse.")
-        doc_parser = DocxParser()
+        doc_parser = naive.Docx()
        # TODO: table of contents need to be removed
        sections, tbls = doc_parser(
-            binary if binary else filename, from_page=from_page, to_page=to_page)
+            filename, binary=binary, from_page=from_page, to_page=to_page)
        remove_contents_table(sections, eng=is_english(
            random_choices([t for t, _ in sections], k=200)))
-        tbls = [((None, lns), None) for lns in tbls]
+        tbls=vision_figure_parser_docx_wrapper(sections=sections,tbls=tbls,callback=callback,**kwargs)
+        # tbls = [((None, lns), None) for lns in tbls]
+        sections=[(item[0],item[1] if item[1] is not None else "") for item in sections if not isinstance(item[1], Image.Image)]
        callback(0.8, "Finish parsing.")

    elif re.search(r"\.pdf$", filename, re.IGNORECASE):
@ -96,6 +101,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
            pdf_parser = PlainParser()
        sections, tbls = pdf_parser(filename if not binary else binary,
                                    from_page=from_page, to_page=to_page, callback=callback)
+        tbls=vision_figure_parser_pdf_wrapper(tbls=tbls,callback=callback,**kwargs)

    elif re.search(r"\.txt$", filename, re.IGNORECASE):
        callback(0.1, "Start to parse.")
--- a/rag/app/manual.py
+++ b/rag/app/manual.py
@ -23,6 +23,7 @@ from io import BytesIO
 from rag.nlp import rag_tokenizer, tokenize, tokenize_table, bullets_category, title_frequency, tokenize_chunks, docx_question_level
 from rag.utils import num_tokens_from_string
 from deepdoc.parser import PdfParser, PlainParser, DocxParser
+from deepdoc.parser.figure_parser import vision_figure_parser_pdf_wrapper,vision_figure_parser_docx_wrapper
 from docx import Document
 from PIL import Image

@ -252,7 +253,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
            tk_cnt = num_tokens_from_string(txt)
            if sec_id > -1:
                last_sid = sec_id
-
+        tbls=vision_figure_parser_pdf_wrapper(tbls=tbls,callback=callback,**kwargs)
        res = tokenize_table(tbls, doc, eng)
        res.extend(tokenize_chunks(chunks, doc, eng, pdf_parser))
        return res
@ -261,6 +262,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
        docx_parser = Docx()
        ti_list, tbls = docx_parser(filename, binary,
                                    from_page=0, to_page=10000, callback=callback)
+        tbls=vision_figure_parser_docx_wrapper(sections=sections,tbls=tbls,callback=callback,**kwargs)
        res = tokenize_table(tbls, doc, eng)
        for text, image in ti_list:
            d = copy.deepcopy(doc)
--- a/rag/app/naive.py
+++ b/rag/app/naive.py
@ -32,7 +32,7 @@ from api.db import LLMType
 from api.db.services.llm_service import LLMBundle
 from api.utils.file_utils import extract_embed_file
 from deepdoc.parser import DocxParser, ExcelParser, HtmlParser, JsonParser, MarkdownElementExtractor, MarkdownParser, PdfParser, TxtParser
-from deepdoc.parser.figure_parser import VisionFigureParser, vision_figure_parser_figure_data_wrapper
+from deepdoc.parser.figure_parser import VisionFigureParser,vision_figure_parser_docx_wrapper,vision_figure_parser_pdf_wrapper
 from deepdoc.parser.pdf_parser import PlainParser, VisionParser
 from deepdoc.parser.mineru_parser import MinerUParser
 from rag.nlp import concat_img, find_codec, naive_merge, naive_merge_with_images, naive_merge_docx, rag_tokenizer, tokenize_chunks, tokenize_chunks_with_images, tokenize_table
@ -475,24 +475,13 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
    if re.search(r"\.docx$", filename, re.IGNORECASE):
        callback(0.1, "Start to parse.")

-        try:
-            vision_model = LLMBundle(kwargs["tenant_id"], LLMType.IMAGE2TEXT)
-            callback(0.15, "Visual model detected. Attempting to enhance figure extraction...")
-        except Exception:
-            vision_model = None
+        

        # fix "There is no item named 'word/NULL' in the archive", referring to https://github.com/python-openxml/python-docx/issues/1105#issuecomment-1298075246
        _SerializedRelationships.load_from_xml = load_from_xml_v2
        sections, tables = Docx()(filename, binary)

-        if vision_model:
-            figures_data = vision_figure_parser_figure_data_wrapper(sections)
-            try:
-                docx_vision_parser = VisionFigureParser(vision_model=vision_model, figures_data=figures_data, **kwargs)
-                boosted_figures = docx_vision_parser(callback=callback)
-                tables.extend(boosted_figures)
-            except Exception as e:
-                callback(0.6, f"Visual model error: {e}. Skipping figure parsing enhancement.")
+        tables=vision_figure_parser_docx_wrapper(sections=sections,tbls=tables,callback=callback,**kwargs)

        res = tokenize_table(tables, doc, is_english)
        callback(0.8, "Finish parsing.")
@ -521,25 +510,8 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,

        if layout_recognizer == "DeepDOC":
            pdf_parser = Pdf()
-
-            try:
-                vision_model = LLMBundle(kwargs["tenant_id"], LLMType.IMAGE2TEXT)
-                callback(0.15, "Visual model detected. Attempting to enhance figure extraction...")
-            except Exception:
-                vision_model = None
-
-            if vision_model:
-                sections, tables, figures = pdf_parser(filename if not binary else binary, from_page=from_page, to_page=to_page, callback=callback, separate_tables_figures=True)
-                callback(0.5, "Basic parsing complete. Proceeding with figure enhancement...")
-                try:
-                    pdf_vision_parser = VisionFigureParser(vision_model=vision_model, figures_data=figures, **kwargs)
-                    boosted_figures = pdf_vision_parser(callback=callback)
-                    tables.extend(boosted_figures)
-                except Exception as e:
-                    callback(0.6, f"Visual model error: {e}. Skipping figure parsing enhancement.")
-                    tables.extend(figures)
-            else:
-                sections, tables = pdf_parser(filename if not binary else binary, from_page=from_page, to_page=to_page, callback=callback)
+            sections, tables = pdf_parser(filename if not binary else binary, from_page=from_page, to_page=to_page, callback=callback)
+            tables=vision_figure_parser_pdf_wrapper(tbls=tables,callback=callback,**kwargs)

            res = tokenize_table(tables, doc, is_english)
            callback(0.8, "Finish parsing.")
--- a/rag/app/one.py
+++ b/rag/app/one.py
@ -23,6 +23,7 @@ from deepdoc.parser.utils import get_text
 from rag.app import naive
 from rag.nlp import rag_tokenizer, tokenize
 from deepdoc.parser import PdfParser, ExcelParser, PlainParser, HtmlParser
+from deepdoc.parser.figure_parser import vision_figure_parser_pdf_wrapper,vision_figure_parser_docx_wrapper


 class Pdf(PdfParser):
@ -57,13 +58,8 @@ class Pdf(PdfParser):

        sections = [(b["text"], self.get_position(b, zoomin))
                    for i, b in enumerate(self.boxes)]
-        for (img, rows), poss in tbls:
-            if not rows:
-                continue
-            sections.append((rows if isinstance(rows, str) else rows[0],
-                             [(p[0] + 1 - from_page, p[1], p[2], p[3], p[4]) for p in poss]))
        return [(txt, "") for txt, _ in sorted(sections, key=lambda x: (
-            x[-1][0][0], x[-1][0][3], x[-1][0][1]))], None
+            x[-1][0][0], x[-1][0][3], x[-1][0][1]))], tbls


 def chunk(filename, binary=None, from_page=0, to_page=100000,
@ -80,6 +76,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
    if re.search(r"\.docx$", filename, re.IGNORECASE):
        callback(0.1, "Start to parse.")
        sections, tbls = naive.Docx()(filename, binary)
+        tbls=vision_figure_parser_docx_wrapper(sections=sections,tbls=tbls,callback=callback,**kwargs)
        sections = [s for s, _ in sections if s]
        for (_, html), _ in tbls:
            sections.append(html)
@ -89,8 +86,14 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
        pdf_parser = Pdf()
        if parser_config.get("layout_recognize", "DeepDOC") == "Plain Text":
            pdf_parser = PlainParser()
-        sections, _ = pdf_parser(
+        sections, tbls = pdf_parser(
            filename if not binary else binary, to_page=to_page, callback=callback)
+        tbls=vision_figure_parser_pdf_wrapper(tbls=tbls,callback=callback,**kwargs)
+        for (img, rows), poss in tbls:
+            if not rows:
+                continue
+            sections.append((rows if isinstance(rows, str) else rows[0],
+                             [(p[0] + 1 - from_page, p[1], p[2], p[3], p[4]) for p in poss]))
        sections = [s for s, _ in sections if s]

    elif re.search(r"\.xlsx?$", filename, re.IGNORECASE):
--- a/rag/app/paper.py
+++ b/rag/app/paper.py
@ -18,12 +18,12 @@ import logging
 import copy
 import re

+from deepdoc.parser.figure_parser import vision_figure_parser_pdf_wrapper
 from api.db import ParserType
 from rag.nlp import rag_tokenizer, tokenize, tokenize_table, add_positions, bullets_category, title_frequency, tokenize_chunks
 from deepdoc.parser import PdfParser, PlainParser
 import numpy as np

-
 class Pdf(PdfParser):
    def __init__(self):
        self.model_speciess = ParserType.PAPER.value
@ -160,6 +160,9 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
            pdf_parser = Pdf()
            paper = pdf_parser(filename if not binary else binary,
                               from_page=from_page, to_page=to_page, callback=callback)
+        tbls=paper["tables"]
+        tbls=vision_figure_parser_pdf_wrapper(tbls=tbls,callback=callback,**kwargs)
+        paper["tables"] = tbls
    else:
        raise NotImplementedError("file type not supported yet(pdf supported)")

--- a/rag/llm/cv_model.py
+++ b/rag/llm/cv_model.py
@ -13,13 +13,16 @@
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
 #
+
 import base64
 import json
 import os
+import tempfile
 import logging
 from abc import ABC
 from copy import deepcopy
 from io import BytesIO
+from pathlib import Path
 from urllib.parse import urljoin
 import requests
 from openai import OpenAI
@ -171,6 +174,7 @@ class GptV4(Base):
    def __init__(self, key, model_name="gpt-4-vision-preview", lang="Chinese", base_url="https://api.openai.com/v1", **kwargs):
        if not base_url:
            base_url = "https://api.openai.com/v1"
+        self.api_key = key
        self.client = OpenAI(api_key=key, base_url=base_url)
        self.model_name = model_name
        self.lang = lang
@ -224,6 +228,94 @@ class QWenCV(GptV4):
             base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1"
         super().__init__(key, model_name, lang=lang, base_url=base_url, **kwargs)

+    def chat(self, system, history, gen_conf, images=[], video_bytes=None, filename=""):
+        """Summarize a video; any other chat request is reported as unsupported.
+
+        Only ``video_bytes`` and ``filename`` are used; ``system``, ``history``,
+        ``gen_conf`` and ``images`` are accepted but ignored by this override.
+
+        Returns:
+            A ``(text, token_count)`` tuple. When summarization fails or no
+            video is given, ``text`` starts with ``**ERROR**`` and the count is 0.
+        """
+        if video_bytes:
+            try:
+                summary, summary_num_tokens = self._process_video(video_bytes, filename)
+                return summary, summary_num_tokens
+            except Exception as e:
+                return "**ERROR**: " + str(e), 0
+
+        return "**ERROR**: Method chat not supported yet.", 0
+
+    def _process_video(self, video_bytes, filename):
+        """Summarize a video through DashScope's MultiModalConversation API.
+
+        The bytes are written to a temporary file because the request refers
+        to the video by a ``file://`` path. The default endpoint is tried
+        first, then the international one.
+
+        Args:
+            video_bytes: Raw content of the video.
+            filename: Original file name; only its suffix is used (``.mp4``
+                when it has none).
+
+        Returns:
+            A ``(summary, token_count)`` tuple.
+
+        Raises:
+            RuntimeError: If both endpoints fail.
+        """
+        from dashscope import MultiModalConversation
+
+        video_suffix = Path(filename).suffix or ".mp4"
+        with tempfile.NamedTemporaryFile(delete=False, suffix=video_suffix) as tmp:
+            tmp.write(video_bytes)
+            tmp_path = tmp.name
+
+        video_path = f"file://{tmp_path}"
+        messages = [
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "video": video_path,
+                        "fps": 2,
+                    },
+                    {
+                        "text": "Please summarize this video in proper sentences.",
+                    },
+                ],
+            }
+        ]
+
+        def call_api():
+            response = MultiModalConversation.call(
+                api_key=self.api_key,
+                model=self.model_name,
+                messages=messages,
+            )
+            summary = response["output"]["choices"][0]["message"].content[0]["text"]
+            return summary, num_tokens_from_string(summary)
+
+        try:
+            try:
+                return call_api()
+            except Exception as e1:
+                import dashscope
+
+                # NOTE: this sets a module-level attribute and is not restored
+                # afterwards, so the switch outlives this call.
+                dashscope.base_http_api_url = "https://dashscope-intl.aliyuncs.com/api/v1"
+                try:
+                    return call_api()
+                except Exception as e2:
+                    raise RuntimeError(f"Both default and intl endpoint failed.\nFirst error: {e1}\nSecond error: {e2}") from e2
+        finally:
+            # delete=False keeps the file past the ``with`` block so the API can
+            # read it; remove it here so uploads do not pile up in the temp dir.
+            Path(tmp_path).unlink(missing_ok=True)
+
+

 class HunyuanCV(GptV4):
    _FACTORY_NAME = "Tencent Hunyuan"
@ -616,8 +675,6 @@ class GeminiCV(Base):
    def _process_video(self, video_bytes, filename):
        from google import genai
        from google.genai import types
-        import tempfile
-        from pathlib import Path

        video_size_mb = len(video_bytes) / (1024 * 1024)
        client = genai.Client(api_key=self.api_key)
--- a/rag/nlp/init.py
+++ b/rag/nlp/init.py
@ -459,12 +459,10 @@ def tree_merge(bull, sections, depth):
                return len(BULLET_PATTERN[bull])+1, text
            else:
                return len(BULLET_PATTERN[bull])+2, text
-    
    level_set = set()
    lines = []
    for section in sections:
        level, text = get_level(bull, section)
-
        if not text.strip("\n"):
            continue
            
@ -797,8 +795,8 @@ class Node:
    def __init__(self, level, depth=-1, texts=None):
        self.level = level
        self.depth = depth
-        self.texts = texts if texts is not None else []  # 存放内容
-        self.children = []  # 子节点
+        self.texts = texts or []
+        self.children = [] 

    def add_child(self, child_node):
        self.children.append(child_node)
@ -825,35 +823,51 @@ class Node:
        return f"Node(level={self.level}, texts={self.texts}, children={len(self.children)})"

    def build_tree(self, lines):
-        stack = [self]  
-        for line in lines:
-            level, text = line
-            node = Node(level=level, texts=[text])
-
-            if level <= self.depth or self.depth == -1:
-                while stack and level <= stack[-1].get_level():
-                    stack.pop()
-
-                stack[-1].add_child(node)
-                stack.append(node)
-            else:
+        stack = [self]
+        for level, text in lines:
+            if self.depth != -1 and level > self.depth:
+                # Beyond target depth: merge content into the current leaf instead of creating deeper nodes
                stack[-1].add_text(text)
-        return self  
+                continue
+
+            # Move up until we find the proper parent whose level is strictly smaller than current
+            while len(stack) > 1 and level <= stack[-1].get_level():
+                stack.pop()
+
+            node = Node(level=level, texts=[text])
+            # Attach as child of current parent and descend
+            stack[-1].add_child(node)
+            stack.append(node)
+
+        return self

    def get_tree(self):
        tree_list = []  
-        self._dfs(self, tree_list, 0, [])
+        self._dfs(self, tree_list, [])
        return tree_list

-    def _dfs(self, node, tree_list, current_depth, titles):
+    def _dfs(self, node, tree_list, titles):
+        level = node.get_level()
+        texts = node.get_texts()
+        child = node.get_children()

-        if node.get_texts():
-            if 0 < node.get_level() < self.depth:
-                titles.extend(node.get_texts())
-            else:
-                combined_text = ["\n".join(titles + node.get_texts())]
-                tree_list.append(combined_text)
+        if level == 0 and texts:
+            tree_list.append("\n".join(titles+texts))

+        # Titles within configured depth are accumulated into the current path
+        if 1 <= level <= self.depth:
+            path_titles = titles + texts
+        else:
+            path_titles = titles

-        for child in node.get_children():
-            self._dfs(child, tree_list, current_depth + 1, titles.copy())
+        # Body outside the depth limit becomes its own chunk under the current title path
+        if level > self.depth and texts:
+            tree_list.append("\n".join(path_titles + texts))
+
+        # A leaf title within depth emits its title path as a chunk (header-only section)
+        elif not child and (1 <= level <= self.depth):
+            tree_list.append("\n".join(path_titles))
+        
+        # Recurse into children with the updated title path
+        for c in child:
+            self._dfs(c, tree_list, path_titles)
--- a/rag/raptor.py
+++ b/rag/raptor.py
@ -114,7 +114,7 @@ class RecursiveAbstractiveProcessing4TreeOrganizedRetrieval:
                            ),
                        }
                    ],
-                    {"max_tokens": self._max_token},
+                    {"max_tokens": max(self._max_token, 512)}, # fix issue:  #10235
                )
                cnt = re.sub(
                    "(······\n由于长度的原因，回答被截断了，要继续吗？|For the content length reason, it stopped, continue?)",
--- a/rag/svr/task_executor.py
+++ b/rag/svr/task_executor.py
@ -1052,13 +1052,14 @@ async def task_manager():

 async def main():
    logging.info(r"""
-  ______           __      ______                     __
- /_  __/___ ______/ /__   / ____/  _____  _______  __/ /_____  _____
-  / / / __ `/ ___/ //_/  / __/ | |/_/ _ \/ ___/ / / / __/ __ \/ ___/
- / / / /_/ (__  ) ,<    / /____>  </  __/ /__/ /_/ / /_/ /_/ / /
-/_/  \__,_/____/_/|_|  /_____/_/|_|\___/\___/\__,_/\__/\____/_/
+    ____                      __  _                                              
+   /  _/___  ____ ____  _____/ /_(_)___  ____     ________  ______   _____  _____
+   / // __ \/ __ `/ _ \/ ___/ __/ / __ \/ __ \   / ___/ _ \/ ___/ | / / _ \/ ___/
+ _/ // / / / /_/ /  __(__  ) /_/ / /_/ / / / /  (__  )  __/ /   | |/ /  __/ /    
+/___/_/ /_/\__, /\___/____/\__/_/\____/_/ /_/  /____/\___/_/    |___/\___/_/     
+          /____/        
    """)
-    logging.info(f'TaskExecutor: RAGFlow version: {get_ragflow_version()}')
+    logging.info(f'RAGFlow version: {get_ragflow_version()}')
    settings.init_settings()
    print_rag_settings()
    if sys.platform != "win32":
--- a/test/testcases/test_http_api/test_chunk_management_within_dataset/test_retrieval_chunks.py
+++ b/test/testcases/test_http_api/test_chunk_management_within_dataset/test_retrieval_chunks.py
@ -83,7 +83,7 @@ class TestChunksRetrieval:
                "ValueError('Search does not support negative slicing.')",
                marks=pytest.mark.skip,
            ),
-            pytest.param({"page": 2, "page_size": 2}, 0, 2, "", marks=pytest.mark.skip(reason="issues/6646")),
+            ({"page": 2, "page_size": 2}, 0, 2, ""),
            ({"page": 3, "page_size": 2}, 0, 0, ""),
            ({"page": "3", "page_size": 2}, 0, 0, ""),
            pytest.param(
@ -124,9 +124,9 @@ class TestChunksRetrieval:
                marks=pytest.mark.skip,
            ),
            # ({"page_size": 0}, 0, 0, ""),
-            ({"page_size": 1}, 0, 1, ""),
+            pytest.param({"page_size": 1}, 0, 1, "", marks=pytest.mark.skip(reason="issues/10692")),
            ({"page_size": 5}, 0, 4, ""),
-            ({"page_size": "1"}, 0, 1, ""),
+            pytest.param({"page_size": "1"}, 0, 1, "", marks=pytest.mark.skip(reason="issues/10692")),
            # ({"page_size": -1}, 0, 0, ""),
            pytest.param(
                {"page_size": "a"},
--- a/web/src/components/edit-tag/index.tsx
+++ b/web/src/components/edit-tag/index.tsx
@ -1,5 +1,4 @@
 import { PlusOutlined } from '@ant-design/icons';
-import { TweenOneGroup } from 'rc-tween-one';
 import React, { useEffect, useRef, useState } from 'react';

 import { X } from 'lucide-react';
@ -57,7 +56,7 @@ const EditTag = React.forwardRef<HTMLDivElement, EditTagsProps>(
        <HoverCard key={tag}>
          <HoverCardContent side="top">{tag}</HoverCardContent>
          <HoverCardTrigger asChild>
-            <div className="w-fit flex items-center justify-center gap-2 border-dashed border px-1 rounded-sm bg-bg-card">
+            <div className="w-fit flex items-center justify-center gap-2 border-dashed border px-2 py-1 rounded-sm bg-bg-card">
              <div className="flex gap-2 items-center">
                <div className="max-w-80 overflow-hidden text-ellipsis">
                  {tag}
@ -84,11 +83,11 @@ const EditTag = React.forwardRef<HTMLDivElement, EditTagsProps>(

    return (
      <div>
-        {inputVisible ? (
+        {inputVisible && (
          <Input
            ref={inputRef}
            type="text"
-            className="h-8 bg-bg-card"
+            className="h-8 bg-bg-card mb-1"
            value={inputValue}
            onChange={handleInputChange}
            onBlur={handleInputConfirm}
@ -98,36 +97,20 @@ const EditTag = React.forwardRef<HTMLDivElement, EditTagsProps>(
              }
            }}
          />
-        ) : (
-          <Button
-            variant="dashed"
-            className="w-fit flex items-center justify-center gap-2 bg-bg-card"
-            onClick={showInput}
-            style={tagPlusStyle}
-          >
-            <PlusOutlined />
-          </Button>
-        )}
-        {Array.isArray(tagChild) && tagChild.length > 0 && (
-          <TweenOneGroup
-            className="flex gap-2 flex-wrap mt-2"
-            enter={{
-              scale: 0.8,
-              opacity: 0,
-              type: 'from',
-              duration: 100,
-            }}
-            onEnd={(e) => {
-              if (e.type === 'appear' || e.type === 'enter') {
-                (e.target as any).style = 'display: inline-block';
-              }
-            }}
-            leave={{ opacity: 0, width: 0, scale: 0, duration: 200 }}
-            appear={false}
-          >
-            {tagChild}
-          </TweenOneGroup>
        )}
+        <div className="flex gap-2 py-1">
+          {Array.isArray(tagChild) && tagChild.length > 0 && <>{tagChild}</>}
+          {!inputVisible && (
+            <Button
+              variant="dashed"
+              className="w-fit flex items-center justify-center gap-2 bg-bg-card"
+              onClick={showInput}
+              style={tagPlusStyle}
+            >
+              <PlusOutlined />
+            </Button>
+          )}
+        </div>
      </div>
    );
  },
--- a/web/src/locales/en.ts
+++ b/web/src/locales/en.ts
@ -1533,8 +1533,8 @@ This delimiter is used to split the input text into several text pieces echo of
        'Your users will see this welcome message at the beginning.',
      modeTip: 'The mode defines how the workflow is initiated.',
      mode: 'Mode',
-      conversational: 'conversational',
-      task: 'task',
+      conversational: 'Conversational',
+      task: 'Task',
      beginInputTip:
        'By defining input parameters, this content can be accessed by other components in subsequent processes.',
      query: 'Query variables',
@ -1605,6 +1605,119 @@ This delimiter is used to split the input text into several text pieces echo of
      ceateAgent: 'Agent flow',
      createPipeline: 'Ingestion pipeline',
      chooseAgentType: 'Choose Agent Type',
+      parser: 'Parser',
+      parserDescription:
+        'Extracts raw text and structure from files for downstream processing.',
+      tokenizer: 'Indexer',
+      tokenizerRequired: 'Please add the Indexer node first',
+      tokenizerDescription:
+        'Transforms text into the required data structure (e.g., vector embeddings for Embedding Search) depending on the chosen search method.',
+      splitter: 'Token',
+      splitterDescription:
+        'Split text into chunks by token length with optional delimiters and overlap.',
+      hierarchicalMergerDescription:
+        'Split documents into sections by title hierarchy with regex rules for finer control.',
+      hierarchicalMerger: 'Title',
+      extractor: 'Transformer',
+      extractorDescription:
+        'Use an LLM to extract structured insights from document chunks—such as summaries, classifications, etc.',
+      outputFormat: 'Output format',
+      fileFormats: 'File format',
+      fileFormatOptions: {
+        pdf: 'PDF',
+        spreadsheet: 'Spreadsheet',
+        image: 'Image',
+        email: 'Email',
+        'text&markdown': 'Text & Markup',
+        word: 'Word',
+        slides: 'PPT',
+        audio: 'Audio',
+      },
+      fields: 'Field',
+      addParser: 'Add Parser',
+      hierarchy: 'Hierarchy',
+      regularExpressions: 'Regular Expressions',
+      overlappedPercent: 'Overlapped percent (%)',
+      searchMethod: 'Search method',
+      searchMethodTip: `Defines how the content can be searched — by full-text, embedding, or both.
+The Indexer will store the content in the corresponding data structures for the selected methods.`,
+      // file: 'File',
+      parserMethod: 'Parsing method',
+      // systemPrompt: 'System Prompt',
+      systemPromptPlaceholder:
+        'Enter system prompt for image analysis, if empty the system default value will be used',
+      exportJson: 'Export JSON',
+      viewResult: 'View result',
+      running: 'Running',
+      summary: 'Summary',
+      keywords: 'Keywords',
+      questions: 'Questions',
+      metadata: 'Metadata',
+      fieldName: 'Result destination',
+      prompts: {
+        system: {
+          keywords: `Role
+You are a text analyzer.
+
+Task
+Extract the most important keywords/phrases of a given piece of text content.
+
+Requirements
+- Summarize the text content, and give the top 5 important keywords/phrases.
+- The keywords MUST be in the same language as the given piece of text content.
+- The keywords are delimited by ENGLISH COMMA.
+- Output keywords ONLY.`,
+          questions: `Role
+You are a text analyzer.
+
+Task
+Propose 3 questions about a given piece of text content.
+
+Requirements
+- Understand and summarize the text content, and propose the top 3 important questions.
+- The questions SHOULD NOT have overlapping meanings.
+- The questions SHOULD cover the main content of the text as much as possible.
+- The questions MUST be in the same language as the given piece of text content.
+- One question per line.
+- Output questions ONLY.`,
+          summary: `Act as a precise summarizer. Your task is to create a summary of the provided content that is both concise and faithful to the original.
+
+Key Instructions:
+1. Accuracy: Strictly base the summary on the information given. Do not introduce any new facts, conclusions, or interpretations that are not explicitly stated.
+2. Language: Write the summary in the same language as the source text.
+3. Objectivity: Present the key points without bias, preserving the original intent and tone of the content. Do not editorialize.
+4. Conciseness: Focus on the most important ideas, omitting minor details and fluff.`,
+          metadata: `Extract important structured information from the given content. Output ONLY a valid JSON string with no additional text. If no important structured information is found, output an empty JSON object: {}.
+
+Important structured information may include: names, dates, locations, events, key facts, numerical data, or other extractable entities.`,
+        },
+        user: {
+          keywords: `Text Content
+[Insert text here]`,
+          questions: `Text Content
+[Insert text here]`,
+          summary: `Text to Summarize:
+[Insert text here]`,
+          metadata: `Content: [INSERT CONTENT HERE]`,
+        },
+      },
+      cancel: 'Cancel',
+      switchPromptMessage:
+        'The prompt word will change. Please confirm whether to abandon the existing prompt word?',
+      tokenizerSearchMethodOptions: {
+        full_text: 'Full-text',
+        embedding: 'Embedding',
+      },
+      filenameEmbeddingWeight: 'Filename embedding weight',
+      tokenizerFieldsOptions: {
+        text: 'Processed Text',
+        keywords: 'Keywords',
+        questions: 'Questions',
+        summary: 'Augmented Context',
+      },
+      imageParseMethodOptions: {
+        ocr: 'OCR',
+      },
    },
    llmTools: {
      bad_calculator: {
@ -1705,125 +1818,6 @@ This delimiter is used to split the input text into several text pieces echo of
      <p>Are you sure you want to proceed?</p> `,
      unlinkPipelineModalConfirmText: 'Unlink',
    },
-    dataflow: {
-      parser: 'Parser',
-      parserDescription:
-        'Extracts raw text and structure from files for downstream processing.',
-      tokenizer: 'Indexer',
-      tokenizerRequired: 'Please add the Indexer node first',
-      tokenizerDescription:
-        'Transforms text into the required data structure (e.g., vector embeddings for Embedding Search) depending on the chosen search method.',
-      splitter: 'Token',
-      splitterDescription:
-        'Split text into chunks by token length with optional delimiters and overlap.',
-      hierarchicalMergerDescription:
-        'Split documents into sections by title hierarchy with regex rules for finer control.',
-      hierarchicalMerger: 'Title',
-      extractor: 'Transformer',
-      extractorDescription:
-        'Use an LLM to extract structured insights from document chunks—such as summaries, classifications, etc.',
-      outputFormat: 'Output format',
-      lang: 'Language',
-      fileFormats: 'File format',
-      fileFormatOptions: {
-        pdf: 'PDF',
-        spreadsheet: 'Spreadsheet',
-        image: 'Image',
-        email: 'Email',
-        'text&markdown': 'Text & Markup',
-        word: 'Word',
-        slides: 'PPT',
-        audio: 'Audio',
-      },
-      fields: 'Field',
-      addParser: 'Add Parser',
-      hierarchy: 'Hierarchy',
-      regularExpressions: 'Regular Expressions',
-      overlappedPercent: 'Overlapped percent (%)',
-      searchMethod: 'Search method',
-      searchMethodTip: `Defines how the content can be searched — by full-text, embedding, or both.
-The Indexer will store the content in the corresponding data structures for the selected methods.`,
-      begin: 'File',
-      parserMethod: 'Parsing method',
-      systemPrompt: 'System Prompt',
-      systemPromptPlaceholder:
-        'Enter system prompt for image analysis, if empty the system default value will be used',
-      exportJson: 'Export JSON',
-      viewResult: 'View result',
-      running: 'Running',
-      summary: 'Summary',
-      keywords: 'Keywords',
-      questions: 'Questions',
-      metadata: 'Metadata',
-      fieldName: 'Result destination',
-      prompts: {
-        system: {
-          keywords: `Role
-You are a text analyzer.
-
-Task
-Extract the most important keywords/phrases of a given piece of text content.
-
-Requirements
- Summarize the text content, and give the top 5 important keywords/phrases.
- The keywords MUST be in the same language as the given piece of text content.
- The keywords are delimited by ENGLISH COMMA.
- Output keywords ONLY.`,
-          questions: `Role
-You are a text analyzer.
-
-Task
-Propose 3 questions about a given piece of text content.
-
-Requirements
- Understand and summarize the text content, and propose the top 3 important questions.
- The questions SHOULD NOT have overlapping meanings.
- The questions SHOULD cover the main content of the text as much as possible.
- The questions MUST be in the same language as the given piece of text content.
- One question per line.
- Output questions ONLY.`,
-          summary: `Act as a precise summarizer. Your task is to create a summary of the provided content that is both concise and faithful to the original.
-
-Key Instructions:
-1. Accuracy: Strictly base the summary on the information given. Do not introduce any new facts, conclusions, or interpretations that are not explicitly stated.
-2. Language: Write the summary in the same language as the source text.
-3. Objectivity: Present the key points without bias, preserving the original intent and tone of the content. Do not editorialize.
-4. Conciseness: Focus on the most important ideas, omitting minor details and fluff.`,
-          metadata: `Extract important structured information from the given content. Output ONLY a valid JSON string with no additional text. If no important structured information is found, output an empty JSON object: {}.
-
-Important structured information may include: names, dates, locations, events, key facts, numerical data, or other extractable entities.`,
-        },
-        user: {
-          keywords: `Text Content
-[Insert text here]`,
-          questions: `Text Content
-[Insert text here]`,
-          summary: `Text to Summarize:
-[Insert text here]`,
-          metadata: `Content: [INSERT CONTENT HERE]`,
-        },
-      },
-      cancel: 'Cancel',
-      swicthPromptMessage:
-        'The prompt word will change. Please confirm whether to abandon the existing prompt word?',
-      tokenizerSearchMethodOptions: {
-        full_text: 'Full-text',
-        embedding: 'Embedding',
-      },
-      filenameEmbeddingWeight: 'Filename embedding weight',
-      tokenizerFieldsOptions: {
-        text: 'Processed Text',
-        keywords: 'Keywords',
-        questions: 'Questions',
-        summary: 'Augmented Context',
-      },
-      imageParseMethodOptions: {
-        ocr: 'OCR',
-      },
-      note: 'Note',
-      noteDescription: 'Note',
-      notePlaceholder: 'Please enter a note',
-    },
    datasetOverview: {
      downloadTip: 'Files being downloaded from data sources. ',
      processingTip: 'Files being processed by Ingestion pipeline.',
--- a/web/src/locales/zh.ts
+++ b/web/src/locales/zh.ts
@ -1511,6 +1511,93 @@ General：实体和关系提取提示来自 GitHub - microsoft/graphrag：基于
      createFromTemplate: '从模板创建',
      importJsonFile: '导入 JSON 文件',
      chooseAgentType: '选择智能体类型',
+      parser: '解析器',
+      parserDescription: '从文件中提取原始文本和结构以供下游处理。',
+      tokenizer: '分词器',
+      tokenizerRequired: '请先添加Tokenizer节点',
+      tokenizerDescription:
+        '根据所选的搜索方法，将文本转换为所需的数据结构（例如，用于嵌入搜索的向量嵌入）。',
+      splitter: '按字符分割',
+      splitterDescription:
+        '根据分词器长度将文本拆分成块，并带有可选的分隔符和重叠。',
+      hierarchicalMergerDescription:
+        '使用正则表达式规则按标题层次结构将文档拆分成多个部分，以实现更精细的控制。',
+      hierarchicalMerger: '按标题分割',
+      extractor: '提取器',
+      extractorDescription:
+        '使用 LLM 从文档块（例如摘要、分类等）中提取结构化见解。',
+      outputFormat: '输出格式',
+      fileFormats: '文件格式',
+      fields: '字段',
+      addParser: '增加解析器',
+      hierarchy: '层次结构',
+      regularExpressions: '正则表达式',
+      overlappedPercent: '重叠百分比（%）',
+      searchMethod: '搜索方法',
+      searchMethodTip: `决定该数据集启用的搜索方式，可选择全文、向量，或两者兼有。
+Tokenizer 会根据所选方式将内容存储为对应的数据结构。`,
+      filenameEmbdWeight: '文件名嵌入权重',
+      parserMethod: '解析方法',
+      systemPromptPlaceholder:
+        '请输入用于图像分析的系统提示词，若为空则使用系统缺省值',
+      exportJson: '导出 JSON',
+      viewResult: '查看结果',
+      running: '运行中',
+      summary: '增强上下文',
+      keywords: '关键词',
+      questions: '问题',
+      metadata: '元数据',
+      fieldName: '结果目的地',
+      prompts: {
+        system: {
+          keywords: `角色
+你是一名文本分析员。
+
+任务
+从给定的文本内容中提取最重要的关键词/短语。
+
+要求
+- 总结文本内容，并给出最重要的5个关键词/短语。
+- 关键词必须与给定的文本内容使用相同的语言。
+- 关键词之间用英文逗号分隔。
+- 仅输出关键词。`,
+          questions: `角色
+你是一名文本分析员。
+
+任务
+针对给定的文本内容提出3个问题。
+
+要求
+- 理解并总结文本内容，并提出最重要的3个问题。
+- 问题的含义不应重叠。
+- 问题应尽可能涵盖文本的主要内容。
+- 问题必须与给定的文本内容使用相同的语言。
+- 每行一个问题。
+- 仅输出问题。`,
+          summary: `扮演一个精准的摘要者。你的任务是为提供的内容创建一个简洁且忠实于原文的摘要。
+
+关键说明：
+1. 准确性：摘要必须严格基于所提供的信息。请勿引入任何未明确说明的新事实、结论或解释。
+2. 语言：摘要必须使用与原文相同的语言。
+3. 客观性：不带偏见地呈现要点，保留内容的原始意图和语气。请勿进行编辑。
+4. 简洁性：专注于最重要的思想，省略细节和多余的内容。`,
+          metadata: `从给定内容中提取重要的结构化信息。仅输出有效的 JSON 字符串，不包含任何附加文本。如果未找到重要的结构化信息，则输出一个空的 JSON 对象：{}。
+
+重要的结构化信息可能包括：姓名、日期、地点、事件、关键事实、数字数据或其他可提取实体。`,
+        },
+        user: {
+          keywords: `文本内容
+[在此处插入文本]`,
+          questions: `文本内容
+[在此处插入文本]`,
+          summary: `要总结的文本：
+[在此处插入文本]`,
+          metadata: `内容：[在此处插入内容]`,
+        },
+      },
+      cancel: '取消',
+      filenameEmbeddingWeight: '文件名嵌入权重',
+      switchPromptMessage: '提示词将发生变化，请确认是否放弃已有提示词？',
    },
    footer: {
      profile: 'All rights reserved @ React',
@ -1618,101 +1705,6 @@ General：实体和关系提取提示来自 GitHub - microsoft/graphrag：基于
      <p>你确定要继续吗?</p> `,
      unlinkPipelineModalConfirmText: '解绑',
    },
-    dataflow: {
-      parser: '解析器',
-      parserDescription: '从文件中提取原始文本和结构以供下游处理。',
-      tokenizer: '分词器',
-      tokenizerRequired: '请先添加Tokenizer节点',
-      tokenizerDescription:
-        '根据所选的搜索方法，将文本转换为所需的数据结构（例如，用于嵌入搜索的向量嵌入）。',
-      splitter: '按字符分割',
-      splitterDescription:
-        '根据分词器长度将文本拆分成块，并带有可选的分隔符和重叠。',
-      hierarchicalMergerDescription:
-        '使用正则表达式规则按标题层次结构将文档拆分成多个部分，以实现更精细的控制。',
-      hierarchicalMerger: '按标题分割',
-      extractor: '提取器',
-      extractorDescription:
-        '使用 LLM 从文档块（例如摘要、分类等）中提取结构化见解。',
-      outputFormat: '输出格式',
-      lang: '语言',
-      fileFormats: '文件格式',
-      fields: '字段',
-      addParser: '增加解析器',
-      hierarchy: '层次结构',
-      regularExpressions: '正则表达式',
-      overlappedPercent: '重叠百分比（%）',
-      searchMethod: '搜索方法',
-      searchMethodTip: `决定该数据集启用的搜索方式，可选择全文、向量，或两者兼有。
-Tokenizer 会根据所选方式将内容存储为对应的数据结构。`,
-      filenameEmbdWeight: '文件名嵌入权重',
-      begin: '文件',
-      parserMethod: '解析方法',
-      systemPrompt: '系统提示词',
-      systemPromptPlaceholder:
-        '请输入用于图像分析的系统提示词，若为空则使用系统缺省值',
-      exportJson: '导出 JSON',
-      viewResult: '查看结果',
-      running: '运行中',
-      summary: '增强上下文',
-      keywords: '关键词',
-      questions: '问题',
-      metadata: '元数据',
-      fieldName: '结果目的地',
-      prompts: {
-        system: {
-          keywords: `角色
-你是一名文本分析员。
-
-任务
-从给定的文本内容中提取最重要的关键词/短语。
-
-要求
- 总结文本内容，并给出最重要的5个关键词/短语。
- 关键词必须与给定的文本内容使用相同的语言。
- 关键词之间用英文逗号分隔。
- 仅输出关键词。`,
-          questions: `角色
-你是一名文本分析员。
-
-任务
-针对给定的文本内容提出3个问题。
-
-要求
- 理解并总结文本内容，并提出最重要的3个问题。
- 问题的含义不应重叠。
- 问题应尽可能涵盖文本的主要内容。
- 问题必须与给定的文本内容使用相同的语言。
- 每行一个问题。
- 仅输出问题。`,
-          summary: `扮演一个精准的摘要者。你的任务是为提供的内容创建一个简洁且忠实于原文的摘要。
-
-关键说明：
-1. 准确性：摘要必须严格基于所提供的信息。请勿引入任何未明确说明的新事实、结论或解释。
-2. 语言：摘要必须使用与原文相同的语言。
-3. 客观性：不带偏见地呈现要点，保留内容的原始意图和语气。请勿进行编辑。
-4. 简洁性：专注于最重要的思想，省略细节和多余的内容。`,
-          metadata: `从给定内容中提取重要的结构化信息。仅输出有效的 JSON 字符串，不包含任何附加文本。如果未找到重要的结构化信息，则输出一个空的 JSON 对象：{}。
-
-重要的结构化信息可能包括：姓名、日期、地点、事件、关键事实、数字数据或其他可提取实体。`,
-        },
-        user: {
-          keywords: `文本内容
-[在此处插入文本]`,
-          questions: `文本内容
-[在此处插入文本]`,
-          summary: `要总结的文本：
-[在此处插入文本]`,
-          metadata: `内容：[在此处插入内容]`,
-        },
-      },
-      cancel: '取消',
-      filenameEmbeddingWeight: '文件名嵌入权重',
-      switchPromptMessage: '提示词将发生变化，请确认是否放弃已有提示词？',
-      note: '注释',
-      noteDescription: '注释',
-      notePlaceholder: '请输入注释',
-    },
    datasetOverview: {
      downloadTip: '正在从数据源下载文件。',
      processingTip: '正在由pipeline处理文件。',
--- a/web/src/pages/agent/canvas/node/file-node.tsx
+++ b/web/src/pages/agent/canvas/node/file-node.tsx
@ -36,7 +36,7 @@ function InnerFileNode({ data, id, selected }: NodeProps<IBeginNode>) {
      <section className="flex items-center  gap-2">
        <OperatorIcon name={data.label as Operator}></OperatorIcon>
        <div className="truncate text-center font-semibold text-sm">
-          {t(`dataflow.begin`)}
+          {t(`flow.begin`)}
        </div>
      </section>
      <section className={cn(styles.generateParameters, 'flex gap-2 flex-col')}>
--- a/web/src/pages/agent/canvas/node/handle.tsx
+++ b/web/src/pages/agent/canvas/node/handle.tsx
@ -5,6 +5,8 @@ import { Plus } from 'lucide-react';
 import { useMemo } from 'react';
 import { NodeHandleId } from '../../constant';
 import { HandleContext } from '../../context';
+import { useIsPipeline } from '../../hooks/use-is-pipeline';
+import useGraphStore from '../../store';
 import { useDropdownManager } from '../context';
 import { NextStepDropdown } from './dropdown/next-step-dropdown';

@ -14,9 +16,12 @@ export function CommonHandle({
  ...props
 }: HandleProps & { nodeId: string }) {
  const { visible, hideModal, showModal } = useSetModalState();
-
  const { canShowDropdown, setActiveDropdown, clearActiveDropdown } =
    useDropdownManager();
+  const { hasChildNode } = useGraphStore((state) => state);
+  const isPipeline = useIsPipeline();
+
+  const isConnectable = !(isPipeline && hasChildNode(nodeId)); // Using useMemo will cause isConnectable to not be updated when the subsequent connection line is deleted

  const value = useMemo(
    () => ({
@ -33,6 +38,7 @@ export function CommonHandle({
    <HandleContext.Provider value={value}>
      <Handle
        {...props}
+        isConnectable={isConnectable}
        className={cn(
          'inline-flex justify-center items-center !bg-accent-primary !border-none group-hover:!size-4 group-hover:!rounded-sm',
          className,
@ -40,6 +46,10 @@ export function CommonHandle({
        onClick={(e) => {
          e.stopPropagation();

+          if (!isConnectable) {
+            return;
+          }
+
          if (!canShowDropdown()) {
            return;
          }
--- a/web/src/pages/agent/canvas/node/parser-node.tsx
+++ b/web/src/pages/agent/canvas/node/parser-node.tsx
@ -46,7 +46,7 @@ function ParserNode({
            className="flex flex-col text-text-primary gap-1"
          >
            <span className="text-text-secondary">Parser {idx + 1}</span>
-            {t(`dataflow.fileFormatOptions.${x.fileFormat}`)}
+            {t(`flow.fileFormatOptions.${x.fileFormat}`)}
          </LabelCard>
        )}
      </NodeCollapsible>
--- a/web/src/pages/agent/canvas/node/tokenizer-node.tsx
+++ b/web/src/pages/agent/canvas/node/tokenizer-node.tsx
@ -38,12 +38,10 @@ function TokenizerNode({
        ></CommonHandle>
        <NodeHeader id={id} name={data.name} label={data.label}></NodeHeader>
        <LabelCard className="text-text-primary flex justify-between flex-col gap-1">
-          <span className="text-text-secondary">
-            {t('dataflow.searchMethod')}
-          </span>
+          <span className="text-text-secondary">{t('flow.searchMethod')}</span>
          <ul className="space-y-1">
            {data.form?.search_method.map((x) => (
-              <li key={x}>{t(`dataflow.tokenizerSearchMethodOptions.${x}`)}</li>
+              <li key={x}>{t(`flow.tokenizerSearchMethodOptions.${x}`)}</li>
            ))}
          </ul>
        </LabelCard>
--- a/web/src/pages/agent/context.ts
+++ b/web/src/pages/agent/context.ts
@ -48,13 +48,3 @@ export type HandleContextType = {
 export const HandleContext = createContext<HandleContextType>(
  {} as HandleContextType,
 );
-
-export type PipelineLogContextType = {
-  messageId: string;
-  setMessageId: (messageId: string) => void;
-  setUploadedFileData: (data: Record<string, any>) => void;
-};
-
-export const PipelineLogContext = createContext<PipelineLogContextType>(
-  {} as PipelineLogContextType,
-);
--- a/web/src/pages/agent/form/extractor-form/index.tsx
+++ b/web/src/pages/agent/form/extractor-form/index.tsx
@ -47,7 +47,7 @@ const ExtractorForm = ({ node }: INextOperatorForm) => {

  const promptOptions = useBuildNodeOutputOptions(node?.id);

-  const options = buildOptions(ContextGeneratorFieldName, t, 'dataflow');
+  const options = buildOptions(ContextGeneratorFieldName, t, 'flow');

  const {
    handleFieldNameChange,
@ -63,7 +63,7 @@ const ExtractorForm = ({ node }: INextOperatorForm) => {
    <Form {...form}>
      <FormWrapper>
        <LargeModelFormField></LargeModelFormField>
-        <RAGFlowFormItem label={t('dataflow.fieldName')} name="field_name">
+        <RAGFlowFormItem label={t('flow.fieldName')} name="field_name">
          {(field) => (
            <SelectWithSearch
              onChange={(value) => {
@ -93,7 +93,7 @@ const ExtractorForm = ({ node }: INextOperatorForm) => {
      </FormWrapper>
      {visible && (
        <ConfirmDeleteDialog
-          title={t('dataflow.switchPromptMessage')}
+          title={t('flow.switchPromptMessage')}
          open
          onOpenChange={hideModal}
          onOk={confirmSwitch}
--- a/web/src/pages/agent/form/extractor-form/use-switch-prompt.ts
+++ b/web/src/pages/agent/form/extractor-form/use-switch-prompt.ts
@ -21,7 +21,7 @@ export function useSwitchPrompt(form: UseFormReturn<ExtractorFormSchemaType>) {

  const setPromptValue = useCallback(
    (field: keyof ExtractorFormSchemaType, key: string, value: string) => {
-      form.setValue(field, t(`dataflow.prompts.${key}.${value}`), {
+      form.setValue(field, t(`flow.prompts.${key}.${value}`), {
        shouldDirty: true,
        shouldValidate: true,
      });
--- a/web/src/pages/agent/form/hierarchical-merger-form/index.tsx
+++ b/web/src/pages/agent/form/hierarchical-merger-form/index.tsx
@ -98,7 +98,7 @@ export function RegularExpressions({
      </CardHeader>
      <CardContent>
        <FormLabel required className="mb-2 text-text-secondary">
-          {t('dataflow.regularExpressions')}
+          {t('flow.regularExpressions')}
        </FormLabel>
        <section className="space-y-4">
          {fields.map((field, index) => (
@ -158,7 +158,7 @@ const HierarchicalMergerForm = ({ node }: INextOperatorForm) => {
  return (
    <Form {...form}>
      <FormWrapper>
-        <RAGFlowFormItem name={'hierarchy'} label={t('dataflow.hierarchy')}>
+        <RAGFlowFormItem name={'hierarchy'} label={t('flow.hierarchy')}>
          <SelectWithSearch options={HierarchyOptions}></SelectWithSearch>
        </RAGFlowFormItem>
        {fields.map((field, index) => (
--- a/web/src/pages/agent/form/parser-form/common-form-fields.tsx
+++ b/web/src/pages/agent/form/parser-form/common-form-fields.tsx
@ -50,7 +50,7 @@ export function OutputFormatFormField({
  return (
    <RAGFlowFormItem
      name={buildFieldNameWithPrefix(`output_format`, prefix)}
-      label={t('dataflow.outputFormat')}
+      label={t('flow.outputFormat')}
    >
      <SelectWithSearch
        options={buildOutputOptionsFormatMap()[fileType]}
@ -69,7 +69,7 @@ export function ParserMethodFormField({
      name={buildFieldNameWithPrefix(`parse_method`, prefix)}
      horizontal={false}
      optionsWithoutLLM={optionsWithoutLLM}
-      label={t('dataflow.parserMethod')}
+      label={t('flow.parserMethod')}
    ></LayoutRecognizeFormField>
  );
 }
@ -92,7 +92,7 @@ export function LanguageFormField({ prefix }: CommonProps) {
  return (
    <RAGFlowFormItem
      name={buildFieldNameWithPrefix(`lang`, prefix)}
-      label={t('dataflow.lang')}
+      label={t('flow.lang')}
    >
      {(field) => (
        <SelectWithSearch
--- a/web/src/pages/agent/form/parser-form/email-form-fields.tsx
+++ b/web/src/pages/agent/form/parser-form/email-form-fields.tsx
@ -14,7 +14,7 @@ export function EmailFormFields({ prefix }: CommonProps) {
    <>
      <RAGFlowFormItem
        name={buildFieldNameWithPrefix(`fields`, prefix)}
-        label={t('dataflow.fields')}
+        label={t('flow.fields')}
      >
        {(field) => (
          <MultiSelect
--- a/web/src/pages/agent/form/parser-form/image-form-fields.tsx
+++ b/web/src/pages/agent/form/parser-form/image-form-fields.tsx
@ -17,7 +17,7 @@ export function ImageFormFields({ prefix }: CommonProps) {
  const options = buildOptions(
    ImageParseMethod,
    t,
-    'dataflow.imageParseMethodOptions',
+    'flow.imageParseMethodOptions',
  );
  const parseMethodName = buildFieldNameWithPrefix('parse_method', prefix);

@ -50,9 +50,9 @@ export function ImageFormFields({ prefix }: CommonProps) {
      {languageShown && (
        <RAGFlowFormItem
          name={buildFieldNameWithPrefix('system_prompt', prefix)}
-          label={t('dataflow.systemPrompt')}
+          label={t('flow.systemPrompt')}
        >
-          <Textarea placeholder={t('dataflow.systemPromptPlaceholder')} />
+          <Textarea placeholder={t('flow.systemPromptPlaceholder')} />
        </RAGFlowFormItem>
      )}
    </>
--- a/web/src/pages/agent/form/parser-form/index.tsx
+++ b/web/src/pages/agent/form/parser-form/index.tsx
@ -133,7 +133,7 @@ function ParserItem({
      </div>
      <RAGFlowFormItem
        name={buildFieldNameWithPrefix(`fileFormat`, prefix)}
-        label={t('dataflow.fileFormats')}
+        label={t('flow.fileFormats')}
      >
        {(field) => (
          <SelectWithSearch
@ -165,7 +165,7 @@ const ParserForm = ({ node }: INextOperatorForm) => {
  const FileFormatOptions = buildOptions(
    FileType,
    t,
-    'dataflow.fileFormatOptions',
+    'flow.fileFormatOptions',
  ).filter(
    (x) => x.value !== FileType.Video, // Temporarily hide the video option
  );
@ -212,7 +212,7 @@ const ParserForm = ({ node }: INextOperatorForm) => {
        })}
        {fields.length < FileFormatOptions.length && (
          <BlockButton onClick={add} type="button" className="mt-2.5">
-            {t('dataflow.addParser')}
+            {t('flow.addParser')}
          </BlockButton>
        )}
      </form>
--- a/web/src/pages/agent/form/retrieval-form/next.tsx
+++ b/web/src/pages/agent/form/retrieval-form/next.tsx
@ -2,6 +2,10 @@ import { Collapse } from '@/components/collapse';
 import { CrossLanguageFormField } from '@/components/cross-language-form-field';
 import { FormContainer } from '@/components/form-container';
 import { KnowledgeBaseFormField } from '@/components/knowledge-base-item';
+import {
+  MetadataFilter,
+  MetadataFilterSchema,
+} from '@/components/metadata-filter';
 import { RAGFlowFormItem } from '@/components/ragflow-form';
 import { RerankFormFields } from '@/components/rerank';
 import { SimilaritySliderFormField } from '@/components/similarity-slider';
@ -41,6 +45,7 @@ export const RetrievalPartialSchema = {
  cross_languages: z.array(z.string()),
  use_kg: z.boolean(),
  toc_enhance: z.boolean(),
+  ...MetadataFilterSchema,
 };

 export const FormSchema = z.object({
@ -118,6 +123,7 @@ function RetrievalForm({ node }: INextOperatorForm) {
            ></SimilaritySliderFormField>
            <TopNFormField></TopNFormField>
            <RerankFormFields></RerankFormFields>
+            <MetadataFilter></MetadataFilter>
            <EmptyResponseField></EmptyResponseField>
            <CrossLanguageFormField name="cross_languages"></CrossLanguageFormField>
            <UseKnowledgeGraphFormField name="use_kg"></UseKnowledgeGraphFormField>
--- a/web/src/pages/agent/form/splitter-form/index.tsx
+++ b/web/src/pages/agent/form/splitter-form/index.tsx
@ -60,7 +60,7 @@ const SplitterForm = ({ node }: INextOperatorForm) => {
          name="overlapped_percent"
          max={30}
          min={0}
-          label={t('dataflow.overlappedPercent')}
+          label={t('flow.overlappedPercent')}
        ></SliderInputFormField>
        <section>
          <span className="mb-2 inline-block">{t('flow.delimiters')}</span>
--- a/web/src/pages/agent/form/tokenizer-form/index.tsx
+++ b/web/src/pages/agent/form/tokenizer-form/index.tsx
@ -38,12 +38,12 @@ const TokenizerForm = ({ node }: INextOperatorForm) => {
  const SearchMethodOptions = buildOptions(
    TokenizerSearchMethod,
    t,
-    `dataflow.tokenizerSearchMethodOptions`,
+    `flow.tokenizerSearchMethodOptions`,
  );
  const FieldsOptions = buildOptions(
    TokenizerFields,
    t,
-    'dataflow.tokenizerFieldsOptions',
+    'flow.tokenizerFieldsOptions',
  );

  const form = useForm<TokenizerFormSchemaType>({
@ -59,8 +59,8 @@ const TokenizerForm = ({ node }: INextOperatorForm) => {
      <FormWrapper>
        <RAGFlowFormItem
          name="search_method"
-          label={t('dataflow.searchMethod')}
-          tooltip={t('dataflow.searchMethodTip')}
+          label={t('flow.searchMethod')}
+          tooltip={t('flow.searchMethodTip')}
        >
          {(field) => (
            <MultiSelect
@ -73,11 +73,11 @@ const TokenizerForm = ({ node }: INextOperatorForm) => {
        </RAGFlowFormItem>
        <SliderInputFormField
          name="filename_embd_weight"
-          label={t('dataflow.filenameEmbeddingWeight')}
+          label={t('flow.filenameEmbeddingWeight')}
          max={0.5}
          step={0.01}
        ></SliderInputFormField>
-        <RAGFlowFormItem name="fields" label={t('dataflow.fields')}>
+        <RAGFlowFormItem name="fields" label={t('flow.fields')}>
          {(field) => <SelectWithSearch options={FieldsOptions} {...field} />}
        </RAGFlowFormItem>
      </FormWrapper>
--- a/web/src/pages/agent/form/tool-form/retrieval-form/index.tsx
+++ b/web/src/pages/agent/form/tool-form/retrieval-form/index.tsx
@ -2,6 +2,7 @@ import { Collapse } from '@/components/collapse';
 import { CrossLanguageFormField } from '@/components/cross-language-form-field';
 import { FormContainer } from '@/components/form-container';
 import { KnowledgeBaseFormField } from '@/components/knowledge-base-item';
+import { MetadataFilter } from '@/components/metadata-filter';
 import { RerankFormFields } from '@/components/rerank';
 import { SimilaritySliderFormField } from '@/components/similarity-slider';
 import { TOCEnhanceFormField } from '@/components/toc-enhance-form-field';
@ -51,6 +52,7 @@ const RetrievalForm = () => {
            ></SimilaritySliderFormField>
            <TopNFormField></TopNFormField>
            <RerankFormFields></RerankFormFields>
+            <MetadataFilter></MetadataFilter>
            <EmptyResponseField></EmptyResponseField>
            <CrossLanguageFormField name="cross_languages"></CrossLanguageFormField>
            <UseKnowledgeGraphFormField name="use_kg"></UseKnowledgeGraphFormField>
--- a/web/src/pages/agent/hooks/use-add-node.ts
+++ b/web/src/pages/agent/hooks/use-add-node.ts
@ -128,8 +128,8 @@ export const useInitializeOperatorParams = () => {
      [Operator.Extractor]: {
        ...initialExtractorValues,
        llm_id: llmId,
-        sys_prompt: t('dataflow.prompts.system.summary'),
-        prompts: t('dataflow.prompts.user.summary'),
+        sys_prompt: t('flow.prompts.system.summary'),
+        prompts: t('flow.prompts.user.summary'),
      },
    };
  }, [llmId]);
--- a/web/src/pages/agent/hooks/use-run-dataflow.ts
+++ b/web/src/pages/agent/hooks/use-run-dataflow.ts
@ -0,0 +1,55 @@
+import message from '@/components/ui/message';
+import { useSendMessageBySSE } from '@/hooks/use-send-message';
+import api from '@/utils/api';
+import { get } from 'lodash';
+import { useCallback, useState } from 'react';
+import { useParams } from 'umi';
+import { UseFetchLogReturnType } from './use-fetch-pipeline-log';
+import { useSaveGraph } from './use-save-graph';
+
+export function useRunDataflow({
+  showLogSheet,
+  setMessageId,
+}: {
+  showLogSheet: () => void;
+} & Pick<UseFetchLogReturnType, 'setMessageId'>) {
+  const { send } = useSendMessageBySSE(api.runCanvas);
+  const { id } = useParams();
+  const { saveGraph, loading } = useSaveGraph();
+  const [uploadedFileData, setUploadedFileData] =
+    useState<Record<string, any>>();
+
+  const run = useCallback(
+    async (fileResponseData: Record<string, any>) => {
+      const saveRet = await saveGraph();
+      const success = saveRet?.code === 0;
+      if (!success) return;
+
+      showLogSheet();
+      const res = await send({
+        id,
+        query: '',
+        session_id: null,
+        files: [fileResponseData.file],
+      });
+
+      if (res && res?.response.status === 200 && get(res, 'data.code') === 0) {
+        // fetch canvas
+        setUploadedFileData(fileResponseData.file);
+        const msgId = get(res, 'data.data.message_id');
+        if (msgId) {
+          setMessageId(msgId);
+        }
+
+        return msgId;
+      } else {
+        message.error(get(res, 'data.message', ''));
+      }
+    },
+    [id, saveGraph, send, setMessageId, setUploadedFileData, showLogSheet],
+  );
+
+  return { run, loading: loading, uploadedFileData };
+}
+
+export type RunDataflowType = ReturnType<typeof useRunDataflow>;
--- a/web/src/pages/agent/hooks/use-show-drawer.tsx
+++ b/web/src/pages/agent/hooks/use-show-drawer.tsx
@ -61,7 +61,7 @@ export const useShowSingleDebugDrawer = () => {
  };
 };

-const ExcludedNodes = [Operator.Note, Operator.Placeholder];
+const ExcludedNodes = [Operator.Note, Operator.Placeholder, Operator.File];

 export function useShowDrawer({
  drawerVisible,
--- a/web/src/pages/agent/index.tsx
+++ b/web/src/pages/agent/index.tsx
@ -32,25 +32,26 @@ import {
  Settings,
  Upload,
 } from 'lucide-react';
-import { ComponentPropsWithoutRef, useCallback, useState } from 'react';
+import { ComponentPropsWithoutRef, useCallback } from 'react';
 import { useTranslation } from 'react-i18next';
 import { useParams } from 'umi';
 import AgentCanvas from './canvas';
 import { DropdownProvider } from './canvas/context';
 import { Operator } from './constant';
-import { PipelineLogContext } from './context';
 import { useCancelCurrentDataflow } from './hooks/use-cancel-dataflow';
 import { useHandleExportJsonFile } from './hooks/use-export-json';
 import { useFetchDataOnMount } from './hooks/use-fetch-data';
 import { useFetchPipelineLog } from './hooks/use-fetch-pipeline-log';
 import { useGetBeginNodeDataInputs } from './hooks/use-get-begin-query';
 import { useIsPipeline } from './hooks/use-is-pipeline';
+import { useRunDataflow } from './hooks/use-run-dataflow';
 import {
  useSaveGraph,
  useSaveGraphBeforeOpeningDebugDrawer,
  useWatchAgentChange,
 } from './hooks/use-save-graph';
 import { PipelineLogSheet } from './pipeline-log-sheet';
+import PipelineRunSheet from './pipeline-run-sheet';
 import { SettingDialog } from './setting-dialog';
 import useGraphStore from './store';
 import { useAgentHistoryManager } from './use-agent-history-manager';
@ -110,6 +111,12 @@ export default function Agent() {

  // pipeline

+  const {
+    visible: pipelineRunSheetVisible,
+    hideModal: hidePipelineRunSheet,
+    showModal: showPipelineRunSheet,
+  } = useSetModalState();
+
  const {
    visible: pipelineLogSheetVisible,
    showModal: showPipelineLogSheet,
@ -126,13 +133,11 @@ export default function Agent() {
    isLogEmpty,
  } = useFetchPipelineLog(pipelineLogSheetVisible);

-  const [uploadedFileData, setUploadedFileData] =
-    useState<Record<string, any>>();
  const findNodeByName = useGraphStore((state) => state.findNodeByName);

  const handleRunPipeline = useCallback(() => {
    if (!findNodeByName(Operator.Tokenizer)) {
-      message.warning(t('dataflow.tokenizerRequired'));
+      message.warning(t('flow.tokenizerRequired'));
      return;
    }

@ -141,14 +146,15 @@ export default function Agent() {
      showPipelineLogSheet();
    } else {
      hidePipelineLogSheet();
-      handleRun();
+      // handleRun();
+      showPipelineRunSheet();
    }
  }, [
    findNodeByName,
-    handleRun,
    hidePipelineLogSheet,
    isParsing,
    showPipelineLogSheet,
+    showPipelineRunSheet,
    t,
  ]);

@ -157,7 +163,7 @@ export default function Agent() {
    stopFetchTrace,
  });

-  const run = useCallback(() => {
+  const handleButtonRunClick = useCallback(() => {
    if (isPipeline) {
      handleRunPipeline();
    } else {
@ -165,6 +171,12 @@ export default function Agent() {
    }
  }, [handleRunAgent, handleRunPipeline, isPipeline]);

+  const {
+    run: runPipeline,
+    loading: pipelineRunning,
+    uploadedFileData,
+  } = useRunDataflow({ showLogSheet: showPipelineLogSheet, setMessageId });
+
  return (
    <section className="h-full">
      <PageHeader>
@ -194,7 +206,7 @@ export default function Agent() {
          >
            <LaptopMinimalCheck /> {t('flow.save')}
          </ButtonLoading>
-          <Button variant={'secondary'} onClick={run}>
+          <Button variant={'secondary'} onClick={handleButtonRunClick}>
            <CirclePlay />
            {t('flow.run')}
          </Button>
@ -241,18 +253,14 @@ export default function Agent() {
          </DropdownMenu>
        </div>
      </PageHeader>
-      <PipelineLogContext.Provider
-        value={{ messageId, setMessageId, setUploadedFileData }}
-      >
-        <ReactFlowProvider>
-          <DropdownProvider>
-            <AgentCanvas
-              drawerVisible={chatDrawerVisible}
-              hideDrawer={hideChatDrawer}
-            ></AgentCanvas>
-          </DropdownProvider>
-        </ReactFlowProvider>
-      </PipelineLogContext.Provider>
+      <ReactFlowProvider>
+        <DropdownProvider>
+          <AgentCanvas
+            drawerVisible={chatDrawerVisible}
+            hideDrawer={hideChatDrawer}
+          ></AgentCanvas>
+        </DropdownProvider>
+      </ReactFlowProvider>
      {embedVisible && (
        <EmbedDialog
          visible={embedVisible}
@ -284,6 +292,13 @@ export default function Agent() {
          uploadedFileData={uploadedFileData}
        ></PipelineLogSheet>
      )}
+      {pipelineRunSheetVisible && (
+        <PipelineRunSheet
+          hideModal={hidePipelineRunSheet}
+          run={runPipeline}
+          loading={pipelineRunning}
+        ></PipelineRunSheet>
+      )}
    </section>
  );
 }
--- a/web/src/pages/agent/pipeline-log-sheet/index.tsx
+++ b/web/src/pages/agent/pipeline-log-sheet/index.tsx
@ -77,7 +77,7 @@ export function PipelineLogSheet({
                    uploadedFileData?.extension,
                })}
              >
-                {t('dataflow.viewResult')} <ArrowUpRight />
+                {t('flow.viewResult')} <ArrowUpRight />
              </Button>
            )}
          </SheetTitle>
@ -95,7 +95,7 @@ export function PipelineLogSheet({
              className="w-full mt-8 bg-state-error/10 text-state-error hover:bg-state-error hover:text-bg-base"
              onClick={handleCancel}
            >
-              <CirclePause /> {t('dataflow.cancel')}
+              <CirclePause /> {t('flow.cancel')}
            </Button>
          ) : (
            <Button
@ -104,7 +104,7 @@ export function PipelineLogSheet({
              className="w-full mt-8 bg-accent-primary-5 text-text-secondary hover:bg-accent-primary-5  hover:text-accent-primary hover:border-accent-primary hover:border"
            >
              <SquareArrowOutUpRight />
-              {t('dataflow.exportJson')}
+              {t('flow.exportJson')}
            </Button>
          )}
        </div>
--- a/web/src/pages/agent/pipeline-run-sheet/index.tsx
+++ b/web/src/pages/agent/pipeline-run-sheet/index.tsx
@ -0,0 +1,31 @@
+import {
+  Sheet,
+  SheetContent,
+  SheetHeader,
+  SheetTitle,
+} from '@/components/ui/sheet';
+import { IModalProps } from '@/interfaces/common';
+import { cn } from '@/lib/utils';
+import { useTranslation } from 'react-i18next';
+import { RunDataflowType } from '../hooks/use-run-dataflow';
+import { UploaderForm } from './uploader';
+
+type RunSheetProps = IModalProps<any> &
+  Pick<RunDataflowType, 'run' | 'loading'>;
+
+const PipelineRunSheet = ({ hideModal, run, loading }: RunSheetProps) => {
+  const { t } = useTranslation();
+
+  return (
+    <Sheet onOpenChange={hideModal} open modal={false}>
+      <SheetContent className={cn('top-20 p-2')}>
+        <SheetHeader>
+          <SheetTitle>{t('flow.testRun')}</SheetTitle>
+          <UploaderForm ok={run} loading={loading}></UploaderForm>
+        </SheetHeader>
+      </SheetContent>
+    </Sheet>
+  );
+};
+
+export default PipelineRunSheet;
--- a/web/src/pages/agent/pipeline-run-sheet/uploader.tsx
+++ b/web/src/pages/agent/pipeline-run-sheet/uploader.tsx
@ -0,0 +1,57 @@
+'use client';
+
+import { z } from 'zod';
+
+import { RAGFlowFormItem } from '@/components/ragflow-form';
+import { ButtonLoading } from '@/components/ui/button';
+import { Form } from '@/components/ui/form';
+import { FileUploadDirectUpload } from '@/pages/agent/debug-content/uploader';
+import { zodResolver } from '@hookform/resolvers/zod';
+import { useForm } from 'react-hook-form';
+import { useTranslation } from 'react-i18next';
+
+const formSchema = z.object({
+  file: z.record(z.any()),
+});
+
+export type FormSchemaType = z.infer<typeof formSchema>;
+
+type UploaderFormProps = {
+  ok: (values: FormSchemaType) => void;
+  loading: boolean;
+};
+
+export function UploaderForm({ ok, loading }: UploaderFormProps) {
+  const { t } = useTranslation();
+  const form = useForm<FormSchemaType>({
+    resolver: zodResolver(formSchema),
+    defaultValues: {},
+  });
+
+  return (
+    <Form {...form}>
+      <form onSubmit={form.handleSubmit(ok)} className="space-y-8">
+        <RAGFlowFormItem name="file">
+          {(field) => {
+            return (
+              <FileUploadDirectUpload
+                value={field.value}
+                onChange={field.onChange}
+              ></FileUploadDirectUpload>
+            );
+          }}
+        </RAGFlowFormItem>
+
+        <div>
+          <ButtonLoading
+            type="submit"
+            loading={loading}
+            className="w-full mt-1"
+          >
+            {t('flow.run')}
+          </ButtonLoading>
+        </div>
+      </form>
+    </Form>
+  );
+}
--- a/web/src/pages/agent/store.ts
+++ b/web/src/pages/agent/store.ts
@ -89,6 +89,7 @@ export type RFState = {
  ) => void; // Deleting a condition of a classification operator will delete the related edge
  findAgentToolNodeById: (id: string | null) => string | undefined;
  selectNodeIds: (nodeIds: string[]) => void;
+  hasChildNode: (nodeId: string) => boolean;
 };

 // this is our useStore hook that we can use in our components to get parts of the store and call actions
@ -527,6 +528,10 @@ const useGraphStore = create<RFState>()(
          })),
        );
      },
+      hasChildNode: (nodeId) => {
+        const { edges } = get();
+        return edges.some((edge) => edge.source === nodeId);
+      },
    })),
    { name: 'graph', trace: true },
  ),
--- a/web/src/pages/agent/utils.ts
+++ b/web/src/pages/agent/utils.ts
@ -9,16 +9,30 @@ import { removeUselessFieldsFromValues } from '@/utils/form';
 import { Edge, Node, XYPosition } from '@xyflow/react';
 import { FormInstance, FormListFieldData } from 'antd';
 import { humanId } from 'human-id';
-import { curry, get, intersectionWith, isEqual, omit, sample } from 'lodash';
+import {
+  curry,
+  get,
+  intersectionWith,
+  isEmpty,
+  isEqual,
+  omit,
+  sample,
+} from 'lodash';
 import pipe from 'lodash/fp/pipe';
 import isObject from 'lodash/isObject';
 import {
  CategorizeAnchorPointPositions,
+  FileType,
+  FileTypeSuffixMap,
  NoCopyOperatorsList,
  NoDebugOperatorsList,
  NodeHandleId,
  Operator,
 } from './constant';
+import { ExtractorFormSchemaType } from './form/extractor-form';
+import { HierarchicalMergerFormSchemaType } from './form/hierarchical-merger-form';
+import { ParserFormSchemaType } from './form/parser-form';
+import { SplitterFormSchemaType } from './form/splitter-form';
 import { BeginQuery, IPosition } from './interface';

 function buildAgentExceptionGoto(edges: Edge[], nodeId: string) {
@ -170,6 +184,92 @@ export function hasSubAgent(edges: Edge[], nodeId?: string) {
  return !!edge;
 }

+// react-hook-form field arrays must contain objects, so extract the given field
+// from each item to build the array of plain values required by the backend.
+function transformObjectArrayToPureArray(
+  list: Array<Record<string, any>>,
+  field: string,
+) {
+  return Array.isArray(list)
+    ? list.filter((x) => !isEmpty(x[field])).map((y) => y[field])
+    : [];
+}
+
+function transformParserParams(params: ParserFormSchemaType) {
+  const setups = params.setups.reduce<
+    Record<string, ParserFormSchemaType['setups'][0]>
+  >((pre, cur) => {
+    if (cur.fileFormat) {
+      let filteredSetup: Partial<
+        ParserFormSchemaType['setups'][0] & { suffix: string[] }
+      > = {
+        output_format: cur.output_format,
+        suffix: FileTypeSuffixMap[cur.fileFormat as FileType],
+      };
+
+      switch (cur.fileFormat) {
+        case FileType.PDF:
+          filteredSetup = {
+            ...filteredSetup,
+            parse_method: cur.parse_method,
+            lang: cur.lang,
+          };
+          break;
+        case FileType.Image:
+          filteredSetup = {
+            ...filteredSetup,
+            parse_method: cur.parse_method,
+            lang: cur.lang,
+            system_prompt: cur.system_prompt,
+          };
+          break;
+        case FileType.Email:
+          filteredSetup = {
+            ...filteredSetup,
+            fields: cur.fields,
+          };
+          break;
+        case FileType.Video:
+        case FileType.Audio:
+          filteredSetup = {
+            ...filteredSetup,
+            llm_id: cur.llm_id,
+          };
+          break;
+        default:
+          break;
+      }
+
+      pre[cur.fileFormat] = filteredSetup;
+    }
+    return pre;
+  }, {});
+
+  return { ...params, setups };
+}
+
+function transformSplitterParams(params: SplitterFormSchemaType) {
+  return {
+    ...params,
+    overlapped_percent: Number(params.overlapped_percent) / 100,
+    delimiters: transformObjectArrayToPureArray(params.delimiters, 'value'),
+  };
+}
+
+function transformHierarchicalMergerParams(
+  params: HierarchicalMergerFormSchemaType,
+) {
+  const levels = params.levels.map((x) =>
+    transformObjectArrayToPureArray(x.expressions, 'expression'),
+  );
+
+  return { ...params, hierarchy: Number(params.hierarchy), levels };
+}
+
+function transformExtractorParams(params: ExtractorFormSchemaType) {
+  return { ...params, prompts: [{ content: params.prompts, role: 'user' }] };
+}
+
 // construct a dsl based on the node information of the graph
 export const buildDslComponentsByGraph = (
  nodes: RAGFlowNodeType[],
@ -202,6 +302,21 @@ export const buildDslComponentsByGraph = (
          params = buildCategorize(edges, nodes, id);
          break;

+        case Operator.Parser:
+          params = transformParserParams(params);
+          break;
+
+        case Operator.Splitter:
+          params = transformSplitterParams(params);
+          break;
+
+        case Operator.HierarchicalMerger:
+          params = transformHierarchicalMergerParams(params);
+          break;
+        case Operator.Extractor:
+          params = transformExtractorParams(params);
+          break;
+
        default:
          break;
      }
--- a/web/src/pages/dataflow-result/interface.ts
+++ b/web/src/pages/dataflow-result/interface.ts
@ -148,6 +148,6 @@ export interface NavigateToDataflowResultProps {
  [PipelineResultSearchParams.AgentTitle]?: string;
  [PipelineResultSearchParams.IsReadOnly]?: string;
  [PipelineResultSearchParams.Type]: string;
-  [PipelineResultSearchParams.CreatedBy]: string;
-  [PipelineResultSearchParams.DocumentExtension]: string;
+  [PipelineResultSearchParams.CreatedBy]?: string;
+  [PipelineResultSearchParams.DocumentExtension]?: string;
 }
--- a/web/src/pages/dataset/dataset-overview/overview-table.tsx
+++ b/web/src/pages/dataset/dataset-overview/overview-table.tsx
@ -311,7 +311,6 @@ const FileLogsTable: FC<FileLogsTableProps> = ({
  data,
  pagination,
  setPagination,
-  loading,
  active = LogTabs.FILE_LOGS,
 }) => {
  const [sorting, setSorting] = useState<SortingState>([]);
@ -328,13 +327,13 @@ const FileLogsTable: FC<FileLogsTableProps> = ({
      fileName: row.original.document_name,
      source: row.original.source_from,
      task: row.original?.task_type,
-      status: row.original.statusName,
+      status: row.original.status as RunningStatus,
      startDate: formatDate(row.original.process_begin_at),
      duration: formatSecondsToHumanReadable(
        row.original.process_duration || 0,
      ),
      details: row.original.progress_msg,
-    };
+    } as unknown as IFileLogItem;
    console.log('logDetail', logDetail);
    setLogInfo(logDetail);
    setIsModalVisible(true);
Author	SHA1	Message	Date
Yongteng Lei	cd77425b87	Fix: potential negative max_tokens in RAPTOR (#10701 ) ### What problem does this PR solve? Fix potential negative max_tokens in RAPTOR. #10235. ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue)	2025-10-21 15:49:51 +08:00
balibabu	544c9990e3	Feat: Move the pipeline translation field to flow #9869 (#10697 ) ### What problem does this PR solve? Feat: Move the pipeline translation field to flow #9869 ### Type of change - [X] New Feature (non-breaking change which adds functionality)	2025-10-21 15:23:37 +08:00
balibabu	41a647fe32	Feat: A pipeline's child node can only have one node #9869 (#10695 ) ### What problem does this PR solve? Feat: A pipeline's child node can only have one node #9869 ### Type of change - [x] New Feature (non-breaking change which adds functionality)	2025-10-21 13:55:46 +08:00
Liu An	594bf485d4	Test: update test cases for chunk retrieval pagination (#10694 ) ### What problem does this PR solve? Updated test cases in test_retrieval_chunks.py to: - Remove skip mark from page pagination test case (issues/6646 resolved) - Add skip marks for page_size=1 tests due to new issue (issues/10692) ### Type of change - [x] Test	2025-10-21 13:02:29 +08:00
Billy Bao	863c3e3d9c	Fix: tree merge (#10691 ) ### What problem does this PR solve? Fix: Fix tree merge, solved #10636 ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue)	2025-10-21 13:02:01 +08:00
balibabu	1767039be3	Feat: Display the pipeline operation sheet on the agent page #9869 (#10690 ) ### What problem does this PR solve? Feat: Display the pipeline operation sheet on the agent page #9869 ### Type of change - [x] New Feature (non-breaking change which adds functionality)	2025-10-21 12:59:30 +08:00
Billy Bao	cd75fa02b1	Feat: Make knowledge base renaming automatically reflected in agent discussions, solved #10597 (#10680 ) ### What problem does this PR solve? Feat: Make knowledge base renaming automatically reflected in agent discussions, solved #10597 ### Type of change - [x] New Feature (non-breaking change which adds functionality)	2025-10-21 10:42:05 +08:00
Billy Bao	cfdd37820a	Feat: Support attribute filtering #8703 (#10670 ) ### What problem does this PR solve? Feat: Support attribute filtering #8703 ### Type of change - [X] New Feature (non-breaking change which adds functionality) --------- Co-authored-by: writinwaters <93570324+writinwaters@users.noreply.github.com> Co-authored-by: writinwaters <cai.keith@gmail.com>	2025-10-21 10:38:40 +08:00
Stephen Hu	9d12380806	Fix: Excel2HTML can't support XLS（Excel 97-2003） (#10660 ) ### What problem does this PR solve? https://github.com/infiniflow/ragflow/issues/10602 ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue)	2025-10-21 09:52:59 +08:00
buua436	866098634b	Feat:setting metadata in the retrieval (#10682 ) ### What problem does this PR solve? issue: [#9272](https://github.com/infiniflow/ragflow/issues/9272) change: setting metadata in the retrieval ### Type of change - [x] New Feature (non-breaking change which adds functionality)	2025-10-21 09:52:26 +08:00
chanx	8013505daf	Fix(edit-tag): Fix the bug that the edit-tag tag cannot be deleted #9869 (#10679 ) ### What problem does this PR solve? fix(edit-tag): Fix the bug that the edit-tag tag cannot be deleted #9869 ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue)	2025-10-21 09:38:36 +08:00
Jin Hai	deb81810e9	Update message printout when start ingestion server (#10677 ) ### What problem does this PR solve? ``` ____ __ _ / _/ ____ ____ _ ___ _____ / /_ (_) ____ ____ _____ ___ _____ _ __ ___ _____ / / / __ \ / __ `/ / _ \ / ___/ / __/ / / / __ \ / __ \ / ___/ / _ \ / ___/\| \| / / / _ \ / ___/ _/ / / / / / / /_/ / / __/ (__ ) / /_ / / / /_/ / / / / / (__ ) / __/ / / \| \|/ / / __/ / / /___/ /_/ /_/ \__, / \___/ /____/ \__/ /_/ \____/ /_/ /_/ /____/ \___/ /_/ \|___/ \___/ /_/ /____/ ``` ### Type of change - [x] Refactoring Signed-off-by: Jin Hai <haijin.chn@gmail.com>	2025-10-21 09:38:20 +08:00
buua436	6ab96287c9	Feat:Vision Model Image Enhancement in Manual/Paper/Book/One chunker (#10640 ) ### What problem does this PR solve? issue: [#7472](https://github.com/infiniflow/ragflow/issues/7472) change: Vision Model Image Enhancement in Manual chunker ### Type of change - [x] New Feature (non-breaking change which adds functionality)	2025-10-21 09:36:27 +08:00
Yongteng Lei	aaa4776657	Feat: Qwen-VL series supports video parsing (#10676 ) ### What problem does this PR solve? Qwen-VL series supports video parsing. #10617. ### Type of change - [x] New Feature (non-breaking change which adds functionality)	2025-10-21 09:36:13 +08:00