From 6c93157b148efd08cb349a47f2acfead961e5896 Mon Sep 17 00:00:00 2001 From: Yongteng Lei Date: Tue, 23 Dec 2025 19:51:01 +0800 Subject: [PATCH] Refa: image table context window (#12132) ### What problem does this PR solve? Image table context window ### Type of change - [x] Refactoring --- web/src/components/chunk-method-dialog/index.tsx | 2 +- .../chunk-method-dialog/use-default-parser-values.ts | 2 +- web/src/locales/de.ts | 3 +++ web/src/locales/en.ts | 4 ++-- web/src/locales/fr.ts | 3 +++ web/src/locales/id.ts | 3 +++ web/src/locales/it.ts | 3 +++ web/src/locales/ja.ts | 3 +++ web/src/locales/pt-br.ts | 3 +++ web/src/locales/ru.ts | 3 +++ web/src/locales/vi.ts | 3 +++ web/src/locales/zh-traditional.ts | 3 +++ web/src/locales/zh.ts | 3 +++ .../dataset/dataset-setting/configuration/common-item.tsx | 6 +++--- web/src/pages/dataset/dataset-setting/form-schema.ts | 2 +- web/src/pages/dataset/dataset-setting/index.tsx | 2 +- web/src/pages/dataset/dataset-setting/saving-button.tsx | 4 ++++ 17 files changed, 43 insertions(+), 9 deletions(-) diff --git a/web/src/components/chunk-method-dialog/index.tsx b/web/src/components/chunk-method-dialog/index.tsx index b14d812a8..f9af844fc 100644 --- a/web/src/components/chunk-method-dialog/index.tsx +++ b/web/src/components/chunk-method-dialog/index.tsx @@ -120,7 +120,7 @@ export function ChunkMethodDialog({ auto_questions: z.coerce.number().optional(), html4excel: z.boolean().optional(), toc_extraction: z.boolean().optional(), - image_context_window: z.coerce.number().optional(), + image_table_context_window: z.coerce.number().optional(), mineru_parse_method: z.enum(['auto', 'txt', 'ocr']).optional(), mineru_formula_enable: z.boolean().optional(), mineru_table_enable: z.boolean().optional(), diff --git a/web/src/components/chunk-method-dialog/use-default-parser-values.ts b/web/src/components/chunk-method-dialog/use-default-parser-values.ts index ac89330ce..a82a32621 100644 --- a/web/src/components/chunk-method-dialog/use-default-parser-values.ts +++ b/web/src/components/chunk-method-dialog/use-default-parser-values.ts @@ -18,7 +18,7 @@ export function useDefaultParserValues() { auto_questions: 0, html4excel: false, toc_extraction: false, - image_context_window: 0, + image_table_context_window: 0, mineru_parse_method: 'auto', mineru_formula_enable: true, mineru_table_enable: true, diff --git a/web/src/locales/de.ts b/web/src/locales/de.ts index 69b659586..f7a812ec5 100644 --- a/web/src/locales/de.ts +++ b/web/src/locales/de.ts @@ -208,6 +208,9 @@ export default { plainText: 'Einfach', }, knowledgeConfiguration: { + imageTableContextWindow: 'Kontextfenster für Bild und Tabelle', + imageTableContextWindowTip: + 'Erfasst N Token Text ober- und unterhalb von Bild und Tabelle, um reicheren Kontext bereitzustellen.', titleDescription: 'Aktualisieren Sie hier Ihre Wissensdatenbank-Konfiguration, insbesondere die Chunk-Methode.', name: 'Name der Wissensdatenbank', diff --git a/web/src/locales/en.ts b/web/src/locales/en.ts index 0d2ceb1d0..bdc4e600c 100644 --- a/web/src/locales/en.ts +++ b/web/src/locales/en.ts @@ -371,8 +371,8 @@ Procedural Memory: Learned skills, habits, and automated procedures.`, globalIndexModel: 'Indexing model', settings: 'Settings', autoMetadataTip: `Automatically generate metadata. Applies to new files during parsing. Existing files require re-parsing to update (chunks remain preserved). Be aware that extra tokens will be consumed by the indexing model specified in 'Configuration'.`, - imageContextWindow: 'Image & table context window', - imageContextWindowTip: + imageTableContextWindow: 'Image & table context window', + imageTableContextWindowTip: 'Captures N tokens of text above and below the image & table to provide richer background context.', autoMetadata: 'Auto metadata', mineruOptions: 'MinerU Options', diff --git a/web/src/locales/fr.ts b/web/src/locales/fr.ts index 99249a1f6..fb69d047d 100644 --- a/web/src/locales/fr.ts +++ b/web/src/locales/fr.ts @@ -196,6 +196,9 @@ export default { knowledgeConfiguration: { titleDescription: 'Modifiez ici la configuration de votre base de connaissances, notamment la méthode de découpage.', + imageTableContextWindow: 'Fenêtre de contexte image & tableau', + imageTableContextWindowTip: + "Capture N jetons de texte au-dessus et au-dessous de l'image et du tableau pour fournir un contexte plus riche.", name: 'Nom de la base de connaissances', photo: 'Photo de la base de connaissances', photoTip: 'Vous pouvez téléverser un fichier de 4 Mo', diff --git a/web/src/locales/id.ts b/web/src/locales/id.ts index f529f8524..11f78f819 100644 --- a/web/src/locales/id.ts +++ b/web/src/locales/id.ts @@ -163,6 +163,9 @@ export default { html4excelTip: `Gunakan bersama dengan metode pemotongan General. Ketika dinonaktifkan, file spreadsheet (XLSX, XLS (Excel 97-2003)) akan dianalisis baris demi baris menjadi pasangan kunci-nilai. Ketika diaktifkan, file spreadsheet akan dianalisis menjadi tabel HTML. Jika tabel asli memiliki lebih dari 12 baris, sistem akan secara otomatis membagi menjadi beberapa tabel HTML setiap 12 baris. Untuk informasi lebih lanjut, lihat https://ragflow.io/docs/dev/enable_excel2html.`, }, knowledgeConfiguration: { + imageTableContextWindow: 'Jendela konteks gambar & tabel', + imageTableContextWindowTip: + 'Mengambil N token teks di atas dan di bawah gambar dan tabel untuk memberikan konteks latar yang lebih kaya.', titleDescription: 'Perbarui detail basis pengetahuan Anda terutama metode parsing di sini.', name: 'Nama basis pengetahuan', diff --git a/web/src/locales/it.ts b/web/src/locales/it.ts index 7e99681ae..564c93953 100644 --- a/web/src/locales/it.ts +++ b/web/src/locales/it.ts @@ -291,6 +291,9 @@ export default { reRankModelWaring: 'Il modello re-rank richiede molto tempo.', }, knowledgeConfiguration: { + imageTableContextWindow: 'Finestra di contesto immagine e tabella', + imageTableContextWindowTip: + "Cattura N token di testo sopra e sotto l'immagine e la tabella per fornire un contesto più ricco.", generationScopeTip: "Determina se RAPTOR viene generato per l'intero dataset o per un singolo file.", scopeDataset: 'Dataset', diff --git a/web/src/locales/ja.ts b/web/src/locales/ja.ts index 7d139a27b..79e7fbc56 100644 --- a/web/src/locales/ja.ts +++ b/web/src/locales/ja.ts @@ -171,6 +171,9 @@ export default { autoQuestionsTip: `ランキングスコアを向上させるために、「システムモデル設定」で定義されたチャットモデルを使用して、ナレッジベースのチャンクごとにN個の質問を抽出します。 これにより、追加のトークンが消費されることに注意してください。 結果はチャンクリストで表示および編集できます。 質問抽出エラーはチャンク処理をブロックしません。空の結果が元のチャンクに追加されます。詳細は https://ragflow.io/docs/dev/autokeyword_autoquestion をご覧ください。`, }, knowledgeConfiguration: { + imageTableContextWindow: '画像・表コンテキストウィンドウ', + imageTableContextWindowTip: + '画像と表の上下のテキストをNトークン取得し、より豊かな背景コンテキストを提供します。', titleDescription: 'ナレッジベースの設定、特にチャンク方法をここで更新してください。', name: 'ナレッジベース名', diff --git a/web/src/locales/pt-br.ts b/web/src/locales/pt-br.ts index 8593cf268..5cb3b1991 100644 --- a/web/src/locales/pt-br.ts +++ b/web/src/locales/pt-br.ts @@ -201,6 +201,9 @@ export default { metaData: 'Metadados', }, knowledgeConfiguration: { + imageTableContextWindow: 'Janela de contexto de imagem e tabela', + imageTableContextWindowTip: + 'Captura N tokens de texto acima e abaixo da imagem e da tabela para fornecer um contexto de fundo mais rico.', titleDescription: 'Atualize a configuração da sua base de conhecimento aqui, especialmente o método de fragmentação.', name: 'Nome da base de conhecimento', diff --git a/web/src/locales/ru.ts b/web/src/locales/ru.ts index cf10bf73c..e2855d487 100644 --- a/web/src/locales/ru.ts +++ b/web/src/locales/ru.ts @@ -277,6 +277,9 @@ export default { reRankModelWaring: 'Re-rank модель очень требовательна ко времени.', }, knowledgeConfiguration: { + imageTableContextWindow: 'Окно контекста изображения и таблицы', + imageTableContextWindowTip: + 'Захватывает N токенов текста выше и ниже изображения и таблицы, чтобы дать более богатый контекст.', generationScopeTip: 'Определяет, генерируется ли RAPTOR для всего набора данных или для одного файла.', scopeDataset: 'Датасет', diff --git a/web/src/locales/vi.ts b/web/src/locales/vi.ts index a506babf9..ad1c059dc 100644 --- a/web/src/locales/vi.ts +++ b/web/src/locales/vi.ts @@ -182,6 +182,9 @@ export default { documentMetaTips: `

Dữ liệu meta ở định dạng Json (không thể tìm kiếm). Nó sẽ được thêm vào prompt cho LLM nếu bất kỳ đoạn nào của tài liệu này được đưa vào prompt.

Ví dụ:

Dữ liệu meta là:
{ "Author": "Alex Dowson", "Date": "2024-11-12" }
Prompt sẽ là:

Tài liệu: the_name_of_document

Tác giả: Alex Dowson

Ngày: 2024-11-12

Các đoạn liên quan như sau:

`, }, knowledgeConfiguration: { + imageTableContextWindow: 'Cửa sổ ngữ cảnh hình ảnh & bảng', + imageTableContextWindowTip: + 'Trích xuất N token văn bản phía trên và phía dưới hình ảnh và bảng để cung cấp bối cảnh phong phú hơn.', titleDescription: 'Cập nhật cấu hình cơ sở kiến thức của bạn tại đây, đặc biệt là phương thức phân khối.', name: 'Tên cơ sở kiến thức', diff --git a/web/src/locales/zh-traditional.ts b/web/src/locales/zh-traditional.ts index 0eaf5f436..cae1b91c5 100644 --- a/web/src/locales/zh-traditional.ts +++ b/web/src/locales/zh-traditional.ts @@ -203,6 +203,9 @@ export default { }, knowledgeConfiguration: { titleDescription: '在這裡更新您的知識庫詳細信息,尤其是切片方法。', + imageTableContextWindow: '影像與表格上下文視窗', + imageTableContextWindowTip: + '擷取影像與表格上下方的 N 個 token,為該 chunk 提供更豐富的背景上下文。', name: '知識庫名稱', photo: '知識庫圖片', photoTip: '你可以上傳4MB的文件', diff --git a/web/src/locales/zh.ts b/web/src/locales/zh.ts index 8af883ee7..66b4e57a4 100644 --- a/web/src/locales/zh.ts +++ b/web/src/locales/zh.ts @@ -345,6 +345,9 @@ export default { }, knowledgeConfiguration: { settings: '设置', + imageTableContextWindow: '图像与表格上下文窗口', + imageTableContextWindowTip: + '抓取图像与表格上下方的 N 个 token,为该 chunk 提供更丰富的背景上下文。', autoMetadata: '自动元数据', mineruOptions: 'MinerU 选项', mineruParseMethod: '解析方法', diff --git a/web/src/pages/dataset/dataset-setting/configuration/common-item.tsx b/web/src/pages/dataset/dataset-setting/configuration/common-item.tsx index 8599af56d..ef78724df 100644 --- a/web/src/pages/dataset/dataset-setting/configuration/common-item.tsx +++ b/web/src/pages/dataset/dataset-setting/configuration/common-item.tsx @@ -324,14 +324,14 @@ export function ImageContextWindow() { return ( (