From f540559c419381a2dbca91293ec394d0bd8695d4 Mon Sep 17 00:00:00 2001 From: writinwaters <93570324+writinwaters@users.noreply.github.com> Date: Tue, 18 Mar 2025 19:49:06 +0800 Subject: [PATCH] Miscellaneous updates (#6245) ### What problem does this PR solve? ### Type of change - [x] Documentation Update --- README.md | 7 ++++-- README_id.md | 9 ++++--- README_ja.md | 7 ++++-- README_ko.md | 7 ++++-- README_pt_br.md | 11 +++++---- README_tzh.md | 7 ++++-- README_zh.md | 7 ++++-- .../agent_component_reference/retrieval.mdx | 13 +++++++--- .../dataset/configure_knowledge_base.md | 24 +++++++++---------- docs/guides/dataset/use_tag_sets.md | 4 ++++ docs/guides/manage_team_members.md | 6 +++++ docs/release_notes.md | 8 +++---- web/src/locales/de.ts | 4 ++-- web/src/locales/en.ts | 6 ++--- web/src/locales/id.ts | 2 +- web/src/locales/ja.ts | 4 ++-- web/src/locales/pt-br.ts | 4 ++-- web/src/locales/vi.ts | 4 ++-- web/src/locales/zh-traditional.ts | 4 ++-- web/src/locales/zh.ts | 11 +++++---- 20 files changed, 94 insertions(+), 55 deletions(-) diff --git a/README.md b/README.md index 3128e2b29..20407d16f 100644 --- a/README.md +++ b/README.md @@ -353,9 +353,12 @@ docker build -f Dockerfile -t infiniflow/ragflow:nightly . ## 📚 Documentation - [Quickstart](https://ragflow.io/docs/dev/) -- [User guide](https://ragflow.io/docs/dev/category/guides) +- [Configuration](https://ragflow.io/docs/dev/configurations) +- [Release notes](https://ragflow.io/docs/dev/release_notes) +- [User guides](https://ragflow.io/docs/dev/category/guides) +- [Developer guides](https://ragflow.io/docs/dev/category/developers) - [References](https://ragflow.io/docs/dev/category/references) -- [FAQ](https://ragflow.io/docs/dev/faq) +- [FAQs](https://ragflow.io/docs/dev/faq) ## 📜 Roadmap diff --git a/README_id.md b/README_id.md index d920052d2..c6adef8c2 100644 --- a/README_id.md +++ b/README_id.md @@ -319,9 +319,12 @@ docker build -f Dockerfile -t infiniflow/ragflow:nightly . 
## 📚 Dokumentasi - [Quickstart](https://ragflow.io/docs/dev/) -- [Panduan Pengguna](https://ragflow.io/docs/dev/category/guides) -- [Referensi](https://ragflow.io/docs/dev/category/references) -- [FAQ](https://ragflow.io/docs/dev/faq) +- [Configuration](https://ragflow.io/docs/dev/configurations) +- [Release notes](https://ragflow.io/docs/dev/release_notes) +- [User guides](https://ragflow.io/docs/dev/category/guides) +- [Developer guides](https://ragflow.io/docs/dev/category/developers) +- [References](https://ragflow.io/docs/dev/category/references) +- [FAQs](https://ragflow.io/docs/dev/faq) ## 📜 Roadmap diff --git a/README_ja.md b/README_ja.md index 4bbb82bc0..372e566fa 100644 --- a/README_ja.md +++ b/README_ja.md @@ -315,9 +315,12 @@ docker build -f Dockerfile -t infiniflow/ragflow:nightly . ## 📚 ドキュメンテーション - [Quickstart](https://ragflow.io/docs/dev/) -- [User guide](https://ragflow.io/docs/dev/category/guides) +- [Configuration](https://ragflow.io/docs/dev/configurations) +- [Release notes](https://ragflow.io/docs/dev/release_notes) +- [User guides](https://ragflow.io/docs/dev/category/guides) +- [Developer guides](https://ragflow.io/docs/dev/category/developers) - [References](https://ragflow.io/docs/dev/category/references) -- [FAQ](https://ragflow.io/docs/dev/faq) +- [FAQs](https://ragflow.io/docs/dev/faq) ## 📜 ロードマップ diff --git a/README_ko.md b/README_ko.md index 05da534a2..b316ea568 100644 --- a/README_ko.md +++ b/README_ko.md @@ -315,9 +315,12 @@ docker build -f Dockerfile -t infiniflow/ragflow:nightly . 
## 📚 문서 - [Quickstart](https://ragflow.io/docs/dev/) -- [User guide](https://ragflow.io/docs/dev/category/guides) +- [Configuration](https://ragflow.io/docs/dev/configurations) +- [Release notes](https://ragflow.io/docs/dev/release_notes) +- [User guides](https://ragflow.io/docs/dev/category/guides) +- [Developer guides](https://ragflow.io/docs/dev/category/developers) - [References](https://ragflow.io/docs/dev/category/references) -- [FAQ](https://ragflow.io/docs/dev/faq) +- [FAQs](https://ragflow.io/docs/dev/faq) ## 📜 로드맵 diff --git a/README_pt_br.md b/README_pt_br.md index 874bd5d37..6477150b0 100644 --- a/README_pt_br.md +++ b/README_pt_br.md @@ -339,10 +339,13 @@ docker build -f Dockerfile -t infiniflow/ragflow:nightly . ## 📚 Documentação -- [Início rápido](https://ragflow.io/docs/dev/) -- [Guia do usuário](https://ragflow.io/docs/dev/category/guides) -- [Referências](https://ragflow.io/docs/dev/category/references) -- [FAQ](https://ragflow.io/docs/dev/faq) +- [Quickstart](https://ragflow.io/docs/dev/) +- [Configuration](https://ragflow.io/docs/dev/configurations) +- [Release notes](https://ragflow.io/docs/dev/release_notes) +- [User guides](https://ragflow.io/docs/dev/category/guides) +- [Developer guides](https://ragflow.io/docs/dev/category/developers) +- [References](https://ragflow.io/docs/dev/category/references) +- [FAQs](https://ragflow.io/docs/dev/faq) ## 📜 Roadmap diff --git a/README_tzh.md b/README_tzh.md index fcedea901..4daf4a023 100644 --- a/README_tzh.md +++ b/README_tzh.md @@ -329,9 +329,12 @@ npm install ## 📚 技術文檔 - [Quickstart](https://ragflow.io/docs/dev/) -- [User guide](https://ragflow.io/docs/dev/category/guides) +- [Configuration](https://ragflow.io/docs/dev/configurations) +- [Release notes](https://ragflow.io/docs/dev/release_notes) +- [User guides](https://ragflow.io/docs/dev/category/guides) +- [Developer guides](https://ragflow.io/docs/dev/category/developers) - [References](https://ragflow.io/docs/dev/category/references) -- 
[FAQ](https://ragflow.io/docs/dev/faq) +- [FAQs](https://ragflow.io/docs/dev/faq) ## 📜 路線圖 diff --git a/README_zh.md b/README_zh.md index a4c5e7d15..a8b71dd53 100644 --- a/README_zh.md +++ b/README_zh.md @@ -328,9 +328,12 @@ docker build --build-arg NEED_MIRROR=1 -f Dockerfile -t infiniflow/ragflow:night ## 📚 技术文档 - [Quickstart](https://ragflow.io/docs/dev/) -- [User guide](https://ragflow.io/docs/dev/category/guides) +- [Configuration](https://ragflow.io/docs/dev/configurations) +- [Release notes](https://ragflow.io/docs/dev/release_notes) +- [User guides](https://ragflow.io/docs/dev/category/guides) +- [Developer guides](https://ragflow.io/docs/dev/category/developers) - [References](https://ragflow.io/docs/dev/category/references) -- [FAQ](https://ragflow.io/docs/dev/faq) +- [FAQs](https://ragflow.io/docs/dev/faq) ## 📜 路线图 diff --git a/docs/guides/agent/agent_component_reference/retrieval.mdx b/docs/guides/agent/agent_component_reference/retrieval.mdx index 790a029c7..c80aabbe3 100644 --- a/docs/guides/agent/agent_component_reference/retrieval.mdx +++ b/docs/guides/agent/agent_component_reference/retrieval.mdx @@ -51,18 +51,25 @@ If a rerank model is selected, a combination of weighted keyword similarity and Using a rerank model will *significantly* increase the system's response time. ::: +### Tavily API key + +If an API key is correctly set here, Tavily-based web searches will be used to supplement knowledge base retrieval. + +### Use knowledge graph + +It will retrieve descriptions of relevant entities, relations, and community reports, which will enhance inference on multi-hop and complex questions. ### Knowledge bases -*Required* - -You are required to select the knowledge base(s) to retrieve data from. +Select the knowledge base(s) to retrieve data from. :::danger IMPORTANT If you select multiple knowledge bases, you must ensure that the knowledge bases (datasets) you select use the same embedding model; otherwise, an error message would occur.
::: +### Empty response +Set this as a response if no results are retrieved from the knowledge bases for your query, or leave this field blank to allow the LLM to improvise when nothing is found. ## Examples diff --git a/docs/guides/dataset/configure_knowledge_base.md b/docs/guides/dataset/configure_knowledge_base.md index cff9f9984..ce9a19564 100644 --- a/docs/guides/dataset/configure_knowledge_base.md +++ b/docs/guides/dataset/configure_knowledge_base.md @@ -39,18 +39,18 @@ This section covers the following topics: RAGFlow offers multiple chunking template to facilitate chunking files of different layouts and ensure semantic integrity. In **Chunk method**, you can choose the default template that suits the layouts and formats of your files. The following table shows the descriptions and the compatible file formats of each supported chunk template: -| **Template** | Description | File format | -|--------------|-----------------------------------------------------------------------|------------------------------------------------------------------------------| -| General | Files are consecutively chunked based on a preset chunk token number. | DOCX, XLSX, XLS (Excel97~2003), PPT, PDF, TXT, JPEG, JPG, PNG, TIF, GIF, CSV | -| Q&A | | XLSX, XLS (Excel97~2003), CSV/TXT | -| Manual | | PDF | -| Table | | XLSX, XLS (Excel97~2003), CSV/TXT | -| Paper | | PDF | -| Book | | DOCX, PDF, TXT | -| Laws | | DOCX, PDF, TXT | -| Presentation | | PDF, PPTX | -| Picture | | JPEG, JPG, PNG, TIF, GIF | -| One | The entire document is chunked as one. | DOCX, XLSX, XLS (Excel97~2003), PDF, TXT | +| **Template** | Description | File format | +|--------------|-----------------------------------------------------------------------|-----------------------------------------------------------------------------------------------| +| General | Files are consecutively chunked based on a preset chunk token number. 
| DOCX, XLSX, XLS (Excel97~2003), PPT, PDF, TXT, JPEG, JPG, PNG, TIF, GIF, CSV, JSON, EML, HTML | +| Q&A | | XLSX, XLS (Excel97~2003), CSV/TXT | +| Manual | | PDF | +| Table | | XLSX, XLS (Excel97~2003), CSV/TXT | +| Paper | | PDF | +| Book | | DOCX, PDF, TXT | +| Laws | | DOCX, PDF, TXT | +| Presentation | | PDF, PPTX | +| Picture | | JPEG, JPG, PNG, TIF, GIF | +| One | The entire document is chunked as one. | DOCX, XLSX, XLS (Excel97~2003), PDF, TXT | You can also change a file's chunk method on the **Datasets** page. diff --git a/docs/guides/dataset/use_tag_sets.md b/docs/guides/dataset/use_tag_sets.md index de25843ce..e8f821d4a 100644 --- a/docs/guides/dataset/use_tag_sets.md +++ b/docs/guides/dataset/use_tag_sets.md @@ -13,6 +13,10 @@ Retrieval accuracy is the touchstone for a production-ready RAG framework. In ad To use this feature, ensure you have at least one properly configured tag set, specify the tag set(s) on the **Configuration** page of your knowledge base (dataset), and then re-parse your documents to initiate the auto-tag process. During this process, each chunk in your dataset is compared with every entry in the specified tag set(s), and tags are automatically applied based on similarity. +:::danger IMPORTANT +The auto-tagging feature is *unavailable* on the [Infinity](https://github.com/infiniflow/infinity) document engine. +::: + ## Scenarios Auto-tagging applies in situations where chunks are so similar to each other that the intended chunks cannot be distinguished from the rest. For example, when you have a few chunks about iPhone and a majority about iPhone case or iPhone accessaries, it becomes difficult to retrieve the iPhone-specific chunks without additional information. 
diff --git a/docs/guides/manage_team_members.md b/docs/guides/manage_team_members.md index eeff940fa..ec9d34e47 100644 --- a/docs/guides/manage_team_members.md +++ b/docs/guides/manage_team_members.md @@ -16,6 +16,10 @@ By default, each RAGFlow user is assigned a single team named after their name. - Update the default configurations for your datasets. - Parse documents in your datasets. +:::danger IMPORTANT +To allow your team members to view and update your knowledge base, ensure that you set **Permissions** on its **Configuration** page from **Only me** to **Team**. +::: + :::tip NOTE Team members are currently *not* allowed to invite users to your team, and only you, the team owner, is permitted to do so. ::: @@ -43,3 +47,5 @@ When using email address to invite a team member, ensure it is associated with a ## Accept or decline team invite  + +_After accepting the team invite, you should be able to view and update the team owner's knowledge bases whose **Permissions** is set to **Team**._ \ No newline at end of file diff --git a/docs/release_notes.md b/docs/release_notes.md index 5798bc755..00c10a400 100644 --- a/docs/release_notes.md +++ b/docs/release_notes.md @@ -51,7 +51,7 @@ Released on March 11, 2025. - A repetitive knowledge graph extraction issue. - Issues with API calling. -- Options in the **Document parser** dropdown are missing. +- Options in the **PDF parser**, aka **Document parser**, dropdown are missing. - A Tavily web search issue. - Unable to preview diagrams or images in an AI chat. @@ -59,7 +59,7 @@ Released on March 11, 2025. #### Added documents -[Use tag set](./guides/dataset/use_tag_sets.md) +- [Use tag set](./guides/dataset/use_tag_sets.md) ## v0.17.0 @@ -71,7 +71,7 @@ Released on March 3, 2025. - AI chat: Leverages Tavily-based web search to enhance contexts in agentic reasoning. To activate this, enter the correct Tavily API key under the **Assistant Setting** tab of your chat assistant dialogue. 
- AI chat: Supports starting a chat without specifying knowledge bases. - AI chat: HTML files can also be previewed and referenced, in addition to PDF files. -- Dataset: Adds a **Document parser** dropdown menu to dataset configurations. This includes a DeepDoc model option, which is time-consuming, a much faster **naive** option (plain text), which skips DLA (Document Layout Analysis), OCR (Optical Character Recognition), and TSR (Table Structure Recognition) tasks, and several currently *experimental* large model options. +- Dataset: Adds a **PDF parser**, aka **Document parser**, dropdown menu to dataset configurations. This includes a DeepDoc model option, which is time-consuming, a much faster **naive** option (plain text), which skips DLA (Document Layout Analysis), OCR (Optical Character Recognition), and TSR (Table Structure Recognition) tasks, and several currently *experimental* large model options. - Agent component: **(x)** or a forward slash `/` can be used to insert available keys (variables) in the system prompt field of the **Generate** or **Template** component. - Object storage: Supports using Aliyun OSS (Object Storage Service) as a file storage option. - Models: Updates the supported model list for Tongyi-Qianwen (Qwen), adding DeepSeek-specific models; adds ModelScope as a model provider. @@ -99,7 +99,7 @@ Adds a key option `"meta_fields"` to the [Update document](./references/python_a #### Added documents -[Run retrieval test](./guides/dataset/run_retrieval_test.md) +- [Run retrieval test](./guides/dataset/run_retrieval_test.md) ## v0.16.0 diff --git a/web/src/locales/de.ts b/web/src/locales/de.ts index 0709f65ad..e9d2faa9c 100644 --- a/web/src/locales/de.ts +++ b/web/src/locales/de.ts @@ -168,7 +168,7 @@ export default { html4excel: 'Excel zu HTML', html4excelTip: 'Wenn aktiviert, wird die Tabelle in HTML-Tabellen umgewandelt, mit maximal 256 Zeilen pro Tabelle. 
Andernfalls wird sie in Schlüssel-Wert-Paare nach Zeilen umgewandelt.', autoKeywords: 'Auto-Schlüsselwort', - autoKeywordsTip: 'Extrahiert automatisch N Schlüsselwörter für jeden Chunk, um deren Ranking für Anfragen mit diesen Schlüsselwörtern zu erhöhen. Sie können die hinzugefügten Schlüsselwörter für einen Chunk in der Chunk-Liste überprüfen oder aktualisieren. Beachten Sie, dass zusätzliche Tokens vom in den "Systemmodelleinstellungen" angegebenen LLM verbraucht werden.', + autoKeywordsTip: 'Extrahieren Sie automatisch N Schlüsselwörter für jeden Abschnitt, um deren Ranking in Abfragen mit diesen Schlüsselwörtern zu verbessern. Beachten Sie, dass zusätzliche Tokens vom in den "Systemmodelleinstellungen" angegebenen Chat-Modell verbraucht werden. Sie können die hinzugefügten Schlüsselwörter eines Abschnitts in der Abschnittsliste überprüfen oder aktualisieren.', autoQuestions: 'Auto-Frage', autoQuestionsTip: 'Extrahiert automatisch N Fragen für jeden Chunk, um deren Ranking für Anfragen mit diesen Fragen zu erhöhen. Sie können die hinzugefügten Fragen für einen Chunk in der Chunk-Liste überprüfen oder aktualisieren. Diese Funktion unterbricht den Chunking-Prozess nicht, wenn ein Fehler auftritt, außer dass sie möglicherweise ein leeres Ergebnis zum ursprünglichen Chunk hinzufügt. Beachten Sie, dass zusätzliche Tokens vom in den "Systemmodelleinstellungen" angegebenen LLM verbraucht werden.', redo: 'Möchten Sie die vorhandenen {{chunkNum}} Chunks löschen?', @@ -246,7 +246,7 @@ export default { manual: `
Nur PDF wird unterstützt.
Wir gehen davon aus, dass das Handbuch eine hierarchische Abschnittsstruktur aufweist und verwenden die Titel der untersten Abschnitte als Grundeinheit für die Aufteilung der Dokumente. Daher werden Abbildungen und Tabellen im selben Abschnitt nicht getrennt, was zu größeren Chunk-Größen führen kann.
`, - naive: `Unterstützte Dateiformate sind DOCX, EXCEL, PPT, IMAGE, PDF, TXT, MD, JSON, EML, HTML.
+ naive: `Unterstützte Dateiformate sind DOCX, XLSX, XLS (Excel97~2003), PPT, PDF, TXT, JPEG, JPG, PNG, TIF, GIF, CSV, JSON, EML, HTML.
Diese Methode teilt Dateien mit einer 'naiven' Methode auf:
Only PDF is supported.
We assume that the manual has a hierarchical section structure, using the lowest section titles as basic unit for chunking documents. Therefore, figures and tables in the same section will not be separated, which may result in larger chunk sizes.
`, - naive: `Supported file formats are DOCX, EXCEL, PPT, IMAGE, PDF, TXT, MD, JSON, EML, HTML.
+ naive: `Supported file formats are DOCX, XLSX, XLS (Excel97~2003), PPT, PDF, TXT, JPEG, JPG, PNG, TIF, GIF, CSV, JSON, EML, HTML.
This method chunks files using a 'naive' method:
Format file yang didukung adalah DOCX, EXCEL, PPT, IMAGE, PDF, TXT, MD, JSON, EML, HTML.
+ naive: `Format file yang didukung adalah DOCX, XLSX, XLS (Excel97~2003), PPT, PDF, TXT, JPEG, JPG, PNG, TIF, GIF, CSV, JSON, EML, HTML.
Metode ini menerapkan cara naif untuk memotong file:
対応するのはPDFのみです。
マニュアルは階層的なセクション構造を持つと仮定され、最下位のセクションタイトルを基にチャンク分割を行います。そのため、同じセクション内の図表は分割されませんが、大きなチャンクサイズになる可能性があります。
`, - naive: `対応ファイル形式はDOCX, EXCEL, PPT, IMAGE, PDF, TXT, MD, JSON, EML, HTMLです。
+ naive: `対応ファイル形式はDOCX, XLSX, XLS (Excel97~2003), PPT, PDF, TXT, JPEG, JPG, PNG, TIF, GIF, CSV, JSON, EML, HTMLです。
この方法では、'ナイーブ'な方法でファイルを分割します:
Apenas PDF é suportado.
Assumimos que o manual tem uma estrutura hierárquica de seções, usando os títulos das seções inferiores como unidade básica para fragmentação. Assim, figuras e tabelas na mesma seção não serão separadas, o que pode resultar em fragmentos maiores.
`, - naive: `Os formatos de arquivo suportados são DOCX, EXCEL, PPT, IMAGE, PDF, TXT, MD, JSON, EML, HTML.
+ naive: `Os formatos de arquivo suportados são DOCX, XLSX, XLS (Excel97~2003), PPT, PDF, TXT, JPEG, JPG, PNG, TIF, GIF, CSV, JSON, EML, HTML.
Este método fragmenta arquivos de maneira 'simples':
Các định dạng tệp được hỗ trợ là DOCX, EXCEL, PPT, IMAGE, PDF, TXT, MD, JSON, EML, HTML.
`, +Các định dạng tệp được hỗ trợ là DOCX, XLSX, XLS (Excel97~2003), PPT, PDF, TXT, JPEG, JPG, PNG, TIF, GIF, CSV, JSON, EML, HTML.
`, paper: `Chỉ hỗ trợ tệp PDF.
Bài báo sẽ được chia theo các phần, chẳng hạn như tóm tắt, 1.1, 1.2.
Cách tiếp cận này cho phép LLM tóm tắt bài báo hiệu quả hơn và cung cấp các phản hồi toàn diện, dễ hiểu hơn. diff --git a/web/src/locales/zh-traditional.ts b/web/src/locales/zh-traditional.ts index 3fce2588b..808ffba00 100644 --- a/web/src/locales/zh-traditional.ts +++ b/web/src/locales/zh-traditional.ts @@ -165,7 +165,7 @@ export default { html4excel: '表格轉HTML', html4excelTip: `啟用後,電子表格將解析為 HTML 表格,一張表格最多 256 行。否則,會按行解析成鍵值對。`, autoKeywords: '自動關鍵字', - autoKeywordsTip: `在查詢此類關鍵字時,為每個區塊提取 N 個關鍵字以提高其排名分數。在「系統模型設定」中設定的 LLM 將消耗額外的 token。您可以在區塊清單中查看結果。 `, + autoKeywordsTip: `自動為每個文字區塊中提取 N 個關鍵詞,以提升查詢精度。請注意:此功能採用「系統模型設定」中設定的預設聊天模型提取關鍵詞,因此也會產生更多 Token 消耗。此外,你也可以手動更新生成的關鍵詞。`, autoQuestions: '自動問題', autoQuestionsTip: `在查詢此類問題時,為每個區塊提取 N 個問題以提高其排名分數。在「系統模型設定」中設定的 LLM 將消耗額外的 token。您可以在區塊清單中查看結果。如果發生錯誤,此功能不會破壞整個分塊過程,除了將空結果新增至原始區塊。 `, redo: '是否清空已有 {{chunkNum}}個 chunk?', @@ -240,7 +240,7 @@ export default { 我們假設手冊具有分層部分結構。我們使用最低的部分標題作為對文檔進行切片的樞軸。 因此,同一部分中的圖和表不會被分割,並且塊大小可能會很大。
`, - naive: `支持的文件格式為DOCX、EXCEL、PPT、IMAGE、PDF、TXT、MD、JSON、EML、HTML。
+ naive: `支持的文件格式為DOCX、XLSX、XLS (Excel97~2003)、PPT、PDF、TXT、JPEG、JPG、PNG、TIF、GIF、CSV、JSON、EML、HTML。
此方法將簡單的方法應用於塊文件:
支持的文件格式为DOCX、EXCEL、PPT、IMAGE、PDF、TXT、MD、JSON、EML、HTML。
+ naive: `支持的文件格式为DOCX、XLSX、XLS (Excel97~2003)、PPT、PDF、TXT、JPEG、JPG、PNG、TIF、GIF、CSV、JSON、EML、HTML。
此方法将简单的方法应用于块文件:
详见:https://ragflow.io/docs/dev/use_tag_sets
`, tags: '标签', addTag: '增加标签',