Mirror of https://github.com/infiniflow/ragflow.git, synced 2026-02-07 02:55:08 +08:00

Compare commits: 9 commits (23d0b564d3 ... 8d8a5f73b6)
| SHA1 |
|---|
| 8d8a5f73b6 |
| d0fa66f4d5 |
| 9dd22e141b |
| b6c1ca828e |
| d367c7e226 |
| a3aa3f0d36 |
| 7b8752fe24 |
| 5e2c33e5b0 |
| e40be8e541 |
@@ -84,18 +84,10 @@ def create_agent_session(tenant_id, agent_id):
     session_id=get_uuid()
     canvas = Canvas(cvs.dsl, tenant_id, agent_id)
     canvas.reset()
-    conv = {
-        "id": session_id,
-        "dialog_id": cvs.id,
-        "user_id": user_id,
-        "message": [],
-        "source": "agent",
-        "dsl": cvs.dsl
-    }
-    API4ConversationService.save(**conv)
-
     cvs.dsl = json.loads(str(canvas))
     conv = {"id": session_id, "dialog_id": cvs.id, "user_id": user_id, "message": [{"role": "assistant", "content": canvas.get_prologue()}], "source": "agent", "dsl": cvs.dsl}
     API4ConversationService.save(**conv)
     conv["agent_id"] = conv.pop("dialog_id")
     return get_result(data=conv)

@@ -570,6 +562,9 @@ def list_agent_session(tenant_id, agent_id):
                     "chunks" in conv["reference"][chunk_num]):
                 chunks = conv["reference"][chunk_num]["chunks"]
                 for chunk in chunks:
+                    # Ensure chunk is a dictionary before calling get method
+                    if not isinstance(chunk, dict):
+                        continue
                     new_chunk = {
                         "id": chunk.get("chunk_id", chunk.get("id")),
                         "content": chunk.get("content_with_weight", chunk.get("content")),

@@ -182,7 +182,8 @@ def completion(tenant_id, agent_id, session_id=None, **kwargs):
         "user_id": user_id,
         "message": [],
         "source": "agent",
-        "dsl": cvs.dsl
+        "dsl": cvs.dsl,
+        "reference": []
     }
     API4ConversationService.save(**conv)
     conv = API4Conversation(**conv)
@@ -256,10 +256,10 @@ def repair_bad_citation_formats(answer: str, kbinfos: dict, idx: set):


 def meta_filter(metas: dict, filters: list[dict]):
-    doc_ids = []
+    doc_ids = set([])

     def filter_out(v2docs, operator, value):
-        nonlocal doc_ids
+        ids = []
         for input, docids in v2docs.items():
             try:
                 input = float(input)

@@ -284,16 +284,24 @@ def meta_filter(metas: dict, filters: list[dict]):
             ]:
                 try:
                     if all(conds):
-                        doc_ids.extend(docids)
+                        ids.extend(docids)
                         break
                 except Exception:
                     pass
+        return ids

     for k, v2docs in metas.items():
         for f in filters:
             if k != f["key"]:
                 continue
-            filter_out(v2docs, f["op"], f["value"])
-    return doc_ids
+            ids = filter_out(v2docs, f["op"], f["value"])
+            if not doc_ids:
+                doc_ids = set(ids)
+            else:
+                doc_ids = doc_ids & set(ids)
+    if not doc_ids:
+        return []
+    return list(doc_ids)


 def chat(dialog, messages, stream=True, **kwargs):
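Read together, these two hunks change meta_filter from accumulating matches into one flat list to intersecting the per-filter match sets, so a document must satisfy every applicable filter. A minimal sketch of the new semantics, not the shipped code: it supports only an equality comparison and uses a `first` flag where the patch checks `doc_ids` truthiness.

```python
def meta_filter_sketch(metas: dict, filters: list[dict]) -> list:
    doc_ids, first = set(), True
    for key, v2docs in metas.items():
        for f in filters:
            if key != f["key"]:
                continue
            # One filter's matches: every doc id listed under the matching value.
            ids = {d for value, docs in v2docs.items() if value == f["value"] for d in docs}
            doc_ids = ids if first else doc_ids & ids
            first = False
    return list(doc_ids)

# Only doc2 satisfies both filters, so only doc2 survives the intersection.
metas = {"author": {"alice": ["doc1", "doc2"]}, "year": {"2024": ["doc2", "doc3"]}}
filters = [{"key": "author", "op": "=", "value": "alice"},
           {"key": "year", "op": "=", "value": "2024"}]
assert meta_filter_sketch(metas, filters) == ["doc2"]
```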
@@ -17,6 +17,7 @@ import asyncio
 import functools
 import json
 import logging
+import os
 import queue
 import random
 import threading

@@ -667,7 +668,10 @@ def timeout(seconds: float | int = None, attempts: int = 2, *, exception: Option
     for a in range(attempts):
         try:
-            result = result_queue.get(timeout=seconds)
+            if os.environ.get("ENABLE_TIMEOUT_ASSERTION"):
+                result = result_queue.get(timeout=seconds)
+            else:
+                result = result_queue.get()
             if isinstance(result, Exception):
                 raise result
             return result

@@ -682,8 +686,11 @@ def timeout(seconds: float | int = None, attempts: int = 2, *, exception: Option
     for a in range(attempts):
         try:
-            with trio.fail_after(seconds):
-                return await func(*args, **kwargs)
+            if os.environ.get("ENABLE_TIMEOUT_ASSERTION"):
+                with trio.fail_after(seconds):
+                    return await func(*args, **kwargs)
+            else:
+                return await func(*args, **kwargs)
         except trio.TooSlowError:
             if a < attempts - 1:
                 continue
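The same ENABLE_TIMEOUT_ASSERTION gate recurs throughout this change set: deadlines apply only when the environment flag is set, otherwise calls run unbounded. A minimal sketch of the pattern, assuming only that the flag is read from the environment as in the hunks above:

```python
import os
import trio

async def call_with_optional_deadline(func, seconds: float):
    """Apply a deadline only when timeout assertions are enabled."""
    if os.environ.get("ENABLE_TIMEOUT_ASSERTION"):
        with trio.fail_after(seconds):  # raises trio.TooSlowError on expiry
            return await func()
    return await func()  # flag unset: run with no deadline
```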
@@ -3501,7 +3501,7 @@ Failure:

 ### Generate related questions

-**POST** `/v1/sessions/related_questions`
+**POST** `/api/v1/sessions/related_questions`

 Generates five to ten alternative question strings from the user's original query to retrieve more relevant search results.

@@ -3516,7 +3516,7 @@ The chat model autonomously determines the number of questions to generate based
 #### Request

 - Method: POST
-- URL: `/v1/sessions/related_questions`
+- URL: `/api/v1/sessions/related_questions`
 - Headers:
   - `'content-Type: application/json'`
   - `'Authorization: Bearer <YOUR_LOGIN_TOKEN>'`

@@ -3528,7 +3528,7 @@ The chat model autonomously determines the number of questions to generate based
 ```bash
 curl --request POST \
-     --url http://{address}/v1/sessions/related_questions \
+     --url http://{address}/api/v1/sessions/related_questions \
      --header 'Content-Type: application/json' \
      --header 'Authorization: Bearer <YOUR_LOGIN_TOKEN>' \
      --data '
@@ -15,6 +15,7 @@
 #
 import logging
 import itertools
+import os
 import re
 from dataclasses import dataclass
 from typing import Any, Callable

@@ -106,7 +107,8 @@ class EntityResolution(Extractor):
             nonlocal remain_candidates_to_resolve, callback
             async with semaphore:
                 try:
-                    with trio.move_on_after(280) as cancel_scope:
+                    enable_timeout_assertion = os.environ.get("ENABLE_TIMEOUT_ASSERTION")
+                    with trio.move_on_after(280 if enable_timeout_assertion else 1000000000) as cancel_scope:
                         await self._resolve_candidate(candidate_batch, result_set, result_lock)
                     remain_candidates_to_resolve = remain_candidates_to_resolve - len(candidate_batch[1])
                     callback(msg=f"Resolved {len(candidate_batch[1])} pairs, {remain_candidates_to_resolve} are remained to resolve. ")

@@ -169,7 +171,8 @@ class EntityResolution(Extractor):
             logging.info(f"Created resolution prompt {len(text)} bytes for {len(candidate_resolution_i[1])} entity pairs of type {candidate_resolution_i[0]}")
             async with chat_limiter:
                 try:
-                    with trio.move_on_after(280) as cancel_scope:
+                    enable_timeout_assertion = os.environ.get("ENABLE_TIMEOUT_ASSERTION")
+                    with trio.move_on_after(280 if enable_timeout_assertion else 1000000000) as cancel_scope:
                         response = await trio.to_thread.run_sync(self._chat, text, [{"role": "user", "content": "Output:"}], {})
                         if cancel_scope.cancelled_caught:
                             logging.warning("_resolve_candidate._chat timeout, skipping...")
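Note the two different trio primitives being gated in this PR: move_on_after cancels its block silently and records the fact on the scope (hence the cancelled_caught checks above), while fail_after raises an exception the caller must handle. A small runnable illustration, independent of the RAGFlow code:

```python
import trio

async def demo():
    # move_on_after: the block is cancelled quietly; inspect cancelled_caught.
    with trio.move_on_after(0.1) as cancel_scope:
        await trio.sleep(1)
    print("timed out:", cancel_scope.cancelled_caught)  # True

    # fail_after: expiry raises trio.TooSlowError to the caller.
    try:
        with trio.fail_after(0.1):
            await trio.sleep(1)
    except trio.TooSlowError:
        print("fail_after raised TooSlowError")

trio.run(demo)
```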
@@ -7,6 +7,7 @@ Reference:

 import logging
 import json
+import os
 import re
 from typing import Callable
 from dataclasses import dataclass

@@ -51,6 +52,7 @@ class CommunityReportsExtractor(Extractor):
         self._max_report_length = max_report_length or 1500

     async def __call__(self, graph: nx.Graph, callback: Callable | None = None):
+        enable_timeout_assertion = os.environ.get("ENABLE_TIMEOUT_ASSERTION")
         for node_degree in graph.degree:
             graph.nodes[str(node_degree[0])]["rank"] = int(node_degree[1])

@@ -92,7 +94,7 @@ class CommunityReportsExtractor(Extractor):
             text = perform_variable_replacements(self._extraction_prompt, variables=prompt_variables)
             async with chat_limiter:
                 try:
-                    with trio.move_on_after(180) as cancel_scope:
+                    with trio.move_on_after(180 if enable_timeout_assertion else 1000000000) as cancel_scope:
                         response = await trio.to_thread.run_sync( self._chat, text, [{"role": "user", "content": "Output:"}], {})
                         if cancel_scope.cancelled_caught:
                             logging.warning("extract_community_report._chat timeout, skipping...")
@@ -15,6 +15,8 @@
 #
 import json
 import logging
+import os

 import networkx as nx
 import trio

@@ -49,6 +51,7 @@ async def run_graphrag(
     embedding_model,
     callback,
 ):
+    enable_timeout_assertion=os.environ.get("ENABLE_TIMEOUT_ASSERTION")
     start = trio.current_time()
     tenant_id, kb_id, doc_id = row["tenant_id"], str(row["kb_id"]), row["doc_id"]
     chunks = []

@@ -57,7 +60,7 @@ async def run_graphrag(
     ):
         chunks.append(d["content_with_weight"])

-    with trio.fail_after(max(120, len(chunks)*60*10)):
+    with trio.fail_after(max(120, len(chunks)*60*10) if enable_timeout_assertion else 10000000000):
         subgraph = await generate_subgraph(
             LightKGExt
             if "method" not in row["kb_parser_config"].get("graphrag", {}) or row["kb_parser_config"]["graphrag"]["method"] != "general"
@@ -130,7 +130,36 @@ Output:

 PROMPTS[
     "entiti_continue_extraction"
-] = """MANY entities were missed in the last extraction. Add them below using the same format:
+] = """
+MANY entities and relationships were missed in the last extraction. Please find only the missing entities and relationships from previous text.
+
+---Remember Steps---
+
+1. Identify all entities. For each identified entity, extract the following information:
+- entity_name: Name of the entity, use same language as input text. If English, capitalized the name
+- entity_type: One of the following types: [{entity_types}]
+- entity_description: Provide a comprehensive description of the entity's attributes and activities *based solely on the information present in the input text*. **Do not infer or hallucinate information not explicitly stated.** If the text provides insufficient information to create a comprehensive description, state "Description not available in text."
+Format each entity as ("entity"{tuple_delimiter}<entity_name>{tuple_delimiter}<entity_type>{tuple_delimiter}<entity_description>)
+
+2. From the entities identified in step 1, identify all pairs of (source_entity, target_entity) that are *clearly related* to each other.
+For each pair of related entities, extract the following information:
+- source_entity: name of the source entity, as identified in step 1
+- target_entity: name of the target entity, as identified in step 1
+- relationship_description: explanation as to why you think the source entity and the target entity are related to each other
+- relationship_strength: a numeric score indicating strength of the relationship between the source entity and target entity
+- relationship_keywords: one or more high-level key words that summarize the overarching nature of the relationship, focusing on concepts or themes rather than specific details
+Format each relationship as ("relationship"{tuple_delimiter}<source_entity>{tuple_delimiter}<target_entity>{tuple_delimiter}<relationship_description>{tuple_delimiter}<relationship_keywords>{tuple_delimiter}<relationship_strength>)
+
+3. Identify high-level key words that summarize the main concepts, themes, or topics of the entire text. These should capture the overarching ideas present in the document.
+Format the content-level key words as ("content_keywords"{tuple_delimiter}<high_level_keywords>)
+
+4. Return output in {language} as a single list of all the entities and relationships identified in steps 1 and 2. Use **{record_delimiter}** as the list delimiter.
+
+5. When finished, output {completion_delimiter}
+
+---Output---
+
+Add new entities and relations below using the same format, and do not include entities and relations that have been previously extracted. :
+"""

 PROMPTS[
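For orientation, here is what one record produced under this format looks like. The delimiter values below are assumptions in the style of GraphRAG-family prompts; the real values are bound to the {tuple_delimiter}, {record_delimiter}, and {completion_delimiter} template variables at render time.

```python
# Hypothetical delimiter bindings for illustration only.
tuple_delimiter = "<|>"
record_delimiter = "##"

entity = f'("entity"{tuple_delimiter}RAGFLOW{tuple_delimiter}organization{tuple_delimiter}Open-source RAG engine)'
relation = f'("relationship"{tuple_delimiter}RAGFLOW{tuple_delimiter}GRAPHRAG{tuple_delimiter}RAGFlow integrates GraphRAG{tuple_delimiter}integration{tuple_delimiter}8)'
print(record_delimiter.join([entity, relation]))
```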
@@ -307,6 +307,7 @@ def chunk_id(chunk):

 async def graph_node_to_chunk(kb_id, embd_mdl, ent_name, meta, chunks):
     global chat_limiter
+    enable_timeout_assertion=os.environ.get("ENABLE_TIMEOUT_ASSERTION")
     chunk = {
         "id": get_uuid(),
         "important_kwd": [ent_name],

@@ -324,7 +325,7 @@ async def graph_node_to_chunk(kb_id, embd_mdl, ent_name, meta, chunks):
     ebd = get_embed_cache(embd_mdl.llm_name, ent_name)
     if ebd is None:
         async with chat_limiter:
-            with trio.fail_after(3):
+            with trio.fail_after(3 if enable_timeout_assertion else 30000000):
                 ebd, _ = await trio.to_thread.run_sync(lambda: embd_mdl.encode([ent_name]))
         ebd = ebd[0]
         set_embed_cache(embd_mdl.llm_name, ent_name, ebd)

@@ -362,6 +363,7 @@ def get_relation(tenant_id, kb_id, from_ent_name, to_ent_name, size=1):


 async def graph_edge_to_chunk(kb_id, embd_mdl, from_ent_name, to_ent_name, meta, chunks):
+    enable_timeout_assertion=os.environ.get("ENABLE_TIMEOUT_ASSERTION")
     chunk = {
         "id": get_uuid(),
         "from_entity_kwd": from_ent_name,

@@ -380,7 +382,7 @@ async def graph_edge_to_chunk(kb_id, embd_mdl, from_ent_name, to_ent_name, meta,
     ebd = get_embed_cache(embd_mdl.llm_name, txt)
     if ebd is None:
         async with chat_limiter:
-            with trio.fail_after(3):
+            with trio.fail_after(3 if enable_timeout_assertion else 300000000):
                 ebd, _ = await trio.to_thread.run_sync(lambda: embd_mdl.encode([txt+f": {meta['description']}"]))
         ebd = ebd[0]
         set_embed_cache(embd_mdl.llm_name, txt, ebd)

@@ -514,9 +516,10 @@ async def set_graph(tenant_id: str, kb_id: str, embd_mdl, graph: nx.Graph, chang
         callback(msg=f"set_graph converted graph change to {len(chunks)} chunks in {now - start:.2f}s.")
         start = now

+    enable_timeout_assertion=os.environ.get("ENABLE_TIMEOUT_ASSERTION")
     es_bulk_size = 4
     for b in range(0, len(chunks), es_bulk_size):
-        with trio.fail_after(3):
+        with trio.fail_after(3 if enable_timeout_assertion else 30000000):
             doc_store_result = await trio.to_thread.run_sync(lambda: settings.docStoreConn.insert(chunks[b:b + es_bulk_size], search.index_name(tenant_id), kb_id))
         if b % 100 == es_bulk_size and callback:
             callback(msg=f"Insert chunks: {b}/{len(chunks)}")
@@ -36,6 +36,7 @@ class SupportedLiteLLMProvider(StrEnum):
     Nvidia = "NVIDIA"
     TogetherAI = "TogetherAI"
     Anthropic = "Anthropic"
+    Ollama = "Ollama"


 FACTORY_DEFAULT_BASE_URL = {

@@ -59,6 +60,7 @@ LITELLM_PROVIDER_PREFIX = {
     SupportedLiteLLMProvider.Nvidia: "nvidia_nim/",
     SupportedLiteLLMProvider.TogetherAI: "together_ai/",
     SupportedLiteLLMProvider.Anthropic: "", # don't need a prefix
+    SupportedLiteLLMProvider.Ollama: "ollama_chat/",
 }

 ChatModel = globals().get("ChatModel", {})
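These two entries, together with the chat_model.py changes below, move Ollama onto the shared LiteLLM path. In LiteLLM terms, the prefix means a chat call reduces to roughly the following sketch; the model name and URL are illustrative, not from the diff:

```python
import litellm

# "ollama_chat/" is the provider prefix added above; "llama3" and the
# localhost URL are example values for a locally running Ollama server.
response = litellm.completion(
    model="ollama_chat/llama3",
    api_base="http://localhost:11434",
    messages=[{"role": "user", "content": "Hello"}],
)
print(response.choices[0].message.content)
```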
@@ -29,7 +29,6 @@ import json_repair
 import litellm
 import openai
 import requests
-from ollama import Client
 from openai import OpenAI
 from openai.lib.azure import AzureOpenAI
 from strenum import StrEnum

@@ -683,73 +682,6 @@ class ZhipuChat(Base):
         return super().chat_streamly_with_tools(system, history, gen_conf)


-class OllamaChat(Base):
-    _FACTORY_NAME = "Ollama"
-
-    def __init__(self, key, model_name, base_url=None, **kwargs):
-        super().__init__(key, model_name, base_url=base_url, **kwargs)
-
-        self.client = Client(host=base_url) if not key or key == "x" else Client(host=base_url, headers={"Authorization": f"Bearer {key}"})
-        self.model_name = model_name
-        self.keep_alive = kwargs.get("ollama_keep_alive", int(os.environ.get("OLLAMA_KEEP_ALIVE", -1)))
-
-    def _clean_conf(self, gen_conf):
-        options = {}
-        if "max_tokens" in gen_conf:
-            options["num_predict"] = gen_conf["max_tokens"]
-        for k in ["temperature", "top_p", "presence_penalty", "frequency_penalty"]:
-            if k not in gen_conf:
-                continue
-            options[k] = gen_conf[k]
-        return options
-
-    def _chat(self, history, gen_conf={}, **kwargs):
-        # Calculate context size
-        ctx_size = self._calculate_dynamic_ctx(history)
-
-        gen_conf["num_ctx"] = ctx_size
-        response = self.client.chat(model=self.model_name, messages=history, options=gen_conf, keep_alive=self.keep_alive)
-        ans = response["message"]["content"].strip()
-        token_count = response.get("eval_count", 0) + response.get("prompt_eval_count", 0)
-        return ans, token_count
-
-    def chat_streamly(self, system, history, gen_conf={}, **kwargs):
-        if system:
-            history.insert(0, {"role": "system", "content": system})
-        if "max_tokens" in gen_conf:
-            del gen_conf["max_tokens"]
-        try:
-            # Calculate context size
-            ctx_size = self._calculate_dynamic_ctx(history)
-            options = {"num_ctx": ctx_size}
-            if "temperature" in gen_conf:
-                options["temperature"] = gen_conf["temperature"]
-            if "max_tokens" in gen_conf:
-                options["num_predict"] = gen_conf["max_tokens"]
-            if "top_p" in gen_conf:
-                options["top_p"] = gen_conf["top_p"]
-            if "presence_penalty" in gen_conf:
-                options["presence_penalty"] = gen_conf["presence_penalty"]
-            if "frequency_penalty" in gen_conf:
-                options["frequency_penalty"] = gen_conf["frequency_penalty"]
-
-            ans = ""
-            try:
-                response = self.client.chat(model=self.model_name, messages=history, stream=True, options=options, keep_alive=self.keep_alive)
-                for resp in response:
-                    if resp["done"]:
-                        token_count = resp.get("prompt_eval_count", 0) + resp.get("eval_count", 0)
-                        yield token_count
-                    ans = resp["message"]["content"]
-                    yield ans
-            except Exception as e:
-                yield ans + "\n**ERROR**: " + str(e)
-            yield 0
-        except Exception as e:
-            yield "**ERROR**: " + str(e)
-            yield 0
-
-
 class LocalAIChat(Base):
     _FACTORY_NAME = "LocalAI"

@@ -1422,7 +1354,7 @@ class Ai302Chat(Base):


 class LiteLLMBase(ABC):
-    _FACTORY_NAME = ["Tongyi-Qianwen", "Bedrock", "Moonshot", "xAI", "DeepInfra", "Groq", "Cohere", "Gemini", "DeepSeek", "NVIDIA", "TogetherAI", "Anthropic"]
+    _FACTORY_NAME = ["Tongyi-Qianwen", "Bedrock", "Moonshot", "xAI", "DeepInfra", "Groq", "Cohere", "Gemini", "DeepSeek", "NVIDIA", "TogetherAI", "Anthropic", "Ollama"]

     def __init__(self, key, model_name, base_url=None, **kwargs):
         self.timeout = int(os.environ.get("LM_TIMEOUT_SECONDS", 600))
@@ -21,7 +21,7 @@ import sys
 import threading
 import time

-from api.utils.api_utils import timeout, is_strong_enough
+from api.utils.api_utils import timeout
 from api.utils.log_utils import init_root_logger, get_project_base_directory
 from graphrag.general.index import run_graphrag
 from graphrag.utils import get_llm_cache, set_llm_cache, get_tags_from_cache, set_tags_to_cache

@@ -478,8 +478,6 @@ async def embedding(docs, mdl, parser_config=None, callback=None):

 @timeout(3600)
 async def run_raptor(row, chat_mdl, embd_mdl, vector_size, callback=None):
-    # Pressure test for GraphRAG task
-    await is_strong_enough(chat_mdl, embd_mdl)
     chunks = []
     vctr_nm = "q_%d_vec"%vector_size
     for d in settings.retrievaler.chunk_list(row["doc_id"], row["tenant_id"], [str(row["kb_id"])],

@@ -553,7 +551,6 @@ async def do_handle_task(task):
     try:
         # bind embedding model
         embedding_model = LLMBundle(task_tenant_id, LLMType.EMBEDDING, llm_name=task_embedding_id, lang=task_language)
-        await is_strong_enough(None, embedding_model)
         vts, _ = embedding_model.encode(["ok"])
         vector_size = len(vts[0])
     except Exception as e:

@@ -568,7 +565,6 @@ async def do_handle_task(task):
     if task.get("task_type", "") == "raptor":
         # bind LLM for raptor
         chat_model = LLMBundle(task_tenant_id, LLMType.CHAT, llm_name=task_llm_id, lang=task_language)
-        await is_strong_enough(chat_model, None)
         # run RAPTOR
         async with kg_limiter:
             chunks, token_count = await run_raptor(task, chat_model, embedding_model, vector_size, progress_callback)

@@ -580,7 +576,6 @@ async def do_handle_task(task):
         graphrag_conf = task["kb_parser_config"].get("graphrag", {})
         start_ts = timer()
         chat_model = LLMBundle(task_tenant_id, LLMType.CHAT, llm_name=task_llm_id, lang=task_language)
-        await is_strong_enough(chat_model, None)
         with_resolution = graphrag_conf.get("resolution", False)
         with_community = graphrag_conf.get("community", False)
         async with kg_limiter:
@@ -2,7 +2,6 @@ import { Toaster as Sonner } from '@/components/ui/sonner';
 import { Toaster } from '@/components/ui/toaster';
 import i18n from '@/locales/config';
 import { QueryClient, QueryClientProvider } from '@tanstack/react-query';
-import { ReactQueryDevtools } from '@tanstack/react-query-devtools';
 import { App, ConfigProvider, ConfigProviderProps, theme } from 'antd';
 import pt_BR from 'antd/lib/locale/pt_BR';
 import deDE from 'antd/locale/de_DE';

@@ -85,7 +84,7 @@ function Root({ children }: React.PropsWithChildren) {
       <Sonner position={'top-right'} expand richColors closeButton></Sonner>
       <Toaster />
     </ConfigProvider>
-    <ReactQueryDevtools buttonPosition={'top-left'} initialIsOpen={false} />
+    {/* <ReactQueryDevtools buttonPosition={'top-left'} initialIsOpen={false} /> */}
   </>
 );
}
@@ -68,7 +68,7 @@ export function LayoutRecognizeFormField() {
       <div className="flex items-center">
         <FormLabel
           tooltip={t('layoutRecognizeTip')}
-          className="text-sm text-muted-foreground whitespace-nowrap w-1/4"
+          className="text-sm text-muted-foreground whitespace-wrap w-1/4"
         >
           {t('layoutRecognize')}
         </FormLabel>
@@ -28,6 +28,7 @@ import {
   PopoverTrigger,
 } from '@/components/ui/popover';
 import { cn } from '@/lib/utils';
+import { t } from 'i18next';
 import { RAGFlowSelectOptionType } from '../ui/select';
 import { Separator } from '../ui/separator';

@@ -114,7 +115,9 @@ export const SelectWithSearch = forwardRef<
             <span className="leading-none truncate">{selectLabel}</span>
           </span>
         ) : (
-          <span className="text-muted-foreground">Select value</span>
+          <span className="text-muted-foreground">
+            {t('common.selectPlaceholder')}
+          </span>
         )}
         <div className="flex items-center justify-between">
           {value && allowClear && (
@@ -3,7 +3,7 @@ import { DocumentParserType } from '@/constants/knowledge';
 import { useTranslate } from '@/hooks/common-hooks';
 import random from 'lodash/random';
 import { Plus } from 'lucide-react';
-import { useCallback, useEffect } from 'react';
+import { useCallback } from 'react';
 import { useFormContext, useWatch } from 'react-hook-form';
 import { SliderInputFormField } from '../slider-input-form-field';
 import { Button } from '../ui/button';

@@ -57,15 +57,19 @@ const RaptorFormFields = () => {
   const form = useFormContext();
   const { t } = useTranslate('knowledgeConfiguration');
   const useRaptor = useWatch({ name: UseRaptorField });
-  useEffect(() => {
-    if (useRaptor) {
+
+  const changeRaptor = useCallback(
+    (isUseRaptor: boolean) => {
+      if (isUseRaptor) {
         form.setValue(MaxTokenField, 256);
         form.setValue(ThresholdField, 0.1);
         form.setValue(MaxCluster, 64);
         form.setValue(RandomSeedField, 0);
         form.setValue(Prompt, t('promptText'));
       }
-  }, [form, useRaptor, t]);
+    },
+    [form],
+  );

   const handleGenerate = useCallback(() => {
     form.setValue(RandomSeedField, random(10000));

@@ -97,7 +101,10 @@ const RaptorFormFields = () => {
               <FormControl>
                 <Switch
                   checked={field.value}
-                  onCheckedChange={field.onChange}
+                  onCheckedChange={(e) => {
+                    changeRaptor(e);
+                    field.onChange(e);
+                  }}
                 ></Switch>
               </FormControl>
             </div>

@@ -127,7 +134,13 @@ const RaptorFormFields = () => {
               </FormLabel>
               <div className="w-3/4">
                 <FormControl>
-                  <Textarea {...field} rows={8} />
+                  <Textarea
+                    {...field}
+                    rows={8}
+                    onChange={(e) => {
+                      field.onChange(e?.target?.value);
+                    }}
+                  />
                 </FormControl>
               </div>
             </div>
@@ -209,10 +209,16 @@ export const MultiSelect = React.forwardRef<
     const [isAnimating, setIsAnimating] = React.useState(false);

     React.useEffect(() => {
-      if (selectedValues === undefined) {
+      if (!selectedValues && props.value) {
         setSelectedValues(props.value as string[]);
       }
     }, [props.value, selectedValues]);

     React.useEffect(() => {
       if (!selectedValues && !props.value && defaultValue) {
         setSelectedValues(defaultValue);
       }
-    }, [defaultValue, selectedValues]);
+    }, [defaultValue, props.value, selectedValues]);

     const flatOptions = React.useMemo(() => {
       return options.flatMap((option) =>

@@ -293,15 +299,18 @@ export const MultiSelect = React.forwardRef<
                         variant="secondary"
                         className={cn(
                           isAnimating ? 'animate-bounce' : '',
+                          'px-1',
                           multiSelectVariants({ variant }),
                         )}
                         style={{ animationDuration: `${animation}s` }}
                       >
-                        <div className="flex items-center gap-1">
+                        <div className="flex justify-between items-center gap-1">
                           {IconComponent && (
                             <IconComponent className="h-4 w-4" />
                           )}
-                          <div>{option?.label}</div>
+                          <div className="max-w-28 text-ellipsis overflow-hidden">
+                            {option?.label}
+                          </div>
                           <XCircle
                             className="h-4 w-4 cursor-pointer"
                             onClick={(event) => {
@@ -1,6 +1,7 @@
 export default {
   translation: {
     common: {
+      selectPlaceholder: 'select value',
       delete: 'Delete',
       deleteModalTitle: 'Are you sure to delete this item?',
       ok: 'Yes',

@@ -94,6 +95,16 @@ export default {
       noMoreData: `That's all. Nothing more.`,
     },
     knowledgeDetails: {
+      created: 'Created',
+      learnMore: 'Learn More',
+      general: 'General',
+      chunkMethodTab: 'Chunk Method',
+      testResults: 'Test Results',
+      testSetting: 'Test Setting',
+      retrievalTesting: 'Retrieval Testing',
+      retrievalTestingDescription:
+        'Conduct a retrieval test to check if RAGFlow can recover the intended content for the LLM.',
+      Parse: 'Parse',
       dataset: 'Dataset',
       testing: 'Retrieval testing',
       files: 'files',
@@ -1,6 +1,7 @@
 export default {
   translation: {
     common: {
+      selectPlaceholder: '请选择',
       delete: '删除',
       deleteModalTitle: '确定删除吗?',
       ok: '是',

@@ -86,6 +87,16 @@ export default {
       noMoreData: '没有更多数据了',
     },
     knowledgeDetails: {
+      created: '创建于',
+      learnMore: '了解更多',
+      general: '通用',
+      chunkMethodTab: '切片方法',
+      testResults: '测试结果',
+      testSetting: '测试设置',
+      retrievalTesting: '知识检索测试',
+      retrievalTestingDescription:
+        '进行检索测试,检查 RAGFlow 是否能够为大语言模型(LLM)恢复预期的内容。',
+      Parse: '解析',
       dataset: '数据集',
       testing: '检索测试',
       configuration: '配置',
@@ -30,7 +30,9 @@ export default function DatasetWrapper() {
           </BreadcrumbItem>
           <BreadcrumbSeparator />
           <BreadcrumbItem>
-            <BreadcrumbPage>{data.name}</BreadcrumbPage>
+            <BreadcrumbPage className="w-28 whitespace-nowrap text-ellipsis overflow-hidden">
+              {data.name}
+            </BreadcrumbPage>
           </BreadcrumbItem>
         </BreadcrumbList>
       </Breadcrumb>
@@ -1,5 +1,6 @@
 import { Button } from '@/components/ui/button';
 import { cn } from '@/lib/utils';
+import { t } from 'i18next';
 import { X } from 'lucide-react';
 import { useState } from 'react';
 import CategoryPanel from './category-panel';

@@ -26,7 +27,7 @@ export default ({
             setVisible(!visible);
           }}
         >
-          Learn More
+          {t('knowledgeDetails.learnMore')}
         </Button>
       </div>
       <div
@@ -29,7 +29,7 @@ export function ChunkMethodItem() {
       <div className="flex items-center">
         <FormLabel
           tooltip={t('chunkMethodTip')}
-          className="text-sm text-muted-foreground whitespace-nowrap w-1/4"
+          className="text-sm text-muted-foreground whitespace-wrap w-1/4"
         >
           {t('chunkMethod')}
         </FormLabel>

@@ -69,7 +69,7 @@ export function EmbeddingModelItem() {
       <div className="flex items-center">
         <FormLabel
           tooltip={t('embeddingModelTip')}
-          className="text-sm text-muted-foreground whitespace-nowrap w-1/4"
+          className="text-sm text-muted-foreground whitespace-wrap w-1/4"
         >
           {t('embeddingModel')}
         </FormLabel>

@@ -107,7 +107,7 @@ export default function DatasetSettings() {
       >
         <div className="flex w-full h-full justify-center items-center">
           <span className="h-full group-data-[state=active]:border-b-2 border-foreground ">
-            General
+            {t('knowledgeDetails.general')}
           </span>
         </div>
       </TabsTrigger>

@@ -117,7 +117,7 @@ export default function DatasetSettings() {
       >
         <div className="flex w-full h-full justify-center items-center">
           <span className="h-full group-data-[state=active]:border-b-2 border-foreground ">
-            Chunk Method
+            {t('knowledgeDetails.chunkMethodTab')}
           </span>
         </div>
       </TabsTrigger>
@@ -67,10 +67,14 @@ export function SideBar({ refreshCount }: PropType) {
             {data.name}
           </h3>
           <div className="flex justify-between">
-            <span>{data.doc_num} files</span>
+            <span>
+              {data.doc_num} {t('knowledgeDetails.files')}
+            </span>
             <span>{formatBytes(data.size)}</span>
           </div>
-          <div>Created {formatPureDate(data.create_time)}</div>
+          <div>
+            {t('knowledgeDetails.created')} {formatPureDate(data.create_time)}
+          </div>
         </div>
       </div>
@@ -1,4 +1,5 @@
 import { useTestRetrieval } from '@/hooks/use-knowledge-request';
+import { t } from 'i18next';
 import { useState } from 'react';
 import { TopTitle } from '../dataset-title';
 import TestingForm from './testing-form';

@@ -23,8 +24,8 @@ export default function RetrievalTesting() {
     <div className="p-5">
       <section className="flex justify-between items-center">
         <TopTitle
-          title={'Retrieval testing'}
-          description={`Conduct a retrieval test to check if RAGFlow can recover the intended content for the LLM.`}
+          title={t('knowledgeDetails.retrievalTesting')}
+          description={t('knowledgeDetails.retrievalTestingDescription')}
         ></TopTitle>
         {/* <Button>Save as Preset</Button> */}
       </section>

@@ -33,7 +34,7 @@ export default function RetrievalTesting() {
         <div className="p-4 flex-1">
           <div className="flex justify-between pb-2.5">
             <span className="text-text-primary font-semibold text-2xl">
-              Test setting
+              {t('knowledgeDetails.testSetting')}
             </span>
             {/* <Button variant={'outline'} onClick={addCount}>
               <Plus /> Add New Test
@@ -6,6 +6,7 @@ import { RAGFlowPagination } from '@/components/ui/ragflow-pagination';
 import { useTranslate } from '@/hooks/common-hooks';
 import { useTestRetrieval } from '@/hooks/use-knowledge-request';
 import { ITestingChunk } from '@/interfaces/database/knowledge';
+import { t } from 'i18next';
 import camelCase from 'lodash/camelCase';
 import { useMemo } from 'react';

@@ -66,7 +67,7 @@ export function TestingResult({
     <div className="p-4 flex-1">
       <div className="flex justify-between pb-2.5">
         <span className="text-text-primary font-semibold text-2xl">
-          Test results
+          {t('knowledgeDetails.testResults')}
         </span>
         <FilterPopover
           filters={filters}
@@ -39,7 +39,7 @@ export function SeeAllCard() {
   const { navigateToDatasetList } = useNavigatePage();

   return (
-    <Card className="w-40 flex-none" onClick={navigateToDatasetList}>
+    <Card className="w-40 flex-none h-full" onClick={navigateToDatasetList}>
       <CardContent className="p-2.5 pt-1 w-full h-full flex items-center justify-center gap-1.5 text-text-secondary">
         See All <ChevronRight className="size-4" />
       </CardContent>
@@ -31,16 +31,20 @@ export function Datasets() {
         </div>
       ) : (
         <div className="grid gap-6 sm:grid-cols-1 md:grid-cols-2 lg:grid-cols-4 xl:grid-cols-6 2xl:grid-cols-8 max-h-[78vh] overflow-auto">
-          {kbs.slice(0, 6).map((dataset) => (
+          {kbs
+            ?.slice(0, 6)
+            .map((dataset) => (
               <DatasetCard
                 key={dataset.id}
                 dataset={dataset}
                 showDatasetRenameModal={showDatasetRenameModal}
               ></DatasetCard>
             ))}
+          <div className="min-h-24">
+            <SeeAllCard></SeeAllCard>
+          </div>
         </div>
       )}
-      <SeeAllCard></SeeAllCard>
     </div>
     {datasetRenameVisible && (
       <RenameDialog
@@ -9,6 +9,7 @@ import {
   setLLMSettingEnabledValues,
 } from '@/utils/form';
 import { zodResolver } from '@hookform/resolvers/zod';
+import { omit } from 'lodash';
 import { X } from 'lucide-react';
 import { useEffect } from 'react';
 import { useForm } from 'react-hook-form';

@@ -69,7 +70,7 @@ export function ChatSettings({ switchSettingVisible }: ChatSettingsProps) {
       ? await transformFile2Base64(icon[0])
       : '';
     setDialog({
-      ...data,
+      ...omit(data, 'operator_permission'),
       ...nextValues,
       icon: avatar,
       dialog_id: id,
@@ -48,7 +48,10 @@ export const useRenameChat = () => {
     const nextChat = {
       ...(isEmpty(chat)
         ? InitialData
-        : { ...omit(chat, 'nickname', 'tenant_avatar'), dialog_id: chat.id }),
+        : {
+            ...omit(chat, 'nickname', 'tenant_avatar', 'operator_permission'),
+            dialog_id: chat.id,
+          }),
       name,
     };
     const ret = await setDialog(nextChat);