diff --git a/agent/canvas.py b/agent/canvas.py
index 78606fcd0..eb31aadf1 100644
--- a/agent/canvas.py
+++ b/agent/canvas.py
@@ -426,7 +426,7 @@ class Canvas:
         convs = []
         if window_size <= 0:
             return convs
-        for role, obj in self.history[window_size * -1:]:
+        for role, obj in self.history[window_size * -2:]:
             if isinstance(obj, dict):
                 convs.append({"role": role, "content": obj.get("content", "")})
             else:
diff --git a/agent/component/base.py b/agent/component/base.py
index 529c1de56..3471a4631 100644
--- a/agent/component/base.py
+++ b/agent/component/base.py
@@ -36,7 +36,7 @@ _IS_RAW_CONF = "_is_raw_conf"
 
 class ComponentParamBase(ABC):
     def __init__(self):
-        self.message_history_window_size = 22
+        self.message_history_window_size = 13
         self.inputs = {}
         self.outputs = {}
         self.description = ""
diff --git a/agent/component/llm.py b/agent/component/llm.py
index 963a7e9f0..b68f6abe1 100644
--- a/agent/component/llm.py
+++ b/agent/component/llm.py
@@ -18,11 +18,8 @@ import logging
 import os
 import re
 from typing import Any, Generator
-
 import json_repair
-from copy import deepcopy
 from functools import partial
-
 from api.db import LLMType
 from api.db.services.llm_service import LLMBundle
 from api.db.services.tenant_llm_service import TenantLLMService
@@ -130,7 +127,7 @@ class LLM(ComponentBase):
 
         args = {}
         vars = self.get_input_elements() if not self._param.debug_inputs else self._param.debug_inputs
-        prompt = self._param.sys_prompt
+        sys_prompt = self._param.sys_prompt
         for k, o in vars.items():
             args[k] = o["value"]
             if not isinstance(args[k], str):
@@ -141,14 +138,18 @@ class LLM(ComponentBase):
             self.set_input_value(k, args[k])
 
         msg = self._canvas.get_history(self._param.message_history_window_size)[:-1]
-        msg.extend(deepcopy(self._param.prompts))
-        prompt = self.string_format(prompt, args)
+        for p in self._param.prompts:
+            if msg and msg[-1]["role"] == p["role"]:
+                continue
+            msg.append(p)
+
+        sys_prompt = self.string_format(sys_prompt, args)
         for m in msg:
             m["content"] = self.string_format(m["content"], args)
         if self._param.cite and self._canvas.get_reference()["chunks"]:
-            prompt += citation_prompt()
+            sys_prompt += citation_prompt()
 
-        return prompt, msg
+        return sys_prompt, msg
 
     def _generate(self, msg:list[dict], **kwargs) -> str:
         if not self.imgs:
diff --git a/api/apps/sdk/dify_retrieval.py b/api/apps/sdk/dify_retrieval.py
index cd119a7b1..ca95d5ab7 100644
--- a/api/apps/sdk/dify_retrieval.py
+++ b/api/apps/sdk/dify_retrieval.py
@@ -44,9 +44,6 @@ def retrieval(tenant_id):
     if not e:
         return build_error_result(message="Knowledgebase not found!", code=settings.RetCode.NOT_FOUND)
 
-    if kb.tenant_id != tenant_id:
-        return build_error_result(message="Knowledgebase not found!", code=settings.RetCode.NOT_FOUND)
-
     embd_mdl = LLMBundle(kb.tenant_id, LLMType.EMBEDDING.value, llm_name=kb.embd_id)
 
     ranks = settings.retrievaler.retrieval(
diff --git a/graphrag/entity_resolution.py b/graphrag/entity_resolution.py
index c324492fe..9cdb16bb9 100644
--- a/graphrag/entity_resolution.py
+++ b/graphrag/entity_resolution.py
@@ -169,7 +169,7 @@ class EntityResolution(Extractor):
             logging.info(f"Created resolution prompt {len(text)} bytes for {len(candidate_resolution_i[1])} entity pairs of type {candidate_resolution_i[0]}")
             async with chat_limiter:
                 try:
-                    with trio.move_on_after(240) as cancel_scope:
+                    with trio.move_on_after(280) as cancel_scope:
                         response = await trio.to_thread.run_sync(self._chat, text, [{"role": "user", "content": "Output:"}], {})
                     if cancel_scope.cancelled_caught:
                         logging.warning("_resolve_candidate._chat timeout, skipping...")
diff --git a/graphrag/general/extractor.py b/graphrag/general/extractor.py
index a49074c90..8a8655308 100644
--- a/graphrag/general/extractor.py
+++ b/graphrag/general/extractor.py
@@ -47,7 +47,7 @@ class Extractor:
         self._language = language
         self._entity_types = entity_types or DEFAULT_ENTITY_TYPES
 
-    @timeout(60*5)
+    @timeout(60*20)
     def _chat(self, system, history, gen_conf={}):
         hist = deepcopy(history)
         conf = deepcopy(gen_conf)
diff --git a/rag/raptor.py b/rag/raptor.py
index f2fbecd72..9e0a8ad97 100644
--- a/rag/raptor.py
+++ b/rag/raptor.py
@@ -42,7 +42,7 @@ class RecursiveAbstractiveProcessing4TreeOrganizedRetrieval:
         self._prompt = prompt
         self._max_token = max_token
 
-    @timeout(60*3)
+    @timeout(60*20)
     async def _chat(self, system, history, gen_conf):
         response = get_llm_cache(self._llm_model.llm_name, system, history, gen_conf)
         if response:
@@ -56,7 +56,7 @@
         set_llm_cache(self._llm_model.llm_name, system, response, history, gen_conf)
         return response
 
-    @timeout(2)
+    @timeout(20)
     async def _embedding_encode(self, txt):
         response = get_embed_cache(self._embd_model.llm_name, txt)
         if response is not None:
@@ -86,7 +86,7 @@
         layers = [(0, len(chunks))]
         start, end = 0, len(chunks)
 
-        @timeout(60*3)
+        @timeout(60*20)
         async def summarize(ck_idx: list[int]):
             nonlocal chunks
             texts = [chunks[i][0] for i in ck_idx]