Mirror of https://github.com/infiniflow/ragflow.git (synced 2025-12-08 20:42:30 +08:00)
# Fix: duplicated role... (#9622)
### What problem does this PR solve?

#9611 #9603 #9597

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
```diff
@@ -426,7 +426,7 @@ class Canvas:
         convs = []
         if window_size <= 0:
             return convs
-        for role, obj in self.history[window_size * -1:]:
+        for role, obj in self.history[window_size * -2:]:
             if isinstance(obj, dict):
                 convs.append({"role": role, "content": obj.get("content", "")})
             else:
```
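The only functional change in this hunk is the slice bound in `Canvas.get_history`. Assuming `self.history` keeps one `(role, obj)` entry per message, a window expressed in dialogue rounds spans two entries per round (user plus assistant), so slicing with `window_size * -2` keeps whole rounds instead of cutting each round in half. A minimal, self-contained sketch of the effect (the data below is illustrative only):

```python
# Illustrative only: shows why the slice bound is doubled, assuming one
# (role, message) entry per message in the history list.
history = [
    ("user", {"content": "q1"}), ("assistant", {"content": "a1"}),
    ("user", {"content": "q2"}), ("assistant", {"content": "a2"}),
    ("user", {"content": "q3"}), ("assistant", {"content": "a3"}),
]

window_size = 2  # intended: keep the last two rounds of conversation

old_slice = history[window_size * -1:]  # last 2 messages -> only 1 round
new_slice = history[window_size * -2:]  # last 4 messages -> 2 full rounds

print(len(old_slice), len(new_slice))   # 2 4
```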
```diff
@@ -36,7 +36,7 @@ _IS_RAW_CONF = "_is_raw_conf"

 class ComponentParamBase(ABC):
     def __init__(self):
-        self.message_history_window_size = 22
+        self.message_history_window_size = 13
         self.inputs = {}
         self.outputs = {}
         self.description = ""
```
```diff
@@ -18,11 +18,8 @@ import logging
 import os
 import re
 from typing import Any, Generator

 import json_repair
-from copy import deepcopy
 from functools import partial

 from api.db import LLMType
 from api.db.services.llm_service import LLMBundle
 from api.db.services.tenant_llm_service import TenantLLMService
```
```diff
@@ -130,7 +127,7 @@ class LLM(ComponentBase):

         args = {}
         vars = self.get_input_elements() if not self._param.debug_inputs else self._param.debug_inputs
-        prompt = self._param.sys_prompt
+        sys_prompt = self._param.sys_prompt
         for k, o in vars.items():
             args[k] = o["value"]
             if not isinstance(args[k], str):
```
```diff
@@ -141,14 +138,18 @@ class LLM(ComponentBase):
             self.set_input_value(k, args[k])

         msg = self._canvas.get_history(self._param.message_history_window_size)[:-1]
-        msg.extend(deepcopy(self._param.prompts))
-        prompt = self.string_format(prompt, args)
+        for p in self._param.prompts:
+            if msg and msg[-1]["role"] == p["role"]:
+                continue
+            msg.append(p)
+
+        sys_prompt = self.string_format(sys_prompt, args)
         for m in msg:
             m["content"] = self.string_format(m["content"], args)
         if self._param.cite and self._canvas.get_reference()["chunks"]:
-            prompt += citation_prompt()
+            sys_prompt += citation_prompt()

-        return prompt, msg
+        return sys_prompt, msg

     def _generate(self, msg:list[dict], **kwargs) -> str:
         if not self.imgs:
```
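This hunk carries the duplicated-role fix itself. Instead of appending every configured prompt after the history unconditionally (the old `msg.extend(deepcopy(self._param.prompts))`), each prompt is now skipped when its role matches the role of the last message already in `msg`, so the message list handed to the model never ends with two consecutive entries of the same role. A self-contained sketch of the same guard (the function and data below are illustrative, not the component's actual API):

```python
# Sketch of the duplicate-role guard added above; names and data are
# illustrative examples, not the component's actual inputs.
def merge_prompts(history: list[dict], prompts: list[dict]) -> list[dict]:
    msg = list(history)
    for p in prompts:
        # Skip a configured prompt whose role repeats the previous message's
        # role, which would otherwise yield two consecutive "user" turns.
        if msg and msg[-1]["role"] == p["role"]:
            continue
        msg.append(p)
    return msg

history = [{"role": "user", "content": "What is RAG?"}]
prompts = [{"role": "user", "content": "{sys.query}"}]
print(merge_prompts(history, prompts))  # the duplicate "user" prompt is dropped
```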
```diff
@@ -44,9 +44,6 @@ def retrieval(tenant_id):
         if not e:
             return build_error_result(message="Knowledgebase not found!", code=settings.RetCode.NOT_FOUND)

-        if kb.tenant_id != tenant_id:
-            return build_error_result(message="Knowledgebase not found!", code=settings.RetCode.NOT_FOUND)
-
         embd_mdl = LLMBundle(kb.tenant_id, LLMType.EMBEDDING.value, llm_name=kb.embd_id)

         ranks = settings.retrievaler.retrieval(
```
```diff
@@ -169,7 +169,7 @@ class EntityResolution(Extractor):
        logging.info(f"Created resolution prompt {len(text)} bytes for {len(candidate_resolution_i[1])} entity pairs of type {candidate_resolution_i[0]}")
        async with chat_limiter:
            try:
-               with trio.move_on_after(240) as cancel_scope:
+               with trio.move_on_after(280) as cancel_scope:
                    response = await trio.to_thread.run_sync(self._chat, text, [{"role": "user", "content": "Output:"}], {})
                if cancel_scope.cancelled_caught:
                    logging.warning("_resolve_candidate._chat timeout, skipping...")
```
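Only the cancellation budget changes here (240 s to 280 s). For context on the pattern: `trio.move_on_after()` opens a cancel scope that cancels whatever is awaited inside it once the deadline passes, and `cancel_scope.cancelled_caught` reports whether that happened, which is what drives the "timeout, skipping..." warning. A small self-contained illustration of the idiom, with `trio.sleep` standing in for the slow LLM call (the real code runs that call in a worker thread via `trio.to_thread.run_sync`):

```python
import trio

async def main() -> None:
    with trio.move_on_after(1) as cancel_scope:  # 1-second budget for the demo
        await trio.sleep(5)                      # stands in for a slow LLM call
        print("finished in time")
    if cancel_scope.cancelled_caught:
        print("timed out, skipping...")          # mirrors the warning above

trio.run(main)
```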
```diff
@@ -47,7 +47,7 @@ class Extractor:
         self._language = language
         self._entity_types = entity_types or DEFAULT_ENTITY_TYPES

-    @timeout(60*5)
+    @timeout(60*20)
     def _chat(self, system, history, gen_conf={}):
         hist = deepcopy(history)
         conf = deepcopy(gen_conf)
```
```diff
@@ -42,7 +42,7 @@ class RecursiveAbstractiveProcessing4TreeOrganizedRetrieval:
         self._prompt = prompt
         self._max_token = max_token

-    @timeout(60*3)
+    @timeout(60*20)
     async def _chat(self, system, history, gen_conf):
         response = get_llm_cache(self._llm_model.llm_name, system, history, gen_conf)
         if response:
```
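The `@timeout(...)` decorator used in these hunks is defined elsewhere in the project and is not part of this diff; the changes only raise its budget (here from 3 minutes to 20). Purely as an illustration of the concept, and explicitly not the project's implementation, a hypothetical async timeout decorator built on Trio could look like this:

```python
# Hypothetical sketch only; the project's real @timeout decorator is not
# shown in this diff and may differ.
import functools
import trio

def timeout(seconds: float):
    def decorator(fn):
        @functools.wraps(fn)
        async def wrapper(*args, **kwargs):
            # trio.fail_after raises trio.TooSlowError once the deadline passes.
            with trio.fail_after(seconds):
                return await fn(*args, **kwargs)
        return wrapper
    return decorator

@timeout(60 * 20)
async def _chat_stub() -> str:
    await trio.sleep(0.1)  # stands in for the real LLM round trip
    return "ok"

print(trio.run(_chat_stub))  # "ok", well within the 20-minute budget
```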
```diff
@@ -56,7 +56,7 @@ class RecursiveAbstractiveProcessing4TreeOrganizedRetrieval:
         set_llm_cache(self._llm_model.llm_name, system, response, history, gen_conf)
         return response

-    @timeout(2)
+    @timeout(20)
     async def _embedding_encode(self, txt):
         response = get_embed_cache(self._embd_model.llm_name, txt)
         if response is not None:
```
```diff
@@ -86,7 +86,7 @@ class RecursiveAbstractiveProcessing4TreeOrganizedRetrieval:
         layers = [(0, len(chunks))]
         start, end = 0, len(chunks)

-        @timeout(60*3)
+        @timeout(60*20)
         async def summarize(ck_idx: list[int]):
             nonlocal chunks
             texts = [chunks[i][0] for i in ck_idx]
```