diff --git a/agent/canvas.py b/agent/canvas.py index 572b68e5c..cdb9233c4 100644 --- a/agent/canvas.py +++ b/agent/canvas.py @@ -456,6 +456,7 @@ class Canvas(Graph): for c in path: o = self.get_component_obj(c) if o.component_name.lower() == "userfillup": + o.invoke() another_inputs.update(o.get_input_elements()) if o.get_param("enable_tips"): tips = o.output("tips") diff --git a/agent/component/fillup.py b/agent/component/fillup.py index 60009c101..7d27280c5 100644 --- a/agent/component/fillup.py +++ b/agent/component/fillup.py @@ -13,10 +13,14 @@ # See the License for the specific language governing permissions and # limitations under the License. # -from agent.component.message import MessageParam, Message +import json +import re +from functools import partial + +from agent.component.base import ComponentParamBase, ComponentBase -class UserFillUpParam(MessageParam): +class UserFillUpParam(ComponentParamBase): def __init__(self): super().__init__() @@ -27,13 +31,32 @@ class UserFillUpParam(MessageParam): return True -class UserFillUp(Message): +class UserFillUp(ComponentBase): component_name = "UserFillUp" def _invoke(self, **kwargs): if self._param.enable_tips: - tips, kwargs = self.get_kwargs(self._param.tips) - self.set_output("tips", tips) + content = self._param.tips + for k, v in self.get_input_elements_from_text(self._param.tips).items(): + v = v["value"] + ans = "" + if isinstance(v, partial): + for t in v(): + ans += t + elif isinstance(v, list): + ans = ",".join([str(vv) for vv in v]) + elif not isinstance(v, str): + try: + ans = json.dumps(v, ensure_ascii=False) + except Exception: + pass + else: + ans = v + if not ans: + ans = "" + content = re.sub(r"\{%s\}"%k, ans, content) + + self.set_output("tips", content) for k, v in kwargs.get("inputs", {}).items(): self.set_output(k, v) diff --git a/api/apps/kb_app.py b/api/apps/kb_app.py index f7b385c11..7094c28d7 100644 --- a/api/apps/kb_app.py +++ b/api/apps/kb_app.py @@ -127,6 +127,7 @@ def update(): logging.error("Link KB errors: ", errors) kb = kb.to_dict() kb.update(req) + kb["connectors"] = connectors return get_json_result(data=kb) except Exception as e: diff --git a/api/db/services/connector_service.py b/api/db/services/connector_service.py index c6188c490..c87f6056c 100644 --- a/api/db/services/connector_service.py +++ b/api/db/services/connector_service.py @@ -236,6 +236,7 @@ class Connector2KbService(CommonService): conn_id = conn["id"] connector_ids.append(conn_id) if conn_id in old_conn_ids: + cls.update_by_id(conn_id, {"auto_parse": conn.get("auto_parse", "1")}) continue cls.save(**{ "id": get_uuid(), diff --git a/conf/llm_factories.json b/conf/llm_factories.json index 41ebf096c..702f90fd2 100644 --- a/conf/llm_factories.json +++ b/conf/llm_factories.json @@ -4549,7 +4549,7 @@ ] }, { - "name": "Meituan", + "name": "LongCat", "logo": "", "tags": "LLM", "status": "1", diff --git a/rag/llm/__init__.py b/rag/llm/__init__.py index 8b6d40fdd..4d3d5fe82 100644 --- a/rag/llm/__init__.py +++ b/rag/llm/__init__.py @@ -37,7 +37,7 @@ class SupportedLiteLLMProvider(StrEnum): TogetherAI = "TogetherAI" Anthropic = "Anthropic" Ollama = "Ollama" - Meituan = "Meituan" + LongCat = "LongCat" CometAPI = "CometAPI" SILICONFLOW = "SILICONFLOW" OpenRouter = "OpenRouter" @@ -56,7 +56,7 @@ FACTORY_DEFAULT_BASE_URL = { SupportedLiteLLMProvider.Dashscope: "https://dashscope.aliyuncs.com/compatible-mode/v1", SupportedLiteLLMProvider.Moonshot: "https://api.moonshot.cn/v1", SupportedLiteLLMProvider.Ollama: "", - SupportedLiteLLMProvider.Meituan: "https://api.longcat.chat/openai", + SupportedLiteLLMProvider.LongCat: "https://api.longcat.chat/openai", SupportedLiteLLMProvider.CometAPI: "https://api.cometapi.com/v1", SupportedLiteLLMProvider.SILICONFLOW: "https://api.siliconflow.cn/v1", SupportedLiteLLMProvider.OpenRouter: "https://openrouter.ai/api/v1", @@ -87,7 +87,7 @@ LITELLM_PROVIDER_PREFIX = { SupportedLiteLLMProvider.TogetherAI: "together_ai/", SupportedLiteLLMProvider.Anthropic: "", # don't need a prefix SupportedLiteLLMProvider.Ollama: "ollama_chat/", - SupportedLiteLLMProvider.Meituan: "openai/", + SupportedLiteLLMProvider.LongCat: "openai/", SupportedLiteLLMProvider.CometAPI: "openai/", SupportedLiteLLMProvider.SILICONFLOW: "openai/", SupportedLiteLLMProvider.OpenRouter: "openai/", diff --git a/rag/llm/chat_model.py b/rag/llm/chat_model.py index e938ef844..17ddbc138 100644 --- a/rag/llm/chat_model.py +++ b/rag/llm/chat_model.py @@ -1390,7 +1390,7 @@ class LiteLLMBase(ABC): "TogetherAI", "Anthropic", "Ollama", - "Meituan", + "LongCat", "CometAPI", "SILICONFLOW", "OpenRouter", diff --git a/rag/raptor.py b/rag/raptor.py index 6c7b5f2f5..5dfe33dd3 100644 --- a/rag/raptor.py +++ b/rag/raptor.py @@ -97,7 +97,7 @@ class RecursiveAbstractiveProcessing4TreeOrganizedRetrieval: async def __call__(self, chunks, random_state, callback=None, task_id: str = ""): if len(chunks) <= 1: return [] - chunks = [(s, a) for s, a in chunks if s and len(a) > 0] + chunks = [(s, a) for s, a in chunks if s and a and len(a) > 0] layers = [(0, len(chunks))] start, end = 0, len(chunks) diff --git a/rag/svr/task_executor.py b/rag/svr/task_executor.py index 73b61cd4e..44f29162c 100644 --- a/rag/svr/task_executor.py +++ b/rag/svr/task_executor.py @@ -642,47 +642,64 @@ async def run_raptor_for_kb(row, kb_parser_config, chat_mdl, embd_mdl, vector_si fake_doc_id = GRAPH_RAPTOR_FAKE_DOC_ID raptor_config = kb_parser_config.get("raptor", {}) - - chunks = [] vctr_nm = "q_%d_vec"%vector_size - for doc_id in doc_ids: - for d in settings.retriever.chunk_list(doc_id, row["tenant_id"], [str(row["kb_id"])], - fields=["content_with_weight", vctr_nm], - sort_by_position=True): - chunks.append((d["content_with_weight"], np.array(d[vctr_nm]))) - raptor = Raptor( - raptor_config.get("max_cluster", 64), - chat_mdl, - embd_mdl, - raptor_config["prompt"], - raptor_config["max_token"], - raptor_config["threshold"], - ) - original_length = len(chunks) - chunks = await raptor(chunks, kb_parser_config["raptor"]["random_seed"], callback, row["id"]) - doc = { - "doc_id": fake_doc_id, - "kb_id": [str(row["kb_id"])], - "docnm_kwd": row["name"], - "title_tks": rag_tokenizer.tokenize(row["name"]), - "raptor_kwd": "raptor" - } - if row["pagerank"]: - doc[PAGERANK_FLD] = int(row["pagerank"]) res = [] tk_count = 0 - for content, vctr in chunks[original_length:]: - d = copy.deepcopy(doc) - d["id"] = xxhash.xxh64((content + str(fake_doc_id)).encode("utf-8")).hexdigest() - d["create_time"] = str(datetime.now()).replace("T", " ")[:19] - d["create_timestamp_flt"] = datetime.now().timestamp() - d[vctr_nm] = vctr.tolist() - d["content_with_weight"] = content - d["content_ltks"] = rag_tokenizer.tokenize(content) - d["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(d["content_ltks"]) - res.append(d) - tk_count += num_tokens_from_string(content) + async def generate(chunks): + nonlocal tk_count, res + raptor = Raptor( + raptor_config.get("max_cluster", 64), + chat_mdl, + embd_mdl, + raptor_config["prompt"], + raptor_config["max_token"], + raptor_config["threshold"], + ) + original_length = len(chunks) + chunks = await raptor(chunks, kb_parser_config["raptor"]["random_seed"], callback, row["id"]) + doc = { + "doc_id": fake_doc_id, + "kb_id": [str(row["kb_id"])], + "docnm_kwd": row["name"], + "title_tks": rag_tokenizer.tokenize(row["name"]), + "raptor_kwd": "raptor" + } + if row["pagerank"]: + doc[PAGERANK_FLD] = int(row["pagerank"]) + + for content, vctr in chunks[original_length:]: + d = copy.deepcopy(doc) + d["id"] = xxhash.xxh64((content + str(fake_doc_id)).encode("utf-8")).hexdigest() + d["create_time"] = str(datetime.now()).replace("T", " ")[:19] + d["create_timestamp_flt"] = datetime.now().timestamp() + d[vctr_nm] = vctr.tolist() + d["content_with_weight"] = content + d["content_ltks"] = rag_tokenizer.tokenize(content) + d["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(d["content_ltks"]) + res.append(d) + tk_count += num_tokens_from_string(content) + + if raptor_config.get("scope", "file") == "file": + for x, doc_id in enumerate(doc_ids): + chunks = [] + for d in settings.retriever.chunk_list(doc_id, row["tenant_id"], [str(row["kb_id"])], + fields=["content_with_weight", vctr_nm], + sort_by_position=True): + chunks.append((d["content_with_weight"], np.array(d[vctr_nm]))) + callback(progress=(x+1.)/len(doc_ids)) + await generate(chunks) + + else: + chunks = [] + for doc_id in doc_ids: + for d in settings.retriever.chunk_list(doc_id, row["tenant_id"], [str(row["kb_id"])], + fields=["content_with_weight", vctr_nm], + sort_by_position=True): + chunks.append((d["content_with_weight"], np.array(d[vctr_nm]))) + + await generate(chunks) + return res, tk_count @@ -795,6 +812,7 @@ async def do_handle_task(task): "threshold": 0.1, "max_cluster": 64, "random_seed": 0, + "scope": "file" }, } )