From 4eb76594998fc5dca56b1ad8379ac624e47eea05 Mon Sep 17 00:00:00 2001
From: Jin Hai
Date: Tue, 23 Sep 2025 10:19:25 +0800
Subject: [PATCH] Fix bug: broken import from rag.prompts.prompts (#10217)

### What problem does this PR solve?

Fix imports broken by the prompt-module rename: `rag/prompts/prompts.py` becomes
`rag/prompts/generator.py` and `rag/prompts/prompt_template.py` becomes
`rag/prompts/template.py`, and every call site that still imported from
`rag.prompts.prompts` or `rag.prompts.prompt_template` is updated to the new
module paths. The package-level re-export in `rag/prompts/__init__.py` now
mirrors `generator`, so plain `from rag.prompts import ...` keeps working.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

---------

Signed-off-by: jinhai
---
 agent/canvas.py                          |  2 +-
 agent/component/agent_with_tools.py      |  5 +-
 agent/component/llm.py                   |  3 +-
 agent/tools/base.py                      |  2 +-
 agent/tools/retrieval.py                 |  3 +-
 api/apps/api_app.py                      |  2 +-
 api/apps/canvas_app.py                   |  4 +-
 api/apps/chunk_app.py                    |  3 +-
 api/apps/conversation_app.py             |  4 +-
 api/apps/dataflow_app.py                 |  2 +-
 api/apps/sdk/doc.py                      |  2 +-
 api/apps/sdk/session.py                  |  5 +-
 api/db/services/conversation_service.py  |  2 +-
 api/db/services/dialog_service.py        | 68 ++++++++++---------
 deepdoc/parser/figure_parser.py          |  2 +-
 deepdoc/parser/pdf_parser.py             |  2 +-
 graphrag/general/extractor.py            |  2 +-
 rag/flow/chunker/chunker.py              |  2 +-
 rag/llm/cv_model.py                      |  2 +-
 rag/prompts/__init__.py                  |  6 +-
 rag/prompts/{prompts.py => generator.py} |  2 +-
 .../{prompt_template.py => template.py}  |  0
 rag/svr/task_executor.py                 |  2 +-
 23 files changed, 64 insertions(+), 63 deletions(-)
 rename rag/prompts/{prompts.py => generator.py} (99%)
 rename rag/prompts/{prompt_template.py => template.py} (100%)

diff --git a/agent/canvas.py b/agent/canvas.py
index cc6080e07..664a68ad1 100644
--- a/agent/canvas.py
+++ b/agent/canvas.py
@@ -27,7 +27,7 @@ from agent.component import component_class
 from agent.component.base import ComponentBase
 from api.db.services.file_service import FileService
 from api.utils import get_uuid, hash_str2int
-from rag.prompts.prompts import chunks_format
+from rag.prompts.generator import chunks_format
 from rag.utils.redis_conn import REDIS_CONN
 
 class Graph:
diff --git a/agent/component/agent_with_tools.py b/agent/component/agent_with_tools.py
index 6b57fa120..8a4319fd0 100644
--- a/agent/component/agent_with_tools.py
+++ b/agent/component/agent_with_tools.py
@@ -28,9 +28,8 @@ from api.db.services.llm_service import LLMBundle
 from api.db.services.tenant_llm_service import TenantLLMService
 from api.db.services.mcp_server_service import MCPServerService
 from api.utils.api_utils import timeout
-from rag.prompts import message_fit_in
-from rag.prompts.prompts import next_step, COMPLETE_TASK, analyze_task, \
-    citation_prompt, reflect, rank_memories, kb_prompt, citation_plus, full_question
+from rag.prompts.generator import next_step, COMPLETE_TASK, analyze_task, \
+    citation_prompt, reflect, rank_memories, kb_prompt, citation_plus, full_question, message_fit_in
 from rag.utils.mcp_tool_call_conn import MCPToolCallSession, mcp_tool_metadata_to_openai_tool
 from agent.component.llm import LLMParam, LLM
 
diff --git a/agent/component/llm.py b/agent/component/llm.py
index b13c4a87e..4e4dbcfe6 100644
--- a/agent/component/llm.py
+++ b/agent/component/llm.py
@@ -26,8 +26,7 @@ from api.db.services.llm_service import LLMBundle
 from api.db.services.tenant_llm_service import TenantLLMService
 from agent.component.base import ComponentBase, ComponentParamBase
 from api.utils.api_utils import timeout
-from rag.prompts import message_fit_in, citation_prompt
-from rag.prompts.prompts import tool_call_summary
+from rag.prompts.generator import tool_call_summary, message_fit_in, citation_prompt
 
 
 class LLMParam(ComponentParamBase):
diff --git a/agent/tools/base.py b/agent/tools/base.py
index 0d946a696..e775615ac 100644
--- a/agent/tools/base.py
+++ b/agent/tools/base.py
@@ -22,7 +22,7 @@ from typing import TypedDict, List, Any
 from agent.component.base import ComponentParamBase, ComponentBase
 from api.utils import hash_str2int
 from rag.llm.chat_model import ToolCallSession
-from rag.prompts.prompts import kb_prompt
+from rag.prompts.generator import kb_prompt
 from rag.utils.mcp_tool_call_conn import MCPToolCallSession
 from timeit import default_timer as timer
 
diff --git a/agent/tools/retrieval.py b/agent/tools/retrieval.py
index 156469811..d6b0213c1 100644
--- a/agent/tools/retrieval.py
+++ b/agent/tools/retrieval.py
@@ -23,8 +23,7 @@ from api.db.services.llm_service import LLMBundle
 from api import settings
 from api.utils.api_utils import timeout
 from rag.app.tag import label_question
-from rag.prompts import kb_prompt
-from rag.prompts.prompts import cross_languages
+from rag.prompts.generator import cross_languages, kb_prompt
 
 
 class RetrievalParam(ToolParamBase):
diff --git a/api/apps/api_app.py b/api/apps/api_app.py
index 8a5b29166..1bdb7c2f8 100644
--- a/api/apps/api_app.py
+++ b/api/apps/api_app.py
@@ -39,7 +39,7 @@ from api.utils.api_utils import server_error_response, get_data_error_result, ge
 from api.utils.file_utils import filename_type, thumbnail
 
 from rag.app.tag import label_question
-from rag.prompts import keyword_extraction
+from rag.prompts.generator import keyword_extraction
 from rag.utils.storage_factory import STORAGE_IMPL
 
 from api.db.services.canvas_service import UserCanvasService
diff --git a/api/apps/canvas_app.py b/api/apps/canvas_app.py
index 81f67b098..340fddc10 100644
--- a/api/apps/canvas_app.py
+++ b/api/apps/canvas_app.py
@@ -23,7 +23,7 @@ import trio
 from flask import request, Response
 from flask_login import login_required, current_user
 
-from agent.component import LLM
+from agent.component.llm import LLM
 from api.db import CanvasCategory, FileType
 from api.db.services.canvas_service import CanvasTemplateService, UserCanvasService, API4ConversationService
 from api.db.services.document_service import DocumentService
@@ -474,7 +474,7 @@ def sessions(canvas_id):
 @manager.route('/prompts', methods=['GET'])  # noqa: F821
 @login_required
 def prompts():
-    from rag.prompts.prompts import ANALYZE_TASK_SYSTEM, ANALYZE_TASK_USER, NEXT_STEP, REFLECT, CITATION_PROMPT_TEMPLATE
+    from rag.prompts.generator import ANALYZE_TASK_SYSTEM, ANALYZE_TASK_USER, NEXT_STEP, REFLECT, CITATION_PROMPT_TEMPLATE
     return get_json_result(data={
         "task_analysis": ANALYZE_TASK_SYSTEM +"\n\n"+ ANALYZE_TASK_USER,
         "plan_generation": NEXT_STEP,
diff --git a/api/apps/chunk_app.py b/api/apps/chunk_app.py
index 9b4c341b6..bfd80ea9f 100644
--- a/api/apps/chunk_app.py
+++ b/api/apps/chunk_app.py
@@ -33,8 +33,7 @@ from api.utils.api_utils import get_data_error_result, get_json_result, server_e
 from rag.app.qa import beAdoc, rmPrefix
 from rag.app.tag import label_question
 from rag.nlp import rag_tokenizer, search
-from rag.prompts import cross_languages, keyword_extraction
-from rag.prompts.prompts import gen_meta_filter
+from rag.prompts.generator import gen_meta_filter, cross_languages, keyword_extraction
 from rag.settings import PAGERANK_FLD
 from rag.utils import rmSpace
 
diff --git a/api/apps/conversation_app.py b/api/apps/conversation_app.py
index 9ef4d6453..36543d4e2 100644
--- a/api/apps/conversation_app.py
+++ b/api/apps/conversation_app.py
@@ -29,8 +29,8 @@ from api.db.services.search_service import SearchService
 from api.db.services.tenant_llm_service import TenantLLMService
 from api.db.services.user_service import TenantService, UserTenantService
 from api.utils.api_utils import get_data_error_result, get_json_result, server_error_response, validate_request
-from rag.prompts.prompt_template import load_prompt
-from rag.prompts.prompts import chunks_format
+from rag.prompts.template import load_prompt
+from rag.prompts.generator import chunks_format
 
 
 @manager.route("/set", methods=["POST"])  # noqa: F821
diff --git a/api/apps/dataflow_app.py b/api/apps/dataflow_app.py
index 49bc8687b..36ccf57a2 100644
--- a/api/apps/dataflow_app.py
+++ b/api/apps/dataflow_app.py
@@ -24,7 +24,7 @@ from flask import request
 from flask_login import current_user, login_required
 
 from agent.canvas import Canvas
-from agent.component import LLM
+from agent.component.llm import LLM
 from api.db import CanvasCategory, FileType
 from api.db.services.canvas_service import CanvasTemplateService, UserCanvasService
 from api.db.services.document_service import DocumentService
diff --git a/api/apps/sdk/doc.py b/api/apps/sdk/doc.py
index 5009b6fee..8d5a413b0 100644
--- a/api/apps/sdk/doc.py
+++ b/api/apps/sdk/doc.py
@@ -40,7 +40,7 @@ from api.utils.api_utils import check_duplicate_ids, construct_json_result, get_
 from rag.app.qa import beAdoc, rmPrefix
 from rag.app.tag import label_question
 from rag.nlp import rag_tokenizer, search
-from rag.prompts import cross_languages, keyword_extraction
+from rag.prompts.generator import cross_languages, keyword_extraction
 from rag.utils import rmSpace
 from rag.utils.storage_factory import STORAGE_IMPL
 
diff --git a/api/apps/sdk/session.py b/api/apps/sdk/session.py
index d9db95526..dc15c32d9 100644
--- a/api/apps/sdk/session.py
+++ b/api/apps/sdk/session.py
@@ -38,9 +38,8 @@ from api.db.services.user_service import UserTenantService
 from api.utils import get_uuid
 from api.utils.api_utils import check_duplicate_ids, get_data_openai, get_error_data_result, get_json_result, get_result, server_error_response, token_required, validate_request
 from rag.app.tag import label_question
-from rag.prompts import chunks_format
-from rag.prompts.prompt_template import load_prompt
-from rag.prompts.prompts import cross_languages, gen_meta_filter, keyword_extraction
+from rag.prompts.template import load_prompt
+from rag.prompts.generator import cross_languages, gen_meta_filter, keyword_extraction, chunks_format
 
 
 @manager.route("/chats/<chat_id>/sessions", methods=["POST"])  # noqa: F821
diff --git a/api/db/services/conversation_service.py b/api/db/services/conversation_service.py
index 5e247c21c..26361fc7b 100644
--- a/api/db/services/conversation_service.py
+++ b/api/db/services/conversation_service.py
@@ -23,7 +23,7 @@ from api.db.services.dialog_service import DialogService, chat
 from api.utils import get_uuid
 
 import json
-from rag.prompts import chunks_format
+from rag.prompts.generator import chunks_format
 
 
 class ConversationService(CommonService):
diff --git a/api/db/services/dialog_service.py b/api/db/services/dialog_service.py
index 3855c1ded..a51d6b925 100644
--- a/api/db/services/dialog_service.py
+++ b/api/db/services/dialog_service.py
@@ -39,8 +39,8 @@ from graphrag.general.mind_map_extractor import MindMapExtractor
 from rag.app.resume import forbidden_select_fields4resume
 from rag.app.tag import label_question
 from rag.nlp.search import index_name
-from rag.prompts import chunks_format, citation_prompt, cross_languages, full_question, kb_prompt, keyword_extraction, message_fit_in
-from rag.prompts.prompts import gen_meta_filter, PROMPT_JINJA_ENV, ASK_SUMMARY
+from rag.prompts.generator import chunks_format, citation_prompt, cross_languages, full_question, kb_prompt, keyword_extraction, message_fit_in, \
+    gen_meta_filter, PROMPT_JINJA_ENV, ASK_SUMMARY
 from rag.utils import num_tokens_from_string, rmSpace
 from rag.utils.tavily_conn import Tavily
 
@@ -176,7 +176,7 @@ def chat_solo(dialog, messages, stream=True):
         delta_ans = ""
         for ans in chat_mdl.chat_streamly(prompt_config.get("system", ""), msg, dialog.llm_setting):
             answer = ans
-            delta_ans = ans[len(last_ans) :]
+            delta_ans = ans[len(last_ans):]
             if num_tokens_from_string(delta_ans) < 16:
                 continue
             last_ans = answer
@@ -261,13 +261,13 @@ def convert_conditions(metadata_condition):
         "not is": "≠"
     }
     return [
-    {
-        "op": op_mapping.get(cond["comparison_operator"], cond["comparison_operator"]),
-        "key": cond["name"],
-        "value": cond["value"]
-    }
-    for cond in metadata_condition.get("conditions", [])
-]
+        {
+            "op": op_mapping.get(cond["comparison_operator"], cond["comparison_operator"]),
+            "key": cond["name"],
+            "value": cond["value"]
+        }
+        for cond in metadata_condition.get("conditions", [])
+    ]
 
 
 def meta_filter(metas: dict, filters: list[dict]):
@@ -284,19 +284,19 @@ def meta_filter(metas: dict, filters: list[dict]):
                 value = str(value)
 
         for conds in [
-        (operator == "contains", str(value).lower() in str(input).lower()),
-        (operator == "not contains", str(value).lower() not in str(input).lower()),
-        (operator == "start with", str(input).lower().startswith(str(value).lower())),
-        (operator == "end with", str(input).lower().endswith(str(value).lower())),
-        (operator == "empty", not input),
-        (operator == "not empty", input),
-        (operator == "=", input == value),
-        (operator == "≠", input != value),
-        (operator == ">", input > value),
-        (operator == "<", input < value),
-        (operator == "≥", input >= value),
-        (operator == "≤", input <= value),
-        ]:
+            (operator == "contains", str(value).lower() in str(input).lower()),
+            (operator == "not contains", str(value).lower() not in str(input).lower()),
+            (operator == "start with", str(input).lower().startswith(str(value).lower())),
+            (operator == "end with", str(input).lower().endswith(str(value).lower())),
+            (operator == "empty", not input),
+            (operator == "not empty", input),
+            (operator == "=", input == value),
+            (operator == "≠", input != value),
+            (operator == ">", input > value),
+            (operator == "<", input < value),
+            (operator == "≥", input >= value),
+            (operator == "≤", input <= value),
+            ]:
             try:
                 if all(conds):
                     ids.extend(docids)
@@ -456,7 +456,8 @@ def chat(dialog, messages, stream=True, **kwargs):
                 kbinfos["chunks"].extend(tav_res["chunks"])
                 kbinfos["doc_aggs"].extend(tav_res["doc_aggs"])
             if prompt_config.get("use_kg"):
-                ck = settings.kg_retrievaler.retrieval(" ".join(questions), tenant_ids, dialog.kb_ids, embd_mdl, LLMBundle(dialog.tenant_id, LLMType.CHAT))
+                ck = settings.kg_retrievaler.retrieval(" ".join(questions), tenant_ids, dialog.kb_ids, embd_mdl,
+                                                       LLMBundle(dialog.tenant_id, LLMType.CHAT))
                 if ck["content_with_weight"]:
                     kbinfos["chunks"].insert(0, ck)
 
@@ -467,7 +468,8 @@
         retrieval_ts = timer()
         if not knowledges and prompt_config.get("empty_response"):
             empty_res = prompt_config["empty_response"]
-            yield {"answer": empty_res, "reference": kbinfos, "prompt": "\n\n### Query:\n%s" % " ".join(questions), "audio_binary": tts(tts_mdl, empty_res)}
+            yield {"answer": empty_res, "reference": kbinfos, "prompt": "\n\n### Query:\n%s" % " ".join(questions),
+                   "audio_binary": tts(tts_mdl, empty_res)}
             return {"answer": prompt_config["empty_response"], "reference": kbinfos}
 
         kwargs["knowledge"] = "\n------\n" + "\n\n------\n\n".join(knowledges)
"\n\n------\n\n".join(knowledges) @@ -565,7 +567,8 @@ def chat(dialog, messages, stream=True, **kwargs): if langfuse_tracer: langfuse_generation = langfuse_tracer.start_generation( - trace_context=trace_context, name="chat", model=llm_model_config["llm_name"], input={"prompt": prompt, "prompt4citation": prompt4citation, "messages": msg} + trace_context=trace_context, name="chat", model=llm_model_config["llm_name"], + input={"prompt": prompt, "prompt4citation": prompt4citation, "messages": msg} ) if stream: @@ -575,12 +578,12 @@ def chat(dialog, messages, stream=True, **kwargs): if thought: ans = re.sub(r"^.*", "", ans, flags=re.DOTALL) answer = ans - delta_ans = ans[len(last_ans) :] + delta_ans = ans[len(last_ans):] if num_tokens_from_string(delta_ans) < 16: continue last_ans = answer yield {"answer": thought + answer, "reference": {}, "audio_binary": tts(tts_mdl, delta_ans)} - delta_ans = answer[len(last_ans) :] + delta_ans = answer[len(last_ans):] if delta_ans: yield {"answer": thought + answer, "reference": {}, "audio_binary": tts(tts_mdl, delta_ans)} yield decorate_answer(thought + answer) @@ -676,7 +679,9 @@ Please write the SQL, only SQL, without any other explanations or text. # compose Markdown table columns = ( - "|" + "|".join([re.sub(r"(/.*|([^()]+))", "", field_map.get(tbl["columns"][i]["name"], tbl["columns"][i]["name"])) for i in column_idx]) + ("|Source|" if docid_idx and docid_idx else "|") + "|" + "|".join( + [re.sub(r"(/.*|([^()]+))", "", field_map.get(tbl["columns"][i]["name"], tbl["columns"][i]["name"])) for i in column_idx]) + ( + "|Source|" if docid_idx and docid_idx else "|") ) line = "|" + "|".join(["------" for _ in range(len(column_idx))]) + ("|------|" if docid_idx and docid_idx else "") @@ -753,7 +758,7 @@ def ask(question, kb_ids, tenant_id, chat_llm_name=None, search_config={}): doc_ids = None kbinfos = retriever.retrieval( - question = question, + question=question, embd_mdl=embd_mdl, tenant_ids=tenant_ids, kb_ids=kb_ids, @@ -775,7 +780,8 @@ def ask(question, kb_ids, tenant_id, chat_llm_name=None, search_config={}): def decorate_answer(answer): nonlocal knowledges, kbinfos, sys_prompt - answer, idx = retriever.insert_citations(answer, [ck["content_ltks"] for ck in kbinfos["chunks"]], [ck["vector"] for ck in kbinfos["chunks"]], embd_mdl, tkweight=0.7, vtweight=0.3) + answer, idx = retriever.insert_citations(answer, [ck["content_ltks"] for ck in kbinfos["chunks"]], [ck["vector"] for ck in kbinfos["chunks"]], + embd_mdl, tkweight=0.7, vtweight=0.3) idx = set([kbinfos["chunks"][int(i)]["doc_id"] for i in idx]) recall_docs = [d for d in kbinfos["doc_aggs"] if d["doc_id"] in idx] if not recall_docs: diff --git a/deepdoc/parser/figure_parser.py b/deepdoc/parser/figure_parser.py index 0ec315876..0274f549d 100644 --- a/deepdoc/parser/figure_parser.py +++ b/deepdoc/parser/figure_parser.py @@ -19,7 +19,7 @@ from PIL import Image from api.utils.api_utils import timeout from rag.app.picture import vision_llm_chunk as picture_vision_llm_chunk -from rag.prompts import vision_llm_figure_describe_prompt +from rag.prompts.generator import vision_llm_figure_describe_prompt def vision_figure_parser_figure_data_wrapper(figures_data_without_positions): diff --git a/deepdoc/parser/pdf_parser.py b/deepdoc/parser/pdf_parser.py index 6311ecc7f..09a41ae68 100644 --- a/deepdoc/parser/pdf_parser.py +++ b/deepdoc/parser/pdf_parser.py @@ -37,7 +37,7 @@ from api.utils.file_utils import get_project_base_directory from deepdoc.vision import OCR, AscendLayoutRecognizer, LayoutRecognizer, 
 from rag.app.picture import vision_llm_chunk as picture_vision_llm_chunk
 from rag.nlp import rag_tokenizer
-from rag.prompts import vision_llm_describe_prompt
+from rag.prompts.generator import vision_llm_describe_prompt
 from rag.settings import PARALLEL_DEVICES
 
 LOCK_KEY_pdfplumber = "global_shared_lock_pdfplumber"
diff --git a/graphrag/general/extractor.py b/graphrag/general/extractor.py
index 61d89e27c..df8af1c8f 100644
--- a/graphrag/general/extractor.py
+++ b/graphrag/general/extractor.py
@@ -37,7 +37,7 @@ from graphrag.utils import (
     split_string_by_multi_markers,
 )
 from rag.llm.chat_model import Base as CompletionLLM
-from rag.prompts import message_fit_in
+from rag.prompts.generator import message_fit_in
 from rag.utils import truncate
 
 GRAPH_FIELD_SEP = "<SEP>"
diff --git a/rag/flow/chunker/chunker.py b/rag/flow/chunker/chunker.py
index a8281c306..d7795aeb8 100644
--- a/rag/flow/chunker/chunker.py
+++ b/rag/flow/chunker/chunker.py
@@ -23,7 +23,7 @@ from graphrag.utils import chat_limiter, get_llm_cache, set_llm_cache
 from rag.flow.base import ProcessBase, ProcessParamBase
 from rag.flow.chunker.schema import ChunkerFromUpstream
 from rag.nlp import naive_merge, naive_merge_with_images
-from rag.prompts.prompts import keyword_extraction, question_proposal
+from rag.prompts.generator import keyword_extraction, question_proposal
 
 
 class ChunkerParam(ProcessParamBase):
diff --git a/rag/llm/cv_model.py b/rag/llm/cv_model.py
index e9ae73770..c14b9d8d4 100644
--- a/rag/llm/cv_model.py
+++ b/rag/llm/cv_model.py
@@ -25,7 +25,7 @@ from openai import OpenAI
 from openai.lib.azure import AzureOpenAI
 from zhipuai import ZhipuAI
 from rag.nlp import is_english
-from rag.prompts import vision_llm_describe_prompt
+from rag.prompts.generator import vision_llm_describe_prompt
 from rag.utils import num_tokens_from_string
 
 
diff --git a/rag/prompts/__init__.py b/rag/prompts/__init__.py
index f5616dddd..b8b924b93 100644
--- a/rag/prompts/__init__.py
+++ b/rag/prompts/__init__.py
@@ -1,6 +1,6 @@
-from . import prompts
+from . import generator
 
-__all__ = [name for name in dir(prompts)
-           if not name.startswith('_')]
-
-globals().update({name: getattr(prompts, name) for name in __all__})
\ No newline at end of file
+__all__ = [name for name in dir(generator)
+           if not name.startswith('_')]
+
+globals().update({name: getattr(generator, name) for name in __all__})
\ No newline at end of file
diff --git a/rag/prompts/prompts.py b/rag/prompts/generator.py
similarity index 99%
rename from rag/prompts/prompts.py
rename to rag/prompts/generator.py
index 13ea801b0..8291b4e21 100644
--- a/rag/prompts/prompts.py
+++ b/rag/prompts/generator.py
@@ -22,7 +22,7 @@ from typing import Tuple
 import jinja2
 import json_repair
 from api.utils import hash_str2int
-from rag.prompts.prompt_template import load_prompt
+from rag.prompts.template import load_prompt
 from rag.settings import TAG_FLD
 from rag.utils import encoder, num_tokens_from_string
 
diff --git a/rag/prompts/prompt_template.py b/rag/prompts/template.py
similarity index 100%
rename from rag/prompts/prompt_template.py
rename to rag/prompts/template.py
diff --git a/rag/svr/task_executor.py b/rag/svr/task_executor.py
index 84c73d2b6..ff89d2e55 100644
--- a/rag/svr/task_executor.py
+++ b/rag/svr/task_executor.py
@@ -27,7 +27,7 @@ from api.utils.log_utils import init_root_logger, get_project_base_directory
 from graphrag.general.index import run_graphrag
 from graphrag.utils import get_llm_cache, set_llm_cache, get_tags_from_cache, set_tags_to_cache
 from rag.flow.pipeline import Pipeline
-from rag.prompts import keyword_extraction, question_proposal, content_tagging
+from rag.prompts.generator import keyword_extraction, question_proposal, content_tagging
 import logging
 import os
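
Note for reviewers: the shim in `rag/prompts/__init__.py` is what lets this rename stay non-breaking. It lifts every public name from `generator` into the package namespace, so `from rag.prompts import kb_prompt` and `from rag.prompts.generator import kb_prompt` resolve to the same object, while the removed `rag.prompts.prompts` path raises `ModuleNotFoundError` — which is exactly why every remaining old-style import above had to move. A minimal, self-contained sketch of the same re-export pattern (the `generator` module and its attributes are faked with `types.ModuleType` so the example runs standalone; they are stand-ins, not RAGFlow's actual code):

```python
# Sketch of the dynamic re-export pattern used by the patched rag/prompts/__init__.py.
import types

# Stand-in for rag/prompts/generator.py; names here are hypothetical.
generator = types.ModuleType("generator")
generator.kb_prompt = lambda kbinfos, max_tokens: "..."  # public: gets re-exported
generator._helper = lambda: None                         # private: stays hidden

# Same logic as the patched __init__.py: export every public name of `generator`.
__all__ = [name for name in dir(generator) if not name.startswith('_')]
globals().update({name: getattr(generator, name) for name in __all__})

assert "kb_prompt" in __all__    # public names are lifted to the package namespace
assert "_helper" not in __all__  # underscore-prefixed names are filtered out
```

Because the shim recomputes `__all__` from `dir(generator)` at import time, future additions to `generator.py` are re-exported automatically, with no further edits to `__init__.py`.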