mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-01-30 23:26:36 +08:00
Fix: add tokenized content (#12793)
### What problem does this PR solve?

Add a tokenized-content field to the ES (Elasticsearch) index so that zh (Chinese) messages can be queried.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
@ -31,7 +31,7 @@ from api.db.services.tenant_llm_service import LLMFactoriesService, TenantLLMSer
|
||||
from api.db.services.llm_service import LLMService, LLMBundle, get_init_tenant_llm
|
||||
from api.db.services.user_service import TenantService, UserTenantService
|
||||
from api.db.services.system_settings_service import SystemSettingsService
|
||||
from api.db.joint_services.memory_message_service import init_message_id_sequence, init_memory_size_cache
|
||||
from api.db.joint_services.memory_message_service import init_message_id_sequence, init_memory_size_cache, fix_missing_tokenized_memory
|
||||
from common.constants import LLMType
|
||||
from common.file_utils import get_project_base_directory
|
||||
from common import settings
|
||||
@ -175,6 +175,7 @@ def init_web_data():
|
||||
add_graph_templates()
|
||||
init_message_id_sequence()
|
||||
init_memory_size_cache()
|
||||
fix_missing_tokenized_memory()
|
||||
logging.info("init web data success:{}".format(time.time() - start_time))
|
||||
|
||||
def init_table():
|
||||
|
||||
@ -306,6 +306,24 @@ def init_memory_size_cache():
|
||||
logging.info("Memory size cache init done.")
|
||||
|
||||
|
||||
def fix_missing_tokenized_memory():
    """Re-save messages that lack the ``tokenized_content_ltks`` field.

    Does nothing except log when ``settings.DOC_ENGINE`` is not
    ``"elasticsearch"``. Otherwise, for every memory returned by
    ``MemoryService.get_all_memory()``, the messages reported by
    ``MessageService.get_missing_field_messages`` are updated with their own
    current content; the update is intended to refresh the tokenized field.
    """
    if settings.DOC_ENGINE != "elasticsearch":
        logging.info("Not using elasticsearch as doc engine, no need to fix missing tokenized memory.")
        return

    memories = MemoryService.get_all_memory()
    if memories:
        for memory in memories:
            stale_messages = MessageService.get_missing_field_messages(
                memory.id, memory.tenant_id, "tokenized_content_ltks"
            )
            for stale in stale_messages:
                # Writing the unchanged content back is what triggers the
                # tokenized field to be regenerated.
                selector = {"message_id": stale["message_id"], "memory_id": memory.id}
                payload = {"content": stale["content"]}
                MessageService.update_message(selector, payload, memory.tenant_id, memory.id)
            if not stale_messages:
                continue
            logging.info(f"Fixed {len(stale_messages)} messages missing tokenized field in memory: {memory.name}.")
    else:
        logging.info("No memory found, no need to fix missing tokenized memory.")

    logging.info("Fix missing tokenized memory done.")
|
||||
|
||||
|
||||
def judge_system_prompt_is_default(system_prompt: str, memory_type: int|list[str]):
    """Return whether ``system_prompt`` equals the assembled default prompt.

    ``memory_type`` is used as-is when it is already a list; any other value
    is first converted with ``get_memory_type_human``. The resulting list is
    passed to ``PromptAssembler.assemble_system_prompt`` under the
    ``"memory_type"`` key, and the output is compared with ``system_prompt``.
    """
    if isinstance(memory_type, list):
        memory_type_list = memory_type
    else:
        memory_type_list = get_memory_type_human(memory_type)

    default_prompt = PromptAssembler.assemble_system_prompt({"memory_type": memory_type_list})
    return system_prompt == default_prompt
|
||||
|
||||
Reference in New Issue
Block a user