mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-25 16:26:51 +08:00
Feat: message manage (#12083)
### What problem does this PR solve? Message CRUD. Issue #4213 ### Type of change - [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
201
memory/utils/prompt_util.py
Normal file
201
memory/utils/prompt_util.py
Normal file
@ -0,0 +1,201 @@
|
||||
#
|
||||
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
from typing import Optional, List
|
||||
|
||||
from common.constants import MemoryType
|
||||
from common.time_utils import current_timestamp
|
||||
|
||||
class PromptAssembler:
|
||||
|
||||
SYSTEM_BASE_TEMPLATE = """**Memory Extraction Specialist**
|
||||
You are an expert at analyzing conversations to extract structured memory.
|
||||
|
||||
{type_specific_instructions}
|
||||
|
||||
|
||||
**OUTPUT REQUIREMENTS:**
|
||||
1. Output MUST be valid JSON
|
||||
2. Follow the specified output format exactly
|
||||
3. Each extracted item MUST have: content, valid_at, invalid_at
|
||||
4. Timestamps in {timestamp_format} format
|
||||
5. Only extract memory types specified above
|
||||
6. Maximum {max_items} items per type
|
||||
"""
|
||||
|
||||
TYPE_INSTRUCTIONS = {
|
||||
MemoryType.SEMANTIC.name.lower(): """
|
||||
**EXTRACT SEMANTIC KNOWLEDGE:**
|
||||
- Universal facts, definitions, concepts, relationships
|
||||
- Time-invariant, generally true information
|
||||
- Examples: "The capital of France is Paris", "Water boils at 100°C"
|
||||
|
||||
**Timestamp Rules for Semantic Knowledge:**
|
||||
- valid_at: When the fact became true (e.g., law enactment, discovery)
|
||||
- invalid_at: When it becomes false (e.g., repeal, disproven) or empty if still true
|
||||
- Default: valid_at = conversation time, invalid_at = "" for timeless facts
|
||||
""",
|
||||
|
||||
MemoryType.EPISODIC.name.lower(): """
|
||||
**EXTRACT EPISODIC KNOWLEDGE:**
|
||||
- Specific experiences, events, personal stories
|
||||
- Time-bound, person-specific, contextual
|
||||
- Examples: "Yesterday I fixed the bug", "User reported issue last week"
|
||||
|
||||
**Timestamp Rules for Episodic Knowledge:**
|
||||
- valid_at: Event start/occurrence time
|
||||
- invalid_at: Event end time or empty if instantaneous
|
||||
- Extract explicit times: "at 3 PM", "last Monday", "from X to Y"
|
||||
""",
|
||||
|
||||
MemoryType.PROCEDURAL.name.lower(): """
|
||||
**EXTRACT PROCEDURAL KNOWLEDGE:**
|
||||
- Processes, methods, step-by-step instructions
|
||||
- Goal-oriented, actionable, often includes conditions
|
||||
- Examples: "To reset password, click...", "Debugging steps: 1)..."
|
||||
|
||||
**Timestamp Rules for Procedural Knowledge:**
|
||||
- valid_at: When procedure becomes valid/effective
|
||||
- invalid_at: When it expires/becomes obsolete or empty if current
|
||||
- For version-specific: use release dates
|
||||
- For best practices: invalid_at = ""
|
||||
"""
|
||||
}
|
||||
|
||||
OUTPUT_TEMPLATES = {
|
||||
MemoryType.SEMANTIC.name.lower(): """
|
||||
"semantic": [
|
||||
{
|
||||
"content": "Clear factual statement",
|
||||
"valid_at": "timestamp or empty",
|
||||
"invalid_at": "timestamp or empty"
|
||||
}
|
||||
]
|
||||
""",
|
||||
|
||||
MemoryType.EPISODIC.name.lower(): """
|
||||
"episodic": [
|
||||
{
|
||||
"content": "Narrative event description",
|
||||
"valid_at": "event start timestamp",
|
||||
"invalid_at": "event end timestamp or empty"
|
||||
}
|
||||
]
|
||||
""",
|
||||
|
||||
MemoryType.PROCEDURAL.name.lower(): """
|
||||
"procedural": [
|
||||
{
|
||||
"content": "Actionable instructions",
|
||||
"valid_at": "procedure effective timestamp",
|
||||
"invalid_at": "procedure expiration timestamp or empty"
|
||||
}
|
||||
]
|
||||
"""
|
||||
}
|
||||
|
||||
BASE_USER_PROMPT = """
|
||||
**CONVERSATION:**
|
||||
{conversation}
|
||||
|
||||
**CONVERSATION TIME:** {conversation_time}
|
||||
**CURRENT TIME:** {current_time}
|
||||
"""
|
||||
|
||||
@classmethod
|
||||
def assemble_system_prompt(cls, config: dict) -> str:
|
||||
types_to_extract = cls._get_types_to_extract(config["memory_type"])
|
||||
|
||||
type_instructions = cls._generate_type_instructions(types_to_extract)
|
||||
|
||||
output_format = cls._generate_output_format(types_to_extract)
|
||||
|
||||
full_prompt = cls.SYSTEM_BASE_TEMPLATE.format(
|
||||
type_specific_instructions=type_instructions,
|
||||
timestamp_format=config.get("timestamp_format", "ISO 8601"),
|
||||
max_items=config.get("max_items_per_type", 5)
|
||||
)
|
||||
|
||||
full_prompt += f"\n**REQUIRED OUTPUT FORMAT (JSON):**\n```json\n{{\n{output_format}\n}}\n```\n"
|
||||
|
||||
examples = cls._generate_examples(types_to_extract)
|
||||
if examples:
|
||||
full_prompt += f"\n**EXAMPLES:**\n{examples}\n"
|
||||
|
||||
return full_prompt
|
||||
|
||||
@staticmethod
|
||||
def _get_types_to_extract(requested_types: List[str]) -> List[str]:
|
||||
types = set()
|
||||
for rt in requested_types:
|
||||
if rt in [e.name.lower() for e in MemoryType] and rt != MemoryType.RAW.name.lower():
|
||||
types.add(rt)
|
||||
return list(types)
|
||||
|
||||
@classmethod
|
||||
def _generate_type_instructions(cls, types_to_extract: List[str]) -> str:
|
||||
target_types = set(types_to_extract)
|
||||
instructions = [cls.TYPE_INSTRUCTIONS[mt] for mt in target_types]
|
||||
return "\n".join(instructions)
|
||||
|
||||
@classmethod
|
||||
def _generate_output_format(cls, types_to_extract: List[str]) -> str:
|
||||
target_types = set(types_to_extract)
|
||||
output_parts = [cls.OUTPUT_TEMPLATES[mt] for mt in target_types]
|
||||
return ",\n".join(output_parts)
|
||||
|
||||
@staticmethod
|
||||
def _generate_examples(types_to_extract: list[str]) -> str:
|
||||
examples = []
|
||||
|
||||
if MemoryType.SEMANTIC.name.lower() in types_to_extract:
|
||||
examples.append("""
|
||||
**Semantic Example:**
|
||||
Input: "Python lists are mutable and support various operations."
|
||||
Output: {"semantic": [{"content": "Python lists are mutable data structures", "valid_at": "2024-01-15T10:00:00", "invalid_at": ""}]}
|
||||
""")
|
||||
|
||||
if MemoryType.EPISODIC.name.lower() in types_to_extract:
|
||||
examples.append("""
|
||||
**Episodic Example:**
|
||||
Input: "I deployed the new feature yesterday afternoon."
|
||||
Output: {"episodic": [{"content": "User deployed new feature", "valid_at": "2024-01-14T14:00:00", "invalid_at": "2024-01-14T18:00:00"}]}
|
||||
""")
|
||||
|
||||
if MemoryType.PROCEDURAL.name.lower() in types_to_extract:
|
||||
examples.append("""
|
||||
**Procedural Example:**
|
||||
Input: "To debug API errors: 1) Check logs 2) Verify endpoints 3) Test connectivity."
|
||||
Output: {"procedural": [{"content": "API error debugging: 1. Check logs 2. Verify endpoints 3. Test connectivity", "valid_at": "2024-01-15T10:00:00", "invalid_at": ""}]}
|
||||
""")
|
||||
|
||||
return "\n".join(examples)
|
||||
|
||||
@classmethod
|
||||
def assemble_user_prompt(
|
||||
cls,
|
||||
conversation: str,
|
||||
conversation_time: Optional[str] = None,
|
||||
current_time: Optional[str] = None
|
||||
) -> str:
|
||||
return cls.BASE_USER_PROMPT.format(
|
||||
conversation=conversation,
|
||||
conversation_time=conversation_time or "Not specified",
|
||||
current_time=current_time or current_timestamp(),
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def get_raw_user_prompt(cls):
|
||||
return cls.BASE_USER_PROMPT
|
||||
Reference in New Issue
Block a user