#
#  Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#
from typing import Optional, List

from common.constants import MemoryType
from common.time_utils import current_timestamp


class PromptAssembler:
    """Build the system and user prompts used for memory extraction.

    The system prompt is composed from a shared base template, one
    instruction section and one JSON output skeleton per requested memory
    type, and one worked example per requested type. All per-type lookup
    tables are keyed by the lower-cased ``MemoryType`` member name.

    Sections are emitted in the order the types were requested (duplicates
    dropped), so the same configuration always yields the same prompt text.
    """

    # Base system prompt; placeholders are filled by assemble_system_prompt().
    SYSTEM_BASE_TEMPLATE = """**Memory Extraction Specialist**
You are an expert at analyzing conversations to extract structured memory.

{type_specific_instructions}


**OUTPUT REQUIREMENTS:**
1. Output MUST be valid JSON
2. Follow the specified output format exactly
3. Each extracted item MUST have: content, valid_at, invalid_at
4. Timestamps in {timestamp_format} format
5. Only extract memory types specified above
6. Maximum {max_items} items per type
"""

    # Extraction instructions, one entry per extractable memory type.
    TYPE_INSTRUCTIONS = {
        MemoryType.SEMANTIC.name.lower(): """
**EXTRACT SEMANTIC KNOWLEDGE:**
- Universal facts, definitions, concepts, relationships
- Time-invariant, generally true information
- Examples: "The capital of France is Paris", "Water boils at 100°C"

**Timestamp Rules for Semantic Knowledge:**
- valid_at: When the fact became true (e.g., law enactment, discovery)
- invalid_at: When it becomes false (e.g., repeal, disproven) or empty if still true
- Default: valid_at = conversation time, invalid_at = "" for timeless facts
""",
        MemoryType.EPISODIC.name.lower(): """
**EXTRACT EPISODIC KNOWLEDGE:**
- Specific experiences, events, personal stories
- Time-bound, person-specific, contextual
- Examples: "Yesterday I fixed the bug", "User reported issue last week"

**Timestamp Rules for Episodic Knowledge:**
- valid_at: Event start/occurrence time
- invalid_at: Event end time or empty if instantaneous
- Extract explicit times: "at 3 PM", "last Monday", "from X to Y"
""",
        MemoryType.PROCEDURAL.name.lower(): """
**EXTRACT PROCEDURAL KNOWLEDGE:**
- Processes, methods, step-by-step instructions
- Goal-oriented, actionable, often includes conditions
- Examples: "To reset password, click...", "Debugging steps: 1)..."

**Timestamp Rules for Procedural Knowledge:**
- valid_at: When procedure becomes valid/effective
- invalid_at: When it expires/becomes obsolete or empty if current
- For version-specific: use release dates
- For best practices: invalid_at = ""
""",
    }

    # JSON skeleton fragments, one entry per extractable memory type. They are
    # joined with commas and wrapped in braces by assemble_system_prompt().
    OUTPUT_TEMPLATES = {
        MemoryType.SEMANTIC.name.lower(): """
    "semantic": [
        {
            "content": "Clear factual statement",
            "valid_at": "timestamp or empty",
            "invalid_at": "timestamp or empty"
        }
    ]
""",
        MemoryType.EPISODIC.name.lower(): """
    "episodic": [
        {
            "content": "Narrative event description",
            "valid_at": "event start timestamp",
            "invalid_at": "event end timestamp or empty"
        }
    ]
""",
        MemoryType.PROCEDURAL.name.lower(): """
    "procedural": [
        {
            "content": "Actionable instructions",
            "valid_at": "procedure effective timestamp",
            "invalid_at": "procedure expiration timestamp or empty"
        }
    ]
""",
    }

    # User prompt template; placeholders are filled by assemble_user_prompt().
    BASE_USER_PROMPT = """
**CONVERSATION:**
{conversation}

**CONVERSATION TIME:** {conversation_time}
**CURRENT TIME:** {current_time}
"""

    @classmethod
    def assemble_system_prompt(cls, config: dict) -> str:
        """Return the full system prompt for the given extraction config.

        Args:
            config: Extraction settings. ``config["memory_type"]`` (required)
                is an iterable of memory type names. Optional keys are
                ``"timestamp_format"`` (default ``"ISO 8601"``) and
                ``"max_items_per_type"`` (default ``5``).

        Returns:
            The base template followed by the required JSON output format and,
            when at least one example applies, an examples section.

        Raises:
            KeyError: If ``config`` has no ``"memory_type"`` key, or a
                requested type has no instruction/output template.
        """
        types_to_extract = cls._get_types_to_extract(config["memory_type"])

        full_prompt = cls.SYSTEM_BASE_TEMPLATE.format(
            type_specific_instructions=cls._generate_type_instructions(types_to_extract),
            timestamp_format=config.get("timestamp_format", "ISO 8601"),
            max_items=config.get("max_items_per_type", 5),
        )

        output_format = cls._generate_output_format(types_to_extract)
        full_prompt += f"\n**REQUIRED OUTPUT FORMAT (JSON):**\n```json\n{{\n{output_format}\n}}\n```\n"

        examples = cls._generate_examples(types_to_extract)
        if examples:
            full_prompt += f"\n**EXAMPLES:**\n{examples}\n"

        return full_prompt

    @staticmethod
    def _get_types_to_extract(requested_types: List[str]) -> List[str]:
        """Filter the requested names down to extractable memory types.

        A name is kept when it equals the lower-cased name of a ``MemoryType``
        member other than ``RAW``. Duplicates are dropped and first-seen order
        is preserved, so the result is deterministic for a given input.
        """
        raw_name = MemoryType.RAW.name.lower()
        # Computed once per call rather than once per requested type.
        extractable = {member.name.lower() for member in MemoryType} - {raw_name}
        # dict.fromkeys de-duplicates while keeping insertion order; a plain
        # set would make the prompt section order vary between processes.
        return list(
            dict.fromkeys(
                rt for rt in requested_types if isinstance(rt, str) and rt in extractable
            )
        )

    @classmethod
    def _generate_type_instructions(cls, types_to_extract: List[str]) -> str:
        """Join the instruction sections for the given types, newline-separated.

        Duplicates are ignored and input order is preserved.

        Raises:
            KeyError: If a type has no entry in ``TYPE_INSTRUCTIONS``.
        """
        return "\n".join(
            cls.TYPE_INSTRUCTIONS[mt] for mt in dict.fromkeys(types_to_extract)
        )

    @classmethod
    def _generate_output_format(cls, types_to_extract: List[str]) -> str:
        """Join the JSON skeleton fragments for the given types with ``",\\n"``.

        Duplicates are ignored and input order is preserved.

        Raises:
            KeyError: If a type has no entry in ``OUTPUT_TEMPLATES``.
        """
        return ",\n".join(
            cls.OUTPUT_TEMPLATES[mt] for mt in dict.fromkeys(types_to_extract)
        )

    @staticmethod
    def _generate_examples(types_to_extract: List[str]) -> str:
        """Return one worked example per requested type, newline-separated.

        Examples are always emitted in the fixed order semantic, episodic,
        procedural. An empty string is returned when no type matches.
        """
        examples = []

        if MemoryType.SEMANTIC.name.lower() in types_to_extract:
            examples.append("""
            **Semantic Example:**
            Input: "Python lists are mutable and support various operations."
            Output: {"semantic": [{"content": "Python lists are mutable data structures", "valid_at": "2024-01-15T10:00:00", "invalid_at": ""}]}
            """)

        if MemoryType.EPISODIC.name.lower() in types_to_extract:
            examples.append("""
            **Episodic Example:**
            Input: "I deployed the new feature yesterday afternoon."
            Output: {"episodic": [{"content": "User deployed new feature", "valid_at": "2024-01-14T14:00:00", "invalid_at": "2024-01-14T18:00:00"}]}
            """)

        if MemoryType.PROCEDURAL.name.lower() in types_to_extract:
            examples.append("""
            **Procedural Example:**
            Input: "To debug API errors: 1) Check logs 2) Verify endpoints 3) Test connectivity."
            Output: {"procedural": [{"content": "API error debugging: 1. Check logs 2. Verify endpoints 3. Test connectivity", "valid_at": "2024-01-15T10:00:00", "invalid_at": ""}]}
            """)

        return "\n".join(examples)

    @classmethod
    def assemble_user_prompt(
        cls,
        conversation: str,
        conversation_time: Optional[str] = None,
        current_time: Optional[str] = None,
    ) -> str:
        """Fill the user prompt template with the conversation and its times.

        Args:
            conversation: Conversation text to analyse.
            conversation_time: When the conversation took place; rendered as
                ``"Not specified"`` when falsy.
            current_time: Reference "now"; when falsy, the value returned by
                ``current_timestamp()`` is used instead.

        Returns:
            The formatted user prompt.
        """
        return cls.BASE_USER_PROMPT.format(
            conversation=conversation,
            conversation_time=conversation_time or "Not specified",
            current_time=current_time or current_timestamp(),
        )

    @classmethod
    def get_raw_user_prompt(cls):
        """Return the unformatted user prompt template."""
        return cls.BASE_USER_PROMPT