mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
add auto keywords and auto-question (#2965)
### What problem does this PR solve? #2687 ### Type of change - [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
@ -28,7 +28,6 @@ from api.db.services.knowledgebase_service import KnowledgebaseService
|
||||
from api.db.services.llm_service import LLMService, TenantLLMService, LLMBundle
|
||||
from api.settings import chat_logger, retrievaler, kg_retrievaler
|
||||
from rag.app.resume import forbidden_select_fields4resume
|
||||
from rag.nlp import keyword_extraction
|
||||
from rag.nlp.search import index_name
|
||||
from rag.utils import rmSpace, num_tokens_from_string, encoder
|
||||
from api.utils.file_utils import get_project_base_directory
|
||||
@ -80,6 +79,7 @@ class ConversationService(CommonService):
|
||||
|
||||
return list(sessions.dicts())
|
||||
|
||||
|
||||
def message_fit_in(msg, max_length=4000):
|
||||
def count():
|
||||
nonlocal msg
|
||||
@ -456,6 +456,58 @@ def rewrite(tenant_id, llm_id, question):
|
||||
return ans
|
||||
|
||||
|
||||
def keyword_extraction(chat_mdl, content, topn=3):
|
||||
prompt = f"""
|
||||
Role: You're a text analyzer.
|
||||
Task: extract the most important keywords/phrases of a given piece of text content.
|
||||
Requirements:
|
||||
- Summarize the text content, and give top {topn} important keywords/phrases.
|
||||
- The keywords MUST be in language of the given piece of text content.
|
||||
- The keywords are delimited by ENGLISH COMMA.
|
||||
- Keywords ONLY in output.
|
||||
|
||||
### Text Content
|
||||
{content}
|
||||
|
||||
"""
|
||||
msg = [
|
||||
{"role": "system", "content": prompt},
|
||||
{"role": "user", "content": "Output: "}
|
||||
]
|
||||
_, msg = message_fit_in(msg, chat_mdl.max_length)
|
||||
kwd = chat_mdl.chat(prompt, msg[1:], {"temperature": 0.2})
|
||||
if isinstance(kwd, tuple): kwd = kwd[0]
|
||||
if kwd.find("**ERROR**") >=0: return ""
|
||||
return kwd
|
||||
|
||||
|
||||
def question_proposal(chat_mdl, content, topn=3):
|
||||
prompt = f"""
|
||||
Role: You're a text analyzer.
|
||||
Task: propose {topn} questions about a given piece of text content.
|
||||
Requirements:
|
||||
- Understand and summarize the text content, and propose top {topn} important questions.
|
||||
- The questions SHOULD NOT have overlapping meanings.
|
||||
- The questions SHOULD cover the main content of the text as much as possible.
|
||||
- The questions MUST be in language of the given piece of text content.
|
||||
- One question per line.
|
||||
- Question ONLY in output.
|
||||
|
||||
### Text Content
|
||||
{content}
|
||||
|
||||
"""
|
||||
msg = [
|
||||
{"role": "system", "content": prompt},
|
||||
{"role": "user", "content": "Output: "}
|
||||
]
|
||||
_, msg = message_fit_in(msg, chat_mdl.max_length)
|
||||
kwd = chat_mdl.chat(prompt, msg[1:], {"temperature": 0.2})
|
||||
if isinstance(kwd, tuple): kwd = kwd[0]
|
||||
if kwd.find("**ERROR**") >= 0: return ""
|
||||
return kwd
|
||||
|
||||
|
||||
def full_question(tenant_id, llm_id, messages):
|
||||
if llm_id2llm_type(llm_id) == "image2text":
|
||||
chat_mdl = LLMBundle(tenant_id, LLMType.IMAGE2TEXT, llm_id)
|
||||
|
||||
Reference in New Issue
Block a user