@@ -15,31 +15,23 @@
#
import logging
import binascii
import os
import json
import time
import json_repair
from functools import partial
import re
from collections import defaultdict
from copy import deepcopy
from timeit import default_timer as timer
import datetime
from datetime import timedelta

from agentic_reasoning import DeepResearcher
from api.db import LLMType, ParserType, StatusEnum
from api.db.db_models import Dialog, DB
from api.db.services.common_service import CommonService
from api.db.services.document_service import DocumentService
from api.db.services.knowledgebase_service import KnowledgebaseService
from api.db.services.llm_service import TenantLLMService, LLMBundle
from api import settings
from graphrag.utils import get_tags_from_cache, set_tags_to_cache
from rag.app.resume import forbidden_select_fields4resume
from rag.nlp import extract_between
from rag.app.tag import label_question
from rag.nlp.search import index_name
from rag.settings import TAG_FLD
from rag.utils import rmSpace, num_tokens_from_string, encoder
from api.utils.file_utils import get_project_base_directory
from rag.prompts import kb_prompt, message_fit_in, llm_id2llm_type, keyword_extraction, full_question
from rag.utils.tavily_conn import Tavily
@@ -69,109 +61,6 @@ class DialogService(CommonService):
        return list(chats.dicts())


def message_fit_in(msg, max_length=4000):
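    """Trim a chat history so its token count fits under max_length.

    Keeps the system messages plus the latest message; if the result is still
    too long, truncates either the system prompt or the latest message with the
    tokenizer, whichever dominates. Returns (token_count, messages).
    """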
    def count():
        nonlocal msg
        tks_cnts = []
        for m in msg:
            tks_cnts.append(
                {"role": m["role"], "count": num_tokens_from_string(m["content"])})
        total = 0
        for m in tks_cnts:
            total += m["count"]
        return total

    c = count()
    if c < max_length:
        return c, msg

    msg_ = [m for m in msg[:-1] if m["role"] == "system"]
    if len(msg) > 1:
        msg_.append(msg[-1])
    msg = msg_
    c = count()
    if c < max_length:
        return c, msg

    ll = num_tokens_from_string(msg_[0]["content"])
    ll2 = num_tokens_from_string(msg_[-1]["content"])
    if ll / (ll + ll2) > 0.8:
        m = msg_[0]["content"]
        m = encoder.decode(encoder.encode(m)[:max_length - ll2])
        msg[0]["content"] = m
        return max_length, msg

    m = msg_[1]["content"]
    m = encoder.decode(encoder.encode(m)[:max_length - ll2])
    msg[1]["content"] = m
    return max_length, msg


def llm_id2llm_type(llm_id):
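    """Look up the model type (e.g. "chat", "image2text") for an LLM id in llm_factories.json."""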
    llm_id, _ = TenantLLMService.split_model_name_and_factory(llm_id)
    fnm = os.path.join(get_project_base_directory(), "conf")
    llm_factories = json.load(open(os.path.join(fnm, "llm_factories.json"), "r"))
    for llm_factory in llm_factories["factory_llm_infos"]:
        for llm in llm_factory["llm"]:
            if llm_id == llm["llm_name"]:
                # "model_type" may hold a comma-separated list; take the last entry.
                return llm["model_type"].split(",")[-1]


def kb_prompt(kbinfos, max_tokens):
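    """Format retrieved chunks into knowledge sections for the prompt, grouped by document.

    Chunks are dropped once they would exceed roughly 97% of max_tokens.
    """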
    knowledges = [ck["content_with_weight"] for ck in kbinfos["chunks"]]
    used_token_count = 0
    chunks_num = 0
    for i, c in enumerate(knowledges):
        used_token_count += num_tokens_from_string(c)
        if max_tokens * 0.97 < used_token_count:
            logging.warning(f"Only {i} of {len(knowledges)} retrieved chunks fit into the prompt.")
            knowledges = knowledges[:i]
            break
        chunks_num += 1

    docs = DocumentService.get_by_ids([ck["doc_id"] for ck in kbinfos["chunks"][:chunks_num]])
    docs = {d.id: d.meta_fields for d in docs}

    doc2chunks = defaultdict(lambda: {"chunks": [], "meta": []})
    for ck in kbinfos["chunks"][:chunks_num]:
        doc2chunks[ck["docnm_kwd"]]["chunks"].append((f"URL: {ck['url']}\n" if "url" in ck else "") + ck["content_with_weight"])
        doc2chunks[ck["docnm_kwd"]]["meta"] = docs.get(ck["doc_id"], {})

    knowledges = []
    for nm, cks_meta in doc2chunks.items():
        txt = f"Document: {nm} \n"
        for k, v in cks_meta["meta"].items():
            txt += f"{k}: {v}\n"
        txt += "Relevant fragments are as follows:\n"
        for i, chunk in enumerate(cks_meta["chunks"], 1):
            txt += f"{i}. {chunk}\n"
        knowledges.append(txt)
    return knowledges


def label_question(question, kbs):
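    """Tag a user question with the top-n labels from the tag knowledge bases configured on the given KBs."""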
    tags = None
    tag_kb_ids = []
    for kb in kbs:
        if kb.parser_config.get("tag_kb_ids"):
            tag_kb_ids.extend(kb.parser_config["tag_kb_ids"])
    if tag_kb_ids:
        all_tags = get_tags_from_cache(tag_kb_ids)
        if not all_tags:
            all_tags = settings.retrievaler.all_tags_in_portion(kb.tenant_id, tag_kb_ids)
            set_tags_to_cache(all_tags, tag_kb_ids)
        else:
            all_tags = json.loads(all_tags)
        tag_kbs = KnowledgebaseService.get_by_ids(tag_kb_ids)
        tags = settings.retrievaler.tag_query(question,
                                              list(set([kb.tenant_id for kb in tag_kbs])),
                                              tag_kb_ids,
                                              all_tags,
                                              kb.parser_config.get("topn_tags", 3))
    return tags


def chat_solo(dialog, messages, stream=True):
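    """Chat directly with the dialog's configured LLM, without knowledge-base retrieval."""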
    if llm_id2llm_type(dialog.llm_id) == "image2text":
        chat_mdl = LLMBundle(dialog.tenant_id, LLMType.IMAGE2TEXT, dialog.llm_id)

@@ -297,7 +186,11 @@ def chat(dialog, messages, stream=True, **kwargs):
     knowledges = []
     if prompt_config.get("reasoning", False):
-        for think in reasoning(kbinfos, " ".join(questions), chat_mdl, embd_mdl, tenant_ids, dialog.kb_ids, prompt_config, MAX_SEARCH_LIMIT=3):
+        reasoner = DeepResearcher(chat_mdl,
+                                  prompt_config,
+                                  partial(retriever.retrieval, embd_mdl=embd_mdl, tenant_ids=tenant_ids, kb_ids=dialog.kb_ids, page=1, page_size=dialog.top_n, similarity_threshold=0.2, vector_similarity_weight=0.3))
+
+        for think in reasoner.thinking(kbinfos, " ".join(questions)):
             if isinstance(think, str):
                 thought = think
                 knowledges = [t for t in think.split("\n") if t]

@@ -552,175 +445,6 @@ Please write the SQL, only SQL, without any other explanations or text.
    }


def relevant(tenant_id, llm_id, question, contents: list):
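    """Ask the LLM for a binary yes/no judgment on whether the retrieved contents are relevant to the question."""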
    if llm_id2llm_type(llm_id) == "image2text":
        chat_mdl = LLMBundle(tenant_id, LLMType.IMAGE2TEXT, llm_id)
    else:
        chat_mdl = LLMBundle(tenant_id, LLMType.CHAT, llm_id)
    prompt = """
You are a grader assessing the relevance of a retrieved document to a user question.
It does not need to be a stringent test. The goal is to filter out erroneous retrievals.
If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant.
Give a binary score, 'yes' or 'no', to indicate whether the document is relevant to the question.
No other words are needed except 'yes' or 'no'.
"""
    if not contents:
        return False
    contents = "Documents: \n" + " - ".join(contents)
    contents = f"Question: {question}\n" + contents
    if num_tokens_from_string(contents) >= chat_mdl.max_length - 4:
        contents = encoder.decode(encoder.encode(contents)[:chat_mdl.max_length - 4])
    ans = chat_mdl.chat(prompt, [{"role": "user", "content": contents}], {"temperature": 0.01})
    if ans.lower().find("yes") >= 0:
        return True
    return False


def rewrite(tenant_id, llm_id, question):
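    """Expand/paraphrase a question into several variants to improve retrieval recall."""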
    if llm_id2llm_type(llm_id) == "image2text":
        chat_mdl = LLMBundle(tenant_id, LLMType.IMAGE2TEXT, llm_id)
    else:
        chat_mdl = LLMBundle(tenant_id, LLMType.CHAT, llm_id)
    prompt = """
You are an expert at query expansion to generate a paraphrasing of a question.
I can't retrieve relevant information from the knowledge base by using the user's question directly.
You need to expand or paraphrase the user's question in multiple ways, such as using synonymous words/phrases,
writing an abbreviation out in full, adding some extra descriptions or explanations,
changing the way of expression, translating the original question into another language (English/Chinese), etc.
Return 5 versions of the question, one of which is a translation.
Just list the questions. No other words are needed.
"""
    ans = chat_mdl.chat(prompt, [{"role": "user", "content": question}], {"temperature": 0.8})
    return ans


def keyword_extraction(chat_mdl, content, topn=3):
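    """Ask the LLM to extract the topn most important keywords/phrases from content."""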
    prompt = f"""
Role: You're a text analyzer.
Task: extract the most important keywords/phrases from a given piece of text content.
Requirements:
- Summarize the text content, and give the top {topn} important keywords/phrases.
- The keywords MUST be in the language of the given piece of text content.
- The keywords are delimited by ENGLISH COMMA.
- Keywords ONLY in output.

### Text Content
{content}

"""
    msg = [
        {"role": "system", "content": prompt},
        {"role": "user", "content": "Output: "}
    ]
    _, msg = message_fit_in(msg, chat_mdl.max_length)
    kwd = chat_mdl.chat(prompt, msg[1:], {"temperature": 0.2})
    if isinstance(kwd, tuple):
        kwd = kwd[0]
    kwd = re.sub(r"<think>.*</think>", "", kwd, flags=re.DOTALL)
    if kwd.find("**ERROR**") >= 0:
        return ""
    return kwd


def question_proposal(chat_mdl, content, topn=3):
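    """Ask the LLM to propose topn non-overlapping questions answerable from content."""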
    prompt = f"""
Role: You're a text analyzer.
Task: propose {topn} questions about a given piece of text content.
Requirements:
- Understand and summarize the text content, and propose the top {topn} important questions.
- The questions SHOULD NOT have overlapping meanings.
- The questions SHOULD cover the main content of the text as much as possible.
- The questions MUST be in the language of the given piece of text content.
- One question per line.
- Questions ONLY in output.

### Text Content
{content}

"""
    msg = [
        {"role": "system", "content": prompt},
        {"role": "user", "content": "Output: "}
    ]
    _, msg = message_fit_in(msg, chat_mdl.max_length)
    kwd = chat_mdl.chat(prompt, msg[1:], {"temperature": 0.2})
    if isinstance(kwd, tuple):
        kwd = kwd[0]
    kwd = re.sub(r"<think>.*</think>", "", kwd, flags=re.DOTALL)
    if kwd.find("**ERROR**") >= 0:
        return ""
    return kwd


def full_question(tenant_id, llm_id, messages):
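    """Rewrite the latest user message into a standalone question, resolving pronouns and relative dates from the conversation."""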
    if llm_id2llm_type(llm_id) == "image2text":
        chat_mdl = LLMBundle(tenant_id, LLMType.IMAGE2TEXT, llm_id)
    else:
        chat_mdl = LLMBundle(tenant_id, LLMType.CHAT, llm_id)
    conv = []
    for m in messages:
        if m["role"] not in ["user", "assistant"]:
            continue
        conv.append("{}: {}".format(m["role"].upper(), m["content"]))
    conv = "\n".join(conv)
    today = datetime.date.today().isoformat()
    yesterday = (datetime.date.today() - timedelta(days=1)).isoformat()
    tomorrow = (datetime.date.today() + timedelta(days=1)).isoformat()
    prompt = f"""
Role: A helpful assistant

Task and steps:
1. Generate a full user question that would follow the conversation.
2. If the user's question involves a relative date, you need to convert it into an absolute date based on the current date, which is {today}. For example: 'yesterday' would be converted to {yesterday}.

Requirements & Restrictions:
- The generated text MUST be in the same language as the original user's question.
- If the user's latest question is already complete, don't do anything, just return the original question.
- DON'T generate anything except a refined question.

######################
-Examples-
######################

# Example 1
## Conversation
USER: What is the name of Donald Trump's father?
ASSISTANT: Fred Trump.
USER: And his mother?
###############
Output: What's the name of Donald Trump's mother?

------------
# Example 2
## Conversation
USER: What is the name of Donald Trump's father?
ASSISTANT: Fred Trump.
USER: And his mother?
ASSISTANT: Mary Trump.
USER: What's her full name?
###############
Output: What's the full name of Donald Trump's mother Mary Trump?

------------
# Example 3
## Conversation
USER: What's the weather today in London?
ASSISTANT: Cloudy.
USER: What about tomorrow in Rochester?
###############
Output: What's the weather in Rochester on {tomorrow}?
######################

# Real Data
## Conversation
{conv}
###############
"""
    ans = chat_mdl.chat(prompt, [{"role": "user", "content": "Output: "}], {"temperature": 0.2})
    ans = re.sub(r"<think>.*</think>", "", ans, flags=re.DOTALL)
    return ans if ans.find("**ERROR**") < 0 else messages[-1]["content"]


def tts(tts_mdl, text):
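    """Synthesize speech for text with the given TTS model; no-op if either is missing."""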
    if not tts_mdl or not text:
        return

@@ -796,298 +520,3 @@ def ask(question, kb_ids, tenant_id):
    yield decorate_answer(answer)


def content_tagging(chat_mdl, content, all_tags, examples, topn=3):
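    """Ask the LLM to tag content with the topn most relevant tags from all_tags, few-shot guided by examples.

    Returns a dict mapping tag -> relevance score (1-10) parsed from the model's JSON output.
    """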
    prompt = f"""
Role: You're a text analyzer.

Task: Tag (put some labels on) a given piece of text content based on the examples and the entire tag set.

Steps:
- Comprehend the tag/label set.
- Comprehend the examples, each of which consists of text content and its assigned tags with relevance scores, in JSON format.
- Summarize the text content, and tag it with the top {topn} most relevant tags from the tag/label set along with the corresponding relevance scores.

Requirements:
- The tags MUST be from the tag set.
- The output MUST be in JSON format only, where the key is the tag and the value is its relevance score.
- The relevance score must range from 1 to 10.
- Tags ONLY in output.

# TAG SET
{", ".join(all_tags)}

"""
    for i, ex in enumerate(examples):
        prompt += """
# Example {}
### Text Content
{}

Output:
{}

""".format(i + 1, ex["content"], json.dumps(ex[TAG_FLD], indent=2, ensure_ascii=False))

    prompt += f"""
# Real Data
### Text Content
{content}

"""
    msg = [
        {"role": "system", "content": prompt},
        {"role": "user", "content": "Output: "}
    ]
    _, msg = message_fit_in(msg, chat_mdl.max_length)
    kwd = chat_mdl.chat(prompt, msg[1:], {"temperature": 0.5})
    if isinstance(kwd, tuple):
        kwd = kwd[0]
    kwd = re.sub(r"<think>.*</think>", "", kwd, flags=re.DOTALL)
    if kwd.find("**ERROR**") >= 0:
        raise Exception(kwd)

    try:
        return json_repair.loads(kwd)
    except json_repair.JSONDecodeError:
        try:
            result = kwd.replace(prompt[:-1], '').replace('user', '').replace('model', '').strip()
            result = '{' + result.split('{')[1].split('}')[0] + '}'
            return json_repair.loads(result)
        except Exception as e:
            logging.exception(f"JSON parsing error: {result} -> {e}")
            raise e


def reasoning(chunk_info: dict, question: str, chat_mdl: LLMBundle, embd_mdl: LLMBundle,
              tenant_ids: list[str], kb_ids: list[str], prompt_config, MAX_SEARCH_LIMIT: int = 6,
              top_n: int = 5, similarity_threshold: float = 0.4, vector_similarity_weight: float = 0.3):
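    """Iterative search-and-reason loop.

    The LLM emits search queries between special tags; each query is answered
    with retrieved evidence (KB, optionally web and KG), and the accumulated
    reasoning is streamed back wrapped in <think>...</think>. Retrieved chunks
    are merged into chunk_info for later citation.
    """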
    BEGIN_SEARCH_QUERY = "<|begin_search_query|>"
    END_SEARCH_QUERY = "<|end_search_query|>"
    BEGIN_SEARCH_RESULT = "<|begin_search_result|>"
    END_SEARCH_RESULT = "<|end_search_result|>"

    def rm_query_tags(line):
        pattern = re.escape(BEGIN_SEARCH_QUERY) + r"(.*?)" + re.escape(END_SEARCH_QUERY)
        return re.sub(pattern, "", line)

    def rm_result_tags(line):
        pattern = re.escape(BEGIN_SEARCH_RESULT) + r"(.*?)" + re.escape(END_SEARCH_RESULT)
        return re.sub(pattern, "", line)

    reason_prompt = (
        "You are a reasoning assistant with the ability to perform dataset searches to help "
        "you answer the user's question accurately. You have special tools:\n\n"
        f"- To perform a search: write {BEGIN_SEARCH_QUERY} your query here {END_SEARCH_QUERY}.\n"
        f"Then, the system will search and analyze relevant content, then provide you with helpful information in the format {BEGIN_SEARCH_RESULT} ...search results... {END_SEARCH_RESULT}.\n\n"
        f"You can repeat the search process multiple times if necessary. The maximum number of search attempts is limited to {MAX_SEARCH_LIMIT}.\n\n"
        "Once you have all the information you need, continue your reasoning.\n\n"
        "-- Example 1 --\n"  ########################################
        "Question: \"Are both the directors of Jaws and Casino Royale from the same country?\"\n"
        "Assistant:\n"
        f" {BEGIN_SEARCH_QUERY}Who is the director of Jaws?{END_SEARCH_QUERY}\n\n"
        "User:\n"
        f" {BEGIN_SEARCH_RESULT}\nThe director of Jaws is Steven Spielberg...\n{END_SEARCH_RESULT}\n\n"
        "Continues reasoning with the new information.\n"
        "Assistant:\n"
        f" {BEGIN_SEARCH_QUERY}Where is Steven Spielberg from?{END_SEARCH_QUERY}\n\n"
        "User:\n"
        f" {BEGIN_SEARCH_RESULT}\nSteven Allan Spielberg is an American filmmaker...\n{END_SEARCH_RESULT}\n\n"
        "Continues reasoning with the new information...\n\n"
        "Assistant:\n"
        f" {BEGIN_SEARCH_QUERY}Who is the director of Casino Royale?{END_SEARCH_QUERY}\n\n"
        "User:\n"
        f" {BEGIN_SEARCH_RESULT}\nCasino Royale is a 2006 spy film directed by Martin Campbell...\n{END_SEARCH_RESULT}\n\n"
        "Continues reasoning with the new information...\n\n"
        "Assistant:\n"
        f" {BEGIN_SEARCH_QUERY}Where is Martin Campbell from?{END_SEARCH_QUERY}\n\n"
        "User:\n"
        f" {BEGIN_SEARCH_RESULT}\nMartin Campbell (born 24 October 1943) is a New Zealand film and television director...\n{END_SEARCH_RESULT}\n\n"
        "Continues reasoning with the new information...\n\n"
        "Assistant:\nIt's enough to answer the question\n"
        "-- Example 2 --\n"  #########################################
        "Question: \"When was the founder of craigslist born?\"\n"
        "Assistant:\n"
        f" {BEGIN_SEARCH_QUERY}Who was the founder of craigslist?{END_SEARCH_QUERY}\n\n"
        "User:\n"
        f" {BEGIN_SEARCH_RESULT}\nCraigslist was founded by Craig Newmark...\n{END_SEARCH_RESULT}\n\n"
        "Continues reasoning with the new information.\n"
        "Assistant:\n"
        f" {BEGIN_SEARCH_QUERY}When was Craig Newmark born?{END_SEARCH_QUERY}\n\n"
        "User:\n"
        f" {BEGIN_SEARCH_RESULT}\nCraig Newmark was born on December 6, 1952...\n{END_SEARCH_RESULT}\n\n"
        "Continues reasoning with the new information...\n\n"
        "Assistant:\nIt's enough to answer the question\n"
        "**Remember**:\n"
        "- You have a dataset to search, so you just provide a proper search query.\n"
        f"- Use {BEGIN_SEARCH_QUERY} to request a dataset search and end with {END_SEARCH_QUERY}.\n"
        "- The language of the query MUST be the same as that of the 'Question' or 'search result'.\n"
        "- When done searching, continue your reasoning.\n\n"
        'Please answer the following question. You should think step by step to solve it.\n\n'
    )

    relevant_extraction_prompt = """**Task Instruction:**

You are tasked with reading and analyzing web pages based on the following inputs: **Previous Reasoning Steps**, **Current Search Query**, and **Searched Web Pages**. Your objective is to extract relevant and helpful information for **Current Search Query** from the **Searched Web Pages** and seamlessly integrate this information into the **Previous Reasoning Steps** to continue reasoning for the original question.

**Guidelines:**

1. **Analyze the Searched Web Pages:**
- Carefully review the content of each searched web page.
- Identify factual information that is relevant to the **Current Search Query** and can aid in the reasoning process for the original question.

2. **Extract Relevant Information:**
- Select the information from the Searched Web Pages that directly contributes to advancing the **Previous Reasoning Steps**.
- Ensure that the extracted information is accurate and relevant.

3. **Output Format:**
- **If the web pages provide helpful information for current search query:** Present the information beginning with `**Final Information**` as shown below.
- The language of the output **MUST BE** the same as that of the 'Search Query' or 'Web Pages'.
**Final Information**

[Helpful information]

- **If the web pages do not provide any helpful information for current search query:** Output the following text.

**Final Information**

No helpful information found.

**Inputs:**
- **Previous Reasoning Steps:**
{prev_reasoning}

- **Current Search Query:**
{search_query}

- **Searched Web Pages:**
{document}

"""

    executed_search_queries = []
    msg_history = [{"role": "user", "content": f'Question:\"{question}\"\n'}]
    all_reasoning_steps = []
    think = "<think>"
    for ii in range(MAX_SEARCH_LIMIT + 1):
        if ii == MAX_SEARCH_LIMIT - 1:
            summary_think = f"\n{BEGIN_SEARCH_RESULT}\nThe maximum search limit is exceeded. You are not allowed to search.\n{END_SEARCH_RESULT}\n"
            yield {"answer": think + summary_think + "</think>", "reference": {}, "audio_binary": None}
            all_reasoning_steps.append(summary_think)
            msg_history.append({"role": "assistant", "content": summary_think})
            break

        query_think = ""
        if msg_history[-1]["role"] != "user":
            msg_history.append({"role": "user", "content": "Continues reasoning with the new information.\n"})
        else:
            msg_history[-1]["content"] += "\n\nContinues reasoning with the new information.\n"
        for ans in chat_mdl.chat_streamly(reason_prompt, msg_history, {"temperature": 0.7}):
            ans = re.sub(r"<think>.*</think>", "", ans, flags=re.DOTALL)
            if not ans:
                continue
            query_think = ans
            yield {"answer": think + rm_query_tags(query_think) + "</think>", "reference": {}, "audio_binary": None}

        think += rm_query_tags(query_think)
        all_reasoning_steps.append(query_think)
        queries = extract_between(query_think, BEGIN_SEARCH_QUERY, END_SEARCH_QUERY)
        if not queries:
            if ii > 0:
                break
            # No query was emitted on the first round; fall back to the original question.
            queries = [question]

        for search_query in queries:
            logging.info(f"[THINK]Query: {ii}. {search_query}")
            msg_history.append({"role": "assistant", "content": search_query})
            think += f"\n\n> {ii + 1}. {search_query}\n\n"
            yield {"answer": think + "</think>", "reference": {}, "audio_binary": None}

            summary_think = ""
            # The search query has been searched in previous steps.
            if search_query in executed_search_queries:
                summary_think = f"\n{BEGIN_SEARCH_RESULT}\nYou have searched this query. Please refer to previous results.\n{END_SEARCH_RESULT}\n"
                yield {"answer": think + summary_think + "</think>", "reference": {}, "audio_binary": None}
                all_reasoning_steps.append(summary_think)
                msg_history.append({"role": "user", "content": summary_think})
                think += summary_think
                continue
            # Remember this query so repeats are short-circuited above.
            executed_search_queries.append(search_query)
truncated_prev_reasoning = ""
|
|
|
|
|
for i, step in enumerate(all_reasoning_steps):
|
|
|
|
|
truncated_prev_reasoning += f"Step {i + 1}: {step}\n\n"
|
|
|
|
|
|
|
|
|
|
prev_steps = truncated_prev_reasoning.split('\n\n')
|
|
|
|
|
if len(prev_steps) <= 5:
|
|
|
|
|
truncated_prev_reasoning = '\n\n'.join(prev_steps)
|
|
|
|
|
else:
|
|
|
|
|
truncated_prev_reasoning = ''
|
|
|
|
|
for i, step in enumerate(prev_steps):
|
|
|
|
|
if i == 0 or i >= len(prev_steps) - 4 or BEGIN_SEARCH_QUERY in step or BEGIN_SEARCH_RESULT in step:
|
|
|
|
|
truncated_prev_reasoning += step + '\n\n'
|
|
|
|
|
else:
|
|
|
|
|
if truncated_prev_reasoning[-len('\n\n...\n\n'):] != '\n\n...\n\n':
|
|
|
|
|
truncated_prev_reasoning += '...\n\n'
|
|
|
|
|
truncated_prev_reasoning = truncated_prev_reasoning.strip('\n')

            # Retrieval procedure:
            # 1. KB search
            # 2. Web search (optional)
            # 3. KG search (optional)
            kbinfos = settings.retrievaler.retrieval(search_query, embd_mdl, tenant_ids, kb_ids, 1, top_n,
                                                     similarity_threshold,
                                                     vector_similarity_weight)
            if prompt_config.get("tavily_api_key"):
                tav = Tavily(prompt_config["tavily_api_key"])
                tav_res = tav.retrieve_chunks(search_query)
                kbinfos["chunks"].extend(tav_res["chunks"])
                kbinfos["doc_aggs"].extend(tav_res["doc_aggs"])
            if prompt_config.get("use_kg"):
                ck = settings.kg_retrievaler.retrieval(search_query,
                                                       tenant_ids,
                                                       kb_ids,
                                                       embd_mdl,
                                                       chat_mdl)
                if ck["content_with_weight"]:
                    kbinfos["chunks"].insert(0, ck)

            # Merge chunk info for citations
            if not chunk_info["chunks"]:
                for k in chunk_info.keys():
                    chunk_info[k] = kbinfos[k]
            else:
                cids = [c["chunk_id"] for c in chunk_info["chunks"]]
                for c in kbinfos["chunks"]:
                    if c["chunk_id"] in cids:
                        continue
                    chunk_info["chunks"].append(c)
                dids = [d["doc_id"] for d in chunk_info["doc_aggs"]]
                for d in kbinfos["doc_aggs"]:
                    if d["doc_id"] in dids:
                        continue
                    chunk_info["doc_aggs"].append(d)
think += "\n\n"
|
|
|
|
|
for ans in chat_mdl.chat_streamly(
|
|
|
|
|
relevant_extraction_prompt.format(
|
|
|
|
|
prev_reasoning=truncated_prev_reasoning,
|
|
|
|
|
search_query=search_query,
|
|
|
|
|
document="\n".join(kb_prompt(kbinfos, 4096))
|
|
|
|
|
),
|
|
|
|
|
[{"role": "user",
|
|
|
|
|
"content": f'Now you should analyze each web page and find helpful information based on the current search query "{search_query}" and previous reasoning steps.'}],
|
|
|
|
|
{"temperature": 0.7}):
|
|
|
|
|
ans = re.sub(r"<think>.*</think>", "", ans, flags=re.DOTALL)
|
|
|
|
|
if not ans:
|
|
|
|
|
continue
|
|
|
|
|
summary_think = ans
|
|
|
|
|
yield {"answer": think + rm_result_tags(summary_think) + "</think>", "reference": {}, "audio_binary": None}
|
|
|
|
|
|
|
|
|
|
all_reasoning_steps.append(summary_think)
|
|
|
|
|
msg_hisotry.append(
|
|
|
|
|
{"role": "user", "content": f"\n\n{BEGIN_SEARCH_RESULT}{summary_think}{END_SEARCH_RESULT}\n\n"})
|
|
|
|
|
think += rm_result_tags(summary_think)
|
|
|
|
|
logging.info(f"[THINK]Summary: {ii}. {summary_think}")
|
|
|
|
|
|
|
|
|
|
yield think + "</think>"
|
|
|
|
|
|