mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Fix web search and template max tokens (#1564)
### What problem does this PR solve?

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
@ -188,6 +188,7 @@ class Canvas(ABC):
|
|||||||
def prepare2run(cpns):
|
def prepare2run(cpns):
|
||||||
nonlocal ran, ans
|
nonlocal ran, ans
|
||||||
for c in cpns:
|
for c in cpns:
|
||||||
|
if self.path[-1] and c == self.path[-1][-1]: continue
|
||||||
cpn = self.components[c]["obj"]
|
cpn = self.components[c]["obj"]
|
||||||
if cpn.component_name == "Answer":
|
if cpn.component_name == "Answer":
|
||||||
self.answer.append(c)
|
self.answer.append(c)
|
||||||
|
|||||||
@ -43,7 +43,7 @@ class Baidu(ComponentBase, ABC):
|
|||||||
ans = self.get_input()
|
ans = self.get_input()
|
||||||
ans = " - ".join(ans["content"]) if "content" in ans else ""
|
ans = " - ".join(ans["content"]) if "content" in ans else ""
|
||||||
if not ans:
|
if not ans:
|
||||||
return Baidu.be_output(self._param.no)
|
return Baidu.be_output("")
|
||||||
|
|
||||||
url = 'https://www.baidu.com/s?wd=' + ans + '&rn=' + str(self._param.top_n)
|
url = 'https://www.baidu.com/s?wd=' + ans + '&rn=' + str(self._param.top_n)
|
||||||
headers = {
|
headers = {
|
||||||
@ -56,8 +56,10 @@ class Baidu(ComponentBase, ABC):
|
|||||||
baidu_res = [{"content": re.sub('<em>|</em>', '', '<a href="' + url + '">' + title + '</a> ' + body)} for url, title, body in zip(url_res, title_res, body_res)]
|
baidu_res = [{"content": re.sub('<em>|</em>', '', '<a href="' + url + '">' + title + '</a> ' + body)} for url, title, body in zip(url_res, title_res, body_res)]
|
||||||
del body_res, url_res, title_res
|
del body_res, url_res, title_res
|
||||||
|
|
||||||
df = pd.DataFrame(baidu_res)
|
if not baidu_res:
|
||||||
print(df, ":::::::::::::::::::::::::::::::::")
|
return Baidu.be_output("")
|
||||||
|
|
||||||
|
df = pd.DataFrame(baidu_res)
|
||||||
|
if DEBUG: print(df, ":::::::::::::::::::::::::::::::::")
|
||||||
return df
|
return df
|
||||||
|
|
||||||
|
|||||||
@ -44,7 +44,7 @@ class DuckDuckGo(ComponentBase, ABC):
|
|||||||
ans = self.get_input()
|
ans = self.get_input()
|
||||||
ans = " - ".join(ans["content"]) if "content" in ans else ""
|
ans = " - ".join(ans["content"]) if "content" in ans else ""
|
||||||
if not ans:
|
if not ans:
|
||||||
return DuckDuckGo.be_output(self._param.no)
|
return DuckDuckGo.be_output("")
|
||||||
|
|
||||||
if self._param.channel == "text":
|
if self._param.channel == "text":
|
||||||
with DDGS() as ddgs:
|
with DDGS() as ddgs:
|
||||||
@ -57,6 +57,9 @@ class DuckDuckGo(ComponentBase, ABC):
|
|||||||
duck_res = [{"content": '<a href="' + i["url"] + '">' + i["title"] + '</a> ' + i["body"]} for i in
|
duck_res = [{"content": '<a href="' + i["url"] + '">' + i["title"] + '</a> ' + i["body"]} for i in
|
||||||
ddgs.news(ans, max_results=self._param.top_n)]
|
ddgs.news(ans, max_results=self._param.top_n)]
|
||||||
|
|
||||||
|
if not duck_res:
|
||||||
|
return DuckDuckGo.be_output("")
|
||||||
|
|
||||||
df = pd.DataFrame(duck_res)
|
df = pd.DataFrame(duck_res)
|
||||||
print(df, ":::::::::::::::::::::::::::::::::")
|
if DEBUG: print(df, ":::::::::::::::::::::::::::::::::")
|
||||||
return df
|
return df
|
||||||
|
|||||||
@ -72,14 +72,14 @@ class Generate(ComponentBase):
|
|||||||
prompt = self._param.prompt
|
prompt = self._param.prompt
|
||||||
|
|
||||||
retrieval_res = self.get_input()
|
retrieval_res = self.get_input()
|
||||||
input = "\n- ".join(retrieval_res["content"]) if "content" in retrieval_res else ""
|
input = (" - " + "\n - ".join(retrieval_res["content"])) if "content" in retrieval_res else ""
|
||||||
for para in self._param.parameters:
|
for para in self._param.parameters:
|
||||||
cpn = self._canvas.get_component(para["component_id"])["obj"]
|
cpn = self._canvas.get_component(para["component_id"])["obj"]
|
||||||
_, out = cpn.output(allow_partial=False)
|
_, out = cpn.output(allow_partial=False)
|
||||||
if "content" not in out.columns:
|
if "content" not in out.columns:
|
||||||
kwargs[para["key"]] = "Nothing"
|
kwargs[para["key"]] = "Nothing"
|
||||||
else:
|
else:
|
||||||
kwargs[para["key"]] = "\n - ".join(out["content"])
|
kwargs[para["key"]] = " - " + "\n - ".join(out["content"])
|
||||||
|
|
||||||
kwargs["input"] = input
|
kwargs["input"] = input
|
||||||
for n, v in kwargs.items():
|
for n, v in kwargs.items():
|
||||||
|
|||||||
@ -30,7 +30,7 @@ class WikipediaParam(ComponentParamBase):
|
|||||||
def __init__(self):
|
def __init__(self):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.top_n = 10
|
self.top_n = 10
|
||||||
self.language = 'en'
|
self.language = "en"
|
||||||
|
|
||||||
def check(self):
|
def check(self):
|
||||||
self.check_positive_integer(self.top_n, "Top N")
|
self.check_positive_integer(self.top_n, "Top N")
|
||||||
@ -49,7 +49,7 @@ class Wikipedia(ComponentBase, ABC):
|
|||||||
ans = self.get_input()
|
ans = self.get_input()
|
||||||
ans = " - ".join(ans["content"]) if "content" in ans else ""
|
ans = " - ".join(ans["content"]) if "content" in ans else ""
|
||||||
if not ans:
|
if not ans:
|
||||||
return Wikipedia.be_output(self._param.no)
|
return Wikipedia.be_output("")
|
||||||
|
|
||||||
wiki_res = []
|
wiki_res = []
|
||||||
wikipedia.set_lang(self._param.language)
|
wikipedia.set_lang(self._param.language)
|
||||||
@ -63,7 +63,7 @@ class Wikipedia(ComponentBase, ABC):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
if not wiki_res:
|
if not wiki_res:
|
||||||
return Wikipedia.be_output(self._param.no)
|
return Wikipedia.be_output("")
|
||||||
|
|
||||||
df = pd.DataFrame(wiki_res)
|
df = pd.DataFrame(wiki_res)
|
||||||
if DEBUG: print(df, ":::::::::::::::::::::::::::::::::")
|
if DEBUG: print(df, ":::::::::::::::::::::::::::::::::")
|
||||||
|
|||||||
@ -59,7 +59,6 @@
|
|||||||
"cite": true,
|
"cite": true,
|
||||||
"frequency_penalty": 0.7,
|
"frequency_penalty": 0.7,
|
||||||
"llm_id": "deepseek-chat",
|
"llm_id": "deepseek-chat",
|
||||||
"max_tokens": 2048,
|
|
||||||
"message_history_window_size": 12,
|
"message_history_window_size": 12,
|
||||||
"parameters": [
|
"parameters": [
|
||||||
{
|
{
|
||||||
@ -108,7 +107,7 @@
|
|||||||
"frequencyPenaltyEnabled": true,
|
"frequencyPenaltyEnabled": true,
|
||||||
"frequency_penalty": 0.7,
|
"frequency_penalty": 0.7,
|
||||||
"llm_id": "deepseek-chat",
|
"llm_id": "deepseek-chat",
|
||||||
"maxTokensEnabled": false,
|
"maxTokensEnabled": true,
|
||||||
"max_tokens": 256,
|
"max_tokens": 256,
|
||||||
"parameter": "Precise",
|
"parameter": "Precise",
|
||||||
"presencePenaltyEnabled": true,
|
"presencePenaltyEnabled": true,
|
||||||
@ -366,7 +365,7 @@
|
|||||||
"frequencyPenaltyEnabled": true,
|
"frequencyPenaltyEnabled": true,
|
||||||
"frequency_penalty": 0.7,
|
"frequency_penalty": 0.7,
|
||||||
"llm_id": "deepseek-chat",
|
"llm_id": "deepseek-chat",
|
||||||
"maxTokensEnabled": false,
|
"maxTokensEnabled": true,
|
||||||
"max_tokens": 256,
|
"max_tokens": 256,
|
||||||
"parameter": "Precise",
|
"parameter": "Precise",
|
||||||
"presencePenaltyEnabled": true,
|
"presencePenaltyEnabled": true,
|
||||||
@ -510,8 +509,6 @@
|
|||||||
"frequencyPenaltyEnabled": true,
|
"frequencyPenaltyEnabled": true,
|
||||||
"frequency_penalty": 0.7,
|
"frequency_penalty": 0.7,
|
||||||
"llm_id": "deepseek-chat",
|
"llm_id": "deepseek-chat",
|
||||||
"maxTokensEnabled": true,
|
|
||||||
"max_tokens": 2048,
|
|
||||||
"message_history_window_size": 12,
|
"message_history_window_size": 12,
|
||||||
"parameter": "Precise",
|
"parameter": "Precise",
|
||||||
"parameters": [
|
"parameters": [
|
||||||
@ -538,7 +535,7 @@
|
|||||||
],
|
],
|
||||||
"presencePenaltyEnabled": true,
|
"presencePenaltyEnabled": true,
|
||||||
"presence_penalty": 0.4,
|
"presence_penalty": 0.4,
|
||||||
"prompt": "Role: You are an intelligent assistant. \nTask: Chat with user. Answer the question based on the provided content from: Knowledge Base, Wikipedia, Duckduckgo, Baidu.\nRequirements:\n - Answer should be in markdown format.\n - Summarize and label the sources of the cited content separately: (Knowledge Base, Wikipedia, Duckduckgo, Baidu).\n - Attach URL links to the content which is quoted from Wikipedia, DuckDuckGo or Baidu.\n - Do not make thing up when there's no relevant information to user's question. \n\n## Knowledge base content\n {kb_input}\n\n\n## Wikipedia content\n{wikipedia}\n\n\n## Duckduckgo content\n{duckduckgo}\n\n\n## Baidu content\n{baidu}",
|
"prompt": "Role: You are an intelligent assistant. \nTask: Chat with user. Answer the question based on the provided content from: Knowledge Base, Wikipedia, Duckduckgo, Baidu.\nRequirements:\n - Answer should be in markdown format.\n - Answer should include all sources(Knowledge Base, Wikipedia, Duckduckgo, Baidu) as long as they are relevant, and label the sources of the cited content separately.\n - Attach URL links to the content which is quoted from Wikipedia, DuckDuckGo or Baidu.\n - Do not make thing up when there's no relevant information to user's question. \n\n## Knowledge base content\n {kb_input}\n\n\n## Wikipedia content\n{wikipedia}\n\n\n## Duckduckgo content\n{duckduckgo}\n\n\n## Baidu content\n{baidu}",
|
||||||
"temperature": 0.1,
|
"temperature": 0.1,
|
||||||
"temperatureEnabled": true,
|
"temperatureEnabled": true,
|
||||||
"topPEnabled": true,
|
"topPEnabled": true,
|
||||||
|
|||||||
Reference in New Issue
Block a user