From d5618749c98bded847e94dd22dc1f60e964e815e Mon Sep 17 00:00:00 2001
From: H <43509927+guoyuhao2330@users.noreply.github.com>
Date: Fri, 12 Jul 2024 16:28:24 +0800
Subject: [PATCH] Fix baidusearch and duckduckgosearch (#1488)
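### What problem does this PR solve?
The Baidu and DuckDuckGoSearch components built their results as a plain list of strings and passed it to `be_output`. Each result is now wrapped as a `{"content": ...}` dict and the component returns a `pd.DataFrame` built from those dicts directly.
In addition, the empty-input path of DuckDuckGoSearch called `Baidu.be_output` by mistake; it now calls `DuckDuckGoSearch.be_output`.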
### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
---
graph/component/baidu.py | 9 +++++----
graph/component/duckduckgosearch.py | 11 ++++++-----
2 files changed, 11 insertions(+), 9 deletions(-)
diff --git a/graph/component/baidu.py b/graph/component/baidu.py
index 69319a8fe..5a7779553 100644
--- a/graph/component/baidu.py
+++ b/graph/component/baidu.py
@@ -53,10 +53,11 @@ class Baidu(ComponentBase, ABC):
url_res = re.findall(r"'url': \\\"(.*?)\\\"}", response.text)
title_res = re.findall(r"'title': \\\"(.*?)\\\",\\n", response.text)
body_res = re.findall(r"\"contentText\":\"(.*?)\"", response.text)
- baidu_res = [re.sub('|', '', '' + title + ' ' + body) for url, title, body
- in zip(url_res, title_res, body_res)]
+ baidu_res = [{"content": re.sub('|', '', '' + title + ' ' + body)} for url, title, body in zip(url_res, title_res, body_res)]
del body_res, url_res, title_res
- print(baidu_res, ":::::::::::::::::::::::::::::::::")
- return Baidu.be_output(baidu_res)
+ df = pd.DataFrame(baidu_res)
+ print(df, ":::::::::::::::::::::::::::::::::")
+
+ return df
diff --git a/graph/component/duckduckgosearch.py b/graph/component/duckduckgosearch.py
index 8e287bd46..8f8f20604 100644
--- a/graph/component/duckduckgosearch.py
+++ b/graph/component/duckduckgosearch.py
@@ -44,18 +44,19 @@ class DuckDuckGoSearch(ComponentBase, ABC):
ans = self.get_input()
ans = " - ".join(ans["content"]) if "content" in ans else ""
if not ans:
- return Baidu.be_output(self._param.no)
+ return DuckDuckGoSearch.be_output(self._param.no)
if self.channel == "text":
with DDGS() as ddgs:
# {'title': '', 'href': '', 'body': ''}
- duck_res = ['' + i["title"] + ' ' + i["body"] for i in
+ duck_res = [{"content": '' + i["title"] + ' ' + i["body"]} for i in
ddgs.text(ans, max_results=self._param.top_n)]
elif self.channel == "news":
with DDGS() as ddgs:
# {'date': '', 'title': '', 'body': '', 'url': '', 'image': '', 'source': ''}
- duck_res = ['' + i["title"] + ' ' + i["body"] for i in
+ duck_res = [{"content": '' + i["title"] + ' ' + i["body"]} for i in
ddgs.news(ans, max_results=self._param.top_n)]
- print(duck_res, ":::::::::::::::::::::::::::::::::")
- return DuckDuckGoSearch.be_output(duck_res)
\ No newline at end of file
+ df = pd.DataFrame(duck_res)
+ print(df, ":::::::::::::::::::::::::::::::::")
+ return df