From d5618749c98bded847e94dd22dc1f60e964e815e Mon Sep 17 00:00:00 2001 From: H <43509927+guoyuhao2330@users.noreply.github.com> Date: Fri, 12 Jul 2024 16:28:24 +0800 Subject: [PATCH] Fix baidusearch and duckduckgosearch (#1488) ### What problem does this PR solve? The Baidu and DuckDuckGo search components now wrap each result as a `{"content": ...}` record and return a pandas DataFrame directly, instead of passing a list of bare strings to `be_output`; the empty-input path in `DuckDuckGoSearch` also now calls `DuckDuckGoSearch.be_output` rather than `Baidu.be_output`. ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- graph/component/baidu.py | 9 +++++---- graph/component/duckduckgosearch.py | 11 ++++++----- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/graph/component/baidu.py b/graph/component/baidu.py index 69319a8fe..5a7779553 100644 --- a/graph/component/baidu.py +++ b/graph/component/baidu.py @@ -53,10 +53,11 @@ class Baidu(ComponentBase, ABC): url_res = re.findall(r"'url': \\\"(.*?)\\\"}", response.text) title_res = re.findall(r"'title': \\\"(.*?)\\\",\\n", response.text) body_res = re.findall(r"\"contentText\":\"(.*?)\"", response.text) - baidu_res = [re.sub('|', '', '' + title + ' ' + body) for url, title, body - in zip(url_res, title_res, body_res)] + baidu_res = [{"content": re.sub('|', '', '' + title + ' ' + body)} for url, title, body in zip(url_res, title_res, body_res)] del body_res, url_res, title_res - print(baidu_res, ":::::::::::::::::::::::::::::::::") - return Baidu.be_output(baidu_res) + df = pd.DataFrame(baidu_res) + print(df, ":::::::::::::::::::::::::::::::::") + + return df diff --git a/graph/component/duckduckgosearch.py b/graph/component/duckduckgosearch.py index 8e287bd46..8f8f20604 100644 --- a/graph/component/duckduckgosearch.py +++ b/graph/component/duckduckgosearch.py @@ -44,18 +44,19 @@ class DuckDuckGoSearch(ComponentBase, ABC): ans = self.get_input() ans = " - ".join(ans["content"]) if "content" in ans else "" if not ans: - return Baidu.be_output(self._param.no) + return DuckDuckGoSearch.be_output(self._param.no) if self.channel == "text": with DDGS() as ddgs: # {'title': '', 'href': '', 'body': ''} - duck_res = ['' + i["title"] + ' ' + i["body"] for i in + duck_res = [{"content": '' + 
i["title"] + ' ' + i["body"]} for i in ddgs.text(ans, max_results=self._param.top_n)] elif self.channel == "news": with DDGS() as ddgs: # {'date': '', 'title': '', 'body': '', 'url': '', 'image': '', 'source': ''} - duck_res = ['' + i["title"] + ' ' + i["body"] for i in + duck_res = [{"content": '' + i["title"] + ' ' + i["body"]} for i in ddgs.news(ans, max_results=self._param.top_n)] - print(duck_res, ":::::::::::::::::::::::::::::::::") - return DuckDuckGoSearch.be_output(duck_res) \ No newline at end of file + df = pd.DataFrame(duck_res) + print(df, ":::::::::::::::::::::::::::::::::") + return df