mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Add agent component for web crawler (#2878)
### What problem does this PR solve?

Add agent component for web crawler

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
1
web/src/assets/svg/crawler.svg
Normal file
1
web/src/assets/svg/crawler.svg
Normal file
@ -0,0 +1 @@
|
||||
<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg class="icon" width="200px" height="200.00px" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg"><path d="M 777.121 313.158 a 265.121 265.121 0 0 0 -530.243 0 Z m 165.7 265.121 H 843.402 V 425.836 l 84.176 -84.176 a 33.1402 33.1402 0 1 0 -47.0591 -47.0591 l -66.2803 66.2803 h -596.524 l -66.2803 -66.2803 a 33.1402 33.1402 0 1 0 -47.0591 47.0591 L 180.598 425.836 V 578.281 H 81.177 a 33.1402 33.1402 0 0 0 0 66.2803 H 180.598 v 33.1402 a 294.285 294.285 0 0 0 39.7682 145.817 l -96.1069 95.4441 a 33.1402 33.1402 0 0 0 47.0591 47.0591 l 88.8157 -88.1529 A 296.937 296.937 0 0 0 478.859 975.959 h 3.97682 V 401.974 h 66.2803 V 975.959 a 296.937 296.937 0 0 0 215.411 -98.0944 l 88.8157 88.8157 a 33.1402 33.1402 0 0 0 47.0591 -47.0591 l -93.4554 -93.4554 a 293.621 293.621 0 0 0 36.4542 -149.131 V 644.561 h 99.4209 a 33.1402 33.1402 0 0 0 0 -66.2803 Z" fill="#1B69FD" /></svg>
|
||||
|
After Width: | Height: | Size: 1.0 KiB |
@ -928,6 +928,16 @@ The above is the content you need to summarize.`,
|
||||
yahooFinance: 'YahooFinance',
|
||||
yahooFinanceDescription:
|
||||
'The component queries information about the company based on the provided ticker symbol.',
|
||||
crawler: 'Web Crawler',
|
||||
crawlerDescription:
|
||||
'This component can be used to crawl HTML source code from a specified URL.',
|
||||
proxy: 'Proxy',
|
||||
crawlerResultOptions: {
|
||||
html: 'Html',
|
||||
markdown: 'Markdown',
|
||||
content: 'Content',
|
||||
},
|
||||
// Label for the crawler form's extract-type selector (human-readable, not the raw key).
extractType: 'Extract type',
|
||||
info: 'Info',
|
||||
history: 'History',
|
||||
financials: 'Financials',
|
||||
|
||||
@ -877,6 +877,15 @@ export default {
|
||||
akShareDescription: '此組件可用於從東方財富網取得對應股票的新聞資訊。',
|
||||
yahooFinance: '雅虎財經',
|
||||
yahooFinanceDescription: '該組件根據提供的股票代碼查詢有關公司的資訊。',
|
||||
crawler: '網頁爬蟲',
|
||||
crawlerDescription: '該組件可用於從指定url爬取HTML源碼。',
|
||||
proxy: '代理',
|
||||
crawlerResultOptions: {
|
||||
html: 'Html',
|
||||
markdown: 'Markdown',
|
||||
content: '文本',
|
||||
},
|
||||
extractType: '提取類型',
|
||||
info: '訊息',
|
||||
history: '歷史',
|
||||
financials: '財務',
|
||||
|
||||
@ -897,6 +897,15 @@ export default {
|
||||
akShareDescription: '该组件可用于从东方财富网站获取相应股票的新闻信息。',
|
||||
yahooFinance: '雅虎财经',
|
||||
yahooFinanceDescription: '该组件根据提供的股票代码查询有关公司的信息。',
|
||||
crawler: '网页爬虫',
|
||||
crawlerDescription: '该组件可用于从指定url爬取html源码。',
|
||||
proxy: '代理',
|
||||
crawlerResultOptions: {
|
||||
html: 'Html',
|
||||
markdown: 'Markdown',
|
||||
content: '文本',
|
||||
},
|
||||
extractType: '提取类型',
|
||||
info: '信息',
|
||||
history: '历史',
|
||||
financials: '财务',
|
||||
|
||||
@ -4,6 +4,7 @@ import { ReactComponent as baiduFanyiIcon } from '@/assets/svg/baidu-fanyi.svg';
|
||||
import { ReactComponent as BaiduIcon } from '@/assets/svg/baidu.svg';
|
||||
import { ReactComponent as BingIcon } from '@/assets/svg/bing.svg';
|
||||
import { ReactComponent as ConcentratorIcon } from '@/assets/svg/concentrator.svg';
|
||||
import { ReactComponent as CrawlerIcon } from '@/assets/svg/crawler.svg';
|
||||
import { ReactComponent as DeepLIcon } from '@/assets/svg/deepl.svg';
|
||||
import { ReactComponent as DuckIcon } from '@/assets/svg/duck.svg';
|
||||
import { ReactComponent as ExeSqlIcon } from '@/assets/svg/exesql.svg';
|
||||
@ -73,6 +74,7 @@ export enum Operator {
|
||||
Concentrator = 'Concentrator',
|
||||
TuShare = 'TuShare',
|
||||
Note = 'Note',
|
||||
Crawler = 'Crawler',
|
||||
}
|
||||
|
||||
export const CommonOperatorList = Object.values(Operator).filter(
|
||||
@ -110,6 +112,7 @@ export const operatorIconMap = {
|
||||
[Operator.Concentrator]: ConcentratorIcon,
|
||||
[Operator.TuShare]: TuShareIcon,
|
||||
[Operator.Note]: NoteIcon,
|
||||
[Operator.Crawler]: CrawlerIcon,
|
||||
};
|
||||
|
||||
export const operatorMap: Record<
|
||||
@ -233,6 +236,9 @@ export const operatorMap: Record<
|
||||
},
|
||||
[Operator.TuShare]: { backgroundColor: '#f8cfa0' },
|
||||
[Operator.Note]: { backgroundColor: '#f8cfa0' },
|
||||
[Operator.Crawler]: {
|
||||
backgroundColor: '#dee0e2',
|
||||
},
|
||||
};
|
||||
|
||||
export const componentMenuList = [
|
||||
@ -323,6 +329,9 @@ export const componentMenuList = [
|
||||
{
|
||||
name: Operator.TuShare,
|
||||
},
|
||||
{
|
||||
name: Operator.Crawler,
|
||||
},
|
||||
];
|
||||
|
||||
export const initialRetrievalValues = {
|
||||
@ -572,6 +581,7 @@ export const RestrictedUpstreamMap = {
|
||||
[Operator.Jin10]: [Operator.Begin],
|
||||
[Operator.Concentrator]: [Operator.Begin],
|
||||
[Operator.TuShare]: [Operator.Begin],
|
||||
[Operator.Crawler]: [Operator.Begin],
|
||||
};
|
||||
|
||||
export const NodeMap = {
|
||||
@ -605,6 +615,7 @@ export const NodeMap = {
|
||||
[Operator.Jin10]: 'ragNode',
|
||||
[Operator.TuShare]: 'ragNode',
|
||||
[Operator.Note]: 'noteNode',
|
||||
[Operator.Crawler]: 'ragNode',
|
||||
};
|
||||
|
||||
export const LanguageOptions = [
|
||||
@ -2791,3 +2802,4 @@ export const TuShareSrcOptions = [
|
||||
'fenghuang',
|
||||
'jinrongjie',
|
||||
];
|
||||
/**
 * Output formats selectable for the Crawler operator's extract type.
 * Each value is also used as the suffix of its translation key
 * (`crawlerResultOptions.<value>`).
 */
export const CrawlerResultOptions = [
  'markdown',
  'html',
  'content',
];
|
||||
|
||||
@ -12,6 +12,7 @@ import BaiduForm from '../form/baidu-form';
|
||||
import BeginForm from '../form/begin-form';
|
||||
import BingForm from '../form/bing-form';
|
||||
import CategorizeForm from '../form/categorize-form';
|
||||
import CrawlerForm from '../form/crawler-form';
|
||||
import DeepLForm from '../form/deepl-form';
|
||||
import DuckDuckGoForm from '../form/duckduckgo-form';
|
||||
import ExeSQLForm from '../form/exesql-form';
|
||||
@ -70,6 +71,7 @@ const FormMap = {
|
||||
[Operator.YahooFinance]: YahooFinanceForm,
|
||||
[Operator.Jin10]: Jin10Form,
|
||||
[Operator.TuShare]: TuShareForm,
|
||||
[Operator.Crawler]: CrawlerForm,
|
||||
};
|
||||
|
||||
// Minimal component that renders a <div> with the literal text "empty".
// NOTE(review): presumably the fallback when no form is mapped for an operator — confirm against usage.
const EmptyContent = () => {
  return <div>empty</div>;
};
|
||||
|
||||
37
web/src/pages/flow/form/crawler-form/index.tsx
Normal file
37
web/src/pages/flow/form/crawler-form/index.tsx
Normal file
@ -0,0 +1,37 @@
|
||||
import { useTranslate } from '@/hooks/common-hooks';
|
||||
import { Form, Input, Select } from 'antd';
|
||||
import { useMemo } from 'react';
|
||||
import { CrawlerResultOptions } from '../../constant';
|
||||
import { IOperatorForm } from '../../interface';
|
||||
/**
 * Settings form for the Crawler operator.
 *
 * Renders two fields bound to the supplied antd form instance:
 * - `proxy`: free-text input for a proxy address.
 * - `extract_type`: select populated from `CrawlerResultOptions`,
 *   initialised to "markdown".
 *
 * @param onValuesChange - forwarded unchanged to the antd `Form`.
 * @param form - antd form instance that the fields are bound to.
 */
const CrawlerForm = ({ onValuesChange, form }: IOperatorForm) => {
  const { t } = useTranslate('flow');

  // Build the select options with translated labels; recomputed only when
  // the translation function identity changes.
  const crawlerResultOptions = useMemo(
    () =>
      CrawlerResultOptions.map((option) => ({
        value: option,
        label: t(`crawlerResultOptions.${option}`),
      })),
    [t],
  );

  return (
    <Form
      name="basic"
      labelCol={{ span: 6 }}
      wrapperCol={{ span: 18 }}
      autoComplete="off"
      form={form}
      onValuesChange={onValuesChange}
    >
      <Form.Item label={t('proxy')} name="proxy">
        <Input placeholder="like: http://127.0.0.1:8888" />
      </Form.Item>
      <Form.Item
        label={t('extractType')}
        name="extract_type"
        initialValue="markdown"
      >
        <Select options={crawlerResultOptions} />
      </Form.Item>
    </Form>
  );
};
|
||||
|
||||
export default CrawlerForm;
|
||||
Reference in New Issue
Block a user