mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-24 23:46:52 +08:00
Feat: add splitter (#10161)
### What problem does this PR solve? ### Type of change - [x] New Feature (non-breaking change which adds functionality) --------- Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: Lynn <lynn_inf@hotmail.com> Co-authored-by: chanx <1243304602@qq.com> Co-authored-by: balibabu <cike8899@users.noreply.github.com> Co-authored-by: 纷繁下的无奈 <zhileihuang@126.com> Co-authored-by: huangzl <huangzl@shinemo.com> Co-authored-by: writinwaters <93570324+writinwaters@users.noreply.github.com> Co-authored-by: Wilmer <33392318@qq.com> Co-authored-by: Adrian Weidig <adrianweidig@gmx.net> Co-authored-by: Zhichang Yu <yuzhichang@gmail.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: Yongteng Lei <yongtengrey@outlook.com> Co-authored-by: Liu An <asiro@qq.com> Co-authored-by: buua436 <66937541+buua436@users.noreply.github.com> Co-authored-by: BadwomanCraZY <511528396@qq.com> Co-authored-by: cucusenok <31804608+cucusenok@users.noreply.github.com> Co-authored-by: Russell Valentine <russ@coldstonelabs.org> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Billy Bao <newyorkupperbay@gmail.com> Co-authored-by: Zhedong Cen <cenzhedong2@126.com> Co-authored-by: TensorNull <129579691+TensorNull@users.noreply.github.com> Co-authored-by: TensorNull <tensor.null@gmail.com>
This commit is contained in:
6
web/src/assets/svg/llm/cometapi.svg
Normal file
6
web/src/assets/svg/llm/cometapi.svg
Normal file
File diff suppressed because one or more lines are too long
|
After Width: | Height: | Size: 96 KiB |
8
web/src/assets/svg/llm/token-pony.svg
Normal file
8
web/src/assets/svg/llm/token-pony.svg
Normal file
File diff suppressed because one or more lines are too long
|
After Width: | Height: | Size: 16 KiB |
@ -139,7 +139,7 @@ function EmbedDialog({
|
||||
</form>
|
||||
</Form>
|
||||
<div>
|
||||
<span>Embed code</span>
|
||||
<span>{t('embedCode', { keyPrefix: 'search' })}</span>
|
||||
<HightLightMarkdown>{text}</HightLightMarkdown>
|
||||
</div>
|
||||
<div className=" font-medium mt-4 mb-1">
|
||||
|
||||
@ -54,7 +54,9 @@ export enum LLMFactory {
|
||||
DeepInfra = 'DeepInfra',
|
||||
Grok = 'Grok',
|
||||
XAI = 'xAI',
|
||||
TokenPony = 'TokenPony',
|
||||
Meituan = 'Meituan',
|
||||
CometAPI = 'CometAPI',
|
||||
}
|
||||
|
||||
// Please lowercase the file name
|
||||
@ -114,5 +116,7 @@ export const IconMap = {
|
||||
[LLMFactory.DeepInfra]: 'deepinfra',
|
||||
[LLMFactory.Grok]: 'grok',
|
||||
[LLMFactory.XAI]: 'xai',
|
||||
[LLMFactory.TokenPony]: 'token-pony',
|
||||
[LLMFactory.Meituan]: 'longcat',
|
||||
[LLMFactory.CometAPI]: 'cometapi',
|
||||
};
|
||||
|
||||
@ -136,6 +136,7 @@ export const useSelectLlmOptionsByModelType = () => {
|
||||
};
|
||||
};
|
||||
|
||||
// Merge different types of models from the same manufacturer under one manufacturer
|
||||
export const useComposeLlmOptionsByModelTypes = (
|
||||
modelTypes: LlmModelType[],
|
||||
) => {
|
||||
@ -155,7 +156,12 @@ export const useComposeLlmOptionsByModelTypes = (
|
||||
options.forEach((x) => {
|
||||
const item = pre.find((y) => y.label === x.label);
|
||||
if (item) {
|
||||
item.options.push(...x.options);
|
||||
x.options.forEach((y) => {
|
||||
// A model that is both an image2text and speech2text model
|
||||
if (!item.options.some((z) => z.value === y.value)) {
|
||||
item.options.push(y);
|
||||
}
|
||||
});
|
||||
} else {
|
||||
pre.push(x);
|
||||
}
|
||||
|
||||
@ -155,7 +155,7 @@ export default {
|
||||
similarityThreshold: '相似度阈值',
|
||||
similarityThresholdTip:
|
||||
'我们使用混合相似度得分来评估两行文本之间的距离。 它是加权关键词相似度和向量余弦相似度。 如果查询和块之间的相似度小于此阈值,则该块将被过滤掉。默认设置为 0.2,也就是说文本块的混合相似度得分至少 20 才会被召回。',
|
||||
vectorSimilarityWeight: '相似度相似度权重',
|
||||
vectorSimilarityWeight: '向量相似度权重',
|
||||
vectorSimilarityWeightTip:
|
||||
'我们使用混合相似性评分来评估两行文本之间的距离。它是加权关键字相似性和矢量余弦相似性或rerank得分(0〜1)。两个权重的总和为1.0。',
|
||||
keywordSimilarityWeight: '关键词相似度权重',
|
||||
@ -633,6 +633,8 @@ General:实体和关系提取提示来自 GitHub - microsoft/graphrag:基于
|
||||
},
|
||||
cancel: '取消',
|
||||
chatSetting: '聊天设置',
|
||||
avatarHidden: '隐藏头像',
|
||||
locale: '地区',
|
||||
},
|
||||
setting: {
|
||||
profile: '概要',
|
||||
|
||||
@ -62,7 +62,7 @@ function AgentChatBox() {
|
||||
|
||||
return (
|
||||
<>
|
||||
<section className="flex flex-1 flex-col px-5 h-[90vh]">
|
||||
<section className="flex flex-1 flex-col px-5 min-h-0 pb-4">
|
||||
<div className="flex-1 overflow-auto" ref={messageContainerRef}>
|
||||
<div>
|
||||
{/* <Spin spinning={sendLoading}> */}
|
||||
|
||||
@ -9,7 +9,7 @@ export function ChatSheet({ hideModal }: IModalProps<any>) {
|
||||
return (
|
||||
<Sheet open modal={false} onOpenChange={hideModal}>
|
||||
<SheetContent
|
||||
className={cn('top-20 p-0')}
|
||||
className={cn('top-20 bottom-0 p-0 flex flex-col h-auto')}
|
||||
onInteractOutside={(e) => e.preventDefault()}
|
||||
>
|
||||
<SheetTitle className="hidden"></SheetTitle>
|
||||
|
||||
@ -145,7 +145,7 @@ function AgentForm({ node }: INextOperatorForm) {
|
||||
<PromptEditor
|
||||
{...field}
|
||||
placeholder={t('flow.messagePlaceholder')}
|
||||
showToolbar={false}
|
||||
showToolbar={true}
|
||||
extraOptions={extraOptions}
|
||||
></PromptEditor>
|
||||
</FormControl>
|
||||
@ -166,7 +166,7 @@ function AgentForm({ node }: INextOperatorForm) {
|
||||
<section>
|
||||
<PromptEditor
|
||||
{...field}
|
||||
showToolbar={false}
|
||||
showToolbar={true}
|
||||
></PromptEditor>
|
||||
</section>
|
||||
</FormControl>
|
||||
|
||||
@ -2133,7 +2133,7 @@ export const QWeatherTimePeriodOptions = [
|
||||
'30d',
|
||||
];
|
||||
|
||||
export const ExeSQLOptions = ['mysql', 'postgresql', 'mariadb', 'mssql'].map(
|
||||
export const ExeSQLOptions = ['mysql', 'postgres', 'mariadb', 'mssql'].map(
|
||||
(x) => ({
|
||||
label: upperFirst(x),
|
||||
value: x,
|
||||
|
||||
@ -2133,7 +2133,7 @@ export const QWeatherTimePeriodOptions = [
|
||||
'30d',
|
||||
];
|
||||
|
||||
export const ExeSQLOptions = ['mysql', 'postgresql', 'mariadb', 'mssql'].map(
|
||||
export const ExeSQLOptions = ['mysql', 'postgres', 'mariadb', 'mssql'].map(
|
||||
(x) => ({
|
||||
label: upperFirst(x),
|
||||
value: x,
|
||||
|
||||
@ -9,13 +9,7 @@ import { cn, formatBytes } from '@/lib/utils';
|
||||
import { Routes } from '@/routes';
|
||||
import { formatPureDate } from '@/utils/date';
|
||||
import { isEmpty } from 'lodash';
|
||||
import {
|
||||
Banknote,
|
||||
Database,
|
||||
DatabaseZap,
|
||||
FileSearch2,
|
||||
GitGraph,
|
||||
} from 'lucide-react';
|
||||
import { Banknote, Database, FileSearch2, GitGraph } from 'lucide-react';
|
||||
import { useMemo } from 'react';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
import { useHandleMenuClick } from './hooks';
|
||||
@ -34,11 +28,11 @@ export function SideBar({ refreshCount }: PropType) {
|
||||
|
||||
const items = useMemo(() => {
|
||||
const list = [
|
||||
{
|
||||
icon: DatabaseZap,
|
||||
label: t(`knowledgeDetails.overview`),
|
||||
key: Routes.DataSetOverview,
|
||||
},
|
||||
// {
|
||||
// icon: DatabaseZap,
|
||||
// label: t(`knowledgeDetails.overview`),
|
||||
// key: Routes.DataSetOverview,
|
||||
// },
|
||||
{
|
||||
icon: Database,
|
||||
label: t(`knowledgeDetails.dataset`),
|
||||
|
||||
@ -19,9 +19,10 @@ import { Input } from '@/components/ui/input';
|
||||
import { useNavigatePage } from '@/hooks/logic-hooks/navigate-hooks';
|
||||
import { IModalProps } from '@/interfaces/common';
|
||||
import { zodResolver } from '@hookform/resolvers/zod';
|
||||
import { useForm, useWatch } from 'react-hook-form';
|
||||
import { useForm } from 'react-hook-form';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
import { z } from 'zod';
|
||||
|
||||
import {
|
||||
ChunkMethodItem,
|
||||
EmbeddingModelItem,
|
||||
@ -89,6 +90,7 @@ export function InputForm({ onOk }: IModalProps<any>) {
|
||||
console.log('submit', data);
|
||||
onOk?.(data);
|
||||
}
|
||||
|
||||
const parseType = useWatch({
|
||||
control: form.control,
|
||||
name: 'parseType',
|
||||
@ -121,6 +123,7 @@ export function InputForm({ onOk }: IModalProps<any>) {
|
||||
</FormItem>
|
||||
)}
|
||||
/>
|
||||
|
||||
<EmbeddingModelItem line={2} isEdit={false} />
|
||||
<ParseTypeItem />
|
||||
{parseType === 1 && (
|
||||
|
||||
123
web/src/pages/datasets/dataset-dataflow-creating-dialog.tsx
Normal file
123
web/src/pages/datasets/dataset-dataflow-creating-dialog.tsx
Normal file
@ -0,0 +1,123 @@
|
||||
import { ButtonLoading } from '@/components/ui/button';
|
||||
import {
|
||||
Dialog,
|
||||
DialogContent,
|
||||
DialogFooter,
|
||||
DialogHeader,
|
||||
DialogTitle,
|
||||
} from '@/components/ui/dialog';
|
||||
import {
|
||||
Form,
|
||||
FormControl,
|
||||
FormField,
|
||||
FormItem,
|
||||
FormLabel,
|
||||
FormMessage,
|
||||
} from '@/components/ui/form';
|
||||
import { Input } from '@/components/ui/input';
|
||||
import { IModalProps } from '@/interfaces/common';
|
||||
import { zodResolver } from '@hookform/resolvers/zod';
|
||||
import { useForm, useWatch } from 'react-hook-form';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
import { z } from 'zod';
|
||||
import {
|
||||
DataExtractKnowledgeItem,
|
||||
DataFlowItem,
|
||||
EmbeddingModelItem,
|
||||
ParseTypeItem,
|
||||
TeamItem,
|
||||
} from '../dataset/dataset-setting/configuration/common-item';
|
||||
|
||||
// DOM id shared by the <form> element in InputForm and the submit button in
// the dialog footer, so the button can submit the form from outside of it.
const FormId = 'dataset-creating-form';
|
||||
|
||||
/**
 * Form body of the dataset-creating dialog.
 *
 * Renders the dataset name input, the embedding model selector and the parse
 * type selector. When the watched `parseType` value is 2, the data-flow,
 * data-extract-knowledge and team items are rendered as well.
 *
 * @param onOk - Called with the validated (trimmed) dataset name when the
 *   form is submitted successfully.
 */
export function InputForm({ onOk }: IModalProps<any>) {
  const { t } = useTranslation();

  const FormSchema = z.object({
    name: z
      .string()
      // Trim BEFORE the length check: zod runs string checks in declaration
      // order, so `.min(1).trim()` would accept a whitespace-only name and
      // then hand an empty string to `onOk`.
      .trim()
      .min(1, {
        message: t('knowledgeList.namePlaceholder'),
      }),
    parseType: z.number().optional(),
  });

  const form = useForm<z.infer<typeof FormSchema>>({
    resolver: zodResolver(FormSchema),
    defaultValues: {
      name: '',
      parseType: 1,
    },
  });

  function onSubmit(data: z.infer<typeof FormSchema>) {
    onOk?.(data.name);
  }

  // Re-render whenever the parse type changes so the conditional items below
  // appear or disappear accordingly.
  const parseType = useWatch({
    control: form.control,
    name: 'parseType',
  });

  return (
    <Form {...form}>
      <form
        onSubmit={form.handleSubmit(onSubmit)}
        className="space-y-6"
        id={FormId}
      >
        <FormField
          control={form.control}
          name="name"
          render={({ field }) => (
            <FormItem>
              <FormLabel>
                <span className="text-destructive mr-1"> *</span>
                {t('knowledgeList.name')}
              </FormLabel>
              <FormControl>
                <Input
                  placeholder={t('knowledgeList.namePlaceholder')}
                  {...field}
                />
              </FormControl>
              <FormMessage />
            </FormItem>
          )}
        />
        <EmbeddingModelItem line={2} />
        <ParseTypeItem />
        {parseType === 2 && (
          <>
            <DataFlowItem />
            <DataExtractKnowledgeItem />
            <TeamItem />
          </>
        )}
      </form>
    </Form>
  );
}
|
||||
|
||||
/**
 * Dialog wrapper around InputForm for creating a dataset.
 *
 * The dialog is always rendered open; `hideModal` is passed as the dialog's
 * open-change handler. The footer button submits the form through the shared
 * form id and shows a loading state while `loading` is set.
 */
export function DatasetCreatingDialog(props: IModalProps<any>) {
  const { hideModal, onOk, loading } = props;
  const { t } = useTranslation();

  const dialogTitle = t('knowledgeList.createKnowledgeBase');
  const saveLabel = t('common.save');

  return (
    <Dialog open onOpenChange={hideModal}>
      <DialogContent className="sm:max-w-[425px]">
        <DialogHeader>
          <DialogTitle>{dialogTitle}</DialogTitle>
        </DialogHeader>
        <InputForm onOk={onOk} />
        <DialogFooter>
          <ButtonLoading type="submit" form={FormId} loading={loading}>
            {saveLabel}
          </ButtonLoading>
        </DialogFooter>
      </DialogContent>
    </Dialog>
  );
}
|
||||
@ -2911,7 +2911,7 @@ export const QWeatherTimePeriodOptions = [
|
||||
'30d',
|
||||
];
|
||||
|
||||
export const ExeSQLOptions = ['mysql', 'postgresql', 'mariadb', 'mssql'].map(
|
||||
export const ExeSQLOptions = ['mysql', 'postgres', 'mariadb', 'mssql'].map(
|
||||
(x) => ({
|
||||
label: upperFirst(x),
|
||||
value: x,
|
||||
|
||||
@ -37,6 +37,7 @@ const llmFactoryToUrlMap = {
|
||||
'https://huggingface.co/docs/text-embeddings-inference/quick_tour',
|
||||
[LLMFactory.GPUStack]: 'https://docs.gpustack.ai/latest/quickstart',
|
||||
[LLMFactory.VLLM]: 'https://docs.vllm.ai/en/latest/',
|
||||
[LLMFactory.TokenPony]: 'https://docs.tokenpony.cn/#/',
|
||||
};
|
||||
type LlmFactory = keyof typeof llmFactoryToUrlMap;
|
||||
|
||||
|
||||
@ -1,7 +1,10 @@
|
||||
import { IModalManagerChildrenProps } from '@/components/modal-manager';
|
||||
import { LlmModelType } from '@/constants/knowledge';
|
||||
import { useTranslate } from '@/hooks/common-hooks';
|
||||
import { ISystemModelSettingSavingParams } from '@/hooks/llm-hooks';
|
||||
import {
|
||||
ISystemModelSettingSavingParams,
|
||||
useComposeLlmOptionsByModelTypes,
|
||||
} from '@/hooks/llm-hooks';
|
||||
import { Form, Modal, Select } from 'antd';
|
||||
import { useEffect } from 'react';
|
||||
import { useFetchSystemModelSettingOnMount } from '../hooks';
|
||||
@ -43,6 +46,11 @@ const SystemModelSettingModal = ({
|
||||
|
||||
const onFormLayoutChange = () => {};
|
||||
|
||||
const modelOptions = useComposeLlmOptionsByModelTypes([
|
||||
LlmModelType.Chat,
|
||||
LlmModelType.Image2text,
|
||||
]);
|
||||
|
||||
return (
|
||||
<Modal
|
||||
title={t('systemModelSettings')}
|
||||
@ -58,14 +66,7 @@ const SystemModelSettingModal = ({
|
||||
name="llm_id"
|
||||
tooltip={t('chatModelTip')}
|
||||
>
|
||||
<Select
|
||||
options={[
|
||||
...allOptions[LlmModelType.Chat],
|
||||
...allOptions[LlmModelType.Image2text],
|
||||
]}
|
||||
allowClear
|
||||
showSearch
|
||||
/>
|
||||
<Select options={modelOptions} allowClear showSearch />
|
||||
</Form.Item>
|
||||
<Form.Item
|
||||
label={t('embeddingModel')}
|
||||
|
||||
@ -44,6 +44,7 @@ const orderFactoryList = [
|
||||
LLMFactory.Ollama,
|
||||
LLMFactory.Xinference,
|
||||
LLMFactory.Ai302,
|
||||
LLMFactory.CometAPI,
|
||||
];
|
||||
|
||||
export const sortLLmFactoryListBySpecifiedOrder = (list: IFactory[]) => {
|
||||
|
||||
Reference in New Issue
Block a user