mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-01-30 23:26:36 +08:00
feat: add paddleocr parser (#12513)
### What problem does this PR solve?

Add PaddleOCR as a new PDF parser.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
14
web/src/assets/svg/llm/paddleocr.svg
Normal file
14
web/src/assets/svg/llm/paddleocr.svg
Normal file
@ -0,0 +1,14 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<svg version="1.1" xmlns="http://www.w3.org/2000/svg" width="204" height="204">
|
||||
<path d="M0 0 C5.28 0 10.56 0 16 0 C16.15427797 5.78542375 16.22313666 9.66447202 14 15 C13.4176812 17.42810911 12.8778743 19.86670228 12.375 22.3125 C12.11460938 23.56675781 11.85421875 24.82101563 11.5859375 26.11328125 C11.39257812 27.06589844 11.19921875 28.01851562 11 29 C10.34 29 9.68 29 9 29 C9.04125 29.86625 9.0825 30.7325 9.125 31.625 C8.99380505 35.16726367 8.15585649 37.66841364 7 41 C6.65596692 43.33177975 6.32050798 45.66487041 6 48 C5.67 48.33 5.34 48.66 5 49 C3.55833967 54.24854466 2.68835557 59.60788136 2 65 C1.34 65 0.68 65 0 65 C-0.09796875 65.63164063 -0.1959375 66.26328125 -0.296875 66.9140625 C-1.22159497 72.44643887 -2.45538005 77.74845326 -4.0625 83.125 C-5.64813561 88.477257 -6.61641909 93.43807684 -7 99 C-7.66 99 -8.32 99 -9 99 C-8.95875 100.0725 -8.9175 101.145 -8.875 102.25 C-8.99814042 105.94421261 -9.40497 106.89631346 -11 110 C-11.52482966 111.79980854 -11.9996991 113.61456629 -12.4375 115.4375 C-12.69273438 116.49453125 -12.94796875 117.5515625 -13.2109375 118.640625 C-13.47132812 119.74921875 -13.73171875 120.8578125 -14 122 C-14.26039062 123.10859375 -14.52078125 124.2171875 -14.7890625 125.359375 C-16.23108798 131.56445435 -17.62923242 137.77882404 -19 144 C-19.66 144 -20.32 144 -21 144 C-21.12375 145.19625 -21.2475 146.3925 -21.375 147.625 C-21.77380462 151.48011134 -22.70431438 154.11294313 -24 158 C-29.28 158 -34.56 158 -40 158 C-39.50048992 151.75612399 -37.93599284 146.12864787 -36.16015625 140.1328125 C-34.83094643 135.52383644 -33.76462788 130.86061164 -32.6875 126.1875 C-30.51622401 116.78617956 -27.95798491 107.51855943 -25.23510742 98.26245117 C-24.94482666 97.25802979 -24.6545459 96.2536084 -24.35546875 95.21875 C-24.09403076 94.33123047 -23.83259277 93.44371094 -23.56323242 92.52929688 C-22.81886716 89.6184652 -22.81886716 89.6184652 -23 85 C-32.9 85 -42.8 85 -53 85 C-51.89473684 75.05263158 -51.89473684 75.05263158 -50 70 C-49.76023438 69.23042969 -49.52046875 68.46085937 -49.2734375 67.66796875 C-48.89058594 
66.44013672 -48.89058594 66.44013672 -48.5 65.1875 C-48.2628125 64.41792969 -48.025625 63.64835937 -47.78125 62.85546875 C-47 61 -47 61 -45 60 C-42.67583704 59.91413191 -40.34916693 59.89288957 -38.0234375 59.90234375 C-36.99556625 59.90446617 -36.99556625 59.90446617 -35.94692993 59.90663147 C-33.75624239 59.91223325 -31.5656567 59.92478594 -29.375 59.9375 C-27.89062648 59.94251478 -26.40625135 59.94707772 -24.921875 59.95117188 C-21.28121685 59.96220323 -17.64061836 59.97946901 -14 60 C-13.94376465 59.35498535 -13.8875293 58.7099707 -13.82958984 58.04541016 C-12.96693744 48.72910992 -11.24508629 39.82715762 -7.74609375 31.125 C-6.4362897 27.39445861 -5.68000873 23.5319145 -4.828125 19.67578125 C-3.34357442 13.08633739 -1.65158 6.54936895 0 0 Z " fill="#2831DF" transform="translate(53,46)"/>
|
||||
<path d="M0 0 C5.28 0 10.56 0 16 0 C15.235399 10.70441398 12.26943536 20.9810415 9.08203125 31.1875 C7.02129712 37.82748353 5.22384225 44.51569215 3.5 51.25 C3.19739258 52.42224121 2.89478516 53.59448242 2.58300781 54.80224609 C0.71701016 62.07123696 -1.04512446 69.35696753 -2.71826172 76.67236328 C-3.85473071 81.57050729 -5.31273138 86.26962192 -7 91 C-8.59119364 96.03877987 -9.44914797 100.76690573 -10 106 C-10.66 106 -11.32 106 -12 106 C-12.03738281 106.70125 -12.07476562 107.4025 -12.11328125 108.125 C-12.48264852 113.45547666 -13.1249218 117.94818256 -15.18359375 122.87109375 C-16.93149684 127.42902286 -17.86250115 132.26131996 -19 137 C-19.34588869 138.38455735 -19.69363488 139.76865163 -20.04296875 141.15234375 C-20.222229 141.86261719 -20.40148926 142.57289063 -20.58618164 143.3046875 C-20.93785233 144.69671066 -21.28988648 146.08864207 -21.64233398 147.48046875 C-22.52090977 150.97902837 -23.29193239 154.45966195 -24 158 C-29.61 158 -35.22 158 -41 158 C-40.24791147 152.73538029 -39.34974669 147.7690517 -38.125 142.625 C-37.94718994 141.87186523 -37.76937988 141.11873047 -37.58618164 140.34277344 C-36.50974786 135.8706144 -35.25275445 131.54235526 -33.7109375 127.20703125 C-32.48712712 123.40783969 -31.6904287 119.50981261 -30.8203125 115.6171875 C-29.90622587 111.58650076 -28.93409458 107.5734341 -27.9375 103.5625 C-27.77515869 102.90048584 -27.61281738 102.23847168 -27.44555664 101.55639648 C-26.70403999 98.58317455 -25.89147112 95.67598058 -24.91796875 92.76953125 C-23.84587401 89.53499862 -23.40522292 86.37685768 -23 83 C-22.34 83 -21.68 83 -21 83 C-20.896875 81.741875 -20.79375 80.48375 -20.6875 79.1875 C-20.17690392 74.48853603 -19.00603931 70.13009242 -17.69921875 65.59765625 C-16.83798598 62.39810434 -16.36546873 59.2892186 -16 56 C-15.34 56 -14.68 56 -14 56 C-13.96624268 55.33282959 -13.93248535 54.66565918 -13.89770508 53.97827148 C-13.41579319 46.91496121 -11.9250941 40.75547542 -9.6875 34.0625 C-6.29781688 23.49159445 -3.92624433 12.9233607 -2 2 
C-1.34 2 -0.68 2 0 2 C0 1.34 0 0.68 0 0 Z " fill="#2831DF" transform="translate(134,46)"/>
|
||||
<path d="M0 0 C2.43757413 -0.02698422 4.87484915 -0.04683099 7.3125 -0.0625 C8.00279297 -0.07087891 8.69308594 -0.07925781 9.40429688 -0.08789062 C12.5483025 -0.10307906 14.99420019 -0.00193327 18 1 C18 1.66 18 2.32 18 3 C18.66 3 19.32 3 20 3 C21.0064275 4.66279327 22.00585407 6.32983484 23 8 C23.99 8.66 24.98 9.32 26 10 C27.08301035 12.29598193 28.07796291 14.63477442 29 17 C29.350625 17.763125 29.70125 18.52625 30.0625 19.3125 C34.88894127 33.14829831 35.10756712 51.23425219 30 65 C28.24403886 68.52667826 26.22195061 71.75208274 24 75 C23.62488281 75.63164063 23.24976563 76.26328125 22.86328125 76.9140625 C21.38081833 79.38678722 20.48665592 80.81074492 17.75 81.875 C17.1725 81.91625 16.595 81.9575 16 82 C16 82.66 16 83.32 16 84 C6.01354054 84.86352013 -3.85054488 85.12644402 -13.875 85.0625 C-15.25650876 85.05746511 -16.63801933 85.05290579 -18.01953125 85.04882812 C-21.34640246 85.03719571 -24.67317602 85.0208528 -28 85 C-25.88008565 64.88008565 -25.88008565 64.88008565 -22 61 C-14.05344286 59.6982022 -6.19562196 60.02377079 1.82421875 60.390625 C7.89338929 60.57880147 7.89338929 60.57880147 13.078125 57.671875 C19.2379721 49.10818513 18.49422308 39.18370502 17 29 C16.34 29 15.68 29 15 29 C14.4740625 27.824375 14.4740625 27.824375 13.9375 26.625 C11.4077885 23.19764893 9.50197264 22.98385012 5.4296875 22.26953125 C1.91534221 21.87967623 -1.46670023 21.87151637 -5 22 C-4.76233962 20.09871698 -4.52388673 18.19752936 -4.28125 16.296875 C-4.09907454 14.80910876 -3.93145621 13.31945809 -3.78125 11.828125 C-3.39419678 8.15111943 -2.95070301 5.16989238 -1 2 C-0.67 1.34 -0.34 0.68 0 0 Z " fill="#2932DF" transform="translate(82,46)"/>
|
||||
<path d="M0 0 C15.71406556 -1.02980527 15.71406556 -1.02980527 20.5 2 C28.76609987 9.71502654 33.63280425 23.17948938 34.09179688 34.20507812 C34.57281659 49.21054692 33.72804383 61.68962202 26 75 C24.38541822 76.71717245 22.72413232 78.39285243 21 80 C20.38125 80.7425 19.7625 81.485 19.125 82.25 C14.68229769 85.90869602 8.01215129 85.1279183 2.4921875 85.1328125 C1.45942184 85.13424759 1.45942184 85.13424759 0.40579224 85.13571167 C-1.03790377 85.13638732 -2.48160293 85.13457633 -3.92529297 85.13037109 C-6.1260461 85.12499745 -8.32659672 85.13034546 -10.52734375 85.13671875 C-11.93750045 85.13605818 -13.34765702 85.13477746 -14.7578125 85.1328125 C-16.66264404 85.13112061 -16.66264404 85.13112061 -18.60595703 85.12939453 C-21.54244033 85.01744401 -24.15099128 84.68422413 -27 84 C-26.31697126 78.58609855 -25.22293698 73.36322421 -23.9375 68.0625 C-23.75123047 67.28326172 -23.56496094 66.50402344 -23.37304688 65.70117188 C-22.91818991 63.80010303 -22.4594938 61.89995343 -22 60 C-21.01604248 60.00523682 -20.03208496 60.01047363 -19.01831055 60.01586914 C-15.36744089 60.03397029 -11.71657521 60.04545075 -8.06567383 60.05493164 C-6.48558892 60.05996014 -4.90550866 60.06678378 -3.32543945 60.07543945 C-1.05418754 60.08756992 1.21700285 60.09324313 3.48828125 60.09765625 C4.54780754 60.10539818 4.54780754 60.10539818 5.6287384 60.11329651 C8.70254391 60.11364803 11.0590442 59.9803186 14 59 C15.10836423 57.11683841 15.10836423 57.11683841 16 55 C16.66 54.01 17.32 53.02 18 52 C20.37643288 44.15777151 19.63162649 35.30648664 16 28 C13.87039364 25.37894602 12.27236626 23.57037108 9.1875 22.1875 C7.08572233 22.00734763 5.04601849 21.98845702 2.9375 22 C-1 22 -1 22 -4 21 C-3.25428948 16.35197907 -2.3717461 11.7381195 -1.4375 7.125 C-1.29892578 6.43664063 -1.16035156 5.74828125 -1.01757812 5.0390625 C-0.67927173 3.35919626 -0.33976088 1.67957266 0 0 Z " fill="#2831DF" transform="translate(161,46)"/>
|
||||
<path d="M0 0 C2.8125 1.0625 2.8125 1.0625 5 3 C5.8125 6.6875 5.8125 6.6875 6 10 C6.66 10 7.32 10 8 10 C7.40672298 19.64855779 6.96022527 26.87405508 0 34 C-2 35.125 -2 35.125 -4 35 C-8.45279556 32.34952645 -9.59522559 29.86268066 -11 25 C-11.66 24.67 -12.32 24.34 -13 24 C-13.05425226 21.60385874 -13.09379027 19.20896147 -13.125 16.8125 C-13.14175781 16.13896484 -13.15851563 15.46542969 -13.17578125 14.77148438 C-13.22959846 9.25521997 -12.63330947 5.67867918 -8.75 1.6875 C-5.44146305 -0.34273858 -3.8200098 -0.55708476 0 0 Z " fill="#2832DE" transform="translate(102,2)"/>
|
||||
<path d="M0 0 C4.83670567 4.83670567 5.20245233 9.25686475 5.25 15.8125 C5.270625 16.51181641 5.29125 17.21113281 5.3125 17.93164062 C5.34861844 22.75796677 4.39813869 25.85170455 2 30 C1.67 30.99 1.34 31.98 1 33 C-2.3 33 -5.6 33 -9 33 C-13.20796432 26.12699161 -16.24978351 19.27600545 -15 11 C-13.68274847 6.82417685 -12.03704153 3.21584179 -9.1875 -0.125 C-5.75721453 -1.49711419 -3.50872971 -1.0024942 0 0 Z " fill="#2931DF" transform="translate(184,3)"/>
|
||||
<path d="M0 0 C1.0219043 -0.00676758 1.0219043 -0.00676758 2.06445312 -0.01367188 C5.73102226 -0.00285604 9.12254501 0.25617151 12.6875 1.125 C12.6875 1.785 12.6875 2.445 12.6875 3.125 C2.4575 3.125 -7.7725 3.125 -18.3125 3.125 C-18.3125 2.465 -18.3125 1.805 -18.3125 1.125 C-12.2065942 0.30305114 -6.15338301 0.01692039 0 0 Z " fill="#2C35ED" transform="translate(159.3125,105.875)"/>
|
||||
<path d="M0 0 C3.8544062 0.13989034 7.70845629 0.28798006 11.5625 0.4375 C12.64853516 0.47681641 13.73457031 0.51613281 14.85351562 0.55664062 C20.30410746 0.77201355 25.62075441 1.08159222 31 2 C31 2.33 31 2.66 31 3 C20.44 3 9.88 3 -1 3 C-0.67 2.01 -0.34 1.02 0 0 Z " fill="#2B35EB" transform="translate(8,106)"/>
|
||||
<path d="M0 0 C0 0.33 0 0.66 0 1 C-10.23 1 -20.46 1 -31 1 C-31 0.67 -31 0.34 -31 0 C-20.35831508 -2.10743678 -10.627168 -2.28562253 0 0 Z " fill="#2C35EF" transform="translate(92,108)"/>
|
||||
<path d="" fill="#0000FF" transform="translate(0,0)"/>
|
||||
<path d="" fill="#000000" transform="translate(0,0)"/>
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 10 KiB |
@ -6,6 +6,7 @@ import { camelCase } from 'lodash';
|
||||
import { ReactNode, useMemo } from 'react';
|
||||
import { useFormContext } from 'react-hook-form';
|
||||
import { MinerUOptionsFormField } from './mineru-options-form-field';
|
||||
import { PaddleOCROptionsFormField } from './paddleocr-options-form-field';
|
||||
import { SelectWithSearch } from './originui/select-with-search';
|
||||
import {
|
||||
FormControl,
|
||||
@ -28,12 +29,14 @@ export function LayoutRecognizeFormField({
|
||||
optionsWithoutLLM,
|
||||
label,
|
||||
showMineruOptions = true,
|
||||
showPaddleocrOptions = true,
|
||||
}: {
|
||||
name?: string;
|
||||
horizontal?: boolean;
|
||||
optionsWithoutLLM?: { value: string; label: string }[];
|
||||
label?: ReactNode;
|
||||
showMineruOptions?: boolean;
|
||||
showPaddleocrOptions?: boolean;
|
||||
}) {
|
||||
const form = useFormContext();
|
||||
|
||||
@ -113,6 +116,7 @@ export function LayoutRecognizeFormField({
|
||||
</div>
|
||||
</FormItem>
|
||||
{showMineruOptions && <MinerUOptionsFormField />}
|
||||
{showPaddleocrOptions && <PaddleOCROptionsFormField />}
|
||||
</>
|
||||
);
|
||||
}}
|
||||
|
||||
95
web/src/components/paddleocr-options-form-field.tsx
Normal file
95
web/src/components/paddleocr-options-form-field.tsx
Normal file
@ -0,0 +1,95 @@
|
||||
import { RAGFlowFormItem } from '@/components/ragflow-form';
|
||||
import { Input } from '@/components/ui/input';
|
||||
import { RAGFlowSelect } from '@/components/ui/select';
|
||||
import { LLMFactory } from '@/constants/llm';
|
||||
import { buildOptions } from '@/utils/form';
|
||||
import { useFormContext, useWatch } from 'react-hook-form';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
|
||||
const algorithmOptions = buildOptions(['PaddleOCR-VL']);
|
||||
|
||||
/**
 * Form section with the PaddleOCR-specific parser settings: API URL,
 * access token and algorithm.
 *
 * The section renders nothing unless the watched form value
 * `parser_config.layout_recognize` is a string that contains "PaddleOCR"
 * (compared both against `LLMFactory.PaddleOCR` and case-insensitively).
 *
 * Must be rendered inside a react-hook-form `FormProvider`, because it reads
 * the form through `useFormContext`.
 *
 * @param namePrefix - Prefix prepended (with a dot) to every field name
 *   registered by this section. Defaults to `'parser_config'`; an empty
 *   string leaves the field names unprefixed. Note that the watched
 *   `parser_config.layout_recognize` path is fixed and does not follow
 *   this prefix.
 * @returns The options section, or `null` when PaddleOCR is not selected.
 */
export function PaddleOCROptionsFormField({
  namePrefix = 'parser_config',
}: {
  namePrefix?: string;
}) {
  const form = useFormContext();
  const { t } = useTranslation();

  // Build the fully qualified form path for one of this section's fields.
  const buildName = (field: string) =>
    namePrefix ? `${namePrefix}.${field}` : field;

  const layoutRecognize = useWatch({
    control: form.control,
    name: 'parser_config.layout_recognize',
  });

  // Check if PaddleOCR is selected (the value contains 'PaddleOCR' or matches
  // the factory name). The watched value is untyped, so guard on `string`
  // first: optional chaining alone only protects against null/undefined and
  // would throw on any other non-string value (e.g. a boolean).
  const isPaddleOCRSelected =
    typeof layoutRecognize === 'string' &&
    (layoutRecognize.includes(LLMFactory.PaddleOCR) ||
      layoutRecognize.toLowerCase().includes('paddleocr'));

  if (!isPaddleOCRSelected) {
    return null;
  }

  return (
    <div className="space-y-4 border-l-2 border-primary/30 pl-4 ml-2">
      <div className="text-sm font-medium text-text-secondary">
        {t('knowledgeConfiguration.paddleocrOptions', 'PaddleOCR Options')}
      </div>

      {/* Endpoint of the PaddleOCR service. */}
      <RAGFlowFormItem
        name={buildName('paddleocr_api_url')}
        label={t('knowledgeConfiguration.paddleocrApiUrl', 'PaddleOCR API URL')}
        tooltip={t(
          'knowledgeConfiguration.paddleocrApiUrlTip',
          'The API endpoint URL for PaddleOCR service',
        )}
        horizontal={true}
      >
        {(field) => (
          <Input
            {...field}
            placeholder={t('knowledgeConfiguration.paddleocrApiUrlPlaceholder')}
          />
        )}
      </RAGFlowFormItem>

      {/* NOTE(review): the token is shown as plain text; consider masking it — confirm desired UX. */}
      <RAGFlowFormItem
        name={buildName('paddleocr_access_token')}
        label={t(
          'knowledgeConfiguration.paddleocrAccessToken',
          'AI Studio Access Token',
        )}
        tooltip={t(
          'knowledgeConfiguration.paddleocrAccessTokenTip',
          'Access token for PaddleOCR API (optional)',
        )}
        horizontal={true}
      >
        {(field) => (
          <Input
            {...field}
            placeholder={t(
              'knowledgeConfiguration.paddleocrAccessTokenPlaceholder',
            )}
          />
        )}
      </RAGFlowFormItem>

      {/* The select displays 'PaddleOCR-VL' while the field itself is still empty. */}
      <RAGFlowFormItem
        name={buildName('paddleocr_algorithm')}
        label={t(
          'knowledgeConfiguration.paddleocrAlgorithm',
          'PaddleOCR Algorithm',
        )}
        tooltip={t(
          'knowledgeConfiguration.paddleocrAlgorithmTip',
          'Algorithm to use for PaddleOCR parsing',
        )}
        horizontal={true}
      >
        {(field) => (
          <RAGFlowSelect
            value={field.value || 'PaddleOCR-VL'}
            onChange={field.onChange}
            options={algorithmOptions}
            placeholder={t('common.selectPlaceholder', 'Select value')}
          />
        )}
      </RAGFlowFormItem>
    </div>
  );
}
|
||||
@ -105,6 +105,7 @@ export const LlmIcon = ({
|
||||
LLMFactory.Gemini,
|
||||
LLMFactory.StepFun,
|
||||
LLMFactory.MinerU,
|
||||
LLMFactory.PaddleOCR,
|
||||
// LLMFactory.DeerAPI,
|
||||
];
|
||||
if (svgIcons.includes(name as LLMFactory)) {
|
||||
|
||||
@ -61,6 +61,7 @@ export enum LLMFactory {
|
||||
JiekouAI = 'Jiekou.AI',
|
||||
Builtin = 'Builtin',
|
||||
MinerU = 'MinerU',
|
||||
PaddleOCR = 'PaddleOCR',
|
||||
}
|
||||
|
||||
// Please lowercase the file name
|
||||
@ -127,6 +128,7 @@ export const IconMap = {
|
||||
[LLMFactory.JiekouAI]: 'jiekouai',
|
||||
[LLMFactory.Builtin]: 'builtin',
|
||||
[LLMFactory.MinerU]: 'mineru',
|
||||
[LLMFactory.PaddleOCR]: 'paddleocr',
|
||||
};
|
||||
|
||||
export const APIMapUrl = {
|
||||
@ -178,4 +180,5 @@ export const APIMapUrl = {
|
||||
[LLMFactory.DeerAPI]: 'https://api.deerapi.com/token',
|
||||
[LLMFactory.TokenPony]: 'https://www.tokenpony.cn/#/user/keys',
|
||||
[LLMFactory.DeepInfra]: 'https://deepinfra.com/dash/api_keys',
|
||||
[LLMFactory.PaddleOCR]: 'https://www.paddleocr.ai/latest/',
|
||||
};
|
||||
|
||||
@ -385,6 +385,17 @@ Prozedurales Gedächtnis: Erlernte Fähigkeiten, Gewohnheiten und automatisierte
|
||||
'Formelerkennung aktivieren. Hinweis: Dies funktioniert möglicherweise nicht korrekt bei kyrillischen Dokumenten.',
|
||||
mineruTableEnable: 'Tabellenerkennung',
|
||||
mineruTableEnableTip: 'Tabellenerkennung und -extraktion aktivieren.',
|
||||
paddleocrOptions: 'PaddleOCR-Optionen',
|
||||
paddleocrApiUrl: 'PaddleOCR API-URL',
|
||||
paddleocrApiUrlTip: 'API-Endpunkt-URL des PaddleOCR-Dienstes',
|
||||
paddleocrApiUrlPlaceholder: 'Zum Beispiel: https://paddleocr-server.com/layout-parsing',
|
||||
paddleocrAccessToken: 'AI Studio-Zugriffstoken',
|
||||
paddleocrAccessTokenTip: 'Zugriffstoken für die PaddleOCR-API (optional)',
|
||||
paddleocrAccessTokenPlaceholder: 'Ihr AI Studio-Token (optional)',
|
||||
paddleocrAlgorithm: 'PaddleOCR-Algorithmus',
|
||||
paddleocrAlgorithmTip: 'Algorithmus, der für die PaddleOCR-Verarbeitung verwendet wird',
|
||||
paddleocrSelectAlgorithm: 'Algorithmus auswählen',
|
||||
paddleocrModelNamePlaceholder: 'Zum Beispiel: paddleocr-umgebung-1',
|
||||
overlappedPercent: 'Überlappungsprozent(%)',
|
||||
generationScopeTip:
|
||||
'Bestimmt, ob RAPTOR für den gesamten Datensatz oder für eine einzelne Datei generiert wird.',
|
||||
@ -475,7 +486,7 @@ Prozedurales Gedächtnis: Erlernte Fähigkeiten, Gewohnheiten und automatisierte
|
||||
book: `<p>Unterstützte Dateiformate sind <b>DOCX</b>, <b>PDF</b>, <b>TXT</b>.</p><p>
|
||||
Für jedes Buch im PDF-Format stellen Sie bitte die <i>Seitenbereiche</i> ein, um unerwünschte Informationen zu entfernen und die Analysezeit zu reduzieren.</p>`,
|
||||
laws: `<p>Unterstützte Dateiformate sind <b>DOCX</b>, <b>PDF</b>, <b>TXT</b>.</p><p>
|
||||
Rechtliche Dokumente folgen in der Regel einem strengen Schreibformat. Wir verwenden Textmerkmale, um Teilungspunkte zu identifizieren.
|
||||
Rechtliche Dokumente folgen in der Regel einem strengen Schreibformat. Wir verwenden Textmerkmale, um Teilungspunkte zu identifizieren.
|
||||
</p><p>
|
||||
Der Chunk hat eine Granularität, die mit 'ARTIKEL' übereinstimmt, wobei sichergestellt wird, dass der gesamte übergeordnete Text im Chunk enthalten ist.
|
||||
</p>`,
|
||||
@ -489,7 +500,7 @@ Prozedurales Gedächtnis: Erlernte Fähigkeiten, Gewohnheiten und automatisierte
|
||||
<li>Dann werden benachbarte Segmente kombiniert, bis die Token-Anzahl den durch 'Chunk-Token-Anzahl' festgelegten Schwellenwert überschreitet, woraufhin ein Chunk erstellt wird.</li></p>`,
|
||||
paper: `<p>Nur <b>PDF</b>-Dateien werden unterstützt.</p><p>
|
||||
Papers werden nach Abschnitten wie <i>abstract, 1.1, 1.2</i> aufgeteilt. </p><p>
|
||||
Dieser Ansatz ermöglicht es dem LLM, das Paper effektiver zusammenzufassen und umfassendere, verständlichere Antworten zu liefern.
|
||||
Dieser Ansatz ermöglicht es dem LLM, das Paper effektiver zusammenzufassen und umfassendere, verständlichere Antworten zu liefern.
|
||||
Es erhöht jedoch auch den Kontext für KI-Gespräche und die Rechenkosten für das LLM. Daher sollten Sie während eines Gesprächs erwägen, den Wert von '<b>topN</b>' zu reduzieren.</p>`,
|
||||
presentation: `<p>Unterstützte Dateiformate sind <b>PDF</b>, <b>PPTX</b>.</p><p>
|
||||
Jede Seite in den Folien wird als Chunk behandelt, wobei ihr Vorschaubild gespeichert wird.</p><p>
|
||||
@ -1108,6 +1119,17 @@ Beispiel: Virtual Hosted Style`,
|
||||
modelTypeMessage: 'Bitte geben Sie Ihren Modelltyp ein!',
|
||||
addLlmBaseUrl: 'Basis-URL',
|
||||
baseUrlNameMessage: 'Bitte geben Sie Ihre Basis-URL ein!',
|
||||
paddleocr: {
|
||||
apiUrl: 'PaddleOCR API-URL',
|
||||
apiUrlPlaceholder: 'Zum Beispiel: https://paddleocr-server.com/layout-parsing',
|
||||
accessToken: 'AI Studio-Zugriffstoken',
|
||||
accessTokenPlaceholder: 'Ihr AI Studio-Token (optional)',
|
||||
algorithm: 'PaddleOCR-Algorithmus',
|
||||
selectAlgorithm: 'Algorithmus auswählen',
|
||||
modelNamePlaceholder: 'Zum Beispiel: paddleocr-from-env-1',
|
||||
modelNameRequired: 'Der Modellname ist ein Pflichtfeld',
|
||||
apiUrlRequired: 'Die PaddleOCR API-URL ist ein Pflichtfeld'
|
||||
},
|
||||
vision: 'Unterstützt es Vision?',
|
||||
ollamaLink: 'Wie integriere ich {{name}}',
|
||||
FishAudioLink: 'Wie verwende ich FishAudio',
|
||||
|
||||
@ -148,7 +148,7 @@ Procedural Memory: Learned skills, habits, and automated procedures.`,
|
||||
action: 'Action',
|
||||
},
|
||||
config: {
|
||||
memorySizeTooltip: `Accounts for each message's content + its embedding vector (≈ Content + Dimensions × 8 Bytes).
|
||||
memorySizeTooltip: `Accounts for each message's content + its embedding vector (≈ Content + Dimensions × 8 Bytes).
|
||||
Example: A 1 KB message with 1024-dim embedding uses ~9 KB. The 5 MB default limit holds ~500 such messages.`,
|
||||
avatar: 'Avatar',
|
||||
description: 'Description',
|
||||
@ -424,6 +424,17 @@ Example: A 1 KB message with 1024-dim embedding uses ~9 KB. The 5 MB default lim
|
||||
'Enable formula recognition. Note: This may not work correctly for Cyrillic documents.',
|
||||
mineruTableEnable: 'Table recognition',
|
||||
mineruTableEnableTip: 'Enable table recognition and extraction.',
|
||||
paddleocrOptions: 'PaddleOCR Options',
|
||||
paddleocrApiUrl: 'PaddleOCR API URL',
|
||||
paddleocrApiUrlTip: 'The API endpoint URL for PaddleOCR service',
|
||||
paddleocrApiUrlPlaceholder: 'e.g. https://paddleocr-server.com/layout-parsing',
|
||||
paddleocrAccessToken: 'AI Studio Access Token',
|
||||
paddleocrAccessTokenTip: 'Access token for PaddleOCR API (optional)',
|
||||
paddleocrAccessTokenPlaceholder: 'Your AI Studio token (optional)',
|
||||
paddleocrAlgorithm: 'PaddleOCR Algorithm',
|
||||
paddleocrAlgorithmTip: 'Algorithm to use for PaddleOCR parsing',
|
||||
paddleocrSelectAlgorithm: 'Select Algorithm',
|
||||
paddleocrModelNamePlaceholder: 'e.g. paddleocr-from-env-1',
|
||||
overlappedPercent: 'Overlapped percent(%)',
|
||||
generationScopeTip:
|
||||
'Determines whether RAPTOR is generated for the entire dataset or for a single file.',
|
||||
@ -1094,6 +1105,17 @@ Example: Virtual Hosted Style`,
|
||||
modelTypeMessage: 'Please input your model type!',
|
||||
addLlmBaseUrl: 'Base url',
|
||||
baseUrlNameMessage: 'Please input your base url!',
|
||||
paddleocr: {
|
||||
apiUrl: 'PaddleOCR API URL',
|
||||
apiUrlPlaceholder: 'For example: https://paddleocr-server.com/layout-parsing',
|
||||
accessToken: 'AI Studio Access Token',
|
||||
accessTokenPlaceholder: 'Your AI Studio token (optional)',
|
||||
algorithm: 'PaddleOCR Algorithm',
|
||||
selectAlgorithm: 'Select Algorithm',
|
||||
modelNamePlaceholder: 'For example: paddleocr-from-env-1',
|
||||
modelNameRequired: 'Model name is required',
|
||||
apiUrlRequired: 'PaddleOCR API URL is required'
|
||||
},
|
||||
vision: 'Does it support Vision?',
|
||||
ollamaLink: 'How to integrate {{name}}',
|
||||
FishAudioLink: 'How to use FishAudio',
|
||||
|
||||
@ -159,6 +159,20 @@ export default {
|
||||
html4excelTip: `Usar junto con el método de fragmentación General. Cuando está desactivado, los archivos de hoja de cálculo (XLSX, XLS (Excel 97-2003)) se analizan línea por línea como pares clave-valor. Cuando está activado, los archivos de hoja de cálculo se convierten en tablas HTML. Si la tabla original tiene más de 12 filas, el sistema la dividirá automáticamente en varias tablas HTML cada 12 filas. Para más información, consulte https://ragflow.io/docs/dev/enable_excel2html.`,
|
||||
},
|
||||
|
||||
knowledgeConfiguration: {
|
||||
paddleocrOptions: 'Opciones de PaddleOCR',
|
||||
paddleocrApiUrl: 'URL de API de PaddleOCR',
|
||||
paddleocrApiUrlTip: 'La URL del endpoint de la API para el servicio PaddleOCR',
|
||||
paddleocrApiUrlPlaceholder: 'ej: https://servidor-paddleocr.com/api',
|
||||
paddleocrAccessToken: 'Token de acceso de AI Studio',
|
||||
paddleocrAccessTokenTip: 'Token de acceso para la API de PaddleOCR (opcional)',
|
||||
paddleocrAccessTokenPlaceholder: 'Su token de AI Studio (opcional)',
|
||||
paddleocrAlgorithm: 'Algoritmo de PaddleOCR',
|
||||
paddleocrAlgorithmTip: 'Algoritmo a utilizar para el análisis de PaddleOCR',
|
||||
paddleocrSelectAlgorithm: 'Seleccionar algoritmo',
|
||||
paddleocrModelNamePlaceholder: 'ej: paddleocr-desde-env-1',
|
||||
},
|
||||
|
||||
// Otros bloques de traducción
|
||||
// Continua con la misma estructura
|
||||
chat: {
|
||||
@ -379,6 +393,17 @@ export default {
|
||||
modelTypeMessage: '¡Por favor ingresa el tipo de tu modelo!',
|
||||
addLlmBaseUrl: 'URL base',
|
||||
baseUrlNameMessage: '¡Por favor ingresa tu URL base!',
|
||||
paddleocr: {
|
||||
apiUrl: 'URL de la API de PaddleOCR',
|
||||
apiUrlPlaceholder: 'Por ejemplo: https://paddleocr-server.com/layout-parsing',
|
||||
accessToken: 'Token de acceso de AI Studio',
|
||||
accessTokenPlaceholder: 'Su token de AI Studio (opcional)',
|
||||
algorithm: 'Algoritmo de PaddleOCR',
|
||||
selectAlgorithm: 'Seleccionar algoritmo',
|
||||
modelNamePlaceholder: 'Por ejemplo: paddleocr-from-env-1',
|
||||
modelNameRequired: 'El nombre del modelo es obligatorio',
|
||||
apiUrlRequired: 'La URL de la API de PaddleOCR es obligatoria'
|
||||
},
|
||||
vision: '¿Soporta visión?',
|
||||
ollamaLink: 'Cómo integrar {{name}}',
|
||||
FishAudioLink: 'Cómo usar FishAudio',
|
||||
|
||||
@ -293,6 +293,17 @@ export default {
|
||||
communityTip: `Un "community" est un groupe d’entités liées. Le LLM peut générer un résumé pour chaque groupe. Voir plus ici : https: //www.microsoft.com/en-us/research/blog/graphrag-improving-global-search-via-dynamic-community-selection/`,
|
||||
theDocumentBeingParsedCannotBeDeleted:
|
||||
'Le document en cours d’analyse ne peut pas être supprimé',
|
||||
paddleocrOptions: 'Options PaddleOCR',
|
||||
paddleocrApiUrl: 'URL de l’API PaddleOCR',
|
||||
paddleocrApiUrlTip: 'URL du point de terminaison de l’API du service PaddleOCR',
|
||||
paddleocrApiUrlPlaceholder: 'Par exemple : https://paddleocr-server.com/layout-parsing',
|
||||
paddleocrAccessToken: 'Jeton d’accès AI Studio',
|
||||
paddleocrAccessTokenTip: 'Jeton d’accès à l’API PaddleOCR (optionnel)',
|
||||
paddleocrAccessTokenPlaceholder: 'Votre jeton AI Studio (optionnel)',
|
||||
paddleocrAlgorithm: 'Algorithme PaddleOCR',
|
||||
paddleocrAlgorithmTip: 'Algorithme utilisé pour l’analyse PaddleOCR',
|
||||
paddleocrSelectAlgorithm: 'Sélectionner un algorithme',
|
||||
paddleocrModelNamePlaceholder: 'Par exemple : paddleocr-environnement-1',
|
||||
},
|
||||
chunk: {
|
||||
chunk: 'Segment',
|
||||
@ -566,6 +577,17 @@ export default {
|
||||
modelTypeMessage: 'Veuillez saisir le type de votre modèle !',
|
||||
addLlmBaseUrl: 'URL de base',
|
||||
baseUrlNameMessage: 'Veuillez saisir votre URL de base !',
|
||||
paddleocr: {
|
||||
apiUrl: 'URL de l’API PaddleOCR',
|
||||
apiUrlPlaceholder: 'Par exemple : https://paddleocr-server.com/layout-parsing',
|
||||
accessToken: 'Jeton d’accès AI Studio',
|
||||
accessTokenPlaceholder: 'Votre jeton AI Studio (optionnel)',
|
||||
algorithm: 'Algorithme PaddleOCR',
|
||||
selectAlgorithm: 'Sélectionner un algorithme',
|
||||
modelNamePlaceholder: 'Par exemple : paddleocr-from-env-1',
|
||||
modelNameRequired: 'Le nom du modèle est obligatoire',
|
||||
apiUrlRequired: 'L’URL de l’API PaddleOCR est obligatoire'
|
||||
},
|
||||
vision: 'Supporte-t-il la vision ?',
|
||||
ollamaLink: 'Comment intégrer {{name}}',
|
||||
FishAudioLink: 'Comment utiliser FishAudio',
|
||||
|
||||
@ -316,6 +316,17 @@ export default {
|
||||
randomSeed: 'Benih acak',
|
||||
randomSeedMessage: 'Benih acak diperlukan',
|
||||
entityTypes: 'Jenis entitas',
|
||||
paddleocrOptions: 'Opsi PaddleOCR',
|
||||
paddleocrApiUrl: 'URL API PaddleOCR',
|
||||
paddleocrApiUrlTip: 'URL endpoint API layanan PaddleOCR',
|
||||
paddleocrApiUrlPlaceholder: 'Contoh: https://paddleocr-server.com/layout-parsing',
|
||||
paddleocrAccessToken: 'Token Akses AI Studio',
|
||||
paddleocrAccessTokenTip: 'Token akses untuk API PaddleOCR (opsional)',
|
||||
paddleocrAccessTokenPlaceholder: 'Token AI Studio Anda (opsional)',
|
||||
paddleocrAlgorithm: 'Algoritma PaddleOCR',
|
||||
paddleocrAlgorithmTip: 'Algoritma yang digunakan untuk pemrosesan PaddleOCR',
|
||||
paddleocrSelectAlgorithm: 'Pilih algoritma',
|
||||
paddleocrModelNamePlaceholder: 'Contoh: paddleocr-lingkungan-1',
|
||||
},
|
||||
chunk: {
|
||||
chunk: 'Potongan',
|
||||
@ -553,6 +564,17 @@ export default {
|
||||
modelTypeMessage: 'Silakan masukkan jenis model Anda!',
|
||||
addLlmBaseUrl: 'Base url',
|
||||
baseUrlNameMessage: 'Silakan masukkan base url Anda!',
|
||||
paddleocr: {
|
||||
apiUrl: 'URL API PaddleOCR',
|
||||
apiUrlPlaceholder: 'Contoh: https://paddleocr-server.com/layout-parsing',
|
||||
accessToken: 'Token Akses AI Studio',
|
||||
accessTokenPlaceholder: 'Token AI Studio Anda (opsional)',
|
||||
algorithm: 'Algoritma PaddleOCR',
|
||||
selectAlgorithm: 'Pilih algoritma',
|
||||
modelNamePlaceholder: 'Contoh: paddleocr-from-env-1',
|
||||
modelNameRequired: 'Nama model wajib diisi',
|
||||
apiUrlRequired: 'URL API PaddleOCR wajib diisi'
|
||||
},
|
||||
vision: 'Apakah mendukung Vision?',
|
||||
ollamaLink: 'Cara mengintegrasikan {{name}}',
|
||||
FishAudioLink: 'Cara menggunakan FishAudio',
|
||||
|
||||
@ -488,6 +488,17 @@ Quanto sopra è il contenuto che devi riassumere.`,
|
||||
'In un grafo della conoscenza, una comunità è un cluster di entità collegate da relazioni. Puoi far generare al LLM un abstract per ogni comunità, noto come report comunità.',
|
||||
theDocumentBeingParsedCannotBeDeleted:
|
||||
'Il documento in fase di analisi non può essere eliminato',
|
||||
paddleocrOptions: 'Opzioni PaddleOCR',
|
||||
paddleocrApiUrl: 'URL API di PaddleOCR',
|
||||
paddleocrApiUrlTip: 'URL dell’endpoint API del servizio PaddleOCR',
|
||||
paddleocrApiUrlPlaceholder: 'Ad esempio: https://paddleocr-server.com/layout-parsing',
|
||||
paddleocrAccessToken: 'Token di accesso AI Studio',
|
||||
paddleocrAccessTokenTip: 'Token di accesso per l’API PaddleOCR (facoltativo)',
|
||||
paddleocrAccessTokenPlaceholder: 'Il tuo token AI Studio (facoltativo)',
|
||||
paddleocrAlgorithm: 'Algoritmo PaddleOCR',
|
||||
paddleocrAlgorithmTip: 'Algoritmo utilizzato per l’elaborazione PaddleOCR',
|
||||
paddleocrSelectAlgorithm: 'Seleziona algoritmo',
|
||||
paddleocrModelNamePlaceholder: 'Ad esempio: paddleocr-ambiente-1',
|
||||
},
|
||||
chunk: {
|
||||
chunk: 'Chunk',
|
||||
@ -785,6 +796,17 @@ Quanto sopra è il contenuto che devi riassumere.`,
|
||||
modelTypeMessage: 'Inserisci il tuo tipo di modello!',
|
||||
addLlmBaseUrl: 'URL base',
|
||||
baseUrlNameMessage: 'Inserisci il tuo URL base!',
|
||||
paddleocr: {
|
||||
apiUrl: 'URL API di PaddleOCR',
|
||||
apiUrlPlaceholder: 'Ad esempio: https://paddleocr-server.com/layout-parsing',
|
||||
accessToken: 'Token di accesso AI Studio',
|
||||
accessTokenPlaceholder: 'Il tuo token AI Studio (facoltativo)',
|
||||
algorithm: 'Algoritmo PaddleOCR',
|
||||
selectAlgorithm: 'Seleziona algoritmo',
|
||||
modelNamePlaceholder: 'Ad esempio: paddleocr-from-env-1',
|
||||
modelNameRequired: 'Il nome del modello è obbligatorio',
|
||||
apiUrlRequired: 'L’URL API di PaddleOCR è obbligatorio'
|
||||
},
|
||||
vision: 'Supporta Vision?',
|
||||
ollamaLink: 'Come integrare {{name}}',
|
||||
FishAudioLink: 'Come usare FishAudio',
|
||||
|
||||
@ -240,7 +240,7 @@ export default {
|
||||
<b>XLSX</b>形式のファイルには、ヘッダーのない2つの
|
||||
列が必要です: 1つは質問の列でもう1つは回答の列です
|
||||
(質問列が先行)。複数のシートも可能です。
|
||||
|
||||
|
||||
</li>
|
||||
<li>
|
||||
<b>CSV/TXT</b>形式のファイルは、TABで区切られたUTF-8エンコードである必要があります。
|
||||
@ -285,7 +285,7 @@ export default {
|
||||
LLMがその量のコンテキスト長を処理できる場合に、ドキュメント全体を要約する必要があるときに適用されます。
|
||||
</p>`,
|
||||
knowledgeGraph: `<p>対応ファイル形式は<b>DOCX, EXCEL, PPT, IMAGE, PDF, TXT, MD, JSON, EML</b>です。
|
||||
|
||||
|
||||
<p>このアプローチでは、ファイルを'ナイーブ'/'一般'メソッドを使用してチャンクに分割します。ドキュメントをセグメントに分割し、隣接するセグメントを結合してトークン数が'チャンクトークン数'で指定されたしきい値を超えるまで続け、その時点でチャンクが作成されます。</p>
|
||||
<p>その後、チャンクはLLMに入力され、ナレッジグラフとマインドマップのエンティティと関係を抽出します。</p>
|
||||
<p><b>エンティティタイプ</b>を設定することを忘れないでください。</p>`,
|
||||
@ -314,6 +314,17 @@ export default {
|
||||
entityTypes: 'エンティティタイプ',
|
||||
pageRank: 'ページランク',
|
||||
pageRankTip: `検索時に特定の知識ベースにより高いPageRankスコアを割り当てることができます。対応するスコアは、これらの知識ベースから取得されたチャンクのハイブリッド類似度スコアに加算され、ランキングが向上します。詳細については、https://ragflow.io/docs/dev/set_page_rank を参照してください。`,
|
||||
paddleocrOptions: 'PaddleOCRオプション',
|
||||
paddleocrApiUrl: 'PaddleOCR API URL',
|
||||
paddleocrApiUrlTip: 'PaddleOCRサービスのAPIエンドポイントURL',
|
||||
paddleocrApiUrlPlaceholder: '例: https://paddleocr-server.com/layout-parsing',
|
||||
paddleocrAccessToken: 'AI Studioアクセストークン',
|
||||
paddleocrAccessTokenTip: 'PaddleOCR APIのアクセストークン(オプション)',
|
||||
paddleocrAccessTokenPlaceholder: 'AI Studioトークン(オプション)',
|
||||
paddleocrAlgorithm: 'PaddleOCRアルゴリズム',
|
||||
paddleocrAlgorithmTip: 'PaddleOCR解析に使用するアルゴリズム',
|
||||
paddleocrSelectAlgorithm: 'アルゴリズムを選択',
|
||||
paddleocrModelNamePlaceholder: '例: paddleocr-from-env-1',
|
||||
},
|
||||
chunk: {
|
||||
chunk: 'チャンク',
|
||||
@ -596,6 +607,17 @@ export default {
|
||||
modelTypeMessage: 'モデルタイプを入力してください!',
|
||||
addLlmBaseUrl: 'ベースURL',
|
||||
baseUrlNameMessage: 'ベースURLを入力してください!',
|
||||
paddleocr: {
|
||||
apiUrl: 'PaddleOCR API URL',
|
||||
apiUrlPlaceholder: '例:https://paddleocr-server.com/layout-parsing',
|
||||
accessToken: 'AI Studio アクセストークン',
|
||||
accessTokenPlaceholder: 'AI Studio のトークン(任意)',
|
||||
algorithm: 'PaddleOCR アルゴリズム',
|
||||
selectAlgorithm: 'アルゴリズムを選択',
|
||||
modelNamePlaceholder: '例:paddleocr-from-env-1',
|
||||
modelNameRequired: 'モデル名は必須です',
|
||||
apiUrlRequired: 'PaddleOCR API URL は必須です'
|
||||
},
|
||||
vision: 'ビジョンをサポートしていますか?',
|
||||
ollamaLink: '{{name}}を統合する方法',
|
||||
FishAudioLink: 'FishAudioの使用方法',
|
||||
|
||||
@ -310,6 +310,17 @@ export default {
|
||||
topnTags: 'Top-N Etiquetas',
|
||||
tags: 'Etiquetas',
|
||||
addTag: 'Adicionar etiqueta',
|
||||
paddleocrOptions: 'Opções do PaddleOCR',
|
||||
paddleocrApiUrl: 'URL da API do PaddleOCR',
|
||||
paddleocrApiUrlTip: 'A URL do endpoint da API para o serviço PaddleOCR',
|
||||
paddleocrApiUrlPlaceholder: 'ex: https://paddleocr-server.com/layout-parsing',
|
||||
paddleocrAccessToken: 'Token de Acesso do AI Studio',
|
||||
paddleocrAccessTokenTip: 'Token de acesso para a API do PaddleOCR (opcional)',
|
||||
paddleocrAccessTokenPlaceholder: 'Seu token do AI Studio (opcional)',
|
||||
paddleocrAlgorithm: 'Algoritmo do PaddleOCR',
|
||||
paddleocrAlgorithmTip: 'Algoritmo a ser usado para a análise do PaddleOCR',
|
||||
paddleocrSelectAlgorithm: 'Selecionar algoritmo',
|
||||
paddleocrModelNamePlaceholder: 'ex: paddleocr-from-env-1',
|
||||
},
|
||||
chunk: {
|
||||
chunk: 'Fragmento',
|
||||
@ -546,6 +557,17 @@ export default {
|
||||
modelTypeMessage: 'Por favor, insira o tipo do seu modelo!',
|
||||
addLlmBaseUrl: 'URL base',
|
||||
baseUrlNameMessage: 'Por favor, insira sua URL base!',
|
||||
paddleocr: {
|
||||
apiUrl: 'URL da API do PaddleOCR',
|
||||
apiUrlPlaceholder: 'Por exemplo: https://paddleocr-server.com/layout-parsing',
|
||||
accessToken: 'Token de acesso do AI Studio',
|
||||
accessTokenPlaceholder: 'Seu token do AI Studio (opcional)',
|
||||
algorithm: 'Algoritmo do PaddleOCR',
|
||||
selectAlgorithm: 'Selecionar algoritmo',
|
||||
modelNamePlaceholder: 'Por exemplo: paddleocr-from-env-1',
|
||||
modelNameRequired: 'O nome do modelo é obrigatório',
|
||||
apiUrlRequired: 'A URL da API do PaddleOCR é obrigatória'
|
||||
},
|
||||
vision: 'Suporta visão?',
|
||||
ollamaLink: 'Como integrar {{name}}',
|
||||
FishAudioLink: 'Como usar FishAudio',
|
||||
|
||||
@ -510,6 +510,17 @@ export default {
|
||||
'В графе знаний сообщество - это кластер сущностей, связанных отношениями. Вы можете поручить LLM генерировать аннотацию для каждого сообщества, известную как отчет сообщества. Более подробная информация здесь: https://www.microsoft.com/en-us/research/blog/graphrag-improving-global-search-via-dynamic-community-selection/',
|
||||
theDocumentBeingParsedCannotBeDeleted:
|
||||
'Документ, который в данный момент парсится, не может быть удален',
|
||||
paddleocrOptions: 'Параметры PaddleOCR',
|
||||
paddleocrApiUrl: 'URL API PaddleOCR',
|
||||
paddleocrApiUrlTip: 'URL конечной точки API сервиса PaddleOCR',
|
||||
paddleocrApiUrlPlaceholder: 'Например: https://paddleocr-server.com/layout-parsing',
|
||||
paddleocrAccessToken: 'Токен доступа AI Studio',
|
||||
paddleocrAccessTokenTip: 'Токен доступа к API PaddleOCR (необязательно)',
|
||||
paddleocrAccessTokenPlaceholder: 'Ваш токен AI Studio (необязательно)',
|
||||
paddleocrAlgorithm: 'Алгоритм PaddleOCR',
|
||||
paddleocrAlgorithmTip: 'Алгоритм, используемый для обработки PaddleOCR',
|
||||
paddleocrSelectAlgorithm: 'Выбрать алгоритм',
|
||||
paddleocrModelNamePlaceholder: 'Например: paddleocr-from-env-1',
|
||||
},
|
||||
chunk: {
|
||||
chunk: 'Чанк',
|
||||
@ -716,7 +727,7 @@ export default {
|
||||
'Базовый URL вашего экземпляра Confluence (например, https://your-domain.atlassian.net/wiki)',
|
||||
confluenceSpaceKeyTip:
|
||||
'Необязательно: Укажите ключ пространства для синхронизации только определенного пространства. Оставьте пустым для синхронизации всех доступных пространств. Для нескольких пространств разделите запятыми (например, DEV,DOCS,HR)',
|
||||
s3PrefixTip: `Укажите путь к папке в вашем S3 бакете для получения файлов.
|
||||
s3PrefixTip: `Укажите путь к папке в вашем S3 бакете для получения файлов.
|
||||
Пример: general/v2/`,
|
||||
S3CompatibleEndpointUrlTip: `Требуется для S3 совместимого Storage Box. Укажите URL конечной точки, совместимой с S3.
|
||||
Пример: https://fsn1.your-objectstorage.com`,
|
||||
@ -1034,6 +1045,17 @@ export default {
|
||||
modelsToBeAddedTooltip:
|
||||
'Если ваш провайдер моделей не указан, но заявляет о "совместимости с OpenAI-API", выберите карточку OpenAI-API-compatible, чтобы добавить соответствующие модели. ',
|
||||
mcp: 'MCP',
|
||||
paddleocr: {
|
||||
apiUrl: 'URL API PaddleOCR',
|
||||
apiUrlPlaceholder: 'Например: https://paddleocr-server.com/layout-parsing',
|
||||
accessToken: 'Токен доступа AI Studio',
|
||||
accessTokenPlaceholder: 'Ваш токен AI Studio (необязательно)',
|
||||
algorithm: 'Алгоритм PaddleOCR',
|
||||
selectAlgorithm: 'Выбрать алгоритм',
|
||||
modelNamePlaceholder: 'Например: paddleocr-from-env-1',
|
||||
modelNameRequired: 'Имя модели является обязательным',
|
||||
apiUrlRequired: 'URL API PaddleOCR является обязательным'
|
||||
},
|
||||
},
|
||||
message: {
|
||||
registered: 'Зарегистрирован!',
|
||||
|
||||
@ -354,6 +354,17 @@ export default {
|
||||
community: 'Xây dựng mối quan hệ cộng đồng',
|
||||
communityTip:
|
||||
'Các liên kết được nhóm lại thành các cộng đồng phân cấp, với các thực thể và mối quan hệ kết nối từng phân đoạn lên các cấp độ trừu tượng cao hơn. Sau đó, chúng tôi sử dụng một LLM để tạo ra bản tóm tắt cho mỗi cộng đồng, được gọi là báo cáo cộng đồng. Xem thêm: https://www.microsoft.com/en-us/research/blog/graphrag-improving-global-search-via-dynamic-community-selection/',
|
||||
paddleocrOptions: 'Tùy chọn PaddleOCR',
|
||||
paddleocrApiUrl: 'URL API PaddleOCR',
|
||||
paddleocrApiUrlTip: 'URL điểm cuối API của dịch vụ PaddleOCR',
|
||||
paddleocrApiUrlPlaceholder: 'Ví dụ: https://paddleocr-server.com/layout-parsing',
|
||||
paddleocrAccessToken: 'Token truy cập AI Studio',
|
||||
paddleocrAccessTokenTip: 'Token truy cập cho API PaddleOCR (tùy chọn)',
|
||||
paddleocrAccessTokenPlaceholder: 'Token AI Studio của bạn (tùy chọn)',
|
||||
paddleocrAlgorithm: 'Thuật toán PaddleOCR',
|
||||
paddleocrAlgorithmTip: 'Thuật toán được sử dụng để xử lý PaddleOCR',
|
||||
paddleocrSelectAlgorithm: 'Chọn thuật toán',
|
||||
paddleocrModelNamePlaceholder: 'Ví dụ: paddleocr-from-env-1',
|
||||
},
|
||||
chunk: {
|
||||
chunk: 'Khối',
|
||||
@ -595,6 +606,17 @@ export default {
|
||||
modelTypeMessage: 'Vui lòng nhập loại mô hình của bạn!',
|
||||
addLlmBaseUrl: 'URL cơ sở',
|
||||
baseUrlNameMessage: 'Vui lòng nhập URL cơ sở của bạn!',
|
||||
paddleocr: {
|
||||
apiUrl: 'URL API PaddleOCR',
|
||||
apiUrlPlaceholder: 'Ví dụ: https://paddleocr-server.com/layout-parsing',
|
||||
accessToken: 'Token truy cập AI Studio',
|
||||
accessTokenPlaceholder: 'Token AI Studio của bạn (tùy chọn)',
|
||||
algorithm: 'Thuật toán PaddleOCR',
|
||||
selectAlgorithm: 'Chọn thuật toán',
|
||||
modelNamePlaceholder: 'Ví dụ: paddleocr-from-env-1',
|
||||
modelNameRequired: 'Tên mô hình là bắt buộc',
|
||||
apiUrlRequired: 'URL API PaddleOCR là bắt buộc'
|
||||
},
|
||||
vision: 'Có hỗ trợ Tầm nhìn không?',
|
||||
ollamaLink: 'Cách tích hợp {{name}}',
|
||||
FishAudioLink: 'Cách sử dụng FishAudio',
|
||||
|
||||
@ -367,6 +367,17 @@ export default {
|
||||
`,
|
||||
tags: '標籤',
|
||||
addTag: '增加標籤',
|
||||
paddleocrOptions: 'PaddleOCR 選項',
|
||||
paddleocrApiUrl: 'PaddleOCR API URL',
|
||||
paddleocrApiUrlTip: 'PaddleOCR 服務的 API 端點 URL',
|
||||
paddleocrApiUrlPlaceholder: '例如:https://paddleocr-server.com/layout-parsing',
|
||||
paddleocrAccessToken: 'AI Studio 訪問令牌',
|
||||
paddleocrAccessTokenTip: 'PaddleOCR API 的訪問令牌(可選)',
|
||||
paddleocrAccessTokenPlaceholder: '您的 AI Studio 令牌(可選)',
|
||||
paddleocrAlgorithm: 'PaddleOCR 算法',
|
||||
paddleocrAlgorithmTip: '用於 PaddleOCR 解析的算法',
|
||||
paddleocrSelectAlgorithm: '選擇算法',
|
||||
paddleocrModelNamePlaceholder: '例如:paddleocr-from-env-1',
|
||||
useGraphRag: '提取知識圖譜',
|
||||
useGraphRagTip:
|
||||
'基於知識庫內所有切好的文本塊構建知識圖譜,用以提升多跳和複雜問題回答的正確率。請注意:構建知識圖譜將消耗大量 token 和時間。詳見 https://ragflow.io/docs/dev/construct_knowledge_graph。',
|
||||
@ -644,6 +655,17 @@ export default {
|
||||
modelNameMessage: '請輸入模型名稱!',
|
||||
modelTypeMessage: '請輸入模型類型!',
|
||||
baseUrlNameMessage: '請輸入基礎 Url!',
|
||||
paddleocr: {
|
||||
apiUrl: 'PaddleOCR API URL',
|
||||
apiUrlPlaceholder: '例如:https://paddleocr-server.com/layout-parsing',
|
||||
accessToken: 'AI Studio 存取權杖',
|
||||
accessTokenPlaceholder: '您的 AI Studio 權杖(選填)',
|
||||
algorithm: 'PaddleOCR 演算法',
|
||||
selectAlgorithm: '選擇演算法',
|
||||
modelNamePlaceholder: '例如:paddleocr-from-env-1',
|
||||
modelNameRequired: '模型名稱為必填項目',
|
||||
apiUrlRequired: 'PaddleOCR API URL 為必填項目'
|
||||
},
|
||||
ollamaLink: '如何集成 {{name}}',
|
||||
FishAudioLink: '如何使用Fish Audio',
|
||||
TencentCloudLink: '如何使用騰訊雲語音識別',
|
||||
|
||||
@ -390,6 +390,17 @@ export default {
|
||||
'启用公式识别。注意:对于西里尔文档可能无法正常工作。',
|
||||
mineruTableEnable: '表格识别',
|
||||
mineruTableEnableTip: '启用表格识别和提取。',
|
||||
paddleocrOptions: 'PaddleOCR 选项',
|
||||
paddleocrApiUrl: 'PaddleOCR API URL',
|
||||
paddleocrApiUrlTip: 'PaddleOCR 服务的 API 端点 URL',
|
||||
paddleocrApiUrlPlaceholder: '例如:https://paddleocr-server.com/layout-parsing',
|
||||
paddleocrAccessToken: 'AI Studio 访问令牌',
|
||||
paddleocrAccessTokenTip: 'PaddleOCR API 的访问令牌(可选)',
|
||||
paddleocrAccessTokenPlaceholder: '您的 AI Studio 令牌(可选)',
|
||||
paddleocrAlgorithm: 'PaddleOCR 算法',
|
||||
paddleocrAlgorithmTip: '用于 PaddleOCR 解析的算法',
|
||||
paddleocrSelectAlgorithm: '选择算法',
|
||||
paddleocrModelNamePlaceholder: '例如:paddleocr-from-env-1',
|
||||
generationScopeTip: '选择 RAPTOR 的生成范围:整个知识库或单个文件。',
|
||||
generationScope: '生成范围',
|
||||
scopeSingleFile: '单文件',
|
||||
@ -1113,6 +1124,17 @@ General:实体和关系提取提示来自 GitHub - microsoft/graphrag:基于
|
||||
vlmLmdeployEngine: '基于LMDeploy引擎的视觉语言模型(实验性)',
|
||||
},
|
||||
},
|
||||
paddleocr: {
|
||||
apiUrl: 'PaddleOCR API URL',
|
||||
apiUrlPlaceholder: '例如:https://paddleocr-server.com/layout-parsing',
|
||||
accessToken: 'AI Studio访问令牌',
|
||||
accessTokenPlaceholder: '您的 AI Studio 令牌(可选)',
|
||||
algorithm: 'PaddleOCR算法',
|
||||
selectAlgorithm: '选择算法',
|
||||
modelNamePlaceholder: '例如:paddleocr-from-env-1',
|
||||
modelNameRequired: '模型名称为必填项',
|
||||
apiUrlRequired: 'PaddleOCR API URL 为必填项'
|
||||
},
|
||||
},
|
||||
message: {
|
||||
registered: '注册成功',
|
||||
|
||||
@ -504,3 +504,43 @@ export const useSubmitMinerU = () => {
|
||||
mineruLoading: loading,
|
||||
};
|
||||
};
|
||||
|
||||
/**
 * Hook backing the "add PaddleOCR model" dialog.
 *
 * Exposes the dialog's visibility state together with a submit handler that
 * registers the model through `addLlm`.
 *
 * @returns `paddleocrVisible`, `hidePaddleOCRModal`, `showPaddleOCRModal`,
 *   `onPaddleOCROk` (the submit handler) and `paddleocrLoading` (the loading
 *   flag reported by `useAddLlm`).
 */
export const useSubmitPaddleOCR = () => {
  const { addLlm, loading: paddleocrLoading } = useAddLlm();
  const {
    visible: paddleocrVisible,
    hideModal: hidePaddleOCRModal,
    showModal: showPaddleOCRModal,
  } = useSetModalState();

  /**
   * Submits the form payload. Resolves to `true` (after hiding the dialog)
   * when `addLlm` resolves to 0, and to `false` otherwise.
   */
  const onPaddleOCROk = useCallback(
    async (payload: any) => {
      // A shallow copy of the whole form payload is sent as `api_key`.
      const settings: any = { ...payload };

      const request: IAddLlmRequestBody = {
        llm_factory: LLMFactory.PaddleOCR,
        llm_name: payload.llm_name,
        model_type: 'ocr',
        api_key: settings,
        api_base: '',
        max_tokens: 0,
      };

      const succeeded = (await addLlm(request)) === 0;
      if (succeeded) {
        hidePaddleOCRModal();
      }
      return succeeded;
    },
    [addLlm, hidePaddleOCRModal],
  );

  return {
    paddleocrVisible,
    hidePaddleOCRModal,
    showPaddleOCRModal,
    onPaddleOCROk,
    paddleocrLoading,
  };
};
|
||||
|
||||
@ -15,6 +15,7 @@ import {
|
||||
useSubmitHunyuan,
|
||||
useSubmitMinerU,
|
||||
useSubmitOllama,
|
||||
useSubmitPaddleOCR,
|
||||
useSubmitSpark,
|
||||
useSubmitSystemModelSetting,
|
||||
useSubmitTencentCloud,
|
||||
@ -28,6 +29,7 @@ import FishAudioModal from './modal/fish-audio-modal';
|
||||
import GoogleModal from './modal/google-modal';
|
||||
import HunyuanModal from './modal/hunyuan-modal';
|
||||
import MinerUModal from './modal/mineru-modal';
|
||||
import PaddleOCRModal from './modal/paddleocr-modal';
|
||||
import TencentCloudModal from './modal/next-tencent-modal';
|
||||
import OllamaModal from './modal/ollama-modal';
|
||||
import SparkModal from './modal/spark-modal';
|
||||
@ -138,6 +140,14 @@ const ModelProviders = () => {
|
||||
mineruLoading,
|
||||
} = useSubmitMinerU();
|
||||
|
||||
const {
|
||||
paddleocrVisible,
|
||||
hidePaddleOCRModal,
|
||||
showPaddleOCRModal,
|
||||
onPaddleOCROk,
|
||||
paddleocrLoading,
|
||||
} = useSubmitPaddleOCR();
|
||||
|
||||
const ModalMap = useMemo(
|
||||
() => ({
|
||||
[LLMFactory.Bedrock]: showBedrockAddingModal,
|
||||
@ -150,6 +160,7 @@ const ModelProviders = () => {
|
||||
[LLMFactory.GoogleCloud]: showGoogleAddingModal,
|
||||
[LLMFactory.AzureOpenAI]: showAzureAddingModal,
|
||||
[LLMFactory.MinerU]: showMineruModal,
|
||||
[LLMFactory.PaddleOCR]: showPaddleOCRModal,
|
||||
}),
|
||||
[
|
||||
showBedrockAddingModal,
|
||||
@ -162,6 +173,7 @@ const ModelProviders = () => {
|
||||
showGoogleAddingModal,
|
||||
showAzureAddingModal,
|
||||
showMineruModal,
|
||||
showPaddleOCRModal,
|
||||
],
|
||||
);
|
||||
|
||||
@ -309,6 +321,12 @@ const ModelProviders = () => {
|
||||
onOk={onMineruOk}
|
||||
loading={mineruLoading}
|
||||
></MinerUModal>
|
||||
<PaddleOCRModal
|
||||
visible={paddleocrVisible}
|
||||
hideModal={hidePaddleOCRModal}
|
||||
onOk={onPaddleOCROk}
|
||||
loading={paddleocrLoading}
|
||||
></PaddleOCRModal>
|
||||
</div>
|
||||
);
|
||||
};
|
||||
|
||||
@ -0,0 +1,135 @@
|
||||
import { useForm } from 'react-hook-form';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
import { z } from 'zod';
|
||||
import { zodResolver } from '@hookform/resolvers/zod';
|
||||
import { t } from 'i18next';
|
||||
import {
|
||||
Dialog,
|
||||
DialogContent,
|
||||
DialogHeader,
|
||||
DialogTitle,
|
||||
} from '@/components/ui/dialog';
|
||||
import { RAGFlowFormItem } from '@/components/ragflow-form';
|
||||
import { RAGFlowSelect, RAGFlowSelectOptionType } from '@/components/ui/select';
|
||||
import { Input } from '@/components/ui/input';
|
||||
import { Form } from '@/components/ui/form';
|
||||
import { LLMHeader } from '../../components/llm-header';
|
||||
import { LLMFactory } from '@/constants/llm';
|
||||
|
||||
/** Algorithm pre-selected in the form and used by the schema when the field is omitted. */
const DEFAULT_ALGORITHM = 'PaddleOCR-VL';

/**
 * Builds the zod schema that validates the PaddleOCR form.
 *
 * The schema is created on demand rather than once at module load: the
 * "required" messages are translated with i18next's global `t`, and
 * evaluating them at import time would freeze whatever language (or raw key,
 * if i18next is not initialised yet) was active at that moment, so later
 * language changes would never reach the validation errors.
 */
const createFormSchema = () =>
  z.object({
    llm_name: z.string().min(1, {
      message: t('setting.paddleocr.modelNameRequired'),
    }),
    paddleocr_api_url: z.string().min(1, {
      message: t('setting.paddleocr.apiUrlRequired'),
    }),
    paddleocr_access_token: z.string().optional(),
    paddleocr_algorithm: z.string().default(DEFAULT_ALGORITHM),
  });

/** Values produced by the PaddleOCR form once validation has passed. */
export type PaddleOCRFormValues = z.infer<ReturnType<typeof createFormSchema>>;

/** Props shared by the modal: visibility, close callback, submit callback and loading flag. */
export interface IModalProps<T> {
  visible: boolean;
  hideModal: () => void;
  /** Submit callback; a resolved `true` makes the modal close itself. */
  onOk?: (data: T) => Promise<boolean>;
  loading?: boolean;
}

const algorithmOptions: RAGFlowSelectOptionType[] = [
  { label: DEFAULT_ALGORITHM, value: DEFAULT_ALGORITHM },
];

/**
 * Dialog for registering a PaddleOCR model: model name, API URL, optional
 * access token and algorithm.
 */
const PaddleOCRModal = ({
  visible,
  hideModal,
  onOk,
  loading,
}: IModalProps<PaddleOCRFormValues>) => {
  // Note: this `t` shadows the i18next import used by `createFormSchema`.
  const { t } = useTranslation();

  const form = useForm<PaddleOCRFormValues>({
    // Rebuilt on each render so validation messages are translated at render
    // time instead of at module load.
    resolver: zodResolver(createFormSchema()),
    defaultValues: {
      paddleocr_algorithm: DEFAULT_ALGORITHM,
    },
  });

  // Forwards the validated values to `onOk` and closes the dialog on success.
  const handleOk = async (values: PaddleOCRFormValues) => {
    const ret = await onOk?.(values);
    if (ret) {
      hideModal();
    }
  };

  return (
    <Dialog open={visible} onOpenChange={hideModal}>
      <DialogContent>
        <DialogHeader>
          <DialogTitle>
            <LLMHeader name={LLMFactory.PaddleOCR} />
          </DialogTitle>
        </DialogHeader>
        <Form {...form}>
          <form
            onSubmit={form.handleSubmit(handleOk)}
            className="space-y-6"
            id="paddleocr-form"
          >
            <RAGFlowFormItem
              name="llm_name"
              label={t('setting.modelName')}
              required
            >
              <Input placeholder={t('setting.paddleocr.modelNamePlaceholder')} />
            </RAGFlowFormItem>
            <RAGFlowFormItem
              name="paddleocr_api_url"
              label={t('setting.paddleocr.apiUrl')}
              required
            >
              <Input placeholder={t('setting.paddleocr.apiUrlPlaceholder')} />
            </RAGFlowFormItem>
            <RAGFlowFormItem
              name="paddleocr_access_token"
              label={t('setting.paddleocr.accessToken')}
            >
              <Input placeholder={t('setting.paddleocr.accessTokenPlaceholder')} />
            </RAGFlowFormItem>
            <RAGFlowFormItem
              name="paddleocr_algorithm"
              label={t('setting.paddleocr.algorithm')}
            >
              {(field) => (
                <RAGFlowSelect
                  value={field.value}
                  onChange={field.onChange}
                  options={algorithmOptions}
                  placeholder={t('setting.paddleocr.selectAlgorithm')}
                />
              )}
            </RAGFlowFormItem>
            <div className="flex justify-end space-x-2">
              <button
                type="button"
                onClick={hideModal}
                className="btn btn-secondary"
              >
                {t('common.cancel')}
              </button>
              <button
                type="submit"
                disabled={loading}
                className="btn btn-primary"
              >
                {loading ? t('common.adding') : t('common.add')}
              </button>
            </div>
          </form>
        </Form>
      </DialogContent>
    </Dialog>
  );
};
|
||||
|
||||
export default PaddleOCRModal;
|
||||
Reference in New Issue
Block a user