diff --git a/deepdoc/parser/mineru_parser.py b/deepdoc/parser/mineru_parser.py index 2883bf881..056018a35 100644 --- a/deepdoc/parser/mineru_parser.py +++ b/deepdoc/parser/mineru_parser.py @@ -24,6 +24,7 @@ import tempfile import threading import time import zipfile +from dataclasses import dataclass from io import BytesIO from os import PathLike from pathlib import Path @@ -53,6 +54,87 @@ class MinerUContentType(StrEnum): DISCARDED = "discarded" +# Mapping from language names to MinerU language codes +LANGUAGE_TO_MINERU_MAP = { + 'English': 'en', + 'Chinese': 'ch', + 'Traditional Chinese': 'chinese_cht', + 'Russian': 'east_slavic', + 'Ukrainian': 'east_slavic', + 'Indonesian': 'latin', + 'Spanish': 'latin', + 'Vietnamese': 'latin', + 'Japanese': 'japan', + 'Korean': 'korean', + 'Portuguese BR': 'latin', + 'German': 'latin', + 'French': 'latin', + 'Italian': 'latin', + 'Tamil': 'ta', + 'Telugu': 'te', + 'Kannada': 'ka', + 'Thai': 'th', + 'Greek': 'el', + 'Hindi': 'devanagari', +} + + +class MinerUBackend(StrEnum): + """MinerU processing backend options.""" + + PIPELINE = "pipeline" # Traditional multimodel pipeline (default) + VLM_TRANSFORMERS = "vlm-transformers" # Vision-language model using HuggingFace Transformers + VLM_MLX_ENGINE = "vlm-mlx-engine" # Faster, requires Apple 
Silicon and macOS 13.5+ + VLM_VLLM_ENGINE = "vlm-vllm-engine" # Local vLLM engine, requires local GPU + VLM_VLLM_ASYNC_ENGINE = "vlm-vllm-async-engine" # Asynchronous vLLM engine, new in MinerU API + VLM_LMDEPLOY_ENGINE = "vlm-lmdeploy-engine" # LMDeploy engine + VLM_HTTP_CLIENT = "vlm-http-client" # HTTP client for remote vLLM server (CPU only) + + +class MinerULanguage(StrEnum): + """MinerU supported languages for OCR (pipeline backend only).""" + + CH = "ch" # Chinese + CH_SERVER = "ch_server" # Chinese (server) + CH_LITE = "ch_lite" # Chinese (lite) + EN = "en" # English + KOREAN = "korean" # Korean + JAPAN = "japan" # Japanese + CHINESE_CHT = "chinese_cht" # Chinese Traditional + TA = "ta" # Tamil + TE = "te" # Telugu + KA = "ka" # Kannada + TH = "th" # Thai + EL = "el" # Greek + LATIN = "latin" # Latin + ARABIC = "arabic" # Arabic + EAST_SLAVIC = "east_slavic" # East Slavic + CYRILLIC = "cyrillic" # Cyrillic + DEVANAGARI = "devanagari" # Devanagari + + +class MinerUParseMethod(StrEnum): + """MinerU PDF parsing methods (pipeline backend only).""" + + AUTO = "auto" # Automatically determine the method based on the file type + TXT = "txt" # Use text extraction method + OCR = "ocr" # Use OCR method for image-based PDFs + + +@dataclass +class MinerUParseOptions: + """Options for MinerU PDF parsing.""" + + backend: MinerUBackend = MinerUBackend.PIPELINE + lang: Optional[MinerULanguage] = None # language for OCR (pipeline backend only) + method: MinerUParseMethod = MinerUParseMethod.AUTO + server_url: Optional[str] = None + delete_output: bool = True + parse_method: str = "raw" + formula_enable: bool = True + table_enable: bool = True + + class MinerUParser(RAGFlowPdfParser): def __init__(self, mineru_path: str = "mineru", mineru_api: str = "", mineru_server_url: str = ""): self.mineru_path = Path(mineru_path) @@ -96,7 +178,8 @@ class MinerUParser(RAGFlowPdfParser): with open(full_path, "wb") as f: f.write(zip_ref.read(filename)) - def _is_http_endpoint_valid(self, 
url, timeout=5): + @staticmethod + def _is_http_endpoint_valid(url, timeout=5): try: response = requests.head(url, timeout=timeout, allow_redirects=True) return response.status_code in [200, 301, 302, 307, 308] @@ -141,7 +224,8 @@ class MinerUParser(RAGFlowPdfParser): self.logger.warning(f"[MinerU] vlm-http-client server check failed: {e}") try: response = requests.get(server_url, timeout=5) - self.logger.info(f"[MinerU] vlm-http-client server connection check: success with status {response.status_code}") + self.logger.info( + f"[MinerU] vlm-http-client server connection check: success with status {response.status_code}") self.using_api = False return True, reason except Exception as e: @@ -185,14 +269,15 @@ class MinerUParser(RAGFlowPdfParser): return False, reason def _run_mineru( - self, input_path: Path, output_dir: Path, method: str = "auto", backend: str = "pipeline", lang: Optional[str] = None, server_url: Optional[str] = None, callback: Optional[Callable] = None + self, input_path: Path, output_dir: Path, options: MinerUParseOptions, callback: Optional[Callable] = None ): if self.using_api: - self._run_mineru_api(input_path, output_dir, method, backend, lang, callback) + self._run_mineru_api(input_path, output_dir, options, callback) else: - self._run_mineru_executable(input_path, output_dir, method, backend, lang, server_url, callback) + self._run_mineru_executable(input_path, output_dir, options, callback) - def _run_mineru_api(self, input_path: Path, output_dir: Path, method: str = "auto", backend: str = "pipeline", lang: Optional[str] = None, callback: Optional[Callable] = None): + def _run_mineru_api(self, input_path: Path, output_dir: Path, options: MinerUParseOptions, + callback: Optional[Callable] = None): output_zip_path = os.path.join(str(output_dir), "output.zip") pdf_file_path = str(input_path) @@ -201,18 +286,18 @@ class MinerUParser(RAGFlowPdfParser): raise RuntimeError(f"[MinerU] PDF file not exists: {pdf_file_path}") pdf_file_name = 
Path(pdf_file_path).stem.strip() - output_path = os.path.join(str(output_dir), pdf_file_name, method) + output_path = os.path.join(str(output_dir), pdf_file_name, options.method) os.makedirs(output_path, exist_ok=True) files = {"files": (pdf_file_name + ".pdf", open(pdf_file_path, "rb"), "application/pdf")} data = { "output_dir": "./output", - "lang_list": lang, - "backend": backend, - "parse_method": method, - "formula_enable": True, - "table_enable": True, + "lang_list": options.lang, + "backend": options.backend, + "parse_method": options.method, + "formula_enable": options.formula_enable, + "table_enable": options.table_enable, "server_url": None, "return_md": True, "return_middle_json": True, @@ -229,7 +314,8 @@ class MinerUParser(RAGFlowPdfParser): self.logger.info(f"[MinerU] invoke api: {self.mineru_api}/file_parse") if callback: callback(0.20, f"[MinerU] invoke api: {self.mineru_api}/file_parse") - response = requests.post(url=f"{self.mineru_api}/file_parse", files=files, data=data, headers=headers, timeout=1800) + response = requests.post(url=f"{self.mineru_api}/file_parse", files=files, data=data, headers=headers, + timeout=1800) response.raise_for_status() if response.headers.get("Content-Type") == "application/zip": @@ -253,15 +339,15 @@ class MinerUParser(RAGFlowPdfParser): self.logger.info("[MinerU] Api completed successfully.") def _run_mineru_executable( - self, input_path: Path, output_dir: Path, method: str = "auto", backend: str = "pipeline", lang: Optional[str] = None, server_url: Optional[str] = None, callback: Optional[Callable] = None + self, input_path: Path, output_dir: Path, options: MinerUParseOptions, callback: Optional[Callable] = None ): - cmd = [str(self.mineru_path), "-p", str(input_path), "-o", str(output_dir), "-m", method] - if backend: - cmd.extend(["-b", backend]) - if lang: - cmd.extend(["-l", lang]) - if server_url and backend == "vlm-http-client": - cmd.extend(["-u", server_url]) + cmd = [str(self.mineru_path), "-p", 
str(input_path), "-o", str(output_dir), "-m", options.method] + if options.backend: + cmd.extend(["-b", options.backend]) + if options.lang: + cmd.extend(["-l", options.lang]) + if options.server_url and options.backend == "vlm-http-client": + cmd.extend(["-u", options.server_url]) self.logger.info(f"[MinerU] Running command: {' '.join(cmd)}") @@ -313,7 +399,8 @@ class MinerUParser(RAGFlowPdfParser): try: with pdfplumber.open(fnm) if isinstance(fnm, (str, PathLike)) else pdfplumber.open(BytesIO(fnm)) as pdf: self.pdf = pdf - self.page_images = [p.to_image(resolution=72 * zoomin, antialias=True).original for _, p in enumerate(self.pdf.pages[page_from:page_to])] + self.page_images = [p.to_image(resolution=72 * zoomin, antialias=True).original for _, p in + enumerate(self.pdf.pages[page_from:page_to])] except Exception as e: self.page_images = None self.total_page = 0 @@ -375,7 +462,8 @@ class MinerUParser(RAGFlowPdfParser): pos = poss[-1] last_page_idx = pos[0][-1] if not (0 <= last_page_idx < page_count): - self.logger.warning(f"[MinerU] Last page index {last_page_idx} out of range for {page_count} pages; skipping crop.") + self.logger.warning( + f"[MinerU] Last page index {last_page_idx} out of range for {page_count} pages; skipping crop.") if need_position: return None, None return @@ -401,10 +489,12 @@ class MinerUParser(RAGFlowPdfParser): if 0 <= pn - 1 < page_count: bottom += self.page_images[pn - 1].size[1] else: - self.logger.warning(f"[MinerU] Page index {pn}-1 out of range for {page_count} pages during crop; skipping height accumulation.") + self.logger.warning( + f"[MinerU] Page index {pn}-1 out of range for {page_count} pages during crop; skipping height accumulation.") if not (0 <= pns[0] < page_count): - self.logger.warning(f"[MinerU] Base page index {pns[0]} out of range for {page_count} pages during crop; skipping this segment.") + self.logger.warning( + f"[MinerU] Base page index {pns[0]} out of range for {page_count} pages during crop; skipping this 
segment.") continue img0 = self.page_images[pns[0]] @@ -417,7 +507,8 @@ class MinerUParser(RAGFlowPdfParser): bottom -= img0.size[1] for pn in pns[1:]: if not (0 <= pn < page_count): - self.logger.warning(f"[MinerU] Page index {pn} out of range for {page_count} pages during crop; skipping this page.") + self.logger.warning( + f"[MinerU] Page index {pn} out of range for {page_count} pages during crop; skipping this page.") continue page = self.page_images[pn] x0, y0, x1, y1 = int(left), 0, int(right), int(min(bottom, page.size[1])) @@ -461,7 +552,8 @@ class MinerUParser(RAGFlowPdfParser): poss.append(([int(p) - 1 for p in pn.split("-")], left, right, top, bottom)) return poss - def _read_output(self, output_dir: Path, file_stem: str, method: str = "auto", backend: str = "pipeline") -> list[dict[str, Any]]: + def _read_output(self, output_dir: Path, file_stem: str, method: str = "auto", backend: str = "pipeline") -> list[ + dict[str, Any]]: candidates = [] seen = set() @@ -543,11 +635,13 @@ class MinerUParser(RAGFlowPdfParser): case MinerUContentType.TEXT: section = output["text"] case MinerUContentType.TABLE: - section = output.get("table_body", "") + "\n".join(output.get("table_caption", [])) + "\n".join(output.get("table_footnote", [])) + section = output.get("table_body", "") + "\n".join(output.get("table_caption", [])) + "\n".join( + output.get("table_footnote", [])) if not section.strip(): section = "FAILED TO PARSE TABLE" case MinerUContentType.IMAGE: - section = "".join(output.get("image_caption", [])) + "\n" + "".join(output.get("image_footnote", [])) + section = "".join(output.get("image_caption", [])) + "\n" + "".join( + output.get("image_footnote", [])) case MinerUContentType.EQUATION: section = output["text"] case MinerUContentType.CODE: @@ -569,24 +663,30 @@ class MinerUParser(RAGFlowPdfParser): return [] def parse_pdf( - self, - filepath: str | PathLike[str], - binary: BytesIO | bytes, - callback: Optional[Callable] = None, - *, - output_dir: 
Optional[str] = None, - backend: str = "pipeline", - lang: Optional[str] = None, - method: str = "auto", - server_url: Optional[str] = None, - delete_output: bool = True, - parse_method: str = "raw", + self, + filepath: str | PathLike[str], + binary: BytesIO | bytes, + callback: Optional[Callable] = None, + *, + output_dir: Optional[str] = None, + backend: str = "pipeline", + server_url: Optional[str] = None, + delete_output: bool = True, + parse_method: str = "raw", + **kwargs, ) -> tuple: import shutil temp_pdf = None created_tmp_dir = False + # Resolve MinerU settings from kwargs: 'lang' is a language name mapped to a MinerU OCR code; 'parser_config' (may be missing or None) carries the mineru_* options + lang = kwargs.get('lang', 'English') + mineru_lang_code = LANGUAGE_TO_MINERU_MAP.get(lang, 'ch') # Returns 'ch' if lang not found + mineru_method_raw_str = (kwargs.get('parser_config') or {}).get('mineru_parse_method', 'auto') + enable_formula = (kwargs.get('parser_config') or {}).get('mineru_formula_enable', True) + enable_table = (kwargs.get('parser_config') or {}).get('mineru_table_enable', True) + # remove spaces, or mineru crash, and _read_output fail too file_path = Path(filepath) pdf_file_name = file_path.stem.replace(" ", "") + ".pdf" @@ -625,8 +725,18 @@ class MinerUParser(RAGFlowPdfParser): self.__images__(pdf, zoomin=1) try: - self._run_mineru(pdf, out_dir, method=method, backend=backend, lang=lang, server_url=server_url, callback=callback) - outputs = self._read_output(out_dir, pdf.stem, method=method, backend=backend) + options = MinerUParseOptions( + backend=MinerUBackend(backend), + lang=MinerULanguage(mineru_lang_code), + method=MinerUParseMethod(mineru_method_raw_str), + server_url=server_url, + delete_output=delete_output, + parse_method=parse_method, + formula_enable=enable_formula, + table_enable=enable_table, + ) + self._run_mineru(pdf, out_dir, options, callback=callback) + outputs = self._read_output(out_dir, pdf.stem, method=mineru_method_raw_str, backend=backend) self.logger.info(f"[MinerU] Parsed {len(outputs)} blocks from PDF.") if callback: callback(0.75, f"[MinerU] Parsed 
{len(outputs)} blocks from PDF.") diff --git a/rag/app/naive.py b/rag/app/naive.py index 353504d77..4d07d0983 100644 --- a/rag/app/naive.py +++ b/rag/app/naive.py @@ -85,6 +85,8 @@ def by_mineru(filename, binary=None, from_page=0, to_page=100000, lang="Chinese" binary=binary, callback=callback, parse_method=parse_method, + lang=lang, + **kwargs ) return sections, tables, pdf_parser except Exception as e: @@ -94,6 +96,9 @@ def by_mineru(filename, binary=None, from_page=0, to_page=100000, lang="Chinese" callback(-1, "MinerU not found.") return None, None, None + + + def by_docling(filename, binary=None, from_page=0, to_page=100000, lang="Chinese", callback=None, pdf_cls = None ,**kwargs): pdf_parser = DoclingParser() parse_method = kwargs.get("parse_method", "raw") diff --git a/rag/llm/ocr_model.py b/rag/llm/ocr_model.py index 73c3ed009..97eb6f3fd 100644 --- a/rag/llm/ocr_model.py +++ b/rag/llm/ocr_model.py @@ -67,7 +67,7 @@ class MinerUOcrModel(Base, MinerUParser): server_url = server_url or self.mineru_server_url return self.check_installation(backend=backend, server_url=server_url) - def parse_pdf(self, filepath: str, binary=None, callback=None, parse_method: str = "raw", **kwargs): + def parse_pdf(self, filepath: str, binary=None, callback=None, parse_method: str = "raw",**kwargs): ok, reason = self.check_available() if not ok: raise RuntimeError(f"MinerU not found or server not accessible: {reason}. 
Please install it via: pip install -U 'mineru[core]'.") @@ -82,5 +82,6 @@ class MinerUOcrModel(Base, MinerUParser): server_url=self.mineru_server_url, delete_output=self.mineru_delete_output, parse_method=parse_method, + **kwargs ) return sections, tables diff --git a/web/src/components/mineru-options-form-field.tsx b/web/src/components/mineru-options-form-field.tsx new file mode 100644 index 000000000..5a64a7337 --- /dev/null +++ b/web/src/components/mineru-options-form-field.tsx @@ -0,0 +1,97 @@ +import { RAGFlowFormItem } from '@/components/ragflow-form'; +import { RAGFlowSelect } from '@/components/ui/select'; +import { Switch } from '@/components/ui/switch'; +import { LLMFactory } from '@/constants/llm'; +import { buildOptions } from '@/utils/form'; +import { useFormContext, useWatch } from 'react-hook-form'; +import { useTranslation } from 'react-i18next'; + +const parseMethodOptions = buildOptions(['auto', 'txt', 'ocr']); + +export function MinerUOptionsFormField() { + const form = useFormContext(); + const { t } = useTranslation(); + + const layoutRecognize = useWatch({ + control: form.control, + name: 'parser_config.layout_recognize', + }); + + // Check if MinerU is selected (the value contains 'MinerU' or matches the factory name) + const isMinerUSelected = + layoutRecognize?.includes(LLMFactory.MinerU) || + layoutRecognize?.toLowerCase()?.includes('mineru'); + + if (!isMinerUSelected) { + return null; + } + + return ( +
+
+ {t('knowledgeConfiguration.mineruOptions', 'MinerU Options')} +
+ + + {(field) => ( + + )} + + + + {(field) => ( + + )} + + + + {(field) => ( + + )} + +
+ ); +} diff --git a/web/src/constants/common.ts b/web/src/constants/common.ts index 18d06ca39..205c28f4c 100644 --- a/web/src/constants/common.ts +++ b/web/src/constants/common.ts @@ -103,14 +103,22 @@ export const LanguageTranslationMap = { Chinese: 'zh', 'Traditional Chinese': 'zh-TRADITIONAL', Russian: 'ru', - Indonesia: 'id', + Indonesian: 'id', Spanish: 'es', Vietnamese: 'vi', Japanese: 'ja', + Korean: 'ko', 'Portuguese BR': 'pt-br', German: 'de', French: 'fr', Italian: 'it', + Tamil: 'ta', + Telugu: 'te', + Kannada: 'ka', + Thai: 'th', + Greek: 'el', + Hindi: 'hi', + Ukrainian: 'uk', }; export enum FileMimeType { diff --git a/web/src/locales/en.ts b/web/src/locales/en.ts index fa536ce1c..8750fe8a0 100644 --- a/web/src/locales/en.ts +++ b/web/src/locales/en.ts @@ -330,6 +330,15 @@ export default { reRankModelWaring: 'Re-rank model is very time consuming.', }, knowledgeConfiguration: { + mineruOptions: 'MinerU Options', + mineruParseMethod: 'Parse Method', + mineruParseMethodTip: + 'Method for parsing PDF: auto (automatic detection), txt (text extraction), ocr (optical character recognition)', + mineruFormulaEnable: 'Formula Recognition', + mineruFormulaEnableTip: + 'Enable formula recognition. 
Note: This may not work correctly for Cyrillic documents.', + mineruTableEnable: 'Table Recognition', + mineruTableEnableTip: 'Enable table recognition and extraction.', overlappedPercent: 'Overlapped percent(%)', generationScopeTip: 'Determines whether RAPTOR is generated for the entire dataset or for a single file.', diff --git a/web/src/locales/zh.ts b/web/src/locales/zh.ts index 85be01db7..c846bd2dd 100644 --- a/web/src/locales/zh.ts +++ b/web/src/locales/zh.ts @@ -265,6 +265,15 @@ export default { theDocumentBeingParsedCannotBeDeleted: '正在解析的文档不能被删除', }, knowledgeConfiguration: { + mineruOptions: 'MinerU 选项', + mineruParseMethod: '解析方法', + mineruParseMethodTip: + 'PDF 解析方法:auto(自动检测)、txt(文本提取)、ocr(光学字符识别)', + mineruFormulaEnable: '公式识别', + mineruFormulaEnableTip: + '启用公式识别。注意:对于西里尔文档可能无法正常工作。', + mineruTableEnable: '表格识别', + mineruTableEnableTip: '启用表格识别和提取。', generationScopeTip: '选择 RAPTOR 的生成范围:整个知识库或单个文件。', generationScope: '生成范围', scopeSingleFile: '单文件', diff --git a/web/src/pages/dataset/dataset-setting/configuration/naive.tsx b/web/src/pages/dataset/dataset-setting/configuration/naive.tsx index 3742db39f..d14eca96f 100644 --- a/web/src/pages/dataset/dataset-setting/configuration/naive.tsx +++ b/web/src/pages/dataset/dataset-setting/configuration/naive.tsx @@ -6,6 +6,7 @@ import { DelimiterFormField } from '@/components/delimiter-form-field'; import { ExcelToHtmlFormField } from '@/components/excel-to-html-form-field'; import { LayoutRecognizeFormField } from '@/components/layout-recognize-form-field'; import { MaxTokenNumberFormField } from '@/components/max-token-number-from-field'; +import { MinerUOptionsFormField } from '@/components/mineru-options-form-field'; import { ConfigurationFormContainer, MainContainer, @@ -17,6 +18,7 @@ export function NaiveConfiguration() { + diff --git a/web/src/pages/dataset/dataset-setting/form-schema.ts b/web/src/pages/dataset/dataset-setting/form-schema.ts index ae7342ede..00c00a725 100644 --- 
a/web/src/pages/dataset/dataset-setting/form-schema.ts +++ b/web/src/pages/dataset/dataset-setting/form-schema.ts @@ -13,6 +13,7 @@ export const formSchema = z // avatar: z.instanceof(File), avatar: z.any().nullish(), permission: z.string().optional(), + language: z.string().optional(), parser_id: z.string(), pipeline_id: z.string().optional(), pipeline_name: z.string().optional(), @@ -30,6 +31,10 @@ export const formSchema = z topn_tags: z.number().optional(), toc_extraction: z.boolean().optional(), overlapped_percent: z.number().optional(), + // MinerU-specific options + mineru_parse_method: z.enum(['auto', 'txt', 'ocr']).optional(), + mineru_formula_enable: z.boolean().optional(), + mineru_table_enable: z.boolean().optional(), raptor: z .object({ use_raptor: z.boolean().optional(), diff --git a/web/src/pages/dataset/dataset-setting/general-form.tsx b/web/src/pages/dataset/dataset-setting/general-form.tsx index 110c03a3e..a93dae8be 100644 --- a/web/src/pages/dataset/dataset-setting/general-form.tsx +++ b/web/src/pages/dataset/dataset-setting/general-form.tsx @@ -1,5 +1,7 @@ import { AvatarUpload } from '@/components/avatar-upload'; +import { SelectWithSearch } from '@/components/originui/select-with-search'; import PageRankFormField from '@/components/page-rank-form-field'; +import { RAGFlowFormItem } from '@/components/ragflow-form'; import { FormControl, FormField, @@ -8,6 +10,8 @@ import { FormMessage, } from '@/components/ui/form'; import { Input } from '@/components/ui/input'; +import { LanguageTranslationMap } from '@/constants/common'; +import { useMemo } from 'react'; import { useFormContext } from 'react-hook-form'; import { useTranslation } from 'react-i18next'; import { TagItems } from './components/tag-item'; @@ -18,6 +22,13 @@ export function GeneralForm() { const form = useFormContext(); const { t } = useTranslation(); + const languageOptions = useMemo(() => { + return Object.keys(LanguageTranslationMap).map((x) => ({ + label: x, + value: x, + })); 
+ }, []); + return ( <> )} /> +
+ + + +
) { const { t } = useTranslation(); + const languageOptions = useMemo(() => { + return Object.keys(LanguageTranslationMap).map((x) => ({ + label: x, + value: x, + })); + }, []); + const FormSchema = z .object({ name: z @@ -51,6 +66,7 @@ export function InputForm({ onOk }: IModalProps) { .trim(), parser_id: z.string().optional(), pipeline_id: z.string().optional(), + language: z.string().optional(), }) .superRefine((data, ctx) => { // When parseType === 1, parser_id is required @@ -83,6 +99,7 @@ export function InputForm({ onOk }: IModalProps) { parseType: 1, parser_id: '', embd_id: '', + language: 'English', }, }); @@ -130,6 +147,33 @@ export function InputForm({ onOk }: IModalProps) { )} /> + ( + + {t('common.language')} + + + + )} + /> + {parseType === 1 && }