mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-02-03 00:55:10 +08:00
feat: add paddleocr parser (#12513)
### What problem does this PR solve?

Add PaddleOCR as a new PDF parser.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
@@ -148,7 +148,7 @@ Procedural Memory: Learned skills, habits, and automated procedures.`,
|
||||
action: 'Action',
|
||||
},
|
||||
config: {
|
||||
memorySizeTooltip: `Accounts for each message's content + its embedding vector (≈ Content + Dimensions × 8 Bytes).
|
||||
memorySizeTooltip: `Accounts for each message's content + its embedding vector (≈ Content + Dimensions × 8 Bytes).
|
||||
Example: A 1 KB message with 1024-dim embedding uses ~9 KB. The 5 MB default limit holds ~500 such messages.`,
|
||||
avatar: 'Avatar',
|
||||
description: 'Description',
|
||||
@@ -424,6 +424,17 @@ Example: A 1 KB message with 1024-dim embedding uses ~9 KB. The 5 MB default lim
|
||||
'Enable formula recognition. Note: This may not work correctly for Cyrillic documents.',
|
||||
mineruTableEnable: 'Table recognition',
|
||||
mineruTableEnableTip: 'Enable table recognition and extraction.',
|
||||
paddleocrOptions: 'PaddleOCR Options',
|
||||
paddleocrApiUrl: 'PaddleOCR API URL',
|
||||
paddleocrApiUrlTip: 'The API endpoint URL for PaddleOCR service',
|
||||
paddleocrApiUrlPlaceholder: 'e.g. https://paddleocr-server.com/layout-parsing',
|
||||
paddleocrAccessToken: 'AI Studio Access Token',
|
||||
paddleocrAccessTokenTip: 'Access token for PaddleOCR API (optional)',
|
||||
paddleocrAccessTokenPlaceholder: 'Your AI Studio token (optional)',
|
||||
paddleocrAlgorithm: 'PaddleOCR Algorithm',
|
||||
paddleocrAlgorithmTip: 'Algorithm to use for PaddleOCR parsing',
|
||||
paddleocrSelectAlgorithm: 'Select Algorithm',
|
||||
paddleocrModelNamePlaceholder: 'e.g. paddleocr-from-env-1',
|
||||
overlappedPercent: 'Overlapped percent(%)',
|
||||
generationScopeTip:
|
||||
'Determines whether RAPTOR is generated for the entire dataset or for a single file.',
|
||||
@@ -1094,6 +1105,17 @@ Example: Virtual Hosted Style`,
|
||||
modelTypeMessage: 'Please input your model type!',
|
||||
addLlmBaseUrl: 'Base url',
|
||||
baseUrlNameMessage: 'Please input your base url!',
|
||||
paddleocr: {
|
||||
apiUrl: 'PaddleOCR API URL',
|
||||
apiUrlPlaceholder: 'For example: https://paddleocr-server.com/layout-parsing',
|
||||
accessToken: 'AI Studio Access Token',
|
||||
accessTokenPlaceholder: 'Your AI Studio token (optional)',
|
||||
algorithm: 'PaddleOCR Algorithm',
|
||||
selectAlgorithm: 'Select Algorithm',
|
||||
modelNamePlaceholder: 'For example: paddleocr-from-env-1',
|
||||
modelNameRequired: 'Model name is required',
|
||||
apiUrlRequired: 'PaddleOCR API URL is required'
|
||||
},
|
||||
vision: 'Does it support Vision?',
|
||||
ollamaLink: 'How to integrate {{name}}',
|
||||
FishAudioLink: 'How to use FishAudio',
|
||||
|
||||
Reference in New Issue
Block a user