mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Feat: add vision LLM PDF parser (#6173)
### What problem does this PR solve? Add vision LLM PDF parser ### Type of change - [x] New Feature (non-breaking change which adds functionality) --------- Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>
This commit is contained in:
@ -15,13 +15,12 @@
|
||||
#
|
||||
import logging
|
||||
|
||||
from api.db.services.user_service import TenantService
|
||||
from rag.llm import EmbeddingModel, CvModel, ChatModel, RerankModel, Seq2txtModel, TTSModel
|
||||
from api import settings
|
||||
from api.db import LLMType
|
||||
from api.db.db_models import DB
|
||||
from api.db.db_models import LLMFactories, LLM, TenantLLM
|
||||
from api.db.db_models import DB, LLM, LLMFactories, TenantLLM
|
||||
from api.db.services.common_service import CommonService
|
||||
from api.db.services.user_service import TenantService
|
||||
from rag.llm import ChatModel, CvModel, EmbeddingModel, RerankModel, Seq2txtModel, TTSModel
|
||||
|
||||
|
||||
class LLMFactoriesService(CommonService):
|
||||
@ -266,6 +265,14 @@ class LLMBundle:
|
||||
"LLMBundle.describe can't update token usage for {}/IMAGE2TEXT used_tokens: {}".format(self.tenant_id, used_tokens))
|
||||
return txt
|
||||
|
||||
def describe_with_prompt(self, image, prompt):
|
||||
txt, used_tokens = self.mdl.describe_with_prompt(image, prompt)
|
||||
if not TenantLLMService.increase_usage(
|
||||
self.tenant_id, self.llm_type, used_tokens):
|
||||
logging.error(
|
||||
"LLMBundle.describe can't update token usage for {}/IMAGE2TEXT used_tokens: {}".format(self.tenant_id, used_tokens))
|
||||
return txt
|
||||
|
||||
def transcription(self, audio):
|
||||
txt, used_tokens = self.mdl.transcription(audio)
|
||||
if not TenantLLMService.increase_usage(
|
||||
|
||||
Reference in New Issue
Block a user