Mirror of https://github.com/infiniflow/ragflow.git (synced 2025-12-08 20:42:30 +08:00)
### What problem does this PR solve?

Fix: The max tokens defined by the tenant are not used (#4297) (#2817)

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

---------

Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>
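In short, the dialog chat path now reads `max_tokens` from the tenant's model configuration instead of the built-in LLM table, falling back to 8192 only when the tenant has not set a value. A minimal sketch of that precedence rule (`resolve_max_tokens` is an illustrative helper, not RAGFlow's API):

```python
# Minimal sketch of the fix's precedence rule; resolve_max_tokens is an
# illustrative helper name, not part of RAGFlow's API.
def resolve_max_tokens(llm_model_config: dict) -> int:
    # The tenant-defined limit wins; 8192 is only the fallback default,
    # mirroring llm_model_config.get("max_tokens", 8192) in the diff below.
    return llm_model_config.get("max_tokens", 8192)

assert resolve_max_tokens({"max_tokens": 32768}) == 32768  # tenant value honored
assert resolve_max_tokens({}) == 8192                      # no tenant value: default
```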
```diff
@@ -29,7 +29,7 @@ from api.db.db_models import Dialog, DB
 from api.db.services.common_service import CommonService
 from api.db.services.document_service import DocumentService
 from api.db.services.knowledgebase_service import KnowledgebaseService
-from api.db.services.llm_service import LLMService, TenantLLMService, LLMBundle
+from api.db.services.llm_service import TenantLLMService, LLMBundle
 from api import settings
 from graphrag.utils import get_tags_from_cache, set_tags_to_cache
 from rag.app.resume import forbidden_select_fields4resume
```
```diff
@@ -172,21 +172,12 @@ def chat(dialog, messages, stream=True, **kwargs):
 
     chat_start_ts = timer()
 
-    # Get llm model name and model provider name
-    llm_id, model_provider = TenantLLMService.split_model_name_and_factory(dialog.llm_id)
-
-    # Get llm model instance by model and provide name
-    llm = LLMService.query(llm_name=llm_id) if not model_provider else LLMService.query(llm_name=llm_id, fid=model_provider)
-
-    if not llm:
-        # Model name is provided by tenant, but not system built-in
-        llm = TenantLLMService.query(tenant_id=dialog.tenant_id, llm_name=llm_id) if not model_provider else \
-            TenantLLMService.query(tenant_id=dialog.tenant_id, llm_name=llm_id, llm_factory=model_provider)
-        if not llm:
-            raise LookupError("LLM(%s) not found" % dialog.llm_id)
-        max_tokens = 8192
+    if llm_id2llm_type(dialog.llm_id) == "image2text":
+        llm_model_config = TenantLLMService.get_model_config(dialog.tenant_id, LLMType.IMAGE2TEXT, dialog.llm_id)
     else:
-        max_tokens = llm[0].max_tokens
+        llm_model_config = TenantLLMService.get_model_config(dialog.tenant_id, LLMType.CHAT, dialog.llm_id)
+
+    max_tokens = llm_model_config.get("max_tokens", 8192)
 
     check_llm_ts = timer()
 
```
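For context, here is a self-contained approximation of the new lookup flow. `llm_id2llm_type` and `get_model_config` are stubbed with assumed behavior (the real implementations live in RAGFlow's service layer); only the call pattern of a type-dispatched config lookup followed by a guarded `.get` mirrors the diff:

```python
# Hypothetical stand-ins for the RAGFlow services used in the diff; only the
# call pattern (dispatch on model type, then .get with a default) is taken
# from the real code.
from enum import Enum

class LLMType(Enum):
    CHAT = "chat"
    IMAGE2TEXT = "image2text"

def llm_id2llm_type(llm_id: str) -> str:
    # Assumption: vision-capable model ids are tagged somewhere upstream.
    return "image2text" if "vision" in llm_id else "chat"

def get_model_config(tenant_id: str, llm_type: LLMType, llm_id: str) -> dict:
    # Assumption: per-tenant overrides would normally come from the database.
    fake_db = {("t1", "gpt-4o"): {"max_tokens": 16384}}
    return fake_db.get((tenant_id, llm_id), {})

def pick_max_tokens(tenant_id: str, llm_id: str) -> int:
    if llm_id2llm_type(llm_id) == "image2text":
        cfg = get_model_config(tenant_id, LLMType.IMAGE2TEXT, llm_id)
    else:
        cfg = get_model_config(tenant_id, LLMType.CHAT, llm_id)
    return cfg.get("max_tokens", 8192)

print(pick_max_tokens("t1", "gpt-4o"))  # 16384: tenant-defined limit is used
print(pick_max_tokens("t2", "gpt-4o"))  # 8192: no tenant config, fallback
```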