mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Refa: remove temperature since some LLMs fail to support it. (#8981)
### What problem does this PR solve?

### Type of change

- [x] Refactoring
This commit is contained in:
@ -107,7 +107,7 @@ class RecursiveAbstractiveProcessing4TreeOrganizedRetrieval:
|
||||
),
|
||||
}
|
||||
],
|
||||
{"temperature": 0.3, "max_tokens": self._max_token},
|
||||
{"max_tokens": self._max_token},
|
||||
)
|
||||
cnt = re.sub(
|
||||
"(······\n由于长度的原因,回答被截断了,要继续吗?|For the content length reason, it stopped, continue?)",
|
||||
|
||||
@ -103,6 +103,7 @@ MAX_CONCURRENT_CHUNK_BUILDERS = int(os.environ.get('MAX_CONCURRENT_CHUNK_BUILDER
|
||||
MAX_CONCURRENT_MINIO = int(os.environ.get('MAX_CONCURRENT_MINIO', '10'))
|
||||
task_limiter = trio.Semaphore(MAX_CONCURRENT_TASKS)
|
||||
chunk_limiter = trio.CapacityLimiter(MAX_CONCURRENT_CHUNK_BUILDERS)
|
||||
embed_limiter = trio.CapacityLimiter(MAX_CONCURRENT_CHUNK_BUILDERS)
|
||||
minio_limiter = trio.CapacityLimiter(MAX_CONCURRENT_MINIO)
|
||||
kg_limiter = trio.CapacityLimiter(2)
|
||||
WORKER_HEARTBEAT_TIMEOUT = int(os.environ.get('WORKER_HEARTBEAT_TIMEOUT', '120'))
|
||||
@ -442,7 +443,8 @@ async def embedding(docs, mdl, parser_config=None, callback=None):
|
||||
|
||||
cnts_ = np.array([])
|
||||
for i in range(0, len(cnts), EMBEDDING_BATCH_SIZE):
|
||||
vts, c = await trio.to_thread.run_sync(lambda: mdl.encode([truncate(c, mdl.max_length-10) for c in cnts[i: i + EMBEDDING_BATCH_SIZE]]))
|
||||
async with embed_limiter:
|
||||
vts, c = await trio.to_thread.run_sync(lambda: mdl.encode([truncate(c, mdl.max_length-10) for c in cnts[i: i + EMBEDDING_BATCH_SIZE]]))
|
||||
if len(cnts_) == 0:
|
||||
cnts_ = vts
|
||||
else:
|
||||
|
||||
Reference in New Issue
Block a user