Refa: remove temperature since some LLMs fail to support it. (#8981)

### What problem does this PR solve?


### Type of change

- [x] Refactoring
This commit is contained in:
Kevin Hu
2025-07-23 10:17:04 +08:00
committed by GitHub
parent 0020c50000
commit 935ce872d8
7 changed files with 10 additions and 12 deletions

View File

@@ -107,7 +107,7 @@ class RecursiveAbstractiveProcessing4TreeOrganizedRetrieval:
),
}
],
{"temperature": 0.3, "max_tokens": self._max_token},
{"max_tokens": self._max_token},
)
cnt = re.sub(
"(······\n由于长度的原因,回答被截断了,要继续吗?|For the content length reason, it stopped, continue?)",

View File

@@ -103,6 +103,7 @@ MAX_CONCURRENT_CHUNK_BUILDERS = int(os.environ.get('MAX_CONCURRENT_CHUNK_BUILDER
MAX_CONCURRENT_MINIO = int(os.environ.get('MAX_CONCURRENT_MINIO', '10'))
task_limiter = trio.Semaphore(MAX_CONCURRENT_TASKS)
chunk_limiter = trio.CapacityLimiter(MAX_CONCURRENT_CHUNK_BUILDERS)
embed_limiter = trio.CapacityLimiter(MAX_CONCURRENT_CHUNK_BUILDERS)
minio_limiter = trio.CapacityLimiter(MAX_CONCURRENT_MINIO)
kg_limiter = trio.CapacityLimiter(2)
WORKER_HEARTBEAT_TIMEOUT = int(os.environ.get('WORKER_HEARTBEAT_TIMEOUT', '120'))
@@ -442,7 +443,8 @@ async def embedding(docs, mdl, parser_config=None, callback=None):
cnts_ = np.array([])
for i in range(0, len(cnts), EMBEDDING_BATCH_SIZE):
vts, c = await trio.to_thread.run_sync(lambda: mdl.encode([truncate(c, mdl.max_length-10) for c in cnts[i: i + EMBEDDING_BATCH_SIZE]]))
async with embed_limiter:
vts, c = await trio.to_thread.run_sync(lambda: mdl.encode([truncate(c, mdl.max_length-10) for c in cnts[i: i + EMBEDDING_BATCH_SIZE]]))
if len(cnts_) == 0:
cnts_ = vts
else: