Mirror of https://github.com/infiniflow/ragflow.git, synced 2025-12-08 20:42:30 +08:00
Refa: remove temperature since some LLMs fail to support. (#8981)
### What problem does this PR solve?

Some LLMs reject a hard-coded `temperature` in the generation config, so the fixed values are removed and the provider defaults are used instead.

### Type of change

- [x] Refactoring
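All of the call sites below follow the same pattern: the fixed `gen_conf = {"temperature": ...}` dictionaries are deleted and an empty dict is passed to `_chat`, so each provider's default sampling settings apply. A minimal sketch of the idea, assuming a hypothetical `build_gen_conf` helper and `supports_temperature` flag that are not part of this PR:

```python
# Minimal sketch (not part of this PR): instead of pinning temperature,
# build the generation config with only the keys the backend is known to
# accept. `build_gen_conf` and `supports_temperature` are hypothetical names.

def build_gen_conf(max_tokens=None, temperature=None, supports_temperature=False):
    """Return a gen_conf dict that omits parameters the LLM may reject."""
    conf = {}
    if max_tokens is not None:
        conf["max_tokens"] = max_tokens
    if temperature is not None and supports_temperature:
        conf["temperature"] = temperature
    return conf


# Before this PR: gen_conf = {"temperature": 0.5}
# After this PR:  gen_conf = {}  -> the provider's default sampling applies
print(build_gen_conf())                # {}
print(build_gen_conf(max_tokens=512))  # {'max_tokens': 512}
```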
```diff
@@ -152,7 +152,6 @@ class EntityResolution(Extractor):
         )

     async def _resolve_candidate(self, candidate_resolution_i: tuple[str, list[tuple[str, str]]], resolution_result: set[str], resolution_result_lock: trio.Lock):
-        gen_conf = {"temperature": 0.5}
         pair_txt = [
             f'When determining whether two {candidate_resolution_i[0]}s are the same, you should only focus on critical properties and overlook noisy factors.\n']
         for index, candidate in enumerate(candidate_resolution_i[1]):
@@ -171,7 +170,7 @@ class EntityResolution(Extractor):
         async with chat_limiter:
             try:
                 with trio.move_on_after(120) as cancel_scope:
-                    response = await trio.to_thread.run_sync(self._chat, text, [{"role": "user", "content": "Output:"}], gen_conf)
+                    response = await trio.to_thread.run_sync(self._chat, text, [{"role": "user", "content": "Output:"}], {})
                 if cancel_scope.cancelled_caught:
                     logging.warning("_resolve_candidate._chat timeout, skipping...")
                     return
```
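The context lines above also show the timeout guard used around the blocking chat call. As a standalone illustration of that pattern, here is a minimal sketch, assuming a stand-in blocking function `slow_chat` in place of the real `_chat`:

```python
# Sketch of the timeout guard shown above: run the blocking chat call on a
# worker thread inside a cancel scope and, if the deadline expires, discard
# the result and skip the candidate. `slow_chat` is a hypothetical stand-in.
import logging
import time

import trio


def slow_chat(prompt):
    time.sleep(1)  # simulate a slow LLM round-trip
    return f"echo: {prompt}"


async def resolve_with_timeout(prompt, timeout_s=120):
    with trio.move_on_after(timeout_s) as cancel_scope:
        response = await trio.to_thread.run_sync(slow_chat, prompt)
    if cancel_scope.cancelled_caught:
        logging.warning("_resolve_candidate._chat timeout, skipping...")
        return None
    return response


print(trio.run(resolve_with_timeout, "Are entity A and entity B the same?"))
```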
```diff
@@ -90,11 +90,10 @@ class CommunityReportsExtractor(Extractor):
                 "relation_df": rela_df.to_csv(index_label="id")
             }
             text = perform_variable_replacements(self._extraction_prompt, variables=prompt_variables)
-            gen_conf = {"temperature": 0.3}
             async with chat_limiter:
                 try:
                     with trio.move_on_after(80) as cancel_scope:
-                        response = await trio.to_thread.run_sync( self._chat, text, [{"role": "user", "content": "Output:"}], gen_conf)
+                        response = await trio.to_thread.run_sync( self._chat, text, [{"role": "user", "content": "Output:"}], {})
                     if cancel_scope.cancelled_caught:
                         logging.warning("extract_community_report._chat timeout, skipping...")
                         return
```
```diff
@@ -105,10 +105,9 @@ class GraphExtractor(Extractor):
             **self._prompt_variables,
             self._input_text_key: content,
         }
-        gen_conf = {"temperature": 0.3}
         hint_prompt = perform_variable_replacements(self._extraction_prompt, variables=variables)
         async with chat_limiter:
-            response = await trio.to_thread.run_sync(lambda: self._chat(hint_prompt, [{"role": "user", "content": "Output:"}], gen_conf))
+            response = await trio.to_thread.run_sync(lambda: self._chat(hint_prompt, [{"role": "user", "content": "Output:"}], {}))
         token_count += num_tokens_from_string(hint_prompt + response)

         results = response or ""
@@ -118,7 +117,7 @@ class GraphExtractor(Extractor):
         for i in range(self._max_gleanings):
             history.append({"role": "user", "content": CONTINUE_PROMPT})
             async with chat_limiter:
-                response = await trio.to_thread.run_sync(lambda: self._chat("", history, gen_conf))
+                response = await trio.to_thread.run_sync(lambda: self._chat("", history, {}))
             token_count += num_tokens_from_string("\n".join([m["content"] for m in history]) + response)
             results += response or ""

```
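For context, the second `GraphExtractor` hunk above sits inside the gleaning loop, which re-prompts the model up to `_max_gleanings` times and concatenates the extra output. A self-contained sketch of that loop, using an illustrative `fake_chat` stand-in and placeholder `CONTINUE_PROMPT` wording (both assumptions, not the repository's actual values):

```python
# Sketch of the gleaning loop: after the first extraction pass, re-prompt the
# model a few times to surface anything it missed, appending each reply to the
# running result. `fake_chat` and this CONTINUE_PROMPT text are stand-ins.

CONTINUE_PROMPT = "Some entities may have been missed. Add them below in the same format:"


def fake_chat(system, history):
    # Stand-in for the real _chat(system, history, gen_conf) call.
    return f"(more results, turn {len(history)})"


def glean(initial_prompt, max_gleanings=2):
    results = fake_chat(initial_prompt, [{"role": "user", "content": "Output:"}])
    history = [
        {"role": "system", "content": initial_prompt},
        {"role": "assistant", "content": results},
    ]
    for _ in range(max_gleanings):
        history.append({"role": "user", "content": CONTINUE_PROMPT})
        response = fake_chat("", history)
        history.append({"role": "assistant", "content": response})
        results += response or ""
    return results


print(glean("Extract all entities and relations from: ..."))
```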
```diff
@@ -171,9 +171,8 @@ class MindMapExtractor(Extractor):
                 self._input_text_key: text,
             }
             text = perform_variable_replacements(self._mind_map_prompt, variables=variables)
-            gen_conf = {"temperature": 0.5}
             async with chat_limiter:
-                response = await trio.to_thread.run_sync(lambda: self._chat(text, [{"role": "user", "content": "Output:"}], gen_conf))
+                response = await trio.to_thread.run_sync(lambda: self._chat(text, [{"role": "user", "content": "Output:"}], {}))
             response = re.sub(r"```[^\n]*", "", response)
             logging.debug(response)
             logging.debug(self._todict(markdown_to_json.dictify(response)))
```
```diff
@@ -45,7 +45,7 @@ class KGSearch(Dealer):
         ty2ents = trio.run(lambda: get_entity_type2sampels(idxnms, kb_ids))
         hint_prompt = PROMPTS["minirag_query2kwd"].format(query=question,
                                                           TYPE_POOL=json.dumps(ty2ents, ensure_ascii=False, indent=2))
-        result = self._chat(llm, hint_prompt, [{"role": "user", "content": "Output:"}], {"temperature": .5})
+        result = self._chat(llm, hint_prompt, [{"role": "user", "content": "Output:"}], {})
         try:
             keywords_data = json_repair.loads(result)
             type_keywords = keywords_data.get("answer_type_keywords", [])
```
```diff
@@ -107,7 +107,7 @@ class RecursiveAbstractiveProcessing4TreeOrganizedRetrieval:
                     ),
                 }
             ],
-            {"temperature": 0.3, "max_tokens": self._max_token},
+            {"max_tokens": self._max_token},
         )
         cnt = re.sub(
             "(······\n由于长度的原因,回答被截断了,要继续吗?|For the content length reason, it stopped, continue?)",
```
```diff
@@ -103,6 +103,7 @@ MAX_CONCURRENT_CHUNK_BUILDERS = int(os.environ.get('MAX_CONCURRENT_CHUNK_BUILDER
 MAX_CONCURRENT_MINIO = int(os.environ.get('MAX_CONCURRENT_MINIO', '10'))
 task_limiter = trio.Semaphore(MAX_CONCURRENT_TASKS)
 chunk_limiter = trio.CapacityLimiter(MAX_CONCURRENT_CHUNK_BUILDERS)
+embed_limiter = trio.CapacityLimiter(MAX_CONCURRENT_CHUNK_BUILDERS)
 minio_limiter = trio.CapacityLimiter(MAX_CONCURRENT_MINIO)
 kg_limiter = trio.CapacityLimiter(2)
 WORKER_HEARTBEAT_TIMEOUT = int(os.environ.get('WORKER_HEARTBEAT_TIMEOUT', '120'))
@@ -442,7 +443,8 @@ async def embedding(docs, mdl, parser_config=None, callback=None):

     cnts_ = np.array([])
     for i in range(0, len(cnts), EMBEDDING_BATCH_SIZE):
-        vts, c = await trio.to_thread.run_sync(lambda: mdl.encode([truncate(c, mdl.max_length-10) for c in cnts[i: i + EMBEDDING_BATCH_SIZE]]))
+        async with embed_limiter:
+            vts, c = await trio.to_thread.run_sync(lambda: mdl.encode([truncate(c, mdl.max_length-10) for c in cnts[i: i + EMBEDDING_BATCH_SIZE]]))
         if len(cnts_) == 0:
             cnts_ = vts
         else:
```
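The new `embed_limiter` applies the same `trio.CapacityLimiter` gating to embedding that chunk building and MinIO access already use. Below is a minimal, self-contained sketch of how such a limiter bounds concurrent batches; `encode_batch` is a hypothetical stand-in for `mdl.encode`, and the sketch spawns batches in a nursery only to make the gating visible, whereas the PR wraps the encode call inside the existing sequential loop, so the limiter presumably coordinates across concurrently running tasks rather than within one loop.

```python
# Minimal sketch: a trio.CapacityLimiter caps how many embedding batches run
# on worker threads at once. `encode_batch` is a hypothetical stand-in for
# mdl.encode(); the PR sizes the limiter with MAX_CONCURRENT_CHUNK_BUILDERS.
import time

import trio

embed_limiter = trio.CapacityLimiter(2)   # at most 2 batches in flight


def encode_batch(batch):
    time.sleep(0.1)                       # simulate a slow embedding call
    return [[float(len(t))] for t in batch]


async def embed_all(texts, batch_size=4):
    vectors = []

    async def worker(batch):
        async with embed_limiter:         # same gating pattern as the PR
            vts = await trio.to_thread.run_sync(lambda: encode_batch(batch))
        vectors.extend(vts)

    async with trio.open_nursery() as nursery:
        for i in range(0, len(texts), batch_size):
            nursery.start_soon(worker, texts[i:i + batch_size])
    return vectors


print(len(trio.run(embed_all, [f"chunk {i}" for i in range(10)])))  # 10
```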