Fix: metadata issue & speed up GraphRAG. (#12113)

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

---------

Co-authored-by: Liu An <asiro@qq.com>
This commit is contained in:
Kevin Hu
2025-12-23 15:57:27 +08:00
committed by GitHub
parent 063b06494a
commit 00bb6fbd28
6 changed files with 6 additions and 11 deletions

View File

@@ -71,18 +71,17 @@ class Extractor:
_, system_msg = message_fit_in([{"role": "system", "content": system}], int(self._llm.max_length * 0.92))
response = ""
for attempt in range(3):
if task_id:
if has_canceled(task_id):
logging.info(f"Task {task_id} cancelled during entity resolution candidate processing.")
raise TaskCanceledException(f"Task {task_id} was cancelled")
try:
response = asyncio.run(self._llm.async_chat(system_msg[0]["content"], hist, conf))
response = re.sub(r"^.*</think>", "", response, flags=re.DOTALL)
if response.find("**ERROR**") >= 0:
raise Exception(response)
set_llm_cache(self._llm.llm_name, system, response, history, gen_conf)
break
except Exception as e:
logging.exception(e)
if attempt == 2:

View File

@@ -198,7 +198,7 @@ async def run_graphrag_for_kb(
for d in raw_chunks:
content = d["content_with_weight"]
if num_tokens_from_string(current_chunk + content) < 1024:
if num_tokens_from_string(current_chunk + content) < 4096:
current_chunk += content
else:
if current_chunk:

View File

@@ -78,10 +78,6 @@ class GraphExtractor(Extractor):
hint_prompt = self._entity_extract_prompt.format(**self._context_base, input_text=content)
gen_conf = {}
final_result = ""
glean_result = ""
if_loop_result = ""
history = []
logging.info(f"Start processing for {chunk_key}: {content[:25]}...")
if self.callback:
self.callback(msg=f"Start processing for {chunk_key}: {content[:25]}...")