mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Fix: meta data filter with AND logic operations. (#9687)
### What problem does this PR solve? Close #9648 ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
@ -15,6 +15,7 @@
|
||||
#
|
||||
import logging
|
||||
import itertools
|
||||
import os
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Callable
|
||||
@ -106,7 +107,8 @@ class EntityResolution(Extractor):
|
||||
nonlocal remain_candidates_to_resolve, callback
|
||||
async with semaphore:
|
||||
try:
|
||||
with trio.move_on_after(280) as cancel_scope:
|
||||
enable_timeout_assertion = os.environ.get("ENABLE_TIMEOUT_ASSERTION")
|
||||
with trio.move_on_after(280 if enable_timeout_assertion else 1000000000) as cancel_scope:
|
||||
await self._resolve_candidate(candidate_batch, result_set, result_lock)
|
||||
remain_candidates_to_resolve = remain_candidates_to_resolve - len(candidate_batch[1])
|
||||
callback(msg=f"Resolved {len(candidate_batch[1])} pairs, {remain_candidates_to_resolve} are remained to resolve. ")
|
||||
@ -169,7 +171,8 @@ class EntityResolution(Extractor):
|
||||
logging.info(f"Created resolution prompt {len(text)} bytes for {len(candidate_resolution_i[1])} entity pairs of type {candidate_resolution_i[0]}")
|
||||
async with chat_limiter:
|
||||
try:
|
||||
with trio.move_on_after(280) as cancel_scope:
|
||||
enable_timeout_assertion = os.environ.get("ENABLE_TIMEOUT_ASSERTION")
|
||||
with trio.move_on_after(280 if enable_timeout_assertion else 1000000000) as cancel_scope:
|
||||
response = await trio.to_thread.run_sync(self._chat, text, [{"role": "user", "content": "Output:"}], {})
|
||||
if cancel_scope.cancelled_caught:
|
||||
logging.warning("_resolve_candidate._chat timeout, skipping...")
|
||||
|
||||
@ -7,6 +7,7 @@ Reference:
|
||||
|
||||
import logging
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
from typing import Callable
|
||||
from dataclasses import dataclass
|
||||
@ -51,6 +52,7 @@ class CommunityReportsExtractor(Extractor):
|
||||
self._max_report_length = max_report_length or 1500
|
||||
|
||||
async def __call__(self, graph: nx.Graph, callback: Callable | None = None):
|
||||
enable_timeout_assertion = os.environ.get("ENABLE_TIMEOUT_ASSERTION")
|
||||
for node_degree in graph.degree:
|
||||
graph.nodes[str(node_degree[0])]["rank"] = int(node_degree[1])
|
||||
|
||||
@ -92,7 +94,7 @@ class CommunityReportsExtractor(Extractor):
|
||||
text = perform_variable_replacements(self._extraction_prompt, variables=prompt_variables)
|
||||
async with chat_limiter:
|
||||
try:
|
||||
with trio.move_on_after(180) as cancel_scope:
|
||||
with trio.move_on_after(180 if enable_timeout_assertion else 1000000000) as cancel_scope:
|
||||
response = await trio.to_thread.run_sync( self._chat, text, [{"role": "user", "content": "Output:"}], {})
|
||||
if cancel_scope.cancelled_caught:
|
||||
logging.warning("extract_community_report._chat timeout, skipping...")
|
||||
|
||||
@ -15,6 +15,8 @@
|
||||
#
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
|
||||
import networkx as nx
|
||||
import trio
|
||||
|
||||
@ -49,6 +51,7 @@ async def run_graphrag(
|
||||
embedding_model,
|
||||
callback,
|
||||
):
|
||||
enable_timeout_assertion=os.environ.get("ENABLE_TIMEOUT_ASSERTION")
|
||||
start = trio.current_time()
|
||||
tenant_id, kb_id, doc_id = row["tenant_id"], str(row["kb_id"]), row["doc_id"]
|
||||
chunks = []
|
||||
@ -57,7 +60,7 @@ async def run_graphrag(
|
||||
):
|
||||
chunks.append(d["content_with_weight"])
|
||||
|
||||
with trio.fail_after(max(120, len(chunks)*60*10)):
|
||||
with trio.fail_after(max(120, len(chunks)*60*10) if enable_timeout_assertion else 10000000000):
|
||||
subgraph = await generate_subgraph(
|
||||
LightKGExt
|
||||
if "method" not in row["kb_parser_config"].get("graphrag", {}) or row["kb_parser_config"]["graphrag"]["method"] != "general"
|
||||
|
||||
@ -307,6 +307,7 @@ def chunk_id(chunk):
|
||||
|
||||
async def graph_node_to_chunk(kb_id, embd_mdl, ent_name, meta, chunks):
|
||||
global chat_limiter
|
||||
enable_timeout_assertion=os.environ.get("ENABLE_TIMEOUT_ASSERTION")
|
||||
chunk = {
|
||||
"id": get_uuid(),
|
||||
"important_kwd": [ent_name],
|
||||
@ -324,7 +325,7 @@ async def graph_node_to_chunk(kb_id, embd_mdl, ent_name, meta, chunks):
|
||||
ebd = get_embed_cache(embd_mdl.llm_name, ent_name)
|
||||
if ebd is None:
|
||||
async with chat_limiter:
|
||||
with trio.fail_after(3):
|
||||
with trio.fail_after(3 if enable_timeout_assertion else 30000000):
|
||||
ebd, _ = await trio.to_thread.run_sync(lambda: embd_mdl.encode([ent_name]))
|
||||
ebd = ebd[0]
|
||||
set_embed_cache(embd_mdl.llm_name, ent_name, ebd)
|
||||
@ -362,6 +363,7 @@ def get_relation(tenant_id, kb_id, from_ent_name, to_ent_name, size=1):
|
||||
|
||||
|
||||
async def graph_edge_to_chunk(kb_id, embd_mdl, from_ent_name, to_ent_name, meta, chunks):
|
||||
enable_timeout_assertion=os.environ.get("ENABLE_TIMEOUT_ASSERTION")
|
||||
chunk = {
|
||||
"id": get_uuid(),
|
||||
"from_entity_kwd": from_ent_name,
|
||||
@ -380,7 +382,7 @@ async def graph_edge_to_chunk(kb_id, embd_mdl, from_ent_name, to_ent_name, meta,
|
||||
ebd = get_embed_cache(embd_mdl.llm_name, txt)
|
||||
if ebd is None:
|
||||
async with chat_limiter:
|
||||
with trio.fail_after(3):
|
||||
with trio.fail_after(3 if enable_timeout_assertion else 300000000):
|
||||
ebd, _ = await trio.to_thread.run_sync(lambda: embd_mdl.encode([txt+f": {meta['description']}"]))
|
||||
ebd = ebd[0]
|
||||
set_embed_cache(embd_mdl.llm_name, txt, ebd)
|
||||
@ -514,9 +516,10 @@ async def set_graph(tenant_id: str, kb_id: str, embd_mdl, graph: nx.Graph, chang
|
||||
callback(msg=f"set_graph converted graph change to {len(chunks)} chunks in {now - start:.2f}s.")
|
||||
start = now
|
||||
|
||||
enable_timeout_assertion=os.environ.get("ENABLE_TIMEOUT_ASSERTION")
|
||||
es_bulk_size = 4
|
||||
for b in range(0, len(chunks), es_bulk_size):
|
||||
with trio.fail_after(3):
|
||||
with trio.fail_after(3 if enable_timeout_assertion else 30000000):
|
||||
doc_store_result = await trio.to_thread.run_sync(lambda: settings.docStoreConn.insert(chunks[b:b + es_bulk_size], search.index_name(tenant_id), kb_id))
|
||||
if b % 100 == es_bulk_size and callback:
|
||||
callback(msg=f"Insert chunks: {b}/{len(chunks)}")
|
||||
|
||||
Reference in New Issue
Block a user