diff --git a/graphrag/general/index.py b/graphrag/general/index.py index db3645b35..8c9863679 100644 --- a/graphrag/general/index.py +++ b/graphrag/general/index.py @@ -166,7 +166,7 @@ async def generate_subgraph( ) if ignored_rels: callback(msg=f"ignored {ignored_rels} relations due to missing entities.") - tidy_graph(subgraph, callback) + tidy_graph(subgraph, callback, check_attribute=False) subgraph.graph["source_id"] = [doc_id] chunk = { diff --git a/graphrag/utils.py b/graphrag/utils.py index 151b5aab7..81df2a24b 100644 --- a/graphrag/utils.py +++ b/graphrag/utils.py @@ -157,30 +157,32 @@ def set_tags_to_cache(kb_ids, tags): k = hasher.hexdigest() REDIS_CONN.set(k, json.dumps(tags).encode("utf-8"), 600) -def tidy_graph(graph: nx.Graph, callback): +def tidy_graph(graph: nx.Graph, callback, check_attribute: bool = True): """ Ensure all nodes and edges in the graph have some essential attribute. """ - def is_valid_node(node_attrs: dict) -> bool: + def is_valid_item(node_attrs: dict) -> bool: valid_node = True for attr in ["description", "source_id"]: if attr not in node_attrs: valid_node = False break return valid_node - purged_nodes = [] - for node, node_attrs in graph.nodes(data=True): - if not is_valid_node(node_attrs): - purged_nodes.append(node) - for node in purged_nodes: - graph.remove_node(node) - if purged_nodes and callback: - callback(msg=f"Purged {len(purged_nodes)} nodes from graph due to missing essential attributes.") + if check_attribute: + purged_nodes = [] + for node, node_attrs in graph.nodes(data=True): + if not is_valid_item(node_attrs): + purged_nodes.append(node) + for node in purged_nodes: + graph.remove_node(node) + if purged_nodes and callback: + callback(msg=f"Purged {len(purged_nodes)} nodes from graph due to missing essential attributes.") purged_edges = [] for source, target, attr in graph.edges(data=True): - if not is_valid_node(attr): - purged_edges.append((source, target)) + if check_attribute: + if not is_valid_item(attr): + purged_edges.append((source, target)) if "keywords" not in attr: attr["keywords"] = [] for source, target in purged_edges: