From 412a088008a6eec0f07f4b772855add2d080ba02 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=A4=A9=E6=B5=B7=E8=92=BC=E7=81=86?=
Date: Thu, 17 Jul 2025 19:20:48 +0800
Subject: [PATCH] Feat: Add knowledge graph http api (#8896)

### What problem does this PR solve?

Add knowledge graph http api

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
---
 api/apps/sdk/dataset.py               |  81 +++++++++++++++++
 docs/references/http_api_reference.md | 125 ++++++++++++++++++++++++++
 2 files changed, 206 insertions(+)

diff --git a/api/apps/sdk/dataset.py b/api/apps/sdk/dataset.py
index 0be206ec0..cfaca4674 100644
--- a/api/apps/sdk/dataset.py
+++ b/api/apps/sdk/dataset.py
@@ -17,6 +17,7 @@
 
 import logging
 import os
+import json
 from flask import request
 from peewee import OperationalError
 
@@ -473,3 +474,83 @@ def list_datasets(tenant_id):
     except OperationalError as e:
         logging.exception(e)
         return get_error_data_result(message="Database operation failed")
+
+
+@manager.route('/datasets/<dataset_id>/knowledge_graph', methods=['GET'])  # noqa: F821
+@token_required
+def knowledge_graph(tenant_id, dataset_id):
+    """Return the knowledge graph stored for a dataset, trimmed for display.
+
+    Responds with an authentication error when the dataset is not accessible
+    to the tenant. Otherwise the payload is {"graph": ..., "mind_map": ...};
+    both stay empty when the dataset has no index or no stored graph. Nodes are
+    capped at the 256 highest "pagerank"; edges at the 128 highest "weight".
+    """
+    if not KnowledgebaseService.accessible(dataset_id, tenant_id):
+        return get_result(
+            data=False,
+            message='No authorization.',
+            code=settings.RetCode.AUTHENTICATION_ERROR
+        )
+    _, kb = KnowledgebaseService.get_by_id(dataset_id)
+    index_name = search.index_name(kb.tenant_id)
+    req = {
+        "kb_id": [dataset_id],
+        "knowledge_graph_kwd": ["graph"]
+    }
+
+    obj = {"graph": {}, "mind_map": {}}
+    if not settings.docStoreConn.indexExist(index_name, dataset_id):
+        return get_result(data=obj)
+    sres = settings.retrievaler.search(req, index_name, [dataset_id])
+    if not len(sres.ids):
+        return get_result(data=obj)
+
+    # Only the first hit is used.
+    for chunk_id in sres.ids[:1]:
+        ty = sres.field[chunk_id]["knowledge_graph_kwd"]
+        try:
+            content_json = json.loads(sres.field[chunk_id]["content_with_weight"])
+        except (KeyError, TypeError, ValueError):
+            # Best effort: an unreadable payload still yields an empty graph, but leave a trace.
+            logging.exception("Failed to parse knowledge graph of dataset %s", dataset_id)
+            continue
+
+        obj[ty] = content_json
+
+    graph = obj["graph"]
+    if "nodes" in graph:
+        graph["nodes"] = sorted(graph["nodes"], key=lambda x: x.get("pagerank", 0), reverse=True)[:256]
+    if "edges" in graph:
+        # A payload may carry edges without nodes; then no edge can be kept.
+        node_id_set = {o["id"] for o in graph.get("nodes", [])}
+        filtered_edges = [
+            o for o in graph["edges"]
+            if o["source"] != o["target"] and o["source"] in node_id_set and o["target"] in node_id_set
+        ]
+        graph["edges"] = sorted(filtered_edges, key=lambda x: x.get("weight", 0), reverse=True)[:128]
+    return get_result(data=obj)
+
+
+@manager.route('/datasets/<dataset_id>/knowledge_graph', methods=['DELETE'])  # noqa: F821
+@token_required
+def delete_knowledge_graph(tenant_id, dataset_id):
+    """Delete the knowledge graph data stored for a dataset.
+
+    Removes the dataset's document store entries whose "knowledge_graph_kwd"
+    is "graph", "subgraph", "entity" or "relation".
+    """
+    if not KnowledgebaseService.accessible(dataset_id, tenant_id):
+        return get_result(
+            data=False,
+            message='No authorization.',
+            code=settings.RetCode.AUTHENTICATION_ERROR
+        )
+    _, kb = KnowledgebaseService.get_by_id(dataset_id)
+    settings.docStoreConn.delete(
+        {"knowledge_graph_kwd": ["graph", "subgraph", "entity", "relation"]},
+        search.index_name(kb.tenant_id),
+        dataset_id,
+    )
+
+    return get_result(data=True)
diff --git a/docs/references/http_api_reference.md b/docs/references/http_api_reference.md
index 098ee8bb9..b400c9694 100644
--- a/docs/references/http_api_reference.md
+++ b/docs/references/http_api_reference.md
@@ -766,7 +766,132 @@ Failure:
     "message": "The dataset doesn't exist"
 }
 ```
+
 ---
+## Get dataset's knowledge graph
+
+**GET** `/api/v1/datasets/{dataset_id}/knowledge_graph`
+
+Gets the knowledge graph of a specific dataset.
+
+#### Request
+
+- Method: GET
+- URL: `/api/v1/datasets/{dataset_id}/knowledge_graph`
+- Headers:
+  - `'Authorization: Bearer <YOUR_API_KEY>'`
+
+##### Request example
+
+```bash
+curl --request GET \
+     --url http://{address}/api/v1/datasets/{dataset_id}/knowledge_graph \
+     --header 'Authorization: Bearer <YOUR_API_KEY>'
+```
+
+##### Request parameters
+
+- `dataset_id`: (*Path parameter*)  
+  The ID of the dataset.
+
+#### Response
+
+Success:
+
+```json
+{
+    "code": 0,
+    "data": {
+        "graph": {
+            "directed": false,
+            "edges": [
+                {
+                    "description": "The notice is a document issued to convey risk warnings and operational alerts.The notice is a specific instance of a notification document issued under the risk warning framework.",
+                    "keywords": ["9", "8"],
+                    "source": "notice",
+                    "source_id": ["8a46cdfe4b5c11f0a5281a58e595aa1c"],
+                    "src_id": "xxx",
+                    "target": "xxx",
+                    "tgt_id": "xxx",
+                    "weight": 17.0
+                }
+            ],
+            "graph": {
+                "source_id": ["8a46cdfe4b5c11f0a5281a58e595aa1c", "8a7eb6424b5c11f0a5281a58e595aa1c"]
+            },
+            "multigraph": false,
+            "nodes": [
+                {
+                    "description": "xxx",
+                    "entity_name": "xxx",
+                    "entity_type": "ORGANIZATION",
+                    "id": "xxx",
+                    "pagerank": 0.10804906590624092,
+                    "rank": 3,
+                    "source_id": ["8a7eb6424b5c11f0a5281a58e595aa1c"]
+                }
+            ]
+        },
+        "mind_map": {}
+    }
+}
+```
+
+Failure:
+
+```json
+{
+    "code": 102,
+    "message": "The dataset doesn't exist"
+}
+```
+---
+
+## Delete dataset's knowledge graph
+
+**DELETE** `/api/v1/datasets/{dataset_id}/knowledge_graph`
+
+Deletes the knowledge graph of a specific dataset.
+
+#### Request
+
+- Method: DELETE
+- URL: `/api/v1/datasets/{dataset_id}/knowledge_graph`
+- Headers:
+  - `'Authorization: Bearer <YOUR_API_KEY>'`
+
+##### Request example
+
+```bash
+curl --request DELETE \
+     --url http://{address}/api/v1/datasets/{dataset_id}/knowledge_graph \
+     --header 'Authorization: Bearer <YOUR_API_KEY>'
+```
+
+##### Request parameters
+
+- `dataset_id`: (*Path parameter*)  
+  The ID of the dataset.
+
+#### Response
+
+Success:
+
+```json
+{
+    "code": 0,
+    "data": true
+}
+```
+
+Failure:
+
+```json
+{
+    "code": 102,
+    "message": "The dataset doesn't exist"
+}
+```
 ---
 
 ## FILE MANAGEMENT WITHIN DATASET