Feat: Add knowledge graph http api (#8896)

### What problem does this PR solve?

Add knowledge graph http api

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
天海蒼灆
2025-07-17 19:20:48 +08:00
committed by GitHub
parent 9767c26535
commit 412a088008
2 changed files with 179 additions and 0 deletions

View File

@ -17,6 +17,7 @@
import logging
import os
import json
from flask import request
from peewee import OperationalError
@ -473,3 +474,56 @@ def list_datasets(tenant_id):
except OperationalError as e:
logging.exception(e)
return get_error_data_result(message="Database operation failed")
@manager.route('/datasets/<dataset_id>/knowledge_graph', methods=['GET']) # noqa: F821
@token_required
def knowledge_graph(tenant_id,dataset_id):
if not KnowledgebaseService.accessible(dataset_id, tenant_id):
return get_result(
data=False,
message='No authorization.',
code=settings.RetCode.AUTHENTICATION_ERROR
)
_, kb = KnowledgebaseService.get_by_id(dataset_id)
req = {
"kb_id": [dataset_id],
"knowledge_graph_kwd": ["graph"]
}
obj = {"graph": {}, "mind_map": {}}
if not settings.docStoreConn.indexExist(search.index_name(kb.tenant_id), dataset_id):
return get_result(data=obj)
sres = settings.retrievaler.search(req, search.index_name(kb.tenant_id), [dataset_id])
if not len(sres.ids):
return get_result(data=obj)
for id in sres.ids[:1]:
ty = sres.field[id]["knowledge_graph_kwd"]
try:
content_json = json.loads(sres.field[id]["content_with_weight"])
except Exception:
continue
obj[ty] = content_json
if "nodes" in obj["graph"]:
obj["graph"]["nodes"] = sorted(obj["graph"]["nodes"], key=lambda x: x.get("pagerank", 0), reverse=True)[:256]
if "edges" in obj["graph"]:
node_id_set = { o["id"] for o in obj["graph"]["nodes"] }
filtered_edges = [o for o in obj["graph"]["edges"] if o["source"] != o["target"] and o["source"] in node_id_set and o["target"] in node_id_set]
obj["graph"]["edges"] = sorted(filtered_edges, key=lambda x: x.get("weight", 0), reverse=True)[:128]
return get_result(data=obj)
@manager.route('/datasets/<dataset_id>/knowledge_graph', methods=['DELETE']) # noqa: F821
@token_required
def delete_knowledge_graph(tenant_id,dataset_id):
if not KnowledgebaseService.accessible(dataset_id, tenant_id):
return get_result(
data=False,
message='No authorization.',
code=settings.RetCode.AUTHENTICATION_ERROR
)
_, kb = KnowledgebaseService.get_by_id(dataset_id)
settings.docStoreConn.delete({"knowledge_graph_kwd": ["graph", "subgraph", "entity", "relation"]}, search.index_name(kb.tenant_id), dataset_id)
return get_result(data=True)

View File

@ -766,7 +766,132 @@ Failure:
"message": "The dataset doesn't exist"
}
```
---
## Get dataset's knowledge graph
**GET** `/api/v1/datasets/{dataset_id}/knowledge_graph`
Gets the knowledge graph of a specific datasets.
#### Request
- Method: GET
- URL: `/api/v1/datasets/{dataset_id}/knowledge_graph`
- Headers:
- `'Authorization: Bearer <YOUR_API_KEY>'`
##### Request example
```bash
curl --request GET \
--url http://{address}/api/v1/datasets/{dataset_id}/knowledge_graph \
--header 'Authorization: Bearer <YOUR_API_KEY>'
```
##### Request parameters
- `dataset_id`: (*Path parameter*)
The ID of the dataset.
#### Response
Success:
```json
{
"code": 0,
"data": {
"graph": {
"directed": false,
"edges": [
{
"description": "The notice is a document issued to convey risk warnings and operational alerts.<SEP>The notice is a specific instance of a notification document issued under the risk warning framework.",
"keywords": ["9", "8"],
"source": "notice",
"source_id": ["8a46cdfe4b5c11f0a5281a58e595aa1c"],
"src_id": "xxx",
"target": "xxx",
"tgt_id": "xxx",
"weight": 17.0
}
],
"graph": {
"source_id": ["8a46cdfe4b5c11f0a5281a58e595aa1c", "8a7eb6424b5c11f0a5281a58e595aa1c"]
},
"multigraph": false,
"nodes": [
{
"description": "xxx",
"entity_name": "xxx",
"entity_type": "ORGANIZATION",
"id": "xxx",
"pagerank": 0.10804906590624092,
"rank": 3,
"source_id": ["8a7eb6424b5c11f0a5281a58e595aa1c"]
}
]
},
"mind_map": {}
}
}
```
Failure:
```json
{
"code": 102,
"message": "The dataset doesn't exist"
}
```
---
## Delete dataset's knowledge graph
**DELETE** `/api/v1/datasets/{dataset_id}/knowledge_graph`
Deletes the knowledge graph of a specific datasets.
#### Request
- Method: DELETE
- URL: `/api/v1/datasets/{dataset_id}/knowledge_graph`
- Headers:
- `'Authorization: Bearer <YOUR_API_KEY>'`
##### Request example
```bash
curl --request DELETE \
--url http://{address}/api/v1/datasets/{dataset_id}/knowledge_graph \
--header 'Authorization: Bearer <YOUR_API_KEY>'
```
##### Request parameters
- `dataset_id`: (*Path parameter*)
The ID of the dataset.
#### Response
Success:
```json
{
"code": 0,
"data": true
}
```
Failure:
```json
{
"code": 102,
"message": "The dataset doesn't exist"
}
```
---
## FILE MANAGEMENT WITHIN DATASET