mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 12:32:30 +08:00
Feat:support API for generating knowledge graph and raptor (#11229)
### What problem does this PR solve? issue: [#11195](https://github.com/infiniflow/ragflow/issues/11195) change: support API for generating knowledge graph and raptor ### Type of change - [x] New Feature (non-breaking change which adds functionality) - [x] Documentation Update
This commit is contained in:
@ -21,10 +21,11 @@ import json
|
||||
from flask import request
|
||||
from peewee import OperationalError
|
||||
from api.db.db_models import File
|
||||
from api.db.services.document_service import DocumentService
|
||||
from api.db.services.document_service import DocumentService, queue_raptor_o_graphrag_tasks
|
||||
from api.db.services.file2document_service import File2DocumentService
|
||||
from api.db.services.file_service import FileService
|
||||
from api.db.services.knowledgebase_service import KnowledgebaseService
|
||||
from api.db.services.task_service import GRAPH_RAPTOR_FAKE_DOC_ID, TaskService
|
||||
from api.db.services.user_service import TenantService
|
||||
from common.constants import RetCode, FileSource, StatusEnum
|
||||
from api.utils.api_utils import (
|
||||
@ -118,7 +119,6 @@ def create(tenant_id):
|
||||
req, err = validate_and_parse_json_request(request, CreateDatasetReq)
|
||||
if err is not None:
|
||||
return get_error_argument_result(err)
|
||||
|
||||
req = KnowledgebaseService.create_with_name(
|
||||
name = req.pop("name", None),
|
||||
tenant_id = tenant_id,
|
||||
@ -144,7 +144,6 @@ def create(tenant_id):
|
||||
ok, k = KnowledgebaseService.get_by_id(req["id"])
|
||||
if not ok:
|
||||
return get_error_data_result(message="Dataset created failed")
|
||||
|
||||
response_data = remap_dictionary_keys(k.to_dict())
|
||||
return get_result(data=response_data)
|
||||
except Exception as e:
|
||||
@ -532,3 +531,157 @@ def delete_knowledge_graph(tenant_id, dataset_id):
|
||||
search.index_name(kb.tenant_id), dataset_id)
|
||||
|
||||
return get_result(data=True)
|
||||
|
||||
|
||||
@manager.route("/datasets/<dataset_id>/run_graphrag", methods=["POST"]) # noqa: F821
|
||||
@token_required
|
||||
def run_graphrag(tenant_id,dataset_id):
|
||||
if not dataset_id:
|
||||
return get_error_data_result(message='Lack of "Dataset ID"')
|
||||
if not KnowledgebaseService.accessible(dataset_id, tenant_id):
|
||||
return get_result(
|
||||
data=False,
|
||||
message='No authorization.',
|
||||
code=RetCode.AUTHENTICATION_ERROR
|
||||
)
|
||||
|
||||
ok, kb = KnowledgebaseService.get_by_id(dataset_id)
|
||||
if not ok:
|
||||
return get_error_data_result(message="Invalid Dataset ID")
|
||||
|
||||
task_id = kb.graphrag_task_id
|
||||
if task_id:
|
||||
ok, task = TaskService.get_by_id(task_id)
|
||||
if not ok:
|
||||
logging.warning(f"A valid GraphRAG task id is expected for Dataset {dataset_id}")
|
||||
|
||||
if task and task.progress not in [-1, 1]:
|
||||
return get_error_data_result(message=f"Task {task_id} in progress with status {task.progress}. A Graph Task is already running.")
|
||||
|
||||
documents, _ = DocumentService.get_by_kb_id(
|
||||
kb_id=dataset_id,
|
||||
page_number=0,
|
||||
items_per_page=0,
|
||||
orderby="create_time",
|
||||
desc=False,
|
||||
keywords="",
|
||||
run_status=[],
|
||||
types=[],
|
||||
suffix=[],
|
||||
)
|
||||
if not documents:
|
||||
return get_error_data_result(message=f"No documents in Dataset {dataset_id}")
|
||||
|
||||
sample_document = documents[0]
|
||||
document_ids = [document["id"] for document in documents]
|
||||
|
||||
task_id = queue_raptor_o_graphrag_tasks(sample_doc_id=sample_document, ty="graphrag", priority=0, fake_doc_id=GRAPH_RAPTOR_FAKE_DOC_ID, doc_ids=list(document_ids))
|
||||
|
||||
if not KnowledgebaseService.update_by_id(kb.id, {"graphrag_task_id": task_id}):
|
||||
logging.warning(f"Cannot save graphrag_task_id for Dataset {dataset_id}")
|
||||
|
||||
return get_result(data={"graphrag_task_id": task_id})
|
||||
|
||||
|
||||
@manager.route("/datasets/<dataset_id>/trace_graphrag", methods=["GET"]) # noqa: F821
|
||||
@token_required
|
||||
def trace_graphrag(tenant_id,dataset_id):
|
||||
if not dataset_id:
|
||||
return get_error_data_result(message='Lack of "Dataset ID"')
|
||||
if not KnowledgebaseService.accessible(dataset_id, tenant_id):
|
||||
return get_result(
|
||||
data=False,
|
||||
message='No authorization.',
|
||||
code=RetCode.AUTHENTICATION_ERROR
|
||||
)
|
||||
|
||||
ok, kb = KnowledgebaseService.get_by_id(dataset_id)
|
||||
if not ok:
|
||||
return get_error_data_result(message="Invalid Dataset ID")
|
||||
|
||||
task_id = kb.graphrag_task_id
|
||||
if not task_id:
|
||||
return get_result(data={})
|
||||
|
||||
ok, task = TaskService.get_by_id(task_id)
|
||||
if not ok:
|
||||
return get_result(data={})
|
||||
|
||||
return get_result(data=task.to_dict())
|
||||
|
||||
|
||||
@manager.route("/datasets/<dataset_id>/run_raptor", methods=["POST"]) # noqa: F821
|
||||
@token_required
|
||||
def run_raptor(tenant_id,dataset_id):
|
||||
if not dataset_id:
|
||||
return get_error_data_result(message='Lack of "Dataset ID"')
|
||||
if not KnowledgebaseService.accessible(dataset_id, tenant_id):
|
||||
return get_result(
|
||||
data=False,
|
||||
message='No authorization.',
|
||||
code=RetCode.AUTHENTICATION_ERROR
|
||||
)
|
||||
|
||||
ok, kb = KnowledgebaseService.get_by_id(dataset_id)
|
||||
if not ok:
|
||||
return get_error_data_result(message="Invalid Dataset ID")
|
||||
|
||||
task_id = kb.raptor_task_id
|
||||
if task_id:
|
||||
ok, task = TaskService.get_by_id(task_id)
|
||||
if not ok:
|
||||
logging.warning(f"A valid RAPTOR task id is expected for Dataset {dataset_id}")
|
||||
|
||||
if task and task.progress not in [-1, 1]:
|
||||
return get_error_data_result(message=f"Task {task_id} in progress with status {task.progress}. A RAPTOR Task is already running.")
|
||||
|
||||
documents, _ = DocumentService.get_by_kb_id(
|
||||
kb_id=dataset_id,
|
||||
page_number=0,
|
||||
items_per_page=0,
|
||||
orderby="create_time",
|
||||
desc=False,
|
||||
keywords="",
|
||||
run_status=[],
|
||||
types=[],
|
||||
suffix=[],
|
||||
)
|
||||
if not documents:
|
||||
return get_error_data_result(message=f"No documents in Dataset {dataset_id}")
|
||||
|
||||
sample_document = documents[0]
|
||||
document_ids = [document["id"] for document in documents]
|
||||
|
||||
task_id = queue_raptor_o_graphrag_tasks(sample_doc_id=sample_document, ty="raptor", priority=0, fake_doc_id=GRAPH_RAPTOR_FAKE_DOC_ID, doc_ids=list(document_ids))
|
||||
|
||||
if not KnowledgebaseService.update_by_id(kb.id, {"raptor_task_id": task_id}):
|
||||
logging.warning(f"Cannot save raptor_task_id for Dataset {dataset_id}")
|
||||
|
||||
return get_result(data={"raptor_task_id": task_id})
|
||||
|
||||
|
||||
@manager.route("/datasets/<dataset_id>/trace_raptor", methods=["GET"]) # noqa: F821
|
||||
@token_required
|
||||
def trace_raptor(tenant_id,dataset_id):
|
||||
if not dataset_id:
|
||||
return get_error_data_result(message='Lack of "Dataset ID"')
|
||||
|
||||
if not KnowledgebaseService.accessible(dataset_id, tenant_id):
|
||||
return get_result(
|
||||
data=False,
|
||||
message='No authorization.',
|
||||
code=RetCode.AUTHENTICATION_ERROR
|
||||
)
|
||||
ok, kb = KnowledgebaseService.get_by_id(dataset_id)
|
||||
if not ok:
|
||||
return get_error_data_result(message="Invalid Dataset ID")
|
||||
|
||||
task_id = kb.raptor_task_id
|
||||
if not task_id:
|
||||
return get_result(data={})
|
||||
|
||||
ok, task = TaskService.get_by_id(task_id)
|
||||
if not ok:
|
||||
return get_error_data_result(message="RAPTOR Task Not Found or Error Occurred")
|
||||
|
||||
return get_result(data=task.to_dict())
|
||||
@ -974,6 +974,237 @@ Failure:
|
||||
|
||||
---
|
||||
|
||||
### Construct knowledge graph
|
||||
|
||||
**POST** `/api/v1/datasets/{dataset_id}/run_graphrag`
|
||||
|
||||
Constructs a knowledge graph from a specified dataset.
|
||||
|
||||
#### Request
|
||||
|
||||
- Method: POST
|
||||
- URL: `/api/v1/datasets/{dataset_id}/run_graphrag`
|
||||
- Headers:
|
||||
- `'Authorization: Bearer <YOUR_API_KEY>'`
|
||||
|
||||
##### Request example
|
||||
|
||||
```bash
|
||||
curl --request POST \
|
||||
--url http://{address}/api/v1/datasets/{dataset_id}/run_graphrag \
|
||||
--header 'Authorization: Bearer <YOUR_API_KEY>'
|
||||
```
|
||||
|
||||
##### Request parameters
|
||||
|
||||
- `dataset_id`: (*Path parameter*)
|
||||
The ID of the target dataset.
|
||||
|
||||
#### Response
|
||||
|
||||
Success:
|
||||
|
||||
```json
|
||||
{
|
||||
"code":0,
|
||||
"data":{
|
||||
"graphrag_task_id":"e498de54bfbb11f0ba028f704583b57b"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Failure:
|
||||
|
||||
```json
|
||||
{
|
||||
"code": 102,
|
||||
"message": "Invalid Dataset ID"
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Get knowledge graph construction status
|
||||
|
||||
**GET** `/api/v1/datasets/{dataset_id}/trace_graphrag`
|
||||
|
||||
Retrieves the knowledge graph construction status for a specified dataset.
|
||||
|
||||
#### Request
|
||||
|
||||
- Method: GET
|
||||
- URL: `/api/v1/datasets/{dataset_id}/trace_graphrag`
|
||||
- Headers:
|
||||
- `'Authorization: Bearer <YOUR_API_KEY>'`
|
||||
|
||||
##### Request example
|
||||
|
||||
```bash
|
||||
curl --request GET \
|
||||
--url http://{address}/api/v1/datasets/{dataset_id}/trace_graphrag \
|
||||
--header 'Authorization: Bearer <YOUR_API_KEY>'
|
||||
```
|
||||
|
||||
##### Request parameters
|
||||
|
||||
- `dataset_id`: (*Path parameter*)
|
||||
The ID of the target dataset.
|
||||
|
||||
#### Response
|
||||
|
||||
Success:
|
||||
|
||||
```json
|
||||
{
|
||||
"code":0,
|
||||
"data":{
|
||||
"begin_at":"Wed, 12 Nov 2025 19:36:56 GMT",
|
||||
"chunk_ids":"",
|
||||
"create_date":"Wed, 12 Nov 2025 19:36:56 GMT",
|
||||
"create_time":1762947416350,
|
||||
"digest":"39e43572e3dcd84f",
|
||||
"doc_id":"44661c10bde211f0bc93c164a47ffc40",
|
||||
"from_page":100000000,
|
||||
"id":"e498de54bfbb11f0ba028f704583b57b",
|
||||
"priority":0,
|
||||
"process_duration":2.45419,
|
||||
"progress":1.0,
|
||||
"progress_msg":"19:36:56 created task graphrag\n19:36:57 Task has been received.\n19:36:58 [GraphRAG] doc:083661febe2411f0bc79456921e5745f has no available chunks, skip generation.\n19:36:58 [GraphRAG] build_subgraph doc:44661c10bde211f0bc93c164a47ffc40 start (chunks=1, timeout=10000000000s)\n19:36:58 Graph already contains 44661c10bde211f0bc93c164a47ffc40\n19:36:58 [GraphRAG] build_subgraph doc:44661c10bde211f0bc93c164a47ffc40 empty\n19:36:58 [GraphRAG] kb:33137ed0bde211f0bc93c164a47ffc40 no subgraphs generated successfully, end.\n19:36:58 Knowledge Graph done (0.72s)","retry_count":1,
|
||||
"task_type":"graphrag",
|
||||
"to_page":100000000,
|
||||
"update_date":"Wed, 12 Nov 2025 19:36:58 GMT",
|
||||
"update_time":1762947418454
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Failure:
|
||||
|
||||
```json
|
||||
{
|
||||
"code": 102,
|
||||
"message": "Invalid Dataset ID"
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Construct RAPTOR
|
||||
|
||||
**POST** `/api/v1/datasets/{dataset_id}/run_raptor`
|
||||
|
||||
Construct a RAPTOR from a specified dataset.
|
||||
|
||||
#### Request
|
||||
|
||||
- Method: POST
|
||||
- URL: `/api/v1/datasets/{dataset_id}/run_raptor`
|
||||
- Headers:
|
||||
- `'Authorization: Bearer <YOUR_API_KEY>'`
|
||||
|
||||
##### Request example
|
||||
|
||||
```bash
|
||||
curl --request POST \
|
||||
--url http://{address}/api/v1/datasets/{dataset_id}/run_raptor \
|
||||
--header 'Authorization: Bearer <YOUR_API_KEY>'
|
||||
```
|
||||
|
||||
##### Request parameters
|
||||
|
||||
- `dataset_id`: (*Path parameter*)
|
||||
The ID of the target dataset.
|
||||
|
||||
#### Response
|
||||
|
||||
Success:
|
||||
|
||||
```json
|
||||
{
|
||||
"code":0,
|
||||
"data":{
|
||||
"raptor_task_id":"50d3c31cbfbd11f0ba028f704583b57b"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Failure:
|
||||
|
||||
```json
|
||||
{
|
||||
"code": 102,
|
||||
"message": "Invalid Dataset ID"
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Get RAPTOR construction status
|
||||
|
||||
**GET** `/api/v1/datasets/{dataset_id}/trace_raptor`
|
||||
|
||||
Retrieves the RAPTOR construction status for a specified dataset.
|
||||
|
||||
#### Request
|
||||
|
||||
- Method: GET
|
||||
- URL: `/api/v1/datasets/{dataset_id}/trace_raptor`
|
||||
- Headers:
|
||||
- `'Authorization: Bearer <YOUR_API_KEY>'`
|
||||
|
||||
##### Request example
|
||||
|
||||
```bash
|
||||
curl --request GET \
|
||||
--url http://{address}/api/v1/datasets/{dataset_id}/trace_raptor \
|
||||
--header 'Authorization: Bearer <YOUR_API_KEY>'
|
||||
```
|
||||
|
||||
##### Request parameters
|
||||
|
||||
- `dataset_id`: (*Path parameter*)
|
||||
The ID of the target dataset.
|
||||
|
||||
#### Response
|
||||
|
||||
Success:
|
||||
|
||||
```json
|
||||
{
|
||||
"code":0,
|
||||
"data":{
|
||||
"begin_at":"Wed, 12 Nov 2025 19:47:07 GMT",
|
||||
"chunk_ids":"",
|
||||
"create_date":"Wed, 12 Nov 2025 19:47:07 GMT",
|
||||
"create_time":1762948027427,
|
||||
"digest":"8b279a6248cb8fc6",
|
||||
"doc_id":"44661c10bde211f0bc93c164a47ffc40",
|
||||
"from_page":100000000,
|
||||
"id":"50d3c31cbfbd11f0ba028f704583b57b",
|
||||
"priority":0,
|
||||
"process_duration":0.948244,
|
||||
"progress":1.0,
|
||||
"progress_msg":"19:47:07 created task raptor\n19:47:07 Task has been received.\n19:47:07 Processing...\n19:47:07 Processing...\n19:47:07 Indexing done (0.01s).\n19:47:07 Task done (0.29s)",
|
||||
"retry_count":1,
|
||||
"task_type":"raptor",
|
||||
"to_page":100000000,
|
||||
"update_date":"Wed, 12 Nov 2025 19:47:07 GMT",
|
||||
"update_time":1762948027948
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Failure:
|
||||
|
||||
```json
|
||||
{
|
||||
"code": 102,
|
||||
"message": "Invalid Dataset ID"
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## FILE MANAGEMENT WITHIN DATASET
|
||||
|
||||
---
|
||||
|
||||
Reference in New Issue
Block a user