Mirror of https://github.com/infiniflow/ragflow.git (synced 2026-02-06 10:35:06 +08:00)

Compare commits: ba71160b14...70a0f081f6 (4 commits)

- 70a0f081f6
- 93422fa8cc
- bfc84ba95b
- 871055b0fc
@@ -96,12 +96,12 @@ login_manager.init_app(app)
 commands.register_commands(app)


-def search_pages_path(pages_dir):
+def search_pages_path(page_path):
     app_path_list = [
-        path for path in pages_dir.glob("*_app.py") if not path.name.startswith(".")
+        path for path in page_path.glob("*_app.py") if not path.name.startswith(".")
     ]
     api_path_list = [
-        path for path in pages_dir.glob("*sdk/*.py") if not path.name.startswith(".")
+        path for path in page_path.glob("*sdk/*.py") if not path.name.startswith(".")
     ]
     app_path_list.extend(api_path_list)
     return app_path_list

@@ -138,7 +138,7 @@ pages_dir = [
 ]

 client_urls_prefix = [
-    register_page(path) for dir in pages_dir for path in search_pages_path(dir)
+    register_page(path) for directory in pages_dir for path in search_pages_path(directory)
 ]


@@ -177,5 +177,7 @@ def load_user(web_request):


 @app.teardown_request
-def _db_close(exc):
+def _db_close(exception):
+    if exception:
+        logging.exception(f"Request failed: {exception}")
     close_connection()
@@ -426,7 +426,6 @@ def test_db_connect():
     try:
         import trino
         import os
-        from trino.auth import BasicAuthentication
     except Exception as e:
         return server_error_response(f"Missing dependency 'trino'. Please install: pip install trino, detail: {e}")

@@ -438,7 +437,7 @@ def test_db_connect():

     auth = None
     if http_scheme == "https" and req.get("password"):
-        auth = BasicAuthentication(req.get("username") or "ragflow", req["password"])
+        auth = trino.BasicAuthentication(req.get("username") or "ragflow", req["password"])

     conn = trino.dbapi.connect(
         host=req["host"],

@@ -471,8 +470,8 @@ def test_db_connect():
 @login_required
 def getlistversion(canvas_id):
     try:
-        list = sorted([c.to_dict() for c in UserCanvasVersionService.list_by_canvas_id(canvas_id)], key=lambda x: x["update_time"]*-1)
-        return get_json_result(data=list)
+        versions = sorted([c.to_dict() for c in UserCanvasVersionService.list_by_canvas_id(canvas_id)], key=lambda x: x["update_time"]*-1)
+        return get_json_result(data=versions)
     except Exception as e:
         return get_data_error_result(message=f"Error getting history files: {e}")
@@ -55,7 +55,6 @@ def set_connector():
         "timeout_secs": int(req.get("timeout_secs", 60 * 29)),
         "status": TaskStatus.SCHEDULE,
     }
-    conn["status"] = TaskStatus.SCHEDULE
     ConnectorService.save(**conn)

     time.sleep(1)
@@ -85,7 +85,6 @@ def get():
         if not e:
             return get_data_error_result(message="Conversation not found!")
         tenants = UserTenantService.query(user_id=current_user.id)
-        avatar = None
         for tenant in tenants:
             dialog = DialogService.query(tenant_id=tenant.tenant_id, id=conv.dialog_id)
             if dialog and len(dialog) > 0:
@@ -154,15 +154,15 @@ def get_kb_names(kb_ids):
 @login_required
 def list_dialogs():
     try:
-        diags = DialogService.query(
+        conversations = DialogService.query(
             tenant_id=current_user.id,
             status=StatusEnum.VALID.value,
             reverse=True,
             order_by=DialogService.model.create_time)
-        diags = [d.to_dict() for d in diags]
-        for d in diags:
-            d["kb_ids"], d["kb_names"] = get_kb_names(d["kb_ids"])
-        return get_json_result(data=diags)
+        conversations = [d.to_dict() for d in conversations]
+        for conversation in conversations:
+            conversation["kb_ids"], conversation["kb_names"] = get_kb_names(conversation["kb_ids"])
+        return get_json_result(data=conversations)
     except Exception as e:
         return server_error_response(e)
@@ -308,7 +308,7 @@ def get_filter():

 @manager.route("/infos", methods=["POST"])  # noqa: F821
 @login_required
-def docinfos():
+def doc_infos():
     req = request.json
     doc_ids = req["doc_ids"]
     for doc_id in doc_ids:
@@ -544,6 +544,7 @@ def change_parser():
         return get_data_error_result(message="Tenant not found!")
     if settings.docStoreConn.indexExist(search.index_name(tenant_id), doc.kb_id):
         settings.docStoreConn.delete({"doc_id": doc.id}, search.index_name(tenant_id), doc.kb_id)
+    return None

     try:
         if "pipeline_id" in req and req["pipeline_id"] != "":
@@ -246,8 +246,8 @@ def rm():
             try:
                 if file.location:
                     settings.STORAGE_IMPL.rm(file.parent_id, file.location)
-            except Exception:
-                logging.exception(f"Fail to remove object: {file.parent_id}/{file.location}")
+            except Exception as e:
+                logging.exception(f"Fail to remove object: {file.parent_id}/{file.location}, error: {e}")

             informs = File2DocumentService.get_by_file_id(file.id)
             for inform in informs:
@@ -731,6 +731,8 @@ def delete_kb_task():
 def cancel_task(task_id):
     REDIS_CONN.set(f"{task_id}-cancel", "x")

+    kb_task_id_field: str = ""
+    kb_task_finish_at: str = ""
     match pipeline_task_type:
         case PipelineTaskType.GRAPH_RAG:
             kb_task_id_field = "graphrag_task_id"
@@ -21,10 +21,11 @@ import json
 from flask import request
 from peewee import OperationalError
 from api.db.db_models import File
-from api.db.services.document_service import DocumentService
+from api.db.services.document_service import DocumentService, queue_raptor_o_graphrag_tasks
 from api.db.services.file2document_service import File2DocumentService
 from api.db.services.file_service import FileService
 from api.db.services.knowledgebase_service import KnowledgebaseService
+from api.db.services.task_service import GRAPH_RAPTOR_FAKE_DOC_ID, TaskService
 from api.db.services.user_service import TenantService
 from common.constants import RetCode, FileSource, StatusEnum
 from api.utils.api_utils import (
@@ -118,7 +119,6 @@ def create(tenant_id):
     req, err = validate_and_parse_json_request(request, CreateDatasetReq)
     if err is not None:
         return get_error_argument_result(err)
-
     req = KnowledgebaseService.create_with_name(
         name = req.pop("name", None),
         tenant_id = tenant_id,
@@ -144,7 +144,6 @@ def create(tenant_id):
         ok, k = KnowledgebaseService.get_by_id(req["id"])
         if not ok:
             return get_error_data_result(message="Dataset created failed")
-
         response_data = remap_dictionary_keys(k.to_dict())
         return get_result(data=response_data)
     except Exception as e:
@@ -532,3 +531,157 @@ def delete_knowledge_graph(tenant_id, dataset_id):
                               search.index_name(kb.tenant_id), dataset_id)

     return get_result(data=True)
+
+
+@manager.route("/datasets/<dataset_id>/run_graphrag", methods=["POST"])  # noqa: F821
+@token_required
+def run_graphrag(tenant_id, dataset_id):
+    if not dataset_id:
+        return get_error_data_result(message='Lack of "Dataset ID"')
+    if not KnowledgebaseService.accessible(dataset_id, tenant_id):
+        return get_result(
+            data=False,
+            message='No authorization.',
+            code=RetCode.AUTHENTICATION_ERROR
+        )
+
+    ok, kb = KnowledgebaseService.get_by_id(dataset_id)
+    if not ok:
+        return get_error_data_result(message="Invalid Dataset ID")
+
+    task_id = kb.graphrag_task_id
+    if task_id:
+        ok, task = TaskService.get_by_id(task_id)
+        if not ok:
+            logging.warning(f"A valid GraphRAG task id is expected for Dataset {dataset_id}")
+
+        if task and task.progress not in [-1, 1]:
+            return get_error_data_result(message=f"Task {task_id} in progress with status {task.progress}. A Graph Task is already running.")
+
+    documents, _ = DocumentService.get_by_kb_id(
+        kb_id=dataset_id,
+        page_number=0,
+        items_per_page=0,
+        orderby="create_time",
+        desc=False,
+        keywords="",
+        run_status=[],
+        types=[],
+        suffix=[],
+    )
+    if not documents:
+        return get_error_data_result(message=f"No documents in Dataset {dataset_id}")
+
+    sample_document = documents[0]
+    document_ids = [document["id"] for document in documents]
+
+    task_id = queue_raptor_o_graphrag_tasks(sample_doc_id=sample_document, ty="graphrag", priority=0, fake_doc_id=GRAPH_RAPTOR_FAKE_DOC_ID, doc_ids=list(document_ids))
+
+    if not KnowledgebaseService.update_by_id(kb.id, {"graphrag_task_id": task_id}):
+        logging.warning(f"Cannot save graphrag_task_id for Dataset {dataset_id}")
+
+    return get_result(data={"graphrag_task_id": task_id})
+
+
+@manager.route("/datasets/<dataset_id>/trace_graphrag", methods=["GET"])  # noqa: F821
+@token_required
+def trace_graphrag(tenant_id, dataset_id):
+    if not dataset_id:
+        return get_error_data_result(message='Lack of "Dataset ID"')
+    if not KnowledgebaseService.accessible(dataset_id, tenant_id):
+        return get_result(
+            data=False,
+            message='No authorization.',
+            code=RetCode.AUTHENTICATION_ERROR
+        )
+
+    ok, kb = KnowledgebaseService.get_by_id(dataset_id)
+    if not ok:
+        return get_error_data_result(message="Invalid Dataset ID")
+
+    task_id = kb.graphrag_task_id
+    if not task_id:
+        return get_result(data={})
+
+    ok, task = TaskService.get_by_id(task_id)
+    if not ok:
+        return get_result(data={})
+
+    return get_result(data=task.to_dict())
+
+
+@manager.route("/datasets/<dataset_id>/run_raptor", methods=["POST"])  # noqa: F821
+@token_required
+def run_raptor(tenant_id, dataset_id):
+    if not dataset_id:
+        return get_error_data_result(message='Lack of "Dataset ID"')
+    if not KnowledgebaseService.accessible(dataset_id, tenant_id):
+        return get_result(
+            data=False,
+            message='No authorization.',
+            code=RetCode.AUTHENTICATION_ERROR
+        )
+
+    ok, kb = KnowledgebaseService.get_by_id(dataset_id)
+    if not ok:
+        return get_error_data_result(message="Invalid Dataset ID")
+
+    task_id = kb.raptor_task_id
+    if task_id:
+        ok, task = TaskService.get_by_id(task_id)
+        if not ok:
+            logging.warning(f"A valid RAPTOR task id is expected for Dataset {dataset_id}")
+
+        if task and task.progress not in [-1, 1]:
+            return get_error_data_result(message=f"Task {task_id} in progress with status {task.progress}. A RAPTOR Task is already running.")
+
+    documents, _ = DocumentService.get_by_kb_id(
+        kb_id=dataset_id,
+        page_number=0,
+        items_per_page=0,
+        orderby="create_time",
+        desc=False,
+        keywords="",
+        run_status=[],
+        types=[],
+        suffix=[],
+    )
+    if not documents:
+        return get_error_data_result(message=f"No documents in Dataset {dataset_id}")
+
+    sample_document = documents[0]
+    document_ids = [document["id"] for document in documents]
+
+    task_id = queue_raptor_o_graphrag_tasks(sample_doc_id=sample_document, ty="raptor", priority=0, fake_doc_id=GRAPH_RAPTOR_FAKE_DOC_ID, doc_ids=list(document_ids))
+
+    if not KnowledgebaseService.update_by_id(kb.id, {"raptor_task_id": task_id}):
+        logging.warning(f"Cannot save raptor_task_id for Dataset {dataset_id}")
+
+    return get_result(data={"raptor_task_id": task_id})
+
+
+@manager.route("/datasets/<dataset_id>/trace_raptor", methods=["GET"])  # noqa: F821
+@token_required
+def trace_raptor(tenant_id, dataset_id):
+    if not dataset_id:
+        return get_error_data_result(message='Lack of "Dataset ID"')
+
+    if not KnowledgebaseService.accessible(dataset_id, tenant_id):
+        return get_result(
+            data=False,
+            message='No authorization.',
+            code=RetCode.AUTHENTICATION_ERROR
+        )
+    ok, kb = KnowledgebaseService.get_by_id(dataset_id)
+    if not ok:
+        return get_error_data_result(message="Invalid Dataset ID")
+
+    task_id = kb.raptor_task_id
+    if not task_id:
+        return get_result(data={})
+
+    ok, task = TaskService.get_by_id(task_id)
+    if not ok:
+        return get_error_data_result(message="RAPTOR Task Not Found or Error Occurred")
+
+    return get_result(data=task.to_dict())
@@ -974,6 +974,237 @@ Failure:

 ---

+### Construct knowledge graph
+
+**POST** `/api/v1/datasets/{dataset_id}/run_graphrag`
+
+Constructs a knowledge graph from a specified dataset.
+
+#### Request
+
+- Method: POST
+- URL: `/api/v1/datasets/{dataset_id}/run_graphrag`
+- Headers:
+  - `'Authorization: Bearer <YOUR_API_KEY>'`
+
+##### Request example
+
+```bash
+curl --request POST \
+     --url http://{address}/api/v1/datasets/{dataset_id}/run_graphrag \
+     --header 'Authorization: Bearer <YOUR_API_KEY>'
+```
+
+##### Request parameters
+
+- `dataset_id`: (*Path parameter*)
+  The ID of the target dataset.
+
+#### Response
+
+Success:
+
+```json
+{
+    "code":0,
+    "data":{
+        "graphrag_task_id":"e498de54bfbb11f0ba028f704583b57b"
+    }
+}
+```
+
+Failure:
+
+```json
+{
+    "code": 102,
+    "message": "Invalid Dataset ID"
+}
+```
+
+---
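> Editor's note: for orientation, here is a minimal Python sketch of calling this new endpoint with the `requests` library. It is not part of the commit; the base URL, API key, and dataset ID are placeholders, and error handling simply follows the `code`/`message` envelope shown in the responses above.

```python
import requests

# Placeholders: substitute your own server address, API key, and dataset ID.
BASE_URL = "http://<address>"
API_KEY = "<YOUR_API_KEY>"
DATASET_ID = "<dataset_id>"


def run_graphrag(dataset_id: str) -> str:
    """Kick off knowledge-graph construction and return the new task id."""
    resp = requests.post(
        f"{BASE_URL}/api/v1/datasets/{dataset_id}/run_graphrag",
        headers={"Authorization": f"Bearer {API_KEY}"},
        timeout=30,
    )
    body = resp.json()
    if body.get("code") != 0:  # non-zero code signals failure per the docs above
        raise RuntimeError(body.get("message", "run_graphrag failed"))
    return body["data"]["graphrag_task_id"]


if __name__ == "__main__":
    print("GraphRAG task id:", run_graphrag(DATASET_ID))
```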
+### Get knowledge graph construction status
+
+**GET** `/api/v1/datasets/{dataset_id}/trace_graphrag`
+
+Retrieves the knowledge graph construction status for a specified dataset.
+
+#### Request
+
+- Method: GET
+- URL: `/api/v1/datasets/{dataset_id}/trace_graphrag`
+- Headers:
+  - `'Authorization: Bearer <YOUR_API_KEY>'`
+
+##### Request example
+
+```bash
+curl --request GET \
+     --url http://{address}/api/v1/datasets/{dataset_id}/trace_graphrag \
+     --header 'Authorization: Bearer <YOUR_API_KEY>'
+```
+
+##### Request parameters
+
+- `dataset_id`: (*Path parameter*)
+  The ID of the target dataset.
+
+#### Response
+
+Success:
+
+```json
+{
+    "code":0,
+    "data":{
+        "begin_at":"Wed, 12 Nov 2025 19:36:56 GMT",
+        "chunk_ids":"",
+        "create_date":"Wed, 12 Nov 2025 19:36:56 GMT",
+        "create_time":1762947416350,
+        "digest":"39e43572e3dcd84f",
+        "doc_id":"44661c10bde211f0bc93c164a47ffc40",
+        "from_page":100000000,
+        "id":"e498de54bfbb11f0ba028f704583b57b",
+        "priority":0,
+        "process_duration":2.45419,
+        "progress":1.0,
+        "progress_msg":"19:36:56 created task graphrag\n19:36:57 Task has been received.\n19:36:58 [GraphRAG] doc:083661febe2411f0bc79456921e5745f has no available chunks, skip generation.\n19:36:58 [GraphRAG] build_subgraph doc:44661c10bde211f0bc93c164a47ffc40 start (chunks=1, timeout=10000000000s)\n19:36:58 Graph already contains 44661c10bde211f0bc93c164a47ffc40\n19:36:58 [GraphRAG] build_subgraph doc:44661c10bde211f0bc93c164a47ffc40 empty\n19:36:58 [GraphRAG] kb:33137ed0bde211f0bc93c164a47ffc40 no subgraphs generated successfully, end.\n19:36:58 Knowledge Graph done (0.72s)",
+        "retry_count":1,
+        "task_type":"graphrag",
+        "to_page":100000000,
+        "update_date":"Wed, 12 Nov 2025 19:36:58 GMT",
+        "update_time":1762947418454
+    }
+}
+```
+
+Failure:
+
+```json
+{
+    "code": 102,
+    "message": "Invalid Dataset ID"
+}
+```
+
+---
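> Editor's note: since the response mirrors the stored task record, a client can poll this endpoint until the task settles. A hedged sketch, assuming `progress` reaches 1 on success and -1 on failure (the same terminal states `run_graphrag` checks before queuing a new task) and reusing the placeholder constants from the previous sketch:

```python
import time

import requests

BASE_URL = "http://<address>"   # placeholder server address
API_KEY = "<YOUR_API_KEY>"      # placeholder API key


def wait_for_graphrag(dataset_id: str, interval_secs: float = 5.0) -> dict:
    """Poll trace_graphrag until the task record reports a terminal progress."""
    url = f"{BASE_URL}/api/v1/datasets/{dataset_id}/trace_graphrag"
    headers = {"Authorization": f"Bearer {API_KEY}"}
    while True:
        body = requests.get(url, headers=headers, timeout=30).json()
        if body.get("code") != 0:
            raise RuntimeError(body.get("message", "trace_graphrag failed"))
        task = body.get("data") or {}
        if not task:  # endpoint returns {} when no task has been recorded
            raise RuntimeError("no GraphRAG task recorded for this dataset")
        if task.get("progress") == 1:    # finished
            return task
        if task.get("progress") == -1:   # failed; progress_msg carries the worker log
            raise RuntimeError(task.get("progress_msg", "GraphRAG task failed"))
        time.sleep(interval_secs)
```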
+### Construct RAPTOR
+
+**POST** `/api/v1/datasets/{dataset_id}/run_raptor`
+
+Constructs a RAPTOR tree from a specified dataset.
+
+#### Request
+
+- Method: POST
+- URL: `/api/v1/datasets/{dataset_id}/run_raptor`
+- Headers:
+  - `'Authorization: Bearer <YOUR_API_KEY>'`
+
+##### Request example
+
+```bash
+curl --request POST \
+     --url http://{address}/api/v1/datasets/{dataset_id}/run_raptor \
+     --header 'Authorization: Bearer <YOUR_API_KEY>'
+```
+
+##### Request parameters
+
+- `dataset_id`: (*Path parameter*)
+  The ID of the target dataset.
+
+#### Response
+
+Success:
+
+```json
+{
+    "code":0,
+    "data":{
+        "raptor_task_id":"50d3c31cbfbd11f0ba028f704583b57b"
+    }
+}
+```
+
+Failure:
+
+```json
+{
+    "code": 102,
+    "message": "Invalid Dataset ID"
+}
+```
+
+---
+### Get RAPTOR construction status
+
+**GET** `/api/v1/datasets/{dataset_id}/trace_raptor`
+
+Retrieves the RAPTOR construction status for a specified dataset.
+
+#### Request
+
+- Method: GET
+- URL: `/api/v1/datasets/{dataset_id}/trace_raptor`
+- Headers:
+  - `'Authorization: Bearer <YOUR_API_KEY>'`
+
+##### Request example
+
+```bash
+curl --request GET \
+     --url http://{address}/api/v1/datasets/{dataset_id}/trace_raptor \
+     --header 'Authorization: Bearer <YOUR_API_KEY>'
+```
+
+##### Request parameters
+
+- `dataset_id`: (*Path parameter*)
+  The ID of the target dataset.
+
+#### Response
+
+Success:
+
+```json
+{
+    "code":0,
+    "data":{
+        "begin_at":"Wed, 12 Nov 2025 19:47:07 GMT",
+        "chunk_ids":"",
+        "create_date":"Wed, 12 Nov 2025 19:47:07 GMT",
+        "create_time":1762948027427,
+        "digest":"8b279a6248cb8fc6",
+        "doc_id":"44661c10bde211f0bc93c164a47ffc40",
+        "from_page":100000000,
+        "id":"50d3c31cbfbd11f0ba028f704583b57b",
+        "priority":0,
+        "process_duration":0.948244,
+        "progress":1.0,
+        "progress_msg":"19:47:07 created task raptor\n19:47:07 Task has been received.\n19:47:07 Processing...\n19:47:07 Processing...\n19:47:07 Indexing done (0.01s).\n19:47:07 Task done (0.29s)",
+        "retry_count":1,
+        "task_type":"raptor",
+        "to_page":100000000,
+        "update_date":"Wed, 12 Nov 2025 19:47:07 GMT",
+        "update_time":1762948027948
+    }
+}
+```
+
+Failure:
+
+```json
+{
+    "code": 102,
+    "message": "Invalid Dataset ID"
+}
+```
+
+---
+
 ## FILE MANAGEMENT WITHIN DATASET

 ---
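> Editor's note: the RAPTOR pair behaves like the GraphRAG pair above, so a single helper can drive either pipeline end to end. A sketch under the same assumptions (placeholder address and key, `progress` of 1/-1 as the terminal states); this is illustrative, not an official client:

```python
import time

import requests

BASE_URL = "http://<address>"   # placeholder server address
API_KEY = "<YOUR_API_KEY>"      # placeholder API key


def run_and_wait(dataset_id: str, kind: str = "raptor", interval_secs: float = 5.0) -> dict:
    """Start a RAPTOR (kind="raptor") or GraphRAG (kind="graphrag") build and block until it settles."""
    headers = {"Authorization": f"Bearer {API_KEY}"}
    base = f"{BASE_URL}/api/v1/datasets/{dataset_id}"

    # Kick off the build via run_raptor / run_graphrag.
    body = requests.post(f"{base}/run_{kind}", headers=headers, timeout=30).json()
    if body.get("code") != 0:
        raise RuntimeError(body.get("message", f"run_{kind} failed"))

    # Poll the matching trace endpoint until the task record is terminal.
    while True:
        task = requests.get(f"{base}/trace_{kind}", headers=headers, timeout=30).json().get("data") or {}
        if task.get("progress") == 1:
            return task
        if task.get("progress") == -1:
            raise RuntimeError(task.get("progress_msg", f"{kind} task failed"))
        time.sleep(interval_secs)
```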
@@ -482,7 +482,7 @@ def tree_merge(bull, sections, depth):
     root = Node(level=0, depth=target_level, texts=[])
     root.build_tree(lines)

-    return [("\n").join(element) for element in root.get_tree() if element]
+    return [element for element in root.get_tree() if element]


 def hierarchical_merge(bull, sections, depth):
@@ -16,14 +16,15 @@
 from concurrent.futures import ThreadPoolExecutor, as_completed

 import pytest
-from common import create_dataset
-from configs import DATASET_NAME_LIMIT, INVALID_API_TOKEN
+from configs import DATASET_NAME_LIMIT, DEFAULT_PARSER_CONFIG, INVALID_API_TOKEN
 from hypothesis import example, given, settings
 from libs.auth import RAGFlowHttpApiAuth
 from utils import encode_avatar
 from utils.file_utils import create_image_file
 from utils.hypothesis_utils import valid_names
-from configs import DEFAULT_PARSER_CONFIG
+
+from common import create_dataset


 @pytest.mark.usefixtures("clear_datasets")
 class TestAuthorization:
@@ -125,8 +126,8 @@ class TestDatasetCreate:
         assert res["code"] == 0, res

         res = create_dataset(HttpApiAuth, payload)
-        assert res["code"] == 103, res
-        assert res["message"] == f"Dataset name '{name}' already exists", res
+        assert res["code"] == 0, res
+        assert res["data"]["name"] == name + "(1)", res

     @pytest.mark.p3
     def test_name_case_insensitive(self, HttpApiAuth):

@@ -137,8 +138,8 @@ class TestDatasetCreate:

         payload = {"name": name.lower()}
         res = create_dataset(HttpApiAuth, payload)
-        assert res["code"] == 103, res
-        assert res["message"] == f"Dataset name '{name.lower()}' already exists", res
+        assert res["code"] == 0, res
+        assert res["data"]["name"] == name.lower() + "(1)", res

     @pytest.mark.p2
     def test_avatar(self, HttpApiAuth, tmp_path):
@@ -17,13 +17,13 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
 from operator import attrgetter

 import pytest
-from configs import DATASET_NAME_LIMIT, HOST_ADDRESS, INVALID_API_TOKEN
+from configs import DATASET_NAME_LIMIT, DEFAULT_PARSER_CONFIG, HOST_ADDRESS, INVALID_API_TOKEN
 from hypothesis import example, given, settings
 from ragflow_sdk import DataSet, RAGFlow
 from utils import encode_avatar
 from utils.file_utils import create_image_file
 from utils.hypothesis_utils import valid_names
-from configs import DEFAULT_PARSER_CONFIG
+

 @pytest.mark.usefixtures("clear_datasets")
 class TestAuthorization:
@@ -95,9 +95,8 @@ class TestDatasetCreate:
         payload = {"name": name}
         client.create_dataset(**payload)

-        with pytest.raises(Exception) as excinfo:
-            client.create_dataset(**payload)
-        assert str(excinfo.value) == f"Dataset name '{name}' already exists", str(excinfo.value)
+        dataset = client.create_dataset(**payload)
+        assert dataset.name == name + "(1)", str(dataset)

     @pytest.mark.p3
     def test_name_case_insensitive(self, client):

@@ -106,9 +105,8 @@ class TestDatasetCreate:
         client.create_dataset(**payload)

         payload = {"name": name.lower()}
-        with pytest.raises(Exception) as excinfo:
-            client.create_dataset(**payload)
-        assert str(excinfo.value) == f"Dataset name '{name.lower()}' already exists", str(excinfo.value)
+        dataset = client.create_dataset(**payload)
+        assert dataset.name == name.lower() + "(1)", str(dataset)

     @pytest.mark.p2
     def test_avatar(self, client, tmp_path):