Fix: Move pagerank field from create to update dataset API (#8217)

### What problem does this PR solve?

- Remove pagerank from CreateDatasetReq and add to UpdateDatasetReq
- Add pagerank update logic in dataset update endpoint
- Update API documentation to reflect changes
- Modify related test cases and SDK references

Related issue: #8208

This change makes pagerank a mutable property that can only be set after
dataset creation (through the dataset update endpoint), and only when the doc
engine is Elasticsearch; the update is rejected when DOC_ENGINE is "infinity".

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
Liu An
2025-06-12 15:47:49 +08:00
committed by GitHub
parent d0c5ff04a6
commit 7fbbc9650d
9 changed files with 78 additions and 109 deletions

View File

@ -16,10 +16,12 @@
import logging
import os
from flask import request
from peewee import OperationalError
from api import settings
from api.db import FileSource, StatusEnum
from api.db.db_models import File
from api.db.services.document_service import DocumentService
@ -48,6 +50,8 @@ from api.utils.validation_utils import (
validate_and_parse_json_request,
validate_and_parse_request_args,
)
from rag.nlp import search
from rag.settings import PAGERANK_FLD
@manager.route("/datasets", methods=["POST"]) # noqa: F821
@ -97,9 +101,6 @@ def create(tenant_id):
"picture", "presentation", "qa", "table", "tag"
]
description: Chunking method.
pagerank:
type: integer
description: Set page rank.
parser_config:
type: object
description: Parser configuration.
@ -352,6 +353,16 @@ def update(tenant_id, dataset_id):
if not ok:
return err
if "pagerank" in req and req["pagerank"] != kb.pagerank:
if os.environ.get("DOC_ENGINE", "elasticsearch") == "infinity":
return get_error_argument_result(message="'pagerank' can only be set when doc_engine is elasticsearch")
if req["pagerank"] > 0:
settings.docStoreConn.update({"kb_id": kb.id}, {PAGERANK_FLD: req["pagerank"]}, search.index_name(kb.tenant_id), kb.id)
else:
# Elasticsearch requires PAGERANK_FLD to be non-zero, so remove the field instead of writing 0.
settings.docStoreConn.update({"exists": PAGERANK_FLD}, {"remove": PAGERANK_FLD}, search.index_name(kb.tenant_id), kb.id)
if not KnowledgebaseService.update_by_id(kb.id, req):
return get_error_data_result(message="Update dataset error.(Database error)")

View File

@ -383,7 +383,6 @@ class CreateDatasetReq(Base):
embedding_model: Annotated[str, StringConstraints(strip_whitespace=True, max_length=255), Field(default="", serialization_alias="embd_id")]
permission: PermissionEnum = Field(default=PermissionEnum.me, min_length=1, max_length=16)
chunk_method: ChunkMethodnEnum = Field(default=ChunkMethodnEnum.naive, min_length=1, max_length=32, serialization_alias="parser_id")
pagerank: int = Field(default=0, ge=0, le=100)
parser_config: ParserConfig | None = Field(default=None)
@field_validator("avatar")
@ -539,6 +538,7 @@ class CreateDatasetReq(Base):
class UpdateDatasetReq(CreateDatasetReq):
dataset_id: str = Field(...)
name: Annotated[str, StringConstraints(strip_whitespace=True, min_length=1, max_length=DATASET_NAME_LIMIT), Field(default="")]
pagerank: int = Field(default=0, ge=0, le=100)
@field_validator("dataset_id", mode="before")
@classmethod