mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Fix: Move pagerank field from create to update dataset API (#8217)
### What problem does this PR solve?

- Remove `pagerank` from `CreateDatasetReq` and add it to `UpdateDatasetReq`
- Add pagerank update logic to the dataset update endpoint
- Update the API documentation to reflect the changes
- Modify related test cases and SDK references

#8208

This change makes `pagerank` a mutable property that can only be set after dataset creation, and only when Elasticsearch is used as the doc engine.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
@ -16,10 +16,12 @@
|
||||
|
||||
|
||||
import logging
|
||||
import os
|
||||
|
||||
from flask import request
|
||||
from peewee import OperationalError
|
||||
|
||||
from api import settings
|
||||
from api.db import FileSource, StatusEnum
|
||||
from api.db.db_models import File
|
||||
from api.db.services.document_service import DocumentService
|
||||
@ -48,6 +50,8 @@ from api.utils.validation_utils import (
|
||||
validate_and_parse_json_request,
|
||||
validate_and_parse_request_args,
|
||||
)
|
||||
from rag.nlp import search
|
||||
from rag.settings import PAGERANK_FLD
|
||||
|
||||
|
||||
@manager.route("/datasets", methods=["POST"]) # noqa: F821
|
||||
@ -97,9 +101,6 @@ def create(tenant_id):
|
||||
"picture", "presentation", "qa", "table", "tag"
|
||||
]
|
||||
description: Chunking method.
|
||||
pagerank:
|
||||
type: integer
|
||||
description: Set page rank.
|
||||
parser_config:
|
||||
type: object
|
||||
description: Parser configuration.
|
||||
@ -352,6 +353,16 @@ def update(tenant_id, dataset_id):
|
||||
if not ok:
|
||||
return err
|
||||
|
||||
if "pagerank" in req and req["pagerank"] != kb.pagerank:
|
||||
if os.environ.get("DOC_ENGINE", "elasticsearch") == "infinity":
|
||||
return get_error_argument_result(message="'pagerank' can only be set when doc_engine is elasticsearch")
|
||||
|
||||
if req["pagerank"] > 0:
|
||||
settings.docStoreConn.update({"kb_id": kb.id}, {PAGERANK_FLD: req["pagerank"]}, search.index_name(kb.tenant_id), kb.id)
|
||||
else:
|
||||
# Elasticsearch requires PAGERANK_FLD to be non-zero, so remove the field instead of storing 0.
|
||||
settings.docStoreConn.update({"exists": PAGERANK_FLD}, {"remove": PAGERANK_FLD}, search.index_name(kb.tenant_id), kb.id)
|
||||
|
||||
if not KnowledgebaseService.update_by_id(kb.id, req):
|
||||
return get_error_data_result(message="Update dataset error.(Database error)")
|
||||
|
||||
|
||||
@ -383,7 +383,6 @@ class CreateDatasetReq(Base):
|
||||
embedding_model: Annotated[str, StringConstraints(strip_whitespace=True, max_length=255), Field(default="", serialization_alias="embd_id")]
|
||||
permission: PermissionEnum = Field(default=PermissionEnum.me, min_length=1, max_length=16)
|
||||
chunk_method: ChunkMethodnEnum = Field(default=ChunkMethodnEnum.naive, min_length=1, max_length=32, serialization_alias="parser_id")
|
||||
pagerank: int = Field(default=0, ge=0, le=100)
|
||||
parser_config: ParserConfig | None = Field(default=None)
|
||||
|
||||
@field_validator("avatar")
|
||||
@ -539,6 +538,7 @@ class CreateDatasetReq(Base):
|
||||
class UpdateDatasetReq(CreateDatasetReq):
|
||||
dataset_id: str = Field(...)
|
||||
name: Annotated[str, StringConstraints(strip_whitespace=True, min_length=1, max_length=DATASET_NAME_LIMIT), Field(default="")]
|
||||
pagerank: int = Field(default=0, ge=0, le=100)
|
||||
|
||||
@field_validator("dataset_id", mode="before")
|
||||
@classmethod
|
||||
|
||||
Reference in New Issue
Block a user