mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Feature/doc upload api add parent path 20251112 (#11231)
### What problem does this PR solve?

Add an optional `parent_path` parameter to the document upload API (#11230).

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

Co-authored-by: virgilwong <hyhvirgil@gmail.com>
This commit is contained in:
@ -93,6 +93,10 @@ def upload(dataset_id, tenant_id):
|
|||||||
type: file
|
type: file
|
||||||
required: true
|
required: true
|
||||||
description: Document files to upload.
|
description: Document files to upload.
|
||||||
|
- in: formData
|
||||||
|
name: parent_path
|
||||||
|
type: string
|
||||||
|
description: Optional nested path under the parent folder. Uses '/' separators.
|
||||||
responses:
|
responses:
|
||||||
200:
|
200:
|
||||||
description: Successfully uploaded documents.
|
description: Successfully uploaded documents.
|
||||||
@ -151,7 +155,7 @@ def upload(dataset_id, tenant_id):
|
|||||||
e, kb = KnowledgebaseService.get_by_id(dataset_id)
|
e, kb = KnowledgebaseService.get_by_id(dataset_id)
|
||||||
if not e:
|
if not e:
|
||||||
raise LookupError(f"Can't find the dataset with ID {dataset_id}!")
|
raise LookupError(f"Can't find the dataset with ID {dataset_id}!")
|
||||||
err, files = FileService.upload_document(kb, file_objs, tenant_id)
|
err, files = FileService.upload_document(kb, file_objs, tenant_id, parent_path=request.form.get("parent_path"))
|
||||||
if err:
|
if err:
|
||||||
return get_result(message="\n".join(err), code=RetCode.SERVER_ERROR)
|
return get_result(message="\n".join(err), code=RetCode.SERVER_ERROR)
|
||||||
# rename key's name
|
# rename key's name
|
||||||
|
|||||||
@ -31,7 +31,7 @@ from common.misc_utils import get_uuid
|
|||||||
from common.constants import TaskStatus, FileSource, ParserType
|
from common.constants import TaskStatus, FileSource, ParserType
|
||||||
from api.db.services.knowledgebase_service import KnowledgebaseService
|
from api.db.services.knowledgebase_service import KnowledgebaseService
|
||||||
from api.db.services.task_service import TaskService
|
from api.db.services.task_service import TaskService
|
||||||
from api.utils.file_utils import filename_type, read_potential_broken_pdf, thumbnail_img
|
from api.utils.file_utils import filename_type, read_potential_broken_pdf, thumbnail_img, sanitize_path
|
||||||
from rag.llm.cv_model import GptV4
|
from rag.llm.cv_model import GptV4
|
||||||
from common import settings
|
from common import settings
|
||||||
|
|
||||||
@ -329,7 +329,7 @@ class FileService(CommonService):
|
|||||||
current_id = start_id
|
current_id = start_id
|
||||||
while current_id:
|
while current_id:
|
||||||
e, file = cls.get_by_id(current_id)
|
e, file = cls.get_by_id(current_id)
|
||||||
if file.parent_id != file.id and e:
|
if e and file.parent_id != file.id:
|
||||||
parent_folders.append(file)
|
parent_folders.append(file)
|
||||||
current_id = file.parent_id
|
current_id = file.parent_id
|
||||||
else:
|
else:
|
||||||
@ -423,13 +423,15 @@ class FileService(CommonService):
|
|||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@DB.connection_context()
|
@DB.connection_context()
|
||||||
def upload_document(self, kb, file_objs, user_id, src="local"):
|
def upload_document(self, kb, file_objs, user_id, src="local", parent_path: str | None = None):
|
||||||
root_folder = self.get_root_folder(user_id)
|
root_folder = self.get_root_folder(user_id)
|
||||||
pf_id = root_folder["id"]
|
pf_id = root_folder["id"]
|
||||||
self.init_knowledgebase_docs(pf_id, user_id)
|
self.init_knowledgebase_docs(pf_id, user_id)
|
||||||
kb_root_folder = self.get_kb_folder(user_id)
|
kb_root_folder = self.get_kb_folder(user_id)
|
||||||
kb_folder = self.new_a_file_from_kb(kb.tenant_id, kb.name, kb_root_folder["id"])
|
kb_folder = self.new_a_file_from_kb(kb.tenant_id, kb.name, kb_root_folder["id"])
|
||||||
|
|
||||||
|
safe_parent_path = sanitize_path(parent_path)
|
||||||
|
|
||||||
err, files = [], []
|
err, files = [], []
|
||||||
for file in file_objs:
|
for file in file_objs:
|
||||||
try:
|
try:
|
||||||
@ -439,7 +441,7 @@ class FileService(CommonService):
|
|||||||
if filetype == FileType.OTHER.value:
|
if filetype == FileType.OTHER.value:
|
||||||
raise RuntimeError("This type of file has not been supported yet!")
|
raise RuntimeError("This type of file has not been supported yet!")
|
||||||
|
|
||||||
location = filename
|
location = filename if not safe_parent_path else f"{safe_parent_path}/{filename}"
|
||||||
while settings.STORAGE_IMPL.obj_exist(kb.id, location):
|
while settings.STORAGE_IMPL.obj_exist(kb.id, location):
|
||||||
location += "_"
|
location += "_"
|
||||||
|
|
||||||
|
|||||||
@ -164,3 +164,23 @@ def read_potential_broken_pdf(blob):
|
|||||||
return repaired
|
return repaired
|
||||||
|
|
||||||
return blob
|
return blob
|
||||||
|
|
||||||
|
|
||||||
|
def sanitize_path(raw_path: str | None) -> str:
|
||||||
|
"""Normalize and sanitize a user-provided path segment.
|
||||||
|
|
||||||
|
- Converts backslashes to forward slashes
|
||||||
|
- Strips leading/trailing slashes
|
||||||
|
- Removes '.' and '..' segments
|
||||||
|
- Restricts characters to A-Za-z0-9, underscore, dash, and '/'
|
||||||
|
"""
|
||||||
|
if not raw_path:
|
||||||
|
return ""
|
||||||
|
backslash_re = re.compile(r"[\\]+")
|
||||||
|
unsafe_re = re.compile(r"[^A-Za-z0-9_\-/]")
|
||||||
|
normalized = backslash_re.sub("/", raw_path)
|
||||||
|
normalized = normalized.strip("/")
|
||||||
|
parts = [seg for seg in normalized.split("/") if seg and seg not in (".", "..")]
|
||||||
|
sanitized = "/".join(parts)
|
||||||
|
sanitized = unsafe_re.sub("", sanitized)
|
||||||
|
return sanitized
|
||||||
|
|||||||
Reference in New Issue
Block a user