diff --git a/api/apps/sdk/files.py b/api/apps/sdk/files.py index d8fd84aed..84491a02c 100644 --- a/api/apps/sdk/files.py +++ b/api/apps/sdk/files.py @@ -3,9 +3,11 @@ import re import flask from flask import request +from pathlib import Path from api.db.services.document_service import DocumentService from api.db.services.file2document_service import File2DocumentService +from api.db.services.knowledgebase_service import KnowledgebaseService from api.utils.api_utils import server_error_response, token_required from api.utils import get_uuid from api.db import FileType @@ -666,3 +668,71 @@ def move(tenant_id): return get_json_result(data=True) except Exception as e: return server_error_response(e) + +@manager.route('/file/convert', methods=['POST']) # noqa: F821 +@token_required +def convert(tenant_id): + req = request.json + kb_ids = req["kb_ids"] + file_ids = req["file_ids"] + file2documents = [] + + try: + files = FileService.get_by_ids(file_ids) + files_set = dict({file.id: file for file in files}) + for file_id in file_ids: + file = files_set[file_id] + if not file: + return get_json_result(message="File not found!", code=404) + file_ids_list = [file_id] + if file.type == FileType.FOLDER.value: + file_ids_list = FileService.get_all_innermost_file_ids(file_id, []) + for id in file_ids_list: + informs = File2DocumentService.get_by_file_id(id) + # delete + for inform in informs: + doc_id = inform.document_id + e, doc = DocumentService.get_by_id(doc_id) + if not e: + return get_json_result(message="Document not found!", code=404) + tenant_id = DocumentService.get_tenant_id(doc_id) + if not tenant_id: + return get_json_result(message="Tenant not found!", code=404) + if not DocumentService.remove_document(doc, tenant_id): + return get_json_result( + message="Database error (Document removal)!", code=404) + File2DocumentService.delete_by_file_id(id) + + # insert + for kb_id in kb_ids: + e, kb = KnowledgebaseService.get_by_id(kb_id) + if not e: + return get_json_result( + message="Can't find this knowledgebase!", code=404) + e, file = FileService.get_by_id(id) + if not e: + return get_json_result( + message="Can't find this file!", code=404) + + doc = DocumentService.insert({ + "id": get_uuid(), + "kb_id": kb.id, + "parser_id": FileService.get_parser(file.type, file.name, kb.parser_id), + "parser_config": kb.parser_config, + "created_by": tenant_id, + "type": file.type, + "name": file.name, + "suffix": Path(file.name).suffix.lstrip("."), + "location": file.location, + "size": file.size + }) + file2document = File2DocumentService.insert({ + "id": get_uuid(), + "file_id": id, + "document_id": doc.id, + }) + + file2documents.append(file2document.to_json()) + return get_json_result(data=file2documents) + except Exception as e: + return server_error_response(e) \ No newline at end of file