Storage: Support S3 and Azure Blob as object storage backends for RAGFlow. (#2278)

### What problem does this PR solve?

issue: https://github.com/infiniflow/ragflow/issues/2277

_Briefly describe what this PR aims to solve. Include background context that will help reviewers understand the purpose of the PR._

### Type of change

- [ ] Bug Fix (non-breaking change which fixes an issue)
- [x] New Feature (non-breaking change which adds functionality)
- [ ] Documentation Update
- [ ] Refactoring
- [ ] Performance Improvement
- [ ] Other (please describe):

Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>
2026-01-30 15:16:45 +08:00 · 2024-09-09 09:41:14 +08:00
parent e85fea31a8
commit 8dd3adc443
17 changed files with 395 additions and 38 deletions
--- a/api/apps/dataset_api.py
+++ b/api/apps/dataset_api.py
@ -42,7 +42,7 @@ from api.utils.file_utils import filename_type, thumbnail
 from rag.app import book, laws, manual, naive, one, paper, presentation, qa, resume, table, picture, audio, email
 from rag.nlp import search
 from rag.utils.es_conn import ELASTICSEARCH
-from rag.utils.minio_conn import MINIO
+from rag.utils.storage_factory import STORAGE_IMPL

 MAXIMUM_OF_UPLOADING_FILES = 256

@ -352,7 +352,7 @@ def upload_documents(dataset_id):

            # upload to the minio
            location = filename
-            while MINIO.obj_exist(dataset_id, location):
+            while STORAGE_IMPL.obj_exist(dataset_id, location):
                location += "_"

            blob = file.read()
@ -361,7 +361,7 @@ def upload_documents(dataset_id):
            if blob == b'':
                warnings.warn(f"[WARNING]: The content of the file {filename} is empty.")

-            MINIO.put(dataset_id, location, blob)
+            STORAGE_IMPL.put(dataset_id, location, blob)

            doc = {
                "id": get_uuid(),
@ -441,7 +441,7 @@ def delete_document(document_id, dataset_id):  # string
        File2DocumentService.delete_by_document_id(document_id)

        # delete it from minio
-        MINIO.rm(dataset_id, location)
+        STORAGE_IMPL.rm(dataset_id, location)
    except Exception as e:
        errors += str(e)
    if errors:
@ -596,7 +596,7 @@ def download_document(dataset_id, document_id):

        # The process of downloading
        doc_id, doc_location = File2DocumentService.get_minio_address(doc_id=document_id)  # minio address
-        file_stream = MINIO.get(doc_id, doc_location)
+        file_stream = STORAGE_IMPL.get(doc_id, doc_location)
        if not file_stream:
            return construct_json_result(message="This file is empty.", code=RetCode.DATA_ERROR)

@ -737,7 +737,7 @@ def parsing_document_internal(id):
        doc_id = doc_attributes["id"]

        bucket, doc_name = File2DocumentService.get_minio_address(doc_id=doc_id)
-        binary = MINIO.get(bucket, doc_name)
+        binary = STORAGE_IMPL.get(bucket, doc_name)
        parser_name = doc_attributes["parser_id"]
        if binary:
            res = doc_parse(binary, doc_name, parser_name, tenant_id, doc_id)