Storage: Support S3 and Azure Blob as object storage backends for RAGFlow (#2278)

### What problem does this PR solve?

Issue: https://github.com/infiniflow/ragflow/issues/2277

This PR adds AWS S3 and Azure Blob Storage as object-storage backends for RAGFlow, alongside the existing MinIO support. Call sites that previously imported the MinIO connector (`MINIO`) directly now go through a storage factory (`STORAGE_IMPL` from `rag.utils.storage_factory`), so the configured backend can be swapped without changing calling code.
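
For reviewers unfamiliar with the pattern, here is a minimal sketch of a storage factory of this shape. Only the method set (`put`, `get`, `obj_exist`) and the `STORAGE_IMPL` name come from the diff below; the base class, the dict-backed backend, and the environment-variable switch are illustrative assumptions, not the PR's actual code.

```python
# Sketch only: the method names (put/get/obj_exist) and the STORAGE_IMPL
# singleton come from the diff; everything else is a hypothetical stand-in.
import os
from abc import ABC, abstractmethod


class ObjectStorage(ABC):
    @abstractmethod
    def put(self, bucket: str, name: str, data: bytes) -> None: ...

    @abstractmethod
    def get(self, bucket: str, name: str) -> bytes: ...

    @abstractmethod
    def obj_exist(self, bucket: str, name: str) -> bool: ...


class InMemoryStorage(ObjectStorage):
    """Dict-backed stand-in for a real MinIO / S3 / Azure Blob connector."""

    def __init__(self) -> None:
        self._objects: dict[tuple[str, str], bytes] = {}

    def put(self, bucket: str, name: str, data: bytes) -> None:
        self._objects[(bucket, name)] = data

    def get(self, bucket: str, name: str) -> bytes:
        return self._objects[(bucket, name)]

    def obj_exist(self, bucket: str, name: str) -> bool:
        return (bucket, name) in self._objects


def _make_storage() -> ObjectStorage:
    # Hypothetical knob; the real factory would map "minio" / "s3" / "azure"
    # to the corresponding connector class instead of one stand-in.
    backend = os.environ.get("STORAGE", "minio").lower()
    backends: dict[str, type[ObjectStorage]] = {
        "minio": InMemoryStorage,
        "s3": InMemoryStorage,
        "azure": InMemoryStorage,
    }
    return backends[backend]()


# Callers import this singleton instead of a concrete MINIO connection.
STORAGE_IMPL = _make_storage()
```

With something of this shape in place, the rest of the PR is a mechanical substitution: every `MINIO.put/get/obj_exist` call site becomes the same call on `STORAGE_IMPL`, as the hunks below show.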

### Type of change

- [ ] Bug Fix (non-breaking change which fixes an issue)
- [x] New Feature (non-breaking change which adds functionality)
- [ ] Documentation Update
- [ ] Refactoring
- [ ] Performance Improvement
- [ ] Other (please describe):

Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>
Author: Fachuan Bai
Date: 2024-09-09 09:41:14 +08:00 (committed by GitHub)
Commit: 8dd3adc443 (parent: e85fea31a8)
17 changed files with 395 additions and 38 deletions

Changed file (1 of 3 shown; path not captured in this view):

@@ -34,7 +34,7 @@ from api.utils.file_utils import get_project_base_directory
 from graphrag.mind_map_extractor import MindMapExtractor
 from rag.settings import SVR_QUEUE_NAME
 from rag.utils.es_conn import ELASTICSEARCH
-from rag.utils.minio_conn import MINIO
+from rag.utils.storage_factory import STORAGE_IMPL
 from rag.nlp import search, rag_tokenizer
 from api.db import FileType, TaskStatus, ParserType, LLMType

@@ -473,7 +473,7 @@ def doc_upload_and_parse(conversation_id, file_objs, user_id):
             else:
                 d["image"].save(output_buffer, format='JPEG')
-            MINIO.put(kb.id, d["_id"], output_buffer.getvalue())
+            STORAGE_IMPL.put(kb.id, d["_id"], output_buffer.getvalue())
             d["img_id"] = "{}-{}".format(kb.id, d["_id"])
             del d["image"]
             docs.append(d)

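The second hunk above writes each extracted page image with the knowledge-base ID as the bucket and the chunk ID as the object name, then records `img_id` as `"<kb_id>-<chunk_id>"` for later retrieval. A minimal sketch of that round trip, assuming a plain-dict stand-in for the object store; the `split("-", 1)` lookup on the read side is an inference, not shown in this diff:

```python
# Hedged sketch: dict stand-in for STORAGE_IMPL; the Pillow usage mirrors
# the d["image"].save(..., format='JPEG') call in the hunk above.
import io

from PIL import Image

store: dict[tuple[str, str], bytes] = {}  # stand-in object store

kb_id, chunk_id = "kb42", "chunk007"
output_buffer = io.BytesIO()
Image.new("RGB", (8, 8), "white").save(output_buffer, format="JPEG")
store[(kb_id, chunk_id)] = output_buffer.getvalue()  # STORAGE_IMPL.put(...)

img_id = "{}-{}".format(kb_id, chunk_id)
bucket, name = img_id.split("-", 1)   # recover bucket and object name
jpeg_bytes = store[(bucket, name)]    # STORAGE_IMPL.get(...)
assert jpeg_bytes[:2] == b"\xff\xd8"  # JPEG files start with FF D8
```
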
Changed file (2 of 3 shown; path not captured in this view):

@@ -27,7 +27,7 @@ from api.db.services.document_service import DocumentService
 from api.db.services.file2document_service import File2DocumentService
 from api.utils import get_uuid
 from api.utils.file_utils import filename_type, thumbnail
-from rag.utils.minio_conn import MINIO
+from rag.utils.storage_factory import STORAGE_IMPL

 class FileService(CommonService):

@@ -350,10 +350,10 @@ class FileService(CommonService):
             raise RuntimeError("This type of file has not been supported yet!")
         location = filename
-        while MINIO.obj_exist(kb.id, location):
+        while STORAGE_IMPL.obj_exist(kb.id, location):
             location += "_"
         blob = file.read()
-        MINIO.put(kb.id, location, blob)
+        STORAGE_IMPL.put(kb.id, location, blob)
         doc = {
             "id": get_uuid(),
             "kb_id": kb.id,

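In the second hunk, the upload path avoids clobbering an existing object: while `location` already exists in the bucket, an underscore is appended until the name is free. The same loop against a plain-dict stand-in (the store and sample data are illustrative):

```python
# Illustrative stand-in for STORAGE_IMPL.obj_exist using a plain dict.
store = {("kb-1", "report.pdf"): b"existing upload"}

def obj_exist(bucket: str, name: str) -> bool:
    return (bucket, name) in store

location = "report.pdf"
while obj_exist("kb-1", location):
    location += "_"  # keep appending until the name no longer collides

store[("kb-1", location)] = b"new upload"
print(location)  # -> report.pdf_
```
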
Changed file (3 of 3 shown; path not captured in this view):

@@ -27,7 +27,7 @@ from api.db.services.document_service import DocumentService
 from api.utils import current_timestamp, get_uuid
 from deepdoc.parser.excel_parser import RAGFlowExcelParser
 from rag.settings import SVR_QUEUE_NAME
-from rag.utils.minio_conn import MINIO
+from rag.utils.storage_factory import STORAGE_IMPL
 from rag.utils.redis_conn import REDIS_CONN

@@ -143,7 +143,7 @@ def queue_tasks(doc, bucket, name):
     tsks = []
     if doc["type"] == FileType.PDF.value:
-        file_bin = MINIO.get(bucket, name)
+        file_bin = STORAGE_IMPL.get(bucket, name)
         do_layout = doc["parser_config"].get("layout_recognize", True)
         pages = PdfParser.total_page_number(doc["name"], file_bin)
         page_size = doc["parser_config"].get("task_page_size", 12)

@@ -169,7 +169,7 @@ def queue_tasks(doc, bucket, name):
         tsks.append(task)
     elif doc["parser_id"] == "table":
-        file_bin = MINIO.get(bucket, name)
+        file_bin = STORAGE_IMPL.get(bucket, name)
         rn = RAGFlowExcelParser.row_number(
             doc["name"], file_bin)
         for i in range(0, rn, 3000):
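
Both storage reads in `queue_tasks` feed a splitting step: PDFs are cut into page ranges of `task_page_size` (12 by default) and spreadsheets into 3,000-row batches via `range(0, rn, 3000)`. A small sketch of that fan-out; the task-dict fields (`from_page`, `to_page`, `from_row`, `to_row`) are illustrative, not necessarily the PR's exact schema:

```python
# Sketch of the fan-out in queue_tasks; batch sizes mirror the diff.
def split_pdf(total_pages: int, page_size: int = 12) -> list[dict]:
    """One task per page range of at most page_size pages."""
    return [{"from_page": p, "to_page": min(p + page_size, total_pages)}
            for p in range(0, total_pages, page_size)]

def split_table(total_rows: int, batch: int = 3000) -> list[dict]:
    """One task per batch of at most 3000 rows."""
    return [{"from_row": r, "to_row": min(r + batch, total_rows)}
            for r in range(0, total_rows, batch)]

print(split_pdf(30))      # 3 tasks: pages [0,12), [12,24), [24,30)
print(split_table(7000))  # 3 tasks: rows [0,3000), [3000,6000), [6000,7000)
```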