mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Storage: Support the s3, azure blob as the object storage of ragflow. (#2278)
### What problem does this PR solve? issue: https://github.com/infiniflow/ragflow/issues/2277 _Briefly describe what this PR aims to solve. Include background context that will help reviewers understand the purpose of the PR._ ### Type of change - [ ] Bug Fix (non-breaking change which fixes an issue) - [x] New Feature (non-breaking change which adds functionality) - [ ] Documentation Update - [ ] Refactoring - [ ] Performance Improvement - [ ] Other (please describe): Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>
This commit is contained in:
@ -39,7 +39,7 @@ from itsdangerous import URLSafeTimedSerializer
|
||||
|
||||
from api.utils.file_utils import filename_type, thumbnail
|
||||
from rag.nlp import keyword_extraction
|
||||
from rag.utils.minio_conn import MINIO
|
||||
from rag.utils.storage_factory import STORAGE_IMPL
|
||||
|
||||
from api.db.services.canvas_service import CanvasTemplateService, UserCanvasService
|
||||
from agent.canvas import Canvas
|
||||
@ -427,10 +427,10 @@ def upload():
|
||||
retmsg="This type of file has not been supported yet!")
|
||||
|
||||
location = filename
|
||||
while MINIO.obj_exist(kb_id, location):
|
||||
while STORAGE_IMPL.obj_exist(kb_id, location):
|
||||
location += "_"
|
||||
blob = request.files['file'].read()
|
||||
MINIO.put(kb_id, location, blob)
|
||||
STORAGE_IMPL.put(kb_id, location, blob)
|
||||
doc = {
|
||||
"id": get_uuid(),
|
||||
"kb_id": kb.id,
|
||||
@ -650,7 +650,7 @@ def document_rm():
|
||||
FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == f2d[0].file_id])
|
||||
File2DocumentService.delete_by_document_id(doc_id)
|
||||
|
||||
MINIO.rm(b, n)
|
||||
STORAGE_IMPL.rm(b, n)
|
||||
except Exception as e:
|
||||
errors += str(e)
|
||||
|
||||
@ -723,7 +723,7 @@ def completion_faq():
|
||||
if ans["reference"]["chunks"][chunk_idx]["img_id"]:
|
||||
try:
|
||||
bkt, nm = ans["reference"]["chunks"][chunk_idx]["img_id"].split("-")
|
||||
response = MINIO.get(bkt, nm)
|
||||
response = STORAGE_IMPL.get(bkt, nm)
|
||||
data_type_picture["url"] = base64.b64encode(response).decode('utf-8')
|
||||
data.append(data_type_picture)
|
||||
break
|
||||
|
||||
@ -42,7 +42,7 @@ from api.utils.file_utils import filename_type, thumbnail
|
||||
from rag.app import book, laws, manual, naive, one, paper, presentation, qa, resume, table, picture, audio, email
|
||||
from rag.nlp import search
|
||||
from rag.utils.es_conn import ELASTICSEARCH
|
||||
from rag.utils.minio_conn import MINIO
|
||||
from rag.utils.storage_factory import STORAGE_IMPL
|
||||
|
||||
MAXIMUM_OF_UPLOADING_FILES = 256
|
||||
|
||||
@ -352,7 +352,7 @@ def upload_documents(dataset_id):
|
||||
|
||||
# upload to the minio
|
||||
location = filename
|
||||
while MINIO.obj_exist(dataset_id, location):
|
||||
while STORAGE_IMPL.obj_exist(dataset_id, location):
|
||||
location += "_"
|
||||
|
||||
blob = file.read()
|
||||
@ -361,7 +361,7 @@ def upload_documents(dataset_id):
|
||||
if blob == b'':
|
||||
warnings.warn(f"[WARNING]: The content of the file {filename} is empty.")
|
||||
|
||||
MINIO.put(dataset_id, location, blob)
|
||||
STORAGE_IMPL.put(dataset_id, location, blob)
|
||||
|
||||
doc = {
|
||||
"id": get_uuid(),
|
||||
@ -441,7 +441,7 @@ def delete_document(document_id, dataset_id): # string
|
||||
File2DocumentService.delete_by_document_id(document_id)
|
||||
|
||||
# delete it from minio
|
||||
MINIO.rm(dataset_id, location)
|
||||
STORAGE_IMPL.rm(dataset_id, location)
|
||||
except Exception as e:
|
||||
errors += str(e)
|
||||
if errors:
|
||||
@ -596,7 +596,7 @@ def download_document(dataset_id, document_id):
|
||||
|
||||
# The process of downloading
|
||||
doc_id, doc_location = File2DocumentService.get_minio_address(doc_id=document_id) # minio address
|
||||
file_stream = MINIO.get(doc_id, doc_location)
|
||||
file_stream = STORAGE_IMPL.get(doc_id, doc_location)
|
||||
if not file_stream:
|
||||
return construct_json_result(message="This file is empty.", code=RetCode.DATA_ERROR)
|
||||
|
||||
@ -737,7 +737,7 @@ def parsing_document_internal(id):
|
||||
doc_id = doc_attributes["id"]
|
||||
|
||||
bucket, doc_name = File2DocumentService.get_minio_address(doc_id=doc_id)
|
||||
binary = MINIO.get(bucket, doc_name)
|
||||
binary = STORAGE_IMPL.get(bucket, doc_name)
|
||||
parser_name = doc_attributes["parser_id"]
|
||||
if binary:
|
||||
res = doc_parse(binary, doc_name, parser_name, tenant_id, doc_id)
|
||||
|
||||
@ -48,7 +48,7 @@ from api.db import FileType, TaskStatus, ParserType, FileSource, LLMType
|
||||
from api.db.services.document_service import DocumentService, doc_upload_and_parse
|
||||
from api.settings import RetCode, stat_logger
|
||||
from api.utils.api_utils import get_json_result
|
||||
from rag.utils.minio_conn import MINIO
|
||||
from rag.utils.storage_factory import STORAGE_IMPL
|
||||
from api.utils.file_utils import filename_type, thumbnail, get_project_base_directory
|
||||
from api.utils.web_utils import html2pdf, is_valid_url
|
||||
|
||||
@ -118,9 +118,9 @@ def web_crawl():
|
||||
raise RuntimeError("This type of file has not been supported yet!")
|
||||
|
||||
location = filename
|
||||
while MINIO.obj_exist(kb_id, location):
|
||||
while STORAGE_IMPL.obj_exist(kb_id, location):
|
||||
location += "_"
|
||||
MINIO.put(kb_id, location, blob)
|
||||
STORAGE_IMPL.put(kb_id, location, blob)
|
||||
doc = {
|
||||
"id": get_uuid(),
|
||||
"kb_id": kb.id,
|
||||
@ -307,7 +307,7 @@ def rm():
|
||||
FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == f2d[0].file_id])
|
||||
File2DocumentService.delete_by_document_id(doc_id)
|
||||
|
||||
MINIO.rm(b, n)
|
||||
STORAGE_IMPL.rm(b, n)
|
||||
except Exception as e:
|
||||
errors += str(e)
|
||||
|
||||
@ -394,7 +394,7 @@ def get(doc_id):
|
||||
return get_data_error_result(retmsg="Document not found!")
|
||||
|
||||
b, n = File2DocumentService.get_minio_address(doc_id=doc_id)
|
||||
response = flask.make_response(MINIO.get(b, n))
|
||||
response = flask.make_response(STORAGE_IMPL.get(b, n))
|
||||
|
||||
ext = re.search(r"\.([^.]+)$", doc.name)
|
||||
if ext:
|
||||
@ -458,7 +458,7 @@ def change_parser():
|
||||
def get_image(image_id):
|
||||
try:
|
||||
bkt, nm = image_id.split("-")
|
||||
response = flask.make_response(MINIO.get(bkt, nm))
|
||||
response = flask.make_response(STORAGE_IMPL.get(bkt, nm))
|
||||
response.headers.set('Content-Type', 'image/JPEG')
|
||||
return response
|
||||
except Exception as e:
|
||||
|
||||
@ -34,7 +34,7 @@ from api.utils.api_utils import get_json_result
|
||||
from api.utils.file_utils import filename_type
|
||||
from rag.nlp import search
|
||||
from rag.utils.es_conn import ELASTICSEARCH
|
||||
from rag.utils.minio_conn import MINIO
|
||||
from rag.utils.storage_factory import STORAGE_IMPL
|
||||
|
||||
|
||||
@manager.route('/upload', methods=['POST'])
|
||||
@ -98,7 +98,7 @@ def upload():
|
||||
# file type
|
||||
filetype = filename_type(file_obj_names[file_len - 1])
|
||||
location = file_obj_names[file_len - 1]
|
||||
while MINIO.obj_exist(last_folder.id, location):
|
||||
while STORAGE_IMPL.obj_exist(last_folder.id, location):
|
||||
location += "_"
|
||||
blob = file_obj.read()
|
||||
filename = duplicate_name(
|
||||
@ -260,7 +260,7 @@ def rm():
|
||||
e, file = FileService.get_by_id(inner_file_id)
|
||||
if not e:
|
||||
return get_data_error_result(retmsg="File not found!")
|
||||
MINIO.rm(file.parent_id, file.location)
|
||||
STORAGE_IMPL.rm(file.parent_id, file.location)
|
||||
FileService.delete_folder_by_pf_id(current_user.id, file_id)
|
||||
else:
|
||||
if not FileService.delete(file):
|
||||
@ -333,7 +333,7 @@ def get(file_id):
|
||||
if not e:
|
||||
return get_data_error_result(retmsg="Document not found!")
|
||||
b, n = File2DocumentService.get_minio_address(file_id=file_id)
|
||||
response = flask.make_response(MINIO.get(b, n))
|
||||
response = flask.make_response(STORAGE_IMPL.get(b, n))
|
||||
ext = re.search(r"\.([^.]+)$", file.name)
|
||||
if ext:
|
||||
if file.type == FileType.VISUAL.value:
|
||||
|
||||
@ -22,7 +22,7 @@ from api.utils.api_utils import get_json_result
|
||||
from api.versions import get_rag_version
|
||||
from rag.settings import SVR_QUEUE_NAME
|
||||
from rag.utils.es_conn import ELASTICSEARCH
|
||||
from rag.utils.minio_conn import MINIO
|
||||
from rag.utils.storage_factory import STORAGE_IMPL
|
||||
from timeit import default_timer as timer
|
||||
|
||||
from rag.utils.redis_conn import REDIS_CONN
|
||||
@ -47,7 +47,7 @@ def status():
|
||||
|
||||
st = timer()
|
||||
try:
|
||||
MINIO.health()
|
||||
STORAGE_IMPL.health()
|
||||
res["minio"] = {"status": "green", "elapsed": "{:.1f}".format((timer() - st)*1000.)}
|
||||
except Exception as e:
|
||||
res["minio"] = {"status": "red", "elapsed": "{:.1f}".format((timer() - st)*1000.), "error": str(e)}
|
||||
|
||||
Reference in New Issue
Block a user