Files
ragflow/docker/service_conf.yaml.template
Andrea Bugeja 74afb8d710 feat: Add Single Bucket Mode for MinIO/S3 (#11416)
## Overview

This PR adds support for **Single Bucket Mode** in RAGFlow, allowing
users to configure MinIO/S3 to use a single bucket with a directory
structure instead of creating multiple buckets per Knowledge Base and
user folder.

## Problem Statement

The current implementation creates one bucket per Knowledge Base and one
bucket per user folder, which can be problematic when:
- Cloud providers charge per bucket
- IAM policies restrict bucket creation
- Organizations want centralized data management in a single bucket

## Solution

Added a `prefix_path` configuration option to the MinIO connector that
enables:
- Using a single bucket with directory-based organization
- Backward compatibility with existing multi-bucket deployments
- Support for MinIO, AWS S3, and other S3-compatible storage backends

## Changes

- **`rag/utils/minio_conn.py`**: Enhanced MinIO connector to support
single bucket mode with prefix paths
- **`conf/service_conf.yaml`**: Added new configuration options
(`bucket` and `prefix_path`)
- **`docker/service_conf.yaml.template`**: Updated template with single
bucket configuration examples
- **`docker/.env.single-bucket-example`**: Added example environment
variables for single bucket setup
- **`docs/single-bucket-mode.md`**: Comprehensive documentation covering
usage, migration, and troubleshooting

## Configuration Example

```yaml
minio:
  user: "access-key"
  password: "secret-key"
  host: "minio.example.com:443"
  bucket: "ragflow-bucket"      # Single bucket name
  prefix_path: "ragflow"         # Optional prefix path
```

## Backward Compatibility

 Fully backward compatible - existing deployments continue to work
without any changes
- If `bucket` is not configured, uses default multi-bucket behavior
- If `bucket` is configured without `prefix_path`, uses bucket root
- If both are configured, uses `bucket/prefix_path/` structure

## Testing

- Tested with MinIO (local and cloud)
- Verified backward compatibility with existing multi-bucket mode
- Validated IAM policy restrictions work correctly

## Documentation

Included comprehensive documentation in `docs/single-bucket-mode.md`
covering:
- Configuration examples
- Migration guide from multi-bucket to single-bucket mode
- IAM policy examples
- Troubleshooting guide

---

**Related Issue**: Addresses use cases where bucket creation is
restricted or costly
2025-12-11 19:22:47 +08:00

156 lines
4.6 KiB
Plaintext

ragflow:
host: ${RAGFLOW_HOST:-0.0.0.0}
http_port: 9380
admin:
host: ${RAGFLOW_HOST:-0.0.0.0}
http_port: 9381
mysql:
name: '${MYSQL_DBNAME:-rag_flow}'
user: '${MYSQL_USER:-root}'
password: '${MYSQL_PASSWORD:-infini_rag_flow}'
host: '${MYSQL_HOST:-mysql}'
port: 3306
max_connections: 900
stale_timeout: 300
max_allowed_packet: ${MYSQL_MAX_PACKET:-1073741824}
minio:
user: '${MINIO_USER:-rag_flow}'
password: '${MINIO_PASSWORD:-infini_rag_flow}'
host: '${MINIO_HOST:-minio}:9000'
bucket: '${MINIO_BUCKET:-}'
prefix_path: '${MINIO_PREFIX_PATH:-}'
es:
hosts: 'http://${ES_HOST:-es01}:9200'
username: '${ES_USER:-elastic}'
password: '${ELASTIC_PASSWORD:-infini_rag_flow}'
os:
hosts: 'http://${OS_HOST:-opensearch01}:9201'
username: '${OS_USER:-admin}'
password: '${OPENSEARCH_PASSWORD:-infini_rag_flow_OS_01}'
infinity:
uri: '${INFINITY_HOST:-infinity}:23817'
db_name: 'default_db'
oceanbase:
scheme: 'oceanbase' # set 'mysql' to create connection using mysql config
config:
db_name: '${OCEANBASE_DOC_DBNAME:-test}'
user: '${OCEANBASE_USER:-root@ragflow}'
password: '${OCEANBASE_PASSWORD:-infini_rag_flow}'
host: '${OCEANBASE_HOST:-oceanbase}'
port: ${OCEANBASE_PORT:-2881}
redis:
db: 1
username: '${REDIS_USERNAME:-}'
password: '${REDIS_PASSWORD:-infini_rag_flow}'
host: '${REDIS_HOST:-redis}:6379'
user_default_llm:
default_models:
embedding_model:
api_key: 'xxx'
base_url: 'http://${TEI_HOST}:80'
# postgres:
# name: '${POSTGRES_DBNAME:-rag_flow}'
# user: '${POSTGRES_USER:-rag_flow}'
# password: '${POSTGRES_PASSWORD:-infini_rag_flow}'
# host: '${POSTGRES_HOST:-postgres}'
# port: 5432
# max_connections: 100
# stale_timeout: 30
# s3:
# access_key: 'access_key'
# secret_key: 'secret_key'
# region: 'region'
# endpoint_url: 'endpoint_url'
# bucket: 'bucket'
# prefix_path: 'prefix_path'
# signature_version: 'v4'
# addressing_style: 'path'
# oss:
# access_key: '${ACCESS_KEY}'
# secret_key: '${SECRET_KEY}'
# endpoint_url: '${ENDPOINT}'
# region: '${REGION}'
# bucket: '${BUCKET}'
# prefix_path: '${OSS_PREFIX_PATH}'
# azure:
# auth_type: 'sas'
# container_url: 'container_url'
# sas_token: 'sas_token'
# azure:
# auth_type: 'spn'
# account_url: 'account_url'
# client_id: 'client_id'
# secret: 'secret'
# tenant_id: 'tenant_id'
# container_name: 'container_name'
# The OSS object storage uses the MySQL configuration above by default. If you need to switch to another object storage service, please uncomment and configure the following parameters.
# opendal:
# scheme: 'mysql' # Storage type, such as s3, oss, azure, etc.
# config:
# oss_table: 'opendal_storage'
# user_default_llm:
# factory: 'BAAI'
# api_key: 'backup'
# base_url: 'backup_base_url'
# default_models:
# chat_model:
# name: 'qwen2.5-7b-instruct'
# factory: 'xxxx'
# api_key: 'xxxx'
# base_url: 'https://api.xx.com'
# embedding_model:
# name: 'bge-m3'
# rerank_model: 'bge-reranker-v2'
# asr_model:
# model: 'whisper-large-v3' # alias of name
# image2text_model: ''
# oauth:
# oauth2:
# display_name: "OAuth2"
# client_id: "your_client_id"
# client_secret: "your_client_secret"
# authorization_url: "https://your-oauth-provider.com/oauth/authorize"
# token_url: "https://your-oauth-provider.com/oauth/token"
# userinfo_url: "https://your-oauth-provider.com/oauth/userinfo"
# redirect_uri: "https://your-app.com/v1/user/oauth/callback/oauth2"
# oidc:
# display_name: "OIDC"
# client_id: "your_client_id"
# client_secret: "your_client_secret"
# issuer: "https://your-oauth-provider.com/oidc"
# scope: "openid email profile"
# redirect_uri: "https://your-app.com/v1/user/oauth/callback/oidc"
# github:
# type: "github"
# icon: "github"
# display_name: "Github"
# client_id: "your_client_id"
# client_secret: "your_client_secret"
# redirect_uri: "https://your-app.com/v1/user/oauth/callback/github"
# authentication:
# client:
# switch: false
# http_app_key:
# http_secret_key:
# site:
# switch: false
# permission:
# switch: false
# component: false
# dataset: false
# smtp:
# mail_server: ""
# mail_port: 465
# mail_use_ssl: true
# mail_use_tls: false
# mail_username: ""
# mail_password: ""
# mail_default_sender:
# - "RAGFlow" # display name
# - "" # sender email address
# mail_frontend_url: "https://your-frontend.example.com"
# tcadp_config:
# secret_id: '${TENCENT_SECRET_ID}'
# secret_key: '${TENCENT_SECRET_KEY}'
# region: '${TENCENT_REGION}'