mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 04:22:28 +08:00
Feat: Add the migration script and its doc, added backup as default… (#8245)
### What problem does this PR solve? This PR adds a data backup and migration solution for RAGFlow Docker Compose deployments. Currently, users lack a standardized way to backup and restore RAGFlow data volumes (MySQL, MinIO, Redis, Elasticsearch), which is essential for data safety and environment migration. **Solution:** - **Migration Script** (`docker/migration.sh`) - Automates backup/restore operations for all RAGFlow data volumes - **Documentation** (`docs/guides/migration/migrate_from_docker_compose.md`) - Usage guide and best practices - **Safety Features** - Container conflict detection and user confirmations to prevent data loss ### Type of change - [x] New Feature (non-breaking change which adds functionality) - [x] Documentation Update Co-authored-by: treedy <treedy2022@icloud.com>
This commit is contained in:
2
.gitignore
vendored
2
.gitignore
vendored
@ -193,3 +193,5 @@ dist
|
||||
# SvelteKit build / generate output
|
||||
.svelte-kit
|
||||
|
||||
# Default backup dir
|
||||
backup
|
||||
|
||||
298
docker/migration.sh
Normal file
298
docker/migration.sh
Normal file
@ -0,0 +1,298 @@
|
||||
#!/bin/bash
|
||||
|
||||
# RAGFlow Data Migration Script
|
||||
# Usage: ./migration.sh [backup|restore] [backup_folder]
|
||||
#
|
||||
# This script helps you backup and restore RAGFlow Docker volumes
|
||||
# including MySQL, MinIO, Redis, and Elasticsearch data.
|
||||
|
||||
# Abort as soon as a command fails outside of a conditional. The functions
# below additionally print their own message for the failures they detect.
set -e

# Default values

# Backup folder used when no [backup_folder] argument is given.
readonly DEFAULT_BACKUP_FOLDER="backup"

# Docker volumes handled by this script and the archive written for each.
# The two arrays are index-aligned: BACKUP_FILES[i] is the archive of
# VOLUMES[i], so they must always have the same length and order.
readonly VOLUMES=("docker_mysql_data" "docker_minio_data" "docker_redis_data" "docker_esdata01")
readonly BACKUP_FILES=("mysql_backup.tar.gz" "minio_backup.tar.gz" "redis_backup.tar.gz" "es_backup.tar.gz")
|
||||
|
||||
# Function to display help information
|
||||
#######################################
# Print the usage text for this script.
# Globals:   DEFAULT_BACKUP_FOLDER (read)
# Arguments: none
# Outputs:   usage text on stdout
#######################################
show_help() {
  # Unquoted here-document delimiter: $0 and $DEFAULT_BACKUP_FOLDER are
  # expanded, everything else is printed literally.
  cat <<EOF
RAGFlow Data Migration Tool

USAGE:
 $0 <operation> [backup_folder]

OPERATIONS:
 backup - Create backup of all RAGFlow data volumes
 restore - Restore RAGFlow data volumes from backup
 help - Show this help message

PARAMETERS:
 backup_folder - Name of backup folder (default: '$DEFAULT_BACKUP_FOLDER')

EXAMPLES:
 $0 backup # Backup to './backup' folder
 $0 backup my_backup # Backup to './my_backup' folder
 $0 restore # Restore from './backup' folder
 $0 restore my_backup # Restore from './my_backup' folder

DOCKER VOLUMES:
 - docker_mysql_data (MySQL database)
 - docker_minio_data (MinIO object storage)
 - docker_redis_data (Redis cache)
 - docker_esdata01 (Elasticsearch indices)
EOF
}
|
||||
|
||||
# Function to check if Docker is running
|
||||
#######################################
# Verify that the Docker daemon can be reached; terminate the script if not.
# Arguments: none
# Outputs:   error message on stderr when `docker info` fails
# Returns:   0 when `docker info` succeeds; otherwise exits with status 1
#######################################
check_docker() {
  if docker info >/dev/null 2>&1; then
    return 0
  fi

  # Diagnostics go to stderr so they are not mixed into captured stdout.
  echo "❌ Error: Docker is not running or not accessible" >&2
  echo "Please start Docker and try again" >&2
  exit 1
}
|
||||
|
||||
# Function to check if volume exists
|
||||
#######################################
# Test whether a Docker volume exists.
# Arguments: $1 - name of the volume
# Outputs:   nothing (output of `docker volume inspect` is discarded)
# Returns:   exit status of `docker volume inspect` (0 when it succeeds)
#######################################
volume_exists() {
  local -r volume_name=$1
  docker volume inspect "$volume_name" >/dev/null 2>&1
}
|
||||
|
||||
# Function to check if any containers are using the target volumes
|
||||
#######################################
# Refuse to continue while a running container has one of the target
# volumes mounted.
# Globals:   VOLUMES (read)
# Arguments: none
# Outputs:   progress messages on stdout; the error report on stderr
# Returns:   0 when no running container mounts a target volume;
#            exits with status 1 otherwise, or when containers cannot be listed
#######################################
check_containers_using_volumes() {
  echo "🔍 Checking for running containers that might be using target volumes..."

  # Declaration is kept separate from the assignment so that a failing
  # `docker ps` is not hidden behind the exit status of `local`.
  local running_containers
  if ! running_containers=$(docker ps --format "{{.Names}}"); then
    echo "❌ Error: Unable to list running containers" >&2
    exit 1
  fi

  if [ -z "$running_containers" ]; then
    echo "✅ No running containers found"
    return 0
  fi

  local volume_usage_details=()
  local container mounted_volumes volume

  while IFS= read -r container; do
    [ -n "$container" ] || continue

    # One mount name per line. Matching whole names (instead of searching the
    # host source paths for a substring) keeps similarly named volumes and
    # bind-mounted directories from being reported as a target volume.
    # A failed inspect is treated as "no mounts", as before.
    mounted_volumes=$(docker inspect --format '{{range .Mounts}}{{println .Name}}{{end}}' "$container" 2>/dev/null </dev/null || echo "")

    for volume in "${VOLUMES[@]}"; do
      if grep -Fxq -- "$volume" <<<"$mounted_volumes"; then
        volume_usage_details+=("$container -> $volume")
        # Only the first matching volume is reported per container.
        break
      fi
    done
  done <<<"$running_containers"

  # If any containers are using our volumes, show the report and exit.
  if [ "${#volume_usage_details[@]}" -gt 0 ]; then
    local detail
    {
      echo ""
      echo "❌ ERROR: Found running containers using target volumes!"
      echo ""
      echo "📋 Running containers status:"
      docker ps --format "table {{.Names}}\t{{.Status}}\t{{.Image}}"
      echo ""
      echo "🔗 Volume usage details:"
      for detail in "${volume_usage_details[@]}"; do
        echo " - $detail"
      done
      echo ""
      echo "🛑 SOLUTION: Stop the containers before performing backup/restore operations:"
      echo " docker-compose -f docker/<your-docker-compose-file>.yml down"
      echo ""
      echo "💡 After backup/restore, you can restart with:"
      echo " docker-compose -f docker/<your-docker-compose-file>.yml up -d"
      echo ""
    } >&2
    exit 1
  fi

  echo "✅ No containers are using target volumes, safe to proceed"
  return 0
}
|
||||
|
||||
# Function to confirm user action
|
||||
#######################################
# Ask a yes/no question and read the answer from stdin.
# Arguments: $1 - question text (printed followed by " (y/N): ")
# Outputs:   the prompt on stdout, without a trailing newline
# Returns:   0 for y / yes (any letter case); 1 for anything else,
#            including an empty answer or end-of-file on stdin
#######################################
confirm_action() {
  local message=$1
  # Keep the answer local so it does not overwrite a caller's variable.
  local response=""

  printf '%s (y/N): ' "$message"
  # A failed read (e.g. closed stdin) leaves the answer empty, i.e. "no".
  read -r response || true

  case "$response" in
    [yY] | [yY][eE][sS]) return 0 ;;
    *) return 1 ;;
  esac
}
|
||||
|
||||
# Function to perform backup
|
||||
#######################################
# Archive every existing target volume into the backup folder.
# Globals:   VOLUMES, BACKUP_FILES (read; index-aligned)
# Arguments: $1 - backup folder, relative to the current directory or absolute
# Outputs:   progress on stdout; errors on stderr
# Returns:   0 on success; exits with status 1 when the folder cannot be
#            prepared or an archive cannot be written
#######################################
perform_backup() {
  local backup_folder=$1
  local total=${#VOLUMES[@]}
  local backup_dir i volume backup_file step size

  echo "🚀 Starting RAGFlow data backup..."
  echo "📁 Backup folder: $backup_folder"
  echo ""

  # Check if any containers are using the volumes
  check_containers_using_volumes

  # Create the folder, then resolve it to an absolute path. Docker needs an
  # absolute host path for a bind mount, and prefixing "$(pwd)/" would produce
  # a wrong path when the caller already passed an absolute folder.
  # CDPATH is cleared so `cd` neither searches elsewhere nor prints a path.
  if ! mkdir -p -- "$backup_folder" ||
    ! backup_dir=$(CDPATH= cd -- "$backup_folder" && pwd); then
    echo "❌ Error: Cannot create or access backup folder '$backup_folder'" >&2
    exit 1
  fi

  # Backup each volume
  for i in "${!VOLUMES[@]}"; do
    volume="${VOLUMES[$i]}"
    backup_file="${BACKUP_FILES[$i]}"
    step=$((i + 1))

    echo "📦 Step $step/$total: Backing up $volume..."

    if volume_exists "$volume"; then
      # The volume is mounted read-only: a backup must never modify its source.
      if ! docker run --rm \
        -v "$volume":/source:ro \
        -v "$backup_dir":/backup \
        alpine tar czf "/backup/$backup_file" -C /source .; then
        echo "❌ Error: Failed to back up $volume" >&2
        exit 1
      fi
      echo "✅ Successfully backed up $volume to $backup_folder/$backup_file"
    else
      echo "⚠️ Warning: Volume $volume does not exist, skipping..."
    fi
    echo ""
  done

  echo "🎉 Backup completed successfully!"
  echo "📍 Backup location: $backup_dir"

  # List backup files with sizes
  echo ""
  echo "📋 Backup files created:"
  for backup_file in "${BACKUP_FILES[@]}"; do
    if [ -f "$backup_dir/$backup_file" ]; then
      # Field 5 of `ls -l` output is the size column.
      size=$(ls -lh -- "$backup_dir/$backup_file" | awk '{print $5}')
      echo " - $backup_file ($size)"
    fi
  done
}
|
||||
|
||||
# Function to perform restore
|
||||
#######################################
# Restore every target volume from the archives in the backup folder.
# Globals:   VOLUMES, BACKUP_FILES (read; index-aligned)
# Arguments: $1 - backup folder, relative to the current directory or absolute
# Outputs:   progress and warnings on stdout; errors on stderr
# Returns:   0 on success; exits with status 1 when the folder or an archive
#            is missing or a Docker command fails; exits with status 0 when
#            the user declines the overwrite confirmation
#######################################
perform_restore() {
  local backup_folder=$1
  local total=${#VOLUMES[@]}
  local backup_dir i volume backup_file step file

  echo "🔄 Starting RAGFlow data restore..."
  echo "📁 Backup folder: $backup_folder"
  echo ""

  # Check if any containers are using the volumes
  check_containers_using_volumes

  # Check if backup folder exists
  if [ ! -d "$backup_folder" ]; then
    echo "❌ Error: Backup folder '$backup_folder' does not exist" >&2
    exit 1
  fi

  # Docker needs an absolute host path for a bind mount; resolving the folder
  # here (instead of prefixing "$(pwd)/") also works for absolute arguments.
  # CDPATH is cleared so `cd` neither searches elsewhere nor prints a path.
  if ! backup_dir=$(CDPATH= cd -- "$backup_folder" && pwd); then
    echo "❌ Error: Cannot access backup folder '$backup_folder'" >&2
    exit 1
  fi

  # Check if all backup files exist
  local missing_files=()
  for backup_file in "${BACKUP_FILES[@]}"; do
    if [ ! -f "$backup_dir/$backup_file" ]; then
      missing_files+=("$backup_file")
    fi
  done

  if [ "${#missing_files[@]}" -gt 0 ]; then
    {
      echo "❌ Error: Missing backup files:"
      for file in "${missing_files[@]}"; do
        echo " - $file"
      done
      echo "Please ensure all backup files are present in '$backup_folder'"
    } >&2
    exit 1
  fi

  # Check for existing volumes and warn user
  local existing_volumes=()
  for volume in "${VOLUMES[@]}"; do
    if volume_exists "$volume"; then
      existing_volumes+=("$volume")
    fi
  done

  if [ "${#existing_volumes[@]}" -gt 0 ]; then
    echo "⚠️ WARNING: The following Docker volumes already exist:"
    for volume in "${existing_volumes[@]}"; do
      echo " - $volume"
    done
    echo ""
    echo "🔴 IMPORTANT: Restoring will OVERWRITE existing data!"
    echo "💡 Recommendation: Create a backup of your current data first:"
    echo " $0 backup current_backup_$(date +%Y%m%d_%H%M%S)"
    echo ""

    if ! confirm_action "Do you want to continue with the restore operation?"; then
      echo "❌ Restore operation cancelled by user"
      exit 0
    fi
  fi

  # Create volumes and restore data
  for i in "${!VOLUMES[@]}"; do
    volume="${VOLUMES[$i]}"
    backup_file="${BACKUP_FILES[$i]}"
    step=$((i + 1))

    echo "🔧 Step $step/$total: Restoring $volume..."

    # Create volume if it doesn't exist
    if ! volume_exists "$volume"; then
      echo " 📋 Creating Docker volume: $volume"
      if ! docker volume create "$volume"; then
        echo "❌ Error: Failed to create Docker volume $volume" >&2
        exit 1
      fi
    else
      echo " 📋 Using existing Docker volume: $volume"
    fi

    # Restore data. The backup folder is mounted read-only because restoring
    # only reads from it.
    # NOTE: the archive is extracted on top of whatever the volume already
    # contains; files that are not part of the archive are left in place.
    echo " 📥 Restoring data from $backup_file..."
    if ! docker run --rm \
      -v "$volume":/target \
      -v "$backup_dir":/backup:ro \
      alpine tar xzf "/backup/$backup_file" -C /target; then
      echo "❌ Error: Failed to restore $volume from $backup_file" >&2
      exit 1
    fi

    echo "✅ Successfully restored $volume"
    echo ""
  done

  echo "🎉 Restore completed successfully!"
  echo "💡 You can now start your RAGFlow services"
}
|
||||
|
||||
# Main script logic
|
||||
#######################################
# Entry point: parse the command line and dispatch to the chosen operation.
# Globals:   DEFAULT_BACKUP_FOLDER (read)
# Arguments: $1 - operation: backup | restore | help | -h | --help
#                 (empty or missing shows the help text)
#            $2 - backup folder (optional, default: $DEFAULT_BACKUP_FOLDER)
# Outputs:   help text on stdout; invalid-operation diagnostics on stderr
# Returns:   exits 0 after showing help, exits 1 for an unknown operation;
#            otherwise the status of the selected operation
#######################################
main() {
  # Parse command line arguments
  local operation=${1:-}
  local backup_folder=${2:-$DEFAULT_BACKUP_FOLDER}

  case "$operation" in
    "" | help | -h | --help)
      # Help is answered before the Docker check so that it stays available
      # on machines where Docker is stopped or not installed.
      show_help
      exit 0
      ;;
    backup)
      check_docker
      perform_backup "$backup_folder"
      ;;
    restore)
      check_docker
      perform_restore "$backup_folder"
      ;;
    *)
      {
        echo "❌ Error: Invalid operation '$operation'"
        echo ""
        show_help
      } >&2
      exit 1
      ;;
  esac
}
|
||||
|
||||
# Run main function with all arguments
|
||||
# This call comes after every function definition above, so all of them exist
# by the time main runs; "$@" passes each command-line argument through as its
# own word.
main "$@"
|
||||
108
docs/guides/migration/migrate_from_docker_compose.md
Normal file
108
docs/guides/migration/migrate_from_docker_compose.md
Normal file
@ -0,0 +1,108 @@
|
||||
# Data Migration Guide
|
||||
|
||||
A common scenario is processing large datasets on a powerful instance (e.g., with a GPU) and then migrating the entire RAGFlow service to a different production environment (e.g., a CPU-only server). This guide explains how to safely back up and restore your data using our provided migration script.
|
||||
|
||||
## Identifying Your Data
|
||||
|
||||
By default, RAGFlow uses Docker volumes to store all persistent data, including your database, uploaded files, and search indexes. You can see these volumes by running:
|
||||
|
||||
```bash
|
||||
docker volume ls
|
||||
```
|
||||
|
||||
The output will look similar to this:
|
||||
|
||||
```text
|
||||
DRIVER VOLUME NAME
|
||||
local docker_esdata01
|
||||
local docker_minio_data
|
||||
local docker_mysql_data
|
||||
local docker_redis_data
|
||||
```
|
||||
|
||||
These volumes contain all the data you need to migrate.
|
||||
|
||||
## Step 1: Stop RAGFlow Services
|
||||
|
||||
Before starting the migration, you must stop all running RAGFlow services on the **source machine**. Navigate to the project's root directory and run:
|
||||
|
||||
```bash
|
||||
docker-compose -f docker/docker-compose.yml down
|
||||
```
|
||||
|
||||
**Important:** Do **not** use the `-v` flag (e.g., `docker-compose down -v`), as this will delete all your data volumes. The migration script includes a check and will prevent you from running it if services are active.
|
||||
|
||||
## Step 2: Back Up Your Data
|
||||
|
||||
We provide a convenient script to package all your data volumes into a single backup folder.
|
||||
|
||||
For a quick reference of the script's commands and options, you can run:
|
||||
```bash
|
||||
bash docker/migration.sh help
|
||||
```
|
||||
|
||||
To create a backup, run the following command from the project's root directory:
|
||||
|
||||
```bash
|
||||
bash docker/migration.sh backup
|
||||
```
|
||||
|
||||
This will create a `backup/` folder in your project root containing compressed archives of your data volumes.
|
||||
|
||||
You can also specify a custom name for your backup folder:
|
||||
|
||||
```bash
|
||||
bash docker/migration.sh backup my_ragflow_backup
|
||||
```
|
||||
|
||||
This will create a folder named `my_ragflow_backup/` instead.
|
||||
|
||||
## Step 3: Transfer the Backup Folder
|
||||
|
||||
Copy the entire backup folder (e.g., `backup/` or `my_ragflow_backup/`) from your source machine to the RAGFlow project directory on your **target machine**. You can use tools like `scp`, `rsync`, or a physical drive for the transfer.
|
||||
|
||||
## Step 4: Restore Your Data
|
||||
|
||||
On the **target machine**, ensure that RAGFlow services are not running. Then, use the migration script to restore your data from the backup folder.
|
||||
|
||||
If your backup folder is named `backup/`, run:
|
||||
|
||||
```bash
|
||||
bash docker/migration.sh restore
|
||||
```
|
||||
|
||||
If you used a custom name, specify it in the command:
|
||||
|
||||
```bash
|
||||
bash docker/migration.sh restore my_ragflow_backup
|
||||
```
|
||||
|
||||
The script will automatically create the necessary Docker volumes and unpack the data.
|
||||
|
||||
**Note:** If the script detects that Docker volumes with the same names already exist on the target machine, it will warn you that restoring will overwrite the existing data and ask for confirmation before proceeding.
|
||||
|
||||
## Step 5: Start RAGFlow Services
|
||||
|
||||
Once the restore process is complete, you can start the RAGFlow services on your new machine:
|
||||
|
||||
```bash
|
||||
docker-compose -f docker/docker-compose.yml up -d
|
||||
```
|
||||
|
||||
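**Note:** If the target machine already has a RAGFlow deployment that was created with docker-compose, back up its existing data first, as described in Step 2 of this guide. Be aware that `down -v` deletes the Docker volumes together with everything stored in them — including data you have already restored — so remove the old volumes *before* running the restore in Step 4. The commands look like this: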
|
||||
|
||||
```bash
|
||||
# Back up your data first with `bash docker/migration.sh backup backup_dir_name` before running the next line.
|
||||
# WARNING: the -v flag on the next line deletes the existing Docker volumes and all data in them.
|
||||
docker-compose -f docker/docker-compose.yml down -v
|
||||
docker-compose -f docker/docker-compose.yml up -d
|
||||
```
|
||||
|
||||
Your RAGFlow instance is now running with all the data from your original machine.
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user