mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-02-07 02:55:08 +08:00
feat(tools): add Elasticsearch to OceanBase migration tool (#12927)
### What problem does this PR solve? fixes https://github.com/infiniflow/ragflow/issues/12774 Add a CLI tool for migrating RAGFlow data from Elasticsearch to OceanBase, enabling users to switch their document storage backend. - Automatic discovery and migration of all `ragflow_*` indices - Schema conversion with vector dimension auto-detection - Batch processing with progress tracking and resume capability - Data consistency validation and migration report generation **Note**: Due to network issues, I was unable to pull the required Docker images (Elasticsearch, OceanBase) to run the full end-to-end verification. Unit tests have been verified to pass. I will complete the e2e verification when network conditions allow, and submit a follow-up PR if any fixes are needed. ```bash ============================= test session starts ============================== platform darwin -- Python 3.13.6, pytest-9.0.2, pluggy-1.6.0 rootdir: /Users/sevenc/code/ai/oceanbase/ragflow/tools/es-to-oceanbase-migration configfile: pyproject.toml testpaths: tests plugins: anyio-4.12.1, asyncio-1.3.0, cov-7.0.0 collected 86 items tests/test_progress.py::TestMigrationProgress::test_create_basic_progress PASSED [ 1%] tests/test_progress.py::TestMigrationProgress::test_create_progress_with_counts PASSED [ 2%] tests/test_progress.py::TestMigrationProgress::test_progress_default_values PASSED [ 3%] tests/test_progress.py::TestMigrationProgress::test_progress_status_values PASSED [ 4%] tests/test_progress.py::TestProgressManager::test_create_progress_manager PASSED [ 5%] tests/test_progress.py::TestProgressManager::test_create_progress_manager_creates_dir PASSED [ 6%] tests/test_progress.py::TestProgressManager::test_create_progress PASSED [ 8%] tests/test_progress.py::TestProgressManager::test_save_and_load_progress PASSED [ 9%] tests/test_progress.py::TestProgressManager::test_load_nonexistent_progress PASSED [ 10%] tests/test_progress.py::TestProgressManager::test_delete_progress PASSED [ 11%] 
tests/test_progress.py::TestProgressManager::test_update_progress PASSED [ 12%] tests/test_progress.py::TestProgressManager::test_update_progress_multiple_batches PASSED [ 13%] tests/test_progress.py::TestProgressManager::test_mark_completed PASSED [ 15%] tests/test_progress.py::TestProgressManager::test_mark_failed PASSED [ 16%] tests/test_progress.py::TestProgressManager::test_mark_paused PASSED [ 17%] tests/test_progress.py::TestProgressManager::test_can_resume_running PASSED [ 18%] tests/test_progress.py::TestProgressManager::test_can_resume_paused PASSED [ 19%] tests/test_progress.py::TestProgressManager::test_can_resume_completed PASSED [ 20%] tests/test_progress.py::TestProgressManager::test_can_resume_nonexistent PASSED [ 22%] tests/test_progress.py::TestProgressManager::test_get_resume_info PASSED [ 23%] tests/test_progress.py::TestProgressManager::test_get_resume_info_nonexistent PASSED [ 24%] tests/test_progress.py::TestProgressManager::test_progress_file_path PASSED [ 25%] tests/test_progress.py::TestProgressManager::test_progress_file_content PASSED [ 26%] tests/test_schema.py::TestRAGFlowSchemaConverter::test_analyze_ragflow_mapping PASSED [ 27%] tests/test_schema.py::TestRAGFlowSchemaConverter::test_detect_vector_size PASSED [ 29%] tests/test_schema.py::TestRAGFlowSchemaConverter::test_unknown_fields PASSED [ 30%] tests/test_schema.py::TestRAGFlowSchemaConverter::test_get_column_definitions PASSED [ 31%] tests/test_schema.py::TestRAGFlowDataConverter::test_convert_basic_document PASSED [ 32%] tests/test_schema.py::TestRAGFlowDataConverter::test_convert_with_vector PASSED [ 33%] tests/test_schema.py::TestRAGFlowDataConverter::test_convert_array_fields PASSED [ 34%] tests/test_schema.py::TestRAGFlowDataConverter::test_convert_json_fields PASSED [ 36%] tests/test_schema.py::TestRAGFlowDataConverter::test_convert_unknown_fields_to_extra PASSED [ 37%] tests/test_schema.py::TestRAGFlowDataConverter::test_convert_kb_id_list PASSED [ 38%] 
tests/test_schema.py::TestRAGFlowDataConverter::test_convert_content_with_weight_dict PASSED [ 39%] tests/test_schema.py::TestRAGFlowDataConverter::test_convert_batch PASSED [ 40%] tests/test_schema.py::TestVectorFieldPattern::test_valid_patterns PASSED [ 41%] tests/test_schema.py::TestVectorFieldPattern::test_invalid_patterns PASSED [ 43%] tests/test_schema.py::TestVectorFieldPattern::test_extract_dimension PASSED [ 44%] tests/test_schema.py::TestConstants::test_array_columns PASSED [ 45%] tests/test_schema.py::TestConstants::test_json_columns PASSED [ 46%] tests/test_schema.py::TestConstants::test_ragflow_columns_completeness PASSED [ 47%] tests/test_schema.py::TestConstants::test_fts_columns PASSED [ 48%] tests/test_schema.py::TestConstants::test_ragflow_columns_types PASSED [ 50%] tests/test_schema.py::TestRAGFlowSchemaConverterEdgeCases::test_empty_mapping PASSED [ 51%] tests/test_schema.py::TestRAGFlowSchemaConverterEdgeCases::test_mapping_without_properties PASSED [ 52%] tests/test_schema.py::TestRAGFlowSchemaConverterEdgeCases::test_multiple_vector_fields PASSED [ 53%] tests/test_schema.py::TestRAGFlowSchemaConverterEdgeCases::test_get_column_definitions_without_analysis PASSED [ 54%] tests/test_schema.py::TestRAGFlowSchemaConverterEdgeCases::test_get_vector_fields PASSED [ 55%] tests/test_schema.py::TestRAGFlowDataConverterEdgeCases::test_convert_empty_document PASSED [ 56%] tests/test_schema.py::TestRAGFlowDataConverterEdgeCases::test_convert_document_without_source PASSED [ 58%] tests/test_schema.py::TestRAGFlowDataConverterEdgeCases::test_convert_boolean_to_integer PASSED [ 59%] tests/test_schema.py::TestRAGFlowDataConverterEdgeCases::test_convert_invalid_integer PASSED [ 60%] tests/test_schema.py::TestRAGFlowDataConverterEdgeCases::test_convert_float_field PASSED [ 61%] tests/test_schema.py::TestRAGFlowDataConverterEdgeCases::test_convert_array_with_special_characters PASSED [ 62%] 
tests/test_schema.py::TestRAGFlowDataConverterEdgeCases::test_convert_already_json_array PASSED [ 63%] tests/test_schema.py::TestRAGFlowDataConverterEdgeCases::test_convert_single_value_to_array PASSED [ 65%] tests/test_schema.py::TestRAGFlowDataConverterEdgeCases::test_detect_vector_fields_from_document PASSED [ 66%] tests/test_schema.py::TestRAGFlowDataConverterEdgeCases::test_convert_with_default_values PASSED [ 67%] tests/test_schema.py::TestRAGFlowDataConverterEdgeCases::test_convert_list_content PASSED [ 68%] tests/test_schema.py::TestRAGFlowDataConverterEdgeCases::test_convert_batch_empty PASSED [ 69%] tests/test_schema.py::TestRAGFlowDataConverterEdgeCases::test_existing_extra_field_merged PASSED [ 70%] tests/test_verify.py::TestVerificationResult::test_create_basic_result PASSED [ 72%] tests/test_verify.py::TestVerificationResult::test_result_default_values PASSED [ 73%] tests/test_verify.py::TestVerificationResult::test_result_with_counts PASSED [ 74%] tests/test_verify.py::TestMigrationVerifier::test_verify_counts_match PASSED [ 75%] tests/test_verify.py::TestMigrationVerifier::test_verify_counts_mismatch PASSED [ 76%] tests/test_verify.py::TestMigrationVerifier::test_verify_samples_all_match PASSED [ 77%] tests/test_verify.py::TestMigrationVerifier::test_verify_samples_some_missing PASSED [ 79%] tests/test_verify.py::TestMigrationVerifier::test_verify_samples_data_mismatch PASSED [ 80%] tests/test_verify.py::TestMigrationVerifier::test_values_equal_none_values PASSED [ 81%] tests/test_verify.py::TestMigrationVerifier::test_values_equal_array_columns PASSED [ 82%] tests/test_verify.py::TestMigrationVerifier::test_values_equal_json_columns PASSED [ 83%] tests/test_verify.py::TestMigrationVerifier::test_values_equal_kb_id_list PASSED [ 84%] tests/test_verify.py::TestMigrationVerifier::test_values_equal_content_with_weight_dict PASSED [ 86%] tests/test_verify.py::TestMigrationVerifier::test_determine_result_passed PASSED [ 87%] 
tests/test_verify.py::TestMigrationVerifier::test_determine_result_failed_count PASSED [ 88%] tests/test_verify.py::TestMigrationVerifier::test_determine_result_failed_samples PASSED [ 89%] tests/test_verify.py::TestMigrationVerifier::test_generate_report PASSED [ 90%] tests/test_verify.py::TestMigrationVerifier::test_generate_report_with_missing PASSED [ 91%] tests/test_verify.py::TestMigrationVerifier::test_generate_report_with_mismatches PASSED [ 93%] tests/test_verify.py::TestValueComparison::test_string_comparison PASSED [ 94%] tests/test_verify.py::TestValueComparison::test_integer_comparison PASSED [ 95%] tests/test_verify.py::TestValueComparison::test_float_comparison PASSED [ 96%] tests/test_verify.py::TestValueComparison::test_boolean_comparison PASSED [ 97%] tests/test_verify.py::TestValueComparison::test_empty_array_comparison PASSED [ 98%] tests/test_verify.py::TestValueComparison::test_nested_json_comparison PASSED [100%] ======================= 86 passed, 88 warnings in 0.66s ======================== ``` ### Type of change - [ ] Bug Fix (non-breaking change which fixes an issue) - [x] New Feature (non-breaking change which adds functionality) - [ ] Documentation Update - [ ] Refactoring - [ ] Performance Improvement - [ ] Other (please describe):
This commit is contained in:
574
tools/es-to-oceanbase-migration/src/es_ob_migration/cli.py
Normal file
574
tools/es-to-oceanbase-migration/src/es_ob_migration/cli.py
Normal file
@ -0,0 +1,574 @@
|
||||
"""
|
||||
CLI entry point for RAGFlow ES to OceanBase migration tool.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import sys
|
||||
|
||||
import click
|
||||
from rich.console import Console
|
||||
from rich.table import Table
|
||||
from rich.logging import RichHandler
|
||||
|
||||
from .es_client import ESClient
|
||||
from .ob_client import OBClient
|
||||
from .migrator import ESToOceanBaseMigrator
|
||||
from .verify import MigrationVerifier
|
||||
from .schema import RAGFLOW_COLUMNS
|
||||
|
||||
# Shared Rich console: every command prints through it, and the logging
# handler configured in setup_logging() writes to the same console.
console = Console()
|
||||
|
||||
|
||||
def setup_logging(verbose: bool = False):
    """Setup logging configuration."""
    # Route log records through Rich so they share the module-level console
    # with the regular command output.
    rich_handler = RichHandler(rich_tracebacks=True, console=console)

    # DEBUG when verbose output was requested, INFO otherwise.
    if verbose:
        threshold = logging.DEBUG
    else:
        threshold = logging.INFO

    logging.basicConfig(
        level=threshold,
        format="%(message)s",
        datefmt="[%X]",
        handlers=[rich_handler],
    )
|
||||
|
||||
|
||||
@click.group()
@click.option("-v", "--verbose", is_flag=True, help="Enable verbose logging")
@click.pass_context
def main(ctx, verbose):
    """RAGFlow ES to OceanBase Migration Tool.

    Migrate RAGFlow data from Elasticsearch 8+ to OceanBase with schema conversion,
    vector data mapping, batch import, and resume capability.

    This tool is specifically designed for RAGFlow's data structure.
    """
    # ensure_object() creates the shared dict on first use and returns it;
    # sub-commands read the "verbose" flag back from ctx.obj.
    shared_state = ctx.ensure_object(dict)
    shared_state["verbose"] = verbose
    setup_logging(verbose)
|
||||
|
||||
|
||||
def _discover_ragflow_targets(es_client):
    """Return (index, table) pairs for every ragflow_* index, printing a summary.

    Each discovered index maps to a table of the same name. Returns an empty
    list (and prints no summary) when Elasticsearch has no ragflow_* indices.
    """
    console.print("\n[cyan]Discovering RAGFlow indices...[/]")
    ragflow_indices = es_client.list_ragflow_indices()
    if not ragflow_indices:
        return []

    console.print(f"[green]Found {len(ragflow_indices)} RAGFlow indices:[/]")
    for idx in ragflow_indices:
        doc_count = es_client.count_documents(idx)
        console.print(f" - {idx} ({doc_count:,} documents)")
    console.print()

    return [(idx, idx) for idx in ragflow_indices]


@main.command()
@click.option("--es-host", default="localhost", help="Elasticsearch host")
@click.option("--es-port", default=9200, type=int, help="Elasticsearch port")
@click.option("--es-user", default=None, help="Elasticsearch username")
@click.option("--es-password", default=None, help="Elasticsearch password")
@click.option("--es-api-key", default=None, help="Elasticsearch API key")
@click.option("--ob-host", default="localhost", help="OceanBase host")
@click.option("--ob-port", default=2881, type=int, help="OceanBase port")
@click.option("--ob-user", default="root@test", help="OceanBase user (format: user@tenant)")
@click.option("--ob-password", default="", help="OceanBase password")
@click.option("--ob-database", default="test", help="OceanBase database")
@click.option("--index", "-i", default=None, help="Source ES index name (omit to migrate all ragflow_* indices)")
@click.option("--table", "-t", default=None, help="Target OceanBase table name (omit to use same name as index)")
@click.option("--batch-size", default=1000, type=click.IntRange(min=1), help="Batch size for migration")
@click.option("--resume", is_flag=True, help="Resume from previous progress")
@click.option("--verify/--no-verify", default=True, help="Verify after migration")
@click.option("--progress-dir", default=".migration_progress", help="Progress file directory")
@click.pass_context
def migrate(
    ctx,
    es_host,
    es_port,
    es_user,
    es_password,
    es_api_key,
    ob_host,
    ob_port,
    ob_user,
    ob_password,
    ob_database,
    index,
    table,
    batch_size,
    resume,
    verify,
    progress_dir,
):
    """Run RAGFlow data migration from Elasticsearch to OceanBase.

    If --index is omitted, all indices starting with 'ragflow_' will be migrated.
    If --table is omitted, the same name as the source index will be used.
    """
    console.print("[bold]RAGFlow ES to OceanBase Migration[/]")

    # Pre-bind both clients so the cleanup in `finally` can tell which ones
    # were actually constructed without inspecting locals().
    es_client = None
    ob_client = None

    try:
        # Initialize ES client first to discover indices if needed
        es_client = ESClient(
            host=es_host,
            port=es_port,
            username=es_user,
            password=es_password,
            api_key=es_api_key,
        )

        ob_client = OBClient(
            host=ob_host,
            port=ob_port,
            user=ob_user,
            password=ob_password,
            database=ob_database,
        )

        # Determine indices to migrate
        if index:
            # Single index specified
            indices_to_migrate = [(index, table or index)]
        else:
            if table:
                # A single table name cannot apply to several indices; say so
                # instead of silently dropping the option.
                console.print(
                    "[yellow]Warning: --table is ignored when --index is omitted; "
                    "each table is named after its source index[/]"
                )

            indices_to_migrate = _discover_ragflow_targets(es_client)
            if not indices_to_migrate:
                console.print("[yellow]No ragflow_* indices found in Elasticsearch[/]")
                # SystemExit is not caught by `except Exception`; `finally`
                # still closes the clients.
                sys.exit(0)

        # Initialize migrator
        migrator = ESToOceanBaseMigrator(
            es_client=es_client,
            ob_client=ob_client,
            progress_dir=progress_dir,
        )

        # Track overall results
        total_success = 0
        total_failed = 0
        results = []

        # Migrate each index
        for es_index, ob_table in indices_to_migrate:
            console.print(f"\n[bold blue]{'='*60}[/]")
            console.print(f"[bold]Migrating: {es_index} -> {ob_database}.{ob_table}[/]")
            console.print(f"[bold blue]{'='*60}[/]")

            result = migrator.migrate(
                es_index=es_index,
                ob_table=ob_table,
                batch_size=batch_size,
                resume=resume,
                verify_after=verify,
            )

            results.append(result)
            if result["success"]:
                total_success += 1
            else:
                total_failed += 1

        # Summary for multiple indices
        if len(indices_to_migrate) > 1:
            console.print(f"\n[bold]{'='*60}[/]")
            console.print("[bold]Migration Summary[/]")
            console.print(f"[bold]{'='*60}[/]")
            console.print(f" Total indices: {len(indices_to_migrate)}")
            console.print(f" [green]Successful: {total_success}[/]")
            if total_failed > 0:
                console.print(f" [red]Failed: {total_failed}[/]")

        # Exit code based on results
        if total_failed == 0:
            console.print("\n[bold green]All migrations completed successfully![/]")
            sys.exit(0)
        else:
            console.print(f"\n[bold red]{total_failed} migration(s) failed[/]")
            sys.exit(1)

    except Exception as e:
        console.print(f"[bold red]Error: {e}[/]")
        if ctx.obj.get("verbose"):
            console.print_exception()
        sys.exit(1)
    finally:
        # Cleanup
        if es_client is not None:
            es_client.close()
        if ob_client is not None:
            ob_client.close()
|
||||
|
||||
|
||||
@main.command()
@click.option("--es-host", default="localhost", help="Elasticsearch host")
@click.option("--es-port", default=9200, type=int, help="Elasticsearch port")
@click.option("--es-user", default=None, help="Elasticsearch username")
@click.option("--es-password", default=None, help="Elasticsearch password")
@click.option("--es-api-key", default=None, help="Elasticsearch API key")
@click.option("--index", "-i", required=True, help="ES index name")
@click.option("--output", "-o", default=None, help="Output file (JSON)")
@click.pass_context
def schema(ctx, es_host, es_port, es_user, es_password, index, output, es_api_key=None):
    """Preview RAGFlow schema analysis from ES mapping."""
    # Only Elasticsearch is needed for the preview: the schema converter works
    # directly on the index mapping, so no OceanBase client or migrator is built.
    es_client = None

    try:
        es_client = ESClient(
            host=es_host,
            port=es_port,
            username=es_user,
            password=es_password,
            api_key=es_api_key,
        )

        # Directly use schema converter
        from .schema import RAGFlowSchemaConverter

        converter = RAGFlowSchemaConverter()

        es_mapping = es_client.get_index_mapping(index)
        analysis = converter.analyze_es_mapping(es_mapping)
        column_defs = converter.get_column_definitions()

        # Display analysis
        console.print(f"\n[bold]ES Index Analysis: {index}[/]\n")

        # Known RAGFlow fields
        console.print(f"[green]Known RAGFlow fields:[/] {len(analysis['known_fields'])}")

        # Vector fields
        if analysis["vector_fields"]:
            console.print("\n[cyan]Vector fields detected:[/]")
            for vf in analysis["vector_fields"]:
                console.print(f" - {vf['name']} (dimension: {vf['dimension']})")

        # Unknown fields
        if analysis["unknown_fields"]:
            console.print("\n[yellow]Unknown fields (will be stored in 'extra'):[/]")
            for uf in analysis["unknown_fields"]:
                console.print(f" - {uf}")

        # Display RAGFlow column schema
        console.print(f"\n[bold]RAGFlow OceanBase Schema ({len(column_defs)} columns):[/]\n")

        table = Table(title="Column Definitions")
        table.add_column("Column Name", style="cyan")
        table.add_column("OB Type", style="green")
        table.add_column("Nullable", style="yellow")
        table.add_column("Special", style="magenta")

        for col in column_defs[:20]:  # Show first 20
            special = []
            if col.get("is_primary"):
                special.append("PK")
            if col.get("index"):
                special.append("IDX")
            if col.get("is_array"):
                special.append("ARRAY")
            if col.get("is_vector"):
                special.append("VECTOR")

            table.add_row(
                col["name"],
                col["ob_type"],
                "Yes" if col.get("nullable", True) else "No",
                ", ".join(special) if special else "-",
            )

        if len(column_defs) > 20:
            table.add_row("...", f"({len(column_defs) - 20} more)", "", "")

        console.print(table)

        # Save to file if requested
        if output:
            preview = {
                "es_index": index,
                "es_mapping": es_mapping,
                "analysis": analysis,
                "ob_columns": column_defs,
            }
            # Explicit encoding keeps the file independent of the platform's
            # default locale encoding.
            with open(output, "w", encoding="utf-8") as f:
                json.dump(preview, f, indent=2, default=str)
            console.print(f"\nSchema saved to {output}")

    except Exception as e:
        console.print(f"[bold red]Error: {e}[/]")
        if ctx.obj.get("verbose"):
            console.print_exception()
        sys.exit(1)
    finally:
        if es_client is not None:
            es_client.close()
|
||||
|
||||
|
||||
@main.command()
@click.option("--es-host", default="localhost", help="Elasticsearch host")
@click.option("--es-port", default=9200, type=int, help="Elasticsearch port")
@click.option("--es-user", default=None, help="Elasticsearch username")
@click.option("--es-password", default=None, help="Elasticsearch password")
@click.option("--es-api-key", default=None, help="Elasticsearch API key")
@click.option("--ob-host", default="localhost", help="OceanBase host")
@click.option("--ob-port", default=2881, type=int, help="OceanBase port")
@click.option("--ob-user", default="root@test", help="OceanBase user")
@click.option("--ob-password", default="", help="OceanBase password")
@click.option("--ob-database", default="test", help="OceanBase database")
@click.option("--index", "-i", required=True, help="Source ES index name")
@click.option("--table", "-t", required=True, help="Target OceanBase table name")
@click.option("--sample-size", default=100, type=int, help="Sample size for verification")
@click.pass_context
def verify(
    ctx,
    es_host,
    es_port,
    ob_host,
    ob_port,
    ob_user,
    ob_password,
    ob_database,
    index,
    table,
    sample_size,
    es_user=None,
    es_password=None,
    es_api_key=None,
):
    """Verify migration data consistency."""
    # The ES credentials mirror the `migrate` command so a secured cluster that
    # was migrated from can also be verified against. They default to None,
    # which matches the previous unauthenticated behaviour.
    es_client = None
    ob_client = None

    try:
        es_client = ESClient(
            host=es_host,
            port=es_port,
            username=es_user,
            password=es_password,
            api_key=es_api_key,
        )
        ob_client = OBClient(
            host=ob_host,
            port=ob_port,
            user=ob_user,
            password=ob_password,
            database=ob_database,
        )

        verifier = MigrationVerifier(es_client, ob_client)
        result = verifier.verify(
            index,
            table,
            sample_size=sample_size,
        )

        console.print(verifier.generate_report(result))

        # Exit status reflects the verification outcome. SystemExit bypasses
        # `except Exception`; `finally` still closes the clients.
        sys.exit(0 if result.passed else 1)

    except Exception as e:
        console.print(f"[bold red]Error: {e}[/]")
        if ctx.obj.get("verbose"):
            console.print_exception()
        sys.exit(1)
    finally:
        if es_client is not None:
            es_client.close()
        if ob_client is not None:
            ob_client.close()
|
||||
|
||||
|
||||
@main.command("list-indices")
@click.option("--es-host", default="localhost", help="Elasticsearch host")
@click.option("--es-port", default=9200, type=int, help="Elasticsearch port")
@click.option("--es-user", default=None, help="Elasticsearch username")
@click.option("--es-password", default=None, help="Elasticsearch password")
@click.pass_context
def list_indices(ctx, es_host, es_port, es_user, es_password):
    """List all RAGFlow indices (ragflow_*) in Elasticsearch."""
    # Prefix -> label pairs, most specific prefix first so metadata indices
    # are not reported as plain chunk indices.
    prefix_labels = (
        ("ragflow_doc_meta_", "Metadata"),
        ("ragflow_", "Document Chunks"),
    )

    es_client = None
    try:
        es_client = ESClient(
            host=es_host,
            port=es_port,
            username=es_user,
            password=es_password,
        )

        console.print(f"\n[bold]RAGFlow Indices in Elasticsearch ({es_host}:{es_port})[/]\n")

        names = es_client.list_ragflow_indices()
        if not names:
            console.print("[yellow]No ragflow_* indices found[/]")
            return

        listing = Table(title="RAGFlow Indices")
        listing.add_column("Index Name", style="cyan")
        listing.add_column("Document Count", style="green", justify="right")
        listing.add_column("Type", style="yellow")

        grand_total = 0
        for name in names:
            n_docs = es_client.count_documents(name)
            grand_total += n_docs

            # First matching prefix wins; anything else is "Unknown".
            label = next(
                (text for prefix, text in prefix_labels if name.startswith(prefix)),
                "Unknown",
            )
            listing.add_row(name, f"{n_docs:,}", label)

        # Blank spacer row, then the totals row.
        listing.add_row("", "", "")
        listing.add_row("[bold]Total[/]", f"[bold]{grand_total:,}[/]", f"[bold]{len(names)} indices[/]")

        console.print(listing)

    except Exception as e:
        console.print(f"[bold red]Error: {e}[/]")
        if ctx.obj.get("verbose"):
            console.print_exception()
        sys.exit(1)
    finally:
        if es_client is not None:
            es_client.close()
|
||||
|
||||
|
||||
@main.command("list-kb")
@click.option("--es-host", default="localhost", help="Elasticsearch host")
@click.option("--es-port", default=9200, type=int, help="Elasticsearch port")
@click.option("--es-user", default=None, help="Elasticsearch username")
@click.option("--es-password", default=None, help="Elasticsearch password")
@click.option("--index", "-i", required=True, help="ES index name")
@click.pass_context
def list_kb(ctx, es_host, es_port, es_user, es_password, index):
    """List all knowledge bases in an ES index."""
    es_client = None
    try:
        es_client = ESClient(
            host=es_host,
            port=es_port,
            username=es_user,
            password=es_password,
        )

        console.print(f"\n[bold]Knowledge Bases in index: {index}[/]\n")

        # One aggregation bucket per distinct kb_id value.
        kb_buckets = es_client.aggregate_field(index, "kb_id").get("buckets", [])
        if not kb_buckets:
            console.print("[yellow]No knowledge bases found[/]")
            return

        kb_table = Table(title="Knowledge Bases")
        kb_table.add_column("KB ID", style="cyan")
        kb_table.add_column("Document Count", style="green", justify="right")

        running_total = 0
        for entry in kb_buckets:
            kb_table.add_row(entry["key"], f"{entry['doc_count']:,}")
            running_total += entry["doc_count"]

        # Blank spacer row, then the totals row.
        kb_table.add_row("", "")
        kb_table.add_row("[bold]Total[/]", f"[bold]{running_total:,}[/]")

        console.print(kb_table)
        console.print(f"\nTotal knowledge bases: {len(kb_buckets)}")

    except Exception as e:
        console.print(f"[bold red]Error: {e}[/]")
        if ctx.obj.get("verbose"):
            console.print_exception()
        sys.exit(1)
    finally:
        if es_client is not None:
            es_client.close()
|
||||
|
||||
|
||||
@main.command()
@click.option("--es-host", default="localhost", help="Elasticsearch host")
@click.option("--es-port", default=9200, type=int, help="Elasticsearch port")
@click.option("--ob-host", default="localhost", help="OceanBase host")
@click.option("--ob-port", default=2881, type=int, help="OceanBase port")
@click.option("--ob-user", default="root@test", help="OceanBase user")
@click.option("--ob-password", default="", help="OceanBase password")
@click.pass_context
def status(ctx, es_host, es_port, ob_host, ob_port, ob_user, ob_password):
    """Check connection status to ES and OceanBase."""
    console.print("[bold]Connection Status[/]\n")

    # Check ES
    # The two checks are independent: a failure on one side is reported and
    # the command still goes on to check the other side.
    try:
        es_client = ESClient(host=es_host, port=es_port)
        # Inner try/finally releases the client even when a health or info
        # call raises; the outer handler still reports any failure,
        # including one raised by close().
        try:
            health = es_client.health_check()
            info = es_client.get_cluster_info()
            console.print(f"[green]Elasticsearch ({es_host}:{es_port}): Connected[/]")
            console.print(f" Cluster: {health.get('cluster_name')}")
            console.print(f" Status: {health.get('status')}")
            console.print(f" Version: {info.get('version', {}).get('number', 'unknown')}")

            # List indices
            indices = es_client.list_indices("*")
            console.print(f" Indices: {len(indices)}")
        finally:
            es_client.close()
    except Exception as e:
        console.print(f"[red]Elasticsearch ({es_host}:{es_port}): Failed[/]")
        console.print(f" Error: {e}")

    console.print()

    # Check OceanBase
    try:
        ob_client = OBClient(
            host=ob_host,
            port=ob_port,
            user=ob_user,
            password=ob_password,
        )
        try:
            if ob_client.health_check():
                version = ob_client.get_version()
                console.print(f"[green]OceanBase ({ob_host}:{ob_port}): Connected[/]")
                console.print(f" Version: {version}")
            else:
                console.print(f"[red]OceanBase ({ob_host}:{ob_port}): Health check failed[/]")
        finally:
            ob_client.close()
    except Exception as e:
        console.print(f"[red]OceanBase ({ob_host}:{ob_port}): Failed[/]")
        console.print(f" Error: {e}")
|
||||
|
||||
|
||||
def _content_preview(content, limit=100):
    """Return a printable preview of a document's content, truncated to `limit` characters."""
    if isinstance(content, (dict, list)):
        # Structured content is shown as JSON; default=str keeps odd values printable.
        content = json.dumps(content, ensure_ascii=False, default=str)
    elif not isinstance(content, str):
        content = str(content)
    return content[:limit] + "..." if len(content) > limit else content


@main.command()
@click.option("--es-host", default="localhost", help="Elasticsearch host")
@click.option("--es-port", default=9200, type=int, help="Elasticsearch port")
@click.option("--index", "-i", required=True, help="ES index name")
@click.option("--size", "-n", default=5, type=int, help="Number of samples")
@click.pass_context
def sample(ctx, es_host, es_port, index, size):
    """Show sample documents from ES index."""
    try:
        es_client = ESClient(host=es_host, port=es_port)
        # Inner try/finally closes the client on both success and failure.
        try:
            docs = es_client.get_sample_documents(index, size)

            console.print(f"\n[bold]Sample documents from {index}[/]")
            console.print()

            for i, doc in enumerate(docs, 1):
                console.print(f"[bold cyan]Document {i}[/]")
                # Document values are arbitrary user data: print them with
                # markup disabled so square brackets in the data are shown
                # literally instead of being parsed as Rich style tags.
                console.print(f" _id: {doc.get('_id')}", markup=False)
                console.print(f" kb_id: {doc.get('kb_id')}", markup=False)
                console.print(f" doc_id: {doc.get('doc_id')}", markup=False)
                console.print(f" docnm_kwd: {doc.get('docnm_kwd')}", markup=False)

                # Check for vector fields
                vector_fields = [k for k in doc if k.startswith("q_") and k.endswith("_vec")]
                for vf in vector_fields:
                    vec = doc.get(vf)
                    if vec:
                        console.print(f" {vf}: [{len(vec)} dimensions]", markup=False)

                content = doc.get("content_with_weight", "")
                if content:
                    console.print(f" content: {_content_preview(content)}", markup=False)

                console.print()
        finally:
            es_client.close()

    except Exception as e:
        console.print(f"[bold red]Error: {e}[/]")
        if ctx.obj.get("verbose"):
            console.print_exception()
        sys.exit(1)
|
||||
|
||||
|
||||
# Invoke the click group when the module is executed directly as a script.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user