Compare commits

240 Commits

332b11cf96 feat(tools): add Elasticsearch to OceanBase migration tool (#12927)
### What problem does this PR solve?

fixes https://github.com/infiniflow/ragflow/issues/12774

Add a CLI tool for migrating RAGFlow data from Elasticsearch to
OceanBase, enabling users to switch their document storage backend.

- Automatic discovery and migration of all `ragflow_*` indices
- Schema conversion with vector dimension auto-detection
- Batch processing with progress tracking and resume capability
- Data consistency validation and migration report generation
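
For illustration, a minimal sketch of the vector-dimension auto-detection mentioned above, assuming RAGFlow-style `q_<dim>_vec` field names in the Elasticsearch mapping (the function name and sample mapping are illustrative):

```python
import re

# Assumption: RAGFlow vector columns are named like "q_768_vec".
VECTOR_FIELD_PATTERN = re.compile(r"^q_(\d+)_vec$")

def detect_vector_size(es_mapping: dict) -> int | None:
    """Return the dimension of the first vector field found in an ES mapping, if any."""
    properties = es_mapping.get("mappings", {}).get("properties", {})
    for field_name in properties:
        match = VECTOR_FIELD_PATTERN.match(field_name)
        if match:
            return int(match.group(1))
    return None

# Example: a trimmed-down mapping with one 768-dimensional vector field.
sample_mapping = {"mappings": {"properties": {
    "content_ltks": {"type": "text"},
    "q_768_vec": {"type": "dense_vector", "dims": 768},
}}}
print(detect_vector_size(sample_mapping))  # 768
```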

**Note**: Due to network issues, I was unable to pull the required
Docker images (Elasticsearch, OceanBase) to run the full end-to-end
verification. Unit tests have been verified to pass. I will complete the
e2e verification when network conditions allow, and submit a follow-up
PR if any fixes are needed.

```bash
============================= test session starts ==============================
platform darwin -- Python 3.13.6, pytest-9.0.2, pluggy-1.6.0
rootdir: /Users/sevenc/code/ai/oceanbase/ragflow/tools/es-to-oceanbase-migration
configfile: pyproject.toml
testpaths: tests
plugins: anyio-4.12.1, asyncio-1.3.0, cov-7.0.0
collected 86 items

tests/test_progress.py::TestMigrationProgress::test_create_basic_progress PASSED [  1%]
tests/test_progress.py::TestMigrationProgress::test_create_progress_with_counts PASSED [  2%]
tests/test_progress.py::TestMigrationProgress::test_progress_default_values PASSED [  3%]
tests/test_progress.py::TestMigrationProgress::test_progress_status_values PASSED [  4%]
tests/test_progress.py::TestProgressManager::test_create_progress_manager PASSED [  5%]
tests/test_progress.py::TestProgressManager::test_create_progress_manager_creates_dir PASSED [  6%]
tests/test_progress.py::TestProgressManager::test_create_progress PASSED [  8%]
tests/test_progress.py::TestProgressManager::test_save_and_load_progress PASSED [  9%]
tests/test_progress.py::TestProgressManager::test_load_nonexistent_progress PASSED [ 10%]
tests/test_progress.py::TestProgressManager::test_delete_progress PASSED [ 11%]
tests/test_progress.py::TestProgressManager::test_update_progress PASSED [ 12%]
tests/test_progress.py::TestProgressManager::test_update_progress_multiple_batches PASSED [ 13%]
tests/test_progress.py::TestProgressManager::test_mark_completed PASSED  [ 15%]
tests/test_progress.py::TestProgressManager::test_mark_failed PASSED     [ 16%]
tests/test_progress.py::TestProgressManager::test_mark_paused PASSED     [ 17%]
tests/test_progress.py::TestProgressManager::test_can_resume_running PASSED [ 18%]
tests/test_progress.py::TestProgressManager::test_can_resume_paused PASSED [ 19%]
tests/test_progress.py::TestProgressManager::test_can_resume_completed PASSED [ 20%]
tests/test_progress.py::TestProgressManager::test_can_resume_nonexistent PASSED [ 22%]
tests/test_progress.py::TestProgressManager::test_get_resume_info PASSED [ 23%]
tests/test_progress.py::TestProgressManager::test_get_resume_info_nonexistent PASSED [ 24%]
tests/test_progress.py::TestProgressManager::test_progress_file_path PASSED [ 25%]
tests/test_progress.py::TestProgressManager::test_progress_file_content PASSED [ 26%]
tests/test_schema.py::TestRAGFlowSchemaConverter::test_analyze_ragflow_mapping PASSED [ 27%]
tests/test_schema.py::TestRAGFlowSchemaConverter::test_detect_vector_size PASSED [ 29%]
tests/test_schema.py::TestRAGFlowSchemaConverter::test_unknown_fields PASSED [ 30%]
tests/test_schema.py::TestRAGFlowSchemaConverter::test_get_column_definitions PASSED [ 31%]
tests/test_schema.py::TestRAGFlowDataConverter::test_convert_basic_document PASSED [ 32%]
tests/test_schema.py::TestRAGFlowDataConverter::test_convert_with_vector PASSED [ 33%]
tests/test_schema.py::TestRAGFlowDataConverter::test_convert_array_fields PASSED [ 34%]
tests/test_schema.py::TestRAGFlowDataConverter::test_convert_json_fields PASSED [ 36%]
tests/test_schema.py::TestRAGFlowDataConverter::test_convert_unknown_fields_to_extra PASSED [ 37%]
tests/test_schema.py::TestRAGFlowDataConverter::test_convert_kb_id_list PASSED [ 38%]
tests/test_schema.py::TestRAGFlowDataConverter::test_convert_content_with_weight_dict PASSED [ 39%]
tests/test_schema.py::TestRAGFlowDataConverter::test_convert_batch PASSED [ 40%]
tests/test_schema.py::TestVectorFieldPattern::test_valid_patterns PASSED [ 41%]
tests/test_schema.py::TestVectorFieldPattern::test_invalid_patterns PASSED [ 43%]
tests/test_schema.py::TestVectorFieldPattern::test_extract_dimension PASSED [ 44%]
tests/test_schema.py::TestConstants::test_array_columns PASSED           [ 45%]
tests/test_schema.py::TestConstants::test_json_columns PASSED            [ 46%]
tests/test_schema.py::TestConstants::test_ragflow_columns_completeness PASSED [ 47%]
tests/test_schema.py::TestConstants::test_fts_columns PASSED             [ 48%]
tests/test_schema.py::TestConstants::test_ragflow_columns_types PASSED   [ 50%]
tests/test_schema.py::TestRAGFlowSchemaConverterEdgeCases::test_empty_mapping PASSED [ 51%]
tests/test_schema.py::TestRAGFlowSchemaConverterEdgeCases::test_mapping_without_properties PASSED [ 52%]
tests/test_schema.py::TestRAGFlowSchemaConverterEdgeCases::test_multiple_vector_fields PASSED [ 53%]
tests/test_schema.py::TestRAGFlowSchemaConverterEdgeCases::test_get_column_definitions_without_analysis PASSED [ 54%]
tests/test_schema.py::TestRAGFlowSchemaConverterEdgeCases::test_get_vector_fields PASSED [ 55%]
tests/test_schema.py::TestRAGFlowDataConverterEdgeCases::test_convert_empty_document PASSED [ 56%]
tests/test_schema.py::TestRAGFlowDataConverterEdgeCases::test_convert_document_without_source PASSED [ 58%]
tests/test_schema.py::TestRAGFlowDataConverterEdgeCases::test_convert_boolean_to_integer PASSED [ 59%]
tests/test_schema.py::TestRAGFlowDataConverterEdgeCases::test_convert_invalid_integer PASSED [ 60%]
tests/test_schema.py::TestRAGFlowDataConverterEdgeCases::test_convert_float_field PASSED [ 61%]
tests/test_schema.py::TestRAGFlowDataConverterEdgeCases::test_convert_array_with_special_characters PASSED [ 62%]
tests/test_schema.py::TestRAGFlowDataConverterEdgeCases::test_convert_already_json_array PASSED [ 63%]
tests/test_schema.py::TestRAGFlowDataConverterEdgeCases::test_convert_single_value_to_array PASSED [ 65%]
tests/test_schema.py::TestRAGFlowDataConverterEdgeCases::test_detect_vector_fields_from_document PASSED [ 66%]
tests/test_schema.py::TestRAGFlowDataConverterEdgeCases::test_convert_with_default_values PASSED [ 67%]
tests/test_schema.py::TestRAGFlowDataConverterEdgeCases::test_convert_list_content PASSED [ 68%]
tests/test_schema.py::TestRAGFlowDataConverterEdgeCases::test_convert_batch_empty PASSED [ 69%]
tests/test_schema.py::TestRAGFlowDataConverterEdgeCases::test_existing_extra_field_merged PASSED [ 70%]
tests/test_verify.py::TestVerificationResult::test_create_basic_result PASSED [ 72%]
tests/test_verify.py::TestVerificationResult::test_result_default_values PASSED [ 73%]
tests/test_verify.py::TestVerificationResult::test_result_with_counts PASSED [ 74%]
tests/test_verify.py::TestMigrationVerifier::test_verify_counts_match PASSED [ 75%]
tests/test_verify.py::TestMigrationVerifier::test_verify_counts_mismatch PASSED [ 76%]
tests/test_verify.py::TestMigrationVerifier::test_verify_samples_all_match PASSED [ 77%]
tests/test_verify.py::TestMigrationVerifier::test_verify_samples_some_missing PASSED [ 79%]
tests/test_verify.py::TestMigrationVerifier::test_verify_samples_data_mismatch PASSED [ 80%]
tests/test_verify.py::TestMigrationVerifier::test_values_equal_none_values PASSED [ 81%]
tests/test_verify.py::TestMigrationVerifier::test_values_equal_array_columns PASSED [ 82%]
tests/test_verify.py::TestMigrationVerifier::test_values_equal_json_columns PASSED [ 83%]
tests/test_verify.py::TestMigrationVerifier::test_values_equal_kb_id_list PASSED [ 84%]
tests/test_verify.py::TestMigrationVerifier::test_values_equal_content_with_weight_dict PASSED [ 86%]
tests/test_verify.py::TestMigrationVerifier::test_determine_result_passed PASSED [ 87%]
tests/test_verify.py::TestMigrationVerifier::test_determine_result_failed_count PASSED [ 88%]
tests/test_verify.py::TestMigrationVerifier::test_determine_result_failed_samples PASSED [ 89%]
tests/test_verify.py::TestMigrationVerifier::test_generate_report PASSED [ 90%]
tests/test_verify.py::TestMigrationVerifier::test_generate_report_with_missing PASSED [ 91%]
tests/test_verify.py::TestMigrationVerifier::test_generate_report_with_mismatches PASSED [ 93%]
tests/test_verify.py::TestValueComparison::test_string_comparison PASSED [ 94%]
tests/test_verify.py::TestValueComparison::test_integer_comparison PASSED [ 95%]
tests/test_verify.py::TestValueComparison::test_float_comparison PASSED  [ 96%]
tests/test_verify.py::TestValueComparison::test_boolean_comparison PASSED [ 97%]
tests/test_verify.py::TestValueComparison::test_empty_array_comparison PASSED [ 98%]
tests/test_verify.py::TestValueComparison::test_nested_json_comparison PASSED [100%]

======================= 86 passed, 88 warnings in 0.66s ========================
```

### Type of change

- [ ] Bug Fix (non-breaking change which fixes an issue)
- [x] New Feature (non-breaking change which adds functionality)
- [ ] Documentation Update
- [ ] Refactoring
- [ ] Performance Improvement
- [ ] Other (please describe):
2026-01-31 16:11:27 +08:00
c4c3f744c0 feat: add Peewee ORM support for OceanBase as primary database (#12769) (#12926)
## Summary

This PR adds Peewee ORM support for OceanBase as the primary database in
RAGFlow, as requested in issue #12769.

## Changes

### Core Implementation

1. **RetryingPooledOceanBaseDatabase Class**
   - Inherits from `PooledMySQLDatabase` (OceanBase is MySQL-compatible)
   - Implements retry mechanism for connection issues
   - Handles MySQL-specific error codes (2013, 2006 for connection loss)
   - Provides connection pool management

2. **PooledDatabase Enum**
   - Added `OCEANBASE = RetryingPooledOceanBaseDatabase`

3. **DatabaseLock Enum**
   - Added `OCEANBASE = MysqlDatabaseLock`
   - OceanBase uses MySQL-style locking

4. **TextFieldType Enum**
   - Added `OCEANBASE = "LONGTEXT"`
   - OceanBase uses same text field type as MySQL

5. **DatabaseMigrator Enum**
   - Added `OCEANBASE = MySQLMigrator`
   - OceanBase uses MySQL migration tools
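
A minimal sketch of how such a class might look, assuming Peewee's `playhouse.pool.PooledMySQLDatabase` and the error codes listed above (the retry parameters and override points are illustrative, not the exact implementation in `api/db/db_models.py`):

```python
import time

from peewee import OperationalError
from playhouse.pool import PooledMySQLDatabase

# MySQL-compatible "connection lost" error codes, as noted above.
RETRYABLE_ERROR_CODES = {2006, 2013}

class RetryingPooledOceanBaseDatabase(PooledMySQLDatabase):
    """OceanBase speaks the MySQL protocol, so the pooled MySQL class is reused."""

    def __init__(self, *args, max_retries: int = 3, retry_delay: float = 1.0, **kwargs):
        self._max_retries = max_retries
        self._retry_delay = retry_delay
        super().__init__(*args, **kwargs)

    def execute_sql(self, sql, params=None, **kwargs):
        for attempt in range(self._max_retries):
            try:
                return super().execute_sql(sql, params, **kwargs)
            except OperationalError as exc:
                code = exc.args[0] if exc.args and isinstance(exc.args[0], int) else None
                if code not in RETRYABLE_ERROR_CODES or attempt == self._max_retries - 1:
                    raise
                self.close()                   # drop the broken pooled connection
                time.sleep(self._retry_delay)  # back off before retrying
```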

### Usage

```bash
# Set environment variable to use OceanBase
export DB_TYPE=oceanbase

# Configure connection (in docker/.env or environment)
OCEANBASE_HOST=localhost
OCEANBASE_PORT=2881
OCEANBASE_USER=root
OCEANBASE_PASSWORD=password
OCEANBASE_DATABASE=ragflow
```

### Technical Details

- **Location**: `api/db/db_models.py`
- **Dependencies**: No new dependencies (uses existing Peewee MySQL
support)
- **Code Size**: ~90 lines
- **Difficulty**: Simple

### Testing

- Added comprehensive unit tests in
`tests/unit/test_oceanbase_peewee.py`
- Tests cover:
  - OceanBase database class existence and inheritance
  - Enum values for PooledDatabase, DatabaseLock, TextFieldType
  - Initialization with custom retry settings
  - Environment variable configuration

### Acceptance Criteria

- Can switch to the OceanBase database via the `DB_TYPE=oceanbase` environment variable
- All database operations work normally in an OceanBase environment
- OceanBase uses MySQL compatibility mode (no additional dependencies)

### Background

This is part of the RAGFlow + OceanBase Hackathon to allow users to
choose OceanBase as RAGFlow's primary database, leveraging OceanBase's
high availability and scalability.

---

## Related Issues
- **Primary**: https://github.com/infiniflow/ragflow/issues/12769
- **Context**: https://github.com/oceanbase/seekdb/issues/123 (OceanBase
Developer Challenge)

---

Closes infiniflow/ragflow#12769
2026-01-31 15:45:20 +08:00
23bdf25a1f feature: Add OceanBase Storage Support for Table Parser (#12923)
### What problem does this PR solve?

close #12770 

This PR adds OceanBase as a storage backend for the Table Parser. It
enables dynamic table schema storage via JSON and implements OceanBase
SQL execution for text-to-SQL retrieval.


### Type of change

- [ ] Bug Fix (non-breaking change which fixes an issue)
- [x] New Feature (non-breaking change which adds functionality)
- [ ] Documentation Update
- [ ] Refactoring
- [ ] Performance Improvement
- [ ] Other (please describe):

### Changes
- Table Parser stores row data into `chunk_data` when doc engine is
OceanBase. (table.py)
- OceanBase table schema adds `chunk_data` JSON column and migrates if
needed.
- Implemented OceanBase `sql()` to execute text-to-SQL results.
(ob_conn.py)
- Add `DOC_ENGINE_OCEANBASE` flag for engine detection (setting.py)
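
A hedged sketch of the idea: when the doc engine is OceanBase, each parsed row is kept as a JSON document in the `chunk_data` column, and generated SQL can filter on it with MySQL-compatible JSON functions (the table name and SQL below are illustrative only):

```python
import json

# One parsed spreadsheet row, stored verbatim so the table schema stays dynamic.
row = {"product": "Widget", "price": 19.9, "in_stock": 37}

insert_sql = "INSERT INTO ragflow_chunks (id, chunk_data) VALUES (%s, %s)"
insert_params = ("chunk-0001", json.dumps(row, ensure_ascii=False))

# Text-to-SQL output can then filter on fields inside the JSON column.
query_sql = (
    "SELECT JSON_EXTRACT(chunk_data, '$.product') AS product "
    "FROM ragflow_chunks WHERE JSON_EXTRACT(chunk_data, '$.price') < 20"
)

print(insert_sql, insert_params)
print(query_sql)
```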

### Test
1. Set `DOC_ENGINE=oceanbase` (e.g. in `docker/.env`)
<img width="1290" height="783" alt="doc_engine_ob"
src="https://github.com/user-attachments/assets/7d1c609f-7bf2-4b2e-b4cc-4243e72ad4f1"
/>

2. Upload an Excel file to the Knowledge Base (for the test, we use the file shown below).
<img width="786" height="930" alt="excel"
src="https://github.com/user-attachments/assets/bedf82f2-cd00-426b-8f4d-6978a151231a"
/>

3. Choose **Table** as parsing method.
<img width="2550" height="1134" alt="parse_excel"
src="https://github.com/user-attachments/assets/aba11769-02be-4905-97e1-e24485e24cd0"
/>

4. Ask a natural language query in chat.
<img width="2550" height="1134" alt="query"
src="https://github.com/user-attachments/assets/26a910a6-e503-4ac7-b66a-f5754bbb0e91"
/>
2026-01-31 15:11:54 +08:00
ee23b9eb63 feature: Add OceanBase Support to Text-to-SQL Agent (#12919)
### What problem does this PR solve?

Close #12768.

This PR adds OceanBase support to RAGFlow’s Text-to-SQL (ExeSQL)
component.
OceanBase is integrated via MySQL compatibility mode, and the UI
`db_type` options are updated accordingly.

### Type of change

- [ ] Bug Fix (non-breaking change which fixes an issue)
- [x] New Feature (non-breaking change which adds functionality)
- [ ] Documentation Update
- [ ] Refactoring
- [ ] Performance Improvement
- [ ] Other (please describe):

### Changes

**Backend**
- Add `oceanbase` `db_type` validation and connection logic in
`exesql.py` and reuse existing MySQL compatibility mode

**Frontend**
- Add OceanBase option to the ExeSQL `db_type` selector
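
Because OceanBase is reached through MySQL compatibility mode, the connection path can reuse a MySQL client. A rough sketch (using `pymysql`; the connection values are placeholders and the actual wiring in `exesql.py` may differ):

```python
import pymysql

def connect(db_type: str, host: str, port: int, user: str, password: str, database: str):
    # "oceanbase" is treated like "mysql" because OceanBase speaks the MySQL protocol.
    if db_type in ("mysql", "mariadb", "oceanbase"):
        return pymysql.connect(host=host, port=port, user=user,
                               password=password, database=database)
    raise ValueError(f"Unsupported db_type: {db_type}")

conn = connect("oceanbase", "127.0.0.1", 2881, "root", "password", "ragflow")
with conn.cursor() as cursor:
    cursor.execute("SELECT * FROM test LIMIT 10;")
    for row in cursor.fetchall():
        print(row)
conn.close()
```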

### How to test
1. Configure OceanBase connection in ExeSQL node
(host/port/user/password/database)
2. Input: “Show 10 rows from test table”
3. Generated SQL: `SELECT * FROM test LIMIT 10;`
4. Query executes successfully and results are returned

### Screenshots
- ExeSQL db_type includes OceanBase
<img width="649" height="1015" alt="2"
src="https://github.com/user-attachments/assets/e0a5f7b9-e282-402a-8639-64c1aef8fce6"
/>

- ExeSQL test OceanBase connection
<img width="2247" height="1140" alt="test_ob"
src="https://github.com/user-attachments/assets/f16ebd93-b48e-4d18-b53f-8496581e755d"
/>



- Query results from OceanBase shown in UI
<img width="2550" height="1351" alt="1"
src="https://github.com/user-attachments/assets/b44163dc-baab-420d-b31e-b644bdcb77a9"
/>
2026-01-31 15:03:40 +08:00
c4f60b349d Fix(test): downgrade test priorities (#12913)
### What problem does this PR solve?

Changed test priorities in multiple test files, downgrading from p1 to
p2 and p2 to p3.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-30 20:02:56 +08:00
eb75b1ce82 Docs: Fixed a docusaurus display issue (#12914)
### What problem does this PR solve?

Fixed a Docusaurus display issue.

### Type of change


- [x] Documentation Update
2026-01-30 18:04:05 +08:00
e385b19d67 Test: Add code coverage reporting to CI (#12874)
### What problem does this PR solve?

Add code coverage reporting to CI

### Type of change

- [x] Test (please describe): coverage report

---------

Co-authored-by: Liu An <asiro@qq.com>
2026-01-30 14:49:16 +08:00
87305cb08c fix: close file handles when loading JSON mapping in doc store connectors (#12904)
**What problem does this PR solve?**

When loading JSON mapping/schema files, the code used
`json.load(open(path))` without closing the file. The file handle stayed
open until garbage collection, which can leak file descriptors under
load (e.g. repeated reconnects or migrations).

**Type of change**
- [x] Bug Fix (non-breaking change which fixes an issue)

**Change**
Replaced `json.load(open(...))` with a context manager so the file is
closed after loading: `with open(fp_mapping, "r") as f: ... = json.load(f)`

**Files updated**
- `rag/utils/opensearch_conn.py` – mapping load (1 place)
- `common/doc_store/es_conn_base.py` – mapping load + doc_meta_mapping load (2 places)
- `common/doc_store/infinity_conn_base.py` – schema loads in `_migrate_db`, doc metadata table creation, and SQL field mapping (4 places)

Behavior is unchanged; only resource handling is fixed.
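
For clarity, the before/after pattern in a standalone sketch (the path is illustrative):

```python
import json

MAPPING_PATH = "conf/mapping.json"  # illustrative path

# Before: the file object returned by open() is never closed explicitly.
mapping = json.load(open(MAPPING_PATH))

# After: the context manager closes the handle as soon as loading finishes.
with open(MAPPING_PATH, "r") as f:
    mapping = json.load(f)
```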

Co-authored-by: Gittensor Miner <miner@gittensor.io>
2026-01-30 14:07:51 +08:00
212d6f3660 Fix metadata in get_list() (#12906)
### What problem does this PR solve?

test_update_document.py failed as metadata is not included in the
response of get_list(), fix the issue.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-30 14:06:49 +08:00
f262d416fe Refa: remove aspose dependency. (#12910)
### Type of change

- [x] Refactoring
2026-01-30 14:06:19 +08:00
f1c2fac03e Refa: remove ppt image. (#12909)
### What problem does this PR solve?

remove `aspose`

### Type of change

- [x] Refactoring
2026-01-30 13:35:42 +08:00
73645e2f78 fix: preserve line breaks in prompt editor and add auto-save on blur (#12887)
Closes #12762 

### What problem does this PR solve?
**Line break issue in Agent prompt editor:**
- Text with blank lines in `system_prompt` or `user_prompt` would have
extra/fewer blank lines after save/reload or paste
- Root cause: Mismatch between Lexical editor's paragraph nodes (`\n\n`
separator) and line break nodes (`\n` separator)

**Auto-save issue:**
- Changes were only saved after 20-second debounce, causing data loss on
page refresh before timer completed

### Solution
1. **Line break fix**: Use `LineBreakNode` consistently for all line
breaks (typing Enter, paste, load)
2. **Auto-save**: Save immediately when prompt editor loses focus


[1.webm](https://github.com/user-attachments/assets/eb2c2428-54a3-4d4e-8037-6cc34a859b83)

### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-30 10:29:51 +08:00
4947e9473a Fix(test): Update error message assertions for unsupported content type tests (#12901)
### What problem does this PR solve?

This commit updates test cases for create, delete, and update dataset
endpoints to expect consistent error messages when an unsupported
content type is provided.

### Type of change

- [x] Bug Fix (test)
2026-01-30 09:45:04 +08:00
98b6a0e6d1 feat: Add OceanBase Performance Monitoring and Health Check Integration (#12886)
## Description

This PR implements comprehensive OceanBase performance monitoring and
health check functionality as requested in issue #12772. The
implementation follows the existing ES/Infinity health check patterns
and provides detailed metrics for operations teams.

## Problem

Currently, RAGFlow lacks detailed health monitoring for OceanBase when
used as the document engine. Operations teams need visibility into:
- Connection status and latency
- Storage space usage
- Query throughput (QPS)
- Slow query statistics
- Connection pool utilization

## Solution

### 1. Enhanced OBConnection Class (`rag/utils/ob_conn.py`)

Added comprehensive performance monitoring methods:
- `get_performance_metrics()` - Main method returning all performance
metrics
- `_get_storage_info()` - Retrieves database storage usage
- `_get_connection_pool_stats()` - Gets connection pool statistics
- `_get_slow_query_count()` - Counts queries exceeding threshold
- `_estimate_qps()` - Estimates queries per second
- Enhanced `health()` method with connection status

### 2. Health Check Utilities (`api/utils/health_utils.py`)

Added two new functions following ES/Infinity patterns:
- `get_oceanbase_status()` - Returns OceanBase status with health and
performance metrics
- `check_oceanbase_health()` - Comprehensive health check with detailed
metrics

### 3. API Endpoint (`api/apps/system_app.py`)

Added new endpoint:
- `GET /v1/system/oceanbase/status` - Returns OceanBase health status
and performance metrics

### 4. Comprehensive Unit Tests
(`test/unit_test/utils/test_oceanbase_health.py`)

Added 340+ lines of unit tests covering:
- Health check success/failure scenarios
- Performance metrics retrieval
- Error handling and edge cases
- Connection pool statistics
- Storage information retrieval
- QPS estimation
- Slow query detection

## Metrics Provided

- **Connection Status**: connected/disconnected
- **Latency**: Query latency in milliseconds
- **Storage**: Used and total storage space
- **QPS**: Estimated queries per second
- **Slow Queries**: Count of queries exceeding threshold
- **Connection Pool**: Active connections, max connections, pool size
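
A hedged example of polling the new endpoint; the field names in the response are only indicative of the metric categories above, not the exact payload:

```python
import requests

BASE_URL = "http://localhost:9380"                  # assumed local RAGFlow instance
HEADERS = {"Authorization": "Bearer <api-token>"}   # assumed token-based auth

resp = requests.get(f"{BASE_URL}/v1/system/oceanbase/status", headers=HEADERS, timeout=10)
resp.raise_for_status()
status = resp.json()

# Illustrative fields mirroring the metric categories listed above.
print(status.get("status"))           # e.g. "connected"
print(status.get("latency_ms"))       # query latency
print(status.get("qps"))              # estimated queries per second
print(status.get("slow_queries"))     # queries exceeding the threshold
print(status.get("connection_pool"))  # active / max connections, pool size
```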

## Testing

- All unit tests pass
- Error handling tested for connection failures
- Edge cases covered (missing tables, connection errors)
- Follows existing code patterns and conventions

## Code Statistics

- **Total Lines Changed**: 665+ lines
- **New Code**: ~600 lines
- **Test Coverage**: 340+ lines of comprehensive tests
- **Files Modified**: 3
- **Files Created**: 1 (test file)

## Acceptance Criteria Met

- `/system/oceanbase/status` API returns OceanBase health status
- Monitoring metrics accurately reflect OceanBase running status
- Clear error messages when health checks fail
- Response time optimized (metrics cached where possible)
- Follows existing ES/Infinity health check patterns
- Comprehensive test coverage

## Related Files

- `rag/utils/ob_conn.py` - OceanBase connection class
- `api/utils/health_utils.py` - Health check utilities
- `api/apps/system_app.py` - System API endpoints
- `test/unit_test/utils/test_oceanbase_health.py` - Unit tests

Fixes #12772

---------

Co-authored-by: Daniel <daniel@example.com>
2026-01-30 09:44:42 +08:00
183803e56b Perf: fix thread pool workers (#12882)
### What problem does this PR solve?

Fixed thread pool workers and improved the retrieval component.

### Type of change

- [x] Refactoring
- [x] Performance Improvement
2026-01-30 09:44:23 +08:00
efb136c29c Docs: minor (#12899)
### What problem does this PR solve?

Removed redundant command + "*the* current version" @JinHai-CN 

### Type of change

- [x] Documentation Update
2026-01-29 19:23:18 +08:00
c59ae4c7c2 Fix: codeExec return types & error handling; Update Spark model mappings (#12896)
## What problem does this PR solve?

This PR addresses three specific issues to improve agent reliability and
model support:

1. **`codeExec` Output Limitation**: Previously, the `codeExec` tool was
strictly limited to returning `string` types. I updated the output
constraint to `object` to support structured data (Dicts, Lists, etc.)
required for complex downstream tasks.
2. **`codeExec` Error Handling**: Improved the execution logic so that
when runtime errors occur, the tool captures the exception and returns
the error message as the output instead of causing the process to abort
or fail silently.
3. **Spark Model Configuration**:
    - Added support for the `MAX-32k` model variant.
- Fixed the `Spark-Lite` mapping from `general` to `lite` to match the
latest API specifications.

## Type of change

- [x] Bug Fix (fixes execution logic and model mapping)
- [x] New Feature / Enhancement (adds model support and improves tool
flexibility)

## Key Changes

### `agent/tools/code_exec.py`
- Changed the output type definition from `string` to `object`.
- Refactored the execution flow to gracefully catch exceptions and
return error messages as part of the tool output.
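
A minimal sketch of the "return the error instead of aborting" behavior described above (the function name and result shape are illustrative, not the actual `code_exec.py` API):

```python
import traceback

def run_user_code(source: str) -> dict:
    """Execute user code and always return a structured object, even on failure."""
    namespace: dict = {}
    try:
        exec(source, namespace)        # define main()
        result = namespace["main"]()   # call the user's entry point
        return {"success": True, "output": result}
    except Exception:
        # Capture the traceback and hand it back as the tool output instead of raising.
        return {"success": False, "output": traceback.format_exc()}

print(run_user_code("def main():\n    return {'answer': 42}"))
print(run_user_code("def main():\n    return 1 / 0"))
```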

### `rag/llm/chat_model.py`
- Added `"Spark-Max-32K": "max-32k"` to the model list.
- Updated `"Spark-Lite"` value from `"general"` to `"lite"`.

## Checklist
- [x] My code follows the style guidelines of this project.
- [x] I have performed a self-review of my own code.

Signed-off-by: evilhero <2278596667@qq.com>
2026-01-29 19:22:35 +08:00
d99f6a611a Refact: Updated UI tips. (#12898)
### What problem does this PR solve?

### Type of change

- [ ] Bug Fix (non-breaking change which fixes an issue)
- [ ] New Feature (non-breaking change which adds functionality)
- [ ] Documentation Update
- [x] Refactoring
- [ ] Performance Improvement
- [ ] Other (please describe):
2026-01-29 17:56:07 +08:00
7053d3683c Feat: Add CLI retrieval test to CI workflow (#12881)
### What problem does this PR solve?

Adds a CLI-based retrieval test to CI after the Elasticsearch HTTP API
tests to validate end-to-end admin/user flows and dataset retrieval via
ragflow_cli.py. This helps catch regressions in the CLI path that aren’t
covered by existing API tests.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
2026-01-29 17:55:32 +08:00
ec88e17710 fix: task executor bar chart error (#12894)
### What problem does this PR solve?

Fix wrong data rendered in task executor bar chart 

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-29 17:34:05 +08:00
32c0161ff1 Refa: Clean the folders. (#12890)
### Type of change

- [x] Refactoring
2026-01-29 14:23:26 +08:00
d86b7f9721 Remove filter (kb_id) in infinity (#12853)
Secondary indexes in Infinity do not support the **IN** expression.

---------

Signed-off-by: zpf121 <1219290549@qq.com>
2026-01-29 11:04:25 +08:00
6305c7e411 Fix metadata filter (#12861)
### What problem does this PR solve?

##### Summary
This PR fixes a bug in the metadata filtering logic where the contains
and not contains operators were behaving identically to the in and not
in operators. It also standardizes the syntax for string-based
operators.

##### The Issue
On the main branch, the contains operator was implemented as:
`matched = input in value if not isinstance(input, list) else all(i in
value for i in input)`
This logic is identical to the `in` operator. It checks if the metadata
(`input`) exists within the filter (`value`). For a "contains" search,
the logic should be reversed: _we want to check if the filter value
exists within the metadata input_.

##### Solution Presented Here
The operators have been rewritten using `str.find()`:
- Contains: `str(input).find(value) >= 0`
- Not Contains: `str(input).find(value) == -1`
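
A small worked example of the difference, with `input` as the stored metadata and `value` as the filter term:

```python
input_value = "machine learning handbook"   # metadata stored on the document
filter_value = "learning"                   # value typed into the filter

# Old behaviour (identical to "in"): is the metadata contained in the filter? -> False
old_contains = input_value in filter_value

# Fixed behaviour: does the metadata contain the filter value? -> True
new_contains = str(input_value).find(filter_value) >= 0
new_not_contains = str(input_value).find(filter_value) == -1

print(old_contains, new_contains, new_not_contains)  # False True False
```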

##### Advantage
This approach places the metadata (input) on the left side of the
expression. This maintains stylistic consistency with the existing start
with and end with operators in the same file, which also place the input
on the left (e.g., str(input).lower().startswith(...)).

##### Considered Alternative
In a previous PR we considered using the standard Python `in` operator:
`value in str(input)`. The `in` operator is approximately 15% faster
because it uses optimized Python bytecode (`CONTAINS_OP`) and avoids an
attribute lookup. However, following the rejection of that PR, we now
propose the change presented here.


### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
- [ ] New Feature (non-breaking change which adds functionality)
- [ ] Documentation Update
- [ ] Refactoring
- [ ] Performance Improvement
- [ ] Other (please describe):

---------

Co-authored-by: Philipp Heyken Soares <philipp.heyken-soares@am.ai>
2026-01-29 09:59:48 +08:00
47e55ab324 Chore(deps): Bump starlette from 0.46.2 to 0.49.1 in /agent/sandbox (#12878)
Bumps [starlette](https://github.com/Kludex/starlette) from 0.46.2 to
0.49.1.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a
href="https://github.com/Kludex/starlette/releases">starlette's
releases</a>.</em></p>
<blockquote>
<h2>Version 0.49.1</h2>
<p>This release fixes a security vulnerability in the parsing logic of
the <code>Range</code> header in <code>FileResponse</code>.</p>
<p>You can view the full security advisory: <a
href="https://github.com/Kludex/starlette/security/advisories/GHSA-7f5h-v6xp-fcq8">GHSA-7f5h-v6xp-fcq8</a></p>
<h2>Fixed</h2>
<ul>
<li>Optimize the HTTP ranges parsing logic <a
href="4ea6e22b48">4ea6e22b489ec388d6004cfbca52dd5b147127c5</a></li>
</ul>
<hr />
<p><strong>Full Changelog</strong>: <a
href="https://github.com/Kludex/starlette/compare/0.49.0...0.49.1">https://github.com/Kludex/starlette/compare/0.49.0...0.49.1</a></p>
<h2>Version 0.49.0</h2>
<h2>Added</h2>
<ul>
<li>Add <code>encoding</code> parameter to <code>Config</code> class <a
href="https://redirect.github.com/Kludex/starlette/pull/2996">#2996</a>.</li>
<li>Support multiple cookie headers in <code>Request.cookies</code> <a
href="https://redirect.github.com/Kludex/starlette/pull/3029">#3029</a>.</li>
<li>Use <code>Literal</code> type for <code>WebSocketEndpoint</code>
encoding values <a
href="https://redirect.github.com/Kludex/starlette/pull/3027">#3027</a>.</li>
</ul>
<h2>Changed</h2>
<ul>
<li>Do not pollute exception context in <code>Middleware</code> when
using <code>BaseHTTPMiddleware</code> <a
href="https://redirect.github.com/Kludex/starlette/pull/2976">#2976</a>.</li>
</ul>
<hr />
<h2>New Contributors</h2>
<ul>
<li><a
href="https://github.com/TheWesDias"><code>@​TheWesDias</code></a> made
their first contribution in <a
href="https://redirect.github.com/Kludex/starlette/pull/3017">Kludex/starlette#3017</a></li>
<li><a href="https://github.com/gmos2104"><code>@​gmos2104</code></a>
made their first contribution in <a
href="https://redirect.github.com/Kludex/starlette/pull/3027">Kludex/starlette#3027</a></li>
<li><a
href="https://github.com/secrett2633"><code>@​secrett2633</code></a>
made their first contribution in <a
href="https://redirect.github.com/Kludex/starlette/pull/2996">Kludex/starlette#2996</a></li>
<li><a
href="https://github.com/adam-sikora"><code>@​adam-sikora</code></a>
made their first contribution in <a
href="https://redirect.github.com/Kludex/starlette/pull/2976">Kludex/starlette#2976</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a
href="https://github.com/Kludex/starlette/compare/0.48.0...0.49.0">https://github.com/Kludex/starlette/compare/0.48.0...0.49.0</a></p>
<h2>Version 0.48.0</h2>
<h2>Added</h2>
<ul>
<li>Add official Python 3.14 support <a
href="https://redirect.github.com/Kludex/starlette/pull/3013">#3013</a>.</li>
</ul>
<h2>Changed</h2>
<ul>
<li>Implement <a
href="https://www.rfc-editor.org/rfc/rfc9110">RFC9110</a> http status
names <a
href="https://redirect.github.com/Kludex/starlette/pull/2939">#2939</a>.</li>
</ul>
<hr />
<h2>New Contributors</h2>
<ul>
<li><a href="https://github.com/yakimka"><code>@​yakimka</code></a> made
their first contribution in <a
href="https://redirect.github.com/Kludex/starlette/pull/2943">Kludex/starlette#2943</a></li>
<li><a href="https://github.com/mbeijen"><code>@​mbeijen</code></a> made
their first contribution in <a
href="https://redirect.github.com/Kludex/starlette/pull/2939">Kludex/starlette#2939</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a
href="https://github.com/Kludex/starlette/compare/0.47.3...0.48.0">https://github.com/Kludex/starlette/compare/0.47.3...0.48.0</a></p>
<!-- raw HTML omitted -->
</blockquote>
<p>... (truncated)</p>
</details>
<details>
<summary>Changelog</summary>
<p><em>Sourced from <a
href="https://github.com/Kludex/starlette/blob/main/docs/release-notes.md">starlette's
changelog</a>.</em></p>
<blockquote>
<h2>0.49.1 (October 28, 2025)</h2>
<p>This release fixes a security vulnerability in the parsing logic of
the <code>Range</code> header in <code>FileResponse</code>.</p>
<p>You can view the full security advisory: <a
href="https://github.com/Kludex/starlette/security/advisories/GHSA-7f5h-v6xp-fcq8">GHSA-7f5h-v6xp-fcq8</a></p>
<h4>Fixed</h4>
<ul>
<li>Optimize the HTTP ranges parsing logic <a
href="4ea6e22b48">4ea6e22b489ec388d6004cfbca52dd5b147127c5</a></li>
</ul>
<h2>0.49.0 (October 28, 2025)</h2>
<h4>Added</h4>
<ul>
<li>Add <code>encoding</code> parameter to <code>Config</code> class <a
href="https://redirect.github.com/Kludex/starlette/pull/2996">#2996</a>.</li>
<li>Support multiple cookie headers in <code>Request.cookies</code> <a
href="https://redirect.github.com/Kludex/starlette/pull/3029">#3029</a>.</li>
<li>Use <code>Literal</code> type for <code>WebSocketEndpoint</code>
encoding values <a
href="https://redirect.github.com/Kludex/starlette/pull/3027">#3027</a>.</li>
</ul>
<h4>Changed</h4>
<ul>
<li>Do not pollute exception context in <code>Middleware</code> when
using <code>BaseHTTPMiddleware</code> <a
href="https://redirect.github.com/Kludex/starlette/pull/2976">#2976</a>.</li>
</ul>
<h2>0.48.0 (September 13, 2025)</h2>
<h4>Added</h4>
<ul>
<li>Add official Python 3.14 support <a
href="https://redirect.github.com/Kludex/starlette/pull/3013">#3013</a>.</li>
</ul>
<h4>Changed</h4>
<ul>
<li>Implement <a
href="https://www.rfc-editor.org/rfc/rfc9110">RFC9110</a> http status
names <a
href="https://redirect.github.com/Kludex/starlette/pull/2939">#2939</a>.</li>
</ul>
<h2>0.47.3 (August 24, 2025)</h2>
<h4>Fixed</h4>
<ul>
<li>Use <code>asyncio.iscoroutinefunction</code> for Python 3.12 and
older <a
href="https://redirect.github.com/Kludex/starlette/pull/2984">#2984</a>.</li>
</ul>
<h2>0.47.2 (July 20, 2025)</h2>
<h4>Fixed</h4>
<ul>
<li>Make <code>UploadFile</code> check for future rollover <a
href="https://redirect.github.com/Kludex/starlette/pull/2962">#2962</a>.</li>
</ul>
<h2>0.47.1 (June 21, 2025)</h2>
<h4>Fixed</h4>
<ul>
<li>Use <code>Self</code> in <code>TestClient.__enter__</code> <a
href="https://redirect.github.com/Kludex/starlette/pull/2951">#2951</a>.</li>
<li>Allow async exception handlers to type-check <a
href="https://redirect.github.com/Kludex/starlette/pull/2949">#2949</a>.</li>
</ul>
<!-- raw HTML omitted -->
</blockquote>
<p>... (truncated)</p>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a
href="7e4b7428f2"><code>7e4b742</code></a>
Version 0.49.1 (<a
href="https://redirect.github.com/Kludex/starlette/issues/3047">#3047</a>)</li>
<li><a
href="4ea6e22b48"><code>4ea6e22</code></a>
Merge commit from fork</li>
<li><a
href="7d88ea6f8e"><code>7d88ea6</code></a>
Version 0.49.0 (<a
href="https://redirect.github.com/Kludex/starlette/issues/3046">#3046</a>)</li>
<li><a
href="26d66bbfb0"><code>26d66bb</code></a>
Do not pollute exception context in Middleware (<a
href="https://redirect.github.com/Kludex/starlette/issues/2976">#2976</a>)</li>
<li><a
href="a59397db88"><code>a59397d</code></a>
Set encodings when reading config files (<a
href="https://redirect.github.com/Kludex/starlette/issues/2996">#2996</a>)</li>
<li><a
href="3b7f0cbf59"><code>3b7f0cb</code></a>
test: add test for unknown status (<a
href="https://redirect.github.com/Kludex/starlette/issues/3035">#3035</a>)</li>
<li><a
href="b09ce1a99d"><code>b09ce1a</code></a>
docs: fix legibility issues on sponsorship page (<a
href="https://redirect.github.com/Kludex/starlette/issues/3039">#3039</a>)</li>
<li><a
href="0f0edcf800"><code>0f0edcf</code></a>
Revert &quot;Add Marcelo Trylesinski to the license (<a
href="https://redirect.github.com/Kludex/starlette/issues/3025">#3025</a>)&quot;
(<a
href="https://redirect.github.com/Kludex/starlette/issues/3044">#3044</a>)</li>
<li><a
href="3912d63137"><code>3912d63</code></a>
docs: add social icons (<a
href="https://redirect.github.com/Kludex/starlette/issues/3038">#3038</a>)</li>
<li><a
href="4915a9309f"><code>4915a93</code></a>
Add discord to README/docs (<a
href="https://redirect.github.com/Kludex/starlette/issues/3034">#3034</a>)</li>
<li>Additional commits viewable in <a
href="https://github.com/Kludex/starlette/compare/0.46.2...0.49.1">compare
view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility
score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=starlette&package-manager=uv&previous-version=0.46.2&new-version=0.49.1)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't
alter it yourself. You can also trigger a rebase manually by commenting
`@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits
that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after
your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge
and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating
it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all
of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop
Dependabot creating any more for this major version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop
Dependabot creating any more for this minor version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop
Dependabot creating any more for this dependency (unless you reopen the
PR or upgrade to it yourself)
You can disable automated security fix PRs for this repo from the
[Security Alerts
page](https://github.com/infiniflow/ragflow/network/alerts).

</details>

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-01-28 19:04:01 +08:00
82b932dbc7 Chore(deps): Bump urllib3 from 2.4.0 to 2.6.3 in /agent/sandbox (#12877)
Bumps [urllib3](https://github.com/urllib3/urllib3) from 2.4.0 to 2.6.3.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a
href="https://github.com/urllib3/urllib3/releases">urllib3's
releases</a>.</em></p>
<blockquote>
<h2>2.6.3</h2>
<h2>🚀 urllib3 is fundraising for HTTP/2 support</h2>
<p><a
href="https://sethmlarson.dev/urllib3-is-fundraising-for-http2-support">urllib3
is raising ~$40,000 USD</a> to release HTTP/2 support and ensure
long-term sustainable maintenance of the project after a sharp decline
in financial support. If your company or organization uses Python and
would benefit from HTTP/2 support in Requests, pip, cloud SDKs, and
thousands of other projects <a
href="https://opencollective.com/urllib3">please consider contributing
financially</a> to ensure HTTP/2 support is developed sustainably and
maintained for the long-haul.</p>
<p>Thank you for your support.</p>
<h2>Changes</h2>
<ul>
<li>Fixed a security issue where decompression-bomb safeguards of the
streaming API were bypassed when HTTP redirects were followed.
(CVE-2026-21441 reported by <a
href="https://github.com/D47A"><code>@​D47A</code></a>, 8.9 High,
GHSA-38jv-5279-wg99)</li>
<li>Started treating <code>Retry-After</code> times greater than 6 hours
as 6 hours by default. (<a
href="https://redirect.github.com/urllib3/urllib3/issues/3743">urllib3/urllib3#3743</a>)</li>
<li>Fixed <code>urllib3.connection.VerifiedHTTPSConnection</code> on
Emscripten. (<a
href="https://redirect.github.com/urllib3/urllib3/issues/3752">urllib3/urllib3#3752</a>)</li>
</ul>
<h2>2.6.2</h2>
<h2>🚀 urllib3 is fundraising for HTTP/2 support</h2>
<p><a
href="https://sethmlarson.dev/urllib3-is-fundraising-for-http2-support">urllib3
is raising ~$40,000 USD</a> to release HTTP/2 support and ensure
long-term sustainable maintenance of the project after a sharp decline
in financial support. If your company or organization uses Python and
would benefit from HTTP/2 support in Requests, pip, cloud SDKs, and
thousands of other projects <a
href="https://opencollective.com/urllib3">please consider contributing
financially</a> to ensure HTTP/2 support is developed sustainably and
maintained for the long-haul.</p>
<p>Thank you for your support.</p>
<h2>Changes</h2>
<ul>
<li>Fixed <code>HTTPResponse.read_chunked()</code> to properly handle
leftover data in the decoder's buffer when reading compressed chunked
responses. (<a
href="https://redirect.github.com/urllib3/urllib3/issues/3734">urllib3/urllib3#3734</a>)</li>
</ul>
<h2>2.6.1</h2>
<h2>🚀 urllib3 is fundraising for HTTP/2 support</h2>
<p><a
href="https://sethmlarson.dev/urllib3-is-fundraising-for-http2-support">urllib3
is raising ~$40,000 USD</a> to release HTTP/2 support and ensure
long-term sustainable maintenance of the project after a sharp decline
in financial support. If your company or organization uses Python and
would benefit from HTTP/2 support in Requests, pip, cloud SDKs, and
thousands of other projects <a
href="https://opencollective.com/urllib3">please consider contributing
financially</a> to ensure HTTP/2 support is developed sustainably and
maintained for the long-haul.</p>
<p>Thank you for your support.</p>
<h2>Changes</h2>
<ul>
<li>Restore previously removed <code>HTTPResponse.getheaders()</code>
and <code>HTTPResponse.getheader()</code> methods. (<a
href="https://redirect.github.com/urllib3/urllib3/issues/3731">#3731</a>)</li>
</ul>
<h2>2.6.0</h2>
<h2>🚀 urllib3 is fundraising for HTTP/2 support</h2>
<p><a
href="https://sethmlarson.dev/urllib3-is-fundraising-for-http2-support">urllib3
is raising ~$40,000 USD</a> to release HTTP/2 support and ensure
long-term sustainable maintenance of the project after a sharp decline
in financial support. If your company or organization uses Python and
would benefit from HTTP/2 support in Requests, pip, cloud SDKs, and
thousands of other projects <a
href="https://opencollective.com/urllib3">please consider contributing
financially</a> to ensure HTTP/2 support is developed sustainably and
maintained for the long-haul.</p>
<p>Thank you for your support.</p>
<h2>Security</h2>
<ul>
<li>Fixed a security issue where streaming API could improperly handle
highly compressed HTTP content (&quot;decompression bombs&quot;) leading
to excessive resource consumption even when a small amount of data was
requested. Reading small chunks of compressed data is safer and much
more efficient now. (CVE-2025-66471 reported by <a
href="https://github.com/Cycloctane"><code>@​Cycloctane</code></a>, 8.9
High, GHSA-2xpw-w6gg-jr37)</li>
<li>Fixed a security issue where an attacker could compose an HTTP
response with virtually unlimited links in the
<code>Content-Encoding</code> header, potentially leading to a denial of
service (DoS) attack by exhausting system resources during decoding. The
number of allowed chained encodings is now limited to 5. (CVE-2025-66418
reported by <a
href="https://github.com/illia-v"><code>@​illia-v</code></a>, 8.9 High,
GHSA-gm62-xv2j-4w53)</li>
</ul>
<blockquote>
<p>[!IMPORTANT]</p>
<ul>
<li>If urllib3 is not installed with the optional
<code>urllib3[brotli]</code> extra, but your environment contains a
Brotli/brotlicffi/brotlipy package anyway, make sure to upgrade it to at
least Brotli 1.2.0 or brotlicffi 1.2.0.0 to benefit from the security
fixes and avoid warnings. Prefer using <code>urllib3[brotli]</code> to
install a compatible Brotli package automatically.</li>
</ul>
</blockquote>
<!-- raw HTML omitted -->
</blockquote>
<p>... (truncated)</p>
</details>
<details>
<summary>Changelog</summary>
<p><em>Sourced from <a
href="https://github.com/urllib3/urllib3/blob/main/CHANGES.rst">urllib3's
changelog</a>.</em></p>
<blockquote>
<h1>2.6.3 (2026-01-07)</h1>
<ul>
<li>Fixed a high-severity security issue where decompression-bomb
safeguards of
the streaming API were bypassed when HTTP redirects were followed.
(<code>GHSA-38jv-5279-wg99
&lt;https://github.com/urllib3/urllib3/security/advisories/GHSA-38jv-5279-wg99&gt;</code>__)</li>
<li>Started treating <code>Retry-After</code> times greater than 6 hours
as 6 hours by
default. (<code>[#3743](https://github.com/urllib3/urllib3/issues/3743)
&lt;https://github.com/urllib3/urllib3/issues/3743&gt;</code>__)</li>
<li>Fixed <code>urllib3.connection.VerifiedHTTPSConnection</code> on
Emscripten.
(<code>[#3752](https://github.com/urllib3/urllib3/issues/3752)
&lt;https://github.com/urllib3/urllib3/issues/3752&gt;</code>__)</li>
</ul>
<h1>2.6.2 (2025-12-11)</h1>
<ul>
<li>Fixed <code>HTTPResponse.read_chunked()</code> to properly handle
leftover data in
the decoder's buffer when reading compressed chunked responses.
(<code>[#3734](https://github.com/urllib3/urllib3/issues/3734)
&lt;https://github.com/urllib3/urllib3/issues/3734&gt;</code>__)</li>
</ul>
<h1>2.6.1 (2025-12-08)</h1>
<ul>
<li>Restore previously removed <code>HTTPResponse.getheaders()</code>
and
<code>HTTPResponse.getheader()</code> methods.
(<code>[#3731](https://github.com/urllib3/urllib3/issues/3731)
&lt;https://github.com/urllib3/urllib3/issues/3731&gt;</code>__)</li>
</ul>
<h1>2.6.0 (2025-12-05)</h1>
<h2>Security</h2>
<ul>
<li>Fixed a security issue where streaming API could improperly handle
highly
compressed HTTP content (&quot;decompression bombs&quot;) leading to
excessive resource
consumption even when a small amount of data was requested. Reading
small
chunks of compressed data is safer and much more efficient now.
(<code>GHSA-2xpw-w6gg-jr37
&lt;https://github.com/urllib3/urllib3/security/advisories/GHSA-2xpw-w6gg-jr37&gt;</code>__)</li>
<li>Fixed a security issue where an attacker could compose an HTTP
response with
virtually unlimited links in the <code>Content-Encoding</code> header,
potentially
leading to a denial of service (DoS) attack by exhausting system
resources
during decoding. The number of allowed chained encodings is now limited
to 5.
(<code>GHSA-gm62-xv2j-4w53
&lt;https://github.com/urllib3/urllib3/security/advisories/GHSA-gm62-xv2j-4w53&gt;</code>__)</li>
</ul>
<p>.. caution::</p>
<ul>
<li>If urllib3 is not installed with the optional
<code>urllib3[brotli]</code> extra, but
your environment contains a Brotli/brotlicffi/brotlipy package anyway,
make
sure to upgrade it to at least Brotli 1.2.0 or brotlicffi 1.2.0.0 to
benefit from the security fixes and avoid warnings. Prefer using</li>
</ul>
<!-- raw HTML omitted -->
</blockquote>
<p>... (truncated)</p>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a
href="0248277dd7"><code>0248277</code></a>
Release 2.6.3</li>
<li><a
href="8864ac407b"><code>8864ac4</code></a>
Merge commit from fork</li>
<li><a
href="70cecb27ca"><code>70cecb2</code></a>
Fix Scorecard issues related to vulnerable dev dependencies (<a
href="https://redirect.github.com/urllib3/urllib3/issues/3755">#3755</a>)</li>
<li><a
href="41f249abe1"><code>41f249a</code></a>
Move &quot;v2.0 Migration Guide&quot; to the end of the table of
contents (<a
href="https://redirect.github.com/urllib3/urllib3/issues/3747">#3747</a>)</li>
<li><a
href="fd4dffd2fc"><code>fd4dffd</code></a>
Patch <code>VerifiedHTTPSConnection</code> for Emscripten (<a
href="https://redirect.github.com/urllib3/urllib3/issues/3752">#3752</a>)</li>
<li><a
href="13f0bfd55e"><code>13f0bfd</code></a>
Handle massive values in Retry-After when calculating time to sleep for
(<a
href="https://redirect.github.com/urllib3/urllib3/issues/3743">#3743</a>)</li>
<li><a
href="8c480bf87b"><code>8c480bf</code></a>
Bump actions/upload-artifact from 5.0.0 to 6.0.0 (<a
href="https://redirect.github.com/urllib3/urllib3/issues/3748">#3748</a>)</li>
<li><a
href="4b40616e95"><code>4b40616</code></a>
Bump actions/cache from 4.3.0 to 5.0.1 (<a
href="https://redirect.github.com/urllib3/urllib3/issues/3750">#3750</a>)</li>
<li><a
href="82b8479663"><code>82b8479</code></a>
Bump actions/download-artifact from 6.0.0 to 7.0.0 (<a
href="https://redirect.github.com/urllib3/urllib3/issues/3749">#3749</a>)</li>
<li><a
href="34284cb017"><code>34284cb</code></a>
Mention experimental features in the security policy (<a
href="https://redirect.github.com/urllib3/urllib3/issues/3746">#3746</a>)</li>
<li>Additional commits viewable in <a
href="https://github.com/urllib3/urllib3/compare/2.4.0...2.6.3">compare
view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility
score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=urllib3&package-manager=uv&previous-version=2.4.0&new-version=2.6.3)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't
alter it yourself. You can also trigger a rebase manually by commenting
`@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits
that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after
your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge
and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating
it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all
of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop
Dependabot creating any more for this major version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop
Dependabot creating any more for this minor version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop
Dependabot creating any more for this dependency (unless you reopen the
PR or upgrade to it yourself)
You can disable automated security fix PRs for this repo from the
[Security Alerts
page](https://github.com/infiniflow/ragflow/network/alerts).

</details>

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-01-28 19:03:41 +08:00
c8bd413e4c Fixed bug: Prevent 400 errors from Image2Text providers by skipping images smaller than 11px on any side during figure enhancement. (#12868)
### What problem does this PR solve?
During figure enhancement, some cropped figure images are extremely
small. Sending these to the Image2Text/VLM provider fails with a 400
invalid_parameter_error because the image width/height must

be >10px. This aborts the enhancement step. This PR adds a minimal size
guard to skip tiny crops and continue processing.
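
A hedged sketch of such a size guard using Pillow (the threshold constant and helper name are illustrative):

```python
from PIL import Image

MIN_SIDE_PX = 11  # crops with any side below this are skipped rather than sent to the VLM

def should_enhance(image: Image.Image) -> bool:
    """Skip figures that are too small for Image2Text providers (which reject sides <= 10px)."""
    width, height = image.size
    return width >= MIN_SIDE_PX and height >= MIN_SIDE_PX

tiny = Image.new("RGB", (8, 120))
normal = Image.new("RGB", (640, 480))
print(should_enhance(tiny))    # False -> skipped, enhancement continues with other figures
print(should_enhance(normal))  # True
```
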
<img width="1084" height="494" alt="image"
src="https://github.com/user-attachments/assets/ad074270-94e6-4571-91c8-37df85212639"
/>

### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-28 14:59:02 +08:00
2c4499ec45 Fix: key error "content" #12844 (#12847)
### What problem does this PR solve?

Fix: key error "content" #12844

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

---------

Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>
2026-01-28 14:39:34 +08:00
15a534909f fix: avoid ZeroDivisionError when fulltext column weights sum to zero (#12856)
### What problem does this PR solve?

When all `fulltext_search_columns` use an explicit weight of 0 (e.g. `"col^0"`),
`weight_sum` is 0 and dividing by it raises a `ZeroDivisionError`. Use equal
weights `1/n` when `weight_sum <= 0` and `n > 0`; otherwise normalize as
before.
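
Sketched out, the fallback might look like this (the `col^weight` parsing and function name are illustrative):

```python
def normalize_weights(columns: list[str]) -> dict[str, float]:
    """Parse 'col^weight' specs; fall back to equal weights when the sum is not positive."""
    parsed = {}
    for spec in columns:
        name, _, weight = spec.partition("^")
        parsed[name] = float(weight) if weight else 1.0

    weight_sum = sum(parsed.values())
    n = len(parsed)
    if weight_sum <= 0 and n > 0:
        return {name: 1.0 / n for name in parsed}   # equal weights, avoids ZeroDivisionError
    return {name: w / weight_sum for name, w in parsed.items()}

print(normalize_weights(["title^0", "body^0"]))   # {'title': 0.5, 'body': 0.5}
print(normalize_weights(["title^3", "body^1"]))   # {'title': 0.75, 'body': 0.25}
```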

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
- [x] New Feature (non-breaking change which adds functionality)
- [x] Documentation Update
- [x] Refactoring
2026-01-28 14:38:03 +08:00
9a5208976c Put document metadata in ES/Infinity (#12826)
### What problem does this PR solve?

Put document metadata in ES/Infinity.

Metadata index name: `ragflow_doc_meta_{tenant_id}`

### Type of change

- [x] Refactoring
2026-01-28 13:29:34 +08:00
fd11aca8e5 feat: Implement pluggable multi-provider sandbox architecture (#12820)
## Summary

Implement a flexible sandbox provider system supporting both
self-managed (Docker) and SaaS (Aliyun Code Interpreter) backends for
secure code execution in agent workflows.

**Key Changes:**
-  Aliyun Code Interpreter provider using official
`agentrun-sdk>=0.0.16`
-  Self-managed provider with gVisor (runsc) security
-  Arguments parameter support for dynamic code execution
-  Database-only configuration (removed fallback logic)
-  Configuration scripts for quick setup

Issue #12479

## Features

### 🔌 Provider Abstraction Layer

**1. Self-Managed Provider** (`agent/sandbox/providers/self_managed.py`)
- Wraps existing executor_manager HTTP API
- gVisor (runsc) for secure container isolation
- Configurable pool size, timeout, retry logic
- Languages: Python, Node.js, JavaScript
- ⚠️ **Requires**: gVisor installation, Docker, base images

**2. Aliyun Code Interpreter**
(`agent/sandbox/providers/aliyun_codeinterpreter.py`)
- SaaS integration using official agentrun-sdk
- Serverless microVM execution with auto-authentication
- Hard timeout: 30 seconds max
- Credentials: `AGENTRUN_ACCESS_KEY_ID`, `AGENTRUN_ACCESS_KEY_SECRET`,
`AGENTRUN_ACCOUNT_ID`, `AGENTRUN_REGION`
- Automatically wraps code to call `main()` function

**3. E2B Provider** (`agent/sandbox/providers/e2b.py`)
- Placeholder for future integration

### ⚙️ Configuration System

- `conf/system_settings.json`: Default provider =
`aliyun_codeinterpreter`
- `agent/sandbox/client.py`: Enforces database-only configuration
- Admin UI: `/admin/sandbox-settings`
- Configuration validation via `validate_config()` method
- Health checks for all providers

### 🎯 Key Capabilities

**Arguments Parameter Support:**
All providers support passing arguments to `main()` function:
```python
# User code
def main(name: str, count: int) -> dict:
    return {"message": f"Hello {name}!" * count}

# Executed with: arguments={"name": "World", "count": 3}
# Result: {"message": "Hello World!Hello World!Hello World!"}
```

**Self-Describing Providers:**
Each provider implements `get_config_schema()` returning form
configuration for Admin UI

**Error Handling:**
Structured `ExecutionResult` with stdout, stderr, exit_code,
execution_time
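
A condensed sketch of the abstraction (names follow the description above but are simplified; the real interface in `agent/sandbox/providers/base.py` may differ):

```python
from abc import ABC, abstractmethod
from dataclasses import dataclass

@dataclass
class ExecutionResult:
    stdout: str
    stderr: str
    exit_code: int
    execution_time: float

class SandboxProvider(ABC):
    """Common contract implemented by the self-managed, Aliyun, and E2B providers."""

    @abstractmethod
    def execute(self, code: str, language: str, timeout: int,
                arguments: dict | None = None) -> ExecutionResult: ...

    @abstractmethod
    def get_config_schema(self) -> dict:
        """Return the form definition the Admin UI renders for this provider."""

    @abstractmethod
    def health_check(self) -> bool: ...
```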

## Configuration Scripts

Two scripts for quick Aliyun sandbox setup:

**Shell Script (requires jq):**
```bash
source scripts/configure_aliyun_sandbox.sh
```

**Python Script (interactive):**
```bash
python3 scripts/configure_aliyun_sandbox.py
```

## Testing

```bash
# Unit tests
uv run pytest agent/sandbox/tests/test_providers.py -v

# Aliyun provider tests
uv run pytest agent/sandbox/tests/test_aliyun_codeinterpreter.py -v

# Integration tests (requires credentials)
uv run pytest agent/sandbox/tests/test_aliyun_codeinterpreter_integration.py -v

# Quick SDK validation
python3 agent/sandbox/tests/verify_sdk.py
```

**Test Coverage:**
- 30 unit tests for provider abstraction
- Provider-specific tests for Aliyun
- Integration tests with real API
- Security tests for executor_manager

## Documentation

- `docs/develop/sandbox_spec.md` - Complete architecture specification
- `agent/sandbox/tests/MIGRATION_GUIDE.md` - Migration from legacy
sandbox
- `agent/sandbox/tests/QUICKSTART.md` - Quick start guide
- `agent/sandbox/tests/README.md` - Testing documentation

## Breaking Changes

⚠️ **Migration Required:**

1. **Directory Move**: `sandbox/` → `agent/sandbox/`
   - Update imports: `from sandbox.` → `from agent.sandbox.`

2. **Mandatory Configuration**: 
   - SystemSettings must have `sandbox.provider_type` configured
   - Removed fallback default values
- Configuration must exist in database (from
`conf/system_settings.json`)

3. **Aliyun Credentials**:
   - Requires `AGENTRUN_*` environment variables (not `ALIYUN_*`)
   - `AGENTRUN_ACCOUNT_ID` is now required (Aliyun primary account ID)

4. **Self-Managed Provider**:
   - gVisor (runsc) must be installed for security
   - Install: `go install gvisor.dev/gvisor/runsc@latest`

## Database Schema Changes

```python
# SystemSettings.value: CharField → TextField
api/db/db_models.py: Changed for unlimited config length

# SystemSettingsService.get_by_name(): Fixed query precision
api/db/services/system_settings_service.py: startswith → exact match
```

## Files Changed

### Backend (Python)
- `agent/sandbox/providers/base.py` - SandboxProvider ABC interface
- `agent/sandbox/providers/manager.py` - ProviderManager
- `agent/sandbox/providers/self_managed.py` - Self-managed provider
- `agent/sandbox/providers/aliyun_codeinterpreter.py` - Aliyun provider
- `agent/sandbox/providers/e2b.py` - E2B provider (placeholder)
- `agent/sandbox/client.py` - Unified client (enforces DB-only config)
- `agent/tools/code_exec.py` - Updated to use provider system
- `admin/server/services.py` - SandboxMgr with registry & validation
- `admin/server/routes.py` - 5 sandbox API endpoints
- `conf/system_settings.json` - Default: aliyun_codeinterpreter
- `api/db/db_models.py` - TextField for SystemSettings.value
- `api/db/services/system_settings_service.py` - Exact match query

### Frontend (TypeScript/React)
- `web/src/pages/admin/sandbox-settings.tsx` - Settings UI
- `web/src/services/admin-service.ts` - Sandbox service functions
- `web/src/services/admin.service.d.ts` - Type definitions
- `web/src/utils/api.ts` - Sandbox API endpoints

### Documentation
- `docs/develop/sandbox_spec.md` - Architecture spec
- `agent/sandbox/tests/MIGRATION_GUIDE.md` - Migration guide
- `agent/sandbox/tests/QUICKSTART.md` - Quick start
- `agent/sandbox/tests/README.md` - Testing guide

### Configuration Scripts
- `scripts/configure_aliyun_sandbox.sh` - Shell script (jq)
- `scripts/configure_aliyun_sandbox.py` - Python script

### Tests
- `agent/sandbox/tests/test_providers.py` - 30 unit tests
- `agent/sandbox/tests/test_aliyun_codeinterpreter.py` - Provider tests
- `agent/sandbox/tests/test_aliyun_codeinterpreter_integration.py` -
Integration tests
- `agent/sandbox/tests/verify_sdk.py` - SDK validation

## Architecture

```
Admin UI → Admin API → SandboxMgr → ProviderManager → [SelfManaged|Aliyun|E2B]
                                      ↓
                                  SystemSettings
```

## Usage

### 1. Configure Provider

**Via Admin UI:**
1. Navigate to `/admin/sandbox-settings`
2. Select provider (Aliyun Code Interpreter / Self-Managed)
3. Fill in configuration
4. Click "Test Connection" to verify
5. Click "Save" to apply

**Via Configuration Scripts:**
```bash
# Aliyun provider
export AGENTRUN_ACCESS_KEY_ID="xxx"
export AGENTRUN_ACCESS_KEY_SECRET="yyy"
export AGENTRUN_ACCOUNT_ID="zzz"
export AGENTRUN_REGION="cn-shanghai"
source scripts/configure_aliyun_sandbox.sh
```

### 2. Restart Service

```bash
cd docker
docker compose restart ragflow-server
```

### 3. Execute Code in Agent

```python
from agent.sandbox.client import execute_code

result = execute_code(
    code='def main(name: str) -> dict: return {"message": f"Hello {name}!"}',
    language="python",
    timeout=30,
    arguments={"name": "World"}
)

print(result.stdout)  # {"message": "Hello World!"}
```

## Troubleshooting

### "Container pool is busy" (Self-Managed)
- **Cause**: Pool exhausted (default: 1 container in `.env`)
- **Fix**: Increase `SANDBOX_EXECUTOR_MANAGER_POOL_SIZE` to 5+
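
For example, in the `.env` file used by the Docker deployment (variable name as referenced above):

```bash
# Allow up to 5 concurrent sandbox containers (default is 1)
SANDBOX_EXECUTOR_MANAGER_POOL_SIZE=5
```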

### "Sandbox provider type not configured"
- **Cause**: Database missing configuration
- **Fix**: Run config script or set via Admin UI

### "gVisor not found"
- **Cause**: runsc not installed
- **Fix**: `go install gvisor.dev/gvisor/runsc@latest && sudo cp
~/go/bin/runsc /usr/local/bin/`

### Aliyun authentication errors
- **Cause**: Wrong environment variable names
- **Fix**: Use `AGENTRUN_*` prefix (not `ALIYUN_*`)

## Checklist

- [x] All tests passing (30 unit tests + integration tests)
- [x] Documentation updated (spec, migration guide, quickstart)
- [x] Type definitions added (TypeScript)
- [x] Admin UI implemented
- [x] Configuration validation
- [x] Health checks implemented
- [x] Error handling with structured results
- [x] Breaking changes documented
- [x] Configuration scripts created
- [x] gVisor requirements documented

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>

---------

Co-authored-by: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-01-28 13:28:21 +08:00
b57c82b122 Feat: add kimi-k2.5 (#12852)
### What problem does this PR solve?

Add kimi-k2.5

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
2026-01-28 12:41:20 +08:00
3a8c848af5 Fix:OSConnection.create_idx 4 arguments (#12862)
### What problem does this PR solve?

https://github.com/infiniflow/ragflow/issues/12858

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-28 12:41:01 +08:00
fe99905a2b Refactor: Remove the brute-force deduplication method for agent logs. (#12864)
### What problem does this PR solve?

Refactor: Remove the brute-force deduplication method for agent logs.

### Type of change

- [x] Refactoring
2026-01-28 12:04:30 +08:00
591870eb6e Update quickstart (#12866)
### What problem does this PR solve?

Notify developers to use the correct release.

### Type of change

- [x] Documentation Update

Signed-off-by: Jin Hai <haijin.chn@gmail.com>
2026-01-28 11:06:17 +08:00
df3d044f03 fix: enable auto-resize for chat input textarea (#12836)
Closes #12803

### What problem does this PR solve?
The chat input textarea in the Chat UI (and Embed UI) has a fixed height
and cannot be resized, causing poor UX when users type messages longer
than 2 sentences. The input becomes cramped and difficult to read/edit.
**Root cause:** The `Textarea` component in `NextMessageInput`
(`web/src/components/message-input/next.tsx`) had `resize-none` and
`field-sizing-content` CSS classes that prevented resizing, and the
existing `autoSize` prop was not being utilized.

**Solution:** 
- Removed `resize-none` and `field-sizing-content` classes
- Added `autoSize={{ minRows: 1, maxRows: 8 }}` to enable auto-expand
- Added `max-h-40` class to limit maximum height to 160px
The textarea now auto-expands from 1 to 8 rows as users type longer
messages.

### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-28 09:53:02 +08:00
ee654f08d2 Refact: update description for max_token in embedding #12792 (#12845)
### What problem does this PR solve?

Refact: update description for max_token in embedding #12792

### Type of change


- [x] Refactoring

Co-authored-by: Liu An <asiro@qq.com>
2026-01-28 09:52:32 +08:00
ceff119f89 Docs: Added build Ecommerce customer support guide (#12832)
### What problem does this PR solve?


### Type of change


- [x] Documentation Update
2026-01-28 09:48:54 +08:00
c2e8f90023 feat(ci): Add Redis service port configuration to test environment (#12855)
### What problem does this PR solve?

Added Redis port calculation and environment variable export to support
Redis service in test environment. The port is dynamically assigned
based on runner number to prevent conflicts during parallel test
execution. Removed by #12685

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
2026-01-28 09:27:47 +08:00
702b5b35e8 Fix error handle in RAGFlow CLI (#12829)
### What problem does this PR solve?

As title.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

Signed-off-by: Jin Hai <haijin.chn@gmail.com>
2026-01-27 17:22:23 +08:00
2a758402ad Fix: Hunyuan cannot work properly (#12843)
### What problem does this PR solve?

Hunyuan cannot work properly

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-27 17:04:53 +08:00
e77168feba Fix: Handle whitespace-only question in /retrieval endpoint (#12831)
## Description

This PR fixes issue #12805 by adding validation to handle
whitespace-only questions in the `/retrieval` endpoint.

## Problem

Sending a single space `" "` as the `question` parameter to `/retrieval`
crashes the request with an `AssertionError`. This happens because:
1. The endpoint doesn't trim or validate the question parameter
2. A whitespace-only string is treated as valid input
3. The retrieval logic only checks for empty strings (which are falsy),
but `" "` is truthy
4. Invalid match expressions are constructed, causing an assertion
failure in the Elasticsearch layer

## Solution

- Trim whitespace from the question parameter before processing
- Return an empty result for whitespace-only or empty questions
- Prevents the AssertionError and provides expected behavior

## Changes

- Added whitespace trimming and validation in `api/apps/sdk/doc.py`
- Returns empty result early if question is empty after trimming
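
A minimal sketch of that guard, with hypothetical names (the actual change lives in `api/apps/sdk/doc.py`):

```python
def normalized_question(raw: str | None) -> str | None:
    """Hypothetical sketch: trim the question and treat whitespace-only input as empty."""
    question = (raw or "").strip()
    return question or None

# Callers can short-circuit with an empty result when this returns None,
# instead of building an invalid match expression downstream.
```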

## Testing

- Tested with single space input - now returns empty result instead of
crashing
- Tested with empty string - returns empty result
- Tested with normal questions - works as expected

Fixes #12805

Co-authored-by: Daniel <daniel@example.com>
2026-01-27 15:57:47 +08:00
52da81cf9e Fix:Redis configuration template error in v0.22.1 (#12685)
### What problem does this PR solve?
https://github.com/infiniflow/ragflow/issues/12674

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-27 12:47:46 +08:00
b36d9744ae shortcut metadata_condition if there is none (#12835)
### What problem does this PR solve?

If no `metadata_condition` parameter is given then don't load the
metadata of all documents into memory. Instead just pass `doc_ids` as
`None` to the `retrieval()` method, which means to use all documents of
the given datasets.

This is relevant if you have *a lot* of documents!
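
A rough sketch of the shortcut described above (names are illustrative, not the actual implementation):

```python
def match_doc_ids_against_metadata(condition: dict) -> list[str]:
    # Hypothetical placeholder for the metadata-based filtering path.
    raise NotImplementedError

def resolve_doc_ids(metadata_condition: dict | None) -> list[str] | None:
    """Illustrative sketch: skip the metadata scan when no condition is supplied."""
    if not metadata_condition:
        # retrieval() treats doc_ids=None as "all documents of the given
        # datasets", so there is no need to load every document's metadata.
        return None
    return match_doc_ids_against_metadata(metadata_condition)
```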

### Type of change

- [x] Performance Improvement
2026-01-27 12:45:58 +08:00
c8338dec57 Refa: convert RAGFlow MCP server from sync to async (#12834)
### What problem does this PR solve?

Convert RAGFlow MCP server from sync to async.

### Type of change

- [x] Refactoring
- [x] Performance Improvement
2026-01-27 12:45:43 +08:00
f096917eeb Fix: overlap cannot be properly applied (#12828)
### What problem does this PR solve?

Overlap cannot be properly applied.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-27 12:43:01 +08:00
413956e9dd Feat: Add German language support for agent template and various UI elements (#12830)
### What problem does this PR solve?

This PR updates and extends the German language support in the frontend.
Additionally, two more elements are now handled dynamically, and the
interactive Agent is now titled and described in German.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

Co-authored-by: Jakob <16180662+hauberj@users.noreply.github.com>
2026-01-27 12:42:44 +08:00
6404af0a91 Bump to infinity v0.7.0-dev2 (#12839)
### What problem does this PR solve?

Bump to infinity v0.7.0-dev2

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

---

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-authored-by: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-01-27 11:48:02 +08:00
27a36344d4 Feat: Support PaddleOCR-VL-1.5 interface (#12819)
### What problem does this PR solve?

This PR adds support to PaddleOCR-VL-1.5 interface to the PaddleOCR PDF
Parser.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
2026-01-27 09:49:46 +08:00
e20d56a34c Fix: metadata update issue (#12815)
### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-26 18:02:44 +08:00
1d93519cb2 Fix: Issues with metadata parameter addition failures and single-file chunk saving failures. (#12818)
### What problem does this PR solve?

Fix: Issues with metadata parameter addition failures and single-file
chunk saving failures.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-26 18:00:40 +08:00
13076bb87b Fix: Parent chunking fails on DOCX files (#12822)
### What problem does this PR solve?

Fixes parent chunking fails on DOCX files.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-26 17:55:09 +08:00
e04cd99ae2 Feat: Add the history field to the agent's system variables. #7322 (#12823)
### What problem does this PR solve?

Feat: Add the history field to the agent's system variables. #7322

### Type of change


- [x] New Feature (non-breaking change which adds functionality)
2026-01-26 17:54:30 +08:00
41905e2569 Update RAGFlow CLI (#12816)
### What problem does this PR solve?

Improve performance slightly.

### Type of change

- [x] Refactoring
- [x] Performance Improvement

Signed-off-by: Jin Hai <haijin.chn@gmail.com>
2026-01-26 12:58:04 +08:00
0782a7d3c6 Refactor: improve task cancellation checks in RAPTOR (#12813)
### What problem does this PR solve?
Introduced a helper method _check_task_canceled to centralize and
simplify task cancellation checks throughout
RecursiveAbstractiveProcessing4TreeOrganizedRetrieval. This reduces code
duplication and improves maintainability.

### Type of change

- [x] Refactoring
2026-01-26 11:34:54 +08:00
4236a62855 Fix: Cancel tasks before document or datasets deletion to prevent queue blocking (#12799)
### What problem does this PR solve?
When a knowledge base is deleted, the records in the Document and
Knowledgebase tables are removed immediately, but a large number of
pending task messages remain in the Redis (asynchronous) queue if the
ongoing tasks were not stopped before the deletion.
TaskService.get_task() uses a JOIN query to associate three tables (Task
← Document ← Knowledgebase). Since the Document/Knowledgebase rows have
been deleted, the JOIN returns an empty result even though the Task
records still exist, so the task-executor considers the task unknown
("collect task xxx is unknown") and can only skip it and warn.

Log:
2026-01-23 16:43:21,716 WARNING 1190179 collect task
110fbf70f5bd11f0945a23b0930487df is unknown
2026-01-23 16:43:21,818 WARNING 1190179 collect task
11146bc4f5bd11f0945a23b0930487df is unknown
2026-01-23 16:43:21,918 WARNING 1190179 collect task
111c3336f5bd11f0945a23b0930487df is unknown
2026-01-23 16:43:22,021 WARNING 1190179 collect task
112471b8f5bd11f0945a23b0930487df is unknown
2026-01-23 16:43:26,719 WARNING 1190179 collect task
112e855ef5bd11f0945a23b0930487df is unknown
2026-01-23 16:43:26,734 WARNING 1190179 collect task
1134380af5bd11f0945a23b0930487df is unknown
2026-01-23 16:43:26,834 WARNING 1190179 collect task
1138cb2cf5bd11f0945a23b0930487df is unknown

As a consequence, a large number of such tasks occupy the queue
processing capacity, causing new tasks to queue and wait

<img width="1910" height="947"
alt="9a00f2e0-9112-4dbb-b357-7f66b8eb5acf"
src="https://github.com/user-attachments/assets/0e1227c2-a2df-4ef3-ba8f-e04c3f6ef0e1"
/>


Solution:
Stop all ongoing tasks before deleting the knowledge base and its tasks.


### Type of change
- Bug Fix (non-breaking change which fixes an issue)
2026-01-26 10:45:59 +08:00
9afb5bc136 Add Copilot setting and conventions (#12807)
### What problem does this PR solve?

Added project instructions for setting up and running the application.

### Type of change

- [x] Documentation Update
2026-01-26 10:44:20 +08:00
f0fcf8aa9a Fix: reset conversation variables. (#12814)
### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-26 10:43:57 +08:00
274fc5ffaa Fix RAGFlow CLI bug (#12811)
### What problem does this PR solve?

As title

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

Signed-off-by: Jin Hai <haijin.chn@gmail.com>
2026-01-25 23:08:59 +08:00
80a16e71df Docs: Added webhook specific configuration tips (#12802)
### What problem does this PR solve?


### Type of change


- [x] Documentation Update
2026-01-23 22:09:49 +08:00
6220906164 Fix: Fixed the error on the login page. (#12801)
### What problem does this PR solve?

Fix:  Fixed the error on the login page.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-23 18:58:54 +08:00
fa5284361c feat: support admin assign superuser in admin ui (#12798)
### What problem does this PR solve?

Allow superuser(admin) to grant or revoke other superuser.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
2026-01-23 18:08:46 +08:00
f3923452df Fix: add tokenized content (#12793)
### What problem does this PR solve?

Add tokenized content es field to query zh message.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-23 16:56:03 +08:00
11470906cf Fix: Metadata time Picker (#12796)
### What problem does this PR solve?

Fix: Metadata time Picker

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-23 16:55:43 +08:00
e1df82946e RAGFlow CLI: ping server before input password when login user (#12791)
### What problem does this PR solve?

As title

### Type of change

- [x] Refactoring

---------

Signed-off-by: Jin Hai <haijin.chn@gmail.com>
2026-01-23 15:03:05 +08:00
08c01b76d5 Fix: missing parent chunk issue. (#12789)
### What problem does this PR solve?

Close #12783

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-23 12:54:08 +08:00
678392c040 feat(deepdoc): add configurable ONNX thread counts and GPU memory shrinkage (#12777)
### What problem does this PR solve?

This PR addresses critical memory and CPU resource management issues in
high-concurrency environments (multi-worker setups):

GPU Memory Exhaustion (OOM): Currently, onnxruntime-gpu uses an
aggressive memory arena that does not effectively release VRAM back to
the system after a task completes. In multi-process worker setups ($WS >
4), this leads to BFCArena allocation failures and OOM errors as workers
"hoard" VRAM even when idle. This PR introduces an optional GPU Memory
Arena Shrinkage toggle to mitigate this issue.

CPU Oversubscription: ONNX intra_op and inter_op thread counts are
currently hardcoded to 2. When running many workers, this causes
significant CPU context-switching overhead and degrades performance.
This PR makes these values configurable to match the host's actual CPU
core density.

Multi-GPU Support: The memory management logic has been improved to
dynamically target the correct device_id, ensuring stability on systems
with multiple GPUs.

Transparency: Added detailed initialization logs to help administrators
verify and troubleshoot their ONNX session configurations.

 

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

Co-authored-by: shakeel <shakeel@lollylaw.com>
2026-01-23 11:36:28 +08:00
6be197cbb6 Fix: Use tiktoken for proper token counting in OpenAI-compatible endpoint #7850 (#12760)
### What problem does this PR solve?
The OpenAI-compatible chat endpoint
(`/chats_openai/<chat_id>/chat/completions`) was not returning accurate
token
usage in streaming responses. The token counts were either missing or
inaccurate because the underlying LLM API
responses weren't being properly parsed for usage data.
This PR adds proper token counting using tiktoken (cl100k_base encoding)
as a fallback when the LLM API doesn't provide usage data in streaming
chunks. This ensures clients always receive token usage information in
the
response, which is essential for billing and quota management.
**Changes:**
- Add tiktoken-based token counting for streaming responses in
OpenAI-compatible endpoint
- Ensure `usage` field is always populated in the final streaming chunk
- Add unit tests for token usage calculation
  Fixes #7850
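
A rough sketch of the fallback counting, assuming tiktoken's cl100k_base encoding as described above (names are illustrative):

```python
import tiktoken

# Fallback when streaming chunks from the LLM API carry no usage data.
_enc = tiktoken.get_encoding("cl100k_base")

def estimate_usage(prompt_text: str, answer_text: str) -> dict:
    prompt_tokens = len(_enc.encode(prompt_text))
    completion_tokens = len(_enc.encode(answer_text))
    return {
        "prompt_tokens": prompt_tokens,
        "completion_tokens": completion_tokens,
        "total_tokens": prompt_tokens + completion_tokens,
    }
```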

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-23 09:36:21 +08:00
8dd4a41bf8 Feat: Add a web search button to the chat box on the chat page. (#12786)
### What problem does this PR solve?

Feat: Add a web search button to the chat box on the chat page.

### Type of change


- [x] New Feature (non-breaking change which adds functionality)
2026-01-23 09:33:50 +08:00
e9453a3971 Fix: Metadata supports precise time selection (#12785)
### What problem does this PR solve?

Fix: Metadata supports precise time selection

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-23 09:33:34 +08:00
7c9b6e032b Fix: The minimum size of the historical message window for the classification operator is 1. #12778 (#12779)
### What problem does this PR solve?

Fix: The minimum size of the historical message window for the
classification operator is 1. #12778

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-22 19:45:25 +08:00
3beb85efa0 Feat: enhance metadata arranging. (#12745)
### What problem does this PR solve?
#11564

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
2026-01-22 15:34:08 +08:00
bc7b864a6c top_k parameter ignored, always returned page_size results (#12753)
### What problem does this PR solve?
**Backend**
`rag/nlp/search.py`
*Before the fix*
The top_k parameter was not applied to limit the total number of chunks,
and the rerank model used the entire valid_idx instead of first
truncating it with valid_idx = valid_idx[:top].
*After the fix*
The top_k limit is applied to the total results before pagination, using
a default value of top = 1024 if top_k is not modified.
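
A minimal illustration of that cap (function and variable names follow the description above, not the actual code):

```python
def cap_candidates(valid_idx: list[int], top_k: int | None) -> list[int]:
    # Apply the top_k limit to the candidate set before pagination and
    # reranking; fall back to the default of 1024 when top_k is not set.
    top = top_k or 1024
    return valid_idx[:top]
```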

session.py
*Before the fix:*
When the frontend calls the retrieval API with `search_id`, the backend
only reads `meta_data_filter` from the saved `search_config`. The
`rerank_id`, `top_k`, `similarity_threshold`, and
`vector_similarity_weight` parameters are only taken from the direct
request body. Since the frontend doesn't pass these parameters
explicitly (it only passes `search_id`), they always fall back to
default values:
- `similarity_threshold` = 0.0
- `vector_similarity_weight` = 0.3
- `top_k` = 1024
- `rerank_id` = "" (no rerank)
This means user settings saved in the Search Settings page have no
effect on actual search results.

*After the fix:*
When a `search_id` is provided, the backend now reads all relevant
configuration from the saved `search_config`, including `rerank_id`,
`top_k`, `similarity_threshold`, and `vector_similarity_weight`. Request
parameters can still override these values if explicitly provided,
allowing flexibility. The rerank model is now properly instantiated
using the configured `rerank_id`, making the rerank feature actually
work.



**Frontend**
`web/src/pages/next-search/search-setting.tsx`
*Before the fix*
search-setting.tsx file, the top_k input box is only displayed when
rerank is enabled (wrapped in the rerankModelDisabled condition). If the
rerank switch is turned off, the top_k input field will be hidden, but
the form value will remain unchanged. In other words: - When rerank is
enabled, users can modify top_k (default 1024). - When rerank is
disabled, top_k retains the previous value, but it's not visible on the
interface. Therefore, the backend will always receive the top_k
parameter; it's just that the frontend UI binds this configuration item
to the rerank switch. When rerank is turned off, top_k will not
automatically reset to 1024, but will retain its original value.
*After the fix*
If the rerank model switch is turned off, the top_k value is reset to
1024. In addition, top_k is now handled as its own parameter rather than
being bundled into the retrieval method, so it can be controlled
separately.



Now all methods valid
Using rerank

<img width="2378" height="1565" alt="Screenshot 2026-01-21 190206"
src="https://github.com/user-attachments/assets/fa2b0df0-1334-4ca3-b169-da6c5fd59935"
/>

Not using rerank
<img width="2596" height="1559" alt="Screenshot 2026-01-21 190229"
src="https://github.com/user-attachments/assets/c5a80522-a0e1-40e7-b349-42fe86df3138"
/>




Before the fix, they were the same.

### Type of change
- Bug Fix (non-breaking change which fixes an issue)
2026-01-22 15:33:42 +08:00
93091f4551 [Feat]Automatic table orientation detection and correction (#12719)
### What problem does this PR solve?
This PR introduces automatic table orientation detection and correction
within the PDF parser. This ensures that tables in PDFs are correctly
oriented before structure recognition, improving overall parsing
accuracy.

### Type of change
- [x] New Feature (non-breaking change which adds functionality)
- [x] Documentation Update
2026-01-22 12:47:55 +08:00
2d9e7b4acd Fix: aliyun oss need to use s3 signature_version (#12766)
### What problem does this PR solve?

Aliyun OSS does not support boto's s3v4 signature_version, which leads
to an error:

```
botocore.exceptions.ClientError: An error occurred (InvalidArgument) when calling the PutObject operation: aws-chunked encoding is not supported with the specified x-amz-content-sha256 value
```

According to the Aliyun OSS docs, oss_conn needs to use the s3 signature_version.
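
For reference, a minimal boto3 client configuration along these lines (the endpoint shown is illustrative):

```python
import boto3
from botocore.config import Config

# Force the legacy "s3" signature version so PutObject works against Aliyun OSS.
oss_client = boto3.client(
    "s3",
    endpoint_url="https://oss-cn-hangzhou.aliyuncs.com",  # illustrative endpoint
    config=Config(signature_version="s3"),
)
```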

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-22 11:43:55 +08:00
6f3f69b62e Feat: API adds audio to text and text to speech functions (#12764)
### What problem does this PR solve?

API adds audio to text and text to speech functions

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
2026-01-22 11:20:26 +08:00
bfd5435087 Fix: After deleting metadata in batches, the selected items need to be cleared. (#12767)
### What problem does this PR solve?

Fix: After deleting metadata in batches, the selected items need to be
cleared.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-22 11:20:11 +08:00
0e9fe68110 Feat: Adjust the icons in the chat page's collapsible panel. (#12755)
### What problem does this PR solve?

Feat: Adjust the icons in the chat page's collapsible panel.

### Type of change


- [x] New Feature (non-breaking change which adds functionality)
2026-01-22 09:48:44 +08:00
89f438fe45 Add ping command to test ping API (#12757)
### What problem does this PR solve?

As title.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

---------

Signed-off-by: Jin Hai <haijin.chn@gmail.com>
2026-01-22 00:18:29 +08:00
2e2c8f6ca9 Add more commands to RAGFlow CLI (#12731)
### What problem does this PR solve?

This PR makes the RAGFlow CLI access RAGFlow as a normal user and work
as a testing tool for the RAGFlow server.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

---------

Signed-off-by: Jin Hai <haijin.chn@gmail.com>
2026-01-21 18:49:52 +08:00
6cd4fd91e6 Fix: Allow classification operators to be followed by other classification operators. #9082 (#12744)
### What problem does this PR solve?

Fix: Allow classification operators to be followed by other
classification operators. #9082

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-21 16:24:39 +08:00
83e17d8c4a Fix: Optimize the metadata code structure to implement metadata list structure functionality. (#12741)
### What problem does this PR solve?

Fix: Optimize the metadata code structure to implement metadata list
structure functionality.

#11564

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-21 16:15:43 +08:00
e1143d40bc Feat: Add a think button to the chat box. #12742 (#12743)
### What problem does this PR solve?

Feat: Add a think button to the chat box. #12742
### Type of change


- [x] New Feature (non-breaking change which adds functionality)
2026-01-21 15:39:18 +08:00
f98abf14a8 Refa(test): improve code formatting and remove debug prints (#12739)
### What problem does this PR solve?

- Improving code formatting and consistency
- Removing debug print statements

### Type of change

- [x] Refactoring
2026-01-21 14:53:17 +08:00
2a87778e10 Chore(ci): use new Web API test cases in CI (#12738)
### What problem does this PR solve?

- Update pytest commands to use new test directory structure

### Type of change

- [x] chore(ci)
2026-01-21 14:53:05 +08:00
5836823187 Refactor:better handle list agent api desc param (#12733)
### What problem does this PR solve?
better handle list agent api desc param

### Type of change

- [x] Refactoring
2026-01-21 13:09:27 +08:00
5a7026cf55 Feat: Improve metadata logic (#12730)
### What problem does this PR solve?

Feat: Improve metadata logic

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
2026-01-21 11:31:26 +08:00
bc7935d627 feat: add batch delete for conversations in chat(web) (#12584)
Resolves #12572

## What problem does this PR solve?
The conversation list in chat sessions previously only supported
deleting conversations one by one. This was inefficient when users
needed to clean up multiple conversations. This PR adds batch delete
functionality to improve user experience.

## Type of change
 - [x] New Feature (non-breaking change which adds functionality)

## Specific changes
  - Add selection mode with checkboxes for conversation list
  - Add batch delete functionality with custom icons
  - Add internationalization support (en/zh)
  - Use existing removeConversation API which supports batch deletion

## UI modification status
  - Default: Show [+] and [batch delete icon]
  - Selection mode: Show checkboxes, keep [+] and [select all icon]
  - Items selected: Show [return icon] and [red trash icon]"

### Repair Comparison
**1.Before Repair**
<img width="982" height="1221" alt="image"
src="https://github.com/user-attachments/assets/8a80f7c0-7da6-41ec-9d1a-ac887ede96ba"
/>


**2.After Repair**
<img width="1273" height="919" alt="新增批量删除效果图"
src="https://github.com/user-attachments/assets/e179bdf3-3779-4bd5-84b6-8e24780a22ea"
/>

---
Co-authored-by: Gongzi

---------

Co-authored-by: Liu An <asiro@qq.com>
2026-01-20 19:13:53 +08:00
7787085664 Doc: add README for test (#12728)
### What problem does this PR solve?

We added instructions on how to test RAGFlow in test/README.md.

### Type of change

- [x] Documentation Update
2026-01-20 19:12:35 +08:00
960ecd3158 Feat: update and add new tests for web api apps (#12714)
### What problem does this PR solve?

This PR adds missing web API tests (system, search, KB, LLM, plugin,
connector). It also addresses a contract mismatch that was causing test
failures: metadata updates did not persist new keys (update‑only
behavior).

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
- [x] New Feature (non-breaking change which adds functionality)
- [x] Other (please describe): Test coverage expansion and test helper
instrumentation
2026-01-20 19:12:15 +08:00
aee9860970 Make document change-status idempotent for Infinity doc store (#12717)
### What problem does this PR solve?

This PR makes the document change‑status endpoint idempotent under the
Infinity doc store. If a document already has the requested status, the
handler returns success without touching the engine, preventing
unnecessary updates and avoiding missing‑table errors while keeping
responses consistent.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-20 19:11:21 +08:00
9ebbc5a74d chore: redirect to login page if api reports unauthorized in admin page (#12726)
### What problem does this PR solve?

Automatically redirect to the login page if the API reports `401:
Unauthorized` in ANY **Admin** page.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-20 18:58:13 +08:00
1c65f64bda fix: missing route for user detail page (#12725)
### What problem does this PR solve?

Add missing route for navigating to `/admin/users/:id`

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-20 18:55:44 +08:00
32841549c1 Fix: Not within a request context (#12723)
### What problem does this PR solve?

ERROR    1819426 Unhandled exception during request
Traceback (most recent call last):
  File "/home/qinling/github.com/infiniflow/ragflow/api/apps/document_app.py", line 639, in run
    return await thread_pool_exec(_run_sync)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/qinling/github.com/infiniflow/ragflow/common/misc_utils.py", line 132, in thread_pool_exec
    return await loop.run_in_executor(_thread_pool_executor(), func, *args)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/lib/python3.12/asyncio/futures.py", line 287, in __await__
    yield self  # This tells Task to wait for completion.
    ^^^^^^^^^^
  File "/usr/lib/python3.12/asyncio/tasks.py", line 385, in __wakeup
    future.result()
  File "/usr/lib/python3.12/asyncio/futures.py", line 203, in result
    raise self._exception.with_traceback(self._exception_tb)
  File "/usr/lib/python3.12/concurrent/futures/thread.py", line 58, in run
    result = self.fn(*self.args, **self.kwargs)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/qinling/github.com/infiniflow/ragflow/api/apps/document_app.py", line 593, in _run_sync
    if not DocumentService.accessible(doc_id, current_user.id):
                                              ^^^^^^^^^^^^^^^
  File "/home/qinling/github.com/infiniflow/ragflow/.venv/lib/python3.12/site-packages/werkzeug/local.py", line 318, in __get__
    obj = instance._get_current_object()
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/qinling/github.com/infiniflow/ragflow/.venv/lib/python3.12/site-packages/werkzeug/local.py", line 526, in _get_current_object
    return get_name(local())
                    ^^^^^^^
  File "/home/qinling/github.com/infiniflow/ragflow/api/apps/__init__.py", line 97, in _load_user
    authorization = request.headers.get("Authorization")
                    ^^^^^^^^^^^^^^^
  File "/home/qinling/github.com/infiniflow/ragflow/.venv/lib/python3.12/site-packages/werkzeug/local.py", line 318, in __get__
    obj = instance._get_current_object()
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/qinling/github.com/infiniflow/ragflow/.venv/lib/python3.12/site-packages/werkzeug/local.py", line 519, in _get_current_object
    raise RuntimeError(unbound_message) from None
RuntimeError: Not within a request context

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-20 16:56:41 +08:00
046d4ffdef Docs: Updated configuration file name (#12720)
### What problem does this PR solve?

### Type of change

- [x] Documentation Update
2026-01-20 15:40:03 +08:00
4c4d434bc1 Unify MySQL configuration (#12644)
### What problem does this PR solve?

Align MySQL defaults between docker/.env and
docker/service_conf.yaml.template
close #12645

### Type of change

- [x] Other (please describe):Unify MySQL configuration
2026-01-20 13:42:22 +08:00
80612bc992 Refactor: Replace antd with shadcn (#12718)
### What problem does this PR solve?

Refactor: Replace antd with shadcn
### Type of change

- [x] Refactoring
2026-01-20 13:38:54 +08:00
927db0b373 Refa: asyncio.to_thread to ThreadPoolExecutor to break thread limitat… (#12716)
### Type of change

- [x] Refactoring
2026-01-20 13:29:37 +08:00
120648ac81 fix: inaccurate error message when uploading multiple files containing an unsupported file type (#12711)
### What problem does this PR solve?

When uploading multiple files at once, if any of the files are of an
unsupported type and the blob is not removed, it triggers a
TypeError('Object of type bytes is not JSON serializable') exception.
This prevents the frontend from responding properly.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-20 12:24:54 +08:00
f367189703 fix(raptor): handle missing vector fields gracefully (#12713)
## Summary

This PR fixes a `KeyError` crash when running RAPTOR tasks on documents
that don't have the expected vector field.

## Related Issue

Fixes https://github.com/infiniflow/ragflow/issues/12675

## Problem

When running RAPTOR tasks, the code assumes all chunks have the vector
field `q_<size>_vec` (e.g., `q_1024_vec`). However, chunks may not have
this field if:
1. They were indexed with a **different embedding model** (different
vector size)
2. The embedding step **failed silently** during initial parsing
3. The document was parsed before the current embedding model was
configured

This caused a crash:
```
KeyError: 'q_1024_vec'
```

## Solution

Added defensive validation in `run_raptor_for_kb()`:

1. **Check for vector field existence** before accessing it
2. **Skip chunks** that don't have the required vector field instead of
crashing
3. **Log warnings** for skipped chunks with actionable guidance
4. **Provide informative error messages** suggesting users re-parse
documents with the current embedding model
5. **Handle both scopes** (`file` and `kb` modes)
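
A rough sketch of the kind of check described above, with illustrative names (not the actual implementation):

```python
import logging

def filter_chunks_with_vectors(chunks: list[dict], vector_size: int) -> list[dict]:
    """Keep only chunks that carry the expected vector field (illustrative sketch)."""
    field = f"q_{vector_size}_vec"  # e.g. "q_1024_vec"
    usable = []
    for chunk in chunks:
        if field not in chunk:
            logging.warning(
                "Skipping chunk %s: missing %s; re-parse the document with the current embedding model.",
                chunk.get("id"), field,
            )
            continue
        usable.append(chunk)
    return usable
```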

## Changes

- `rag/svr/task_executor.py`: Added validation and error handling in
`run_raptor_for_kb()`

## Testing

1. Create a knowledge base with an embedding model
2. Parse documents
3. Change the embedding model to one with a different vector size
4. Run RAPTOR task
5. **Before**: Crashes with `KeyError`
6. **After**: Gracefully skips incompatible chunks with informative
warnings

---

<!-- Gittensor Contribution Tag: @GlobalStar117 -->

Co-authored-by: GlobalStar117 <GlobalStar117@users.noreply.github.com>
2026-01-20 12:24:20 +08:00
1b1554c563 Docs: Added ingestion pipeline quickstart (#12708)
### What problem does this PR solve?

Added ingestion pipeline quickstart

### Type of change

- [x] Documentation Update
2026-01-20 09:48:32 +08:00
59f3da2bdf Fix: The time zone is unable to update properly in the database #12696 (#12704)
### What problem does this PR solve?

Fix: The time zone is unable to update properly in the database #12696

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-20 09:47:16 +08:00
b40d639fdb Add dataset with table parser type for Infinity and answer question in chat using SQL (#12541)
### What problem does this PR solve?

1) Create a dataset using the table parser for Infinity
2) Answer questions in chat using SQL

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
2026-01-19 19:35:14 +08:00
05da2a5872 Fix: When large models output data rapidly, the scrollbar cannot remain at the bottom. #12701 (#12702)
### What problem does this PR solve?



### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-19 19:09:41 +08:00
4fbaa4aae9 Bump to infinity v0.7.0-dev1 (#12699)
### What problem does this PR solve?

Bump to infinity v0.7.0-dev1

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
2026-01-19 16:36:03 +08:00
3188cd2659 fix: Ensure pip is available in venv for runtime installation (#12667)
## Summary

Fixes #12651

The Docker container was failing at startup with:
```
/ragflow/.venv/bin/python3: No module named pip
```

This occurred when `USE_DOCLING=true` because the `entrypoint.sh` tries
to use `uv pip install` to install docling at runtime.

## Root Cause

As explained in the issue:
1. `uv sync` creates a minimal, production-focused environment **without
pip**
2. The production stage copies the venv from builder
3. Runtime commands using `uv pip install` fail because pip is not
present

## Solution

Added `python -m ensurepip --upgrade` after `uv sync` in the Dockerfile
to ensure pip is available in the virtual environment:

```dockerfile
uv sync --python 3.12 --frozen && \
# Ensure pip is available in the venv for runtime package installation (fixes #12651)
.venv/bin/python3 -m ensurepip --upgrade
```

This is a minimal change that:
- Ensures pip is installed during build time
- Doesn't change any other behavior
- Allows runtime package installation via `uv pip install` to work

---
This is a Gittensor contribution.
gittensor:user:GlobalStar117

Co-authored-by: GlobalStar117 <GlobalStar117@users.noreply.github.com>
2026-01-19 16:08:14 +08:00
c4a982e9fa feat: add seekdb which is lite version of oceanbase (#12692)
### What problem does this PR solve?

Add seekdb as a doc_engine, which is the lite version of oceanbase.
close #12691
### Type of change

- [x] New Feature (non-breaking change which adds functionality)
2026-01-19 16:07:43 +08:00
b27dc26be3 fix: Update answer concatenation logic to handle overlapping values (#12676)
### What problem does this PR solve?

Update answer concatenation logic to handle overlapping values

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-19 16:06:36 +08:00
ab1836f216 An issue involving node.js OOM happened (#12690)
### What problem does this PR solve?
The Node.js memory issue occurred due to JavaScript heap exhaustion
during the Vite build process, which happens intermittently. Here's what
happened:

Root Cause:

When building the web frontend with npm run build, Vite needs to bundle,
transform, and optimize all JavaScript/TypeScript code
Node.js has a default maximum heap size of ~2GB
The RAGFlow web application is large enough that the build process
exceeded this limit
This triggered garbage collection failures ("Ineffective mark-compacts
near heap limit") and eventually crashed with exit code 134 (SIGABRT)

The solution I attempted:
I did not find a simple way to reduce Node.js memory usage during the
build, so I added
export NODE_OPTIONS="--max-old-space-size=4096" && \
to the build step to allocate a 4GB heap for Node.js.

### Type of change
- Bug Fix (non-breaking change which fixes an issue)

=> ERROR [builder 6/8] RUN --mount=type=cache,id=ragflow_npm,target=/ro
53.3s
[builder 6/8] RUN
--mount=type=cache,id=ragflow_npm,target=/root/.npm,sharing=locked cd
web && npm install && npm run build:
4.551
4.551 > prepare
4.551 > cd .. && husky web/.husky
4.551
4.810 .git can't be found
4.833 added 7 packages in 4s
4.833
4.833 499 packages are looking for funding
4.833 run npm fund for details
5.206
5.206 > build
5.206 > vite build --mode production
5.206
5.939 vite v7.3.0 building client environment for production...
6.169 transforming...
6.472
6.472 WARN
6.472
6.472
6.472 WARN warn - As of Tailwind CSS v3.3, the @tailwindcss/line-clamp
plugin is now included by default.
6.472
6.472
6.472 WARN warn - Remove it from the plugins array in your configuration
to eliminate this warning.
6.472
53.14
53.14 <--- Last few GCs --->
53.14
53.14 [41:0x55f82d0] 47673 ms: Scavenge (reduce) 2041.5 (2086.0) ->
2038.7 (2079.7) MB, 6.11 / 0.00 ms (average mu = 0.330, current mu =
0.319) allocation failure;
53.14 [41:0x55f82d0] 47727 ms: Scavenge (reduce) 2039.4 (2079.7) ->
2038.7 (2080.2) MB, 5.34 / 0.00 ms (average mu = 0.330, current mu =
0.319) allocation failure;
53.14 [41:0x55f82d0] 47809 ms: Scavenge (reduce) 2039.6 (2080.2) ->
2038.7 (2080.2) MB, 4.59 / 0.00 ms (average mu = 0.330, current mu =
0.319) allocation failure;
53.14
53.14
53.14 <--- JS stacktrace --->
53.14
53.14 FATAL ERROR: Ineffective mark-compacts near heap limit Allocation
failed - JavaScript heap out of memory
53.14 ----- Native stack trace -----
53.14
53.14 1: 0xb76db1 node::OOMErrorHandler(char const*, v8::OOMDetails
const&) [node]
53.14 2: 0xee62f0 v8::Utils::ReportOOMFailure(v8::internal::Isolate*,
char const*, v8::OOMDetails const&) [node]
53.14 3: 0xee65d7
v8::internal::V8::FatalProcessOutOfMemory(v8::internal::Isolate*, char
const*, v8::OOMDetails const&) [node]
53.14 4: 0x10f82d5 [node]
53.14 5: 0x10f8864
v8::internal::Heap::RecomputeLimits(v8::internal::GarbageCollector)
[node]
53.14 6: 0x110f754
v8::internal::Heap::PerformGarbageCollection(v8::internal::GarbageCollector,
v8::internal::GarbageCollectionReason, char const*) [node]
53.14 7: 0x110ff6c
v8::internal::Heap::CollectGarbage(v8::internal::AllocationSpace,
v8::internal::GarbageCollectionReason, v8::GCCallbackFlags) [node]
53.14 8: 0x11120ca v8::internal::Heap::HandleGCRequest() [node]
53.14 9: 0x107d737 v8::internal::StackGuard::HandleInterrupts() [node]
53.15 10: 0x151fb9a v8::internal::Runtime_StackGuard(int, unsigned
long*, v8::internal::Isolate*) [node]
53.15 11: 0x1959ef6 [node]
53.22 Aborted

[+] up 0/1
⠙ Image docker-ragflow Building 58.0s
Dockerfile:161

160 | COPY docs docs

161 | >>> RUN
--mount=type=cache,id=ragflow_npm,target=/root/.npm,sharing=locked \

162 | >>> cd web && npm install && npm run build

163 |

failed to solve: process "/bin/bash -c cd web && npm install && npm run
build" did not complete successfully: exit code: 134

View build details:
docker-desktop://dashboard/build/default/default/j68n2ke32cd8bte4y8fs471au
2026-01-19 14:28:38 +08:00
7a53d2dd97 Fix CVE-2025-59466 (#12679)
### What problem does this PR solve?


https://nodejs.org/en/blog/vulnerability/january-2026-dos-mitigation-async-hooks


### Type of change

- [X] Bug Fix (non-breaking change which fixes an issue)
2026-01-19 13:15:15 +08:00
f3d347f55f feat: Add n1n provider (#12680)
This PR adds n1n as an LLM provider to RAGFlow.

Co-authored-by: Qun <qun@ip-10-5-5-38.us-west-2.compute.internal>
2026-01-19 13:12:42 +08:00
9da48ab0bd fix: Handle NaN/Infinity values in ExeSQL JSON response (#12666)
## Summary

Fixes #12631

When SQL query results contain NaN (Not a Number) or Infinity values
(e.g., from division by zero or other calculations), the JSON
serialization would fail because **NaN and Infinity are not valid JSON
values**.

This caused the agent interface to show 'undefined' error, as described
in the issue where `EXAMINE_TIMES` became `NaN` and broke the JSON
parsing.

## Root Cause

The `convert_decimals` function in `exesql.py` was only handling
`Decimal` types, but not `float` values that could be `NaN` or
`Infinity`.

When these invalid JSON values were serialized:
```json
{"EXAMINE_TIMES": NaN}  // Invalid JSON!
```

The frontend JSON parser would fail, causing the 'undefined' error.

## Solution

Extended `convert_decimals` to detect `float` values and convert
`NaN`/`Infinity` to `null` before JSON serialization:

```python
if isinstance(obj, float):
    if math.isnan(obj) or math.isinf(obj):
        return None
    return obj
```

This ensures all SQL results can be properly serialized to valid JSON.

---
This is a Gittensor contribution.
gittensor:user:GlobalStar117

Co-authored-by: GlobalStar117 <GlobalStar117@users.noreply.github.com>
Co-authored-by: Jin Hai <haijin.chn@gmail.com>
Co-authored-by: Zhichang Yu <yuzhichang@gmail.com>
2026-01-19 12:46:06 +08:00
4a7e40630b Refactor:memory delete will re-use super method (#12684)
### What problem does this PR solve?
memory delete will re-use super method

### Type of change

- [x] Refactoring
2026-01-19 12:45:37 +08:00
d6897b6054 Fix chat error (#12693)
### What problem does this PR solve?

As title.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

---------

Signed-off-by: Jin Hai <haijin.chn@gmail.com>
2026-01-19 12:45:14 +08:00
828ae1e82f Round float value of minimum_should_match (#12688)
### What problem does this PR solve?

In paragraph() of class FulltextQueryer, "len(keywords) / 10" should be
rounded to an integer before being set as minimum_should_match.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-19 11:39:33 +08:00
57d189b483 fix: Correct gitlab_url access in sync_data_source.py (#12681)
### What problem does this PR solve?

Correct gitlab_url access. See
https://github.com/infiniflow/ragflow/blob/main/web/src/pages/user-setting/data-source/constant/index.tsx#L660-L666

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-19 11:01:34 +08:00
0a8eb11c3d fix: Add proper error handling for database reconnection attempts (#12650)
## Problem
When database connection is lost, the reconnection logic had a bug: if
the first reconnect attempt failed, the second attempt was not wrapped
in error handling, causing unhandled exceptions.

## Solution
Added proper try-except blocks around the second reconnect attempt in
both MySQL and PostgreSQL database classes to ensure errors are properly
logged and handled.
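
A hypothetical sketch of what the guarded retry looks like (the actual methods live on the pooled database classes named below):

```python
import logging

def handle_connection_loss(db) -> None:
    """Illustrative sketch: both reconnect attempts wrapped in error handling."""
    try:
        db.close()
        db.connect()
    except Exception:
        logging.exception("First reconnect attempt failed; retrying once")
        try:
            db.connect()
        except Exception:
            logging.exception("Second reconnect attempt failed")
            raise
```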

## Changes
- Fixed `_handle_connection_loss()` in `RetryingPooledMySQLDatabase`
- Fixed `_handle_connection_loss()` in
`RetryingPooledPostgresqlDatabase`

Fixes #12294

---

Contribution by Gittensor, see my contribution statistics at
https://gittensor.io/miners/details?githubId=158349177

Co-authored-by: SID <158349177+0xsid0703@users.noreply.github.com>
2026-01-19 09:48:10 +08:00
38f0a92da9 Use RAGFlow CLI to replace RAGFlow Admin CLI (#12653)
### What problem does this PR solve?

```
$ python admin/client/ragflow_cli.py -t user -u aaa@aaa.com -p 9380

ragflow> list datasets;
ragflow> list default models;
ragflow> show version;

```


### Type of change

- [x] New Feature (non-breaking change which adds functionality)

---------

Signed-off-by: Jin Hai <haijin.chn@gmail.com>
2026-01-17 17:52:38 +08:00
067ddcbf23 Docs: Added configure memory (#12665)
### What problem does this PR solve?

As title.

### Type of change

- [x] Documentation Update
2026-01-17 17:49:19 +08:00
46305ef35e Add User API Token Management to Admin API and CLI (#12595)
## Summary

This PR extends the RAGFlow Admin API and CLI with comprehensive user
API token management capabilities. Administrators can now generate,
list, and delete API tokens for users through both the REST API and the
Admin CLI interface.

## Changes

### Backend API (`admin/server/`)

#### New Endpoints
- **POST `/api/v1/admin/users/<username>/new_token`** - Generate a new
API token for a user
- **GET `/api/v1/admin/users/<username>/token_list`** - List all API
tokens for a user
- **DELETE `/api/v1/admin/users/<username>/token/<token>`** - Delete a
specific API token for a user

#### Service Layer Updates (`services.py`)
- Added `get_user_api_key(username)` - Retrieves all API tokens for a
user
- Added `save_api_token(api_token)` - Saves a new API token to the
database
- Added `delete_api_token(username, token)` - Deletes an API token for a
user

### Admin CLI (`admin/client/`)

#### New Commands
- **`GENERATE TOKEN FOR USER <username>;`** - Generate a new API token
for the specified user
- **`LIST TOKENS OF <username>;`** - List all API tokens associated with
a user
- **`DROP TOKEN <token> OF <username>;`** - Delete a specific API token
for a user

### Testing

Added comprehensive test suite in `test/testcases/test_admin_api/`:
- **`test_generate_user_api_key.py`** - Tests for API token generation
- **`test_get_user_api_key.py`** - Tests for listing user API tokens
- **`test_delete_user_api_key.py`** - Tests for deleting API tokens
- **`conftest.py`** - Shared test fixtures and utilities

## Technical Details

### Token Generation
- Tokens are generated using `generate_confirmation_token()` utility
- Each token includes metadata: `tenant_id`, `token`, `beta`,
`create_time`, `create_date`
- Tokens are associated with user tenants automatically

### Security Considerations
- All endpoints require admin authentication (`@check_admin_auth`)
- Tokens are URL-encoded when passed in DELETE requests to handle
special characters
- Proper error handling for unauthorized access and missing resources

### API Response Format
All endpoints follow the standard RAGFlow response format:
```json
{
  "code": 0,
  "data": {...},
  "message": "Success message"
}
```

## Files Changed

- `admin/client/admin_client.py` - CLI token management commands
- `admin/server/routes.py` - New API endpoints
- `admin/server/services.py` - Token management service methods
- `docs/guides/admin/admin_cli.md` - CLI documentation updates
- `test/testcases/test_admin_api/conftest.py` - Test fixtures
- `test/testcases/test_admin_api/test_user_api_key_management/*` - Test
suites

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

---------

Co-authored-by: Alexander Strasser <alexander.strasser@ondewo.com>
Co-authored-by: Hetavi Shah <your.email@example.com>
2026-01-17 15:21:00 +08:00
bd9163904a fix(ob_conn): ignore duplicate errors when executing 'create_idx' (#12661)
### What problem does this PR solve?

Skip duplicate errors to avoid 'create_idx' failures caused by slow
metadata refresh or external modifications.


### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-16 20:46:37 +08:00
b6d7733058 Feat: metadata settings in KB. (#12662)
### What problem does this PR solve?

#11910

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
2026-01-16 20:14:02 +08:00
4f036a881d Fix: Infinity keyword round-trip, highlight fallback, and KB update guards (#12660)
### What problem does this PR solve?

Fixes Infinity-specific API regressions: preserves ```important_kwd```
round‑trip for ```[""]```, restores required highlight key in retrieval
responses, and enforces Infinity guards for unsupported
```parser_id=tag``` and pagerank in ```/v1/kb/update```. Also removes a
slow/buggy pandas row-wise apply that was throwing ```ValueError``` and
causing flakiness.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-16 20:03:52 +08:00
59075a0b58 Fix : p3 level sdk test error for update chat (#12654)
### What problem does this PR solve?

fix for update chat failing

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-16 17:47:12 +08:00
30bd25716b Fix PDF Generator output variables not appearing in subsequent agent steps (#12619)
This commit fixes multiple issues preventing PDF Generator (Docs
Generator) output variables from being visible in the Output section and
available to downstream nodes.

### What problem does this PR solve?

Issues Fixed:
1. PDF Generator nodes initialized with empty object instead of proper
initial values
2. Output structure mismatch (had 'value' property that system doesn't
expect)
3. Missing 'download' output in form schema
4. Output list computed from static values instead of form state
5. Added null/undefined guard to transferOutputs function

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
Changes:
- web/src/pages/agent/constant/index.tsx: Fixed output structure in
initialPDFGeneratorValues
- web/src/pages/agent/hooks/use-add-node.ts: Initialize PDF Generator
with proper values
- web/src/pages/agent/form/pdf-generator-form/index.tsx: Fixed schema
and use form.watch
- web/src/pages/agent/form/components/output.tsx: Added null guard and
spacing
2026-01-16 16:50:53 +08:00
99dae3c64c Fix: In the agent loop, if the await response is selected as the variable, the operator cannot be selected. #12656 (#12657)
### What problem does this PR solve?

Fix: In the agent loop, if the await response is selected as the
variable, the operator cannot be selected. #12656

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-16 16:49:48 +08:00
045314a1aa Fix: duplicate content in chunk (#12655)
### What problem does this PR solve?

Fix: duplicate content in chunk #12336

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-16 15:32:04 +08:00
2b20d0b3bb Fix : Web API tests by normalizing errors, validation, and uploads (#12620)
### What problem does this PR solve?

Fixes web API behavior mismatches that caused test failures by
normalizing error responses, tightening validations, correcting error
messages, and closing upload file handles.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-16 11:09:22 +08:00
59f4c51222 fix(entrypoint): Preserve $ in passwords during template expansion (#12509)
### What problem does this PR solve?

Fix shell variable expansion to preserve $ in password defaults when 
env vars are unset. Fixes Azure RDS auto-rotated passwords (that contain
$) being
truncated during template processing.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-15 19:30:33 +08:00
8c1fbfb130 Fix:Some bugs (#12648)
### What problem does this PR solve?

Fix: Modified and optimized the metadata condition card component.
Fix: Use startOfDay and endOfDay to ensure the date range includes a
full day.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-15 19:28:22 +08:00
cec06bfb5d Fix: empty chunk issue. (#12638)
#12570

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-15 17:46:21 +08:00
2167e3a3c0 Docs: Added share memory (#12647)
### Type of change

- [x] Documentation Update
2026-01-15 17:21:36 +08:00
2ea8dddef6 fix(infinity): Use comma separator for important_kwd to preserve mult… (#12618)
## Problem

The `important_kwd` field in Infinity connector was using mismatched
separators:
- **Storage**: `list2str(v)` uses space as default separator
- **Reading**: `v.split()` splits by all whitespace

This causes multi-word keywords like `"Senior Fund Manager"` to be
incorrectly split into `["Senior", "Fund", "Manager"]`.

## Solution

Use comma `,` as separator for both storing and reading, consistent
with:
1. The LLM output format in `keyword_prompt.md` ("delimited by
ENGLISH COMMA")
2. The `cached.split(",")` in `task_executor.py`

## Changes

- `insert()`: `list2str(v)` → `list2str(v, ",")`
- `update()`: `list2str(v)` → `list2str(v, ",")`
- `get_fields()`: `v.split()` → `v.split(",") if v else []`
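
A minimal sketch of the round trip this change aims for (the `list2str` helper shown here is a stand-in, not the actual RAGFlow implementation):

```python
# Sketch only: illustrates the comma-separated round trip described above.
def list2str(values, sep=" "):
    # Assumed helper: joins a list of keywords into one stored string.
    return sep.join(values)

keywords = ["Senior Fund Manager", "Quarterly Report"]

# Before: space separator on write, whitespace split on read -> keywords fragment.
stored_old = list2str(keywords)        # "Senior Fund Manager Quarterly Report"
read_old = stored_old.split()          # ['Senior', 'Fund', 'Manager', 'Quarterly', 'Report']

# After: comma separator on both sides keeps multi-word keywords intact.
stored_new = list2str(keywords, ",")   # "Senior Fund Manager,Quarterly Report"
read_new = stored_new.split(",") if stored_new else []
assert read_new == keywords
```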

## Impact

This bug affects:
- Python-level reranking weight calculation (`important_kwd * 5`)
- API response keyword display
- Search precision due to fragmented keywords
2026-01-15 15:32:40 +08:00
18867daba7 chore: bump pyobvector from 0.2.18 to 0.2.22 (#12640)
### What problem does this PR solve?

Update ob client

### Type of change

- [x] Other (please describe):dependency upgrade
2026-01-15 15:21:34 +08:00
d68176326d feat: add oceanbase mount to gitignore (#12642)
### What problem does this PR solve?

feat: add oceanbase mount to .gitignore

### Type of change

- [x] Refactoring
2026-01-15 15:20:40 +08:00
d531bd4f1a Fix: Editing the agent greeting causes the greeting to be continuously added to the message list. #12635 (#12636)
### What problem does this PR solve?

Fix: Editing the agent greeting causes the greeting to be continuously
added to the message list. #12635
### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-15 14:55:19 +08:00
ac936005e6 fix: ensure deleted chunks are not returned in retrieval (#12520) (#12546)
## Summary
Fixes #12520 - Deleted chunks should not appear in retrieval/reference
results.

## Changes

### Core Fix
- **api/apps/chunk_app.py**: Include `doc_id` in delete condition to
properly scope the delete operation

### Improved Error Handling
- **api/db/services/document_service.py**: Better separation of concerns
with individual try-catch blocks and proper logging for each cleanup
operation

### Doc Store Updates
- **rag/utils/es_conn.py**: Updated delete query construction to support
compound conditions
- **rag/utils/opensearch_conn.py**: Same updates for OpenSearch
compatibility
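
A minimal sketch of what a compound delete condition could look like when translated into an Elasticsearch bool query (the field names and helper are illustrative, not the actual es_conn.py code):

```python
# Sketch only: a compound delete filter scoping chunk deletion to one document.
# Field names ("id", "doc_id") are assumed for illustration.
def build_delete_query(condition: dict) -> dict:
    must = []
    for field, value in condition.items():
        if isinstance(value, list):
            must.append({"terms": {field: value}})
        else:
            must.append({"term": {field: value}})
    return {"query": {"bool": {"must": must}}}

# Deleting by chunk id AND doc_id avoids touching chunks of other documents.
query = build_delete_query({"id": ["chunk1", "chunk2"], "doc_id": "doc42"})
```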

### Tests
- **test/testcases/.../test_retrieval_chunks.py**: Added
`TestDeletedChunksNotRetrievable` class with regression tests
- **test/unit/test_delete_query_construction.py**: Unit tests for delete
query construction

## Testing
- Added regression tests that verify deleted chunks are not returned by
retrieval API
- Tests cover single chunk deletion and batch deletion scenarios
2026-01-15 14:45:55 +08:00
d8192f8f17 Fix: validate regex pattern in split_with_pattern to prevent crash (#12633)
### What problem does this PR solve?

Fix regex pattern validation in split_with_pattern (#12605)

- Add try-except block to validate user-provided regex patterns before
use
- Gracefully fallback to single chunk when invalid regex is provided
- Prevent server crash during DOCX parsing with malformed delimiters

## Problem

Parsing DOCX files with custom regex delimiters crashes with `re.error:
nothing to repeat at position 9` when users provide invalid regex
patterns.

Closes #12605 

## Solution

Validate and compile regex pattern before use. On invalid pattern, log
warning and return content as single chunk instead of crashing.

## Changes

- `rag/nlp/__init__.py`: Add regex validation in `split_with_pattern()`
function
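
A minimal sketch of the validation described above (the signature is assumed, not the exact `split_with_pattern()` in `rag/nlp/__init__.py`):

```python
import logging
import re

# Sketch only: validate a user-supplied delimiter pattern before splitting,
# falling back to a single chunk when the regex is invalid.
def split_with_pattern(content: str, pattern: str) -> list[str]:
    try:
        compiled = re.compile(pattern)
    except re.error as e:
        logging.warning("Invalid delimiter regex %r (%s); returning content as a single chunk", pattern, e)
        return [content]
    return [piece for piece in compiled.split(content) if piece]
```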

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

Contribution by Gittensor, see my contribution statistics at
https://gittensor.io/miners/details?githubId=42954461
2026-01-15 14:24:51 +08:00
eb35e2b89f Fix: async invocation issue. (#12634)
### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-15 14:22:16 +08:00
97b983fd0b fix: add fallback parser list for empty parser_ids (#12632)
### What problem does this PR solve?

Fixes #12570 - The slicing method dropdown was empty when deploying
RAGFlow v0.23.1 from source code.

The issue occurred because `parser_ids` from the tenant info was empty
or undefined, causing `useSelectParserList` to return an empty array.
This PR adds a fallback to a default parser list when `parser_ids` is
empty, ensuring the dropdown always has options.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

---
Contribution by Gittensor, see my contribution statistics at
https://gittensor.io/miners/details?githubId=94194147
2026-01-15 14:05:25 +08:00
b40a7b2e7d Feat: Hash doc id to avoid duplicate name. (#12573)
### What problem does this PR solve?

Feat: Hash doc id to avoid duplicate name. 

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
2026-01-15 14:02:15 +08:00
9a10558f80 Refa: async retrieval process. (#12629)
### Type of change

- [x] Refactoring
- [x] Performance Improvement
2026-01-15 12:28:49 +08:00
f82628c40c Fix: langfuse connection error handling #12621 (#12626)
## Description

Fixes connection error handling when langfuse service is unavailable.
The application now gracefully handles connection failures instead of
crashing.

## Changes

- Wrapped `langfuse.auth_check()` calls in try-except blocks in:
  - `api/db/services/dialog_service.py`
  - `api/db/services/tenant_llm_service.py`

## Problem

When langfuse service is unavailable or connection is refused,
`langfuse.auth_check()` throws `httpx.ConnectError: [Errno 111]
Connection refused`, causing the application to crash during document
parsing or dialog operations.

## Solution

Added try-except blocks around `langfuse.auth_check()` calls to catch
connection errors and gracefully skip langfuse tracing instead of
crashing. The application continues functioning normally even when
langfuse is unavailable.
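
A minimal sketch of the guard, assuming an already-constructed Langfuse client (not the exact code in the two services):

```python
import logging

# Sketch only: tolerate an unreachable Langfuse endpoint instead of crashing.
def langfuse_enabled(langfuse) -> bool:
    try:
        return langfuse.auth_check()
    except Exception:
        # e.g. httpx.ConnectError when the service is down or unreachable
        logging.warning("Langfuse is unavailable; tracing is skipped for this request")
        return False
```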

## Related Issue

Fixes #12621

---

Contribution by Gittensor, see my contribution statistics at
https://gittensor.io/miners/details?githubId=158349177
2026-01-15 11:23:15 +08:00
7af98328f5 Fix: the styles of the multi-select component and the filter pop-up. (#12628)
### What problem does this PR solve?

Fix: Fix the styles of the multi-select component and the filter pop-up.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-15 10:53:18 +08:00
678a4f959c Fix: skip internal bookmark references in DOCX parsing (#12604) (#12611)
### What problem does this PR solve?

Fixes #12604 - DOCX files containing hyperlinks to internal bookmarks
(e.g., `#_文档目录`) cause a `KeyError` during parsing:

```
KeyError: "There is no item named 'word/#_文档目录' in the archive"
```

This happens because python-docx incorrectly tries to read internal
bookmark references as files from the ZIP archive. Internal bookmarks
are relationship targets starting with `#` and are not actual files.

This PR extends the existing `load_from_xml_v2` workaround (which
already handles `NULL` targets) to also skip relationship targets
starting with `#`.
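
A minimal sketch of the skip condition, with illustrative names rather than the actual python-docx/`load_from_xml_v2` internals:

```python
# Sketch only: when iterating relationship targets of a DOCX part, skip entries
# that do not correspond to real files in the ZIP archive.
def should_skip_relationship(target_ref: str | None) -> bool:
    if not target_ref or target_ref == "NULL":
        return True   # already handled by the existing workaround
    if target_ref.startswith("#"):
        return True   # internal bookmark (e.g. "#_文档目录"), not a ZIP member
    return False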

Related upstream issue:
https://github.com/python-openxml/python-docx/issues/902

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

---
Contribution by Gittensor, see my contribution statistics at
https://gittensor.io/miners/details?githubId=94194147
2026-01-14 19:08:46 +08:00
15a8bb2e9c Fix: chunk list async issue. (#12615)
### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-14 17:32:07 +08:00
b091ff2730 Fix enable_thinking parameter for Qwen3 models (#12603)
### Issue

When using Qwen3 models (`qwen3-32b`, `qwen3-max`) through the
Tongyi-Qianwen provider for non-streaming calls (e.g., knowledge graph
generation), the API fails with:

Closes #12424

```
parameter.enable_thinking must be set to false for non-streaming calls
```

### Root Cause

In `LiteLLMBase.async_chat()`, the `extra_body={"enable_thinking":
False}` was set in `kwargs` but never forwarded to
`_construct_completion_args()`.

### What problem does this PR solve?

Pass merged kwargs to `_construct_completion_args()` using
`**{**gen_conf, **kwargs}` to safely handle potential duplicate
parameters.

### Changes

- `rag/llm/chat_model.py`: Forward kwargs containing `extra_body` to
`_construct_completion_args()` in `async_chat()`
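
A minimal sketch of the merge, with placeholder values (the real defaults live in `gen_conf`/`kwargs` inside `async_chat()`):

```python
# Sketch only: merge generation config with per-call kwargs so that
# extra_body={"enable_thinking": False} actually reaches the completion call.
gen_conf = {"temperature": 0.2, "max_tokens": 1024}          # illustrative defaults
kwargs = {"extra_body": {"enable_thinking": False}}

# Later keys win, so explicit kwargs override any duplicates in gen_conf.
merged = {**gen_conf, **kwargs}
# completion_args = self._construct_completion_args(**merged)  # as described above
```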



### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

Contribution by Gittensor, see my contribution statistics at
https://gittensor.io/miners/details?githubId=42954461
2026-01-14 16:35:46 +08:00
5b22f94502 Feat: Benchmark CLI additions and documentation (#12536)
### What problem does this PR solve?

This PR adds a dedicated HTTP benchmark CLI for RAGFlow chat and
retrieval endpoints so we can measure latency/QPS.

### Type of change

- [x] Documentation Update
- [x] Other (please describe): Adds a CLI benchmarking tool for
chat/retrieval latency/QPS

---------

Co-authored-by: Liu An <asiro@qq.com>
2026-01-14 13:49:16 +08:00
a7671583b3 Feat: add CN regions for AWS (#12610)
### What problem does this PR solve?

Add CN regions for AWS.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
2026-01-14 12:34:55 +08:00
d32fa02d97 Fix: Unable to copy category node. #12607 (#12609)
### What problem does this PR solve?

Fix: Unable to copy category node. #12607

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-14 11:45:31 +08:00
f72a35188d refactor: remove debug print statements (#12598)
### What problem does this PR solve?

This PR eliminates unnecessary debug print statements that were left in
hot paths of the codebase.

### Type of change

- [x] Refactoring
2026-01-14 10:05:34 +08:00
ea619dba3b Added to the HTTP API test suite (#12556)
### What problem does this PR solve?

This PR adds missing HTTP API test coverage for dataset
graph/GraphRAG/RAPTOR tasks, metadata summary, chat completions, agent
sessions/completions, and related questions. It also introduces minimal
HTTP test helpers to exercise these endpoints consistently with the
existing suite.

### Type of change

- [x]  Other (please describe): Test coverage (HTTP API tests)

---------

Co-authored-by: Liu An <asiro@qq.com>
2026-01-14 10:02:30 +08:00
36b0835740 Docs: Use memory (#12599)
### What problem does this PR solve?


### Type of change


- [x] Documentation Update
2026-01-14 09:40:31 +08:00
0795616b34 Align p3 HTTP/SDK tests with current backend behavior (#12563)
### What problem does this PR solve?

Updates pre-existing HTTP API and SDK tests to align with current
backend behavior (validation errors, 404s, and schema defaults). This
ensures p3 regression coverage is accurate without changing production
code.

### Type of change

- [x] Other (please describe): align p3 HTTP/SDK tests with current
backend behavior

---------

Co-authored-by: Liu An <asiro@qq.com>
2026-01-13 19:22:47 +08:00
941651a16f Fix: wrong input trace in Category component (#12590)
### What problem does this PR solve?

Wrong input trace in Category component

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-13 17:54:57 +08:00
360114ed42 fix(ob_conn): avoid reusing SQLAlchemy Column objects in DDL (#12588)
### What problem does this PR solve?

When there are multiple users, parsing a document for a new user can
trigger the reuse of column objects, leading to the error
`sqlalchemy.exc.ArgumentError: Column object 'id' already assigned to
Table xxx`.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-13 17:39:20 +08:00
ffedb2c6d3 Feat: The MetadataFilterConditions component supports adding values via search. (#12585)
### What problem does this PR solve?

Feat: The MetadataFilterConditions component supports adding values
via search.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
2026-01-13 17:03:25 +08:00
947e63ca14 Fixed typos and added pptx preview for frontend (#12577)
### What problem does this PR solve?
Previously, we added support for previewing PPT and PPTX files in the
backend. Now, we are adding it to the frontend, so when the slides in
the chat interface are referenced, they will no longer be blank.
### Type of change

- Bug Fix (non-breaking change which fixes an issue)
2026-01-13 17:02:36 +08:00
34d74d9928 fix: add uv-aarch64-unknown-linux-gnu.tar.gz to deps image (#12516)
### What problem does this PR solve?

Add uv-aarch64-unknown-linux-gnu.tar.gz to support building ARM64 Docker
images.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

Co-authored-by: Liu An <asiro@qq.com>
2026-01-13 15:37:32 +08:00
accae95126 Feat: Exported Agent JSON Should Include Conversation Variables Configuration #11796 (#12579)
### What problem does this PR solve?

Feat: Exported Agent JSON Should Include Conversation Variables
Configuration #11796

### Type of change


- [x] New Feature (non-breaking change which adds functionality)
2026-01-13 15:35:45 +08:00
68e5c86e9c Fix: image not displaying thumbnails when using pipeline (#12574)
### What problem does this PR solve?

Fix image not displaying thumbnails when using pipeline.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-13 12:54:13 +08:00
64c75d558e Fix: zip extraction vulnerabilities in MinerU and TCADP (#12527)
### What problem does this PR solve?

Fix zip extraction vulnerabilities:
   - Block symlink entries in zip files.
   - Reject encrypted zip entries.
   - Prevent absolute path attacks (including Windows paths).
   - Block path traversal attempts (../).
   - Stop zip slip exploits (directory escape).
   - Use streaming for memory-safe file handling.
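
A minimal sketch condensing the checks listed above (not the exact MinerU/TCADP extraction code):

```python
import os
import shutil
import zipfile

# Sketch only: reject unsafe zip entries, then stream each file to disk.
def safe_extract(zip_path: str, dest_dir: str) -> None:
    dest_dir = os.path.realpath(dest_dir)
    with zipfile.ZipFile(zip_path) as zf:
        for info in zf.infolist():
            if info.flag_bits & 0x1:
                raise ValueError("encrypted zip entries are rejected")
            if (info.external_attr >> 16) & 0o170000 == 0o120000:
                raise ValueError("symlink entries are rejected")
            name = info.filename.replace("\\", "/")
            if name.startswith("/") or (len(name) > 1 and name[1] == ":"):
                raise ValueError("absolute paths are rejected")
            target = os.path.realpath(os.path.join(dest_dir, name))
            if not target.startswith(dest_dir + os.sep):
                raise ValueError("path traversal / zip slip rejected")
            if info.is_dir():
                os.makedirs(target, exist_ok=True)
                continue
            os.makedirs(os.path.dirname(target), exist_ok=True)
            # Stream the entry to disk instead of loading it into memory.
            with zf.open(info) as src, open(target, "wb") as dst:
                shutil.copyfileobj(src, dst)
```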

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-13 12:24:50 +08:00
41c84fd78f Add MIME types for PPT and PPTX files (#12562)
Otherwise, slide files cannot be opened in Chat module

### What problem does this PR solve?

Backend reason (API): in the backend file api/utils/web_utils.py, the
CONTENT_TYPE_MAP dictionary is missing the MIME type mappings for ppt
and pptx. As a result, when the frontend requests a PPTX file, the
backend cannot tell the browser the correct content type, so the file
is displayed incorrectly (a type identification error).
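
The fix presumably adds the standard PowerPoint MIME types to that map, along these lines (entries shown in isolation):

```python
# Sketch only: the standard MIME types for PowerPoint files.
CONTENT_TYPE_MAP = {
    # ... existing entries ...
    "ppt": "application/vnd.ms-powerpoint",
    "pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
}
```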

### Type of change

-  Bug Fix (non-breaking change which fixes an issue)

---------

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
2026-01-13 12:17:49 +08:00
d76912ab15 Fix: Use uv pip install for Docling installation (#12567)
Fixes #12440
### What problem does this PR solve?
The current implementation uses `python3 -m pip` which can fail in
certain environments. This change leverages `uv pip install` instead,
which aligns with the project's existing tooling.

### Type of change
- Removed the ensurepip line (not needed since uv manages pip)
- Changed python3 to "$PY" for consistency with the rest of the script
- Changed python3 -m pip install to uv pip install

Co-authored-by: Gongzi <gongzi@192.168.0.100>
2026-01-13 11:48:42 +08:00
4fe3c24198 feat: PaddleOCR PDF parser supports thumbnails and positions (#12565)
### What problem does this PR solve?

1. PaddleOCR PDF parser supports thumbnails and positions.
2. Add FAQ documentation for PaddleOCR PDF parser.


### Type of change

- [x] New Feature (non-breaking change which adds functionality)
2026-01-13 09:51:08 +08:00
44bada64c9 Feat: support tree structured deep-research policy. (#12559)
### What problem does this PR solve?

#12558
### Type of change

- [x] New Feature (non-breaking change which adds functionality)
2026-01-13 09:41:35 +08:00
867ec94258 revert white-space changes in docs (#12557)
### What problem does this PR solve?

Trailing white-spaces in commit 6814ace1aa
were automatically trimmed by the code editor, which may break
documentation typesetting.

This mostly affects double spaces used as soft line breaks.

### Type of change

- [x] Documentation Update
2026-01-13 09:41:02 +08:00
fd0a1fde6b Feat: Enhanced metadata functionality (#12560)
### What problem does this PR solve?

Feat: Enhanced metadata functionality
- Metadata filtering supports searching.
- Values can be directly modified.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
2026-01-12 19:05:33 +08:00
653001b14f Doc: python sdk document (#12554)
### What problem does this PR solve?

Add python sdk document for memory api.

### Type of change

- [x] Documentation Update
2026-01-12 15:31:02 +08:00
d4f8c724ed Fix:Automatically enable metadata and optimize parser dialog logic (#12553)
### What problem does this PR solve?

Fix:Automatically enable metadata and optimize parser dialog logic

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-12 15:29:50 +08:00
a7dd3b7e9e Add time cost when start servers (#12552)
### What problem does this PR solve?

- API server
- Ingestion server
- Data sync server
- Admin server

### Type of change

- [x] Refactoring

Signed-off-by: Jin Hai <haijin.chn@gmail.com>
2026-01-12 12:48:23 +08:00
638c510468 refactor: introduce common normalize method in rerank base class (#12550)
### What problem does this PR solve?

introduce common normalize method in rerank base class

### Type of change

- [x] Refactoring
2026-01-12 11:07:11 +08:00
ff11e3171e Feat: SandBox docker CLI error in ARM CPU #12433 (#12434)
### What problem does this PR solve?

Add multi-architecture support for Sandbox

Updated Dockerfile to support multiple architectures for Docker Sandbox
installation.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
2026-01-12 11:06:33 +08:00
030d6ba004 CI collect ragflow log (#12543)
### What problem does this PR solve?

As title

### Type of change

- [ ] Bug Fix (non-breaking change which fixes an issue)
- [ ] New Feature (non-breaking change which adds functionality)
- [ ] Documentation Update
- [ ] Refactoring
- [ ] Performance Improvement
- [x] Other (please describe): CI
2026-01-10 09:52:32 +08:00
b226e06e2d refactor: remove debug print statements (#12534)
### What problem does this PR solve?

refactor: remove debug print statements

### Type of change

- [x] Refactoring
2026-01-09 19:23:50 +08:00
2e09db02f3 feat: add paddleocr parser (#12513)
### What problem does this PR solve?

Add PaddleOCR as a new PDF parser.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
2026-01-09 17:48:45 +08:00
6abf55c048 Feat: support openapi (#12521)
### What problem does this PR solve?
Support OpenAPI interface description.

The issue of not supporting the Swagger interface after upgrading the
system framework from Flask to Quart has been resolved.

Resolved https://github.com/infiniflow/ragflow/issues/5264

### Type of change
- [x] New Feature (non-breaking change which adds functionality)

---------

Co-authored-by: puhaiyang <“761396462@qq.com”>
2026-01-09 17:48:20 +08:00
f9d4179bf2 Feat:memory sdk (#12538)
### What problem does this PR solve?

Move memory and message apis to /api, and add sdk support.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
2026-01-09 17:45:58 +08:00
64b1e0b4c3 Feat: The translation model type options should be consistent with the model's labels. #1036 (#12537)
### What problem does this PR solve?

Feat: The translation model type options should be consistent with the
model's labels. #1036

### Type of change


- [x] New Feature (non-breaking change which adds functionality)
2026-01-09 17:39:40 +08:00
b65daeb945 Fix: Baiduyiyan key invalid (#12531)
### What problem does this PR solve?

Fix: Baiduyiyan key invalid

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-09 17:37:17 +08:00
fbe55cef05 fix: keep password in opendal config to fix connection initialization (#12529)
### What problem does this PR solve?

If we delete the password in kwargs, func 'init_db_config' will fail, so
we need to keep this field.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-09 14:19:32 +08:00
0878526ba8 Refactor: Refactoring OllamaModal using shadcn. #1036 (#12530)
### What problem does this PR solve?

Refactor: Refactoring OllamaModal using shadcn.  #1036

### Type of change

- [x] Refactoring
2026-01-09 13:42:28 +08:00
a2db3e3292 Fix: Bugs fixed (#12524)
### What problem does this PR solve?

Fix: Bugs fixed
- The issue of filter conditions not being able to be deleted on the
knowledge base file page
- The issue of metadata filter conditions not working.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-09 13:41:24 +08:00
f522391d1e Fix: "AttributeError(\"'list' object has no attribute 'get'\")" (#12518)
### What problem does this PR solve?
https://github.com/infiniflow/ragflow/issues/12515

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-09 10:19:51 +08:00
9562762af2 docs: fix embedding model switching tooltip (#12517)
### What problem does this PR solve?
After version 0.22.1, the embedding model supports switching; the
corresponding tooltip needs to be updated.

### Type of change

- [x] Documentation Update
2026-01-09 10:19:40 +08:00
455fd04050 Refactor: Replace Ant Design with shadcn in SparkModal, TencentCloudModal, HunyuanModal, and GoogleModal. #1036 (#12510)
### What problem does this PR solve?

Refactor: Replace Ant Design with shadcn in SparkModal,
TencentCloudModal, HunyuanModal, and GoogleModal. #1036
### Type of change

- [x] Refactoring
2026-01-08 19:42:45 +08:00
14c250e3d7 Fix adding column error (#12503)
### What problem does this PR solve?

1. Fix redundant column adding
2. Refactor the code

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
- [x] Refactoring

---------

Signed-off-by: Jin Hai <haijin.chn@gmail.com>
2026-01-08 16:44:53 +08:00
a093e616cf Fix: add multimodal models in chat api (#12496)
### What problem does this PR solve?

Fix: add multimodal models in chat api #11986
### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

---------

Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>
2026-01-08 16:12:08 +08:00
696397ebba Fix: apply kb setting while re-parsing.... (#12501)
### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-08 16:11:50 +08:00
6f1a555d5f Refa(sdk/python/test): remove unused testcases and utilities (#12505)
### What problem does this PR solve?

Removed the following dir:
- sdk/python/test/libs/
- sdk/python/test/test_http_api/
- sdk/python/test/test_sdk_api/

### Type of change

- [x] Refactoring
2026-01-08 16:11:35 +08:00
1996aa0dac Refactor: Enhance delta streaming in chat functions for improved reasoning and content handling (#12453)
### What problem does this PR solve?

change:
Enhance delta streaming in chat functions for improved reasoning and
content handling

### Type of change


- [x] Refactoring
2026-01-08 13:34:16 +08:00
f4e2783eb4 optimize doc id check: do not query db when doc id to validate is empty (#12500)
### What problem does this PR solve?
When a kb contains many documents (say 50,000) and retrieval is made
against the kb without specifying any doc ids, the query for all docs
from the db is unnecessary and can be skipped to improve performance.
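
A minimal sketch of the early return, with assumed helper names:

```python
# Sketch only: skip the per-kb document lookup when no doc ids were supplied.
def validate_doc_ids(doc_ids, fetch_all_doc_ids_from_db):
    if not doc_ids:
        return []  # nothing to validate, so no DB round trip is needed
    known = set(fetch_all_doc_ids_from_db())
    return [doc_id for doc_id in doc_ids if doc_id in known]
```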

### Type of change

- [x] Performance Improvement
2026-01-08 13:22:58 +08:00
2fd4a3134d Doc: memory http api (#12499)
### What problem does this PR solve?

Use task save function for add_message api, and added http API document.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
- [x] Documentation Update
2026-01-08 12:54:10 +08:00
f1dc2df23c Fix:Bedrock assume_role auth mode fails with LiteLLM "Extra inputs are not permitted" error (#12495)
### What problem does this PR solve?
https://github.com/infiniflow/ragflow/issues/12489

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-08 12:53:41 +08:00
de27c006d8 Feat: The chat feature supports streaming output, displaying results one by one. #12490 (#12493)
### What problem does this PR solve?

Feat: The chat feature supports streaming output, displaying results one
by one.

### Type of change


- [x] New Feature (non-breaking change which adds functionality)
2026-01-08 09:43:57 +08:00
23a9544b73 Fix: toc async issue. (#12485)
### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-07 15:35:30 +08:00
011bbe9556 Feat: support context window for docx (#12455)
### What problem does this PR solve?

Feat: support context window for docx

#12303

Done:
- [x] naive.py
- [x] one.py

TODO:
- [ ] book.py
- [ ] manual.py

Fix: incorrect image position
Fix: incorrect chunk type tag

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
- [x] New Feature (non-breaking change which adds functionality)
2026-01-07 15:08:17 +08:00
a442c9cac6 Fix: Fixed an issue where ESLint suggestions were not working in VS Code after upgrading to Vite. #12483 (#12484)
### What problem does this PR solve?

Fix: Fixed an issue where ESLint suggestions were not working in VS Code
after upgrading to Vite. #12483

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-07 14:32:17 +08:00
671e719d75 Feat: Memory-message supports categorized display (#12482)
### What problem does this PR solve?

Feat: Memory-message supports categorized display

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-07 13:48:40 +08:00
07845be5bd Fix: display agent name for extract messages (#12480)
### What problem does this PR solve?

Display agent name for extract messages

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-07 13:19:54 +08:00
8d406bd2e6 fix: prevent MinIO health check failure in multi-bucket mode (#12446)
### What problem does this PR solve?

- Fixes the health check failure in multi-bucket MinIO environments.
Previously, health checks would fail because the default
"ragflow-bucket" did not exist. This caused false negatives for system
health.

- Also removes the _health_check write in single-bucket mode to avoid
side effects (minor optimization).
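
A minimal sketch of a bucket-agnostic probe using the `minio` client (not the actual RAGFlow health check):

```python
from minio import Minio

# Sketch only: probe an existing bucket instead of assuming "ragflow-bucket" exists.
def health_check(client: Minio) -> bool:
    buckets = client.list_buckets()
    if not buckets:
        # No buckets yet is still a healthy service in multi-bucket mode.
        return True
    return client.bucket_exists(buckets[0].name)
```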

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-07 10:07:18 +08:00
2a4627d9a0 Fix: Issues and style fixes related to the 'Memory' page (#12469)
### What problem does this PR solve?

Fix:  Some bugs
- Issues and style fixes related to the 'Memory' page
- Data source icon replacement
- Build optimization

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-07 10:03:54 +08:00
6814ace1aa docs: update docs icons (#12465)
### What problem does this PR solve?

Update icons for docs.
Trailing spaces are auto-truncated by the editor; this does not affect
the real content.

### Type of change

- [x] Documentation Update
2026-01-07 10:00:09 +08:00
ca9645f39b Feat: adapt to , arglist (#12468)
### What problem does this PR solve?

Adapt to comma-joined argument lists in GET method URLs.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
2026-01-07 09:59:08 +08:00
8e03843145 fix: task executor with status "timeout" corrupts page when checking its details (#12467)
### What problem does this PR solve?

In **Admin UI** > **Service Status**, clicking "Show details" on a task
executor with status "Timeout" may corrupt the page.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-07 09:58:16 +08:00
51ece37db2 refactor: migrate env prefix to VITE_* (#12466)
### What problem does this PR solve?

`UMI_APP_*` to `VITE_*`

### Type of change

- [x] Refactoring
2026-01-07 09:39:18 +08:00
45fb2719cf Fix: update uv python installation to version 3.12 in Dockerfile (#12464)
### What problem does this PR solve?
issue:
https://github.com/infiniflow/ragflow/issues/12440
change:
update uv python installation to version 3.12 in Dockerfile

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-06 19:27:46 +08:00
bdd9f3d4d1 Fix: try handle authorization as api-token (#12462)
### What problem does this PR solve?

Try handle authorization as api-token when jwt load failed.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-06 19:25:42 +08:00
1f60863f60 Docs: Fixed a display issue. (#12463)
### What problem does this PR solve?


### Type of change

- [x] Documentation Update
2026-01-06 17:40:53 +08:00
02e6870755 Refactor: import_test_cases use bulk_create (#12456)
### What problem does this PR solve?

import_test_cases use bulk_create

### Type of change

- [x] Refactoring
2026-01-06 11:39:07 +08:00
aa08920e51 Fix: The avatar and greeting message no longer appear in the Agent iFrame. [#12410] (#12459)
### What problem does this PR solve?
Fix: The avatar and greeting message no longer appear in the Agent
iFrame. [#12410]

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-06 11:01:16 +08:00
7818644129 Fix: add uv binary archive to ignored files (#12451)
### What problem does this PR solve?

After I ran this command, 

```bash
uv run ./download_deps.py 
```

a file was not ignored.

```bash
❯ git status
On branch feat/ignore-uv
Untracked files:
  (use "git add <file>..." to include in what will be committed)
        uv-x86_64-unknown-linux-gnu.tar.gz

nothing added to commit but untracked files present (use "git add" to track)
```

Add this file name to `.gitignore`

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-05 20:22:35 +08:00
55c9fc0017 fix: add 'mom_id' column to OBConnection chunk table (#12444)
### What problem does this PR solve?

Fix #12428

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-05 19:31:44 +08:00
140dd2c8cc Refactor: Refactor FishAudioModal and BedrockModal using shadcn. #1036 (#12449)
### What problem does this PR solve?

Refactor: Refactor FishAudioModal and BedrockModal using shadcn. #1036

### Type of change

- [x] Refactoring
2026-01-05 19:27:56 +08:00
fada223249 Feat: process memory (#12445)
### What problem does this PR solve?

Add task status for raw message, and move extract message as a nested
property under raw message

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
2026-01-05 17:58:32 +08:00
00f8a80ca4 Fix: Some bugs (#12441)
### What problem does this PR solve?

Fix: Some bugs
- In a production environment, a second-level page refresh results in a
white screen.
- The knowledge graph cannot be opened.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-05 15:28:57 +08:00
4e9407b4ae Refactor: Refactoring AzureOpenAIModal using shadcn. #10427 (#12436)
### What problem does this PR solve?

Refactor: Refactoring AzureOpenAIModal using shadcn. #10427

### Type of change

- [x] Refactoring
2026-01-05 14:09:55 +08:00
42461bc378 Update admin doc (#12439)
### What problem does this PR solve?

update for 'list configs' and 'list envs'

### Type of change

- [x] Documentation Update

Signed-off-by: Jin Hai <haijin.chn@gmail.com>
2026-01-05 13:26:33 +08:00
92780c486a Add list configs and environments (#12438)
### What problem does this PR solve?

1. list configs;
2. list envs;

```
admin> list configs;
+-------------------------------------------------------------------------------------------+-----------+----+---------------+-------+----------------+
| extra                                                                                     | host      | id | name          | port  | service_type   |
+-------------------------------------------------------------------------------------------+-----------+----+---------------+-------+----------------+
| {}                                                                                        | 0.0.0.0   | 0  | ragflow_0     | 9380  | ragflow_server |
| {'meta_type': 'mysql', 'password': 'infini_rag_flow', 'username': 'root'}                 | localhost | 1  | mysql         | 5455  | meta_data      |
| {'password': 'infini_rag_flow', 'store_type': 'minio', 'user': 'rag_flow'}                | localhost | 2  | minio         | 9000  | file_store     |
| {'password': 'infini_rag_flow', 'retrieval_type': 'elasticsearch', 'username': 'elastic'} | localhost | 3  | elasticsearch | 1200  | retrieval      |
| {'db_name': 'default_db', 'retrieval_type': 'infinity'}                                   | localhost | 4  | infinity      | 23817 | retrieval      |
| {'database': 1, 'mq_type': 'redis', 'password': 'infini_rag_flow'}                        | localhost | 5  | redis         | 6379  | message_queue  |
| {'message_queue_type': 'redis'}                                                           |           | 6  | task_executor | 0     | task_executor  |
+-------------------------------------------------------------------------------------------+-----------+----+---------------+-------+----------------+
admin> list envs;
+-------------------------+------------------+
| env                     | value            |
+-------------------------+------------------+
| DOC_ENGINE              | elasticsearch    |
| DEFAULT_SUPERUSER_EMAIL | admin@ragflow.io |
| DB_TYPE                 | mysql            |
| DEVICE                  | cpu              |
| STORAGE_IMPL            | MINIO            |
+-------------------------+------------------+
admin> 
```

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

Signed-off-by: Jin Hai <haijin.chn@gmail.com>
2026-01-05 13:26:22 +08:00
81f9296d79 Fix: handle invalid img_id format in chunk update (#12422)
## Summary
- Fix ValueError when updating chunk with invalid/empty `img_id` format
- Add validation before splitting `img_id` by hyphen
- Use `split("-", 1)` to handle object names containing hyphens
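
A minimal sketch of the tolerant parsing described above (the helper name is illustrative):

```python
# Sketch only: tolerate empty or malformed img_id values and keep hyphens
# inside the object name ("bucket-objectname" format).
def parse_img_id(img_id: str | None) -> tuple[str, str] | None:
    if not img_id or "-" not in img_id:
        return None  # nothing to split, do not raise
    bucket, object_name = img_id.split("-", 1)
    return bucket, object_name

assert parse_img_id("kb1-folder-img_001.png") == ("kb1", "folder-img_001.png")
assert parse_img_id("") is None
assert parse_img_id("no_hyphen_here") is None
```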

## Test plan
- [x] Verify chunk update works with valid `img_id` (format:
`bucket-objectname`)
- [x] Verify chunk update doesn't crash with empty `img_id`
- [x] Verify chunk update doesn't crash when `img_id` has no hyphen
- [x] Verify ruff check passes

Fixes #12035

Signed-off-by: majiayu000 <1835304752@qq.com>
2026-01-05 11:27:19 +08:00
606f4e6c9e Refa: improve TOC building with better error handling (#12427)
### What problem does this PR solve?

Refactor TOC building logic to use enumerate instead of while loop, add
comprehensive error handling for missing/invalid chunk_id values, and
improve logging with more specific error messages. The changes make the
code more robust against malformed TOC data while maintaining the same
functionality for valid inputs.
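
A minimal sketch of the enumerate-based loop with the defensive checks described above (field names are illustrative, not the actual TOC code):

```python
import logging

# Sketch only: skip malformed TOC entries instead of trusting an index-based while loop.
def build_toc(entries: list[dict], chunks_by_id: dict[str, dict]) -> list[dict]:
    toc = []
    for i, entry in enumerate(entries):
        chunk_id = entry.get("chunk_id")
        if not chunk_id:
            logging.warning("TOC entry %d has no chunk_id, skipped: %r", i, entry)
            continue
        if chunk_id not in chunks_by_id:
            logging.warning("TOC entry %d references unknown chunk_id %r, skipped", i, chunk_id)
            continue
        toc.append({"title": entry.get("title", ""), "chunk_id": chunk_id})
    return toc
```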

### Type of change

- [x] Refactoring
2026-01-05 10:02:42 +08:00
4cd4526492 Feat: PDF vision figure parser supports reading context (#12416)
### What problem does this PR solve?

PDF vision figure parser supports reading context.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
2026-01-05 09:55:43 +08:00
cc8a10376a Refactor: Refactoring VolcEngine and Yiyan modal using shadcn. #10427 (#12426)
### What problem does this PR solve?

Refactor: Refactoring VolcEngine and Yiyan modal using shadcn. #10427
### Type of change


- [x] New Feature (non-breaking change which adds functionality)
2026-01-05 09:53:47 +08:00
5ebe334a2f Refactor setting type (#12425)
### What problem does this PR solve?

Refactor setting type

### Type of change

- [x] Refactoring

---------

Signed-off-by: Jin Hai <haijin.chn@gmail.com>
2026-01-04 20:26:12 +08:00
932496a8ec Fix:bug fix (#12423)
### What problem does this PR solve?
change: 
initialize webhook configuration in webhook function
remove debug print statement from airtable_connector
remove redundant uuid import in imap_connector

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-04 19:16:29 +08:00
a8a060676a Refactor: UmiJs -> Vite (#12410)
### What problem does this PR solve?

Refactor: UmiJs -> Vite+React

### Type of change

- [x] Refactoring

---------

Co-authored-by: Liu An <asiro@qq.com>
2026-01-04 19:14:20 +08:00
2c10ccd622 Chore(compose): remove unnecessary history_data_agent volume mount (#12418)
### What problem does this PR solve?

Removed the volume mount mapping
../history_data_agent:/ragflow/history_data_agent from
docker-compose.yml as it appears to be no longer in use

### Type of change

- [x] Chore
2026-01-04 16:58:23 +08:00
a2211c200d Feat: message write testcase (#12417)
### What problem does this PR solve?

Write testcase for message web apis.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

---------

Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>
2026-01-04 16:52:44 +08:00
21ba9e6d72 doc: update admin CLI (#12413)
### What problem does this PR solve?

`SHOW VERSION;`
- Display the current RAGFlow version.

`GRANT ADMIN <username>`
- Grant administrator privileges to the specified user.

`REVOKE ADMIN <username>`
- Revoke administrator privileges from the specified user.

`LIST VARS`
- List all system configurations and settings.

`SHOW VAR <var_name>`
- Display the content of a specific system configuration/setting by its
name or name prefix.

`SET VAR <var_name> <var_value>`
- Set the value for a specified configuration item.

related to: #12409 

### Type of change

- [x] Documentation Update

Signed-off-by: Jin Hai <haijin.chn@gmail.com>
2026-01-04 15:22:01 +08:00
ac9113b0ef feature: add system setting service (#12408)
### What problem does this PR solve?

#12409 

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

---------

Signed-off-by: Jin Hai <haijin.chn@gmail.com>
2026-01-04 14:21:39 +08:00
11779697de Test: get message content testcase (#12403)
### What problem does this PR solve?

Testcase for get_message_content api.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

---------

Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>
2026-01-04 11:25:24 +08:00
d6e006f086 Improve task executor heartbeat handling and cleanup (#12390)
Improve task executor heartbeat handling and cleanup.

### What problem does this PR solve?

- **Reduce lock contention during executor cleanup**: The cleanup lock
is acquired only when removing expired executors, not during regular
heartbeat reporting, reducing potential lock contention.

- **Optimize own heartbeat cleanup**: Each executor removes its own
expired heartbeat using `zremrangebyscore` instead of `zcount` +
`zpopmin`, reducing Redis operations and improving efficiency.

- **Improve cleanup of other executors' heartbeats**: Expired executors
are detected by checking their latest heartbeat, and stale entries are
removed safely.

- **Other improvements**: IP address and PID are captured once at
startup, and unnecessary global declarations are removed.
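
A minimal sketch of the own-heartbeat cleanup using `zremrangebyscore` (key name and TTL are illustrative, not the actual executor code):

```python
import time

import redis

# Sketch only: each executor trims its own expired heartbeats in one call,
# instead of zcount + repeated zpopmin.
HEARTBEAT_TTL = 60 * 30  # seconds

def report_heartbeat(r: redis.Redis, executor_id: str, payload: str) -> None:
    key = f"TASKEXE:{executor_id}"
    now = time.time()
    r.zadd(key, {payload: now})
    # A single range delete removes every heartbeat older than the TTL.
    r.zremrangebyscore(key, "-inf", now - HEARTBEAT_TTL)
```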

### Type of change

- [x] Performance Improvement

Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>
2026-01-04 11:24:05 +08:00
d39fa75d36 Fix: Not able to add MCP Server [#12394](https://github.com/infiniflow/ragflow/issues/12394) (#12406)
### What problem does this PR solve?

Fix: Not able to add MCP Server
[#12394](https://github.com/infiniflow/ragflow/issues/12394)

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-04 11:22:34 +08:00
f56bceb2a9 Fix: remove async wrappers (#12405)
### What problem does this PR solve?

Fix: remove async wrappers #12396

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-04 11:19:48 +08:00
Rin
bbaf918d74 security: harden OpenDAL SQL initialization against injection (#12393)
Eliminates SQL injection vectors in the OpenDAL MySQL initialization
logic by implementing strict input validation and explicit type casting.

**Modifications:**
1. **`init_db_config`**: Enforced integer casting for
`max_allowed_packet` before formatting it into the SQL string.
2. **`init_opendal_mysql_table`**: Implemented regex-based validation
for `table_name` to ensure only alphanumeric characters and underscores
are permitted, preventing arbitrary SQL command injection through
configuration parameters.

These changes ensure that even if configuration values are sourced from
untrusted environments, the database initialization remains secure.
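
A minimal sketch of the two validations in isolation (table and column names are illustrative, not the actual OpenDAL initialization code):

```python
import re

# Sketch only: explicit casting and allow-list validation before SQL formatting.
def safe_max_allowed_packet(value) -> int:
    # Anything non-numeric fails loudly instead of being interpolated into SQL.
    return int(value)

def safe_table_name(table_name: str) -> str:
    # Allow only alphanumerics and underscores before formatting into DDL.
    if not re.fullmatch(r"[A-Za-z0-9_]+", table_name):
        raise ValueError(f"invalid table name: {table_name!r}")
    return table_name

ddl = f"CREATE TABLE IF NOT EXISTS {safe_table_name('opendal_storage')} (k VARCHAR(255), v LONGBLOB)"
```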
2026-01-04 11:19:26 +08:00
89a97be2c5 Remove duplicated tag_feas assignment in create route (api/apps/chunk_app.py) (#12392)
### What problem does this PR solve?

This PR removes a duplicated assignment of `tag_feas` in the
`@manager.route('/create')` API handler located in
`api/apps/chunk_app.py`.

The same conditional block was unintentionally repeated twice, which had
no
functional impact but reduced code readability and maintainability.
This change eliminates the redundancy while preserving existing
behavior.

### Type of change

- [x] Refactoring

Co-authored-by: 김경만 <kmkim7@humaxit.com>
2026-01-04 10:32:36 +08:00
6f2fc2f1cb refactor:re order logics in clean_gen_conf (#12391)
### What problem does this PR solve?

Reorder logic in clean_gen_conf
#12388

### Type of change
- [x] Refactoring
2026-01-04 10:31:56 +08:00
42da080d89 Fix: Fixed the issue where the upload DSL dialog box was too narrow. #10427 (#12384)
### What problem does this PR solve?

Fix: Fixed the issue where the upload DSL dialog box was too narrow.
#10427

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>
2026-01-04 09:40:59 +08:00
1f4a17863f Feat: read web api testcases (#12383)
### What problem does this PR solve?

Web API testcase for list_messages, get_recent_message.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

---------

Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>
2026-01-01 12:52:40 +08:00
4d3a3a97ef Update HELP command of ADMIN CLI (#12387)
### What problem does this PR solve?

As title.

### Type of change

- [x] Refactoring

Signed-off-by: Jin Hai <haijin.chn@gmail.com>
2026-01-01 12:52:13 +08:00
ff1020ccfb ADMIN CLI: support grant/revoke user admin authorization (#12381)
### What problem does this PR solve?

```
admin> grant admin 'aaa@aaa1.com';
Fail to grant aaa@aaa1.com admin authorization, code: 404, message: User 'aaa@aaa1.com' not found
admin> grant admin 'aaa@aaa.com';
Grant successfully!
admin> revoke admin 'aaa1@aaa.com';
Fail to revoke aaa1@aaa.com admin authorization, code: 404, message: User 'aaa1@aaa.com' not found
admin> revoke admin 'aaa@aaa.com';
Revoke successfully!
admin> revoke admin 'aaa@aaa.com';
aaa@aaa.com isn't superuser, yet!
admin> grant admin 'aaa@aaa.com';
Grant successfully!
admin> grant admin 'aaa@aaa.com';
aaa@aaa.com is already superuser!
admin> revoke admin 'aaa@aaa.com';
Revoke successfully!

```

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

Signed-off-by: Jin Hai <haijin.chn@gmail.com>
Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>
2026-01-01 12:49:34 +08:00
844 changed files with 58323 additions and 42089 deletions

View File

@@ -1 +1,22 @@
Refer to [AGENTS.MD](../AGENTS.md) for all repo instructions.
# Project instructions for Copilot
## How to run (minimum)
- Install:
- python -m venv .venv && source .venv/bin/activate
- pip install -r requirements.txt
- Run:
- (fill) e.g. uvicorn app.main:app --reload
- Verify:
- (fill) curl http://127.0.0.1:8000/health
## Project layout (what matters)
- app/: API entrypoints + routers
- services/: business logic
- configs/: config loading (.env)
- docs/: documents
- tests/: pytest
## Conventions
- Prefer small, incremental changes.
- Add logging for new flows.
- Add/adjust tests for behavior changes.

View File

@@ -86,6 +86,9 @@ jobs:
mkdir -p ${RUNNER_WORKSPACE_PREFIX}/artifacts/${GITHUB_REPOSITORY}
echo "${PR_SHA} ${GITHUB_RUN_ID}" > ${PR_SHA_FP}
fi
ARTIFACTS_DIR=${RUNNER_WORKSPACE_PREFIX}/artifacts/${GITHUB_REPOSITORY}/${GITHUB_RUN_ID}
echo "ARTIFACTS_DIR=${ARTIFACTS_DIR}" >> ${GITHUB_ENV}
rm -rf ${ARTIFACTS_DIR} && mkdir -p ${ARTIFACTS_DIR}
# https://github.com/astral-sh/ruff-action
- name: Static check with Ruff
@@ -161,7 +164,7 @@ jobs:
INFINITY_THRIFT_PORT=$((23817 + RUNNER_NUM * 10))
INFINITY_HTTP_PORT=$((23820 + RUNNER_NUM * 10))
INFINITY_PSQL_PORT=$((5432 + RUNNER_NUM * 10))
MYSQL_PORT=$((5455 + RUNNER_NUM * 10))
EXPOSE_MYSQL_PORT=$((5455 + RUNNER_NUM * 10))
MINIO_PORT=$((9000 + RUNNER_NUM * 10))
MINIO_CONSOLE_PORT=$((9001 + RUNNER_NUM * 10))
REDIS_PORT=$((6379 + RUNNER_NUM * 10))
@@ -181,7 +184,7 @@ jobs:
echo -e "INFINITY_THRIFT_PORT=${INFINITY_THRIFT_PORT}" >> docker/.env
echo -e "INFINITY_HTTP_PORT=${INFINITY_HTTP_PORT}" >> docker/.env
echo -e "INFINITY_PSQL_PORT=${INFINITY_PSQL_PORT}" >> docker/.env
echo -e "MYSQL_PORT=${MYSQL_PORT}" >> docker/.env
echo -e "EXPOSE_MYSQL_PORT=${EXPOSE_MYSQL_PORT}" >> docker/.env
echo -e "MINIO_PORT=${MINIO_PORT}" >> docker/.env
echo -e "MINIO_CONSOLE_PORT=${MINIO_CONSOLE_PORT}" >> docker/.env
echo -e "REDIS_PORT=${REDIS_PORT}" >> docker/.env
@@ -199,8 +202,11 @@ jobs:
echo -e "RAGFLOW_IMAGE=${RAGFLOW_IMAGE}" >> docker/.env
echo "HOST_ADDRESS=http://host.docker.internal:${SVR_HTTP_PORT}" >> ${GITHUB_ENV}
# Patch entrypoint.sh for coverage
sed -i '/"\$PY" api\/ragflow_server.py \${INIT_SUPERUSER_ARGS} &/c\ echo "Ensuring coverage is installed..."\n "$PY" -m pip install coverage\n export COVERAGE_FILE=/ragflow/logs/.coverage\n echo "Starting ragflow_server with coverage..."\n "$PY" -m coverage run --source=./api/apps --omit="*/tests/*,*/migrations/*" -a api/ragflow_server.py ${INIT_SUPERUSER_ARGS} &' docker/entrypoint.sh
sudo docker compose -f docker/docker-compose.yml -p ${GITHUB_RUN_ID} up -d
uv sync --python 3.12 --only-group test --no-default-groups --frozen && uv pip install sdk/python --group test
uv sync --python 3.12 --group test --frozen && uv pip install -e sdk/python
- name: Run sdk tests against Elasticsearch
run: |
@@ -209,16 +215,16 @@ jobs:
echo "Waiting for service to be available..."
sleep 5
done
source .venv/bin/activate && set -o pipefail; pytest -s --tb=short --level=${HTTP_API_TEST_LEVEL} test/testcases/test_sdk_api 2>&1 | tee es_sdk_test.log
source .venv/bin/activate && set -o pipefail; pytest -s --tb=short --level=${HTTP_API_TEST_LEVEL} --junitxml=pytest-infinity-sdk.xml --cov=sdk/python/ragflow_sdk --cov-branch --cov-report=xml:coverage-es-sdk.xml test/testcases/test_sdk_api 2>&1 | tee es_sdk_test.log
- name: Run frontend api tests against Elasticsearch
- name: Run web api tests against Elasticsearch
run: |
export http_proxy=""; export https_proxy=""; export no_proxy=""; export HTTP_PROXY=""; export HTTPS_PROXY=""; export NO_PROXY=""
until sudo docker exec ${RAGFLOW_CONTAINER} curl -s --connect-timeout 5 ${HOST_ADDRESS}/v1/system/ping > /dev/null; do
echo "Waiting for service to be available..."
sleep 5
done
source .venv/bin/activate && set -o pipefail; pytest -s --tb=short sdk/python/test/test_frontend_api/get_email.py sdk/python/test/test_frontend_api/test_dataset.py 2>&1 | tee es_api_test.log
source .venv/bin/activate && set -o pipefail; pytest -s --tb=short --level=${HTTP_API_TEST_LEVEL} test/testcases/test_web_api 2>&1 | tee es_web_api_test.log
- name: Run http api tests against Elasticsearch
run: |
@@ -229,6 +235,154 @@ jobs:
done
source .venv/bin/activate && set -o pipefail; pytest -s --tb=short --level=${HTTP_API_TEST_LEVEL} test/testcases/test_http_api 2>&1 | tee es_http_api_test.log
- name: RAGFlow CLI retrieval test Elasticsearch
env:
PYTHONPATH: ${{ github.workspace }}
run: |
set -euo pipefail
source .venv/bin/activate
export http_proxy=""; export https_proxy=""; export no_proxy=""; export HTTP_PROXY=""; export HTTPS_PROXY=""; export NO_PROXY=""
EMAIL="ci-${GITHUB_RUN_ID}@example.com"
PASS="ci-pass-${GITHUB_RUN_ID}"
DATASET="ci_dataset_${GITHUB_RUN_ID}"
CLI="python admin/client/ragflow_cli.py"
LOG_FILE="es_cli_test.log"
: > "${LOG_FILE}"
ERROR_RE='Traceback|ModuleNotFoundError|ImportError|Parse error|Bad response|Fail to|code:\\s*[1-9]'
run_cli() {
local logfile="$1"
shift
local allow_re=""
if [[ "${1:-}" == "--allow" ]]; then
allow_re="$2"
shift 2
fi
local cmd_display="$*"
echo "===== $(date -u +\"%Y-%m-%dT%H:%M:%SZ\") CMD: ${cmd_display} =====" | tee -a "${logfile}"
local tmp_log
tmp_log="$(mktemp)"
set +e
timeout 180s "$@" 2>&1 | tee "${tmp_log}"
local status=${PIPESTATUS[0]}
set -e
cat "${tmp_log}" >> "${logfile}"
if grep -qiE "${ERROR_RE}" "${tmp_log}"; then
if [[ -n "${allow_re}" ]] && grep -qiE "${allow_re}" "${tmp_log}"; then
echo "Allowed CLI error markers in ${logfile}"
rm -f "${tmp_log}"
return 0
fi
echo "Detected CLI error markers in ${logfile}"
rm -f "${tmp_log}"
exit 1
fi
rm -f "${tmp_log}"
return ${status}
}
set -a
source docker/.env
set +a
HOST_ADDRESS="http://host.docker.internal:${SVR_HTTP_PORT}"
USER_HOST="$(echo "${HOST_ADDRESS}" | sed -E 's#^https?://([^:/]+).*#\1#')"
USER_PORT="${SVR_HTTP_PORT}"
ADMIN_HOST="${USER_HOST}"
ADMIN_PORT="${ADMIN_SVR_HTTP_PORT}"
until sudo docker exec ${RAGFLOW_CONTAINER} curl -s --connect-timeout 5 ${HOST_ADDRESS}/v1/system/ping > /dev/null; do
echo "Waiting for service to be available..."
sleep 5
done
admin_ready=0
for i in $(seq 1 30); do
if run_cli "${LOG_FILE}" $CLI --type admin --host "$ADMIN_HOST" --port "$ADMIN_PORT" --username "admin@ragflow.io" --password "admin" command "ping"; then
admin_ready=1
break
fi
sleep 1
done
if [[ "${admin_ready}" -ne 1 ]]; then
echo "Admin service did not become ready"
exit 1
fi
run_cli "${LOG_FILE}" $CLI --type admin --host "$ADMIN_HOST" --port "$ADMIN_PORT" --username "admin@ragflow.io" --password "admin" command "show version"
ALLOW_USER_EXISTS_RE='already exists|already exist|duplicate|already.*registered|exist(s)?'
run_cli "${LOG_FILE}" --allow "${ALLOW_USER_EXISTS_RE}" $CLI --type admin --host "$ADMIN_HOST" --port "$ADMIN_PORT" --username "admin@ragflow.io" --password "admin" command "create user '$EMAIL' '$PASS'"
user_ready=0
for i in $(seq 1 30); do
if run_cli "${LOG_FILE}" $CLI --type user --host "$USER_HOST" --port "$USER_PORT" --username "$EMAIL" --password "$PASS" command "ping"; then
user_ready=1
break
fi
sleep 1
done
if [[ "${user_ready}" -ne 1 ]]; then
echo "User service did not become ready"
exit 1
fi
run_cli "${LOG_FILE}" $CLI --type user --host "$USER_HOST" --port "$USER_PORT" --username "$EMAIL" --password "$PASS" command "show version"
run_cli "${LOG_FILE}" $CLI --type user --host "$USER_HOST" --port "$USER_PORT" --username "$EMAIL" --password "$PASS" command "create dataset '$DATASET' with embedding 'BAAI/bge-small-en-v1.5@Builtin' parser 'auto'"
run_cli "${LOG_FILE}" $CLI --type user --host "$USER_HOST" --port "$USER_PORT" --username "$EMAIL" --password "$PASS" command "import 'test/benchmark/test_docs/Doc1.pdf,test/benchmark/test_docs/Doc2.pdf' into dataset '$DATASET'"
run_cli "${LOG_FILE}" $CLI --type user --host "$USER_HOST" --port "$USER_PORT" --username "$EMAIL" --password "$PASS" command "parse dataset '$DATASET' sync"
run_cli "${LOG_FILE}" $CLI --type user --host "$USER_HOST" --port "$USER_PORT" --username "$EMAIL" --password "$PASS" command "Benchmark 16 100 search 'what are these documents about' on datasets '$DATASET'"
- name: Stop ragflow to save coverage Elasticsearch
if: ${{ !cancelled() }}
run: |
# Send SIGINT to ragflow_server.py to trigger coverage save
PID=$(sudo docker exec ${RAGFLOW_CONTAINER} ps aux | grep "ragflow_server.py" | grep -v grep | awk '{print $2}' | head -n 1)
if [ -n "$PID" ]; then
echo "Sending SIGINT to ragflow_server.py (PID: $PID)..."
sudo docker exec ${RAGFLOW_CONTAINER} kill -INT $PID
# Wait for process to exit and coverage file to be written
sleep 10
else
echo "ragflow_server.py not found!"
fi
sudo docker compose -f docker/docker-compose.yml -p ${GITHUB_RUN_ID} stop
- name: Generate server coverage report Elasticsearch
if: ${{ !cancelled() }}
run: |
# .coverage file should be in docker/ragflow-logs/.coverage
if [ -f docker/ragflow-logs/.coverage ]; then
echo "Found .coverage file"
cp docker/ragflow-logs/.coverage .coverage
source .venv/bin/activate
# Create .coveragerc to map container paths to host paths
echo "[paths]" > .coveragerc
echo "source =" >> .coveragerc
echo " ." >> .coveragerc
echo " /ragflow" >> .coveragerc
coverage xml -o coverage-es-server.xml
rm .coveragerc
# Clean up for next run
sudo rm docker/ragflow-logs/.coverage
else
echo ".coverage file not found!"
fi
- name: Collect ragflow log Elasticsearch
if: ${{ !cancelled() }}
run: |
if [ -d docker/ragflow-logs ]; then
cp -r docker/ragflow-logs ${ARTIFACTS_DIR}/ragflow-logs-es
echo "ragflow log" && tail -n 200 docker/ragflow-logs/ragflow_server.log || true
else
echo "No docker/ragflow-logs directory found; skipping log collection"
fi
sudo rm -rf docker/ragflow-logs || true
- name: Stop ragflow:nightly
if: always() # always run this step even if previous steps failed
run: |
@@ -247,16 +401,16 @@ jobs:
echo "Waiting for service to be available..."
sleep 5
done
source .venv/bin/activate && set -o pipefail; DOC_ENGINE=infinity pytest -s --tb=short --level=${HTTP_API_TEST_LEVEL} test/testcases/test_sdk_api 2>&1 | tee infinity_sdk_test.log
source .venv/bin/activate && set -o pipefail; DOC_ENGINE=infinity pytest -s --tb=short --level=${HTTP_API_TEST_LEVEL} --junitxml=pytest-infinity-sdk.xml --cov=sdk/python/ragflow_sdk --cov-branch --cov-report=xml:coverage-infinity-sdk.xml test/testcases/test_sdk_api 2>&1 | tee infinity_sdk_test.log
- name: Run frontend api tests against Infinity
- name: Run web api tests against Infinity
run: |
export http_proxy=""; export https_proxy=""; export no_proxy=""; export HTTP_PROXY=""; export HTTPS_PROXY=""; export NO_PROXY=""
until sudo docker exec ${RAGFLOW_CONTAINER} curl -s --connect-timeout 5 ${HOST_ADDRESS}/v1/system/ping > /dev/null; do
echo "Waiting for service to be available..."
sleep 5
done
source .venv/bin/activate && set -o pipefail; DOC_ENGINE=infinity pytest -s --tb=short sdk/python/test/test_frontend_api/get_email.py sdk/python/test/test_frontend_api/test_dataset.py 2>&1 | tee infinity_api_test.log
source .venv/bin/activate && set -o pipefail; DOC_ENGINE=infinity pytest -s --tb=short --level=${HTTP_API_TEST_LEVEL} test/testcases/test_web_api/test_api_app 2>&1 | tee infinity_web_api_test.log
- name: Run http api tests against Infinity
run: |
@@ -267,6 +421,159 @@ jobs:
done
source .venv/bin/activate && set -o pipefail; DOC_ENGINE=infinity pytest -s --tb=short --level=${HTTP_API_TEST_LEVEL} test/testcases/test_http_api 2>&1 | tee infinity_http_api_test.log
- name: RAGFlow CLI retrieval test Infinity
env:
PYTHONPATH: ${{ github.workspace }}
run: |
set -euo pipefail
source .venv/bin/activate
export http_proxy=""; export https_proxy=""; export no_proxy=""; export HTTP_PROXY=""; export HTTPS_PROXY=""; export NO_PROXY=""
EMAIL="ci-${GITHUB_RUN_ID}@example.com"
PASS="ci-pass-${GITHUB_RUN_ID}"
DATASET="ci_dataset_${GITHUB_RUN_ID}"
CLI="python admin/client/ragflow_cli.py"
LOG_FILE="infinity_cli_test.log"
: > "${LOG_FILE}"
ERROR_RE='Traceback|ModuleNotFoundError|ImportError|Parse error|Bad response|Fail to|code:\\s*[1-9]'
run_cli() {
local logfile="$1"
shift
local allow_re=""
if [[ "${1:-}" == "--allow" ]]; then
allow_re="$2"
shift 2
fi
local cmd_display="$*"
echo "===== $(date -u +\"%Y-%m-%dT%H:%M:%SZ\") CMD: ${cmd_display} =====" | tee -a "${logfile}"
local tmp_log
tmp_log="$(mktemp)"
set +e
timeout 180s "$@" 2>&1 | tee "${tmp_log}"
local status=${PIPESTATUS[0]}
set -e
cat "${tmp_log}" >> "${logfile}"
if grep -qiE "${ERROR_RE}" "${tmp_log}"; then
if [[ -n "${allow_re}" ]] && grep -qiE "${allow_re}" "${tmp_log}"; then
echo "Allowed CLI error markers in ${logfile}"
rm -f "${tmp_log}"
return 0
fi
echo "Detected CLI error markers in ${logfile}"
rm -f "${tmp_log}"
exit 1
fi
rm -f "${tmp_log}"
return ${status}
}
set -a
source docker/.env
set +a
HOST_ADDRESS="http://host.docker.internal:${SVR_HTTP_PORT}"
USER_HOST="$(echo "${HOST_ADDRESS}" | sed -E 's#^https?://([^:/]+).*#\1#')"
USER_PORT="${SVR_HTTP_PORT}"
ADMIN_HOST="${USER_HOST}"
ADMIN_PORT="${ADMIN_SVR_HTTP_PORT}"
until sudo docker exec ${RAGFLOW_CONTAINER} curl -s --connect-timeout 5 ${HOST_ADDRESS}/v1/system/ping > /dev/null; do
echo "Waiting for service to be available..."
sleep 5
done
admin_ready=0
for i in $(seq 1 30); do
if run_cli "${LOG_FILE}" $CLI --type admin --host "$ADMIN_HOST" --port "$ADMIN_PORT" --username "admin@ragflow.io" --password "admin" command "ping"; then
admin_ready=1
break
fi
sleep 1
done
if [[ "${admin_ready}" -ne 1 ]]; then
echo "Admin service did not become ready"
exit 1
fi
run_cli "${LOG_FILE}" $CLI --type admin --host "$ADMIN_HOST" --port "$ADMIN_PORT" --username "admin@ragflow.io" --password "admin" command "show version"
ALLOW_USER_EXISTS_RE='already exists|already exist|duplicate|already.*registered|exist(s)?'
run_cli "${LOG_FILE}" --allow "${ALLOW_USER_EXISTS_RE}" $CLI --type admin --host "$ADMIN_HOST" --port "$ADMIN_PORT" --username "admin@ragflow.io" --password "admin" command "create user '$EMAIL' '$PASS'"
user_ready=0
for i in $(seq 1 30); do
if run_cli "${LOG_FILE}" $CLI --type user --host "$USER_HOST" --port "$USER_PORT" --username "$EMAIL" --password "$PASS" command "ping"; then
user_ready=1
break
fi
sleep 1
done
if [[ "${user_ready}" -ne 1 ]]; then
echo "User service did not become ready"
exit 1
fi
run_cli "${LOG_FILE}" $CLI --type user --host "$USER_HOST" --port "$USER_PORT" --username "$EMAIL" --password "$PASS" command "show version"
run_cli "${LOG_FILE}" $CLI --type user --host "$USER_HOST" --port "$USER_PORT" --username "$EMAIL" --password "$PASS" command "create dataset '$DATASET' with embedding 'BAAI/bge-small-en-v1.5@Builtin' parser 'auto'"
run_cli "${LOG_FILE}" $CLI --type user --host "$USER_HOST" --port "$USER_PORT" --username "$EMAIL" --password "$PASS" command "import 'test/benchmark/test_docs/Doc1.pdf,test/benchmark/test_docs/Doc2.pdf' into dataset '$DATASET'"
run_cli "${LOG_FILE}" $CLI --type user --host "$USER_HOST" --port "$USER_PORT" --username "$EMAIL" --password "$PASS" command "parse dataset '$DATASET' sync"
run_cli "${LOG_FILE}" $CLI --type user --host "$USER_HOST" --port "$USER_PORT" --username "$EMAIL" --password "$PASS" command "Benchmark 16 100 search 'what are these documents about' on datasets '$DATASET'"
- name: Stop ragflow to save coverage Infinity
if: ${{ !cancelled() }}
run: |
# Send SIGINT to ragflow_server.py to trigger coverage save
PID=$(sudo docker exec ${RAGFLOW_CONTAINER} ps aux | grep "ragflow_server.py" | grep -v grep | awk '{print $2}' | head -n 1)
if [ -n "$PID" ]; then
echo "Sending SIGINT to ragflow_server.py (PID: $PID)..."
sudo docker exec ${RAGFLOW_CONTAINER} kill -INT $PID
# Wait for process to exit and coverage file to be written
sleep 10
else
echo "ragflow_server.py not found!"
fi
sudo docker compose -f docker/docker-compose.yml -p ${GITHUB_RUN_ID} stop
- name: Generate server coverage report Infinity
if: ${{ !cancelled() }}
run: |
# .coverage file should be in docker/ragflow-logs/.coverage
if [ -f docker/ragflow-logs/.coverage ]; then
echo "Found .coverage file"
cp docker/ragflow-logs/.coverage .coverage
source .venv/bin/activate
# Create .coveragerc to map container paths to host paths
echo "[paths]" > .coveragerc
echo "source =" >> .coveragerc
echo " ." >> .coveragerc
echo " /ragflow" >> .coveragerc
coverage xml -o coverage-infinity-server.xml
rm .coveragerc
else
echo ".coverage file not found!"
fi
- name: Upload coverage reports to Codecov
uses: codecov/codecov-action@v5
if: ${{ !cancelled() }}
with:
token: ${{ secrets.CODECOV_TOKEN }}
fail_ci_if_error: false
- name: Collect ragflow log
if: ${{ !cancelled() }}
run: |
if [ -d docker/ragflow-logs ]; then
cp -r docker/ragflow-logs ${ARTIFACTS_DIR}/ragflow-logs-infinity
echo "ragflow log" && tail -n 200 docker/ragflow-logs/ragflow_server.log || true
else
echo "No docker/ragflow-logs directory found; skipping log collection"
fi
sudo rm -rf docker/ragflow-logs || true
- name: Stop ragflow:nightly
if: always() # always run this step even if previous steps failed
run: |
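
For reference, the error gate that the CLI retrieval test's `run_cli` helper applies to every invocation (fail when the captured log contains error markers, unless an explicit allow pattern such as the "user already exists" case matches) can be sketched in Python. This is a minimal illustration only: the helper name `scan_cli_log` is invented here, and the workflow itself implements the check with `grep -qiE` in bash.

```python
import re

# Patterns mirrored from the workflow's ERROR_RE and ALLOW_USER_EXISTS_RE.
ERROR_RE = re.compile(
    r"Traceback|ModuleNotFoundError|ImportError|Parse error|Bad response|Fail to|code:\s*[1-9]",
    re.IGNORECASE,
)
ALLOW_USER_EXISTS_RE = re.compile(r"already exists?|duplicate|already.*registered", re.IGNORECASE)


def scan_cli_log(text: str, allow: re.Pattern | None = None) -> bool:
    """Return True when the log is acceptable: either no error markers at all,
    or the allow pattern matches somewhere in the log (matching run_cli's grep check)."""
    if not ERROR_RE.search(text):
        return True
    return bool(allow and allow.search(text))


# A rerun of "create user" that reports the user already exists is tolerated;
# a traceback is not.
assert scan_cli_log("Fail to create user: already exists", allow=ALLOW_USER_EXISTS_RE)
assert not scan_cli_log("Traceback (most recent call last): ...")
```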

13
.gitignore vendored
View File

@ -44,6 +44,7 @@ cl100k_base.tiktoken
chrome*
huggingface.co/
nltk_data/
uv-x86_64*.tar.gz
# Exclude hash-like temporary files like 9b5ad71b2ce5302211f9c61530b329a4922fc6a4
*[0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f]*
@ -51,6 +52,13 @@ nltk_data/
.venv
docker/data
# OceanBase data and conf
docker/oceanbase/conf
docker/oceanbase/data
# SeekDB data and conf
docker/seekdb
#--------------------------------------------------#
# The following was generated with gitignore.nvim: #
@ -198,3 +206,8 @@ backup
.hypothesis
# Added by cargo
/target

View File

@ -27,7 +27,7 @@ RAGFlow is an open-source RAG (Retrieval-Augmented Generation) engine based on d
- **Document Processing**: `deepdoc/` - PDF parsing, OCR, layout analysis
- **LLM Integration**: `rag/llm/` - Model abstractions for chat, embedding, reranking
- **RAG Pipeline**: `rag/flow/` - Chunking, parsing, tokenization
- **Graph RAG**: `rag/graphrag/` - Knowledge graph construction and querying
### Agent System (`/agent/`)
- **Components**: Modular workflow components (LLM, retrieval, categorize, etc.)

View File

@ -28,7 +28,6 @@ ENV DEBIAN_FRONTEND=noninteractive
# Setup apt
# Python package and implicit dependencies:
# opencv-python: libglib2.0-0 libglx-mesa0 libgl1
# aspose-slides: pkg-config libicu-dev libgdiplus libssl1.1_1.1.1f-1ubuntu2_amd64.deb
# python-pptx: default-jdk tika-server-standard-3.2.3.jar
# selenium: libatk-bridge2.0-0 chrome-linux64-121-0-6167-85
# Building C extensions: libpython3-dev libgtk-4-1 libnss3 xdg-utils libgbm-dev
@ -53,7 +52,8 @@ RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
apt install -y ghostscript && \
apt install -y pandoc && \
apt install -y texlive && \
apt install -y fonts-freefont-ttf fonts-noto-cjk && \
apt install -y postgresql-client
# Install uv
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/,target=/deps \
@ -64,10 +64,12 @@ RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/,target=/deps
echo 'url = "https://pypi.tuna.tsinghua.edu.cn/simple"' >> /etc/uv/uv.toml && \
echo 'default = true' >> /etc/uv/uv.toml; \
fi; \
arch="$(uname -m)"; \
if [ "$arch" = "x86_64" ]; then uv_arch="x86_64"; else uv_arch="aarch64"; fi; \
tar xzf "/deps/uv-${uv_arch}-unknown-linux-gnu.tar.gz" \
&& cp "uv-${uv_arch}-unknown-linux-gnu/"* /usr/local/bin/ \
&& rm -rf "uv-${uv_arch}-unknown-linux-gnu" \
&& uv python install 3.12
ENV PYTHONDONTWRITEBYTECODE=1 DOTNET_SYSTEM_GLOBALIZATION_INVARIANT=1
ENV PATH=/root/.local/bin:$PATH
@ -125,8 +127,6 @@ RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/chromedriver-l
mv chromedriver /usr/local/bin/ && \
rm -f /usr/bin/google-chrome
# https://forum.aspose.com/t/aspose-slides-for-net-no-usable-version-of-libssl-found-with-linux-server/271344/13
# aspose-slides on linux/arm64 is unavailable
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/,target=/deps \
if [ "$(uname -m)" = "x86_64" ]; then \
dpkg -i /deps/libssl1.1_1.1.1f-1ubuntu2_amd64.deb; \
@ -152,11 +152,14 @@ RUN --mount=type=cache,id=ragflow_uv,target=/root/.cache/uv,sharing=locked \
else \
sed -i 's|pypi.tuna.tsinghua.edu.cn|pypi.org|g' uv.lock; \
fi; \
uv sync --python 3.12 --frozen && \
# Ensure pip is available in the venv for runtime package installation (fixes #12651)
.venv/bin/python3 -m ensurepip --upgrade
COPY web web
COPY docs docs
RUN --mount=type=cache,id=ragflow_npm,target=/root/.npm,sharing=locked \
export NODE_OPTIONS="--max-old-space-size=4096" && \
cd web && npm install && npm run build
COPY .git /ragflow/.git
@ -186,11 +189,8 @@ COPY conf conf
COPY deepdoc deepdoc COPY deepdoc deepdoc
COPY rag rag COPY rag rag
COPY agent agent COPY agent agent
COPY graphrag graphrag
COPY agentic_reasoning agentic_reasoning
COPY pyproject.toml uv.lock ./ COPY pyproject.toml uv.lock ./
COPY mcp mcp COPY mcp mcp
COPY plugin plugin
COPY common common COPY common common
COPY memory memory COPY memory memory

View File

@ -3,7 +3,7 @@
FROM scratch
# Copy resources downloaded via download_deps.py
COPY chromedriver-linux64-121-0-6167-85 chrome-linux64-121-0-6167-85 cl100k_base.tiktoken libssl1.1_1.1.1f-1ubuntu2_amd64.deb libssl1.1_1.1.1f-1ubuntu2_arm64.deb tika-server-standard-3.2.3.jar tika-server-standard-3.2.3.jar.md5 libssl*.deb uv-x86_64-unknown-linux-gnu.tar.gz uv-aarch64-unknown-linux-gnu.tar.gz /
COPY nltk_data /nltk_data

View File

@ -21,7 +21,7 @@ cp pyproject.toml release/$PROJECT_NAME/pyproject.toml
cp README.md release/$PROJECT_NAME/README.md
mkdir release/$PROJECT_NAME/$SOURCE_DIR/$PACKAGE_DIR -p
cp ragflow_cli.py release/$PROJECT_NAME/$SOURCE_DIR/$PACKAGE_DIR/ragflow_cli.py
if [ -d "release/$PROJECT_NAME/$SOURCE_DIR" ]; then
echo "✅ source dir: release/$PROJECT_NAME/$SOURCE_DIR"

View File

@ -1,938 +0,0 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import argparse
import base64
import getpass
from cmd import Cmd
from typing import Any, Dict, List
import requests
from Cryptodome.Cipher import PKCS1_v1_5 as Cipher_pkcs1_v1_5
from Cryptodome.PublicKey import RSA
from lark import Lark, Transformer, Tree
GRAMMAR = r"""
start: command
command: sql_command | meta_command
sql_command: list_services
| show_service
| startup_service
| shutdown_service
| restart_service
| list_users
| show_user
| drop_user
| alter_user
| create_user
| activate_user
| list_datasets
| list_agents
| create_role
| drop_role
| alter_role
| list_roles
| show_role
| grant_permission
| revoke_permission
| alter_user_role
| show_user_permission
| show_version
// meta command definition
meta_command: "\\" meta_command_name [meta_args]
meta_command_name: /[a-zA-Z?]+/
meta_args: (meta_arg)+
meta_arg: /[^\\s"']+/ | quoted_string
// command definition
LIST: "LIST"i
SERVICES: "SERVICES"i
SHOW: "SHOW"i
CREATE: "CREATE"i
SERVICE: "SERVICE"i
SHUTDOWN: "SHUTDOWN"i
STARTUP: "STARTUP"i
RESTART: "RESTART"i
USERS: "USERS"i
DROP: "DROP"i
USER: "USER"i
ALTER: "ALTER"i
ACTIVE: "ACTIVE"i
PASSWORD: "PASSWORD"i
DATASETS: "DATASETS"i
OF: "OF"i
AGENTS: "AGENTS"i
ROLE: "ROLE"i
ROLES: "ROLES"i
DESCRIPTION: "DESCRIPTION"i
GRANT: "GRANT"i
REVOKE: "REVOKE"i
ALL: "ALL"i
PERMISSION: "PERMISSION"i
TO: "TO"i
FROM: "FROM"i
FOR: "FOR"i
RESOURCES: "RESOURCES"i
ON: "ON"i
SET: "SET"i
VERSION: "VERSION"i
list_services: LIST SERVICES ";"
show_service: SHOW SERVICE NUMBER ";"
startup_service: STARTUP SERVICE NUMBER ";"
shutdown_service: SHUTDOWN SERVICE NUMBER ";"
restart_service: RESTART SERVICE NUMBER ";"
list_users: LIST USERS ";"
drop_user: DROP USER quoted_string ";"
alter_user: ALTER USER PASSWORD quoted_string quoted_string ";"
show_user: SHOW USER quoted_string ";"
create_user: CREATE USER quoted_string quoted_string ";"
activate_user: ALTER USER ACTIVE quoted_string status ";"
list_datasets: LIST DATASETS OF quoted_string ";"
list_agents: LIST AGENTS OF quoted_string ";"
create_role: CREATE ROLE identifier [DESCRIPTION quoted_string] ";"
drop_role: DROP ROLE identifier ";"
alter_role: ALTER ROLE identifier SET DESCRIPTION quoted_string ";"
list_roles: LIST ROLES ";"
show_role: SHOW ROLE identifier ";"
grant_permission: GRANT action_list ON identifier TO ROLE identifier ";"
revoke_permission: REVOKE action_list ON identifier FROM ROLE identifier ";"
alter_user_role: ALTER USER quoted_string SET ROLE identifier ";"
show_user_permission: SHOW USER PERMISSION quoted_string ";"
show_version: SHOW VERSION ";"
action_list: identifier ("," identifier)*
identifier: WORD
quoted_string: QUOTED_STRING
status: WORD
QUOTED_STRING: /'[^']+'/ | /"[^"]+"/
WORD: /[a-zA-Z0-9_\-\.]+/
NUMBER: /[0-9]+/
%import common.WS
%ignore WS
"""
class AdminTransformer(Transformer):
def start(self, items):
return items[0]
def command(self, items):
return items[0]
def list_services(self, items):
result = {"type": "list_services"}
return result
def show_service(self, items):
service_id = int(items[2])
return {"type": "show_service", "number": service_id}
def startup_service(self, items):
service_id = int(items[2])
return {"type": "startup_service", "number": service_id}
def shutdown_service(self, items):
service_id = int(items[2])
return {"type": "shutdown_service", "number": service_id}
def restart_service(self, items):
service_id = int(items[2])
return {"type": "restart_service", "number": service_id}
def list_users(self, items):
return {"type": "list_users"}
def show_user(self, items):
user_name = items[2]
return {"type": "show_user", "user_name": user_name}
def drop_user(self, items):
user_name = items[2]
return {"type": "drop_user", "user_name": user_name}
def alter_user(self, items):
user_name = items[3]
new_password = items[4]
return {"type": "alter_user", "user_name": user_name, "password": new_password}
def create_user(self, items):
user_name = items[2]
password = items[3]
return {"type": "create_user", "user_name": user_name, "password": password, "role": "user"}
def activate_user(self, items):
user_name = items[3]
activate_status = items[4]
return {"type": "activate_user", "activate_status": activate_status, "user_name": user_name}
def list_datasets(self, items):
user_name = items[3]
return {"type": "list_datasets", "user_name": user_name}
def list_agents(self, items):
user_name = items[3]
return {"type": "list_agents", "user_name": user_name}
def create_role(self, items):
role_name = items[2]
if len(items) > 4:
description = items[4]
return {"type": "create_role", "role_name": role_name, "description": description}
else:
return {"type": "create_role", "role_name": role_name}
def drop_role(self, items):
role_name = items[2]
return {"type": "drop_role", "role_name": role_name}
def alter_role(self, items):
role_name = items[2]
description = items[5]
return {"type": "alter_role", "role_name": role_name, "description": description}
def list_roles(self, items):
return {"type": "list_roles"}
def show_role(self, items):
role_name = items[2]
return {"type": "show_role", "role_name": role_name}
def grant_permission(self, items):
action_list = items[1]
resource = items[3]
role_name = items[6]
return {"type": "grant_permission", "role_name": role_name, "resource": resource, "actions": action_list}
def revoke_permission(self, items):
action_list = items[1]
resource = items[3]
role_name = items[6]
return {"type": "revoke_permission", "role_name": role_name, "resource": resource, "actions": action_list}
def alter_user_role(self, items):
user_name = items[2]
role_name = items[5]
return {"type": "alter_user_role", "user_name": user_name, "role_name": role_name}
def show_user_permission(self, items):
user_name = items[3]
return {"type": "show_user_permission", "user_name": user_name}
def show_version(self, items):
return {"type": "show_version"}
def action_list(self, items):
return items
def meta_command(self, items):
command_name = str(items[0]).lower()
args = items[1:] if len(items) > 1 else []
# handle quoted parameter
parsed_args = []
for arg in args:
if hasattr(arg, "value"):
parsed_args.append(arg.value)
else:
parsed_args.append(str(arg))
return {"type": "meta", "command": command_name, "args": parsed_args}
def meta_command_name(self, items):
return items[0]
def meta_args(self, items):
return items
def encrypt(input_string):
pub = "-----BEGIN PUBLIC KEY-----\nMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEArq9XTUSeYr2+N1h3Afl/z8Dse/2yD0ZGrKwx+EEEcdsBLca9Ynmx3nIB5obmLlSfmskLpBo0UACBmB5rEjBp2Q2f3AG3Hjd4B+gNCG6BDaawuDlgANIhGnaTLrIqWrrcm4EMzJOnAOI1fgzJRsOOUEfaS318Eq9OVO3apEyCCt0lOQK6PuksduOjVxtltDav+guVAA068NrPYmRNabVKRNLJpL8w4D44sfth5RvZ3q9t+6RTArpEtc5sh5ChzvqPOzKGMXW83C95TxmXqpbK6olN4RevSfVjEAgCydH6HN6OhtOQEcnrU97r9H0iZOWwbw3pVrZiUkuRD1R56Wzs2wIDAQAB\n-----END PUBLIC KEY-----"
pub_key = RSA.importKey(pub)
cipher = Cipher_pkcs1_v1_5.new(pub_key)
cipher_text = cipher.encrypt(base64.b64encode(input_string.encode("utf-8")))
return base64.b64encode(cipher_text).decode("utf-8")
def encode_to_base64(input_string):
base64_encoded = base64.b64encode(input_string.encode("utf-8"))
return base64_encoded.decode("utf-8")
class AdminCLI(Cmd):
def __init__(self):
super().__init__()
self.parser = Lark(GRAMMAR, start="start", parser="lalr", transformer=AdminTransformer())
self.command_history = []
self.is_interactive = False
self.admin_account = "admin@ragflow.io"
self.admin_password: str = "admin"
self.session = requests.Session()
self.access_token: str = ""
self.host: str = ""
self.port: int = 0
intro = r"""Type "\h" for help."""
prompt = "admin> "
def onecmd(self, command: str) -> bool:
try:
result = self.parse_command(command)
if isinstance(result, dict):
if "type" in result and result.get("type") == "empty":
return False
self.execute_command(result)
if isinstance(result, Tree):
return False
if result.get("type") == "meta" and result.get("command") in ["q", "quit", "exit"]:
return True
except KeyboardInterrupt:
print("\nUse '\\q' to quit")
except EOFError:
print("\nGoodbye!")
return True
return False
def emptyline(self) -> bool:
return False
def default(self, line: str) -> bool:
return self.onecmd(line)
def parse_command(self, command_str: str) -> dict[str, str]:
if not command_str.strip():
return {"type": "empty"}
self.command_history.append(command_str)
try:
result = self.parser.parse(command_str)
return result
except Exception as e:
return {"type": "error", "message": f"Parse error: {str(e)}"}
def verify_admin(self, arguments: dict, single_command: bool):
self.host = arguments["host"]
self.port = arguments["port"]
print("Attempt to access server for admin login")
url = f"http://{self.host}:{self.port}/api/v1/admin/login"
attempt_count = 3
if single_command:
attempt_count = 1
try_count = 0
while True:
try_count += 1
if try_count > attempt_count:
return False
if single_command:
admin_passwd = arguments["password"]
else:
admin_passwd = getpass.getpass(f"password for {self.admin_account}: ").strip()
try:
self.admin_password = encrypt(admin_passwd)
response = self.session.post(url, json={"email": self.admin_account, "password": self.admin_password})
if response.status_code == 200:
res_json = response.json()
error_code = res_json.get("code", -1)
if error_code == 0:
self.session.headers.update({"Content-Type": "application/json", "Authorization": response.headers["Authorization"], "User-Agent": "RAGFlow-CLI/0.23.1"})
print("Authentication successful.")
return True
else:
error_message = res_json.get("message", "Unknown error")
print(f"Authentication failed: {error_message}, try again")
continue
else:
print(f"Bad response status: {response.status_code}, password is wrong")
except Exception as e:
print(str(e))
print("Can't access server for admin login (connection failed)")
def _format_service_detail_table(self, data):
if isinstance(data, list):
return data
if not all([isinstance(v, list) for v in data.values()]):
# normal table
return data
# handle task_executor heartbeats map, for example {'name': [{'done': 2, 'now': timestamp1}, {'done': 3, 'now': timestamp2}]
task_executor_list = []
for k, v in data.items():
# display latest status
heartbeats = sorted(v, key=lambda x: x["now"], reverse=True)
task_executor_list.append(
{
"task_executor_name": k,
**heartbeats[0],
}
if heartbeats
else {"task_executor_name": k}
)
return task_executor_list
def _print_table_simple(self, data):
if not data:
print("No data to print")
return
if isinstance(data, dict):
# handle single row data
data = [data]
columns = list(set().union(*(d.keys() for d in data)))
columns.sort()
col_widths = {}
def get_string_width(text):
half_width_chars = " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\t\n\r"
width = 0
for char in text:
if char in half_width_chars:
width += 1
else:
width += 2
return width
for col in columns:
max_width = get_string_width(str(col))
for item in data:
value_len = get_string_width(str(item.get(col, "")))
if value_len > max_width:
max_width = value_len
col_widths[col] = max(2, max_width)
# Generate delimiter
separator = "+" + "+".join(["-" * (col_widths[col] + 2) for col in columns]) + "+"
# Print header
print(separator)
header = "|" + "|".join([f" {col:<{col_widths[col]}} " for col in columns]) + "|"
print(header)
print(separator)
# Print data
for item in data:
row = "|"
for col in columns:
value = str(item.get(col, ""))
if get_string_width(value) > col_widths[col]:
value = value[: col_widths[col] - 3] + "..."
row += f" {value:<{col_widths[col] - (get_string_width(value) - len(value))}} |"
print(row)
print(separator)
def run_interactive(self):
self.is_interactive = True
print("RAGFlow Admin command line interface - Type '\\?' for help, '\\q' to quit")
while True:
try:
command = input("admin> ").strip()
if not command:
continue
print(f"command: {command}")
result = self.parse_command(command)
self.execute_command(result)
if isinstance(result, Tree):
continue
if result.get("type") == "meta" and result.get("command") in ["q", "quit", "exit"]:
break
except KeyboardInterrupt:
print("\nUse '\\q' to quit")
except EOFError:
print("\nGoodbye!")
break
def run_single_command(self, command: str):
result = self.parse_command(command)
self.execute_command(result)
def parse_connection_args(self, args: List[str]) -> Dict[str, Any]:
parser = argparse.ArgumentParser(description="Admin CLI Client", add_help=False)
parser.add_argument("-h", "--host", default="localhost", help="Admin service host")
parser.add_argument("-p", "--port", type=int, default=9381, help="Admin service port")
parser.add_argument("-w", "--password", default="admin", type=str, help="Superuser password")
parser.add_argument("command", nargs="?", help="Single command")
try:
parsed_args, remaining_args = parser.parse_known_args(args)
if remaining_args:
command = remaining_args[0]
return {"host": parsed_args.host, "port": parsed_args.port, "password": parsed_args.password, "command": command}
else:
return {
"host": parsed_args.host,
"port": parsed_args.port,
}
except SystemExit:
return {"error": "Invalid connection arguments"}
def execute_command(self, parsed_command: Dict[str, Any]):
command_dict: dict
if isinstance(parsed_command, Tree):
command_dict = parsed_command.children[0]
else:
if parsed_command["type"] == "error":
print(f"Error: {parsed_command['message']}")
return
else:
command_dict = parsed_command
# print(f"Parsed command: {command_dict}")
command_type = command_dict["type"]
match command_type:
case "list_services":
self._handle_list_services(command_dict)
case "show_service":
self._handle_show_service(command_dict)
case "restart_service":
self._handle_restart_service(command_dict)
case "shutdown_service":
self._handle_shutdown_service(command_dict)
case "startup_service":
self._handle_startup_service(command_dict)
case "list_users":
self._handle_list_users(command_dict)
case "show_user":
self._handle_show_user(command_dict)
case "drop_user":
self._handle_drop_user(command_dict)
case "alter_user":
self._handle_alter_user(command_dict)
case "create_user":
self._handle_create_user(command_dict)
case "activate_user":
self._handle_activate_user(command_dict)
case "list_datasets":
self._handle_list_datasets(command_dict)
case "list_agents":
self._handle_list_agents(command_dict)
case "create_role":
self._create_role(command_dict)
case "drop_role":
self._drop_role(command_dict)
case "alter_role":
self._alter_role(command_dict)
case "list_roles":
self._list_roles(command_dict)
case "show_role":
self._show_role(command_dict)
case "grant_permission":
self._grant_permission(command_dict)
case "revoke_permission":
self._revoke_permission(command_dict)
case "alter_user_role":
self._alter_user_role(command_dict)
case "show_user_permission":
self._show_user_permission(command_dict)
case "show_version":
self._show_version(command_dict)
case "meta":
self._handle_meta_command(command_dict)
case _:
print(f"Command '{command_type}' would be executed with API")
def _handle_list_services(self, command):
print("Listing all services")
url = f"http://{self.host}:{self.port}/api/v1/admin/services"
response = self.session.get(url)
res_json = response.json()
if response.status_code == 200:
self._print_table_simple(res_json["data"])
else:
print(f"Fail to get all services, code: {res_json['code']}, message: {res_json['message']}")
def _handle_show_service(self, command):
service_id: int = command["number"]
print(f"Showing service: {service_id}")
url = f"http://{self.host}:{self.port}/api/v1/admin/services/{service_id}"
response = self.session.get(url)
res_json = response.json()
if response.status_code == 200:
res_data = res_json["data"]
if "status" in res_data and res_data["status"] == "alive":
print(f"Service {res_data['service_name']} is alive, ")
if isinstance(res_data["message"], str):
print(res_data["message"])
else:
data = self._format_service_detail_table(res_data["message"])
self._print_table_simple(data)
else:
print(f"Service {res_data['service_name']} is down, {res_data['message']}")
else:
print(f"Fail to show service, code: {res_json['code']}, message: {res_json['message']}")
def _handle_restart_service(self, command):
service_id: int = command["number"]
print(f"Restart service {service_id}")
def _handle_shutdown_service(self, command):
service_id: int = command["number"]
print(f"Shutdown service {service_id}")
def _handle_startup_service(self, command):
service_id: int = command["number"]
print(f"Startup service {service_id}")
def _handle_list_users(self, command):
print("Listing all users")
url = f"http://{self.host}:{self.port}/api/v1/admin/users"
response = self.session.get(url)
res_json = response.json()
if response.status_code == 200:
self._print_table_simple(res_json["data"])
else:
print(f"Fail to get all users, code: {res_json['code']}, message: {res_json['message']}")
def _handle_show_user(self, command):
username_tree: Tree = command["user_name"]
user_name: str = username_tree.children[0].strip("'\"")
print(f"Showing user: {user_name}")
url = f"http://{self.host}:{self.port}/api/v1/admin/users/{user_name}"
response = self.session.get(url)
res_json = response.json()
if response.status_code == 200:
table_data = res_json["data"]
table_data.pop("avatar")
self._print_table_simple(table_data)
else:
print(f"Fail to get user {user_name}, code: {res_json['code']}, message: {res_json['message']}")
def _handle_drop_user(self, command):
username_tree: Tree = command["user_name"]
user_name: str = username_tree.children[0].strip("'\"")
print(f"Drop user: {user_name}")
url = f"http://{self.host}:{self.port}/api/v1/admin/users/{user_name}"
response = self.session.delete(url)
res_json = response.json()
if response.status_code == 200:
print(res_json["message"])
else:
print(f"Fail to drop user, code: {res_json['code']}, message: {res_json['message']}")
def _handle_alter_user(self, command):
user_name_tree: Tree = command["user_name"]
user_name: str = user_name_tree.children[0].strip("'\"")
password_tree: Tree = command["password"]
password: str = password_tree.children[0].strip("'\"")
print(f"Alter user: {user_name}, password: ******")
url = f"http://{self.host}:{self.port}/api/v1/admin/users/{user_name}/password"
response = self.session.put(url, json={"new_password": encrypt(password)})
res_json = response.json()
if response.status_code == 200:
print(res_json["message"])
else:
print(f"Fail to alter password, code: {res_json['code']}, message: {res_json['message']}")
def _handle_create_user(self, command):
user_name_tree: Tree = command["user_name"]
user_name: str = user_name_tree.children[0].strip("'\"")
password_tree: Tree = command["password"]
password: str = password_tree.children[0].strip("'\"")
role: str = command["role"]
print(f"Create user: {user_name}, password: ******, role: {role}")
url = f"http://{self.host}:{self.port}/api/v1/admin/users"
response = self.session.post(url, json={"user_name": user_name, "password": encrypt(password), "role": role})
res_json = response.json()
if response.status_code == 200:
self._print_table_simple(res_json["data"])
else:
print(f"Fail to create user {user_name}, code: {res_json['code']}, message: {res_json['message']}")
def _handle_activate_user(self, command):
user_name_tree: Tree = command["user_name"]
user_name: str = user_name_tree.children[0].strip("'\"")
activate_tree: Tree = command["activate_status"]
activate_status: str = activate_tree.children[0].strip("'\"")
if activate_status.lower() in ["on", "off"]:
print(f"Alter user {user_name} activate status, turn {activate_status.lower()}.")
url = f"http://{self.host}:{self.port}/api/v1/admin/users/{user_name}/activate"
response = self.session.put(url, json={"activate_status": activate_status})
res_json = response.json()
if response.status_code == 200:
print(res_json["message"])
else:
print(f"Fail to alter activate status, code: {res_json['code']}, message: {res_json['message']}")
else:
print(f"Unknown activate status: {activate_status}.")
def _handle_list_datasets(self, command):
username_tree: Tree = command["user_name"]
user_name: str = username_tree.children[0].strip("'\"")
print(f"Listing all datasets of user: {user_name}")
url = f"http://{self.host}:{self.port}/api/v1/admin/users/{user_name}/datasets"
response = self.session.get(url)
res_json = response.json()
if response.status_code == 200:
table_data = res_json["data"]
for t in table_data:
t.pop("avatar")
self._print_table_simple(table_data)
else:
print(f"Fail to get all datasets of {user_name}, code: {res_json['code']}, message: {res_json['message']}")
def _handle_list_agents(self, command):
username_tree: Tree = command["user_name"]
user_name: str = username_tree.children[0].strip("'\"")
print(f"Listing all agents of user: {user_name}")
url = f"http://{self.host}:{self.port}/api/v1/admin/users/{user_name}/agents"
response = self.session.get(url)
res_json = response.json()
if response.status_code == 200:
table_data = res_json["data"]
for t in table_data:
t.pop("avatar")
self._print_table_simple(table_data)
else:
print(f"Fail to get all agents of {user_name}, code: {res_json['code']}, message: {res_json['message']}")
def _create_role(self, command):
role_name_tree: Tree = command["role_name"]
role_name: str = role_name_tree.children[0].strip("'\"")
desc_str: str = ""
if "description" in command:
desc_tree: Tree = command["description"]
desc_str = desc_tree.children[0].strip("'\"")
print(f"create role name: {role_name}, description: {desc_str}")
url = f"http://{self.host}:{self.port}/api/v1/admin/roles"
response = self.session.post(url, json={"role_name": role_name, "description": desc_str})
res_json = response.json()
if response.status_code == 200:
self._print_table_simple(res_json["data"])
else:
print(f"Fail to create role {role_name}, code: {res_json['code']}, message: {res_json['message']}")
def _drop_role(self, command):
role_name_tree: Tree = command["role_name"]
role_name: str = role_name_tree.children[0].strip("'\"")
print(f"drop role name: {role_name}")
url = f"http://{self.host}:{self.port}/api/v1/admin/roles/{role_name}"
response = self.session.delete(url)
res_json = response.json()
if response.status_code == 200:
self._print_table_simple(res_json["data"])
else:
print(f"Fail to drop role {role_name}, code: {res_json['code']}, message: {res_json['message']}")
def _alter_role(self, command):
role_name_tree: Tree = command["role_name"]
role_name: str = role_name_tree.children[0].strip("'\"")
desc_tree: Tree = command["description"]
desc_str: str = desc_tree.children[0].strip("'\"")
print(f"alter role name: {role_name}, description: {desc_str}")
url = f"http://{self.host}:{self.port}/api/v1/admin/roles/{role_name}"
response = self.session.put(url, json={"description": desc_str})
res_json = response.json()
if response.status_code == 200:
self._print_table_simple(res_json["data"])
else:
print(f"Fail to update role {role_name} with description: {desc_str}, code: {res_json['code']}, message: {res_json['message']}")
def _list_roles(self, command):
print("Listing all roles")
url = f"http://{self.host}:{self.port}/api/v1/admin/roles"
response = self.session.get(url)
res_json = response.json()
if response.status_code == 200:
self._print_table_simple(res_json["data"])
else:
print(f"Fail to list roles, code: {res_json['code']}, message: {res_json['message']}")
def _show_role(self, command):
role_name_tree: Tree = command["role_name"]
role_name: str = role_name_tree.children[0].strip("'\"")
print(f"show role: {role_name}")
url = f"http://{self.host}:{self.port}/api/v1/admin/roles/{role_name}/permission"
response = self.session.get(url)
res_json = response.json()
if response.status_code == 200:
self._print_table_simple(res_json["data"])
else:
print(f"Fail to list roles, code: {res_json['code']}, message: {res_json['message']}")
def _grant_permission(self, command):
role_name_tree: Tree = command["role_name"]
role_name_str: str = role_name_tree.children[0].strip("'\"")
resource_tree: Tree = command["resource"]
resource_str: str = resource_tree.children[0].strip("'\"")
action_tree_list: list = command["actions"]
actions: list = []
for action_tree in action_tree_list:
action_str: str = action_tree.children[0].strip("'\"")
actions.append(action_str)
print(f"grant role_name: {role_name_str}, resource: {resource_str}, actions: {actions}")
url = f"http://{self.host}:{self.port}/api/v1/admin/roles/{role_name_str}/permission"
response = self.session.post(url, json={"actions": actions, "resource": resource_str})
res_json = response.json()
if response.status_code == 200:
self._print_table_simple(res_json["data"])
else:
print(f"Fail to grant role {role_name_str} with {actions} on {resource_str}, code: {res_json['code']}, message: {res_json['message']}")
def _revoke_permission(self, command):
role_name_tree: Tree = command["role_name"]
role_name_str: str = role_name_tree.children[0].strip("'\"")
resource_tree: Tree = command["resource"]
resource_str: str = resource_tree.children[0].strip("'\"")
action_tree_list: list = command["actions"]
actions: list = []
for action_tree in action_tree_list:
action_str: str = action_tree.children[0].strip("'\"")
actions.append(action_str)
print(f"revoke role_name: {role_name_str}, resource: {resource_str}, actions: {actions}")
url = f"http://{self.host}:{self.port}/api/v1/admin/roles/{role_name_str}/permission"
response = self.session.delete(url, json={"actions": actions, "resource": resource_str})
res_json = response.json()
if response.status_code == 200:
self._print_table_simple(res_json["data"])
else:
print(f"Fail to revoke role {role_name_str} with {actions} on {resource_str}, code: {res_json['code']}, message: {res_json['message']}")
def _alter_user_role(self, command):
role_name_tree: Tree = command["role_name"]
role_name_str: str = role_name_tree.children[0].strip("'\"")
user_name_tree: Tree = command["user_name"]
user_name_str: str = user_name_tree.children[0].strip("'\"")
print(f"alter_user_role user_name: {user_name_str}, role_name: {role_name_str}")
url = f"http://{self.host}:{self.port}/api/v1/admin/users/{user_name_str}/role"
response = self.session.put(url, json={"role_name": role_name_str})
res_json = response.json()
if response.status_code == 200:
self._print_table_simple(res_json["data"])
else:
print(f"Fail to alter user: {user_name_str} to role {role_name_str}, code: {res_json['code']}, message: {res_json['message']}")
def _show_user_permission(self, command):
user_name_tree: Tree = command["user_name"]
user_name_str: str = user_name_tree.children[0].strip("'\"")
print(f"show_user_permission user_name: {user_name_str}")
url = f"http://{self.host}:{self.port}/api/v1/admin/users/{user_name_str}/permission"
response = self.session.get(url)
res_json = response.json()
if response.status_code == 200:
self._print_table_simple(res_json["data"])
else:
print(f"Fail to show user: {user_name_str} permission, code: {res_json['code']}, message: {res_json['message']}")
def _show_version(self, command):
print("show_version")
url = f"http://{self.host}:{self.port}/api/v1/admin/version"
response = self.session.get(url)
res_json = response.json()
if response.status_code == 200:
self._print_table_simple(res_json["data"])
else:
print(f"Fail to show version, code: {res_json['code']}, message: {res_json['message']}")
def _handle_meta_command(self, command):
meta_command = command["command"]
args = command.get("args", [])
if meta_command in ["?", "h", "help"]:
self.show_help()
elif meta_command in ["q", "quit", "exit"]:
print("Goodbye!")
else:
print(f"Meta command '{meta_command}' with args {args}")
def show_help(self):
"""Help info"""
help_text = """
Commands:
LIST SERVICES
SHOW SERVICE <service>
STARTUP SERVICE <service>
SHUTDOWN SERVICE <service>
RESTART SERVICE <service>
LIST USERS
SHOW USER <user>
DROP USER <user>
CREATE USER <user> <password>
ALTER USER PASSWORD <user> <new_password>
ALTER USER ACTIVE <user> <on/off>
LIST DATASETS OF <user>
LIST AGENTS OF <user>
Meta Commands:
\\?, \\h, \\help Show this help
\\q, \\quit, \\exit Quit the CLI
"""
print(help_text)
def main():
import sys
cli = AdminCLI()
args = cli.parse_connection_args(sys.argv)
if "error" in args:
print("Error: Invalid connection arguments")
return
if "command" in args:
if "password" not in args:
print("Error: password is missing")
return
if cli.verify_admin(args, single_command=True):
command: str = args["command"]
# print(f"Run single command: {command}")
cli.run_single_command(command)
else:
if cli.verify_admin(args, single_command=False):
print(r"""
____ ___ ______________ ___ __ _
/ __ \/ | / ____/ ____/ /___ _ __ / | ____/ /___ ___ (_)___
/ /_/ / /| |/ / __/ /_ / / __ \ | /| / / / /| |/ __ / __ `__ \/ / __ \
/ _, _/ ___ / /_/ / __/ / / /_/ / |/ |/ / / ___ / /_/ / / / / / / / / / /
/_/ |_/_/ |_\____/_/ /_/\____/|__/|__/ /_/ |_\__,_/_/ /_/ /_/_/_/ /_/
""")
cli.cmdloop()
if __name__ == "__main__":
main()
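
For context, the removed `admin_client.py` authenticated by RSA-encrypting the base64-encoded password with the bundled public key (the `encrypt()` helper above) and POSTing it to the admin login endpoint, then reusing the returned `Authorization` header on later requests. A minimal sketch of that round trip, with illustrative host/port values taken from the old argument-parser defaults:

```python
import requests

host, port = "localhost", 9381  # defaults from parse_connection_args()

payload = {
    "email": "admin@ragflow.io",
    "password": encrypt("admin"),  # encrypt() as defined in the removed module
}
resp = requests.post(f"http://{host}:{port}/api/v1/admin/login", json=payload)
if resp.status_code == 200 and resp.json().get("code", -1) == 0:
    token = resp.headers["Authorization"]  # attached to subsequent admin requests
    print("Authentication successful.")
```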

182
admin/client/http_client.py Normal file
View File

@ -0,0 +1,182 @@
#
# Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import time
import json
import typing
from typing import Any, Dict, Optional
import requests
# from requests.sessions import HTTPAdapter
class HttpClient:
def __init__(
self,
host: str = "127.0.0.1",
port: int = 9381,
api_version: str = "v1",
api_key: Optional[str] = None,
connect_timeout: float = 5.0,
read_timeout: float = 60.0,
verify_ssl: bool = False,
) -> None:
self.host = host
self.port = port
self.api_version = api_version
self.api_key = api_key
self.login_token: str | None = None
self.connect_timeout = connect_timeout
self.read_timeout = read_timeout
self.verify_ssl = verify_ssl
def api_base(self) -> str:
return f"{self.host}:{self.port}/api/{self.api_version}"
def non_api_base(self) -> str:
return f"{self.host}:{self.port}/{self.api_version}"
def build_url(self, path: str, use_api_base: bool = True) -> str:
base = self.api_base() if use_api_base else self.non_api_base()
if self.verify_ssl:
return f"https://{base}/{path.lstrip('/')}"
else:
return f"http://{base}/{path.lstrip('/')}"
def _headers(self, auth_kind: Optional[str], extra: Optional[Dict[str, str]]) -> Dict[str, str]:
headers = {}
if auth_kind == "api" and self.api_key:
headers["Authorization"] = f"Bearer {self.api_key}"
elif auth_kind == "web" and self.login_token:
headers["Authorization"] = self.login_token
elif auth_kind == "admin" and self.login_token:
headers["Authorization"] = self.login_token
else:
pass
if extra:
headers.update(extra)
return headers
def request(
self,
method: str,
path: str,
*,
use_api_base: bool = True,
auth_kind: Optional[str] = "api",
headers: Optional[Dict[str, str]] = None,
json_body: Optional[Dict[str, Any]] = None,
data: Any = None,
files: Any = None,
params: Optional[Dict[str, Any]] = None,
stream: bool = False,
iterations: int = 1,
) -> requests.Response | dict:
url = self.build_url(path, use_api_base=use_api_base)
merged_headers = self._headers(auth_kind, headers)
# timeout: Tuple[float, float] = (self.connect_timeout, self.read_timeout)
session = requests.Session()
# adapter = HTTPAdapter(pool_connections=100, pool_maxsize=100)
# session.mount("http://", adapter)
http_function = typing.Any
match method:
case "GET":
http_function = session.get
case "POST":
http_function = session.post
case "PUT":
http_function = session.put
case "DELETE":
http_function = session.delete
case "PATCH":
http_function = session.patch
case _:
raise ValueError(f"Invalid HTTP method: {method}")
if iterations > 1:
response_list = []
total_duration = 0.0
for _ in range(iterations):
start_time = time.perf_counter()
response = http_function(url, headers=merged_headers, json=json_body, data=data, stream=stream)
# response = session.get(url, headers=merged_headers, json=json_body, data=data, stream=stream)
# response = requests.request(
# method=method,
# url=url,
# headers=merged_headers,
# json=json_body,
# data=data,
# files=files,
# params=params,
# stream=stream,
# verify=self.verify_ssl,
# )
end_time = time.perf_counter()
total_duration += end_time - start_time
response_list.append(response)
return {"duration": total_duration, "response_list": response_list}
else:
return http_function(url, headers=merged_headers, json=json_body, data=data, stream=stream)
# return session.get(url, headers=merged_headers, json=json_body, data=data, stream=stream)
# return requests.request(
# method=method,
# url=url,
# headers=merged_headers,
# json=json_body,
# data=data,
# files=files,
# params=params,
# stream=stream,
# verify=self.verify_ssl,
# )
def request_json(
self,
method: str,
path: str,
*,
use_api_base: bool = True,
auth_kind: Optional[str] = "api",
headers: Optional[Dict[str, str]] = None,
json_body: Optional[Dict[str, Any]] = None,
data: Any = None,
files: Any = None,
params: Optional[Dict[str, Any]] = None,
stream: bool = False,
) -> Dict[str, Any]:
response = self.request(
method,
path,
use_api_base=use_api_base,
auth_kind=auth_kind,
headers=headers,
json_body=json_body,
data=data,
files=files,
params=params,
stream=stream,
)
try:
return response.json()
except Exception as exc:
raise ValueError(f"Non-JSON response from {path}: {exc}") from exc
@staticmethod
def parse_json_bytes(raw: bytes) -> Dict[str, Any]:
try:
return json.loads(raw.decode("utf-8"))
except Exception as exc:
raise ValueError(f"Invalid JSON payload: {exc}") from exc
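
A brief usage sketch of the new `HttpClient` follows. The values are illustrative (admin endpoints additionally expect `login_token` to be populated by a login round trip, which is omitted here), and the import assumes the module is on the path under `admin/client/`.

```python
from http_client import HttpClient  # admin/client/http_client.py

client = HttpClient(host="127.0.0.1", port=9381, api_key="<api-key>")

# request_json() wraps request() and raises ValueError on a non-JSON body.
version = client.request_json("GET", "admin/version", auth_kind="admin")
print(version)

# iterations > 1 turns request() into a small benchmark helper: it returns the
# accumulated duration plus the raw Response objects instead of a single response.
result = client.request("GET", "system/ping", use_api_base=False, iterations=10)
print(result["duration"], len(result["response_list"]))
```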

623
admin/client/parser.py Normal file
View File

@ -0,0 +1,623 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from lark import Transformer
GRAMMAR = r"""
start: command
command: sql_command | meta_command
sql_command: login_user
| ping_server
| list_services
| show_service
| startup_service
| shutdown_service
| restart_service
| register_user
| list_users
| show_user
| drop_user
| alter_user
| create_user
| activate_user
| list_datasets
| list_agents
| create_role
| drop_role
| alter_role
| list_roles
| show_role
| grant_permission
| revoke_permission
| alter_user_role
| show_user_permission
| show_version
| grant_admin
| revoke_admin
| set_variable
| show_variable
| list_variables
| list_configs
| list_environments
| generate_key
| list_keys
| drop_key
| show_current_user
| set_default_llm
| set_default_vlm
| set_default_embedding
| set_default_reranker
| set_default_asr
| set_default_tts
| reset_default_llm
| reset_default_vlm
| reset_default_embedding
| reset_default_reranker
| reset_default_asr
| reset_default_tts
| create_model_provider
| drop_model_provider
| create_user_dataset_with_parser
| create_user_dataset_with_pipeline
| drop_user_dataset
| list_user_datasets
| list_user_dataset_files
| list_user_agents
| list_user_chats
| create_user_chat
| drop_user_chat
| list_user_model_providers
| list_user_default_models
| parse_dataset_docs
| parse_dataset_sync
| parse_dataset_async
| import_docs_into_dataset
| search_on_datasets
| benchmark
// meta command definition
meta_command: "\\" meta_command_name [meta_args]
meta_command_name: /[a-zA-Z?]+/
meta_args: (meta_arg)+
meta_arg: /[^\\s"']+/ | quoted_string
// command definition
LOGIN: "LOGIN"i
REGISTER: "REGISTER"i
LIST: "LIST"i
SERVICES: "SERVICES"i
SHOW: "SHOW"i
CREATE: "CREATE"i
SERVICE: "SERVICE"i
SHUTDOWN: "SHUTDOWN"i
STARTUP: "STARTUP"i
RESTART: "RESTART"i
USERS: "USERS"i
DROP: "DROP"i
USER: "USER"i
ALTER: "ALTER"i
ACTIVE: "ACTIVE"i
ADMIN: "ADMIN"i
PASSWORD: "PASSWORD"i
DATASET: "DATASET"i
DATASETS: "DATASETS"i
OF: "OF"i
AGENTS: "AGENTS"i
ROLE: "ROLE"i
ROLES: "ROLES"i
DESCRIPTION: "DESCRIPTION"i
GRANT: "GRANT"i
REVOKE: "REVOKE"i
ALL: "ALL"i
PERMISSION: "PERMISSION"i
TO: "TO"i
FROM: "FROM"i
FOR: "FOR"i
RESOURCES: "RESOURCES"i
ON: "ON"i
SET: "SET"i
RESET: "RESET"i
VERSION: "VERSION"i
VAR: "VAR"i
VARS: "VARS"i
CONFIGS: "CONFIGS"i
ENVS: "ENVS"i
KEY: "KEY"i
KEYS: "KEYS"i
GENERATE: "GENERATE"i
MODEL: "MODEL"i
MODELS: "MODELS"i
PROVIDER: "PROVIDER"i
PROVIDERS: "PROVIDERS"i
DEFAULT: "DEFAULT"i
CHATS: "CHATS"i
CHAT: "CHAT"i
FILES: "FILES"i
AS: "AS"i
PARSE: "PARSE"i
IMPORT: "IMPORT"i
INTO: "INTO"i
WITH: "WITH"i
PARSER: "PARSER"i
PIPELINE: "PIPELINE"i
SEARCH: "SEARCH"i
CURRENT: "CURRENT"i
LLM: "LLM"i
VLM: "VLM"i
EMBEDDING: "EMBEDDING"i
RERANKER: "RERANKER"i
ASR: "ASR"i
TTS: "TTS"i
ASYNC: "ASYNC"i
SYNC: "SYNC"i
BENCHMARK: "BENCHMARK"i
PING: "PING"i
login_user: LOGIN USER quoted_string ";"
list_services: LIST SERVICES ";"
show_service: SHOW SERVICE NUMBER ";"
startup_service: STARTUP SERVICE NUMBER ";"
shutdown_service: SHUTDOWN SERVICE NUMBER ";"
restart_service: RESTART SERVICE NUMBER ";"
register_user: REGISTER USER quoted_string AS quoted_string PASSWORD quoted_string ";"
list_users: LIST USERS ";"
drop_user: DROP USER quoted_string ";"
alter_user: ALTER USER PASSWORD quoted_string quoted_string ";"
show_user: SHOW USER quoted_string ";"
create_user: CREATE USER quoted_string quoted_string ";"
activate_user: ALTER USER ACTIVE quoted_string status ";"
list_datasets: LIST DATASETS OF quoted_string ";"
list_agents: LIST AGENTS OF quoted_string ";"
create_role: CREATE ROLE identifier [DESCRIPTION quoted_string] ";"
drop_role: DROP ROLE identifier ";"
alter_role: ALTER ROLE identifier SET DESCRIPTION quoted_string ";"
list_roles: LIST ROLES ";"
show_role: SHOW ROLE identifier ";"
grant_permission: GRANT identifier_list ON identifier TO ROLE identifier ";"
revoke_permission: REVOKE identifier_list ON identifier FROM ROLE identifier ";"
alter_user_role: ALTER USER quoted_string SET ROLE identifier ";"
show_user_permission: SHOW USER PERMISSION quoted_string ";"
show_version: SHOW VERSION ";"
grant_admin: GRANT ADMIN quoted_string ";"
revoke_admin: REVOKE ADMIN quoted_string ";"
generate_key: GENERATE KEY FOR USER quoted_string ";"
list_keys: LIST KEYS OF quoted_string ";"
drop_key: DROP KEY quoted_string OF quoted_string ";"
set_variable: SET VAR identifier identifier ";"
show_variable: SHOW VAR identifier ";"
list_variables: LIST VARS ";"
list_configs: LIST CONFIGS ";"
list_environments: LIST ENVS ";"
benchmark: BENCHMARK NUMBER NUMBER user_statement
user_statement: ping_server
| show_current_user
| create_model_provider
| drop_model_provider
| set_default_llm
| set_default_vlm
| set_default_embedding
| set_default_reranker
| set_default_asr
| set_default_tts
| reset_default_llm
| reset_default_vlm
| reset_default_embedding
| reset_default_reranker
| reset_default_asr
| reset_default_tts
| create_user_dataset_with_parser
| create_user_dataset_with_pipeline
| drop_user_dataset
| list_user_datasets
| list_user_dataset_files
| list_user_agents
| list_user_chats
| create_user_chat
| drop_user_chat
| list_user_model_providers
| list_user_default_models
| import_docs_into_dataset
| search_on_datasets
ping_server: PING ";"
show_current_user: SHOW CURRENT USER ";"
create_model_provider: CREATE MODEL PROVIDER quoted_string quoted_string ";"
drop_model_provider: DROP MODEL PROVIDER quoted_string ";"
set_default_llm: SET DEFAULT LLM quoted_string ";"
set_default_vlm: SET DEFAULT VLM quoted_string ";"
set_default_embedding: SET DEFAULT EMBEDDING quoted_string ";"
set_default_reranker: SET DEFAULT RERANKER quoted_string ";"
set_default_asr: SET DEFAULT ASR quoted_string ";"
set_default_tts: SET DEFAULT TTS quoted_string ";"
reset_default_llm: RESET DEFAULT LLM ";"
reset_default_vlm: RESET DEFAULT VLM ";"
reset_default_embedding: RESET DEFAULT EMBEDDING ";"
reset_default_reranker: RESET DEFAULT RERANKER ";"
reset_default_asr: RESET DEFAULT ASR ";"
reset_default_tts: RESET DEFAULT TTS ";"
list_user_datasets: LIST DATASETS ";"
create_user_dataset_with_parser: CREATE DATASET quoted_string WITH EMBEDDING quoted_string PARSER quoted_string ";"
create_user_dataset_with_pipeline: CREATE DATASET quoted_string WITH EMBEDDING quoted_string PIPELINE quoted_string ";"
drop_user_dataset: DROP DATASET quoted_string ";"
list_user_dataset_files: LIST FILES OF DATASET quoted_string ";"
list_user_agents: LIST AGENTS ";"
list_user_chats: LIST CHATS ";"
create_user_chat: CREATE CHAT quoted_string ";"
drop_user_chat: DROP CHAT quoted_string ";"
list_user_model_providers: LIST MODEL PROVIDERS ";"
list_user_default_models: LIST DEFAULT MODELS ";"
import_docs_into_dataset: IMPORT quoted_string INTO DATASET quoted_string ";"
search_on_datasets: SEARCH quoted_string ON DATASETS quoted_string ";"
parse_dataset_docs: PARSE quoted_string OF DATASET quoted_string ";"
parse_dataset_sync: PARSE DATASET quoted_string SYNC ";"
parse_dataset_async: PARSE DATASET quoted_string ASYNC ";"
identifier_list: identifier ("," identifier)*
identifier: WORD
quoted_string: QUOTED_STRING
status: WORD
QUOTED_STRING: /'[^']+'/ | /"[^"]+"/
WORD: /[a-zA-Z0-9_\-\.]+/
NUMBER: /[0-9]+/
%import common.WS
%ignore WS
"""
class RAGFlowCLITransformer(Transformer):
def start(self, items):
return items[0]
def command(self, items):
return items[0]
def login_user(self, items):
email = items[2].children[0].strip("'\"")
return {"type": "login_user", "email": email}
def ping_server(self, items):
return {"type": "ping_server"}
def list_services(self, items):
result = {"type": "list_services"}
return result
def show_service(self, items):
service_id = int(items[2])
return {"type": "show_service", "number": service_id}
def startup_service(self, items):
service_id = int(items[2])
return {"type": "startup_service", "number": service_id}
def shutdown_service(self, items):
service_id = int(items[2])
return {"type": "shutdown_service", "number": service_id}
def restart_service(self, items):
service_id = int(items[2])
return {"type": "restart_service", "number": service_id}
def register_user(self, items):
user_name: str = items[2].children[0].strip("'\"")
nickname: str = items[4].children[0].strip("'\"")
password: str = items[6].children[0].strip("'\"")
return {"type": "register_user", "user_name": user_name, "nickname": nickname, "password": password}
def list_users(self, items):
return {"type": "list_users"}
def show_user(self, items):
user_name = items[2]
return {"type": "show_user", "user_name": user_name}
def drop_user(self, items):
user_name = items[2]
return {"type": "drop_user", "user_name": user_name}
def alter_user(self, items):
user_name = items[3]
new_password = items[4]
return {"type": "alter_user", "user_name": user_name, "password": new_password}
def create_user(self, items):
user_name = items[2]
password = items[3]
return {"type": "create_user", "user_name": user_name, "password": password, "role": "user"}
def activate_user(self, items):
user_name = items[3]
activate_status = items[4]
return {"type": "activate_user", "activate_status": activate_status, "user_name": user_name}
def list_datasets(self, items):
user_name = items[3]
return {"type": "list_datasets", "user_name": user_name}
def list_agents(self, items):
user_name = items[3]
return {"type": "list_agents", "user_name": user_name}
def create_role(self, items):
role_name = items[2]
if len(items) > 4:
description = items[4]
return {"type": "create_role", "role_name": role_name, "description": description}
else:
return {"type": "create_role", "role_name": role_name}
def drop_role(self, items):
role_name = items[2]
return {"type": "drop_role", "role_name": role_name}
def alter_role(self, items):
role_name = items[2]
description = items[5]
return {"type": "alter_role", "role_name": role_name, "description": description}
def list_roles(self, items):
return {"type": "list_roles"}
def show_role(self, items):
role_name = items[2]
return {"type": "show_role", "role_name": role_name}
def grant_permission(self, items):
action_list = items[1]
resource = items[3]
role_name = items[6]
return {"type": "grant_permission", "role_name": role_name, "resource": resource, "actions": action_list}
def revoke_permission(self, items):
action_list = items[1]
resource = items[3]
role_name = items[6]
return {"type": "revoke_permission", "role_name": role_name, "resource": resource, "actions": action_list}
def alter_user_role(self, items):
user_name = items[2]
role_name = items[5]
return {"type": "alter_user_role", "user_name": user_name, "role_name": role_name}
def show_user_permission(self, items):
user_name = items[3]
return {"type": "show_user_permission", "user_name": user_name}
def show_version(self, items):
return {"type": "show_version"}
def grant_admin(self, items):
user_name = items[2]
return {"type": "grant_admin", "user_name": user_name}
def revoke_admin(self, items):
user_name = items[2]
return {"type": "revoke_admin", "user_name": user_name}
def generate_key(self, items):
user_name = items[4]
return {"type": "generate_key", "user_name": user_name}
def list_keys(self, items):
user_name = items[3]
return {"type": "list_keys", "user_name": user_name}
def drop_key(self, items):
key = items[2]
user_name = items[4]
return {"type": "drop_key", "key": key, "user_name": user_name}
def set_variable(self, items):
var_name = items[2]
var_value = items[3]
return {"type": "set_variable", "var_name": var_name, "var_value": var_value}
def show_variable(self, items):
var_name = items[2]
return {"type": "show_variable", "var_name": var_name}
def list_variables(self, items):
return {"type": "list_variables"}
def list_configs(self, items):
return {"type": "list_configs"}
def list_environments(self, items):
return {"type": "list_environments"}
def create_model_provider(self, items):
provider_name = items[3].children[0].strip("'\"")
provider_key = items[4].children[0].strip("'\"")
return {"type": "create_model_provider", "provider_name": provider_name, "provider_key": provider_key}
def drop_model_provider(self, items):
provider_name = items[3].children[0].strip("'\"")
return {"type": "drop_model_provider", "provider_name": provider_name}
def show_current_user(self, items):
return {"type": "show_current_user"}
def set_default_llm(self, items):
llm_id = items[3].children[0].strip("'\"")
return {"type": "set_default_model", "model_type": "llm_id", "model_id": llm_id}
def set_default_vlm(self, items):
vlm_id = items[3].children[0].strip("'\"")
return {"type": "set_default_model", "model_type": "img2txt_id", "model_id": vlm_id}
def set_default_embedding(self, items):
embedding_id = items[3].children[0].strip("'\"")
return {"type": "set_default_model", "model_type": "embd_id", "model_id": embedding_id}
def set_default_reranker(self, items):
reranker_id = items[3].children[0].strip("'\"")
return {"type": "set_default_model", "model_type": "reranker_id", "model_id": reranker_id}
def set_default_asr(self, items):
asr_id = items[3].children[0].strip("'\"")
return {"type": "set_default_model", "model_type": "asr_id", "model_id": asr_id}
def set_default_tts(self, items):
tts_id = items[3].children[0].strip("'\"")
return {"type": "set_default_model", "model_type": "tts_id", "model_id": tts_id}
def reset_default_llm(self, items):
return {"type": "reset_default_model", "model_type": "llm_id"}
def reset_default_vlm(self, items):
return {"type": "reset_default_model", "model_type": "img2txt_id"}
def reset_default_embedding(self, items):
return {"type": "reset_default_model", "model_type": "embd_id"}
def reset_default_reranker(self, items):
return {"type": "reset_default_model", "model_type": "reranker_id"}
def reset_default_asr(self, items):
return {"type": "reset_default_model", "model_type": "asr_id"}
def reset_default_tts(self, items):
return {"type": "reset_default_model", "model_type": "tts_id"}
def list_user_datasets(self, items):
return {"type": "list_user_datasets"}
def create_user_dataset_with_parser(self, items):
dataset_name = items[2].children[0].strip("'\"")
embedding = items[5].children[0].strip("'\"")
parser_type = items[7].children[0].strip("'\"")
return {"type": "create_user_dataset", "dataset_name": dataset_name, "embedding": embedding,
"parser_type": parser_type}
def create_user_dataset_with_pipeline(self, items):
dataset_name = items[2].children[0].strip("'\"")
embedding = items[5].children[0].strip("'\"")
pipeline = items[7].children[0].strip("'\"")
return {"type": "create_user_dataset", "dataset_name": dataset_name, "embedding": embedding,
"pipeline": pipeline}
def drop_user_dataset(self, items):
dataset_name = items[2].children[0].strip("'\"")
return {"type": "drop_user_dataset", "dataset_name": dataset_name}
def list_user_dataset_files(self, items):
dataset_name = items[4].children[0].strip("'\"")
return {"type": "list_user_dataset_files", "dataset_name": dataset_name}
def list_user_agents(self, items):
return {"type": "list_user_agents"}
def list_user_chats(self, items):
return {"type": "list_user_chats"}
def create_user_chat(self, items):
chat_name = items[2].children[0].strip("'\"")
return {"type": "create_user_chat", "chat_name": chat_name}
def drop_user_chat(self, items):
chat_name = items[2].children[0].strip("'\"")
return {"type": "drop_user_chat", "chat_name": chat_name}
def list_user_model_providers(self, items):
return {"type": "list_user_model_providers"}
def list_user_default_models(self, items):
return {"type": "list_user_default_models"}
def parse_dataset_docs(self, items):
document_list_str = items[1].children[0].strip("'\"")
document_names = document_list_str.split(",")
if len(document_names) == 1:
document_names = document_names[0]
document_names = document_names.split(" ")
dataset_name = items[4].children[0].strip("'\"")
return {"type": "parse_dataset_docs", "dataset_name": dataset_name, "document_names": document_names}
def parse_dataset_sync(self, items):
dataset_name = items[2].children[0].strip("'\"")
return {"type": "parse_dataset", "dataset_name": dataset_name, "method": "sync"}
def parse_dataset_async(self, items):
dataset_name = items[2].children[0].strip("'\"")
return {"type": "parse_dataset", "dataset_name": dataset_name, "method": "async"}
def import_docs_into_dataset(self, items):
document_list_str = items[1].children[0].strip("'\"")
document_paths = document_list_str.split(",")
if len(document_paths) == 1:
document_paths = document_paths[0]
document_paths = document_paths.split(" ")
dataset_name = items[4].children[0].strip("'\"")
return {"type": "import_docs_into_dataset", "dataset_name": dataset_name, "document_paths": document_paths}
def search_on_datasets(self, items):
question = items[1].children[0].strip("'\"")
datasets_str = items[4].children[0].strip("'\"")
datasets = datasets_str.split(",")
if len(datasets) == 1:
datasets = datasets[0]
datasets = datasets.split(" ")
return {"type": "search_on_datasets", "datasets": datasets, "question": question}
def benchmark(self, items):
concurrency: int = int(items[1])
iterations: int = int(items[2])
command = items[3].children[0]
return {"type": "benchmark", "concurrency": concurrency, "iterations": iterations, "command": command}
def action_list(self, items):
return items
def meta_command(self, items):
command_name = str(items[0]).lower()
args = items[1:] if len(items) > 1 else []
# handle quoted parameter
parsed_args = []
for arg in args:
if hasattr(arg, "value"):
parsed_args.append(arg.value)
else:
parsed_args.append(str(arg))
return {"type": "meta", "command": command_name, "args": parsed_args}
def meta_command_name(self, items):
return items[0]
def meta_args(self, items):
return items
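
For reference, a minimal sketch of driving this grammar and transformer the same way `RAGFlowCLI.__init__` does in `ragflow_cli.py` below; the exact keyword casing accepted depends on the terminal definitions earlier in `GRAMMAR`, and the sample statements are purely illustrative:

```python
# Illustrative sketch, not part of the PR: parse a few CLI statements with the
# grammar and transformer defined above (mirrors RAGFlowCLI.__init__ below).
from lark import Lark
from parser import GRAMMAR, RAGFlowCLITransformer

cli_parser = Lark(GRAMMAR, start="start", parser="lalr", transformer=RAGFlowCLITransformer())

for stmt in ["LIST DATASETS;", "CREATE CHAT 'support-bot';", "SET DEFAULT LLM 'gpt-4o-mini';"]:
    # With the inline transformer, rules such as create_user_chat already come back
    # as plain dicts, e.g. {"type": "create_user_chat", "chat_name": "support-bot"}.
    print(stmt, "->", cli_parser.parse(stmt))
```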

View File

@ -20,5 +20,8 @@ test = [
"requests-toolbelt>=1.0.0", "requests-toolbelt>=1.0.0",
] ]
[tool.setuptools]
py-modules = ["ragflow_cli", "parser"]
[project.scripts] [project.scripts]
ragflow-cli = "admin_client:main" ragflow-cli = "ragflow_cli:main"

admin/client/ragflow_cli.py (new file, 322 lines)

@ -0,0 +1,322 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import sys
import argparse
import base64
import getpass
from cmd import Cmd
from typing import Any, Dict, List
import requests
import warnings
from Cryptodome.Cipher import PKCS1_v1_5 as Cipher_pkcs1_v1_5
from Cryptodome.PublicKey import RSA
from lark import Lark, Tree
from parser import GRAMMAR, RAGFlowCLITransformer
from http_client import HttpClient
from ragflow_client import RAGFlowClient, run_command
from user import login_user
warnings.filterwarnings("ignore", category=getpass.GetPassWarning)
def encrypt(input_string):
pub = "-----BEGIN PUBLIC KEY-----\nMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEArq9XTUSeYr2+N1h3Afl/z8Dse/2yD0ZGrKwx+EEEcdsBLca9Ynmx3nIB5obmLlSfmskLpBo0UACBmB5rEjBp2Q2f3AG3Hjd4B+gNCG6BDaawuDlgANIhGnaTLrIqWrrcm4EMzJOnAOI1fgzJRsOOUEfaS318Eq9OVO3apEyCCt0lOQK6PuksduOjVxtltDav+guVAA068NrPYmRNabVKRNLJpL8w4D44sfth5RvZ3q9t+6RTArpEtc5sh5ChzvqPOzKGMXW83C95TxmXqpbK6olN4RevSfVjEAgCydH6HN6OhtOQEcnrU97r9H0iZOWwbw3pVrZiUkuRD1R56Wzs2wIDAQAB\n-----END PUBLIC KEY-----"
pub_key = RSA.importKey(pub)
cipher = Cipher_pkcs1_v1_5.new(pub_key)
cipher_text = cipher.encrypt(base64.b64encode(input_string.encode("utf-8")))
return base64.b64encode(cipher_text).decode("utf-8")
def encode_to_base64(input_string):
base64_encoded = base64.b64encode(input_string.encode("utf-8"))
return base64_encoded.decode("utf-8")
class RAGFlowCLI(Cmd):
def __init__(self):
super().__init__()
self.parser = Lark(GRAMMAR, start="start", parser="lalr", transformer=RAGFlowCLITransformer())
self.command_history = []
self.account = "admin@ragflow.io"
self.account_password: str = "admin"
self.session = requests.Session()
self.host: str = ""
self.port: int = 0
self.mode: str = "admin"
self.ragflow_client = None
intro = r"""Type "\h" for help."""
prompt = "ragflow> "
def onecmd(self, command: str) -> bool:
try:
result = self.parse_command(command)
if isinstance(result, dict):
if "type" in result and result.get("type") == "empty":
return False
self.execute_command(result)
if isinstance(result, Tree):
return False
if result.get("type") == "meta" and result.get("command") in ["q", "quit", "exit"]:
return True
except KeyboardInterrupt:
print("\nUse '\\q' to quit")
except EOFError:
print("\nGoodbye!")
return True
return False
def emptyline(self) -> bool:
return False
def default(self, line: str) -> bool:
return self.onecmd(line)
def parse_command(self, command_str: str) -> dict[str, str]:
if not command_str.strip():
return {"type": "empty"}
self.command_history.append(command_str)
try:
result = self.parser.parse(command_str)
return result
except Exception as e:
return {"type": "error", "message": f"Parse error: {str(e)}"}
def verify_auth(self, arguments: dict, single_command: bool, auth: bool):
server_type = arguments.get("type", "admin")
http_client = HttpClient(arguments["host"], arguments["port"])
if not auth:
self.ragflow_client = RAGFlowClient(http_client, server_type)
return True
user_name = arguments["username"]
attempt_count = 3
if single_command:
attempt_count = 1
try_count = 0
while True:
try_count += 1
if try_count > attempt_count:
return False
if single_command:
user_password = arguments["password"]
else:
user_password = getpass.getpass(f"password for {user_name}: ").strip()
try:
token = login_user(http_client, server_type, user_name, user_password)
http_client.login_token = token
self.ragflow_client = RAGFlowClient(http_client, server_type)
return True
except Exception as e:
print(str(e))
print("Can't access server for login (connection failed)")
def _format_service_detail_table(self, data):
if isinstance(data, list):
return data
if not all([isinstance(v, list) for v in data.values()]):
# normal table
return data
# handle task_executor heartbeats map, for example {'name': [{'done': 2, 'now': timestamp1}, {'done': 3, 'now': timestamp2}]
task_executor_list = []
for k, v in data.items():
# display latest status
heartbeats = sorted(v, key=lambda x: x["now"], reverse=True)
task_executor_list.append(
{
"task_executor_name": k,
**heartbeats[0],
}
if heartbeats
else {"task_executor_name": k}
)
return task_executor_list
def _print_table_simple(self, data):
if not data:
print("No data to print")
return
if isinstance(data, dict):
# handle single row data
data = [data]
columns = list(set().union(*(d.keys() for d in data)))
columns.sort()
col_widths = {}
def get_string_width(text):
half_width_chars = " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\t\n\r"
width = 0
for char in text:
if char in half_width_chars:
width += 1
else:
width += 2
return width
for col in columns:
max_width = get_string_width(str(col))
for item in data:
value_len = get_string_width(str(item.get(col, "")))
if value_len > max_width:
max_width = value_len
col_widths[col] = max(2, max_width)
# Generate delimiter
separator = "+" + "+".join(["-" * (col_widths[col] + 2) for col in columns]) + "+"
# Print header
print(separator)
header = "|" + "|".join([f" {col:<{col_widths[col]}} " for col in columns]) + "|"
print(header)
print(separator)
# Print data
for item in data:
row = "|"
for col in columns:
value = str(item.get(col, ""))
if get_string_width(value) > col_widths[col]:
value = value[: col_widths[col] - 3] + "..."
row += f" {value:<{col_widths[col] - (get_string_width(value) - len(value))}} |"
print(row)
print(separator)
def run_interactive(self, args):
if self.verify_auth(args, single_command=False, auth=args["auth"]):
print(r"""
____ ___ ______________ ________ ____
/ __ \/ | / ____/ ____/ /___ _ __ / ____/ / / _/
/ /_/ / /| |/ / __/ /_ / / __ \ | /| / / / / / / / /
/ _, _/ ___ / /_/ / __/ / / /_/ / |/ |/ / / /___/ /____/ /
/_/ |_/_/ |_\____/_/ /_/\____/|__/|__/ \____/_____/___/
""")
self.cmdloop()
print("RAGFlow command line interface - Type '\\?' for help, '\\q' to quit")
def run_single_command(self, args):
if self.verify_auth(args, single_command=True, auth=args["auth"]):
command = args["command"]
result = self.parse_command(command)
self.execute_command(result)
def parse_connection_args(self, args: List[str]) -> Dict[str, Any]:
parser = argparse.ArgumentParser(description="RAGFlow CLI Client", add_help=False)
parser.add_argument("-h", "--host", default="127.0.0.1", help="Admin or RAGFlow service host")
parser.add_argument("-p", "--port", type=int, default=9381, help="Admin or RAGFlow service port")
parser.add_argument("-w", "--password", default="admin", type=str, help="Superuser password")
parser.add_argument("-t", "--type", default="admin", type=str, help="CLI mode, admin or user")
parser.add_argument("-u", "--username", default=None,
help="Username (email). In admin mode defaults to admin@ragflow.io, in user mode required.")
parser.add_argument("command", nargs="?", help="Single command")
try:
parsed_args, remaining_args = parser.parse_known_args(args)
# Determine username based on mode
username = parsed_args.username
if parsed_args.type == "admin":
if username is None:
username = "admin@ragflow.io"
if remaining_args:
if remaining_args[0] == "command":
command_str = ' '.join(remaining_args[1:]) + ';'
auth = True
if remaining_args[1] == "register":
auth = False
else:
if username is None:
print("Error: username (-u) is required in user mode")
return {"error": "Username required"}
return {
"host": parsed_args.host,
"port": parsed_args.port,
"password": parsed_args.password,
"type": parsed_args.type,
"username": username,
"command": command_str,
"auth": auth
}
else:
return {"error": "Invalid command"}
else:
auth = True
if username is None:
auth = False
return {
"host": parsed_args.host,
"port": parsed_args.port,
"type": parsed_args.type,
"username": username,
"auth": auth
}
except SystemExit:
return {"error": "Invalid connection arguments"}
def execute_command(self, parsed_command: Dict[str, Any]):
command_dict: dict
if isinstance(parsed_command, Tree):
command_dict = parsed_command.children[0]
else:
if parsed_command["type"] == "error":
print(f"Error: {parsed_command['message']}")
return
else:
command_dict = parsed_command
# print(f"Parsed command: {command_dict}")
run_command(self.ragflow_client, command_dict)
def main():
cli = RAGFlowCLI()
args = cli.parse_connection_args(sys.argv)
if "error" in args:
print("Error: Invalid connection arguments")
return
if "command" in args:
# single command mode
# for user mode, api key or password is ok
# for admin mode, only password
if "password" not in args:
print("Error: password is missing")
return
cli.run_single_command(args)
else:
cli.run_interactive(args)
if __name__ == "__main__":
main()
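
Worth noting how the connection parsing above is wired: `main()` hands `sys.argv` to `parse_connection_args` unchanged, so the program name fills the optional `command` positional and the literal word `command` among the remaining arguments is what selects single-command mode. A hedged sketch (all values are examples only):

```python
# Sketch only: what parse_connection_args sees for a single admin command.
cli = RAGFlowCLI()
argv = ["ragflow_cli.py", "-h", "127.0.0.1", "-p", "9381", "-w", "admin",
        "command", "list", "users"]
args = cli.parse_connection_args(argv)
# Per the code above this should yield host/port/password/type/username plus
# command="list users;" and auth=True, which run_single_command then executes.
print(args)
```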

File diff suppressed because it is too large.

admin/client/user.py (new file, 65 lines)

@ -0,0 +1,65 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from http_client import HttpClient
class AuthException(Exception):
def __init__(self, message, code=401):
super().__init__(message)
self.code = code
self.message = message
def encrypt_password(password_plain: str) -> str:
try:
from api.utils.crypt import crypt
except Exception as exc:
raise AuthException(
"Password encryption unavailable; install pycryptodomex (uv sync --python 3.12 --group test)."
) from exc
return crypt(password_plain)
def register_user(client: HttpClient, email: str, nickname: str, password: str) -> None:
password_enc = encrypt_password(password)
payload = {"email": email, "nickname": nickname, "password": password_enc}
res = client.request_json("POST", "/user/register", use_api_base=False, auth_kind=None, json_body=payload)
if res.get("code") == 0:
return
msg = res.get("message", "")
if "has already registered" in msg:
return
raise AuthException(f"Register failed: {msg}")
def login_user(client: HttpClient, server_type: str, email: str, password: str) -> str:
password_enc = encrypt_password(password)
payload = {"email": email, "password": password_enc}
if server_type == "admin":
response = client.request("POST", "/admin/login", use_api_base=True, auth_kind=None, json_body=payload)
else:
response = client.request("POST", "/user/login", use_api_base=False, auth_kind=None, json_body=payload)
try:
res = response.json()
except Exception as exc:
raise AuthException(f"Login failed: invalid JSON response ({exc})") from exc
if res.get("code") != 0:
raise AuthException(f"Login failed: {res.get('message')}")
token = response.headers.get("Authorization")
if not token:
raise AuthException("Login failed: missing Authorization header")
return token
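
As a usage sketch (assumptions: a local admin service on the default port and the default superuser credentials used elsewhere in this PR), the helpers above combine with `HttpClient` roughly like this:

```python
# Hedged example, not part of the diff: authenticate against the admin server.
from http_client import HttpClient
from user import login_user, AuthException

client = HttpClient("127.0.0.1", 9381)   # host/port as in the ragflow_cli.py defaults
try:
    token = login_user(client, "admin", "admin@ragflow.io", "admin")
    client.login_token = token           # same wiring as RAGFlowCLI.verify_auth
    print("Authorization token:", token[:16] + "...")
except AuthException as exc:
    print("login failed:", exc.message)
```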

View File

@ -14,10 +14,12 @@
 # limitations under the License.
 #
+import time
+start_ts = time.time()
 import os
 import signal
 import logging
-import time
 import threading
 import traceback
 import faulthandler
@ -66,7 +68,7 @@ if __name__ == '__main__':
     SERVICE_CONFIGS.configs = load_configurations(SERVICE_CONF)
     try:
-        logging.info("RAGFlow Admin service start...")
+        logging.info(f"RAGFlow admin is ready after {time.time() - start_ts}s initialization.")
         run_simple(
             hostname="0.0.0.0",
             port=9381,

View File

@ -15,29 +15,34 @@
# #
import secrets import secrets
import logging
from typing import Any
from flask import Blueprint, request from common.time_utils import current_timestamp, datetime_format
from datetime import datetime
from flask import Blueprint, Response, request
from flask_login import current_user, login_required, logout_user from flask_login import current_user, login_required, logout_user
from auth import login_verify, login_admin, check_admin_auth from auth import login_verify, login_admin, check_admin_auth
from responses import success_response, error_response from responses import success_response, error_response
from services import UserMgr, ServiceMgr, UserServiceMgr from services import UserMgr, ServiceMgr, UserServiceMgr, SettingsMgr, ConfigMgr, EnvironmentsMgr, SandboxMgr
from roles import RoleMgr from roles import RoleMgr
from api.common.exceptions import AdminException from api.common.exceptions import AdminException
from common.versions import get_ragflow_version from common.versions import get_ragflow_version
from api.utils.api_utils import generate_confirmation_token
admin_bp = Blueprint('admin', __name__, url_prefix='/api/v1/admin') admin_bp = Blueprint("admin", __name__, url_prefix="/api/v1/admin")
@admin_bp.route('/ping', methods=['GET']) @admin_bp.route("/ping", methods=["GET"])
def ping(): def ping():
return success_response('PONG') return success_response("PONG")
@admin_bp.route('/login', methods=['POST']) @admin_bp.route("/login", methods=["POST"])
def login(): def login():
if not request.json: if not request.json:
return error_response('Authorize admin failed.' ,400) return error_response("Authorize admin failed.", 400)
try: try:
email = request.json.get("email", "") email = request.json.get("email", "")
password = request.json.get("password", "") password = request.json.get("password", "")
@ -46,7 +51,7 @@ def login():
return error_response(str(e), 500) return error_response(str(e), 500)
@admin_bp.route('/logout', methods=['GET']) @admin_bp.route("/logout", methods=["GET"])
@login_required @login_required
def logout(): def logout():
try: try:
@ -58,7 +63,7 @@ def logout():
return error_response(str(e), 500) return error_response(str(e), 500)
@admin_bp.route('/auth', methods=['GET']) @admin_bp.route("/auth", methods=["GET"])
@login_verify @login_verify
def auth_admin(): def auth_admin():
try: try:
@ -67,7 +72,7 @@ def auth_admin():
return error_response(str(e), 500) return error_response(str(e), 500)
@admin_bp.route('/users', methods=['GET']) @admin_bp.route("/users", methods=["GET"])
@login_required @login_required
@check_admin_auth @check_admin_auth
def list_users(): def list_users():
@ -78,18 +83,18 @@ def list_users():
return error_response(str(e), 500) return error_response(str(e), 500)
@admin_bp.route('/users', methods=['POST']) @admin_bp.route("/users", methods=["POST"])
@login_required @login_required
@check_admin_auth @check_admin_auth
def create_user(): def create_user():
try: try:
data = request.get_json() data = request.get_json()
if not data or 'username' not in data or 'password' not in data: if not data or "username" not in data or "password" not in data:
return error_response("Username and password are required", 400) return error_response("Username and password are required", 400)
username = data['username'] username = data["username"]
password = data['password'] password = data["password"]
role = data.get('role', 'user') role = data.get("role", "user")
res = UserMgr.create_user(username, password, role) res = UserMgr.create_user(username, password, role)
if res["success"]: if res["success"]:
@ -105,7 +110,7 @@ def create_user():
return error_response(str(e)) return error_response(str(e))
@admin_bp.route('/users/<username>', methods=['DELETE']) @admin_bp.route("/users/<username>", methods=["DELETE"])
@login_required @login_required
@check_admin_auth @check_admin_auth
def delete_user(username): def delete_user(username):
@ -122,16 +127,16 @@ def delete_user(username):
return error_response(str(e), 500) return error_response(str(e), 500)
@admin_bp.route('/users/<username>/password', methods=['PUT']) @admin_bp.route("/users/<username>/password", methods=["PUT"])
@login_required @login_required
@check_admin_auth @check_admin_auth
def change_password(username): def change_password(username):
try: try:
data = request.get_json() data = request.get_json()
if not data or 'new_password' not in data: if not data or "new_password" not in data:
return error_response("New password is required", 400) return error_response("New password is required", 400)
new_password = data['new_password'] new_password = data["new_password"]
msg = UserMgr.update_user_password(username, new_password) msg = UserMgr.update_user_password(username, new_password)
return success_response(None, msg) return success_response(None, msg)
@ -141,15 +146,15 @@ def change_password(username):
return error_response(str(e), 500) return error_response(str(e), 500)
@admin_bp.route('/users/<username>/activate', methods=['PUT']) @admin_bp.route("/users/<username>/activate", methods=["PUT"])
@login_required @login_required
@check_admin_auth @check_admin_auth
def alter_user_activate_status(username): def alter_user_activate_status(username):
try: try:
data = request.get_json() data = request.get_json()
if not data or 'activate_status' not in data: if not data or "activate_status" not in data:
return error_response("Activation status is required", 400) return error_response("Activation status is required", 400)
activate_status = data['activate_status'] activate_status = data["activate_status"]
msg = UserMgr.update_user_activate_status(username, activate_status) msg = UserMgr.update_user_activate_status(username, activate_status)
return success_response(None, msg) return success_response(None, msg)
except AdminException as e: except AdminException as e:
@ -158,7 +163,39 @@ def alter_user_activate_status(username):
return error_response(str(e), 500) return error_response(str(e), 500)
@admin_bp.route('/users/<username>', methods=['GET']) @admin_bp.route("/users/<username>/admin", methods=["PUT"])
@login_required
@check_admin_auth
def grant_admin(username):
try:
if current_user.email == username:
return error_response(f"can't grant current user: {username}", 409)
msg = UserMgr.grant_admin(username)
return success_response(None, msg)
except AdminException as e:
return error_response(e.message, e.code)
except Exception as e:
return error_response(str(e), 500)
@admin_bp.route("/users/<username>/admin", methods=["DELETE"])
@login_required
@check_admin_auth
def revoke_admin(username):
try:
if current_user.email == username:
return error_response(f"can't grant current user: {username}", 409)
msg = UserMgr.revoke_admin(username)
return success_response(None, msg)
except AdminException as e:
return error_response(e.message, e.code)
except Exception as e:
return error_response(str(e), 500)
@admin_bp.route("/users/<username>", methods=["GET"])
@login_required @login_required
@check_admin_auth @check_admin_auth
def get_user_details(username): def get_user_details(username):
@ -172,7 +209,7 @@ def get_user_details(username):
return error_response(str(e), 500) return error_response(str(e), 500)
@admin_bp.route('/users/<username>/datasets', methods=['GET']) @admin_bp.route("/users/<username>/datasets", methods=["GET"])
@login_required @login_required
@check_admin_auth @check_admin_auth
def get_user_datasets(username): def get_user_datasets(username):
@ -186,7 +223,7 @@ def get_user_datasets(username):
return error_response(str(e), 500) return error_response(str(e), 500)
@admin_bp.route('/users/<username>/agents', methods=['GET']) @admin_bp.route("/users/<username>/agents", methods=["GET"])
@login_required @login_required
@check_admin_auth @check_admin_auth
def get_user_agents(username): def get_user_agents(username):
@ -200,7 +237,7 @@ def get_user_agents(username):
return error_response(str(e), 500) return error_response(str(e), 500)
@admin_bp.route('/services', methods=['GET']) @admin_bp.route("/services", methods=["GET"])
@login_required @login_required
@check_admin_auth @check_admin_auth
def get_services(): def get_services():
@ -211,7 +248,7 @@ def get_services():
return error_response(str(e), 500) return error_response(str(e), 500)
@admin_bp.route('/service_types/<service_type>', methods=['GET']) @admin_bp.route("/service_types/<service_type>", methods=["GET"])
@login_required @login_required
@check_admin_auth @check_admin_auth
def get_services_by_type(service_type_str): def get_services_by_type(service_type_str):
@ -222,7 +259,7 @@ def get_services_by_type(service_type_str):
return error_response(str(e), 500) return error_response(str(e), 500)
@admin_bp.route('/services/<service_id>', methods=['GET']) @admin_bp.route("/services/<service_id>", methods=["GET"])
@login_required @login_required
@check_admin_auth @check_admin_auth
def get_service(service_id): def get_service(service_id):
@ -233,7 +270,7 @@ def get_service(service_id):
return error_response(str(e), 500) return error_response(str(e), 500)
@admin_bp.route('/services/<service_id>', methods=['DELETE']) @admin_bp.route("/services/<service_id>", methods=["DELETE"])
@login_required @login_required
@check_admin_auth @check_admin_auth
def shutdown_service(service_id): def shutdown_service(service_id):
@ -244,7 +281,7 @@ def shutdown_service(service_id):
return error_response(str(e), 500) return error_response(str(e), 500)
@admin_bp.route('/services/<service_id>', methods=['PUT']) @admin_bp.route("/services/<service_id>", methods=["PUT"])
@login_required @login_required
@check_admin_auth @check_admin_auth
def restart_service(service_id): def restart_service(service_id):
@ -255,38 +292,38 @@ def restart_service(service_id):
return error_response(str(e), 500) return error_response(str(e), 500)
@admin_bp.route('/roles', methods=['POST']) @admin_bp.route("/roles", methods=["POST"])
@login_required @login_required
@check_admin_auth @check_admin_auth
def create_role(): def create_role():
try: try:
data = request.get_json() data = request.get_json()
if not data or 'role_name' not in data: if not data or "role_name" not in data:
return error_response("Role name is required", 400) return error_response("Role name is required", 400)
role_name: str = data['role_name'] role_name: str = data["role_name"]
description: str = data['description'] description: str = data["description"]
res = RoleMgr.create_role(role_name, description) res = RoleMgr.create_role(role_name, description)
return success_response(res) return success_response(res)
except Exception as e: except Exception as e:
return error_response(str(e), 500) return error_response(str(e), 500)
@admin_bp.route('/roles/<role_name>', methods=['PUT']) @admin_bp.route("/roles/<role_name>", methods=["PUT"])
@login_required @login_required
@check_admin_auth @check_admin_auth
def update_role(role_name: str): def update_role(role_name: str):
try: try:
data = request.get_json() data = request.get_json()
if not data or 'description' not in data: if not data or "description" not in data:
return error_response("Role description is required", 400) return error_response("Role description is required", 400)
description: str = data['description'] description: str = data["description"]
res = RoleMgr.update_role_description(role_name, description) res = RoleMgr.update_role_description(role_name, description)
return success_response(res) return success_response(res)
except Exception as e: except Exception as e:
return error_response(str(e), 500) return error_response(str(e), 500)
@admin_bp.route('/roles/<role_name>', methods=['DELETE']) @admin_bp.route("/roles/<role_name>", methods=["DELETE"])
@login_required @login_required
@check_admin_auth @check_admin_auth
def delete_role(role_name: str): def delete_role(role_name: str):
@ -297,7 +334,7 @@ def delete_role(role_name: str):
return error_response(str(e), 500) return error_response(str(e), 500)
@admin_bp.route('/roles', methods=['GET']) @admin_bp.route("/roles", methods=["GET"])
@login_required @login_required
@check_admin_auth @check_admin_auth
def list_roles(): def list_roles():
@ -308,7 +345,7 @@ def list_roles():
return error_response(str(e), 500) return error_response(str(e), 500)
@admin_bp.route('/roles/<role_name>/permission', methods=['GET']) @admin_bp.route("/roles/<role_name>/permission", methods=["GET"])
@login_required @login_required
@check_admin_auth @check_admin_auth
def get_role_permission(role_name: str): def get_role_permission(role_name: str):
@ -319,54 +356,54 @@ def get_role_permission(role_name: str):
return error_response(str(e), 500) return error_response(str(e), 500)
@admin_bp.route('/roles/<role_name>/permission', methods=['POST']) @admin_bp.route("/roles/<role_name>/permission", methods=["POST"])
@login_required @login_required
@check_admin_auth @check_admin_auth
def grant_role_permission(role_name: str): def grant_role_permission(role_name: str):
try: try:
data = request.get_json() data = request.get_json()
if not data or 'actions' not in data or 'resource' not in data: if not data or "actions" not in data or "resource" not in data:
return error_response("Permission is required", 400) return error_response("Permission is required", 400)
actions: list = data['actions'] actions: list = data["actions"]
resource: str = data['resource'] resource: str = data["resource"]
res = RoleMgr.grant_role_permission(role_name, actions, resource) res = RoleMgr.grant_role_permission(role_name, actions, resource)
return success_response(res) return success_response(res)
except Exception as e: except Exception as e:
return error_response(str(e), 500) return error_response(str(e), 500)
@admin_bp.route('/roles/<role_name>/permission', methods=['DELETE']) @admin_bp.route("/roles/<role_name>/permission", methods=["DELETE"])
@login_required @login_required
@check_admin_auth @check_admin_auth
def revoke_role_permission(role_name: str): def revoke_role_permission(role_name: str):
try: try:
data = request.get_json() data = request.get_json()
if not data or 'actions' not in data or 'resource' not in data: if not data or "actions" not in data or "resource" not in data:
return error_response("Permission is required", 400) return error_response("Permission is required", 400)
actions: list = data['actions'] actions: list = data["actions"]
resource: str = data['resource'] resource: str = data["resource"]
res = RoleMgr.revoke_role_permission(role_name, actions, resource) res = RoleMgr.revoke_role_permission(role_name, actions, resource)
return success_response(res) return success_response(res)
except Exception as e: except Exception as e:
return error_response(str(e), 500) return error_response(str(e), 500)
@admin_bp.route('/users/<user_name>/role', methods=['PUT']) @admin_bp.route("/users/<user_name>/role", methods=["PUT"])
@login_required @login_required
@check_admin_auth @check_admin_auth
def update_user_role(user_name: str): def update_user_role(user_name: str):
try: try:
data = request.get_json() data = request.get_json()
if not data or 'role_name' not in data: if not data or "role_name" not in data:
return error_response("Role name is required", 400) return error_response("Role name is required", 400)
role_name: str = data['role_name'] role_name: str = data["role_name"]
res = RoleMgr.update_user_role(user_name, role_name) res = RoleMgr.update_user_role(user_name, role_name)
return success_response(res) return success_response(res)
except Exception as e: except Exception as e:
return error_response(str(e), 500) return error_response(str(e), 500)
@admin_bp.route('/users/<user_name>/permission', methods=['GET']) @admin_bp.route("/users/<user_name>/permission", methods=["GET"])
@login_required @login_required
@check_admin_auth @check_admin_auth
def get_user_permission(user_name: str): def get_user_permission(user_name: str):
@ -376,7 +413,140 @@ def get_user_permission(user_name: str):
except Exception as e: except Exception as e:
return error_response(str(e), 500) return error_response(str(e), 500)
@admin_bp.route('/version', methods=['GET'])
@admin_bp.route("/variables", methods=["PUT"])
@login_required
@check_admin_auth
def set_variable():
try:
data = request.get_json()
if not data and "var_name" not in data:
return error_response("Var name is required", 400)
if "var_value" not in data:
return error_response("Var value is required", 400)
var_name: str = data["var_name"]
var_value: str = data["var_value"]
SettingsMgr.update_by_name(var_name, var_value)
return success_response(None, "Set variable successfully")
except AdminException as e:
return error_response(str(e), 400)
except Exception as e:
return error_response(str(e), 500)
@admin_bp.route("/variables", methods=["GET"])
@login_required
@check_admin_auth
def get_variable():
try:
if request.content_length is None or request.content_length == 0:
# list variables
res = list(SettingsMgr.get_all())
return success_response(res)
# get var
data = request.get_json()
if not data and "var_name" not in data:
return error_response("Var name is required", 400)
var_name: str = data["var_name"]
res = SettingsMgr.get_by_name(var_name)
return success_response(res)
except AdminException as e:
return error_response(str(e), 400)
except Exception as e:
return error_response(str(e), 500)
@admin_bp.route("/configs", methods=["GET"])
@login_required
@check_admin_auth
def get_config():
try:
res = list(ConfigMgr.get_all())
return success_response(res)
except AdminException as e:
return error_response(str(e), 400)
except Exception as e:
return error_response(str(e), 500)
@admin_bp.route("/environments", methods=["GET"])
@login_required
@check_admin_auth
def get_environments():
try:
res = list(EnvironmentsMgr.get_all())
return success_response(res)
except AdminException as e:
return error_response(str(e), 400)
except Exception as e:
return error_response(str(e), 500)
@admin_bp.route("/users/<username>/keys", methods=["POST"])
@login_required
@check_admin_auth
def generate_user_api_key(username: str) -> tuple[Response, int]:
try:
user_details: list[dict[str, Any]] = UserMgr.get_user_details(username)
if not user_details:
return error_response("User not found!", 404)
tenants: list[dict[str, Any]] = UserServiceMgr.get_user_tenants(username)
if not tenants:
return error_response("Tenant not found!", 404)
tenant_id: str = tenants[0]["tenant_id"]
key: str = generate_confirmation_token()
obj: dict[str, Any] = {
"tenant_id": tenant_id,
"token": key,
"beta": generate_confirmation_token().replace("ragflow-", "")[:32],
"create_time": current_timestamp(),
"create_date": datetime_format(datetime.now()),
"update_time": None,
"update_date": None,
}
if not UserMgr.save_api_key(obj):
return error_response("Failed to generate API key!", 500)
return success_response(obj, "API key generated successfully")
except AdminException as e:
return error_response(e.message, e.code)
except Exception as e:
return error_response(str(e), 500)
@admin_bp.route("/users/<username>/keys", methods=["GET"])
@login_required
@check_admin_auth
def get_user_api_keys(username: str) -> tuple[Response, int]:
try:
api_keys: list[dict[str, Any]] = UserMgr.get_user_api_key(username)
return success_response(api_keys, "Get user API keys")
except AdminException as e:
return error_response(e.message, e.code)
except Exception as e:
return error_response(str(e), 500)
@admin_bp.route("/users/<username>/keys/<key>", methods=["DELETE"])
@login_required
@check_admin_auth
def delete_user_api_key(username: str, key: str) -> tuple[Response, int]:
try:
deleted = UserMgr.delete_api_key(username, key)
if deleted:
return success_response(None, "API key deleted successfully")
else:
return error_response("API key not found or could not be deleted", 404)
except AdminException as e:
return error_response(e.message, e.code)
except Exception as e:
return error_response(str(e), 500)
@admin_bp.route("/version", methods=["GET"])
@login_required @login_required
@check_admin_auth @check_admin_auth
def show_version(): def show_version():
@ -385,3 +555,100 @@ def show_version():
return success_response(res) return success_response(res)
except Exception as e: except Exception as e:
return error_response(str(e), 500) return error_response(str(e), 500)
@admin_bp.route("/sandbox/providers", methods=["GET"])
@login_required
@check_admin_auth
def list_sandbox_providers():
"""List all available sandbox providers."""
try:
res = SandboxMgr.list_providers()
return success_response(res)
except AdminException as e:
return error_response(str(e), 400)
except Exception as e:
return error_response(str(e), 500)
@admin_bp.route("/sandbox/providers/<provider_id>/schema", methods=["GET"])
@login_required
@check_admin_auth
def get_sandbox_provider_schema(provider_id: str):
"""Get configuration schema for a specific provider."""
try:
res = SandboxMgr.get_provider_config_schema(provider_id)
return success_response(res)
except AdminException as e:
return error_response(str(e), 400)
except Exception as e:
return error_response(str(e), 500)
@admin_bp.route("/sandbox/config", methods=["GET"])
@login_required
@check_admin_auth
def get_sandbox_config():
"""Get current sandbox configuration."""
try:
res = SandboxMgr.get_config()
return success_response(res)
except AdminException as e:
return error_response(str(e), 400)
except Exception as e:
return error_response(str(e), 500)
@admin_bp.route("/sandbox/config", methods=["POST"])
@login_required
@check_admin_auth
def set_sandbox_config():
"""Set sandbox provider configuration."""
try:
data = request.get_json()
if not data:
logging.error("set_sandbox_config: Request body is required")
return error_response("Request body is required", 400)
provider_type = data.get("provider_type")
if not provider_type:
logging.error("set_sandbox_config: provider_type is required")
return error_response("provider_type is required", 400)
config = data.get("config", {})
set_active = data.get("set_active", True) # Default to True for backward compatibility
logging.info(f"set_sandbox_config: provider_type={provider_type}, set_active={set_active}")
logging.info(f"set_sandbox_config: config keys={list(config.keys())}")
res = SandboxMgr.set_config(provider_type, config, set_active)
return success_response(res, "Sandbox configuration updated successfully")
except AdminException as e:
logging.exception("set_sandbox_config AdminException")
return error_response(str(e), 400)
except Exception as e:
logging.exception("set_sandbox_config unexpected error")
return error_response(str(e), 500)
@admin_bp.route("/sandbox/test", methods=["POST"])
@login_required
@check_admin_auth
def test_sandbox_connection():
"""Test connection to sandbox provider."""
try:
data = request.get_json()
if not data:
return error_response("Request body is required", 400)
provider_type = data.get("provider_type")
if not provider_type:
return error_response("provider_type is required", 400)
config = data.get("config", {})
res = SandboxMgr.test_connection(provider_type, config)
return success_response(res)
except AdminException as e:
return error_response(str(e), 400)
except Exception as e:
return error_response(str(e), 500)
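
The endpoints above are plain JSON routes under the existing `/api/v1/admin` prefix and reuse the `login_required`/`check_admin_auth` guards, so as a rough sketch (assuming the Authorization token from `POST /login` is accepted the way the CLI's `HttpClient` sends it; the username below is a hypothetical example) they can be exercised directly with `requests`:

```python
# Rough sketch, not part of the diff: calling the new admin routes over HTTP.
import requests

BASE = "http://127.0.0.1:9381/api/v1/admin"   # url_prefix of the Blueprint above
headers = {"Authorization": "<token from POST /api/v1/admin/login>"}

# Providers registered in SandboxMgr.PROVIDER_REGISTRY
print(requests.get(f"{BASE}/sandbox/providers", headers=headers).json())

# Generate and then list API keys via the /users/<username>/keys routes
# ("someone@example.com" is only a placeholder account).
print(requests.post(f"{BASE}/users/someone@example.com/keys", headers=headers).json())
print(requests.get(f"{BASE}/users/someone@example.com/keys", headers=headers).json())
```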

View File

@ -14,16 +14,22 @@
# limitations under the License. # limitations under the License.
# #
import json
import os import os
import logging import logging
import re import re
from typing import Any
from werkzeug.security import check_password_hash from werkzeug.security import check_password_hash
from common.constants import ActiveEnum from common.constants import ActiveEnum
from api.db.services import UserService from api.db.services import UserService
from api.db.joint_services.user_account_service import create_new_user, delete_user_data from api.db.joint_services.user_account_service import create_new_user, delete_user_data
from api.db.services.canvas_service import UserCanvasService from api.db.services.canvas_service import UserCanvasService
from api.db.services.user_service import TenantService from api.db.services.user_service import TenantService, UserTenantService
from api.db.services.knowledgebase_service import KnowledgebaseService from api.db.services.knowledgebase_service import KnowledgebaseService
from api.db.services.system_settings_service import SystemSettingsService
from api.db.services.api_service import APITokenService
from api.db.db_models import APIToken
from api.utils.crypt import decrypt from api.utils.crypt import decrypt
from api.utils import health_utils from api.utils import health_utils
@ -37,13 +43,15 @@ class UserMgr:
users = UserService.get_all_users() users = UserService.get_all_users()
result = [] result = []
for user in users: for user in users:
result.append({ result.append(
'email': user.email, {
'nickname': user.nickname, "email": user.email,
'create_date': user.create_date, "nickname": user.nickname,
'is_active': user.is_active, "create_date": user.create_date,
'is_superuser': user.is_superuser, "is_active": user.is_active,
}) "is_superuser": user.is_superuser,
}
)
return result return result
@staticmethod @staticmethod
@ -52,19 +60,21 @@ class UserMgr:
users = UserService.query_user_by_email(username) users = UserService.query_user_by_email(username)
result = [] result = []
for user in users: for user in users:
result.append({ result.append(
'avatar': user.avatar, {
'email': user.email, "avatar": user.avatar,
'language': user.language, "email": user.email,
'last_login_time': user.last_login_time, "language": user.language,
'is_active': user.is_active, "last_login_time": user.last_login_time,
'is_anonymous': user.is_anonymous, "is_active": user.is_active,
'login_channel': user.login_channel, "is_anonymous": user.is_anonymous,
'status': user.status, "login_channel": user.login_channel,
'is_superuser': user.is_superuser, "status": user.status,
'create_date': user.create_date, "is_superuser": user.is_superuser,
'update_date': user.update_date "create_date": user.create_date,
}) "update_date": user.update_date,
}
)
return result return result
@staticmethod @staticmethod
@ -126,8 +136,8 @@ class UserMgr:
# format activate_status before handle # format activate_status before handle
_activate_status = activate_status.lower() _activate_status = activate_status.lower()
target_status = { target_status = {
'on': ActiveEnum.ACTIVE.value, "on": ActiveEnum.ACTIVE.value,
'off': ActiveEnum.INACTIVE.value, "off": ActiveEnum.INACTIVE.value,
}.get(_activate_status) }.get(_activate_status)
if not target_status: if not target_status:
raise AdminException(f"Invalid activate_status: {activate_status}") raise AdminException(f"Invalid activate_status: {activate_status}")
@ -137,9 +147,84 @@ class UserMgr:
UserService.update_user(usr.id, {"is_active": target_status}) UserService.update_user(usr.id, {"is_active": target_status})
return f"Turn {_activate_status} user activate status successfully!" return f"Turn {_activate_status} user activate status successfully!"
@staticmethod
def get_user_api_key(username: str) -> list[dict[str, Any]]:
# use email to find user. check exist and unique.
user_list: list[Any] = UserService.query_user_by_email(username)
if not user_list:
raise UserNotFoundError(username)
elif len(user_list) > 1:
raise AdminException(f"More than one user with username '{username}' found!")
usr: Any = user_list[0]
# tenant_id is typically the same as user_id for the owner tenant
tenant_id: str = usr.id
# Query all API keys for this tenant
api_keys: Any = APITokenService.query(tenant_id=tenant_id)
result: list[dict[str, Any]] = []
for key in api_keys:
result.append(key.to_dict())
return result
@staticmethod
def save_api_key(api_key: dict[str, Any]) -> bool:
return APITokenService.save(**api_key)
@staticmethod
def delete_api_key(username: str, key: str) -> bool:
# use email to find user. check exist and unique.
user_list: list[Any] = UserService.query_user_by_email(username)
if not user_list:
raise UserNotFoundError(username)
elif len(user_list) > 1:
raise AdminException(f"Exist more than 1 user: {username}!")
usr: Any = user_list[0]
# tenant_id is typically the same as user_id for the owner tenant
tenant_id: str = usr.id
# Delete the API key
deleted_count: int = APITokenService.filter_delete([APIToken.tenant_id == tenant_id, APIToken.token == key])
return deleted_count > 0
@staticmethod
def grant_admin(username: str):
# use email to find user. check exist and unique.
user_list = UserService.query_user_by_email(username)
if not user_list:
raise UserNotFoundError(username)
elif len(user_list) > 1:
raise AdminException(f"Exist more than 1 user: {username}!")
# check whether the user is already a superuser
usr = user_list[0]
if usr.is_superuser:
return f"{usr} is already superuser!"
# update is_superuser
UserService.update_user(usr.id, {"is_superuser": True})
return "Grant successfully!"
@staticmethod
def revoke_admin(username: str):
# use email to find user. check exist and unique.
user_list = UserService.query_user_by_email(username)
if not user_list:
raise UserNotFoundError(username)
elif len(user_list) > 1:
raise AdminException(f"Exist more than 1 user: {username}!")
# check whether the user is currently a superuser
usr = user_list[0]
if not usr.is_superuser:
return f"{usr} isn't superuser, yet!"
# update is_superuser
UserService.update_user(usr.id, {"is_superuser": False})
return "Revoke successfully!"
class UserServiceMgr: class UserServiceMgr:
@staticmethod @staticmethod
def get_user_datasets(username): def get_user_datasets(username):
# use email to find user. # use email to find user.
@ -169,39 +254,43 @@ class UserServiceMgr:
tenant_ids = [m["tenant_id"] for m in tenants] tenant_ids = [m["tenant_id"] for m in tenants]
# filter permitted agents and owned agents # filter permitted agents and owned agents
res = UserCanvasService.get_all_agents_by_tenant_ids(tenant_ids, usr.id) res = UserCanvasService.get_all_agents_by_tenant_ids(tenant_ids, usr.id)
return [{ return [{"title": r["title"], "permission": r["permission"], "canvas_category": r["canvas_category"].split("_")[0], "avatar": r["avatar"]} for r in res]
'title': r['title'],
'permission': r['permission'], @staticmethod
'canvas_category': r['canvas_category'].split('_')[0], def get_user_tenants(email: str) -> list[dict[str, Any]]:
'avatar': r['avatar'] users: list[Any] = UserService.query_user_by_email(email)
} for r in res] if not users:
raise UserNotFoundError(email)
user: Any = users[0]
tenants: list[dict[str, Any]] = UserTenantService.get_tenants_by_user_id(user.id)
return tenants
class ServiceMgr: class ServiceMgr:
@staticmethod @staticmethod
def get_all_services(): def get_all_services():
doc_engine = os.getenv('DOC_ENGINE', 'elasticsearch') doc_engine = os.getenv("DOC_ENGINE", "elasticsearch")
result = [] result = []
configs = SERVICE_CONFIGS.configs configs = SERVICE_CONFIGS.configs
for service_id, config in enumerate(configs): for service_id, config in enumerate(configs):
config_dict = config.to_dict() config_dict = config.to_dict()
if config_dict['service_type'] == 'retrieval': if config_dict["service_type"] == "retrieval":
if config_dict['extra']['retrieval_type'] != doc_engine: if config_dict["extra"]["retrieval_type"] != doc_engine:
continue continue
try: try:
service_detail = ServiceMgr.get_service_details(service_id) service_detail = ServiceMgr.get_service_details(service_id)
if "status" in service_detail: if "status" in service_detail:
config_dict['status'] = service_detail['status'] config_dict["status"] = service_detail["status"]
else: else:
config_dict['status'] = 'timeout' config_dict["status"] = "timeout"
except Exception as e: except Exception as e:
logging.warning(f"Can't get service details, error: {e}") logging.warning(f"Can't get service details, error: {e}")
config_dict['status'] = 'timeout' config_dict["status"] = "timeout"
if not config_dict['host']: if not config_dict["host"]:
config_dict['host'] = '-' config_dict["host"] = "-"
if not config_dict['port']: if not config_dict["port"]:
config_dict['port'] = '-' config_dict["port"] = "-"
result.append(config_dict) result.append(config_dict)
return result return result
@ -217,11 +306,18 @@ class ServiceMgr:
raise AdminException(f"invalid service_index: {service_idx}") raise AdminException(f"invalid service_index: {service_idx}")
service_config = configs[service_idx] service_config = configs[service_idx]
service_info = {'name': service_config.name, 'detail_func_name': service_config.detail_func_name}
detail_func = getattr(health_utils, service_info.get('detail_func_name')) # exclude retrieval service if retrieval_type is not matched
doc_engine = os.getenv("DOC_ENGINE", "elasticsearch")
if service_config.service_type == "retrieval":
if service_config.retrieval_type != doc_engine:
raise AdminException(f"invalid service_index: {service_idx}")
service_info = {"name": service_config.name, "detail_func_name": service_config.detail_func_name}
detail_func = getattr(health_utils, service_info.get("detail_func_name"))
res = detail_func() res = detail_func()
res.update({'service_name': service_info.get('name')}) res.update({"service_name": service_info.get("name")})
return res return res
@staticmethod @staticmethod
@ -231,3 +327,397 @@ class ServiceMgr:
@staticmethod @staticmethod
def restart_service(service_id: int): def restart_service(service_id: int):
raise AdminException("restart_service: not implemented") raise AdminException("restart_service: not implemented")
class SettingsMgr:
@staticmethod
def get_all():
settings = SystemSettingsService.get_all()
result = []
for setting in settings:
result.append(
{
"name": setting.name,
"source": setting.source,
"data_type": setting.data_type,
"value": setting.value,
}
)
return result
@staticmethod
def get_by_name(name: str):
settings = SystemSettingsService.get_by_name(name)
if len(settings) == 0:
raise AdminException(f"Can't get setting: {name}")
result = []
for setting in settings:
result.append(
{
"name": setting.name,
"source": setting.source,
"data_type": setting.data_type,
"value": setting.value,
}
)
return result
@staticmethod
def update_by_name(name: str, value: str):
settings = SystemSettingsService.get_by_name(name)
if len(settings) == 1:
setting = settings[0]
setting.value = value
setting_dict = setting.to_dict()
SystemSettingsService.update_by_name(name, setting_dict)
elif len(settings) > 1:
raise AdminException(f"Can't update more than 1 setting: {name}")
else:
# Create new setting if it doesn't exist
# Determine data_type based on name and value
if name.startswith("sandbox."):
data_type = "json"
elif name.endswith(".enabled"):
data_type = "boolean"
else:
data_type = "string"
new_setting = {
"name": name,
"value": str(value),
"source": "admin",
"data_type": data_type,
}
SystemSettingsService.save(**new_setting)
class ConfigMgr:
@staticmethod
def get_all():
result = []
configs = SERVICE_CONFIGS.configs
for config in configs:
config_dict = config.to_dict()
result.append(config_dict)
return result
class EnvironmentsMgr:
@staticmethod
def get_all():
result = []
env_kv = {"env": "DOC_ENGINE", "value": os.getenv("DOC_ENGINE")}
result.append(env_kv)
env_kv = {"env": "DEFAULT_SUPERUSER_EMAIL", "value": os.getenv("DEFAULT_SUPERUSER_EMAIL", "admin@ragflow.io")}
result.append(env_kv)
env_kv = {"env": "DB_TYPE", "value": os.getenv("DB_TYPE", "mysql")}
result.append(env_kv)
env_kv = {"env": "DEVICE", "value": os.getenv("DEVICE", "cpu")}
result.append(env_kv)
env_kv = {"env": "STORAGE_IMPL", "value": os.getenv("STORAGE_IMPL", "MINIO")}
result.append(env_kv)
return result
class SandboxMgr:
"""Manager for sandbox provider configuration and operations."""
# Provider registry with metadata
PROVIDER_REGISTRY = {
"self_managed": {
"name": "Self-Managed",
"description": "On-premise deployment using Daytona/Docker",
"tags": ["self-hosted", "low-latency", "secure"],
},
"aliyun_codeinterpreter": {
"name": "Aliyun Code Interpreter",
"description": "Aliyun Function Compute Code Interpreter - Code execution in serverless microVMs",
"tags": ["saas", "cloud", "scalable", "aliyun"],
},
"e2b": {
"name": "E2B",
"description": "E2B Cloud - Code Execution Sandboxes",
"tags": ["saas", "fast", "global"],
},
}
@staticmethod
def list_providers():
"""List all available sandbox providers."""
result = []
for provider_id, metadata in SandboxMgr.PROVIDER_REGISTRY.items():
result.append({
"id": provider_id,
**metadata
})
return result
@staticmethod
def get_provider_config_schema(provider_id: str):
"""Get configuration schema for a specific provider."""
from agent.sandbox.providers import (
SelfManagedProvider,
AliyunCodeInterpreterProvider,
E2BProvider,
)
schemas = {
"self_managed": SelfManagedProvider.get_config_schema(),
"aliyun_codeinterpreter": AliyunCodeInterpreterProvider.get_config_schema(),
"e2b": E2BProvider.get_config_schema(),
}
if provider_id not in schemas:
raise AdminException(f"Unknown provider: {provider_id}")
return schemas.get(provider_id, {})
@staticmethod
def get_config():
"""Get current sandbox configuration."""
try:
# Get active provider type
provider_type_settings = SystemSettingsService.get_by_name("sandbox.provider_type")
if not provider_type_settings:
# Return default config if not set
provider_type = "self_managed"
else:
provider_type = provider_type_settings[0].value
# Get provider-specific config
provider_config_settings = SystemSettingsService.get_by_name(f"sandbox.{provider_type}")
if not provider_config_settings:
provider_config = {}
else:
try:
provider_config = json.loads(provider_config_settings[0].value)
except json.JSONDecodeError:
provider_config = {}
return {
"provider_type": provider_type,
"config": provider_config,
}
except Exception as e:
raise AdminException(f"Failed to get sandbox config: {str(e)}")
@staticmethod
def set_config(provider_type: str, config: dict, set_active: bool = True):
"""
Set sandbox provider configuration.
Args:
provider_type: Provider identifier (e.g., "self_managed", "e2b")
config: Provider configuration dictionary
set_active: If True, also update the active provider. If False,
only update the configuration without switching providers.
Default: True
Returns:
Dictionary with updated provider_type and config
"""
from agent.sandbox.providers import (
SelfManagedProvider,
AliyunCodeInterpreterProvider,
E2BProvider,
)
try:
# Validate provider type
if provider_type not in SandboxMgr.PROVIDER_REGISTRY:
raise AdminException(f"Unknown provider type: {provider_type}")
# Get provider schema for validation
schema = SandboxMgr.get_provider_config_schema(provider_type)
# Validate config against schema
for field_name, field_schema in schema.items():
if field_schema.get("required", False) and field_name not in config:
raise AdminException(f"Required field '{field_name}' is missing")
# Type validation
if field_name in config:
field_type = field_schema.get("type")
if field_type == "integer":
if not isinstance(config[field_name], int):
raise AdminException(f"Field '{field_name}' must be an integer")
elif field_type == "string":
if not isinstance(config[field_name], str):
raise AdminException(f"Field '{field_name}' must be a string")
elif field_type == "bool":
if not isinstance(config[field_name], bool):
raise AdminException(f"Field '{field_name}' must be a boolean")
# Range validation for integers
if field_type == "integer" and field_name in config:
min_val = field_schema.get("min")
max_val = field_schema.get("max")
if min_val is not None and config[field_name] < min_val:
raise AdminException(f"Field '{field_name}' must be >= {min_val}")
if max_val is not None and config[field_name] > max_val:
raise AdminException(f"Field '{field_name}' must be <= {max_val}")
# Provider-specific custom validation
provider_classes = {
"self_managed": SelfManagedProvider,
"aliyun_codeinterpreter": AliyunCodeInterpreterProvider,
"e2b": E2BProvider,
}
provider = provider_classes[provider_type]()
is_valid, error_msg = provider.validate_config(config)
if not is_valid:
raise AdminException(f"Provider validation failed: {error_msg}")
# Update provider_type only if set_active is True
if set_active:
SettingsMgr.update_by_name("sandbox.provider_type", provider_type)
# Always update the provider config
config_json = json.dumps(config)
SettingsMgr.update_by_name(f"sandbox.{provider_type}", config_json)
return {"provider_type": provider_type, "config": config}
except AdminException:
raise
except Exception as e:
raise AdminException(f"Failed to set sandbox config: {str(e)}")
@staticmethod
def test_connection(provider_type: str, config: dict):
"""
Test connection to sandbox provider by executing a simple Python script.
This creates a temporary sandbox instance and runs a test code to verify:
- Connection credentials are valid
- Sandbox can be created
- Code execution works correctly
Args:
provider_type: Provider identifier
config: Provider configuration dictionary
Returns:
dict with test results including stdout, stderr, exit_code, execution_time
"""
try:
from agent.sandbox.providers import (
SelfManagedProvider,
AliyunCodeInterpreterProvider,
E2BProvider,
)
# Instantiate provider based on type
provider_classes = {
"self_managed": SelfManagedProvider,
"aliyun_codeinterpreter": AliyunCodeInterpreterProvider,
"e2b": E2BProvider,
}
if provider_type not in provider_classes:
raise AdminException(f"Unknown provider type: {provider_type}")
provider = provider_classes[provider_type]()
# Initialize with config
if not provider.initialize(config):
raise AdminException(f"Failed to initialize provider '{provider_type}'")
# Create a temporary sandbox instance for testing
instance = provider.create_instance(template="python")
if not instance or instance.status != "READY":
raise AdminException(f"Failed to create sandbox instance. Status: {instance.status if instance else 'None'}")
# Simple test code that exercises basic Python functionality
test_code = """
# Test basic Python functionality
import sys
import json
import math
print("Python version:", sys.version)
print("Platform:", sys.platform)
# Test basic calculations
result = 2 + 2
print(f"2 + 2 = {result}")
# Test JSON operations
data = {"test": "data", "value": 123}
print(f"JSON dump: {json.dumps(data)}")
# Test math operations
print(f"Math.sqrt(16) = {math.sqrt(16)}")
# Test error handling
try:
x = 1 / 1
print("Division test: OK")
except Exception as e:
print(f"Error: {e}")
# Return success indicator
print("TEST_PASSED")
"""
# Execute test code with timeout
execution_result = provider.execute_code(
instance_id=instance.instance_id,
code=test_code,
language="python",
timeout=10 # 10 seconds timeout
)
# Clean up the test instance (if provider supports it)
try:
if hasattr(provider, 'terminate_instance'):
provider.terminate_instance(instance.instance_id)
logging.info(f"Cleaned up test instance {instance.instance_id}")
else:
logging.warning(f"Provider {provider_type} does not support terminate_instance, test instance may leak")
except Exception as cleanup_error:
logging.warning(f"Failed to cleanup test instance {instance.instance_id}: {cleanup_error}")
# Build detailed result message
success = execution_result.exit_code == 0 and "TEST_PASSED" in execution_result.stdout
message_parts = [
f"Test {success and 'PASSED' or 'FAILED'}",
f"Exit code: {execution_result.exit_code}",
f"Execution time: {execution_result.execution_time:.2f}s"
]
if execution_result.stdout.strip():
stdout_preview = execution_result.stdout.strip()[:200]
message_parts.append(f"Output: {stdout_preview}...")
if execution_result.stderr.strip():
stderr_preview = execution_result.stderr.strip()[:200]
message_parts.append(f"Errors: {stderr_preview}...")
message = " | ".join(message_parts)
return {
"success": success,
"message": message,
"details": {
"exit_code": execution_result.exit_code,
"execution_time": execution_result.execution_time,
"stdout": execution_result.stdout,
"stderr": execution_result.stderr,
}
}
except AdminException:
raise
except Exception as e:
import traceback
error_details = traceback.format_exc()
raise AdminException(f"Connection test failed: {str(e)}\\n\\nStack trace:\\n{error_details}")

View File

@ -283,7 +283,8 @@ class Canvas(Graph):
"sys.query": "", "sys.query": "",
"sys.user_id": tenant_id, "sys.user_id": tenant_id,
"sys.conversation_turns": 0, "sys.conversation_turns": 0,
"sys.files": [] "sys.files": [],
"sys.history": []
} }
self.variables = {} self.variables = {}
super().__init__(dsl, tenant_id, task_id) super().__init__(dsl, tenant_id, task_id)
@ -294,12 +295,15 @@ class Canvas(Graph):
self.history = self.dsl["history"] self.history = self.dsl["history"]
if "globals" in self.dsl: if "globals" in self.dsl:
self.globals = self.dsl["globals"] self.globals = self.dsl["globals"]
if "sys.history" not in self.globals:
self.globals["sys.history"] = []
else: else:
self.globals = { self.globals = {
"sys.query": "", "sys.query": "",
"sys.user_id": "", "sys.user_id": "",
"sys.conversation_turns": 0, "sys.conversation_turns": 0,
"sys.files": [] "sys.files": [],
"sys.history": []
} }
if "variables" in self.dsl: if "variables" in self.dsl:
self.variables = self.dsl["variables"] self.variables = self.dsl["variables"]
@ -340,21 +344,23 @@ class Canvas(Graph):
key = k[4:]
if key in self.variables:
variable = self.variables[key]
if variable["value"]: if variable["type"] == "string":
self.globals[k] = variable["value"] self.globals[k] = ""
variable["value"] = ""
elif variable["type"] == "number":
self.globals[k] = 0
variable["value"] = 0
elif variable["type"] == "boolean":
self.globals[k] = False
variable["value"] = False
elif variable["type"] == "object":
self.globals[k] = {}
variable["value"] = {}
elif variable["type"].startswith("array"):
self.globals[k] = []
variable["value"] = []
else: else:
if variable["type"] == "string": self.globals[k] = ""
self.globals[k] = ""
elif variable["type"] == "number":
self.globals[k] = 0
elif variable["type"] == "boolean":
self.globals[k] = False
elif variable["type"] == "object":
self.globals[k] = {}
elif variable["type"].startswith("array"):
self.globals[k] = []
else:
self.globals[k] = ""
else:
self.globals[k] = ""
@ -419,9 +425,15 @@ class Canvas(Graph):
loop = asyncio.get_running_loop()
tasks = []
max_concurrency = getattr(self._thread_pool, "_max_workers", 5)
sem = asyncio.Semaphore(max_concurrency)
def _run_async_in_thread(coro_func, **call_kwargs): async def _invoke_one(cpn_obj, sync_fn, call_kwargs, use_async: bool):
return asyncio.run(coro_func(**call_kwargs)) async with sem:
if use_async:
await cpn_obj.invoke_async(**(call_kwargs or {}))
return
await loop.run_in_executor(self._thread_pool, partial(sync_fn, **(call_kwargs or {})))
i = f
while i < t:
@ -447,11 +459,9 @@ class Canvas(Graph):
if task_fn is None:
continue
invoke_async = getattr(cpn, "invoke_async", None) fn_invoke_async = getattr(cpn, "_invoke_async", None)
if invoke_async and asyncio.iscoroutinefunction(invoke_async): use_async = (fn_invoke_async and asyncio.iscoroutinefunction(fn_invoke_async)) or asyncio.iscoroutinefunction(getattr(cpn, "_invoke", None))
tasks.append(loop.run_in_executor(self._thread_pool, partial(_run_async_in_thread, invoke_async, **(call_kwargs or {})))) tasks.append(asyncio.create_task(_invoke_one(cpn, task_fn, call_kwargs, use_async)))
else:
tasks.append(loop.run_in_executor(self._thread_pool, partial(task_fn, **(call_kwargs or {}))))
if tasks:
await asyncio.gather(*tasks)
@ -638,6 +648,7 @@ class Canvas(Graph):
"created_at": st, "created_at": st,
}) })
self.history.append(("assistant", self.get_component_obj(self.path[-1]).output())) self.history.append(("assistant", self.get_component_obj(self.path[-1]).output()))
self.globals["sys.history"].append(f"{self.history[-1][0]}: {self.history[-1][1]}")
elif "Task has been canceled" in self.error: elif "Task has been canceled" in self.error:
yield decorate("workflow_finished", yield decorate("workflow_finished",
{ {
@ -715,6 +726,7 @@ class Canvas(Graph):
def add_user_input(self, question):
self.history.append(("user", question))
self.globals["sys.history"].append(f"{self.history[-1][0]}: {self.history[-1][1]}")
def get_prologue(self):
return self.components["begin"]["obj"]._param.prologue
@ -740,13 +752,16 @@ class Canvas(Graph):
def image_to_base64(file):
return "data:{};base64,{}".format(file["mime_type"],
base64.b64encode(FileService.get_blob(file["created_by"], file["id"])).decode("utf-8"))
def parse_file(file):
blob = FileService.get_blob(file["created_by"], file["id"])
return FileService.parse(file["name"], blob, True, file["created_by"])
loop = asyncio.get_running_loop()
tasks = []
for file in files:
if file["mime_type"].find("image") >=0:
tasks.append(loop.run_in_executor(self._thread_pool, image_to_base64, file))
continue
tasks.append(loop.run_in_executor(self._thread_pool, FileService.parse, file["name"], FileService.get_blob(file["created_by"], file["id"]), True, file["created_by"])) tasks.append(loop.run_in_executor(self._thread_pool, parse_file, file))
return await asyncio.gather(*tasks)
def get_files(self, files: Union[None, list[dict]]) -> list[str]:

View File

@ -27,6 +27,10 @@ import pandas as pd
from agent import settings
from common.connection_utils import timeout
from common.misc_utils import thread_pool_exec
_FEEDED_DEPRECATED_PARAMS = "_feeded_deprecated_params"
_DEPRECATED_PARAMS = "_deprecated_params"
_USER_FEEDED_PARAMS = "_user_feeded_params"
@ -379,6 +383,7 @@ class ComponentBase(ABC):
def __init__(self, canvas, id, param: ComponentParamBase):
from agent.canvas import Graph # Local import to avoid cyclic dependency
assert isinstance(canvas, Graph), "canvas must be an instance of Canvas"
self._canvas = canvas
self._id = id
@ -430,7 +435,7 @@ class ComponentBase(ABC):
elif asyncio.iscoroutinefunction(self._invoke):
await self._invoke(**kwargs)
else:
await asyncio.to_thread(self._invoke, **kwargs) await thread_pool_exec(self._invoke, **kwargs)
except Exception as e:
if self.get_exception_default_value():
self.set_exception_default_value()

View File

@ -97,6 +97,13 @@ Here's description of each category:
class Categorize(LLM, ABC):
component_name = "Categorize"
def get_input_elements(self) -> dict[str, dict]:
query_key = self._param.query or "sys.query"
elements = self.get_input_elements_from_text(f"{{{query_key}}}")
if not elements:
logging.warning(f"[Categorize] input element not detected for query key: {query_key}")
return elements
@timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10*60)))
async def _invoke_async(self, **kwargs):
if self.check_if_canceled("Categorize processing"):
@ -105,12 +112,15 @@ class Categorize(LLM, ABC):
msg = self._canvas.get_history(self._param.message_history_window_size)
if not msg:
msg = [{"role": "user", "content": ""}]
if kwargs.get("sys.query"): query_key = self._param.query or "sys.query"
msg[-1]["content"] = kwargs["sys.query"] if query_key in kwargs:
self.set_input_value("sys.query", kwargs["sys.query"]) query_value = kwargs[query_key]
else:
msg[-1]["content"] = self._canvas.get_variable_value(self._param.query) query_value = self._canvas.get_variable_value(query_key)
self.set_input_value(self._param.query, msg[-1]["content"]) if query_value is None:
query_value = ""
msg[-1]["content"] = query_value
self.set_input_value(query_key, msg[-1]["content"])
self._param.update_prompt()
chat_mdl = LLMBundle(self._canvas.get_tenant_id(), LLMType.CHAT, self._param.llm_id)
@ -137,7 +147,7 @@ class Categorize(LLM, ABC):
category_counts[c] = count
cpn_ids = list(self._param.category_description.items())[-1][1]["to"]
max_category = list(self._param.category_description.keys())[0] max_category = list(self._param.category_description.keys())[-1]
if any(category_counts.values()):
max_category = max(category_counts.items(), key=lambda x: x[1])[0]
cpn_ids = self._param.category_description[max_category]["to"]

View File

@ -23,7 +23,7 @@ All the execution logic of this tool should go into this method.
When you start RAGFlow, you can see your plugin was loaded in the log:
```
2025-05-15 19:29:08,959 INFO 34670 Recursively importing plugins from path `/some-path/ragflow/plugin/embedded_plugins` 2025-05-15 19:29:08,959 INFO 34670 Recursively importing plugins from path `/some-path/ragflow/agent/plugin/embedded_plugins`
2025-05-15 19:29:08,960 INFO 34670 Loaded llm_tools plugin BadCalculatorPlugin version 1.0.0
```

View File

@ -23,7 +23,7 @@ RAGFlow将会从`embedded_plugins`子文件夹中递归加载所有的插件。
当你启动RAGFlow时你会在日志中看见你的插件被加载了
```
2025-05-15 19:29:08,959 INFO 34670 Recursively importing plugins from path `/some-path/ragflow/plugin/embedded_plugins` 2025-05-15 19:29:08,959 INFO 34670 Recursively importing plugins from path `/some-path/ragflow/agent/plugin/embedded_plugins`
2025-05-15 19:29:08,960 INFO 34670 Loaded llm_tools plugin BadCalculatorPlugin version 1.0.0
```

View File

@ -1,5 +1,5 @@
import logging
from plugin.llm_tool_plugin import LLMToolMetadata, LLMToolPlugin from agent.plugin.llm_tool_plugin import LLMToolMetadata, LLMToolPlugin
class BadCalculatorPlugin(LLMToolPlugin):

View File

(binary image file changed; 45 KiB before and after)

239
agent/sandbox/client.py Normal file
View File

@ -0,0 +1,239 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Sandbox client for agent components.
This module provides a unified interface for agent components to interact
with the configured sandbox provider.
"""
import json
import logging
from typing import Dict, Any, Optional
from api.db.services.system_settings_service import SystemSettingsService
from agent.sandbox.providers import ProviderManager
from agent.sandbox.providers.base import ExecutionResult
logger = logging.getLogger(__name__)
# Global provider manager instance
_provider_manager: Optional[ProviderManager] = None
def get_provider_manager() -> ProviderManager:
"""
Get the global provider manager instance.
Returns:
ProviderManager instance with active provider loaded
"""
global _provider_manager
if _provider_manager is not None:
return _provider_manager
# Initialize provider manager with system settings
_provider_manager = ProviderManager()
_load_provider_from_settings()
return _provider_manager
def _load_provider_from_settings() -> None:
"""
Load sandbox provider from system settings and configure the provider manager.
This function reads the system settings to determine which provider is active
and initializes it with the appropriate configuration.
"""
global _provider_manager
if _provider_manager is None:
return
try:
# Get active provider type
provider_type_settings = SystemSettingsService.get_by_name("sandbox.provider_type")
if not provider_type_settings:
raise RuntimeError(
"Sandbox provider type not configured. Please set 'sandbox.provider_type' in system settings."
)
provider_type = provider_type_settings[0].value
# Get provider configuration
provider_config_settings = SystemSettingsService.get_by_name(f"sandbox.{provider_type}")
if not provider_config_settings:
logger.warning(f"No configuration found for provider: {provider_type}")
config = {}
else:
try:
config = json.loads(provider_config_settings[0].value)
except json.JSONDecodeError as e:
logger.error(f"Failed to parse sandbox config for {provider_type}: {e}")
config = {}
# Import and instantiate the provider
from agent.sandbox.providers import (
SelfManagedProvider,
AliyunCodeInterpreterProvider,
E2BProvider,
)
provider_classes = {
"self_managed": SelfManagedProvider,
"aliyun_codeinterpreter": AliyunCodeInterpreterProvider,
"e2b": E2BProvider,
}
if provider_type not in provider_classes:
logger.error(f"Unknown provider type: {provider_type}")
return
provider_class = provider_classes[provider_type]
provider = provider_class()
# Initialize the provider
if not provider.initialize(config):
logger.error(f"Failed to initialize sandbox provider: {provider_type}. Config keys: {list(config.keys())}")
return
# Set the active provider
_provider_manager.set_provider(provider_type, provider)
logger.info(f"Sandbox provider '{provider_type}' initialized successfully")
except Exception as e:
logger.error(f"Failed to load sandbox provider from settings: {e}")
import traceback
traceback.print_exc()
def reload_provider() -> None:
"""
Reload the sandbox provider from system settings.
Use this function when sandbox settings have been updated.
"""
global _provider_manager
_provider_manager = None
_load_provider_from_settings()
def execute_code(
code: str,
language: str = "python",
timeout: int = 30,
arguments: Optional[Dict[str, Any]] = None
) -> ExecutionResult:
"""
Execute code in the configured sandbox.
This is the main entry point for agent components to execute code.
Args:
code: Source code to execute
language: Programming language (python, nodejs, javascript)
timeout: Maximum execution time in seconds
arguments: Optional arguments dict to pass to main() function
Returns:
ExecutionResult containing stdout, stderr, exit_code, and metadata
Raises:
RuntimeError: If no provider is configured or execution fails
"""
provider_manager = get_provider_manager()
if not provider_manager.is_configured():
raise RuntimeError(
"No sandbox provider configured. Please configure sandbox settings in the admin panel."
)
provider = provider_manager.get_provider()
# Create a sandbox instance
instance = provider.create_instance(template=language)
try:
# Execute the code
result = provider.execute_code(
instance_id=instance.instance_id,
code=code,
language=language,
timeout=timeout,
arguments=arguments
)
return result
finally:
# Clean up the instance
try:
provider.destroy_instance(instance.instance_id)
except Exception as e:
logger.warning(f"Failed to destroy sandbox instance {instance.instance_id}: {e}")
def health_check() -> bool:
"""
Check if the sandbox provider is healthy.
Returns:
True if provider is configured and healthy, False otherwise
"""
try:
provider_manager = get_provider_manager()
if not provider_manager.is_configured():
return False
provider = provider_manager.get_provider()
return provider.health_check()
except Exception as e:
logger.error(f"Sandbox health check failed: {e}")
return False
def get_provider_info() -> Dict[str, Any]:
"""
Get information about the current sandbox provider.
Returns:
Dictionary with provider information:
- provider_type: Type of the active provider
- configured: Whether provider is configured
- healthy: Whether provider is healthy
"""
try:
provider_manager = get_provider_manager()
return {
"provider_type": provider_manager.get_provider_name(),
"configured": provider_manager.is_configured(),
"healthy": health_check(),
}
except Exception as e:
logger.error(f"Failed to get provider info: {e}")
return {
"provider_type": None,
"configured": False,
"healthy": False,
}
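
A small sketch of a readiness probe built on the helpers above; `reload_provider()` is used to pick up settings that changed after the module was first imported.

```python
from agent.sandbox.client import get_provider_info, reload_provider

info = get_provider_info()
if not info["healthy"]:
    # Settings may have been updated in the admin panel since startup;
    # re-read them once and check again before giving up.
    reload_provider()
    info = get_provider_info()

print(info)  # {"provider_type": ..., "configured": ..., "healthy": ...}
```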

View File

@ -0,0 +1,37 @@
FROM python:3.11-slim-bookworm
RUN grep -rl 'deb.debian.org' /etc/apt/ | xargs sed -i 's|http[s]*://deb.debian.org|https://mirrors.tuna.tsinghua.edu.cn|g' && \
apt-get update && \
apt-get install -y curl gcc && \
rm -rf /var/lib/apt/lists/*
ARG TARGETARCH
ARG TARGETVARIANT
RUN set -eux; \
case "${TARGETARCH}${TARGETVARIANT}" in \
amd64) DOCKER_ARCH=x86_64 ;; \
arm64) DOCKER_ARCH=aarch64 ;; \
armv7) DOCKER_ARCH=armhf ;; \
armv6) DOCKER_ARCH=armel ;; \
arm64v8) DOCKER_ARCH=aarch64 ;; \
arm64v7) DOCKER_ARCH=armhf ;; \
arm*) DOCKER_ARCH=armhf ;; \
ppc64le) DOCKER_ARCH=ppc64le ;; \
s390x) DOCKER_ARCH=s390x ;; \
*) echo "Unsupported architecture: ${TARGETARCH}${TARGETVARIANT}" && exit 1 ;; \
esac; \
echo "Downloading Docker for architecture: ${DOCKER_ARCH}"; \
curl -fsSL "https://download.docker.com/linux/static/stable/${DOCKER_ARCH}/docker-29.1.0.tgz" | \
tar xz -C /usr/local/bin --strip-components=1 docker/docker; \
ln -sf /usr/local/bin/docker /usr/bin/docker
COPY --from=ghcr.io/astral-sh/uv:0.7.5 /uv /uvx /bin/
ENV UV_INDEX_URL=https://pypi.tuna.tsinghua.edu.cn/simple
WORKDIR /app
COPY . .
RUN uv pip install --system -r requirements.txt
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "9385"]

View File

@ -0,0 +1,43 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Sandbox providers package.
This package contains:
- base.py: Base interface for all sandbox providers
- manager.py: Provider manager for managing active provider
- self_managed.py: Self-managed provider implementation (wraps existing executor_manager)
- aliyun_codeinterpreter.py: Aliyun Code Interpreter provider implementation
Official Documentation: https://help.aliyun.com/zh/functioncompute/fc/sandbox-sandbox-code-interepreter
- e2b.py: E2B provider implementation
"""
from .base import SandboxProvider, SandboxInstance, ExecutionResult
from .manager import ProviderManager
from .self_managed import SelfManagedProvider
from .aliyun_codeinterpreter import AliyunCodeInterpreterProvider
from .e2b import E2BProvider
__all__ = [
"SandboxProvider",
"SandboxInstance",
"ExecutionResult",
"ProviderManager",
"SelfManagedProvider",
"AliyunCodeInterpreterProvider",
"E2BProvider",
]

View File

@ -0,0 +1,512 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Aliyun Code Interpreter provider implementation.
This provider integrates with Aliyun Function Compute Code Interpreter service
for secure code execution in serverless microVMs using the official agentrun-sdk.
Official Documentation: https://help.aliyun.com/zh/functioncompute/fc/sandbox-sandbox-code-interepreter
Official SDK: https://github.com/Serverless-Devs/agentrun-sdk-python
https://api.aliyun.com/api/AgentRun/2025-09-10/CreateTemplate?lang=PYTHON
https://api.aliyun.com/api/AgentRun/2025-09-10/CreateSandbox?lang=PYTHON
"""
import logging
import os
import time
from typing import Dict, Any, List, Optional
from datetime import datetime, timezone
from agentrun.sandbox import TemplateType, CodeLanguage, Template, TemplateInput, Sandbox
from agentrun.utils.config import Config
from agentrun.utils.exception import ServerError
from .base import SandboxProvider, SandboxInstance, ExecutionResult
logger = logging.getLogger(__name__)
class AliyunCodeInterpreterProvider(SandboxProvider):
"""
Aliyun Code Interpreter provider implementation.
This provider uses the official agentrun-sdk to interact with
Aliyun Function Compute's Code Interpreter service.
"""
def __init__(self):
self.access_key_id: Optional[str] = None
self.access_key_secret: Optional[str] = None
self.account_id: Optional[str] = None
self.region: str = "cn-hangzhou"
self.template_name: str = ""
self.timeout: int = 30
self._initialized: bool = False
self._config: Optional[Config] = None
def initialize(self, config: Dict[str, Any]) -> bool:
"""
Initialize the provider with Aliyun credentials.
Args:
config: Configuration dictionary with keys:
- access_key_id: Aliyun AccessKey ID
- access_key_secret: Aliyun AccessKey Secret
- account_id: Aliyun primary account ID
- region: Region (default: "cn-hangzhou")
- template_name: Optional sandbox template name
- timeout: Request timeout in seconds (default: 30, max 30)
Returns:
True if initialization successful, False otherwise
"""
# Get values from config or environment variables
access_key_id = config.get("access_key_id") or os.getenv("AGENTRUN_ACCESS_KEY_ID")
access_key_secret = config.get("access_key_secret") or os.getenv("AGENTRUN_ACCESS_KEY_SECRET")
account_id = config.get("account_id") or os.getenv("AGENTRUN_ACCOUNT_ID")
region = config.get("region") or os.getenv("AGENTRUN_REGION", "cn-hangzhou")
self.access_key_id = access_key_id
self.access_key_secret = access_key_secret
self.account_id = account_id
self.region = region
self.template_name = config.get("template_name", "")
self.timeout = min(config.get("timeout", 30), 30) # Max 30 seconds
logger.info(f"Aliyun Code Interpreter: Initializing with account_id={self.account_id}, region={self.region}")
# Validate required fields
if not self.access_key_id or not self.access_key_secret:
logger.error("Aliyun Code Interpreter: Missing access_key_id or access_key_secret")
return False
if not self.account_id:
logger.error("Aliyun Code Interpreter: Missing account_id (主账号ID)")
return False
# Create SDK configuration
try:
logger.info(f"Aliyun Code Interpreter: Creating Config object with account_id={self.account_id}")
self._config = Config(
access_key_id=self.access_key_id,
access_key_secret=self.access_key_secret,
account_id=self.account_id,
region_id=self.region,
timeout=self.timeout,
)
logger.info("Aliyun Code Interpreter: Config object created successfully")
# Verify connection with health check
if not self.health_check():
logger.error(f"Aliyun Code Interpreter: Health check failed for region {self.region}")
return False
self._initialized = True
logger.info(f"Aliyun Code Interpreter: Initialized successfully for region {self.region}")
return True
except Exception as e:
logger.error(f"Aliyun Code Interpreter: Initialization failed - {str(e)}")
return False
def create_instance(self, template: str = "python") -> SandboxInstance:
"""
Create a new sandbox instance in Aliyun Code Interpreter.
Args:
template: Programming language (python, javascript)
Returns:
SandboxInstance object
Raises:
RuntimeError: If instance creation fails
"""
if not self._initialized or not self._config:
raise RuntimeError("Provider not initialized. Call initialize() first.")
# Normalize language
language = self._normalize_language(template)
try:
# Get or create template
from agentrun.sandbox import Sandbox
if self.template_name:
# Use existing template
template_name = self.template_name
else:
# Try to get default template, or create one if it doesn't exist
default_template_name = f"ragflow-{language}-default"
try:
# Check if template exists
Template.get_by_name(default_template_name, config=self._config)
template_name = default_template_name
except Exception:
# Create default template if it doesn't exist
template_input = TemplateInput(
template_name=default_template_name,
template_type=TemplateType.CODE_INTERPRETER,
)
Template.create(template_input, config=self._config)
template_name = default_template_name
# Create sandbox directly
sandbox = Sandbox.create(
template_type=TemplateType.CODE_INTERPRETER,
template_name=template_name,
sandbox_idle_timeout_seconds=self.timeout,
config=self._config,
)
instance_id = sandbox.sandbox_id
return SandboxInstance(
instance_id=instance_id,
provider="aliyun_codeinterpreter",
status="READY",
metadata={
"language": language,
"region": self.region,
"account_id": self.account_id,
"template_name": template_name,
"created_at": datetime.now(timezone.utc).isoformat(),
},
)
except ServerError as e:
raise RuntimeError(f"Failed to create sandbox instance: {str(e)}")
except Exception as e:
raise RuntimeError(f"Unexpected error creating instance: {str(e)}")
def execute_code(self, instance_id: str, code: str, language: str, timeout: int = 10, arguments: Optional[Dict[str, Any]] = None) -> ExecutionResult:
"""
Execute code in the Aliyun Code Interpreter instance.
Args:
instance_id: ID of the sandbox instance
code: Source code to execute
language: Programming language (python, javascript)
timeout: Maximum execution time in seconds (max 30)
arguments: Optional arguments dict to pass to main() function
Returns:
ExecutionResult containing stdout, stderr, exit_code, and metadata
Raises:
RuntimeError: If execution fails
TimeoutError: If execution exceeds timeout
"""
if not self._initialized or not self._config:
raise RuntimeError("Provider not initialized. Call initialize() first.")
# Normalize language
normalized_lang = self._normalize_language(language)
# Enforce 30-second hard limit
timeout = min(timeout or self.timeout, 30)
try:
# Connect to existing sandbox instance
sandbox = Sandbox.connect(sandbox_id=instance_id, config=self._config)
# Convert language string to CodeLanguage enum
code_language = CodeLanguage.PYTHON if normalized_lang == "python" else CodeLanguage.JAVASCRIPT
# Wrap code to call main() function
# Matches self_managed provider behavior: call main(**arguments)
if normalized_lang == "python":
# Build arguments string for main() call
if arguments:
import json as json_module
args_json = json_module.dumps(arguments)
wrapped_code = f'''{code}
if __name__ == "__main__":
import json
result = main(**{args_json})
print(json.dumps(result) if isinstance(result, dict) else result)
'''
else:
wrapped_code = f'''{code}
if __name__ == "__main__":
import json
result = main()
print(json.dumps(result) if isinstance(result, dict) else result)
'''
else: # javascript
if arguments:
import json as json_module
args_json = json_module.dumps(arguments)
wrapped_code = f'''{code}
// Call main and output result
const result = main({args_json});
console.log(typeof result === 'object' ? JSON.stringify(result) : String(result));
'''
else:
wrapped_code = f'''{code}
// Call main and output result
const result = main();
console.log(typeof result === 'object' ? JSON.stringify(result) : String(result));
'''
logger.debug(f"Aliyun Code Interpreter: Wrapped code (first 200 chars): {wrapped_code[:200]}")
start_time = time.time()
# Execute code using SDK's simplified execute endpoint
logger.info(f"Aliyun Code Interpreter: Executing code (language={normalized_lang}, timeout={timeout})")
logger.debug(f"Aliyun Code Interpreter: Original code (first 200 chars): {code[:200]}")
result = sandbox.context.execute(
code=wrapped_code,
language=code_language,
timeout=timeout,
)
execution_time = time.time() - start_time
logger.info(f"Aliyun Code Interpreter: Execution completed in {execution_time:.2f}s")
logger.debug(f"Aliyun Code Interpreter: Raw SDK result: {result}")
# Parse execution result
results = result.get("results", []) if isinstance(result, dict) else []
logger.info(f"Aliyun Code Interpreter: Parsed {len(results)} result items")
# Extract stdout and stderr from results
stdout_parts = []
stderr_parts = []
exit_code = 0
execution_status = "ok"
for item in results:
result_type = item.get("type", "")
text = item.get("text", "")
if result_type == "stdout":
stdout_parts.append(text)
elif result_type == "stderr":
stderr_parts.append(text)
exit_code = 1 # Error occurred
elif result_type == "endOfExecution":
execution_status = item.get("status", "ok")
if execution_status != "ok":
exit_code = 1
elif result_type == "error":
stderr_parts.append(text)
exit_code = 1
stdout = "\n".join(stdout_parts)
stderr = "\n".join(stderr_parts)
logger.info(f"Aliyun Code Interpreter: stdout length={len(stdout)}, stderr length={len(stderr)}, exit_code={exit_code}")
if stdout:
logger.debug(f"Aliyun Code Interpreter: stdout (first 200 chars): {stdout[:200]}")
if stderr:
logger.debug(f"Aliyun Code Interpreter: stderr (first 200 chars): {stderr[:200]}")
return ExecutionResult(
stdout=stdout,
stderr=stderr,
exit_code=exit_code,
execution_time=execution_time,
metadata={
"instance_id": instance_id,
"language": normalized_lang,
"context_id": result.get("contextId") if isinstance(result, dict) else None,
"timeout": timeout,
},
)
except ServerError as e:
if "timeout" in str(e).lower():
raise TimeoutError(f"Execution timed out after {timeout} seconds")
raise RuntimeError(f"Failed to execute code: {str(e)}")
except Exception as e:
raise RuntimeError(f"Unexpected error during execution: {str(e)}")
def destroy_instance(self, instance_id: str) -> bool:
"""
Destroy an Aliyun Code Interpreter instance.
Args:
instance_id: ID of the instance to destroy
Returns:
True if destruction successful, False otherwise
"""
if not self._initialized or not self._config:
raise RuntimeError("Provider not initialized. Call initialize() first.")
try:
# Delete sandbox by ID directly
Sandbox.delete_by_id(sandbox_id=instance_id)
logger.info(f"Successfully destroyed sandbox instance {instance_id}")
return True
except ServerError as e:
logger.error(f"Failed to destroy instance {instance_id}: {str(e)}")
return False
except Exception as e:
logger.error(f"Unexpected error destroying instance {instance_id}: {str(e)}")
return False
def health_check(self) -> bool:
"""
Check if the Aliyun Code Interpreter service is accessible.
Returns:
True if provider is healthy, False otherwise
"""
if not self._initialized and not (self.access_key_id and self.account_id):
return False
try:
# Try to list templates to verify connection
from agentrun.sandbox import Template
templates = Template.list(config=self._config)
return templates is not None
except Exception as e:
logger.warning(f"Aliyun Code Interpreter health check failed: {str(e)}")
# If we get any response (even an error), the service is reachable
return "connection" not in str(e).lower()
def get_supported_languages(self) -> List[str]:
"""
Get list of supported programming languages.
Returns:
List of language identifiers
"""
return ["python", "javascript"]
@staticmethod
def get_config_schema() -> Dict[str, Dict]:
"""
Return configuration schema for Aliyun Code Interpreter provider.
Returns:
Dictionary mapping field names to their schema definitions
"""
return {
"access_key_id": {
"type": "string",
"required": True,
"label": "Access Key ID",
"placeholder": "LTAI5t...",
"description": "Aliyun AccessKey ID for authentication",
"secret": False,
},
"access_key_secret": {
"type": "string",
"required": True,
"label": "Access Key Secret",
"placeholder": "••••••••••••••••",
"description": "Aliyun AccessKey Secret for authentication",
"secret": True,
},
"account_id": {
"type": "string",
"required": True,
"label": "Account ID",
"placeholder": "1234567890...",
"description": "Aliyun primary account ID (主账号ID), required for API calls",
},
"region": {
"type": "string",
"required": False,
"label": "Region",
"default": "cn-hangzhou",
"description": "Aliyun region for Code Interpreter service",
"options": ["cn-hangzhou", "cn-beijing", "cn-shanghai", "cn-shenzhen", "cn-guangzhou"],
},
"template_name": {
"type": "string",
"required": False,
"label": "Template Name",
"placeholder": "my-interpreter",
"description": "Optional sandbox template name for pre-configured environments",
},
"timeout": {
"type": "integer",
"required": False,
"label": "Execution Timeout (seconds)",
"default": 30,
"min": 1,
"max": 30,
"description": "Code execution timeout (max 30 seconds - hard limit)",
},
}
def validate_config(self, config: Dict[str, Any]) -> tuple[bool, Optional[str]]:
"""
Validate Aliyun-specific configuration.
Args:
config: Configuration dictionary to validate
Returns:
Tuple of (is_valid, error_message)
"""
# Validate access key format
access_key_id = config.get("access_key_id", "")
if access_key_id and not access_key_id.startswith("LTAI"):
return False, "Invalid AccessKey ID format (should start with 'LTAI')"
# Validate account ID
account_id = config.get("account_id", "")
if not account_id:
return False, "Account ID is required"
# Validate region
valid_regions = ["cn-hangzhou", "cn-beijing", "cn-shanghai", "cn-shenzhen", "cn-guangzhou"]
region = config.get("region", "cn-hangzhou")
if region and region not in valid_regions:
return False, f"Invalid region. Must be one of: {', '.join(valid_regions)}"
# Validate timeout range (max 30 seconds)
timeout = config.get("timeout", 30)
if isinstance(timeout, int) and (timeout < 1 or timeout > 30):
return False, "Timeout must be between 1 and 30 seconds"
return True, None
def _normalize_language(self, language: str) -> str:
"""
Normalize language identifier to Aliyun format.
Args:
language: Language identifier (python, python3, javascript, nodejs)
Returns:
Normalized language identifier
"""
if not language:
return "python"
lang_lower = language.lower()
if lang_lower in ("python", "python3"):
return "python"
elif lang_lower in ("javascript", "nodejs"):
return "javascript"
else:
return language
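
A sketch of driving this provider directly (in normal operation the `agent.sandbox.client` facade does this); the credential values are placeholders, so `initialize()` would only succeed with real Aliyun credentials.

```python
from agent.sandbox.providers import AliyunCodeInterpreterProvider

provider = AliyunCodeInterpreterProvider()
ok = provider.initialize({
    "access_key_id": "LTAI5t_example",   # placeholder
    "access_key_secret": "***",          # placeholder
    "account_id": "1234567890",          # placeholder primary account ID
    "region": "cn-hangzhou",
})
if ok:
    inst = provider.create_instance(template="python")
    try:
        res = provider.execute_code(
            instance_id=inst.instance_id,
            code="def main():\n    return {'ok': True}",
            language="python",
            timeout=10,
        )
        print(res.exit_code, res.stdout)
    finally:
        provider.destroy_instance(inst.instance_id)
```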

View File

@ -0,0 +1,212 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Base interface for sandbox providers.
Each sandbox provider (self-managed, SaaS) implements this interface
to provide code execution capabilities.
"""
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Dict, Any, Optional, List
@dataclass
class SandboxInstance:
"""Represents a sandbox execution instance"""
instance_id: str
provider: str
status: str # running, stopped, error
metadata: Dict[str, Any]
def __post_init__(self):
if self.metadata is None:
self.metadata = {}
@dataclass
class ExecutionResult:
"""Result of code execution in a sandbox"""
stdout: str
stderr: str
exit_code: int
execution_time: float # in seconds
metadata: Dict[str, Any]
def __post_init__(self):
if self.metadata is None:
self.metadata = {}
class SandboxProvider(ABC):
"""
Base interface for all sandbox providers.
Each provider implementation (self-managed, Aliyun OpenSandbox, E2B, etc.)
must implement these methods to provide code execution capabilities.
"""
@abstractmethod
def initialize(self, config: Dict[str, Any]) -> bool:
"""
Initialize the provider with configuration.
Args:
config: Provider-specific configuration dictionary
Returns:
True if initialization successful, False otherwise
"""
pass
@abstractmethod
def create_instance(self, template: str = "python") -> SandboxInstance:
"""
Create a new sandbox instance.
Args:
template: Programming language/template for the instance
(e.g., "python", "nodejs", "bash")
Returns:
SandboxInstance object representing the created instance
Raises:
RuntimeError: If instance creation fails
"""
pass
@abstractmethod
def execute_code(
self,
instance_id: str,
code: str,
language: str,
timeout: int = 10,
arguments: Optional[Dict[str, Any]] = None
) -> ExecutionResult:
"""
Execute code in a sandbox instance.
Args:
instance_id: ID of the sandbox instance
code: Source code to execute
language: Programming language (python, javascript, etc.)
timeout: Maximum execution time in seconds
arguments: Optional arguments dict to pass to main() function
Returns:
ExecutionResult containing stdout, stderr, exit_code, and metadata
Raises:
RuntimeError: If execution fails
TimeoutError: If execution exceeds timeout
"""
pass
@abstractmethod
def destroy_instance(self, instance_id: str) -> bool:
"""
Destroy a sandbox instance.
Args:
instance_id: ID of the instance to destroy
Returns:
True if destruction successful, False otherwise
Raises:
RuntimeError: If destruction fails
"""
pass
@abstractmethod
def health_check(self) -> bool:
"""
Check if the provider is healthy and accessible.
Returns:
True if provider is healthy, False otherwise
"""
pass
@abstractmethod
def get_supported_languages(self) -> List[str]:
"""
Get list of supported programming languages.
Returns:
List of language identifiers (e.g., ["python", "javascript", "go"])
"""
pass
@staticmethod
def get_config_schema() -> Dict[str, Dict]:
"""
Return configuration schema for this provider.
The schema defines what configuration fields are required/optional,
their types, validation rules, and UI labels.
Returns:
Dictionary mapping field names to their schema definitions.
Example:
{
"endpoint": {
"type": "string",
"required": True,
"label": "API Endpoint",
"placeholder": "http://localhost:9385"
},
"timeout": {
"type": "integer",
"default": 30,
"label": "Timeout (seconds)",
"min": 5,
"max": 300
}
}
"""
return {}
def validate_config(self, config: Dict[str, Any]) -> tuple[bool, Optional[str]]:
"""
Validate provider-specific configuration.
This method allows providers to implement custom validation logic beyond
the basic schema validation. Override this method to add provider-specific
checks like URL format validation, API key format validation, etc.
Args:
config: Configuration dictionary to validate
Returns:
Tuple of (is_valid, error_message):
- is_valid: True if configuration is valid, False otherwise
- error_message: Error message if invalid, None if valid
Example:
>>> def validate_config(self, config):
>>> endpoint = config.get("endpoint", "")
>>> if not endpoint.startswith(("http://", "https://")):
>>> return False, "Endpoint must start with http:// or https://"
>>> return True, None
"""
# Default implementation: no custom validation
return True, None
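
For orientation, a minimal sketch of a provider that satisfies this interface; it is a stand-in for tests and simply echoes the submitted code back as stdout rather than executing anything.

```python
import time
import uuid
from typing import Any, Dict, List, Optional

from agent.sandbox.providers.base import ExecutionResult, SandboxInstance, SandboxProvider


class EchoProvider(SandboxProvider):
    """Toy provider: never runs code, just echoes it back."""

    def initialize(self, config: Dict[str, Any]) -> bool:
        return True

    def create_instance(self, template: str = "python") -> SandboxInstance:
        return SandboxInstance(instance_id=str(uuid.uuid4()), provider="echo",
                               status="running", metadata={"template": template})

    def execute_code(self, instance_id: str, code: str, language: str,
                     timeout: int = 10, arguments: Optional[Dict[str, Any]] = None) -> ExecutionResult:
        start = time.time()
        return ExecutionResult(stdout=code, stderr="", exit_code=0,
                               execution_time=time.time() - start, metadata={"echo": True})

    def destroy_instance(self, instance_id: str) -> bool:
        return True

    def health_check(self) -> bool:
        return True

    def get_supported_languages(self) -> List[str]:
        return ["python"]
```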

View File

@ -0,0 +1,233 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
E2B provider implementation.
This provider integrates with E2B Cloud for cloud-based code execution
using Firecracker microVMs.
"""
import uuid
from typing import Dict, Any, List, Optional
from .base import SandboxProvider, SandboxInstance, ExecutionResult
class E2BProvider(SandboxProvider):
"""
E2B provider implementation.
This provider uses E2B Cloud service for secure code execution
in Firecracker microVMs.
"""
def __init__(self):
self.api_key: str = ""
self.region: str = "us"
self.timeout: int = 30
self._initialized: bool = False
def initialize(self, config: Dict[str, Any]) -> bool:
"""
Initialize the provider with E2B credentials.
Args:
config: Configuration dictionary with keys:
- api_key: E2B API key
- region: Region (us, eu) (default: "us")
- timeout: Request timeout in seconds (default: 30)
Returns:
True if initialization successful, False otherwise
"""
self.api_key = config.get("api_key", "")
self.region = config.get("region", "us")
self.timeout = config.get("timeout", 30)
# Validate required fields
if not self.api_key:
return False
# TODO: Implement actual E2B API client initialization
# For now, we'll mark as initialized but actual API calls will fail
self._initialized = True
return True
def create_instance(self, template: str = "python") -> SandboxInstance:
"""
Create a new sandbox instance in E2B.
Args:
template: Programming language template (python, nodejs, go, bash)
Returns:
SandboxInstance object
Raises:
RuntimeError: If instance creation fails
"""
if not self._initialized:
raise RuntimeError("Provider not initialized. Call initialize() first.")
# Normalize language
language = self._normalize_language(template)
# TODO: Implement actual E2B API call
# POST /sandbox with template
instance_id = str(uuid.uuid4())
return SandboxInstance(
instance_id=instance_id,
provider="e2b",
status="running",
metadata={
"language": language,
"region": self.region,
}
)
def execute_code(
self,
instance_id: str,
code: str,
language: str,
timeout: int = 10,
arguments: Optional[Dict[str, Any]] = None
) -> ExecutionResult:
"""
Execute code in the E2B instance.
Args:
instance_id: ID of the sandbox instance
code: Source code to execute
language: Programming language (python, nodejs, go, bash)
timeout: Maximum execution time in seconds
arguments: Optional arguments dict to pass to main() function
Returns:
ExecutionResult containing stdout, stderr, exit_code, and metadata
Raises:
RuntimeError: If execution fails
TimeoutError: If execution exceeds timeout
"""
if not self._initialized:
raise RuntimeError("Provider not initialized. Call initialize() first.")
# TODO: Implement actual E2B API call
# POST /sandbox/{sandboxID}/execute
raise RuntimeError(
"E2B provider is not yet fully implemented. "
"Please use the self-managed provider or implement the E2B API integration. "
"See https://github.com/e2b-dev/e2b for API documentation."
)
def destroy_instance(self, instance_id: str) -> bool:
"""
Destroy an E2B instance.
Args:
instance_id: ID of the instance to destroy
Returns:
True if destruction successful, False otherwise
"""
if not self._initialized:
raise RuntimeError("Provider not initialized. Call initialize() first.")
# TODO: Implement actual E2B API call
# DELETE /sandbox/{sandboxID}
return True
def health_check(self) -> bool:
"""
Check if the E2B service is accessible.
Returns:
True if provider is healthy, False otherwise
"""
if not self._initialized:
return False
# TODO: Implement actual E2B health check API call
# GET /healthz or similar
# For now, return True if initialized with API key
return bool(self.api_key)
def get_supported_languages(self) -> List[str]:
"""
Get list of supported programming languages.
Returns:
List of language identifiers
"""
return ["python", "nodejs", "javascript", "go", "bash"]
@staticmethod
def get_config_schema() -> Dict[str, Dict]:
"""
Return configuration schema for E2B provider.
Returns:
Dictionary mapping field names to their schema definitions
"""
return {
"api_key": {
"type": "string",
"required": True,
"label": "API Key",
"placeholder": "e2b_sk_...",
"description": "E2B API key for authentication",
"secret": True,
},
"region": {
"type": "string",
"required": False,
"label": "Region",
"default": "us",
"description": "E2B service region (us or eu)",
},
"timeout": {
"type": "integer",
"required": False,
"label": "Request Timeout (seconds)",
"default": 30,
"min": 5,
"max": 300,
"description": "API request timeout for code execution",
}
}
def _normalize_language(self, language: str) -> str:
"""
Normalize language identifier to E2B template format.
Args:
language: Language identifier
Returns:
Normalized language identifier
"""
if not language:
return "python"
lang_lower = language.lower()
if lang_lower in ("python", "python3"):
return "python"
elif lang_lower in ("javascript", "nodejs"):
return "nodejs"
else:
return language

View File

@ -0,0 +1,78 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Provider manager for sandbox providers.
Since sandbox configuration is global (system-level), we only use one
active provider at a time. This manager is a thin wrapper that holds a reference
to the currently active provider.
"""
from typing import Optional
from .base import SandboxProvider
class ProviderManager:
"""
Manages the currently active sandbox provider.
With global configuration, there's only one active provider at a time.
This manager simply holds a reference to that provider.
"""
def __init__(self):
"""Initialize an empty provider manager."""
self.current_provider: Optional[SandboxProvider] = None
self.current_provider_name: Optional[str] = None
def set_provider(self, name: str, provider: SandboxProvider):
"""
Set the active provider.
Args:
name: Provider identifier (e.g., "self_managed", "e2b")
provider: Provider instance
"""
self.current_provider = provider
self.current_provider_name = name
def get_provider(self) -> Optional[SandboxProvider]:
"""
Get the active provider.
Returns:
Currently active SandboxProvider instance, or None if not set
"""
return self.current_provider
def get_provider_name(self) -> Optional[str]:
"""
Get the active provider name.
Returns:
Provider name (e.g., "self_managed"), or None if not set
"""
return self.current_provider_name
def is_configured(self) -> bool:
"""
Check if a provider is configured.
Returns:
True if a provider is set, False otherwise
"""
return self.current_provider is not None
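
A short sketch of the wiring that `agent.sandbox.client` performs when it loads settings: instantiate a provider, initialize it, and hand it to the manager. The endpoint value is a placeholder; `initialize()` performs a health check, so this only succeeds when the executor_manager is reachable.

```python
from agent.sandbox.providers import ProviderManager, SelfManagedProvider

manager = ProviderManager()
provider = SelfManagedProvider()

if provider.initialize({"endpoint": "http://localhost:9385"}):
    manager.set_provider("self_managed", provider)

print(manager.is_configured(), manager.get_provider_name())
```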

View File

@ -0,0 +1,359 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Self-managed sandbox provider implementation.
This provider wraps the existing executor_manager HTTP API which manages
a pool of Docker containers with gVisor for secure code execution.
"""
import base64
import time
import uuid
from typing import Dict, Any, List, Optional
import requests
from .base import SandboxProvider, SandboxInstance, ExecutionResult
class SelfManagedProvider(SandboxProvider):
"""
Self-managed sandbox provider using Daytona/Docker.
This provider communicates with the executor_manager HTTP API
which manages a pool of containers for code execution.
"""
def __init__(self):
self.endpoint: str = "http://localhost:9385"
self.timeout: int = 30
self.max_retries: int = 3
self.pool_size: int = 10
self._initialized: bool = False
def initialize(self, config: Dict[str, Any]) -> bool:
"""
Initialize the provider with configuration.
Args:
config: Configuration dictionary with keys:
- endpoint: HTTP endpoint (default: "http://localhost:9385")
- timeout: Request timeout in seconds (default: 30)
- max_retries: Maximum retry attempts (default: 3)
- pool_size: Container pool size for info (default: 10)
Returns:
True if initialization successful, False otherwise
"""
self.endpoint = config.get("endpoint", "http://localhost:9385")
self.timeout = config.get("timeout", 30)
self.max_retries = config.get("max_retries", 3)
self.pool_size = config.get("pool_size", 10)
# Validate endpoint is accessible
if not self.health_check():
# Try to fall back to SANDBOX_HOST from settings if we are using localhost
if "localhost" in self.endpoint or "127.0.0.1" in self.endpoint:
try:
from api import settings
if settings.SANDBOX_HOST and settings.SANDBOX_HOST not in self.endpoint:
original_endpoint = self.endpoint
self.endpoint = f"http://{settings.SANDBOX_HOST}:9385"
if self.health_check():
import logging
logging.warning(f"Sandbox self_managed: Connected using settings.SANDBOX_HOST fallback: {self.endpoint} (original: {original_endpoint})")
self._initialized = True
return True
else:
self.endpoint = original_endpoint # Restore if fallback also fails
except ImportError:
pass
return False
self._initialized = True
return True
def create_instance(self, template: str = "python") -> SandboxInstance:
"""
Create a new sandbox instance.
Note: For self-managed provider, instances are managed internally
by the executor_manager's container pool. This method returns
a logical instance handle.
Args:
template: Programming language (python, nodejs)
Returns:
SandboxInstance object
Raises:
RuntimeError: If instance creation fails
"""
if not self._initialized:
raise RuntimeError("Provider not initialized. Call initialize() first.")
# Normalize language
language = self._normalize_language(template)
# The executor_manager manages instances internally via container pool
# We create a logical instance ID for tracking
instance_id = str(uuid.uuid4())
return SandboxInstance(
instance_id=instance_id,
provider="self_managed",
status="running",
metadata={
"language": language,
"endpoint": self.endpoint,
"pool_size": self.pool_size,
}
)
def execute_code(
self,
instance_id: str,
code: str,
language: str,
timeout: int = 10,
arguments: Optional[Dict[str, Any]] = None
) -> ExecutionResult:
"""
Execute code in the sandbox.
Args:
instance_id: ID of the sandbox instance (not used for self-managed)
code: Source code to execute
language: Programming language (python, nodejs, javascript)
timeout: Maximum execution time in seconds
arguments: Optional arguments dict to pass to main() function
Returns:
ExecutionResult containing stdout, stderr, exit_code, and metadata
Raises:
RuntimeError: If execution fails
TimeoutError: If execution exceeds timeout
"""
if not self._initialized:
raise RuntimeError("Provider not initialized. Call initialize() first.")
# Normalize language
normalized_lang = self._normalize_language(language)
# Prepare request
code_b64 = base64.b64encode(code.encode("utf-8")).decode("utf-8")
payload = {
"code_b64": code_b64,
"language": normalized_lang,
"arguments": arguments or {}
}
url = f"{self.endpoint}/run"
exec_timeout = timeout or self.timeout
start_time = time.time()
try:
response = requests.post(
url,
json=payload,
timeout=exec_timeout,
headers={"Content-Type": "application/json"}
)
execution_time = time.time() - start_time
if response.status_code != 200:
raise RuntimeError(
f"HTTP {response.status_code}: {response.text}"
)
result = response.json()
return ExecutionResult(
stdout=result.get("stdout", ""),
stderr=result.get("stderr", ""),
exit_code=result.get("exit_code", 0),
execution_time=execution_time,
metadata={
"status": result.get("status"),
"time_used_ms": result.get("time_used_ms"),
"memory_used_kb": result.get("memory_used_kb"),
"detail": result.get("detail"),
"instance_id": instance_id,
}
)
except requests.Timeout:
execution_time = time.time() - start_time
raise TimeoutError(
f"Execution timed out after {exec_timeout} seconds"
)
except requests.RequestException as e:
raise RuntimeError(f"HTTP request failed: {str(e)}")
def destroy_instance(self, instance_id: str) -> bool:
"""
Destroy a sandbox instance.
Note: For self-managed provider, instances are returned to the
internal pool automatically by executor_manager after execution.
This is a no-op for tracking purposes.
Args:
instance_id: ID of the instance to destroy
Returns:
True (always succeeds for self-managed)
"""
# The executor_manager manages container lifecycle internally
# Container is returned to pool after execution
return True
def health_check(self) -> bool:
"""
Check if the provider is healthy and accessible.
Returns:
True if provider is healthy, False otherwise
"""
try:
url = f"{self.endpoint}/healthz"
response = requests.get(url, timeout=5)
return response.status_code == 200
except Exception:
return False
def get_supported_languages(self) -> List[str]:
"""
Get list of supported programming languages.
Returns:
List of language identifiers
"""
return ["python", "nodejs", "javascript"]
@staticmethod
def get_config_schema() -> Dict[str, Dict]:
"""
Return configuration schema for self-managed provider.
Returns:
Dictionary mapping field names to their schema definitions
"""
return {
"endpoint": {
"type": "string",
"required": True,
"label": "Executor Manager Endpoint",
"placeholder": "http://localhost:9385",
"default": "http://localhost:9385",
"description": "HTTP endpoint of the executor_manager service"
},
"timeout": {
"type": "integer",
"required": False,
"label": "Request Timeout (seconds)",
"default": 30,
"min": 5,
"max": 300,
"description": "HTTP request timeout for code execution"
},
"max_retries": {
"type": "integer",
"required": False,
"label": "Max Retries",
"default": 3,
"min": 0,
"max": 10,
"description": "Maximum number of retry attempts for failed requests"
},
"pool_size": {
"type": "integer",
"required": False,
"label": "Container Pool Size",
"default": 10,
"min": 1,
"max": 100,
"description": "Size of the container pool (configured in executor_manager)"
}
}
def _normalize_language(self, language: str) -> str:
"""
Normalize language identifier to executor_manager format.
Args:
language: Language identifier (python, python3, nodejs, javascript)
Returns:
Normalized language identifier
"""
if not language:
return "python"
lang_lower = language.lower()
if lang_lower in ("python", "python3"):
return "python"
elif lang_lower in ("javascript", "nodejs"):
return "nodejs"
else:
return language
def validate_config(self, config: dict) -> tuple[bool, Optional[str]]:
"""
Validate self-managed provider configuration.
Performs custom validation beyond the basic schema validation,
such as checking URL format.
Args:
config: Configuration dictionary to validate
Returns:
Tuple of (is_valid, error_message)
"""
# Validate endpoint URL format
endpoint = config.get("endpoint", "")
if endpoint:
# Check that the endpoint is a well-formed HTTP/HTTPS URL (covers localhost and IP addresses)
import re
url_pattern = r'^https?://\S+$'
if not re.match(url_pattern, endpoint):
return False, f"Invalid endpoint format: {endpoint}. Must start with http:// or https://"
# Validate pool_size is positive
pool_size = config.get("pool_size", 10)
if isinstance(pool_size, int) and pool_size <= 0:
return False, "Pool size must be greater than 0"
# Validate timeout is reasonable
timeout = config.get("timeout", 30)
if isinstance(timeout, int) and (timeout < 1 or timeout > 600):
return False, "Timeout must be between 1 and 600 seconds"
# Validate max_retries
max_retries = config.get("max_retries", 3)
if isinstance(max_retries, int) and (max_retries < 0 or max_retries > 10):
return False, "Max retries must be between 0 and 10"
return True, None
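For illustration, a minimal sketch of the HTTP contract `execute_code` relies on (field names taken from the implementation above; assumes an executor_manager listening on the default endpoint):

```python
import base64
import requests

# Base64-encode the source exactly as execute_code does before POSTing to /run.
payload = {
    "code_b64": base64.b64encode(b"def main(): return {'ok': True}").decode("utf-8"),
    "language": "python",
    "arguments": {},
}
resp = requests.post("http://localhost:9385/run", json=payload, timeout=30)
# The response is expected to carry stdout, stderr, exit_code,
# time_used_ms, memory_used_kb and detail fields.
print(resp.json())
```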

View File

@ -1,4 +1,4 @@
FROM node:24.13-bookworm-slim
RUN npm config set registry https://registry.npmmirror.com

View File

@ -0,0 +1,261 @@
# Aliyun Code Interpreter Provider - Using the Official SDK
## Important Changes
### Official Resources
- **Code Interpreter API**: https://help.aliyun.com/zh/functioncompute/fc/sandbox-sandbox-code-interepreter
- **Official SDK**: https://github.com/Serverless-Devs/agentrun-sdk-python
- **SDK Documentation**: https://docs.agent.run
## Advantages of Using the Official SDK
Migrating from hand-rolled HTTP requests to the official SDK (`agentrun-sdk`) brings the following advantages:
### 1. **Automatic Request Signing**
- The SDK signs Aliyun API requests automatically (no need to build the `Authorization` header by hand)
- Supports multiple authentication methods: AccessKey and STS Token
- Reads credentials from environment variables automatically
### 2. **Simplified API**
```python
# Old implementation (manual HTTP request)
response = requests.post(
    f"{DATA_ENDPOINT}/sandboxes/{sandbox_id}/execute",
    headers={"X-Acs-Parent-Id": account_id},
    json={"code": code, "language": "python"}
)
# New implementation (using the SDK)
sandbox = CodeInterpreterSandbox(template_name="python-sandbox", config=config)
result = sandbox.context.execute(code="print('hello')")
```
### 3. **Better Error Handling**
- Structured exception types (`ServerError`)
- Built-in retry mechanism
- Detailed error messages
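As a minimal sketch of that structured error handling (the `ServerError` import path follows the unit tests in this change set; `sandbox` is the `CodeInterpreterSandbox` created above, and exact exception attributes may differ):

```python
from agentrun.utils.exception import ServerError

try:
    result = sandbox.context.execute(code="print('hello')")
except ServerError as err:
    # The SDK raises ServerError with an HTTP-style status, e.g. 408 on a request timeout.
    print(f"Code Interpreter call failed: {err}")
```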
## Main Changes
### 1. File Renames
| Old Filename | New Filename | Description |
|---------|---------|------|
| `aliyun_opensandbox.py` | `aliyun_codeinterpreter.py` | Provider implementation |
| `test_aliyun_provider.py` | `test_aliyun_codeinterpreter.py` | Unit tests |
| `test_aliyun_integration.py` | `test_aliyun_codeinterpreter_integration.py` | Integration tests |
### 2. Configuration Field Changes
#### Old configuration (OpenSandbox)
```json
{
  "access_key_id": "LTAI5t...",
  "access_key_secret": "...",
  "region": "cn-hangzhou",
  "workspace_id": "ws-xxxxx"
}
```
#### New configuration (Code Interpreter)
```json
{
  "access_key_id": "LTAI5t...",
  "access_key_secret": "...",
  "account_id": "1234567890...", // New: Aliyun primary account ID (required)
  "region": "cn-hangzhou",
  "template_name": "python-sandbox", // New: sandbox template name
  "timeout": 30 // Maximum 30 seconds (hard limit)
}
```
### 3. Key Differences
| Feature | OpenSandbox | Code Interpreter |
|------|-------------|-----------------|
| **API endpoint** | `opensandbox.{region}.aliyuncs.com` | `agentrun.{region}.aliyuncs.com` (control plane) |
| **API version** | `2024-01-01` | `2025-09-10` |
| **Authentication** | Requires AccessKey | Requires AccessKey + primary account ID |
| **Request headers** | Standard signature | Requires the `X-Acs-Parent-Id` header |
| **Timeout limit** | Configurable | **Maximum 30 seconds** (hard limit) |
| **Contexts** | Not supported | Supports contexts (Jupyter kernel) |
### 4. API Call Changes
#### Old implementation (the assumed OpenSandbox)
```python
# Single endpoint
API_ENDPOINT = "https://opensandbox.cn-hangzhou.aliyuncs.com"
# Simple request/response
response = requests.post(
    f"{API_ENDPOINT}/execute",
    json={"code": "print('hello')", "language": "python"}
)
```
#### New implementation (Code Interpreter)
```python
# Control-plane API - manages the sandbox lifecycle
CONTROL_ENDPOINT = "https://agentrun.cn-hangzhou.aliyuncs.com/2025-09-10"
# Data-plane API - executes code
DATA_ENDPOINT = "https://{account_id}.agentrun-data.cn-hangzhou.aliyuncs.com"
# Create a sandbox (control plane)
response = requests.post(
    f"{CONTROL_ENDPOINT}/sandboxes",
    headers={"X-Acs-Parent-Id": account_id},
    json={"templateName": "python-sandbox"}
)
# Execute code (data plane)
response = requests.post(
    f"{DATA_ENDPOINT}/sandboxes/{sandbox_id}/execute",
    headers={"X-Acs-Parent-Id": account_id},
    json={"code": "print('hello')", "language": "python", "timeout": 30}
)
```
### 5. Migration Steps
#### Step 1: Update the configuration
If you were previously using `aliyun_opensandbox`:
**Old configuration**:
```json
{
  "name": "sandbox.provider_type",
  "value": "aliyun_opensandbox"
}
```
**New configuration**:
```json
{
  "name": "sandbox.provider_type",
  "value": "aliyun_codeinterpreter"
}
```
#### Step 2: Add the required account_id
Obtain your primary account ID from the Aliyun console:
1. Log in to the [Aliyun console](https://ram.console.aliyun.com/manage/ak)
2. Click your avatar in the top-right corner
3. Copy the primary account ID (a 16-digit number)
#### Step 3: Update environment variables
```bash
# Newly required environment variable
export ALIYUN_ACCOUNT_ID="1234567890123456"
# The other environment variables stay the same
export ALIYUN_ACCESS_KEY_ID="LTAI5t..."
export ALIYUN_ACCESS_KEY_SECRET="..."
export ALIYUN_REGION="cn-hangzhou"
```
#### Step 4: Run the tests
```bash
# Unit tests (no real credentials required)
pytest agent/sandbox/tests/test_aliyun_codeinterpreter.py -v
# Integration tests (real credentials required)
pytest agent/sandbox/tests/test_aliyun_codeinterpreter_integration.py -v -m integration
```
## File Change Checklist
### ✅ Completed
- [x] Created `aliyun_codeinterpreter.py` - new provider implementation
- [x] Updated `sandbox_spec.md` - specification document
- [x] Updated `admin/services.py` - service manager
- [x] Updated `providers/__init__.py` - package exports
- [x] Created `test_aliyun_codeinterpreter.py` - unit tests
- [x] Created `test_aliyun_codeinterpreter_integration.py` - integration tests
### 📝 Optional Cleanup
If you want to remove the old OpenSandbox implementation:
```bash
# Delete the old files (optional)
rm agent/sandbox/providers/aliyun_opensandbox.py
rm agent/sandbox/tests/test_aliyun_provider.py
rm agent/sandbox/tests/test_aliyun_integration.py
```
**Note**: Keeping the old files does not affect the new functionality; they are simply redundant code.
## API Reference
### Control-Plane API (Sandbox Management)
| Endpoint | Method | Description |
|------|------|------|
| `/sandboxes` | POST | Create a sandbox instance |
| `/sandboxes/{id}/stop` | POST | Stop an instance |
| `/sandboxes/{id}` | DELETE | Delete an instance |
| `/templates` | GET | List templates |
### Data-Plane API (Code Execution)
| Endpoint | Method | Description |
|------|------|------|
| `/sandboxes/{id}/execute` | POST | Execute code (simplified) |
| `/sandboxes/{id}/contexts` | POST | Create a context |
| `/sandboxes/{id}/contexts/{ctx_id}/execute` | POST | Execute within a context |
| `/sandboxes/{id}/health` | GET | Health check |
| `/sandboxes/{id}/files` | GET/POST | Read/write files |
| `/sandboxes/{id}/processes/cmd` | POST | Run shell commands |
## FAQ
### Q: Why is account_id required?
**A**: The Code Interpreter API requires the `X-Acs-Parent-Id` header (your Aliyun primary account ID) for authentication. It is a mandatory parameter of the Aliyun Code Interpreter API.
### Q: Can the 30-second timeout limit be bypassed?
**A**: No. This is a **hard limit** of Aliyun Code Interpreter and cannot be bypassed via configuration or request parameters. If your code needs more than 30 seconds, consider:
1. Optimizing the code
2. Processing data in batches
3. Using a context to persist state across executions (see the sketch below)
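For option 3, a hedged sketch of splitting work across two executions on the same instance (`provider` is assumed to be an initialized `AliyunCodeInterpreterProvider`; whether state carries over depends on the same contextId being reused, as noted in the integration tests):

```python
instance = provider.create_instance("python")

# First call: do part of the work and leave the result in a variable.
provider.execute_code(
    instance_id=instance.instance_id,
    code="partial = sum(range(1_000_000))",  # placeholder for an expensive step
    language="python",
    timeout=30,
)

# Second call: continue from the stored state, staying within the 30-second budget.
result = provider.execute_code(
    instance_id=instance.instance_id,
    code="print(partial * 2)",
    language="python",
    timeout=30,
)

provider.destroy_instance(instance.instance_id)
```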
### Q: Does the old OpenSandbox configuration still work?
**A**: No. OpenSandbox and Code Interpreter are two different services with incompatible APIs; you must migrate to the new configuration format.
### Q: How do I find my Aliyun primary account ID?
**A**:
1. Log in to the Aliyun console
2. Click your avatar in the top-right corner
3. The "primary account ID" is shown in the pop-up
### Q: Does the migration affect existing functionality?
**A**:
- **Self-managed provider (self_managed)**: not affected
- **E2B provider**: not affected
- **Aliyun provider**: requires updating the configuration and re-testing
## Related Documents
- [Official documentation](https://help.aliyun.com/zh/functioncompute/fc/sandbox-sandbox-code-interepreter)
- [Sandbox specification](../docs/develop/sandbox_spec.md)
- [Testing guide](./README.md)
- [Quick start](./QUICKSTART.md)
## Support
If you run into problems:
1. Check the official documentation
2. Verify that the configuration is correct
3. Check the error messages in the test output
4. Contact the RAGFlow team

View File

@ -0,0 +1,178 @@
# Aliyun OpenSandbox Provider - Quick Testing Guide
## Test Overview
### 1. Unit Tests (no real credentials required)
The unit tests use mocks and **do not** need real Aliyun credentials; they can be run at any time.
```bash
# Run the Aliyun provider unit tests
pytest agent/sandbox/tests/test_aliyun_provider.py -v
# Expected output:
# test_aliyun_provider.py::TestAliyunOpenSandboxProvider::test_provider_initialization PASSED
# test_aliyun_provider.py::TestAliyunOpenSandboxProvider::test_initialize_success PASSED
# ...
# ========================= 48 passed in 2.34s ==========================
```
### 2. Integration Tests (real credentials required)
The integration tests call the real Aliyun API and require credentials to be configured.
#### Step 1: Configure environment variables
```bash
export ALIYUN_ACCESS_KEY_ID="LTAI5t..."    # Replace with your real Access Key ID
export ALIYUN_ACCESS_KEY_SECRET="..."      # Replace with your real Access Key Secret
export ALIYUN_REGION="cn-hangzhou"         # Optional, defaults to cn-hangzhou
```
#### Step 2: Run the integration tests
```bash
# Run all integration tests
pytest agent/sandbox/tests/test_aliyun_integration.py -v -m integration
# Run a specific test
pytest agent/sandbox/tests/test_aliyun_integration.py::TestAliyunOpenSandboxIntegration::test_health_check -v
```
#### Step 3: Expected output
```
test_aliyun_integration.py::TestAliyunOpenSandboxIntegration::test_initialize_provider PASSED
test_aliyun_integration.py::TestAliyunOpenSandboxIntegration::test_health_check PASSED
test_aliyun_integration.py::TestAliyunOpenSandboxIntegration::test_execute_python_code PASSED
...
========================== 10 passed in 15.67s ==========================
```
### 3. Test Scenarios
#### Basic functionality tests
```bash
# Health check
pytest agent/sandbox/tests/test_aliyun_integration.py::TestAliyunOpenSandboxIntegration::test_health_check -v
# Create an instance
pytest agent/sandbox/tests/test_aliyun_integration.py::TestAliyunOpenSandboxIntegration::test_create_python_instance -v
# Execute code
pytest agent/sandbox/tests/test_aliyun_integration.py::TestAliyunOpenSandboxIntegration::test_execute_python_code -v
# Destroy an instance
pytest agent/sandbox/tests/test_aliyun_integration.py::TestAliyunOpenSandboxIntegration::test_destroy_instance -v
```
#### Error-handling tests
```bash
# Code execution errors
pytest agent/sandbox/tests/test_aliyun_integration.py::TestAliyunOpenSandboxIntegration::test_execute_python_code_with_error -v
# Timeout handling
pytest agent/sandbox/tests/test_aliyun_integration.py::TestAliyunOpenSandboxIntegration::test_execute_python_code_timeout -v
```
#### Real-world scenario tests
```bash
# Data processing workflow
pytest agent/sandbox/tests/test_aliyun_integration.py::TestAliyunRealWorldScenarios::test_data_processing_workflow -v
# String manipulation
pytest agent/sandbox/tests/test_aliyun_integration.py::TestAliyunRealWorldScenarios::test_string_manipulation -v
# Multiple executions
pytest agent/sandbox/tests/test_aliyun_integration.py::TestAliyunRealWorldScenarios::test_multiple_executions_same_instance -v
```
## FAQ
### Q: What if I don't have credentials?
**A:** Just run the unit tests; they don't need real credentials:
```bash
pytest agent/sandbox/tests/test_aliyun_provider.py -v
```
### Q: How do I skip the integration tests?
**A:** Skip them with a pytest marker expression:
```bash
# Run only the unit tests, skipping the integration tests
pytest agent/sandbox/tests/ -v -m "not integration"
```
### Q: What should I do if the integration tests fail?
**A:** Check the following:
1. **Are the credentials correct?**
```bash
echo $ALIYUN_ACCESS_KEY_ID
echo $ALIYUN_ACCESS_KEY_SECRET
```
2. **Is the network connection working?**
```bash
curl -I https://opensandbox.cn-hangzhou.aliyuncs.com
```
3. **Do you have permission to use the OpenSandbox service?**
   - Log in to the Aliyun console
   - Check that the OpenSandbox service has been activated
   - Check the AccessKey permissions
4. **Look at the detailed error output**
```bash
pytest agent/sandbox/tests/test_aliyun_integration.py -v -s
```
### Q: What if the tests time out?
**A:** Increase the timeout or check the network:
```bash
# Use a longer timeout
pytest agent/sandbox/tests/test_aliyun_integration.py -v --timeout=60
```
## Test Command Cheat Sheet
| Command | Description | Credentials Required |
|------|------|---------|
| `pytest agent/sandbox/tests/test_aliyun_provider.py -v` | Unit tests | ❌ |
| `pytest agent/sandbox/tests/test_aliyun_integration.py -v` | Integration tests | ✅ |
| `pytest agent/sandbox/tests/ -v -m "not integration"` | Unit tests only | ❌ |
| `pytest agent/sandbox/tests/ -v -m integration` | Integration tests only | ✅ |
| `pytest agent/sandbox/tests/ -v` | All tests | Partially |
## Getting Aliyun Credentials
1. Visit the [Aliyun console](https://ram.console.aliyun.com/manage/ak)
2. Create an AccessKey
3. Save the AccessKey ID and AccessKey Secret
4. Set the environment variables
⚠️ **Security tips:**
- Never hard-code credentials in source code
- Use environment variables or configuration files
- Rotate AccessKeys regularly
- Restrict AccessKey permissions
## Next Steps
1. ✅ **Run the unit tests** - verify the code logic
2. 🔧 **Configure credentials** - set the environment variables
3. 🚀 **Run the integration tests** - exercise the real API
4. 📊 **Review the results** - make sure all tests pass
5. 🎯 **Integrate into the system** - configure the provider via the admin API
## Need Help?
- See the [full documentation](README.md)
- Check the [sandbox specification](../../../../../docs/develop/sandbox_spec.md)
- Contact the RAGFlow team

View File

@ -0,0 +1,213 @@
# Sandbox Provider Tests
This directory contains tests for the RAGFlow sandbox provider system.
## Test Structure
```
tests/
├── pytest.ini # Pytest configuration
├── test_providers.py # Unit tests for all providers (mocked)
├── test_aliyun_provider.py # Unit tests for Aliyun provider (mocked)
├── test_aliyun_integration.py # Integration tests for Aliyun (real API)
└── sandbox_security_tests_full.py # Security tests for self-managed provider
```
## Test Types
### 1. Unit Tests (No Credentials Required)
Unit tests use mocks and don't require any external services or credentials.
**Files:**
- `test_providers.py` - Tests for base provider interface and manager
- `test_aliyun_provider.py` - Tests for Aliyun provider with mocked API calls
**Run unit tests:**
```bash
# Run all unit tests
pytest agent/sandbox/tests/test_providers.py -v
pytest agent/sandbox/tests/test_aliyun_provider.py -v
# Run specific test
pytest agent/sandbox/tests/test_aliyun_provider.py::TestAliyunOpenSandboxProvider::test_initialize_success -v
# Run all unit tests (skip integration)
pytest agent/sandbox/tests/ -v -m "not integration"
```
### 2. Integration Tests (Real Credentials Required)
Integration tests make real API calls to the Aliyun OpenSandbox service.
**Files:**
- `test_aliyun_integration.py` - Tests with real Aliyun API calls
**Setup environment variables:**
```bash
export ALIYUN_ACCESS_KEY_ID="LTAI5t..."
export ALIYUN_ACCESS_KEY_SECRET="..."
export ALIYUN_REGION="cn-hangzhou" # Optional, defaults to cn-hangzhou
export ALIYUN_WORKSPACE_ID="ws-..." # Optional
```
**Run integration tests:**
```bash
# Run only integration tests
pytest agent/sandbox/tests/test_aliyun_integration.py -v -m integration
# Run all tests including integration
pytest agent/sandbox/tests/ -v
# Run specific integration test
pytest agent/sandbox/tests/test_aliyun_integration.py::TestAliyunOpenSandboxIntegration::test_health_check -v
```
### 3. Security Tests
Security tests validate the security features of the self-managed sandbox provider.
**Files:**
- `sandbox_security_tests_full.py` - Comprehensive security tests
**Run security tests:**
```bash
# Run all security tests
pytest agent/sandbox/tests/sandbox_security_tests_full.py -v
# Run specific security test
pytest agent/sandbox/tests/sandbox_security_tests_full.py -k "test_dangerous_imports" -v
```
## Test Commands
### Quick Test Commands
```bash
# Run all sandbox tests (unit only, fast)
pytest agent/sandbox/tests/ -v -m "not integration" --tb=short
# Run tests with coverage
pytest agent/sandbox/tests/ -v --cov=agent.sandbox --cov-report=term-missing -m "not integration"
# Run tests and stop on first failure
pytest agent/sandbox/tests/ -v -x -m "not integration"
# Run tests in parallel (requires pytest-xdist)
pytest agent/sandbox/tests/ -v -n auto -m "not integration"
```
### Aliyun Provider Testing
```bash
# 1. Run unit tests (no credentials needed)
pytest agent/sandbox/tests/test_aliyun_provider.py -v
# 2. Set up credentials for integration tests
export ALIYUN_ACCESS_KEY_ID="your-key-id"
export ALIYUN_ACCESS_KEY_SECRET="your-secret"
export ALIYUN_REGION="cn-hangzhou"
# 3. Run integration tests (makes real API calls)
pytest agent/sandbox/tests/test_aliyun_integration.py -v
# 4. Test specific scenarios
pytest agent/sandbox/tests/test_aliyun_integration.py::TestAliyunOpenSandboxIntegration::test_execute_python_code -v
pytest agent/sandbox/tests/test_aliyun_integration.py::TestAliyunRealWorldScenarios -v
```
## Understanding Test Results
### Unit Test Output
```
agent/sandbox/tests/test_aliyun_provider.py::TestAliyunOpenSandboxProvider::test_initialize_success PASSED
agent/sandbox/tests/test_aliyun_provider.py::TestAliyunOpenSandboxProvider::test_create_instance_python PASSED
...
========================== 48 passed in 2.34s ===========================
```
### Integration Test Output
```
agent/sandbox/tests/test_aliyun_integration.py::TestAliyunOpenSandboxIntegration::test_health_check PASSED
agent/sandbox/tests/test_aliyun_integration.py::TestAliyunOpenSandboxIntegration::test_create_python_instance PASSED
agent/sandbox/tests/test_aliyun_integration.py::TestAliyunOpenSandboxIntegration::test_execute_python_code PASSED
...
========================== 10 passed in 15.67s ===========================
```
**Note:** Integration tests will be skipped if credentials are not set:
```
agent/sandbox/tests/test_aliyun_integration.py::TestAliyunOpenSandboxIntegration::test_health_check SKIPPED
...
========================== 48 skipped, 10 passed in 0.12s ===========================
```
## Troubleshooting
### Integration Tests Fail
1. **Check credentials:**
```bash
echo $ALIYUN_ACCESS_KEY_ID
echo $ALIYUN_ACCESS_KEY_SECRET
```
2. **Check network connectivity:**
```bash
curl -I https://opensandbox.cn-hangzhou.aliyuncs.com
```
3. **Verify permissions:**
- Make sure your Aliyun account has OpenSandbox service enabled
- Check that your AccessKey has the required permissions
4. **Check region:**
- Verify the region is correct for your account
- Try different regions: cn-hangzhou, cn-beijing, cn-shanghai, etc.
### Tests Timeout
If tests time out, increase the timeout in the test configuration or run with a longer timeout:
```bash
pytest agent/sandbox/tests/test_aliyun_integration.py -v --timeout=60
```
### Mock Tests Fail
If unit tests fail, it's likely a code issue, not a credentials issue:
1. Check the test error message
2. Review the code changes
3. Run with verbose output: `pytest -vv`
## Contributing
When adding new providers:
1. **Create unit tests** in `test_{provider}_provider.py` with mocks
2. **Create integration tests** in `test_{provider}_integration.py` with real API calls
3. **Add markers** to distinguish test types
4. **Update this README** with provider-specific testing instructions
Example:
```python
@pytest.mark.integration
def test_new_provider_real_api():
"""Test with real API calls."""
# Your test here
```
## Continuous Integration
In CI/CD pipelines:
```bash
# Run unit tests only (fast, no credentials)
pytest agent/sandbox/tests/ -v -m "not integration"
# Run integration tests if credentials available
if [ -n "$ALIYUN_ACCESS_KEY_ID" ]; then
pytest agent/sandbox/tests/test_aliyun_integration.py -v -m integration
fi
```

View File

@ -13,3 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Sandbox provider tests package.
"""

View File

@ -0,0 +1,33 @@
[pytest]
# Pytest configuration for sandbox tests
# Test discovery patterns
python_files = test_*.py
python_classes = Test*
python_functions = test_*
# Markers for different test types
markers =
integration: Tests that require external services (Aliyun API, etc.)
unit: Fast tests that don't require external services
slow: Tests that take a long time to run
# Test paths
testpaths = .
# Minimum version
minversion = 7.0
# Output options
addopts =
-v
--strict-markers
--tb=short
--disable-warnings
# Log options
log_cli = false
log_cli_level = INFO
# Coverage options (if using pytest-cov)
# addopts = --cov=agent.sandbox --cov-report=html --cov-report=term
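Because `--strict-markers` is enabled, any marker a test uses must be declared above. A small sketch of tagging tests with these markers (hypothetical test names):

```python
import pytest

@pytest.mark.unit
def test_fast_path():
    assert 1 + 1 == 2

@pytest.mark.slow
@pytest.mark.integration
def test_real_api_roundtrip():
    ...  # would call an external service

# Select them from the command line, e.g.:
#   pytest -m "unit and not slow"
#   pytest -m "not integration"
```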

View File

@ -0,0 +1,329 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Unit tests for Aliyun Code Interpreter provider.
These tests use mocks and don't require real Aliyun credentials.
Official Documentation: https://help.aliyun.com/zh/functioncompute/fc/sandbox-sandbox-code-interepreter
Official SDK: https://github.com/Serverless-Devs/agentrun-sdk-python
"""
import pytest
from unittest.mock import patch, MagicMock
from agent.sandbox.providers.base import SandboxProvider
from agent.sandbox.providers.aliyun_codeinterpreter import AliyunCodeInterpreterProvider
class TestAliyunCodeInterpreterProvider:
"""Test AliyunCodeInterpreterProvider implementation."""
def test_provider_initialization(self):
"""Test provider initialization."""
provider = AliyunCodeInterpreterProvider()
assert provider.access_key_id == ""
assert provider.access_key_secret == ""
assert provider.account_id == ""
assert provider.region == "cn-hangzhou"
assert provider.template_name == ""
assert provider.timeout == 30
assert not provider._initialized
@patch("agent.sandbox.providers.aliyun_codeinterpreter.Template")
def test_initialize_success(self, mock_template):
"""Test successful initialization."""
# Mock health check response
mock_template.list.return_value = []
provider = AliyunCodeInterpreterProvider()
result = provider.initialize(
{
"access_key_id": "LTAI5tXXXXXXXXXX",
"access_key_secret": "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
"account_id": "1234567890123456",
"region": "cn-hangzhou",
"template_name": "python-sandbox",
"timeout": 20,
}
)
assert result is True
assert provider.access_key_id == "LTAI5tXXXXXXXXXX"
assert provider.access_key_secret == "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
assert provider.account_id == "1234567890123456"
assert provider.region == "cn-hangzhou"
assert provider.template_name == "python-sandbox"
assert provider.timeout == 20
assert provider._initialized
def test_initialize_missing_credentials(self):
"""Test initialization with missing credentials."""
provider = AliyunCodeInterpreterProvider()
# Missing access_key_id
result = provider.initialize({"access_key_secret": "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"})
assert result is False
# Missing access_key_secret
result = provider.initialize({"access_key_id": "LTAI5tXXXXXXXXXX"})
assert result is False
# Missing account_id
provider2 = AliyunCodeInterpreterProvider()
result = provider2.initialize({"access_key_id": "LTAI5tXXXXXXXXXX", "access_key_secret": "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"})
assert result is False
@patch("agent.sandbox.providers.aliyun_codeinterpreter.Template")
def test_initialize_default_config(self, mock_template):
"""Test initialization with default config."""
mock_template.list.return_value = []
provider = AliyunCodeInterpreterProvider()
result = provider.initialize({"access_key_id": "LTAI5tXXXXXXXXXX", "access_key_secret": "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "account_id": "1234567890123456"})
assert result is True
assert provider.region == "cn-hangzhou"
assert provider.template_name == ""
@patch("agent.sandbox.providers.aliyun_codeinterpreter.CodeInterpreterSandbox")
def test_create_instance_python(self, mock_sandbox_class):
"""Test creating a Python instance."""
# Mock successful instance creation
mock_sandbox = MagicMock()
mock_sandbox.sandbox_id = "01JCED8Z9Y6XQVK8M2NRST5WXY"
mock_sandbox_class.return_value = mock_sandbox
provider = AliyunCodeInterpreterProvider()
provider._initialized = True
provider._config = MagicMock()
instance = provider.create_instance("python")
assert instance.provider == "aliyun_codeinterpreter"
assert instance.status == "READY"
assert instance.metadata["language"] == "python"
@patch("agent.sandbox.providers.aliyun_codeinterpreter.CodeInterpreterSandbox")
def test_create_instance_javascript(self, mock_sandbox_class):
"""Test creating a JavaScript instance."""
mock_sandbox = MagicMock()
mock_sandbox.sandbox_id = "01JCED8Z9Y6XQVK8M2NRST5WXY"
mock_sandbox_class.return_value = mock_sandbox
provider = AliyunCodeInterpreterProvider()
provider._initialized = True
provider._config = MagicMock()
instance = provider.create_instance("javascript")
assert instance.metadata["language"] == "javascript"
def test_create_instance_not_initialized(self):
"""Test creating instance when provider not initialized."""
provider = AliyunCodeInterpreterProvider()
with pytest.raises(RuntimeError, match="Provider not initialized"):
provider.create_instance("python")
@patch("agent.sandbox.providers.aliyun_codeinterpreter.CodeInterpreterSandbox")
def test_execute_code_success(self, mock_sandbox_class):
"""Test successful code execution."""
# Mock sandbox instance
mock_sandbox = MagicMock()
mock_sandbox.context.execute.return_value = {
"results": [{"type": "stdout", "text": "Hello, World!"}, {"type": "result", "text": "None"}, {"type": "endOfExecution", "status": "ok"}],
"contextId": "kernel-12345-67890",
}
mock_sandbox_class.return_value = mock_sandbox
provider = AliyunCodeInterpreterProvider()
provider._initialized = True
provider._config = MagicMock()
result = provider.execute_code(instance_id="01JCED8Z9Y6XQVK8M2NRST5WXY", code="print('Hello, World!')", language="python", timeout=10)
assert result.stdout == "Hello, World!"
assert result.stderr == ""
assert result.exit_code == 0
assert result.execution_time > 0
@patch("agent.sandbox.providers.aliyun_codeinterpreter.CodeInterpreterSandbox")
def test_execute_code_timeout(self, mock_sandbox_class):
"""Test code execution timeout."""
from agentrun.utils.exception import ServerError
mock_sandbox = MagicMock()
mock_sandbox.context.execute.side_effect = ServerError(408, "Request timeout")
mock_sandbox_class.return_value = mock_sandbox
provider = AliyunCodeInterpreterProvider()
provider._initialized = True
provider._config = MagicMock()
with pytest.raises(TimeoutError, match="Execution timed out"):
provider.execute_code(instance_id="01JCED8Z9Y6XQVK8M2NRST5WXY", code="while True: pass", language="python", timeout=5)
@patch("agent.sandbox.providers.aliyun_codeinterpreter.CodeInterpreterSandbox")
def test_execute_code_with_error(self, mock_sandbox_class):
"""Test code execution with error."""
mock_sandbox = MagicMock()
mock_sandbox.context.execute.return_value = {
"results": [{"type": "stderr", "text": "Traceback..."}, {"type": "error", "text": "NameError: name 'x' is not defined"}, {"type": "endOfExecution", "status": "error"}]
}
mock_sandbox_class.return_value = mock_sandbox
provider = AliyunCodeInterpreterProvider()
provider._initialized = True
provider._config = MagicMock()
result = provider.execute_code(instance_id="01JCED8Z9Y6XQVK8M2NRST5WXY", code="print(x)", language="python")
assert result.exit_code != 0
assert len(result.stderr) > 0
def test_get_supported_languages(self):
"""Test getting supported languages."""
provider = AliyunCodeInterpreterProvider()
languages = provider.get_supported_languages()
assert "python" in languages
assert "javascript" in languages
def test_get_config_schema(self):
"""Test getting configuration schema."""
schema = AliyunCodeInterpreterProvider.get_config_schema()
assert "access_key_id" in schema
assert schema["access_key_id"]["required"] is True
assert "access_key_secret" in schema
assert schema["access_key_secret"]["required"] is True
assert "account_id" in schema
assert schema["account_id"]["required"] is True
assert "region" in schema
assert "template_name" in schema
assert "timeout" in schema
def test_validate_config_success(self):
"""Test successful configuration validation."""
provider = AliyunCodeInterpreterProvider()
is_valid, error_msg = provider.validate_config({"access_key_id": "LTAI5tXXXXXXXXXX", "account_id": "1234567890123456", "region": "cn-hangzhou"})
assert is_valid is True
assert error_msg is None
def test_validate_config_invalid_access_key(self):
"""Test validation with invalid access key format."""
provider = AliyunCodeInterpreterProvider()
is_valid, error_msg = provider.validate_config({"access_key_id": "INVALID_KEY"})
assert is_valid is False
assert "AccessKey ID format" in error_msg
def test_validate_config_missing_account_id(self):
"""Test validation with missing account ID."""
provider = AliyunCodeInterpreterProvider()
is_valid, error_msg = provider.validate_config({})
assert is_valid is False
assert "Account ID" in error_msg
def test_validate_config_invalid_region(self):
"""Test validation with invalid region."""
provider = AliyunCodeInterpreterProvider()
is_valid, error_msg = provider.validate_config(
{
"access_key_id": "LTAI5tXXXXXXXXXX",
"account_id": "1234567890123456", # Provide required field
"region": "us-west-1",
}
)
assert is_valid is False
assert "Invalid region" in error_msg
def test_validate_config_invalid_timeout(self):
"""Test validation with invalid timeout (> 30 seconds)."""
provider = AliyunCodeInterpreterProvider()
is_valid, error_msg = provider.validate_config(
{
"access_key_id": "LTAI5tXXXXXXXXXX",
"account_id": "1234567890123456", # Provide required field
"timeout": 60,
}
)
assert is_valid is False
assert "Timeout must be between 1 and 30 seconds" in error_msg
def test_normalize_language_python(self):
"""Test normalizing Python language identifier."""
provider = AliyunCodeInterpreterProvider()
assert provider._normalize_language("python") == "python"
assert provider._normalize_language("python3") == "python"
assert provider._normalize_language("PYTHON") == "python"
def test_normalize_language_javascript(self):
"""Test normalizing JavaScript language identifier."""
provider = AliyunCodeInterpreterProvider()
assert provider._normalize_language("javascript") == "javascript"
assert provider._normalize_language("nodejs") == "javascript"
assert provider._normalize_language("JavaScript") == "javascript"
class TestAliyunCodeInterpreterInterface:
"""Test that Aliyun provider correctly implements the interface."""
def test_aliyun_provider_is_abstract(self):
"""Test that AliyunCodeInterpreterProvider is a SandboxProvider."""
provider = AliyunCodeInterpreterProvider()
assert isinstance(provider, SandboxProvider)
def test_aliyun_provider_has_abstract_methods(self):
"""Test that AliyunCodeInterpreterProvider implements all abstract methods."""
provider = AliyunCodeInterpreterProvider()
assert hasattr(provider, "initialize")
assert callable(provider.initialize)
assert hasattr(provider, "create_instance")
assert callable(provider.create_instance)
assert hasattr(provider, "execute_code")
assert callable(provider.execute_code)
assert hasattr(provider, "destroy_instance")
assert callable(provider.destroy_instance)
assert hasattr(provider, "health_check")
assert callable(provider.health_check)
assert hasattr(provider, "get_supported_languages")
assert callable(provider.get_supported_languages)

View File

@ -0,0 +1,353 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Integration tests for Aliyun Code Interpreter provider.
These tests require real Aliyun credentials and will make actual API calls.
To run these tests, set the following environment variables:
export AGENTRUN_ACCESS_KEY_ID="LTAI5t..."
export AGENTRUN_ACCESS_KEY_SECRET="..."
export AGENTRUN_ACCOUNT_ID="1234567890..." # Aliyun primary account ID (主账号ID)
export AGENTRUN_REGION="cn-hangzhou" # Note: AGENTRUN_REGION (SDK will read this)
Then run:
pytest agent/sandbox/tests/test_aliyun_codeinterpreter_integration.py -v
Official Documentation: https://help.aliyun.com/zh/functioncompute/fc/sandbox-sandbox-code-interepreter
"""
import os
import pytest
from agent.sandbox.providers.aliyun_codeinterpreter import AliyunCodeInterpreterProvider
# Skip all tests if credentials are not provided
pytestmark = pytest.mark.skipif(
not all(
[
os.getenv("AGENTRUN_ACCESS_KEY_ID"),
os.getenv("AGENTRUN_ACCESS_KEY_SECRET"),
os.getenv("AGENTRUN_ACCOUNT_ID"),
]
),
reason="Aliyun credentials not set. Set AGENTRUN_ACCESS_KEY_ID, AGENTRUN_ACCESS_KEY_SECRET, and AGENTRUN_ACCOUNT_ID.",
)
@pytest.fixture
def aliyun_config():
"""Get Aliyun configuration from environment variables."""
return {
"access_key_id": os.getenv("AGENTRUN_ACCESS_KEY_ID"),
"access_key_secret": os.getenv("AGENTRUN_ACCESS_KEY_SECRET"),
"account_id": os.getenv("AGENTRUN_ACCOUNT_ID"),
"region": os.getenv("AGENTRUN_REGION", "cn-hangzhou"),
"template_name": os.getenv("AGENTRUN_TEMPLATE_NAME", ""),
"timeout": 30,
}
@pytest.fixture
def provider(aliyun_config):
"""Create an initialized Aliyun provider."""
provider = AliyunCodeInterpreterProvider()
initialized = provider.initialize(aliyun_config)
if not initialized:
pytest.skip("Failed to initialize Aliyun provider. Check credentials, account ID, and network.")
return provider
@pytest.mark.integration
class TestAliyunCodeInterpreterIntegration:
"""Integration tests for Aliyun Code Interpreter provider."""
def test_initialize_provider(self, aliyun_config):
"""Test provider initialization with real credentials."""
provider = AliyunCodeInterpreterProvider()
result = provider.initialize(aliyun_config)
assert result is True
assert provider._initialized is True
def test_health_check(self, provider):
"""Test health check with real API."""
result = provider.health_check()
assert result is True
def test_get_supported_languages(self, provider):
"""Test getting supported languages."""
languages = provider.get_supported_languages()
assert "python" in languages
assert "javascript" in languages
assert isinstance(languages, list)
def test_create_python_instance(self, provider):
"""Test creating a Python sandbox instance."""
try:
instance = provider.create_instance("python")
assert instance.provider == "aliyun_codeinterpreter"
assert instance.status in ["READY", "CREATING"]
assert instance.metadata["language"] == "python"
assert len(instance.instance_id) > 0
# Clean up
provider.destroy_instance(instance.instance_id)
except Exception as e:
pytest.skip(f"Instance creation failed: {str(e)}. API might not be available yet.")
def test_execute_python_code(self, provider):
"""Test executing Python code in the sandbox."""
try:
# Create instance
instance = provider.create_instance("python")
# Execute simple code
result = provider.execute_code(
instance_id=instance.instance_id,
code="print('Hello from Aliyun Code Interpreter!')\nprint(42)",
language="python",
timeout=30, # Max 30 seconds
)
assert result.exit_code == 0
assert "Hello from Aliyun Code Interpreter!" in result.stdout
assert "42" in result.stdout
assert result.execution_time > 0
# Clean up
provider.destroy_instance(instance.instance_id)
except Exception as e:
pytest.skip(f"Code execution test failed: {str(e)}. API might not be available yet.")
def test_execute_python_code_with_arguments(self, provider):
"""Test executing Python code with arguments parameter."""
try:
# Create instance
instance = provider.create_instance("python")
# Execute code with arguments
result = provider.execute_code(
instance_id=instance.instance_id,
code="""def main(name: str, count: int) -> dict:
return {"message": f"Hello {name}!" * count}
""",
language="python",
timeout=30,
arguments={"name": "World", "count": 2}
)
assert result.exit_code == 0
assert "Hello World!Hello World!" in result.stdout
# Clean up
provider.destroy_instance(instance.instance_id)
except Exception as e:
pytest.skip(f"Arguments test failed: {str(e)}. API might not be available yet.")
def test_execute_python_code_with_error(self, provider):
"""Test executing Python code that produces an error."""
try:
# Create instance
instance = provider.create_instance("python")
# Execute code with error
result = provider.execute_code(instance_id=instance.instance_id, code="raise ValueError('Test error')", language="python", timeout=30)
assert result.exit_code != 0
assert len(result.stderr) > 0 or "ValueError" in result.stdout
# Clean up
provider.destroy_instance(instance.instance_id)
except Exception as e:
pytest.skip(f"Error handling test failed: {str(e)}. API might not be available yet.")
def test_execute_javascript_code(self, provider):
"""Test executing JavaScript code in the sandbox."""
try:
# Create instance
instance = provider.create_instance("javascript")
# Execute simple code
result = provider.execute_code(instance_id=instance.instance_id, code="console.log('Hello from JavaScript!');", language="javascript", timeout=30)
assert result.exit_code == 0
assert "Hello from JavaScript!" in result.stdout
# Clean up
provider.destroy_instance(instance.instance_id)
except Exception as e:
pytest.skip(f"JavaScript execution test failed: {str(e)}. API might not be available yet.")
def test_execute_javascript_code_with_arguments(self, provider):
"""Test executing JavaScript code with arguments parameter."""
try:
# Create instance
instance = provider.create_instance("javascript")
# Execute code with arguments
result = provider.execute_code(
instance_id=instance.instance_id,
code="""function main(args) {
const { name, count } = args;
return `Hello ${name}!`.repeat(count);
}""",
language="javascript",
timeout=30,
arguments={"name": "World", "count": 2}
)
assert result.exit_code == 0
assert "Hello World!Hello World!" in result.stdout
# Clean up
provider.destroy_instance(instance.instance_id)
except Exception as e:
pytest.skip(f"JavaScript arguments test failed: {str(e)}. API might not be available yet.")
def test_destroy_instance(self, provider):
"""Test destroying a sandbox instance."""
try:
# Create instance
instance = provider.create_instance("python")
# Destroy instance
result = provider.destroy_instance(instance.instance_id)
# Note: The API might return True immediately or async
assert result is True or result is False
except Exception as e:
pytest.skip(f"Destroy instance test failed: {str(e)}. API might not be available yet.")
def test_config_validation(self, provider):
"""Test configuration validation."""
# Valid config
is_valid, error = provider.validate_config({"access_key_id": "LTAI5tXXXXXXXXXX", "account_id": "1234567890123456", "region": "cn-hangzhou", "timeout": 30})
assert is_valid is True
assert error is None
# Invalid access key
is_valid, error = provider.validate_config({"access_key_id": "INVALID_KEY"})
assert is_valid is False
# Missing account ID
is_valid, error = provider.validate_config({})
assert is_valid is False
assert "Account ID" in error
def test_timeout_limit(self, provider):
"""Test that timeout is limited to 30 seconds."""
# Timeout > 30 should be clamped to 30
provider2 = AliyunCodeInterpreterProvider()
provider2.initialize(
{
"access_key_id": os.getenv("AGENTRUN_ACCESS_KEY_ID"),
"access_key_secret": os.getenv("AGENTRUN_ACCESS_KEY_SECRET"),
"account_id": os.getenv("AGENTRUN_ACCOUNT_ID"),
"timeout": 60, # Request 60 seconds
}
)
# Should be clamped to 30
assert provider2.timeout == 30
@pytest.mark.integration
class TestAliyunCodeInterpreterScenarios:
"""Test real-world usage scenarios."""
def test_data_processing_workflow(self, provider):
"""Test a simple data processing workflow."""
try:
instance = provider.create_instance("python")
# Execute data processing code
code = """
import json
data = [{"name": "Alice", "age": 30}, {"name": "Bob", "age": 25}]
result = json.dumps(data, indent=2)
print(result)
"""
result = provider.execute_code(instance_id=instance.instance_id, code=code, language="python", timeout=30)
assert result.exit_code == 0
assert "Alice" in result.stdout
assert "Bob" in result.stdout
provider.destroy_instance(instance.instance_id)
except Exception as e:
pytest.skip(f"Data processing test failed: {str(e)}")
def test_string_manipulation(self, provider):
"""Test string manipulation operations."""
try:
instance = provider.create_instance("python")
code = """
text = "Hello, World!"
print(text.upper())
print(text.lower())
print(text.replace("World", "Aliyun"))
"""
result = provider.execute_code(instance_id=instance.instance_id, code=code, language="python", timeout=30)
assert result.exit_code == 0
assert "HELLO, WORLD!" in result.stdout
assert "hello, world!" in result.stdout
assert "Hello, Aliyun!" in result.stdout
provider.destroy_instance(instance.instance_id)
except Exception as e:
pytest.skip(f"String manipulation test failed: {str(e)}")
def test_context_persistence(self, provider):
"""Test code execution with context persistence."""
try:
instance = provider.create_instance("python")
# First execution - define variable
result1 = provider.execute_code(instance_id=instance.instance_id, code="x = 42\nprint(x)", language="python", timeout=30)
assert result1.exit_code == 0
# Second execution - use variable
# Note: Context persistence depends on whether the contextId is reused
result2 = provider.execute_code(instance_id=instance.instance_id, code="print(f'x is {x}')", language="python", timeout=30)
# Context might or might not persist depending on API implementation
assert result2.exit_code == 0
provider.destroy_instance(instance.instance_id)
except Exception as e:
pytest.skip(f"Context persistence test failed: {str(e)}")
def test_without_credentials():
"""Test that tests are skipped without credentials."""
# This test should always run (not skipped)
if all(
[
os.getenv("AGENTRUN_ACCESS_KEY_ID"),
os.getenv("AGENTRUN_ACCESS_KEY_SECRET"),
os.getenv("AGENTRUN_ACCOUNT_ID"),
]
):
assert True # Credentials are set
else:
assert True # Credentials not set, test still passes

View File

@ -0,0 +1,423 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Unit tests for sandbox provider abstraction layer.
"""
import pytest
from unittest.mock import Mock, patch
import requests
from agent.sandbox.providers.base import SandboxProvider, SandboxInstance, ExecutionResult
from agent.sandbox.providers.manager import ProviderManager
from agent.sandbox.providers.self_managed import SelfManagedProvider
class TestSandboxDataclasses:
"""Test sandbox dataclasses."""
def test_sandbox_instance_creation(self):
"""Test SandboxInstance dataclass creation."""
instance = SandboxInstance(
instance_id="test-123",
provider="self_managed",
status="running",
metadata={"language": "python"}
)
assert instance.instance_id == "test-123"
assert instance.provider == "self_managed"
assert instance.status == "running"
assert instance.metadata == {"language": "python"}
def test_sandbox_instance_default_metadata(self):
"""Test SandboxInstance with None metadata."""
instance = SandboxInstance(
instance_id="test-123",
provider="self_managed",
status="running",
metadata=None
)
assert instance.metadata == {}
def test_execution_result_creation(self):
"""Test ExecutionResult dataclass creation."""
result = ExecutionResult(
stdout="Hello, World!",
stderr="",
exit_code=0,
execution_time=1.5,
metadata={"status": "success"}
)
assert result.stdout == "Hello, World!"
assert result.stderr == ""
assert result.exit_code == 0
assert result.execution_time == 1.5
assert result.metadata == {"status": "success"}
def test_execution_result_default_metadata(self):
"""Test ExecutionResult with None metadata."""
result = ExecutionResult(
stdout="output",
stderr="error",
exit_code=1,
execution_time=0.5,
metadata=None
)
assert result.metadata == {}
class TestProviderManager:
"""Test ProviderManager functionality."""
def test_manager_initialization(self):
"""Test ProviderManager initialization."""
manager = ProviderManager()
assert manager.current_provider is None
assert manager.current_provider_name is None
assert not manager.is_configured()
def test_set_provider(self):
"""Test setting a provider."""
manager = ProviderManager()
mock_provider = Mock(spec=SandboxProvider)
manager.set_provider("self_managed", mock_provider)
assert manager.current_provider == mock_provider
assert manager.current_provider_name == "self_managed"
assert manager.is_configured()
def test_get_provider(self):
"""Test getting the current provider."""
manager = ProviderManager()
mock_provider = Mock(spec=SandboxProvider)
manager.set_provider("self_managed", mock_provider)
assert manager.get_provider() == mock_provider
def test_get_provider_name(self):
"""Test getting the current provider name."""
manager = ProviderManager()
mock_provider = Mock(spec=SandboxProvider)
manager.set_provider("self_managed", mock_provider)
assert manager.get_provider_name() == "self_managed"
def test_get_provider_when_not_set(self):
"""Test getting provider when none is set."""
manager = ProviderManager()
assert manager.get_provider() is None
assert manager.get_provider_name() is None
class TestSelfManagedProvider:
"""Test SelfManagedProvider implementation."""
def test_provider_initialization(self):
"""Test provider initialization."""
provider = SelfManagedProvider()
assert provider.endpoint == "http://localhost:9385"
assert provider.timeout == 30
assert provider.max_retries == 3
assert provider.pool_size == 10
assert not provider._initialized
@patch('requests.get')
def test_initialize_success(self, mock_get):
"""Test successful initialization."""
mock_response = Mock()
mock_response.status_code = 200
mock_get.return_value = mock_response
provider = SelfManagedProvider()
result = provider.initialize({
"endpoint": "http://test-endpoint:9385",
"timeout": 60,
"max_retries": 5,
"pool_size": 20
})
assert result is True
assert provider.endpoint == "http://test-endpoint:9385"
assert provider.timeout == 60
assert provider.max_retries == 5
assert provider.pool_size == 20
assert provider._initialized
mock_get.assert_called_once_with("http://test-endpoint:9385/healthz", timeout=5)
@patch('requests.get')
def test_initialize_failure(self, mock_get):
"""Test initialization failure."""
mock_get.side_effect = Exception("Connection error")
provider = SelfManagedProvider()
result = provider.initialize({"endpoint": "http://invalid:9385"})
assert result is False
assert not provider._initialized
def test_initialize_default_config(self):
"""Test initialization with default config."""
with patch('requests.get') as mock_get:
mock_response = Mock()
mock_response.status_code = 200
mock_get.return_value = mock_response
provider = SelfManagedProvider()
result = provider.initialize({})
assert result is True
assert provider.endpoint == "http://localhost:9385"
assert provider.timeout == 30
def test_create_instance_python(self):
"""Test creating a Python instance."""
provider = SelfManagedProvider()
provider._initialized = True
instance = provider.create_instance("python")
assert instance.provider == "self_managed"
assert instance.status == "running"
assert instance.metadata["language"] == "python"
assert instance.metadata["endpoint"] == "http://localhost:9385"
assert len(instance.instance_id) > 0 # Verify instance_id exists
def test_create_instance_nodejs(self):
"""Test creating a Node.js instance."""
provider = SelfManagedProvider()
provider._initialized = True
instance = provider.create_instance("nodejs")
assert instance.metadata["language"] == "nodejs"
def test_create_instance_not_initialized(self):
"""Test creating instance when provider not initialized."""
provider = SelfManagedProvider()
with pytest.raises(RuntimeError, match="Provider not initialized"):
provider.create_instance("python")
@patch('requests.post')
def test_execute_code_success(self, mock_post):
"""Test successful code execution."""
mock_response = Mock()
mock_response.status_code = 200
mock_response.json.return_value = {
"status": "success",
"stdout": '{"result": 42}',
"stderr": "",
"exit_code": 0,
"time_used_ms": 100.0,
"memory_used_kb": 1024.0
}
mock_post.return_value = mock_response
provider = SelfManagedProvider()
provider._initialized = True
result = provider.execute_code(
instance_id="test-123",
code="def main(): return {'result': 42}",
language="python",
timeout=10
)
assert result.stdout == '{"result": 42}'
assert result.stderr == ""
assert result.exit_code == 0
assert result.execution_time > 0
assert result.metadata["status"] == "success"
assert result.metadata["instance_id"] == "test-123"
@patch('requests.post')
def test_execute_code_timeout(self, mock_post):
"""Test code execution timeout."""
mock_post.side_effect = requests.Timeout()
provider = SelfManagedProvider()
provider._initialized = True
with pytest.raises(TimeoutError, match="Execution timed out"):
provider.execute_code(
instance_id="test-123",
code="while True: pass",
language="python",
timeout=5
)
@patch('requests.post')
def test_execute_code_http_error(self, mock_post):
"""Test code execution with HTTP error."""
mock_response = Mock()
mock_response.status_code = 500
mock_response.text = "Internal Server Error"
mock_post.return_value = mock_response
provider = SelfManagedProvider()
provider._initialized = True
with pytest.raises(RuntimeError, match="HTTP 500"):
provider.execute_code(
instance_id="test-123",
code="invalid code",
language="python"
)
def test_execute_code_not_initialized(self):
"""Test executing code when provider not initialized."""
provider = SelfManagedProvider()
with pytest.raises(RuntimeError, match="Provider not initialized"):
provider.execute_code(
instance_id="test-123",
code="print('hello')",
language="python"
)
def test_destroy_instance(self):
"""Test destroying an instance (no-op for self-managed)."""
provider = SelfManagedProvider()
provider._initialized = True
# For self-managed, destroy_instance is a no-op
result = provider.destroy_instance("test-123")
assert result is True
    @patch('requests.get')
    def test_health_check_success(self, mock_get):
        """Test successful health check."""
        mock_response = Mock()
        mock_response.status_code = 200
        mock_get.return_value = mock_response
        provider = SelfManagedProvider()
        result = provider.health_check()
        assert result is True
        mock_get.assert_called_once_with("http://localhost:9385/healthz", timeout=5)

    @patch('requests.get')
    def test_health_check_failure(self, mock_get):
        """Test health check failure."""
        mock_get.side_effect = Exception("Connection error")
        provider = SelfManagedProvider()
        result = provider.health_check()
        assert result is False

    def test_get_supported_languages(self):
        """Test getting supported languages."""
        provider = SelfManagedProvider()
        languages = provider.get_supported_languages()
        assert "python" in languages
        assert "nodejs" in languages
        assert "javascript" in languages

    def test_get_config_schema(self):
        """Test getting configuration schema."""
        schema = SelfManagedProvider.get_config_schema()
        assert "endpoint" in schema
        assert schema["endpoint"]["type"] == "string"
        assert schema["endpoint"]["required"] is True
        assert schema["endpoint"]["default"] == "http://localhost:9385"
        assert "timeout" in schema
        assert schema["timeout"]["type"] == "integer"
        assert schema["timeout"]["default"] == 30
        assert "max_retries" in schema
        assert schema["max_retries"]["type"] == "integer"
        assert "pool_size" in schema
        assert schema["pool_size"]["type"] == "integer"
    def test_normalize_language_python(self):
        """Test normalizing Python language identifier."""
        provider = SelfManagedProvider()
        assert provider._normalize_language("python") == "python"
        assert provider._normalize_language("python3") == "python"
        assert provider._normalize_language("PYTHON") == "python"
        assert provider._normalize_language("Python3") == "python"

    def test_normalize_language_javascript(self):
        """Test normalizing JavaScript language identifier."""
        provider = SelfManagedProvider()
        assert provider._normalize_language("javascript") == "nodejs"
        assert provider._normalize_language("nodejs") == "nodejs"
        assert provider._normalize_language("JavaScript") == "nodejs"
        assert provider._normalize_language("NodeJS") == "nodejs"

    def test_normalize_language_default(self):
        """Test language normalization with empty/unknown input."""
        provider = SelfManagedProvider()
        assert provider._normalize_language("") == "python"
        assert provider._normalize_language(None) == "python"
        assert provider._normalize_language("unknown") == "unknown"


class TestProviderInterface:
    """Test that providers correctly implement the interface."""

    def test_self_managed_provider_is_abstract(self):
        """Test that SelfManagedProvider is a SandboxProvider."""
        provider = SelfManagedProvider()
        assert isinstance(provider, SandboxProvider)

    def test_self_managed_provider_has_abstract_methods(self):
        """Test that SelfManagedProvider implements all abstract methods."""
        provider = SelfManagedProvider()
        # Check all abstract methods are implemented
        assert hasattr(provider, 'initialize')
        assert callable(provider.initialize)
        assert hasattr(provider, 'create_instance')
        assert callable(provider.create_instance)
        assert hasattr(provider, 'execute_code')
        assert callable(provider.execute_code)
        assert hasattr(provider, 'destroy_instance')
        assert callable(provider.destroy_instance)
        assert hasattr(provider, 'health_check')
        assert callable(provider.health_check)
        assert hasattr(provider, 'get_supported_languages')
        assert callable(provider.get_supported_languages)
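
Taken together, these tests pin down the self-managed provider's surface: `health_check()` probes `http://localhost:9385/healthz`, `create_instance()` only records metadata for the shared executor, and `execute_code()` POSTs the code and returns stdout, stderr, exit code, and metadata. A minimal usage sketch against a locally running sandbox executor could look like the following; the module path and the `_initialized` shortcut are assumptions carried over from the tests, not something this diff confirms:

```python
# Hypothetical usage sketch. The import path below is assumed; the tests only
# confirm the class name and method signatures, not where the class lives.
from agent.sandbox.providers.self_managed import SelfManagedProvider  # assumed path

provider = SelfManagedProvider()
provider._initialized = True  # the tests bypass initialize(); a real caller would configure the provider

if provider.health_check():  # GET http://localhost:9385/healthz, per the tests above
    instance = provider.create_instance("python")
    result = provider.execute_code(
        instance_id=instance.instance_id,
        code="def main(): return {'result': 42}",
        language="python",
        timeout=10,
    )
    print(result.exit_code, result.stdout, result.stderr)
    provider.destroy_instance(instance.instance_id)  # no-op for the self-managed backend
```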

View File

@ -0,0 +1,78 @@
#!/usr/bin/env python3
"""
Quick verification script for Aliyun Code Interpreter provider using official SDK.
"""
import importlib.util
import sys
sys.path.insert(0, ".")
print("=" * 60)
print("Aliyun Code Interpreter Provider - SDK Verification")
print("=" * 60)
# Test 1: Import provider
print("\n[1/5] Testing provider import...")
try:
    from agent.sandbox.providers.aliyun_codeinterpreter import AliyunCodeInterpreterProvider
    print("✓ Provider imported successfully")
except ImportError as e:
    print(f"✗ Import failed: {e}")
    sys.exit(1)
# Test 2: Check provider class
print("\n[2/5] Testing provider class...")
provider = AliyunCodeInterpreterProvider()
assert hasattr(provider, "initialize")
assert hasattr(provider, "create_instance")
assert hasattr(provider, "execute_code")
assert hasattr(provider, "destroy_instance")
assert hasattr(provider, "health_check")
print("✓ Provider has all required methods")
# Test 3: Check SDK imports
print("\n[3/5] Testing SDK imports...")
try:
    # Check if agentrun SDK is available using importlib
    if (
        importlib.util.find_spec("agentrun.sandbox") is None
        or importlib.util.find_spec("agentrun.utils.config") is None
        or importlib.util.find_spec("agentrun.utils.exception") is None
    ):
        raise ImportError("agentrun SDK not found")
    # Verify imports work (assign to _ to indicate they're intentionally unused)
    from agentrun.sandbox import CodeInterpreterSandbox, TemplateType, CodeLanguage
    from agentrun.utils.config import Config
    from agentrun.utils.exception import ServerError
    _ = (CodeInterpreterSandbox, TemplateType, CodeLanguage, Config, ServerError)
    print("✓ SDK modules imported successfully")
except ImportError as e:
    print(f"✗ SDK import failed: {e}")
    sys.exit(1)
# Test 4: Check config schema
print("\n[4/5] Testing configuration schema...")
schema = AliyunCodeInterpreterProvider.get_config_schema()
required_fields = ["access_key_id", "access_key_secret", "account_id"]
for field in required_fields:
    assert field in schema
    assert schema[field]["required"] is True
print(f"✓ All required fields present: {', '.join(required_fields)}")
# Test 5: Check supported languages
print("\n[5/5] Testing supported languages...")
languages = provider.get_supported_languages()
assert "python" in languages
assert "javascript" in languages
print(f"✓ Supported languages: {', '.join(languages)}")
print("\n" + "=" * 60)
print("All verification tests passed! ✓")
print("=" * 60)
print("\nNote: This provider now uses the official agentrun-sdk.")
print("SDK Documentation: https://github.com/Serverless-Devs/agentrun-sdk-python")
print("API Documentation: https://help.aliyun.com/zh/functioncompute/fc/sandbox-sandbox-code-interepreter")

View File

@ -1,7 +1,16 @@
version = 1
revision = 2
revision = 3
requires-python = ">=3.10"
[[package]]
name = "annotated-doc"
version = "0.0.4"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/57/ba/046ceea27344560984e26a590f90bc7f4a75b06701f653222458922b558c/annotated_doc-0.0.4.tar.gz", hash = "sha256:fbcda96e87e9c92ad167c2e53839e57503ecfda18804ea28102353485033faa4", size = 7288, upload-time = "2025-11-10T22:07:42.062Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/1e/d3/26bf1008eb3d2daa8ef4cacc7f3bfdc11818d111f7e2d0201bc6e3b49d45/annotated_doc-0.0.4-py3-none-any.whl", hash = "sha256:571ac1dc6991c450b25a9c2d84a3705e2ae7a53467b5d111c24fa8baabbed320", size = 5303, upload-time = "2025-11-10T22:07:40.673Z" },
]
[[package]]
name = "annotated-types"
version = "0.7.0"
@ -16,7 +25,6 @@ name = "anyio"
version = "4.9.0" version = "4.9.0"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
dependencies = [ dependencies = [
{ name = "exceptiongroup", marker = "python_full_version < '3.11'" },
{ name = "idna" }, { name = "idna" },
{ name = "sniffio" }, { name = "sniffio" },
{ name = "typing-extensions", marker = "python_full_version < '3.13'" }, { name = "typing-extensions", marker = "python_full_version < '3.13'" },
@ -53,32 +61,6 @@ version = "3.4.2"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e4/33/89c2ced2b67d1c2a61c19c6751aa8902d46ce3dacb23600a283619f5a12d/charset_normalizer-3.4.2.tar.gz", hash = "sha256:5baececa9ecba31eff645232d59845c07aa030f0c81ee70184a90d35099a0e63", size = 126367, upload-time = "2025-05-02T08:34:42.01Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/95/28/9901804da60055b406e1a1c5ba7aac1276fb77f1dde635aabfc7fd84b8ab/charset_normalizer-3.4.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7c48ed483eb946e6c04ccbe02c6b4d1d48e51944b6db70f697e089c193404941", size = 201818, upload-time = "2025-05-02T08:31:46.725Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/d9/9b/892a8c8af9110935e5adcbb06d9c6fe741b6bb02608c6513983048ba1a18/charset_normalizer-3.4.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b2d318c11350e10662026ad0eb71bb51c7812fc8590825304ae0bdd4ac283acd", size = 144649, upload-time = "2025-05-02T08:31:48.889Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/7b/a5/4179abd063ff6414223575e008593861d62abfc22455b5d1a44995b7c101/charset_normalizer-3.4.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9cbfacf36cb0ec2897ce0ebc5d08ca44213af24265bd56eca54bee7923c48fd6", size = 155045, upload-time = "2025-05-02T08:31:50.757Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/3b/95/bc08c7dfeddd26b4be8c8287b9bb055716f31077c8b0ea1cd09553794665/charset_normalizer-3.4.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:18dd2e350387c87dabe711b86f83c9c78af772c748904d372ade190b5c7c9d4d", size = 147356, upload-time = "2025-05-02T08:31:52.634Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/a8/2d/7a5b635aa65284bf3eab7653e8b4151ab420ecbae918d3e359d1947b4d61/charset_normalizer-3.4.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8075c35cd58273fee266c58c0c9b670947c19df5fb98e7b66710e04ad4e9ff86", size = 149471, upload-time = "2025-05-02T08:31:56.207Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ae/38/51fc6ac74251fd331a8cfdb7ec57beba8c23fd5493f1050f71c87ef77ed0/charset_normalizer-3.4.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5bf4545e3b962767e5c06fe1738f951f77d27967cb2caa64c28be7c4563e162c", size = 151317, upload-time = "2025-05-02T08:31:57.613Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/b7/17/edee1e32215ee6e9e46c3e482645b46575a44a2d72c7dfd49e49f60ce6bf/charset_normalizer-3.4.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:7a6ab32f7210554a96cd9e33abe3ddd86732beeafc7a28e9955cdf22ffadbab0", size = 146368, upload-time = "2025-05-02T08:31:59.468Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/26/2c/ea3e66f2b5f21fd00b2825c94cafb8c326ea6240cd80a91eb09e4a285830/charset_normalizer-3.4.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:b33de11b92e9f75a2b545d6e9b6f37e398d86c3e9e9653c4864eb7e89c5773ef", size = 154491, upload-time = "2025-05-02T08:32:01.219Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/52/47/7be7fa972422ad062e909fd62460d45c3ef4c141805b7078dbab15904ff7/charset_normalizer-3.4.2-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:8755483f3c00d6c9a77f490c17e6ab0c8729e39e6390328e42521ef175380ae6", size = 157695, upload-time = "2025-05-02T08:32:03.045Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/2f/42/9f02c194da282b2b340f28e5fb60762de1151387a36842a92b533685c61e/charset_normalizer-3.4.2-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:68a328e5f55ec37c57f19ebb1fdc56a248db2e3e9ad769919a58672958e8f366", size = 154849, upload-time = "2025-05-02T08:32:04.651Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/67/44/89cacd6628f31fb0b63201a618049be4be2a7435a31b55b5eb1c3674547a/charset_normalizer-3.4.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:21b2899062867b0e1fde9b724f8aecb1af14f2778d69aacd1a5a1853a597a5db", size = 150091, upload-time = "2025-05-02T08:32:06.719Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/1f/79/4b8da9f712bc079c0f16b6d67b099b0b8d808c2292c937f267d816ec5ecc/charset_normalizer-3.4.2-cp310-cp310-win32.whl", hash = "sha256:e8082b26888e2f8b36a042a58307d5b917ef2b1cacab921ad3323ef91901c71a", size = 98445, upload-time = "2025-05-02T08:32:08.66Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/7d/d7/96970afb4fb66497a40761cdf7bd4f6fca0fc7bafde3a84f836c1f57a926/charset_normalizer-3.4.2-cp310-cp310-win_amd64.whl", hash = "sha256:f69a27e45c43520f5487f27627059b64aaf160415589230992cec34c5e18a509", size = 105782, upload-time = "2025-05-02T08:32:10.46Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/05/85/4c40d00dcc6284a1c1ad5de5e0996b06f39d8232f1031cd23c2f5c07ee86/charset_normalizer-3.4.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:be1e352acbe3c78727a16a455126d9ff83ea2dfdcbc83148d2982305a04714c2", size = 198794, upload-time = "2025-05-02T08:32:11.945Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/41/d9/7a6c0b9db952598e97e93cbdfcb91bacd89b9b88c7c983250a77c008703c/charset_normalizer-3.4.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aa88ca0b1932e93f2d961bf3addbb2db902198dca337d88c89e1559e066e7645", size = 142846, upload-time = "2025-05-02T08:32:13.946Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/66/82/a37989cda2ace7e37f36c1a8ed16c58cf48965a79c2142713244bf945c89/charset_normalizer-3.4.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d524ba3f1581b35c03cb42beebab4a13e6cdad7b36246bd22541fa585a56cccd", size = 153350, upload-time = "2025-05-02T08:32:15.873Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/df/68/a576b31b694d07b53807269d05ec3f6f1093e9545e8607121995ba7a8313/charset_normalizer-3.4.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28a1005facc94196e1fb3e82a3d442a9d9110b8434fc1ded7a24a2983c9888d8", size = 145657, upload-time = "2025-05-02T08:32:17.283Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/92/9b/ad67f03d74554bed3aefd56fe836e1623a50780f7c998d00ca128924a499/charset_normalizer-3.4.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fdb20a30fe1175ecabed17cbf7812f7b804b8a315a25f24678bcdf120a90077f", size = 147260, upload-time = "2025-05-02T08:32:18.807Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/a6/e6/8aebae25e328160b20e31a7e9929b1578bbdc7f42e66f46595a432f8539e/charset_normalizer-3.4.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0f5d9ed7f254402c9e7d35d2f5972c9bbea9040e99cd2861bd77dc68263277c7", size = 149164, upload-time = "2025-05-02T08:32:20.333Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/8b/f2/b3c2f07dbcc248805f10e67a0262c93308cfa149a4cd3d1fe01f593e5fd2/charset_normalizer-3.4.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:efd387a49825780ff861998cd959767800d54f8308936b21025326de4b5a42b9", size = 144571, upload-time = "2025-05-02T08:32:21.86Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/60/5b/c3f3a94bc345bc211622ea59b4bed9ae63c00920e2e8f11824aa5708e8b7/charset_normalizer-3.4.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:f0aa37f3c979cf2546b73e8222bbfa3dc07a641585340179d768068e3455e544", size = 151952, upload-time = "2025-05-02T08:32:23.434Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/e2/4d/ff460c8b474122334c2fa394a3f99a04cf11c646da895f81402ae54f5c42/charset_normalizer-3.4.2-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:e70e990b2137b29dc5564715de1e12701815dacc1d056308e2b17e9095372a82", size = 155959, upload-time = "2025-05-02T08:32:24.993Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/a2/2b/b964c6a2fda88611a1fe3d4c400d39c66a42d6c169c924818c848f922415/charset_normalizer-3.4.2-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:0c8c57f84ccfc871a48a47321cfa49ae1df56cd1d965a09abe84066f6853b9c0", size = 153030, upload-time = "2025-05-02T08:32:26.435Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/59/2e/d3b9811db26a5ebf444bc0fa4f4be5aa6d76fc6e1c0fd537b16c14e849b6/charset_normalizer-3.4.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6b66f92b17849b85cad91259efc341dce9c1af48e2173bf38a85c6329f1033e5", size = 148015, upload-time = "2025-05-02T08:32:28.376Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/90/07/c5fd7c11eafd561bb51220d600a788f1c8d77c5eef37ee49454cc5c35575/charset_normalizer-3.4.2-cp311-cp311-win32.whl", hash = "sha256:daac4765328a919a805fa5e2720f3e94767abd632ae410a9062dff5412bae65a", size = 98106, upload-time = "2025-05-02T08:32:30.281Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/a8/05/5e33dbef7e2f773d672b6d79f10ec633d4a71cd96db6673625838a4fd532/charset_normalizer-3.4.2-cp311-cp311-win_amd64.whl", hash = "sha256:e53efc7c7cee4c1e70661e2e112ca46a575f90ed9ae3fef200f2a25e954f4b28", size = 105402, upload-time = "2025-05-02T08:32:32.191Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/d7/a4/37f4d6035c89cac7930395a35cc0f1b872e652eaafb76a6075943754f095/charset_normalizer-3.4.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0c29de6a1a95f24b9a1aa7aefd27d2487263f00dfd55a77719b530788f75cff7", size = 199936, upload-time = "2025-05-02T08:32:33.712Z" }, { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d7/a4/37f4d6035c89cac7930395a35cc0f1b872e652eaafb76a6075943754f095/charset_normalizer-3.4.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0c29de6a1a95f24b9a1aa7aefd27d2487263f00dfd55a77719b530788f75cff7", size = 199936, upload-time = "2025-05-02T08:32:33.712Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ee/8a/1a5e33b73e0d9287274f899d967907cd0bf9c343e651755d9307e0dbf2b3/charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cddf7bd982eaa998934a91f69d182aec997c6c468898efe6679af88283b498d3", size = 143790, upload-time = "2025-05-02T08:32:35.768Z" }, { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ee/8a/1a5e33b73e0d9287274f899d967907cd0bf9c343e651755d9307e0dbf2b3/charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cddf7bd982eaa998934a91f69d182aec997c6c468898efe6679af88283b498d3", size = 143790, upload-time = "2025-05-02T08:32:35.768Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/66/52/59521f1d8e6ab1482164fa21409c5ef44da3e9f653c13ba71becdd98dec3/charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fcbe676a55d7445b22c10967bceaaf0ee69407fbe0ece4d032b6eb8d4565982a", size = 153924, upload-time = "2025-05-02T08:32:37.284Z" }, { url = "https://pypi.tuna.tsinghua.edu.cn/packages/66/52/59521f1d8e6ab1482164fa21409c5ef44da3e9f653c13ba71becdd98dec3/charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fcbe676a55d7445b22c10967bceaaf0ee69407fbe0ece4d032b6eb8d4565982a", size = 153924, upload-time = "2025-05-02T08:32:37.284Z" },
@ -141,27 +123,19 @@ wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/6e/c6/ac0b6c1e2d138f1002bcf799d330bd6d85084fece321e662a14223794041/Deprecated-1.2.18-py2.py3-none-any.whl", hash = "sha256:bd5011788200372a32418f888e326a09ff80d0214bd961147cfed01b5c018eec", size = 9998, upload-time = "2025-01-27T10:46:09.186Z" }, { url = "https://pypi.tuna.tsinghua.edu.cn/packages/6e/c6/ac0b6c1e2d138f1002bcf799d330bd6d85084fece321e662a14223794041/Deprecated-1.2.18-py2.py3-none-any.whl", hash = "sha256:bd5011788200372a32418f888e326a09ff80d0214bd961147cfed01b5c018eec", size = 9998, upload-time = "2025-01-27T10:46:09.186Z" },
] ]
[[package]]
name = "exceptiongroup"
version = "1.2.2"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/09/35/2495c4ac46b980e4ca1f6ad6db102322ef3ad2410b79fdde159a4b0f3b92/exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc", size = 28883, upload-time = "2024-07-12T22:26:00.161Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/02/cc/b7e31358aac6ed1ef2bb790a9746ac2c69bcb3c8588b41616914eb106eaf/exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b", size = 16453, upload-time = "2024-07-12T22:25:58.476Z" },
]
[[package]]
name = "fastapi"
version = "0.115.12"
version = "0.128.0"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
dependencies = [
{ name = "annotated-doc" },
{ name = "pydantic" },
{ name = "starlette" },
{ name = "typing-extensions" },
]
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f4/55/ae499352d82338331ca1e28c7f4a63bfd09479b16395dce38cf50a39e2c2/fastapi-0.115.12.tar.gz", hash = "sha256:1e2c2a2646905f9e83d32f04a3f86aff4a286669c6c950ca95b5fd68c2602681", size = 295236, upload-time = "2025-03-23T22:55:43.822Z" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/52/08/8c8508db6c7b9aae8f7175046af41baad690771c9bcde676419965e338c7/fastapi-0.128.0.tar.gz", hash = "sha256:1cc179e1cef10a6be60ffe429f79b829dce99d8de32d7acb7e6c8dfdf7f2645a", size = 365682, upload-time = "2025-12-27T15:21:13.714Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/50/b3/b51f09c2ba432a576fe63758bddc81f78f0c6309d9e5c10d194313bf021e/fastapi-0.115.12-py3-none-any.whl", hash = "sha256:e94613d6c05e27be7ffebdd6ea5f388112e5e430c8f7d6494a9d1d88d43e814d", size = 95164, upload-time = "2025-03-23T22:55:42.101Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/5c/05/5cbb59154b093548acd0f4c7c474a118eda06da25aa75c616b72d8fcd92a/fastapi-0.128.0-py3-none-any.whl", hash = "sha256:aebd93f9716ee3b4f4fcfe13ffb7cf308d99c9f3ab5622d8877441072561582d", size = 103094, upload-time = "2025-12-27T15:21:12.154Z" },
]
[[package]]
@ -304,33 +278,6 @@ dependencies = [
]
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ad/88/5f2260bdfae97aabf98f1778d43f69574390ad787afb646292a638c923d4/pydantic_core-2.33.2.tar.gz", hash = "sha256:7cb8bc3605c29176e1b105350d2e6474142d7c1bd1d9327c4a9bdb46bf827acc", size = 435195, upload-time = "2025-04-23T18:33:52.104Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/e5/92/b31726561b5dae176c2d2c2dc43a9c5bfba5d32f96f8b4c0a600dd492447/pydantic_core-2.33.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:2b3d326aaef0c0399d9afffeb6367d5e26ddc24d351dbc9c636840ac355dc5d8", size = 2028817, upload-time = "2025-04-23T18:30:43.919Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/a3/44/3f0b95fafdaca04a483c4e685fe437c6891001bf3ce8b2fded82b9ea3aa1/pydantic_core-2.33.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0e5b2671f05ba48b94cb90ce55d8bdcaaedb8ba00cc5359f6810fc918713983d", size = 1861357, upload-time = "2025-04-23T18:30:46.372Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/30/97/e8f13b55766234caae05372826e8e4b3b96e7b248be3157f53237682e43c/pydantic_core-2.33.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0069c9acc3f3981b9ff4cdfaf088e98d83440a4c7ea1bc07460af3d4dc22e72d", size = 1898011, upload-time = "2025-04-23T18:30:47.591Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/9b/a3/99c48cf7bafc991cc3ee66fd544c0aae8dc907b752f1dad2d79b1b5a471f/pydantic_core-2.33.2-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d53b22f2032c42eaaf025f7c40c2e3b94568ae077a606f006d206a463bc69572", size = 1982730, upload-time = "2025-04-23T18:30:49.328Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/de/8e/a5b882ec4307010a840fb8b58bd9bf65d1840c92eae7534c7441709bf54b/pydantic_core-2.33.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0405262705a123b7ce9f0b92f123334d67b70fd1f20a9372b907ce1080c7ba02", size = 2136178, upload-time = "2025-04-23T18:30:50.907Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/e4/bb/71e35fc3ed05af6834e890edb75968e2802fe98778971ab5cba20a162315/pydantic_core-2.33.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4b25d91e288e2c4e0662b8038a28c6a07eaac3e196cfc4ff69de4ea3db992a1b", size = 2736462, upload-time = "2025-04-23T18:30:52.083Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/31/0d/c8f7593e6bc7066289bbc366f2235701dcbebcd1ff0ef8e64f6f239fb47d/pydantic_core-2.33.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6bdfe4b3789761f3bcb4b1ddf33355a71079858958e3a552f16d5af19768fef2", size = 2005652, upload-time = "2025-04-23T18:30:53.389Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/d2/7a/996d8bd75f3eda405e3dd219ff5ff0a283cd8e34add39d8ef9157e722867/pydantic_core-2.33.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:efec8db3266b76ef9607c2c4c419bdb06bf335ae433b80816089ea7585816f6a", size = 2113306, upload-time = "2025-04-23T18:30:54.661Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ff/84/daf2a6fb2db40ffda6578a7e8c5a6e9c8affb251a05c233ae37098118788/pydantic_core-2.33.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:031c57d67ca86902726e0fae2214ce6770bbe2f710dc33063187a68744a5ecac", size = 2073720, upload-time = "2025-04-23T18:30:56.11Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/77/fb/2258da019f4825128445ae79456a5499c032b55849dbd5bed78c95ccf163/pydantic_core-2.33.2-cp310-cp310-musllinux_1_1_armv7l.whl", hash = "sha256:f8de619080e944347f5f20de29a975c2d815d9ddd8be9b9b7268e2e3ef68605a", size = 2244915, upload-time = "2025-04-23T18:30:57.501Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/d8/7a/925ff73756031289468326e355b6fa8316960d0d65f8b5d6b3a3e7866de7/pydantic_core-2.33.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:73662edf539e72a9440129f231ed3757faab89630d291b784ca99237fb94db2b", size = 2241884, upload-time = "2025-04-23T18:30:58.867Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/0b/b0/249ee6d2646f1cdadcb813805fe76265745c4010cf20a8eba7b0e639d9b2/pydantic_core-2.33.2-cp310-cp310-win32.whl", hash = "sha256:0a39979dcbb70998b0e505fb1556a1d550a0781463ce84ebf915ba293ccb7e22", size = 1910496, upload-time = "2025-04-23T18:31:00.078Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/66/ff/172ba8f12a42d4b552917aa65d1f2328990d3ccfc01d5b7c943ec084299f/pydantic_core-2.33.2-cp310-cp310-win_amd64.whl", hash = "sha256:b0379a2b24882fef529ec3b4987cb5d003b9cda32256024e6fe1586ac45fc640", size = 1955019, upload-time = "2025-04-23T18:31:01.335Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/3f/8d/71db63483d518cbbf290261a1fc2839d17ff89fce7089e08cad07ccfce67/pydantic_core-2.33.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:4c5b0a576fb381edd6d27f0a85915c6daf2f8138dc5c267a57c08a62900758c7", size = 2028584, upload-time = "2025-04-23T18:31:03.106Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/24/2f/3cfa7244ae292dd850989f328722d2aef313f74ffc471184dc509e1e4e5a/pydantic_core-2.33.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e799c050df38a639db758c617ec771fd8fb7a5f8eaaa4b27b101f266b216a246", size = 1855071, upload-time = "2025-04-23T18:31:04.621Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/b3/d3/4ae42d33f5e3f50dd467761304be2fa0a9417fbf09735bc2cce003480f2a/pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dc46a01bf8d62f227d5ecee74178ffc448ff4e5197c756331f71efcc66dc980f", size = 1897823, upload-time = "2025-04-23T18:31:06.377Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/f4/f3/aa5976e8352b7695ff808599794b1fba2a9ae2ee954a3426855935799488/pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a144d4f717285c6d9234a66778059f33a89096dfb9b39117663fd8413d582dcc", size = 1983792, upload-time = "2025-04-23T18:31:07.93Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/d5/7a/cda9b5a23c552037717f2b2a5257e9b2bfe45e687386df9591eff7b46d28/pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:73cf6373c21bc80b2e0dc88444f41ae60b2f070ed02095754eb5a01df12256de", size = 2136338, upload-time = "2025-04-23T18:31:09.283Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/2b/9f/b8f9ec8dd1417eb9da784e91e1667d58a2a4a7b7b34cf4af765ef663a7e5/pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3dc625f4aa79713512d1976fe9f0bc99f706a9dee21dfd1810b4bbbf228d0e8a", size = 2730998, upload-time = "2025-04-23T18:31:11.7Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/47/bc/cd720e078576bdb8255d5032c5d63ee5c0bf4b7173dd955185a1d658c456/pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:881b21b5549499972441da4758d662aeea93f1923f953e9cbaff14b8b9565aef", size = 2003200, upload-time = "2025-04-23T18:31:13.536Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ca/22/3602b895ee2cd29d11a2b349372446ae9727c32e78a94b3d588a40fdf187/pydantic_core-2.33.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:bdc25f3681f7b78572699569514036afe3c243bc3059d3942624e936ec93450e", size = 2113890, upload-time = "2025-04-23T18:31:15.011Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ff/e6/e3c5908c03cf00d629eb38393a98fccc38ee0ce8ecce32f69fc7d7b558a7/pydantic_core-2.33.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:fe5b32187cbc0c862ee201ad66c30cf218e5ed468ec8dc1cf49dec66e160cc4d", size = 2073359, upload-time = "2025-04-23T18:31:16.393Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/12/e7/6a36a07c59ebefc8777d1ffdaf5ae71b06b21952582e4b07eba88a421c79/pydantic_core-2.33.2-cp311-cp311-musllinux_1_1_armv7l.whl", hash = "sha256:bc7aee6f634a6f4a95676fcb5d6559a2c2a390330098dba5e5a5f28a2e4ada30", size = 2245883, upload-time = "2025-04-23T18:31:17.892Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/16/3f/59b3187aaa6cc0c1e6616e8045b284de2b6a87b027cce2ffcea073adf1d2/pydantic_core-2.33.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:235f45e5dbcccf6bd99f9f472858849f73d11120d76ea8707115415f8e5ebebf", size = 2241074, upload-time = "2025-04-23T18:31:19.205Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/e0/ed/55532bb88f674d5d8f67ab121a2a13c385df382de2a1677f30ad385f7438/pydantic_core-2.33.2-cp311-cp311-win32.whl", hash = "sha256:6368900c2d3ef09b69cb0b913f9f8263b03786e5b2a387706c5afb66800efd51", size = 1910538, upload-time = "2025-04-23T18:31:20.541Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/fe/1b/25b7cccd4519c0b23c2dd636ad39d381abf113085ce4f7bec2b0dc755eb1/pydantic_core-2.33.2-cp311-cp311-win_amd64.whl", hash = "sha256:1e063337ef9e9820c77acc768546325ebe04ee38b08703244c1309cccc4f1bab", size = 1952909, upload-time = "2025-04-23T18:31:22.371Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/49/a9/d809358e49126438055884c4366a1f6227f0f84f635a9014e2deb9b9de54/pydantic_core-2.33.2-cp311-cp311-win_arm64.whl", hash = "sha256:6b99022f1d19bc32a4c2a0d544fc9a76e3be90f0b3f4af413f87d38749300e65", size = 1897786, upload-time = "2025-04-23T18:31:24.161Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/18/8a/2b41c97f554ec8c71f2a8a5f85cb56a8b0956addfe8b0efb5b3d77e8bdc3/pydantic_core-2.33.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:a7ec89dc587667f22b6a0b6579c249fca9026ce7c333fc142ba42411fa243cdc", size = 2009000, upload-time = "2025-04-23T18:31:25.863Z" }, { url = "https://pypi.tuna.tsinghua.edu.cn/packages/18/8a/2b41c97f554ec8c71f2a8a5f85cb56a8b0956addfe8b0efb5b3d77e8bdc3/pydantic_core-2.33.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:a7ec89dc587667f22b6a0b6579c249fca9026ce7c333fc142ba42411fa243cdc", size = 2009000, upload-time = "2025-04-23T18:31:25.863Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/a1/02/6224312aacb3c8ecbaa959897af57181fb6cf3a3d7917fd44d0f2917e6f2/pydantic_core-2.33.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3c6db6e52c6d70aa0d00d45cdb9b40f0433b96380071ea80b09277dba021ddf7", size = 1847996, upload-time = "2025-04-23T18:31:27.341Z" }, { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a1/02/6224312aacb3c8ecbaa959897af57181fb6cf3a3d7917fd44d0f2917e6f2/pydantic_core-2.33.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3c6db6e52c6d70aa0d00d45cdb9b40f0433b96380071ea80b09277dba021ddf7", size = 1847996, upload-time = "2025-04-23T18:31:27.341Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/d6/46/6dcdf084a523dbe0a0be59d054734b86a981726f221f4562aed313dbcb49/pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e61206137cbc65e6d5256e1166f88331d3b6238e082d9f74613b9b765fb9025", size = 1880957, upload-time = "2025-04-23T18:31:28.956Z" }, { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d6/46/6dcdf084a523dbe0a0be59d054734b86a981726f221f4562aed313dbcb49/pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e61206137cbc65e6d5256e1166f88331d3b6238e082d9f74613b9b765fb9025", size = 1880957, upload-time = "2025-04-23T18:31:28.956Z" },
@ -362,24 +309,6 @@ wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/a4/7d/e09391c2eebeab681df2b74bfe6c43422fffede8dc74187b2b0bf6fd7571/pydantic_core-2.33.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:61c18fba8e5e9db3ab908620af374db0ac1baa69f0f32df4f61ae23f15e586ac", size = 1806162, upload-time = "2025-04-23T18:32:20.188Z" }, { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a4/7d/e09391c2eebeab681df2b74bfe6c43422fffede8dc74187b2b0bf6fd7571/pydantic_core-2.33.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:61c18fba8e5e9db3ab908620af374db0ac1baa69f0f32df4f61ae23f15e586ac", size = 1806162, upload-time = "2025-04-23T18:32:20.188Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/f1/3d/847b6b1fed9f8ed3bb95a9ad04fbd0b212e832d4f0f50ff4d9ee5a9f15cf/pydantic_core-2.33.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95237e53bb015f67b63c91af7518a62a8660376a6a0db19b89acc77a4d6199f5", size = 1981560, upload-time = "2025-04-23T18:32:22.354Z" }, { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f1/3d/847b6b1fed9f8ed3bb95a9ad04fbd0b212e832d4f0f50ff4d9ee5a9f15cf/pydantic_core-2.33.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95237e53bb015f67b63c91af7518a62a8660376a6a0db19b89acc77a4d6199f5", size = 1981560, upload-time = "2025-04-23T18:32:22.354Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/6f/9a/e73262f6c6656262b5fdd723ad90f518f579b7bc8622e43a942eec53c938/pydantic_core-2.33.2-cp313-cp313t-win_amd64.whl", hash = "sha256:c2fc0a768ef76c15ab9238afa6da7f69895bb5d1ee83aeea2e3509af4472d0b9", size = 1935777, upload-time = "2025-04-23T18:32:25.088Z" }, { url = "https://pypi.tuna.tsinghua.edu.cn/packages/6f/9a/e73262f6c6656262b5fdd723ad90f518f579b7bc8622e43a942eec53c938/pydantic_core-2.33.2-cp313-cp313t-win_amd64.whl", hash = "sha256:c2fc0a768ef76c15ab9238afa6da7f69895bb5d1ee83aeea2e3509af4472d0b9", size = 1935777, upload-time = "2025-04-23T18:32:25.088Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/30/68/373d55e58b7e83ce371691f6eaa7175e3a24b956c44628eb25d7da007917/pydantic_core-2.33.2-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:5c4aa4e82353f65e548c476b37e64189783aa5384903bfea4f41580f255fddfa", size = 2023982, upload-time = "2025-04-23T18:32:53.14Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/a4/16/145f54ac08c96a63d8ed6442f9dec17b2773d19920b627b18d4f10a061ea/pydantic_core-2.33.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:d946c8bf0d5c24bf4fe333af284c59a19358aa3ec18cb3dc4370080da1e8ad29", size = 1858412, upload-time = "2025-04-23T18:32:55.52Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/41/b1/c6dc6c3e2de4516c0bb2c46f6a373b91b5660312342a0cf5826e38ad82fa/pydantic_core-2.33.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:87b31b6846e361ef83fedb187bb5b4372d0da3f7e28d85415efa92d6125d6e6d", size = 1892749, upload-time = "2025-04-23T18:32:57.546Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/12/73/8cd57e20afba760b21b742106f9dbdfa6697f1570b189c7457a1af4cd8a0/pydantic_core-2.33.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa9d91b338f2df0508606f7009fde642391425189bba6d8c653afd80fd6bb64e", size = 2067527, upload-time = "2025-04-23T18:32:59.771Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/e3/d5/0bb5d988cc019b3cba4a78f2d4b3854427fc47ee8ec8e9eaabf787da239c/pydantic_core-2.33.2-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2058a32994f1fde4ca0480ab9d1e75a0e8c87c22b53a3ae66554f9af78f2fe8c", size = 2108225, upload-time = "2025-04-23T18:33:04.51Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/f1/c5/00c02d1571913d496aabf146106ad8239dc132485ee22efe08085084ff7c/pydantic_core-2.33.2-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:0e03262ab796d986f978f79c943fc5f620381be7287148b8010b4097f79a39ec", size = 2069490, upload-time = "2025-04-23T18:33:06.391Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/22/a8/dccc38768274d3ed3a59b5d06f59ccb845778687652daa71df0cab4040d7/pydantic_core-2.33.2-pp310-pypy310_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:1a8695a8d00c73e50bff9dfda4d540b7dee29ff9b8053e38380426a85ef10052", size = 2237525, upload-time = "2025-04-23T18:33:08.44Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/d4/e7/4f98c0b125dda7cf7ccd14ba936218397b44f50a56dd8c16a3091df116c3/pydantic_core-2.33.2-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:fa754d1850735a0b0e03bcffd9d4b4343eb417e47196e4485d9cca326073a42c", size = 2238446, upload-time = "2025-04-23T18:33:10.313Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ce/91/2ec36480fdb0b783cd9ef6795753c1dea13882f2e68e73bce76ae8c21e6a/pydantic_core-2.33.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:a11c8d26a50bfab49002947d3d237abe4d9e4b5bdc8846a63537b6488e197808", size = 2066678, upload-time = "2025-04-23T18:33:12.224Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/7b/27/d4ae6487d73948d6f20dddcd94be4ea43e74349b56eba82e9bdee2d7494c/pydantic_core-2.33.2-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:dd14041875d09cc0f9308e37a6f8b65f5585cf2598a53aa0123df8b129d481f8", size = 2025200, upload-time = "2025-04-23T18:33:14.199Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/f1/b8/b3cb95375f05d33801024079b9392a5ab45267a63400bf1866e7ce0f0de4/pydantic_core-2.33.2-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:d87c561733f66531dced0da6e864f44ebf89a8fba55f31407b00c2f7f9449593", size = 1859123, upload-time = "2025-04-23T18:33:16.555Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/05/bc/0d0b5adeda59a261cd30a1235a445bf55c7e46ae44aea28f7bd6ed46e091/pydantic_core-2.33.2-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2f82865531efd18d6e07a04a17331af02cb7a651583c418df8266f17a63c6612", size = 1892852, upload-time = "2025-04-23T18:33:18.513Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/3e/11/d37bdebbda2e449cb3f519f6ce950927b56d62f0b84fd9cb9e372a26a3d5/pydantic_core-2.33.2-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2bfb5112df54209d820d7bf9317c7a6c9025ea52e49f46b6a2060104bba37de7", size = 2067484, upload-time = "2025-04-23T18:33:20.475Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/8c/55/1f95f0a05ce72ecb02a8a8a1c3be0579bbc29b1d5ab68f1378b7bebc5057/pydantic_core-2.33.2-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:64632ff9d614e5eecfb495796ad51b0ed98c453e447a76bcbeeb69615079fc7e", size = 2108896, upload-time = "2025-04-23T18:33:22.501Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/53/89/2b2de6c81fa131f423246a9109d7b2a375e83968ad0800d6e57d0574629b/pydantic_core-2.33.2-pp311-pypy311_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:f889f7a40498cc077332c7ab6b4608d296d852182211787d4f3ee377aaae66e8", size = 2069475, upload-time = "2025-04-23T18:33:24.528Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/b8/e9/1f7efbe20d0b2b10f6718944b5d8ece9152390904f29a78e68d4e7961159/pydantic_core-2.33.2-pp311-pypy311_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:de4b83bb311557e439b9e186f733f6c645b9417c84e2eb8203f3f820a4b988bf", size = 2239013, upload-time = "2025-04-23T18:33:26.621Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/3c/b2/5309c905a93811524a49b4e031e9851a6b00ff0fb668794472ea7746b448/pydantic_core-2.33.2-pp311-pypy311_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:82f68293f055f51b51ea42fafc74b6aad03e70e191799430b90c13d643059ebb", size = 2238715, upload-time = "2025-04-23T18:33:28.656Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/32/56/8a7ca5d2cd2cda1d245d34b1c9a942920a718082ae8e54e5f3e5a58b7add/pydantic_core-2.33.2-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:329467cecfb529c925cf2bbd4d60d2c509bc2fb52a20c1045bf09bb70971a9c1", size = 2066757, upload-time = "2025-04-23T18:33:30.645Z" },
]
[[package]]
@ -420,14 +349,15 @@ wheels = [
[[package]]
name = "starlette"
version = "0.46.2"
version = "0.49.1"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
dependencies = [
{ name = "anyio" },
{ name = "typing-extensions", marker = "python_full_version < '3.13'" },
]
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ce/20/08dfcd9c983f6a6f4a1000d934b9e6d626cff8d2eeb77a89a68eef20a2b7/starlette-0.46.2.tar.gz", hash = "sha256:7f7361f34eed179294600af672f565727419830b54b7b084efe44bb82d2fccd5", size = 2580846, upload-time = "2025-04-13T13:56:17.942Z" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/1b/3f/507c21db33b66fb027a332f2cb3abbbe924cc3a79ced12f01ed8645955c9/starlette-0.49.1.tar.gz", hash = "sha256:481a43b71e24ed8c43b11ea02f5353d77840e01480881b8cb5a26b8cae64a8cb", size = 2654703, upload-time = "2025-10-28T17:34:10.928Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/8b/0c/9d30a4ebeb6db2b25a841afbb80f6ef9a854fc3b41be131d249a977b4959/starlette-0.46.2-py3-none-any.whl", hash = "sha256:595633ce89f8ffa71a015caed34a5b2dc1c0cdb3f0f1fbd1e69339cf2abeec35", size = 72037, upload-time = "2025-04-13T13:56:16.21Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/51/da/545b75d420bb23b5d494b0517757b351963e974e79933f01e05c929f20a6/starlette-0.49.1-py3-none-any.whl", hash = "sha256:d92ce9f07e4a3caa3ac13a79523bd18e3bc0042bb8ff2d759a8e7dd0e1859875", size = 74175, upload-time = "2025-10-28T17:34:09.13Z" },
]
[[package]]
@ -453,11 +383,11 @@ wheels = [
[[package]]
name = "urllib3"
version = "2.4.0"
version = "2.6.3"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/8a/78/16493d9c386d8e60e442a35feac5e00f0913c0f4b7c217c11e8ec2ff53e0/urllib3-2.4.0.tar.gz", hash = "sha256:414bc6535b787febd7567804cc015fee39daab8ad86268f1310a9250697de466", size = 390672, upload-time = "2025-04-10T15:23:39.232Z" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c7/24/5f1b3bdffd70275f6661c76461e25f024d5a38a46f04aaca912426a2b1d3/urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed", size = 435556, upload-time = "2026-01-07T16:24:43.925Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/6b/11/cc635220681e93a0183390e26485430ca2c7b5f9d33b15c74c2861cb8091/urllib3-2.4.0-py3-none-any.whl", hash = "sha256:4e16665048960a0900c702d4a66415956a584919c03361cac9f1df5c5dd7e813", size = 128680, upload-time = "2025-04-10T15:23:37.377Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/39/08/aaaad47bc4e9dc8c725e68f9d04865dbcb2052843ff09c97b08904852d84/urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4", size = 131584, upload-time = "2026-01-07T16:24:42.685Z" },
]
[[package]]
@ -467,7 +397,6 @@ source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
dependencies = [
{ name = "click" },
{ name = "h11" },
{ name = "typing-extensions", marker = "python_full_version < '3.11'" },
]
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a6/ae/9bbb19b9e1c450cf9ecaef06463e40234d98d95bf572fab11b4f19ae5ded/uvicorn-0.34.2.tar.gz", hash = "sha256:0e929828f6186353a80b58ea719861d2629d766293b6d19baf086ba31d4f3328", size = 76815, upload-time = "2025-04-19T06:02:50.101Z" }
wheels = [
@ -480,28 +409,6 @@ version = "1.17.2"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c3/fc/e91cc220803d7bc4db93fb02facd8461c37364151b8494762cc88b0fbcef/wrapt-1.17.2.tar.gz", hash = "sha256:41388e9d4d1522446fe79d3213196bd9e3b301a336965b9e27ca2788ebd122f3", size = 55531, upload-time = "2025-01-14T10:35:45.465Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/5a/d1/1daec934997e8b160040c78d7b31789f19b122110a75eca3d4e8da0049e1/wrapt-1.17.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3d57c572081fed831ad2d26fd430d565b76aa277ed1d30ff4d40670b1c0dd984", size = 53307, upload-time = "2025-01-14T10:33:13.616Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/1b/7b/13369d42651b809389c1a7153baa01d9700430576c81a2f5c5e460df0ed9/wrapt-1.17.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b5e251054542ae57ac7f3fba5d10bfff615b6c2fb09abeb37d2f1463f841ae22", size = 38486, upload-time = "2025-01-14T10:33:15.947Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/62/bf/e0105016f907c30b4bd9e377867c48c34dc9c6c0c104556c9c9126bd89ed/wrapt-1.17.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:80dd7db6a7cb57ffbc279c4394246414ec99537ae81ffd702443335a61dbf3a7", size = 38777, upload-time = "2025-01-14T10:33:17.462Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/27/70/0f6e0679845cbf8b165e027d43402a55494779295c4b08414097b258ac87/wrapt-1.17.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a6e821770cf99cc586d33833b2ff32faebdbe886bd6322395606cf55153246c", size = 83314, upload-time = "2025-01-14T10:33:21.282Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/0f/77/0576d841bf84af8579124a93d216f55d6f74374e4445264cb378a6ed33eb/wrapt-1.17.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b60fb58b90c6d63779cb0c0c54eeb38941bae3ecf7a73c764c52c88c2dcb9d72", size = 74947, upload-time = "2025-01-14T10:33:24.414Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/90/ec/00759565518f268ed707dcc40f7eeec38637d46b098a1f5143bff488fe97/wrapt-1.17.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b870b5df5b71d8c3359d21be8f0d6c485fa0ebdb6477dda51a1ea54a9b558061", size = 82778, upload-time = "2025-01-14T10:33:26.152Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/f8/5a/7cffd26b1c607b0b0c8a9ca9d75757ad7620c9c0a9b4a25d3f8a1480fafc/wrapt-1.17.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:4011d137b9955791f9084749cba9a367c68d50ab8d11d64c50ba1688c9b457f2", size = 81716, upload-time = "2025-01-14T10:33:27.372Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/7e/09/dccf68fa98e862df7e6a60a61d43d644b7d095a5fc36dbb591bbd4a1c7b2/wrapt-1.17.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:1473400e5b2733e58b396a04eb7f35f541e1fb976d0c0724d0223dd607e0f74c", size = 74548, upload-time = "2025-01-14T10:33:28.52Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/b7/8e/067021fa3c8814952c5e228d916963c1115b983e21393289de15128e867e/wrapt-1.17.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3cedbfa9c940fdad3e6e941db7138e26ce8aad38ab5fe9dcfadfed9db7a54e62", size = 81334, upload-time = "2025-01-14T10:33:29.643Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/4b/0d/9d4b5219ae4393f718699ca1c05f5ebc0c40d076f7e65fd48f5f693294fb/wrapt-1.17.2-cp310-cp310-win32.whl", hash = "sha256:582530701bff1dec6779efa00c516496968edd851fba224fbd86e46cc6b73563", size = 36427, upload-time = "2025-01-14T10:33:30.832Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/72/6a/c5a83e8f61aec1e1aeef939807602fb880e5872371e95df2137142f5c58e/wrapt-1.17.2-cp310-cp310-win_amd64.whl", hash = "sha256:58705da316756681ad3c9c73fd15499aa4d8c69f9fd38dc8a35e06c12468582f", size = 38774, upload-time = "2025-01-14T10:33:32.897Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/cd/f7/a2aab2cbc7a665efab072344a8949a71081eed1d2f451f7f7d2b966594a2/wrapt-1.17.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ff04ef6eec3eee8a5efef2401495967a916feaa353643defcc03fc74fe213b58", size = 53308, upload-time = "2025-01-14T10:33:33.992Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/50/ff/149aba8365fdacef52b31a258c4dc1c57c79759c335eff0b3316a2664a64/wrapt-1.17.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4db983e7bca53819efdbd64590ee96c9213894272c776966ca6306b73e4affda", size = 38488, upload-time = "2025-01-14T10:33:35.264Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/65/46/5a917ce85b5c3b490d35c02bf71aedaa9f2f63f2d15d9949cc4ba56e8ba9/wrapt-1.17.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9abc77a4ce4c6f2a3168ff34b1da9b0f311a8f1cfd694ec96b0603dff1c79438", size = 38776, upload-time = "2025-01-14T10:33:38.28Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ca/74/336c918d2915a4943501c77566db41d1bd6e9f4dbc317f356b9a244dfe83/wrapt-1.17.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0b929ac182f5ace000d459c59c2c9c33047e20e935f8e39371fa6e3b85d56f4a", size = 83776, upload-time = "2025-01-14T10:33:40.678Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/09/99/c0c844a5ccde0fe5761d4305485297f91d67cf2a1a824c5f282e661ec7ff/wrapt-1.17.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f09b286faeff3c750a879d336fb6d8713206fc97af3adc14def0cdd349df6000", size = 75420, upload-time = "2025-01-14T10:33:41.868Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/b4/b0/9fc566b0fe08b282c850063591a756057c3247b2362b9286429ec5bf1721/wrapt-1.17.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1a7ed2d9d039bd41e889f6fb9364554052ca21ce823580f6a07c4ec245c1f5d6", size = 83199, upload-time = "2025-01-14T10:33:43.598Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/9d/4b/71996e62d543b0a0bd95dda485219856def3347e3e9380cc0d6cf10cfb2f/wrapt-1.17.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:129a150f5c445165ff941fc02ee27df65940fcb8a22a61828b1853c98763a64b", size = 82307, upload-time = "2025-01-14T10:33:48.499Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/39/35/0282c0d8789c0dc9bcc738911776c762a701f95cfe113fb8f0b40e45c2b9/wrapt-1.17.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:1fb5699e4464afe5c7e65fa51d4f99e0b2eadcc176e4aa33600a3df7801d6662", size = 75025, upload-time = "2025-01-14T10:33:51.191Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/4f/6d/90c9fd2c3c6fee181feecb620d95105370198b6b98a0770cba090441a828/wrapt-1.17.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9a2bce789a5ea90e51a02dfcc39e31b7f1e662bc3317979aa7e5538e3a034f72", size = 81879, upload-time = "2025-01-14T10:33:52.328Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/8f/fa/9fb6e594f2ce03ef03eddbdb5f4f90acb1452221a5351116c7c4708ac865/wrapt-1.17.2-cp311-cp311-win32.whl", hash = "sha256:4afd5814270fdf6380616b321fd31435a462019d834f83c8611a0ce7484c7317", size = 36419, upload-time = "2025-01-14T10:33:53.551Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/47/f8/fb1773491a253cbc123c5d5dc15c86041f746ed30416535f2a8df1f4a392/wrapt-1.17.2-cp311-cp311-win_amd64.whl", hash = "sha256:acc130bc0375999da18e3d19e5a86403667ac0c4042a094fefb7eec8ebac7cf3", size = 38773, upload-time = "2025-01-14T10:33:56.323Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/a1/bd/ab55f849fd1f9a58ed7ea47f5559ff09741b25f00c191231f9f059c83949/wrapt-1.17.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:d5e2439eecc762cd85e7bd37161d4714aa03a33c5ba884e26c81559817ca0925", size = 53799, upload-time = "2025-01-14T10:33:57.4Z" }, { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a1/bd/ab55f849fd1f9a58ed7ea47f5559ff09741b25f00c191231f9f059c83949/wrapt-1.17.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:d5e2439eecc762cd85e7bd37161d4714aa03a33c5ba884e26c81559817ca0925", size = 53799, upload-time = "2025-01-14T10:33:57.4Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/53/18/75ddc64c3f63988f5a1d7e10fb204ffe5762bc663f8023f18ecaf31a332e/wrapt-1.17.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:3fc7cb4c1c744f8c05cd5f9438a3caa6ab94ce8344e952d7c45a8ed59dd88392", size = 38821, upload-time = "2025-01-14T10:33:59.334Z" }, { url = "https://pypi.tuna.tsinghua.edu.cn/packages/53/18/75ddc64c3f63988f5a1d7e10fb204ffe5762bc663f8023f18ecaf31a332e/wrapt-1.17.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:3fc7cb4c1c744f8c05cd5f9438a3caa6ab94ce8344e952d7c45a8ed59dd88392", size = 38821, upload-time = "2025-01-14T10:33:59.334Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/48/2a/97928387d6ed1c1ebbfd4efc4133a0633546bec8481a2dd5ec961313a1c7/wrapt-1.17.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8fdbdb757d5390f7c675e558fd3186d590973244fab0c5fe63d373ade3e99d40", size = 38919, upload-time = "2025-01-14T10:34:04.093Z" }, { url = "https://pypi.tuna.tsinghua.edu.cn/packages/48/2a/97928387d6ed1c1ebbfd4efc4133a0633546bec8481a2dd5ec961313a1c7/wrapt-1.17.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8fdbdb757d5390f7c675e558fd3186d590973244fab0c5fe63d373ade3e99d40", size = 38919, upload-time = "2025-01-14T10:34:04.093Z" },

View File

@ -2,10 +2,12 @@
"id": 27, "id": 27,
"title": { "title": {
"en": "Interactive Agent", "en": "Interactive Agent",
"de": "Interaktiver Agent",
"zh": "可交互的 Agent" "zh": "可交互的 Agent"
}, },
"description": { "description": {
"en": "During the Agents execution, users can actively intervene and interact with the Agent to adjust or guide its output, ensuring the final result aligns with their intentions.", "en": "During the Agents execution, users can actively intervene and interact with the Agent to adjust or guide its output, ensuring the final result aligns with their intentions.",
"de": "Wahrend der Ausführung des Agenten können Benutzer aktiv eingreifen und mit dem Agenten interagieren, um dessen Ausgabe zu steuern, sodass das Endergebnis ihren Vorstellungen entspricht.",
"zh": "在 Agent 的运行过程中,用户可以随时介入,与 Agent 进行交互,以调整或引导生成结果,使最终输出更符合预期。" "zh": "在 Agent 的运行过程中,用户可以随时介入,与 Agent 进行交互,以调整或引导生成结果,使最终输出更符合预期。"
}, },
"canvas_type": "Agent", "canvas_type": "Agent",

View File

@ -27,6 +27,10 @@ from common.mcp_tool_call_conn import MCPToolCallSession, ToolCallSession
from timeit import default_timer as timer

from common.misc_utils import thread_pool_exec


class ToolParameter(TypedDict):
    type: str
    description: str
@ -56,12 +60,12 @@ class LLMToolPluginCallSession(ToolCallSession):
        st = timer()
        tool_obj = self.tools_map[name]
        if isinstance(tool_obj, MCPToolCallSession):
            resp = await asyncio.to_thread(tool_obj.tool_call, name, arguments, 60)
            resp = await thread_pool_exec(tool_obj.tool_call, name, arguments, 60)
        else:
            if hasattr(tool_obj, "invoke_async") and asyncio.iscoroutinefunction(tool_obj.invoke_async):
                resp = await tool_obj.invoke_async(**arguments)
            else:
                resp = await asyncio.to_thread(tool_obj.invoke, **arguments)
                resp = await thread_pool_exec(tool_obj.invoke, **arguments)
        self.callback(name, arguments, resp, elapsed_time=timer()-st)
        return resp
@ -122,6 +126,7 @@ class ToolParamBase(ComponentParamBase):
class ToolBase(ComponentBase): class ToolBase(ComponentBase):
def __init__(self, canvas, id, param: ComponentParamBase): def __init__(self, canvas, id, param: ComponentParamBase):
from agent.canvas import Canvas # Local import to avoid cyclic dependency from agent.canvas import Canvas # Local import to avoid cyclic dependency
assert isinstance(canvas, Canvas), "canvas must be an instance of Canvas" assert isinstance(canvas, Canvas), "canvas must be an instance of Canvas"
self._canvas = canvas self._canvas = canvas
self._id = id self._id = id
@ -164,7 +169,7 @@ class ToolBase(ComponentBase):
elif asyncio.iscoroutinefunction(self._invoke): elif asyncio.iscoroutinefunction(self._invoke):
res = await self._invoke(**kwargs) res = await self._invoke(**kwargs)
else: else:
res = await asyncio.to_thread(self._invoke, **kwargs) res = await thread_pool_exec(self._invoke, **kwargs)
except Exception as e: except Exception as e:
self._param.outputs["_ERROR"] = {"value": str(e)} self._param.outputs["_ERROR"] = {"value": str(e)}
logging.exception(e) logging.exception(e)
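
The changes in this file swap `asyncio.to_thread` for `thread_pool_exec` from `common.misc_utils` at every blocking call site. Below is a minimal sketch of what such a helper could look like, assuming it routes the blocking callable through one shared, bounded `ThreadPoolExecutor` rather than asyncio's default per-loop executor; only the call shape is taken from the diff, the body is illustrative.

```python
# Hypothetical sketch of a thread_pool_exec helper like the one imported from
# common.misc_utils above; the real implementation may differ.
import asyncio
import functools
from concurrent.futures import ThreadPoolExecutor

# One shared, bounded pool instead of asyncio's default per-loop executor.
_POOL = ThreadPoolExecutor(max_workers=8, thread_name_prefix="blocking")

async def thread_pool_exec(fn, *args, **kwargs):
    """Run a blocking callable on the shared pool and await its result."""
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(_POOL, functools.partial(fn, *args, **kwargs))

# Call shape mirrors the diff, e.g.:
#   resp = await thread_pool_exec(tool_obj.invoke, **arguments)
```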

View File

@ -110,7 +110,7 @@ module.exports = { main };
self.lang = Language.PYTHON.value self.lang = Language.PYTHON.value
self.script = 'def main(arg1: str, arg2: str) -> dict: return {"result": arg1 + arg2}' self.script = 'def main(arg1: str, arg2: str) -> dict: return {"result": arg1 + arg2}'
self.arguments = {} self.arguments = {}
self.outputs = {"result": {"value": "", "type": "string"}} self.outputs = {"result": {"value": "", "type": "object"}}
def check(self): def check(self):
self.check_valid_value(self.lang, "Support languages", ["python", "python3", "nodejs", "javascript"]) self.check_valid_value(self.lang, "Support languages", ["python", "python3", "nodejs", "javascript"])
@ -140,26 +140,61 @@ class CodeExec(ToolBase, ABC):
continue continue
arguments[k] = self._canvas.get_variable_value(v) if v else None arguments[k] = self._canvas.get_variable_value(v) if v else None
self._execute_code(language=lang, code=script, arguments=arguments) return self._execute_code(language=lang, code=script, arguments=arguments)
def _execute_code(self, language: str, code: str, arguments: dict): def _execute_code(self, language: str, code: str, arguments: dict):
import requests import requests
if self.check_if_canceled("CodeExec execution"): if self.check_if_canceled("CodeExec execution"):
return return self.output()
try: try:
# Try using the new sandbox provider system first
try:
from agent.sandbox.client import execute_code as sandbox_execute_code
if self.check_if_canceled("CodeExec execution"):
return
# Execute code using the provider system
result = sandbox_execute_code(
code=code,
language=language,
timeout=int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10 * 60)),
arguments=arguments
)
if self.check_if_canceled("CodeExec execution"):
return
# Process the result
if result.stderr:
self.set_output("_ERROR", result.stderr)
return
parsed_stdout = self._deserialize_stdout(result.stdout)
logging.info(f"[CodeExec]: Provider system -> {parsed_stdout}")
self._populate_outputs(parsed_stdout, result.stdout)
return
except (ImportError, RuntimeError) as provider_error:
# Provider system not available or not configured, fall back to HTTP
logging.info(f"[CodeExec]: Provider system not available, using HTTP fallback: {provider_error}")
# Fallback to direct HTTP request
code_b64 = self._encode_code(code) code_b64 = self._encode_code(code)
code_req = CodeExecutionRequest(code_b64=code_b64, language=language, arguments=arguments).model_dump() code_req = CodeExecutionRequest(code_b64=code_b64, language=language, arguments=arguments).model_dump()
except Exception as e: except Exception as e:
if self.check_if_canceled("CodeExec execution"): if self.check_if_canceled("CodeExec execution"):
return return self.output()
self.set_output("_ERROR", "construct code request error: " + str(e)) self.set_output("_ERROR", "construct code request error: " + str(e))
return self.output()
try: try:
if self.check_if_canceled("CodeExec execution"): if self.check_if_canceled("CodeExec execution"):
return "Task has been canceled" self.set_output("_ERROR", "Task has been canceled")
return self.output()
resp = requests.post(url=f"http://{settings.SANDBOX_HOST}:9385/run", json=code_req, timeout=int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10 * 60))) resp = requests.post(url=f"http://{settings.SANDBOX_HOST}:9385/run", json=code_req, timeout=int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10 * 60)))
logging.info(f"http://{settings.SANDBOX_HOST}:9385/run, code_req: {code_req}, resp.status_code {resp.status_code}:") logging.info(f"http://{settings.SANDBOX_HOST}:9385/run, code_req: {code_req}, resp.status_code {resp.status_code}:")
@ -174,17 +209,18 @@ class CodeExec(ToolBase, ABC):
stderr = body.get("stderr") stderr = body.get("stderr")
if stderr: if stderr:
self.set_output("_ERROR", stderr) self.set_output("_ERROR", stderr)
return return self.output()
raw_stdout = body.get("stdout", "") raw_stdout = body.get("stdout", "")
parsed_stdout = self._deserialize_stdout(raw_stdout) parsed_stdout = self._deserialize_stdout(raw_stdout)
logging.info(f"[CodeExec]: http://{settings.SANDBOX_HOST}:9385/run -> {parsed_stdout}") logging.info(f"[CodeExec]: http://{settings.SANDBOX_HOST}:9385/run -> {parsed_stdout}")
self._populate_outputs(parsed_stdout, raw_stdout) self._populate_outputs(parsed_stdout, raw_stdout)
else: else:
self.set_output("_ERROR", "There is no response from sandbox") self.set_output("_ERROR", "There is no response from sandbox")
return self.output()
except Exception as e: except Exception as e:
if self.check_if_canceled("CodeExec execution"): if self.check_if_canceled("CodeExec execution"):
return return self.output()
self.set_output("_ERROR", "Exception executing code: " + str(e)) self.set_output("_ERROR", "Exception executing code: " + str(e))
@ -295,6 +331,8 @@ class CodeExec(ToolBase, ABC):
if key.startswith("_"): if key.startswith("_"):
continue continue
val = self._get_by_path(parsed_stdout, key) val = self._get_by_path(parsed_stdout, key)
if val is None and len(outputs_items) == 1:
val = parsed_stdout
coerced = self._coerce_output_value(val, meta.get("type")) coerced = self._coerce_output_value(val, meta.get("type"))
logging.info(f"[CodeExec]: populate dict key='{key}' raw='{val}' coerced='{coerced}'") logging.info(f"[CodeExec]: populate dict key='{key}' raw='{val}' coerced='{coerced}'")
self.set_output(key, coerced) self.set_output(key, coerced)
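
The rewritten `_execute_code` tries an optional sandbox provider first and only falls back to the HTTP sandbox at `http://{SANDBOX_HOST}:9385/run` when the provider module is missing or unconfigured (`ImportError` / `RuntimeError`). A self-contained sketch of that fallback shape, with placeholder functions standing in for `agent.sandbox.client.execute_code` and the HTTP call:

```python
# Illustrative shape of the provider-first execution with an HTTP fallback used
# in CodeExec._execute_code; every name below is a placeholder, not RAGFlow API.
import logging

def run_via_provider(code: str, language: str, arguments: dict) -> dict:
    # Stand-in for agent.sandbox.client.execute_code; unavailable in this sketch.
    raise ImportError("sandbox provider not installed")

def run_via_http(code: str, language: str, arguments: dict) -> dict:
    # Stand-in for the POST to http://{SANDBOX_HOST}:9385/run.
    return {"stdout": '{"result": "ok"}', "stderr": ""}

def execute(code: str, language: str, arguments: dict) -> dict:
    try:
        return run_via_provider(code, language, arguments)
    except (ImportError, RuntimeError) as provider_error:
        # Provider not installed or not configured: fall back to the HTTP sandbox.
        logging.info("provider unavailable, falling back to HTTP: %s", provider_error)
        return run_via_http(code, language, arguments)

print(execute('def main(): return {"result": "hi"}', "python", {}))
```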

View File

@ -53,7 +53,7 @@ class ExeSQLParam(ToolParamBase):
self.max_records = 1024 self.max_records = 1024
def check(self): def check(self):
self.check_valid_value(self.db_type, "Choose DB type", ['mysql', 'postgres', 'mariadb', 'mssql', 'IBM DB2', 'trino']) self.check_valid_value(self.db_type, "Choose DB type", ['mysql', 'postgres', 'mariadb', 'mssql', 'IBM DB2', 'trino', 'oceanbase'])
self.check_empty(self.database, "Database name") self.check_empty(self.database, "Database name")
self.check_empty(self.username, "database username") self.check_empty(self.username, "database username")
self.check_empty(self.host, "IP Address") self.check_empty(self.host, "IP Address")
@ -86,6 +86,12 @@ class ExeSQL(ToolBase, ABC):
def convert_decimals(obj): def convert_decimals(obj):
from decimal import Decimal from decimal import Decimal
import math
if isinstance(obj, float):
# Handle NaN and Infinity which are not valid JSON values
if math.isnan(obj) or math.isinf(obj):
return None
return obj
if isinstance(obj, Decimal): if isinstance(obj, Decimal):
return float(obj) # 或 str(obj) return float(obj) # 或 str(obj)
elif isinstance(obj, dict): elif isinstance(obj, dict):
@ -120,6 +126,9 @@ class ExeSQL(ToolBase, ABC):
if self._param.db_type in ["mysql", "mariadb"]: if self._param.db_type in ["mysql", "mariadb"]:
db = pymysql.connect(db=self._param.database, user=self._param.username, host=self._param.host, db = pymysql.connect(db=self._param.database, user=self._param.username, host=self._param.host,
port=self._param.port, password=self._param.password) port=self._param.port, password=self._param.password)
elif self._param.db_type == 'oceanbase':
db = pymysql.connect(db=self._param.database, user=self._param.username, host=self._param.host,
port=self._param.port, password=self._param.password, charset='utf8mb4')
elif self._param.db_type == 'postgres': elif self._param.db_type == 'postgres':
db = psycopg2.connect(dbname=self._param.database, user=self._param.username, host=self._param.host, db = psycopg2.connect(dbname=self._param.database, user=self._param.username, host=self._param.host,
port=self._param.port, password=self._param.password) port=self._param.port, password=self._param.password)
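
Two things change in this file: `oceanbase` becomes a valid `db_type` that is reached over the MySQL protocol via `pymysql` with `utf8mb4`, and `convert_decimals` now maps `NaN`/`Infinity` floats to `None` so result rows serialize as valid JSON. A rough sketch of both pieces together; the connection parameters are placeholders, not values from the diff:

```python
# Sketch: query OceanBase over its MySQL-compatible protocol with pymysql and
# make the rows JSON-safe. Host, port and credentials below are placeholders.
import json
import math
from decimal import Decimal

import pymysql
import pymysql.cursors

def to_json_safe(obj):
    """Recursively map Decimal to float and NaN/Infinity to None."""
    if isinstance(obj, float):
        return None if math.isnan(obj) or math.isinf(obj) else obj
    if isinstance(obj, Decimal):
        return float(obj)
    if isinstance(obj, dict):
        return {k: to_json_safe(v) for k, v in obj.items()}
    if isinstance(obj, list):
        return [to_json_safe(v) for v in obj]
    return obj

db = pymysql.connect(host="127.0.0.1", port=2881, user="root@test",
                     password="", db="test", charset="utf8mb4")
try:
    with db.cursor(pymysql.cursors.DictCursor) as cursor:
        cursor.execute("SELECT 1 AS one, CAST(1.5 AS DECIMAL(10, 2)) AS amount")
        rows = [to_json_safe(row) for row in cursor.fetchall()]
    print(json.dumps(rows))
finally:
    db.close()
```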

View File

@ -21,7 +21,7 @@ import re
from abc import ABC from abc import ABC
from agent.tools.base import ToolParamBase, ToolBase, ToolMeta from agent.tools.base import ToolParamBase, ToolBase, ToolMeta
from common.constants import LLMType from common.constants import LLMType
from api.db.services.document_service import DocumentService from api.db.services.doc_metadata_service import DocMetadataService
from common.metadata_utils import apply_meta_data_filter from common.metadata_utils import apply_meta_data_filter
from api.db.services.knowledgebase_service import KnowledgebaseService from api.db.services.knowledgebase_service import KnowledgebaseService
from api.db.services.llm_service import LLMBundle from api.db.services.llm_service import LLMBundle
@ -125,7 +125,7 @@ class Retrieval(ToolBase, ABC):
doc_ids = [] doc_ids = []
if self._param.meta_data_filter != {}: if self._param.meta_data_filter != {}:
metas = DocumentService.get_meta_by_kbs(kb_ids) metas = DocMetadataService.get_flatted_meta_by_kbs(kb_ids)
def _resolve_manual_filter(flt: dict) -> dict: def _resolve_manual_filter(flt: dict) -> dict:
pat = re.compile(self.variable_ref_patt) pat = re.compile(self.variable_ref_patt)
@ -174,7 +174,7 @@ class Retrieval(ToolBase, ABC):
if kbs: if kbs:
query = re.sub(r"^user[:\s]*", "", query, flags=re.IGNORECASE) query = re.sub(r"^user[:\s]*", "", query, flags=re.IGNORECASE)
kbinfos = settings.retriever.retrieval( kbinfos = await settings.retriever.retrieval(
query, query,
embd_mdl, embd_mdl,
[kb.tenant_id for kb in kbs], [kb.tenant_id for kb in kbs],
@ -193,7 +193,7 @@ class Retrieval(ToolBase, ABC):
if self._param.toc_enhance: if self._param.toc_enhance:
chat_mdl = LLMBundle(self._canvas._tenant_id, LLMType.CHAT) chat_mdl = LLMBundle(self._canvas._tenant_id, LLMType.CHAT)
cks = settings.retriever.retrieval_by_toc(query, kbinfos["chunks"], [kb.tenant_id for kb in kbs], cks = await settings.retriever.retrieval_by_toc(query, kbinfos["chunks"], [kb.tenant_id for kb in kbs],
chat_mdl, self._param.top_n) chat_mdl, self._param.top_n)
if self.check_if_canceled("Retrieval processing"): if self.check_if_canceled("Retrieval processing"):
return return

View File

@ -1 +0,0 @@
from .deep_research import DeepResearcher as DeepResearcher

View File

@ -1,238 +0,0 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import logging
import re
from functools import partial
from agentic_reasoning.prompts import BEGIN_SEARCH_QUERY, BEGIN_SEARCH_RESULT, END_SEARCH_RESULT, MAX_SEARCH_LIMIT, \
END_SEARCH_QUERY, REASON_PROMPT, RELEVANT_EXTRACTION_PROMPT
from api.db.services.llm_service import LLMBundle
from rag.nlp import extract_between
from rag.prompts import kb_prompt
from rag.utils.tavily_conn import Tavily
class DeepResearcher:
def __init__(self,
chat_mdl: LLMBundle,
prompt_config: dict,
kb_retrieve: partial = None,
kg_retrieve: partial = None
):
self.chat_mdl = chat_mdl
self.prompt_config = prompt_config
self._kb_retrieve = kb_retrieve
self._kg_retrieve = kg_retrieve
def _remove_tags(text: str, start_tag: str, end_tag: str) -> str:
"""General Tag Removal Method"""
pattern = re.escape(start_tag) + r"(.*?)" + re.escape(end_tag)
return re.sub(pattern, "", text)
@staticmethod
def _remove_query_tags(text: str) -> str:
"""Remove Query Tags"""
return DeepResearcher._remove_tags(text, BEGIN_SEARCH_QUERY, END_SEARCH_QUERY)
@staticmethod
def _remove_result_tags(text: str) -> str:
"""Remove Result Tags"""
return DeepResearcher._remove_tags(text, BEGIN_SEARCH_RESULT, END_SEARCH_RESULT)
async def _generate_reasoning(self, msg_history):
"""Generate reasoning steps"""
query_think = ""
if msg_history[-1]["role"] != "user":
msg_history.append({"role": "user", "content": "Continues reasoning with the new information.\n"})
else:
msg_history[-1]["content"] += "\n\nContinues reasoning with the new information.\n"
async for ans in self.chat_mdl.async_chat_streamly(REASON_PROMPT, msg_history, {"temperature": 0.7}):
ans = re.sub(r"^.*</think>", "", ans, flags=re.DOTALL)
if not ans:
continue
query_think = ans
yield query_think
query_think = ""
yield query_think
def _extract_search_queries(self, query_think, question, step_index):
"""Extract search queries from thinking"""
queries = extract_between(query_think, BEGIN_SEARCH_QUERY, END_SEARCH_QUERY)
if not queries and step_index == 0:
# If this is the first step and no queries are found, use the original question as the query
queries = [question]
return queries
def _truncate_previous_reasoning(self, all_reasoning_steps):
"""Truncate previous reasoning steps to maintain a reasonable length"""
truncated_prev_reasoning = ""
for i, step in enumerate(all_reasoning_steps):
truncated_prev_reasoning += f"Step {i + 1}: {step}\n\n"
prev_steps = truncated_prev_reasoning.split('\n\n')
if len(prev_steps) <= 5:
truncated_prev_reasoning = '\n\n'.join(prev_steps)
else:
truncated_prev_reasoning = ''
for i, step in enumerate(prev_steps):
if i == 0 or i >= len(prev_steps) - 4 or BEGIN_SEARCH_QUERY in step or BEGIN_SEARCH_RESULT in step:
truncated_prev_reasoning += step + '\n\n'
else:
if truncated_prev_reasoning[-len('\n\n...\n\n'):] != '\n\n...\n\n':
truncated_prev_reasoning += '...\n\n'
return truncated_prev_reasoning.strip('\n')
def _retrieve_information(self, search_query):
"""Retrieve information from different sources"""
# 1. Knowledge base retrieval
kbinfos = []
try:
kbinfos = self._kb_retrieve(question=search_query) if self._kb_retrieve else {"chunks": [], "doc_aggs": []}
except Exception as e:
logging.error(f"Knowledge base retrieval error: {e}")
# 2. Web retrieval (if Tavily API is configured)
try:
if self.prompt_config.get("tavily_api_key"):
tav = Tavily(self.prompt_config["tavily_api_key"])
tav_res = tav.retrieve_chunks(search_query)
kbinfos["chunks"].extend(tav_res["chunks"])
kbinfos["doc_aggs"].extend(tav_res["doc_aggs"])
except Exception as e:
logging.error(f"Web retrieval error: {e}")
# 3. Knowledge graph retrieval (if configured)
try:
if self.prompt_config.get("use_kg") and self._kg_retrieve:
ck = self._kg_retrieve(question=search_query)
if ck["content_with_weight"]:
kbinfos["chunks"].insert(0, ck)
except Exception as e:
logging.error(f"Knowledge graph retrieval error: {e}")
return kbinfos
def _update_chunk_info(self, chunk_info, kbinfos):
"""Update chunk information for citations"""
if not chunk_info["chunks"]:
# If this is the first retrieval, use the retrieval results directly
for k in chunk_info.keys():
chunk_info[k] = kbinfos[k]
else:
# Merge newly retrieved information, avoiding duplicates
cids = [c["chunk_id"] for c in chunk_info["chunks"]]
for c in kbinfos["chunks"]:
if c["chunk_id"] not in cids:
chunk_info["chunks"].append(c)
dids = [d["doc_id"] for d in chunk_info["doc_aggs"]]
for d in kbinfos["doc_aggs"]:
if d["doc_id"] not in dids:
chunk_info["doc_aggs"].append(d)
async def _extract_relevant_info(self, truncated_prev_reasoning, search_query, kbinfos):
"""Extract and summarize relevant information"""
summary_think = ""
async for ans in self.chat_mdl.async_chat_streamly(
RELEVANT_EXTRACTION_PROMPT.format(
prev_reasoning=truncated_prev_reasoning,
search_query=search_query,
document="\n".join(kb_prompt(kbinfos, 4096))
),
[{"role": "user",
"content": f'Now you should analyze each web page and find helpful information based on the current search query "{search_query}" and previous reasoning steps.'}],
{"temperature": 0.7}):
ans = re.sub(r"^.*</think>", "", ans, flags=re.DOTALL)
if not ans:
continue
summary_think = ans
yield summary_think
summary_think = ""
yield summary_think
async def thinking(self, chunk_info: dict, question: str):
executed_search_queries = []
msg_history = [{"role": "user", "content": f'Question:\"{question}\"\n'}]
all_reasoning_steps = []
think = "<think>"
for step_index in range(MAX_SEARCH_LIMIT + 1):
# Check if the maximum search limit has been reached
if step_index == MAX_SEARCH_LIMIT - 1:
summary_think = f"\n{BEGIN_SEARCH_RESULT}\nThe maximum search limit is exceeded. You are not allowed to search.\n{END_SEARCH_RESULT}\n"
yield {"answer": think + summary_think + "</think>", "reference": {}, "audio_binary": None}
all_reasoning_steps.append(summary_think)
msg_history.append({"role": "assistant", "content": summary_think})
break
# Step 1: Generate reasoning
query_think = ""
async for ans in self._generate_reasoning(msg_history):
query_think = ans
yield {"answer": think + self._remove_query_tags(query_think) + "</think>", "reference": {}, "audio_binary": None}
think += self._remove_query_tags(query_think)
all_reasoning_steps.append(query_think)
# Step 2: Extract search queries
queries = self._extract_search_queries(query_think, question, step_index)
if not queries and step_index > 0:
# If not the first step and no queries, end the search process
break
# Process each search query
for search_query in queries:
logging.info(f"[THINK]Query: {step_index}. {search_query}")
msg_history.append({"role": "assistant", "content": search_query})
think += f"\n\n> {step_index + 1}. {search_query}\n\n"
yield {"answer": think + "</think>", "reference": {}, "audio_binary": None}
# Check if the query has already been executed
if search_query in executed_search_queries:
summary_think = f"\n{BEGIN_SEARCH_RESULT}\nYou have searched this query. Please refer to previous results.\n{END_SEARCH_RESULT}\n"
yield {"answer": think + summary_think + "</think>", "reference": {}, "audio_binary": None}
all_reasoning_steps.append(summary_think)
msg_history.append({"role": "user", "content": summary_think})
think += summary_think
continue
executed_search_queries.append(search_query)
# Step 3: Truncate previous reasoning steps
truncated_prev_reasoning = self._truncate_previous_reasoning(all_reasoning_steps)
# Step 4: Retrieve information
kbinfos = self._retrieve_information(search_query)
# Step 5: Update chunk information
self._update_chunk_info(chunk_info, kbinfos)
# Step 6: Extract relevant information
think += "\n\n"
summary_think = ""
async for ans in self._extract_relevant_info(truncated_prev_reasoning, search_query, kbinfos):
summary_think = ans
yield {"answer": think + self._remove_result_tags(summary_think) + "</think>", "reference": {}, "audio_binary": None}
all_reasoning_steps.append(summary_think)
msg_history.append(
{"role": "user", "content": f"\n\n{BEGIN_SEARCH_RESULT}{summary_think}{END_SEARCH_RESULT}\n\n"})
think += self._remove_result_tags(summary_think)
logging.info(f"[THINK]Summary: {step_index}. {summary_think}")
yield think + "</think>"

View File

@ -1,147 +0,0 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
BEGIN_SEARCH_QUERY = "<|begin_search_query|>"
END_SEARCH_QUERY = "<|end_search_query|>"
BEGIN_SEARCH_RESULT = "<|begin_search_result|>"
END_SEARCH_RESULT = "<|end_search_result|>"
MAX_SEARCH_LIMIT = 6
REASON_PROMPT = f"""You are an advanced reasoning agent. Your goal is to answer the user's question by breaking it down into a series of verifiable steps.
You have access to a powerful search tool to find information.
**Your Task:**
1. Analyze the user's question.
2. If you need information, issue a search query to find a specific fact.
3. Review the search results.
4. Repeat the search process until you have all the facts needed to answer the question.
5. Once you have gathered sufficient information, synthesize the facts and provide the final answer directly.
**Tool Usage:**
- To search, you MUST write your query between the special tokens: {BEGIN_SEARCH_QUERY}your query{END_SEARCH_QUERY}.
- The system will provide results between {BEGIN_SEARCH_RESULT}search results{END_SEARCH_RESULT}.
- You have a maximum of {MAX_SEARCH_LIMIT} search attempts.
---
**Example 1: Multi-hop Question**
**Question:** "Are both the directors of Jaws and Casino Royale from the same country?"
**Your Thought Process & Actions:**
First, I need to identify the director of Jaws.
{BEGIN_SEARCH_QUERY}who is the director of Jaws?{END_SEARCH_QUERY}
[System returns search results]
{BEGIN_SEARCH_RESULT}
Jaws is a 1975 American thriller film directed by Steven Spielberg.
{END_SEARCH_RESULT}
Okay, the director of Jaws is Steven Spielberg. Now I need to find out his nationality.
{BEGIN_SEARCH_QUERY}where is Steven Spielberg from?{END_SEARCH_QUERY}
[System returns search results]
{BEGIN_SEARCH_RESULT}
Steven Allan Spielberg is an American filmmaker. Born in Cincinnati, Ohio...
{END_SEARCH_RESULT}
So, Steven Spielberg is from the USA. Next, I need to find the director of Casino Royale.
{BEGIN_SEARCH_QUERY}who is the director of Casino Royale 2006?{END_SEARCH_QUERY}
[System returns search results]
{BEGIN_SEARCH_RESULT}
Casino Royale is a 2006 spy film directed by Martin Campbell.
{END_SEARCH_RESULT}
The director of Casino Royale is Martin Campbell. Now I need his nationality.
{BEGIN_SEARCH_QUERY}where is Martin Campbell from?{END_SEARCH_QUERY}
[System returns search results]
{BEGIN_SEARCH_RESULT}
Martin Campbell (born 24 October 1943) is a New Zealand film and television director.
{END_SEARCH_RESULT}
I have all the information. Steven Spielberg is from the USA, and Martin Campbell is from New Zealand. They are not from the same country.
Final Answer: No, the directors of Jaws and Casino Royale are not from the same country. Steven Spielberg is from the USA, and Martin Campbell is from New Zealand.
---
**Example 2: Simple Fact Retrieval**
**Question:** "When was the founder of craigslist born?"
**Your Thought Process & Actions:**
First, I need to know who founded craigslist.
{BEGIN_SEARCH_QUERY}who founded craigslist?{END_SEARCH_QUERY}
[System returns search results]
{BEGIN_SEARCH_RESULT}
Craigslist was founded in 1995 by Craig Newmark.
{END_SEARCH_RESULT}
The founder is Craig Newmark. Now I need his birth date.
{BEGIN_SEARCH_QUERY}when was Craig Newmark born?{END_SEARCH_QUERY}
[System returns search results]
{BEGIN_SEARCH_RESULT}
Craig Newmark was born on December 6, 1952.
{END_SEARCH_RESULT}
I have found the answer.
Final Answer: The founder of craigslist, Craig Newmark, was born on December 6, 1952.
---
**Important Rules:**
- **One Fact at a Time:** Decompose the problem and issue one search query at a time to find a single, specific piece of information.
- **Be Precise:** Formulate clear and precise search queries. If a search fails, rephrase it.
- **Synthesize at the End:** Do not provide the final answer until you have completed all necessary searches.
- **Language Consistency:** Your search queries should be in the same language as the user's question.
Now, begin your work. Please answer the following question by thinking step-by-step.
"""
RELEVANT_EXTRACTION_PROMPT = """You are a highly efficient information extraction module. Your sole purpose is to extract the single most relevant piece of information from the provided `Searched Web Pages` that directly answers the `Current Search Query`.
**Your Task:**
1. Read the `Current Search Query` to understand what specific information is needed.
2. Scan the `Searched Web Pages` to find the answer to that query.
3. Extract only the essential, factual information that answers the query. Be concise.
**Context (For Your Information Only):**
The `Previous Reasoning Steps` are provided to give you context on the overall goal, but your primary focus MUST be on answering the `Current Search Query`. Do not use information from the previous steps in your output.
**Output Format:**
Your response must follow one of two formats precisely.
1. **If a direct and relevant answer is found:**
- Start your response immediately with `Final Information`.
- Provide only the extracted fact(s). Do not add any extra conversational text.
*Example:*
`Current Search Query`: Where is Martin Campbell from?
`Searched Web Pages`: [Long article snippet about Martin Campbell's career, which includes the sentence "Martin Campbell (born 24 October 1943) is a New Zealand film and television director..."]
*Your Output:*
Final Information
Martin Campbell is a New Zealand film and television director.
2. **If no relevant answer that directly addresses the query is found in the web pages:**
- Start your response immediately with `Final Information`.
- Write the exact phrase: `No helpful information found.`
---
**BEGIN TASK**
**Inputs:**
- **Previous Reasoning Steps:**
{prev_reasoning}
- **Current Search Query:**
{search_query}
- **Searched Web Pages:**
{document}
"""

View File

@ -16,21 +16,23 @@
import logging import logging
import os import os
import sys import sys
import time
from importlib.util import module_from_spec, spec_from_file_location from importlib.util import module_from_spec, spec_from_file_location
from pathlib import Path from pathlib import Path
from quart import Blueprint, Quart, request, g, current_app, session from quart import Blueprint, Quart, request, g, current_app, session, jsonify
from flasgger import Swagger
from itsdangerous.url_safe import URLSafeTimedSerializer as Serializer from itsdangerous.url_safe import URLSafeTimedSerializer as Serializer
from quart_cors import cors from quart_cors import cors
from common.constants import StatusEnum from common.constants import StatusEnum, RetCode
from api.db.db_models import close_connection, APIToken from api.db.db_models import close_connection, APIToken
from api.db.services import UserService from api.db.services import UserService
from api.utils.json_encode import CustomJSONEncoder from api.utils.json_encode import CustomJSONEncoder
from api.utils import commands from api.utils import commands
from quart_auth import Unauthorized from quart_auth import Unauthorized as QuartAuthUnauthorized
from werkzeug.exceptions import Unauthorized as WerkzeugUnauthorized
from quart_schema import QuartSchema
from common import settings from common import settings
from api.utils.api_utils import server_error_response from api.utils.api_utils import server_error_response, get_json_result
from api.constants import API_VERSION from api.constants import API_VERSION
from common.misc_utils import get_uuid from common.misc_utils import get_uuid
@ -38,40 +40,27 @@ settings.init_settings()
__all__ = ["app"] __all__ = ["app"]
UNAUTHORIZED_MESSAGE = "<Unauthorized '401: Unauthorized'>"
def _unauthorized_message(error):
if error is None:
return UNAUTHORIZED_MESSAGE
try:
msg = repr(error)
except Exception:
return UNAUTHORIZED_MESSAGE
if msg == UNAUTHORIZED_MESSAGE:
return msg
if "Unauthorized" in msg and "401" in msg:
return msg
return UNAUTHORIZED_MESSAGE
app = Quart(__name__) app = Quart(__name__)
app = cors(app, allow_origin="*") app = cors(app, allow_origin="*")
# Add this at the beginning of your file to configure Swagger UI # openapi supported
swagger_config = { QuartSchema(app)
"headers": [],
"specs": [
{
"endpoint": "apispec",
"route": "/apispec.json",
"rule_filter": lambda rule: True, # Include all endpoints
"model_filter": lambda tag: True, # Include all models
}
],
"static_url_path": "/flasgger_static",
"swagger_ui": True,
"specs_route": "/apidocs/",
}
swagger = Swagger(
app,
config=swagger_config,
template={
"swagger": "2.0",
"info": {
"title": "RAGFlow API",
"description": "",
"version": "1.0.0",
},
"securityDefinitions": {
"ApiKeyAuth": {"type": "apiKey", "name": "Authorization", "in": "header"}
},
},
)
app.url_map.strict_slashes = False app.url_map.strict_slashes = False
app.json_encoder = CustomJSONEncoder app.json_encoder = CustomJSONEncoder
@ -125,18 +114,28 @@ def _load_user():
user = UserService.query( user = UserService.query(
access_token=access_token, status=StatusEnum.VALID.value access_token=access_token, status=StatusEnum.VALID.value
) )
if not user and len(authorization.split()) == 2:
objs = APIToken.query(token=authorization.split()[1])
if objs:
user = UserService.query(id=objs[0].tenant_id, status=StatusEnum.VALID.value)
if user: if user:
if not user[0].access_token or not user[0].access_token.strip(): if not user[0].access_token or not user[0].access_token.strip():
logging.warning(f"User {user[0].email} has empty access_token in database") logging.warning(f"User {user[0].email} has empty access_token in database")
return None return None
g.user = user[0] g.user = user[0]
return user[0] return user[0]
except Exception as e: except Exception as e_auth:
logging.warning(f"load_user got exception {e}") logging.warning(f"load_user got exception {e_auth}")
try:
authorization = request.headers.get("Authorization")
if len(authorization.split()) == 2:
objs = APIToken.query(token=authorization.split()[1])
if objs:
user = UserService.query(id=objs[0].tenant_id, status=StatusEnum.VALID.value)
if user:
if not user[0].access_token or not user[0].access_token.strip():
logging.warning(f"User {user[0].email} has empty access_token in database")
return None
g.user = user[0]
return user[0]
except Exception as e_api_token:
logging.warning(f"load_user got exception {e_api_token}")
current_user = LocalProxy(_load_user) current_user = LocalProxy(_load_user)
@ -164,10 +163,18 @@ def login_required(func: Callable[P, Awaitable[T]]) -> Callable[P, Awaitable[T]]
@wraps(func) @wraps(func)
async def wrapper(*args: P.args, **kwargs: P.kwargs) -> T: async def wrapper(*args: P.args, **kwargs: P.kwargs) -> T:
if not current_user: # or not session.get("_user_id"): timing_enabled = os.getenv("RAGFLOW_API_TIMING")
raise Unauthorized() t_start = time.perf_counter() if timing_enabled else None
else: user = current_user
return await current_app.ensure_async(func)(*args, **kwargs) if timing_enabled:
logging.info(
"api_timing login_required auth_ms=%.2f path=%s",
(time.perf_counter() - t_start) * 1000,
request.path,
)
if not user: # or not session.get("_user_id"):
raise QuartAuthUnauthorized()
return await current_app.ensure_async(func)(*args, **kwargs)
return wrapper return wrapper
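
`login_required` now measures the auth lookup when the `RAGFLOW_API_TIMING` environment variable is set and logs an `api_timing` line. The sketch below is a simplified stand-in that times the whole wrapped call rather than only the auth step, but the env gating and log shape follow the diff:

```python
# Simplified stand-in for the RAGFLOW_API_TIMING toggle above: it times the whole
# wrapped call rather than just the auth lookup, but the gating and log shape match.
import asyncio
import functools
import logging
import os
import time

def timed(func):
    @functools.wraps(func)
    async def wrapper(*args, **kwargs):
        timing_enabled = os.getenv("RAGFLOW_API_TIMING")
        t_start = time.perf_counter() if timing_enabled else None
        result = await func(*args, **kwargs)
        if timing_enabled:
            logging.info("api_timing %s ms=%.2f", func.__name__,
                         (time.perf_counter() - t_start) * 1000)
        return result
    return wrapper

@timed
async def handler():
    await asyncio.sleep(0.01)
    return "ok"

# Only logs when the environment variable is set, e.g. RAGFLOW_API_TIMING=1.
print(asyncio.run(handler()))
```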
@ -277,14 +284,34 @@ client_urls_prefix = [
@app.errorhandler(404) @app.errorhandler(404)
async def not_found(error): async def not_found(error):
error_msg: str = f"The requested URL {request.path} was not found" logging.error(f"The requested URL {request.path} was not found")
logging.error(error_msg) message = f"Not Found: {request.path}"
return { response = {
"code": RetCode.NOT_FOUND,
"message": message,
"data": None,
"error": "Not Found", "error": "Not Found",
"message": error_msg, }
}, 404 return jsonify(response), RetCode.NOT_FOUND
@app.errorhandler(401)
async def unauthorized(error):
logging.warning("Unauthorized request")
return get_json_result(code=RetCode.UNAUTHORIZED, message=_unauthorized_message(error)), RetCode.UNAUTHORIZED
@app.errorhandler(QuartAuthUnauthorized)
async def unauthorized_quart_auth(error):
logging.warning("Unauthorized request (quart_auth)")
return get_json_result(code=RetCode.UNAUTHORIZED, message=repr(error)), RetCode.UNAUTHORIZED
@app.errorhandler(WerkzeugUnauthorized)
async def unauthorized_werkzeug(error):
logging.warning("Unauthorized request (werkzeug)")
return get_json_result(code=RetCode.UNAUTHORIZED, message=_unauthorized_message(error)), RetCode.UNAUTHORIZED
@app.teardown_request @app.teardown_request
def _db_close(exception): def _db_close(exception):
if exception: if exception:
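
The 404 and 401 handlers now return one consistent JSON envelope (`code`, `message`, `data`) instead of the previous ad-hoc body. A toy Quart app showing that envelope in isolation; the literal 404/401 below stand in for `RetCode.NOT_FOUND` and `RetCode.UNAUTHORIZED`:

```python
# Toy Quart app showing the JSON error envelope the handlers above converge on;
# the numeric codes stand in for RetCode.NOT_FOUND / RetCode.UNAUTHORIZED.
from quart import Quart, jsonify, request

app = Quart(__name__)
NOT_FOUND, UNAUTHORIZED = 404, 401

@app.errorhandler(404)
async def not_found(error):
    return jsonify({"code": NOT_FOUND, "message": f"Not Found: {request.path}",
                    "data": None, "error": "Not Found"}), NOT_FOUND

@app.errorhandler(401)
async def unauthorized(error):
    return jsonify({"code": UNAUTHORIZED, "message": repr(error), "data": None}), UNAUTHORIZED

# app.run() would serve this; any unknown path then returns the JSON body with HTTP 404.
```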

View File

@ -13,7 +13,6 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import asyncio
import inspect import inspect
import json import json
import logging import logging
@ -29,9 +28,14 @@ from api.db.services.task_service import queue_dataflow, CANVAS_DEBUG_DOC_ID, Ta
from api.db.services.user_service import TenantService from api.db.services.user_service import TenantService
from api.db.services.user_canvas_version import UserCanvasVersionService from api.db.services.user_canvas_version import UserCanvasVersionService
from common.constants import RetCode from common.constants import RetCode
from common.misc_utils import get_uuid from common.misc_utils import get_uuid, thread_pool_exec
from api.utils.api_utils import get_json_result, server_error_response, validate_request, get_data_error_result, \ from api.utils.api_utils import (
get_request_json get_json_result,
server_error_response,
validate_request,
get_data_error_result,
get_request_json,
)
from agent.canvas import Canvas from agent.canvas import Canvas
from peewee import MySQLDatabase, PostgresqlDatabase from peewee import MySQLDatabase, PostgresqlDatabase
from api.db.db_models import APIToken, Task from api.db.db_models import APIToken, Task
@ -132,12 +136,12 @@ async def run():
files = req.get("files", []) files = req.get("files", [])
inputs = req.get("inputs", {}) inputs = req.get("inputs", {})
user_id = req.get("user_id", current_user.id) user_id = req.get("user_id", current_user.id)
if not await asyncio.to_thread(UserCanvasService.accessible, req["id"], current_user.id): if not await thread_pool_exec(UserCanvasService.accessible, req["id"], current_user.id):
return get_json_result( return get_json_result(
data=False, message='Only owner of canvas authorized for this operation.', data=False, message='Only owner of canvas authorized for this operation.',
code=RetCode.OPERATING_ERROR) code=RetCode.OPERATING_ERROR)
e, cvs = await asyncio.to_thread(UserCanvasService.get_by_id, req["id"]) e, cvs = await thread_pool_exec(UserCanvasService.get_by_id, req["id"])
if not e: if not e:
return get_data_error_result(message="canvas not found.") return get_data_error_result(message="canvas not found.")
@ -147,7 +151,7 @@ async def run():
if cvs.canvas_category == CanvasCategory.DataFlow: if cvs.canvas_category == CanvasCategory.DataFlow:
task_id = get_uuid() task_id = get_uuid()
Pipeline(cvs.dsl, tenant_id=current_user.id, doc_id=CANVAS_DEBUG_DOC_ID, task_id=task_id, flow_id=req["id"]) Pipeline(cvs.dsl, tenant_id=current_user.id, doc_id=CANVAS_DEBUG_DOC_ID, task_id=task_id, flow_id=req["id"])
ok, error_message = await asyncio.to_thread(queue_dataflow, user_id, req["id"], task_id, CANVAS_DEBUG_DOC_ID, files[0], 0) ok, error_message = await thread_pool_exec(queue_dataflow, user_id, req["id"], task_id, CANVAS_DEBUG_DOC_ID, files[0], 0)
if not ok: if not ok:
return get_data_error_result(message=error_message) return get_data_error_result(message=error_message)
return get_json_result(data={"message_id": task_id}) return get_json_result(data={"message_id": task_id})
@ -322,6 +326,9 @@ async def test_db_connect():
if req["db_type"] in ["mysql", "mariadb"]: if req["db_type"] in ["mysql", "mariadb"]:
db = MySQLDatabase(req["database"], user=req["username"], host=req["host"], port=req["port"], db = MySQLDatabase(req["database"], user=req["username"], host=req["host"], port=req["port"],
password=req["password"]) password=req["password"])
elif req["db_type"] == "oceanbase":
db = MySQLDatabase(req["database"], user=req["username"], host=req["host"], port=req["port"],
password=req["password"], charset="utf8mb4")
elif req["db_type"] == 'postgres': elif req["db_type"] == 'postgres':
db = PostgresqlDatabase(req["database"], user=req["username"], host=req["host"], port=req["port"], db = PostgresqlDatabase(req["database"], user=req["username"], host=req["host"], port=req["port"],
password=req["password"]) password=req["password"])
@ -540,6 +547,7 @@ def sessions(canvas_id):
@login_required @login_required
def prompts(): def prompts():
from rag.prompts.generator import ANALYZE_TASK_SYSTEM, ANALYZE_TASK_USER, NEXT_STEP, REFLECT, CITATION_PROMPT_TEMPLATE from rag.prompts.generator import ANALYZE_TASK_SYSTEM, ANALYZE_TASK_USER, NEXT_STEP, REFLECT, CITATION_PROMPT_TEMPLATE
return get_json_result(data={ return get_json_result(data={
"task_analysis": ANALYZE_TASK_SYSTEM +"\n\n"+ ANALYZE_TASK_USER, "task_analysis": ANALYZE_TASK_SYSTEM +"\n\n"+ ANALYZE_TASK_USER,
"plan_generation": NEXT_STEP, "plan_generation": NEXT_STEP,

View File

@ -13,22 +13,29 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import asyncio import base64
import datetime import datetime
import json import json
import logging
import re import re
import base64
import xxhash import xxhash
from quart import request from quart import request
from api.db.services.document_service import DocumentService from api.db.services.document_service import DocumentService
from api.db.services.doc_metadata_service import DocMetadataService
from api.db.services.knowledgebase_service import KnowledgebaseService from api.db.services.knowledgebase_service import KnowledgebaseService
from api.db.services.llm_service import LLMBundle from api.db.services.llm_service import LLMBundle
from common.metadata_utils import apply_meta_data_filter from common.metadata_utils import apply_meta_data_filter
from api.db.services.search_service import SearchService from api.db.services.search_service import SearchService
from api.db.services.user_service import UserTenantService from api.db.services.user_service import UserTenantService
from api.utils.api_utils import get_data_error_result, get_json_result, server_error_response, validate_request, \ from api.utils.api_utils import (
get_request_json get_data_error_result,
get_json_result,
server_error_response,
validate_request,
get_request_json,
)
from common.misc_utils import thread_pool_exec
from rag.app.qa import beAdoc, rmPrefix from rag.app.qa import beAdoc, rmPrefix
from rag.app.tag import label_question from rag.app.tag import label_question
from rag.nlp import rag_tokenizer, search from rag.nlp import rag_tokenizer, search
@ -38,7 +45,6 @@ from common.constants import RetCode, LLMType, ParserType, PAGERANK_FLD
from common import settings from common import settings
from api.apps import login_required, current_user from api.apps import login_required, current_user
@manager.route('/list', methods=['POST']) # noqa: F821 @manager.route('/list', methods=['POST']) # noqa: F821
@login_required @login_required
@validate_request("doc_id") @validate_request("doc_id")
@ -61,7 +67,7 @@ async def list_chunk():
} }
if "available_int" in req: if "available_int" in req:
query["available_int"] = int(req["available_int"]) query["available_int"] = int(req["available_int"])
sres = settings.retriever.search(query, search.index_name(tenant_id), kb_ids, highlight=["content_ltks"]) sres = await settings.retriever.search(query, search.index_name(tenant_id), kb_ids, highlight=["content_ltks"])
res = {"total": sres.total, "chunks": [], "doc": doc.to_dict()} res = {"total": sres.total, "chunks": [], "doc": doc.to_dict()}
for id in sres.ids: for id in sres.ids:
d = { d = {
@ -126,10 +132,15 @@ def get():
@validate_request("doc_id", "chunk_id", "content_with_weight") @validate_request("doc_id", "chunk_id", "content_with_weight")
async def set(): async def set():
req = await get_request_json() req = await get_request_json()
content_with_weight = req["content_with_weight"]
if not isinstance(content_with_weight, (str, bytes)):
raise TypeError("expected string or bytes-like object")
if isinstance(content_with_weight, bytes):
content_with_weight = content_with_weight.decode("utf-8", errors="ignore")
d = { d = {
"id": req["chunk_id"], "id": req["chunk_id"],
"content_with_weight": req["content_with_weight"]} "content_with_weight": content_with_weight}
d["content_ltks"] = rag_tokenizer.tokenize(req["content_with_weight"]) d["content_ltks"] = rag_tokenizer.tokenize(content_with_weight)
d["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(d["content_ltks"]) d["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(d["content_ltks"])
if "important_kwd" in req: if "important_kwd" in req:
if not isinstance(req["important_kwd"], list): if not isinstance(req["important_kwd"], list):
@ -171,20 +182,21 @@ async def set():
_d = beAdoc(d, q, a, not any( _d = beAdoc(d, q, a, not any(
[rag_tokenizer.is_chinese(t) for t in q + a])) [rag_tokenizer.is_chinese(t) for t in q + a]))
v, c = embd_mdl.encode([doc.name, req["content_with_weight"] if not _d.get("question_kwd") else "\n".join(_d["question_kwd"])]) v, c = embd_mdl.encode([doc.name, content_with_weight if not _d.get("question_kwd") else "\n".join(_d["question_kwd"])])
v = 0.1 * v[0] + 0.9 * v[1] if doc.parser_id != ParserType.QA else v[1] v = 0.1 * v[0] + 0.9 * v[1] if doc.parser_id != ParserType.QA else v[1]
_d["q_%d_vec" % len(v)] = v.tolist() _d["q_%d_vec" % len(v)] = v.tolist()
settings.docStoreConn.update({"id": req["chunk_id"]}, _d, search.index_name(tenant_id), doc.kb_id) settings.docStoreConn.update({"id": req["chunk_id"]}, _d, search.index_name(tenant_id), doc.kb_id)
# update image # update image
image_base64 = req.get("image_base64", None) image_base64 = req.get("image_base64", None)
if image_base64: img_id = req.get("img_id", "")
bkt, name = req.get("img_id", "-").split("-") if image_base64 and img_id and "-" in img_id:
bkt, name = img_id.split("-", 1)
image_binary = base64.b64decode(image_base64) image_binary = base64.b64decode(image_base64)
settings.STORAGE_IMPL.put(bkt, name, image_binary) settings.STORAGE_IMPL.put(bkt, name, image_binary)
return get_json_result(data=True) return get_json_result(data=True)
return await asyncio.to_thread(_set_sync) return await thread_pool_exec(_set_sync)
except Exception as e: except Exception as e:
return server_error_response(e) return server_error_response(e)
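
The image update now guards on `img_id` and splits it with `maxsplit=1`, because `img_id` packs bucket and object name into one dash-separated string and the object name may itself contain dashes. A tiny illustration with made-up values:

```python
# Why the guarded split("-", 1) above matters: img_id packs "<bucket>-<object name>"
# into one string and the object name itself may contain dashes. Values are made up.
img_id = "kb42-doc-7f3a-thumbnail.png"

if img_id and "-" in img_id:
    bkt, name = img_id.split("-", 1)
    print(bkt)   # kb42
    print(name)  # doc-7f3a-thumbnail.png
else:
    print("no image to update")
```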
@ -207,7 +219,7 @@ async def switch():
return get_data_error_result(message="Index updating failure") return get_data_error_result(message="Index updating failure")
return get_json_result(data=True) return get_json_result(data=True)
return await asyncio.to_thread(_switch_sync) return await thread_pool_exec(_switch_sync)
except Exception as e: except Exception as e:
return server_error_response(e) return server_error_response(e)
@ -222,19 +234,34 @@ async def rm():
e, doc = DocumentService.get_by_id(req["doc_id"]) e, doc = DocumentService.get_by_id(req["doc_id"])
if not e: if not e:
return get_data_error_result(message="Document not found!") return get_data_error_result(message="Document not found!")
if not settings.docStoreConn.delete({"id": req["chunk_ids"]}, condition = {"id": req["chunk_ids"], "doc_id": req["doc_id"]}
search.index_name(DocumentService.get_tenant_id(req["doc_id"])), try:
doc.kb_id): deleted_count = settings.docStoreConn.delete(condition,
search.index_name(DocumentService.get_tenant_id(req["doc_id"])),
doc.kb_id)
except Exception:
return get_data_error_result(message="Chunk deleting failure") return get_data_error_result(message="Chunk deleting failure")
deleted_chunk_ids = req["chunk_ids"] deleted_chunk_ids = req["chunk_ids"]
chunk_number = len(deleted_chunk_ids) if isinstance(deleted_chunk_ids, list):
unique_chunk_ids = list(dict.fromkeys(deleted_chunk_ids))
has_ids = len(unique_chunk_ids) > 0
else:
unique_chunk_ids = [deleted_chunk_ids]
has_ids = deleted_chunk_ids not in (None, "")
if has_ids and deleted_count == 0:
return get_data_error_result(message="Index updating failure")
if deleted_count > 0 and deleted_count < len(unique_chunk_ids):
deleted_count += settings.docStoreConn.delete({"doc_id": req["doc_id"]},
search.index_name(DocumentService.get_tenant_id(req["doc_id"])),
doc.kb_id)
chunk_number = deleted_count
DocumentService.decrement_chunk_num(doc.id, doc.kb_id, 1, chunk_number, 0) DocumentService.decrement_chunk_num(doc.id, doc.kb_id, 1, chunk_number, 0)
for cid in deleted_chunk_ids: for cid in deleted_chunk_ids:
if settings.STORAGE_IMPL.obj_exist(doc.kb_id, cid): if settings.STORAGE_IMPL.obj_exist(doc.kb_id, cid):
settings.STORAGE_IMPL.rm(doc.kb_id, cid) settings.STORAGE_IMPL.rm(doc.kb_id, cid)
return get_json_result(data=True) return get_json_result(data=True)
return await asyncio.to_thread(_rm_sync) return await thread_pool_exec(_rm_sync)
except Exception as e: except Exception as e:
return server_error_response(e) return server_error_response(e)
@ -244,6 +271,7 @@ async def rm():
@validate_request("doc_id", "content_with_weight") @validate_request("doc_id", "content_with_weight")
async def create(): async def create():
req = await get_request_json() req = await get_request_json()
req_id = request.headers.get("X-Request-ID")
chunck_id = xxhash.xxh64((req["content_with_weight"] + req["doc_id"]).encode("utf-8")).hexdigest() chunck_id = xxhash.xxh64((req["content_with_weight"] + req["doc_id"]).encode("utf-8")).hexdigest()
d = {"id": chunck_id, "content_ltks": rag_tokenizer.tokenize(req["content_with_weight"]), d = {"id": chunck_id, "content_ltks": rag_tokenizer.tokenize(req["content_with_weight"]),
"content_with_weight": req["content_with_weight"]} "content_with_weight": req["content_with_weight"]}
@ -260,14 +288,23 @@ async def create():
d["create_timestamp_flt"] = datetime.datetime.now().timestamp() d["create_timestamp_flt"] = datetime.datetime.now().timestamp()
if "tag_feas" in req: if "tag_feas" in req:
d["tag_feas"] = req["tag_feas"] d["tag_feas"] = req["tag_feas"]
if "tag_feas" in req:
d["tag_feas"] = req["tag_feas"]
try: try:
def _log_response(resp, code, message):
logging.info(
"chunk_create response req_id=%s status=%s code=%s message=%s",
req_id,
getattr(resp, "status_code", None),
code,
message,
)
def _create_sync(): def _create_sync():
e, doc = DocumentService.get_by_id(req["doc_id"]) e, doc = DocumentService.get_by_id(req["doc_id"])
if not e: if not e:
return get_data_error_result(message="Document not found!") resp = get_data_error_result(message="Document not found!")
_log_response(resp, RetCode.DATA_ERROR, "Document not found!")
return resp
d["kb_id"] = [doc.kb_id] d["kb_id"] = [doc.kb_id]
d["docnm_kwd"] = doc.name d["docnm_kwd"] = doc.name
d["title_tks"] = rag_tokenizer.tokenize(doc.name) d["title_tks"] = rag_tokenizer.tokenize(doc.name)
@ -275,11 +312,15 @@ async def create():
tenant_id = DocumentService.get_tenant_id(req["doc_id"]) tenant_id = DocumentService.get_tenant_id(req["doc_id"])
if not tenant_id: if not tenant_id:
return get_data_error_result(message="Tenant not found!") resp = get_data_error_result(message="Tenant not found!")
_log_response(resp, RetCode.DATA_ERROR, "Tenant not found!")
return resp
e, kb = KnowledgebaseService.get_by_id(doc.kb_id) e, kb = KnowledgebaseService.get_by_id(doc.kb_id)
if not e: if not e:
return get_data_error_result(message="Knowledgebase not found!") resp = get_data_error_result(message="Knowledgebase not found!")
_log_response(resp, RetCode.DATA_ERROR, "Knowledgebase not found!")
return resp
if kb.pagerank: if kb.pagerank:
d[PAGERANK_FLD] = kb.pagerank d[PAGERANK_FLD] = kb.pagerank
@ -293,10 +334,13 @@ async def create():
DocumentService.increment_chunk_num( DocumentService.increment_chunk_num(
doc.id, doc.kb_id, c, 1, 0) doc.id, doc.kb_id, c, 1, 0)
return get_json_result(data={"chunk_id": chunck_id}) resp = get_json_result(data={"chunk_id": chunck_id})
_log_response(resp, RetCode.SUCCESS, "success")
return resp
return await asyncio.to_thread(_create_sync) return await thread_pool_exec(_create_sync)
except Exception as e: except Exception as e:
logging.info("chunk_create exception req_id=%s error=%r", req_id, e)
return server_error_response(e) return server_error_response(e)
@ -338,7 +382,7 @@ async def retrieval_test():
chat_mdl = LLMBundle(user_id, LLMType.CHAT) chat_mdl = LLMBundle(user_id, LLMType.CHAT)
if meta_data_filter: if meta_data_filter:
metas = DocumentService.get_meta_by_kbs(kb_ids) metas = DocMetadataService.get_flatted_meta_by_kbs(kb_ids)
local_doc_ids = await apply_meta_data_filter(meta_data_filter, metas, question, chat_mdl, local_doc_ids) local_doc_ids = await apply_meta_data_filter(meta_data_filter, metas, question, chat_mdl, local_doc_ids)
tenants = UserTenantService.query(user_id=user_id) tenants = UserTenantService.query(user_id=user_id)
@ -372,14 +416,21 @@ async def retrieval_test():
_question += await keyword_extraction(chat_mdl, _question) _question += await keyword_extraction(chat_mdl, _question)
labels = label_question(_question, [kb]) labels = label_question(_question, [kb])
ranks = settings.retriever.retrieval(_question, embd_mdl, tenant_ids, kb_ids, page, size, ranks = await settings.retriever.retrieval(
float(req.get("similarity_threshold", 0.0)), _question,
float(req.get("vector_similarity_weight", 0.3)), embd_mdl,
top, tenant_ids,
local_doc_ids, rerank_mdl=rerank_mdl, kb_ids,
highlight=req.get("highlight", False), page,
rank_feature=labels size,
) float(req.get("similarity_threshold", 0.0)),
float(req.get("vector_similarity_weight", 0.3)),
doc_ids=local_doc_ids,
top=top,
rerank_mdl=rerank_mdl,
rank_feature=labels
)
if use_kg: if use_kg:
ck = await settings.kg_retriever.retrieval(_question, ck = await settings.kg_retriever.retrieval(_question,
tenant_ids, tenant_ids,
@ -407,7 +458,7 @@ async def retrieval_test():
@manager.route('/knowledge_graph', methods=['GET']) # noqa: F821 @manager.route('/knowledge_graph', methods=['GET']) # noqa: F821
@login_required @login_required
def knowledge_graph(): async def knowledge_graph():
doc_id = request.args["doc_id"] doc_id = request.args["doc_id"]
tenant_id = DocumentService.get_tenant_id(doc_id) tenant_id = DocumentService.get_tenant_id(doc_id)
kb_ids = KnowledgebaseService.get_kb_ids(tenant_id) kb_ids = KnowledgebaseService.get_kb_ids(tenant_id)
@ -415,7 +466,7 @@ def knowledge_graph():
"doc_ids": [doc_id], "doc_ids": [doc_id],
"knowledge_graph_kwd": ["graph", "mind_map"] "knowledge_graph_kwd": ["graph", "mind_map"]
} }
sres = settings.retriever.search(req, search.index_name(tenant_id), kb_ids) sres = await settings.retriever.search(req, search.index_name(tenant_id), kb_ids)
obj = {"graph": {}, "mind_map": {}} obj = {"graph": {}, "mind_map": {}}
for id in sres.ids[:2]: for id in sres.ids[:2]:
ty = sres.field[id]["knowledge_graph_kwd"] ty = sres.field[id]["knowledge_graph_kwd"]

View File

@ -25,6 +25,7 @@ from api.utils.api_utils import get_data_error_result, get_json_result, get_requ
from common.misc_utils import get_uuid from common.misc_utils import get_uuid
from common.constants import RetCode from common.constants import RetCode
from api.apps import login_required, current_user from api.apps import login_required, current_user
import logging
@manager.route('/set', methods=['POST']) # noqa: F821 @manager.route('/set', methods=['POST']) # noqa: F821
@ -42,13 +43,19 @@ async def set_dialog():
if len(name.encode("utf-8")) > 255: if len(name.encode("utf-8")) > 255:
return get_data_error_result(message=f"Dialog name length is {len(name)} which is larger than 255") return get_data_error_result(message=f"Dialog name length is {len(name)} which is larger than 255")
if is_create and DialogService.query(tenant_id=current_user.id, name=name.strip()): name = name.strip()
name = name.strip() if is_create:
name = duplicate_name( # only for chat creating
DialogService.query, existing_names = {
name=name, d.name.casefold()
tenant_id=current_user.id, for d in DialogService.query(tenant_id=current_user.id, status=StatusEnum.VALID.value)
status=StatusEnum.VALID.value) if d.name
}
if name.casefold() in existing_names:
def _name_exists(name: str, **_kwargs) -> bool:
return name.casefold() in existing_names
name = duplicate_name(_name_exists, name=name)
description = req.get("description", "A helpful dialog") description = req.get("description", "A helpful dialog")
icon = req.get("icon", "") icon = req.get("icon", "")
@ -63,16 +70,30 @@ async def set_dialog():
meta_data_filter = req.get("meta_data_filter", {}) meta_data_filter = req.get("meta_data_filter", {})
prompt_config = req["prompt_config"] prompt_config = req["prompt_config"]
# Set default parameters for datasets with knowledge retrieval
# All datasets with {knowledge} in system prompt need "knowledge" parameter to enable retrieval
kb_ids = req.get("kb_ids", [])
parameters = prompt_config.get("parameters")
logging.debug(f"set_dialog: kb_ids={kb_ids}, parameters={parameters}, is_create={not is_create}")
# Check if parameters is missing, None, or empty list
if kb_ids and not parameters:
# Check if system prompt uses {knowledge} placeholder
if "{knowledge}" in prompt_config.get("system", ""):
# Set default parameters for any dataset with knowledge placeholder
prompt_config["parameters"] = [{"key": "knowledge", "optional": False}]
logging.debug(f"Set default parameters for datasets with knowledge placeholder: {kb_ids}")
if not is_create: if not is_create:
if not req.get("kb_ids", []) and not prompt_config.get("tavily_api_key") and "{knowledge}" in prompt_config['system']: # only for chat updating
if not req.get("kb_ids", []) and not prompt_config.get("tavily_api_key") and "{knowledge}" in prompt_config.get("system", ""):
return get_data_error_result(message="Please remove `{knowledge}` in system prompt since no dataset / Tavily used here.") return get_data_error_result(message="Please remove `{knowledge}` in system prompt since no dataset / Tavily used here.")
for p in prompt_config["parameters"]: for p in prompt_config.get("parameters", []):
if p["optional"]: if p["optional"]:
continue continue
if prompt_config["system"].find("{%s}" % p["key"]) < 0: if prompt_config.get("system", "").find("{%s}" % p["key"]) < 0:
return get_data_error_result( return get_data_error_result(
message="Parameter '{}' is not used".format(p["key"])) message="Parameter '{}' is not used".format(p["key"]))
try: try:
e, tenant = TenantService.get_by_id(current_user.id) e, tenant = TenantService.get_by_id(current_user.id)
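
Dialog creation now collects the tenant's existing names casefolded and only invokes `duplicate_name` with a membership predicate when the new name collides case-insensitively. A sketch of that pattern; the `duplicate_name` below is a hypothetical stand-in for `api.db.services.duplicate_name`:

```python
# Sketch of the case-insensitive de-duplication used when creating a dialog; this
# duplicate_name is a stand-in for api.db.services.duplicate_name and may differ.
def duplicate_name(query_func, name: str, **kwargs) -> str:
    """Append (1), (2), ... until query_func stops reporting a collision."""
    candidate, n = name, 0
    while query_func(name=candidate, **kwargs):
        n += 1
        candidate = f"{name}({n})"
    return candidate

existing_names = {"assistant", "sales bot"}  # names already casefolded

def _name_exists(name: str, **_kwargs) -> bool:
    return name.casefold() in existing_names

print(duplicate_name(_name_exists, name="Assistant"))  # -> Assistant(1)
```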

View File

@@ -13,7 +13,6 @@
# See the License for the specific language governing permissions and
# limitations under the License
#
import asyncio
import json
import os.path
import pathlib
@@ -27,18 +26,20 @@ from api.db import VALID_FILE_TYPES, FileType
from api.db.db_models import Task
from api.db.services import duplicate_name
from api.db.services.document_service import DocumentService, doc_upload_and_parse
from common.metadata_utils import meta_filter, convert_conditions
from api.db.services.doc_metadata_service import DocMetadataService
from common.metadata_utils import meta_filter, convert_conditions, turn2jsonschema
from api.db.services.file2document_service import File2DocumentService
from api.db.services.file_service import FileService
from api.db.services.knowledgebase_service import KnowledgebaseService
from api.db.services.task_service import TaskService, cancel_all_task_of
from api.db.services.user_service import UserTenantService
from common.misc_utils import get_uuid
from common.misc_utils import get_uuid, thread_pool_exec
from api.utils.api_utils import (
get_data_error_result,
get_json_result,
server_error_response,
validate_request, get_request_json,
validate_request,
get_request_json,
)
from api.utils.file_utils import filename_type, thumbnail
from common.file_utils import get_project_base_directory
@@ -62,10 +63,21 @@ async def upload():
return get_json_result(data=False, message="No file part!", code=RetCode.ARGUMENT_ERROR)
file_objs = files.getlist("file")
def _close_file_objs(objs):
for obj in objs:
try:
obj.close()
except Exception:
try:
obj.stream.close()
except Exception:
pass
for file_obj in file_objs:
if file_obj.filename == "":
_close_file_objs(file_objs)
return get_json_result(data=False, message="No file selected!", code=RetCode.ARGUMENT_ERROR)
if len(file_obj.filename.encode("utf-8")) > FILE_NAME_LEN_LIMIT:
_close_file_objs(file_objs)
return get_json_result(data=False, message=f"File name must be {FILE_NAME_LEN_LIMIT} bytes or less.", code=RetCode.ARGUMENT_ERROR)
e, kb = KnowledgebaseService.get_by_id(kb_id)
@@ -74,8 +86,9 @@ async def upload():
if not check_kb_team_permission(kb, current_user.id):
return get_json_result(data=False, message="No authorization.", code=RetCode.AUTHENTICATION_ERROR)
err, files = await asyncio.to_thread(FileService.upload_document, kb, file_objs, current_user.id)
err, files = await thread_pool_exec(FileService.upload_document, kb, file_objs, current_user.id)
if err:
files = [f[0] for f in files] if files else []
return get_json_result(data=files, message="\n".join(err), code=RetCode.SERVER_ERROR)
if not files:
@@ -214,6 +227,7 @@ async def list_docs():
kb_id = request.args.get("kb_id")
if not kb_id:
return get_json_result(data=False, message='Lack of "KB ID"', code=RetCode.ARGUMENT_ERROR)
tenants = UserTenantService.query(user_id=current_user.id)
for tenant in tenants:
if KnowledgebaseService.query(tenant_id=tenant.tenant_id, id=kb_id):
@@ -268,7 +282,7 @@ async def list_docs():
doc_ids_filter = None
metas = None
if metadata_condition or metadata:
metas = DocumentService.get_flatted_meta_by_kbs([kb_id])
metas = DocMetadataService.get_flatted_meta_by_kbs([kb_id])
if metadata_condition:
doc_ids_filter = set(meta_filter(metas, convert_conditions(metadata_condition), metadata_condition.get("logic", "and")))
@@ -333,6 +347,8 @@ async def list_docs():
doc_item["thumbnail"] = f"/v1/document/image/{kb_id}-{doc_item['thumbnail']}"
if doc_item.get("source_type"):
doc_item["source_type"] = doc_item["source_type"].split("/")[0]
if doc_item["parser_config"].get("metadata"):
doc_item["parser_config"]["metadata"] = turn2jsonschema(doc_item["parser_config"]["metadata"])
return get_json_result(data={"total": tol, "docs": docs})
except Exception as e:
@@ -386,7 +402,11 @@ async def doc_infos():
if not DocumentService.accessible(doc_id, current_user.id):
return get_json_result(data=False, message="No authorization.", code=RetCode.AUTHENTICATION_ERROR)
docs = DocumentService.get_by_ids(doc_ids)
return get_json_result(data=list(docs.dicts()))
docs_list = list(docs.dicts())
# Add meta_fields for each document
for doc in docs_list:
doc["meta_fields"] = DocMetadataService.get_document_metadata(doc["id"])
return get_json_result(data=docs_list)
@manager.route("/metadata/summary", methods=["POST"]) # noqa: F821 @manager.route("/metadata/summary", methods=["POST"]) # noqa: F821
@ -394,6 +414,7 @@ async def doc_infos():
async def metadata_summary(): async def metadata_summary():
req = await get_request_json() req = await get_request_json()
kb_id = req.get("kb_id") kb_id = req.get("kb_id")
doc_ids = req.get("doc_ids")
if not kb_id: if not kb_id:
return get_json_result(data=False, message='Lack of "KB ID"', code=RetCode.ARGUMENT_ERROR) return get_json_result(data=False, message='Lack of "KB ID"', code=RetCode.ARGUMENT_ERROR)
@ -405,7 +426,7 @@ async def metadata_summary():
return get_json_result(data=False, message="Only owner of dataset authorized for this operation.", code=RetCode.OPERATING_ERROR) return get_json_result(data=False, message="Only owner of dataset authorized for this operation.", code=RetCode.OPERATING_ERROR)
try: try:
summary = DocumentService.get_metadata_summary(kb_id) summary = DocMetadataService.get_metadata_summary(kb_id, doc_ids)
return get_json_result(data={"summary": summary}) return get_json_result(data={"summary": summary})
except Exception as e: except Exception as e:
return server_error_response(e) return server_error_response(e)
@@ -413,36 +434,20 @@ async def metadata_summary():
@manager.route("/metadata/update", methods=["POST"]) # noqa: F821
@login_required
@validate_request("doc_ids")
async def metadata_update():
req = await get_request_json()
kb_id = req.get("kb_id")
if not kb_id:
document_ids = req.get("doc_ids")
return get_json_result(data=False, message='Lack of "KB ID"', code=RetCode.ARGUMENT_ERROR)
tenants = UserTenantService.query(user_id=current_user.id)
for tenant in tenants:
if KnowledgebaseService.query(tenant_id=tenant.tenant_id, id=kb_id):
break
else:
return get_json_result(data=False, message="Only owner of dataset authorized for this operation.", code=RetCode.OPERATING_ERROR)
selector = req.get("selector", {}) or {}
updates = req.get("updates", []) or []
deletes = req.get("deletes", []) or []
if not isinstance(selector, dict):
if not kb_id:
return get_json_result(data=False, message="selector must be an object.", code=RetCode.ARGUMENT_ERROR)
return get_json_result(data=False, message='Lack of "KB ID"', code=RetCode.ARGUMENT_ERROR)
if not isinstance(updates, list) or not isinstance(deletes, list):
return get_json_result(data=False, message="updates and deletes must be lists.", code=RetCode.ARGUMENT_ERROR)
metadata_condition = selector.get("metadata_condition", {}) or {}
if metadata_condition and not isinstance(metadata_condition, dict):
return get_json_result(data=False, message="metadata_condition must be an object.", code=RetCode.ARGUMENT_ERROR)
document_ids = selector.get("document_ids", []) or []
if document_ids and not isinstance(document_ids, list):
return get_json_result(data=False, message="document_ids must be a list.", code=RetCode.ARGUMENT_ERROR)
for upd in updates:
if not isinstance(upd, dict) or not upd.get("key") or "value" not in upd:
return get_json_result(data=False, message="Each update requires key and value.", code=RetCode.ARGUMENT_ERROR)
@@ -450,24 +455,8 @@ async def metadata_update():
if not isinstance(d, dict) or not d.get("key"):
return get_json_result(data=False, message="Each delete requires key.", code=RetCode.ARGUMENT_ERROR)
kb_doc_ids = KnowledgebaseService.list_documents_by_ids([kb_id])
updated = DocMetadataService.batch_update_metadata(kb_id, document_ids, updates, deletes)
target_doc_ids = set(kb_doc_ids)
return get_json_result(data={"updated": updated, "matched_docs": len(document_ids)})
if document_ids:
invalid_ids = set(document_ids) - set(kb_doc_ids)
if invalid_ids:
return get_json_result(data=False, message=f"These documents do not belong to dataset {kb_id}: {', '.join(invalid_ids)}", code=RetCode.ARGUMENT_ERROR)
target_doc_ids = set(document_ids)
if metadata_condition:
metas = DocumentService.get_flatted_meta_by_kbs([kb_id])
filtered_ids = set(meta_filter(metas, convert_conditions(metadata_condition), metadata_condition.get("logic", "and")))
target_doc_ids = target_doc_ids & filtered_ids
if metadata_condition.get("conditions") and not target_doc_ids:
return get_json_result(data={"updated": 0, "matched_docs": 0})
target_doc_ids = list(target_doc_ids)
updated = DocumentService.batch_update_metadata(kb_id, target_doc_ids, updates, deletes)
return get_json_result(data={"updated": updated, "matched_docs": len(target_doc_ids)})
@manager.route("/update_metadata_setting", methods=["POST"]) # noqa: F821 @manager.route("/update_metadata_setting", methods=["POST"]) # noqa: F821
@@ -521,31 +510,61 @@ async def change_status():
return get_json_result(data=False, message='"Status" must be either 0 or 1!', code=RetCode.ARGUMENT_ERROR)
result = {}
has_error = False
for doc_id in doc_ids:
if not DocumentService.accessible(doc_id, current_user.id):
result[doc_id] = {"error": "No authorization."}
has_error = True
continue
try:
e, doc = DocumentService.get_by_id(doc_id)
if not e:
result[doc_id] = {"error": "No authorization."}
has_error = True
continue
e, kb = KnowledgebaseService.get_by_id(doc.kb_id)
if not e:
result[doc_id] = {"error": "Can't find this dataset!"}
has_error = True
continue
current_status = str(doc.status)
if current_status == status:
result[doc_id] = {"status": status}
continue
if not DocumentService.update_by_id(doc_id, {"status": str(status)}):
result[doc_id] = {"error": "Database error (Document update)!"}
has_error = True
continue
status_int = int(status)
if not settings.docStoreConn.update({"doc_id": doc_id}, {"available_int": status_int}, search.index_name(kb.tenant_id), doc.kb_id):
if getattr(doc, "chunk_num", 0) > 0:
result[doc_id] = {"error": "Database error (docStore update)!"}
try:
ok = settings.docStoreConn.update(
{"doc_id": doc_id},
{"available_int": status_int},
search.index_name(kb.tenant_id),
doc.kb_id,
)
except Exception as exc:
msg = str(exc)
if "3022" in msg:
result[doc_id] = {"error": "Document store table missing."}
else:
result[doc_id] = {"error": f"Document store update failed: {msg}"}
has_error = True
continue
if not ok:
result[doc_id] = {"error": "Database error (docStore update)!"}
has_error = True
continue
result[doc_id] = {"status": status} result[doc_id] = {"status": status}
except Exception as e: except Exception as e:
result[doc_id] = {"error": f"Internal server error: {str(e)}"} result[doc_id] = {"error": f"Internal server error: {str(e)}"}
has_error = True
if has_error:
return get_json_result(data=result, message="Partial failure", code=RetCode.SERVER_ERROR)
return get_json_result(data=result) return get_json_result(data=result)
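
With the `has_error` flag added above, the status-change endpoint now reports partial failures explicitly: it still returns the per-document result map, but with a "Partial failure" message and an error code when any document could not be updated. An illustrative shape of that map (doc ids and messages are invented):

```python
# Illustrative only: possible per-document result map returned on partial failure.
example_data = {
    "doc_ok": {"status": "1"},
    "doc_missing_store": {"error": "Document store table missing."},
    "doc_unauthorized": {"error": "No authorization."},
}
```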
@@ -562,7 +581,7 @@ async def rm():
if not DocumentService.accessible4deletion(doc_id, current_user.id):
return get_json_result(data=False, message="No authorization.", code=RetCode.AUTHENTICATION_ERROR)
errors = await asyncio.to_thread(FileService.delete_docs, doc_ids, current_user.id)
errors = await thread_pool_exec(FileService.delete_docs, doc_ids, current_user.id)
if errors:
return get_json_result(data=False, message=errors, code=RetCode.SERVER_ERROR)
@@ -575,10 +594,11 @@ async def rm():
@validate_request("doc_ids", "run")
async def run():
req = await get_request_json()
uid = current_user.id
try:
def _run_sync():
for doc_id in req["doc_ids"]:
if not DocumentService.accessible(doc_id, current_user.id):
if not DocumentService.accessible(doc_id, uid):
return get_json_result(data=False, message="No authorization.", code=RetCode.AUTHENTICATION_ERROR)
kb_table_num_map = {}
@@ -615,6 +635,7 @@ async def run():
e, kb = KnowledgebaseService.get_by_id(doc.kb_id)
if not e:
raise LookupError("Can't find this dataset!")
doc.parser_config["llm_id"] = kb.parser_config.get("llm_id")
doc.parser_config["enable_metadata"] = kb.parser_config.get("enable_metadata", False)
doc.parser_config["metadata"] = kb.parser_config.get("metadata", {})
DocumentService.update_parser_config(doc.id, doc.parser_config)
@@ -623,7 +644,7 @@ async def run():
return get_json_result(data=True)
return await asyncio.to_thread(_run_sync)
return await thread_pool_exec(_run_sync)
except Exception as e:
return server_error_response(e)
@@ -633,9 +654,10 @@ async def run():
@validate_request("doc_id", "name")
async def rename():
req = await get_request_json()
uid = current_user.id
try:
def _rename_sync():
if not DocumentService.accessible(req["doc_id"], current_user.id):
if not DocumentService.accessible(req["doc_id"], uid):
return get_json_result(data=False, message="No authorization.", code=RetCode.AUTHENTICATION_ERROR)
e, doc = DocumentService.get_by_id(req["doc_id"])
@@ -674,7 +696,7 @@ async def rename():
)
return get_json_result(data=True)
return await asyncio.to_thread(_rename_sync)
return await thread_pool_exec(_rename_sync)
except Exception as e:
return server_error_response(e)
@@ -689,7 +711,7 @@ async def get(doc_id):
return get_data_error_result(message="Document not found!")
b, n = File2DocumentService.get_storage_address(doc_id=doc_id)
data = await asyncio.to_thread(settings.STORAGE_IMPL.get, b, n)
data = await thread_pool_exec(settings.STORAGE_IMPL.get, b, n)
response = await make_response(data)
ext = re.search(r"\.([^.]+)$", doc.name.lower())
@@ -711,7 +733,7 @@ async def get(doc_id):
async def download_attachment(attachment_id):
try:
ext = request.args.get("ext", "markdown")
data = await asyncio.to_thread(settings.STORAGE_IMPL.get, current_user.id, attachment_id)
data = await thread_pool_exec(settings.STORAGE_IMPL.get, current_user.id, attachment_id)
response = await make_response(data)
response.headers.set("Content-Type", CONTENT_TYPE_MAP.get(ext, f"application/{ext}"))
@@ -784,7 +806,7 @@ async def get_image(image_id):
if len(arr) != 2:
return get_data_error_result(message="Image not found.")
bkt, nm = image_id.split("-")
data = await asyncio.to_thread(settings.STORAGE_IMPL.get, bkt, nm)
data = await thread_pool_exec(settings.STORAGE_IMPL.get, bkt, nm)
response = await make_response(data)
response.headers.set("Content-Type", "image/JPEG")
return response
@@ -892,7 +914,7 @@ async def set_meta():
if not e:
return get_data_error_result(message="Document not found!")
if not DocumentService.update_by_id(req["doc_id"], {"meta_fields": meta}):
if not DocMetadataService.update_document_metadata(req["doc_id"], meta):
return get_data_error_result(message="Database error (meta updates)!")
return get_json_result(data=True)
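
Throughout this diff, `await asyncio.to_thread(...)` calls are replaced with `await thread_pool_exec(...)` imported from `common.misc_utils`. The helper's implementation is not shown here; judging only from the call sites, it is awaited like `asyncio.to_thread`, taking a callable plus positional arguments. A minimal sketch of such a helper, assuming a shared executor (illustrative, not the actual RAGFlow implementation):

```python
# Sketch of an awaitable run-in-executor helper with the same call shape as the
# thread_pool_exec used in this diff; the real common.misc_utils.thread_pool_exec
# may be implemented differently.
import asyncio
from concurrent.futures import ThreadPoolExecutor
from functools import partial

_EXECUTOR = ThreadPoolExecutor(max_workers=8)  # assumed shared pool size


async def thread_pool_exec(fn, *args, **kwargs):
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(_EXECUTOR, partial(fn, *args, **kwargs))
```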

View File

@@ -14,7 +14,6 @@
# limitations under the License
#
import logging
import asyncio
import os
import pathlib
import re
@@ -25,7 +24,7 @@ from api.common.check_team_permission import check_file_team_permission
from api.db.services.document_service import DocumentService
from api.db.services.file2document_service import File2DocumentService
from api.utils.api_utils import server_error_response, get_data_error_result, validate_request
from common.misc_utils import get_uuid
from common.misc_utils import get_uuid, thread_pool_exec
from common.constants import RetCode, FileSource
from api.db import FileType
from api.db.services import duplicate_name
@@ -35,7 +34,6 @@ from api.utils.file_utils import filename_type
from api.utils.web_utils import CONTENT_TYPE_MAP
from common import settings
@manager.route('/upload', methods=['POST']) # noqa: F821
@login_required
# @validate_request("parent_id")
@@ -65,7 +63,7 @@ async def upload():
async def _handle_single_file(file_obj):
MAX_FILE_NUM_PER_USER: int = int(os.environ.get('MAX_FILE_NUM_PER_USER', 0))
if 0 < MAX_FILE_NUM_PER_USER <= await asyncio.to_thread(DocumentService.get_doc_count, current_user.id):
if 0 < MAX_FILE_NUM_PER_USER <= await thread_pool_exec(DocumentService.get_doc_count, current_user.id):
return get_data_error_result( message="Exceed the maximum file number of a free user!")
# split file name path
@@ -77,35 +75,35 @@ async def upload():
file_len = len(file_obj_names)
# get folder
file_id_list = await asyncio.to_thread(FileService.get_id_list_by_id, pf_id, file_obj_names, 1, [pf_id])
file_id_list = await thread_pool_exec(FileService.get_id_list_by_id, pf_id, file_obj_names, 1, [pf_id])
len_id_list = len(file_id_list)
# create folder
if file_len != len_id_list:
e, file = await asyncio.to_thread(FileService.get_by_id, file_id_list[len_id_list - 1])
e, file = await thread_pool_exec(FileService.get_by_id, file_id_list[len_id_list - 1])
if not e:
return get_data_error_result(message="Folder not found!")
last_folder = await asyncio.to_thread(FileService.create_folder, file, file_id_list[len_id_list - 1], file_obj_names,
last_folder = await thread_pool_exec(FileService.create_folder, file, file_id_list[len_id_list - 1], file_obj_names,
len_id_list)
else:
e, file = await asyncio.to_thread(FileService.get_by_id, file_id_list[len_id_list - 2])
e, file = await thread_pool_exec(FileService.get_by_id, file_id_list[len_id_list - 2])
if not e:
return get_data_error_result(message="Folder not found!")
last_folder = await asyncio.to_thread(FileService.create_folder, file, file_id_list[len_id_list - 2], file_obj_names,
last_folder = await thread_pool_exec(FileService.create_folder, file, file_id_list[len_id_list - 2], file_obj_names,
len_id_list)
# file type
filetype = filename_type(file_obj_names[file_len - 1])
location = file_obj_names[file_len - 1]
while await asyncio.to_thread(settings.STORAGE_IMPL.obj_exist, last_folder.id, location):
while await thread_pool_exec(settings.STORAGE_IMPL.obj_exist, last_folder.id, location):
location += "_"
blob = await asyncio.to_thread(file_obj.read)
blob = await thread_pool_exec(file_obj.read)
filename = await asyncio.to_thread(
filename = await thread_pool_exec(
duplicate_name,
FileService.query,
name=file_obj_names[file_len - 1],
parent_id=last_folder.id)
await asyncio.to_thread(settings.STORAGE_IMPL.put, last_folder.id, location, blob)
await thread_pool_exec(settings.STORAGE_IMPL.put, last_folder.id, location, blob)
file_data = {
"id": get_uuid(),
"parent_id": last_folder.id,
@@ -116,7 +114,7 @@ async def upload():
"location": location,
"size": len(blob),
}
inserted = await asyncio.to_thread(FileService.insert, file_data)
inserted = await thread_pool_exec(FileService.insert, file_data)
return inserted.to_json()
for file_obj in file_objs:
@@ -249,6 +247,7 @@ def get_all_parent_folders():
async def rm():
req = await get_request_json()
file_ids = req["file_ids"]
uid = current_user.id
try:
def _delete_single_file(file):
@@ -287,21 +286,21 @@ async def rm():
return get_data_error_result(message="File or Folder not found!")
if not file.tenant_id:
return get_data_error_result(message="Tenant not found!")
if not check_file_team_permission(file, current_user.id):
if not check_file_team_permission(file, uid):
return get_json_result(data=False, message="No authorization.", code=RetCode.AUTHENTICATION_ERROR)
if file.source_type == FileSource.KNOWLEDGEBASE:
continue
if file.type == FileType.FOLDER.value:
_delete_folder_recursive(file, current_user.id)
_delete_folder_recursive(file, uid)
continue
_delete_single_file(file)
return get_json_result(data=True)
return await asyncio.to_thread(_rm_sync)
return await thread_pool_exec(_rm_sync)
except Exception as e:
return server_error_response(e)
@@ -357,10 +356,10 @@ async def get(file_id):
if not check_file_team_permission(file, current_user.id):
return get_json_result(data=False, message='No authorization.', code=RetCode.AUTHENTICATION_ERROR)
blob = await asyncio.to_thread(settings.STORAGE_IMPL.get, file.parent_id, file.location)
blob = await thread_pool_exec(settings.STORAGE_IMPL.get, file.parent_id, file.location)
if not blob:
b, n = File2DocumentService.get_storage_address(file_id=file_id)
blob = await asyncio.to_thread(settings.STORAGE_IMPL.get, b, n)
blob = await thread_pool_exec(settings.STORAGE_IMPL.get, b, n)
response = await make_response(blob)
ext = re.search(r"\.([^.]+)$", file.name.lower())
@@ -460,7 +459,7 @@ async def move():
_move_entry_recursive(file, dest_folder)
return get_json_result(data=True)
return await asyncio.to_thread(_move_sync)
return await thread_pool_exec(_move_sync)
except Exception as e:
return server_error_response(e)

View File

@@ -17,21 +17,29 @@ import json
import logging
import random
import re
import asyncio
from common.metadata_utils import turn2jsonschema
from quart import request
import numpy as np
from api.db.services.connector_service import Connector2KbService
from api.db.services.llm_service import LLMBundle
from api.db.services.document_service import DocumentService, queue_raptor_o_graphrag_tasks
from api.db.services.doc_metadata_service import DocMetadataService
from api.db.services.file2document_service import File2DocumentService
from api.db.services.file_service import FileService
from api.db.services.pipeline_operation_log_service import PipelineOperationLogService
from api.db.services.task_service import TaskService, GRAPH_RAPTOR_FAKE_DOC_ID
from api.db.services.user_service import TenantService, UserTenantService
from api.utils.api_utils import get_error_data_result, server_error_response, get_data_error_result, validate_request, not_allowed_parameters, \
from api.utils.api_utils import (
get_request_json
get_error_data_result,
server_error_response,
get_data_error_result,
validate_request,
not_allowed_parameters,
get_request_json,
)
from common.misc_utils import thread_pool_exec
from api.db import VALID_FILE_TYPES
from api.db.services.knowledgebase_service import KnowledgebaseService
from api.db.db_models import File
@@ -44,7 +52,6 @@ from common import settings
from common.doc_store.doc_store_base import OrderByExpr
from api.apps import login_required, current_user
@manager.route('/create', methods=['post']) # noqa: F821
@login_required
@validate_request("name")
@@ -82,6 +89,20 @@ async def update():
return get_data_error_result(
message=f"Dataset name length is {len(req['name'])} which is large than {DATASET_NAME_LIMIT}")
req["name"] = req["name"].strip()
if settings.DOC_ENGINE_INFINITY:
parser_id = req.get("parser_id")
if isinstance(parser_id, str) and parser_id.lower() == "tag":
return get_json_result(
code=RetCode.OPERATING_ERROR,
message="The chunking method Tag has not been supported by Infinity yet.",
data=False,
)
if "pagerank" in req and req["pagerank"] > 0:
return get_json_result(
code=RetCode.DATA_ERROR,
message="'pagerank' can only be set when doc_engine is elasticsearch",
data=False,
)
if not KnowledgebaseService.accessible4deletion(req["kb_id"], current_user.id):
return get_json_result(
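
For orientation (not part of the diff): the guard added in the hunk above rejects two kinds of dataset-update requests when the document engine is Infinity. Hypothetical payloads that would now be refused, with invented values:

```python
# Assumes settings.DOC_ENGINE_INFINITY is truthy in this deployment.
rejected_tag_method = {"kb_id": "kb_123", "name": "my_kb", "parser_id": "tag"}  # Tag chunking not supported on Infinity
rejected_pagerank = {"kb_id": "kb_123", "name": "my_kb", "pagerank": 10}        # pagerank only allowed with elasticsearch
```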
@@ -130,7 +151,7 @@ async def update():
if kb.pagerank != req.get("pagerank", 0):
if req.get("pagerank", 0) > 0:
await asyncio.to_thread(
await thread_pool_exec(
settings.docStoreConn.update,
{"kb_id": kb.id},
{PAGERANK_FLD: req["pagerank"]},
@@ -139,7 +160,7 @@ async def update():
)
else:
# Elasticsearch requires PAGERANK_FLD be non-zero!
await asyncio.to_thread(
await thread_pool_exec(
settings.docStoreConn.update,
{"exists": PAGERANK_FLD},
{"remove": PAGERANK_FLD},
@@ -174,6 +195,7 @@ async def update_metadata_setting():
message="Database error (Knowledgebase rename)!")
kb = kb.to_dict()
kb["parser_config"]["metadata"] = req["metadata"]
kb["parser_config"]["enable_metadata"] = req.get("enable_metadata", True)
KnowledgebaseService.update_by_id(kb["id"], kb)
return get_json_result(data=kb)
@@ -198,6 +220,8 @@ def detail():
message="Can't find this dataset!")
kb["size"] = DocumentService.get_total_size_by_kb_id(kb_id=kb["id"],keywords="", run_status=[], types=[])
kb["connectors"] = Connector2KbService.list_connectors(kb_id)
if kb["parser_config"].get("metadata"):
kb["parser_config"]["metadata"] = turn2jsonschema(kb["parser_config"]["metadata"])
for key in ["graphrag_task_finish_at", "raptor_task_finish_at", "mindmap_task_finish_at"]:
if finish_at := kb.get(key):
@@ -249,7 +273,8 @@ async def list_kbs():
@validate_request("kb_id")
async def rm():
req = await get_request_json()
if not KnowledgebaseService.accessible4deletion(req["kb_id"], current_user.id):
uid = current_user.id
if not KnowledgebaseService.accessible4deletion(req["kb_id"], uid):
return get_json_result(
data=False,
message='No authorization.',
@@ -257,7 +282,7 @@ async def rm():
)
try:
kbs = KnowledgebaseService.query(
created_by=current_user.id, id=req["kb_id"])
created_by=uid, id=req["kb_id"])
if not kbs:
return get_json_result(
data=False, message='Only owner of dataset authorized for this operation.',
@@ -280,17 +305,24 @@ async def rm():
File.name == kbs[0].name,
]
)
# Delete the table BEFORE deleting the database record
for kb in kbs:
try:
settings.docStoreConn.delete({"kb_id": kb.id}, search.index_name(kb.tenant_id), kb.id)
settings.docStoreConn.delete_idx(search.index_name(kb.tenant_id), kb.id)
logging.info(f"Dropped index for dataset {kb.id}")
except Exception as e:
logging.error(f"Failed to drop index for dataset {kb.id}: {e}")
if not KnowledgebaseService.delete_by_id(req["kb_id"]):
return get_data_error_result(
message="Database error (Knowledgebase removal)!")
for kb in kbs:
settings.docStoreConn.delete({"kb_id": kb.id}, search.index_name(kb.tenant_id), kb.id)
settings.docStoreConn.delete_idx(search.index_name(kb.tenant_id), kb.id)
if hasattr(settings.STORAGE_IMPL, 'remove_bucket'):
settings.STORAGE_IMPL.remove_bucket(kb.id)
return get_json_result(data=True)
return await asyncio.to_thread(_rm_sync)
return await thread_pool_exec(_rm_sync)
except Exception as e:
return server_error_response(e)
@@ -372,7 +404,7 @@ async def rename_tags(kb_id):
@manager.route('/<kb_id>/knowledge_graph', methods=['GET']) # noqa: F821
@login_required
def knowledge_graph(kb_id):
async def knowledge_graph(kb_id):
if not KnowledgebaseService.accessible(kb_id, current_user.id):
return get_json_result(
data=False,
@@ -388,7 +420,7 @@ def knowledge_graph(kb_id):
obj = {"graph": {}, "mind_map": {}}
if not settings.docStoreConn.index_exist(search.index_name(kb.tenant_id), kb_id):
return get_json_result(data=obj)
sres = settings.retriever.search(req, search.index_name(kb.tenant_id), [kb_id])
sres = await settings.retriever.search(req, search.index_name(kb.tenant_id), [kb_id])
if not len(sres.ids):
return get_json_result(data=obj)
@@ -436,7 +468,7 @@ def get_meta():
message='No authorization.',
code=RetCode.AUTHENTICATION_ERROR
)
return get_json_result(data=DocumentService.get_meta_by_kbs(kb_ids))
return get_json_result(data=DocMetadataService.get_flatted_meta_by_kbs(kb_ids))
@manager.route("/basic_info", methods=["GET"]) # noqa: F821

View File

@@ -146,10 +146,6 @@ async def add_llm():
# Assemble ark_api_key endpoint_id into api_key
api_key = apikey_json(["ark_api_key", "endpoint_id"])
elif factory == "Tencent Hunyuan":
req["api_key"] = apikey_json(["hunyuan_sid", "hunyuan_sk"])
return await set_api_key()
elif factory == "Tencent Cloud":
req["api_key"] = apikey_json(["tencent_cloud_sid", "tencent_cloud_sk"])
return await set_api_key()
@@ -195,6 +191,9 @@ async def add_llm():
elif factory == "MinerU":
api_key = apikey_json(["api_key", "provider_order"])
elif factory == "PaddleOCR":
api_key = apikey_json(["api_key", "provider_order"])
llm = {
"tenant_id": current_user.id,
"llm_factory": factory,
@@ -230,8 +229,7 @@ async def add_llm():
**extra,
)
try:
m, tc = await mdl.async_chat(None, [{"role": "user", "content": "Hello! How are you doing!"}],
m, tc = await mdl.async_chat(None, [{"role": "user", "content": "Hello! How are you doing!"}], {"temperature": 0.9})
{"temperature": 0.9})
if not tc and m.find("**ERROR**:") >= 0:
raise Exception(m)
except Exception as e:
@@ -371,17 +369,18 @@ def my_llms():
@manager.route("/list", methods=["GET"]) # noqa: F821
@login_required
def list_app():
async def list_app():
self_deployed = ["FastEmbed", "Ollama", "Xinference", "LocalAI", "LM-Studio", "GPUStack"]
weighted = []
model_type = request.args.get("model_type")
tenant_id = current_user.id
try:
TenantLLMService.ensure_mineru_from_env(current_user.id)
TenantLLMService.ensure_mineru_from_env(tenant_id)
objs = TenantLLMService.query(tenant_id=current_user.id)
objs = TenantLLMService.query(tenant_id=tenant_id)
facts = set([o.to_dict()["llm_factory"] for o in objs if o.api_key and o.status == StatusEnum.VALID.value])
status = {(o.llm_name + "@" + o.llm_factory) for o in objs if o.status == StatusEnum.VALID.value}
llms = LLMService.get_all()
llms = [m.to_dict() for m in llms if m.status == StatusEnum.VALID.value and m.fid not in weighted and (m.fid == 'Builtin' or (m.llm_name + "@" + m.fid) in status)]
llms = [m.to_dict() for m in llms if m.status == StatusEnum.VALID.value and m.fid not in weighted and (m.fid == "Builtin" or (m.llm_name + "@" + m.fid) in status)]
for m in llms:
m["available"] = m["fid"] in facts or m["llm_name"].lower() == "flag-embedding" or m["fid"] in self_deployed
if "tei-" in os.getenv("COMPOSE_PROFILES", "") and m["model_type"] == LLMType.EMBEDDING and m["fid"] == "Builtin" and m["llm_name"] == os.getenv("TEI_MODEL", ""):

View File

@@ -13,8 +13,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
import asyncio
from quart import Response, request
from api.apps import current_user, login_required
@@ -23,12 +21,11 @@ from api.db.services.mcp_server_service import MCPServerService
from api.db.services.user_service import TenantService
from common.constants import RetCode, VALID_MCP_SERVER_TYPES
from common.misc_utils import get_uuid
from common.misc_utils import get_uuid, thread_pool_exec
from api.utils.api_utils import get_data_error_result, get_json_result, get_mcp_tools, get_request_json, server_error_response, validate_request
from api.utils.web_utils import get_float, safe_json_parse
from common.mcp_tool_call_conn import MCPToolCallSession, close_multiple_mcp_toolcall_sessions
@manager.route("/list", methods=["POST"]) # noqa: F821
@login_required
async def list_mcp() -> Response:
@@ -108,7 +105,7 @@ async def create() -> Response:
return get_data_error_result(message="Tenant not found.")
mcp_server = MCPServer(id=server_name, name=server_name, url=url, server_type=server_type, variables=variables, headers=headers)
server_tools, err_message = await asyncio.to_thread(get_mcp_tools, [mcp_server], timeout)
server_tools, err_message = await thread_pool_exec(get_mcp_tools, [mcp_server], timeout)
if err_message:
return get_data_error_result(err_message)
@@ -160,7 +157,7 @@ async def update() -> Response:
req["id"] = mcp_id
mcp_server = MCPServer(id=server_name, name=server_name, url=url, server_type=server_type, variables=variables, headers=headers)
server_tools, err_message = await asyncio.to_thread(get_mcp_tools, [mcp_server], timeout)
server_tools, err_message = await thread_pool_exec(get_mcp_tools, [mcp_server], timeout)
if err_message:
return get_data_error_result(err_message)
@@ -244,7 +241,7 @@ async def import_multiple() -> Response:
headers = {"authorization_token": config["authorization_token"]} if "authorization_token" in config else {}
variables = {k: v for k, v in config.items() if k not in {"type", "url", "headers"}}
mcp_server = MCPServer(id=new_name, name=new_name, url=config["url"], server_type=config["type"], variables=variables, headers=headers)
server_tools, err_message = await asyncio.to_thread(get_mcp_tools, [mcp_server], timeout)
server_tools, err_message = await thread_pool_exec(get_mcp_tools, [mcp_server], timeout)
if err_message:
results.append({"server": base_name, "success": False, "message": err_message})
continue
@@ -324,7 +321,7 @@ async def list_tools() -> Response:
tool_call_sessions.append(tool_call_session)
try:
tools = await asyncio.to_thread(tool_call_session.get_tools, timeout)
tools = await thread_pool_exec(tool_call_session.get_tools, timeout)
except Exception as e:
return get_data_error_result(message=f"MCP list tools error: {e}")
@@ -341,7 +338,7 @@ async def list_tools() -> Response:
return server_error_response(e)
finally:
# PERF: blocking call to close sessions — consider moving to background thread or task queue
await asyncio.to_thread(close_multiple_mcp_toolcall_sessions, tool_call_sessions)
await thread_pool_exec(close_multiple_mcp_toolcall_sessions, tool_call_sessions)
@manager.route("/test_tool", methods=["POST"]) # noqa: F821
@@ -368,10 +365,10 @@ async def test_tool() -> Response:
tool_call_session = MCPToolCallSession(mcp_server, mcp_server.variables)
tool_call_sessions.append(tool_call_session)
result = await asyncio.to_thread(tool_call_session.tool_call, tool_name, arguments, timeout)
result = await thread_pool_exec(tool_call_session.tool_call, tool_name, arguments, timeout)
# PERF: blocking call to close sessions — consider moving to background thread or task queue
await asyncio.to_thread(close_multiple_mcp_toolcall_sessions, tool_call_sessions)
await thread_pool_exec(close_multiple_mcp_toolcall_sessions, tool_call_sessions)
return get_json_result(data=result)
except Exception as e:
return server_error_response(e)
@@ -425,12 +422,12 @@ async def test_mcp() -> Response:
tool_call_session = MCPToolCallSession(mcp_server, mcp_server.variables)
try:
tools = await asyncio.to_thread(tool_call_session.get_tools, timeout)
tools = await thread_pool_exec(tool_call_session.get_tools, timeout)
except Exception as e:
return get_data_error_result(message=f"Test MCP error: {e}")
finally:
# PERF: blocking call to close sessions — consider moving to background thread or task queue
await asyncio.to_thread(close_multiple_mcp_toolcall_sessions, [tool_call_session])
await thread_pool_exec(close_multiple_mcp_toolcall_sessions, [tool_call_session])
for tool in tools:
tool_dict = tool.model_dump()

View File

@@ -18,7 +18,7 @@
from quart import Response
from api.apps import login_required
from api.utils.api_utils import get_json_result
from plugin import GlobalPluginManager
from agent.plugin import GlobalPluginManager
@manager.route('/llm_tools', methods=['GET']) # noqa: F821

Some files were not shown because too many files have changed in this diff.