diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 20b8f7ceb..7e25d99e6 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -203,7 +203,7 @@ jobs:
           echo "HOST_ADDRESS=http://host.docker.internal:${SVR_HTTP_PORT}" >> ${GITHUB_ENV}
 
           sudo docker compose -f docker/docker-compose.yml -p ${GITHUB_RUN_ID} up -d
-          uv sync --python 3.12 --only-group test --no-default-groups --frozen && uv pip install sdk/python --group test
+          uv sync --python 3.12 --group test --frozen && uv pip install sdk/python
 
       - name: Run sdk tests against Elasticsearch
         run: |
@@ -232,6 +232,117 @@ jobs:
           done
           source .venv/bin/activate && set -o pipefail; pytest -s --tb=short --level=${HTTP_API_TEST_LEVEL} test/testcases/test_http_api 2>&1 | tee es_http_api_test.log
 
+      - name: RAGFlow CLI retrieval test
+        env:
+          PYTHONPATH: ${{ github.workspace }}
+        run: |
+          set -euo pipefail
+          source .venv/bin/activate
+
+          export http_proxy=""; export https_proxy=""; export no_proxy=""; export HTTP_PROXY=""; export HTTPS_PROXY=""; export NO_PROXY=""
+
+          EMAIL="ci-${GITHUB_RUN_ID}@example.com"
+          PASS="ci-pass-${GITHUB_RUN_ID}"
+          DATASET="ci_dataset_${GITHUB_RUN_ID}"
+
+          CLI="python admin/client/ragflow_cli.py"
+
+          LOG_FILE="es_cli_test.log"
+          : > "${LOG_FILE}"
+
+          # Case-insensitive ERE of output markers that mark a CLI call as failed;
+          # it is checked regardless of the command's exit status.
+          ERROR_RE='Traceback|ModuleNotFoundError|ImportError|Parse error|Bad response|Fail to|code:[[:space:]]*[1-9]'
+          # run_cli LOGFILE [--allow REGEX] CMD [ARGS...]
+          # Runs CMD under a 180s timeout, echoing its output and appending it to LOGFILE.
+          # Returns 1 when the output matches ERROR_RE, unless it also matches REGEX
+          # (then returns 0); otherwise returns the exit status reported by timeout.
+          run_cli() {
+            local logfile="$1"
+            shift
+            local allow_re=""
+            if [[ "${1:-}" == "--allow" ]]; then
+              allow_re="$2"
+              shift 2
+            fi
+            local cmd_display="$*"
+            echo "===== $(date -u +%Y-%m-%dT%H:%M:%SZ) CMD: ${cmd_display} =====" | tee -a "${logfile}"
+            local tmp_log
+            tmp_log="$(mktemp)"
+            set +e
+            timeout 180s "$@" 2>&1 | tee "${tmp_log}"
+            local status=${PIPESTATUS[0]}
+            set -e
+            cat "${tmp_log}" >> "${logfile}"
+            if grep -qiE "${ERROR_RE}" "${tmp_log}"; then
+              if [[ -n "${allow_re}" ]] && grep -qiE "${allow_re}" "${tmp_log}"; then
+                echo "Allowed CLI error markers in ${logfile}"
+                rm -f "${tmp_log}"
+                return 0
+              fi
+              echo "Detected CLI error markers in ${logfile}"
+              rm -f "${tmp_log}"
+              # return (not exit) so the readiness loops below can retry; under set -e
+              # a failing run_cli called outside a condition still aborts the step.
+              return 1
+            fi
+            rm -f "${tmp_log}"
+            return ${status}
+          }
+
+          set -a
+          source docker/.env
+          set +a
+
+          HOST_ADDRESS="http://host.docker.internal:${SVR_HTTP_PORT}"
+          USER_HOST="$(echo "${HOST_ADDRESS}" | sed -E 's#^https?://([^:/]+).*#\1#')"
+          USER_PORT="${SVR_HTTP_PORT}"
+          ADMIN_HOST="${USER_HOST}"
+          ADMIN_PORT="${ADMIN_SVR_HTTP_PORT}"
+
+          until sudo docker exec ${RAGFLOW_CONTAINER} curl -s --connect-timeout 5 ${HOST_ADDRESS}/v1/system/ping > /dev/null; do
+            echo "Waiting for service to be available..."
+            sleep 5
+          done
+
+          admin_ready=0
+          for i in $(seq 1 30); do
+            if run_cli "${LOG_FILE}" $CLI --type admin --host "$ADMIN_HOST" --port "$ADMIN_PORT" --username "admin@ragflow.io" --password "admin" command "ping"; then
+              admin_ready=1
+              break
+            fi
+            sleep 1
+          done
+          if [[ "${admin_ready}" -ne 1 ]]; then
+            echo "Admin service did not become ready"
+            exit 1
+          fi
+
+          run_cli "${LOG_FILE}" $CLI --type admin --host "$ADMIN_HOST" --port "$ADMIN_PORT" --username "admin@ragflow.io" --password "admin" command "show version"
+          # NOTE(review): the trailing 'exist(s)?' alternative matches any output containing
+          # "exist" (e.g. "does not exist"); confirm the duplicate-user message and tighten.
+          ALLOW_USER_EXISTS_RE='already exists|already exist|duplicate|already.*registered|exist(s)?'
+          run_cli "${LOG_FILE}" --allow "${ALLOW_USER_EXISTS_RE}" $CLI --type admin --host "$ADMIN_HOST" --port "$ADMIN_PORT" --username "admin@ragflow.io" --password "admin" command "create user '$EMAIL' '$PASS'"
+
+          user_ready=0
+          for i in $(seq 1 30); do
+            if run_cli "${LOG_FILE}" $CLI --type user --host "$USER_HOST" --port "$USER_PORT" --username "$EMAIL" --password "$PASS" command "ping"; then
+              user_ready=1
+              break
+            fi
+            sleep 1
+          done
+          if [[ "${user_ready}" -ne 1 ]]; then
+            echo "User service did not become ready"
+            exit 1
+          fi
+
+          run_cli "${LOG_FILE}" $CLI --type user --host "$USER_HOST" --port "$USER_PORT" --username "$EMAIL" --password "$PASS" command "show version"
+          run_cli "${LOG_FILE}" $CLI --type user --host "$USER_HOST" --port "$USER_PORT" --username "$EMAIL" --password "$PASS" command "create dataset '$DATASET' with embedding 'BAAI/bge-small-en-v1.5@Builtin' parser 'auto'"
+          run_cli "${LOG_FILE}" $CLI --type user --host "$USER_HOST" --port "$USER_PORT" --username "$EMAIL" --password "$PASS" command "import 'test/benchmark/test_docs/Doc1.pdf,test/benchmark/test_docs/Doc2.pdf' into dataset '$DATASET'"
+          run_cli "${LOG_FILE}" $CLI --type user --host "$USER_HOST" --port "$USER_PORT" --username "$EMAIL" --password "$PASS" command "parse dataset '$DATASET' sync"
+          run_cli "${LOG_FILE}" $CLI --type user --host "$USER_HOST" --port "$USER_PORT" --username "$EMAIL" --password "$PASS" command "Benchmark 16 100 search 'what are these documents about' on datasets '$DATASET'"
+
       - name: Collect ragflow log
         if: ${{ !cancelled() }}
         run: |
@@ -277,6 +388,117 @@ jobs:
           done
           source .venv/bin/activate && set -o pipefail; DOC_ENGINE=infinity pytest -s --tb=short --level=${HTTP_API_TEST_LEVEL} test/testcases/test_http_api 2>&1 | tee infinity_http_api_test.log
 
+      - name: RAGFlow CLI retrieval test (Infinity)
+        env:
+          PYTHONPATH: ${{ github.workspace }}
+        run: |
+          set -euo pipefail
+          source .venv/bin/activate
+
+          export http_proxy=""; export https_proxy=""; export no_proxy=""; export HTTP_PROXY=""; export HTTPS_PROXY=""; export NO_PROXY=""
+
+          EMAIL="ci-${GITHUB_RUN_ID}@example.com"
+          PASS="ci-pass-${GITHUB_RUN_ID}"
+          DATASET="ci_dataset_${GITHUB_RUN_ID}"
+
+          CLI="python admin/client/ragflow_cli.py"
+
+          LOG_FILE="infinity_cli_test.log"
+          : > "${LOG_FILE}"
+
+          # Case-insensitive ERE of output markers that mark a CLI call as failed;
+          # it is checked regardless of the command's exit status.
+          ERROR_RE='Traceback|ModuleNotFoundError|ImportError|Parse error|Bad response|Fail to|code:[[:space:]]*[1-9]'
+          # run_cli LOGFILE [--allow REGEX] CMD [ARGS...]
+          # Runs CMD under a 180s timeout, echoing its output and appending it to LOGFILE.
+          # Returns 1 when the output matches ERROR_RE, unless it also matches REGEX
+          # (then returns 0); otherwise returns the exit status reported by timeout.
+          run_cli() {
+            local logfile="$1"
+            shift
+            local allow_re=""
+            if [[ "${1:-}" == "--allow" ]]; then
+              allow_re="$2"
+              shift 2
+            fi
+            local cmd_display="$*"
+            echo "===== $(date -u +%Y-%m-%dT%H:%M:%SZ) CMD: ${cmd_display} =====" | tee -a "${logfile}"
+            local tmp_log
+            tmp_log="$(mktemp)"
+            set +e
+            timeout 180s "$@" 2>&1 | tee "${tmp_log}"
+            local status=${PIPESTATUS[0]}
+            set -e
+            cat "${tmp_log}" >> "${logfile}"
+            if grep -qiE "${ERROR_RE}" "${tmp_log}"; then
+              if [[ -n "${allow_re}" ]] && grep -qiE "${allow_re}" "${tmp_log}"; then
+                echo "Allowed CLI error markers in ${logfile}"
+                rm -f "${tmp_log}"
+                return 0
+              fi
+              echo "Detected CLI error markers in ${logfile}"
+              rm -f "${tmp_log}"
+              # return (not exit) so the readiness loops below can retry; under set -e
+              # a failing run_cli called outside a condition still aborts the step.
+              return 1
+            fi
+            rm -f "${tmp_log}"
+            return ${status}
+          }
+
+          set -a
+          source docker/.env
+          set +a
+
+          HOST_ADDRESS="http://host.docker.internal:${SVR_HTTP_PORT}"
+          USER_HOST="$(echo "${HOST_ADDRESS}" | sed -E 's#^https?://([^:/]+).*#\1#')"
+          USER_PORT="${SVR_HTTP_PORT}"
+          ADMIN_HOST="${USER_HOST}"
+          ADMIN_PORT="${ADMIN_SVR_HTTP_PORT}"
+
+          until sudo docker exec ${RAGFLOW_CONTAINER} curl -s --connect-timeout 5 ${HOST_ADDRESS}/v1/system/ping > /dev/null; do
+            echo "Waiting for service to be available..."
+            sleep 5
+          done
+
+          admin_ready=0
+          for i in $(seq 1 30); do
+            if run_cli "${LOG_FILE}" $CLI --type admin --host "$ADMIN_HOST" --port "$ADMIN_PORT" --username "admin@ragflow.io" --password "admin" command "ping"; then
+              admin_ready=1
+              break
+            fi
+            sleep 1
+          done
+          if [[ "${admin_ready}" -ne 1 ]]; then
+            echo "Admin service did not become ready"
+            exit 1
+          fi
+
+          run_cli "${LOG_FILE}" $CLI --type admin --host "$ADMIN_HOST" --port "$ADMIN_PORT" --username "admin@ragflow.io" --password "admin" command "show version"
+          # NOTE(review): the trailing 'exist(s)?' alternative matches any output containing
+          # "exist" (e.g. "does not exist"); confirm the duplicate-user message and tighten.
+          ALLOW_USER_EXISTS_RE='already exists|already exist|duplicate|already.*registered|exist(s)?'
+          run_cli "${LOG_FILE}" --allow "${ALLOW_USER_EXISTS_RE}" $CLI --type admin --host "$ADMIN_HOST" --port "$ADMIN_PORT" --username "admin@ragflow.io" --password "admin" command "create user '$EMAIL' '$PASS'"
+
+          user_ready=0
+          for i in $(seq 1 30); do
+            if run_cli "${LOG_FILE}" $CLI --type user --host "$USER_HOST" --port "$USER_PORT" --username "$EMAIL" --password "$PASS" command "ping"; then
+              user_ready=1
+              break
+            fi
+            sleep 1
+          done
+          if [[ "${user_ready}" -ne 1 ]]; then
+            echo "User service did not become ready"
+            exit 1
+          fi
+
+          run_cli "${LOG_FILE}" $CLI --type user --host "$USER_HOST" --port "$USER_PORT" --username "$EMAIL" --password "$PASS" command "show version"
+          run_cli "${LOG_FILE}" $CLI --type user --host "$USER_HOST" --port "$USER_PORT" --username "$EMAIL" --password "$PASS" command "create dataset '$DATASET' with embedding 'BAAI/bge-small-en-v1.5@Builtin' parser 'auto'"
+          run_cli "${LOG_FILE}" $CLI --type user --host "$USER_HOST" --port "$USER_PORT" --username "$EMAIL" --password "$PASS" command "import 'test/benchmark/test_docs/Doc1.pdf,test/benchmark/test_docs/Doc2.pdf' into dataset '$DATASET'"
+          run_cli "${LOG_FILE}" $CLI --type user --host "$USER_HOST" --port "$USER_PORT" --username "$EMAIL" --password "$PASS" command "parse dataset '$DATASET' sync"
+          run_cli "${LOG_FILE}" $CLI --type user --host "$USER_HOST" --port "$USER_PORT" --username "$EMAIL" --password "$PASS" command "Benchmark 16 100 search 'what are these documents about' on datasets '$DATASET'"
+
       - name: Collect ragflow log
         if: ${{ !cancelled() }}
         run: |