mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-01-30 07:06:39 +08:00
Feat: Add CLI retrieval test to CI workflow (#12881)
### What problem does this PR solve?

Adds a CLI-based retrieval test to CI after the Elasticsearch HTTP API tests to validate end-to-end admin/user flows and dataset retrieval via ragflow_cli.py. This helps catch regressions in the CLI path that aren’t covered by existing API tests.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
204
.github/workflows/tests.yml
vendored
204
.github/workflows/tests.yml
vendored
@ -203,7 +203,7 @@ jobs:
|
||||
echo "HOST_ADDRESS=http://host.docker.internal:${SVR_HTTP_PORT}" >> ${GITHUB_ENV}
|
||||
|
||||
sudo docker compose -f docker/docker-compose.yml -p ${GITHUB_RUN_ID} up -d
|
||||
uv sync --python 3.12 --only-group test --no-default-groups --frozen && uv pip install sdk/python --group test
|
||||
uv sync --python 3.12 --group test --frozen && uv pip install sdk/python
|
||||
|
||||
- name: Run sdk tests against Elasticsearch
|
||||
run: |
|
||||
@ -232,6 +232,107 @@ jobs:
|
||||
done
|
||||
source .venv/bin/activate && set -o pipefail; pytest -s --tb=short --level=${HTTP_API_TEST_LEVEL} test/testcases/test_http_api 2>&1 | tee es_http_api_test.log
|
||||
|
||||
- name: RAGFlow CLI retrieval test
|
||||
env:
|
||||
PYTHONPATH: ${{ github.workspace }}
|
||||
run: |
|
||||
# Strict mode: stop on a failing command, on an unset variable, and on a
# failure in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# Run everything below with the project's virtualenv interpreter.
. .venv/bin/activate

# Blank out every proxy variable in both spellings.
# NOTE(review): presumably so requests to the locally started services are not
# routed through a runner-level proxy -- confirm.
export http_proxy="" https_proxy="" no_proxy="" \
  HTTP_PROXY="" HTTPS_PROXY="" NO_PROXY=""

# Per-run identifiers, all derived from GITHUB_RUN_ID.
EMAIL="ci-${GITHUB_RUN_ID}@example.com"
PASS="ci-pass-${GITHUB_RUN_ID}"
DATASET="ci_dataset_${GITHUB_RUN_ID}"

# Kept as a plain string: the call sites expand it unquoted so that it splits
# into the interpreter and the script path.
CLI="python admin/client/ragflow_cli.py"

# Log file for this step; truncate it so the step starts from an empty log.
LOG_FILE="es_cli_test.log"
: > "${LOG_FILE}"
|
||||
|
||||
# Extended regex, matched case-insensitively against CLI output, of markers
# that mean a call failed even if the process exited 0. The last alternative
# matches a non-zero "code: N" field; it uses [[:space:]] because inside single
# quotes "\\s" is a literal backslash followed by "s" and never matched.
ERROR_RE='Traceback|ModuleNotFoundError|ImportError|Parse error|Bad response|Fail to|code:[[:space:]]*[1-9]'

#######################################
# Run one CLI command with a 180 s timeout, mirror its output to the console
# and append it to a log file, then scan the output for error markers.
# Globals:
#   ERROR_RE (read)
# Arguments:
#   $1              - log file to append to
#   --allow REGEX   - optional; if the output contains an error marker AND
#                     matches REGEX (case-insensitive ERE), treat it as success
#   remaining args  - the command to execute
# Outputs:
#   Header line plus the command's combined stdout/stderr on stdout and in
#   the log file.
# Returns:
#   0 if an error marker was found but allowed; 1 if an error marker was found
#   and not allowed; otherwise the command's (or timeout's) exit status.
#   The function returns instead of calling exit so that `if run_cli ...`
#   retry loops can poll; a bare call under errexit still aborts the script.
#######################################
run_cli() {
  local logfile="$1"
  shift

  local allow_re=""
  if [[ "${1:-}" == "--allow" ]]; then
    allow_re="$2"
    shift 2
  fi

  # The date format is passed unquoted-inside-$(...) on purpose: escaped quotes
  # there would end up as literal quote characters in the header.
  printf '===== %s CMD: %s =====\n' "$(date -u +%Y-%m-%dT%H:%M:%SZ)" "$*" \
    | tee -a "${logfile}"

  local tmp_log
  tmp_log="$(mktemp)" || return 1

  # Suspend errexit only around the command so its status can be captured,
  # and restore it only if the caller had it enabled.
  local restore_errexit=0
  if [[ $- == *e* ]]; then
    restore_errexit=1
  fi
  set +e
  timeout 180s "$@" 2>&1 | tee "${tmp_log}"
  local status=${PIPESTATUS[0]}
  if (( restore_errexit )); then
    set -e
  fi

  cat "${tmp_log}" >> "${logfile}"

  local verdict="${status}"
  if grep -qiE -- "${ERROR_RE}" "${tmp_log}"; then
    if [[ -n "${allow_re}" ]] && grep -qiE -- "${allow_re}" "${tmp_log}"; then
      echo "Allowed CLI error markers in ${logfile}"
      verdict=0
    else
      echo "Detected CLI error markers in ${logfile}"
      verdict=1
    fi
  fi

  rm -f -- "${tmp_log}"
  return "${verdict}"
}
|
||||
|
||||
# Load docker/.env with allexport switched on, so every variable the file
# assigns is also exported to child processes.
set -o allexport
source docker/.env
set +o allexport

# Base URL of the HTTP API, and the bare host name cut out of it (scheme,
# port and path removed).
HOST_ADDRESS="http://host.docker.internal:${SVR_HTTP_PORT}"
USER_HOST="$(sed -E 's#^https?://([^:/]+).*#\1#' <<< "${HOST_ADDRESS}")"
USER_PORT="${SVR_HTTP_PORT}"

# The admin endpoint uses the same host with its own port.
ADMIN_HOST="${USER_HOST}"
ADMIN_PORT="${ADMIN_SVR_HTTP_PORT}"
|
||||
|
||||
# Block until the HTTP API answers on its ping endpoint. curl is executed
# inside the container named by RAGFLOW_CONTAINER (not assigned in this step;
# it must already be present in the environment). Both expansions are quoted
# so an unexpected space or glob character cannot split the arguments.
# The loop has no retry limit: it polls every 5 seconds until curl succeeds.
until sudo docker exec "${RAGFLOW_CONTAINER}" curl -s --connect-timeout 5 "${HOST_ADDRESS}/v1/system/ping" > /dev/null; do
  echo "Waiting for service to be available..."
  sleep 5
done
|
||||
|
||||
# Argument prefix shared by every admin-mode CLI call. ${CLI} is left unquoted
# on purpose so it splits into the interpreter and the script path.
admin_cli=(${CLI} --type admin --host "${ADMIN_HOST}" --port "${ADMIN_PORT}" --username "admin@ragflow.io" --password "admin" command)

# Poll the admin service: at most 30 "ping" attempts, sleeping one second
# after each attempt that run_cli reports as failed.
admin_ready=0
for ((attempt = 1; attempt <= 30; attempt++)); do
  if run_cli "${LOG_FILE}" "${admin_cli[@]}" "ping"; then
    admin_ready=1
    break
  fi
  sleep 1
done
if (( admin_ready != 1 )); then
  echo "Admin service did not become ready"
  exit 1
fi

run_cli "${LOG_FILE}" "${admin_cli[@]}" "show version"

# Handed to run_cli as its --allow pattern for the create-user call below.
ALLOW_USER_EXISTS_RE='already exists|already exist|duplicate|already.*registered|exist(s)?'
run_cli "${LOG_FILE}" --allow "${ALLOW_USER_EXISTS_RE}" "${admin_cli[@]}" "create user '$EMAIL' '$PASS'"
|
||||
|
||||
# Argument prefix shared by every user-mode CLI call, authenticating as the
# per-run account. ${CLI} is left unquoted on purpose so it splits into the
# interpreter and the script path.
user_cli=(${CLI} --type user --host "${USER_HOST}" --port "${USER_PORT}" --username "${EMAIL}" --password "${PASS}" command)

# Poll the user-facing service: at most 30 "ping" attempts, sleeping one
# second after each attempt that run_cli reports as failed.
user_ready=0
for ((attempt = 1; attempt <= 30; attempt++)); do
  if run_cli "${LOG_FILE}" "${user_cli[@]}" "ping"; then
    user_ready=1
    break
  fi
  sleep 1
done
if (( user_ready != 1 )); then
  echo "User service did not become ready"
  exit 1
fi

# Retrieval scenario, executed strictly in this order; with errexit active the
# first failing statement ends the step.
for stmt in \
  "show version" \
  "create dataset '$DATASET' with embedding 'BAAI/bge-small-en-v1.5@Builtin' parser 'auto'" \
  "import 'test/benchmark/test_docs/Doc1.pdf,test/benchmark/test_docs/Doc2.pdf' into dataset '$DATASET'" \
  "parse dataset '$DATASET' sync" \
  "Benchmark 16 100 search 'what are these documents about' on datasets '$DATASET'"; do
  run_cli "${LOG_FILE}" "${user_cli[@]}" "${stmt}"
done
|
||||
|
||||
- name: Collect ragflow log
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
@ -277,6 +378,107 @@ jobs:
|
||||
done
|
||||
source .venv/bin/activate && set -o pipefail; DOC_ENGINE=infinity pytest -s --tb=short --level=${HTTP_API_TEST_LEVEL} test/testcases/test_http_api 2>&1 | tee infinity_http_api_test.log
|
||||
|
||||
- name: RAGFlow CLI retrieval test (Infinity)
|
||||
env:
|
||||
PYTHONPATH: ${{ github.workspace }}
|
||||
run: |
|
||||
# Strict mode: stop on a failing command, on an unset variable, and on a
# failure in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# Run everything below with the project's virtualenv interpreter.
. .venv/bin/activate

# Blank out every proxy variable in both spellings.
# NOTE(review): presumably so requests to the locally started services are not
# routed through a runner-level proxy -- confirm.
export http_proxy="" https_proxy="" no_proxy="" \
  HTTP_PROXY="" HTTPS_PROXY="" NO_PROXY=""

# Per-run identifiers, all derived from GITHUB_RUN_ID.
EMAIL="ci-${GITHUB_RUN_ID}@example.com"
PASS="ci-pass-${GITHUB_RUN_ID}"
DATASET="ci_dataset_${GITHUB_RUN_ID}"

# Kept as a plain string: the call sites expand it unquoted so that it splits
# into the interpreter and the script path.
CLI="python admin/client/ragflow_cli.py"

# Log file for this step; truncate it so the step starts from an empty log.
LOG_FILE="infinity_cli_test.log"
: > "${LOG_FILE}"
|
||||
|
||||
# Extended regex, matched case-insensitively against CLI output, of markers
# that mean a call failed even if the process exited 0. The last alternative
# matches a non-zero "code: N" field; it uses [[:space:]] because inside single
# quotes "\\s" is a literal backslash followed by "s" and never matched.
ERROR_RE='Traceback|ModuleNotFoundError|ImportError|Parse error|Bad response|Fail to|code:[[:space:]]*[1-9]'

#######################################
# Run one CLI command with a 180 s timeout, mirror its output to the console
# and append it to a log file, then scan the output for error markers.
# Globals:
#   ERROR_RE (read)
# Arguments:
#   $1              - log file to append to
#   --allow REGEX   - optional; if the output contains an error marker AND
#                     matches REGEX (case-insensitive ERE), treat it as success
#   remaining args  - the command to execute
# Outputs:
#   Header line plus the command's combined stdout/stderr on stdout and in
#   the log file.
# Returns:
#   0 if an error marker was found but allowed; 1 if an error marker was found
#   and not allowed; otherwise the command's (or timeout's) exit status.
#   The function returns instead of calling exit so that `if run_cli ...`
#   retry loops can poll; a bare call under errexit still aborts the script.
#######################################
run_cli() {
  local logfile="$1"
  shift

  local allow_re=""
  if [[ "${1:-}" == "--allow" ]]; then
    allow_re="$2"
    shift 2
  fi

  # The date format is passed unquoted-inside-$(...) on purpose: escaped quotes
  # there would end up as literal quote characters in the header.
  printf '===== %s CMD: %s =====\n' "$(date -u +%Y-%m-%dT%H:%M:%SZ)" "$*" \
    | tee -a "${logfile}"

  local tmp_log
  tmp_log="$(mktemp)" || return 1

  # Suspend errexit only around the command so its status can be captured,
  # and restore it only if the caller had it enabled.
  local restore_errexit=0
  if [[ $- == *e* ]]; then
    restore_errexit=1
  fi
  set +e
  timeout 180s "$@" 2>&1 | tee "${tmp_log}"
  local status=${PIPESTATUS[0]}
  if (( restore_errexit )); then
    set -e
  fi

  cat "${tmp_log}" >> "${logfile}"

  local verdict="${status}"
  if grep -qiE -- "${ERROR_RE}" "${tmp_log}"; then
    if [[ -n "${allow_re}" ]] && grep -qiE -- "${allow_re}" "${tmp_log}"; then
      echo "Allowed CLI error markers in ${logfile}"
      verdict=0
    else
      echo "Detected CLI error markers in ${logfile}"
      verdict=1
    fi
  fi

  rm -f -- "${tmp_log}"
  return "${verdict}"
}
|
||||
|
||||
# Load docker/.env with allexport switched on, so every variable the file
# assigns is also exported to child processes.
set -o allexport
source docker/.env
set +o allexport

# Base URL of the HTTP API, and the bare host name cut out of it (scheme,
# port and path removed).
HOST_ADDRESS="http://host.docker.internal:${SVR_HTTP_PORT}"
USER_HOST="$(sed -E 's#^https?://([^:/]+).*#\1#' <<< "${HOST_ADDRESS}")"
USER_PORT="${SVR_HTTP_PORT}"

# The admin endpoint uses the same host with its own port.
ADMIN_HOST="${USER_HOST}"
ADMIN_PORT="${ADMIN_SVR_HTTP_PORT}"
|
||||
|
||||
# Block until the HTTP API answers on its ping endpoint. curl is executed
# inside the container named by RAGFLOW_CONTAINER (not assigned in this step;
# it must already be present in the environment). Both expansions are quoted
# so an unexpected space or glob character cannot split the arguments.
# The loop has no retry limit: it polls every 5 seconds until curl succeeds.
until sudo docker exec "${RAGFLOW_CONTAINER}" curl -s --connect-timeout 5 "${HOST_ADDRESS}/v1/system/ping" > /dev/null; do
  echo "Waiting for service to be available..."
  sleep 5
done
|
||||
|
||||
# Argument prefix shared by every admin-mode CLI call. ${CLI} is left unquoted
# on purpose so it splits into the interpreter and the script path.
admin_cli=(${CLI} --type admin --host "${ADMIN_HOST}" --port "${ADMIN_PORT}" --username "admin@ragflow.io" --password "admin" command)

# Poll the admin service: at most 30 "ping" attempts, sleeping one second
# after each attempt that run_cli reports as failed.
admin_ready=0
for ((attempt = 1; attempt <= 30; attempt++)); do
  if run_cli "${LOG_FILE}" "${admin_cli[@]}" "ping"; then
    admin_ready=1
    break
  fi
  sleep 1
done
if (( admin_ready != 1 )); then
  echo "Admin service did not become ready"
  exit 1
fi

run_cli "${LOG_FILE}" "${admin_cli[@]}" "show version"

# Handed to run_cli as its --allow pattern for the create-user call below.
ALLOW_USER_EXISTS_RE='already exists|already exist|duplicate|already.*registered|exist(s)?'
run_cli "${LOG_FILE}" --allow "${ALLOW_USER_EXISTS_RE}" "${admin_cli[@]}" "create user '$EMAIL' '$PASS'"
|
||||
|
||||
# Argument prefix shared by every user-mode CLI call, authenticating as the
# per-run account. ${CLI} is left unquoted on purpose so it splits into the
# interpreter and the script path.
user_cli=(${CLI} --type user --host "${USER_HOST}" --port "${USER_PORT}" --username "${EMAIL}" --password "${PASS}" command)

# Poll the user-facing service: at most 30 "ping" attempts, sleeping one
# second after each attempt that run_cli reports as failed.
user_ready=0
for ((attempt = 1; attempt <= 30; attempt++)); do
  if run_cli "${LOG_FILE}" "${user_cli[@]}" "ping"; then
    user_ready=1
    break
  fi
  sleep 1
done
if (( user_ready != 1 )); then
  echo "User service did not become ready"
  exit 1
fi

# Retrieval scenario, executed strictly in this order; with errexit active the
# first failing statement ends the step.
for stmt in \
  "show version" \
  "create dataset '$DATASET' with embedding 'BAAI/bge-small-en-v1.5@Builtin' parser 'auto'" \
  "import 'test/benchmark/test_docs/Doc1.pdf,test/benchmark/test_docs/Doc2.pdf' into dataset '$DATASET'" \
  "parse dataset '$DATASET' sync" \
  "Benchmark 16 100 search 'what are these documents about' on datasets '$DATASET'"; do
  run_cli "${LOG_FILE}" "${user_cli[@]}" "${stmt}"
done
|
||||
|
||||
- name: Collect ragflow log
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
|
||||
Reference in New Issue
Block a user