### What problem does this PR solve?

This PR adds MySQL and PostgreSQL as data source connectors, allowing users to import data directly from relational databases into RAGFlow for RAG workflows.

Many users store their knowledge in databases (product catalogs, documentation, FAQs, etc.) and currently have no way to sync this data into RAGFlow without exporting to files first. This feature lets them connect directly to their databases, run SQL queries, and automatically create documents from the results.

Closes #763
Closes #11560

### Type of change

- [ ] Bug Fix (non-breaking change which fixes an issue)
- [x] New Feature (non-breaking change which adds functionality)
- [ ] Documentation Update
- [ ] Refactoring
- [ ] Performance Improvement
- [ ] Other (please describe):

### What this PR does

**New capabilities:**

- Connect to MySQL and PostgreSQL databases
- Run custom SQL queries to extract data
- Map database columns to document content (vectorized) and metadata (searchable)
- Support incremental sync using a timestamp column
- Full frontend UI with connection form and tooltips

**Files changed:**

Backend:

- `common/constants.py` - Added MYSQL/POSTGRESQL to FileSource enum
- `common/data_source/config.py` - Added to DocumentSource enum
- `common/data_source/rdbms_connector.py` - New connector (368 lines)
- `common/data_source/__init__.py` - Exported the connector
- `rag/svr/sync_data_source.py` - Added MySQL and PostgreSQL sync classes
- `pyproject.toml` - Added mysql-connector-python dependency

Frontend:

- `web/src/pages/user-setting/data-source/constant/index.tsx` - Form fields
- `web/src/locales/en.ts` - English translations
- `web/src/assets/svg/data-source/mysql.svg` - MySQL icon
- `web/src/assets/svg/data-source/postgresql.svg` - PostgreSQL icon

### Testing done

Tested with MySQL 8.0 and PostgreSQL 16:

- Connection validation works correctly
- Full sync imports all query results as documents
- Incremental sync only fetches rows updated since last sync
- Custom SQL queries filter data as expected
- Invalid credentials show clear error messages
- Lint checks pass (`ruff check` returns no errors)

---------

Co-authored-by: mkdev11 <YOUR_GITHUB_ID+MkDev11@users.noreply.github.com>
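For reference, here is a minimal sketch of the incremental-sync idea described above. This is illustrative only, assuming the `mysql-connector-python` dependency; the helper names `fetch_updated_rows` and `row_to_document` and the config shape are hypothetical, not the actual `rdbms_connector.py` API:

```python
# Illustrative sketch only -- not the rdbms_connector.py implementation.
import mysql.connector


def fetch_updated_rows(cfg, query, ts_column, last_sync):
    """Run the user-supplied query, keeping only rows updated since last_sync."""
    conn = mysql.connector.connect(
        host=cfg["host"],
        port=cfg["port"],
        user=cfg["user"],
        password=cfg["password"],
        database=cfg["database"],
    )
    try:
        cur = conn.cursor(dictionary=True)  # rows come back as column->value dicts
        # Wrap the user query in a subselect so the timestamp filter applies
        # no matter what the query itself selects.
        sql = f"SELECT * FROM ({query}) AS t WHERE t.{ts_column} > %s"
        cur.execute(sql, (last_sync,))
        return cur.fetchall()
    finally:
        conn.close()


def row_to_document(row, content_cols, meta_cols):
    """Map chosen columns to vectorized content and the rest to searchable metadata."""
    content = "\n".join(f"{c}: {row[c]}" for c in content_cols if c in row)
    metadata = {c: row[c] for c in meta_cols if c in row}
    return {"content": content, "metadata": metadata}
```

The same subselect-wrapping trick carries over to PostgreSQL, since drivers such as `psycopg2` use the same `%s` placeholder style.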
name: tests

permissions:
  contents: read

on:
  push:
    branches:
      - 'main'
      - '*.*.*'
    paths-ignore:
      - 'docs/**'
      - '*.md'
      - '*.mdx'
  # The only difference between pull_request and pull_request_target is the context in which the workflow runs:
  # — pull_request_target workflows use the workflow files from the default branch, and secrets are available.
  # — pull_request workflows use the workflow files from the pull request branch, and secrets are unavailable.
  pull_request:
    types: [ synchronize, ready_for_review ]
    paths-ignore:
      - 'docs/**'
      - '*.md'
      - '*.mdx'
  schedule:
    - cron: '0 16 * * *' # Runs daily at 16:00:00Z (00:00:00+08:00)

# https://docs.github.com/en/actions/using-jobs/using-concurrency
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

jobs:
  ragflow_tests:
    name: ragflow_tests
    # https://docs.github.com/en/actions/using-jobs/using-conditions-to-control-job-execution
    # https://github.com/orgs/community/discussions/26261
    if: ${{ github.event_name != 'pull_request' || (github.event.pull_request.draft == false && contains(github.event.pull_request.labels.*.name, 'ci')) }}
    runs-on: [ "self-hosted", "ragflow-test" ]
    steps:
      - name: Ensure workspace ownership
        run: |
          echo "Workflow triggered by ${{ github.event_name }}"
          echo "chown -R ${USER} ${GITHUB_WORKSPACE}" && sudo chown -R ${USER} ${GITHUB_WORKSPACE}

      # https://github.com/actions/checkout/issues/1781
      - name: Check out code
        uses: actions/checkout@v6
        with:
          ref: ${{ (github.event_name == 'pull_request' || github.event_name == 'pull_request_target') && format('refs/pull/{0}/merge', github.event.pull_request.number) || github.sha }}
          fetch-depth: 0
          fetch-tags: true

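      # After a PR merges, the follow-up push to main would re-test the exact same
      # tree. The step below compares the workspace tree hash against the hash
      # recorded by the PR's own run and cancels this run on a match.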
      - name: Check workflow duplication
        if: ${{ !cancelled() && !failure() }}
        run: |
          if [[ ${GITHUB_EVENT_NAME} != "pull_request" && ${GITHUB_EVENT_NAME} != "schedule" ]]; then
            HEAD=$(git rev-parse HEAD)
            # Find a PR that introduced a given commit
            gh auth login --with-token <<< "${{ secrets.GITHUB_TOKEN }}"
            PR_NUMBER=$(gh pr list --search ${HEAD} --state merged --json number --jq .[0].number)
            echo "HEAD=${HEAD}"
            echo "PR_NUMBER=${PR_NUMBER}"
            if [[ -n "${PR_NUMBER}" ]]; then
              PR_SHA_FP=${RUNNER_WORKSPACE_PREFIX}/artifacts/${GITHUB_REPOSITORY}/PR_${PR_NUMBER}
              if [[ -f "${PR_SHA_FP}" ]]; then
                read -r PR_SHA PR_RUN_ID < "${PR_SHA_FP}"
                # Calculate the hash of the current workspace content
                HEAD_SHA=$(git rev-parse HEAD^{tree})
                if [[ "${HEAD_SHA}" == "${PR_SHA}" ]]; then
                  echo "Cancel myself since the workspace content hash is the same as that of merged PR #${PR_NUMBER}. See ${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}/actions/runs/${PR_RUN_ID} for details."
                  gh run cancel ${GITHUB_RUN_ID}
                  while true; do
                    status=$(gh run view ${GITHUB_RUN_ID} --json status -q .status)
                    [ "${status}" = "completed" ] && break
                    sleep 5
                  done
                  exit 1
                fi
              fi
            fi
          elif [[ ${GITHUB_EVENT_NAME} == "pull_request" ]]; then
            PR_NUMBER=${{ github.event.pull_request.number }}
            PR_SHA_FP=${RUNNER_WORKSPACE_PREFIX}/artifacts/${GITHUB_REPOSITORY}/PR_${PR_NUMBER}
            # Calculate the hash of the current workspace content
            PR_SHA=$(git rev-parse HEAD^{tree})
            echo "PR #${PR_NUMBER} workspace content hash: ${PR_SHA}"
            mkdir -p ${RUNNER_WORKSPACE_PREFIX}/artifacts/${GITHUB_REPOSITORY}
            echo "${PR_SHA} ${GITHUB_RUN_ID}" > ${PR_SHA_FP}
          fi
          ARTIFACTS_DIR=${RUNNER_WORKSPACE_PREFIX}/artifacts/${GITHUB_REPOSITORY}/${GITHUB_RUN_ID}
          echo "ARTIFACTS_DIR=${ARTIFACTS_DIR}" >> ${GITHUB_ENV}
          rm -rf ${ARTIFACTS_DIR} && mkdir -p ${ARTIFACTS_DIR}

      # https://github.com/astral-sh/ruff-action
      - name: Static check with Ruff
        uses: astral-sh/ruff-action@v3
        with:
          version: ">=0.11.x"
          args: "check"

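      # The following step is currently disabled via `if: ${{ false }}`.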
      - name: Check comments of changed Python files
        if: ${{ false }}
        run: |
          if [[ ${{ github.event_name }} == 'pull_request' || ${{ github.event_name }} == 'pull_request_target' ]]; then
            CHANGED_FILES=$(git diff --name-only ${{ github.event.pull_request.base.sha }}...${{ github.event.pull_request.head.sha }} \
              | grep -E '\.(py)$' || true)

            if [ -n "$CHANGED_FILES" ]; then
              echo "Check comments of changed Python files with check_comment_ascii.py"

              readarray -t files <<< "$CHANGED_FILES"
              HAS_ERROR=0

              for file in "${files[@]}"; do
                if [ -f "$file" ]; then
                  if python3 check_comment_ascii.py "$file"; then
                    echo "✅ $file"
                  else
                    echo "❌ $file"
                    HAS_ERROR=1
                  fi
                fi
              done

              if [ $HAS_ERROR -ne 0 ]; then
                exit 1
              fi
            else
              echo "No Python files changed"
            fi
          fi

      - name: Run unit test
        run: |
          uv sync --python 3.12 --group test --frozen
          source .venv/bin/activate
          which pytest || echo "pytest not in PATH"
          echo "Start to run unit test"
          python3 run_tests.py

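      # Tag the image with the run ID so concurrent jobs on the same host never
      # collide on an image name.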
      - name: Build ragflow:nightly
        run: |
          RUNNER_WORKSPACE_PREFIX=${RUNNER_WORKSPACE_PREFIX:-${HOME}}
          RAGFLOW_IMAGE=infiniflow/ragflow:${GITHUB_RUN_ID}
          echo "RAGFLOW_IMAGE=${RAGFLOW_IMAGE}" >> ${GITHUB_ENV}
          sudo docker pull ubuntu:22.04
          sudo DOCKER_BUILDKIT=1 docker build --build-arg NEED_MIRROR=1 --build-arg HTTPS_PROXY=${HTTPS_PROXY} --build-arg HTTP_PROXY=${HTTP_PROXY} -f Dockerfile -t ${RAGFLOW_IMAGE} .
          if [[ ${GITHUB_EVENT_NAME} == "schedule" ]]; then
            export HTTP_API_TEST_LEVEL=p3
          else
            export HTTP_API_TEST_LEVEL=p2
          fi
          echo "HTTP_API_TEST_LEVEL=${HTTP_API_TEST_LEVEL}" >> ${GITHUB_ENV}
          echo "RAGFLOW_CONTAINER=${GITHUB_RUN_ID}-ragflow-cpu-1" >> ${GITHUB_ENV}

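      # Each runner on this host gets its own port block (base + RUNNER_NUM * 10)
      # so parallel jobs don't collide on host port bindings.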
      - name: Start ragflow:nightly
        run: |
          # Determine runner number (default to 1 if not found)
          RUNNER_NUM=$(sudo docker inspect $(hostname) --format '{{index .Config.Labels "com.docker.compose.container-number"}}' 2>/dev/null || true)
          RUNNER_NUM=${RUNNER_NUM:-1}

          # Compute port numbers using bash arithmetic
          ES_PORT=$((1200 + RUNNER_NUM * 10))
          OS_PORT=$((1201 + RUNNER_NUM * 10))
          INFINITY_THRIFT_PORT=$((23817 + RUNNER_NUM * 10))
          INFINITY_HTTP_PORT=$((23820 + RUNNER_NUM * 10))
          INFINITY_PSQL_PORT=$((5432 + RUNNER_NUM * 10))
          EXPOSE_MYSQL_PORT=$((5455 + RUNNER_NUM * 10))
          MINIO_PORT=$((9000 + RUNNER_NUM * 10))
          MINIO_CONSOLE_PORT=$((9001 + RUNNER_NUM * 10))
          REDIS_PORT=$((6379 + RUNNER_NUM * 10))
          TEI_PORT=$((6380 + RUNNER_NUM * 10))
          KIBANA_PORT=$((6601 + RUNNER_NUM * 10))
          SVR_HTTP_PORT=$((9380 + RUNNER_NUM * 10))
          ADMIN_SVR_HTTP_PORT=$((9381 + RUNNER_NUM * 10))
          SVR_MCP_PORT=$((9382 + RUNNER_NUM * 10))
          SANDBOX_EXECUTOR_MANAGER_PORT=$((9385 + RUNNER_NUM * 10))
          SVR_WEB_HTTP_PORT=$((80 + RUNNER_NUM * 10))
          SVR_WEB_HTTPS_PORT=$((443 + RUNNER_NUM * 10))

          # Persist computed ports into docker/.env so docker-compose uses the correct host bindings
          echo "" >> docker/.env
          echo -e "ES_PORT=${ES_PORT}" >> docker/.env
          echo -e "OS_PORT=${OS_PORT}" >> docker/.env
          echo -e "INFINITY_THRIFT_PORT=${INFINITY_THRIFT_PORT}" >> docker/.env
          echo -e "INFINITY_HTTP_PORT=${INFINITY_HTTP_PORT}" >> docker/.env
          echo -e "INFINITY_PSQL_PORT=${INFINITY_PSQL_PORT}" >> docker/.env
          echo -e "EXPOSE_MYSQL_PORT=${EXPOSE_MYSQL_PORT}" >> docker/.env
          echo -e "MINIO_PORT=${MINIO_PORT}" >> docker/.env
          echo -e "MINIO_CONSOLE_PORT=${MINIO_CONSOLE_PORT}" >> docker/.env
          echo -e "TEI_PORT=${TEI_PORT}" >> docker/.env
          echo -e "KIBANA_PORT=${KIBANA_PORT}" >> docker/.env
          echo -e "SVR_HTTP_PORT=${SVR_HTTP_PORT}" >> docker/.env
          echo -e "ADMIN_SVR_HTTP_PORT=${ADMIN_SVR_HTTP_PORT}" >> docker/.env
          echo -e "SVR_MCP_PORT=${SVR_MCP_PORT}" >> docker/.env
          echo -e "SANDBOX_EXECUTOR_MANAGER_PORT=${SANDBOX_EXECUTOR_MANAGER_PORT}" >> docker/.env
          echo -e "SVR_WEB_HTTP_PORT=${SVR_WEB_HTTP_PORT}" >> docker/.env
          echo -e "SVR_WEB_HTTPS_PORT=${SVR_WEB_HTTPS_PORT}" >> docker/.env

          echo -e "COMPOSE_PROFILES=\${COMPOSE_PROFILES},tei-cpu" >> docker/.env
          echo -e "TEI_MODEL=BAAI/bge-small-en-v1.5" >> docker/.env
          echo -e "RAGFLOW_IMAGE=${RAGFLOW_IMAGE}" >> docker/.env
          echo "HOST_ADDRESS=http://host.docker.internal:${SVR_HTTP_PORT}" >> ${GITHUB_ENV}

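          # The patch below launches the server under coverage; COVERAGE_FILE lands
          # in /ragflow/logs, which maps to docker/ragflow-logs on the host where
          # later steps pick it up.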
          # Patch entrypoint.sh for coverage
          sed -i '/"\$PY" api\/ragflow_server.py \${INIT_SUPERUSER_ARGS} &/c\ echo "Ensuring coverage is installed..."\n "$PY" -m pip install coverage\n export COVERAGE_FILE=/ragflow/logs/.coverage\n echo "Starting ragflow_server with coverage..."\n "$PY" -m coverage run --source=./api/apps --omit="*/tests/*,*/migrations/*" -a api/ragflow_server.py ${INIT_SUPERUSER_ARGS} &' docker/entrypoint.sh

          sudo docker compose -f docker/docker-compose.yml -p ${GITHUB_RUN_ID} up -d
          uv sync --python 3.12 --group test --frozen && uv pip install -e sdk/python

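      # Every test step clears the proxy variables so pytest and curl reach the
      # local containers directly, then waits for the API to come up.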
      - name: Run sdk tests against Elasticsearch
        run: |
          export http_proxy=""; export https_proxy=""; export no_proxy=""; export HTTP_PROXY=""; export HTTPS_PROXY=""; export NO_PROXY=""
          until sudo docker exec ${RAGFLOW_CONTAINER} curl -s --connect-timeout 5 ${HOST_ADDRESS}/v1/system/ping > /dev/null; do
            echo "Waiting for service to be available..."
            sleep 5
          done
          source .venv/bin/activate && set -o pipefail; pytest -s --tb=short --level=${HTTP_API_TEST_LEVEL} --junitxml=pytest-infinity-sdk.xml --cov=sdk/python/ragflow_sdk --cov-branch --cov-report=xml:coverage-es-sdk.xml test/testcases/test_sdk_api 2>&1 | tee es_sdk_test.log

      - name: Run web api tests against Elasticsearch
        run: |
          export http_proxy=""; export https_proxy=""; export no_proxy=""; export HTTP_PROXY=""; export HTTPS_PROXY=""; export NO_PROXY=""
          until sudo docker exec ${RAGFLOW_CONTAINER} curl -s --connect-timeout 5 ${HOST_ADDRESS}/v1/system/ping > /dev/null; do
            echo "Waiting for service to be available..."
            sleep 5
          done
          source .venv/bin/activate && set -o pipefail; pytest -s --tb=short --level=${HTTP_API_TEST_LEVEL} test/testcases/test_web_api 2>&1 | tee es_web_api_test.log

      - name: Run http api tests against Elasticsearch
        run: |
          export http_proxy=""; export https_proxy=""; export no_proxy=""; export HTTP_PROXY=""; export HTTPS_PROXY=""; export NO_PROXY=""
          until sudo docker exec ${RAGFLOW_CONTAINER} curl -s --connect-timeout 5 ${HOST_ADDRESS}/v1/system/ping > /dev/null; do
            echo "Waiting for service to be available..."
            sleep 5
          done
          source .venv/bin/activate && set -o pipefail; pytest -s --tb=short --level=${HTTP_API_TEST_LEVEL} test/testcases/test_http_api 2>&1 | tee es_http_api_test.log

      - name: RAGFlow CLI retrieval test Elasticsearch
        env:
          PYTHONPATH: ${{ github.workspace }}
        run: |
          set -euo pipefail
          source .venv/bin/activate

          export http_proxy=""; export https_proxy=""; export no_proxy=""; export HTTP_PROXY=""; export HTTPS_PROXY=""; export NO_PROXY=""

          EMAIL="ci-${GITHUB_RUN_ID}@example.com"
          PASS="ci-pass-${GITHUB_RUN_ID}"
          DATASET="ci_dataset_${GITHUB_RUN_ID}"

          CLI="python admin/client/ragflow_cli.py"

          LOG_FILE="es_cli_test.log"
          : > "${LOG_FILE}"

          ERROR_RE='Traceback|ModuleNotFoundError|ImportError|Parse error|Bad response|Fail to|code:\s*[1-9]'
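          # run_cli logs each invocation and fails the job when known error
          # markers appear in the output, unless an --allow pattern matches them.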
          run_cli() {
            local logfile="$1"
            shift
            local allow_re=""
            if [[ "${1:-}" == "--allow" ]]; then
              allow_re="$2"
              shift 2
            fi
            local cmd_display="$*"
            echo "===== $(date -u +\"%Y-%m-%dT%H:%M:%SZ\") CMD: ${cmd_display} =====" | tee -a "${logfile}"
            local tmp_log
            tmp_log="$(mktemp)"
            set +e
            timeout 180s "$@" 2>&1 | tee "${tmp_log}"
            local status=${PIPESTATUS[0]}
            set -e
            cat "${tmp_log}" >> "${logfile}"
            if grep -qiE "${ERROR_RE}" "${tmp_log}"; then
              if [[ -n "${allow_re}" ]] && grep -qiE "${allow_re}" "${tmp_log}"; then
                echo "Allowed CLI error markers in ${logfile}"
                rm -f "${tmp_log}"
                return 0
              fi
              echo "Detected CLI error markers in ${logfile}"
              rm -f "${tmp_log}"
              exit 1
            fi
            rm -f "${tmp_log}"
            return ${status}
          }

          set -a
          source docker/.env
          set +a

          HOST_ADDRESS="http://host.docker.internal:${SVR_HTTP_PORT}"
          USER_HOST="$(echo "${HOST_ADDRESS}" | sed -E 's#^https?://([^:/]+).*#\1#')"
          USER_PORT="${SVR_HTTP_PORT}"
          ADMIN_HOST="${USER_HOST}"
          ADMIN_PORT="${ADMIN_SVR_HTTP_PORT}"

          until sudo docker exec ${RAGFLOW_CONTAINER} curl -s --connect-timeout 5 ${HOST_ADDRESS}/v1/system/ping > /dev/null; do
            echo "Waiting for service to be available..."
            sleep 5
          done

          admin_ready=0
          for i in $(seq 1 30); do
            if run_cli "${LOG_FILE}" $CLI --type admin --host "$ADMIN_HOST" --port "$ADMIN_PORT" --username "admin@ragflow.io" --password "admin" command "ping"; then
              admin_ready=1
              break
            fi
            sleep 1
          done
          if [[ "${admin_ready}" -ne 1 ]]; then
            echo "Admin service did not become ready"
            exit 1
          fi

          run_cli "${LOG_FILE}" $CLI --type admin --host "$ADMIN_HOST" --port "$ADMIN_PORT" --username "admin@ragflow.io" --password "admin" command "show version"
          ALLOW_USER_EXISTS_RE='already exists|already exist|duplicate|already.*registered|exist(s)?'
          run_cli "${LOG_FILE}" --allow "${ALLOW_USER_EXISTS_RE}" $CLI --type admin --host "$ADMIN_HOST" --port "$ADMIN_PORT" --username "admin@ragflow.io" --password "admin" command "create user '$EMAIL' '$PASS'"

          user_ready=0
          for i in $(seq 1 30); do
            if run_cli "${LOG_FILE}" $CLI --type user --host "$USER_HOST" --port "$USER_PORT" --username "$EMAIL" --password "$PASS" command "ping"; then
              user_ready=1
              break
            fi
            sleep 1
          done
          if [[ "${user_ready}" -ne 1 ]]; then
            echo "User service did not become ready"
            exit 1
          fi

          run_cli "${LOG_FILE}" $CLI --type user --host "$USER_HOST" --port "$USER_PORT" --username "$EMAIL" --password "$PASS" command "show version"
          run_cli "${LOG_FILE}" $CLI --type user --host "$USER_HOST" --port "$USER_PORT" --username "$EMAIL" --password "$PASS" command "create dataset '$DATASET' with embedding 'BAAI/bge-small-en-v1.5@Builtin' parser 'auto'"
          run_cli "${LOG_FILE}" $CLI --type user --host "$USER_HOST" --port "$USER_PORT" --username "$EMAIL" --password "$PASS" command "import 'test/benchmark/test_docs/Doc1.pdf,test/benchmark/test_docs/Doc2.pdf' into dataset '$DATASET'"
          run_cli "${LOG_FILE}" $CLI --type user --host "$USER_HOST" --port "$USER_PORT" --username "$EMAIL" --password "$PASS" command "parse dataset '$DATASET' sync"
          run_cli "${LOG_FILE}" $CLI --type user --host "$USER_HOST" --port "$USER_PORT" --username "$EMAIL" --password "$PASS" command "Benchmark 16 100 search 'what are these documents about' on datasets '$DATASET'"

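      # coverage only writes its data file on a clean interpreter exit, so the
      # server is interrupted with SIGINT before the containers are stopped.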
      - name: Stop ragflow to save coverage Elasticsearch
        if: ${{ !cancelled() }}
        run: |
          # Send SIGINT to ragflow_server.py to trigger coverage save
          PID=$(sudo docker exec ${RAGFLOW_CONTAINER} ps aux | grep "ragflow_server.py" | grep -v grep | awk '{print $2}' | head -n 1)
          if [ -n "$PID" ]; then
            echo "Sending SIGINT to ragflow_server.py (PID: $PID)..."
            sudo docker exec ${RAGFLOW_CONTAINER} kill -INT $PID
            # Wait for process to exit and coverage file to be written
            sleep 10
          else
            echo "ragflow_server.py not found!"
          fi
          sudo docker compose -f docker/docker-compose.yml -p ${GITHUB_RUN_ID} stop

      - name: Generate server coverage report Elasticsearch
        if: ${{ !cancelled() }}
        run: |
          # .coverage file should be in docker/ragflow-logs/.coverage
          if [ -f docker/ragflow-logs/.coverage ]; then
            echo "Found .coverage file"
            cp docker/ragflow-logs/.coverage .coverage
            source .venv/bin/activate
            # Create .coveragerc to map container paths to host paths
            echo "[paths]" > .coveragerc
            echo "source =" >> .coveragerc
            echo " ." >> .coveragerc
            echo " /ragflow" >> .coveragerc
            coverage xml -o coverage-es-server.xml
            rm .coveragerc
            # Clean up for next run
            sudo rm docker/ragflow-logs/.coverage
          else
            echo ".coverage file not found!"
          fi

      - name: Collect ragflow log Elasticsearch
        if: ${{ !cancelled() }}
        run: |
          if [ -d docker/ragflow-logs ]; then
            cp -r docker/ragflow-logs ${ARTIFACTS_DIR}/ragflow-logs-es
            echo "ragflow log" && tail -n 200 docker/ragflow-logs/ragflow_server.log || true
          else
            echo "No docker/ragflow-logs directory found; skipping log collection"
          fi
          sudo rm -rf docker/ragflow-logs || true

      - name: Stop ragflow:nightly
        if: always() # always run this step even if previous steps failed
        run: |
          sudo docker compose -f docker/docker-compose.yml -p ${GITHUB_RUN_ID} down -v || true
          sudo docker ps -a --filter "label=com.docker.compose.project=${GITHUB_RUN_ID}" -q | xargs -r sudo docker rm -f

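      # Second pass: switch the doc engine to Infinity and rerun the suites.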
      - name: Start ragflow:nightly
        run: |
          sed -i '1i DOC_ENGINE=infinity' docker/.env
          sudo docker compose -f docker/docker-compose.yml -p ${GITHUB_RUN_ID} up -d

      - name: Run sdk tests against Infinity
        run: |
          export http_proxy=""; export https_proxy=""; export no_proxy=""; export HTTP_PROXY=""; export HTTPS_PROXY=""; export NO_PROXY=""
          until sudo docker exec ${RAGFLOW_CONTAINER} curl -s --connect-timeout 5 ${HOST_ADDRESS}/v1/system/ping > /dev/null; do
            echo "Waiting for service to be available..."
            sleep 5
          done
          source .venv/bin/activate && set -o pipefail; DOC_ENGINE=infinity pytest -s --tb=short --level=${HTTP_API_TEST_LEVEL} --junitxml=pytest-infinity-sdk.xml --cov=sdk/python/ragflow_sdk --cov-branch --cov-report=xml:coverage-infinity-sdk.xml test/testcases/test_sdk_api 2>&1 | tee infinity_sdk_test.log

      - name: Run web api tests against Infinity
        run: |
          export http_proxy=""; export https_proxy=""; export no_proxy=""; export HTTP_PROXY=""; export HTTPS_PROXY=""; export NO_PROXY=""
          until sudo docker exec ${RAGFLOW_CONTAINER} curl -s --connect-timeout 5 ${HOST_ADDRESS}/v1/system/ping > /dev/null; do
            echo "Waiting for service to be available..."
            sleep 5
          done
          source .venv/bin/activate && set -o pipefail; DOC_ENGINE=infinity pytest -s --tb=short --level=${HTTP_API_TEST_LEVEL} test/testcases/test_web_api/test_api_app 2>&1 | tee infinity_web_api_test.log

      - name: Run http api tests against Infinity
        run: |
          export http_proxy=""; export https_proxy=""; export no_proxy=""; export HTTP_PROXY=""; export HTTPS_PROXY=""; export NO_PROXY=""
          until sudo docker exec ${RAGFLOW_CONTAINER} curl -s --connect-timeout 5 ${HOST_ADDRESS}/v1/system/ping > /dev/null; do
            echo "Waiting for service to be available..."
            sleep 5
          done
          source .venv/bin/activate && set -o pipefail; DOC_ENGINE=infinity pytest -s --tb=short --level=${HTTP_API_TEST_LEVEL} test/testcases/test_http_api 2>&1 | tee infinity_http_api_test.log

      - name: RAGFlow CLI retrieval test Infinity
        env:
          PYTHONPATH: ${{ github.workspace }}
        run: |
          set -euo pipefail
          source .venv/bin/activate

          export http_proxy=""; export https_proxy=""; export no_proxy=""; export HTTP_PROXY=""; export HTTPS_PROXY=""; export NO_PROXY=""

          EMAIL="ci-${GITHUB_RUN_ID}@example.com"
          PASS="ci-pass-${GITHUB_RUN_ID}"
          DATASET="ci_dataset_${GITHUB_RUN_ID}"

          CLI="python admin/client/ragflow_cli.py"

          LOG_FILE="infinity_cli_test.log"
          : > "${LOG_FILE}"

          ERROR_RE='Traceback|ModuleNotFoundError|ImportError|Parse error|Bad response|Fail to|code:\s*[1-9]'
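          # run_cli logs each invocation and fails the job when known error
          # markers appear in the output, unless an --allow pattern matches them.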
          run_cli() {
            local logfile="$1"
            shift
            local allow_re=""
            if [[ "${1:-}" == "--allow" ]]; then
              allow_re="$2"
              shift 2
            fi
            local cmd_display="$*"
            echo "===== $(date -u +\"%Y-%m-%dT%H:%M:%SZ\") CMD: ${cmd_display} =====" | tee -a "${logfile}"
            local tmp_log
            tmp_log="$(mktemp)"
            set +e
            timeout 180s "$@" 2>&1 | tee "${tmp_log}"
            local status=${PIPESTATUS[0]}
            set -e
            cat "${tmp_log}" >> "${logfile}"
            if grep -qiE "${ERROR_RE}" "${tmp_log}"; then
              if [[ -n "${allow_re}" ]] && grep -qiE "${allow_re}" "${tmp_log}"; then
                echo "Allowed CLI error markers in ${logfile}"
                rm -f "${tmp_log}"
                return 0
              fi
              echo "Detected CLI error markers in ${logfile}"
              rm -f "${tmp_log}"
              exit 1
            fi
            rm -f "${tmp_log}"
            return ${status}
          }

          set -a
          source docker/.env
          set +a

          HOST_ADDRESS="http://host.docker.internal:${SVR_HTTP_PORT}"
          USER_HOST="$(echo "${HOST_ADDRESS}" | sed -E 's#^https?://([^:/]+).*#\1#')"
          USER_PORT="${SVR_HTTP_PORT}"
          ADMIN_HOST="${USER_HOST}"
          ADMIN_PORT="${ADMIN_SVR_HTTP_PORT}"

          until sudo docker exec ${RAGFLOW_CONTAINER} curl -s --connect-timeout 5 ${HOST_ADDRESS}/v1/system/ping > /dev/null; do
            echo "Waiting for service to be available..."
            sleep 5
          done

          admin_ready=0
          for i in $(seq 1 30); do
            if run_cli "${LOG_FILE}" $CLI --type admin --host "$ADMIN_HOST" --port "$ADMIN_PORT" --username "admin@ragflow.io" --password "admin" command "ping"; then
              admin_ready=1
              break
            fi
            sleep 1
          done
          if [[ "${admin_ready}" -ne 1 ]]; then
            echo "Admin service did not become ready"
            exit 1
          fi

          run_cli "${LOG_FILE}" $CLI --type admin --host "$ADMIN_HOST" --port "$ADMIN_PORT" --username "admin@ragflow.io" --password "admin" command "show version"
          ALLOW_USER_EXISTS_RE='already exists|already exist|duplicate|already.*registered|exist(s)?'
          run_cli "${LOG_FILE}" --allow "${ALLOW_USER_EXISTS_RE}" $CLI --type admin --host "$ADMIN_HOST" --port "$ADMIN_PORT" --username "admin@ragflow.io" --password "admin" command "create user '$EMAIL' '$PASS'"

          user_ready=0
          for i in $(seq 1 30); do
            if run_cli "${LOG_FILE}" $CLI --type user --host "$USER_HOST" --port "$USER_PORT" --username "$EMAIL" --password "$PASS" command "ping"; then
              user_ready=1
              break
            fi
            sleep 1
          done
          if [[ "${user_ready}" -ne 1 ]]; then
            echo "User service did not become ready"
            exit 1
          fi

          run_cli "${LOG_FILE}" $CLI --type user --host "$USER_HOST" --port "$USER_PORT" --username "$EMAIL" --password "$PASS" command "show version"
          run_cli "${LOG_FILE}" $CLI --type user --host "$USER_HOST" --port "$USER_PORT" --username "$EMAIL" --password "$PASS" command "create dataset '$DATASET' with embedding 'BAAI/bge-small-en-v1.5@Builtin' parser 'auto'"
          run_cli "${LOG_FILE}" $CLI --type user --host "$USER_HOST" --port "$USER_PORT" --username "$EMAIL" --password "$PASS" command "import 'test/benchmark/test_docs/Doc1.pdf,test/benchmark/test_docs/Doc2.pdf' into dataset '$DATASET'"
          run_cli "${LOG_FILE}" $CLI --type user --host "$USER_HOST" --port "$USER_PORT" --username "$EMAIL" --password "$PASS" command "parse dataset '$DATASET' sync"
          run_cli "${LOG_FILE}" $CLI --type user --host "$USER_HOST" --port "$USER_PORT" --username "$EMAIL" --password "$PASS" command "Benchmark 16 100 search 'what are these documents about' on datasets '$DATASET'"

      - name: Stop ragflow to save coverage Infinity
        if: ${{ !cancelled() }}
        run: |
          # Send SIGINT to ragflow_server.py to trigger coverage save
          PID=$(sudo docker exec ${RAGFLOW_CONTAINER} ps aux | grep "ragflow_server.py" | grep -v grep | awk '{print $2}' | head -n 1)
          if [ -n "$PID" ]; then
            echo "Sending SIGINT to ragflow_server.py (PID: $PID)..."
            sudo docker exec ${RAGFLOW_CONTAINER} kill -INT $PID
            # Wait for process to exit and coverage file to be written
            sleep 10
          else
            echo "ragflow_server.py not found!"
          fi
          sudo docker compose -f docker/docker-compose.yml -p ${GITHUB_RUN_ID} stop

      - name: Generate server coverage report Infinity
        if: ${{ !cancelled() }}
        run: |
          # .coverage file should be in docker/ragflow-logs/.coverage
          if [ -f docker/ragflow-logs/.coverage ]; then
            echo "Found .coverage file"
            cp docker/ragflow-logs/.coverage .coverage
            source .venv/bin/activate
            # Create .coveragerc to map container paths to host paths
            echo "[paths]" > .coveragerc
            echo "source =" >> .coveragerc
            echo " ." >> .coveragerc
            echo " /ragflow" >> .coveragerc
            coverage xml -o coverage-infinity-server.xml
            rm .coveragerc
          else
            echo ".coverage file not found!"
          fi

      - name: Upload coverage reports to Codecov
        uses: codecov/codecov-action@v5
        if: ${{ !cancelled() }}
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
          fail_ci_if_error: false

      - name: Collect ragflow log
        if: ${{ !cancelled() }}
        run: |
          if [ -d docker/ragflow-logs ]; then
            cp -r docker/ragflow-logs ${ARTIFACTS_DIR}/ragflow-logs-infinity
            echo "ragflow log" && tail -n 200 docker/ragflow-logs/ragflow_server.log || true
          else
            echo "No docker/ragflow-logs directory found; skipping log collection"
          fi
          sudo rm -rf docker/ragflow-logs || true

      - name: Stop ragflow:nightly
        if: always() # always run this step even if previous steps failed
        run: |
          # Sometimes `docker compose down` fails due to a hung container, heavy load, etc.
          # Remove such containers explicitly to release resources (for example, listening ports).
          sudo docker compose -f docker/docker-compose.yml -p ${GITHUB_RUN_ID} down -v || true
          sudo docker ps -a --filter "label=com.docker.compose.project=${GITHUB_RUN_ID}" -q | xargs -r sudo docker rm -f
          if [[ -n ${RAGFLOW_IMAGE} ]]; then
            sudo docker rmi -f ${RAGFLOW_IMAGE}
          fi