Mirror of https://github.com/infiniflow/ragflow.git, synced 2026-01-04 03:25:30 +08:00

Compare commits: b0b866c8fd ... v0.21.1 (209 commits)
| SHA1 | Author | Date |
|---|---|---|
| de24e74b4c | |||
| 83e80e3d7f | |||
| ea73f13ebf | |||
| af6eabad0e | |||
| 5fb5a51b2e | |||
| 37004ecfb3 | |||
| 6d333ec4bc | |||
| ac188b0486 | |||
| adeb9d87e2 | |||
| d121033208 | |||
| 494f84cd69 | |||
| f24d464a53 | |||
| 484c536f2e | |||
| f7112acd97 | |||
| de4f75dcd8 | |||
| 15fff5724e | |||
| d616354d66 | |||
| 1bad24e3ab | |||
| 4910146149 | |||
| 0e549e96ee | |||
| 318cb7d792 | |||
| 4d1255b231 | |||
| b30f0be858 | |||
| a82e9b3d91 | |||
| 02a452993e | |||
| 307cdc62ea | |||
| 2d491188b8 | |||
| acc0f7396e | |||
| 9a4cd81891 | |||
| 1694f32e8e | |||
| 41fade3fe6 | |||
| 8d333f3590 | |||
| cd77425b87 | |||
| 544c9990e3 | |||
| 41a647fe32 | |||
| 594bf485d4 | |||
| 863c3e3d9c | |||
| 1767039be3 | |||
| cd75fa02b1 | |||
| cfdd37820a | |||
| 9d12380806 | |||
| 866098634b | |||
| 8013505daf | |||
| deb81810e9 | |||
| 6ab96287c9 | |||
| aaa4776657 | |||
| 5b2e5dd334 | |||
| de46b0d46e | |||
| cc703da747 | |||
| d956a442ce | |||
| 5fc59a3132 | |||
| 1d955507e9 | |||
| cf09c2260a | |||
| c9b18cbe18 | |||
| 8123942ec1 | |||
| 685114d253 | |||
| c9e56d20cf | |||
| 8ee0b6ea54 | |||
| f50b2461cb | |||
| 617faee718 | |||
| b15643bd80 | |||
| f12290f04b | |||
| 15838a6673 | |||
| 39ad9490ac | |||
| 387baf858f | |||
| 2dba858c84 | |||
| 43ea312144 | |||
| ce05696d95 | |||
| 0f62bfda21 | |||
| 70ffe2b4e8 | |||
| e76db6e222 | |||
| 7b664b5a84 | |||
| 8a41057236 | |||
| 447041d265 | |||
| f0375c4acd | |||
| 8af769de41 | |||
| f808bc32ba | |||
| e8cb1d8fc4 | |||
| 4e86ee4ff9 | |||
| c99034f717 | |||
| 86b254d214 | |||
| 1c38f4cefb | |||
| 74c195cd36 | |||
| e48bec1cbf | |||
| 205a5eb9f5 | |||
| 8844826208 | |||
| 8fe4281d81 | |||
| fb1bedbd3c | |||
| 6e55b9146c | |||
| 071ea9c493 | |||
| 5037a28e4d | |||
| fdac4afd10 | |||
| 769d701f56 | |||
| 8b512cdadf | |||
| 3ae126836a | |||
| e8bfda6020 | |||
| 34c54cd459 | |||
| 3d873d98fb | |||
| fbe25b5add | |||
| 0c6c7c8fe7 | |||
| e266f9a66f | |||
| fde6e5ab39 | |||
| 67529825e2 | |||
| 738a7d5c24 | |||
| 83ec915d51 | |||
| e535099f36 | |||
| 16b5feadb7 | |||
| 960f47c4d4 | |||
| 51139de178 | |||
| 1f5167f1ca | |||
| 578ea34b3e | |||
| 5fb3d2f55c | |||
| d99d1e3518 | |||
| 5b387b68ba | |||
| f92a45dcc4 | |||
| c4b8e4845c | |||
| 87659dcd3a | |||
| 6fd9508017 | |||
| 113851a692 | |||
| 66c69d10fe | |||
| 781d49cd0e | |||
| aaae938f54 | |||
| 9e73f799b2 | |||
| 21a62130c8 | |||
| 68e47c81d4 | |||
| f11d8af936 | |||
| 74ec734d69 | |||
| 8c75803b70 | |||
| ff4239c7cf | |||
| cf5867b146 | |||
| 77481ab3ab | |||
| 9c53b3336a | |||
| 24481f0332 | |||
| 4e6b84bb41 | |||
| 65c3f0406c | |||
| 7fb8b30cc2 | |||
| acca3640f7 | |||
| 58836d84fe | |||
| ad56137a59 | |||
| 2828e321bc | |||
| 932781ea4e | |||
| 5200711441 | |||
| c21cea2038 | |||
| 6a0f448419 | |||
| 7d2f65671f | |||
| a0d5f81098 | |||
| 52f26f4643 | |||
| 313e92dd9b | |||
| fee757eb41 | |||
| b5ddc7ca05 | |||
| 534fa60b2a | |||
| 390b2b8f26 | |||
| 0283e4098f | |||
| 2cdba3d1e6 | |||
| eb0b37d7ee | |||
| 198e52e990 | |||
| a50ccf77f9 | |||
| deaf15a08b | |||
| 0d8791936e | |||
| 5d167cd772 | |||
| f35c5ed119 | |||
| fc46d6bb87 | |||
| 8252b1c5c0 | |||
| c802a6ffdd | |||
| 9b06734ced | |||
| 6ab4c1a6e9 | |||
| f631073ac2 | |||
| 8aabc2807c | |||
| d931c33ced | |||
| f4324e89d9 | |||
| f04c9e2937 | |||
| 1fc2889f98 | |||
| ee0c38da66 | |||
| c1806e1ab2 | |||
| 66d0d44a00 | |||
| 2078d88c28 | |||
| 7734ad7fcd | |||
| 1a47e136e3 | |||
| cbf04ee470 | |||
| ef0aecea3b | |||
| dfc5fa1f4d | |||
| f341dc03b8 | |||
| 4585edc20e | |||
| dba9158f9a | |||
| 82f572ff95 | |||
| 518a00630e | |||
| aa61ae24dc | |||
| fb950079ef | |||
| aec8c15e7e | |||
| 7c620bdc69 | |||
| e7dde69584 | |||
| d6eded1959 | |||
| 80f851922a | |||
| 17757930a3 | |||
| a8883905a7 | |||
| 8426cbbd02 | |||
| 0b759f559c | |||
| 2d5d10ecbf | |||
| 954bd5a1c2 | |||
| ccb1c269e8 | |||
| 6dfb0c245c | |||
| 72d1047a8f | |||
| bece37e6c8 | |||
| 59cb0eb8bc | |||
| fc56217eb3 | |||
| 723cf9443e | |||
| bd94b5dfb5 | |||
| ef59c5bab9 | |||
| 62b7c655c5 |||
.github/workflows/release.yml (vendored): 18 lines changed

@@ -25,7 +25,7 @@ jobs:
       - name: Check out code
         uses: actions/checkout@v4
         with:
-          token: ${{ secrets.MY_GITHUB_TOKEN }} # Use the secret as an environment variable
+          token: ${{ secrets.GITHUB_TOKEN }} # Use the secret as an environment variable
           fetch-depth: 0
           fetch-tags: true
 
@@ -69,7 +69,7 @@ jobs:
         # https://github.com/actions/upload-release-asset has been replaced by https://github.com/softprops/action-gh-release
         uses: softprops/action-gh-release@v2
         with:
-          token: ${{ secrets.MY_GITHUB_TOKEN }} # Use the secret as an environment variable
+          token: ${{ secrets.GITHUB_TOKEN }} # Use the secret as an environment variable
           prerelease: ${{ env.PRERELEASE }}
           tag_name: ${{ env.RELEASE_TAG }}
           # The body field does not support environment variable substitution directly.
@@ -120,3 +120,17 @@ jobs:
           packages-dir: sdk/python/dist/
           password: ${{ secrets.PYPI_API_TOKEN }}
           verbose: true
+
+      - name: Build ragflow-cli
+        if: startsWith(github.ref, 'refs/tags/v')
+        run: |
+          cd admin/client && \
+          uv build
+
+      - name: Publish client package distributions to PyPI
+        if: startsWith(github.ref, 'refs/tags/v')
+        uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          packages-dir: admin/client/dist/
+          password: ${{ secrets.PYPI_API_TOKEN }}
+          verbose: true
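The two steps appended to this workflow build the ragflow-cli distribution with uv and upload whatever lands in admin/client/dist/. A minimal local sketch of the build half, assuming uv is installed and the repo layout matches the workflow (the upload itself is done in CI by pypa/gh-action-pypi-publish; the commented twine call is an assumption for local use, not part of the workflow):

```python
# Reproduce the "Build ragflow-cli" step locally.
import subprocess

# Same command the workflow runs: build sdist + wheel into admin/client/dist/
subprocess.run(["uv", "build"], cwd="admin/client", check=True)

# In CI the upload is handled by pypa/gh-action-pypi-publish; locally one
# would typically use twine instead (assumption, not part of the workflow):
# subprocess.run("twine upload dist/*", cwd="admin/client", shell=True, check=True)
```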
.github/workflows/tests.yml (vendored): 48 lines changed

@@ -34,12 +34,10 @@ jobs:
       # https://github.com/hmarr/debug-action
       #- uses: hmarr/debug-action@v2
 
-      - name: Show who triggered this workflow
+      - name: Ensure workspace ownership
         run: |
           echo "Workflow triggered by ${{ github.event_name }}"
+          echo "chown -R $USER $GITHUB_WORKSPACE" && sudo chown -R $USER $GITHUB_WORKSPACE
-      - name: Ensure workspace ownership
-        run: echo "chown -R $USER $GITHUB_WORKSPACE" && sudo chown -R $USER $GITHUB_WORKSPACE
 
       # https://github.com/actions/checkout/issues/1781
       - name: Check out code
@@ -48,6 +46,44 @@ jobs:
           fetch-depth: 0
           fetch-tags: true
 
+      - name: Check workflow duplication
+        if: ${{ !cancelled() && !failure() && (github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'ci')) }}
+        run: |
+          if [[ ${{ github.event_name }} != 'pull_request' ]]; then
+            HEAD=$(git rev-parse HEAD)
+            # Find a PR that introduced a given commit
+            gh auth login --with-token <<< "${{ secrets.GITHUB_TOKEN }}"
+            PR_NUMBER=$(gh pr list --search ${HEAD} --state merged --json number --jq .[0].number)
+            echo "HEAD=${HEAD}"
+            echo "PR_NUMBER=${PR_NUMBER}"
+            if [[ -n ${PR_NUMBER} ]]; then
+              PR_SHA_FP=${RUNNER_WORKSPACE_PREFIX}/artifacts/${GITHUB_REPOSITORY}/PR_${PR_NUMBER}
+              if [[ -f ${PR_SHA_FP} ]]; then
+                read -r PR_SHA PR_RUN_ID < "${PR_SHA_FP}"
+                # Calculate the hash of the current workspace content
+                HEAD_SHA=$(git rev-parse HEAD^{tree})
+                if [[ ${HEAD_SHA} == ${PR_SHA} ]]; then
+                  echo "Cancel myself since the workspace content hash is the same with PR #${PR_NUMBER} merged. See ${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}/actions/runs/${PR_RUN_ID} for details."
+                  gh run cancel ${GITHUB_RUN_ID}
+                  while true; do
+                    status=$(gh run view ${GITHUB_RUN_ID} --json status -q .status)
+                    [ "$status" = "completed" ] && break
+                    sleep 5
+                  done
+                  exit 1
+                fi
+              fi
+            fi
+          else
+            PR_NUMBER=${{ github.event.pull_request.number }}
+            PR_SHA_FP=${RUNNER_WORKSPACE_PREFIX}/artifacts/${GITHUB_REPOSITORY}/PR_${PR_NUMBER}
+            # Calculate the hash of the current workspace content
+            PR_SHA=$(git rev-parse HEAD^{tree})
+            echo "PR #${PR_NUMBER} workspace content hash: ${PR_SHA}"
+            mkdir -p ${RUNNER_WORKSPACE_PREFIX}/artifacts/${GITHUB_REPOSITORY}
+            echo "${PR_SHA} ${GITHUB_RUN_ID}" > ${PR_SHA_FP}
+          fi
+
       # https://github.com/astral-sh/ruff-action
       - name: Static check with Ruff
         uses: astral-sh/ruff-action@v3
@@ -59,11 +95,11 @@ jobs:
         run: |
           RUNNER_WORKSPACE_PREFIX=${RUNNER_WORKSPACE_PREFIX:-$HOME}
           sudo docker pull ubuntu:22.04
-          sudo docker build --progress=plain --build-arg LIGHTEN=1 --build-arg NEED_MIRROR=1 -f Dockerfile -t infiniflow/ragflow:nightly-slim .
+          sudo DOCKER_BUILDKIT=1 docker build --build-arg LIGHTEN=1 --build-arg NEED_MIRROR=1 -f Dockerfile -t infiniflow/ragflow:nightly-slim .
 
       - name: Build ragflow:nightly
         run: |
-          sudo docker build --progress=plain --build-arg NEED_MIRROR=1 -f Dockerfile -t infiniflow/ragflow:nightly .
+          sudo DOCKER_BUILDKIT=1 docker build --build-arg NEED_MIRROR=1 -f Dockerfile -t infiniflow/ragflow:nightly .
 
       - name: Start ragflow:nightly-slim
         run: |
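The heart of the new "Check workflow duplication" step is `git rev-parse HEAD^{tree}`: it hashes the checked-out content rather than the commit, so a push whose tree matches an already-tested merged PR can cancel its own run. A small Python sketch of that comparison, with the record format ("<tree-sha> <run-id>") and artifact-path convention taken from the script above; the function names are illustrative:

```python
# Sketch of the duplication check added to tests.yml, assuming it runs
# inside a git checkout.
import subprocess
from pathlib import Path


def workspace_tree_hash() -> str:
    # Hash of the workspace content (the tree), not the commit: two commits
    # with identical file content yield the same value.
    return subprocess.check_output(
        ["git", "rev-parse", "HEAD^{tree}"], text=True
    ).strip()


def record_pr_run(record: Path, run_id: str) -> None:
    # PR run: remember "<tree-sha> <run-id>" for later push runs to compare with.
    record.parent.mkdir(parents=True, exist_ok=True)
    record.write_text(f"{workspace_tree_hash()} {run_id}\n")


def is_duplicate_of_pr(record: Path) -> bool:
    # Push run: a duplicate if the current tree matches the recorded PR tree.
    if not record.exists():
        return False
    pr_sha, _pr_run_id = record.read_text().split()
    return workspace_tree_hash() == pr_sha
```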
.gitignore (vendored): 2 lines changed

@@ -149,7 +149,7 @@ out
 # Nuxt.js build / generate output
 .nuxt
 dist
+ragflow_cli.egg-info
 # Gatsby files
 .cache/
 # Comment in the public line in if your project uses Gatsby and not Next.js
(File name not shown in this view; the hunk context indicates the Dockerfile.)

@@ -191,6 +191,7 @@ ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
 ENV PYTHONPATH=/ragflow/
 
 COPY web web
+COPY admin admin
 COPY api api
 COPY conf conf
 COPY deepdoc deepdoc
README.md: 22 lines changed

@@ -1,6 +1,6 @@
 <div align="center">
 <a href="https://demo.ragflow.io/">
-<img src="web/src/assets/logo-with-text.png" width="520" alt="ragflow logo">
+<img src="web/src/assets/logo-with-text.svg" width="520" alt="ragflow logo">
 </a>
 </div>
 
@@ -22,7 +22,7 @@
 <img alt="Static Badge" src="https://img.shields.io/badge/Online-Demo-4e6b99">
 </a>
 <a href="https://hub.docker.com/r/infiniflow/ragflow" target="_blank">
-<img src="https://img.shields.io/docker/pulls/infiniflow/ragflow?label=Docker%20Pulls&color=0db7ed&logo=docker&logoColor=white&style=flat-square" alt="docker pull infiniflow/ragflow:v0.20.5">
+<img src="https://img.shields.io/docker/pulls/infiniflow/ragflow?label=Docker%20Pulls&color=0db7ed&logo=docker&logoColor=white&style=flat-square" alt="docker pull infiniflow/ragflow:v0.21.1">
 </a>
 <a href="https://github.com/infiniflow/ragflow/releases/latest">
 <img src="https://img.shields.io/github/v/release/infiniflow/ragflow?color=blue&label=Latest%20Release" alt="Latest Release">
@@ -84,8 +84,8 @@ Try our demo at [https://demo.ragflow.io](https://demo.ragflow.io).
 
 ## 🔥 Latest Updates
 
+- 2025-10-15 Supports orchestrable ingestion pipeline.
 - 2025-08-08 Supports OpenAI's latest GPT-5 series models.
-- 2025-08-04 Supports new models, including Kimi K2 and Grok 4.
 - 2025-08-01 Supports agentic workflow and MCP.
 - 2025-05-23 Adds a Python/JavaScript code executor component to Agent.
 - 2025-05-05 Supports cross-language query.
@@ -135,7 +135,7 @@ releases! 🌟
 ## 🔎 System Architecture
 
 <div align="center" style="margin-top:20px;margin-bottom:20px;">
-<img src="https://github.com/infiniflow/ragflow/assets/12318111/d6ac5664-c237-4200-a7c2-a4a00691b485" width="1000"/>
+<img src="https://github.com/user-attachments/assets/31b0dd6f-ca4f-445a-9457-70cb44a381b2" width="1000"/>
 </div>
 
 ## 🎬 Get Started
@@ -187,7 +187,7 @@ releases! 🌟
 > All Docker images are built for x86 platforms. We don't currently offer Docker images for ARM64.
 > If you are on an ARM64 platform, follow [this guide](https://ragflow.io/docs/dev/build_docker_image) to build a Docker image compatible with your system.
 
-> The command below downloads the `v0.20.5-slim` edition of the RAGFlow Docker image. See the following table for descriptions of different RAGFlow editions. To download a RAGFlow edition different from `v0.20.5-slim`, update the `RAGFLOW_IMAGE` variable accordingly in **docker/.env** before using `docker compose` to start the server. For example: set `RAGFLOW_IMAGE=infiniflow/ragflow:v0.20.5` for the full edition `v0.20.5`.
+> The command below downloads the `v0.21.1-slim` edition of the RAGFlow Docker image. See the following table for descriptions of different RAGFlow editions. To download a RAGFlow edition different from `v0.21.1-slim`, update the `RAGFLOW_IMAGE` variable accordingly in **docker/.env** before using `docker compose` to start the server. For example: set `RAGFLOW_IMAGE=infiniflow/ragflow:v0.21.1` for the full edition `v0.21.1`.
 
 ```bash
 $ cd ragflow/docker
@@ -200,8 +200,8 @@ releases! 🌟
 
 | RAGFlow image tag | Image size (GB) | Has embedding models? | Stable? |
 |-------------------|-----------------|-----------------------|--------------------------|
-| v0.20.5 | ≈9 | :heavy_check_mark: | Stable release |
+| v0.21.1 | ≈9 | :heavy_check_mark: | Stable release |
-| v0.20.5-slim | ≈2 | ❌ | Stable release |
+| v0.21.1-slim | ≈2 | ❌ | Stable release |
 | nightly | ≈9 | :heavy_check_mark: | _Unstable_ nightly build |
 | nightly-slim | ≈2 | ❌ | _Unstable_ nightly build |
 
@@ -341,11 +341,13 @@ docker build --platform linux/amd64 -f Dockerfile -t infiniflow/ragflow:nightly
 5. If your operating system does not have jemalloc, please install it as follows:
 
    ```bash
-   # ubuntu
+   # Ubuntu
    sudo apt-get install libjemalloc-dev
-   # centos
+   # CentOS
    sudo yum install jemalloc
-   # mac
+   # OpenSUSE
+   sudo zypper install jemalloc
+   # macOS
    sudo brew install jemalloc
   ```
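The edition-selection note in this README diff boils down to one variable in docker/.env. A sketch of switching editions programmatically, assuming the default line is `RAGFLOW_IMAGE=infiniflow/ragflow:v0.21.1-slim` as the README implies:

```python
# Flip docker/.env from the slim edition to the full v0.21.1 edition,
# then start the stack, following the README's instructions.
from pathlib import Path
import subprocess

env_file = Path("ragflow/docker/.env")
text = env_file.read_text()
# Assumed default; adjust if your .env pins a different tag.
text = text.replace(
    "RAGFLOW_IMAGE=infiniflow/ragflow:v0.21.1-slim",
    "RAGFLOW_IMAGE=infiniflow/ragflow:v0.21.1",
)
env_file.write_text(text)

subprocess.run(
    ["docker", "compose", "-f", "docker-compose.yml", "up", "-d"],
    cwd="ragflow/docker",
    check=True,
)
```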
README_id.md: 14 lines changed

@@ -1,6 +1,6 @@
 <div align="center">
 <a href="https://demo.ragflow.io/">
-<img src="web/src/assets/logo-with-text.png" width="520" alt="Logo ragflow">
+<img src="web/src/assets/logo-with-text.svg" width="520" alt="Logo ragflow">
 </a>
 </div>
 
@@ -22,7 +22,7 @@
 <img alt="Lencana Daring" src="https://img.shields.io/badge/Online-Demo-4e6b99">
 </a>
 <a href="https://hub.docker.com/r/infiniflow/ragflow" target="_blank">
-<img src="https://img.shields.io/docker/pulls/infiniflow/ragflow?label=Docker%20Pulls&color=0db7ed&logo=docker&logoColor=white&style=flat-square" alt="docker pull infiniflow/ragflow:v0.20.5">
+<img src="https://img.shields.io/docker/pulls/infiniflow/ragflow?label=Docker%20Pulls&color=0db7ed&logo=docker&logoColor=white&style=flat-square" alt="docker pull infiniflow/ragflow:v0.21.1">
 </a>
 <a href="https://github.com/infiniflow/ragflow/releases/latest">
 <img src="https://img.shields.io/github/v/release/infiniflow/ragflow?color=blue&label=Rilis%20Terbaru" alt="Rilis Terbaru">
@@ -80,8 +80,8 @@ Coba demo kami di [https://demo.ragflow.io](https://demo.ragflow.io).
 
 ## 🔥 Pembaruan Terbaru
 
+- 2025-10-15 Dukungan untuk jalur data yang terorkestrasi.
 - 2025-08-08 Mendukung model seri GPT-5 terbaru dari OpenAI.
-- 2025-08-04 Mendukung model baru, termasuk Kimi K2 dan Grok 4.
 - 2025-08-01 Mendukung alur kerja agen dan MCP.
 - 2025-05-23 Menambahkan komponen pelaksana kode Python/JS ke Agen.
 - 2025-05-05 Mendukung kueri lintas bahasa.
@@ -129,7 +129,7 @@ Coba demo kami di [https://demo.ragflow.io](https://demo.ragflow.io).
 ## 🔎 Arsitektur Sistem
 
 <div align="center" style="margin-top:20px;margin-bottom:20px;">
-<img src="https://github.com/infiniflow/ragflow/assets/12318111/d6ac5664-c237-4200-a7c2-a4a00691b485" width="1000"/>
+<img src="https://github.com/user-attachments/assets/31b0dd6f-ca4f-445a-9457-70cb44a381b2" width="1000"/>
 </div>
 
 ## 🎬 Mulai
@@ -181,7 +181,7 @@ Coba demo kami di [https://demo.ragflow.io](https://demo.ragflow.io).
 > Semua gambar Docker dibangun untuk platform x86. Saat ini, kami tidak menawarkan gambar Docker untuk ARM64.
 > Jika Anda menggunakan platform ARM64, [silakan gunakan panduan ini untuk membangun gambar Docker yang kompatibel dengan sistem Anda](https://ragflow.io/docs/dev/build_docker_image).
 
-> Perintah di bawah ini mengunduh edisi v0.20.5-slim dari gambar Docker RAGFlow. Silakan merujuk ke tabel berikut untuk deskripsi berbagai edisi RAGFlow. Untuk mengunduh edisi RAGFlow yang berbeda dari v0.20.5-slim, perbarui variabel RAGFLOW_IMAGE di docker/.env sebelum menggunakan docker compose untuk memulai server. Misalnya, atur RAGFLOW_IMAGE=infiniflow/ragflow:v0.20.5 untuk edisi lengkap v0.20.5.
+> Perintah di bawah ini mengunduh edisi v0.21.1-slim dari gambar Docker RAGFlow. Silakan merujuk ke tabel berikut untuk deskripsi berbagai edisi RAGFlow. Untuk mengunduh edisi RAGFlow yang berbeda dari v0.21.1-slim, perbarui variabel RAGFLOW_IMAGE di docker/.env sebelum menggunakan docker compose untuk memulai server. Misalnya, atur RAGFLOW_IMAGE=infiniflow/ragflow:v0.21.1 untuk edisi lengkap v0.21.1.
 
 ```bash
 $ cd ragflow/docker
@@ -194,8 +194,8 @@ $ docker compose -f docker-compose.yml up -d
 
 | RAGFlow image tag | Image size (GB) | Has embedding models? | Stable? |
 | ----------------- | --------------- | --------------------- | ------------------------ |
-| v0.20.5 | ≈9 | :heavy_check_mark: | Stable release |
+| v0.21.1 | ≈9 | :heavy_check_mark: | Stable release |
-| v0.20.5-slim | ≈2 | ❌ | Stable release |
+| v0.21.1-slim | ≈2 | ❌ | Stable release |
 | nightly | ≈9 | :heavy_check_mark: | _Unstable_ nightly build |
 | nightly-slim | ≈2 | ❌ | _Unstable_ nightly build |
 
README_ja.md: 14 lines changed

@@ -1,6 +1,6 @@
 <div align="center">
 <a href="https://demo.ragflow.io/">
-<img src="web/src/assets/logo-with-text.png" width="350" alt="ragflow logo">
+<img src="web/src/assets/logo-with-text.svg" width="350" alt="ragflow logo">
 </a>
 </div>
 
@@ -22,7 +22,7 @@
 <img alt="Static Badge" src="https://img.shields.io/badge/Online-Demo-4e6b99">
 </a>
 <a href="https://hub.docker.com/r/infiniflow/ragflow" target="_blank">
-<img src="https://img.shields.io/docker/pulls/infiniflow/ragflow?label=Docker%20Pulls&color=0db7ed&logo=docker&logoColor=white&style=flat-square" alt="docker pull infiniflow/ragflow:v0.20.5">
+<img src="https://img.shields.io/docker/pulls/infiniflow/ragflow?label=Docker%20Pulls&color=0db7ed&logo=docker&logoColor=white&style=flat-square" alt="docker pull infiniflow/ragflow:v0.21.1">
 </a>
 <a href="https://github.com/infiniflow/ragflow/releases/latest">
 <img src="https://img.shields.io/github/v/release/infiniflow/ragflow?color=blue&label=Latest%20Release" alt="Latest Release">
@@ -60,8 +60,8 @@
 
 ## 🔥 最新情報
 
+- 2025-10-15 オーケストレーションされたデータパイプラインのサポート。
 - 2025-08-08 OpenAI の最新 GPT-5 シリーズモデルをサポートします。
-- 2025-08-04 新モデル、キミK2およびGrok 4をサポート。
 - 2025-08-01 エージェントワークフローとMCPをサポート。
 - 2025-05-23 エージェントに Python/JS コードエグゼキュータコンポーネントを追加しました。
 - 2025-05-05 言語間クエリをサポートしました。
@@ -109,7 +109,7 @@
 ## 🔎 システム構成
 
 <div align="center" style="margin-top:20px;margin-bottom:20px;">
-<img src="https://github.com/infiniflow/ragflow/assets/12318111/d6ac5664-c237-4200-a7c2-a4a00691b485" width="1000"/>
+<img src="https://github.com/user-attachments/assets/31b0dd6f-ca4f-445a-9457-70cb44a381b2" width="1000"/>
 </div>
 
 ## 🎬 初期設定
@@ -160,7 +160,7 @@
 > 現在、公式に提供されているすべての Docker イメージは x86 アーキテクチャ向けにビルドされており、ARM64 用の Docker イメージは提供されていません。
 > ARM64 アーキテクチャのオペレーティングシステムを使用している場合は、[このドキュメント](https://ragflow.io/docs/dev/build_docker_image)を参照して Docker イメージを自分でビルドしてください。
 
-> 以下のコマンドは、RAGFlow Docker イメージの v0.20.5-slim エディションをダウンロードします。異なる RAGFlow エディションの説明については、以下の表を参照してください。v0.20.5-slim とは異なるエディションをダウンロードするには、docker/.env ファイルの RAGFLOW_IMAGE 変数を適宜更新し、docker compose を使用してサーバーを起動してください。例えば、完全版 v0.20.5 をダウンロードするには、RAGFLOW_IMAGE=infiniflow/ragflow:v0.20.5 と設定します。
+> 以下のコマンドは、RAGFlow Docker イメージの v0.21.1-slim エディションをダウンロードします。異なる RAGFlow エディションの説明については、以下の表を参照してください。v0.21.1-slim とは異なるエディションをダウンロードするには、docker/.env ファイルの RAGFLOW_IMAGE 変数を適宜更新し、docker compose を使用してサーバーを起動してください。例えば、完全版 v0.21.1 をダウンロードするには、RAGFLOW_IMAGE=infiniflow/ragflow:v0.21.1 と設定します。
 
 ```bash
 $ cd ragflow/docker
@@ -173,8 +173,8 @@
 
 | RAGFlow image tag | Image size (GB) | Has embedding models? | Stable? |
 | ----------------- | --------------- | --------------------- | ------------------------ |
-| v0.20.5 | ≈9 | :heavy_check_mark: | Stable release |
+| v0.21.1 | ≈9 | :heavy_check_mark: | Stable release |
-| v0.20.5-slim | ≈2 | ❌ | Stable release |
+| v0.21.1-slim | ≈2 | ❌ | Stable release |
 | nightly | ≈9 | :heavy_check_mark: | _Unstable_ nightly build |
 | nightly-slim | ≈2 | ❌ | _Unstable_ nightly build |
 
README_ko.md: 14 lines changed

@@ -1,6 +1,6 @@
 <div align="center">
 <a href="https://demo.ragflow.io/">
-<img src="web/src/assets/logo-with-text.png" width="520" alt="ragflow logo">
+<img src="web/src/assets/logo-with-text.svg" width="520" alt="ragflow logo">
 </a>
 </div>
 
@@ -22,7 +22,7 @@
 <img alt="Static Badge" src="https://img.shields.io/badge/Online-Demo-4e6b99">
 </a>
 <a href="https://hub.docker.com/r/infiniflow/ragflow" target="_blank">
-<img src="https://img.shields.io/docker/pulls/infiniflow/ragflow?label=Docker%20Pulls&color=0db7ed&logo=docker&logoColor=white&style=flat-square" alt="docker pull infiniflow/ragflow:v0.20.5">
+<img src="https://img.shields.io/docker/pulls/infiniflow/ragflow?label=Docker%20Pulls&color=0db7ed&logo=docker&logoColor=white&style=flat-square" alt="docker pull infiniflow/ragflow:v0.21.1">
 </a>
 <a href="https://github.com/infiniflow/ragflow/releases/latest">
 <img src="https://img.shields.io/github/v/release/infiniflow/ragflow?color=blue&label=Latest%20Release" alt="Latest Release">
@@ -60,8 +60,8 @@
 
 ## 🔥 업데이트
 
+- 2025-10-15 조정된 데이터 파이프라인 지원.
 - 2025-08-08 OpenAI의 최신 GPT-5 시리즈 모델을 지원합니다.
-- 2025-08-04 새로운 모델인 Kimi K2와 Grok 4를 포함하여 지원합니다.
 - 2025-08-01 에이전트 워크플로우와 MCP를 지원합니다.
 - 2025-05-23 Agent에 Python/JS 코드 실행기 구성 요소를 추가합니다.
 - 2025-05-05 언어 간 쿼리를 지원합니다.
@@ -109,7 +109,7 @@
 ## 🔎 시스템 아키텍처
 
 <div align="center" style="margin-top:20px;margin-bottom:20px;">
-<img src="https://github.com/infiniflow/ragflow/assets/12318111/d6ac5664-c237-4200-a7c2-a4a00691b485" width="1000"/>
+<img src="https://github.com/user-attachments/assets/31b0dd6f-ca4f-445a-9457-70cb44a381b2" width="1000"/>
 </div>
 
 ## 🎬 시작하기
@@ -160,7 +160,7 @@
 > 모든 Docker 이미지는 x86 플랫폼을 위해 빌드되었습니다. 우리는 현재 ARM64 플랫폼을 위한 Docker 이미지를 제공하지 않습니다.
 > ARM64 플랫폼을 사용 중이라면, [시스템과 호환되는 Docker 이미지를 빌드하려면 이 가이드를 사용해 주세요](https://ragflow.io/docs/dev/build_docker_image).
 
-> 아래 명령어는 RAGFlow Docker 이미지의 v0.20.5-slim 버전을 다운로드합니다. 다양한 RAGFlow 버전에 대한 설명은 다음 표를 참조하십시오. v0.20.5-slim과 다른 RAGFlow 버전을 다운로드하려면, docker/.env 파일에서 RAGFLOW_IMAGE 변수를 적절히 업데이트한 후 docker compose를 사용하여 서버를 시작하십시오. 예를 들어, 전체 버전인 v0.20.5을 다운로드하려면 RAGFLOW_IMAGE=infiniflow/ragflow:v0.20.5로 설정합니다.
+> 아래 명령어는 RAGFlow Docker 이미지의 v0.21.1-slim 버전을 다운로드합니다. 다양한 RAGFlow 버전에 대한 설명은 다음 표를 참조하십시오. v0.21.1-slim과 다른 RAGFlow 버전을 다운로드하려면, docker/.env 파일에서 RAGFLOW_IMAGE 변수를 적절히 업데이트한 후 docker compose를 사용하여 서버를 시작하십시오. 예를 들어, 전체 버전인 v0.21.1을 다운로드하려면 RAGFLOW_IMAGE=infiniflow/ragflow:v0.21.1로 설정합니다.
 
 ```bash
 $ cd ragflow/docker
@@ -173,8 +173,8 @@
 
 | RAGFlow image tag | Image size (GB) | Has embedding models? | Stable? |
 | ----------------- | --------------- | --------------------- | ------------------------ |
-| v0.20.5 | ≈9 | :heavy_check_mark: | Stable release |
+| v0.21.1 | ≈9 | :heavy_check_mark: | Stable release |
-| v0.20.5-slim | ≈2 | ❌ | Stable release |
+| v0.21.1-slim | ≈2 | ❌ | Stable release |
 | nightly | ≈9 | :heavy_check_mark: | _Unstable_ nightly build |
 | nightly-slim | ≈2 | ❌ | _Unstable_ nightly build |
 
(File name not shown in this view; the content is the Portuguese README.)

@@ -1,6 +1,6 @@
 <div align="center">
 <a href="https://demo.ragflow.io/">
-<img src="web/src/assets/logo-with-text.png" width="520" alt="ragflow logo">
+<img src="web/src/assets/logo-with-text.svg" width="520" alt="ragflow logo">
 </a>
 </div>
 
@@ -22,7 +22,7 @@
 <img alt="Badge Estático" src="https://img.shields.io/badge/Online-Demo-4e6b99">
 </a>
 <a href="https://hub.docker.com/r/infiniflow/ragflow" target="_blank">
-<img src="https://img.shields.io/docker/pulls/infiniflow/ragflow?label=Docker%20Pulls&color=0db7ed&logo=docker&logoColor=white&style=flat-square" alt="docker pull infiniflow/ragflow:v0.20.5">
+<img src="https://img.shields.io/docker/pulls/infiniflow/ragflow?label=Docker%20Pulls&color=0db7ed&logo=docker&logoColor=white&style=flat-square" alt="docker pull infiniflow/ragflow:v0.21.1">
 </a>
 <a href="https://github.com/infiniflow/ragflow/releases/latest">
 <img src="https://img.shields.io/github/v/release/infiniflow/ragflow?color=blue&label=Última%20Relese" alt="Última Versão">
@@ -80,8 +80,8 @@ Experimente nossa demo em [https://demo.ragflow.io](https://demo.ragflow.io).
 
 ## 🔥 Últimas Atualizações
 
+- 10-15-2025 Suporte para pipelines de dados orquestrados.
 - 08-08-2025 Suporta a mais recente série GPT-5 da OpenAI.
-- 04-08-2025 Suporta novos modelos, incluindo Kimi K2 e Grok 4.
 - 01-08-2025 Suporta fluxo de trabalho agente e MCP.
 - 23-05-2025 Adicione o componente executor de código Python/JS ao Agente.
 - 05-05-2025 Suporte a consultas entre idiomas.
@@ -129,7 +129,7 @@ Experimente nossa demo em [https://demo.ragflow.io](https://demo.ragflow.io).
 ## 🔎 Arquitetura do Sistema
 
 <div align="center" style="margin-top:20px;margin-bottom:20px;">
-<img src="https://github.com/infiniflow/ragflow/assets/12318111/d6ac5664-c237-4200-a7c2-a4a00691b485" width="1000"/>
+<img src="https://github.com/user-attachments/assets/31b0dd6f-ca4f-445a-9457-70cb44a381b2" width="1000"/>
 </div>
 
 ## 🎬 Primeiros Passos
@@ -180,7 +180,7 @@ Experimente nossa demo em [https://demo.ragflow.io](https://demo.ragflow.io).
 > Todas as imagens Docker são construídas para plataformas x86. Atualmente, não oferecemos imagens Docker para ARM64.
 > Se você estiver usando uma plataforma ARM64, por favor, utilize [este guia](https://ragflow.io/docs/dev/build_docker_image) para construir uma imagem Docker compatível com o seu sistema.
 
-> O comando abaixo baixa a edição `v0.20.5-slim` da imagem Docker do RAGFlow. Consulte a tabela a seguir para descrições de diferentes edições do RAGFlow. Para baixar uma edição do RAGFlow diferente da `v0.20.5-slim`, atualize a variável `RAGFLOW_IMAGE` conforme necessário no **docker/.env** antes de usar `docker compose` para iniciar o servidor. Por exemplo: defina `RAGFLOW_IMAGE=infiniflow/ragflow:v0.20.5` para a edição completa `v0.20.5`.
+> O comando abaixo baixa a edição `v0.21.1-slim` da imagem Docker do RAGFlow. Consulte a tabela a seguir para descrições de diferentes edições do RAGFlow. Para baixar uma edição do RAGFlow diferente da `v0.21.1-slim`, atualize a variável `RAGFLOW_IMAGE` conforme necessário no **docker/.env** antes de usar `docker compose` para iniciar o servidor. Por exemplo: defina `RAGFLOW_IMAGE=infiniflow/ragflow:v0.21.1` para a edição completa `v0.21.1`.
 
 ```bash
 $ cd ragflow/docker
@@ -193,8 +193,8 @@ Experimente nossa demo em [https://demo.ragflow.io](https://demo.ragflow.io).
 
 | Tag da imagem RAGFlow | Tamanho da imagem (GB) | Possui modelos de incorporação? | Estável? |
 | --------------------- | ---------------------- | ------------------------------- | ------------------------ |
-| v0.20.5 | ~9 | :heavy_check_mark: | Lançamento estável |
+| v0.21.1 | ~9 | :heavy_check_mark: | Lançamento estável |
-| v0.20.5-slim | ~2 | ❌ | Lançamento estável |
+| v0.21.1-slim | ~2 | ❌ | Lançamento estável |
 | nightly | ~9 | :heavy_check_mark: | _Instável_ build noturno |
 | nightly-slim | ~2 | ❌ | _Instável_ build noturno |
 
(File name not shown in this view; the content is the Traditional Chinese README.)

@@ -1,6 +1,6 @@
 <div align="center">
 <a href="https://demo.ragflow.io/">
-<img src="web/src/assets/logo-with-text.png" width="350" alt="ragflow logo">
+<img src="web/src/assets/logo-with-text.svg" width="350" alt="ragflow logo">
 </a>
 </div>
 
@@ -22,7 +22,7 @@
 <img alt="Static Badge" src="https://img.shields.io/badge/Online-Demo-4e6b99">
 </a>
 <a href="https://hub.docker.com/r/infiniflow/ragflow" target="_blank">
-<img src="https://img.shields.io/docker/pulls/infiniflow/ragflow?label=Docker%20Pulls&color=0db7ed&logo=docker&logoColor=white&style=flat-square" alt="docker pull infiniflow/ragflow:v0.20.5">
+<img src="https://img.shields.io/docker/pulls/infiniflow/ragflow?label=Docker%20Pulls&color=0db7ed&logo=docker&logoColor=white&style=flat-square" alt="docker pull infiniflow/ragflow:v0.21.1">
 </a>
 <a href="https://github.com/infiniflow/ragflow/releases/latest">
 <img src="https://img.shields.io/github/v/release/infiniflow/ragflow?color=blue&label=Latest%20Release" alt="Latest Release">
@@ -83,8 +83,8 @@
 
 ## 🔥 近期更新
 
+- 2025-10-15 支援可編排的資料管道。
 - 2025-08-08 支援 OpenAI 最新的 GPT-5 系列模型。
-- 2025-08-04 支援 Kimi K2 和 Grok 4 等模型.
 - 2025-08-01 支援 agentic workflow 和 MCP
 - 2025-05-23 為 Agent 新增 Python/JS 程式碼執行器元件。
 - 2025-05-05 支援跨語言查詢。
@@ -132,7 +132,7 @@
 ## 🔎 系統架構
 
 <div align="center" style="margin-top:20px;margin-bottom:20px;">
-<img src="https://github.com/infiniflow/ragflow/assets/12318111/d6ac5664-c237-4200-a7c2-a4a00691b485" width="1000"/>
+<img src="https://github.com/user-attachments/assets/31b0dd6f-ca4f-445a-9457-70cb44a381b2" width="1000"/>
 </div>
 
 ## 🎬 快速開始
@@ -183,7 +183,7 @@
 > 所有 Docker 映像檔都是為 x86 平台建置的。目前,我們不提供 ARM64 平台的 Docker 映像檔。
 > 如果您使用的是 ARM64 平台,請使用 [這份指南](https://ragflow.io/docs/dev/build_docker_image) 來建置適合您系統的 Docker 映像檔。
 
-> 執行以下指令會自動下載 RAGFlow slim Docker 映像 `v0.20.5-slim`。請參考下表查看不同 Docker 發行版的說明。如需下載不同於 `v0.20.5-slim` 的 Docker 映像,請在執行 `docker compose` 啟動服務之前先更新 **docker/.env** 檔案內的 `RAGFLOW_IMAGE` 變數。例如,你可以透過設定 `RAGFLOW_IMAGE=infiniflow/ragflow:v0.20.5` 來下載 RAGFlow 鏡像的 `v0.20.5` 完整發行版。
+> 執行以下指令會自動下載 RAGFlow slim Docker 映像 `v0.21.1-slim`。請參考下表查看不同 Docker 發行版的說明。如需下載不同於 `v0.21.1-slim` 的 Docker 映像,請在執行 `docker compose` 啟動服務之前先更新 **docker/.env** 檔案內的 `RAGFLOW_IMAGE` 變數。例如,你可以透過設定 `RAGFLOW_IMAGE=infiniflow/ragflow:v0.21.1` 來下載 RAGFlow 鏡像的 `v0.21.1` 完整發行版。
 
 ```bash
 $ cd ragflow/docker
@@ -196,8 +196,8 @@
 
 | RAGFlow image tag | Image size (GB) | Has embedding models? | Stable? |
 | ----------------- | --------------- | --------------------- | ------------------------ |
-| v0.20.5 | ≈9 | :heavy_check_mark: | Stable release |
+| v0.21.1 | ≈9 | :heavy_check_mark: | Stable release |
-| v0.20.5-slim | ≈2 | ❌ | Stable release |
+| v0.21.1-slim | ≈2 | ❌ | Stable release |
 | nightly | ≈9 | :heavy_check_mark: | _Unstable_ nightly build |
 | nightly-slim | ≈2 | ❌ | _Unstable_ nightly build |
 
README_zh.md: 16 lines changed

@@ -1,6 +1,6 @@
 <div align="center">
 <a href="https://demo.ragflow.io/">
-<img src="web/src/assets/logo-with-text.png" width="350" alt="ragflow logo">
+<img src="web/src/assets/logo-with-text.svg" width="350" alt="ragflow logo">
 </a>
 </div>
 
@@ -22,7 +22,7 @@
 <img alt="Static Badge" src="https://img.shields.io/badge/Online-Demo-4e6b99">
 </a>
 <a href="https://hub.docker.com/r/infiniflow/ragflow" target="_blank">
-<img src="https://img.shields.io/docker/pulls/infiniflow/ragflow?label=Docker%20Pulls&color=0db7ed&logo=docker&logoColor=white&style=flat-square" alt="docker pull infiniflow/ragflow:v0.20.5">
+<img src="https://img.shields.io/docker/pulls/infiniflow/ragflow?label=Docker%20Pulls&color=0db7ed&logo=docker&logoColor=white&style=flat-square" alt="docker pull infiniflow/ragflow:v0.21.1">
 </a>
 <a href="https://github.com/infiniflow/ragflow/releases/latest">
 <img src="https://img.shields.io/github/v/release/infiniflow/ragflow?color=blue&label=Latest%20Release" alt="Latest Release">
@@ -83,8 +83,8 @@
 
 ## 🔥 近期更新
 
-- 2025-08-08 支持 OpenAI 最新的 GPT-5 系列模型.
-- 2025-08-04 新增对 Kimi K2 和 Grok 4 等模型的支持.
+- 2025-10-15 支持可编排的数据管道。
+- 2025-08-08 支持 OpenAI 最新的 GPT-5 系列模型。
 - 2025-08-01 支持 agentic workflow 和 MCP。
 - 2025-05-23 Agent 新增 Python/JS 代码执行器组件。
 - 2025-05-05 支持跨语言查询。
@@ -132,7 +132,7 @@
 ## 🔎 系统架构
 
 <div align="center" style="margin-top:20px;margin-bottom:20px;">
-<img src="https://github.com/infiniflow/ragflow/assets/12318111/d6ac5664-c237-4200-a7c2-a4a00691b485" width="1000"/>
+<img src="https://github.com/user-attachments/assets/31b0dd6f-ca4f-445a-9457-70cb44a381b2" width="1000"/>
 </div>
 
 ## 🎬 快速开始
@@ -183,7 +183,7 @@
 > 请注意,目前官方提供的所有 Docker 镜像均基于 x86 架构构建,并不提供基于 ARM64 的 Docker 镜像。
 > 如果你的操作系统是 ARM64 架构,请参考[这篇文档](https://ragflow.io/docs/dev/build_docker_image)自行构建 Docker 镜像。
 
-> 运行以下命令会自动下载 RAGFlow slim Docker 镜像 `v0.20.5-slim`。请参考下表查看不同 Docker 发行版的描述。如需下载不同于 `v0.20.5-slim` 的 Docker 镜像,请在运行 `docker compose` 启动服务之前先更新 **docker/.env** 文件内的 `RAGFLOW_IMAGE` 变量。比如,你可以通过设置 `RAGFLOW_IMAGE=infiniflow/ragflow:v0.20.5` 来下载 RAGFlow 镜像的 `v0.20.5` 完整发行版。
+> 运行以下命令会自动下载 RAGFlow slim Docker 镜像 `v0.21.1-slim`。请参考下表查看不同 Docker 发行版的描述。如需下载不同于 `v0.21.1-slim` 的 Docker 镜像,请在运行 `docker compose` 启动服务之前先更新 **docker/.env** 文件内的 `RAGFLOW_IMAGE` 变量。比如,你可以通过设置 `RAGFLOW_IMAGE=infiniflow/ragflow:v0.21.1` 来下载 RAGFlow 镜像的 `v0.21.1` 完整发行版。
 
 ```bash
 $ cd ragflow/docker
@@ -196,8 +196,8 @@
 
 | RAGFlow image tag | Image size (GB) | Has embedding models? | Stable? |
 | ----------------- | --------------- | --------------------- | ------------------------ |
-| v0.20.5 | ≈9 | :heavy_check_mark: | Stable release |
+| v0.21.1 | ≈9 | :heavy_check_mark: | Stable release |
-| v0.20.5-slim | ≈2 | ❌ | Stable release |
+| v0.21.1-slim | ≈2 | ❌ | Stable release |
 | nightly | ≈9 | :heavy_check_mark: | _Unstable_ nightly build |
 | nightly-slim | ≈2 | ❌ | _Unstable_ nightly build |
 
Deleted file: 567 lines removed (file name not shown in this view; the listing below ends where this view truncates).

```python
import argparse
import base64
from Cryptodome.PublicKey import RSA
from Cryptodome.Cipher import PKCS1_v1_5 as Cipher_pkcs1_v1_5
from typing import Dict, List, Any
from lark import Lark, Transformer, Tree
import requests
from requests.auth import HTTPBasicAuth

GRAMMAR = r"""
start: command

command: sql_command | meta_command

sql_command: list_services
           | show_service
           | startup_service
           | shutdown_service
           | restart_service
           | list_users
           | show_user
           | drop_user
           | alter_user
           | create_user
           | activate_user
           | list_datasets
           | list_agents

// meta command definition
meta_command: "\\" meta_command_name [meta_args]

meta_command_name: /[a-zA-Z?]+/
meta_args: (meta_arg)+

meta_arg: /[^\s"']+/ | quoted_string

// command definition

LIST: "LIST"i
SERVICES: "SERVICES"i
SHOW: "SHOW"i
CREATE: "CREATE"i
SERVICE: "SERVICE"i
SHUTDOWN: "SHUTDOWN"i
STARTUP: "STARTUP"i
RESTART: "RESTART"i
USERS: "USERS"i
DROP: "DROP"i
USER: "USER"i
ALTER: "ALTER"i
ACTIVE: "ACTIVE"i
PASSWORD: "PASSWORD"i
DATASETS: "DATASETS"i
OF: "OF"i
AGENTS: "AGENTS"i

list_services: LIST SERVICES ";"
show_service: SHOW SERVICE NUMBER ";"
startup_service: STARTUP SERVICE NUMBER ";"
shutdown_service: SHUTDOWN SERVICE NUMBER ";"
restart_service: RESTART SERVICE NUMBER ";"

list_users: LIST USERS ";"
drop_user: DROP USER quoted_string ";"
alter_user: ALTER USER PASSWORD quoted_string quoted_string ";"
show_user: SHOW USER quoted_string ";"
create_user: CREATE USER quoted_string quoted_string ";"
activate_user: ALTER USER ACTIVE quoted_string status ";"

list_datasets: LIST DATASETS OF quoted_string ";"
list_agents: LIST AGENTS OF quoted_string ";"

identifier: WORD
quoted_string: QUOTED_STRING
status: WORD

QUOTED_STRING: /'[^']+'/ | /"[^"]+"/
WORD: /[a-zA-Z0-9_\-\.]+/
NUMBER: /[0-9]+/

%import common.WS
%ignore WS
"""


class AdminTransformer(Transformer):

    def start(self, items):
        return items[0]

    def command(self, items):
        return items[0]

    def list_services(self, items):
        result = {'type': 'list_services'}
        return result

    def show_service(self, items):
        service_id = int(items[2])
        return {"type": "show_service", "number": service_id}

    def startup_service(self, items):
        service_id = int(items[2])
        return {"type": "startup_service", "number": service_id}

    def shutdown_service(self, items):
        service_id = int(items[2])
        return {"type": "shutdown_service", "number": service_id}

    def restart_service(self, items):
        service_id = int(items[2])
        return {"type": "restart_service", "number": service_id}

    def list_users(self, items):
        return {"type": "list_users"}

    def show_user(self, items):
        user_name = items[2]
        return {"type": "show_user", "username": user_name}

    def drop_user(self, items):
        user_name = items[2]
        return {"type": "drop_user", "username": user_name}

    def alter_user(self, items):
        user_name = items[3]
        new_password = items[4]
        return {"type": "alter_user", "username": user_name, "password": new_password}

    def create_user(self, items):
        user_name = items[2]
        password = items[3]
        return {"type": "create_user", "username": user_name, "password": password, "role": "user"}

    def activate_user(self, items):
        user_name = items[3]
        activate_status = items[4]
        return {"type": "activate_user", "activate_status": activate_status, "username": user_name}

    def list_datasets(self, items):
        user_name = items[3]
        return {"type": "list_datasets", "username": user_name}

    def list_agents(self, items):
        user_name = items[3]
        return {"type": "list_agents", "username": user_name}

    def meta_command(self, items):
        command_name = str(items[0]).lower()
        args = items[1:] if len(items) > 1 else []

        # handle quoted parameter
        parsed_args = []
        for arg in args:
            if hasattr(arg, 'value'):
                parsed_args.append(arg.value)
            else:
                parsed_args.append(str(arg))

        return {'type': 'meta', 'command': command_name, 'args': parsed_args}

    def meta_command_name(self, items):
        return items[0]

    def meta_args(self, items):
        return items


def encode_to_base64(input_string):
    base64_encoded = base64.b64encode(input_string.encode('utf-8'))
    return base64_encoded.decode('utf-8')


def encrypt(input_string):
    pub = '-----BEGIN PUBLIC KEY-----\nMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEArq9XTUSeYr2+N1h3Afl/z8Dse/2yD0ZGrKwx+EEEcdsBLca9Ynmx3nIB5obmLlSfmskLpBo0UACBmB5rEjBp2Q2f3AG3Hjd4B+gNCG6BDaawuDlgANIhGnaTLrIqWrrcm4EMzJOnAOI1fgzJRsOOUEfaS318Eq9OVO3apEyCCt0lOQK6PuksduOjVxtltDav+guVAA068NrPYmRNabVKRNLJpL8w4D44sfth5RvZ3q9t+6RTArpEtc5sh5ChzvqPOzKGMXW83C95TxmXqpbK6olN4RevSfVjEAgCydH6HN6OhtOQEcnrU97r9H0iZOWwbw3pVrZiUkuRD1R56Wzs2wIDAQAB\n-----END PUBLIC KEY-----'
    pub_key = RSA.importKey(pub)
    cipher = Cipher_pkcs1_v1_5.new(pub_key)
    cipher_text = cipher.encrypt(base64.b64encode(input_string.encode('utf-8')))
    return base64.b64encode(cipher_text).decode("utf-8")


class AdminCommandParser:
    def __init__(self):
        self.parser = Lark(GRAMMAR, start='start', parser='lalr', transformer=AdminTransformer())
        self.command_history = []

    def parse_command(self, command_str: str) -> Dict[str, Any]:
        if not command_str.strip():
            return {'type': 'empty'}

        self.command_history.append(command_str)

        try:
            result = self.parser.parse(command_str)
            return result
        except Exception as e:
            return {'type': 'error', 'message': f'Parse error: {str(e)}'}


class AdminCLI:
    def __init__(self):
        self.parser = AdminCommandParser()
        self.is_interactive = False
        self.admin_account = "admin@ragflow.io"
        self.admin_password: str = "admin"
        self.host: str = ""
        self.port: int = 0

    def verify_admin(self, args):
        conn_info = self._parse_connection_args(args)
        if 'error' in conn_info:
            print(f"Error: {conn_info['error']}")
            return

        self.host = conn_info['host']
        self.port = conn_info['port']
        print(f"Attempt to access ip: {self.host}, port: {self.port}")
        url = f'http://{self.host}:{self.port}/api/v1/admin/auth'

        try_count = 0
        while True:
            try_count += 1
            if try_count > 3:
                return False

            admin_passwd = input(f"password for {self.admin_account}: ").strip()
            try:
                self.admin_password = encode_to_base64(admin_passwd)
                response = requests.get(url, auth=HTTPBasicAuth(self.admin_account, self.admin_password))
                if response.status_code == 200:
                    res_json = response.json()
                    error_code = res_json.get('code', -1)
                    if error_code == 0:
                        print("Authentication successful.")
                        return True
                    else:
                        error_message = res_json.get('message', 'Unknown error')
                        print(f"Authentication failed: {error_message}, try again")
                        continue
                else:
                    print(f"Bad response, status: {response.status_code}, try again")
            except Exception:
                print(f"Can't access {self.host}, port: {self.port}")

    def _print_table_simple(self, data):
        if not data:
            print("No data to print")
            return
        if isinstance(data, dict):
            # handle single row data
            data = [data]

        columns = list(data[0].keys())
        col_widths = {}

        for col in columns:
            max_width = len(str(col))
            for item in data:
                value_len = len(str(item.get(col, '')))
                if value_len > max_width:
                    max_width = value_len
            col_widths[col] = max(2, max_width)

        # Generate delimiter
        separator = "+" + "+".join(["-" * (col_widths[col] + 2) for col in columns]) + "+"

        # Print header
        print(separator)
        header = "|" + "|".join([f" {col:<{col_widths[col]}} " for col in columns]) + "|"
        print(header)
        print(separator)

        # Print data
        for item in data:
            row = "|"
            for col in columns:
                value = str(item.get(col, ''))
                if len(value) > col_widths[col]:
                    value = value[:col_widths[col] - 3] + "..."
                row += f" {value:<{col_widths[col]}} |"
            print(row)

        print(separator)

    def run_interactive(self):
        self.is_interactive = True
        print("RAGFlow Admin command line interface - Type '\\?' for help, '\\q' to quit")

        while True:
            try:
                command = input("admin> ").strip()
                if not command:
                    continue

                print(f"command: {command}")
                result = self.parser.parse_command(command)
                self.execute_command(result)

                if isinstance(result, Tree):
                    continue

                if result.get('type') == 'meta' and result.get('command') in ['q', 'quit', 'exit']:
                    break

            except KeyboardInterrupt:
                print("\nUse '\\q' to quit")
            except EOFError:
                print("\nGoodbye!")
                break

    def run_single_command(self, args):
        conn_info = self._parse_connection_args(args)
        if 'error' in conn_info:
            print(f"Error: {conn_info['error']}")
            return

    def _parse_connection_args(self, args: List[str]) -> Dict[str, Any]:
        parser = argparse.ArgumentParser(description='Admin CLI Client', add_help=False)
        parser.add_argument('-h', '--host', default='localhost', help='Admin service host')
        parser.add_argument('-p', '--port', type=int, default=8080, help='Admin service port')

        try:
            parsed_args, remaining_args = parser.parse_known_args(args)
            return {
                'host': parsed_args.host,
                'port': parsed_args.port,
            }
        except SystemExit:
            return {'error': 'Invalid connection arguments'}

    def execute_command(self, parsed_command: Dict[str, Any]):
        command_dict: dict
        if isinstance(parsed_command, Tree):
            command_dict = parsed_command.children[0]
        else:
            if parsed_command['type'] == 'error':
                print(f"Error: {parsed_command['message']}")
                return
            else:
                command_dict = parsed_command

        # print(f"Parsed command: {command_dict}")

        command_type = command_dict['type']

        match command_type:
            case 'list_services':
                self._handle_list_services(command_dict)
            case 'show_service':
                self._handle_show_service(command_dict)
            case 'restart_service':
                self._handle_restart_service(command_dict)
```
self._handle_restart_service(command_dict)
|
|
||||||
case 'shutdown_service':
|
|
||||||
self._handle_shutdown_service(command_dict)
|
|
||||||
case 'startup_service':
|
|
||||||
self._handle_startup_service(command_dict)
|
|
||||||
case 'list_users':
|
|
||||||
self._handle_list_users(command_dict)
|
|
||||||
case 'show_user':
|
|
||||||
self._handle_show_user(command_dict)
|
|
||||||
case 'drop_user':
|
|
||||||
self._handle_drop_user(command_dict)
|
|
||||||
case 'alter_user':
|
|
||||||
self._handle_alter_user(command_dict)
|
|
||||||
case 'create_user':
|
|
||||||
self._handle_create_user(command_dict)
|
|
||||||
case 'activate_user':
|
|
||||||
self._handle_activate_user(command_dict)
|
|
||||||
case 'list_datasets':
|
|
||||||
self._handle_list_datasets(command_dict)
|
|
||||||
case 'list_agents':
|
|
||||||
self._handle_list_agents(command_dict)
|
|
||||||
case 'meta':
|
|
||||||
self._handle_meta_command(command_dict)
|
|
||||||
case _:
|
|
||||||
print(f"Command '{command_type}' would be executed with API")
|
|
||||||
|
|
||||||
def _handle_list_services(self, command):
|
|
||||||
print("Listing all services")
|
|
||||||
|
|
||||||
url = f'http://{self.host}:{self.port}/api/v1/admin/services'
|
|
||||||
response = requests.get(url, auth=HTTPBasicAuth(self.admin_account, self.admin_password))
|
|
||||||
res_json = response.json()
|
|
||||||
if response.status_code == 200:
|
|
||||||
self._print_table_simple(res_json['data'])
|
|
||||||
else:
|
|
||||||
print(f"Fail to get all users, code: {res_json['code']}, message: {res_json['message']}")
|
|
||||||
|
|
||||||
def _handle_show_service(self, command):
|
|
||||||
service_id: int = command['number']
|
|
||||||
print(f"Showing service: {service_id}")
|
|
||||||
|
|
||||||
def _handle_restart_service(self, command):
|
|
||||||
service_id: int = command['number']
|
|
||||||
print(f"Restart service {service_id}")
|
|
||||||
|
|
||||||
def _handle_shutdown_service(self, command):
|
|
||||||
service_id: int = command['number']
|
|
||||||
print(f"Shutdown service {service_id}")
|
|
||||||
|
|
||||||
def _handle_startup_service(self, command):
|
|
||||||
service_id: int = command['number']
|
|
||||||
print(f"Startup service {service_id}")
|
|
||||||
|
|
||||||
def _handle_list_users(self, command):
|
|
||||||
print("Listing all users")
|
|
||||||
|
|
||||||
url = f'http://{self.host}:{self.port}/api/v1/admin/users'
|
|
||||||
response = requests.get(url, auth=HTTPBasicAuth(self.admin_account, self.admin_password))
|
|
||||||
res_json = response.json()
|
|
||||||
if response.status_code == 200:
|
|
||||||
self._print_table_simple(res_json['data'])
|
|
||||||
else:
|
|
||||||
print(f"Fail to get all users, code: {res_json['code']}, message: {res_json['message']}")
|
|
||||||
|
|
||||||
def _handle_show_user(self, command):
|
|
||||||
username_tree: Tree = command['username']
|
|
||||||
username: str = username_tree.children[0].strip("'\"")
|
|
||||||
print(f"Showing user: {username}")
|
|
||||||
url = f'http://{self.host}:{self.port}/api/v1/admin/users/{username}'
|
|
||||||
response = requests.get(url, auth=HTTPBasicAuth(self.admin_account, self.admin_password))
|
|
||||||
res_json = response.json()
|
|
||||||
if response.status_code == 200:
|
|
||||||
self._print_table_simple(res_json['data'])
|
|
||||||
else:
|
|
||||||
print(f"Fail to get user {username}, code: {res_json['code']}, message: {res_json['message']}")
|
|
||||||
|
|
||||||
def _handle_drop_user(self, command):
|
|
||||||
username_tree: Tree = command['username']
|
|
||||||
username: str = username_tree.children[0].strip("'\"")
|
|
||||||
print(f"Drop user: {username}")
|
|
||||||
|
|
||||||
def _handle_alter_user(self, command):
|
|
||||||
username_tree: Tree = command['username']
|
|
||||||
username: str = username_tree.children[0].strip("'\"")
|
|
||||||
password_tree: Tree = command['password']
|
|
||||||
password: str = password_tree.children[0].strip("'\"")
|
|
||||||
print(f"Alter user: {username}, password: {password}")
|
|
||||||
url = f'http://{self.host}:{self.port}/api/v1/admin/users/{username}/password'
|
|
||||||
response = requests.put(url, auth=HTTPBasicAuth(self.admin_account, self.admin_password), json={'new_password': encrypt(password)})
|
|
||||||
res_json = response.json()
|
|
||||||
if response.status_code == 200:
|
|
||||||
print(res_json["message"])
|
|
||||||
else:
|
|
||||||
print(f"Fail to alter password, code: {res_json['code']}, message: {res_json['message']}")
|
|
||||||
|
|
||||||
def _handle_create_user(self, command):
|
|
||||||
username_tree: Tree = command['username']
|
|
||||||
username: str = username_tree.children[0].strip("'\"")
|
|
||||||
password_tree: Tree = command['password']
|
|
||||||
password: str = password_tree.children[0].strip("'\"")
|
|
||||||
role: str = command['role']
|
|
||||||
print(f"Create user: {username}, password: {password}, role: {role}")
|
|
||||||
url = f'http://{self.host}:{self.port}/api/v1/admin/users'
|
|
||||||
response = requests.post(
|
|
||||||
url,
|
|
||||||
auth=HTTPBasicAuth(self.admin_account, self.admin_password),
|
|
||||||
json={'username': username, 'password': encrypt(password), 'role': role}
|
|
||||||
)
|
|
||||||
res_json = response.json()
|
|
||||||
if response.status_code == 200:
|
|
||||||
self._print_table_simple(res_json['data'])
|
|
||||||
else:
|
|
||||||
print(f"Fail to create user {username}, code: {res_json['code']}, message: {res_json['message']}")
|
|
||||||
|
|
||||||
def _handle_activate_user(self, command):
|
|
||||||
username_tree: Tree = command['username']
|
|
||||||
username: str = username_tree.children[0].strip("'\"")
|
|
||||||
activate_tree: Tree = command['activate_status']
|
|
||||||
activate_status: str = activate_tree.children[0].strip("'\"")
|
|
||||||
if activate_status.lower() in ['on', 'off']:
|
|
||||||
print(f"Alter user {username} activate status, turn {activate_status.lower()}.")
|
|
||||||
url = f'http://{self.host}:{self.port}/api/v1/admin/users/{username}/activate'
|
|
||||||
response = requests.put(url, auth=HTTPBasicAuth(self.admin_account, self.admin_password), json={'activate_status': activate_status})
|
|
||||||
res_json = response.json()
|
|
||||||
if response.status_code == 200:
|
|
||||||
print(res_json["message"])
|
|
||||||
else:
|
|
||||||
print(f"Fail to alter activate status, code: {res_json['code']}, message: {res_json['message']}")
|
|
||||||
else:
|
|
||||||
print(f"Unknown activate status: {activate_status}.")
|
|
||||||
|
|
||||||
def _handle_list_datasets(self, command):
|
|
||||||
username_tree: Tree = command['username']
|
|
||||||
username: str = username_tree.children[0].strip("'\"")
|
|
||||||
print(f"Listing all datasets of user: {username}")
|
|
||||||
url = f'http://{self.host}:{self.port}/api/v1/admin/users/{username}/datasets'
|
|
||||||
response = requests.get(url, auth=HTTPBasicAuth(self.admin_account, self.admin_password))
|
|
||||||
res_json = response.json()
|
|
||||||
if response.status_code == 200:
|
|
||||||
self._print_table_simple(res_json['data'])
|
|
||||||
else:
|
|
||||||
print(f"Fail to get all datasets of {username}, code: {res_json['code']}, message: {res_json['message']}")
|
|
||||||
|
|
||||||
def _handle_list_agents(self, command):
|
|
||||||
username_tree: Tree = command['username']
|
|
||||||
username: str = username_tree.children[0].strip("'\"")
|
|
||||||
print(f"Listing all agents of user: {username}")
|
|
||||||
url = f'http://{self.host}:{self.port}/api/v1/admin/users/{username}/agents'
|
|
||||||
response = requests.get(url, auth=HTTPBasicAuth(self.admin_account, self.admin_password))
|
|
||||||
res_json = response.json()
|
|
||||||
if response.status_code == 200:
|
|
||||||
self._print_table_simple(res_json['data'])
|
|
||||||
else:
|
|
||||||
print(f"Fail to get all agents of {username}, code: {res_json['code']}, message: {res_json['message']}")
|
|
||||||
|
|
||||||
def _handle_meta_command(self, command):
|
|
||||||
meta_command = command['command']
|
|
||||||
args = command.get('args', [])
|
|
||||||
|
|
||||||
if meta_command in ['?', 'h', 'help']:
|
|
||||||
self.show_help()
|
|
||||||
elif meta_command in ['q', 'quit', 'exit']:
|
|
||||||
print("Goodbye!")
|
|
||||||
else:
|
|
||||||
print(f"Meta command '{meta_command}' with args {args}")
|
|
||||||
|
|
||||||
def show_help(self):
|
|
||||||
"""Help info"""
|
|
||||||
help_text = """
|
|
||||||
Commands:
|
|
||||||
LIST SERVICES
|
|
||||||
SHOW SERVICE <service>
|
|
||||||
STARTUP SERVICE <service>
|
|
||||||
SHUTDOWN SERVICE <service>
|
|
||||||
RESTART SERVICE <service>
|
|
||||||
LIST USERS
|
|
||||||
SHOW USER <user>
|
|
||||||
DROP USER <user>
|
|
||||||
CREATE USER <user> <password>
|
|
||||||
ALTER USER PASSWORD <user> <new_password>
|
|
||||||
LIST DATASETS OF <user>
|
|
||||||
LIST AGENTS OF <user>
|
|
||||||
|
|
||||||
Meta Commands:
|
|
||||||
\\?, \\h, \\help Show this help
|
|
||||||
\\q, \\quit, \\exit Quit the CLI
|
|
||||||
"""
|
|
||||||
print(help_text)
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
|
||||||
import sys
|
|
||||||
|
|
||||||
cli = AdminCLI()
|
|
||||||
|
|
||||||
if len(sys.argv) == 1 or (len(sys.argv) > 1 and sys.argv[1] == '-'):
|
|
||||||
print(r"""
|
|
||||||
____ ___ ______________ ___ __ _
|
|
||||||
/ __ \/ | / ____/ ____/ /___ _ __ / | ____/ /___ ___ (_)___
|
|
||||||
/ /_/ / /| |/ / __/ /_ / / __ \ | /| / / / /| |/ __ / __ `__ \/ / __ \
|
|
||||||
/ _, _/ ___ / /_/ / __/ / / /_/ / |/ |/ / / ___ / /_/ / / / / / / / / / /
|
|
||||||
/_/ |_/_/ |_\____/_/ /_/\____/|__/|__/ /_/ |_\__,_/_/ /_/ /_/_/_/ /_/
|
|
||||||
""")
|
|
||||||
if cli.verify_admin(sys.argv):
|
|
||||||
cli.run_interactive()
|
|
||||||
else:
|
|
||||||
if cli.verify_admin(sys.argv):
|
|
||||||
cli.run_interactive()
|
|
||||||
# cli.run_single_command(sys.argv[1:])
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
main()
|
|
||||||

Deleted file (57 lines):

import logging
import uuid
from functools import wraps
from flask import request, jsonify

from exceptions import AdminException
from api.db.init_data import encode_to_base64
from api.db.services import UserService


def check_admin(username: str, password: str):
    users = UserService.query(email=username)
    if not users:
        logging.info(f"Username: {username} is not registered!")
        user_info = {
            "id": uuid.uuid1().hex,
            "password": encode_to_base64("admin"),
            "nickname": "admin",
            "is_superuser": True,
            "email": "admin@ragflow.io",
            "creator": "system",
            "status": "1",
        }
        if not UserService.save(**user_info):
            raise AdminException("Can't init admin.", 500)

    user = UserService.query_user(username, password)
    if user:
        return True
    else:
        return False


def login_verify(f):
    @wraps(f)
    def decorated(*args, **kwargs):
        auth = request.authorization
        if not auth or 'username' not in auth.parameters or 'password' not in auth.parameters:
            return jsonify({
                "code": 401,
                "message": "Authentication required",
                "data": None
            }), 200

        username = auth.parameters['username']
        password = auth.parameters['password']
        # TODO: to check the username and password from DB
        if check_admin(username, password) is False:
            return jsonify({
                "code": 403,
                "message": "Access denied",
                "data": None
            }), 200

        return f(*args, **kwargs)

    return decorated
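
A minimal usage sketch for the `login_verify` decorator above (the Flask app object and the handler body are illustrative, not part of this commit; the route path matches the one the CLI calls):

```python
from flask import Flask, jsonify

app = Flask(__name__)

@app.route('/api/v1/admin/users', methods=['GET'])
@login_verify  # rejects the request unless the HTTP Basic credentials pass check_admin()
def list_users():
    return jsonify({"code": 0, "message": "ok", "data": []})
```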

admin/build_cli_release.sh (new executable file, 47 lines):

#!/bin/bash
# Builds the ragflow-cli release package. Run it from the directory that
# contains admin_client.py, README.md and pyproject.toml (the copies below use
# relative paths); `python -m build` requires the PyPA "build" package.

set -e

echo "🚀 Start building..."
echo "================================"

PROJECT_NAME="ragflow-cli"

RELEASE_DIR="release"
BUILD_DIR="dist"
SOURCE_DIR="src"
PACKAGE_DIR="ragflow_cli"

echo "🧹 Clean old build folder..."
rm -rf release/

echo "📁 Prepare source code..."
mkdir -p release/$PROJECT_NAME/$SOURCE_DIR
cp pyproject.toml release/$PROJECT_NAME/pyproject.toml
cp README.md release/$PROJECT_NAME/README.md

mkdir -p release/$PROJECT_NAME/$SOURCE_DIR/$PACKAGE_DIR
cp admin_client.py release/$PROJECT_NAME/$SOURCE_DIR/$PACKAGE_DIR/admin_client.py

if [ -d "release/$PROJECT_NAME/$SOURCE_DIR" ]; then
    echo "✅ source dir: release/$PROJECT_NAME/$SOURCE_DIR"
else
    echo "❌ source dir does not exist: release/$PROJECT_NAME/$SOURCE_DIR"
    exit 1
fi

echo "🔨 Make build file..."
cd release/$PROJECT_NAME
export PYTHONPATH=$(pwd)
python -m build

echo "✅ Check build result..."
if [ -d "$BUILD_DIR" ]; then
    echo "📦 Package generated:"
    ls -la $BUILD_DIR/
else
    echo "❌ Build failed: $BUILD_DIR does not exist."
    exit 1
fi

echo "🎉 Build finished successfully!"

- **Admin Service**: A backend service that interfaces with the RAGFlow system to execute administrative operations and monitor its status.
- **Admin CLI**: A command-line interface that allows users to connect to the Admin Service and issue commands for system management.

### Starting the Admin Service

#### Launching from source code

1. Before starting the Admin Service, make sure the RAGFlow system is already running.

2. Launch the service from source code:

   ```bash
   python admin/server/admin_server.py
   ```

   The service will start and listen for incoming connections from the CLI on the configured port.

#### Using the docker image

1. Before startup, configure the `docker_compose.yml` file to enable the admin server:

   ```yaml
   command:
     - --enable-adminserver
   ```

2. Start the containers; the service will start and listen for incoming connections from the CLI on the configured port.

### Using the Admin CLI

1. Ensure the Admin Service is running.

2. Install ragflow-cli:

   ```bash
   pip install ragflow-cli==0.21.1
   ```

3. Launch the CLI client:

   ```bash
   ragflow-cli -h 127.0.0.1 -p 9381
   ```

   You will be prompted to enter the superuser's password to log in. The default password is `admin`.

**Parameters:**

- `-h`: RAGFlow admin server host address
- `-p`: RAGFlow admin server port
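For reference, the CLI's login handshake can be reproduced with plain `requests`. The sketch below mirrors `verify_admin()` in `admin/client/admin_client.py` (shown later in this diff); the host, port, and password values are placeholders, and `PUB_PEM` stands for the public key embedded in that file:

```python
import base64
import requests
from Cryptodome.PublicKey import RSA
from Cryptodome.Cipher import PKCS1_v1_5 as Cipher_pkcs1_v1_5

PUB_PEM = "-----BEGIN PUBLIC KEY-----\n...\n-----END PUBLIC KEY-----"  # key from admin_client.py

def encrypt(password: str) -> str:
    # Same scheme as the CLI: base64 the password, RSA-encrypt it, base64 the result.
    cipher = Cipher_pkcs1_v1_5.new(RSA.importKey(PUB_PEM))
    return base64.b64encode(cipher.encrypt(base64.b64encode(password.encode()))).decode()

session = requests.Session()
base = "http://127.0.0.1:9381/api/v1/admin"
resp = session.post(f"{base}/login", json={"email": "admin@ragflow.io", "password": encrypt("admin")})
if resp.status_code == 200 and resp.json().get("code") == 0:
    # The token comes back in the Authorization response header; reuse it for later calls.
    session.headers.update({"Authorization": resp.headers["Authorization"]})
    print(session.get(f"{base}/users").json())
```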
## Supported Commands

Commands are case-insensitive and must be terminated with a semicolon (`;`).

- `LIST SERVICES;`
  - Lists all available services within the RAGFlow system.
- `SHOW SERVICE <id>;`
  - Shows detailed status information for the service identified by `<id>`.
- `STARTUP SERVICE <id>;`
  - Attempts to start the service identified by `<id>`.
- `SHUTDOWN SERVICE <id>;`
  - Attempts to gracefully shut down the service identified by `<id>`.
- `RESTART SERVICE <id>;`
  - Attempts to restart the service identified by `<id>`.

### User Management Commands

- `LIST USERS;`
  - Lists all users known to the system.
- `SHOW USER '<username>';`
  - Shows details and permissions for the specified user. The username must be enclosed in single or double quotes.
- `CREATE USER <username> <password>;`
  - Creates a user with the given username and password. The username and password must be enclosed in single or double quotes.
- `DROP USER '<username>';`
  - Removes the specified user from the system. Use with caution.
- `ALTER USER PASSWORD '<username>' '<new_password>';`
  - Changes the password for the specified user.
- `ALTER USER ACTIVE <username> <on/off>;`
  - Sets the specified user to active or inactive.

### Data and Agent Commands

admin/client/admin_client.py (new file, 931 lines):
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import argparse
import base64
from cmd import Cmd

from Cryptodome.PublicKey import RSA
from Cryptodome.Cipher import PKCS1_v1_5 as Cipher_pkcs1_v1_5
from typing import Dict, List, Any
from lark import Lark, Transformer, Tree
import requests

GRAMMAR = r"""
start: command

command: sql_command | meta_command

sql_command: list_services
           | show_service
           | startup_service
           | shutdown_service
           | restart_service
           | list_users
           | show_user
           | drop_user
           | alter_user
           | create_user
           | activate_user
           | list_datasets
           | list_agents
           | create_role
           | drop_role
           | alter_role
           | list_roles
           | show_role
           | grant_permission
           | revoke_permission
           | alter_user_role
           | show_user_permission

// meta command definition
meta_command: "\\" meta_command_name [meta_args]

meta_command_name: /[a-zA-Z?]+/
meta_args: (meta_arg)+

meta_arg: /[^\\s"']+/ | quoted_string

// command definition

LIST: "LIST"i
SERVICES: "SERVICES"i
SHOW: "SHOW"i
CREATE: "CREATE"i
SERVICE: "SERVICE"i
SHUTDOWN: "SHUTDOWN"i
STARTUP: "STARTUP"i
RESTART: "RESTART"i
USERS: "USERS"i
DROP: "DROP"i
USER: "USER"i
ALTER: "ALTER"i
ACTIVE: "ACTIVE"i
PASSWORD: "PASSWORD"i
DATASETS: "DATASETS"i
OF: "OF"i
AGENTS: "AGENTS"i
ROLE: "ROLE"i
ROLES: "ROLES"i
DESCRIPTION: "DESCRIPTION"i
GRANT: "GRANT"i
REVOKE: "REVOKE"i
ALL: "ALL"i
PERMISSION: "PERMISSION"i
TO: "TO"i
FROM: "FROM"i
FOR: "FOR"i
RESOURCES: "RESOURCES"i
ON: "ON"i
SET: "SET"i

list_services: LIST SERVICES ";"
show_service: SHOW SERVICE NUMBER ";"
startup_service: STARTUP SERVICE NUMBER ";"
shutdown_service: SHUTDOWN SERVICE NUMBER ";"
restart_service: RESTART SERVICE NUMBER ";"

list_users: LIST USERS ";"
drop_user: DROP USER quoted_string ";"
alter_user: ALTER USER PASSWORD quoted_string quoted_string ";"
show_user: SHOW USER quoted_string ";"
create_user: CREATE USER quoted_string quoted_string ";"
activate_user: ALTER USER ACTIVE quoted_string status ";"

list_datasets: LIST DATASETS OF quoted_string ";"
list_agents: LIST AGENTS OF quoted_string ";"

create_role: CREATE ROLE identifier [DESCRIPTION quoted_string] ";"
drop_role: DROP ROLE identifier ";"
alter_role: ALTER ROLE identifier SET DESCRIPTION quoted_string ";"
list_roles: LIST ROLES ";"
show_role: SHOW ROLE identifier ";"

grant_permission: GRANT action_list ON identifier TO ROLE identifier ";"
revoke_permission: REVOKE action_list ON identifier FROM ROLE identifier ";"
alter_user_role: ALTER USER quoted_string SET ROLE identifier ";"
show_user_permission: SHOW USER PERMISSION quoted_string ";"

action_list: identifier ("," identifier)*

identifier: WORD
quoted_string: QUOTED_STRING
status: WORD

QUOTED_STRING: /'[^']+'/ | /"[^"]+"/
WORD: /[a-zA-Z0-9_\-\.]+/
NUMBER: /[0-9]+/

%import common.WS
%ignore WS
"""
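
# Illustrative note (not part of the committed file): with the transformer
# attached to a LALR parser, parse() returns a Tree whose first child is the
# dict produced by AdminTransformer, e.g.
#
#   parser = Lark(GRAMMAR, start='start', parser='lalr', transformer=AdminTransformer())
#   tree = parser.parse("SHOW SERVICE 1;")
#   tree.children[0]  # -> {'type': 'show_service', 'number': 1}
#
# execute_command() below unwraps exactly this Tree/children[0] shape.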


class AdminTransformer(Transformer):

    def start(self, items):
        return items[0]

    def command(self, items):
        return items[0]

    def list_services(self, items):
        result = {'type': 'list_services'}
        return result

    def show_service(self, items):
        service_id = int(items[2])
        return {"type": "show_service", "number": service_id}

    def startup_service(self, items):
        service_id = int(items[2])
        return {"type": "startup_service", "number": service_id}

    def shutdown_service(self, items):
        service_id = int(items[2])
        return {"type": "shutdown_service", "number": service_id}

    def restart_service(self, items):
        service_id = int(items[2])
        return {"type": "restart_service", "number": service_id}

    def list_users(self, items):
        return {"type": "list_users"}

    def show_user(self, items):
        user_name = items[2]
        return {"type": "show_user", "user_name": user_name}

    def drop_user(self, items):
        user_name = items[2]
        return {"type": "drop_user", "user_name": user_name}

    def alter_user(self, items):
        user_name = items[3]
        new_password = items[4]
        return {"type": "alter_user", "user_name": user_name, "password": new_password}

    def create_user(self, items):
        user_name = items[2]
        password = items[3]
        return {"type": "create_user", "user_name": user_name, "password": password, "role": "user"}

    def activate_user(self, items):
        user_name = items[3]
        activate_status = items[4]
        return {"type": "activate_user", "activate_status": activate_status, "user_name": user_name}

    def list_datasets(self, items):
        user_name = items[3]
        return {"type": "list_datasets", "user_name": user_name}

    def list_agents(self, items):
        user_name = items[3]
        return {"type": "list_agents", "user_name": user_name}

    def create_role(self, items):
        role_name = items[2]
        if len(items) > 4:
            description = items[4]
            return {"type": "create_role", "role_name": role_name, "description": description}
        else:
            return {"type": "create_role", "role_name": role_name}

    def drop_role(self, items):
        role_name = items[2]
        return {"type": "drop_role", "role_name": role_name}

    def alter_role(self, items):
        role_name = items[2]
        description = items[5]
        return {"type": "alter_role", "role_name": role_name, "description": description}

    def list_roles(self, items):
        return {"type": "list_roles"}

    def show_role(self, items):
        role_name = items[2]
        return {"type": "show_role", "role_name": role_name}

    def grant_permission(self, items):
        action_list = items[1]
        resource = items[3]
        role_name = items[6]
        return {"type": "grant_permission", "role_name": role_name, "resource": resource, "actions": action_list}

    def revoke_permission(self, items):
        action_list = items[1]
        resource = items[3]
        role_name = items[6]
        return {
            "type": "revoke_permission",
            "role_name": role_name,
            "resource": resource, "actions": action_list
        }

    def alter_user_role(self, items):
        user_name = items[2]
        role_name = items[5]
        return {"type": "alter_user_role", "user_name": user_name, "role_name": role_name}

    def show_user_permission(self, items):
        user_name = items[3]
        return {"type": "show_user_permission", "user_name": user_name}

    def action_list(self, items):
        return items

    def meta_command(self, items):
        command_name = str(items[0]).lower()
        args = items[1:] if len(items) > 1 else []

        # handle quoted parameter
        parsed_args = []
        for arg in args:
            if hasattr(arg, 'value'):
                parsed_args.append(arg.value)
            else:
                parsed_args.append(str(arg))

        return {'type': 'meta', 'command': command_name, 'args': parsed_args}

    def meta_command_name(self, items):
        return items[0]

    def meta_args(self, items):
        return items


def encrypt(input_string):
    pub = '-----BEGIN PUBLIC KEY-----\nMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEArq9XTUSeYr2+N1h3Afl/z8Dse/2yD0ZGrKwx+EEEcdsBLca9Ynmx3nIB5obmLlSfmskLpBo0UACBmB5rEjBp2Q2f3AG3Hjd4B+gNCG6BDaawuDlgANIhGnaTLrIqWrrcm4EMzJOnAOI1fgzJRsOOUEfaS318Eq9OVO3apEyCCt0lOQK6PuksduOjVxtltDav+guVAA068NrPYmRNabVKRNLJpL8w4D44sfth5RvZ3q9t+6RTArpEtc5sh5ChzvqPOzKGMXW83C95TxmXqpbK6olN4RevSfVjEAgCydH6HN6OhtOQEcnrU97r9H0iZOWwbw3pVrZiUkuRD1R56Wzs2wIDAQAB\n-----END PUBLIC KEY-----'
    pub_key = RSA.importKey(pub)
    cipher = Cipher_pkcs1_v1_5.new(pub_key)
    cipher_text = cipher.encrypt(base64.b64encode(input_string.encode('utf-8')))
    return base64.b64encode(cipher_text).decode("utf-8")


def encode_to_base64(input_string):
    base64_encoded = base64.b64encode(input_string.encode('utf-8'))
    return base64_encoded.decode('utf-8')
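
# Illustrative counterpart (not part of the committed file): the server would
# reverse encrypt() with its private key, e.g. with PKCS1_v1_5's decrypt(),
# whose second argument is the sentinel returned on a padding failure:
#
#   priv_key = RSA.importKey(PRIVATE_PEM)  # PRIVATE_PEM held server-side, not in this repo excerpt
#   raw = Cipher_pkcs1_v1_5.new(priv_key).decrypt(base64.b64decode(token), None)
#   password = base64.b64decode(raw).decode('utf-8')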


class AdminCLI(Cmd):
    def __init__(self):
        super().__init__()
        self.parser = Lark(GRAMMAR, start='start', parser='lalr', transformer=AdminTransformer())
        self.command_history = []
        self.is_interactive = False
        self.admin_account = "admin@ragflow.io"
        self.admin_password: str = "admin"
        self.session = requests.Session()
        self.access_token: str = ""
        self.host: str = ""
        self.port: int = 0

    intro = r"""Type "\h" for help."""
    prompt = "admin> "

    def onecmd(self, command: str) -> bool:
        try:
            result = self.parse_command(command)

            if isinstance(result, dict):
                if 'type' in result and result.get('type') == 'empty':
                    return False

            self.execute_command(result)

            if isinstance(result, Tree):
                return False

            if result.get('type') == 'meta' and result.get('command') in ['q', 'quit', 'exit']:
                return True

        except KeyboardInterrupt:
            print("\nUse '\\q' to quit")
        except EOFError:
            print("\nGoodbye!")
            return True
        return False

    def emptyline(self) -> bool:
        return False

    def default(self, line: str) -> bool:
        return self.onecmd(line)

    def parse_command(self, command_str: str) -> dict[str, str]:
        if not command_str.strip():
            return {'type': 'empty'}

        self.command_history.append(command_str)

        try:
            result = self.parser.parse(command_str)
            return result
        except Exception as e:
            return {'type': 'error', 'message': f'Parse error: {str(e)}'}

    def verify_admin(self, arguments: dict, single_command: bool):
        self.host = arguments['host']
        self.port = arguments['port']
        print(f"Attempt to access ip: {self.host}, port: {self.port}")
        url = f"http://{self.host}:{self.port}/api/v1/admin/login"

        attempt_count = 3
        if single_command:
            attempt_count = 1

        try_count = 0
        while True:
            try_count += 1
            if try_count > attempt_count:
                return False

            if single_command:
                admin_passwd = arguments['password']
            else:
                admin_passwd = input(f"password for {self.admin_account}: ").strip()
            try:
                self.admin_password = encrypt(admin_passwd)
                response = self.session.post(url, json={'email': self.admin_account, 'password': self.admin_password})
                if response.status_code == 200:
                    res_json = response.json()
                    error_code = res_json.get('code', -1)
                    if error_code == 0:
                        self.session.headers.update({
                            'Content-Type': 'application/json',
                            'Authorization': response.headers['Authorization'],
                            'User-Agent': 'RAGFlow-CLI/0.21.1'
                        })
                        print("Authentication successful.")
                        return True
                    else:
                        error_message = res_json.get('message', 'Unknown error')
                        print(f"Authentication failed: {error_message}, try again")
                        continue
                else:
                    print(f"Bad response, status: {response.status_code}, password is wrong")
            except Exception as e:
                print(str(e))
                print(f"Can't access {self.host}, port: {self.port}")

    def _print_table_simple(self, data):
        if not data:
            print("No data to print")
            return
        if isinstance(data, dict):
            # handle single row data
            data = [data]

        columns = list(data[0].keys())
        col_widths = {}

        def get_string_width(text):
            # Treat printable ASCII as display width 1 and everything else (e.g. CJK) as width 2.
            half_width_chars = (
                " !\"#$%&'()*+,-./0123456789:;<=>?@"
                "ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`"
                "abcdefghijklmnopqrstuvwxyz{|}~"
                "\t\n\r"
            )
            width = 0
            for char in text:
                if char in half_width_chars:
                    width += 1
                else:
                    width += 2
            return width

        for col in columns:
            max_width = get_string_width(str(col))
            for item in data:
                value_len = get_string_width(str(item.get(col, '')))
                if value_len > max_width:
                    max_width = value_len
            col_widths[col] = max(2, max_width)

        # Generate delimiter
        separator = "+" + "+".join(["-" * (col_widths[col] + 2) for col in columns]) + "+"

        # Print header
        print(separator)
        header = "|" + "|".join([f" {col:<{col_widths[col]}} " for col in columns]) + "|"
        print(header)
        print(separator)

        # Print data
        for item in data:
            row = "|"
            for col in columns:
                value = str(item.get(col, ''))
                if get_string_width(value) > col_widths[col]:
                    value = value[:col_widths[col] - 3] + "..."
                row += f" {value:<{col_widths[col] - (get_string_width(value) - len(value))}} |"
            print(row)

        print(separator)
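
    # Illustrative output (not part of the committed file):
    #   self._print_table_simple([{'name': 'ragflow_server', 'status': 'alive'}])
    # prints:
    #   +----------------+--------+
    #   | name           | status |
    #   +----------------+--------+
    #   | ragflow_server | alive  |
    #   +----------------+--------+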

    def run_interactive(self):
        self.is_interactive = True
        print("RAGFlow Admin command line interface - Type '\\?' for help, '\\q' to quit")

        while True:
            try:
                command = input("admin> ").strip()
                if not command:
                    continue

                print(f"command: {command}")
                result = self.parse_command(command)
                self.execute_command(result)

                if isinstance(result, Tree):
                    continue

                if result.get('type') == 'meta' and result.get('command') in ['q', 'quit', 'exit']:
                    break

            except KeyboardInterrupt:
                print("\nUse '\\q' to quit")
            except EOFError:
                print("\nGoodbye!")
                break

    def run_single_command(self, command: str):
        result = self.parse_command(command)
        self.execute_command(result)

    def parse_connection_args(self, args: List[str]) -> Dict[str, Any]:
        parser = argparse.ArgumentParser(description='Admin CLI Client', add_help=False)
        parser.add_argument('-h', '--host', default='localhost', help='Admin service host')
        parser.add_argument('-p', '--port', type=int, default=8080, help='Admin service port')
        parser.add_argument('-w', '--password', default='admin', type=str, help='Superuser password')
        parser.add_argument('command', nargs='?', help='Single command')
        try:
            # Note: main() passes sys.argv unchanged, so the positional 'command'
            # slot swallows argv[0] (the program name); an actual single command
            # therefore arrives via remaining_args.
            parsed_args, remaining_args = parser.parse_known_args(args)
            if remaining_args:
                command = remaining_args[0]
                return {
                    'host': parsed_args.host,
                    'port': parsed_args.port,
                    'password': parsed_args.password,
                    'command': command
                }
            else:
                return {
                    'host': parsed_args.host,
                    'port': parsed_args.port,
                }
        except SystemExit:
            return {'error': 'Invalid connection arguments'}
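
    # Illustrative invocations (flag values are placeholders):
    #   ragflow-cli -h 127.0.0.1 -p 9381                          # interactive session
    #   ragflow-cli -h 127.0.0.1 -p 9381 -w admin "LIST USERS;"   # single command, then exit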

    def execute_command(self, parsed_command: Dict[str, Any]):
        command_dict: dict
        if isinstance(parsed_command, Tree):
            command_dict = parsed_command.children[0]
        else:
            if parsed_command['type'] == 'error':
                print(f"Error: {parsed_command['message']}")
                return
            else:
                command_dict = parsed_command

        # print(f"Parsed command: {command_dict}")

        command_type = command_dict['type']

        match command_type:
            case 'list_services':
                self._handle_list_services(command_dict)
            case 'show_service':
                self._handle_show_service(command_dict)
            case 'restart_service':
                self._handle_restart_service(command_dict)
            case 'shutdown_service':
                self._handle_shutdown_service(command_dict)
            case 'startup_service':
                self._handle_startup_service(command_dict)
            case 'list_users':
                self._handle_list_users(command_dict)
            case 'show_user':
                self._handle_show_user(command_dict)
            case 'drop_user':
                self._handle_drop_user(command_dict)
            case 'alter_user':
                self._handle_alter_user(command_dict)
            case 'create_user':
                self._handle_create_user(command_dict)
            case 'activate_user':
                self._handle_activate_user(command_dict)
            case 'list_datasets':
                self._handle_list_datasets(command_dict)
            case 'list_agents':
                self._handle_list_agents(command_dict)
            case 'create_role':
                self._create_role(command_dict)
            case 'drop_role':
                self._drop_role(command_dict)
            case 'alter_role':
                self._alter_role(command_dict)
            case 'list_roles':
                self._list_roles(command_dict)
            case 'show_role':
                self._show_role(command_dict)
            case 'grant_permission':
                self._grant_permission(command_dict)
            case 'revoke_permission':
                self._revoke_permission(command_dict)
            case 'alter_user_role':
                self._alter_user_role(command_dict)
            case 'show_user_permission':
                self._show_user_permission(command_dict)
            case 'meta':
                self._handle_meta_command(command_dict)
            case _:
                print(f"Command '{command_type}' would be executed with API")

    def _handle_list_services(self, command):
        print("Listing all services")

        url = f'http://{self.host}:{self.port}/api/v1/admin/services'
        response = self.session.get(url)
        res_json = response.json()
        if response.status_code == 200:
            self._print_table_simple(res_json['data'])
        else:
            print(f"Fail to get all services, code: {res_json['code']}, message: {res_json['message']}")

    def _handle_show_service(self, command):
        service_id: int = command['number']
        print(f"Showing service: {service_id}")

        url = f'http://{self.host}:{self.port}/api/v1/admin/services/{service_id}'
        response = self.session.get(url)
        res_json = response.json()
        if response.status_code == 200:
            res_data = res_json['data']
            if 'status' in res_data and res_data['status'] == 'alive':
                print(f"Service {res_data['service_name']} is alive, ")
                if isinstance(res_data['message'], str):
                    print(res_data['message'])
                else:
                    self._print_table_simple(res_data['message'])
            else:
                print(f"Service {res_data['service_name']} is down, {res_data['message']}")
        else:
            print(f"Fail to show service, code: {res_json['code']}, message: {res_json['message']}")

    def _handle_restart_service(self, command):
        service_id: int = command['number']
        print(f"Restart service {service_id}")

    def _handle_shutdown_service(self, command):
        service_id: int = command['number']
        print(f"Shutdown service {service_id}")

    def _handle_startup_service(self, command):
        service_id: int = command['number']
        print(f"Startup service {service_id}")

    def _handle_list_users(self, command):
        print("Listing all users")

        url = f'http://{self.host}:{self.port}/api/v1/admin/users'
        response = self.session.get(url)
        res_json = response.json()
        if response.status_code == 200:
            self._print_table_simple(res_json['data'])
        else:
            print(f"Fail to get all users, code: {res_json['code']}, message: {res_json['message']}")

    def _handle_show_user(self, command):
        username_tree: Tree = command['user_name']
        user_name: str = username_tree.children[0].strip("'\"")
        print(f"Showing user: {user_name}")
        url = f'http://{self.host}:{self.port}/api/v1/admin/users/{user_name}'
        response = self.session.get(url)
        res_json = response.json()
        if response.status_code == 200:
            self._print_table_simple(res_json['data'])
        else:
            print(f"Fail to get user {user_name}, code: {res_json['code']}, message: {res_json['message']}")

    def _handle_drop_user(self, command):
        username_tree: Tree = command['user_name']
        user_name: str = username_tree.children[0].strip("'\"")
        print(f"Drop user: {user_name}")
        url = f'http://{self.host}:{self.port}/api/v1/admin/users/{user_name}'
        response = self.session.delete(url)
        res_json = response.json()
        if response.status_code == 200:
            print(res_json["message"])
        else:
            print(f"Fail to drop user, code: {res_json['code']}, message: {res_json['message']}")

    def _handle_alter_user(self, command):
        user_name_tree: Tree = command['user_name']
        user_name: str = user_name_tree.children[0].strip("'\"")
        password_tree: Tree = command['password']
        password: str = password_tree.children[0].strip("'\"")
        print(f"Alter user: {user_name}, password: {password}")
        url = f'http://{self.host}:{self.port}/api/v1/admin/users/{user_name}/password'
        response = self.session.put(url, json={'new_password': encrypt(password)})
        res_json = response.json()
        if response.status_code == 200:
            print(res_json["message"])
        else:
            print(f"Fail to alter password, code: {res_json['code']}, message: {res_json['message']}")

    def _handle_create_user(self, command):
        user_name_tree: Tree = command['user_name']
        user_name: str = user_name_tree.children[0].strip("'\"")
        password_tree: Tree = command['password']
        password: str = password_tree.children[0].strip("'\"")
        role: str = command['role']
        print(f"Create user: {user_name}, password: {password}, role: {role}")
        url = f'http://{self.host}:{self.port}/api/v1/admin/users'
        response = self.session.post(
            url,
            json={'user_name': user_name, 'password': encrypt(password), 'role': role}
        )
        res_json = response.json()
        if response.status_code == 200:
            self._print_table_simple(res_json['data'])
        else:
            print(f"Fail to create user {user_name}, code: {res_json['code']}, message: {res_json['message']}")

    def _handle_activate_user(self, command):
        user_name_tree: Tree = command['user_name']
        user_name: str = user_name_tree.children[0].strip("'\"")
        activate_tree: Tree = command['activate_status']
        activate_status: str = activate_tree.children[0].strip("'\"")
        if activate_status.lower() in ['on', 'off']:
            print(f"Alter user {user_name} activate status, turn {activate_status.lower()}.")
            url = f'http://{self.host}:{self.port}/api/v1/admin/users/{user_name}/activate'
            response = self.session.put(url, json={'activate_status': activate_status})
            res_json = response.json()
            if response.status_code == 200:
                print(res_json["message"])
            else:
                print(f"Fail to alter activate status, code: {res_json['code']}, message: {res_json['message']}")
        else:
            print(f"Unknown activate status: {activate_status}.")

    def _handle_list_datasets(self, command):
        username_tree: Tree = command['user_name']
        user_name: str = username_tree.children[0].strip("'\"")
        print(f"Listing all datasets of user: {user_name}")
        url = f'http://{self.host}:{self.port}/api/v1/admin/users/{user_name}/datasets'
        response = self.session.get(url)
        res_json = response.json()
        if response.status_code == 200:
            self._print_table_simple(res_json['data'])
        else:
            print(f"Fail to get all datasets of {user_name}, code: {res_json['code']}, message: {res_json['message']}")

    def _handle_list_agents(self, command):
        username_tree: Tree = command['user_name']
        user_name: str = username_tree.children[0].strip("'\"")
        print(f"Listing all agents of user: {user_name}")
        url = f'http://{self.host}:{self.port}/api/v1/admin/users/{user_name}/agents'
        response = self.session.get(url)
        res_json = response.json()
        if response.status_code == 200:
            self._print_table_simple(res_json['data'])
        else:
            print(f"Fail to get all agents of {user_name}, code: {res_json['code']}, message: {res_json['message']}")

    def _create_role(self, command):
        role_name_tree: Tree = command['role_name']
        role_name: str = role_name_tree.children[0].strip("'\"")
        desc_str: str = ''
        if 'description' in command:
            desc_tree: Tree = command['description']
            desc_str = desc_tree.children[0].strip("'\"")

        print(f"create role name: {role_name}, description: {desc_str}")
        url = f'http://{self.host}:{self.port}/api/v1/admin/roles'
        response = self.session.post(
            url,
            json={'role_name': role_name, 'description': desc_str}
        )
        res_json = response.json()
        if response.status_code == 200:
            self._print_table_simple(res_json['data'])
        else:
            print(f"Fail to create role {role_name}, code: {res_json['code']}, message: {res_json['message']}")

    def _drop_role(self, command):
        role_name_tree: Tree = command['role_name']
        role_name: str = role_name_tree.children[0].strip("'\"")
        print(f"drop role name: {role_name}")
        url = f'http://{self.host}:{self.port}/api/v1/admin/roles/{role_name}'
        response = self.session.delete(url)
        res_json = response.json()
        if response.status_code == 200:
            self._print_table_simple(res_json['data'])
        else:
            print(f"Fail to drop role {role_name}, code: {res_json['code']}, message: {res_json['message']}")

    def _alter_role(self, command):
        role_name_tree: Tree = command['role_name']
        role_name: str = role_name_tree.children[0].strip("'\"")
        desc_tree: Tree = command['description']
        desc_str: str = desc_tree.children[0].strip("'\"")

        print(f"alter role name: {role_name}, description: {desc_str}")
        url = f'http://{self.host}:{self.port}/api/v1/admin/roles/{role_name}'
        response = self.session.put(
            url,
            json={'description': desc_str}
        )
        res_json = response.json()
        if response.status_code == 200:
            self._print_table_simple(res_json['data'])
        else:
            print(
                f"Fail to update role {role_name} with description: {desc_str}, code: {res_json['code']}, message: {res_json['message']}")

    def _list_roles(self, command):
        print("Listing all roles")
        url = f'http://{self.host}:{self.port}/api/v1/admin/roles'
        response = self.session.get(url)
        res_json = response.json()
        if response.status_code == 200:
            self._print_table_simple(res_json['data'])
        else:
            print(f"Fail to list roles, code: {res_json['code']}, message: {res_json['message']}")

    def _show_role(self, command):
        role_name_tree: Tree = command['role_name']
        role_name: str = role_name_tree.children[0].strip("'\"")
        print(f"show role: {role_name}")
        url = f'http://{self.host}:{self.port}/api/v1/admin/roles/{role_name}/permission'
        response = self.session.get(url)
        res_json = response.json()
        if response.status_code == 200:
            self._print_table_simple(res_json['data'])
        else:
            print(f"Fail to show role {role_name}, code: {res_json['code']}, message: {res_json['message']}")

    def _grant_permission(self, command):
        role_name_tree: Tree = command['role_name']
        role_name_str: str = role_name_tree.children[0].strip("'\"")
        resource_tree: Tree = command['resource']
        resource_str: str = resource_tree.children[0].strip("'\"")
        action_tree_list: list = command['actions']
        actions: list = []
        for action_tree in action_tree_list:
            action_str: str = action_tree.children[0].strip("'\"")
            actions.append(action_str)
        print(f"grant role_name: {role_name_str}, resource: {resource_str}, actions: {actions}")
        url = f'http://{self.host}:{self.port}/api/v1/admin/roles/{role_name_str}/permission'
        response = self.session.post(
            url,
            json={'actions': actions, 'resource': resource_str}
        )
        res_json = response.json()
        if response.status_code == 200:
            self._print_table_simple(res_json['data'])
        else:
            print(
                f"Fail to grant role {role_name_str} with {actions} on {resource_str}, code: {res_json['code']}, message: {res_json['message']}")

    def _revoke_permission(self, command):
        role_name_tree: Tree = command['role_name']
        role_name_str: str = role_name_tree.children[0].strip("'\"")
        resource_tree: Tree = command['resource']
        resource_str: str = resource_tree.children[0].strip("'\"")
        action_tree_list: list = command['actions']
        actions: list = []
        for action_tree in action_tree_list:
            action_str: str = action_tree.children[0].strip("'\"")
            actions.append(action_str)
        print(f"revoke role_name: {role_name_str}, resource: {resource_str}, actions: {actions}")
        url = f'http://{self.host}:{self.port}/api/v1/admin/roles/{role_name_str}/permission'
        response = self.session.delete(
            url,
            json={'actions': actions, 'resource': resource_str}
        )
        res_json = response.json()
        if response.status_code == 200:
            self._print_table_simple(res_json['data'])
        else:
            print(
                f"Fail to revoke role {role_name_str} with {actions} on {resource_str}, code: {res_json['code']}, message: {res_json['message']}")

    def _alter_user_role(self, command):
        role_name_tree: Tree = command['role_name']
        role_name_str: str = role_name_tree.children[0].strip("'\"")
        user_name_tree: Tree = command['user_name']
        user_name_str: str = user_name_tree.children[0].strip("'\"")
        print(f"alter_user_role user_name: {user_name_str}, role_name: {role_name_str}")
        url = f'http://{self.host}:{self.port}/api/v1/admin/users/{user_name_str}/role'
        response = self.session.put(
            url,
            json={'role_name': role_name_str}
        )
        res_json = response.json()
        if response.status_code == 200:
            self._print_table_simple(res_json['data'])
        else:
            print(
                f"Fail to alter user: {user_name_str} to role {role_name_str}, code: {res_json['code']}, message: {res_json['message']}")

    def _show_user_permission(self, command):
        user_name_tree: Tree = command['user_name']
        user_name_str: str = user_name_tree.children[0].strip("'\"")
        print(f"show_user_permission user_name: {user_name_str}")
        url = f'http://{self.host}:{self.port}/api/v1/admin/users/{user_name_str}/permission'
        response = self.session.get(url)
        res_json = response.json()
        if response.status_code == 200:
            self._print_table_simple(res_json['data'])
        else:
            print(
                f"Fail to show user: {user_name_str} permission, code: {res_json['code']}, message: {res_json['message']}")
def _handle_meta_command(self, command):
|
||||||
|
meta_command = command['command']
|
||||||
|
args = command.get('args', [])
|
||||||
|
|
||||||
|
if meta_command in ['?', 'h', 'help']:
|
||||||
|
self.show_help()
|
||||||
|
elif meta_command in ['q', 'quit', 'exit']:
|
||||||
|
print("Goodbye!")
|
||||||
|
else:
|
||||||
|
print(f"Meta command '{meta_command}' with args {args}")
|
||||||
|
|
||||||
|
def show_help(self):
|
||||||
|
"""Help info"""
|
||||||
|
help_text = """
|
||||||
|
Commands:
|
||||||
|
LIST SERVICES
|
||||||
|
SHOW SERVICE <service>
|
||||||
|
STARTUP SERVICE <service>
|
||||||
|
SHUTDOWN SERVICE <service>
|
||||||
|
RESTART SERVICE <service>
|
||||||
|
LIST USERS
|
||||||
|
SHOW USER <user>
|
||||||
|
DROP USER <user>
|
||||||
|
CREATE USER <user> <password>
|
||||||
|
ALTER USER PASSWORD <user> <new_password>
|
||||||
|
ALTER USER ACTIVE <user> <on/off>
|
||||||
|
LIST DATASETS OF <user>
|
||||||
|
LIST AGENTS OF <user>
|
||||||
|
|
||||||
|
Meta Commands:
|
||||||
|
\\?, \\h, \\help Show this help
|
||||||
|
\\q, \\quit, \\exit Quit the CLI
|
||||||
|
"""
|
||||||
|
print(help_text)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
import sys
|
||||||
|
|
||||||
|
cli = AdminCLI()
|
||||||
|
|
||||||
|
args = cli.parse_connection_args(sys.argv)
|
||||||
|
if 'error' in args:
|
||||||
|
print(f"Error: {args['error']}")
|
||||||
|
return
|
||||||
|
|
||||||
|
if 'command' in args:
|
||||||
|
if 'password' not in args:
|
||||||
|
print("Error: password is missing")
|
||||||
|
return
|
||||||
|
if cli.verify_admin(args, single_command=True):
|
||||||
|
command: str = args['command']
|
||||||
|
print(f"Run single command: {command}")
|
||||||
|
cli.run_single_command(command)
|
||||||
|
else:
|
||||||
|
if cli.verify_admin(args, single_command=False):
|
||||||
|
print(r"""
|
||||||
|
____ ___ ______________ ___ __ _
|
||||||
|
/ __ \/ | / ____/ ____/ /___ _ __ / | ____/ /___ ___ (_)___
|
||||||
|
/ /_/ / /| |/ / __/ /_ / / __ \ | /| / / / /| |/ __ / __ `__ \/ / __ \
|
||||||
|
/ _, _/ ___ / /_/ / __/ / / /_/ / |/ |/ / / ___ / /_/ / / / / / / / / / /
|
||||||
|
/_/ |_/_/ |_\____/_/ /_/\____/|__/|__/ /_/ |_\__,_/_/ /_/ /_/_/_/ /_/
|
||||||
|
""")
|
||||||
|
cli.cmdloop()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main()
|
||||||
admin/client/pyproject.toml (new file, 24 lines)
@@ -0,0 +1,24 @@
[project]
name = "ragflow-cli"
version = "0.21.1"
description = "Admin Service's client of [RAGFlow](https://github.com/infiniflow/ragflow). The Admin Service provides user management and system monitoring."
authors = [{ name = "Lynn", email = "lynn_inf@hotmail.com" }]
license = { text = "Apache License, Version 2.0" }
readme = "README.md"
requires-python = ">=3.10,<3.13"
dependencies = [
    "requests>=2.30.0,<3.0.0",
    "beartype>=0.18.5,<0.19.0",
    "pycryptodomex>=3.10.0",
    "lark>=1.1.0",
]

[dependency-groups]
test = [
    "pytest>=8.3.5",
    "requests>=2.32.3",
    "requests-toolbelt>=1.0.0",
]

[project.scripts]
ragflow-cli = "admin_client:main"
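
For orientation, the [project.scripts] table above means that once the package is installed, the generated ragflow-cli command simply resolves admin_client.main and calls it. A minimal equivalent sketch (nothing here beyond what the table declares):

# Equivalent of the `ragflow-cli` console script generated from
# [project.scripts]: import the module and invoke its main().
from admin_client import main

if __name__ == "__main__":
    main()  # parses sys.argv via parse_connection_args(), then verifies the admin
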
@@ -1,15 +0,0 @@
from flask import jsonify

def success_response(data=None, message="Success", code = 0):
    return jsonify({
        "code": code,
        "message": message,
        "data": data
    }), 200

def error_response(message="Error", code=-1, data=None):
    return jsonify({
        "code": code,
        "message": message,
        "data": data
    }), 400
admin/routes.py (187 lines deleted)
@@ -1,187 +0,0 @@
from flask import Blueprint, request

from auth import login_verify
from responses import success_response, error_response
from services import UserMgr, ServiceMgr, UserServiceMgr
from exceptions import AdminException

admin_bp = Blueprint('admin', __name__, url_prefix='/api/v1/admin')


@admin_bp.route('/auth', methods=['GET'])
@login_verify
def auth_admin():
    try:
        return success_response(None, "Admin is authorized", 0)
    except Exception as e:
        return error_response(str(e), 500)


@admin_bp.route('/users', methods=['GET'])
@login_verify
def list_users():
    try:
        users = UserMgr.get_all_users()
        return success_response(users, "Get all users", 0)
    except Exception as e:
        return error_response(str(e), 500)


@admin_bp.route('/users', methods=['POST'])
@login_verify
def create_user():
    try:
        data = request.get_json()
        if not data or 'username' not in data or 'password' not in data:
            return error_response("Username and password are required", 400)

        username = data['username']
        password = data['password']
        role = data.get('role', 'user')

        res = UserMgr.create_user(username, password, role)
        if res["success"]:
            user_info = res["user_info"]
            user_info.pop("password")  # do not return password
            return success_response(user_info, "User created successfully")
        else:
            return error_response("create user failed")

    except AdminException as e:
        return error_response(e.message, e.code)
    except Exception as e:
        return error_response(str(e))


@admin_bp.route('/users/<username>', methods=['DELETE'])
@login_verify
def delete_user(username):
    try:
        UserMgr.delete_user(username)
        return success_response(None, "User and all data deleted successfully")

    except AdminException as e:
        return error_response(e.message, e.code)
    except Exception as e:
        return error_response(str(e), 500)


@admin_bp.route('/users/<username>/password', methods=['PUT'])
@login_verify
def change_password(username):
    try:
        data = request.get_json()
        if not data or 'new_password' not in data:
            return error_response("New password is required", 400)

        new_password = data['new_password']
        msg = UserMgr.update_user_password(username, new_password)
        return success_response(None, msg)

    except AdminException as e:
        return error_response(e.message, e.code)
    except Exception as e:
        return error_response(str(e), 500)


@admin_bp.route('/users/<username>/activate', methods=['PUT'])
@login_verify
def alter_user_activate_status(username):
    try:
        data = request.get_json()
        if not data or 'activate_status' not in data:
            return error_response("Activation status is required", 400)
        activate_status = data['activate_status']
        msg = UserMgr.update_user_activate_status(username, activate_status)
        return success_response(None, msg)
    except AdminException as e:
        return error_response(e.message, e.code)
    except Exception as e:
        return error_response(str(e), 500)


@admin_bp.route('/users/<username>', methods=['GET'])
@login_verify
def get_user_details(username):
    try:
        user_details = UserMgr.get_user_details(username)
        return success_response(user_details)

    except AdminException as e:
        return error_response(e.message, e.code)
    except Exception as e:
        return error_response(str(e), 500)


@admin_bp.route('/users/<username>/datasets', methods=['GET'])
@login_verify
def get_user_datasets(username):
    try:
        datasets_list = UserServiceMgr.get_user_datasets(username)
        return success_response(datasets_list)

    except AdminException as e:
        return error_response(e.message, e.code)
    except Exception as e:
        return error_response(str(e), 500)


@admin_bp.route('/users/<username>/agents', methods=['GET'])
@login_verify
def get_user_agents(username):
    try:
        agents_list = UserServiceMgr.get_user_agents(username)
        return success_response(agents_list)

    except AdminException as e:
        return error_response(e.message, e.code)
    except Exception as e:
        return error_response(str(e), 500)


@admin_bp.route('/services', methods=['GET'])
@login_verify
def get_services():
    try:
        services = ServiceMgr.get_all_services()
        return success_response(services, "Get all services", 0)
    except Exception as e:
        return error_response(str(e), 500)


@admin_bp.route('/service_types/<service_type>', methods=['GET'])
@login_verify
def get_services_by_type(service_type_str):
    try:
        services = ServiceMgr.get_services_by_type(service_type_str)
        return success_response(services)
    except Exception as e:
        return error_response(str(e), 500)


@admin_bp.route('/services/<service_id>', methods=['GET'])
@login_verify
def get_service(service_id):
    try:
        services = ServiceMgr.get_service_details(service_id)
        return success_response(services)
    except Exception as e:
        return error_response(str(e), 500)


@admin_bp.route('/services/<service_id>', methods=['DELETE'])
@login_verify
def shutdown_service(service_id):
    try:
        services = ServiceMgr.shutdown_service(service_id)
        return success_response(services)
    except Exception as e:
        return error_response(str(e), 500)


@admin_bp.route('/services/<service_id>', methods=['PUT'])
@login_verify
def restart_service(service_id):
    try:
        services = ServiceMgr.restart_service(service_id)
        return success_response(services)
    except Exception as e:
        return error_response(str(e), 500)
@@ -1,3 +1,18 @@
+#
+# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
 import os
 import signal
@@ -12,6 +27,9 @@ from api.utils.log_utils import init_root_logger
 from api.constants import SERVICE_CONF
 from api import settings
 from config import load_configurations, SERVICE_CONFIGS
+from auth import init_default_admin, setup_auth
+from flask_session import Session
+from flask_login import LoginManager

 stop_event = threading.Event()

@@ -27,7 +45,17 @@ if __name__ == '__main__':
     app = Flask(__name__)
     app.register_blueprint(admin_bp)
+    app.config["SESSION_PERMANENT"] = False
+    app.config["SESSION_TYPE"] = "filesystem"
+    app.config["MAX_CONTENT_LENGTH"] = int(
+        os.environ.get("MAX_CONTENT_LENGTH", 1024 * 1024 * 1024)
+    )
+    Session(app)
+    login_manager = LoginManager()
+    login_manager.init_app(app)
     settings.init_settings()
+    setup_auth(login_manager)
+    init_default_admin()
     SERVICE_CONFIGS.configs = load_configurations(SERVICE_CONF)

     try:
admin/server/auth.py (new file, 193 lines)
@@ -0,0 +1,193 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#


import logging
import uuid
from functools import wraps
from datetime import datetime
from flask import request, jsonify
from flask_login import current_user, login_user
from itsdangerous.url_safe import URLSafeTimedSerializer as Serializer

from api import settings
from api.common.exceptions import AdminException, UserNotFoundError
from api.db.init_data import encode_to_base64
from api.db.services import UserService
from api.db import ActiveEnum, StatusEnum
from api.utils.crypt import decrypt
from api.utils import (
    current_timestamp,
    datetime_format,
    get_format_time,
    get_uuid,
)
from api.utils.api_utils import (
    construct_response,
)


def setup_auth(login_manager):
    @login_manager.request_loader
    def load_user(web_request):
        jwt = Serializer(secret_key=settings.SECRET_KEY)
        authorization = web_request.headers.get("Authorization")
        if authorization:
            try:
                access_token = str(jwt.loads(authorization))

                if not access_token or not access_token.strip():
                    logging.warning("Authentication attempt with empty access token")
                    return None

                # Access tokens should be UUIDs (32 hex characters)
                if len(access_token.strip()) < 32:
                    logging.warning(f"Authentication attempt with invalid token format: {len(access_token)} chars")
                    return None

                user = UserService.query(
                    access_token=access_token, status=StatusEnum.VALID.value
                )
                if user:
                    if not user[0].access_token or not user[0].access_token.strip():
                        logging.warning(f"User {user[0].email} has empty access_token in database")
                        return None
                    return user[0]
                else:
                    return None
            except Exception as e:
                logging.warning(f"load_user got exception {e}")
                return None
        else:
            return None


def init_default_admin():
    # Verify that at least one active admin user exists. If not, create a default one.
    users = UserService.query(is_superuser=True)
    if not users:
        default_admin = {
            "id": uuid.uuid1().hex,
            "password": encode_to_base64("admin"),
            "nickname": "admin",
            "is_superuser": True,
            "email": "admin@ragflow.io",
            "creator": "system",
            "status": "1",
        }
        if not UserService.save(**default_admin):
            raise AdminException("Can't init admin.", 500)
    elif not any([u.is_active == ActiveEnum.ACTIVE.value for u in users]):
        raise AdminException("No active admin. Please update 'is_active' in db manually.", 500)


def check_admin_auth(func):
    @wraps(func)
    def wrapper(*args, **kwargs):
        user = UserService.filter_by_id(current_user.id)
        if not user:
            raise UserNotFoundError(current_user.email)
        if not user.is_superuser:
            raise AdminException("Not admin", 403)
        if user.is_active == ActiveEnum.INACTIVE.value:
            raise AdminException(f"User {current_user.email} inactive", 403)

        return func(*args, **kwargs)

    return wrapper


def login_admin(email: str, password: str):
    """
    :param email: admin email
    :param password: string before decrypt
    """
    users = UserService.query(email=email)
    if not users:
        raise UserNotFoundError(email)
    psw = decrypt(password)
    user = UserService.query_user(email, psw)
    if not user:
        raise AdminException("Email and password do not match!")
    if not user.is_superuser:
        raise AdminException("Not admin", 403)
    if user.is_active == ActiveEnum.INACTIVE.value:
        raise AdminException(f"User {email} inactive", 403)

    resp = user.to_json()
    user.access_token = get_uuid()
    login_user(user)
    user.update_time = current_timestamp()
    user.update_date = datetime_format(datetime.now())
    user.last_login_time = get_format_time()
    user.save()
    msg = "Welcome back!"
    return construct_response(data=resp, auth=user.get_id(), message=msg)


def check_admin(username: str, password: str):
    users = UserService.query(email=username)
    if not users:
        logging.info(f"Username: {username} is not registered!")
        user_info = {
            "id": uuid.uuid1().hex,
            "password": encode_to_base64("admin"),
            "nickname": "admin",
            "is_superuser": True,
            "email": "admin@ragflow.io",
            "creator": "system",
            "status": "1",
        }
        if not UserService.save(**user_info):
            raise AdminException("Can't init admin.", 500)

    user = UserService.query_user(username, password)
    if user:
        return True
    else:
        return False


def login_verify(f):
    @wraps(f)
    def decorated(*args, **kwargs):
        auth = request.authorization
        if not auth or 'username' not in auth.parameters or 'password' not in auth.parameters:
            return jsonify({
                "code": 401,
                "message": "Authentication required",
                "data": None
            }), 200

        username = auth.parameters['username']
        password = auth.parameters['password']
        try:
            if check_admin(username, password) is False:
                return jsonify({
                    "code": 500,
                    "message": "Access denied",
                    "data": None
                }), 200
        except Exception as e:
            error_msg = str(e)
            return jsonify({
                "code": 500,
                "message": error_msg
            }), 200

        return f(*args, **kwargs)

    return decorated
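
Since login_verify() above reads HTTP Basic credentials off the request, a route protected by it can be exercised as below. A minimal sketch: the host, port, and credentials are placeholders, and whether the password must be pre-encrypted client-side depends on UserService.query_user.

import requests

# Placeholders: point this at wherever the admin service actually listens.
resp = requests.get(
    "http://localhost:9381/api/v1/admin/auth",
    auth=("admin@ragflow.io", "admin"),  # HTTP Basic, as parsed by login_verify()
)
print(resp.json())  # e.g. {"code": 0, "message": "Admin is authorized", "data": null}
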
@@ -1,14 +1,33 @@
+#
+# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
 import logging
 import threading
 from enum import Enum

 from pydantic import BaseModel
 from typing import Any
-from api.utils import read_config
+from api.utils.configs import read_config
 from urllib.parse import urlparse


 class ServiceConfigs:
+    configs = dict

     def __init__(self):
         self.configs = []
         self.lock = threading.Lock()
@@ -32,9 +51,11 @@ class BaseConfig(BaseModel):
     host: str
     port: int
     service_type: str
+    detail_func_name: str

     def to_dict(self) -> dict[str, Any]:
-        return {'id': self.id, 'name': self.name, 'host': self.host, 'port': self.port, 'service_type': self.service_type}
+        return {'id': self.id, 'name': self.name, 'host': self.host, 'port': self.port,
+                'service_type': self.service_type}


 class MetaConfig(BaseConfig):
@@ -209,7 +230,9 @@ def load_configurations(config_path: str) -> list[BaseConfig]:
                name: str = f'ragflow_{ragflow_count}'
                host: str = v['host']
                http_port: int = v['http_port']
-               config = RAGFlowServerConfig(id=id_count, name=name, host=host, port=http_port, service_type="ragflow_server")
+               config = RAGFlowServerConfig(id=id_count, name=name, host=host, port=http_port,
+                                            service_type="ragflow_server",
+                                            detail_func_name="check_ragflow_server_alive")
                configurations.append(config)
                id_count += 1
            case "es":
@@ -222,7 +245,8 @@ def load_configurations(config_path: str) -> list[BaseConfig]:
                password: str = v.get('password')
                config = ElasticsearchConfig(id=id_count, name=name, host=host, port=port, service_type="retrieval",
                                             retrieval_type="elasticsearch",
-                                            username=username, password=password)
+                                            username=username, password=password,
+                                            detail_func_name="get_es_cluster_stats")
                configurations.append(config)
                id_count += 1

@@ -233,8 +257,9 @@ def load_configurations(config_path: str) -> list[BaseConfig]:
                host = parts[0]
                port = int(parts[1])
                database: str = v.get('db_name', 'default_db')
-               config = InfinityConfig(id=id_count, name=name, host=host, port=port, service_type="retrieval", retrieval_type="infinity",
-                                       db_name=database)
+               config = InfinityConfig(id=id_count, name=name, host=host, port=port, service_type="retrieval",
+                                       retrieval_type="infinity",
+                                       db_name=database, detail_func_name="get_infinity_status")
                configurations.append(config)
                id_count += 1
            case "minio":
@@ -245,8 +270,9 @@ def load_configurations(config_path: str) -> list[BaseConfig]:
                port = int(parts[1])
                user = v.get('user')
                password = v.get('password')
-               config = MinioConfig(id=id_count, name=name, host=host, port=port, user=user, password=password, service_type="file_store",
-                                    store_type="minio")
+               config = MinioConfig(id=id_count, name=name, host=host, port=port, user=user, password=password,
+                                    service_type="file_store",
+                                    store_type="minio", detail_func_name="check_minio_alive")
                configurations.append(config)
                id_count += 1
            case "redis":
@@ -258,7 +284,7 @@ def load_configurations(config_path: str) -> list[BaseConfig]:
                password = v.get('password')
                db: int = v.get('db')
                config = RedisConfig(id=id_count, name=name, host=host, port=port, password=password, database=db,
-                                    service_type="message_queue", mq_type="redis")
+                                    service_type="message_queue", mq_type="redis", detail_func_name="get_redis_info")
                configurations.append(config)
                id_count += 1
            case "mysql":
@@ -268,7 +294,7 @@ def load_configurations(config_path: str) -> list[BaseConfig]:
                username = v.get('user')
                password = v.get('password')
                config = MySQLConfig(id=id_count, name=name, host=host, port=port, username=username, password=password,
-                                    service_type="meta_data", meta_type="mysql")
+                                    service_type="meta_data", meta_type="mysql", detail_func_name="get_mysql_status")
                configurations.append(config)
                id_count += 1
            case "admin":
admin/server/models.py (new file, 15 lines)
@@ -0,0 +1,15 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
admin/server/responses.py (new file, 34 lines)
@@ -0,0 +1,34 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#


from flask import jsonify


def success_response(data=None, message="Success", code=0):
    return jsonify({
        "code": code,
        "message": message,
        "data": data
    }), 200


def error_response(message="Error", code=-1, data=None):
    return jsonify({
        "code": code,
        "message": message,
        "data": data
    }), 400
admin/server/roles.py (new file, 76 lines)
@@ -0,0 +1,76 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import logging

from typing import Dict, Any

from api.common.exceptions import AdminException


class RoleMgr:
    @staticmethod
    def create_role(role_name: str, description: str):
        error_msg = f"not implemented: create role: {role_name}, description: {description}"
        logging.error(error_msg)
        raise AdminException(error_msg)

    @staticmethod
    def update_role_description(role_name: str, description: str) -> Dict[str, Any]:
        error_msg = f"not implemented: update role: {role_name} with description: {description}"
        logging.error(error_msg)
        raise AdminException(error_msg)

    @staticmethod
    def delete_role(role_name: str) -> Dict[str, Any]:
        error_msg = f"not implemented: drop role: {role_name}"
        logging.error(error_msg)
        raise AdminException(error_msg)

    @staticmethod
    def list_roles() -> Dict[str, Any]:
        error_msg = "not implemented: list roles"
        logging.error(error_msg)
        raise AdminException(error_msg)

    @staticmethod
    def get_role_permission(role_name: str) -> Dict[str, Any]:
        error_msg = f"not implemented: show role {role_name}"
        logging.error(error_msg)
        raise AdminException(error_msg)

    @staticmethod
    def grant_role_permission(role_name: str, actions: list, resource: str) -> Dict[str, Any]:
        error_msg = f"not implemented: grant role {role_name} actions: {actions} on {resource}"
        logging.error(error_msg)
        raise AdminException(error_msg)

    @staticmethod
    def revoke_role_permission(role_name: str, actions: list, resource: str) -> Dict[str, Any]:
        error_msg = f"not implemented: revoke role {role_name} actions: {actions} on {resource}"
        logging.error(error_msg)
        raise AdminException(error_msg)

    @staticmethod
    def update_user_role(user_name: str, role_name: str) -> Dict[str, Any]:
        error_msg = f"not implemented: update user role: {user_name} to role {role_name}"
        logging.error(error_msg)
        raise AdminException(error_msg)

    @staticmethod
    def get_user_permission(user_name: str) -> Dict[str, Any]:
        error_msg = f"not implemented: get user permission: {user_name}"
        logging.error(error_msg)
        raise AdminException(error_msg)
admin/server/routes.py (new file, 371 lines)
@@ -0,0 +1,371 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import secrets

from flask import Blueprint, request
from flask_login import current_user, logout_user, login_required

from auth import login_verify, login_admin, check_admin_auth
from responses import success_response, error_response
from services import UserMgr, ServiceMgr, UserServiceMgr
from roles import RoleMgr
from api.common.exceptions import AdminException

admin_bp = Blueprint('admin', __name__, url_prefix='/api/v1/admin')


@admin_bp.route('/login', methods=['POST'])
def login():
    if not request.json:
        return error_response('Authorize admin failed.', 400)
    try:
        email = request.json.get("email", "")
        password = request.json.get("password", "")
        return login_admin(email, password)
    except Exception as e:
        return error_response(str(e), 500)


@admin_bp.route('/logout', methods=['GET'])
@login_required
def logout():
    try:
        current_user.access_token = f"INVALID_{secrets.token_hex(16)}"
        current_user.save()
        logout_user()
        return success_response(True)
    except Exception as e:
        return error_response(str(e), 500)


@admin_bp.route('/auth', methods=['GET'])
@login_verify
def auth_admin():
    try:
        return success_response(None, "Admin is authorized", 0)
    except Exception as e:
        return error_response(str(e), 500)


@admin_bp.route('/users', methods=['GET'])
@login_required
@check_admin_auth
def list_users():
    try:
        users = UserMgr.get_all_users()
        return success_response(users, "Get all users", 0)
    except Exception as e:
        return error_response(str(e), 500)


@admin_bp.route('/users', methods=['POST'])
@login_required
@check_admin_auth
def create_user():
    try:
        data = request.get_json()
        if not data or 'username' not in data or 'password' not in data:
            return error_response("Username and password are required", 400)

        username = data['username']
        password = data['password']
        role = data.get('role', 'user')

        res = UserMgr.create_user(username, password, role)
        if res["success"]:
            user_info = res["user_info"]
            user_info.pop("password")  # do not return password
            return success_response(user_info, "User created successfully")
        else:
            return error_response("create user failed")

    except AdminException as e:
        return error_response(e.message, e.code)
    except Exception as e:
        return error_response(str(e))


@admin_bp.route('/users/<username>', methods=['DELETE'])
@login_required
@check_admin_auth
def delete_user(username):
    try:
        res = UserMgr.delete_user(username)
        if res["success"]:
            return success_response(None, res["message"])
        else:
            return error_response(res["message"])

    except AdminException as e:
        return error_response(e.message, e.code)
    except Exception as e:
        return error_response(str(e), 500)


@admin_bp.route('/users/<username>/password', methods=['PUT'])
@login_required
@check_admin_auth
def change_password(username):
    try:
        data = request.get_json()
        if not data or 'new_password' not in data:
            return error_response("New password is required", 400)

        new_password = data['new_password']
        msg = UserMgr.update_user_password(username, new_password)
        return success_response(None, msg)

    except AdminException as e:
        return error_response(e.message, e.code)
    except Exception as e:
        return error_response(str(e), 500)


@admin_bp.route('/users/<username>/activate', methods=['PUT'])
@login_required
@check_admin_auth
def alter_user_activate_status(username):
    try:
        data = request.get_json()
        if not data or 'activate_status' not in data:
            return error_response("Activation status is required", 400)
        activate_status = data['activate_status']
        msg = UserMgr.update_user_activate_status(username, activate_status)
        return success_response(None, msg)
    except AdminException as e:
        return error_response(e.message, e.code)
    except Exception as e:
        return error_response(str(e), 500)


@admin_bp.route('/users/<username>', methods=['GET'])
@login_required
@check_admin_auth
def get_user_details(username):
    try:
        user_details = UserMgr.get_user_details(username)
        return success_response(user_details)

    except AdminException as e:
        return error_response(e.message, e.code)
    except Exception as e:
        return error_response(str(e), 500)


@admin_bp.route('/users/<username>/datasets', methods=['GET'])
@login_required
@check_admin_auth
def get_user_datasets(username):
    try:
        datasets_list = UserServiceMgr.get_user_datasets(username)
        return success_response(datasets_list)

    except AdminException as e:
        return error_response(e.message, e.code)
    except Exception as e:
        return error_response(str(e), 500)


@admin_bp.route('/users/<username>/agents', methods=['GET'])
@login_required
@check_admin_auth
def get_user_agents(username):
    try:
        agents_list = UserServiceMgr.get_user_agents(username)
        return success_response(agents_list)

    except AdminException as e:
        return error_response(e.message, e.code)
    except Exception as e:
        return error_response(str(e), 500)


@admin_bp.route('/services', methods=['GET'])
@login_required
@check_admin_auth
def get_services():
    try:
        services = ServiceMgr.get_all_services()
        return success_response(services, "Get all services", 0)
    except Exception as e:
        return error_response(str(e), 500)


@admin_bp.route('/service_types/<service_type>', methods=['GET'])
@login_required
@check_admin_auth
def get_services_by_type(service_type):
    try:
        services = ServiceMgr.get_services_by_type(service_type)
        return success_response(services)
    except Exception as e:
        return error_response(str(e), 500)


@admin_bp.route('/services/<service_id>', methods=['GET'])
@login_required
@check_admin_auth
def get_service(service_id):
    try:
        services = ServiceMgr.get_service_details(service_id)
        return success_response(services)
    except Exception as e:
        return error_response(str(e), 500)


@admin_bp.route('/services/<service_id>', methods=['DELETE'])
@login_required
@check_admin_auth
def shutdown_service(service_id):
    try:
        services = ServiceMgr.shutdown_service(service_id)
        return success_response(services)
    except Exception as e:
        return error_response(str(e), 500)


@admin_bp.route('/services/<service_id>', methods=['PUT'])
@login_required
@check_admin_auth
def restart_service(service_id):
    try:
        services = ServiceMgr.restart_service(service_id)
        return success_response(services)
    except Exception as e:
        return error_response(str(e), 500)


@admin_bp.route('/roles', methods=['POST'])
@login_required
@check_admin_auth
def create_role():
    try:
        data = request.get_json()
        if not data or 'role_name' not in data:
            return error_response("Role name is required", 400)
        role_name: str = data['role_name']
        description: str = data['description']
        res = RoleMgr.create_role(role_name, description)
        return success_response(res)
    except Exception as e:
        return error_response(str(e), 500)


@admin_bp.route('/roles/<role_name>', methods=['PUT'])
@login_required
@check_admin_auth
def update_role(role_name: str):
    try:
        data = request.get_json()
        if not data or 'description' not in data:
            return error_response("Role description is required", 400)
        description: str = data['description']
        res = RoleMgr.update_role_description(role_name, description)
        return success_response(res)
    except Exception as e:
        return error_response(str(e), 500)


@admin_bp.route('/roles/<role_name>', methods=['DELETE'])
@login_required
@check_admin_auth
def delete_role(role_name: str):
    try:
        res = RoleMgr.delete_role(role_name)
        return success_response(res)
    except Exception as e:
        return error_response(str(e), 500)


@admin_bp.route('/roles', methods=['GET'])
@login_required
@check_admin_auth
def list_roles():
    try:
        res = RoleMgr.list_roles()
        return success_response(res)
    except Exception as e:
        return error_response(str(e), 500)


@admin_bp.route('/roles/<role_name>/permission', methods=['GET'])
@login_required
@check_admin_auth
def get_role_permission(role_name: str):
    try:
        res = RoleMgr.get_role_permission(role_name)
        return success_response(res)
    except Exception as e:
        return error_response(str(e), 500)


@admin_bp.route('/roles/<role_name>/permission', methods=['POST'])
@login_required
@check_admin_auth
def grant_role_permission(role_name: str):
    try:
        data = request.get_json()
        if not data or 'actions' not in data or 'resource' not in data:
            return error_response("Permission is required", 400)
        actions: list = data['actions']
        resource: str = data['resource']
        res = RoleMgr.grant_role_permission(role_name, actions, resource)
        return success_response(res)
    except Exception as e:
        return error_response(str(e), 500)


@admin_bp.route('/roles/<role_name>/permission', methods=['DELETE'])
@login_required
@check_admin_auth
def revoke_role_permission(role_name: str):
    try:
        data = request.get_json()
        if not data or 'actions' not in data or 'resource' not in data:
            return error_response("Permission is required", 400)
        actions: list = data['actions']
        resource: str = data['resource']
        res = RoleMgr.revoke_role_permission(role_name, actions, resource)
        return success_response(res)
    except Exception as e:
        return error_response(str(e), 500)


@admin_bp.route('/users/<user_name>/role', methods=['PUT'])
@login_required
@check_admin_auth
def update_user_role(user_name: str):
    try:
        data = request.get_json()
        if not data or 'role_name' not in data:
            return error_response("Role name is required", 400)
        role_name: str = data['role_name']
        res = RoleMgr.update_user_role(user_name, role_name)
        return success_response(res)
    except Exception as e:
        return error_response(str(e), 500)


@admin_bp.route('/users/<user_name>/permission', methods=['GET'])
@login_required
@check_admin_auth
def get_user_permission(user_name: str):
    try:
        res = RoleMgr.get_user_permission(user_name)
        return success_response(res)
    except Exception as e:
        return error_response(str(e), 500)
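
The role-permission routes above all expect the same JSON shape, which is also what the CLI's _grant_permission sends. A minimal client sketch; the role name, actions, resource, and token value are illustrative:

import requests

session = requests.Session()
session.headers["Authorization"] = "<token from the /login response>"  # placeholder

# POST grants, DELETE revokes; both carry the same body.
resp = session.post(
    "http://localhost:9381/api/v1/admin/roles/analyst/permission",
    json={"actions": ["read", "write"], "resource": "dataset"},
)
print(resp.json())  # RoleMgr currently raises "not implemented", so expect an error payload
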
@@ -1,22 +1,48 @@
+#
+# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
 import re
 from werkzeug.security import check_password_hash
 from api.db import ActiveEnum
 from api.db.services import UserService
-from api.db.joint_services.user_account_service import create_new_user
+from api.db.joint_services.user_account_service import create_new_user, delete_user_data
 from api.db.services.canvas_service import UserCanvasService
 from api.db.services.user_service import TenantService
 from api.db.services.knowledgebase_service import KnowledgebaseService
 from api.utils.crypt import decrypt
-from exceptions import AdminException, UserAlreadyExistsError, UserNotFoundError
+from api.utils import health_utils
+
+from api.common.exceptions import AdminException, UserAlreadyExistsError, UserNotFoundError
 from config import SERVICE_CONFIGS


 class UserMgr:
     @staticmethod
     def get_all_users():
         users = UserService.get_all_users()
         result = []
         for user in users:
-            result.append({'email': user.email, 'nickname': user.nickname, 'create_date': user.create_date, 'is_active': user.is_active})
+            result.append({
+                'email': user.email,
+                'nickname': user.nickname,
+                'create_date': user.create_date,
+                'is_active': user.is_active,
+                'is_superuser': user.is_superuser,
+            })
         return result

     @staticmethod
@@ -29,7 +55,6 @@ class UserMgr:
             'email': user.email,
             'language': user.language,
             'last_login_time': user.last_login_time,
-            'is_authenticated': user.is_authenticated,
             'is_active': user.is_active,
             'is_anonymous': user.is_anonymous,
             'login_channel': user.login_channel,
@@ -61,7 +86,13 @@ class UserMgr:
     @staticmethod
     def delete_user(username):
         # use email to delete
-        raise AdminException("delete_user: not implemented")
+        user_list = UserService.query_user_by_email(username)
+        if not user_list:
+            raise UserNotFoundError(username)
+        if len(user_list) > 1:
+            raise AdminException(f"More than one user exists: {username}!")
+        usr = user_list[0]
+        return delete_user_data(usr.id)

     @staticmethod
     def update_user_password(username, new_password) -> str:
@@ -104,6 +135,7 @@ class UserMgr:
         UserService.update_user(usr.id, {"is_active": target_status})
         return f"Turn {_activate_status} user activate status successfully!"
+

 class UserServiceMgr:

     @staticmethod
@@ -134,7 +166,13 @@ class UserServiceMgr:
         tenants = TenantService.get_joined_tenants_by_user_id(usr.id)
         tenant_ids = [m["tenant_id"] for m in tenants]
         # filter permitted agents and owned agents
-        return UserCanvasService.get_all_agents_by_tenant_ids(tenant_ids, usr.id)
+        res = UserCanvasService.get_all_agents_by_tenant_ids(tenant_ids, usr.id)
+        return [{
+            'title': r['title'],
+            'permission': r['permission'],
+            'canvas_category': r['canvas_category'].split('_')[0]
+        } for r in res]


 class ServiceMgr:

@@ -142,8 +180,17 @@ class ServiceMgr:
     def get_all_services():
         result = []
         configs = SERVICE_CONFIGS.configs
-        for config in configs:
-            result.append(config.to_dict())
+        for service_id, config in enumerate(configs):
+            config_dict = config.to_dict()
+            try:
+                service_detail = ServiceMgr.get_service_details(service_id)
+                if "status" in service_detail:
+                    config_dict['status'] = service_detail['status']
+                else:
+                    config_dict['status'] = 'timeout'
+            except Exception:
+                config_dict['status'] = 'timeout'
+            result.append(config_dict)
         return result

     @staticmethod
@@ -152,7 +199,22 @@ class ServiceMgr:

     @staticmethod
     def get_service_details(service_id: int):
-        raise AdminException("get_service_details: not implemented")
+        service_id = int(service_id)
+        configs = SERVICE_CONFIGS.configs
+        service_config_mapping = {
+            c.id: {
+                'name': c.name,
+                'detail_func_name': c.detail_func_name
+            } for c in configs
+        }
+        service_info = service_config_mapping.get(service_id, {})
+        if not service_info:
+            raise AdminException(f"invalid service_id: {service_id}")
+
+        detail_func = getattr(health_utils, service_info.get('detail_func_name'))
+        res = detail_func()
+        res.update({'service_name': service_info.get('name')})
+        return res

     @staticmethod
     def shutdown_service(service_id: int):
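
Together with the detail_func_name fields added to the configs earlier, get_service_details() is plain attribute dispatch: the config stores only a function name, and getattr resolves it at call time. A minimal sketch of the pattern, assuming health_utils exposes the named probe functions:

from api.utils import health_utils

def probe(detail_func_name: str, service_name: str) -> dict:
    # e.g. detail_func_name = "get_redis_info" or "check_minio_alive"
    detail_func = getattr(health_utils, detail_func_name)
    res = detail_func()
    res.update({'service_name': service_name})
    return res
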
@ -153,6 +153,16 @@ class Graph:
|
|||||||
def get_tenant_id(self):
|
def get_tenant_id(self):
|
||||||
return self._tenant_id
|
return self._tenant_id
|
||||||
|
|
||||||
|
def get_variable_value(self, exp: str) -> Any:
|
||||||
|
exp = exp.strip("{").strip("}").strip(" ").strip("{").strip("}")
|
||||||
|
if exp.find("@") < 0:
|
||||||
|
return self.globals[exp]
|
||||||
|
cpn_id, var_nm = exp.split("@")
|
||||||
|
cpn = self.get_component(cpn_id)
|
||||||
|
if not cpn:
|
||||||
|
raise Exception(f"Can't find variable: '{cpn_id}@{var_nm}'")
|
||||||
|
return cpn["obj"].output(var_nm)
|
||||||
|
|
||||||
|
|
||||||
class Canvas(Graph):
|
class Canvas(Graph):
|
||||||
|
|
||||||
@ -193,7 +203,6 @@ class Canvas(Graph):
|
|||||||
self.history = []
|
self.history = []
|
||||||
self.retrieval = []
|
self.retrieval = []
|
||||||
self.memory = []
|
self.memory = []
|
||||||
|
|
||||||
for k in self.globals.keys():
|
for k in self.globals.keys():
|
||||||
if isinstance(self.globals[k], str):
|
if isinstance(self.globals[k], str):
|
||||||
self.globals[k] = ""
|
self.globals[k] = ""
|
||||||
@@ -282,7 +291,6 @@ class Canvas(Graph):
                 "thoughts": self.get_component_thoughts(self.path[i])
             })
         _run_batch(idx, to)

         # post processing of components invocation
         for i in range(idx, to):
             cpn = self.get_component(self.path[i])
@@ -383,7 +391,6 @@ class Canvas(Graph):
             self.path = path
             yield decorate("user_inputs", {"inputs": another_inputs, "tips": tips})
             return

         self.path = self.path[:idx]
         if not self.error:
             yield decorate("workflow_finished",
@@ -406,16 +413,6 @@ class Canvas(Graph):
             return False
         return True

-    def get_variable_value(self, exp: str) -> Any:
-        exp = exp.strip("{").strip("}").strip(" ").strip("{").strip("}")
-        if exp.find("@") < 0:
-            return self.globals[exp]
-        cpn_id, var_nm = exp.split("@")
-        cpn = self.get_component(cpn_id)
-        if not cpn:
-            raise Exception(f"Can't find variable: '{cpn_id}@{var_nm}'")
-        return cpn["obj"].output(var_nm)
-
     def get_history(self, window_size):
         convs = []
         if window_size <= 0:
@@ -137,7 +137,7 @@ class Agent(LLM, ToolBase):
         res.update(cpn.get_input_form())
         return res

-    @timeout(os.environ.get("COMPONENT_EXEC_TIMEOUT", 20*60))
+    @timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 20*60)))
     def _invoke(self, **kwargs):
         if kwargs.get("user_prompt"):
             usr_pmt = ""
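Note: the int(...) wrapper added here (and in the analogous hunks below for ComponentBase, Invoke, LLM, Message, Switch and CodeExec) fixes a type pitfall: os.environ.get returns the environment value as a string when COMPONENT_EXEC_TIMEOUT is set, and only falls back to the integer default when it is unset, so numeric use of the timeout would fail on the string. A quick illustration:

    import os

    os.environ["COMPONENT_EXEC_TIMEOUT"] = "1200"
    raw = os.environ.get("COMPONENT_EXEC_TIMEOUT", 20 * 60)
    print(type(raw))     # <class 'str'> -- the env value, not the int default
    print(int(raw) + 1)  # 1201; without int() the arithmetic would raise TypeError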
@@ -346,3 +346,11 @@ Respond immediately with your final comprehensive answer.

         return "Error occurred."

+    def reset(self, temp=False):
+        """
+        Reset all tools if they have a reset method. This avoids errors for tools like MCPToolCallSession.
+        """
+        for k, cpn in self.tools.items():
+            if hasattr(cpn, "reset") and callable(cpn.reset):
+                cpn.reset()
+
@@ -431,7 +431,7 @@ class ComponentBase(ABC):
         self.set_output("_elapsed_time", time.perf_counter() - self.output("_created_time"))
         return self.output()

-    @timeout(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10*60))
+    @timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10*60)))
     def _invoke(self, **kwargs):
         raise NotImplementedError()

@@ -19,11 +19,12 @@ import os
 import re
 import time
 from abc import ABC

 import requests

+from agent.component.base import ComponentBase, ComponentParamBase
 from api.utils.api_utils import timeout
 from deepdoc.parser import HtmlParser
-from agent.component.base import ComponentBase, ComponentParamBase


 class InvokeParam(ComponentParamBase):
@@ -43,17 +44,17 @@ class InvokeParam(ComponentParamBase):
         self.datatype = "json"  # New parameter to determine data posting type

     def check(self):
-        self.check_valid_value(self.method.lower(), "Type of content from the crawler", ['get', 'post', 'put'])
+        self.check_valid_value(self.method.lower(), "Type of content from the crawler", ["get", "post", "put"])
         self.check_empty(self.url, "End point URL")
         self.check_positive_integer(self.timeout, "Timeout time in second")
         self.check_boolean(self.clean_html, "Clean HTML")
-        self.check_valid_value(self.datatype.lower(), "Data post type", ['json', 'formdata'])  # Check for valid datapost value
+        self.check_valid_value(self.datatype.lower(), "Data post type", ["json", "formdata"])  # Check for valid datapost value


 class Invoke(ComponentBase, ABC):
     component_name = "Invoke"

-    @timeout(os.environ.get("COMPONENT_EXEC_TIMEOUT", 3))
+    @timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 3)))
     def _invoke(self, **kwargs):
         args = {}
         for para in self._param.variables:
@@ -63,6 +64,18 @@ class Invoke(ComponentBase, ABC):
             args[para["key"]] = self._canvas.get_variable_value(para["ref"])

         url = self._param.url.strip()
+
+        def replace_variable(match):
+            var_name = match.group(1)
+            try:
+                value = self._canvas.get_variable_value(var_name)
+                return str(value or "")
+            except Exception:
+                return ""
+
+        # {base_url} or {component_id@variable_name}
+        url = re.sub(r"\{([a-zA-Z_][a-zA-Z0-9_.@-]*)\}", replace_variable, url)
+
         if url.find("http") != 0:
             url = "http://" + url

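Note: the block above relies on re.sub accepting a callable replacement: every {...} placeholder match is handed to replace_variable, which resolves the name on the canvas and falls back to an empty string. A self-contained sketch of the same mechanism, with a plain dict standing in for the canvas (names and values here are hypothetical):

    import re

    variables = {"base_url": "api.example.com", "cpn_1@port": "8080"}

    def replace_variable(match: re.Match) -> str:
        # Unknown names resolve to "" instead of raising, as in the hunk above.
        return str(variables.get(match.group(1), ""))

    url = "http://{base_url}:{cpn_1@port}/v1"
    print(re.sub(r"\{([a-zA-Z_][a-zA-Z0-9_.@-]*)\}", replace_variable, url))
    # http://api.example.com:8080/v1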
@@ -75,52 +88,32 @@ class Invoke(ComponentBase, ABC):
             proxies = {"http": self._param.proxy, "https": self._param.proxy}

         last_e = ""
-        for _ in range(self._param.max_retries+1):
+        for _ in range(self._param.max_retries + 1):
             try:
-                if method == 'get':
+                if method == "get":
-                    response = requests.get(url=url,
-                                            params=args,
-                                            headers=headers,
-                                            proxies=proxies,
-                                            timeout=self._param.timeout)
+                    response = requests.get(url=url, params=args, headers=headers, proxies=proxies, timeout=self._param.timeout)
                     if self._param.clean_html:
                         sections = HtmlParser()(None, response.content)
                         self.set_output("result", "\n".join(sections))
                     else:
                         self.set_output("result", response.text)

-                if method == 'put':
+                if method == "put":
-                    if self._param.datatype.lower() == 'json':
+                    if self._param.datatype.lower() == "json":
-                        response = requests.put(url=url,
-                                                json=args,
-                                                headers=headers,
-                                                proxies=proxies,
-                                                timeout=self._param.timeout)
+                        response = requests.put(url=url, json=args, headers=headers, proxies=proxies, timeout=self._param.timeout)
                     else:
-                        response = requests.put(url=url,
-                                                data=args,
-                                                headers=headers,
-                                                proxies=proxies,
-                                                timeout=self._param.timeout)
+                        response = requests.put(url=url, data=args, headers=headers, proxies=proxies, timeout=self._param.timeout)
                     if self._param.clean_html:
                         sections = HtmlParser()(None, response.content)
                         self.set_output("result", "\n".join(sections))
                     else:
                         self.set_output("result", response.text)

-                if method == 'post':
+                if method == "post":
-                    if self._param.datatype.lower() == 'json':
+                    if self._param.datatype.lower() == "json":
-                        response = requests.post(url=url,
-                                                 json=args,
-                                                 headers=headers,
-                                                 proxies=proxies,
-                                                 timeout=self._param.timeout)
+                        response = requests.post(url=url, json=args, headers=headers, proxies=proxies, timeout=self._param.timeout)
                     else:
-                        response = requests.post(url=url,
-                                                 data=args,
-                                                 headers=headers,
-                                                 proxies=proxies,
-                                                 timeout=self._param.timeout)
+                        response = requests.post(url=url, data=args, headers=headers, proxies=proxies, timeout=self._param.timeout)
                     if self._param.clean_html:
                         self.set_output("result", "\n".join(sections))
                     else:
@@ -101,6 +101,8 @@ class LLM(ComponentBase):

     def get_input_elements(self) -> dict[str, Any]:
         res = self.get_input_elements_from_text(self._param.sys_prompt)
+        if isinstance(self._param.prompts, str):
+            self._param.prompts = [{"role": "user", "content": self._param.prompts}]
         for prompt in self._param.prompts:
             d = self.get_input_elements_from_text(prompt["content"])
             res.update(d)
@@ -112,6 +114,17 @@ class LLM(ComponentBase):
     def add2system_prompt(self, txt):
         self._param.sys_prompt += txt

+    def _sys_prompt_and_msg(self, msg, args):
+        if isinstance(self._param.prompts, str):
+            self._param.prompts = [{"role": "user", "content": self._param.prompts}]
+        for p in self._param.prompts:
+            if msg and msg[-1]["role"] == p["role"]:
+                continue
+            p = deepcopy(p)
+            p["content"] = self.string_format(p["content"], args)
+            msg.append(p)
+        return msg, self.string_format(self._param.sys_prompt, args)
+
     def _prepare_prompt_variables(self):
         if self._param.visual_files_var:
             self.imgs = self._canvas.get_variable_value(self._param.visual_files_var)
@@ -127,7 +140,6 @@ class LLM(ComponentBase):

         args = {}
         vars = self.get_input_elements() if not self._param.debug_inputs else self._param.debug_inputs
-        sys_prompt = self._param.sys_prompt
         for k, o in vars.items():
             args[k] = o["value"]
             if not isinstance(args[k], str):
@@ -137,16 +149,8 @@ class LLM(ComponentBase):
             args[k] = str(args[k])
         self.set_input_value(k, args[k])

-        msg = self._canvas.get_history(self._param.message_history_window_size)[:-1]
-        for p in self._param.prompts:
-            if msg and msg[-1]["role"] == p["role"]:
-                continue
-            msg.append(deepcopy(p))
-
-        sys_prompt = self.string_format(sys_prompt, args)
+        msg, sys_prompt = self._sys_prompt_and_msg(self._canvas.get_history(self._param.message_history_window_size)[:-1], args)
         user_defined_prompt, sys_prompt = self._extract_prompts(sys_prompt)
-        for m in msg:
-            m["content"] = self.string_format(m["content"], args)
         if self._param.cite and self._canvas.get_reference()["chunks"]:
             sys_prompt += citation_prompt(user_defined_prompt)

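Note: the refactor above folds three steps into the new _sys_prompt_and_msg helper: appending the configured prompts to the trimmed history, formatting their content, and formatting the system prompt. It also changes one behavior: the old code formatted every history message in place, while the helper formats only the freshly appended copies. A reduced sketch of the merge-and-format logic under those assumptions (fmt is a hypothetical stand-in for string_format):

    from copy import deepcopy

    def sys_prompt_and_msg(msg, prompts, sys_prompt, fmt, args):
        for p in prompts:
            if msg and msg[-1]["role"] == p["role"]:
                continue                           # never stack two turns of the same role
            p = deepcopy(p)                        # keep the configured template untouched
            p["content"] = fmt(p["content"], args)
            msg.append(p)
        return msg, fmt(sys_prompt, args)

    fmt = lambda s, a: s.format(**a)
    msg, sys_p = sys_prompt_and_msg([], [{"role": "user", "content": "Q: {q}"}],
                                    "You answer questions about {topic}.", fmt,
                                    {"q": "ping", "topic": "networks"})
    print(sys_p)  # You answer questions about networks.
    print(msg)    # [{'role': 'user', 'content': 'Q: ping'}]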
@@ -201,7 +205,7 @@ class LLM(ComponentBase):
             for txt in self.chat_mdl.chat_streamly(msg[0]["content"], msg[1:], self._param.gen_conf(), images=self.imgs, **kwargs):
                 yield delta(txt)

-    @timeout(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10*60))
+    @timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10*60)))
     def _invoke(self, **kwargs):
         def clean_formated_answer(ans: str) -> str:
             ans = re.sub(r"^.*</think>", "", ans, flags=re.DOTALL)
@@ -127,7 +127,7 @@ class Message(ComponentBase):
         ]
         return any([re.search(p, content) for p in patt])

-    @timeout(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10*60))
+    @timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10*60)))
     def _invoke(self, **kwargs):
         rand_cnt = random.choice(self._param.content)
         if self._param.stream and not self._is_jinjia2(rand_cnt):
@@ -61,7 +61,7 @@ class SwitchParam(ComponentParamBase):
 class Switch(ComponentBase, ABC):
     component_name = "Switch"

-    @timeout(os.environ.get("COMPONENT_EXEC_TIMEOUT", 3))
+    @timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 3)))
     def _invoke(self, **kwargs):
         for cond in self._param.conditions:
             res = []
 agent/templates/advanced_ingestion_pipeline.json (new file, 726 lines; diff suppressed because one or more lines are too long)
 agent/templates/chunk_summary.json (new file, 493 lines; diff suppressed because one or more lines are too long)
 agent/templates/stock_research_report.json (new file, 1172 lines; diff suppressed because one or more lines are too long)
 agent/templates/title_chunker.json (new file, 369 lines; diff suppressed because one or more lines are too long)
@@ -156,7 +156,7 @@ class CodeExec(ToolBase, ABC):
             self.set_output("_ERROR", "construct code request error: " + str(e))

         try:
-            resp = requests.post(url=f"http://{settings.SANDBOX_HOST}:9385/run", json=code_req, timeout=os.environ.get("COMPONENT_EXEC_TIMEOUT", 10*60))
+            resp = requests.post(url=f"http://{settings.SANDBOX_HOST}:9385/run", json=code_req, timeout=int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10*60)))
             logging.info(f"http://{settings.SANDBOX_HOST}:9385/run, code_req: {code_req}, resp.status_code {resp.status_code}:")
             if resp.status_code != 200:
                 resp.raise_for_status()
@@ -53,12 +53,13 @@ class ExeSQLParam(ToolParamBase):
         self.max_records = 1024

     def check(self):
-        self.check_valid_value(self.db_type, "Choose DB type", ['mysql', 'postgres', 'mariadb', 'mssql'])
+        self.check_valid_value(self.db_type, "Choose DB type", ['mysql', 'postgres', 'mariadb', 'mssql', 'IBM DB2', 'trino'])
         self.check_empty(self.database, "Database name")
         self.check_empty(self.username, "database username")
         self.check_empty(self.host, "IP Address")
         self.check_positive_integer(self.port, "IP Port")
-        self.check_empty(self.password, "Database password")
+        if self.db_type != "trino":
+            self.check_empty(self.password, "Database password")
         self.check_positive_integer(self.max_records, "Maximum number of records")
         if self.database == "rag_flow":
             if self.host == "ragflow-mysql":
@@ -123,6 +124,94 @@ class ExeSQL(ToolBase, ABC):
                 r'PWD=' + self._param.password
             )
             db = pyodbc.connect(conn_str)
+        elif self._param.db_type == 'trino':
+            try:
+                import trino
+                from trino.auth import BasicAuthentication
+            except Exception:
+                raise Exception("Missing dependency 'trino'. Please install: pip install trino")
+
+            def _parse_catalog_schema(db: str):
+                if not db:
+                    return None, None
+                if "." in db:
+                    c, s = db.split(".", 1)
+                elif "/" in db:
+                    c, s = db.split("/", 1)
+                else:
+                    c, s = db, "default"
+                return c, s
+
+            catalog, schema = _parse_catalog_schema(self._param.database)
+            if not catalog:
+                raise Exception("For Trino, `database` must be 'catalog.schema' or at least 'catalog'.")
+
+            http_scheme = "https" if os.environ.get("TRINO_USE_TLS", "0") == "1" else "http"
+            auth = None
+            if http_scheme == "https" and self._param.password:
+                auth = BasicAuthentication(self._param.username, self._param.password)
+
+            try:
+                db = trino.dbapi.connect(
+                    host=self._param.host,
+                    port=int(self._param.port or 8080),
+                    user=self._param.username or "ragflow",
+                    catalog=catalog,
+                    schema=schema or "default",
+                    http_scheme=http_scheme,
+                    auth=auth
+                )
+            except Exception as e:
+                raise Exception("Database Connection Failed! \n" + str(e))
+        elif self._param.db_type == 'IBM DB2':
+            import ibm_db
+            conn_str = (
+                f"DATABASE={self._param.database};"
+                f"HOSTNAME={self._param.host};"
+                f"PORT={self._param.port};"
+                f"PROTOCOL=TCPIP;"
+                f"UID={self._param.username};"
+                f"PWD={self._param.password};"
+            )
+            try:
+                conn = ibm_db.connect(conn_str, "", "")
+            except Exception as e:
+                raise Exception("Database Connection Failed! \n" + str(e))
+
+            sql_res = []
+            formalized_content = []
+            for single_sql in sqls:
+                single_sql = single_sql.replace("```", "").strip()
+                if not single_sql:
+                    continue
+                single_sql = re.sub(r"\[ID:[0-9]+\]", "", single_sql)
+
+                stmt = ibm_db.exec_immediate(conn, single_sql)
+                rows = []
+                row = ibm_db.fetch_assoc(stmt)
+                while row and len(rows) < self._param.max_records:
+                    rows.append(row)
+                    row = ibm_db.fetch_assoc(stmt)
+
+                if not rows:
+                    sql_res.append({"content": "No record in the database!"})
+                    continue
+
+                df = pd.DataFrame(rows)
+                for col in df.columns:
+                    if pd.api.types.is_datetime64_any_dtype(df[col]):
+                        df[col] = df[col].dt.strftime("%Y-%m-%d")
+
+                df = df.where(pd.notnull(df), None)
+
+                sql_res.append(convert_decimals(df.to_dict(orient="records")))
+                formalized_content.append(df.to_markdown(index=False, floatfmt=".6f"))
+
+            ibm_db.close(conn)
+
+            self.set_output("json", sql_res)
+            self.set_output("formalized_content", "\n\n".join(formalized_content))
+            return self.output("formalized_content")
         try:
             cursor = db.cursor()
         except Exception as e:
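Note: the Trino branch above uses the DB-API layer of the trino client package. A minimal standalone connection sketch under the same assumptions (host and catalog are placeholders; BasicAuthentication is only attached when running over HTTPS, mirroring the TRINO_USE_TLS switch in the hunk):

    import trino

    conn = trino.dbapi.connect(
        host="trino.example.com",  # placeholder
        port=8080,
        user="ragflow",
        catalog="hive",            # from the "catalog.schema" split in _parse_catalog_schema
        schema="default",
        http_scheme="http",        # "https" plus auth=BasicAuthentication(...) when TLS is on
    )
    cur = conn.cursor()
    cur.execute("SELECT 1")
    print(cur.fetchall())          # [[1]]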
@@ -150,6 +239,8 @@ class ExeSQL(ToolBase, ABC):
                 if pd.api.types.is_datetime64_any_dtype(single_res[col]):
                     single_res[col] = single_res[col].dt.strftime('%Y-%m-%d')

+            single_res = single_res.where(pd.notnull(single_res), None)
+
             sql_res.append(convert_decimals(single_res.to_dict(orient='records')))
             formalized_content.append(single_res.to_markdown(index=False, floatfmt=".6f"))

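Note: the where(pd.notnull(...), None) line added above matters for serialization: pandas represents SQL NULLs as NaN/NaT, which are not valid JSON, whereas None serializes cleanly. A small demonstration with recent pandas:

    import pandas as pd

    df = pd.DataFrame({"a": [1.0, None]})
    print(df.to_dict(orient="records"))
    # [{'a': 1.0}, {'a': nan}]  -- nan breaks strict JSON encoders
    print(df.where(pd.notnull(df), None).to_dict(orient="records"))
    # [{'a': 1.0}, {'a': None}]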
@@ -85,13 +85,7 @@ class PubMed(ToolBase, ABC):
                 self._retrieve_chunks(pubmedcnt.findall("PubmedArticle"),
                                       get_title=lambda child: child.find("MedlineCitation").find("Article").find("ArticleTitle").text,
                                       get_url=lambda child: "https://pubmed.ncbi.nlm.nih.gov/" + child.find("MedlineCitation").find("PMID").text,
-                                      get_content=lambda child: child.find("MedlineCitation") \
-                                      .find("Article") \
-                                      .find("Abstract") \
-                                      .find("AbstractText").text \
-                                      if child.find("MedlineCitation")\
-                                      .find("Article").find("Abstract") \
-                                      else "No abstract available")
+                                      get_content=lambda child: self._format_pubmed_content(child),)
                 return self.output("formalized_content")
             except Exception as e:
                 last_e = e
@@ -104,5 +98,50 @@ class PubMed(ToolBase, ABC):

         assert False, self.output()

+    def _format_pubmed_content(self, child):
+        """Extract structured reference info from PubMed XML"""
+        def safe_find(path):
+            node = child
+            for p in path.split("/"):
+                if node is None:
+                    return None
+                node = node.find(p)
+            return node.text if node is not None and node.text else None
+
+        title = safe_find("MedlineCitation/Article/ArticleTitle") or "No title"
+        abstract = safe_find("MedlineCitation/Article/Abstract/AbstractText") or "No abstract available"
+        journal = safe_find("MedlineCitation/Article/Journal/Title") or "Unknown Journal"
+        volume = safe_find("MedlineCitation/Article/Journal/JournalIssue/Volume") or "-"
+        issue = safe_find("MedlineCitation/Article/Journal/JournalIssue/Issue") or "-"
+        pages = safe_find("MedlineCitation/Article/Pagination/MedlinePgn") or "-"
+
+        # Authors
+        authors = []
+        for author in child.findall(".//AuthorList/Author"):
+            lastname = safe_find("LastName") or ""
+            forename = safe_find("ForeName") or ""
+            fullname = f"{forename} {lastname}".strip()
+            if fullname:
+                authors.append(fullname)
+        authors_str = ", ".join(authors) if authors else "Unknown Authors"
+
+        # DOI
+        doi = None
+        for eid in child.findall(".//ArticleId"):
+            if eid.attrib.get("IdType") == "doi":
+                doi = eid.text
+                break
+
+        return (
+            f"Title: {title}\n"
+            f"Authors: {authors_str}\n"
+            f"Journal: {journal}\n"
+            f"Volume: {volume}\n"
+            f"Issue: {issue}\n"
+            f"Pages: {pages}\n"
+            f"DOI: {doi or '-'}\n"
+            f"Abstract: {abstract.strip()}"
+        )
+
     def thoughts(self) -> str:
         return "Looking for scholarly papers on `{}`,” prioritising reputable sources.".format(self.get_input().get("query", "-_-!"))
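Note: one caveat in _format_pubmed_content as added above: safe_find closes over the article-level child, so the LastName/ForeName lookups inside the author loop search the article root rather than each Author element (ElementTree's find only matches direct children there), and authors will typically fall back to "Unknown Authors". A sketch of the author extraction reading from the loop variable instead:

    # Sketch: pull author names from each Author element, not the article root.
    def author_names(article):
        names = []
        for author in article.findall(".//AuthorList/Author"):
            forename = (author.findtext("ForeName") or "").strip()
            lastname = (author.findtext("LastName") or "").strip()
            fullname = f"{forename} {lastname}".strip()
            if fullname:
                names.append(fullname)
        return ", ".join(names) if names else "Unknown Authors"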
@@ -18,12 +18,14 @@ import re
 from abc import ABC
 from agent.tools.base import ToolParamBase, ToolBase, ToolMeta
 from api.db import LLMType
+from api.db.services.document_service import DocumentService
+from api.db.services.dialog_service import meta_filter
 from api.db.services.knowledgebase_service import KnowledgebaseService
 from api.db.services.llm_service import LLMBundle
 from api import settings
 from api.utils.api_utils import timeout
 from rag.app.tag import label_question
-from rag.prompts.generator import cross_languages, kb_prompt
+from rag.prompts.generator import cross_languages, kb_prompt, gen_meta_filter


 class RetrievalParam(ToolParamBase):
@@ -57,6 +59,8 @@ class RetrievalParam(ToolParamBase):
         self.empty_response = ""
         self.use_kg = False
         self.cross_languages = []
+        self.toc_enhance = False
+        self.meta_data_filter={}

     def check(self):
         self.check_decimal_float(self.similarity_threshold, "[Retrieval] Similarity threshold")
@@ -116,12 +120,27 @@ class Retrieval(ToolBase, ABC):
         vars = self.get_input_elements_from_text(kwargs["query"])
         vars = {k:o["value"] for k,o in vars.items()}
         query = self.string_format(kwargs["query"], vars)
+
+        doc_ids=[]
+        if self._param.meta_data_filter!={}:
+            metas = DocumentService.get_meta_by_kbs(kb_ids)
+            if self._param.meta_data_filter.get("method") == "auto":
+                chat_mdl = LLMBundle(self._canvas.get_tenant_id(), LLMType.CHAT)
+                filters = gen_meta_filter(chat_mdl, metas, query)
+                doc_ids.extend(meta_filter(metas, filters))
+                if not doc_ids:
+                    doc_ids = None
+            elif self._param.meta_data_filter.get("method") == "manual":
+                doc_ids.extend(meta_filter(metas, self._param.meta_data_filter["manual"]))
+                if not doc_ids:
+                    doc_ids = None
+
         if self._param.cross_languages:
             query = cross_languages(kbs[0].tenant_id, None, query, self._param.cross_languages)

         if kbs:
             query = re.sub(r"^user[::\s]*", "", query, flags=re.IGNORECASE)
-            kbinfos = settings.retrievaler.retrieval(
+            kbinfos = settings.retriever.retrieval(
                 query,
                 embd_mdl,
                 [kb.tenant_id for kb in kbs],
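Note: the meta_data_filter block above narrows retrieval to documents whose metadata match, either by letting a chat model generate filter conditions ("auto" via gen_meta_filter) or by applying user-supplied ones ("manual"); an empty match list becomes doc_ids = None so retrieval falls back to the whole knowledge base instead of matching nothing. A shape sketch of the manual path, with hypothetical metadata and a simplified equality-only filter (the real meta_filter supports richer operators):

    # Hypothetical stand-in for meta_filter: keep ids whose metadata satisfy
    # every {key, value} condition (equality only, for brevity).
    def simple_meta_filter(metas: dict, conditions: list) -> list:
        return [doc_id for doc_id, meta in metas.items()
                if all(meta.get(c["key"]) == c["value"] for c in conditions)]

    metas = {"doc1": {"author": "alice"}, "doc2": {"author": "bob"}}
    doc_ids = simple_meta_filter(metas, [{"key": "author", "value": "alice"}])
    print(doc_ids or None)  # ['doc1']; an empty result -> None disables the filter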
@@ -130,12 +149,18 @@ class Retrieval(ToolBase, ABC):
                 self._param.top_n,
                 self._param.similarity_threshold,
                 1 - self._param.keywords_similarity_weight,
+                doc_ids=doc_ids,
                 aggs=False,
                 rerank_mdl=rerank_mdl,
                 rank_feature=label_question(query, kbs),
             )
+            if self._param.toc_enhance:
+                chat_mdl = LLMBundle(self._canvas._tenant_id, LLMType.CHAT)
+                cks = settings.retriever.retrieval_by_toc(query, kbinfos["chunks"], [kb.tenant_id for kb in kbs], chat_mdl, self._param.top_n)
+                if cks:
+                    kbinfos["chunks"] = cks
             if self._param.use_kg:
-                ck = settings.kg_retrievaler.retrieval(query,
+                ck = settings.kg_retriever.retrieval(query,
                                                        [kb.tenant_id for kb in kbs],
                                                        kb_ids,
                                                        embd_mdl,
@@ -146,7 +171,7 @@ class Retrieval(ToolBase, ABC):
             kbinfos = {"chunks": [], "doc_aggs": []}

         if self._param.use_kg and kbs:
-            ck = settings.kg_retrievaler.retrieval(query, [kb.tenant_id for kb in kbs], filtered_kb_ids, embd_mdl, LLMBundle(kbs[0].tenant_id, LLMType.CHAT))
+            ck = settings.kg_retriever.retrieval(query, [kb.tenant_id for kb in kbs], filtered_kb_ids, embd_mdl, LLMBundle(kbs[0].tenant_id, LLMType.CHAT))
             if ck["content_with_weight"]:
                 ck["content"] = ck["content_with_weight"]
                 del ck["content_with_weight"]
@@ -85,7 +85,7 @@ class SearXNG(ToolBase, ABC):
             self.set_output("formalized_content", "")
             return ""

-        searxng_url = (kwargs.get("searxng_url") or getattr(self._param, "searxng_url", "") or "").strip()
+        searxng_url = (getattr(self._param, "searxng_url", "") or kwargs.get("searxng_url") or "").strip()
         # In try-run, if no URL configured, just return empty instead of raising
         if not searxng_url:
             self.set_output("formalized_content", "")
@@ -536,7 +536,7 @@ def list_chunks():
         )
         kb_ids = KnowledgebaseService.get_kb_ids(tenant_id)

-        res = settings.retrievaler.chunk_list(doc_id, tenant_id, kb_ids)
+        res = settings.retriever.chunk_list(doc_id, tenant_id, kb_ids)
         res = [
             {
                 "content": res_item["content_with_weight"],
@@ -884,7 +884,7 @@ def retrieval():
         if req.get("keyword", False):
             chat_mdl = LLMBundle(kbs[0].tenant_id, LLMType.CHAT)
             question += keyword_extraction(chat_mdl, question)
-        ranks = settings.retrievaler.retrieval(question, embd_mdl, kbs[0].tenant_id, kb_ids, page, size,
+        ranks = settings.retriever.retrieval(question, embd_mdl, kbs[0].tenant_id, kb_ids, page, size,
                                              similarity_threshold, vector_similarity_weight, top,
                                              doc_ids, rerank_mdl=rerank_mdl, highlight= highlight,
                                              rank_feature=label_question(question, kbs))
@@ -19,15 +19,19 @@ import re
 import sys
 from functools import partial

+import flask
 import trio
 from flask import request, Response
 from flask_login import login_required, current_user

-from agent.component.llm import LLM
+from agent.component import LLM
+from api import settings
 from api.db import CanvasCategory, FileType
 from api.db.services.canvas_service import CanvasTemplateService, UserCanvasService, API4ConversationService
 from api.db.services.document_service import DocumentService
 from api.db.services.file_service import FileService
+from api.db.services.pipeline_operation_log_service import PipelineOperationLogService
+from api.db.services.task_service import queue_dataflow, CANVAS_DEBUG_DOC_ID, TaskService
 from api.db.services.user_service import TenantService
 from api.db.services.user_canvas_version import UserCanvasVersionService
 from api.settings import RetCode
@@ -35,25 +39,19 @@ from api.utils import get_uuid
 from api.utils.api_utils import get_json_result, server_error_response, validate_request, get_data_error_result
 from agent.canvas import Canvas
 from peewee import MySQLDatabase, PostgresqlDatabase
-from api.db.db_models import APIToken
+from api.db.db_models import APIToken, Task
 import time

 from api.utils.file_utils import filename_type, read_potential_broken_pdf
+from rag.flow.pipeline import Pipeline
+from rag.nlp import search
 from rag.utils.redis_conn import REDIS_CONN


 @manager.route('/templates', methods=['GET'])  # noqa: F821
 @login_required
 def templates():
-    return get_json_result(data=[c.to_dict() for c in CanvasTemplateService.query(canvas_category=CanvasCategory.Agent)])
+    return get_json_result(data=[c.to_dict() for c in CanvasTemplateService.get_all()])


-@manager.route('/list', methods=['GET'])  # noqa: F821
-@login_required
-def canvas_list():
-    return get_json_result(data=sorted([c.to_dict() for c in \
-                                        UserCanvasService.query(user_id=current_user.id, canvas_category=CanvasCategory.Agent)], key=lambda x: x["update_time"]*-1)
-    )
-
-
 @manager.route('/rm', methods=['POST'])  # noqa: F821
@@ -77,9 +75,10 @@ def save():
     if not isinstance(req["dsl"], str):
         req["dsl"] = json.dumps(req["dsl"], ensure_ascii=False)
     req["dsl"] = json.loads(req["dsl"])
+    cate = req.get("canvas_category", CanvasCategory.Agent)
     if "id" not in req:
         req["user_id"] = current_user.id
-        if UserCanvasService.query(user_id=current_user.id, title=req["title"].strip(), canvas_category=CanvasCategory.Agent):
+        if UserCanvasService.query(user_id=current_user.id, title=req["title"].strip(), canvas_category=cate):
             return get_data_error_result(message=f"{req['title'].strip()} already exists.")
         req["id"] = get_uuid()
         if not UserCanvasService.save(**req):
@@ -101,7 +100,7 @@ def save():
 def get(canvas_id):
     if not UserCanvasService.accessible(canvas_id, current_user.id):
         return get_data_error_result(message="canvas not found.")
-    e, c = UserCanvasService.get_by_tenant_id(canvas_id)
+    e, c = UserCanvasService.get_by_canvas_id(canvas_id)
     return get_json_result(data=c)

@@ -148,6 +147,14 @@ def run():
     if not isinstance(cvs.dsl, str):
         cvs.dsl = json.dumps(cvs.dsl, ensure_ascii=False)

+    if cvs.canvas_category == CanvasCategory.DataFlow:
+        task_id = get_uuid()
+        Pipeline(cvs.dsl, tenant_id=current_user.id, doc_id=CANVAS_DEBUG_DOC_ID, task_id=task_id, flow_id=req["id"])
+        ok, error_message = queue_dataflow(tenant_id=user_id, flow_id=req["id"], task_id=task_id, file=files[0], priority=0)
+        if not ok:
+            return get_data_error_result(message=error_message)
+        return get_json_result(data={"message_id": task_id})
+
     try:
         canvas = Canvas(cvs.dsl, current_user.id, req["id"])
     except Exception as e:
@@ -173,6 +180,44 @@ def run():
     return resp


+@manager.route('/rerun', methods=['POST'])  # noqa: F821
+@validate_request("id", "dsl", "component_id")
+@login_required
+def rerun():
+    req = request.json
+    doc = PipelineOperationLogService.get_documents_info(req["id"])
+    if not doc:
+        return get_data_error_result(message="Document not found.")
+    doc = doc[0]
+    if 0 < doc["progress"] < 1:
+        return get_data_error_result(message=f"`{doc['name']}` is processing...")
+
+    if settings.docStoreConn.indexExist(search.index_name(current_user.id), doc["kb_id"]):
+        settings.docStoreConn.delete({"doc_id": doc["id"]}, search.index_name(current_user.id), doc["kb_id"])
+    doc["progress_msg"] = ""
+    doc["chunk_num"] = 0
+    doc["token_num"] = 0
+    DocumentService.clear_chunk_num_when_rerun(doc["id"])
+    DocumentService.update_by_id(id, doc)
+    TaskService.filter_delete([Task.doc_id == id])
+
+    dsl = req["dsl"]
+    dsl["path"] = [req["component_id"]]
+    PipelineOperationLogService.update_by_id(req["id"], {"dsl": dsl})
+    queue_dataflow(tenant_id=current_user.id, flow_id=req["id"], task_id=get_uuid(), doc_id=doc["id"], priority=0, rerun=True)
+    return get_json_result(data=True)
+
+
+@manager.route('/cancel/<task_id>', methods=['PUT'])  # noqa: F821
+@login_required
+def cancel(task_id):
+    try:
+        REDIS_CONN.set(f"{task_id}-cancel", "x")
+    except Exception as e:
+        logging.exception(e)
+    return get_json_result(data=True)
+
+
 @manager.route('/reset', methods=['POST'])  # noqa: F821
 @validate_request("id")
 @login_required
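Note: the /cancel/<task_id> route above implements cooperative cancellation: it only sets a "{task_id}-cancel" key in Redis, and the worker running the dataflow is expected to poll for that key between steps. A minimal sketch of the polling side, assuming a redis-py style client (the repo wraps its own client in REDIS_CONN):

    import redis

    r = redis.Redis()  # assumption: default local client, for illustration only

    def should_cancel(task_id: str) -> bool:
        # The API route sets this key; workers check it between pipeline steps.
        return r.get(f"{task_id}-cancel") is not None

    for step in range(100):
        if should_cancel("task-123"):
            break
        # ... run the next pipeline step ...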
@@ -198,7 +243,7 @@ def reset():

 @manager.route("/upload/<canvas_id>", methods=["POST"])  # noqa: F821
 def upload(canvas_id):
-    e, cvs = UserCanvasService.get_by_tenant_id(canvas_id)
+    e, cvs = UserCanvasService.get_by_canvas_id(canvas_id)
     if not e:
         return get_data_error_result(message="canvas not found.")

@@ -348,6 +393,65 @@ def test_db_connect():
         cursor = db.cursor()
         cursor.execute("SELECT 1")
         cursor.close()
+    elif req["db_type"] == 'IBM DB2':
+        import ibm_db
+        conn_str = (
+            f"DATABASE={req['database']};"
+            f"HOSTNAME={req['host']};"
+            f"PORT={req['port']};"
+            f"PROTOCOL=TCPIP;"
+            f"UID={req['username']};"
+            f"PWD={req['password']};"
+        )
+        logging.info(conn_str)
+        conn = ibm_db.connect(conn_str, "", "")
+        stmt = ibm_db.exec_immediate(conn, "SELECT 1 FROM sysibm.sysdummy1")
+        ibm_db.fetch_assoc(stmt)
+        ibm_db.close(conn)
+        return get_json_result(data="Database Connection Successful!")
+    elif req["db_type"] == 'trino':
+        def _parse_catalog_schema(db: str):
+            if not db:
+                return None, None
+            if "." in db:
+                c, s = db.split(".", 1)
+            elif "/" in db:
+                c, s = db.split("/", 1)
+            else:
+                c, s = db, "default"
+            return c, s
+        try:
+            import trino
+            import os
+            from trino.auth import BasicAuthentication
+        except Exception:
+            return server_error_response("Missing dependency 'trino'. Please install: pip install trino")
+
+        catalog, schema = _parse_catalog_schema(req["database"])
+        if not catalog:
+            return server_error_response("For Trino, 'database' must be 'catalog.schema' or at least 'catalog'.")
+
+        http_scheme = "https" if os.environ.get("TRINO_USE_TLS", "0") == "1" else "http"
+
+        auth = None
+        if http_scheme == "https" and req.get("password"):
+            auth = BasicAuthentication(req.get("username") or "ragflow", req["password"])
+
+        conn = trino.dbapi.connect(
+            host=req["host"],
+            port=int(req["port"] or 8080),
+            user=req["username"] or "ragflow",
+            catalog=catalog,
+            schema=schema or "default",
+            http_scheme=http_scheme,
+            auth=auth
+        )
+        cur = conn.cursor()
+        cur.execute("SELECT 1")
+        cur.fetchall()
+        cur.close()
+        conn.close()
+        return get_json_result(data="Database Connection Successful!")
     else:
         return server_error_response("Unsupported database type.")
     if req["db_type"] != 'mssql':
@@ -383,22 +487,32 @@ def getversion(version_id):
         return get_json_result(data=f"Error getting history file: {e}")


-@manager.route('/listteam', methods=['GET'])  # noqa: F821
+@manager.route('/list', methods=['GET'])  # noqa: F821
 @login_required
 def list_canvas():
     keywords = request.args.get("keywords", "")
-    page_number = int(request.args.get("page", 1))
+    page_number = int(request.args.get("page", 0))
-    items_per_page = int(request.args.get("page_size", 150))
+    items_per_page = int(request.args.get("page_size", 0))
     orderby = request.args.get("orderby", "create_time")
-    desc = request.args.get("desc", True)
+    canvas_category = request.args.get("canvas_category")
-    try:
+    if request.args.get("desc", "true").lower() == "false":
+        desc = False
+    else:
+        desc = True
+    owner_ids = [id for id in request.args.get("owner_ids", "").strip().split(",") if id]
+    if not owner_ids:
         tenants = TenantService.get_joined_tenants_by_user_id(current_user.id)
+        tenants = [m["tenant_id"] for m in tenants]
+        tenants.append(current_user.id)
         canvas, total = UserCanvasService.get_by_tenant_ids(
-            [m["tenant_id"] for m in tenants], current_user.id, page_number,
+            tenants, current_user.id, page_number,
-            items_per_page, orderby, desc, keywords, canvas_category=CanvasCategory.Agent)
+            items_per_page, orderby, desc, keywords, canvas_category)
-        return get_json_result(data={"canvas": canvas, "total": total})
+    else:
-    except Exception as e:
+        tenants = owner_ids
-        return server_error_response(e)
+        canvas, total = UserCanvasService.get_by_tenant_ids(
+            tenants, current_user.id, 0,
+            0, orderby, desc, keywords, canvas_category)
+    return get_json_result(data={"canvas": canvas, "total": total})


 @manager.route('/setting', methods=['POST'])  # noqa: F821
@@ -483,3 +597,11 @@ def prompts():
         #"context_ranking": RANK_MEMORY,
         "citation_guidelines": CITATION_PROMPT_TEMPLATE
     })
+
+
+@manager.route('/download', methods=['GET'])  # noqa: F821
+def download():
+    id = request.args.get("id")
+    created_by = request.args.get("created_by")
+    blob = FileService.get_blob(created_by, id)
+    return flask.make_response(blob)
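Note: the new /download route above returns the stored blob directly through flask.make_response, reading id and created_by from the query string; as written it carries no @login_required decorator. A hypothetical client-side sketch (the URL prefix depends on how this blueprint is mounted, so the path below is an assumption):

    import requests

    # Placeholders: adjust host, prefix, and ids to the actual deployment.
    resp = requests.get("http://localhost:9380/v1/canvas/download",
                        params={"id": "<file-id>", "created_by": "<user-id>"})
    with open("downloaded.bin", "wb") as f:
        f.write(resp.content)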
@@ -60,7 +60,7 @@ def list_chunk():
     }
     if "available_int" in req:
         query["available_int"] = int(req["available_int"])
-    sres = settings.retrievaler.search(query, search.index_name(tenant_id), kb_ids, highlight=True)
+    sres = settings.retriever.search(query, search.index_name(tenant_id), kb_ids, highlight=["content_ltks"])
     res = {"total": sres.total, "chunks": [], "doc": doc.to_dict()}
     for id in sres.ids:
         d = {
@@ -346,15 +346,16 @@ def retrieval_test():
             question += keyword_extraction(chat_mdl, question)

         labels = label_question(question, [kb])
-        ranks = settings.retrievaler.retrieval(question, embd_mdl, tenant_ids, kb_ids, page, size,
+        ranks = settings.retriever.retrieval(question, embd_mdl, tenant_ids, kb_ids, page, size,
                                              float(req.get("similarity_threshold", 0.0)),
                                              float(req.get("vector_similarity_weight", 0.3)),
                                              top,
-                                             doc_ids, rerank_mdl=rerank_mdl, highlight=req.get("highlight"),
+                                             doc_ids, rerank_mdl=rerank_mdl,
+                                             highlight=req.get("highlight", False),
                                              rank_feature=labels
                                              )
         if use_kg:
-            ck = settings.kg_retrievaler.retrieval(question,
+            ck = settings.kg_retriever.retrieval(question,
                                                  tenant_ids,
                                                  kb_ids,
                                                  embd_mdl,
@@ -384,7 +385,7 @@ def knowledge_graph():
         "doc_ids": [doc_id],
         "knowledge_graph_kwd": ["graph", "mind_map"]
     }
-    sres = settings.retrievaler.search(req, search.index_name(tenant_id), kb_ids)
+    sres = settings.retriever.search(req, search.index_name(tenant_id), kb_ids)
     obj = {"graph": {}, "mind_map": {}}
     for id in sres.ids[:2]:
         ty = sres.field[id]["knowledge_graph_kwd"]
@ -1,353 +0,0 @@
|
|||||||
#
|
|
||||||
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
#
|
|
||||||
import json
|
|
||||||
import re
|
|
||||||
import sys
|
|
||||||
import time
|
|
||||||
from functools import partial
|
|
||||||
|
|
||||||
import trio
|
|
||||||
from flask import request
|
|
||||||
from flask_login import current_user, login_required
|
|
||||||
|
|
||||||
from agent.canvas import Canvas
|
|
||||||
from agent.component.llm import LLM
|
|
||||||
from api.db import CanvasCategory, FileType
|
|
||||||
from api.db.services.canvas_service import CanvasTemplateService, UserCanvasService
|
|
||||||
from api.db.services.document_service import DocumentService
|
|
||||||
from api.db.services.file_service import FileService
|
|
||||||
from api.db.services.task_service import queue_dataflow
|
|
||||||
from api.db.services.user_canvas_version import UserCanvasVersionService
|
|
||||||
from api.db.services.user_service import TenantService
|
|
||||||
from api.settings import RetCode
|
|
||||||
from api.utils import get_uuid
|
|
||||||
from api.utils.api_utils import get_data_error_result, get_json_result, server_error_response, validate_request
|
|
||||||
from api.utils.file_utils import filename_type, read_potential_broken_pdf
|
|
||||||
from rag.flow.pipeline import Pipeline
|
|
||||||
|
|
||||||
|
|
||||||
@manager.route("/templates", methods=["GET"]) # noqa: F821
|
|
||||||
@login_required
|
|
||||||
def templates():
|
|
||||||
return get_json_result(data=[c.to_dict() for c in CanvasTemplateService.query(canvas_category=CanvasCategory.DataFlow)])
|
|
||||||
|
|
||||||
|
|
||||||
@manager.route("/list", methods=["GET"]) # noqa: F821
|
|
||||||
@login_required
|
|
||||||
def canvas_list():
|
|
||||||
return get_json_result(data=sorted([c.to_dict() for c in UserCanvasService.query(user_id=current_user.id, canvas_category=CanvasCategory.DataFlow)], key=lambda x: x["update_time"] * -1))
|
|
||||||
|
|
||||||
|
|
||||||
@manager.route("/rm", methods=["POST"]) # noqa: F821
|
|
||||||
@validate_request("canvas_ids")
|
|
||||||
@login_required
|
|
||||||
def rm():
|
|
||||||
for i in request.json["canvas_ids"]:
|
|
||||||
if not UserCanvasService.accessible(i, current_user.id):
|
|
||||||
return get_json_result(data=False, message="Only owner of canvas authorized for this operation.", code=RetCode.OPERATING_ERROR)
|
|
||||||
UserCanvasService.delete_by_id(i)
|
|
||||||
return get_json_result(data=True)
|
|
||||||
|
|
||||||
|
|
||||||
@manager.route("/set", methods=["POST"]) # noqa: F821
|
|
||||||
@validate_request("dsl", "title")
|
|
||||||
@login_required
|
|
||||||
def save():
|
|
||||||
req = request.json
|
|
||||||
if not isinstance(req["dsl"], str):
|
|
||||||
req["dsl"] = json.dumps(req["dsl"], ensure_ascii=False)
|
|
||||||
req["dsl"] = json.loads(req["dsl"])
|
|
||||||
req["canvas_category"] = CanvasCategory.DataFlow
|
|
||||||
if "id" not in req:
|
|
||||||
req["user_id"] = current_user.id
|
|
||||||
if UserCanvasService.query(user_id=current_user.id, title=req["title"].strip(), canvas_category=CanvasCategory.DataFlow):
|
|
||||||
return get_data_error_result(message=f"{req['title'].strip()} already exists.")
|
|
||||||
req["id"] = get_uuid()
|
|
||||||
|
|
||||||
if not UserCanvasService.save(**req):
|
|
||||||
return get_data_error_result(message="Fail to save canvas.")
|
|
||||||
else:
|
|
||||||
if not UserCanvasService.accessible(req["id"], current_user.id):
|
|
||||||
return get_json_result(data=False, message="Only owner of canvas authorized for this operation.", code=RetCode.OPERATING_ERROR)
|
|
||||||
UserCanvasService.update_by_id(req["id"], req)
|
|
||||||
# save version
|
|
||||||
UserCanvasVersionService.insert(user_canvas_id=req["id"], dsl=req["dsl"], title="{0}_{1}".format(req["title"], time.strftime("%Y_%m_%d_%H_%M_%S")))
|
|
||||||
UserCanvasVersionService.delete_all_versions(req["id"])
|
|
||||||
return get_json_result(data=req)
|
|
||||||
|
|
||||||
|
|
||||||
@manager.route("/get/<canvas_id>", methods=["GET"]) # noqa: F821
|
|
||||||
@login_required
|
|
||||||
def get(canvas_id):
|
|
||||||
if not UserCanvasService.accessible(canvas_id, current_user.id):
|
|
||||||
return get_data_error_result(message="canvas not found.")
|
|
||||||
e, c = UserCanvasService.get_by_tenant_id(canvas_id)
|
|
||||||
return get_json_result(data=c)
|
|
||||||
|
|
||||||
|
|
||||||
@manager.route("/run", methods=["POST"]) # noqa: F821
|
|
||||||
@validate_request("id")
|
|
||||||
@login_required
|
|
||||||
def run():
|
|
||||||
req = request.json
|
|
||||||
flow_id = req.get("id", "")
|
|
||||||
doc_id = req.get("doc_id", "")
|
|
||||||
if not all([flow_id, doc_id]):
|
|
||||||
return get_data_error_result(message="id and doc_id are required.")
|
|
||||||
|
|
||||||
if not DocumentService.get_by_id(doc_id):
|
|
||||||
return get_data_error_result(message=f"Document for {doc_id} not found.")
|
|
||||||
|
|
||||||
user_id = req.get("user_id", current_user.id)
|
|
||||||
if not UserCanvasService.accessible(flow_id, current_user.id):
|
|
||||||
return get_json_result(data=False, message="Only owner of canvas authorized for this operation.", code=RetCode.OPERATING_ERROR)
|
|
||||||
|
|
||||||
e, cvs = UserCanvasService.get_by_id(flow_id)
|
|
||||||
if not e:
|
|
||||||
return get_data_error_result(message="canvas not found.")
|
|
||||||
|
|
||||||
if not isinstance(cvs.dsl, str):
|
|
||||||
cvs.dsl = json.dumps(cvs.dsl, ensure_ascii=False)
|
|
||||||
|
|
||||||
task_id = get_uuid()
|
|
||||||
|
|
||||||
ok, error_message = queue_dataflow(dsl=cvs.dsl, tenant_id=user_id, doc_id=doc_id, task_id=task_id, flow_id=flow_id, priority=0)
|
|
||||||
if not ok:
|
|
||||||
return server_error_response(error_message)
|
|
||||||
|
|
||||||
return get_json_result(data={"task_id": task_id, "flow_id": flow_id})
|
|
||||||
|
|
||||||
|
|
||||||
@manager.route("/reset", methods=["POST"]) # noqa: F821
|
|
||||||
@validate_request("id")
|
|
||||||
@login_required
|
|
||||||
def reset():
|
|
||||||
req = request.json
|
|
||||||
flow_id = req.get("id", "")
|
|
||||||
if not flow_id:
|
|
||||||
return get_data_error_result(message="id is required.")
|
|
||||||
|
|
||||||
if not UserCanvasService.accessible(flow_id, current_user.id):
|
|
||||||
return get_json_result(data=False, message="Only owner of canvas authorized for this operation.", code=RetCode.OPERATING_ERROR)
|
|
||||||
|
|
||||||
task_id = req.get("task_id", "")
|
|
||||||
|
|
||||||
try:
|
|
||||||
e, user_canvas = UserCanvasService.get_by_id(req["id"])
|
|
||||||
if not e:
|
|
||||||
return get_data_error_result(message="canvas not found.")
|
|
||||||
|
|
||||||
dataflow = Pipeline(dsl=json.dumps(user_canvas.dsl), tenant_id=current_user.id, flow_id=flow_id, task_id=task_id)
|
|
||||||
dataflow.reset()
|
|
||||||
req["dsl"] = json.loads(str(dataflow))
|
|
||||||
UserCanvasService.update_by_id(req["id"], {"dsl": req["dsl"]})
|
|
||||||
return get_json_result(data=req["dsl"])
|
|
||||||
except Exception as e:
|
|
||||||
return server_error_response(e)
|
|
||||||
|
|
||||||
|
|
||||||
@manager.route("/upload/<canvas_id>", methods=["POST"]) # noqa: F821
|
|
||||||
def upload(canvas_id):
|
|
||||||
e, cvs = UserCanvasService.get_by_tenant_id(canvas_id)
|
|
||||||
if not e:
|
|
||||||
return get_data_error_result(message="canvas not found.")
|
|
||||||
|
|
||||||
user_id = cvs["user_id"]
|
|
||||||
|
|
||||||
def structured(filename, filetype, blob, content_type):
|
|
||||||
nonlocal user_id
|
|
||||||
if filetype == FileType.PDF.value:
|
|
||||||
blob = read_potential_broken_pdf(blob)
|
|
||||||
|
|
||||||
location = get_uuid()
|
|
||||||
FileService.put_blob(user_id, location, blob)
|
|
||||||
|
|
||||||
return {
|
|
||||||
"id": location,
|
|
||||||
"name": filename,
|
|
||||||
"size": sys.getsizeof(blob),
|
|
||||||
"extension": filename.split(".")[-1].lower(),
|
|
||||||
"mime_type": content_type,
|
|
||||||
"created_by": user_id,
|
|
||||||
"created_at": time.time(),
|
|
||||||
"preview_url": None,
|
|
||||||
}
|
|
||||||
|
|
||||||
if request.args.get("url"):
|
|
||||||
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CrawlResult, DefaultMarkdownGenerator, PruningContentFilter
|
|
||||||
|
|
||||||
try:
|
|
||||||
url = request.args.get("url")
|
|
||||||
filename = re.sub(r"\?.*", "", url.split("/")[-1])
|
|
||||||
|
|
||||||
async def adownload():
|
|
||||||
browser_config = BrowserConfig(
|
|
||||||
headless=True,
|
|
||||||
verbose=False,
|
|
||||||
)
|
|
||||||
async with AsyncWebCrawler(config=browser_config) as crawler:
|
|
||||||
crawler_config = CrawlerRunConfig(markdown_generator=DefaultMarkdownGenerator(content_filter=PruningContentFilter()), pdf=True, screenshot=False)
|
|
||||||
result: CrawlResult = await crawler.arun(url=url, config=crawler_config)
|
|
||||||
return result
|
|
||||||
|
|
||||||
page = trio.run(adownload())
|
|
||||||
if page.pdf:
|
|
||||||
if filename.split(".")[-1].lower() != "pdf":
|
|
||||||
filename += ".pdf"
|
|
||||||
return get_json_result(data=structured(filename, "pdf", page.pdf, page.response_headers["content-type"]))
|
|
||||||
|
|
||||||
return get_json_result(data=structured(filename, "html", str(page.markdown).encode("utf-8"), page.response_headers["content-type"], user_id))
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
return server_error_response(e)
|
|
||||||
|
|
||||||
file = request.files["file"]
|
|
||||||
try:
|
|
||||||
DocumentService.check_doc_health(user_id, file.filename)
|
|
||||||
return get_json_result(data=structured(file.filename, filename_type(file.filename), file.read(), file.content_type))
|
|
||||||
except Exception as e:
|
|
||||||
return server_error_response(e)
|
|
||||||
|
|
||||||
|
|
||||||
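# Illustrative sketch (assumed host/prefix): /upload/<canvas_id> accepts either a
# multipart file or a ?url=... query parameter; the latter is crawled and, where
# possible, converted to PDF before being stored.
#
#   import requests
#   with open("report.pdf", "rb") as fh:
#       r = requests.post("http://localhost:9380/v1/canvas/upload/<canvas_id>",
#                         files={"file": fh})
#   meta = r.json()["data"]  # id, name, size, extension, mime_type, ...
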
@manager.route("/input_form", methods=["GET"]) # noqa: F821
|
|
||||||
@login_required
|
|
||||||
def input_form():
|
|
||||||
flow_id = request.args.get("id")
|
|
||||||
cpn_id = request.args.get("component_id")
|
|
||||||
try:
|
|
||||||
e, user_canvas = UserCanvasService.get_by_id(flow_id)
|
|
||||||
if not e:
|
|
||||||
return get_data_error_result(message="canvas not found.")
|
|
||||||
if not UserCanvasService.query(user_id=current_user.id, id=flow_id):
|
|
||||||
return get_json_result(data=False, message="Only owner of canvas authorized for this operation.", code=RetCode.OPERATING_ERROR)
|
|
||||||
|
|
||||||
dataflow = Pipeline(dsl=json.dumps(user_canvas.dsl), tenant_id=current_user.id, flow_id=flow_id, task_id="")
|
|
||||||
|
|
||||||
return get_json_result(data=dataflow.get_component_input_form(cpn_id))
|
|
||||||
except Exception as e:
|
|
||||||
return server_error_response(e)
|
|
||||||
|
|
||||||
|
|
||||||
@manager.route("/debug", methods=["POST"]) # noqa: F821
|
|
||||||
@validate_request("id", "component_id", "params")
|
|
||||||
@login_required
|
|
||||||
def debug():
|
|
||||||
req = request.json
|
|
||||||
if not UserCanvasService.accessible(req["id"], current_user.id):
|
|
||||||
return get_json_result(data=False, message="Only owner of canvas authorized for this operation.", code=RetCode.OPERATING_ERROR)
|
|
||||||
try:
|
|
||||||
e, user_canvas = UserCanvasService.get_by_id(req["id"])
|
|
||||||
canvas = Canvas(json.dumps(user_canvas.dsl), current_user.id)
|
|
||||||
canvas.reset()
|
|
||||||
canvas.message_id = get_uuid()
|
|
||||||
component = canvas.get_component(req["component_id"])["obj"]
|
|
||||||
component.reset()
|
|
||||||
|
|
||||||
if isinstance(component, LLM):
|
|
||||||
component.set_debug_inputs(req["params"])
|
|
||||||
component.invoke(**{k: o["value"] for k, o in req["params"].items()})
|
|
||||||
outputs = component.output()
|
|
||||||
for k in outputs.keys():
|
|
||||||
if isinstance(outputs[k], partial):
|
|
||||||
txt = ""
|
|
||||||
for c in outputs[k]():
|
|
||||||
txt += c
|
|
||||||
outputs[k] = txt
|
|
||||||
return get_json_result(data=outputs)
|
|
||||||
except Exception as e:
|
|
||||||
return server_error_response(e)
|
|
||||||
|
|
||||||
|
|
||||||
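# Illustrative sketch: judging by the invoke() call above, "params" maps each
# component input to an object carrying its value. A /debug request body might
# therefore look like this (the component id and input name are assumptions):
#
#   {"id": "<flow_id>", "component_id": "LLM:0",
#    "params": {"query": {"value": "What is RAGFlow?"}}}
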
# API: list all saved DSL versions of a canvas
@manager.route("/getlistversion/<canvas_id>", methods=["GET"])  # noqa: F821
@login_required
def getlistversion(canvas_id):
    try:
        versions = sorted([c.to_dict() for c in UserCanvasVersionService.list_by_canvas_id(canvas_id)], key=lambda x: x["update_time"] * -1)
        return get_json_result(data=versions)
    except Exception as e:
        return get_data_error_result(message=f"Error getting history files: {e}")

# API: fetch one saved DSL version of a canvas
@manager.route("/getversion/<version_id>", methods=["GET"])  # noqa: F821
@login_required
def getversion(version_id):
    try:
        e, version = UserCanvasVersionService.get_by_id(version_id)
        if version:
            return get_json_result(data=version.to_dict())
        return get_data_error_result(message="Version not found.")
    except Exception as e:
        return get_json_result(data=f"Error getting history file: {e}")

@manager.route("/listteam", methods=["GET"]) # noqa: F821
|
|
||||||
@login_required
|
|
||||||
def list_canvas():
|
|
||||||
keywords = request.args.get("keywords", "")
|
|
||||||
page_number = int(request.args.get("page", 1))
|
|
||||||
items_per_page = int(request.args.get("page_size", 150))
|
|
||||||
orderby = request.args.get("orderby", "create_time")
|
|
||||||
desc = request.args.get("desc", True)
|
|
||||||
try:
|
|
||||||
tenants = TenantService.get_joined_tenants_by_user_id(current_user.id)
|
|
||||||
canvas, total = UserCanvasService.get_by_tenant_ids(
|
|
||||||
[m["tenant_id"] for m in tenants], current_user.id, page_number, items_per_page, orderby, desc, keywords, canvas_category=CanvasCategory.DataFlow
|
|
||||||
)
|
|
||||||
return get_json_result(data={"canvas": canvas, "total": total})
|
|
||||||
except Exception as e:
|
|
||||||
return server_error_response(e)
|
|
||||||
|
|
||||||
|
|
||||||
@manager.route("/setting", methods=["POST"]) # noqa: F821
|
|
||||||
@validate_request("id", "title", "permission")
|
|
||||||
@login_required
|
|
||||||
def setting():
|
|
||||||
req = request.json
|
|
||||||
req["user_id"] = current_user.id
|
|
||||||
|
|
||||||
if not UserCanvasService.accessible(req["id"], current_user.id):
|
|
||||||
return get_json_result(data=False, message="Only owner of canvas authorized for this operation.", code=RetCode.OPERATING_ERROR)
|
|
||||||
|
|
||||||
e, flow = UserCanvasService.get_by_id(req["id"])
|
|
||||||
if not e:
|
|
||||||
return get_data_error_result(message="canvas not found.")
|
|
||||||
flow = flow.to_dict()
|
|
||||||
flow["title"] = req["title"]
|
|
||||||
for key in ("description", "permission", "avatar"):
|
|
||||||
if value := req.get(key):
|
|
||||||
flow[key] = value
|
|
||||||
|
|
||||||
num = UserCanvasService.update_by_id(req["id"], flow)
|
|
||||||
return get_json_result(data=num)
|
|
||||||
|
|
||||||
|
|
||||||
@manager.route("/trace", methods=["GET"]) # noqa: F821
|
|
||||||
def trace():
|
|
||||||
dataflow_id = request.args.get("dataflow_id")
|
|
||||||
task_id = request.args.get("task_id")
|
|
||||||
if not all([dataflow_id, task_id]):
|
|
||||||
return get_data_error_result(message="dataflow_id and task_id are required.")
|
|
||||||
|
|
||||||
e, dataflow_canvas = UserCanvasService.get_by_id(dataflow_id)
|
|
||||||
if not e:
|
|
||||||
return get_data_error_result(message="dataflow not found.")
|
|
||||||
|
|
||||||
dsl_str = json.dumps(dataflow_canvas.dsl, ensure_ascii=False)
|
|
||||||
dataflow = Pipeline(dsl=dsl_str, tenant_id=dataflow_canvas.user_id, flow_id=dataflow_id, task_id=task_id)
|
|
||||||
log = dataflow.fetch_logs()
|
|
||||||
|
|
||||||
return get_json_result(data=log)
|
|
||||||
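# Illustrative sketch (assumed host/prefix): once /run has returned, execution
# logs can be polled from /trace with the two IDs it handed back.
#
#   import requests
#   r = requests.get("http://localhost:9380/v1/canvas/trace",
#                    params={"dataflow_id": "<flow_id>", "task_id": "<task_id>"})
#   print(r.json()["data"])
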
@@ -24,6 +24,7 @@ from flask import request
 from flask_login import current_user, login_required

 from api import settings
+from api.common.check_team_permission import check_kb_team_permission
 from api.constants import FILE_NAME_LEN_LIMIT, IMG_BASE64_PREFIX
 from api.db import VALID_FILE_TYPES, VALID_TASK_STATUS, FileSource, FileType, ParserType, TaskStatus
 from api.db.db_models import File, Task
@@ -32,7 +33,7 @@ from api.db.services.document_service import DocumentService, doc_upload_and_par
 from api.db.services.file2document_service import File2DocumentService
 from api.db.services.file_service import FileService
 from api.db.services.knowledgebase_service import KnowledgebaseService
-from api.db.services.task_service import TaskService, cancel_all_task_of, queue_tasks
+from api.db.services.task_service import TaskService, cancel_all_task_of, queue_tasks, queue_dataflow
 from api.db.services.user_service import UserTenantService
 from api.utils import get_uuid
 from api.utils.api_utils import (
@@ -44,7 +45,7 @@ from api.utils.api_utils import (
 from api.utils.file_utils import filename_type, get_project_base_directory, thumbnail
 from api.utils.web_utils import CONTENT_TYPE_MAP, html2pdf, is_valid_url
 from deepdoc.parser.html_parser import RAGFlowHtmlParser
-from rag.nlp import search
+from rag.nlp import search, rag_tokenizer
 from rag.utils.storage_factory import STORAGE_IMPL

@@ -68,8 +69,10 @@ def upload():
         e, kb = KnowledgebaseService.get_by_id(kb_id)
         if not e:
             raise LookupError("Can't find this knowledgebase!")
-        err, files = FileService.upload_document(kb, file_objs, current_user.id)
+        if not check_kb_team_permission(kb, current_user.id):
+            return get_json_result(data=False, message="No authorization.", code=settings.RetCode.AUTHENTICATION_ERROR)
+
+        err, files = FileService.upload_document(kb, file_objs, current_user.id)
         if err:
             return get_json_result(data=files, message="\n".join(err), code=settings.RetCode.SERVER_ERROR)
@@ -94,6 +97,8 @@ def web_crawl():
     e, kb = KnowledgebaseService.get_by_id(kb_id)
     if not e:
         raise LookupError("Can't find this knowledgebase!")
+    if not check_kb_team_permission(kb, current_user.id):
+        return get_json_result(data=False, message="No authorization.", code=settings.RetCode.AUTHENTICATION_ERROR)

     blob = html2pdf(url)
     if not blob:
@@ -182,6 +187,7 @@ def create():
             "id": get_uuid(),
             "kb_id": kb.id,
             "parser_id": kb.parser_id,
+            "pipeline_id": kb.pipeline_id,
             "parser_config": kb.parser_config,
             "created_by": current_user.id,
             "type": FileType.VIRTUAL,
@@ -479,8 +485,11 @@ def run():
                     kb_table_num_map[kb_id] = count
                 if kb_table_num_map[kb_id] <= 0:
                     KnowledgebaseService.delete_field_map(kb_id)
-            bucket, name = File2DocumentService.get_storage_address(doc_id=doc["id"])
-            queue_tasks(doc, bucket, name, 0)
+            if doc.get("pipeline_id", ""):
+                queue_dataflow(tenant_id, flow_id=doc["pipeline_id"], task_id=get_uuid(), doc_id=id)
+            else:
+                bucket, name = File2DocumentService.get_storage_address(doc_id=doc["id"])
+                queue_tasks(doc, bucket, name, 0)

         return get_json_result(data=True)
     except Exception as e:
@@ -515,6 +524,21 @@ def rename():
             e, file = FileService.get_by_id(informs[0].file_id)
             FileService.update_by_id(file.id, {"name": req["name"]})

+        tenant_id = DocumentService.get_tenant_id(req["doc_id"])
+        title_tks = rag_tokenizer.tokenize(req["name"])
+        es_body = {
+            "docnm_kwd": req["name"],
+            "title_tks": title_tks,
+            "title_sm_tks": rag_tokenizer.fine_grained_tokenize(title_tks),
+        }
+        if settings.docStoreConn.indexExist(search.index_name(tenant_id), doc.kb_id):
+            settings.docStoreConn.update(
+                {"doc_id": req["doc_id"]},
+                es_body,
+                search.index_name(tenant_id),
+                doc.kb_id,
+            )
+
         return get_json_result(data=True)
     except Exception as e:
         return server_error_response(e)
@@ -546,31 +570,22 @@ def get(doc_id):


 @manager.route("/change_parser", methods=["POST"])  # noqa: F821
 @login_required
-@validate_request("doc_id", "parser_id")
+@validate_request("doc_id")
 def change_parser():
     req = request.json
     if not DocumentService.accessible(req["doc_id"], current_user.id):
         return get_json_result(data=False, message="No authorization.", code=settings.RetCode.AUTHENTICATION_ERROR)
-    try:
-        e, doc = DocumentService.get_by_id(req["doc_id"])
+    e, doc = DocumentService.get_by_id(req["doc_id"])
+    if not e:
+        return get_data_error_result(message="Document not found!")
+
+    def reset_doc():
+        nonlocal doc
+        e = DocumentService.update_by_id(doc.id, {"pipeline_id": req["pipeline_id"], "parser_id": req["parser_id"], "progress": 0, "progress_msg": "", "run": TaskStatus.UNSTART.value})
         if not e:
             return get_data_error_result(message="Document not found!")
-        if doc.parser_id.lower() == req["parser_id"].lower():
-            if "parser_config" in req:
-                if req["parser_config"] == doc.parser_config:
-                    return get_json_result(data=True)
-            else:
-                return get_json_result(data=True)
-
-        if (doc.type == FileType.VISUAL and req["parser_id"] != "picture") or (re.search(r"\.(ppt|pptx|pages)$", doc.name) and req["parser_id"] != "presentation"):
-            return get_data_error_result(message="Not supported yet!")
-
-        e = DocumentService.update_by_id(doc.id, {"parser_id": req["parser_id"], "progress": 0, "progress_msg": "", "run": TaskStatus.UNSTART.value})
-        if not e:
-            return get_data_error_result(message="Document not found!")
-        if "parser_config" in req:
-            DocumentService.update_parser_config(doc.id, req["parser_config"])
         if doc.token_num > 0:
             e = DocumentService.increment_chunk_num(doc.id, doc.kb_id, doc.token_num * -1, doc.chunk_num * -1, doc.process_duration * -1)
             if not e:
@@ -581,6 +596,26 @@ def change_parser():
             if settings.docStoreConn.indexExist(search.index_name(tenant_id), doc.kb_id):
                 settings.docStoreConn.delete({"doc_id": doc.id}, search.index_name(tenant_id), doc.kb_id)

+    try:
+        if "pipeline_id" in req and req["pipeline_id"] != "":
+            if doc.pipeline_id == req["pipeline_id"]:
+                return get_json_result(data=True)
+            DocumentService.update_by_id(doc.id, {"pipeline_id": req["pipeline_id"]})
+            reset_doc()
+            return get_json_result(data=True)
+
+        if doc.parser_id.lower() == req["parser_id"].lower():
+            if "parser_config" in req:
+                if req["parser_config"] == doc.parser_config:
+                    return get_json_result(data=True)
+            else:
+                return get_json_result(data=True)
+
+        if (doc.type == FileType.VISUAL and req["parser_id"] != "picture") or (re.search(r"\.(ppt|pptx|pages)$", doc.name) and req["parser_id"] != "presentation"):
+            return get_data_error_result(message="Not supported yet!")
+        if "parser_config" in req:
+            DocumentService.update_parser_config(doc.id, req["parser_config"])
+        reset_doc()
         return get_json_result(data=True)
     except Exception as e:
         return server_error_response(e)

@@ -13,6 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License
 #
+import logging
 import os
 import pathlib
 import re
@@ -21,6 +22,7 @@ import flask
 from flask import request
 from flask_login import login_required, current_user

+from api.common.check_team_permission import check_file_team_permission
 from api.db.services.document_service import DocumentService
 from api.db.services.file2document_service import File2DocumentService
 from api.utils.api_utils import server_error_response, get_data_error_result, validate_request
@@ -233,52 +235,63 @@ def get_all_parent_folders():
         return server_error_response(e)


-@manager.route('/rm', methods=['POST'])  # noqa: F821
+@manager.route("/rm", methods=["POST"])  # noqa: F821
 @login_required
 @validate_request("file_ids")
 def rm():
     req = request.json
     file_ids = req["file_ids"]

+    def _delete_single_file(file):
+        try:
+            if file.location:
+                STORAGE_IMPL.rm(file.parent_id, file.location)
+        except Exception:
+            logging.exception(f"Fail to remove object: {file.parent_id}/{file.location}")
+
+        informs = File2DocumentService.get_by_file_id(file.id)
+        for inform in informs:
+            doc_id = inform.document_id
+            e, doc = DocumentService.get_by_id(doc_id)
+            if e and doc:
+                tenant_id = DocumentService.get_tenant_id(doc_id)
+                if tenant_id:
+                    DocumentService.remove_document(doc, tenant_id)
+        File2DocumentService.delete_by_file_id(file.id)
+
+        FileService.delete(file)
+
+    def _delete_folder_recursive(folder, tenant_id):
+        sub_files = FileService.list_all_files_by_parent_id(folder.id)
+        for sub_file in sub_files:
+            if sub_file.type == FileType.FOLDER.value:
+                _delete_folder_recursive(sub_file, tenant_id)
+            else:
+                _delete_single_file(sub_file)
+
+        FileService.delete(folder)
+
     try:
         for file_id in file_ids:
             e, file = FileService.get_by_id(file_id)
-            if not e:
+            if not e or not file:
                 return get_data_error_result(message="File or Folder not found!")
             if not file.tenant_id:
                 return get_data_error_result(message="Tenant not found!")
+            if not check_file_team_permission(file, current_user.id):
+                return get_json_result(data=False, message="No authorization.", code=settings.RetCode.AUTHENTICATION_ERROR)
+
             if file.source_type == FileSource.KNOWLEDGEBASE:
                 continue

             if file.type == FileType.FOLDER.value:
-                file_id_list = FileService.get_all_innermost_file_ids(file_id, [])
-                for inner_file_id in file_id_list:
-                    e, file = FileService.get_by_id(inner_file_id)
-                    if not e:
-                        return get_data_error_result(message="File not found!")
-                    STORAGE_IMPL.rm(file.parent_id, file.location)
-                FileService.delete_folder_by_pf_id(current_user.id, file_id)
-            else:
-                STORAGE_IMPL.rm(file.parent_id, file.location)
-                if not FileService.delete(file):
-                    return get_data_error_result(
-                        message="Database error (File removal)!")
-
-            # delete file2document
-            informs = File2DocumentService.get_by_file_id(file_id)
-            for inform in informs:
-                doc_id = inform.document_id
-                e, doc = DocumentService.get_by_id(doc_id)
-                if not e:
-                    return get_data_error_result(message="Document not found!")
-                tenant_id = DocumentService.get_tenant_id(doc_id)
-                if not tenant_id:
-                    return get_data_error_result(message="Tenant not found!")
-                if not DocumentService.remove_document(doc, tenant_id):
-                    return get_data_error_result(
-                        message="Database error (Document removal)!")
-                File2DocumentService.delete_by_file_id(file_id)
+                _delete_folder_recursive(file, current_user.id)
+                continue
+
+            _delete_single_file(file)

         return get_json_result(data=True)

     except Exception as e:
         return server_error_response(e)

@@ -292,6 +305,8 @@ def rename():
         e, file = FileService.get_by_id(req["file_id"])
         if not e:
             return get_data_error_result(message="File not found!")
+        if not check_file_team_permission(file, current_user.id):
+            return get_json_result(data=False, message='No authorization.', code=settings.RetCode.AUTHENTICATION_ERROR)
         if file.type != FileType.FOLDER.value \
                 and pathlib.Path(req["name"].lower()).suffix != pathlib.Path(
                 file.name.lower()).suffix:
@@ -328,6 +343,8 @@ def get(file_id):
         e, file = FileService.get_by_id(file_id)
         if not e:
             return get_data_error_result(message="Document not found!")
+        if not check_file_team_permission(file, current_user.id):
+            return get_json_result(data=False, message='No authorization.', code=settings.RetCode.AUTHENTICATION_ERROR)

         blob = STORAGE_IMPL.get(file.parent_id, file.location)
         if not blob:
@@ -348,29 +365,89 @@ def get(file_id):
         return server_error_response(e)


-@manager.route('/mv', methods=['POST'])  # noqa: F821
+@manager.route("/mv", methods=["POST"])  # noqa: F821
 @login_required
 @validate_request("src_file_ids", "dest_file_id")
 def move():
     req = request.json
     try:
         file_ids = req["src_file_ids"]
-        parent_id = req["dest_file_id"]
+        dest_parent_id = req["dest_file_id"]
+
+        ok, dest_folder = FileService.get_by_id(dest_parent_id)
+        if not ok or not dest_folder:
+            return get_data_error_result(message="Parent Folder not found!")
+
         files = FileService.get_by_ids(file_ids)
-        files_dict = {}
-        for file in files:
-            files_dict[file.id] = file
+        if not files:
+            return get_data_error_result(message="Source files not found!")
+
+        files_dict = {f.id: f for f in files}
+
         for file_id in file_ids:
-            file = files_dict[file_id]
+            file = files_dict.get(file_id)
             if not file:
                 return get_data_error_result(message="File or Folder not found!")
             if not file.tenant_id:
                 return get_data_error_result(message="Tenant not found!")
-            fe, _ = FileService.get_by_id(parent_id)
-            if not fe:
-                return get_data_error_result(message="Parent Folder not found!")
-        FileService.move_file(file_ids, parent_id)
+            if not check_file_team_permission(file, current_user.id):
+                return get_json_result(
+                    data=False,
+                    message="No authorization.",
+                    code=settings.RetCode.AUTHENTICATION_ERROR,
+                )
+
+        def _move_entry_recursive(source_file_entry, dest_folder):
+            if source_file_entry.type == FileType.FOLDER.value:
+                existing_folder = FileService.query(name=source_file_entry.name, parent_id=dest_folder.id)
+                if existing_folder:
+                    new_folder = existing_folder[0]
+                else:
+                    new_folder = FileService.insert(
+                        {
+                            "id": get_uuid(),
+                            "parent_id": dest_folder.id,
+                            "tenant_id": source_file_entry.tenant_id,
+                            "created_by": current_user.id,
+                            "name": source_file_entry.name,
+                            "location": "",
+                            "size": 0,
+                            "type": FileType.FOLDER.value,
+                        }
+                    )
+
+                sub_files = FileService.list_all_files_by_parent_id(source_file_entry.id)
+                for sub_file in sub_files:
+                    _move_entry_recursive(sub_file, new_folder)
+
+                FileService.delete_by_id(source_file_entry.id)
+                return
+
+            old_parent_id = source_file_entry.parent_id
+            old_location = source_file_entry.location
+            filename = source_file_entry.name
+
+            new_location = filename
+            while STORAGE_IMPL.obj_exist(dest_folder.id, new_location):
+                new_location += "_"
+
+            try:
+                STORAGE_IMPL.move(old_parent_id, old_location, dest_folder.id, new_location)
+            except Exception as storage_err:
+                raise RuntimeError(f"Move file failed at storage layer: {str(storage_err)}")
+
+            FileService.update_by_id(
+                source_file_entry.id,
+                {
+                    "parent_id": dest_folder.id,
+                    "location": new_location,
+                },
+            )
+
+        for file in files:
+            _move_entry_recursive(file, dest_folder)
+
         return get_json_result(data=True)

     except Exception as e:
         return server_error_response(e)

@@ -14,18 +14,21 @@
 # limitations under the License.
 #
 import json
+import logging

 from flask import request
 from flask_login import login_required, current_user

 from api.db.services import duplicate_name
-from api.db.services.document_service import DocumentService
+from api.db.services.document_service import DocumentService, queue_raptor_o_graphrag_tasks
 from api.db.services.file2document_service import File2DocumentService
 from api.db.services.file_service import FileService
+from api.db.services.pipeline_operation_log_service import PipelineOperationLogService
+from api.db.services.task_service import TaskService, GRAPH_RAPTOR_FAKE_DOC_ID
 from api.db.services.user_service import TenantService, UserTenantService
-from api.utils.api_utils import server_error_response, get_data_error_result, validate_request, not_allowed_parameters, active_required
+from api.utils.api_utils import get_error_data_result, server_error_response, get_data_error_result, validate_request, not_allowed_parameters
 from api.utils import get_uuid
-from api.db import StatusEnum, FileSource
+from api.db import PipelineTaskType, StatusEnum, FileSource, VALID_FILE_TYPES, VALID_TASK_STATUS
 from api.db.services.knowledgebase_service import KnowledgebaseService
 from api.db.db_models import File
 from api.utils.api_utils import get_json_result
@@ -33,12 +36,12 @@ from api import settings
 from rag.nlp import search
 from api.constants import DATASET_NAME_LIMIT
 from rag.settings import PAGERANK_FLD
+from rag.utils.redis_conn import REDIS_CONN
 from rag.utils.storage_factory import STORAGE_IMPL


 @manager.route('/create', methods=['post'])  # noqa: F821
 @login_required
-@active_required
 @validate_request("name")
 def create():
     req = request.json
@@ -62,10 +65,40 @@ def create():
         req["name"] = dataset_name
         req["tenant_id"] = current_user.id
         req["created_by"] = current_user.id
+        if not req.get("parser_id"):
+            req["parser_id"] = "naive"
         e, t = TenantService.get_by_id(current_user.id)
         if not e:
            return get_data_error_result(message="Tenant not found.")
-        req["embd_id"] = t.embd_id
+        req["parser_config"] = {
+            "layout_recognize": "DeepDOC",
+            "chunk_token_num": 512,
+            "delimiter": "\n",
+            "auto_keywords": 0,
+            "auto_questions": 0,
+            "html4excel": False,
+            "topn_tags": 3,
+            "raptor": {
+                "use_raptor": True,
+                "prompt": "Please summarize the following paragraphs. Be careful with the numbers, do not make things up. Paragraphs as following:\n {cluster_content}\nThe above is the content you need to summarize.",
+                "max_token": 256,
+                "threshold": 0.1,
+                "max_cluster": 64,
+                "random_seed": 0
+            },
+            "graphrag": {
+                "use_graphrag": True,
+                "entity_types": [
+                    "organization",
+                    "person",
+                    "geo",
+                    "event",
+                    "category"
+                ],
+                "method": "light"
+            }
+        }
         if not KnowledgebaseService.save(**req):
             return get_data_error_result()
         return get_json_result(data={"kb_id": req["id"]})
@@ -156,6 +189,9 @@ def detail():
             return get_data_error_result(
                 message="Can't find this knowledgebase!")
         kb["size"] = DocumentService.get_total_size_by_kb_id(kb_id=kb["id"], keywords="", run_status=[], types=[])
+        for key in ["graphrag_task_finish_at", "raptor_task_finish_at", "mindmap_task_finish_at"]:
+            if finish_at := kb.get(key):
+                kb[key] = finish_at.strftime("%Y-%m-%d %H:%M:%S")
         return get_json_result(data=kb)
     except Exception as e:
         return server_error_response(e)
@@ -251,7 +287,7 @@ def list_tags(kb_id):
     tenants = UserTenantService.get_tenants_by_user_id(current_user.id)
     tags = []
     for tenant in tenants:
-        tags += settings.retrievaler.all_tags(tenant["tenant_id"], [kb_id])
+        tags += settings.retriever.all_tags(tenant["tenant_id"], [kb_id])
     return get_json_result(data=tags)


@@ -270,7 +306,7 @@ def list_tags_from_kbs():
     tenants = UserTenantService.get_tenants_by_user_id(current_user.id)
     tags = []
     for tenant in tenants:
-        tags += settings.retrievaler.all_tags(tenant["tenant_id"], kb_ids)
+        tags += settings.retriever.all_tags(tenant["tenant_id"], kb_ids)
     return get_json_result(data=tags)

@@ -331,7 +367,7 @@ def knowledge_graph(kb_id):
     obj = {"graph": {}, "mind_map": {}}
     if not settings.docStoreConn.indexExist(search.index_name(kb.tenant_id), kb_id):
         return get_json_result(data=obj)
-    sres = settings.retrievaler.search(req, search.index_name(kb.tenant_id), [kb_id])
+    sres = settings.retriever.search(req, search.index_name(kb.tenant_id), [kb_id])
     if not len(sres.ids):
         return get_json_result(data=obj)

@@ -396,3 +432,359 @@ def get_basic_info():
     basic_info = DocumentService.knowledgebase_basic_info(kb_id)

     return get_json_result(data=basic_info)
+
+
+@manager.route("/list_pipeline_logs", methods=["POST"])  # noqa: F821
+@login_required
+def list_pipeline_logs():
+    kb_id = request.args.get("kb_id")
+    if not kb_id:
+        return get_json_result(data=False, message='Lack of "KB ID"', code=settings.RetCode.ARGUMENT_ERROR)
+
+    keywords = request.args.get("keywords", "")
+
+    page_number = int(request.args.get("page", 0))
+    items_per_page = int(request.args.get("page_size", 0))
+    orderby = request.args.get("orderby", "create_time")
+    if request.args.get("desc", "true").lower() == "false":
+        desc = False
+    else:
+        desc = True
+    create_date_from = request.args.get("create_date_from", "")
+    create_date_to = request.args.get("create_date_to", "")
+    if create_date_from and create_date_to and create_date_from > create_date_to:
+        return get_data_error_result(message="Create date filter is abnormal.")
+
+    req = request.get_json()
+
+    operation_status = req.get("operation_status", [])
+    if operation_status:
+        invalid_status = {s for s in operation_status if s not in VALID_TASK_STATUS}
+        if invalid_status:
+            return get_data_error_result(message=f"Invalid operation_status filter conditions: {', '.join(invalid_status)}")
+
+    types = req.get("types", [])
+    if types:
+        invalid_types = {t for t in types if t not in VALID_FILE_TYPES}
+        if invalid_types:
+            return get_data_error_result(message=f"Invalid filter conditions: {', '.join(invalid_types)} type{'s' if len(invalid_types) > 1 else ''}")
+
+    suffix = req.get("suffix", [])
+
+    try:
+        logs, tol = PipelineOperationLogService.get_file_logs_by_kb_id(kb_id, page_number, items_per_page, orderby, desc, keywords, operation_status, types, suffix, create_date_from, create_date_to)
+        return get_json_result(data={"total": tol, "logs": logs})
+    except Exception as e:
+        return server_error_response(e)
+
+
+@manager.route("/list_pipeline_dataset_logs", methods=["POST"])  # noqa: F821
+@login_required
+def list_pipeline_dataset_logs():
+    kb_id = request.args.get("kb_id")
+    if not kb_id:
+        return get_json_result(data=False, message='Lack of "KB ID"', code=settings.RetCode.ARGUMENT_ERROR)
+
+    page_number = int(request.args.get("page", 0))
+    items_per_page = int(request.args.get("page_size", 0))
+    orderby = request.args.get("orderby", "create_time")
+    if request.args.get("desc", "true").lower() == "false":
+        desc = False
+    else:
+        desc = True
+    create_date_from = request.args.get("create_date_from", "")
+    create_date_to = request.args.get("create_date_to", "")
+    if create_date_from and create_date_to and create_date_from > create_date_to:
+        return get_data_error_result(message="Create date filter is abnormal.")
+
+    req = request.get_json()
+
+    operation_status = req.get("operation_status", [])
+    if operation_status:
+        invalid_status = {s for s in operation_status if s not in VALID_TASK_STATUS}
+        if invalid_status:
+            return get_data_error_result(message=f"Invalid operation_status filter conditions: {', '.join(invalid_status)}")
+
+    try:
+        logs, tol = PipelineOperationLogService.get_dataset_logs_by_kb_id(kb_id, page_number, items_per_page, orderby, desc, operation_status, create_date_from, create_date_to)
+        return get_json_result(data={"total": tol, "logs": logs})
+    except Exception as e:
+        return server_error_response(e)
+
+
+@manager.route("/delete_pipeline_logs", methods=["POST"])  # noqa: F821
+@login_required
+def delete_pipeline_logs():
+    kb_id = request.args.get("kb_id")
+    if not kb_id:
+        return get_json_result(data=False, message='Lack of "KB ID"', code=settings.RetCode.ARGUMENT_ERROR)
+
+    req = request.get_json()
+    log_ids = req.get("log_ids", [])
+
+    PipelineOperationLogService.delete_by_ids(log_ids)
+
+    return get_json_result(data=True)
+
+
+@manager.route("/pipeline_log_detail", methods=["GET"])  # noqa: F821
+@login_required
+def pipeline_log_detail():
+    log_id = request.args.get("log_id")
+    if not log_id:
+        return get_json_result(data=False, message='Lack of "Pipeline log ID"', code=settings.RetCode.ARGUMENT_ERROR)
+
+    ok, log = PipelineOperationLogService.get_by_id(log_id)
+    if not ok:
+        return get_data_error_result(message="Invalid pipeline log ID")
+
+    return get_json_result(data=log.to_dict())
+
+
+@manager.route("/run_graphrag", methods=["POST"])  # noqa: F821
+@login_required
+def run_graphrag():
+    req = request.json
+
+    kb_id = req.get("kb_id", "")
+    if not kb_id:
+        return get_error_data_result(message='Lack of "KB ID"')
+
+    ok, kb = KnowledgebaseService.get_by_id(kb_id)
+    if not ok:
+        return get_error_data_result(message="Invalid Knowledgebase ID")
+
+    task_id = kb.graphrag_task_id
+    if task_id:
+        ok, task = TaskService.get_by_id(task_id)
+        if not ok:
+            logging.warning(f"A valid GraphRAG task id is expected for kb {kb_id}")
+
+        if task and task.progress not in [-1, 1]:
+            return get_error_data_result(message=f"Task {task_id} in progress with status {task.progress}. A Graph Task is already running.")
+
+    documents, _ = DocumentService.get_by_kb_id(
+        kb_id=kb_id,
+        page_number=0,
+        items_per_page=0,
+        orderby="create_time",
+        desc=False,
+        keywords="",
+        run_status=[],
+        types=[],
+        suffix=[],
+    )
+    if not documents:
+        return get_error_data_result(message=f"No documents in Knowledgebase {kb_id}")
+
+    sample_document = documents[0]
+    document_ids = [document["id"] for document in documents]
+
+    task_id = queue_raptor_o_graphrag_tasks(sample_doc_id=sample_document, ty="graphrag", priority=0, fake_doc_id=GRAPH_RAPTOR_FAKE_DOC_ID, doc_ids=list(document_ids))
+
+    if not KnowledgebaseService.update_by_id(kb.id, {"graphrag_task_id": task_id}):
+        logging.warning(f"Cannot save graphrag_task_id for kb {kb_id}")
+
+    return get_json_result(data={"graphrag_task_id": task_id})
+
+
+@manager.route("/trace_graphrag", methods=["GET"])  # noqa: F821
+@login_required
+def trace_graphrag():
+    kb_id = request.args.get("kb_id", "")
+    if not kb_id:
+        return get_error_data_result(message='Lack of "KB ID"')
+
+    ok, kb = KnowledgebaseService.get_by_id(kb_id)
+    if not ok:
+        return get_error_data_result(message="Invalid Knowledgebase ID")
+
+    task_id = kb.graphrag_task_id
+    if not task_id:
+        return get_json_result(data={})
+
+    ok, task = TaskService.get_by_id(task_id)
+    if not ok:
+        return get_error_data_result(message="GraphRAG Task Not Found or Error Occurred")
+
+    return get_json_result(data=task.to_dict())
+
+
+@manager.route("/run_raptor", methods=["POST"])  # noqa: F821
+@login_required
+def run_raptor():
+    req = request.json
+
+    kb_id = req.get("kb_id", "")
+    if not kb_id:
+        return get_error_data_result(message='Lack of "KB ID"')
+
+    ok, kb = KnowledgebaseService.get_by_id(kb_id)
+    if not ok:
+        return get_error_data_result(message="Invalid Knowledgebase ID")
+
+    task_id = kb.raptor_task_id
+    if task_id:
+        ok, task = TaskService.get_by_id(task_id)
+        if not ok:
+            logging.warning(f"A valid RAPTOR task id is expected for kb {kb_id}")
+
+        if task and task.progress not in [-1, 1]:
+            return get_error_data_result(message=f"Task {task_id} in progress with status {task.progress}. A RAPTOR Task is already running.")
+
+    documents, _ = DocumentService.get_by_kb_id(
+        kb_id=kb_id,
+        page_number=0,
+        items_per_page=0,
+        orderby="create_time",
+        desc=False,
+        keywords="",
+        run_status=[],
+        types=[],
+        suffix=[],
+    )
+    if not documents:
+        return get_error_data_result(message=f"No documents in Knowledgebase {kb_id}")
+
+    sample_document = documents[0]
+    document_ids = [document["id"] for document in documents]
+
+    task_id = queue_raptor_o_graphrag_tasks(sample_doc_id=sample_document, ty="raptor", priority=0, fake_doc_id=GRAPH_RAPTOR_FAKE_DOC_ID, doc_ids=list(document_ids))
+
+    if not KnowledgebaseService.update_by_id(kb.id, {"raptor_task_id": task_id}):
+        logging.warning(f"Cannot save raptor_task_id for kb {kb_id}")
+
+    return get_json_result(data={"raptor_task_id": task_id})
+
+
+@manager.route("/trace_raptor", methods=["GET"])  # noqa: F821
+@login_required
+def trace_raptor():
+    kb_id = request.args.get("kb_id", "")
+    if not kb_id:
+        return get_error_data_result(message='Lack of "KB ID"')
+
+    ok, kb = KnowledgebaseService.get_by_id(kb_id)
+    if not ok:
+        return get_error_data_result(message="Invalid Knowledgebase ID")
+
+    task_id = kb.raptor_task_id
+    if not task_id:
+        return get_json_result(data={})
+
+    ok, task = TaskService.get_by_id(task_id)
+    if not ok:
+        return get_error_data_result(message="RAPTOR Task Not Found or Error Occurred")
+
+    return get_json_result(data=task.to_dict())
+
+
+@manager.route("/run_mindmap", methods=["POST"])  # noqa: F821
+@login_required
+def run_mindmap():
+    req = request.json
+
+    kb_id = req.get("kb_id", "")
+    if not kb_id:
+        return get_error_data_result(message='Lack of "KB ID"')
+
+    ok, kb = KnowledgebaseService.get_by_id(kb_id)
+    if not ok:
+        return get_error_data_result(message="Invalid Knowledgebase ID")
+
+    task_id = kb.mindmap_task_id
+    if task_id:
+        ok, task = TaskService.get_by_id(task_id)
+        if not ok:
+            logging.warning(f"A valid Mindmap task id is expected for kb {kb_id}")
+
+        if task and task.progress not in [-1, 1]:
+            return get_error_data_result(message=f"Task {task_id} in progress with status {task.progress}. A Mindmap Task is already running.")
+
+    documents, _ = DocumentService.get_by_kb_id(
+        kb_id=kb_id,
+        page_number=0,
+        items_per_page=0,
+        orderby="create_time",
+        desc=False,
+        keywords="",
+        run_status=[],
+        types=[],
+        suffix=[],
+    )
+    if not documents:
+        return get_error_data_result(message=f"No documents in Knowledgebase {kb_id}")
+
+    sample_document = documents[0]
+    document_ids = [document["id"] for document in documents]
+
+    task_id = queue_raptor_o_graphrag_tasks(sample_doc_id=sample_document, ty="mindmap", priority=0, fake_doc_id=GRAPH_RAPTOR_FAKE_DOC_ID, doc_ids=list(document_ids))
+
+    if not KnowledgebaseService.update_by_id(kb.id, {"mindmap_task_id": task_id}):
+        logging.warning(f"Cannot save mindmap_task_id for kb {kb_id}")
+
+    return get_json_result(data={"mindmap_task_id": task_id})
+
+
+@manager.route("/trace_mindmap", methods=["GET"])  # noqa: F821
+@login_required
+def trace_mindmap():
+    kb_id = request.args.get("kb_id", "")
+    if not kb_id:
+        return get_error_data_result(message='Lack of "KB ID"')
+
+    ok, kb = KnowledgebaseService.get_by_id(kb_id)
+    if not ok:
+        return get_error_data_result(message="Invalid Knowledgebase ID")
+
+    task_id = kb.mindmap_task_id
+    if not task_id:
+        return get_json_result(data={})
+
+    ok, task = TaskService.get_by_id(task_id)
+    if not ok:
+        return get_error_data_result(message="Mindmap Task Not Found or Error Occurred")
+
+    return get_json_result(data=task.to_dict())
+
+
+@manager.route("/unbind_task", methods=["DELETE"])  # noqa: F821
+@login_required
+def delete_kb_task():
+    kb_id = request.args.get("kb_id", "")
+    if not kb_id:
+        return get_error_data_result(message='Lack of "KB ID"')
+    ok, kb = KnowledgebaseService.get_by_id(kb_id)
+    if not ok:
+        return get_json_result(data=True)
+
+    pipeline_task_type = request.args.get("pipeline_task_type", "")
+    if not pipeline_task_type or pipeline_task_type not in [PipelineTaskType.GRAPH_RAG, PipelineTaskType.RAPTOR, PipelineTaskType.MINDMAP]:
+        return get_error_data_result(message="Invalid task type")
+
+    match pipeline_task_type:
+        case PipelineTaskType.GRAPH_RAG:
+            settings.docStoreConn.delete({"knowledge_graph_kwd": ["graph", "subgraph", "entity", "relation"]}, search.index_name(kb.tenant_id), kb_id)
+            kb_task_id_field = "graphrag_task_id"
+            task_id = kb.graphrag_task_id
+            kb_task_finish_at = "graphrag_task_finish_at"
+        case PipelineTaskType.RAPTOR:
+            kb_task_id_field = "raptor_task_id"
+            task_id = kb.raptor_task_id
+            kb_task_finish_at = "raptor_task_finish_at"
+        case PipelineTaskType.MINDMAP:
+            kb_task_id_field = "mindmap_task_id"
+            task_id = kb.mindmap_task_id
+            kb_task_finish_at = "mindmap_task_finish_at"
+        case _:
+            return get_error_data_result(message="Internal Error: Invalid task type")
+
+    def cancel_task(task_id):
+        # Signal the running worker through Redis to abandon this task.
+        REDIS_CONN.set(f"{task_id}-cancel", "x")
+    cancel_task(task_id)
+
+    ok = KnowledgebaseService.update_by_id(kb_id, {kb_task_id_field: "", kb_task_finish_at: None})
+    if not ok:
+        return server_error_response(f"Internal error: cannot delete task {pipeline_task_type}")
+
+    return get_json_result(data=True)
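# Illustrative sketch (not part of the diff): the /run_graphrag, /run_raptor and
# /run_mindmap routes added above share one pattern -- queue a kb-wide task,
# stash its id on the knowledgebase, then poll the matching /trace_* route.
# Host, URL prefix, and auth header below are assumptions.
#
#   import requests, time
#   base = "http://localhost:9380/v1/kb"
#   auth = {"Authorization": "Bearer <api-token>"}
#   requests.post(f"{base}/run_graphrag", json={"kb_id": "<kb_id>"}, headers=auth)
#   while True:
#       task = requests.get(f"{base}/trace_graphrag", params={"kb_id": "<kb_id>"}, headers=auth).json()["data"]
#       if not task or task.get("progress") in (-1, 1):
#           break
#       time.sleep(5)
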
@@ -194,6 +194,9 @@ def add_llm():
     elif factory == "Azure-OpenAI":
         api_key = apikey_json(["api_key", "api_version"])

+    elif factory == "OpenRouter":
+        api_key = apikey_json(["api_key", "provider_order"])
+
     llm = {
         "tenant_id": current_user.id,
         "llm_factory": factory,
@@ -1,8 +1,26 @@
+#
+# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
 from flask import Response
 from flask_login import login_required
 from api.utils.api_utils import get_json_result
 from plugin import GlobalPluginManager


 @manager.route('/llm_tools', methods=['GET'])  # noqa: F821
 @login_required
 def llm_tools() -> Response:
@@ -25,6 +25,7 @@ from api.utils.api_utils import get_data_error_result, get_error_data_result, ge
 from api.utils.api_utils import get_result
 from flask import request
 
+
 @manager.route('/agents', methods=['GET'])  # noqa: F821
 @token_required
 def list_agents(tenant_id):
@@ -41,7 +42,7 @@ def list_agents(tenant_id):
         desc = False
     else:
         desc = True
-    canvas = UserCanvasService.get_list(tenant_id,page_number,items_per_page,orderby,desc,id,title)
+    canvas = UserCanvasService.get_list(tenant_id, page_number, items_per_page, orderby, desc, id, title)
     return get_result(data=canvas)
 
 
@@ -93,7 +94,7 @@ def update_agent(tenant_id: str, agent_id: str):
         req["dsl"] = json.dumps(req["dsl"], ensure_ascii=False)
 
     req["dsl"] = json.loads(req["dsl"])
 
     if req.get("title") is not None:
         req["title"] = req["title"].strip()
 
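Note: for reference, a hedged client-side call to the `/agents` route above. The query parameter names are inferred from the `get_list` arguments (`page_number`, `items_per_page`, `orderby`, `desc`, `id`, `title`) and may differ from the actual API; the base URL is an assumption:

```python
# Hypothetical listing call against the SDK agents route.
import requests

resp = requests.get(
    "http://localhost:9380/api/v1/agents",  # assumed base URL
    headers={"Authorization": "Bearer <API_KEY>"},
    params={"page": 1, "page_size": 30, "orderby": "update_time", "desc": "true"},
    timeout=30,
)
print(resp.json())
```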
@@ -215,7 +215,8 @@ def delete(tenant_id):
             continue
         kb_id_instance_pairs.append((kb_id, kb))
     if len(error_kb_ids) > 0:
-        return get_error_permission_result(message=f"""User '{tenant_id}' lacks permission for datasets: '{", ".join(error_kb_ids)}'""")
+        return get_error_permission_result(
+            message=f"""User '{tenant_id}' lacks permission for datasets: '{", ".join(error_kb_ids)}'""")
 
     errors = []
     success_count = 0
@@ -232,7 +233,8 @@ def delete(tenant_id):
                 ]
             )
             File2DocumentService.delete_by_document_id(doc.id)
-        FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.type == "folder", File.name == kb.name])
+        FileService.filter_delete(
+            [File.source_type == FileSource.KNOWLEDGEBASE, File.type == "folder", File.name == kb.name])
         if not KnowledgebaseService.delete_by_id(kb_id):
             errors.append(f"Delete dataset error for {kb_id}")
             continue
@@ -329,7 +331,8 @@ def update(tenant_id, dataset_id):
     try:
         kb = KnowledgebaseService.get_or_none(id=dataset_id, tenant_id=tenant_id)
         if kb is None:
-            return get_error_permission_result(message=f"User '{tenant_id}' lacks permission for dataset '{dataset_id}'")
+            return get_error_permission_result(
+                message=f"User '{tenant_id}' lacks permission for dataset '{dataset_id}'")
 
         if req.get("parser_config"):
             req["parser_config"] = deep_merge(kb.parser_config, req["parser_config"])
@@ -341,7 +344,8 @@ def update(tenant_id, dataset_id):
             del req["parser_config"]
 
         if "name" in req and req["name"].lower() != kb.name.lower():
-            exists = KnowledgebaseService.get_or_none(name=req["name"], tenant_id=tenant_id, status=StatusEnum.VALID.value)
+            exists = KnowledgebaseService.get_or_none(name=req["name"], tenant_id=tenant_id,
+                                                      status=StatusEnum.VALID.value)
             if exists:
                 return get_error_data_result(message=f"Dataset name '{req['name']}' already exists")
 
@@ -349,7 +353,8 @@ def update(tenant_id, dataset_id):
             if not req["embd_id"]:
                 req["embd_id"] = kb.embd_id
             if kb.chunk_num != 0 and req["embd_id"] != kb.embd_id:
-                return get_error_data_result(message=f"When chunk_num ({kb.chunk_num}) > 0, embedding_model must remain {kb.embd_id}")
+                return get_error_data_result(
+                    message=f"When chunk_num ({kb.chunk_num}) > 0, embedding_model must remain {kb.embd_id}")
             ok, err = verify_embedding_availability(req["embd_id"], tenant_id)
             if not ok:
                 return err
@@ -359,10 +364,12 @@ def update(tenant_id, dataset_id):
                 return get_error_argument_result(message="'pagerank' can only be set when doc_engine is elasticsearch")
 
             if req["pagerank"] > 0:
-                settings.docStoreConn.update({"kb_id": kb.id}, {PAGERANK_FLD: req["pagerank"]}, search.index_name(kb.tenant_id), kb.id)
+                settings.docStoreConn.update({"kb_id": kb.id}, {PAGERANK_FLD: req["pagerank"]},
+                                             search.index_name(kb.tenant_id), kb.id)
             else:
                 # Elasticsearch requires PAGERANK_FLD be non-zero!
-                settings.docStoreConn.update({"exists": PAGERANK_FLD}, {"remove": PAGERANK_FLD}, search.index_name(kb.tenant_id), kb.id)
+                settings.docStoreConn.update({"exists": PAGERANK_FLD}, {"remove": PAGERANK_FLD},
+                                             search.index_name(kb.tenant_id), kb.id)
 
         if not KnowledgebaseService.update_by_id(kb.id, req):
             return get_error_data_result(message="Update dataset error.(Database error)")
@@ -454,7 +461,7 @@ def list_datasets(tenant_id):
             return get_error_permission_result(message=f"User '{tenant_id}' lacks permission for dataset '{name}'")
 
         tenants = TenantService.get_joined_tenants_by_user_id(tenant_id)
-        kbs = KnowledgebaseService.get_list(
+        kbs, total = KnowledgebaseService.get_list(
            [m["tenant_id"] for m in tenants],
            tenant_id,
            args["page"],
@@ -468,14 +475,15 @@
         response_data_list = []
         for kb in kbs:
             response_data_list.append(remap_dictionary_keys(kb))
-        return get_result(data=response_data_list)
+        return get_result(data=response_data_list, total=total)
     except OperationalError as e:
         logging.exception(e)
         return get_error_data_result(message="Database operation failed")
 
 
 @manager.route('/datasets/<dataset_id>/knowledge_graph', methods=['GET'])  # noqa: F821
 @token_required
-def knowledge_graph(tenant_id,dataset_id):
+def knowledge_graph(tenant_id, dataset_id):
     if not KnowledgebaseService.accessible(dataset_id, tenant_id):
         return get_result(
             data=False,
@@ -491,7 +499,7 @@ def knowledge_graph(tenant_id,dataset_id):
     obj = {"graph": {}, "mind_map": {}}
     if not settings.docStoreConn.indexExist(search.index_name(kb.tenant_id), dataset_id):
         return get_result(data=obj)
-    sres = settings.retrievaler.search(req, search.index_name(kb.tenant_id), [dataset_id])
+    sres = settings.retriever.search(req, search.index_name(kb.tenant_id), [dataset_id])
     if not len(sres.ids):
         return get_result(data=obj)
 
@@ -507,14 +515,16 @@
     if "nodes" in obj["graph"]:
         obj["graph"]["nodes"] = sorted(obj["graph"]["nodes"], key=lambda x: x.get("pagerank", 0), reverse=True)[:256]
     if "edges" in obj["graph"]:
-        node_id_set = { o["id"] for o in obj["graph"]["nodes"] }
-        filtered_edges = [o for o in obj["graph"]["edges"] if o["source"] != o["target"] and o["source"] in node_id_set and o["target"] in node_id_set]
+        node_id_set = {o["id"] for o in obj["graph"]["nodes"]}
+        filtered_edges = [o for o in obj["graph"]["edges"] if
+                          o["source"] != o["target"] and o["source"] in node_id_set and o["target"] in node_id_set]
         obj["graph"]["edges"] = sorted(filtered_edges, key=lambda x: x.get("weight", 0), reverse=True)[:128]
     return get_result(data=obj)
 
 
 @manager.route('/datasets/<dataset_id>/knowledge_graph', methods=['DELETE'])  # noqa: F821
 @token_required
-def delete_knowledge_graph(tenant_id,dataset_id):
+def delete_knowledge_graph(tenant_id, dataset_id):
     if not KnowledgebaseService.accessible(dataset_id, tenant_id):
         return get_result(
             data=False,
@@ -522,6 +532,7 @@ def delete_knowledge_graph(tenant_id,dataset_id):
             code=settings.RetCode.AUTHENTICATION_ERROR
         )
     _, kb = KnowledgebaseService.get_by_id(dataset_id)
-    settings.docStoreConn.delete({"knowledge_graph_kwd": ["graph", "subgraph", "entity", "relation"]}, search.index_name(kb.tenant_id), dataset_id)
+    settings.docStoreConn.delete({"knowledge_graph_kwd": ["graph", "subgraph", "entity", "relation"]},
+                                 search.index_name(kb.tenant_id), dataset_id)
 
     return get_result(data=True)
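Note: the `update()` hunk overlays the incoming `parser_config` onto the stored one via `deep_merge`, which is defined elsewhere in the repo. A minimal sketch of the usual recursive-merge semantics it implies (my implementation, not necessarily the repo's):

```python
# Assumed semantics: nested dicts merge key-by-key, scalars overwrite.
def deep_merge(base: dict, patch: dict) -> dict:
    out = dict(base)
    for k, v in patch.items():
        if isinstance(v, dict) and isinstance(out.get(k), dict):
            out[k] = deep_merge(out[k], v)
        else:
            out[k] = v
    return out

print(deep_merge({"chunk_token_num": 128, "raptor": {"use_raptor": False}},
                 {"raptor": {"use_raptor": True}}))
# -> {'chunk_token_num': 128, 'raptor': {'use_raptor': True}}
```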
@@ -1,4 +1,4 @@
 #
 # Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -31,6 +31,89 @@ from api.db.services.dialog_service import meta_filter, convert_conditions
 @apikey_required
 @validate_request("knowledge_id", "query")
 def retrieval(tenant_id):
+    """
+    Dify-compatible retrieval API
+    ---
+    tags:
+      - SDK
+    security:
+      - ApiKeyAuth: []
+    parameters:
+      - in: body
+        name: body
+        required: true
+        schema:
+          type: object
+          required:
+            - knowledge_id
+            - query
+          properties:
+            knowledge_id:
+              type: string
+              description: Knowledge base ID
+            query:
+              type: string
+              description: Query text
+            use_kg:
+              type: boolean
+              description: Whether to use knowledge graph
+              default: false
+            retrieval_setting:
+              type: object
+              description: Retrieval configuration
+              properties:
+                score_threshold:
+                  type: number
+                  description: Similarity threshold
+                  default: 0.0
+                top_k:
+                  type: integer
+                  description: Number of results to return
+                  default: 1024
+            metadata_condition:
+              type: object
+              description: Metadata filter condition
+              properties:
+                conditions:
+                  type: array
+                  items:
+                    type: object
+                    properties:
+                      name:
+                        type: string
+                        description: Field name
+                      comparison_operator:
+                        type: string
+                        description: Comparison operator
+                      value:
+                        type: string
+                        description: Field value
+    responses:
+      200:
+        description: Retrieval succeeded
+        schema:
+          type: object
+          properties:
+            records:
+              type: array
+              items:
+                type: object
+                properties:
+                  content:
+                    type: string
+                    description: Content text
+                  score:
+                    type: number
+                    description: Similarity score
+                  title:
+                    type: string
+                    description: Document title
+                  metadata:
+                    type: object
+                    description: Metadata info
+      404:
+        description: Knowledge base or document not found
+    """
     req = request.json
     question = req["query"]
     kb_id = req["knowledge_id"]
@@ -38,9 +121,9 @@ def retrieval(tenant_id):
     retrieval_setting = req.get("retrieval_setting", {})
     similarity_threshold = float(retrieval_setting.get("score_threshold", 0.0))
     top = int(retrieval_setting.get("top_k", 1024))
-    metadata_condition = req.get("metadata_condition",{})
+    metadata_condition = req.get("metadata_condition", {})
     metas = DocumentService.get_meta_by_kbs([kb_id])
 
     doc_ids = []
     try:
@@ -50,12 +133,12 @@ def retrieval(tenant_id):
 
         embd_mdl = LLMBundle(kb.tenant_id, LLMType.EMBEDDING.value, llm_name=kb.embd_id)
         print(metadata_condition)
-        print("after",convert_conditions(metadata_condition))
+        # print("after", convert_conditions(metadata_condition))
         doc_ids.extend(meta_filter(metas, convert_conditions(metadata_condition)))
-        print("doc_ids",doc_ids)
+        # print("doc_ids", doc_ids)
         if not doc_ids and metadata_condition is not None:
             doc_ids = ['-999']
-        ranks = settings.retrievaler.retrieval(
+        ranks = settings.retriever.retrieval(
             question,
             embd_mdl,
             kb.tenant_id,
@@ -70,17 +153,17 @@ def retrieval(tenant_id):
         )
 
         if use_kg:
-            ck = settings.kg_retrievaler.retrieval(question,
+            ck = settings.kg_retriever.retrieval(question,
                                                  [tenant_id],
                                                  [kb_id],
                                                  embd_mdl,
                                                  LLMBundle(kb.tenant_id, LLMType.CHAT))
             if ck["content_with_weight"]:
                 ranks["chunks"].insert(0, ck)
 
         records = []
         for c in ranks["chunks"]:
-            e, doc = DocumentService.get_by_id( c["doc_id"])
+            e, doc = DocumentService.get_by_id(c["doc_id"])
             c.pop("vector", None)
             meta = getattr(doc, 'meta_fields', {})
             meta["doc_id"] = c["doc_id"]
@@ -100,5 +183,3 @@ def retrieval(tenant_id):
         )
         logging.exception(e)
         return build_error_result(message=str(e), code=settings.RetCode.SERVER_ERROR)
-
-
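Note: putting the swagger docstring above to work, a hedged example request. The endpoint URL and header name are assumptions, since the route path sits outside this hunk; the body fields follow the documented schema:

```python
# Example Dify-style retrieval call shaped by the docstring above.
import requests

resp = requests.post(
    "http://localhost:9380/api/v1/dify/retrieval",  # assumed base URL + path
    headers={"Authorization": "Bearer <API_KEY>"},
    json={
        "knowledge_id": "<dataset_id>",
        "query": "What is RAGFlow?",
        "retrieval_setting": {"score_threshold": 0.2, "top_k": 8},
        "metadata_condition": {
            "conditions": [
                {"name": "author", "comparison_operator": "is", "value": "infiniflow"}
            ]
        },
    },
    timeout=30,
)
for record in resp.json().get("records", []):
    print(record["score"], record["title"])
```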
@@ -458,7 +458,7 @@ def list_docs(dataset_id, tenant_id):
         required: false
         default: true
         description: Order in descending.
       - in: query
         name: create_time_from
         type: integer
         required: false
@@ -470,6 +470,20 @@ def list_docs(dataset_id, tenant_id):
         required: false
         default: 0
         description: Unix timestamp for filtering documents created before this time. 0 means no filter.
+      - in: query
+        name: suffix
+        type: array
+        items:
+          type: string
+        required: false
+        description: Filter by file suffix (e.g., ["pdf", "txt", "docx"]).
+      - in: query
+        name: run
+        type: array
+        items:
+          type: string
+        required: false
+        description: Filter by document run status. Supports both numeric ("0", "1", "2", "3", "4") and text formats ("UNSTART", "RUNNING", "CANCEL", "DONE", "FAIL").
       - in: header
         name: Authorization
         type: string
@@ -512,63 +526,62 @@ def list_docs(dataset_id, tenant_id):
           description: Processing status.
     """
     if not KnowledgebaseService.accessible(kb_id=dataset_id, user_id=tenant_id):
         return get_error_data_result(message=f"You don't own the dataset {dataset_id}. ")
-    id = request.args.get("id")
-    name = request.args.get("name")
-    if id and not DocumentService.query(id=id, kb_id=dataset_id):
-        return get_error_data_result(message=f"You don't own the document {id}.")
+
+    q = request.args
+    document_id = q.get("id")
+    name = q.get("name")
+
+    if document_id and not DocumentService.query(id=document_id, kb_id=dataset_id):
+        return get_error_data_result(message=f"You don't own the document {document_id}.")
     if name and not DocumentService.query(name=name, kb_id=dataset_id):
         return get_error_data_result(message=f"You don't own the document {name}.")
-    page = int(request.args.get("page", 1))
-    keywords = request.args.get("keywords", "")
-    page_size = int(request.args.get("page_size", 30))
-    orderby = request.args.get("orderby", "create_time")
-    if request.args.get("desc") == "False":
-        desc = False
-    else:
-        desc = True
-    docs, tol = DocumentService.get_list(dataset_id, page, page_size, orderby, desc, keywords, id, name)
 
-    create_time_from = int(request.args.get("create_time_from", 0))
-    create_time_to = int(request.args.get("create_time_to", 0))
+    page = int(q.get("page", 1))
+    page_size = int(q.get("page_size", 30))
+    orderby = q.get("orderby", "create_time")
+    desc = str(q.get("desc", "true")).strip().lower() != "false"
+    keywords = q.get("keywords", "")
+
+    # filters - align with OpenAPI parameter names
+    suffix = q.getlist("suffix")
+    run_status = q.getlist("run")
+    create_time_from = int(q.get("create_time_from", 0))
+    create_time_to = int(q.get("create_time_to", 0))
+
+    # map run status (accept text or numeric) - align with API parameter
+    run_status_text_to_numeric = {"UNSTART": "0", "RUNNING": "1", "CANCEL": "2", "DONE": "3", "FAIL": "4"}
+    run_status_converted = [run_status_text_to_numeric.get(v, v) for v in run_status]
+
+    docs, total = DocumentService.get_list(
+        dataset_id, page, page_size, orderby, desc, keywords, document_id, name, suffix, run_status_converted
+    )
+
+    # time range filter (0 means no bound)
     if create_time_from or create_time_to:
-        filtered_docs = []
-        for doc in docs:
-            doc_create_time = doc.get("create_time", 0)
-            if (create_time_from == 0 or doc_create_time >= create_time_from) and (create_time_to == 0 or doc_create_time <= create_time_to):
-                filtered_docs.append(doc)
-        docs = filtered_docs
+        docs = [
+            d for d in docs
+            if (create_time_from == 0 or d.get("create_time", 0) >= create_time_from)
+            and (create_time_to == 0 or d.get("create_time", 0) <= create_time_to)
+        ]
 
-    # rename key's name
-    renamed_doc_list = []
+    # rename keys + map run status back to text for output
     key_mapping = {
         "chunk_num": "chunk_count",
         "kb_id": "dataset_id",
         "token_num": "token_count",
         "parser_id": "chunk_method",
     }
-    run_mapping = {
-        "0": "UNSTART",
-        "1": "RUNNING",
-        "2": "CANCEL",
-        "3": "DONE",
-        "4": "FAIL",
-    }
-    for doc in docs:
-        renamed_doc = {}
-        for key, value in doc.items():
-            if key == "run":
-                renamed_doc["run"] = run_mapping.get(str(value))
-            new_key = key_mapping.get(key, key)
-            renamed_doc[new_key] = value
-            if key == "run":
-                renamed_doc["run"] = run_mapping.get(value)
-        renamed_doc_list.append(renamed_doc)
-    return get_result(data={"total": tol, "docs": renamed_doc_list})
+    run_status_numeric_to_text = {"0": "UNSTART", "1": "RUNNING", "2": "CANCEL", "3": "DONE", "4": "FAIL"}
+
+    output_docs = []
+    for d in docs:
+        renamed_doc = {key_mapping.get(k, k): v for k, v in d.items()}
+        if "run" in d:
+            renamed_doc["run"] = run_status_numeric_to_text.get(str(d["run"]), d["run"])
+        output_docs.append(renamed_doc)
+
+    return get_result(data={"total": total, "docs": output_docs})
 
 
 @manager.route("/datasets/<dataset_id>/documents", methods=["DELETE"])  # noqa: F821
 @token_required
@@ -982,7 +995,7 @@ def list_chunks(tenant_id, dataset_id, document_id):
             _ = Chunk(**final_chunk)
 
     elif settings.docStoreConn.indexExist(search.index_name(tenant_id), dataset_id):
-        sres = settings.retrievaler.search(query, search.index_name(tenant_id), [dataset_id], emb_mdl=None, highlight=True)
+        sres = settings.retriever.search(query, search.index_name(tenant_id), [dataset_id], emb_mdl=None, highlight=True)
         res["total"] = sres.total
         for id in sres.ids:
             d = {
@@ -1446,7 +1459,7 @@ def retrieval_test(tenant_id):
             chat_mdl = LLMBundle(kb.tenant_id, LLMType.CHAT)
             question += keyword_extraction(chat_mdl, question)
 
-        ranks = settings.retrievaler.retrieval(
+        ranks = settings.retriever.retrieval(
             question,
             embd_mdl,
             tenant_ids,
@@ -1462,7 +1475,7 @@ def retrieval_test(tenant_id):
             rank_feature=label_question(question, kbs),
         )
         if use_kg:
-            ck = settings.kg_retrievaler.retrieval(question, [k.tenant_id for k in kbs], kb_ids, embd_mdl, LLMBundle(kb.tenant_id, LLMType.CHAT))
+            ck = settings.kg_retriever.retrieval(question, [k.tenant_id for k in kbs], kb_ids, embd_mdl, LLMBundle(kb.tenant_id, LLMType.CHAT))
             if ck["content_with_weight"]:
                 ranks["chunks"].insert(0, ck)
 
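Note: the rewritten `list_docs` normalizes `run` filters to numeric codes before querying and maps them back to text on output. A self-contained round trip of that mapping, taken directly from the hunk above:

```python
# Run-status mapping round trip, as used by the rewritten list_docs.
RUN_TEXT_TO_NUM = {"UNSTART": "0", "RUNNING": "1", "CANCEL": "2", "DONE": "3", "FAIL": "4"}
RUN_NUM_TO_TEXT = {v: k for k, v in RUN_TEXT_TO_NUM.items()}

def normalize_run(values):
    # Unknown values pass through unchanged, matching .get(v, v) in the hunk.
    return [RUN_TEXT_TO_NUM.get(v, v) for v in values]

assert normalize_run(["DONE", "1", "bogus"]) == ["3", "1", "bogus"]
assert RUN_NUM_TO_TEXT.get(str(3), 3) == "DONE"
```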
@@ -1,3 +1,20 @@
+#
+# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
 import pathlib
 import re
 
@@ -17,7 +34,8 @@ from api.utils.api_utils import get_json_result
 from api.utils.file_utils import filename_type
 from rag.utils.storage_factory import STORAGE_IMPL
 
-@manager.route('/file/upload', methods=['POST'])  # noqa: F821
+
+@manager.route('/file/upload', methods=['POST'])  # noqa: F821
 @token_required
 def upload(tenant_id):
     """
@@ -44,22 +62,22 @@ def upload(tenant_id):
           type: object
           properties:
             data:
               type: array
               items:
                 type: object
                 properties:
                   id:
                     type: string
                     description: File ID
                   name:
                     type: string
                     description: File name
                   size:
                     type: integer
                     description: File size in bytes
                   type:
                     type: string
                     description: File type (e.g., document, folder)
     """
     pf_id = request.form.get("parent_id")
 
@@ -97,12 +115,14 @@ def upload(tenant_id):
             e, file = FileService.get_by_id(file_id_list[len_id_list - 1])
             if not e:
                 return get_json_result(data=False, message="Folder not found!", code=404)
-            last_folder = FileService.create_folder(file, file_id_list[len_id_list - 1], file_obj_names, len_id_list)
+            last_folder = FileService.create_folder(file, file_id_list[len_id_list - 1], file_obj_names,
+                                                    len_id_list)
         else:
             e, file = FileService.get_by_id(file_id_list[len_id_list - 2])
             if not e:
                 return get_json_result(data=False, message="Folder not found!", code=404)
-            last_folder = FileService.create_folder(file, file_id_list[len_id_list - 2], file_obj_names, len_id_list)
+            last_folder = FileService.create_folder(file, file_id_list[len_id_list - 2], file_obj_names,
+                                                    len_id_list)
 
         filetype = filename_type(file_obj_names[file_len - 1])
         location = file_obj_names[file_len - 1]
@@ -129,7 +149,7 @@ def upload(tenant_id):
         return server_error_response(e)
 
 
 @manager.route('/file/create', methods=['POST'])  # noqa: F821
 @token_required
 def create(tenant_id):
     """
@@ -207,7 +227,7 @@ def create(tenant_id):
         return server_error_response(e)
 
 
 @manager.route('/file/list', methods=['GET'])  # noqa: F821
 @token_required
 def list_files(tenant_id):
     """
@@ -299,7 +319,7 @@ def list_files(tenant_id):
         return server_error_response(e)
 
 
 @manager.route('/file/root_folder', methods=['GET'])  # noqa: F821
 @token_required
 def get_root_folder(tenant_id):
     """
@@ -335,7 +355,7 @@ def get_root_folder(tenant_id):
         return server_error_response(e)
 
 
 @manager.route('/file/parent_folder', methods=['GET'])  # noqa: F821
 @token_required
 def get_parent_folder():
     """
@@ -380,7 +400,7 @@ def get_parent_folder():
         return server_error_response(e)
 
 
 @manager.route('/file/all_parent_folder', methods=['GET'])  # noqa: F821
 @token_required
 def get_all_parent_folders(tenant_id):
     """
@@ -428,7 +448,7 @@ def get_all_parent_folders(tenant_id):
         return server_error_response(e)
 
 
 @manager.route('/file/rm', methods=['POST'])  # noqa: F821
 @token_required
 def rm(tenant_id):
     """
@@ -502,7 +522,7 @@ def rm(tenant_id):
         return server_error_response(e)
 
 
 @manager.route('/file/rename', methods=['POST'])  # noqa: F821
 @token_required
 def rename(tenant_id):
     """
@@ -542,7 +562,8 @@ def rename(tenant_id):
         if not e:
             return get_json_result(message="File not found!", code=404)
 
-        if file.type != FileType.FOLDER.value and pathlib.Path(req["name"].lower()).suffix != pathlib.Path(file.name.lower()).suffix:
+        if file.type != FileType.FOLDER.value and pathlib.Path(req["name"].lower()).suffix != pathlib.Path(
+                file.name.lower()).suffix:
             return get_json_result(data=False, message="The extension of file can't be changed", code=400)
 
         for existing_file in FileService.query(name=req["name"], pf_id=file.parent_id):
@@ -562,9 +583,9 @@ def rename(tenant_id):
         return server_error_response(e)
 
 
 @manager.route('/file/get/<file_id>', methods=['GET'])  # noqa: F821
 @token_required
-def get(tenant_id,file_id):
+def get(tenant_id, file_id):
     """
     Download a file.
     ---
@@ -610,7 +631,7 @@ def get(tenant_id,file_id):
         return server_error_response(e)
 
 
 @manager.route('/file/mv', methods=['POST'])  # noqa: F821
 @token_required
 def move(tenant_id):
     """
@@ -669,6 +690,7 @@
     except Exception as e:
         return server_error_response(e)
 
+
 @manager.route('/file/convert', methods=['POST'])  # noqa: F821
 @token_required
 def convert(tenant_id):
@@ -735,4 +757,4 @@ def convert(tenant_id):
             file2documents.append(file2document.to_json())
         return get_json_result(data=file2documents)
     except Exception as e:
         return server_error_response(e)
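Note: a hedged example of calling the `/file/upload` route above. Only the `parent_id` form field is visible in this diff, so the multipart field name and base URL are assumptions:

```python
# Hypothetical upload call against the file route sketched above.
import requests

with open("notes.pdf", "rb") as f:
    resp = requests.post(
        "http://localhost:9380/api/v1/file/upload",  # assumed base URL
        headers={"Authorization": "Bearer <API_KEY>"},
        data={"parent_id": "<folder_id>"},
        files={"file": ("notes.pdf", f, "application/pdf")},  # field name assumed
        timeout=60,
    )
print(resp.json()["data"][0]["id"])
```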
@@ -36,7 +36,8 @@ from api.db.services.llm_service import LLMBundle
 from api.db.services.search_service import SearchService
 from api.db.services.user_service import UserTenantService
 from api.utils import get_uuid
-from api.utils.api_utils import check_duplicate_ids, get_data_openai, get_error_data_result, get_json_result, get_result, server_error_response, token_required, validate_request
+from api.utils.api_utils import check_duplicate_ids, get_data_openai, get_error_data_result, get_json_result, \
+    get_result, server_error_response, token_required, validate_request
 from rag.app.tag import label_question
 from rag.prompts.template import load_prompt
 from rag.prompts.generator import cross_languages, gen_meta_filter, keyword_extraction, chunks_format
@@ -88,7 +89,8 @@ def create_agent_session(tenant_id, agent_id):
         canvas.reset()
 
     cvs.dsl = json.loads(str(canvas))
-    conv = {"id": session_id, "dialog_id": cvs.id, "user_id": user_id, "message": [{"role": "assistant", "content": canvas.get_prologue()}], "source": "agent", "dsl": cvs.dsl}
+    conv = {"id": session_id, "dialog_id": cvs.id, "user_id": user_id,
+            "message": [{"role": "assistant", "content": canvas.get_prologue()}], "source": "agent", "dsl": cvs.dsl}
     API4ConversationService.save(**conv)
     conv["agent_id"] = conv.pop("dialog_id")
     return get_result(data=conv)
@@ -279,7 +281,7 @@ def chat_completion_openai_like(tenant_id, chat_id):
             reasoning_match = re.search(r"<think>(.*?)</think>", answer, flags=re.DOTALL)
             if reasoning_match:
                 reasoning_part = reasoning_match.group(1)
-                content_part = answer[reasoning_match.end() :]
+                content_part = answer[reasoning_match.end():]
             else:
                 reasoning_part = ""
                 content_part = answer
@@ -324,7 +326,8 @@ def chat_completion_openai_like(tenant_id, chat_id):
             response["choices"][0]["delta"]["content"] = None
             response["choices"][0]["delta"]["reasoning_content"] = None
             response["choices"][0]["finish_reason"] = "stop"
-            response["usage"] = {"prompt_tokens": len(prompt), "completion_tokens": token_used, "total_tokens": len(prompt) + token_used}
+            response["usage"] = {"prompt_tokens": len(prompt), "completion_tokens": token_used,
+                                 "total_tokens": len(prompt) + token_used}
             if need_reference:
                 response["choices"][0]["delta"]["reference"] = chunks_format(last_ans.get("reference", []))
                 response["choices"][0]["delta"]["final_content"] = last_ans.get("answer", "")
@@ -559,7 +562,8 @@ def list_agent_session(tenant_id, agent_id):
         desc = True
     # dsl defaults to True in all cases except for False and false
     include_dsl = request.args.get("dsl") != "False" and request.args.get("dsl") != "false"
-    total, convs = API4ConversationService.get_list(agent_id, tenant_id, page_number, items_per_page, orderby, desc, id, user_id, include_dsl)
+    total, convs = API4ConversationService.get_list(agent_id, tenant_id, page_number, items_per_page, orderby, desc, id,
+                                                    user_id, include_dsl)
     if not convs:
         return get_result(data=[])
     for conv in convs:
@@ -581,7 +585,8 @@ def list_agent_session(tenant_id, agent_id):
             if message_num != 0 and messages[message_num]["role"] != "user":
                 chunk_list = []
                 # Add boundary and type checks to prevent KeyError
-                if chunk_num < len(conv["reference"]) and conv["reference"][chunk_num] is not None and isinstance(conv["reference"][chunk_num], dict) and "chunks" in conv["reference"][chunk_num]:
+                if chunk_num < len(conv["reference"]) and conv["reference"][chunk_num] is not None and isinstance(
+                        conv["reference"][chunk_num], dict) and "chunks" in conv["reference"][chunk_num]:
                     chunks = conv["reference"][chunk_num]["chunks"]
                     for chunk in chunks:
                         # Ensure chunk is a dictionary before calling get method
@@ -639,13 +644,16 @@ def delete(tenant_id, chat_id):
 
     if errors:
         if success_count > 0:
-            return get_result(data={"success_count": success_count, "errors": errors}, message=f"Partially deleted {success_count} sessions with {len(errors)} errors")
+            return get_result(data={"success_count": success_count, "errors": errors},
+                              message=f"Partially deleted {success_count} sessions with {len(errors)} errors")
         else:
             return get_error_data_result(message="; ".join(errors))
 
     if duplicate_messages:
         if success_count > 0:
-            return get_result(message=f"Partially deleted {success_count} sessions with {len(duplicate_messages)} errors", data={"success_count": success_count, "errors": duplicate_messages})
+            return get_result(
+                message=f"Partially deleted {success_count} sessions with {len(duplicate_messages)} errors",
+                data={"success_count": success_count, "errors": duplicate_messages})
         else:
             return get_error_data_result(message=";".join(duplicate_messages))
 
@@ -691,13 +699,16 @@ def delete_agent_session(tenant_id, agent_id):
 
     if errors:
         if success_count > 0:
-            return get_result(data={"success_count": success_count, "errors": errors}, message=f"Partially deleted {success_count} sessions with {len(errors)} errors")
+            return get_result(data={"success_count": success_count, "errors": errors},
+                              message=f"Partially deleted {success_count} sessions with {len(errors)} errors")
         else:
             return get_error_data_result(message="; ".join(errors))
 
     if duplicate_messages:
         if success_count > 0:
-            return get_result(message=f"Partially deleted {success_count} sessions with {len(duplicate_messages)} errors", data={"success_count": success_count, "errors": duplicate_messages})
+            return get_result(
+                message=f"Partially deleted {success_count} sessions with {len(duplicate_messages)} errors",
+                data={"success_count": success_count, "errors": duplicate_messages})
         else:
             return get_error_data_result(message=";".join(duplicate_messages))
 
@@ -730,7 +741,9 @@ def ask_about(tenant_id):
             for ans in ask(req["question"], req["kb_ids"], uid):
                 yield "data:" + json.dumps({"code": 0, "message": "", "data": ans}, ensure_ascii=False) + "\n\n"
         except Exception as e:
-            yield "data:" + json.dumps({"code": 500, "message": str(e), "data": {"answer": "**ERROR**: " + str(e), "reference": []}}, ensure_ascii=False) + "\n\n"
+            yield "data:" + json.dumps(
+                {"code": 500, "message": str(e), "data": {"answer": "**ERROR**: " + str(e), "reference": []}},
+                ensure_ascii=False) + "\n\n"
         yield "data:" + json.dumps({"code": 0, "message": "", "data": True}, ensure_ascii=False) + "\n\n"
 
     resp = Response(stream(), mimetype="text/event-stream")
@@ -882,7 +895,9 @@ def begin_inputs(agent_id):
         return get_error_data_result(f"Can't find agent by ID: {agent_id}")
 
     canvas = Canvas(json.dumps(cvs.dsl), objs[0].tenant_id)
-    return get_result(data={"title": cvs.title, "avatar": cvs.avatar, "inputs": canvas.get_component_input_form("begin"), "prologue": canvas.get_prologue(), "mode": canvas.get_mode()})
+    return get_result(
+        data={"title": cvs.title, "avatar": cvs.avatar, "inputs": canvas.get_component_input_form("begin"),
+              "prologue": canvas.get_prologue(), "mode": canvas.get_mode()})
 
 
 @manager.route("/searchbots/ask", methods=["POST"])  # noqa: F821
@@ -911,7 +926,9 @@ def ask_about_embedded():
             for ans in ask(req["question"], req["kb_ids"], uid, search_config=search_config):
                 yield "data:" + json.dumps({"code": 0, "message": "", "data": ans}, ensure_ascii=False) + "\n\n"
         except Exception as e:
-            yield "data:" + json.dumps({"code": 500, "message": str(e), "data": {"answer": "**ERROR**: " + str(e), "reference": []}}, ensure_ascii=False) + "\n\n"
+            yield "data:" + json.dumps(
+                {"code": 500, "message": str(e), "data": {"answer": "**ERROR**: " + str(e), "reference": []}},
+                ensure_ascii=False) + "\n\n"
         yield "data:" + json.dumps({"code": 0, "message": "", "data": True}, ensure_ascii=False) + "\n\n"
 
     resp = Response(stream(), mimetype="text/event-stream")
@@ -978,7 +995,8 @@ def retrieval_test_embedded():
                 tenant_ids.append(tenant.tenant_id)
                 break
         else:
-            return get_json_result(data=False, message="Only owner of knowledgebase authorized for this operation.", code=settings.RetCode.OPERATING_ERROR)
+            return get_json_result(data=False, message="Only owner of knowledgebase authorized for this operation.",
+                                   code=settings.RetCode.OPERATING_ERROR)
 
         e, kb = KnowledgebaseService.get_by_id(kb_ids[0])
         if not e:
@@ -998,11 +1016,13 @@
             question += keyword_extraction(chat_mdl, question)
 
         labels = label_question(question, [kb])
-        ranks = settings.retrievaler.retrieval(
-            question, embd_mdl, tenant_ids, kb_ids, page, size, similarity_threshold, vector_similarity_weight, top, doc_ids, rerank_mdl=rerank_mdl, highlight=req.get("highlight"), rank_feature=labels
+        ranks = settings.retriever.retrieval(
+            question, embd_mdl, tenant_ids, kb_ids, page, size, similarity_threshold, vector_similarity_weight, top,
+            doc_ids, rerank_mdl=rerank_mdl, highlight=req.get("highlight"), rank_feature=labels
         )
         if use_kg:
-            ck = settings.kg_retrievaler.retrieval(question, tenant_ids, kb_ids, embd_mdl, LLMBundle(kb.tenant_id, LLMType.CHAT))
+            ck = settings.kg_retriever.retrieval(question, tenant_ids, kb_ids, embd_mdl,
+                                                 LLMBundle(kb.tenant_id, LLMType.CHAT))
             if ck["content_with_weight"]:
                 ranks["chunks"].insert(0, ck)
 
@@ -1013,7 +1033,8 @@
         return get_json_result(data=ranks)
     except Exception as e:
         if str(e).find("not_found") > 0:
-            return get_json_result(data=False, message="No chunk found! Check the chunk status please!", code=settings.RetCode.DATA_ERROR)
+            return get_json_result(data=False, message="No chunk found! Check the chunk status please!",
+                                   code=settings.RetCode.DATA_ERROR)
         return server_error_response(e)
 
 
@@ -1082,7 +1103,8 @@ def detail_share_embedded():
         if SearchService.query(tenant_id=tenant.tenant_id, id=search_id):
             break
     else:
-        return get_json_result(data=False, message="Has no permission for this operation.", code=settings.RetCode.OPERATING_ERROR)
+        return get_json_result(data=False, message="Has no permission for this operation.",
+                               code=settings.RetCode.OPERATING_ERROR)
 
     search = SearchService.get_detail(search_id)
     if not search:
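Note: these handlers all stream Server-Sent Events: each chunk is a `data:` line holding a JSON envelope, terminated by a blank line, with a final `data: {"data": true}` sentinel. A minimal standalone Flask version of the same pattern, lifted from the hunks above:

```python
# Self-contained SSE stream matching the handlers' "data:" + JSON pattern.
import json
from flask import Flask, Response

app = Flask(__name__)

@app.route("/demo/ask")
def demo_ask():
    def stream():
        try:
            for ans in ["partial", "answer"]:
                yield "data:" + json.dumps({"code": 0, "message": "", "data": ans}, ensure_ascii=False) + "\n\n"
        except Exception as e:
            yield "data:" + json.dumps(
                {"code": 500, "message": str(e), "data": {"answer": "**ERROR**: " + str(e), "reference": []}},
                ensure_ascii=False) + "\n\n"
        # sentinel event marking the end of the stream
        yield "data:" + json.dumps({"code": 0, "message": "", "data": True}, ensure_ascii=False) + "\n\n"

    resp = Response(stream(), mimetype="text/event-stream")
    resp.headers["Cache-Control"] = "no-cache"
    return resp
```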
@@ -39,6 +39,7 @@ from rag.utils.redis_conn import REDIS_CONN
 from flask import jsonify
 from api.utils.health_utils import run_health_checks
 
+
 @manager.route("/version", methods=["GET"])  # noqa: F821
 @login_required
 def version():
@@ -161,7 +162,7 @@ def status():
         task_executors = REDIS_CONN.smembers("TASKEXE")
         now = datetime.now().timestamp()
         for task_executor_id in task_executors:
-            heartbeats = REDIS_CONN.zrangebyscore(task_executor_id, now - 60*30, now)
+            heartbeats = REDIS_CONN.zrangebyscore(task_executor_id, now - 60 * 30, now)
             heartbeats = [json.loads(heartbeat) for heartbeat in heartbeats]
             task_executor_heartbeats[task_executor_id] = heartbeats
     except Exception:
@@ -177,6 +178,11 @@ def healthz():
     return jsonify(result), (200 if all_ok else 500)
 
 
+@manager.route("/ping", methods=["GET"])  # noqa: F821
+def ping():
+    return "pong", 200
+
+
 @manager.route("/new_token", methods=["POST"])  # noqa: F821
 @login_required
 def new_token():
@@ -268,7 +274,8 @@ def token_list():
         objs = [o.to_dict() for o in objs]
         for o in objs:
             if not o["beta"]:
-                o["beta"] = generate_confirmation_token(generate_confirmation_token(tenants[0].tenant_id)).replace("ragflow-", "")[:32]
+                o["beta"] = generate_confirmation_token(generate_confirmation_token(tenants[0].tenant_id)).replace(
+                    "ragflow-", "")[:32]
                 APITokenService.filter_update([APIToken.tenant_id == tenant_id, APIToken.token == o["token"]], o)
         return get_json_result(data=objs)
     except Exception as e:
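Note: the `status()` hunk reads each executor's heartbeats from a Redis sorted set scored by timestamp, keeping only the last 30 minutes. Equivalent raw redis-py calls, with key names that are illustrative rather than the repo's:

```python
# Sketch of the sorted-set heartbeat query used by status().
import json
import time
import redis

r = redis.Redis()

def recent_heartbeats(executor_id: str, window_s: int = 60 * 30):
    now = time.time()
    # zrangebyscore returns members whose score (timestamp) falls in range
    raw = r.zrangebyscore(executor_id, now - window_s, now)
    return [json.loads(h) for h in raw]

# A worker would record a heartbeat roughly like this:
# r.zadd("TASKEXE_worker_1", {json.dumps({"ts": time.time()}): time.time()})
```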
@@ -70,7 +70,8 @@ def create(tenant_id):
             return get_data_error_result(message=f"{invite_user_email} is already in the team.")
         if user_tenant_role == UserTenantRole.OWNER:
             return get_data_error_result(message=f"{invite_user_email} is the owner of the team.")
-        return get_data_error_result(message=f"{invite_user_email} is in the team, but the role: {user_tenant_role} is invalid.")
+        return get_data_error_result(
+            message=f"{invite_user_email} is in the team, but the role: {user_tenant_role} is invalid.")
 
     UserTenantService.save(
         id=get_uuid(),
@@ -132,7 +133,8 @@ def tenant_list():
 @login_required
 def agree(tenant_id):
     try:
-        UserTenantService.filter_update([UserTenant.tenant_id == tenant_id, UserTenant.user_id == current_user.id], {"role": UserTenantRole.NORMAL})
+        UserTenantService.filter_update([UserTenant.tenant_id == tenant_id, UserTenant.user_id == current_user.id],
+                                        {"role": UserTenantRole.NORMAL})
         return get_json_result(data=True)
     except Exception as e:
         return server_error_response(e)
@ -15,11 +15,14 @@
|
|||||||
#
|
#
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
|
import string
|
||||||
|
import os
|
||||||
import re
|
import re
|
||||||
import secrets
|
import secrets
|
||||||
|
import time
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
from flask import redirect, request, session
|
from flask import redirect, request, session, make_response
|
||||||
from flask_login import current_user, login_required, login_user, logout_user
|
from flask_login import current_user, login_required, login_user, logout_user
|
||||||
from werkzeug.security import check_password_hash, generate_password_hash
|
from werkzeug.security import check_password_hash, generate_password_hash
|
||||||
|
|
||||||
@ -46,6 +49,19 @@ from api.utils.api_utils import (
|
|||||||
validate_request,
|
validate_request,
|
||||||
)
|
)
|
||||||
from api.utils.crypt import decrypt
|
from api.utils.crypt import decrypt
|
||||||
|
from rag.utils.redis_conn import REDIS_CONN
|
||||||
|
from api.apps import smtp_mail_server
|
||||||
|
from api.utils.web_utils import (
|
||||||
|
send_email_html,
|
||||||
|
OTP_LENGTH,
|
||||||
|
OTP_TTL_SECONDS,
|
||||||
|
ATTEMPT_LIMIT,
|
||||||
|
ATTEMPT_LOCK_SECONDS,
|
||||||
|
RESEND_COOLDOWN_SECONDS,
|
||||||
|
otp_keys,
|
||||||
|
hash_code,
|
||||||
|
captcha_key,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@manager.route("/login", methods=["POST", "GET"]) # noqa: F821
|
@manager.route("/login", methods=["POST", "GET"]) # noqa: F821
|
||||||
@ -98,7 +114,14 @@ def login():
|
|||||||
return get_json_result(data=False, code=settings.RetCode.SERVER_ERROR, message="Fail to crypt password")
|
return get_json_result(data=False, code=settings.RetCode.SERVER_ERROR, message="Fail to crypt password")
|
||||||
|
|
||||||
user = UserService.query_user(email, password)
|
user = UserService.query_user(email, password)
|
||||||
if user:
|
|
||||||
|
if user and hasattr(user, 'is_active') and user.is_active == "0":
|
||||||
|
return get_json_result(
|
||||||
|
data=False,
|
||||||
|
code=settings.RetCode.FORBIDDEN,
|
||||||
|
message="This account has been disabled, please contact the administrator!",
|
||||||
|
)
|
||||||
|
elif user:
|
||||||
response_data = user.to_json()
|
response_data = user.to_json()
|
||||||
user.access_token = get_uuid()
|
user.access_token = get_uuid()
|
||||||
login_user(user)
|
login_user(user)
|
||||||
@@ -227,6 +250,9 @@ def oauth_callback(channel):
         # User exists, try to log in
         user = users[0]
         user.access_token = get_uuid()
+        if user and hasattr(user, 'is_active') and user.is_active == "0":
+            return redirect("/?error=user_inactive")
+
         login_user(user)
         user.save()
         return redirect(f"/?auth={user.get_id()}")
@@ -317,6 +343,8 @@ def github_callback():
         # User has already registered, try to log in
         user = users[0]
         user.access_token = get_uuid()
+        if user and hasattr(user, 'is_active') and user.is_active == "0":
+            return redirect("/?error=user_inactive")
         login_user(user)
         user.save()
         return redirect("/?auth=%s" % user.get_id())
@@ -418,6 +446,8 @@ def feishu_callback():

         # User has already registered, try to log in
         user = users[0]
+        if user and hasattr(user, 'is_active') and user.is_active == "0":
+            return redirect("/?error=user_inactive")
         user.access_token = get_uuid()
         login_user(user)
         user.save()
@@ -811,3 +841,172 @@ def set_tenant_info():
         return get_json_result(data=True)
     except Exception as e:
         return server_error_response(e)
+
+
+@manager.route("/forget/captcha", methods=["GET"])  # noqa: F821
+def forget_get_captcha():
+    """
+    GET /forget/captcha?email=<email>
+    - Generate an image captcha and cache it in Redis under key captcha:{email} with TTL = OTP_TTL_SECONDS.
+    - Returns the captcha as a PNG image.
+    """
+    email = (request.args.get("email") or "")
+    if not email:
+        return get_json_result(data=False, code=settings.RetCode.ARGUMENT_ERROR, message="email is required")
+
+    users = UserService.query(email=email)
+    if not users:
+        return get_json_result(data=False, code=settings.RetCode.DATA_ERROR, message="invalid email")
+
+    # Generate captcha text
+    allowed = string.ascii_uppercase + string.digits
+    captcha_text = "".join(secrets.choice(allowed) for _ in range(OTP_LENGTH))
+    REDIS_CONN.set(captcha_key(email), captcha_text, 60)  # Valid for 60 seconds
+
+    from captcha.image import ImageCaptcha
+    image = ImageCaptcha(width=300, height=120, font_sizes=[50, 60, 70])
+    img_bytes = image.generate(captcha_text).read()
+    response = make_response(img_bytes)
+    response.headers.set("Content-Type", "image/JPEG")
+    return response
+
+
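For reference, a hedged client-side sketch of fetching the captcha; the URL prefix is an assumption and should be adjusted to the deployment:

    # Sketch only: request the captcha image for a registered email.
    # BASE_URL and the email value are placeholder assumptions.
    import requests

    BASE_URL = "http://localhost:9380/v1/user"
    resp = requests.get(f"{BASE_URL}/forget/captcha", params={"email": "user@example.com"})
    with open("captcha.png", "wb") as f:
        f.write(resp.content)  # image bytes; the endpoint labels them image/JPEG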
+@manager.route("/forget/otp", methods=["POST"])  # noqa: F821
+def forget_send_otp():
+    """
+    POST /forget/otp
+    - Verify the image captcha stored at captcha:{email} (case-insensitive).
+    - On success, generate an email OTP (A–Z with length = OTP_LENGTH), store hash + salt (and timestamp) in Redis with TTL, reset attempts and cooldown, and send the OTP via email.
+    """
+    req = request.get_json()
+    email = req.get("email") or ""
+    captcha = (req.get("captcha") or "").strip()
+
+    if not email or not captcha:
+        return get_json_result(data=False, code=settings.RetCode.ARGUMENT_ERROR, message="email and captcha required")
+
+    users = UserService.query(email=email)
+    if not users:
+        return get_json_result(data=False, code=settings.RetCode.DATA_ERROR, message="invalid email")
+
+    stored_captcha = REDIS_CONN.get(captcha_key(email))
+    if not stored_captcha:
+        return get_json_result(data=False, code=settings.RetCode.NOT_EFFECTIVE, message="invalid or expired captcha")
+    if (stored_captcha or "").strip().lower() != captcha.lower():
+        return get_json_result(data=False, code=settings.RetCode.AUTHENTICATION_ERROR, message="invalid or expired captcha")
+
+    # Delete captcha to prevent reuse
+    REDIS_CONN.delete(captcha_key(email))
+
+    k_code, k_attempts, k_last, k_lock = otp_keys(email)
+    now = int(time.time())
+    last_ts = REDIS_CONN.get(k_last)
+    if last_ts:
+        try:
+            elapsed = now - int(last_ts)
+        except Exception:
+            elapsed = RESEND_COOLDOWN_SECONDS
+        remaining = RESEND_COOLDOWN_SECONDS - elapsed
+        if remaining > 0:
+            return get_json_result(data=False, code=settings.RetCode.NOT_EFFECTIVE, message=f"you still have to wait {remaining} seconds")
+
+    # Generate OTP (uppercase letters only) and store hashed
+    otp = "".join(secrets.choice(string.ascii_uppercase) for _ in range(OTP_LENGTH))
+    salt = os.urandom(16)
+    code_hash = hash_code(otp, salt)
+    REDIS_CONN.set(k_code, f"{code_hash}:{salt.hex()}", OTP_TTL_SECONDS)
+    REDIS_CONN.set(k_attempts, 0, OTP_TTL_SECONDS)
+    REDIS_CONN.set(k_last, now, OTP_TTL_SECONDS)
+    REDIS_CONN.delete(k_lock)
+
+    ttl_min = OTP_TTL_SECONDS // 60
+
+    if not smtp_mail_server:
+        logging.warning("SMTP mail server not initialized; skip sending email.")
+    else:
+        try:
+            send_email_html(
+                subject="Your Password Reset Code",
+                to_email=email,
+                template_key="reset_code",
+                code=otp,
+                ttl_min=ttl_min,
+            )
+        except Exception:
+            return get_json_result(data=False, code=settings.RetCode.SERVER_ERROR, message="failed to send email")
+
+    return get_json_result(data=True, code=settings.RetCode.SUCCESS, message="verification passed, email sent")
+
+
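hash_code, otp_keys and the *_SECONDS constants are imported from api.utils.web_utils and do not appear in this diff; one plausible shape consistent with the f"{code_hash}:{salt.hex()}" storage format is sketched below, as an assumption rather than the project's actual implementation:

    # Sketch only: a salted SHA-256 matching the "{hash}:{salt_hex}" value stored in Redis.
    # The real hash_code/otp_keys live in api.utils.web_utils and may differ.
    import hashlib

    def hash_code(code: str, salt: bytes) -> str:
        return hashlib.sha256(salt + code.encode("utf-8")).hexdigest()

    def otp_keys(email: str):
        base = f"otp:{email}"
        return f"{base}:code", f"{base}:attempts", f"{base}:last", f"{base}:lock"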
+@manager.route("/forget", methods=["POST"])  # noqa: F821
+def forget():
+    """
+    POST: Verify email + OTP and reset password, then log the user in.
+    Request JSON: { email, otp, new_password, confirm_new_password }
+    """
+    req = request.get_json()
+    email = req.get("email") or ""
+    otp = (req.get("otp") or "").strip()
+    new_pwd = req.get("new_password")
+    new_pwd2 = req.get("confirm_new_password")
+
+    if not all([email, otp, new_pwd, new_pwd2]):
+        return get_json_result(data=False, code=settings.RetCode.ARGUMENT_ERROR, message="email, otp and passwords are required")
+
+    # For reset, passwords are provided as-is (no decrypt needed)
+    if new_pwd != new_pwd2:
+        return get_json_result(data=False, code=settings.RetCode.ARGUMENT_ERROR, message="passwords do not match")
+
+    users = UserService.query(email=email)
+    if not users:
+        return get_json_result(data=False, code=settings.RetCode.DATA_ERROR, message="invalid email")
+
+    user = users[0]
+    # Verify OTP from Redis
+    k_code, k_attempts, k_last, k_lock = otp_keys(email)
+    if REDIS_CONN.get(k_lock):
+        return get_json_result(data=False, code=settings.RetCode.NOT_EFFECTIVE, message="too many attempts, try later")
+
+    stored = REDIS_CONN.get(k_code)
+    if not stored:
+        return get_json_result(data=False, code=settings.RetCode.NOT_EFFECTIVE, message="expired otp")
+
+    try:
+        stored_hash, salt_hex = str(stored).split(":", 1)
+        salt = bytes.fromhex(salt_hex)
+    except Exception:
+        return get_json_result(data=False, code=settings.RetCode.EXCEPTION_ERROR, message="otp storage corrupted")
+
+    # Case-insensitive verification: OTP generated uppercase
+    calc = hash_code(otp.upper(), salt)
+    if calc != stored_hash:
+        # bump attempts
+        try:
+            attempts = int(REDIS_CONN.get(k_attempts) or 0) + 1
+        except Exception:
+            attempts = 1
+        REDIS_CONN.set(k_attempts, attempts, OTP_TTL_SECONDS)
+        if attempts >= ATTEMPT_LIMIT:
+            REDIS_CONN.set(k_lock, int(time.time()), ATTEMPT_LOCK_SECONDS)
+        return get_json_result(data=False, code=settings.RetCode.AUTHENTICATION_ERROR, message="expired otp")
+
+    # Success: consume OTP and reset password
+    REDIS_CONN.delete(k_code)
+    REDIS_CONN.delete(k_attempts)
+    REDIS_CONN.delete(k_last)
+    REDIS_CONN.delete(k_lock)
+
+    try:
+        UserService.update_user_password(user.id, new_pwd)
+    except Exception as e:
+        logging.exception(e)
+        return get_json_result(data=False, code=settings.RetCode.EXCEPTION_ERROR, message="failed to reset password")
+
+    # Auto login (reuse login flow)
+    user.access_token = get_uuid()
+    login_user(user)
+    user.update_time = (current_timestamp(),)
+    user.update_date = (datetime_format(datetime.now()),)
+    user.save()
+    msg = "Password reset successful. Logged in."
+    return construct_response(data=user.to_json(), auth=user.get_id(), message=msg)
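Taken together, the three endpoints form a captcha, OTP, reset sequence. A hedged end-to-end sketch, in which the URL prefix and every value are placeholders:

    # Sketch only: the password-reset flow these endpoints support end to end.
    import requests

    BASE_URL = "http://localhost:9380/v1/user"  # assumed mount point
    email = "user@example.com"

    requests.get(f"{BASE_URL}/forget/captcha", params={"email": email})   # 1. captcha image
    requests.post(f"{BASE_URL}/forget/otp",
                  json={"email": email, "captcha": "AB12CD"})             # 2. OTP sent by email
    requests.post(f"{BASE_URL}/forget",
                  json={"email": email, "otp": "QWERTY",
                        "new_password": "s3cret!",
                        "confirm_new_password": "s3cret!"})               # 3. reset + auto login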
api/common/README.md (new file, +2)
@@ -0,0 +1,2 @@
+The python files in this directory are shared between services. They contain common utilities, models, and functions that can be used across various
+services to ensure consistency and reduce code duplication.
api/common/base64.py (new file, +21)
@@ -0,0 +1,21 @@
+#
+# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import base64
+
+def encode_to_base64(input_string):
+    base64_encoded = base64.b64encode(input_string.encode('utf-8'))
+    return base64_encoded.decode('utf-8')
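A quick round-trip sketch of the extracted helper:

    # Sketch only: encode_to_base64 returns the base64 text of a UTF-8 string.
    from api.common.base64 import encode_to_base64

    assert encode_to_base64("hello") == "aGVsbG8="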
api/common/check_team_permission.py (new file, +59)
@@ -0,0 +1,59 @@
+#
+# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+from api.db import TenantPermission
+from api.db.db_models import File, Knowledgebase
+from api.db.services.file_service import FileService
+from api.db.services.knowledgebase_service import KnowledgebaseService
+from api.db.services.user_service import TenantService
+
+
+def check_kb_team_permission(kb: dict | Knowledgebase, other: str) -> bool:
+    kb = kb.to_dict() if isinstance(kb, Knowledgebase) else kb
+
+    kb_tenant_id = kb["tenant_id"]
+
+    if kb_tenant_id == other:
+        return True
+
+    if kb["permission"] != TenantPermission.TEAM:
+        return False
+
+    joined_tenants = TenantService.get_joined_tenants_by_user_id(other)
+    return any(tenant["tenant_id"] == kb_tenant_id for tenant in joined_tenants)
+
+
+def check_file_team_permission(file: dict | File, other: str) -> bool:
+    file = file.to_dict() if isinstance(file, File) else file
+
+    file_tenant_id = file["tenant_id"]
+    if file_tenant_id == other:
+        return True
+
+    file_id = file["id"]
+
+    kb_ids = [kb_info["kb_id"] for kb_info in FileService.get_kb_id_by_file_id(file_id)]
+
+    for kb_id in kb_ids:
+        ok, kb = KnowledgebaseService.get_by_id(kb_id)
+        if not ok:
+            continue
+
+        if check_kb_team_permission(kb, other):
+            return True
+
+    return False
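A short usage sketch of the new helpers; the IDs are placeholders:

    # Sketch only: gate a knowledge-base read behind owner-or-team permission.
    from api.common.check_team_permission import check_kb_team_permission
    from api.db.services.knowledgebase_service import KnowledgebaseService

    ok, kb = KnowledgebaseService.get_by_id("kb_id_placeholder")
    if ok and not check_kb_team_permission(kb, "requesting_user_id"):
        raise PermissionError("No permission to access this knowledge base.")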
api/common/exceptions.py (new file, +43)
@@ -0,0 +1,43 @@
+#
+# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+class AdminException(Exception):
+    def __init__(self, message, code=400):
+        super().__init__(message)
+        self.type = "admin"
+        self.code = code
+        self.message = message
+
+
+class UserNotFoundError(AdminException):
+    def __init__(self, username):
+        super().__init__(f"User '{username}' not found", 404)
+
+
+class UserAlreadyExistsError(AdminException):
+    def __init__(self, username):
+        super().__init__(f"User '{username}' already exists", 409)
+
+
+class CannotDeleteAdminError(AdminException):
+    def __init__(self):
+        super().__init__("Cannot delete admin account", 403)
+
+
+class NotAdminError(AdminException):
+    def __init__(self, username):
+        super().__init__(f"User '{username}' is not admin", 403)
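Because each subclass carries an HTTP-style code, the exceptions translate directly into JSON error responses. A hedged sketch of a handler that is not itself part of this diff:

    # Sketch only: mapping AdminException subclasses onto JSON error responses.
    from flask import Flask, jsonify
    from api.common.exceptions import AdminException, UserNotFoundError

    app = Flask(__name__)

    @app.errorhandler(AdminException)
    def handle_admin_exception(e: AdminException):
        return jsonify({"code": e.code, "message": e.message}), e.code

    @app.route("/demo")
    def demo():
        raise UserNotFoundError("alice")  # -> 404, {"code": 404, "message": "User 'alice' not found"}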
@@ -127,4 +127,15 @@ class MCPServerType(StrEnum):
 VALID_MCP_SERVER_TYPES = {MCPServerType.SSE, MCPServerType.STREAMABLE_HTTP}


+class PipelineTaskType(StrEnum):
+    PARSE = "Parse"
+    DOWNLOAD = "Download"
+    RAPTOR = "RAPTOR"
+    GRAPH_RAG = "GraphRAG"
+    MINDMAP = "Mindmap"
+
+
+VALID_PIPELINE_TASK_TYPES = {PipelineTaskType.PARSE, PipelineTaskType.DOWNLOAD, PipelineTaskType.RAPTOR, PipelineTaskType.GRAPH_RAG, PipelineTaskType.MINDMAP}
+
+
 KNOWLEDGEBASE_FOLDER_NAME=".knowledgebase"
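Since PipelineTaskType is a StrEnum, members compare equal to their string values, so task types read back from storage can be validated directly; a tiny sketch (the import path is assumed to match the sibling enums in api.db):

    # Sketch only: StrEnum members behave as plain strings in comparisons.
    # Assumes PipelineTaskType is importable from api.db like its sibling enums.
    from api.db import PipelineTaskType, VALID_PIPELINE_TASK_TYPES

    assert PipelineTaskType.PARSE == "Parse"
    assert PipelineTaskType("GraphRAG") is PipelineTaskType.GRAPH_RAG  # lookup by stored value
    assert PipelineTaskType.RAPTOR in VALID_PIPELINE_TASK_TYPES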
@@ -313,9 +313,75 @@ class RetryingPooledMySQLDatabase(PooledMySQLDatabase):
             raise


+class RetryingPooledPostgresqlDatabase(PooledPostgresqlDatabase):
+    def __init__(self, *args, **kwargs):
+        self.max_retries = kwargs.pop("max_retries", 5)
+        self.retry_delay = kwargs.pop("retry_delay", 1)
+        super().__init__(*args, **kwargs)
+
+    def execute_sql(self, sql, params=None, commit=True):
+        for attempt in range(self.max_retries + 1):
+            try:
+                return super().execute_sql(sql, params, commit)
+            except (OperationalError, InterfaceError) as e:
+                # PostgreSQL specific error codes
+                # 57P01: admin_shutdown
+                # 57P02: crash_shutdown
+                # 57P03: cannot_connect_now
+                # 08006: connection_failure
+                # 08003: connection_does_not_exist
+                # 08000: connection_exception
+                error_messages = ['connection', 'server closed', 'connection refused',
+                                  'no connection to the server', 'terminating connection']
+
+                should_retry = any(msg in str(e).lower() for msg in error_messages)
+
+                if should_retry and attempt < self.max_retries:
+                    logging.warning(
+                        f"PostgreSQL connection issue (attempt {attempt+1}/{self.max_retries}): {e}"
+                    )
+                    self._handle_connection_loss()
+                    time.sleep(self.retry_delay * (2 ** attempt))
+                else:
+                    logging.error(f"PostgreSQL execution failure: {e}")
+                    raise
+        return None
+
+    def _handle_connection_loss(self):
+        try:
+            self.close()
+        except Exception:
+            pass
+        try:
+            self.connect()
+        except Exception as e:
+            logging.error(f"Failed to reconnect to PostgreSQL: {e}")
+            time.sleep(0.1)
+            self.connect()
+
+    def begin(self):
+        for attempt in range(self.max_retries + 1):
+            try:
+                return super().begin()
+            except (OperationalError, InterfaceError) as e:
+                error_messages = ['connection', 'server closed', 'connection refused',
+                                  'no connection to the server', 'terminating connection']
+
+                should_retry = any(msg in str(e).lower() for msg in error_messages)
+
+                if should_retry and attempt < self.max_retries:
+                    logging.warning(
+                        f"PostgreSQL connection lost during transaction (attempt {attempt+1}/{self.max_retries})"
+                    )
+                    self._handle_connection_loss()
+                    time.sleep(self.retry_delay * (2 ** attempt))
+                else:
+                    raise
+
+
 class PooledDatabase(Enum):
     MYSQL = RetryingPooledMySQLDatabase
-    POSTGRES = PooledPostgresqlDatabase
+    POSTGRES = RetryingPooledPostgresqlDatabase


 class DatabaseMigrator(Enum):
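With the defaults (retry_delay=1, max_retries=5) the backoff sleeps 1, 2, 4, 8 and 16 seconds across the retries, roughly 31 seconds in total before the error is re-raised; a quick sketch of the schedule:

    # Sketch only: the schedule produced by retry_delay * (2 ** attempt);
    # sleeps happen only while attempt < max_retries.
    retry_delay, max_retries = 1, 5
    delays = [retry_delay * (2 ** attempt) for attempt in range(max_retries)]
    print(delays, sum(delays))  # [1, 2, 4, 8, 16] 31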
@@ -641,7 +707,7 @@ class TenantLLM(DataBaseModel):
     llm_factory = CharField(max_length=128, null=False, help_text="LLM factory name", index=True)
     model_type = CharField(max_length=128, null=True, help_text="LLM, Text Embedding, Image2Text, ASR", index=True)
     llm_name = CharField(max_length=128, null=True, help_text="LLM name", default="", index=True)
-    api_key = CharField(max_length=2048, null=True, help_text="API KEY", index=True)
+    api_key = TextField(null=True, help_text="API KEY")
     api_base = CharField(max_length=255, null=True, help_text="API Base")
     max_tokens = IntegerField(default=8192, index=True)
     used_tokens = IntegerField(default=0, index=True)
@@ -684,8 +750,17 @@ class Knowledgebase(DataBaseModel):
     vector_similarity_weight = FloatField(default=0.3, index=True)

     parser_id = CharField(max_length=32, null=False, help_text="default parser ID", default=ParserType.NAIVE.value, index=True)
+    pipeline_id = CharField(max_length=32, null=True, help_text="Pipeline ID", index=True)
     parser_config = JSONField(null=False, default={"pages": [[1, 1000000]]})
     pagerank = IntegerField(default=0, index=False)
+
+    graphrag_task_id = CharField(max_length=32, null=True, help_text="Graph RAG task ID", index=True)
+    graphrag_task_finish_at = DateTimeField(null=True)
+    raptor_task_id = CharField(max_length=32, null=True, help_text="RAPTOR task ID", index=True)
+    raptor_task_finish_at = DateTimeField(null=True)
+    mindmap_task_id = CharField(max_length=32, null=True, help_text="Mindmap task ID", index=True)
+    mindmap_task_finish_at = DateTimeField(null=True)
+
     status = CharField(max_length=1, null=True, help_text="is it validate(0: wasted, 1: validate)", default="1", index=True)

     def __str__(self):
@@ -700,6 +775,7 @@ class Document(DataBaseModel):
     thumbnail = TextField(null=True, help_text="thumbnail base64 string")
     kb_id = CharField(max_length=256, null=False, index=True)
     parser_id = CharField(max_length=32, null=False, help_text="default parser ID", index=True)
+    pipeline_id = CharField(max_length=32, null=True, help_text="pipleline ID", index=True)
     parser_config = JSONField(null=False, default={"pages": [[1, 1000000]]})
     source_type = CharField(max_length=128, null=False, default="local", help_text="where dose this document come from", index=True)
     type = CharField(max_length=32, null=False, help_text="file extension", index=True)
@@ -942,6 +1018,32 @@ class Search(DataBaseModel):
         db_table = "search"


+class PipelineOperationLog(DataBaseModel):
+    id = CharField(max_length=32, primary_key=True)
+    document_id = CharField(max_length=32, index=True)
+    tenant_id = CharField(max_length=32, null=False, index=True)
+    kb_id = CharField(max_length=32, null=False, index=True)
+    pipeline_id = CharField(max_length=32, null=True, help_text="Pipeline ID", index=True)
+    pipeline_title = CharField(max_length=32, null=True, help_text="Pipeline title", index=True)
+    parser_id = CharField(max_length=32, null=False, help_text="Parser ID", index=True)
+    document_name = CharField(max_length=255, null=False, help_text="File name")
+    document_suffix = CharField(max_length=255, null=False, help_text="File suffix")
+    document_type = CharField(max_length=255, null=False, help_text="Document type")
+    source_from = CharField(max_length=255, null=False, help_text="Source")
+    progress = FloatField(default=0, index=True)
+    progress_msg = TextField(null=True, help_text="process message", default="")
+    process_begin_at = DateTimeField(null=True, index=True)
+    process_duration = FloatField(default=0)
+    dsl = JSONField(null=True, default=dict)
+    task_type = CharField(max_length=32, null=False, default="")
+    operation_status = CharField(max_length=32, null=False, help_text="Operation status")
+    avatar = TextField(null=True, help_text="avatar base64 string")
+    status = CharField(max_length=1, null=True, help_text="is it validate(0: wasted, 1: validate)", default="1", index=True)
+
+    class Meta:
+        db_table = "pipeline_operation_log"
+
+
 def migrate_db():
     logging.disable(logging.ERROR)
     migrator = DatabaseMigrator[settings.DATABASE_TYPE.upper()].value(DB)
@@ -1058,7 +1160,6 @@ def migrate_db():
         migrate(migrator.add_column("dialog", "meta_data_filter", JSONField(null=True, default={})))
     except Exception:
         pass
-
     try:
         migrate(migrator.alter_column_type("canvas_template", "title", JSONField(null=True, default=dict, help_text="Canvas title")))
     except Exception:
@@ -1075,4 +1176,40 @@ def migrate_db():
         migrate(migrator.add_column("canvas_template", "canvas_category", CharField(max_length=32, null=False, default="agent_canvas", help_text="agent_canvas|dataflow_canvas", index=True)))
     except Exception:
         pass
+    try:
+        migrate(migrator.add_column("knowledgebase", "pipeline_id", CharField(max_length=32, null=True, help_text="Pipeline ID", index=True)))
+    except Exception:
+        pass
+    try:
+        migrate(migrator.add_column("document", "pipeline_id", CharField(max_length=32, null=True, help_text="Pipeline ID", index=True)))
+    except Exception:
+        pass
+    try:
+        migrate(migrator.add_column("knowledgebase", "graphrag_task_id", CharField(max_length=32, null=True, help_text="Gragh RAG task ID", index=True)))
+    except Exception:
+        pass
+    try:
+        migrate(migrator.add_column("knowledgebase", "raptor_task_id", CharField(max_length=32, null=True, help_text="RAPTOR task ID", index=True)))
+    except Exception:
+        pass
+    try:
+        migrate(migrator.add_column("knowledgebase", "graphrag_task_finish_at", DateTimeField(null=True)))
+    except Exception:
+        pass
+    try:
+        migrate(migrator.add_column("knowledgebase", "raptor_task_finish_at", CharField(null=True)))
+    except Exception:
+        pass
+    try:
+        migrate(migrator.add_column("knowledgebase", "mindmap_task_id", CharField(max_length=32, null=True, help_text="Mindmap task ID", index=True)))
+    except Exception:
+        pass
+    try:
+        migrate(migrator.add_column("knowledgebase", "mindmap_task_finish_at", CharField(null=True)))
+    except Exception:
+        pass
+    try:
+        migrate(migrator.alter_column_type("tenant_llm", "api_key", TextField(null=True, help_text="API KEY")))
+    except Exception:
+        pass
     logging.disable(logging.NOTSET)
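Every new column above is added inside its own try/except so migrate_db() stays idempotent: the migrator raises when a column already exists, and swallowing that error lets the function re-run safely on every upgrade. The idiom, factored out as a hedged sketch (the helper itself is hypothetical, not part of the diff):

    # Sketch only: hypothetical helper capturing the add-column-if-missing idiom above.
    from playhouse.migrate import migrate

    def add_column_if_missing(migrator, table, column, field):
        try:
            migrate(migrator.add_column(table, column, field))
        except Exception:
            pass  # column already exists; safe to ignore on re-run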
@@ -14,7 +14,6 @@
 # limitations under the License.
 #
 import logging
-import base64
 import json
 import os
 import time
@@ -32,11 +31,7 @@ from api.db.services.llm_service import LLMService, LLMBundle, get_init_tenant_llm
 from api.db.services.user_service import TenantService, UserTenantService
 from api import settings
 from api.utils.file_utils import get_project_base_directory
+from api.common.base64 import encode_to_base64

-def encode_to_base64(input_string):
-    base64_encoded = base64.b64encode(input_string.encode('utf-8'))
-    return base64_encoded.decode('utf-8')
-

 def init_superuser():
@@ -17,13 +17,26 @@ import logging
 import uuid

 from api import settings
-from api.db import FileType, UserTenantRole
-from api.db.db_models import TenantLLM
+from api.utils.api_utils import group_by
+from api.db import FileType, UserTenantRole, ActiveEnum
+from api.db.services.api_service import APITokenService, API4ConversationService
+from api.db.services.canvas_service import UserCanvasService
+from api.db.services.conversation_service import ConversationService
+from api.db.services.dialog_service import DialogService
+from api.db.services.document_service import DocumentService
+from api.db.services.file2document_service import File2DocumentService
+from api.db.services.knowledgebase_service import KnowledgebaseService
+from api.db.services.langfuse_service import TenantLangfuseService
 from api.db.services.llm_service import get_init_tenant_llm
 from api.db.services.file_service import FileService
+from api.db.services.mcp_server_service import MCPServerService
+from api.db.services.search_service import SearchService
+from api.db.services.task_service import TaskService
 from api.db.services.tenant_llm_service import TenantLLMService
+from api.db.services.user_canvas_version import UserCanvasVersionService
 from api.db.services.user_service import TenantService, UserService, UserTenantService
+from rag.utils.storage_factory import STORAGE_IMPL
+from rag.nlp import search


 def create_new_user(user_info: dict) -> dict:
@@ -104,7 +117,7 @@ def create_new_user(user_info: dict) -> dict:
     except Exception as e:
         logging.exception(e)
     try:
-        TenantLLM.delete().where(TenantLLM.tenant_id == user_id).execute()
+        TenantLLMService.delete_by_tenant_id(user_id)
     except Exception as e:
         logging.exception(e)
     try:
@@ -118,3 +131,197 @@ def create_new_user(user_info: dict) -> dict:
         logging.exception(e)
         # reraise
         raise create_error
+
+
+def delete_user_data(user_id: str) -> dict:
+    # use user_id to delete
+    usr = UserService.filter_by_id(user_id)
+    if not usr:
+        return {"success": False, "message": f"{user_id} can't be found."}
+    # check is inactive and not admin
+    if usr.is_active == ActiveEnum.ACTIVE.value:
+        return {"success": False, "message": f"{user_id} is active and can't be deleted."}
+    if usr.is_superuser:
+        return {"success": False, "message": "Can't delete the super user."}
+    # tenant info
+    tenants = UserTenantService.get_user_tenant_relation_by_user_id(usr.id)
+    owned_tenant = [t for t in tenants if t["role"] == UserTenantRole.OWNER.value]
+
+    done_msg = ''
+    try:
+        # step1. delete owned tenant info
+        if owned_tenant:
+            done_msg += "Start to delete owned tenant.\n"
+            tenant_id = owned_tenant[0]["tenant_id"]
+            kb_ids = KnowledgebaseService.get_kb_ids(usr.id)
+            # step1.1 delete knowledgebase related file and info
+            if kb_ids:
+                # step1.1.1 delete files in storage, remove bucket
+                for kb_id in kb_ids:
+                    if STORAGE_IMPL.bucket_exists(kb_id):
+                        STORAGE_IMPL.remove_bucket(kb_id)
+                done_msg += f"- Removed {len(kb_ids)} dataset's buckets.\n"
+                # step1.1.2 delete file and document info in db
+                doc_ids = DocumentService.get_all_doc_ids_by_kb_ids(kb_ids)
+                if doc_ids:
+                    doc_delete_res = DocumentService.delete_by_ids([i["id"] for i in doc_ids])
+                    done_msg += f"- Deleted {doc_delete_res} document records.\n"
+                    task_delete_res = TaskService.delete_by_doc_ids([i["id"] for i in doc_ids])
+                    done_msg += f"- Deleted {task_delete_res} task records.\n"
+                file_ids = FileService.get_all_file_ids_by_tenant_id(usr.id)
+                if file_ids:
+                    file_delete_res = FileService.delete_by_ids([f["id"] for f in file_ids])
+                    done_msg += f"- Deleted {file_delete_res} file records.\n"
+                if doc_ids or file_ids:
+                    file2doc_delete_res = File2DocumentService.delete_by_document_ids_or_file_ids(
+                        [i["id"] for i in doc_ids],
+                        [f["id"] for f in file_ids]
+                    )
+                    done_msg += f"- Deleted {file2doc_delete_res} document-file relation records.\n"
+                # step1.1.3 delete chunk in es
+                r = settings.docStoreConn.delete({"kb_id": kb_ids},
+                                                 search.index_name(tenant_id), kb_ids)
+                done_msg += f"- Deleted {r} chunk records.\n"
+                kb_delete_res = KnowledgebaseService.delete_by_ids(kb_ids)
+                done_msg += f"- Deleted {kb_delete_res} knowledgebase records.\n"
+            # step1.1.4 delete agents
+            agent_delete_res = delete_user_agents(usr.id)
+            done_msg += f"- Deleted {agent_delete_res['agents_deleted_count']} agent, {agent_delete_res['version_deleted_count']} versions records.\n"
+            # step1.1.5 delete dialogs
+            dialog_delete_res = delete_user_dialogs(usr.id)
+            done_msg += f"- Deleted {dialog_delete_res['dialogs_deleted_count']} dialogs, {dialog_delete_res['conversations_deleted_count']} conversations, {dialog_delete_res['api_token_deleted_count']} api tokens, {dialog_delete_res['api4conversation_deleted_count']} api4conversations.\n"
+            # step1.1.6 delete mcp server
+            mcp_delete_res = MCPServerService.delete_by_tenant_id(usr.id)
+            done_msg += f"- Deleted {mcp_delete_res} MCP server.\n"
+            # step1.1.7 delete search
+            search_delete_res = SearchService.delete_by_tenant_id(usr.id)
+            done_msg += f"- Deleted {search_delete_res} search records.\n"
+            # step1.2 delete tenant_llm and tenant_langfuse
+            llm_delete_res = TenantLLMService.delete_by_tenant_id(tenant_id)
+            done_msg += f"- Deleted {llm_delete_res} tenant-LLM records.\n"
+            langfuse_delete_res = TenantLangfuseService.delete_ty_tenant_id(tenant_id)
+            done_msg += f"- Deleted {langfuse_delete_res} langfuse records.\n"
+            # step1.3 delete own tenant
+            tenant_delete_res = TenantService.delete_by_id(tenant_id)
+            done_msg += f"- Deleted {tenant_delete_res} tenant.\n"
+        # step2 delete user-tenant relation
+        if tenants:
+            # step2.1 delete docs and files in joined team
+            joined_tenants = [t for t in tenants if t["role"] == UserTenantRole.NORMAL.value]
+            if joined_tenants:
+                done_msg += "Start to delete data in joined tenants.\n"
+                created_documents = DocumentService.get_all_docs_by_creator_id(usr.id)
+                if created_documents:
+                    # step2.1.1 delete files
+                    doc_file_info = File2DocumentService.get_by_document_ids([d['id'] for d in created_documents])
+                    created_files = FileService.get_by_ids([f['file_id'] for f in doc_file_info])
+                    if created_files:
+                        # step2.1.1.1 delete file in storage
+                        for f in created_files:
+                            STORAGE_IMPL.rm(f.parent_id, f.location)
+                        done_msg += f"- Deleted {len(created_files)} uploaded file.\n"
+                        # step2.1.1.2 delete file record
+                        file_delete_res = FileService.delete_by_ids([f.id for f in created_files])
+                        done_msg += f"- Deleted {file_delete_res} file records.\n"
+                    # step2.1.2 delete document-file relation record
+                    file2doc_delete_res = File2DocumentService.delete_by_document_ids_or_file_ids(
+                        [d['id'] for d in created_documents],
+                        [f.id for f in created_files]
+                    )
+                    done_msg += f"- Deleted {file2doc_delete_res} document-file relation records.\n"
+                    # step2.1.3 delete chunks
+                    doc_groups = group_by(created_documents, "tenant_id")
+                    kb_grouped_doc = {k: group_by(v, "kb_id") for k, v in doc_groups.items()}
+                    # chunks in {'tenant_id': {'kb_id': [{'id': doc_id}]}} structure
+                    chunk_delete_res = 0
+                    kb_doc_info = {}
+                    for _tenant_id, kb_doc in kb_grouped_doc.items():
+                        for _kb_id, docs in kb_doc.items():
+                            chunk_delete_res += settings.docStoreConn.delete(
+                                {"doc_id": [d["id"] for d in docs]},
+                                search.index_name(_tenant_id), _kb_id
+                            )
+                            # record doc info
+                            if _kb_id in kb_doc_info.keys():
+                                kb_doc_info[_kb_id]['doc_num'] += 1
+                                kb_doc_info[_kb_id]['token_num'] += sum([d["token_num"] for d in docs])
+                                kb_doc_info[_kb_id]['chunk_num'] += sum([d["chunk_num"] for d in docs])
+                            else:
+                                kb_doc_info[_kb_id] = {
+                                    'doc_num': 1,
+                                    'token_num': sum([d["token_num"] for d in docs]),
+                                    'chunk_num': sum([d["chunk_num"] for d in docs])
+                                }
+                    done_msg += f"- Deleted {chunk_delete_res} chunks.\n"
+                    # step2.1.4 delete tasks
+                    task_delete_res = TaskService.delete_by_doc_ids([d['id'] for d in created_documents])
+                    done_msg += f"- Deleted {task_delete_res} tasks.\n"
+                    # step2.1.5 delete document record
+                    doc_delete_res = DocumentService.delete_by_ids([d['id'] for d in created_documents])
+                    done_msg += f"- Deleted {doc_delete_res} documents.\n"
+                    # step2.1.6 update knowledge base doc&chunk&token cnt
+                    for kb_id, doc_num in kb_doc_info.items():
+                        KnowledgebaseService.decrease_document_num_in_delete(kb_id, doc_num)
+
+            # step2.2 delete relation
+            user_tenant_delete_res = UserTenantService.delete_by_ids([t["id"] for t in tenants])
+            done_msg += f"- Deleted {user_tenant_delete_res} user-tenant records.\n"
+        # step3 finally delete user
+        user_delete_res = UserService.delete_by_id(usr.id)
+        done_msg += f"- Deleted {user_delete_res} user.\nDelete done!"
+
+        return {"success": True, "message": f"Successfully deleted user. Details:\n{done_msg}"}
+
+    except Exception as e:
+        logging.exception(e)
+        return {"success": False, "message": f"Error: {str(e)}. Already done:\n{done_msg}"}
+
+
+def delete_user_agents(user_id: str) -> dict:
+    """
+    use user_id to delete
+    :return: {
+        "agents_deleted_count": 1,
+        "version_deleted_count": 2
+    }
+    """
+    agents_deleted_count, agents_version_deleted_count = 0, 0
+    user_agents = UserCanvasService.get_all_agents_by_tenant_ids([user_id], user_id)
+    if user_agents:
+        agents_version = UserCanvasVersionService.get_all_canvas_version_by_canvas_ids([a['id'] for a in user_agents])
+        agents_version_deleted_count = UserCanvasVersionService.delete_by_ids([v['id'] for v in agents_version])
+        agents_deleted_count = UserCanvasService.delete_by_ids([a['id'] for a in user_agents])
+    return {
+        "agents_deleted_count": agents_deleted_count,
+        "version_deleted_count": agents_version_deleted_count
+    }
+
+
+def delete_user_dialogs(user_id: str) -> dict:
+    """
+    use user_id to delete
+    :return: {
+        "dialogs_deleted_count": 1,
+        "conversations_deleted_count": 1,
+        "api_token_deleted_count": 2,
+        "api4conversation_deleted_count": 2
+    }
+    """
+    dialog_deleted_count, conversations_deleted_count, api_token_deleted_count, api4conversation_deleted_count = 0, 0, 0, 0
+    user_dialogs = DialogService.get_all_dialogs_by_tenant_id(user_id)
+    if user_dialogs:
+        # delete conversation
+        conversations = ConversationService.get_all_conversation_by_dialog_ids([ud['id'] for ud in user_dialogs])
+        conversations_deleted_count = ConversationService.delete_by_ids([c['id'] for c in conversations])
+        # delete api token
+        api_token_deleted_count = APITokenService.delete_by_tenant_id(user_id)
+        # delete api for conversation
+        api4conversation_deleted_count = API4ConversationService.delete_by_dialog_ids([ud['id'] for ud in user_dialogs])
+        # delete dialog at last
+        dialog_deleted_count = DialogService.delete_by_ids([ud['id'] for ud in user_dialogs])
+    return {
+        "dialogs_deleted_count": dialog_deleted_count,
+        "conversations_deleted_count": conversations_deleted_count,
+        "api_token_deleted_count": api_token_deleted_count,
+        "api4conversation_deleted_count": api4conversation_deleted_count
+    }
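delete_user_data is defensive by design: it refuses active accounts and superusers, and reports progress even on partial failure. A hedged calling sketch, with a placeholder id:

    # Sketch only: expected calling convention. The id is a placeholder, and the
    # account must already be deactivated and must not be a superuser.
    result = delete_user_data("user_id_placeholder")
    if result["success"]:
        print(result["message"])      # multi-line report of everything removed
    else:
        print("refused or failed:", result["message"])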
@@ -35,6 +35,11 @@ class APITokenService(CommonService):
             cls.model.token == token
         )

+    @classmethod
+    @DB.connection_context()
+    def delete_by_tenant_id(cls, tenant_id):
+        return cls.model.delete().where(cls.model.tenant_id == tenant_id).execute()
+

 class API4ConversationService(CommonService):
     model = API4Conversation
@@ -100,3 +105,8 @@ class API4ConversationService(CommonService):
             cls.model.create_date <= to_date,
             cls.model.source == source
         ).group_by(cls.model.create_date.truncate("day")).dicts()
+
+    @classmethod
+    @DB.connection_context()
+    def delete_by_dialog_ids(cls, dialog_ids):
+        return cls.model.delete().where(cls.model.dialog_id.in_(dialog_ids)).execute()
@@ -66,6 +66,7 @@ class UserCanvasService(CommonService):
     def get_all_agents_by_tenant_ids(cls, tenant_ids, user_id):
         # will get all permitted agents, be cautious
         fields = [
+            cls.model.id,
             cls.model.title,
             cls.model.permission,
             cls.model.canvas_type,
@@ -93,7 +94,7 @@ class UserCanvasService(CommonService):

     @classmethod
     @DB.connection_context()
-    def get_by_tenant_id(cls, pid):
+    def get_by_canvas_id(cls, pid):
         try:

             fields = [
@@ -125,7 +126,7 @@ class UserCanvasService(CommonService):
     @DB.connection_context()
     def get_by_tenant_ids(cls, joined_tenant_ids, user_id,
                           page_number, items_per_page,
-                          orderby, desc, keywords, canvas_category=CanvasCategory.Agent,
+                          orderby, desc, keywords, canvas_category=None
                           ):
         fields = [
             cls.model.id,
@@ -134,6 +135,7 @@ class UserCanvasService(CommonService):
             cls.model.dsl,
             cls.model.description,
             cls.model.permission,
+            cls.model.user_id.alias("tenant_id"),
             User.nickname,
             User.avatar.alias('tenant_avatar'),
             cls.model.update_time,
@@ -141,31 +143,30 @@ class UserCanvasService(CommonService):
         ]
         if keywords:
             agents = cls.model.select(*fields).join(User, on=(cls.model.user_id == User.id)).where(
-                ((cls.model.user_id.in_(joined_tenant_ids) & (cls.model.permission ==
-                                                              TenantPermission.TEAM.value)) | (
-                         cls.model.user_id == user_id)),
+                (((cls.model.user_id.in_(joined_tenant_ids)) & (cls.model.permission == TenantPermission.TEAM.value)) | (cls.model.user_id == user_id)),
                 (fn.LOWER(cls.model.title).contains(keywords.lower()))
             )
         else:
             agents = cls.model.select(*fields).join(User, on=(cls.model.user_id == User.id)).where(
-                ((cls.model.user_id.in_(joined_tenant_ids) & (cls.model.permission ==
-                                                              TenantPermission.TEAM.value)) | (
-                         cls.model.user_id == user_id))
+                (((cls.model.user_id.in_(joined_tenant_ids)) & (cls.model.permission == TenantPermission.TEAM.value)) | (cls.model.user_id == user_id))
             )
-        agents = agents.where(cls.model.canvas_category == canvas_category)
+        if canvas_category:
+            agents = agents.where(cls.model.canvas_category == canvas_category)
         if desc:
             agents = agents.order_by(cls.model.getter_by(orderby).desc())
         else:
             agents = agents.order_by(cls.model.getter_by(orderby).asc())

         count = agents.count()
-        agents = agents.paginate(page_number, items_per_page)
+        if page_number and items_per_page:
+            agents = agents.paginate(page_number, items_per_page)
         return list(agents.dicts()), count

     @classmethod
     @DB.connection_context()
     def accessible(cls, canvas_id, tenant_id):
         from api.db.services.user_service import UserTenantService
-        e, c = UserCanvasService.get_by_tenant_id(canvas_id)
+        e, c = UserCanvasService.get_by_canvas_id(canvas_id)
         if not e:
             return False
@@ -48,6 +48,21 @@ class ConversationService(CommonService):

         return list(sessions.dicts())

+    @classmethod
+    @DB.connection_context()
+    def get_all_conversation_by_dialog_ids(cls, dialog_ids):
+        sessions = cls.model.select().where(cls.model.dialog_id.in_(dialog_ids))
+        sessions.order_by(cls.model.create_time.asc())
+        offset, limit = 0, 100
+        res = []
+        while True:
+            s_batch = sessions.offset(offset).limit(limit)
+            _temp = list(s_batch.dicts())
+            if not _temp:
+                break
+            res.extend(_temp)
+            offset += limit
+        return res
+

 def structure_answer(conv, ans, message_id, session_id):
     reference = ans["reference"]
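The new get_all_conversation_by_dialog_ids, and its DialogService twin in the next hunk, page through results in fixed batches instead of loading the whole table at once (note that peewee's order_by returns a new query, so the bare sessions.order_by(...) call above has no effect on ordering). The shared idiom, pulled out as a hedged sketch:

    # Sketch only: the offset/limit batching idiom shared by the new "get_all_*" helpers.
    # `query` is any peewee ModelSelect.
    def fetch_in_batches(query, batch_size=100):
        offset, rows = 0, []
        while True:
            batch = list(query.offset(offset).limit(batch_size).dicts())
            if not batch:
                break
            rows.extend(batch)
            offset += batch_size
        return rows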
@@ -159,6 +159,22 @@ class DialogService(CommonService):

         return list(dialogs.dicts()), count

+    @classmethod
+    @DB.connection_context()
+    def get_all_dialogs_by_tenant_id(cls, tenant_id):
+        fields = [cls.model.id]
+        dialogs = cls.model.select(*fields).where(cls.model.tenant_id == tenant_id)
+        dialogs.order_by(cls.model.create_time.asc())
+        offset, limit = 0, 100
+        res = []
+        while True:
+            d_batch = dialogs.offset(offset).limit(limit)
+            _temp = list(d_batch.dicts())
+            if not _temp:
+                break
+            res.extend(_temp)
+            offset += limit
+        return res
+

 def chat_solo(dialog, messages, stream=True):
     if TenantLLMService.llm_id2llm_type(dialog.llm_id) == "image2text":
@@ -354,7 +370,7 @@ def chat(dialog, messages, stream=True, **kwargs):
         chat_mdl.bind_tools(toolcall_session, tools)
     bind_models_ts = timer()

-    retriever = settings.retrievaler
+    retriever = settings.retriever
     questions = [m["content"] for m in messages if m["role"] == "user"][-3:]
     attachments = kwargs["doc_ids"].split(",") if "doc_ids" in kwargs else []
     if "doc_ids" in messages[-1]:
@@ -450,13 +466,17 @@ def chat(dialog, messages, stream=True, **kwargs):
             rerank_mdl=rerank_mdl,
             rank_feature=label_question(" ".join(questions), kbs),
         )
+        if prompt_config.get("toc_enhance"):
+            cks = retriever.retrieval_by_toc(" ".join(questions), kbinfos["chunks"], tenant_ids, chat_mdl, dialog.top_n)
+            if cks:
+                kbinfos["chunks"] = cks
         if prompt_config.get("tavily_api_key"):
             tav = Tavily(prompt_config["tavily_api_key"])
             tav_res = tav.retrieve_chunks(" ".join(questions))
             kbinfos["chunks"].extend(tav_res["chunks"])
             kbinfos["doc_aggs"].extend(tav_res["doc_aggs"])
         if prompt_config.get("use_kg"):
-            ck = settings.kg_retrievaler.retrieval(" ".join(questions), tenant_ids, dialog.kb_ids, embd_mdl,
+            ck = settings.kg_retriever.retrieval(" ".join(questions), tenant_ids, dialog.kb_ids, embd_mdl,
                                                    LLMBundle(dialog.tenant_id, LLMType.CHAT))
             if ck["content_with_weight"]:
                 kbinfos["chunks"].insert(0, ck)
@@ -642,7 +662,7 @@ Please write the SQL, only SQL, without any other explanations or text.

         logging.debug(f"{question} get SQL(refined): {sql}")
         tried_times += 1
-        return settings.retrievaler.sql_retrieval(sql, format="json"), sql
+        return settings.retriever.sql_retrieval(sql, format="json"), sql

     tbl, sql = get_table()
     if tbl is None:
@@ -736,7 +756,7 @@ def ask(question, kb_ids, tenant_id, chat_llm_name=None, search_config={}):
     embedding_list = list(set([kb.embd_id for kb in kbs]))

     is_knowledge_graph = all([kb.parser_id == ParserType.KG for kb in kbs])
-    retriever = settings.retrievaler if not is_knowledge_graph else settings.kg_retrievaler
+    retriever = settings.retriever if not is_knowledge_graph else settings.kg_retriever

     embd_mdl = LLMBundle(tenant_id, LLMType.EMBEDDING, embedding_list[0])
     chat_mdl = LLMBundle(tenant_id, LLMType.CHAT, chat_llm_name)
@@ -832,7 +852,7 @@ def gen_mindmap(question, kb_ids, tenant_id, search_config={}):
     if not doc_ids:
         doc_ids = None

-    ranks = settings.retrievaler.retrieval(
+    ranks = settings.retriever.retrieval(
         question=question,
         embd_mdl=embd_mdl,
         tenant_ids=tenant_ids,
@@ -24,12 +24,13 @@ from io import BytesIO

 import trio
 import xxhash
-from peewee import fn, Case
+from peewee import fn, Case, JOIN

 from api import settings
 from api.constants import IMG_BASE64_PREFIX, FILE_NAME_LEN_LIMIT
-from api.db import FileType, LLMType, ParserType, StatusEnum, TaskStatus, UserTenantRole
-from api.db.db_models import DB, Document, Knowledgebase, Task, Tenant, UserTenant, File2Document, File
+from api.db import FileType, LLMType, ParserType, StatusEnum, TaskStatus, UserTenantRole, CanvasCategory
+from api.db.db_models import DB, Document, Knowledgebase, Task, Tenant, UserTenant, File2Document, File, UserCanvas, \
+    User
 from api.db.db_utils import bulk_insert_into_db
 from api.db.services.common_service import CommonService
 from api.db.services.knowledgebase_service import KnowledgebaseService
@@ -51,6 +52,7 @@ class DocumentService(CommonService):
         cls.model.thumbnail,
         cls.model.kb_id,
         cls.model.parser_id,
+        cls.model.pipeline_id,
         cls.model.parser_config,
         cls.model.source_type,
         cls.model.type,
@@ -77,9 +79,12 @@ class DocumentService(CommonService):
     @classmethod
     @DB.connection_context()
     def get_list(cls, kb_id, page_number, items_per_page,
-                 orderby, desc, keywords, id, name):
+                 orderby, desc, keywords, id, name, suffix=None, run=None):
         fields = cls.get_cls_model_fields()
-        docs = cls.model.select(*fields).join(File2Document, on=(File2Document.document_id == cls.model.id)).join(File, on=(File.id == File2Document.file_id)).where(cls.model.kb_id == kb_id)
+        docs = cls.model.select(*[*fields, UserCanvas.title]).join(File2Document, on=(File2Document.document_id == cls.model.id))\
+            .join(File, on=(File.id == File2Document.file_id))\
+            .join(UserCanvas, on=((cls.model.pipeline_id == UserCanvas.id) & (UserCanvas.canvas_category == CanvasCategory.DataFlow.value)), join_type=JOIN.LEFT_OUTER)\
+            .where(cls.model.kb_id == kb_id)
         if id:
             docs = docs.where(
                 cls.model.id == id)
@@ -91,6 +96,10 @@ class DocumentService(CommonService):
             docs = docs.where(
                 fn.LOWER(cls.model.name).contains(keywords.lower())
             )
+        if suffix:
+            docs = docs.where(cls.model.suffix.in_(suffix))
+        if run:
+            docs = docs.where(cls.model.run.in_(run))
         if desc:
             docs = docs.order_by(cls.model.getter_by(orderby).desc())
         else:
@@ -117,12 +126,22 @@ class DocumentService(CommonService):
                  orderby, desc, keywords, run_status, types, suffix):
         fields = cls.get_cls_model_fields()
         if keywords:
-            docs = cls.model.select(*fields).join(File2Document, on=(File2Document.document_id == cls.model.id)).join(File, on=(File.id == File2Document.file_id)).where(
-                (cls.model.kb_id == kb_id),
-                (fn.LOWER(cls.model.name).contains(keywords.lower()))
-            )
+            docs = cls.model.select(*[*fields, UserCanvas.title.alias("pipeline_name"), User.nickname])\
+                .join(File2Document, on=(File2Document.document_id == cls.model.id))\
+                .join(File, on=(File.id == File2Document.file_id))\
+                .join(UserCanvas, on=(cls.model.pipeline_id == UserCanvas.id), join_type=JOIN.LEFT_OUTER)\
+                .join(User, on=(cls.model.created_by == User.id), join_type=JOIN.LEFT_OUTER)\
+                .where(
+                    (cls.model.kb_id == kb_id),
+                    (fn.LOWER(cls.model.name).contains(keywords.lower()))
+                )
         else:
-            docs = cls.model.select(*fields).join(File2Document, on=(File2Document.document_id == cls.model.id)).join(File, on=(File.id == File2Document.file_id)).where(cls.model.kb_id == kb_id)
+            docs = cls.model.select(*[*fields, UserCanvas.title.alias("pipeline_name"), User.nickname])\
+                .join(File2Document, on=(File2Document.document_id == cls.model.id))\
+                .join(UserCanvas, on=(cls.model.pipeline_id == UserCanvas.id), join_type=JOIN.LEFT_OUTER)\
+                .join(File, on=(File.id == File2Document.file_id))\
+                .join(User, on=(cls.model.created_by == User.id), join_type=JOIN.LEFT_OUTER)\
+                .where(cls.model.kb_id == kb_id)

         if run_status:
             docs = docs.where(cls.model.run.in_(run_status))
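The rewritten queries above decorate each document row with its pipeline's canvas title through a LEFT OUTER join, so documents without a pipeline_id still appear in listings. A minimal, self-contained peewee sketch of the same aliased LEFT JOIN pattern; the schema and names below are illustrative, not the repo's models, and it assumes peewee is installed:

from peewee import SqliteDatabase, Model, CharField, JOIN

db = SqliteDatabase(":memory:")

class Canvas(Model):
    id = CharField(primary_key=True)
    title = CharField()

    class Meta:
        database = db

class Doc(Model):
    id = CharField(primary_key=True)
    pipeline_id = CharField(null=True)

    class Meta:
        database = db

db.create_tables([Canvas, Doc])
Canvas.create(id="c1", title="ingest-flow")
Doc.create(id="d1", pipeline_id="c1")
Doc.create(id="d2", pipeline_id=None)  # kept by the LEFT OUTER join

rows = (Doc
        .select(Doc.id, Canvas.title.alias("pipeline_name"))
        .join(Canvas, on=(Doc.pipeline_id == Canvas.id), join_type=JOIN.LEFT_OUTER)
        .dicts())
for r in rows:
    print(r)  # {'id': 'd1', 'pipeline_name': 'ingest-flow'}, then {'id': 'd2', 'pipeline_name': None}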
@@ -228,6 +247,46 @@ class DocumentService(CommonService):

         return int(query.scalar()) or 0

+    @classmethod
+    @DB.connection_context()
+    def get_all_doc_ids_by_kb_ids(cls, kb_ids):
+        fields = [cls.model.id]
+        docs = cls.model.select(*fields).where(cls.model.kb_id.in_(kb_ids))
+        docs = docs.order_by(cls.model.create_time.asc())
+        # may cause slow queries with deep pagination; optimize later
+        offset, limit = 0, 100
+        res = []
+        while True:
+            doc_batch = docs.offset(offset).limit(limit)
+            _temp = list(doc_batch.dicts())
+            if not _temp:
+                break
+            res.extend(_temp)
+            offset += limit
+        return res
+
+    @classmethod
+    @DB.connection_context()
+    def get_all_docs_by_creator_id(cls, creator_id):
+        fields = [
+            cls.model.id, cls.model.kb_id, cls.model.token_num, cls.model.chunk_num, Knowledgebase.tenant_id
+        ]
+        docs = cls.model.select(*fields).join(Knowledgebase, on=(Knowledgebase.id == cls.model.kb_id)).where(
+            cls.model.created_by == creator_id
+        )
+        docs = docs.order_by(cls.model.create_time.asc())
+        # may cause slow queries with deep pagination; optimize later
+        offset, limit = 0, 100
+        res = []
+        while True:
+            doc_batch = docs.offset(offset).limit(limit)
+            _temp = list(doc_batch.dicts())
+            if not _temp:
+                break
+            res.extend(_temp)
+            offset += limit
+        return res
+
     @classmethod
     @DB.connection_context()
     def insert(cls, doc):
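Both new helpers page with OFFSET/LIMIT, which the inline comment flags as a potential slow query: each successive batch re-scans every skipped row. A hedged sketch of the keyset-pagination alternative the comment alludes to, runnable against an in-memory SQLite database via peewee (the schema is invented for illustration):

from peewee import SqliteDatabase, Model, CharField, BigIntegerField

db = SqliteDatabase(":memory:")

class Doc(Model):
    id = CharField(primary_key=True)
    kb_id = CharField(index=True)
    create_time = BigIntegerField(index=True)

    class Meta:
        database = db

db.create_tables([Doc])
Doc.insert_many([{"id": f"d{i:04d}", "kb_id": "kb1", "create_time": i} for i in range(250)]).execute()

def iter_doc_ids(kb_id, batch=100):
    # Keyset pagination: filter past the last (create_time, id) seen, so each
    # batch is an index seek instead of an OFFSET scan over skipped rows.
    last = None
    while True:
        q = Doc.select(Doc.id, Doc.create_time).where(Doc.kb_id == kb_id)
        if last is not None:
            t, i = last
            q = q.where((Doc.create_time > t) | ((Doc.create_time == t) & (Doc.id > i)))
        rows = list(q.order_by(Doc.create_time.asc(), Doc.id.asc()).limit(batch))
        if not rows:
            return
        yield from (r.id for r in rows)
        last = (rows[-1].create_time, rows[-1].id)

print(sum(1 for _ in iter_doc_ids("kb1")))  # 250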
@@ -330,8 +389,7 @@ class DocumentService(CommonService):
             process_duration=cls.model.process_duration + duration).where(
             cls.model.id == doc_id).execute()
         if num == 0:
-            raise LookupError(
-                "Document not found which is supposed to be there")
+            logging.warning("Document not found which is supposed to be there")
         num = Knowledgebase.update(
             token_num=Knowledgebase.token_num +
             token_num,
@@ -597,17 +655,33 @@ class DocumentService(CommonService):
     @DB.connection_context()
     def update_progress(cls):
         docs = cls.get_unfinished_docs()
+
+        cls._sync_progress(docs)
+
+
+    @classmethod
+    @DB.connection_context()
+    def update_progress_immediately(cls, docs: list[dict]):
+        if not docs:
+            return
+
+        cls._sync_progress(docs)
+
+
+    @classmethod
+    @DB.connection_context()
+    def _sync_progress(cls, docs: list[dict]):
+        from api.db.services.task_service import TaskService
+
         for d in docs:
             try:
-                tsks = Task.query(doc_id=d["id"], order_by=Task.create_time)
+                tsks = TaskService.query(doc_id=d["id"], order_by=Task.create_time)
                 if not tsks:
                     continue
                 msg = []
                 prg = 0
                 finished = True
                 bad = 0
-                has_raptor = False
-                has_graphrag = False
                 e, doc = DocumentService.get_by_id(d["id"])
                 status = doc.run  # TaskStatus.RUNNING.value
                 priority = 0
@@ -619,24 +693,14 @@ class DocumentService(CommonService):
                     prg += t.progress if t.progress >= 0 else 0
                     if t.progress_msg.strip():
                         msg.append(t.progress_msg)
-                    if t.task_type == "raptor":
-                        has_raptor = True
-                    elif t.task_type == "graphrag":
-                        has_graphrag = True
                     priority = max(priority, t.priority)
                 prg /= len(tsks)
                 if finished and bad:
                     prg = -1
                     status = TaskStatus.FAIL.value
                 elif finished:
-                    if (d["parser_config"].get("raptor") or {}).get("use_raptor") and not has_raptor:
-                        queue_raptor_o_graphrag_tasks(d, "raptor", priority)
-                        prg = 0.98 * len(tsks) / (len(tsks) + 1)
-                    elif (d["parser_config"].get("graphrag") or {}).get("use_graphrag") and not has_graphrag:
-                        queue_raptor_o_graphrag_tasks(d, "graphrag", priority)
-                        prg = 0.98 * len(tsks) / (len(tsks) + 1)
-                    else:
-                        status = TaskStatus.DONE.value
+                    prg = 1
+                    status = TaskStatus.DONE.value

                 msg = "\n".join(sorted(msg))
                 info = {
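The loop above folds per-task progress into a single document-level figure; with the RAPTOR/GraphRAG follow-ups now queued explicitly elsewhere, a finished document simply becomes progress 1 / DONE. An approximate pure-Python sketch of that reduction, with invented values and helper name:

def aggregate(task_progresses: list[float]) -> tuple[float, str]:
    # Each task reports progress in [0, 1], or -1 on failure. The document is
    # finished once every task sits at 1 or -1; any -1 fails the whole run.
    bad = sum(1 for p in task_progresses if p == -1)
    finished = all(p in (1, -1) for p in task_progresses)
    prg = sum(p for p in task_progresses if p >= 0) / len(task_progresses)
    if finished and bad:
        return -1, "FAIL"
    if finished:
        return 1, "DONE"
    return prg, "RUNNING"

print(aggregate([0.4, 0.8, 1.0]))  # (0.733..., 'RUNNING')
print(aggregate([1, -1, 1]))       # (-1, 'FAIL')
print(aggregate([1, 1, 1]))        # (1, 'DONE')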
@@ -648,7 +712,7 @@ class DocumentService(CommonService):
                 info["progress"] = prg
                 if msg:
                     info["progress_msg"] = msg
-                    if msg.endswith("created task graphrag") or msg.endswith("created task raptor"):
+                    if msg.endswith("created task graphrag") or msg.endswith("created task raptor") or msg.endswith("created task mindmap"):
                         info["progress_msg"] += "\n%d tasks are ahead in the queue..." % get_queue_length(priority)
                 else:
                     info["progress_msg"] = "%d tasks are ahead in the queue..." % get_queue_length(priority)
@@ -729,21 +793,28 @@ class DocumentService(CommonService):
         "cancelled": int(cancelled),
     }


-def queue_raptor_o_graphrag_tasks(doc, ty, priority):
-    chunking_config = DocumentService.get_chunking_config(doc["id"])
+def queue_raptor_o_graphrag_tasks(sample_doc_id, ty, priority, fake_doc_id="", doc_ids=[]):
+    """
+    You can provide a fake_doc_id to bypass the restriction of tasks at the knowledgebase level.
+    Optionally, specify a list of doc_ids to determine which documents participate in the task.
+    """
+    assert ty in ["graphrag", "raptor", "mindmap"], "type should be graphrag, raptor or mindmap"
+
+    chunking_config = DocumentService.get_chunking_config(sample_doc_id["id"])
     hasher = xxhash.xxh64()
     for field in sorted(chunking_config.keys()):
         hasher.update(str(chunking_config[field]).encode("utf-8"))

     def new_task():
-        nonlocal doc
+        nonlocal sample_doc_id
         return {
             "id": get_uuid(),
-            "doc_id": doc["id"],
+            "doc_id": sample_doc_id["id"],
             "from_page": 100000000,
             "to_page": 100000000,
             "task_type": ty,
-            "progress_msg": datetime.now().strftime("%H:%M:%S") + " created task " + ty
+            "progress_msg": datetime.now().strftime("%H:%M:%S") + " created task " + ty,
+            "begin_at": datetime.now(),
         }

     task = new_task()
@@ -752,7 +823,12 @@ def queue_raptor_o_graphrag_tasks(doc, ty, priority):
     hasher.update(ty.encode("utf-8"))
     task["digest"] = hasher.hexdigest()
     bulk_insert_into_db(Task, [task], True)
+
+    task["doc_id"] = fake_doc_id
+    task["doc_ids"] = doc_ids
+    DocumentService.begin2parse(sample_doc_id["id"])
     assert REDIS_CONN.queue_product(get_svr_queue_name(priority), message=task), "Can't access Redis. Please check the Redis' status."
+    return task["id"]


 def get_queue_length(priority):
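The task digest guards against re-queuing identical work: it hashes the sorted chunking config plus the task type, so the result is independent of dict insertion order. A standalone sketch of the same computation; it assumes the third-party xxhash package the repo already imports, and the config keys are invented:

import xxhash

def task_digest(chunking_config: dict, task_type: str) -> str:
    hasher = xxhash.xxh64()
    # Sort keys so the digest is stable across dict insertion orders.
    for field in sorted(chunking_config.keys()):
        hasher.update(str(chunking_config[field]).encode("utf-8"))
    hasher.update(task_type.encode("utf-8"))
    return hasher.hexdigest()

cfg = {"chunk_token_num": 128, "delimiter": "\n"}
assert task_digest(cfg, "raptor") == task_digest(dict(reversed(cfg.items())), "raptor")
print(task_digest(cfg, "raptor"))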
@@ -38,6 +38,12 @@ class File2DocumentService(CommonService):
         objs = cls.model.select().where(cls.model.document_id == document_id)
         return objs
+
+    @classmethod
+    @DB.connection_context()
+    def get_by_document_ids(cls, document_ids):
+        objs = cls.model.select().where(cls.model.document_id.in_(document_ids))
+        return list(objs.dicts())

     @classmethod
     @DB.connection_context()
     def insert(cls, obj):
@@ -50,6 +56,15 @@ class File2DocumentService(CommonService):
     def delete_by_file_id(cls, file_id):
         return cls.model.delete().where(cls.model.file_id == file_id).execute()
+
+    @classmethod
+    @DB.connection_context()
+    def delete_by_document_ids_or_file_ids(cls, document_ids, file_ids):
+        if not document_ids:
+            return cls.model.delete().where(cls.model.file_id.in_(file_ids)).execute()
+        elif not file_ids:
+            return cls.model.delete().where(cls.model.document_id.in_(document_ids)).execute()
+        return cls.model.delete().where(cls.model.document_id.in_(document_ids) | cls.model.file_id.in_(file_ids)).execute()

     @classmethod
     @DB.connection_context()
     def delete_by_document_id(cls, doc_id):
@@ -161,6 +161,23 @@ class FileService(CommonService):
             result_ids.append(folder_id)
         return result_ids

+    @classmethod
+    @DB.connection_context()
+    def get_all_file_ids_by_tenant_id(cls, tenant_id):
+        fields = [cls.model.id]
+        files = cls.model.select(*fields).where(cls.model.tenant_id == tenant_id)
+        files = files.order_by(cls.model.create_time.asc())
+        offset, limit = 0, 100
+        res = []
+        while True:
+            file_batch = files.offset(offset).limit(limit)
+            _temp = list(file_batch.dicts())
+            if not _temp:
+                break
+            res.extend(_temp)
+            offset += limit
+        return res
+
     @classmethod
     @DB.connection_context()
     def create_folder(cls, file, parent_id, name, count):
@@ -440,6 +457,7 @@ class FileService(CommonService):
                 "id": doc_id,
                 "kb_id": kb.id,
                 "parser_id": self.get_parser(filetype, filename, kb.parser_id),
+                "pipeline_id": kb.pipeline_id,
                 "parser_config": kb.parser_config,
                 "created_by": user_id,
                 "type": filetype,
@@ -458,6 +476,16 @@ class FileService(CommonService):

         return err, files

+    @classmethod
+    @DB.connection_context()
+    def list_all_files_by_parent_id(cls, parent_id):
+        try:
+            files = cls.model.select().where((cls.model.parent_id == parent_id) & (cls.model.id != parent_id))
+            return list(files)
+        except Exception:
+            logging.exception("list_by_parent_id failed")
+            raise RuntimeError("Database error (list_by_parent_id)!")
+
     @staticmethod
     def parse_docs(file_objs, user_id):
         exe = ThreadPoolExecutor(max_workers=12)
@@ -495,7 +523,7 @@ class FileService(CommonService):
             return ParserType.AUDIO.value
         if re.search(r"\.(ppt|pptx|pages)$", filename):
             return ParserType.PRESENTATION.value
-        if re.search(r"\.(eml)$", filename):
+        if re.search(r"\.(msg|eml)$", filename):
             return ParserType.EMAIL.value
         return default

@@ -15,10 +15,10 @@
 #
 from datetime import datetime

-from peewee import fn
+from peewee import fn, JOIN

 from api.db import StatusEnum, TenantPermission
-from api.db.db_models import DB, Document, Knowledgebase, Tenant, User, UserTenant
+from api.db.db_models import DB, Document, Knowledgebase, User, UserTenant, UserCanvas
 from api.db.services.common_service import CommonService
 from api.utils import current_timestamp, datetime_format

@@ -260,20 +260,29 @@ class KnowledgebaseService(CommonService):
             cls.model.token_num,
             cls.model.chunk_num,
             cls.model.parser_id,
+            cls.model.pipeline_id,
+            UserCanvas.title.alias("pipeline_name"),
+            UserCanvas.avatar.alias("pipeline_avatar"),
             cls.model.parser_config,
             cls.model.pagerank,
+            cls.model.graphrag_task_id,
+            cls.model.graphrag_task_finish_at,
+            cls.model.raptor_task_id,
+            cls.model.raptor_task_finish_at,
+            cls.model.mindmap_task_id,
+            cls.model.mindmap_task_finish_at,
             cls.model.create_time,
             cls.model.update_time
         ]
-        kbs = cls.model.select(*fields).join(Tenant, on=(
-            (Tenant.id == cls.model.tenant_id) & (Tenant.status == StatusEnum.VALID.value))).where(
+        kbs = cls.model.select(*fields)\
+            .join(UserCanvas, on=(cls.model.pipeline_id == UserCanvas.id), join_type=JOIN.LEFT_OUTER)\
+            .where(
             (cls.model.id == kb_id),
             (cls.model.status == StatusEnum.VALID.value)
-        )
+        ).dicts()
         if not kbs:
             return
-        d = kbs[0].to_dict()
-        return d
+        return kbs[0]

     @classmethod
     @DB.connection_context()
@@ -370,6 +379,7 @@ class KnowledgebaseService(CommonService):
         #     name: Optional name filter
         # Returns:
         #     List of knowledge bases
+        #     Total count of knowledge bases
         kbs = cls.model.select()
         if id:
             kbs = kbs.where(cls.model.id == id)
@@ -381,14 +391,16 @@ class KnowledgebaseService(CommonService):
                 cls.model.tenant_id == user_id))
                 & (cls.model.status == StatusEnum.VALID.value)
             )

         if desc:
             kbs = kbs.order_by(cls.model.getter_by(orderby).desc())
         else:
             kbs = kbs.order_by(cls.model.getter_by(orderby).asc())

+        total = kbs.count()
         kbs = kbs.paginate(page_number, items_per_page)

-        return list(kbs.dicts())
+        return list(kbs.dicts()), total

     @classmethod
     @DB.connection_context()
@@ -471,3 +483,17 @@ class KnowledgebaseService(CommonService):
             else:
                 raise e

+    @classmethod
+    @DB.connection_context()
+    def decrease_document_num_in_delete(cls, kb_id, doc_num_info: dict):
+        kb_row = cls.model.get_by_id(kb_id)
+        if not kb_row:
+            raise RuntimeError(f"kb_id {kb_id} does not exist")
+        update_dict = {
+            'doc_num': kb_row.doc_num - doc_num_info['doc_num'],
+            'chunk_num': kb_row.chunk_num - doc_num_info['chunk_num'],
+            'token_num': kb_row.token_num - doc_num_info['token_num'],
+            'update_time': current_timestamp(),
+            'update_date': datetime_format(datetime.now())
+        }
+        return cls.model.update(update_dict).where(cls.model.id == kb_id).execute()
@@ -51,6 +51,11 @@ class TenantLangfuseService(CommonService):
         except peewee.DoesNotExist:
             return None

+    @classmethod
+    @DB.connection_context()
+    def delete_ty_tenant_id(cls, tenant_id):
+        return cls.model.delete().where(cls.model.tenant_id == tenant_id).execute()
+
     @classmethod
     def update_by_tenant(cls, tenant_id, langfuse_keys):
         langfuse_keys["update_time"] = current_timestamp()
@@ -205,32 +205,31 @@ class LLMBundle(LLM4Tenant):
             return txt

         return txt[last_think_end + len("</think>") :]

     @staticmethod
     def _clean_param(chat_partial, **kwargs):
         func = chat_partial.func
         sig = inspect.signature(func)
-        keyword_args = []
         support_var_args = False
-        for param in sig.parameters.values():
-            if param.kind == inspect.Parameter.VAR_KEYWORD or param.kind == inspect.Parameter.VAR_POSITIONAL:
-                support_var_args = True
-            elif param.kind == inspect.Parameter.KEYWORD_ONLY:
-                keyword_args.append(param.name)
-
-        use_kwargs = kwargs
-        if not support_var_args:
-            use_kwargs = {k: v for k, v in kwargs.items() if k in keyword_args}
-        return use_kwargs
+        allowed_params = set()
+
+        for param in sig.parameters.values():
+            if param.kind == inspect.Parameter.VAR_KEYWORD:
+                support_var_args = True
+            elif param.kind in (inspect.Parameter.POSITIONAL_OR_KEYWORD, inspect.Parameter.KEYWORD_ONLY):
+                allowed_params.add(param.name)
+        if support_var_args:
+            return kwargs
+        else:
+            return {k: v for k, v in kwargs.items() if k in allowed_params}

     def chat(self, system: str, history: list, gen_conf: dict = {}, **kwargs) -> str:
         if self.langfuse:
             generation = self.langfuse.start_generation(trace_context=self.trace_context, name="chat", model=self.llm_name, input={"system": system, "history": history})

-        chat_partial = partial(self.mdl.chat, system, history, gen_conf)
+        chat_partial = partial(self.mdl.chat, system, history, gen_conf, **kwargs)
         if self.is_tools and self.mdl.is_tools:
-            chat_partial = partial(self.mdl.chat_with_tools, system, history, gen_conf)
+            chat_partial = partial(self.mdl.chat_with_tools, system, history, gen_conf, **kwargs)

         use_kwargs = self._clean_param(chat_partial, **kwargs)
         txt, used_tokens = chat_partial(**use_kwargs)
         txt = self._remove_reasoning_content(txt)
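The rewritten _clean_param whitelists caller kwargs against the wrapped model function's signature: everything passes through when the function accepts **kwargs, otherwise only declared parameter names survive. A self-contained sketch of that filtering; the two demo chat functions are invented:

import inspect
from functools import partial

def clean_param(chat_partial, **kwargs):
    sig = inspect.signature(chat_partial.func)
    support_var_args = False
    allowed = set()
    for param in sig.parameters.values():
        if param.kind == inspect.Parameter.VAR_KEYWORD:
            support_var_args = True
        elif param.kind in (inspect.Parameter.POSITIONAL_OR_KEYWORD, inspect.Parameter.KEYWORD_ONLY):
            allowed.add(param.name)
    return kwargs if support_var_args else {k: v for k, v in kwargs.items() if k in allowed}

def strict_chat(system, history, gen_conf, temperature=0.7):
    return temperature

def open_chat(system, history, gen_conf, **kwargs):
    return kwargs

p1 = partial(strict_chat, "sys", [], {})
p2 = partial(open_chat, "sys", [], {})
print(clean_param(p1, temperature=0.1, images=["x"]))  # {'temperature': 0.1} -- unknown kwarg dropped
print(clean_param(p2, temperature=0.1, images=["x"]))  # both pass through via **kwargs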
@@ -266,7 +265,7 @@ class LLMBundle(LLM4Tenant):
                     break

             if txt.endswith("</think>"):
-                ans = ans.rstrip("</think>")
+                ans = ans[: -len("</think>")]

             if not self.verbose_tool_use:
                 txt = re.sub(r"<tool_call>.*?</tool_call>", "", txt, flags=re.DOTALL)
@@ -33,7 +33,8 @@ class MCPServerService(CommonService):

     @classmethod
     @DB.connection_context()
-    def get_servers(cls, tenant_id: str, id_list: list[str] | None, page_number, items_per_page, orderby, desc, keywords):
+    def get_servers(cls, tenant_id: str, id_list: list[str] | None, page_number, items_per_page, orderby, desc,
+                    keywords):
         """Retrieve all MCP servers associated with a tenant.

         This method fetches all MCP servers for a given tenant, ordered by creation time.
@@ -84,3 +85,8 @@ class MCPServerService(CommonService):
             return bool(mcp_server), mcp_server
         except Exception:
             return False, None
+
+    @classmethod
+    @DB.connection_context()
+    def delete_by_tenant_id(cls, tenant_id: str):
+        return cls.model.delete().where(cls.model.tenant_id == tenant_id).execute()
api/db/services/pipeline_operation_log_service.py (new file, 263 lines)
@@ -0,0 +1,263 @@
+#
+# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import json
+import logging
+import os
+from datetime import datetime, timedelta
+
+from peewee import fn
+
+from api.db import VALID_PIPELINE_TASK_TYPES, PipelineTaskType
+from api.db.db_models import DB, Document, PipelineOperationLog
+from api.db.services.canvas_service import UserCanvasService
+from api.db.services.common_service import CommonService
+from api.db.services.document_service import DocumentService
+from api.db.services.knowledgebase_service import KnowledgebaseService
+from api.db.services.task_service import GRAPH_RAPTOR_FAKE_DOC_ID
+from api.utils import current_timestamp, datetime_format, get_uuid
+
+
+class PipelineOperationLogService(CommonService):
+    model = PipelineOperationLog
+
+    @classmethod
+    def get_file_logs_fields(cls):
+        return [
+            cls.model.id,
+            cls.model.document_id,
+            cls.model.tenant_id,
+            cls.model.kb_id,
+            cls.model.pipeline_id,
+            cls.model.pipeline_title,
+            cls.model.parser_id,
+            cls.model.document_name,
+            cls.model.document_suffix,
+            cls.model.document_type,
+            cls.model.source_from,
+            cls.model.progress,
+            cls.model.progress_msg,
+            cls.model.process_begin_at,
+            cls.model.process_duration,
+            cls.model.dsl,
+            cls.model.task_type,
+            cls.model.operation_status,
+            cls.model.avatar,
+            cls.model.status,
+            cls.model.create_time,
+            cls.model.create_date,
+            cls.model.update_time,
+            cls.model.update_date,
+        ]
+
+    @classmethod
+    def get_dataset_logs_fields(cls):
+        return [
+            cls.model.id,
+            cls.model.tenant_id,
+            cls.model.kb_id,
+            cls.model.progress,
+            cls.model.progress_msg,
+            cls.model.process_begin_at,
+            cls.model.process_duration,
+            cls.model.task_type,
+            cls.model.operation_status,
+            cls.model.avatar,
+            cls.model.status,
+            cls.model.create_time,
+            cls.model.create_date,
+            cls.model.update_time,
+            cls.model.update_date,
+        ]
+
+    @classmethod
+    def save(cls, **kwargs):
+        """
+        wrap this function in a transaction
+        """
+        sample_obj = cls.model(**kwargs).save(force_insert=True)
+        return sample_obj
+
+    @classmethod
+    @DB.connection_context()
+    def create(cls, document_id, pipeline_id, task_type, fake_document_ids=[], dsl: str = "{}"):
+        referred_document_id = document_id
+
+        if referred_document_id == GRAPH_RAPTOR_FAKE_DOC_ID and fake_document_ids:
+            referred_document_id = fake_document_ids[0]
+        ok, document = DocumentService.get_by_id(referred_document_id)
+        if not ok:
+            logging.warning(f"Document for referred_document_id {referred_document_id} not found")
+            return
+        DocumentService.update_progress_immediately([document.to_dict()])
+        ok, document = DocumentService.get_by_id(referred_document_id)
+        if not ok:
+            logging.warning(f"Document for referred_document_id {referred_document_id} not found")
+            return
+        if document.progress not in [1, -1]:
+            return
+        operation_status = document.run
+
+        if pipeline_id:
+            ok, user_pipeline = UserCanvasService.get_by_id(pipeline_id)
+            if not ok:
+                raise RuntimeError(f"Pipeline {pipeline_id} not found")
+            tenant_id = user_pipeline.user_id
+            title = user_pipeline.title
+            avatar = user_pipeline.avatar
+        else:
+            ok, kb_info = KnowledgebaseService.get_by_id(document.kb_id)
+            if not ok:
+                raise RuntimeError(f"Cannot find knowledge base {document.kb_id} for referred_document {referred_document_id}")
+
+            tenant_id = kb_info.tenant_id
+            title = document.parser_id
+            avatar = document.thumbnail
+
+        if task_type not in VALID_PIPELINE_TASK_TYPES:
+            raise ValueError(f"Invalid task type: {task_type}")
+
+        if task_type in [PipelineTaskType.GRAPH_RAG, PipelineTaskType.RAPTOR, PipelineTaskType.MINDMAP]:
+            finish_at = document.process_begin_at + timedelta(seconds=document.process_duration)
+            if task_type == PipelineTaskType.GRAPH_RAG:
+                KnowledgebaseService.update_by_id(
+                    document.kb_id,
+                    {"graphrag_task_finish_at": finish_at},
+                )
+            elif task_type == PipelineTaskType.RAPTOR:
+                KnowledgebaseService.update_by_id(
+                    document.kb_id,
+                    {"raptor_task_finish_at": finish_at},
+                )
+            elif task_type == PipelineTaskType.MINDMAP:
+                KnowledgebaseService.update_by_id(
+                    document.kb_id,
+                    {"mindmap_task_finish_at": finish_at},
+                )
+
+        log = dict(
+            id=get_uuid(),
+            document_id=document_id,  # GRAPH_RAPTOR_FAKE_DOC_ID or real document_id
+            tenant_id=tenant_id,
+            kb_id=document.kb_id,
+            pipeline_id=pipeline_id,
+            pipeline_title=title,
+            parser_id=document.parser_id,
+            document_name=document.name,
+            document_suffix=document.suffix,
+            document_type=document.type,
+            source_from="",  # TODO: add in the future
+            progress=document.progress,
+            progress_msg=document.progress_msg,
+            process_begin_at=document.process_begin_at,
+            process_duration=document.process_duration,
+            dsl=json.loads(dsl),
+            task_type=task_type,
+            operation_status=operation_status,
+            avatar=avatar,
+        )
+        log["create_time"] = current_timestamp()
+        log["create_date"] = datetime_format(datetime.now())
+        log["update_time"] = current_timestamp()
+        log["update_date"] = datetime_format(datetime.now())
+
+        with DB.atomic():
+            obj = cls.save(**log)
+
+            limit = int(os.getenv("PIPELINE_OPERATION_LOG_LIMIT", 1000))
+            total = cls.model.select().where(cls.model.kb_id == document.kb_id).count()
+
+            if total > limit:
+                keep_ids = [m.id for m in cls.model.select(cls.model.id).where(cls.model.kb_id == document.kb_id).order_by(cls.model.create_time.desc()).limit(limit)]
+
+                deleted = cls.model.delete().where(cls.model.kb_id == document.kb_id, cls.model.id.not_in(keep_ids)).execute()
+                logging.info(f"[PipelineOperationLogService] Cleaned {deleted} old logs, kept latest {limit} for {document.kb_id}")
+
+        return obj
+
+    @classmethod
+    @DB.connection_context()
+    def record_pipeline_operation(cls, document_id, pipeline_id, task_type, fake_document_ids=[]):
+        return cls.create(document_id=document_id, pipeline_id=pipeline_id, task_type=task_type, fake_document_ids=fake_document_ids)
+
+    @classmethod
+    @DB.connection_context()
+    def get_file_logs_by_kb_id(cls, kb_id, page_number, items_per_page, orderby, desc, keywords, operation_status, types, suffix, create_date_from=None, create_date_to=None):
+        fields = cls.get_file_logs_fields()
+        if keywords:
+            logs = cls.model.select(*fields).where((cls.model.kb_id == kb_id), (fn.LOWER(cls.model.document_name).contains(keywords.lower())))
+        else:
+            logs = cls.model.select(*fields).where(cls.model.kb_id == kb_id)
+
+        logs = logs.where(cls.model.document_id != GRAPH_RAPTOR_FAKE_DOC_ID)
+
+        if operation_status:
+            logs = logs.where(cls.model.operation_status.in_(operation_status))
+        if types:
+            logs = logs.where(cls.model.document_type.in_(types))
+        if suffix:
+            logs = logs.where(cls.model.document_suffix.in_(suffix))
+        if create_date_from:
+            logs = logs.where(cls.model.create_date >= create_date_from)
+        if create_date_to:
+            logs = logs.where(cls.model.create_date <= create_date_to)
+
+        count = logs.count()
+        if desc:
+            logs = logs.order_by(cls.model.getter_by(orderby).desc())
+        else:
+            logs = logs.order_by(cls.model.getter_by(orderby).asc())
+
+        if page_number and items_per_page:
+            logs = logs.paginate(page_number, items_per_page)
+
+        return list(logs.dicts()), count
+
+    @classmethod
+    @DB.connection_context()
+    def get_documents_info(cls, id):
+        fields = [Document.id, Document.name, Document.progress, Document.kb_id]
+        return (
+            cls.model.select(*fields)
+            .join(Document, on=(cls.model.document_id == Document.id))
+            .where(
+                cls.model.id == id
+            )
+            .dicts()
+        )
+
+    @classmethod
+    @DB.connection_context()
+    def get_dataset_logs_by_kb_id(cls, kb_id, page_number, items_per_page, orderby, desc, operation_status, create_date_from=None, create_date_to=None):
+        fields = cls.get_dataset_logs_fields()
+        logs = cls.model.select(*fields).where((cls.model.kb_id == kb_id), (cls.model.document_id == GRAPH_RAPTOR_FAKE_DOC_ID))
+
+        if operation_status:
+            logs = logs.where(cls.model.operation_status.in_(operation_status))
+        if create_date_from:
+            logs = logs.where(cls.model.create_date >= create_date_from)
+        if create_date_to:
+            logs = logs.where(cls.model.create_date <= create_date_to)
+
+        count = logs.count()
+        if desc:
+            logs = logs.order_by(cls.model.getter_by(orderby).desc())
+        else:
+            logs = logs.order_by(cls.model.getter_by(orderby).asc())
+
+        if page_number and items_per_page:
+            logs = logs.paginate(page_number, items_per_page)
+
+        return list(logs.dicts()), count
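PipelineOperationLogService.create caps the log table per knowledge base: after inserting, it keeps only the newest PIPELINE_OPERATION_LOG_LIMIT rows (default 1000) and deletes the rest. A pure-Python sketch of that retention rule, with a list standing in for the table; the env variable name matches the hunk, everything else is illustrative:

import os

def trim_logs(logs: list[dict]) -> list[dict]:
    # Keep the newest `limit` entries by create_time, dropping the rest,
    # mirroring the keep_ids/delete step inside create().
    limit = int(os.getenv("PIPELINE_OPERATION_LOG_LIMIT", 1000))
    if len(logs) <= limit:
        return logs
    return sorted(logs, key=lambda log: log["create_time"], reverse=True)[:limit]

os.environ["PIPELINE_OPERATION_LOG_LIMIT"] = "2"
logs = [{"id": i, "create_time": i} for i in range(5)]
print([log["id"] for log in trim_logs(logs)])  # [4, 3]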
@@ -94,7 +94,8 @@ class SearchService(CommonService):
         query = (
             cls.model.select(*fields)
             .join(User, on=(cls.model.tenant_id == User.id))
-            .where(((cls.model.tenant_id.in_(joined_tenant_ids)) | (cls.model.tenant_id == user_id)) & (cls.model.status == StatusEnum.VALID.value))
+            .where(((cls.model.tenant_id.in_(joined_tenant_ids)) | (cls.model.tenant_id == user_id)) & (
+                    cls.model.status == StatusEnum.VALID.value))
         )

         if keywords:
@@ -110,3 +111,8 @@ class SearchService(CommonService):
             query = query.paginate(page_number, items_per_page)

         return list(query.dicts()), count
+
+    @classmethod
+    @DB.connection_context()
+    def delete_by_tenant_id(cls, tenant_id):
+        return cls.model.delete().where(cls.model.tenant_id == tenant_id).execute()
@@ -35,6 +35,8 @@ from rag.utils.redis_conn import REDIS_CONN
 from api import settings
 from rag.nlp import search

+CANVAS_DEBUG_DOC_ID = "dataflow_x"
+GRAPH_RAPTOR_FAKE_DOC_ID = "graph_raptor_x"

 def trim_header_by_lines(text: str, max_length) -> str:
     # Trim header text to maximum length while preserving line breaks
@@ -70,7 +72,7 @@ class TaskService(CommonService):

     @classmethod
     @DB.connection_context()
-    def get_task(cls, task_id):
+    def get_task(cls, task_id, doc_ids=[]):
         """Retrieve detailed task information by task ID.

         This method fetches comprehensive task details including associated document,
@@ -84,6 +86,10 @@ class TaskService(CommonService):
             dict: Task details dictionary containing all task information and related metadata.
                 Returns None if task is not found or has exceeded retry limit.
         """
+        doc_id = cls.model.doc_id
+        if doc_id == CANVAS_DEBUG_DOC_ID and doc_ids:
+            doc_id = doc_ids[0]
+
         fields = [
             cls.model.id,
             cls.model.doc_id,
@@ -109,7 +115,7 @@ class TaskService(CommonService):
         ]
         docs = (
             cls.model.select(*fields)
-            .join(Document, on=(cls.model.doc_id == Document.id))
+            .join(Document, on=(doc_id == Document.id))
             .join(Knowledgebase, on=(Document.kb_id == Knowledgebase.id))
             .join(Tenant, on=(Knowledgebase.tenant_id == Tenant.id))
             .where(cls.model.id == task_id)
@@ -159,7 +165,7 @@ class TaskService(CommonService):
         ]
         tasks = (
             cls.model.select(*fields).order_by(cls.model.from_page.asc(), cls.model.create_time.desc())
             .where(cls.model.doc_id == doc_id)
         )
         tasks = list(tasks.dicts())
         if not tasks:
@@ -199,18 +205,18 @@ class TaskService(CommonService):
             cls.model.select(
                 *[Document.id, Document.kb_id, Document.location, File.parent_id]
             )
             .join(Document, on=(cls.model.doc_id == Document.id))
             .join(
                 File2Document,
                 on=(File2Document.document_id == Document.id),
                 join_type=JOIN.LEFT_OUTER,
             )
             .join(
                 File,
                 on=(File2Document.file_id == File.id),
                 join_type=JOIN.LEFT_OUTER,
             )
             .where(
                 Document.status == StatusEnum.VALID.value,
                 Document.run == TaskStatus.RUNNING.value,
                 ~(Document.type == FileType.VIRTUAL.value),
@@ -288,25 +294,33 @@ class TaskService(CommonService):
                 cls.model.update(progress=prog).where(
                     (cls.model.id == id) &
                     (
                         (cls.model.progress != -1) &
                         ((prog == -1) | (prog > cls.model.progress))
                     )
                 ).execute()
-            return
-
-        with DB.lock("update_progress", -1):
-            if info["progress_msg"]:
-                progress_msg = trim_header_by_lines(task.progress_msg + "\n" + info["progress_msg"], 3000)
-                cls.model.update(progress_msg=progress_msg).where(cls.model.id == id).execute()
-            if "progress" in info:
-                prog = info["progress"]
-                cls.model.update(progress=prog).where(
-                    (cls.model.id == id) &
-                    (
-                        (cls.model.progress != -1) &
-                        ((prog == -1) | (prog > cls.model.progress))
-                    )
-                ).execute()
+        else:
+            with DB.lock("update_progress", -1):
+                if info["progress_msg"]:
+                    progress_msg = trim_header_by_lines(task.progress_msg + "\n" + info["progress_msg"], 3000)
+                    cls.model.update(progress_msg=progress_msg).where(cls.model.id == id).execute()
+                if "progress" in info:
+                    prog = info["progress"]
+                    cls.model.update(progress=prog).where(
+                        (cls.model.id == id) &
+                        (
+                            (cls.model.progress != -1) &
+                            ((prog == -1) | (prog > cls.model.progress))
+                        )
+                    ).execute()
+
+        process_duration = (datetime.now() - task.begin_at).total_seconds()
+        cls.model.update(process_duration=process_duration).where(cls.model.id == id).execute()
+
+    @classmethod
+    @DB.connection_context()
+    def delete_by_doc_ids(cls, doc_ids):
+        """Delete tasks associated with the given documents."""
+        return cls.model.delete().where(cls.model.doc_id.in_(doc_ids)).execute()


 def queue_tasks(doc: dict, bucket: str, name: str, priority: int):
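Because every task now records begin_at at creation, progress updates can derive process_duration from wall-clock time instead of accumulating increments. A minimal sketch of the computation, with a stub standing in for the Task row:

from datetime import datetime, timedelta

class TaskStub:
    # Stand-in for the Task row; the real code reads task.begin_at from the DB.
    def __init__(self, begin_at: datetime):
        self.begin_at = begin_at

task = TaskStub(datetime.now() - timedelta(seconds=42))
process_duration = (datetime.now() - task.begin_at).total_seconds()
print(round(process_duration))  # ~42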
@@ -329,8 +343,16 @@ def queue_tasks(doc: dict, bucket: str, name: str, priority: int):
     - Task digests are calculated for optimization and reuse
     - Previous task chunks may be reused if available
     """
+
     def new_task():
-        return {"id": get_uuid(), "doc_id": doc["id"], "progress": 0.0, "from_page": 0, "to_page": 100000000}
+        return {
+            "id": get_uuid(),
+            "doc_id": doc["id"],
+            "progress": 0.0,
+            "from_page": 0,
+            "to_page": 100000000,
+            "begin_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+        }

     parse_task_array = []

@@ -343,7 +365,7 @@ def queue_tasks(doc: dict, bucket: str, name: str, priority: int):
         page_size = doc["parser_config"].get("task_page_size") or 12
         if doc["parser_id"] == "paper":
             page_size = doc["parser_config"].get("task_page_size") or 22
-        if doc["parser_id"] in ["one", "knowledge_graph"] or do_layout != "DeepDOC":
+        if doc["parser_id"] in ["one", "knowledge_graph"] or do_layout != "DeepDOC" or doc["parser_config"].get("toc", True):
            page_size = 10 ** 9
        page_ranges = doc["parser_config"].get("pages") or [(1, 10 ** 5)]
        for s, e in page_ranges:
@@ -472,36 +494,29 @@ def has_canceled(task_id):
     return False


-def queue_dataflow(dsl: str, tenant_id: str, doc_id: str, task_id: str, flow_id: str, priority: int, callback=None) -> tuple[bool, str]:
-    """
-    Returns a tuple (success: bool, error_message: str).
-    """
-    _ = callback
-
+def queue_dataflow(tenant_id: str, flow_id: str, task_id: str, doc_id: str = CANVAS_DEBUG_DOC_ID, file: dict = None, priority: int = 0, rerun: bool = False) -> tuple[bool, str]:
     task = dict(
-        id=get_uuid() if not task_id else task_id,
+        id=task_id,
         doc_id=doc_id,
         from_page=0,
         to_page=100000000,
-        task_type="dataflow",
+        task_type="dataflow" if not rerun else "dataflow_rerun",
         priority=priority,
+        begin_at=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
     )
-    TaskService.model.delete().where(TaskService.model.id == task["id"]).execute()
+    if doc_id not in [CANVAS_DEBUG_DOC_ID, GRAPH_RAPTOR_FAKE_DOC_ID]:
+        TaskService.model.delete().where(TaskService.model.doc_id == doc_id).execute()
+        DocumentService.begin2parse(doc_id)
     bulk_insert_into_db(model=Task, data_source=[task], replace_on_conflict=True)

-    kb_id = DocumentService.get_knowledgebase_id(doc_id)
-    if not kb_id:
-        return False, f"Can't find KB of this document: {doc_id}"
-
-    task["kb_id"] = kb_id
+    task["kb_id"] = DocumentService.get_knowledgebase_id(doc_id)
     task["tenant_id"] = tenant_id
-    task["task_type"] = "dataflow"
-    task["dsl"] = dsl
-    task["dataflow_id"] = get_uuid() if not flow_id else flow_id
+    task["dataflow_id"] = flow_id
+    task["file"] = file

     if not REDIS_CONN.queue_product(
         get_svr_queue_name(priority), message=task
     ):
         return False, "Can't access Redis. Please check the Redis' status."
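The reworked queue_dataflow takes its ids from the caller and flips the task type for re-runs before publishing to Redis. A sketch of just the message assembly; the helper name is hypothetical, while the constant and field names mirror the hunk:

from datetime import datetime

CANVAS_DEBUG_DOC_ID = "dataflow_x"

def build_dataflow_task(task_id: str, doc_id: str = CANVAS_DEBUG_DOC_ID, priority: int = 0, rerun: bool = False) -> dict:
    # Mirrors the task dict queued to Redis; begin_at is stamped at creation.
    return dict(
        id=task_id,
        doc_id=doc_id,
        from_page=0,
        to_page=100000000,
        task_type="dataflow" if not rerun else "dataflow_rerun",
        priority=priority,
        begin_at=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    )

print(build_dataflow_task("t1", rerun=True)["task_type"])  # dataflow_rerun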
@@ -57,8 +57,10 @@ class TenantLLMService(CommonService):
     @classmethod
     @DB.connection_context()
     def get_my_llms(cls, tenant_id):
-        fields = [cls.model.llm_factory, LLMFactories.logo, LLMFactories.tags, cls.model.model_type, cls.model.llm_name, cls.model.used_tokens]
-        objs = cls.model.select(*fields).join(LLMFactories, on=(cls.model.llm_factory == LLMFactories.name)).where(cls.model.tenant_id == tenant_id, ~cls.model.api_key.is_null()).dicts()
+        fields = [cls.model.llm_factory, LLMFactories.logo, LLMFactories.tags, cls.model.model_type, cls.model.llm_name,
+                  cls.model.used_tokens]
+        objs = cls.model.select(*fields).join(LLMFactories, on=(cls.model.llm_factory == LLMFactories.name)).where(
+            cls.model.tenant_id == tenant_id, ~cls.model.api_key.is_null()).dicts()

         return list(objs)

@@ -122,7 +124,8 @@ class TenantLLMService(CommonService):
                 model_config = {"llm_factory": llm[0].fid, "api_key": "", "llm_name": mdlnm, "api_base": ""}
         if not model_config:
             if mdlnm == "flag-embedding":
-                model_config = {"llm_factory": "Tongyi-Qianwen", "api_key": "", "llm_name": llm_name, "api_base": ""}
+                model_config = {"llm_factory": "Tongyi-Qianwen", "api_key": "", "llm_name": llm_name,
+                                "api_base": ""}
             else:
                 if not mdlnm:
                     raise LookupError(f"Type of {llm_type} model is not set.")
@@ -137,27 +140,33 @@ class TenantLLMService(CommonService):
         if llm_type == LLMType.EMBEDDING.value:
             if model_config["llm_factory"] not in EmbeddingModel:
                 return
-            return EmbeddingModel[model_config["llm_factory"]](model_config["api_key"], model_config["llm_name"], base_url=model_config["api_base"])
+            return EmbeddingModel[model_config["llm_factory"]](model_config["api_key"], model_config["llm_name"],
+                                                               base_url=model_config["api_base"])

         if llm_type == LLMType.RERANK:
             if model_config["llm_factory"] not in RerankModel:
                 return
-            return RerankModel[model_config["llm_factory"]](model_config["api_key"], model_config["llm_name"], base_url=model_config["api_base"])
+            return RerankModel[model_config["llm_factory"]](model_config["api_key"], model_config["llm_name"],
+                                                            base_url=model_config["api_base"])

         if llm_type == LLMType.IMAGE2TEXT.value:
             if model_config["llm_factory"] not in CvModel:
                 return
-            return CvModel[model_config["llm_factory"]](model_config["api_key"], model_config["llm_name"], lang, base_url=model_config["api_base"], **kwargs)
+            return CvModel[model_config["llm_factory"]](model_config["api_key"], model_config["llm_name"], lang,
+                                                        base_url=model_config["api_base"], **kwargs)

         if llm_type == LLMType.CHAT.value:
             if model_config["llm_factory"] not in ChatModel:
                 return
-            return ChatModel[model_config["llm_factory"]](model_config["api_key"], model_config["llm_name"], base_url=model_config["api_base"], **kwargs)
+            return ChatModel[model_config["llm_factory"]](model_config["api_key"], model_config["llm_name"],
+                                                          base_url=model_config["api_base"], **kwargs)

         if llm_type == LLMType.SPEECH2TEXT:
             if model_config["llm_factory"] not in Seq2txtModel:
                 return
-            return Seq2txtModel[model_config["llm_factory"]](key=model_config["api_key"], model_name=model_config["llm_name"], lang=lang, base_url=model_config["api_base"])
+            return Seq2txtModel[model_config["llm_factory"]](key=model_config["api_key"],
+                                                             model_name=model_config["llm_name"], lang=lang,
+                                                             base_url=model_config["api_base"])
         if llm_type == LLMType.TTS:
             if model_config["llm_factory"] not in TTSModel:
                 return
@@ -194,11 +203,14 @@ class TenantLLMService(CommonService):
         try:
             num = (
                 cls.model.update(used_tokens=cls.model.used_tokens + used_tokens)
-                .where(cls.model.tenant_id == tenant_id, cls.model.llm_name == llm_name, cls.model.llm_factory == llm_factory if llm_factory else True)
+                .where(cls.model.tenant_id == tenant_id, cls.model.llm_name == llm_name,
+                       cls.model.llm_factory == llm_factory if llm_factory else True)
                 .execute()
             )
         except Exception:
-            logging.exception("TenantLLMService.increase_usage got exception,Failed to update used_tokens for tenant_id=%s, llm_name=%s", tenant_id, llm_name)
+            logging.exception(
+                "TenantLLMService.increase_usage got exception,Failed to update used_tokens for tenant_id=%s, llm_name=%s",
+                tenant_id, llm_name)
             return 0

         return num
@@ -206,9 +218,16 @@ class TenantLLMService(CommonService):
     @classmethod
     @DB.connection_context()
     def get_openai_models(cls):
-        objs = cls.model.select().where((cls.model.llm_factory == "OpenAI"), ~(cls.model.llm_name == "text-embedding-3-small"), ~(cls.model.llm_name == "text-embedding-3-large")).dicts()
+        objs = cls.model.select().where((cls.model.llm_factory == "OpenAI"),
+                                        ~(cls.model.llm_name == "text-embedding-3-small"),
+                                        ~(cls.model.llm_name == "text-embedding-3-large")).dicts()
         return list(objs)

+    @classmethod
+    @DB.connection_context()
+    def delete_by_tenant_id(cls, tenant_id):
+        return cls.model.delete().where(cls.model.tenant_id == tenant_id).execute()
+
     @staticmethod
     def llm_id2llm_type(llm_id: str) -> str | None:
         from api.db.services.llm_service import LLMService
@@ -245,8 +264,9 @@ class LLM4Tenant:
         langfuse_keys = TenantLangfuseService.filter_by_tenant(tenant_id=tenant_id)
         self.langfuse = None
         if langfuse_keys:
-            langfuse = Langfuse(public_key=langfuse_keys.public_key, secret_key=langfuse_keys.secret_key, host=langfuse_keys.host)
+            langfuse = Langfuse(public_key=langfuse_keys.public_key, secret_key=langfuse_keys.secret_key,
+                                host=langfuse_keys.host)
             if langfuse.auth_check():
                 self.langfuse = langfuse
                 trace_id = self.langfuse.create_trace_id()
                 self.trace_context = {"trace_id": trace_id}
@@ -2,45 +2,60 @@ from api.db.db_models import UserCanvasVersion, DB
 from api.db.services.common_service import CommonService
 from peewee import DoesNotExist


 class UserCanvasVersionService(CommonService):
     model = UserCanvasVersion

     @classmethod
     @DB.connection_context()
     def list_by_canvas_id(cls, user_canvas_id):
         try:
             user_canvas_version = cls.model.select(
                 *[cls.model.id,
                   cls.model.create_time,
                   cls.model.title,
                   cls.model.create_date,
                   cls.model.update_date,
                   cls.model.user_canvas_id,
                   cls.model.update_time]
             ).where(cls.model.user_canvas_id == user_canvas_id)
             return user_canvas_version
         except DoesNotExist:
             return None
         except Exception:
             return None

+    @classmethod
+    @DB.connection_context()
+    def get_all_canvas_version_by_canvas_ids(cls, canvas_ids):
+        fields = [cls.model.id]
+        versions = cls.model.select(*fields).where(cls.model.user_canvas_id.in_(canvas_ids))
+        # peewee queries are immutable, so the ordered query must be reassigned
+        versions = versions.order_by(cls.model.create_time.asc())
+        offset, limit = 0, 100
+        res = []
+        while True:
+            version_batch = versions.offset(offset).limit(limit)
+            _temp = list(version_batch.dicts())
+            if not _temp:
+                break
+            res.extend(_temp)
+            offset += limit
+        return res
+
     @classmethod
     @DB.connection_context()
     def delete_all_versions(cls, user_canvas_id):
         try:
-            user_canvas_version = cls.model.select().where(cls.model.user_canvas_id == user_canvas_id).order_by(cls.model.create_time.desc())
+            user_canvas_version = cls.model.select().where(cls.model.user_canvas_id == user_canvas_id).order_by(
+                cls.model.create_time.desc())
             if user_canvas_version.count() > 20:
                 delete_ids = []
                 for i in range(20, user_canvas_version.count()):
                     delete_ids.append(user_canvas_version[i].id)

                 cls.delete_by_ids(delete_ids)
             return True
         except DoesNotExist:
             return None
         except Exception:
             return None

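The new get_all_canvas_version_by_canvas_ids pages through results 100 rows at a time instead of materializing one unbounded query. A standalone sketch of the same offset/limit loop over any peewee select (helper name and data are illustrative):

    def fetch_in_batches(query, limit=100):
        # Generic offset/limit pagination over a peewee select,
        # mirroring get_all_canvas_version_by_canvas_ids above.
        offset, rows = 0, []
        while True:
            batch = list(query.offset(offset).limit(limit).dicts())
            if not batch:
                break
            rows.extend(batch)
            offset += limit
        return rows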
@@ -288,6 +288,17 @@ class UserTenantService(CommonService):
             .join(User, on=((cls.model.tenant_id == User.id) & (UserTenant.user_id == user_id) & (UserTenant.status == StatusEnum.VALID.value)))
             .where(cls.model.status == StatusEnum.VALID.value).dicts())

+    @classmethod
+    @DB.connection_context()
+    def get_user_tenant_relation_by_user_id(cls, user_id):
+        fields = [
+            cls.model.id,
+            cls.model.user_id,
+            cls.model.tenant_id,
+            cls.model.role
+        ]
+        return list(cls.model.select(*fields).where(cls.model.user_id == user_id).dicts())
+
     @classmethod
     @DB.connection_context()
     def get_num_members(cls, user_id: str):
@@ -304,4 +315,4 @@ class UserTenantService(CommonService):
             ).first()
             return user_tenant
         except peewee.DoesNotExist:
             return None
@@ -65,8 +65,8 @@ OAUTH_CONFIG = None
 DOC_ENGINE = None
 docStoreConn = None

-retrievaler = None
-kg_retrievaler = None
+retriever = None
+kg_retriever = None

 # user registration switch
 REGISTER_ENABLED = 1
@@ -174,7 +174,7 @@ def init_settings():

     OAUTH_CONFIG = get_base_config("oauth", {})

-    global DOC_ENGINE, docStoreConn, retrievaler, kg_retrievaler
+    global DOC_ENGINE, docStoreConn, retriever, kg_retriever
     DOC_ENGINE = os.environ.get("DOC_ENGINE", "elasticsearch")
     # DOC_ENGINE = os.environ.get('DOC_ENGINE', "opensearch")
     lower_case_doc_engine = DOC_ENGINE.lower()
@@ -187,10 +187,10 @@ def init_settings():
     else:
         raise Exception(f"Not supported doc engine: {DOC_ENGINE}")

-    retrievaler = search.Dealer(docStoreConn)
+    retriever = search.Dealer(docStoreConn)
     from graphrag import search as kg_search

-    kg_retrievaler = kg_search.KGSearch(docStoreConn)
+    kg_retriever = kg_search.KGSearch(docStoreConn)

     if int(os.environ.get("SANDBOX_ENABLED", "0")):
         global SANDBOX_HOST
@@ -51,15 +51,13 @@ from api import settings
 from api.constants import REQUEST_MAX_WAIT_SEC, REQUEST_WAIT_SEC
 from api.db import ActiveEnum
 from api.db.db_models import APIToken
-from api.db.services import UserService
-from api.db.services.llm_service import LLMService
-from api.db.services.tenant_llm_service import TenantLLMService
 from api.utils.json import CustomJSONEncoder, json_dumps
 from api.utils import get_uuid
 from rag.utils.mcp_tool_call_conn import MCPToolCallSession, close_multiple_mcp_toolcall_sessions

 requests.models.complexjson.dumps = functools.partial(json.dumps, cls=CustomJSONEncoder)


 def serialize_for_json(obj):
     """
     Recursively serialize objects to make them JSON serializable.
@@ -68,8 +66,8 @@ def serialize_for_json(obj):
     if hasattr(obj, '__dict__'):
         # For objects with __dict__, try to serialize their attributes
         try:
             return {key: serialize_for_json(value) for key, value in obj.__dict__.items()
                     if not key.startswith('_')}
         except (AttributeError, TypeError):
             return str(obj)
     elif hasattr(obj, '__name__'):
@@ -85,6 +83,7 @@ def serialize_for_json(obj):
     # Fallback: convert to string representation
     return str(obj)

+
 def request(**kwargs):
     sess = requests.Session()
     stream = kwargs.pop("stream", sess.stream)
@@ -105,7 +104,8 @@ def request(**kwargs):
                     settings.HTTP_APP_KEY.encode("ascii"),
                     prepped.path_url.encode("ascii"),
                     prepped.body if kwargs.get("json") else b"",
-                    urlencode(sorted(kwargs["data"].items()), quote_via=quote, safe="-._~").encode("ascii") if kwargs.get("data") and isinstance(kwargs["data"], dict) else b"",
+                    urlencode(sorted(kwargs["data"].items()), quote_via=quote, safe="-._~").encode(
+                        "ascii") if kwargs.get("data") and isinstance(kwargs["data"], dict) else b"",
                 ]
             ),
             "sha1",
@@ -127,7 +127,7 @@ def request(**kwargs):
 def get_exponential_backoff_interval(retries, full_jitter=False):
     """Calculate the exponential backoff wait time."""
     # Will be zero if factor equals 0
-    countdown = min(REQUEST_MAX_WAIT_SEC, REQUEST_WAIT_SEC * (2**retries))
+    countdown = min(REQUEST_MAX_WAIT_SEC, REQUEST_WAIT_SEC * (2 ** retries))
     # Full jitter according to
     # https://aws.amazon.com/blogs/architecture/exponential-backoff-and-jitter/
     if full_jitter:
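The change here is whitespace only, but the backoff formula deserves a worked example. Assuming REQUEST_WAIT_SEC = 2 and REQUEST_MAX_WAIT_SEC = 32 (illustrative values, not read from api/constants.py), and assuming the jitter branch follows the AWS article cited in the comment:

    import random

    REQUEST_WAIT_SEC = 2       # assumed base wait
    REQUEST_MAX_WAIT_SEC = 32  # assumed cap

    def get_exponential_backoff_interval(retries, full_jitter=False):
        # Exponential growth, capped at the maximum wait.
        countdown = min(REQUEST_MAX_WAIT_SEC, REQUEST_WAIT_SEC * (2 ** retries))
        if full_jitter:
            # Assumed "full jitter" behavior: uniform over [0, countdown].
            countdown = random.randrange(countdown + 1)
        return max(0, countdown)

    # retries 0..5 -> 2, 4, 8, 16, 32, 32 (capped)
    print([get_exponential_backoff_interval(r) for r in range(6)])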
@@ -151,18 +151,21 @@ def get_data_error_result(code=settings.RetCode.DATA_ERROR, message="Sorry! Data
 def server_error_response(e):
     logging.exception(e)
     try:
-        if e.code == 401:
-            return get_json_result(code=401, message=repr(e))
-    except BaseException:
-        pass
+        msg = repr(e).lower()
+        if getattr(e, "code", None) == 401 or ("unauthorized" in msg) or ("401" in msg):
+            return get_json_result(code=settings.RetCode.UNAUTHORIZED, message=repr(e))
+    except Exception as ex:
+        logging.warning(f"error checking authorization: {ex}")
+
     if len(e.args) > 1:
         try:
             serialized_data = serialize_for_json(e.args[1])
-            return get_json_result(code= settings.RetCode.EXCEPTION_ERROR, message=repr(e.args[0]), data=serialized_data)
+            return get_json_result(code=settings.RetCode.EXCEPTION_ERROR, message=repr(e.args[0]), data=serialized_data)
         except Exception:
             return get_json_result(code=settings.RetCode.EXCEPTION_ERROR, message=repr(e.args[0]), data=None)
     if repr(e).find("index_not_found_exception") >= 0:
-        return get_json_result(code=settings.RetCode.EXCEPTION_ERROR, message="No chunk found, please upload file and parse it.")
+        return get_json_result(code=settings.RetCode.EXCEPTION_ERROR,
+                               message="No chunk found, please upload file and parse it.")

     return get_json_result(code=settings.RetCode.EXCEPTION_ERROR, message=repr(e))
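The rewritten guard no longer assumes the exception carries a .code attribute; it also sniffs the repr for "unauthorized"/"401". A standalone sketch of just that predicate (everything except the check itself is illustrative):

    def looks_unauthorized(e: BaseException) -> bool:
        # Mirrors the new check in server_error_response: attribute first,
        # then a best-effort scan of the exception text.
        msg = repr(e).lower()
        return getattr(e, "code", None) == 401 or "unauthorized" in msg or "401" in msg

    class ApiError(Exception):  # hypothetical exception type
        code = 401

    assert looks_unauthorized(ApiError("nope"))
    assert looks_unauthorized(RuntimeError("HTTP 401 from upstream"))
    assert not looks_unauthorized(ValueError("bad input"))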
@@ -207,7 +210,8 @@ def validate_request(*args, **kwargs):
             if no_arguments:
                 error_string += "required argument are missing: {}; ".format(",".join(no_arguments))
             if error_arguments:
-                error_string += "required argument values: {}".format(",".join(["{}={}".format(a[0], a[1]) for a in error_arguments]))
+                error_string += "required argument values: {}".format(
+                    ",".join(["{}={}".format(a[0], a[1]) for a in error_arguments]))
             return get_json_result(code=settings.RetCode.ARGUMENT_ERROR, message=error_string)
         return func(*_args, **_kwargs)

@@ -222,7 +226,8 @@ def not_allowed_parameters(*params):
         input_arguments = flask_request.json or flask_request.form.to_dict()
         for param in params:
             if param in input_arguments:
-                return get_json_result(code=settings.RetCode.ARGUMENT_ERROR, message=f"Parameter {param} isn't allowed")
+                return get_json_result(code=settings.RetCode.ARGUMENT_ERROR,
+                                       message=f"Parameter {param} isn't allowed")
         return f(*args, **kwargs)

     return wrapper
@@ -233,12 +238,14 @@ def not_allowed_parameters(*params):
 def active_required(f):
     @wraps(f)
     def wrapper(*args, **kwargs):
+        from api.db.services import UserService
         user_id = current_user.id
         usr = UserService.filter_by_id(user_id)
         # check is_active
         if not usr or not usr.is_active == ActiveEnum.ACTIVE.value:
             return get_json_result(code=settings.RetCode.FORBIDDEN, message="User isn't active, please activate first.")
         return f(*args, **kwargs)

     return wrapper

@@ -259,7 +266,7 @@ def send_file_in_mem(data, filename):
     return send_file(f, as_attachment=True, attachment_filename=filename)


-def get_json_result(code=settings.RetCode.SUCCESS, message="success", data=None):
+def get_json_result(code: settings.RetCode = settings.RetCode.SUCCESS, message="success", data=None):
     response = {"code": code, "message": message, "data": data}
     return jsonify(response)

@@ -314,7 +321,7 @@ def construct_result(code=settings.RetCode.DATA_ERROR, message="data is missing"
     return jsonify(response)


-def construct_json_result(code=settings.RetCode.SUCCESS, message="success", data=None):
+def construct_json_result(code: settings.RetCode = settings.RetCode.SUCCESS, message="success", data=None):
     if data is None:
         return jsonify({"code": code, "message": message})
     else:
@@ -347,27 +354,39 @@ def token_required(func):
         token = authorization_list[1]
         objs = APIToken.query(token=token)
         if not objs:
-            return get_json_result(data=False, message="Authentication error: API key is invalid!", code=settings.RetCode.AUTHENTICATION_ERROR)
+            return get_json_result(data=False, message="Authentication error: API key is invalid!",
+                                   code=settings.RetCode.AUTHENTICATION_ERROR)
         kwargs["tenant_id"] = objs[0].tenant_id
         return func(*args, **kwargs)

     return decorated_function


-def get_result(code=settings.RetCode.SUCCESS, message="", data=None):
-    if code == 0:
+def get_result(code=settings.RetCode.SUCCESS, message="", data=None, total=None):
+    """
+    Standard API response format:
+    {
+        "code": 0,
+        "data": [...],   # List or object, backward compatible
+        "total": 47,     # Optional field for pagination
+        "message": "..." # Error or status message
+    }
+    """
+    response = {"code": code}
+
+    if code == settings.RetCode.SUCCESS:
         if data is not None:
-            response = {"code": code, "data": data}
-        else:
-            response = {"code": code}
+            response["data"] = data
+        if total is not None:
+            response["total_datasets"] = total
     else:
-        response = {"code": code, "message": message}
+        response["message"] = message or "Error"

     return jsonify(response)


 def get_error_data_result(
     message="Sorry! Data missing!",
     code=settings.RetCode.DATA_ERROR,
 ):
     result_dict = {"code": code, "message": message}
     response = {}
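Per the new docstring, get_result now carries an optional pagination count — though note the implementation stores it under "total_datasets", not "total". A hedged usage sketch (the route context and data are illustrative):

    # Inside a Flask view; get_result is the helper changed above.
    # rows = [{"id": "kb1"}, {"id": "kb2"}]
    # return get_result(data=rows, total=47)
    #
    # Resulting JSON payload:
    # {"code": 0, "data": [{"id": "kb1"}, {"id": "kb2"}], "total_datasets": 47}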
@@ -402,7 +421,8 @@ def get_parser_config(chunk_method, parser_config):

     # Define default configurations for each chunking method
     key_mapping = {
-        "naive": {"chunk_token_num": 512, "delimiter": r"\n", "html4excel": False, "layout_recognize": "DeepDOC", "raptor": {"use_raptor": False}, "graphrag": {"use_graphrag": False}},
+        "naive": {"chunk_token_num": 512, "delimiter": r"\n", "html4excel": False, "layout_recognize": "DeepDOC",
+                  "raptor": {"use_raptor": False}, "graphrag": {"use_graphrag": False}},
         "qa": {"raptor": {"use_raptor": False}, "graphrag": {"use_graphrag": False}},
         "tag": None,
         "resume": None,
@@ -441,16 +461,16 @@ def get_parser_config(chunk_method, parser_config):


 def get_data_openai(
     id=None,
     created=None,
     model=None,
     prompt_tokens=0,
     completion_tokens=0,
     content=None,
     finish_reason=None,
     object="chat.completion",
     param=None,
     stream=False
 ):
     total_tokens = prompt_tokens + completion_tokens

@@ -524,6 +544,8 @@ def check_duplicate_ids(ids, id_type="item"):


 def verify_embedding_availability(embd_id: str, tenant_id: str) -> tuple[bool, Response | None]:
+    from api.db.services.llm_service import LLMService
+    from api.db.services.tenant_llm_service import TenantLLMService
     """
     Verifies availability of an embedding model for a specific tenant.

@@ -562,7 +584,9 @@ def verify_embedding_availability(embd_id: str, tenant_id: str) -> tuple[bool, R
         in_llm_service = bool(LLMService.query(llm_name=llm_name, fid=llm_factory, model_type="embedding"))

         tenant_llms = TenantLLMService.get_my_llms(tenant_id=tenant_id)
-        is_tenant_model = any(llm["llm_name"] == llm_name and llm["llm_factory"] == llm_factory and llm["model_type"] == "embedding" for llm in tenant_llms)
+        is_tenant_model = any(
+            llm["llm_name"] == llm_name and llm["llm_factory"] == llm_factory and llm["model_type"] == "embedding" for
+            llm in tenant_llms)

         is_builtin_model = embd_id in settings.BUILTIN_EMBEDDING_MODELS
         if not (is_builtin_model or is_tenant_model or in_llm_service):
@@ -659,6 +683,16 @@ def remap_dictionary_keys(source_data: dict, key_aliases: dict = None) -> dict:
     return transformed_data


+def group_by(list_of_dict, key):
+    res = {}
+    for item in list_of_dict:
+        if item[key] in res.keys():
+            res[item[key]].append(item)
+        else:
+            res[item[key]] = [item]
+    return res
+
+
 def get_mcp_tools(mcp_servers: list, timeout: float | int = 10) -> tuple[dict, str]:
     results = {}
     tool_call_sessions = []
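The new group_by helper buckets a list of dicts by one key; unlike itertools.groupby, it does not require sorted input. A quick usage example (data is illustrative):

    rows = [
        {"tenant": "a", "llm": "gpt-4o"},
        {"tenant": "b", "llm": "qwen"},
        {"tenant": "a", "llm": "claude"},
    ]
    grouped = group_by(rows, "tenant")
    # {"a": [{"tenant": "a", "llm": "gpt-4o"}, {"tenant": "a", "llm": "claude"}],
    #  "b": [{"tenant": "b", "llm": "qwen"}]}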
@@ -695,7 +729,9 @@ TimeoutException = Union[Type[BaseException], BaseException]
 OnTimeoutCallback = Union[Callable[..., Any], Coroutine[Any, Any, Any]]


-def timeout(seconds: float | int = None, attempts: int = 2, *, exception: Optional[TimeoutException] = None, on_timeout: Optional[OnTimeoutCallback] = None):
+def timeout(seconds: float | int | str = None, attempts: int = 2, *, exception: Optional[TimeoutException] = None, on_timeout: Optional[OnTimeoutCallback] = None):
+    if isinstance(seconds, str):
+        seconds = float(seconds)
     def decorator(func):
         @wraps(func)
         def wrapper(*args, **kwargs):
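The widened signature lets callers pass the timeout budget as a string, e.g. straight from an environment variable. A hedged usage sketch (the decorator's retry and exception behavior beyond the lines shown here is assumed, not taken from this diff):

    import os

    # Assumed usage: the timeout comes from the environment as a string and
    # is coerced by the new isinstance(seconds, str) branch above.
    @timeout(os.environ.get("CHAT_TIMEOUT", "30"), attempts=2)
    def call_model(prompt: str) -> str:
        ...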
@@ -781,7 +817,9 @@ async def is_strong_enough(chat_model, embedding_model):
         _ = await trio.to_thread.run_sync(lambda: embedding_model.encode(["Are you strong enough!?"]))
     if chat_model:
         with trio.fail_after(30):
-            res = await trio.to_thread.run_sync(lambda: chat_model.chat("Nothing special.", [{"role": "user", "content": "Are you strong enough!?"}], {}))
+            res = await trio.to_thread.run_sync(lambda: chat_model.chat("Nothing special.", [{"role": "user",
+                                                                                              "content": "Are you strong enough!?"}],
+                                                                        {}))
             if res.find("**ERROR**") >= 0:
                 raise Exception(res)

@@ -1,3 +1,56 @@
 import base64
+import logging
+from functools import partial
+from io import BytesIO
+
+from PIL import Image

 test_image_base64 = "iVBORw0KGgoAAAANSUhEUgAAAGQAAABkCAIAAAD/gAIDAAAA6ElEQVR4nO3QwQ3AIBDAsIP9d25XIC+EZE8QZc18w5l9O+AlZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBT+IYAHHLHkdEgAAAABJRU5ErkJggg=="
 test_image = base64.b64decode(test_image_base64)
+
+
+async def image2id(d: dict, storage_put_func: partial, objname: str, bucket: str = "imagetemps"):
+    import logging
+    from io import BytesIO
+    import trio
+    from rag.svr.task_executor import minio_limiter
+    if not d.get("image"):
+        return
+
+    with BytesIO() as output_buffer:
+        if isinstance(d["image"], bytes):
+            output_buffer.write(d["image"])
+            output_buffer.seek(0)
+        else:
+            # If the image is in RGBA mode, convert it to RGB mode before saving it in JPEG format.
+            if d["image"].mode in ("RGBA", "P"):
+                converted_image = d["image"].convert("RGB")
+                d["image"] = converted_image
+            try:
+                d["image"].save(output_buffer, format='JPEG')
+            except OSError as e:
+                logging.warning(
+                    "Saving image exception, ignore: {}".format(str(e)))

+        async with minio_limiter:
+            await trio.to_thread.run_sync(lambda: storage_put_func(bucket=bucket, fnm=objname, binary=output_buffer.getvalue()))
+        d["img_id"] = f"{bucket}-{objname}"
+        if not isinstance(d["image"], bytes):
+            d["image"].close()
+        del d["image"]  # Remove image reference
+
+
+def id2image(image_id: str | None, storage_get_func: partial):
+    if not image_id:
+        return
+    arr = image_id.split("-")
+    if len(arr) != 2:
+        return
+    bkt, nm = image_id.split("-")
+    try:
+        blob = storage_get_func(bucket=bkt, filename=nm)
+        if not blob:
+            return
+        return Image.open(BytesIO(blob))
+    except Exception as e:
+        logging.exception(e)
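image2id JPEG-encodes a chunk's in-memory image (raw bytes or PIL), uploads it via the supplied storage partial, and replaces it with an "img_id" of the form "<bucket>-<objname>"; id2image reverses the lookup. A minimal round-trip sketch with toy in-memory storage stand-ins (illustrative, not the real storage backend; assumes id2image from this module is in scope):

    from functools import partial
    from io import BytesIO
    from PIL import Image

    _store = {}  # toy stand-in for object storage

    def _put(bucket, fnm, binary):
        _store[f"{bucket}-{fnm}"] = binary

    def _get(bucket, filename):
        return _store.get(f"{bucket}-{filename}")

    # Store a tiny JPEG the way image2id would, then read it back.
    buf = BytesIO()
    Image.new("RGB", (4, 4)).save(buf, format="JPEG")
    _put("imagetemps", "chunk1", buf.getvalue())

    img = id2image("imagetemps-chunk1", partial(_get))
    assert img is not None and img.size == (4, 4)

Note that id2image rejects any id that does not split into exactly two "-"-separated parts, so object names containing a hyphen would fail the lookup.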
@@ -21,3 +21,26 @@ def string_to_bytes(string):

 def bytes_to_string(byte):
     return byte.decode(encoding="utf-8")
+
+
+def convert_bytes(size_in_bytes: int) -> str:
+    """
+    Format size in bytes.
+    """
+    if size_in_bytes == 0:
+        return "0 B"
+
+    units = ['B', 'KB', 'MB', 'GB', 'TB', 'PB']
+    i = 0
+    size = float(size_in_bytes)
+
+    while size >= 1024 and i < len(units) - 1:
+        size /= 1024
+        i += 1
+
+    if i == 0 or size >= 100:
+        return f"{size:.0f} {units[i]}"
+    elif size >= 10:
+        return f"{size:.1f} {units[i]}"
+    else:
+        return f"{size:.2f} {units[i]}"
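convert_bytes picks its precision from the magnitude: no decimals for plain bytes or values of 100 and above, one decimal from 10 to 99, two below 10. A few worked examples:

    assert convert_bytes(0) == "0 B"
    assert convert_bytes(512) == "512 B"               # i == 0: no decimals
    assert convert_bytes(1536) == "1.50 KB"            # 1.5 KB, < 10 -> two decimals
    assert convert_bytes(15 * 1024**2) == "15.0 MB"    # 10..99 -> one decimal
    assert convert_bytes(250 * 1024**3) == "250 GB"    # >= 100 -> no decimals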
25  api/utils/email_templates.py  Normal file
@@ -0,0 +1,25 @@
+"""
+Reusable HTML email templates and registry.
+"""
+
+# Invitation email template
+INVITE_EMAIL_TMPL = """
+<p>Hi {{email}},</p>
+<p>{{inviter}} has invited you to join their team (ID: {{tenant_id}}).</p>
+<p>Click the link below to complete your registration:<br>
+<a href="{{invite_url}}">{{invite_url}}</a></p>
+<p>If you did not request this, please ignore this email.</p>
+"""
+
+# Password reset code template
+RESET_CODE_EMAIL_TMPL = """
+<p>Hello,</p>
+<p>Your password reset code is: <b>{{ code }}</b></p>
+<p>This code will expire in {{ ttl_min }} minutes.</p>
+"""
+
+# Template registry
+EMAIL_TEMPLATES = {
+    "invite": INVITE_EMAIL_TMPL,
+    "reset_code": RESET_CODE_EMAIL_TMPL,
+}
@@ -13,7 +13,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+
+
+# Standard library imports
 import base64
+import hashlib
+import io
 import json
 import os
 import re
@@ -22,13 +27,20 @@ import subprocess
 import sys
 import tempfile
 import threading
+import zipfile
 from io import BytesIO
+
+# Typing
+from typing import List, Union, Tuple
+
+# Third-party imports
+import olefile
 import pdfplumber
 from cachetools import LRUCache, cached
 from PIL import Image
 from ruamel.yaml import YAML
+
+# Local imports
 from api.constants import IMG_BASE64_PREFIX
 from api.db import FileType

@@ -155,13 +167,13 @@ def filename_type(filename):
     if re.match(r".*\.pdf$", filename):
         return FileType.PDF.value

-    if re.match(r".*\.(eml|doc|docx|ppt|pptx|yml|xml|htm|json|jsonl|ldjson|csv|txt|ini|xls|xlsx|wps|rtf|hlp|pages|numbers|key|md|py|js|java|c|cpp|h|php|go|ts|sh|cs|kt|html|sql)$", filename):
+    if re.match(r".*\.(msg|eml|doc|docx|ppt|pptx|yml|xml|htm|json|jsonl|ldjson|csv|txt|ini|xls|xlsx|wps|rtf|hlp|pages|numbers|key|md|py|js|java|c|cpp|h|php|go|ts|sh|cs|kt|html|sql)$", filename):
         return FileType.DOC.value

     if re.match(r".*\.(wav|flac|ape|alac|wavpack|wv|mp3|aac|ogg|vorbis|opus)$", filename):
         return FileType.AURAL.value

-    if re.match(r".*\.(jpg|jpeg|png|tif|gif|pcx|tga|exif|fpx|svg|psd|cdr|pcd|dxf|ufo|eps|ai|raw|WMF|webp|avif|apng|icon|ico|mpg|mpeg|avi|rm|rmvb|mov|wmv|asf|dat|asx|wvx|mpe|mpa|mp4)$", filename):
+    if re.match(r".*\.(jpg|jpeg|png|tif|gif|pcx|tga|exif|fpx|svg|psd|cdr|pcd|dxf|ufo|eps|ai|raw|WMF|webp|avif|apng|icon|ico|mpg|mpeg|avi|rm|rmvb|mov|wmv|asf|dat|asx|wvx|mpe|mpa|mp4|avi|mkv)$", filename):
         return FileType.VISUAL.value

     return FileType.OTHER.value
@@ -284,3 +296,125 @@ def read_potential_broken_pdf(blob):
         return repaired

     return blob
+
+
+def _is_zip(h: bytes) -> bool:
+    return h.startswith(b"PK\x03\x04") or h.startswith(b"PK\x05\x06") or h.startswith(b"PK\x07\x08")
+
+def _is_pdf(h: bytes) -> bool:
+    return h.startswith(b"%PDF-")
+
+def _is_ole(h: bytes) -> bool:
+    return h.startswith(b"\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1")
+
+def _sha10(b: bytes) -> str:
+    return hashlib.sha256(b).hexdigest()[:10]
+
+def _guess_ext(b: bytes) -> str:
+    h = b[:8]
+    if _is_zip(h):
+        try:
+            with zipfile.ZipFile(io.BytesIO(b), "r") as z:
+                names = [n.lower() for n in z.namelist()]
+                if any(n.startswith("word/") for n in names):
+                    return ".docx"
+                if any(n.startswith("ppt/") for n in names):
+                    return ".pptx"
+                if any(n.startswith("xl/") for n in names):
+                    return ".xlsx"
+        except Exception:
+            pass
+        return ".zip"
+    if _is_pdf(h):
+        return ".pdf"
+    if _is_ole(h):
+        return ".doc"
+    return ".bin"
+
+# Try to extract the real embedded payload from OLE's Ole10Native
+def _extract_ole10native_payload(data: bytes) -> bytes:
+    try:
+        pos = 0
+        if len(data) < 4:
+            return data
+        _ = int.from_bytes(data[pos:pos+4], "little")
+        pos += 4
+        # filename/src/tmp (NUL-terminated ANSI)
+        for _ in range(3):
+            z = data.index(b"\x00", pos)
+            pos = z + 1
+        # skip unknown 4 bytes
+        pos += 4
+        if pos + 4 > len(data):
+            return data
+        size = int.from_bytes(data[pos:pos+4], "little")
+        pos += 4
+        if pos + size <= len(data):
+            return data[pos:pos+size]
+    except Exception:
+        pass
+    return data
+
+def extract_embed_file(target: Union[bytes, bytearray]) -> List[Tuple[str, bytes]]:
+    """
+    Only extract the 'first layer' of embedding, returning raw (filename, bytes).
+    """
+    top = bytes(target)
+    head = top[:8]
+    out: List[Tuple[str, bytes]] = []
+    seen = set()
+
+    def push(b: bytes, name_hint: str = ""):
+        h10 = _sha10(b)
+        if h10 in seen:
+            return
+        seen.add(h10)
+        ext = _guess_ext(b)
+        # If name_hint has an extension use its basename; else fallback to guessed ext
+        if "." in name_hint:
+            fname = name_hint.split("/")[-1]
+        else:
+            fname = f"{h10}{ext}"
+        out.append((fname, b))
+
+    # OOXML/ZIP container (docx/xlsx/pptx)
+    if _is_zip(head):
+        try:
+            with zipfile.ZipFile(io.BytesIO(top), "r") as z:
+                embed_dirs = (
+                    "word/embeddings/", "word/objects/", "word/activex/",
+                    "xl/embeddings/", "ppt/embeddings/"
+                )
+                for name in z.namelist():
+                    low = name.lower()
+                    if any(low.startswith(d) for d in embed_dirs):
+                        try:
+                            b = z.read(name)
+                            push(b, name)
+                        except Exception:
+                            pass
+        except Exception:
+            pass
+        return out
+
+    # OLE container (doc/ppt/xls)
+    if _is_ole(head):
+        try:
+            with olefile.OleFileIO(io.BytesIO(top)) as ole:
+                for entry in ole.listdir():
+                    p = "/".join(entry)
+                    try:
+                        data = ole.openstream(entry).read()
+                    except Exception:
+                        continue
+                    if not data:
+                        continue
+                    if "Ole10Native" in p or "ole10native" in p.lower():
+                        data = _extract_ole10native_payload(data)
+                    push(data, p)
+        except Exception:
+            pass
+        return out
+
+    return out
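extract_embed_file sniffs the container format by magic bytes and then walks either the OOXML embeddings directories or the OLE stream tree. A minimal usage sketch (the input file is hypothetical):

    # Illustrative usage of the new helper in api/utils/file_utils.py:
    with open("report_with_attachment.docx", "rb") as f:  # hypothetical input
        payload = f.read()

    for fname, blob in extract_embed_file(payload):
        # fname is either the basename from inside the container, or
        # "<sha256[:10]><guessed ext>" when no usable name was found.
        with open(f"extracted_{fname}", "wb") as out:
            out.write(blob)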
104  api/utils/health.py  Normal file
@@ -0,0 +1,104 @@
+from timeit import default_timer as timer
+
+from api import settings
+from api.db.db_models import DB
+from rag.utils.redis_conn import REDIS_CONN
+from rag.utils.storage_factory import STORAGE_IMPL
+
+
+def _ok_nok(ok: bool) -> str:
+    return "ok" if ok else "nok"
+
+
+def check_db() -> tuple[bool, dict]:
+    st = timer()
+    try:
+        # lightweight probe; works for MySQL/Postgres
+        DB.execute_sql("SELECT 1")
+        return True, {"elapsed": f"{(timer() - st) * 1000.0:.1f}"}
+    except Exception as e:
+        return False, {"elapsed": f"{(timer() - st) * 1000.0:.1f}", "error": str(e)}
+
+
+def check_redis() -> tuple[bool, dict]:
+    st = timer()
+    try:
+        ok = bool(REDIS_CONN.health())
+        return ok, {"elapsed": f"{(timer() - st) * 1000.0:.1f}"}
+    except Exception as e:
+        return False, {"elapsed": f"{(timer() - st) * 1000.0:.1f}", "error": str(e)}
+
+
+def check_doc_engine() -> tuple[bool, dict]:
+    st = timer()
+    try:
+        meta = settings.docStoreConn.health()
+        # treat any successful call as ok
+        return True, {"elapsed": f"{(timer() - st) * 1000.0:.1f}", **(meta or {})}
+    except Exception as e:
+        return False, {"elapsed": f"{(timer() - st) * 1000.0:.1f}", "error": str(e)}
+
+
+def check_storage() -> tuple[bool, dict]:
+    st = timer()
+    try:
+        STORAGE_IMPL.health()
+        return True, {"elapsed": f"{(timer() - st) * 1000.0:.1f}"}
+    except Exception as e:
+        return False, {"elapsed": f"{(timer() - st) * 1000.0:.1f}", "error": str(e)}
+
+
+def check_chat() -> tuple[bool, dict]:
+    st = timer()
+    try:
+        cfg = getattr(settings, "CHAT_CFG", None)
+        ok = bool(cfg and cfg.get("factory"))
+        return ok, {"elapsed": f"{(timer() - st) * 1000.0:.1f}"}
+    except Exception as e:
+        return False, {"elapsed": f"{(timer() - st) * 1000.0:.1f}", "error": str(e)}
+
+
+def run_health_checks() -> tuple[dict, bool]:
+    result: dict[str, str | dict] = {}
+
+    db_ok, db_meta = check_db()
+    chat_ok, chat_meta = check_chat()
+
+    result["db"] = _ok_nok(db_ok)
+    if not db_ok:
+        result.setdefault("_meta", {})["db"] = db_meta
+
+    result["chat"] = _ok_nok(chat_ok)
+    if not chat_ok:
+        result.setdefault("_meta", {})["chat"] = chat_meta
+
+    # Optional probes (do not change minimal contract but exposed for observability)
+    try:
+        redis_ok, redis_meta = check_redis()
+        result["redis"] = _ok_nok(redis_ok)
+        if not redis_ok:
+            result.setdefault("_meta", {})["redis"] = redis_meta
+    except Exception:
+        result["redis"] = "nok"
+
+    try:
+        doc_ok, doc_meta = check_doc_engine()
+        result["doc_engine"] = _ok_nok(doc_ok)
+        if not doc_ok:
+            result.setdefault("_meta", {})["doc_engine"] = doc_meta
+    except Exception:
+        result["doc_engine"] = "nok"
+
+    try:
+        sto_ok, sto_meta = check_storage()
+        result["storage"] = _ok_nok(sto_ok)
+        if not sto_ok:
+            result.setdefault("_meta", {})["storage"] = sto_meta
+    except Exception:
+        result["storage"] = "nok"
+
+    all_ok = (result.get("db") == "ok") and (result.get("chat") == "ok")
+    result["status"] = "ok" if all_ok else "nok"
+    return result, all_ok
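run_health_checks returns a flat status map plus an overall boolean, with per-probe timing and error detail tucked under "_meta" only on failure. A hedged sketch of wiring it into a Flask endpoint (the route path is an assumption, not from this diff):

    from flask import Flask, jsonify
    from api.utils.health import run_health_checks

    app = Flask(__name__)

    @app.get("/v1/system/healthz")  # hypothetical route
    def healthz():
        result, all_ok = run_health_checks()
        # e.g. {"db": "ok", "chat": "ok", "redis": "ok",
        #       "doc_engine": "ok", "storage": "ok", "status": "ok"}
        return jsonify(result), (200 if all_ok else 503)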
@@ -13,14 +13,17 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+import os
+import requests
 from timeit import default_timer as timer

 from api import settings
 from api.db.db_models import DB
+from rag import settings as rag_settings
 from rag.utils.redis_conn import REDIS_CONN
 from rag.utils.storage_factory import STORAGE_IMPL
+from rag.utils.es_conn import ESConnection
+from rag.utils.infinity_conn import InfinityConnection


 def _ok_nok(ok: bool) -> str:
@@ -65,6 +68,96 @@ def check_storage() -> tuple[bool, dict]:
         return False, {"elapsed": f"{(timer() - st) * 1000.0:.1f}", "error": str(e)}


+def get_es_cluster_stats() -> dict:
+    doc_engine = os.getenv('DOC_ENGINE', 'elasticsearch')
+    if doc_engine != 'elasticsearch':
+        raise Exception("Elasticsearch is not in use.")
+    try:
+        return {
+            "status": "alive",
+            "message": ESConnection().get_cluster_stats()
+        }
+    except Exception as e:
+        return {
+            "status": "timeout",
+            "message": f"error: {str(e)}",
+        }
+
+
+def get_infinity_status():
+    doc_engine = os.getenv('DOC_ENGINE', 'elasticsearch')
+    if doc_engine != 'infinity':
+        raise Exception("Infinity is not in use.")
+    try:
+        return {
+            "status": "alive",
+            "message": InfinityConnection().health()
+        }
+    except Exception as e:
+        return {
+            "status": "timeout",
+            "message": f"error: {str(e)}",
+        }
+
+
+def get_mysql_status():
+    try:
+        cursor = DB.execute_sql("SHOW PROCESSLIST;")
+        res_rows = cursor.fetchall()
+        headers = ['id', 'user', 'host', 'db', 'command', 'time', 'state', 'info']
+        cursor.close()
+        return {
+            "status": "alive",
+            "message": [dict(zip(headers, r)) for r in res_rows]
+        }
+    except Exception as e:
+        return {
+            "status": "timeout",
+            "message": f"error: {str(e)}",
+        }
+
+
+def check_minio_alive():
+    start_time = timer()
+    try:
+        response = requests.get(f'http://{rag_settings.MINIO["host"]}/minio/health/live')
+        if response.status_code == 200:
+            return {"status": "alive", "message": f"Confirm elapsed: {(timer() - start_time) * 1000.0:.1f} ms."}
+        else:
+            return {"status": "timeout", "message": f"Confirm elapsed: {(timer() - start_time) * 1000.0:.1f} ms."}
+    except Exception as e:
+        return {
+            "status": "timeout",
+            "message": f"error: {str(e)}",
+        }
+
+
+def get_redis_info():
+    try:
+        return {
+            "status": "alive",
+            "message": REDIS_CONN.info()
+        }
+    except Exception as e:
+        return {
+            "status": "timeout",
+            "message": f"error: {str(e)}",
+        }
+
+
+def check_ragflow_server_alive():
+    start_time = timer()
+    try:
+        response = requests.get(f'http://{settings.HOST_IP}:{settings.HOST_PORT}/v1/system/ping')
+        if response.status_code == 200:
+            return {"status": "alive", "message": f"Confirm elapsed: {(timer() - start_time) * 1000.0:.1f} ms."}
+        else:
+            return {"status": "timeout", "message": f"Confirm elapsed: {(timer() - start_time) * 1000.0:.1f} ms."}
+    except Exception as e:
+        return {
+            "status": "timeout",
+            "message": f"error: {str(e)}",
+        }
+
+
 def run_health_checks() -> tuple[dict, bool]:
@@ -99,9 +192,7 @@ def run_health_checks() -> tuple[dict, bool]:
     except Exception:
         result["storage"] = "nok"

-    all_ok = (result.get("db") == "ok") and (result.get("redis") == "ok") and (result.get("doc_engine") == "ok") and (result.get("storage") == "ok")
+    all_ok = (result.get("db") == "ok") and (result.get("redis") == "ok") and (result.get("doc_engine") == "ok") and (
+        result.get("storage") == "ok")
     result["status"] = "ok" if all_ok else "nok"
     return result, all_ok

@@ -24,6 +24,7 @@ from urllib.parse import urlparse
 from api.apps import smtp_mail_server
 from flask_mail import Message
 from flask import render_template_string
+from api.utils.email_templates import EMAIL_TEMPLATES
 from selenium import webdriver
 from selenium.common.exceptions import TimeoutException
 from selenium.webdriver.chrome.options import Options
@@ -34,6 +35,12 @@ from selenium.webdriver.support.ui import WebDriverWait
 from webdriver_manager.chrome import ChromeDriverManager


+OTP_LENGTH = 8
+OTP_TTL_SECONDS = 5 * 60
+ATTEMPT_LIMIT = 5
+ATTEMPT_LOCK_SECONDS = 30 * 60
+RESEND_COOLDOWN_SECONDS = 60
+
+
 CONTENT_TYPE_MAP = {
     # Office
@@ -178,24 +185,49 @@ def get_float(req: dict, key: str, default: float | int = 10.0) -> float:
     return default


-INVITE_EMAIL_TMPL = """
-<p>Hi {{email}},</p>
-<p>{{inviter}} has invited you to join their team (ID: {{tenant_id}}).</p>
-<p>Click the link below to complete your registration:<br>
-<a href="{{invite_url}}">{{invite_url}}</a></p>
-<p>If you did not request this, please ignore this email.</p>
-"""
+def send_email_html(subject: str, to_email: str, template_key: str, **context):
+    """Generic HTML email sender using shared templates.
+    template_key must exist in EMAIL_TEMPLATES.
+    """
+    from api.apps import app
+    tmpl = EMAIL_TEMPLATES.get(template_key)
+    if not tmpl:
+        raise ValueError(f"Unknown email template: {template_key}")
+    with app.app_context():
+        msg = Message(subject=subject, recipients=[to_email])
+        msg.html = render_template_string(tmpl, **context)
+        smtp_mail_server.send(msg)


 def send_invite_email(to_email, invite_url, tenant_id, inviter):
-    from api.apps import app
-    with app.app_context():
-        msg = Message(subject="RAGFlow Invitation",
-                      recipients=[to_email])
-        msg.html = render_template_string(
-            INVITE_EMAIL_TMPL,
-            email=to_email,
-            invite_url=invite_url,
-            tenant_id=tenant_id,
-            inviter=inviter,
-        )
-        smtp_mail_server.send(msg)
+    # Reuse the generic HTML sender with 'invite' template
+    send_email_html(
+        subject="RAGFlow Invitation",
+        to_email=to_email,
+        template_key="invite",
+        email=to_email,
+        invite_url=invite_url,
+        tenant_id=tenant_id,
+        inviter=inviter,
+    )
+
+
+def otp_keys(email: str):
+    email = (email or "").strip().lower()
+    return (
+        f"otp:{email}",
+        f"otp_attempts:{email}",
+        f"otp_last_sent:{email}",
+        f"otp_lock:{email}",
+    )
+
+
+def hash_code(code: str, salt: bytes) -> str:
+    import hashlib
+    import hmac
+    return hmac.new(salt, (code or "").encode("utf-8"), hashlib.sha256).hexdigest()
+
+
+def captcha_key(email: str) -> str:
+    return f"captcha:{email}"
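send_email_html centralizes template lookup and rendering: callers pick a registry key from api/utils/email_templates.py and pass the Jinja context as keyword arguments. A hedged sketch of sending the reset-code template (the recipient and code values are illustrative; SMTP setup is assumed to come from api/apps):

    # "reset_code" is the registry key added in api/utils/email_templates.py;
    # ttl_min mirrors OTP_TTL_SECONDS defined above.
    send_email_html(
        subject="RAGFlow Password Reset",
        to_email="user@example.com",
        template_key="reset_code",
        code="ABCD1234",                 # hypothetical OTP (OTP_LENGTH == 8)
        ttl_min=OTP_TTL_SECONDS // 60,   # 5
    )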
@@ -31,7 +31,6 @@
     "entities_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"},
     "pagerank_fea": {"type": "integer", "default": 0},
     "tag_feas": {"type": "varchar", "default": "", "analyzer": "rankfeatures"},
-
     "from_entity_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"},
     "to_entity_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"},
     "entity_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"},
@@ -39,6 +38,6 @@
     "source_id": {"type": "varchar", "default": "", "analyzer": "whitespace-#"},
     "n_hop_with_weight": {"type": "varchar", "default": ""},
     "removed_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"},
-    "doc_type_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"}
+    "doc_type_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"},
+    "toc_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"}
 }
@@ -803,6 +803,12 @@
             "tags": "TEXT EMBEDDING",
             "max_tokens": 512,
             "model_type": "embedding"
+        },
+        {
+            "llm_name": "glm-asr",
+            "tags": "SPEECH2TEXT",
+            "max_tokens": 4096,
+            "model_type": "speech2text"
         }
     ]
 },
@@ -965,31 +971,9 @@
 {
     "name": "VolcEngine",
     "logo": "",
-    "tags": "LLM, TEXT EMBEDDING",
+    "tags": "LLM, TEXT EMBEDDING, IMAGE2TEXT",
     "status": "1",
-    "llm": [
-        {
-            "llm_name": "Doubao-pro-128k",
-            "tags": "LLM,CHAT,128k",
-            "max_tokens": 131072,
-            "model_type": "chat",
-            "is_tools": true
-        },
-        {
-            "llm_name": "Doubao-pro-32k",
-            "tags": "LLM,CHAT,32k",
-            "max_tokens": 32768,
-            "model_type": "chat",
-            "is_tools": true
-        },
-        {
-            "llm_name": "Doubao-pro-4k",
-            "tags": "LLM,CHAT,4k",
-            "max_tokens": 4096,
-            "model_type": "chat",
-            "is_tools": true
-        }
-    ]
+    "llm": []
 },
 {
     "name": "BaiChuan",
@@ -1361,35 +1345,35 @@
     "llm_name": "gemini-2.5-flash",
     "tags": "LLM,CHAT,1024K,IMAGE2TEXT",
     "max_tokens": 1048576,
-    "model_type": "chat",
+    "model_type": "image2text",
     "is_tools": true
 },
 {
     "llm_name": "gemini-2.5-pro",
     "tags": "LLM,CHAT,IMAGE2TEXT,1024K",
     "max_tokens": 1048576,
-    "model_type": "chat",
+    "model_type": "image2text",
     "is_tools": true
 },
 {
     "llm_name": "gemini-2.5-flash-lite",
     "tags": "LLM,CHAT,1024K,IMAGE2TEXT",
     "max_tokens": 1048576,
-    "model_type": "chat",
+    "model_type": "image2text",
     "is_tools": true
 },
 {
     "llm_name": "gemini-2.0-flash",
     "tags": "LLM,CHAT,1024K",
     "max_tokens": 1048576,
-    "model_type": "chat",
+    "model_type": "image2text",
     "is_tools": true
 },
 {
     "llm_name": "gemini-2.0-flash-lite",
     "tags": "LLM,CHAT,1024K",
     "max_tokens": 1048576,
-    "model_type": "chat",
+    "model_type": "image2text",
     "is_tools": true
 },
 {
@@ -2816,6 +2800,13 @@
     "tags": "LLM,TEXT EMBEDDING,TEXT RE-RANK,IMAGE2TEXT",
     "status": "1",
     "llm": [
+        {
+            "llm_name": "THUDM/GLM-4.1V-9B-Thinking",
+            "tags": "LLM,CHAT,IMAGE2TEXT, 64k",
+            "max_tokens": 64000,
+            "model_type": "chat",
+            "is_tools": false
+        },
         {
             "llm_name": "Qwen/Qwen3-Embedding-8B",
             "tags": "TEXT EMBEDDING,TEXT RE-RANK,32k",
@@ -2996,7 +2987,7 @@
     "tags": "LLM,CHAT,IMAGE2TEXT,32k",
     "max_tokens": 32000,
     "model_type": "image2text",
-    "is_tools": true
+    "is_tools": false
 },
 {
     "llm_name": "THUDM/GLM-Z1-32B-0414",
@@ -3145,13 +3136,6 @@
     "model_type": "chat",
     "is_tools": true
 },
-{
-    "llm_name": "Qwen/Qwen2-1.5B-Instruct",
-    "tags": "LLM,CHAT,32k",
-    "max_tokens": 32000,
-    "model_type": "chat",
-    "is_tools": true
-},
 {
     "llm_name": "Pro/Qwen/Qwen2.5-Coder-7B-Instruct",
     "tags": "LLM,CHAT,32k",
@@ -3159,13 +3143,6 @@
     "model_type": "chat",
     "is_tools": false
 },
-{
-    "llm_name": "Pro/Qwen/Qwen2-VL-7B-Instruct",
-    "tags": "LLM,CHAT,IMAGE2TEXT,32k",
-    "max_tokens": 32000,
-    "model_type": "image2text",
-    "is_tools": false
-},
 {
     "llm_name": "Pro/Qwen/Qwen2.5-7B-Instruct",
     "tags": "LLM,CHAT,32k",
@@ -3533,6 +3510,13 @@
     "model_type": "chat",
     "is_tools": true
 },
+{
+    "llm_name": "claude-sonnet-4-5-20250929",
+    "tags": "LLM,CHAT,IMAGE2TEXT,200k",
+    "max_tokens": 204800,
+    "model_type": "chat",
+    "is_tools": true
+},
 {
     "llm_name": "claude-sonnet-4-20250514",
     "tags": "LLM,CHAT,IMAGE2TEXT,200k",
@@ -4862,8 +4846,282 @@
     "max_tokens": 8000,
     "model_type": "chat",
     "is_tools": true
+},
+{
+    "llm_name": "LongCat-Flash-Thinking",
+    "tags": "LLM,CHAT,8000",
+    "max_tokens": 8000,
+    "model_type": "chat",
+    "is_tools": true
+}
+]
+},
+{
+    "name": "DeerAPI",
+    "logo": "",
+    "tags": "LLM,TEXT EMBEDDING,IMAGE2TEXT",
+    "status": "1",
+    "llm": [
+        {
+            "llm_name": "gpt-5-chat-latest",
+            "tags": "LLM,CHAT,400k",
+            "max_tokens": 400000,
+            "model_type": "chat",
+            "is_tools": true
+        },
+        {
+            "llm_name": "chatgpt-4o-latest",
+            "tags": "LLM,CHAT,128k",
+            "max_tokens": 128000,
+            "model_type": "chat",
+            "is_tools": true
+        },
+        {
+            "llm_name": "gpt-5-mini",
+            "tags": "LLM,CHAT,400k",
+            "max_tokens": 400000,
+            "model_type": "chat",
+            "is_tools": true
+        },
+        {
+            "llm_name": "gpt-5-nano",
+            "tags": "LLM,CHAT,400k",
+            "max_tokens": 400000,
+            "model_type": "chat",
+            "is_tools": true
+        },
+        {
+            "llm_name": "gpt-5",
+            "tags": "LLM,CHAT,400k",
+            "max_tokens": 400000,
+            "model_type": "chat",
+            "is_tools": true
+        },
+        {
+            "llm_name": "gpt-4.1-mini",
+            "tags": "LLM,CHAT,1M",
+            "max_tokens": 1047576,
+            "model_type": "chat",
+            "is_tools": true
+        },
+        {
+            "llm_name": "gpt-4.1-nano",
+            "tags": "LLM,CHAT,1M",
+            "max_tokens": 1047576,
+            "model_type": "chat",
+            "is_tools": true
+        },
+        {
+            "llm_name": "gpt-4.1",
+            "tags": "LLM,CHAT,1M",
+            "max_tokens": 1047576,
+            "model_type": "chat",
+            "is_tools": true
+        },
+        {
+            "llm_name": "gpt-4o-mini",
+            "tags": "LLM,CHAT,128k",
+            "max_tokens": 128000,
+            "model_type": "chat",
+            "is_tools": true
+        },
+        {
+            "llm_name": "o4-mini-2025-04-16",
+            "tags": "LLM,CHAT,200k",
+            "max_tokens": 200000,
+            "model_type": "chat",
+            "is_tools": true
+        },
+        {
+            "llm_name": "o3-pro-2025-06-10",
+            "tags": "LLM,CHAT,200k",
+            "max_tokens": 200000,
+            "model_type": "chat",
+            "is_tools": true
+        },
+        {
+            "llm_name": "claude-opus-4-1-20250805",
+            "tags": "LLM,CHAT,200k,IMAGE2TEXT",
+            "max_tokens": 200000,
+            "model_type": "image2text",
+            "is_tools": true
+        },
+        {
+            "llm_name": "claude-opus-4-1-20250805-thinking",
+            "tags": "LLM,CHAT,200k,IMAGE2TEXT",
+            "max_tokens": 200000,
+            "model_type": "image2text",
+            "is_tools": true
+        },
+        {
+            "llm_name": "claude-sonnet-4-20250514",
+            "tags": "LLM,CHAT,200k,IMAGE2TEXT",
+            "max_tokens": 200000,
+            "model_type": "image2text",
+            "is_tools": true
+        },
+        {
+            "llm_name": "claude-sonnet-4-20250514-thinking",
+            "tags": "LLM,CHAT,200k,IMAGE2TEXT",
+            "max_tokens": 200000,
+            "model_type": "image2text",
+            "is_tools": true
+        },
+        {
+            "llm_name": "claude-3-7-sonnet-latest",
+            "tags": "LLM,CHAT,200k",
+            "max_tokens": 200000,
+            "model_type": "chat",
+            "is_tools": true
||||||
|
},
|
||||||
|
{
|
||||||
|
"llm_name": "claude-3-5-haiku-latest",
|
||||||
|
"tags": "LLM,CHAT,200k",
|
||||||
|
"max_tokens": 200000,
|
||||||
|
"model_type": "chat",
|
||||||
|
"is_tools": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"llm_name": "gemini-2.5-pro",
|
||||||
|
"tags": "LLM,CHAT,1M,IMAGE2TEXT",
|
||||||
|
"max_tokens": 1000000,
|
||||||
|
"model_type": "image2text",
|
||||||
|
"is_tools": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"llm_name": "gemini-2.5-flash",
|
||||||
|
"tags": "LLM,CHAT,1M,IMAGE2TEXT",
|
||||||
|
"max_tokens": 1000000,
|
||||||
|
"model_type": "image2text",
|
||||||
|
"is_tools": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"llm_name": "gemini-2.5-flash-lite",
|
||||||
|
"tags": "LLM,CHAT,1M,IMAGE2TEXT",
|
||||||
|
"max_tokens": 1000000,
|
||||||
|
"model_type": "image2text",
|
||||||
|
"is_tools": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"llm_name": "gemini-2.0-flash",
|
||||||
|
"tags": "LLM,CHAT,1M,IMAGE2TEXT",
|
||||||
|
"max_tokens": 1000000,
|
||||||
|
"model_type": "image2text",
|
||||||
|
"is_tools": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"llm_name": "grok-4-0709",
|
||||||
|
"tags": "LLM,CHAT,131k",
|
||||||
|
"max_tokens": 131072,
|
||||||
|
"model_type": "chat",
|
||||||
|
"is_tools": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"llm_name": "grok-3",
|
||||||
|
"tags": "LLM,CHAT,131k",
|
||||||
|
"max_tokens": 131072,
|
||||||
|
"model_type": "chat",
|
||||||
|
"is_tools": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"llm_name": "grok-3-mini",
|
||||||
|
"tags": "LLM,CHAT,131k",
|
||||||
|
"max_tokens": 131072,
|
||||||
|
"model_type": "chat",
|
||||||
|
"is_tools": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"llm_name": "grok-2-image-1212",
|
||||||
|
"tags": "LLM,CHAT,32k,IMAGE2TEXT",
|
||||||
|
"max_tokens": 32768,
|
||||||
|
"model_type": "image2text",
|
||||||
|
"is_tools": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"llm_name": "deepseek-v3.1",
|
||||||
|
"tags": "LLM,CHAT,64k",
|
||||||
|
"max_tokens": 64000,
|
||||||
|
"model_type": "chat",
|
||||||
|
"is_tools": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"llm_name": "deepseek-v3",
|
||||||
|
"tags": "LLM,CHAT,64k",
|
||||||
|
"max_tokens": 64000,
|
||||||
|
"model_type": "chat",
|
||||||
|
"is_tools": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"llm_name": "deepseek-r1-0528",
|
||||||
|
"tags": "LLM,CHAT,164k",
|
||||||
|
"max_tokens": 164000,
|
||||||
|
"model_type": "chat",
|
||||||
|
"is_tools": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"llm_name": "deepseek-chat",
|
||||||
|
"tags": "LLM,CHAT,32k",
|
||||||
|
"max_tokens": 32000,
|
||||||
|
"model_type": "chat",
|
||||||
|
"is_tools": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"llm_name": "deepseek-reasoner",
|
||||||
|
"tags": "LLM,CHAT,64k",
|
||||||
|
"max_tokens": 64000,
|
||||||
|
"model_type": "chat",
|
||||||
|
"is_tools": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"llm_name": "qwen3-30b-a3b",
|
||||||
|
"tags": "LLM,CHAT,128k",
|
||||||
|
"max_tokens": 128000,
|
||||||
|
"model_type": "chat",
|
||||||
|
"is_tools": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"llm_name": "qwen3-coder-plus-2025-07-22",
|
||||||
|
"tags": "LLM,CHAT,128k",
|
||||||
|
"max_tokens": 128000,
|
||||||
|
"model_type": "chat",
|
||||||
|
"is_tools": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"llm_name": "text-embedding-ada-002",
|
||||||
|
"tags": "TEXT EMBEDDING,8K",
|
||||||
|
"max_tokens": 8191,
|
||||||
|
"model_type": "embedding",
|
||||||
|
"is_tools": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"llm_name": "text-embedding-3-small",
|
||||||
|
"tags": "TEXT EMBEDDING,8K",
|
||||||
|
"max_tokens": 8191,
|
||||||
|
"model_type": "embedding",
|
||||||
|
"is_tools": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"llm_name": "text-embedding-3-large",
|
||||||
|
"tags": "TEXT EMBEDDING,8K",
|
||||||
|
"max_tokens": 8191,
|
||||||
|
"model_type": "embedding",
|
||||||
|
"is_tools": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"llm_name": "whisper-1",
|
||||||
|
"tags": "SPEECH2TEXT",
|
||||||
|
"max_tokens": 26214400,
|
||||||
|
"model_type": "speech2text",
|
||||||
|
"is_tools": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"llm_name": "tts-1",
|
||||||
|
"tags": "TTS",
|
||||||
|
"max_tokens": 2048,
|
||||||
|
"model_type": "tts",
|
||||||
|
"is_tools": false
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff.
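Every hunk above edits model entries of the same shape: an object with "llm_name", "tags", "max_tokens", "model_type", and "is_tools", grouped under a factory object carrying "name", "logo", "tags", "status", and an "llm" list. For a hand-maintained file like this, a small schema check catches the kind of slip these hunks correct (a wrong "model_type", a stale "is_tools" flag) before it ships. The Python sketch below is a minimal illustration assuming only the structure visible in this diff: the file path and function name are hypothetical, the accepted "model_type" values are just the ones that appear here, and this is not ragflow's actual loading code.

import json

# Keys that every model entry in this diff carries.
REQUIRED_MODEL_KEYS = {"llm_name", "tags", "max_tokens", "model_type", "is_tools"}
# model_type values seen in this diff; the real file may permit more.
KNOWN_MODEL_TYPES = {"chat", "image2text", "embedding", "speech2text", "tts"}


def validate_factories(path):
    """Return human-readable problems found in a factory config of this shape."""
    with open(path, encoding="utf-8") as fh:
        data = json.load(fh)
    problems = []
    for factory in data.get("factory_llm_infos", []):
        factory_name = factory.get("name", "<unnamed factory>")
        for entry in factory.get("llm", []):
            model_name = entry.get("llm_name", "<unnamed model>")
            where = f"{factory_name}/{model_name}"
            missing = REQUIRED_MODEL_KEYS - entry.keys()
            if missing:
                problems.append(f"{where}: missing keys {sorted(missing)}")
                continue
            if entry["model_type"] not in KNOWN_MODEL_TYPES:
                problems.append(f"{where}: unexpected model_type {entry['model_type']!r}")
            if not isinstance(entry["max_tokens"], int) or entry["max_tokens"] <= 0:
                problems.append(f"{where}: max_tokens should be a positive integer")
            if not isinstance(entry["is_tools"], bool):
                problems.append(f"{where}: is_tools should be a boolean")
    return problems


if __name__ == "__main__":
    # Hypothetical path; point this at the JSON file the diff above modifies.
    for problem in validate_factories("llm_factories.json"):
        print(problem)

Run against the post-merge file, a check like this should print nothing; a misspelled "model_type" or a missing "is_tools" key would be reported under its factory and model name.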