Use Infinity single-field-multi-index (#11444)

### What problem does this PR solve?

Use Infinity single-field-multi-index

### Type of change

- [x] Refactoring
- [x] Performance Improvement
This commit is contained in:
Zhichang Yu
2025-11-26 11:06:37 +08:00
committed by GitHub
parent a28c672695
commit 40e84ca41a
22 changed files with 577 additions and 140 deletions

View File

@ -193,7 +193,7 @@ jobs:
echo "HOST_ADDRESS=http://host.docker.internal:${SVR_HTTP_PORT}" >> ${GITHUB_ENV} echo "HOST_ADDRESS=http://host.docker.internal:${SVR_HTTP_PORT}" >> ${GITHUB_ENV}
sudo docker compose -f docker/docker-compose.yml -p ${GITHUB_RUN_ID} up -d sudo docker compose -f docker/docker-compose.yml -p ${GITHUB_RUN_ID} up -d
uv sync --python 3.10 --only-group test --no-default-groups --frozen && uv pip install sdk/python uv sync --python 3.10 --only-group test --no-default-groups --frozen && uv pip install sdk/python --group test
- name: Run sdk tests against Elasticsearch - name: Run sdk tests against Elasticsearch
run: | run: |

View File

@ -8,7 +8,7 @@ readme = "README.md"
requires-python = ">=3.10,<3.13" requires-python = ">=3.10,<3.13"
dependencies = [ dependencies = [
"requests>=2.30.0,<3.0.0", "requests>=2.30.0,<3.0.0",
"beartype>=0.18.5,<0.19.0", "beartype>=0.20.0,<1.0.0",
"pycryptodomex>=3.10.0", "pycryptodomex>=3.10.0",
"lark>=1.1.0", "lark>=1.1.0",
] ]

298
admin/client/uv.lock generated Normal file
View File

@ -0,0 +1,298 @@
version = 1
revision = 3
requires-python = ">=3.10, <3.13"
[[package]]
name = "beartype"
version = "0.22.6"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/88/e2/105ceb1704cb80fe4ab3872529ab7b6f365cf7c74f725e6132d0efcf1560/beartype-0.22.6.tar.gz", hash = "sha256:97fbda69c20b48c5780ac2ca60ce3c1bb9af29b3a1a0216898ffabdd523e48f4", size = 1588975, upload-time = "2025-11-20T04:47:14.736Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/98/c9/ceecc71fe2c9495a1d8e08d44f5f31f5bca1350d5b2e27a4b6265424f59e/beartype-0.22.6-py3-none-any.whl", hash = "sha256:0584bc46a2ea2a871509679278cda992eadde676c01356ab0ac77421f3c9a093", size = 1324807, upload-time = "2025-11-20T04:47:11.837Z" },
]
[[package]]
name = "certifi"
version = "2025.11.12"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a2/8c/58f469717fa48465e4a50c014a0400602d3c437d7c0c468e17ada824da3a/certifi-2025.11.12.tar.gz", hash = "sha256:d8ab5478f2ecd78af242878415affce761ca6bc54a22a27e026d7c25357c3316", size = 160538, upload-time = "2025-11-12T02:54:51.517Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/70/7d/9bc192684cea499815ff478dfcdc13835ddf401365057044fb721ec6bddb/certifi-2025.11.12-py3-none-any.whl", hash = "sha256:97de8790030bbd5c2d96b7ec782fc2f7820ef8dba6db909ccf95449f2d062d4b", size = 159438, upload-time = "2025-11-12T02:54:49.735Z" },
]
[[package]]
name = "charset-normalizer"
version = "3.4.4"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/13/69/33ddede1939fdd074bce5434295f38fae7136463422fe4fd3e0e89b98062/charset_normalizer-3.4.4.tar.gz", hash = "sha256:94537985111c35f28720e43603b8e7b43a6ecfb2ce1d3058bbe955b73404e21a", size = 129418, upload-time = "2025-10-14T04:42:32.879Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/1f/b8/6d51fc1d52cbd52cd4ccedd5b5b2f0f6a11bbf6765c782298b0f3e808541/charset_normalizer-3.4.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e824f1492727fa856dd6eda4f7cee25f8518a12f3c4a56a74e8095695089cf6d", size = 209709, upload-time = "2025-10-14T04:40:11.385Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/5c/af/1f9d7f7faafe2ddfb6f72a2e07a548a629c61ad510fe60f9630309908fef/charset_normalizer-3.4.4-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4bd5d4137d500351a30687c2d3971758aac9a19208fc110ccb9d7188fbe709e8", size = 148814, upload-time = "2025-10-14T04:40:13.135Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/79/3d/f2e3ac2bbc056ca0c204298ea4e3d9db9b4afe437812638759db2c976b5f/charset_normalizer-3.4.4-cp310-cp310-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:027f6de494925c0ab2a55eab46ae5129951638a49a34d87f4c3eda90f696b4ad", size = 144467, upload-time = "2025-10-14T04:40:14.728Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ec/85/1bf997003815e60d57de7bd972c57dc6950446a3e4ccac43bc3070721856/charset_normalizer-3.4.4-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f820802628d2694cb7e56db99213f930856014862f3fd943d290ea8438d07ca8", size = 162280, upload-time = "2025-10-14T04:40:16.14Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/3e/8e/6aa1952f56b192f54921c436b87f2aaf7c7a7c3d0d1a765547d64fd83c13/charset_normalizer-3.4.4-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:798d75d81754988d2565bff1b97ba5a44411867c0cf32b77a7e8f8d84796b10d", size = 159454, upload-time = "2025-10-14T04:40:17.567Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/36/3b/60cbd1f8e93aa25d1c669c649b7a655b0b5fb4c571858910ea9332678558/charset_normalizer-3.4.4-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9d1bb833febdff5c8927f922386db610b49db6e0d4f4ee29601d71e7c2694313", size = 153609, upload-time = "2025-10-14T04:40:19.08Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/64/91/6a13396948b8fd3c4b4fd5bc74d045f5637d78c9675585e8e9fbe5636554/charset_normalizer-3.4.4-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:9cd98cdc06614a2f768d2b7286d66805f94c48cde050acdbbb7db2600ab3197e", size = 151849, upload-time = "2025-10-14T04:40:20.607Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/b7/7a/59482e28b9981d105691e968c544cc0df3b7d6133152fb3dcdc8f135da7a/charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:077fbb858e903c73f6c9db43374fd213b0b6a778106bc7032446a8e8b5b38b93", size = 151586, upload-time = "2025-10-14T04:40:21.719Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/92/59/f64ef6a1c4bdd2baf892b04cd78792ed8684fbc48d4c2afe467d96b4df57/charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:244bfb999c71b35de57821b8ea746b24e863398194a4014e4c76adc2bbdfeff0", size = 145290, upload-time = "2025-10-14T04:40:23.069Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/6b/63/3bf9f279ddfa641ffa1962b0db6a57a9c294361cc2f5fcac997049a00e9c/charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:64b55f9dce520635f018f907ff1b0df1fdc31f2795a922fb49dd14fbcdf48c84", size = 163663, upload-time = "2025-10-14T04:40:24.17Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ed/09/c9e38fc8fa9e0849b172b581fd9803bdf6e694041127933934184e19f8c3/charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:faa3a41b2b66b6e50f84ae4a68c64fcd0c44355741c6374813a800cd6695db9e", size = 151964, upload-time = "2025-10-14T04:40:25.368Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/d2/d1/d28b747e512d0da79d8b6a1ac18b7ab2ecfd81b2944c4c710e166d8dd09c/charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:6515f3182dbe4ea06ced2d9e8666d97b46ef4c75e326b79bb624110f122551db", size = 161064, upload-time = "2025-10-14T04:40:26.806Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/bb/9a/31d62b611d901c3b9e5500c36aab0ff5eb442043fb3a1c254200d3d397d9/charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:cc00f04ed596e9dc0da42ed17ac5e596c6ccba999ba6bd92b0e0aef2f170f2d6", size = 155015, upload-time = "2025-10-14T04:40:28.284Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/1f/f3/107e008fa2bff0c8b9319584174418e5e5285fef32f79d8ee6a430d0039c/charset_normalizer-3.4.4-cp310-cp310-win32.whl", hash = "sha256:f34be2938726fc13801220747472850852fe6b1ea75869a048d6f896838c896f", size = 99792, upload-time = "2025-10-14T04:40:29.613Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/eb/66/e396e8a408843337d7315bab30dbf106c38966f1819f123257f5520f8a96/charset_normalizer-3.4.4-cp310-cp310-win_amd64.whl", hash = "sha256:a61900df84c667873b292c3de315a786dd8dac506704dea57bc957bd31e22c7d", size = 107198, upload-time = "2025-10-14T04:40:30.644Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/b5/58/01b4f815bf0312704c267f2ccb6e5d42bcc7752340cd487bc9f8c3710597/charset_normalizer-3.4.4-cp310-cp310-win_arm64.whl", hash = "sha256:cead0978fc57397645f12578bfd2d5ea9138ea0fac82b2f63f7f7c6877986a69", size = 100262, upload-time = "2025-10-14T04:40:32.108Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ed/27/c6491ff4954e58a10f69ad90aca8a1b6fe9c5d3c6f380907af3c37435b59/charset_normalizer-3.4.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6e1fcf0720908f200cd21aa4e6750a48ff6ce4afe7ff5a79a90d5ed8a08296f8", size = 206988, upload-time = "2025-10-14T04:40:33.79Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/94/59/2e87300fe67ab820b5428580a53cad894272dbb97f38a7a814a2a1ac1011/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5f819d5fe9234f9f82d75bdfa9aef3a3d72c4d24a6e57aeaebba32a704553aa0", size = 147324, upload-time = "2025-10-14T04:40:34.961Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/07/fb/0cf61dc84b2b088391830f6274cb57c82e4da8bbc2efeac8c025edb88772/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:a59cb51917aa591b1c4e6a43c132f0cdc3c76dbad6155df4e28ee626cc77a0a3", size = 142742, upload-time = "2025-10-14T04:40:36.105Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/62/8b/171935adf2312cd745d290ed93cf16cf0dfe320863ab7cbeeae1dcd6535f/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:8ef3c867360f88ac904fd3f5e1f902f13307af9052646963ee08ff4f131adafc", size = 160863, upload-time = "2025-10-14T04:40:37.188Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/09/73/ad875b192bda14f2173bfc1bc9a55e009808484a4b256748d931b6948442/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d9e45d7faa48ee908174d8fe84854479ef838fc6a705c9315372eacbc2f02897", size = 157837, upload-time = "2025-10-14T04:40:38.435Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/6d/fc/de9cce525b2c5b94b47c70a4b4fb19f871b24995c728e957ee68ab1671ea/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:840c25fb618a231545cbab0564a799f101b63b9901f2569faecd6b222ac72381", size = 151550, upload-time = "2025-10-14T04:40:40.053Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/55/c2/43edd615fdfba8c6f2dfbd459b25a6b3b551f24ea21981e23fb768503ce1/charset_normalizer-3.4.4-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ca5862d5b3928c4940729dacc329aa9102900382fea192fc5e52eb69d6093815", size = 149162, upload-time = "2025-10-14T04:40:41.163Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/03/86/bde4ad8b4d0e9429a4e82c1e8f5c659993a9a863ad62c7df05cf7b678d75/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d9c7f57c3d666a53421049053eaacdd14bbd0a528e2186fcb2e672effd053bb0", size = 150019, upload-time = "2025-10-14T04:40:42.276Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/1f/86/a151eb2af293a7e7bac3a739b81072585ce36ccfb4493039f49f1d3cae8c/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:277e970e750505ed74c832b4bf75dac7476262ee2a013f5574dd49075879e161", size = 143310, upload-time = "2025-10-14T04:40:43.439Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/b5/fe/43dae6144a7e07b87478fdfc4dbe9efd5defb0e7ec29f5f58a55aeef7bf7/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:31fd66405eaf47bb62e8cd575dc621c56c668f27d46a61d975a249930dd5e2a4", size = 162022, upload-time = "2025-10-14T04:40:44.547Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/80/e6/7aab83774f5d2bca81f42ac58d04caf44f0cc2b65fc6db2b3b2e8a05f3b3/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:0d3d8f15c07f86e9ff82319b3d9ef6f4bf907608f53fe9d92b28ea9ae3d1fd89", size = 149383, upload-time = "2025-10-14T04:40:46.018Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/4f/e8/b289173b4edae05c0dde07f69f8db476a0b511eac556dfe0d6bda3c43384/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:9f7fcd74d410a36883701fafa2482a6af2ff5ba96b9a620e9e0721e28ead5569", size = 159098, upload-time = "2025-10-14T04:40:47.081Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/d8/df/fe699727754cae3f8478493c7f45f777b17c3ef0600e28abfec8619eb49c/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ebf3e58c7ec8a8bed6d66a75d7fb37b55e5015b03ceae72a8e7c74495551e224", size = 152991, upload-time = "2025-10-14T04:40:48.246Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/1a/86/584869fe4ddb6ffa3bd9f491b87a01568797fb9bd8933f557dba9771beaf/charset_normalizer-3.4.4-cp311-cp311-win32.whl", hash = "sha256:eecbc200c7fd5ddb9a7f16c7decb07b566c29fa2161a16cf67b8d068bd21690a", size = 99456, upload-time = "2025-10-14T04:40:49.376Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/65/f6/62fdd5feb60530f50f7e38b4f6a1d5203f4d16ff4f9f0952962c044e919a/charset_normalizer-3.4.4-cp311-cp311-win_amd64.whl", hash = "sha256:5ae497466c7901d54b639cf42d5b8c1b6a4fead55215500d2f486d34db48d016", size = 106978, upload-time = "2025-10-14T04:40:50.844Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/7a/9d/0710916e6c82948b3be62d9d398cb4fcf4e97b56d6a6aeccd66c4b2f2bd5/charset_normalizer-3.4.4-cp311-cp311-win_arm64.whl", hash = "sha256:65e2befcd84bc6f37095f5961e68a6f077bf44946771354a28ad434c2cce0ae1", size = 99969, upload-time = "2025-10-14T04:40:52.272Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/f3/85/1637cd4af66fa687396e757dec650f28025f2a2f5a5531a3208dc0ec43f2/charset_normalizer-3.4.4-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0a98e6759f854bd25a58a73fa88833fba3b7c491169f86ce1180c948ab3fd394", size = 208425, upload-time = "2025-10-14T04:40:53.353Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/9d/6a/04130023fef2a0d9c62d0bae2649b69f7b7d8d24ea5536feef50551029df/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b5b290ccc2a263e8d185130284f8501e3e36c5e02750fc6b6bdeb2e9e96f1e25", size = 148162, upload-time = "2025-10-14T04:40:54.558Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/78/29/62328d79aa60da22c9e0b9a66539feae06ca0f5a4171ac4f7dc285b83688/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74bb723680f9f7a6234dcf67aea57e708ec1fbdf5699fb91dfd6f511b0a320ef", size = 144558, upload-time = "2025-10-14T04:40:55.677Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/86/bb/b32194a4bf15b88403537c2e120b817c61cd4ecffa9b6876e941c3ee38fe/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f1e34719c6ed0b92f418c7c780480b26b5d9c50349e9a9af7d76bf757530350d", size = 161497, upload-time = "2025-10-14T04:40:57.217Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/19/89/a54c82b253d5b9b111dc74aca196ba5ccfcca8242d0fb64146d4d3183ff1/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:2437418e20515acec67d86e12bf70056a33abdacb5cb1655042f6538d6b085a8", size = 159240, upload-time = "2025-10-14T04:40:58.358Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/c0/10/d20b513afe03acc89ec33948320a5544d31f21b05368436d580dec4e234d/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:11d694519d7f29d6cd09f6ac70028dba10f92f6cdd059096db198c283794ac86", size = 153471, upload-time = "2025-10-14T04:40:59.468Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/61/fa/fbf177b55bdd727010f9c0a3c49eefa1d10f960e5f09d1d887bf93c2e698/charset_normalizer-3.4.4-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ac1c4a689edcc530fc9d9aa11f5774b9e2f33f9a0c6a57864e90908f5208d30a", size = 150864, upload-time = "2025-10-14T04:41:00.623Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/05/12/9fbc6a4d39c0198adeebbde20b619790e9236557ca59fc40e0e3cebe6f40/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:21d142cc6c0ec30d2efee5068ca36c128a30b0f2c53c1c07bd78cb6bc1d3be5f", size = 150647, upload-time = "2025-10-14T04:41:01.754Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ad/1f/6a9a593d52e3e8c5d2b167daf8c6b968808efb57ef4c210acb907c365bc4/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:5dbe56a36425d26d6cfb40ce79c314a2e4dd6211d51d6d2191c00bed34f354cc", size = 145110, upload-time = "2025-10-14T04:41:03.231Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/30/42/9a52c609e72471b0fc54386dc63c3781a387bb4fe61c20231a4ebcd58bdd/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:5bfbb1b9acf3334612667b61bd3002196fe2a1eb4dd74d247e0f2a4d50ec9bbf", size = 162839, upload-time = "2025-10-14T04:41:04.715Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/c4/5b/c0682bbf9f11597073052628ddd38344a3d673fda35a36773f7d19344b23/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:d055ec1e26e441f6187acf818b73564e6e6282709e9bcb5b63f5b23068356a15", size = 150667, upload-time = "2025-10-14T04:41:05.827Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/e4/24/a41afeab6f990cf2daf6cb8c67419b63b48cf518e4f56022230840c9bfb2/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:af2d8c67d8e573d6de5bc30cdb27e9b95e49115cd9baad5ddbd1a6207aaa82a9", size = 160535, upload-time = "2025-10-14T04:41:06.938Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/2a/e5/6a4ce77ed243c4a50a1fecca6aaaab419628c818a49434be428fe24c9957/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:780236ac706e66881f3b7f2f32dfe90507a09e67d1d454c762cf642e6e1586e0", size = 154816, upload-time = "2025-10-14T04:41:08.101Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/a8/ef/89297262b8092b312d29cdb2517cb1237e51db8ecef2e9af5edbe7b683b1/charset_normalizer-3.4.4-cp312-cp312-win32.whl", hash = "sha256:5833d2c39d8896e4e19b689ffc198f08ea58116bee26dea51e362ecc7cd3ed26", size = 99694, upload-time = "2025-10-14T04:41:09.23Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/3d/2d/1e5ed9dd3b3803994c155cd9aacb60c82c331bad84daf75bcb9c91b3295e/charset_normalizer-3.4.4-cp312-cp312-win_amd64.whl", hash = "sha256:a79cfe37875f822425b89a82333404539ae63dbdddf97f84dcbc3d339aae9525", size = 107131, upload-time = "2025-10-14T04:41:10.467Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/d0/d9/0ed4c7098a861482a7b6a95603edce4c0d9db2311af23da1fb2b75ec26fc/charset_normalizer-3.4.4-cp312-cp312-win_arm64.whl", hash = "sha256:376bec83a63b8021bb5c8ea75e21c4ccb86e7e45ca4eb81146091b56599b80c3", size = 100390, upload-time = "2025-10-14T04:41:11.915Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/0a/4c/925909008ed5a988ccbb72dcc897407e5d6d3bd72410d69e051fc0c14647/charset_normalizer-3.4.4-py3-none-any.whl", hash = "sha256:7a32c560861a02ff789ad905a2fe94e3f840803362c84fecf1851cb4cf3dc37f", size = 53402, upload-time = "2025-10-14T04:42:31.76Z" },
]
[[package]]
name = "colorama"
version = "0.4.6"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload-time = "2022-10-25T02:36:22.414Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" },
]
[[package]]
name = "exceptiongroup"
version = "1.3.1"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
dependencies = [
{ name = "typing-extensions" },
]
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/50/79/66800aadf48771f6b62f7eb014e352e5d06856655206165d775e675a02c9/exceptiongroup-1.3.1.tar.gz", hash = "sha256:8b412432c6055b0b7d14c310000ae93352ed6754f70fa8f7c34141f91c4e3219", size = 30371, upload-time = "2025-11-21T23:01:54.787Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/8a/0e/97c33bf5009bdbac74fd2beace167cab3f978feb69cc36f1ef79360d6c4e/exceptiongroup-1.3.1-py3-none-any.whl", hash = "sha256:a7a39a3bd276781e98394987d3a5701d0c4edffb633bb7a5144577f82c773598", size = 16740, upload-time = "2025-11-21T23:01:53.443Z" },
]
[[package]]
name = "idna"
version = "3.11"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/6f/6d/0703ccc57f3a7233505399edb88de3cbd678da106337b9fcde432b65ed60/idna-3.11.tar.gz", hash = "sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902", size = 194582, upload-time = "2025-10-12T14:55:20.501Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008, upload-time = "2025-10-12T14:55:18.883Z" },
]
[[package]]
name = "iniconfig"
version = "2.3.0"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/72/34/14ca021ce8e5dfedc35312d08ba8bf51fdd999c576889fc2c24cb97f4f10/iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", size = 20503, upload-time = "2025-10-18T21:55:43.219Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" },
]
[[package]]
name = "lark"
version = "1.3.1"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/da/34/28fff3ab31ccff1fd4f6c7c7b0ceb2b6968d8ea4950663eadcb5720591a0/lark-1.3.1.tar.gz", hash = "sha256:b426a7a6d6d53189d318f2b6236ab5d6429eaf09259f1ca33eb716eed10d2905", size = 382732, upload-time = "2025-10-27T18:25:56.653Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/82/3d/14ce75ef66813643812f3093ab17e46d3a206942ce7376d31ec2d36229e7/lark-1.3.1-py3-none-any.whl", hash = "sha256:c629b661023a014c37da873b4ff58a817398d12635d3bbb2c5a03be7fe5d1e12", size = 113151, upload-time = "2025-10-27T18:25:54.882Z" },
]
[[package]]
name = "packaging"
version = "25.0"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a1/d4/1fc4078c65507b51b96ca8f8c3ba19e6a61c8253c72794544580a7b6c24d/packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f", size = 165727, upload-time = "2025-04-19T11:48:59.673Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469, upload-time = "2025-04-19T11:48:57.875Z" },
]
[[package]]
name = "pluggy"
version = "1.6.0"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412, upload-time = "2025-05-15T12:30:07.975Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" },
]
[[package]]
name = "pycryptodomex"
version = "3.23.0"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c9/85/e24bf90972a30b0fcd16c73009add1d7d7cd9140c2498a68252028899e41/pycryptodomex-3.23.0.tar.gz", hash = "sha256:71909758f010c82bc99b0abf4ea12012c98962fbf0583c2164f8b84533c2e4da", size = 4922157, upload-time = "2025-05-17T17:23:41.434Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/dd/9c/1a8f35daa39784ed8adf93a694e7e5dc15c23c741bbda06e1d45f8979e9e/pycryptodomex-3.23.0-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:06698f957fe1ab229a99ba2defeeae1c09af185baa909a31a5d1f9d42b1aaed6", size = 2499240, upload-time = "2025-05-17T17:22:46.953Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/7a/62/f5221a191a97157d240cf6643747558759126c76ee92f29a3f4aee3197a5/pycryptodomex-3.23.0-cp37-abi3-macosx_10_9_x86_64.whl", hash = "sha256:b2c2537863eccef2d41061e82a881dcabb04944c5c06c5aa7110b577cc487545", size = 1644042, upload-time = "2025-05-17T17:22:49.098Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/8c/fd/5a054543c8988d4ed7b612721d7e78a4b9bf36bc3c5ad45ef45c22d0060e/pycryptodomex-3.23.0-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:43c446e2ba8df8889e0e16f02211c25b4934898384c1ec1ec04d7889c0333587", size = 2186227, upload-time = "2025-05-17T17:22:51.139Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/c8/a9/8862616a85cf450d2822dbd4fff1fcaba90877907a6ff5bc2672cafe42f8/pycryptodomex-3.23.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f489c4765093fb60e2edafdf223397bc716491b2b69fe74367b70d6999257a5c", size = 2272578, upload-time = "2025-05-17T17:22:53.676Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/46/9f/bda9c49a7c1842820de674ab36c79f4fbeeee03f8ff0e4f3546c3889076b/pycryptodomex-3.23.0-cp37-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bdc69d0d3d989a1029df0eed67cc5e8e5d968f3724f4519bd03e0ec68df7543c", size = 2312166, upload-time = "2025-05-17T17:22:56.585Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/03/cc/870b9bf8ca92866ca0186534801cf8d20554ad2a76ca959538041b7a7cf4/pycryptodomex-3.23.0-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:6bbcb1dd0f646484939e142462d9e532482bc74475cecf9c4903d4e1cd21f003", size = 2185467, upload-time = "2025-05-17T17:22:59.237Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/96/e3/ce9348236d8e669fea5dd82a90e86be48b9c341210f44e25443162aba187/pycryptodomex-3.23.0-cp37-abi3-musllinux_1_2_i686.whl", hash = "sha256:8a4fcd42ccb04c31268d1efeecfccfd1249612b4de6374205376b8f280321744", size = 2346104, upload-time = "2025-05-17T17:23:02.112Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/a5/e9/e869bcee87beb89040263c416a8a50204f7f7a83ac11897646c9e71e0daf/pycryptodomex-3.23.0-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:55ccbe27f049743a4caf4f4221b166560d3438d0b1e5ab929e07ae1702a4d6fd", size = 2271038, upload-time = "2025-05-17T17:23:04.872Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/8d/67/09ee8500dd22614af5fbaa51a4aee6e342b5fa8aecf0a6cb9cbf52fa6d45/pycryptodomex-3.23.0-cp37-abi3-win32.whl", hash = "sha256:189afbc87f0b9f158386bf051f720e20fa6145975f1e76369303d0f31d1a8d7c", size = 1771969, upload-time = "2025-05-17T17:23:07.115Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/69/96/11f36f71a865dd6df03716d33bd07a67e9d20f6b8d39820470b766af323c/pycryptodomex-3.23.0-cp37-abi3-win_amd64.whl", hash = "sha256:52e5ca58c3a0b0bd5e100a9fbc8015059b05cffc6c66ce9d98b4b45e023443b9", size = 1803124, upload-time = "2025-05-17T17:23:09.267Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/f9/93/45c1cdcbeb182ccd2e144c693eaa097763b08b38cded279f0053ed53c553/pycryptodomex-3.23.0-cp37-abi3-win_arm64.whl", hash = "sha256:02d87b80778c171445d67e23d1caef279bf4b25c3597050ccd2e13970b57fd51", size = 1707161, upload-time = "2025-05-17T17:23:11.414Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/f3/b8/3e76d948c3c4ac71335bbe75dac53e154b40b0f8f1f022dfa295257a0c96/pycryptodomex-3.23.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:ebfff755c360d674306e5891c564a274a47953562b42fb74a5c25b8fc1fb1cb5", size = 1627695, upload-time = "2025-05-17T17:23:17.38Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/6a/cf/80f4297a4820dfdfd1c88cf6c4666a200f204b3488103d027b5edd9176ec/pycryptodomex-3.23.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eca54f4bb349d45afc17e3011ed4264ef1cc9e266699874cdd1349c504e64798", size = 1675772, upload-time = "2025-05-17T17:23:19.202Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/d1/42/1e969ee0ad19fe3134b0e1b856c39bd0b70d47a4d0e81c2a8b05727394c9/pycryptodomex-3.23.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4f2596e643d4365e14d0879dc5aafe6355616c61c2176009270f3048f6d9a61f", size = 1668083, upload-time = "2025-05-17T17:23:21.867Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/6e/c3/1de4f7631fea8a992a44ba632aa40e0008764c0fb9bf2854b0acf78c2cf2/pycryptodomex-3.23.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fdfac7cda115bca3a5abb2f9e43bc2fb66c2b65ab074913643803ca7083a79ea", size = 1706056, upload-time = "2025-05-17T17:23:24.031Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/f2/5f/af7da8e6f1e42b52f44a24d08b8e4c726207434e2593732d39e7af5e7256/pycryptodomex-3.23.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:14c37aaece158d0ace436f76a7bb19093db3b4deade9797abfc39ec6cd6cc2fe", size = 1806478, upload-time = "2025-05-17T17:23:26.066Z" },
]
[[package]]
name = "pygments"
version = "2.19.2"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b0/77/a5b8c569bf593b0140bde72ea885a803b82086995367bf2037de0159d924/pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", size = 4968631, upload-time = "2025-06-21T13:39:12.283Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" },
]
[[package]]
name = "pytest"
version = "9.0.1"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
dependencies = [
{ name = "colorama", marker = "sys_platform == 'win32'" },
{ name = "exceptiongroup", marker = "python_full_version < '3.11'" },
{ name = "iniconfig" },
{ name = "packaging" },
{ name = "pluggy" },
{ name = "pygments" },
{ name = "tomli", marker = "python_full_version < '3.11'" },
]
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/07/56/f013048ac4bc4c1d9be45afd4ab209ea62822fb1598f40687e6bf45dcea4/pytest-9.0.1.tar.gz", hash = "sha256:3e9c069ea73583e255c3b21cf46b8d3c56f6e3a1a8f6da94ccb0fcf57b9d73c8", size = 1564125, upload-time = "2025-11-12T13:05:09.333Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/0b/8b/6300fb80f858cda1c51ffa17075df5d846757081d11ab4aa35cef9e6258b/pytest-9.0.1-py3-none-any.whl", hash = "sha256:67be0030d194df2dfa7b556f2e56fb3c3315bd5c8822c6951162b92b32ce7dad", size = 373668, upload-time = "2025-11-12T13:05:07.379Z" },
]
[[package]]
name = "ragflow-cli"
version = "0.22.1"
source = { virtual = "." }
dependencies = [
{ name = "beartype" },
{ name = "lark" },
{ name = "pycryptodomex" },
{ name = "requests" },
]
[package.dev-dependencies]
test = [
{ name = "pytest" },
{ name = "requests" },
{ name = "requests-toolbelt" },
]
[package.metadata]
requires-dist = [
{ name = "beartype", specifier = ">=0.20.0,<1.0.0" },
{ name = "lark", specifier = ">=1.1.0" },
{ name = "pycryptodomex", specifier = ">=3.10.0" },
{ name = "requests", specifier = ">=2.30.0,<3.0.0" },
]
[package.metadata.requires-dev]
test = [
{ name = "pytest", specifier = ">=8.3.5" },
{ name = "requests", specifier = ">=2.32.3" },
{ name = "requests-toolbelt", specifier = ">=1.0.0" },
]
[[package]]
name = "requests"
version = "2.32.5"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
dependencies = [
{ name = "certifi" },
{ name = "charset-normalizer" },
{ name = "idna" },
{ name = "urllib3" },
]
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c9/74/b3ff8e6c8446842c3f5c837e9c3dfcfe2018ea6ecef224c710c85ef728f4/requests-2.32.5.tar.gz", hash = "sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf", size = 134517, upload-time = "2025-08-18T20:46:02.573Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", size = 64738, upload-time = "2025-08-18T20:46:00.542Z" },
]
[[package]]
name = "requests-toolbelt"
version = "1.0.0"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
dependencies = [
{ name = "requests" },
]
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f3/61/d7545dafb7ac2230c70d38d31cbfe4cc64f7144dc41f6e4e4b78ecd9f5bb/requests-toolbelt-1.0.0.tar.gz", hash = "sha256:7681a0a3d047012b5bdc0ee37d7f8f07ebe76ab08caeccfc3921ce23c88d5bc6", size = 206888, upload-time = "2023-05-01T04:11:33.229Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/3f/51/d4db610ef29373b879047326cbf6fa98b6c1969d6f6dc423279de2b1be2c/requests_toolbelt-1.0.0-py2.py3-none-any.whl", hash = "sha256:cccfdd665f0a24fcf4726e690f65639d272bb0637b9b92dfd91a5568ccf6bd06", size = 54481, upload-time = "2023-05-01T04:11:28.427Z" },
]
[[package]]
name = "tomli"
version = "2.3.0"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/52/ed/3f73f72945444548f33eba9a87fc7a6e969915e7b1acc8260b30e1f76a2f/tomli-2.3.0.tar.gz", hash = "sha256:64be704a875d2a59753d80ee8a533c3fe183e3f06807ff7dc2232938ccb01549", size = 17392, upload-time = "2025-10-08T22:01:47.119Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/b3/2e/299f62b401438d5fe1624119c723f5d877acc86a4c2492da405626665f12/tomli-2.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:88bd15eb972f3664f5ed4b57c1634a97153b4bac4479dcb6a495f41921eb7f45", size = 153236, upload-time = "2025-10-08T22:01:00.137Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/86/7f/d8fffe6a7aefdb61bced88fcb5e280cfd71e08939da5894161bd71bea022/tomli-2.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:883b1c0d6398a6a9d29b508c331fa56adbcdff647f6ace4dfca0f50e90dfd0ba", size = 148084, upload-time = "2025-10-08T22:01:01.63Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/47/5c/24935fb6a2ee63e86d80e4d3b58b222dafaf438c416752c8b58537c8b89a/tomli-2.3.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d1381caf13ab9f300e30dd8feadb3de072aeb86f1d34a8569453ff32a7dea4bf", size = 234832, upload-time = "2025-10-08T22:01:02.543Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/89/da/75dfd804fc11e6612846758a23f13271b76d577e299592b4371a4ca4cd09/tomli-2.3.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a0e285d2649b78c0d9027570d4da3425bdb49830a6156121360b3f8511ea3441", size = 242052, upload-time = "2025-10-08T22:01:03.836Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/70/8c/f48ac899f7b3ca7eb13af73bacbc93aec37f9c954df3c08ad96991c8c373/tomli-2.3.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0a154a9ae14bfcf5d8917a59b51ffd5a3ac1fd149b71b47a3a104ca4edcfa845", size = 239555, upload-time = "2025-10-08T22:01:04.834Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ba/28/72f8afd73f1d0e7829bfc093f4cb98ce0a40ffc0cc997009ee1ed94ba705/tomli-2.3.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:74bf8464ff93e413514fefd2be591c3b0b23231a77f901db1eb30d6f712fc42c", size = 245128, upload-time = "2025-10-08T22:01:05.84Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/b6/eb/a7679c8ac85208706d27436e8d421dfa39d4c914dcf5fa8083a9305f58d9/tomli-2.3.0-cp311-cp311-win32.whl", hash = "sha256:00b5f5d95bbfc7d12f91ad8c593a1659b6387b43f054104cda404be6bda62456", size = 96445, upload-time = "2025-10-08T22:01:06.896Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/0a/fe/3d3420c4cb1ad9cb462fb52967080575f15898da97e21cb6f1361d505383/tomli-2.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:4dc4ce8483a5d429ab602f111a93a6ab1ed425eae3122032db7e9acf449451be", size = 107165, upload-time = "2025-10-08T22:01:08.107Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ff/b7/40f36368fcabc518bb11c8f06379a0fd631985046c038aca08c6d6a43c6e/tomli-2.3.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d7d86942e56ded512a594786a5ba0a5e521d02529b3826e7761a05138341a2ac", size = 154891, upload-time = "2025-10-08T22:01:09.082Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/f9/3f/d9dd692199e3b3aab2e4e4dd948abd0f790d9ded8cd10cbaae276a898434/tomli-2.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:73ee0b47d4dad1c5e996e3cd33b8a76a50167ae5f96a2607cbe8cc773506ab22", size = 148796, upload-time = "2025-10-08T22:01:10.266Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/60/83/59bff4996c2cf9f9387a0f5a3394629c7efa5ef16142076a23a90f1955fa/tomli-2.3.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:792262b94d5d0a466afb5bc63c7daa9d75520110971ee269152083270998316f", size = 242121, upload-time = "2025-10-08T22:01:11.332Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/45/e5/7c5119ff39de8693d6baab6c0b6dcb556d192c165596e9fc231ea1052041/tomli-2.3.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4f195fe57ecceac95a66a75ac24d9d5fbc98ef0962e09b2eddec5d39375aae52", size = 250070, upload-time = "2025-10-08T22:01:12.498Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/45/12/ad5126d3a278f27e6701abde51d342aa78d06e27ce2bb596a01f7709a5a2/tomli-2.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e31d432427dcbf4d86958c184b9bfd1e96b5b71f8eb17e6d02531f434fd335b8", size = 245859, upload-time = "2025-10-08T22:01:13.551Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/fb/a1/4d6865da6a71c603cfe6ad0e6556c73c76548557a8d658f9e3b142df245f/tomli-2.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7b0882799624980785240ab732537fcfc372601015c00f7fc367c55308c186f6", size = 250296, upload-time = "2025-10-08T22:01:14.614Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/a0/b7/a7a7042715d55c9ba6e8b196d65d2cb662578b4d8cd17d882d45322b0d78/tomli-2.3.0-cp312-cp312-win32.whl", hash = "sha256:ff72b71b5d10d22ecb084d345fc26f42b5143c5533db5e2eaba7d2d335358876", size = 97124, upload-time = "2025-10-08T22:01:15.629Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/06/1e/f22f100db15a68b520664eb3328fb0ae4e90530887928558112c8d1f4515/tomli-2.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:1cb4ed918939151a03f33d4242ccd0aa5f11b3547d0cf30f7c74a408a5b99878", size = 107698, upload-time = "2025-10-08T22:01:16.51Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/77/b8/0135fadc89e73be292b473cb820b4f5a08197779206b33191e801feeae40/tomli-2.3.0-py3-none-any.whl", hash = "sha256:e95b1af3c5b07d9e643909b5abbec77cd9f1217e6d0bca72b0234736b9fb1f1b", size = 14408, upload-time = "2025-10-08T22:01:46.04Z" },
]
[[package]]
name = "typing-extensions"
version = "4.15.0"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/72/94/1a15dd82efb362ac84269196e94cf00f187f7ed21c242792a923cdb1c61f/typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466", size = 109391, upload-time = "2025-08-25T13:49:26.313Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614, upload-time = "2025-08-25T13:49:24.86Z" },
]
[[package]]
name = "urllib3"
version = "2.5.0"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/15/22/9ee70a2574a4f4599c47dd506532914ce044817c7752a79b6a51286319bc/urllib3-2.5.0.tar.gz", hash = "sha256:3fc47733c7e419d4bc3f6b3dc2b4f890bb743906a30d56ba4a5bfa4bbff92760", size = 393185, upload-time = "2025-06-18T14:07:41.644Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/a7/c2/fe1e52489ae3122415c51f387e221dd0773709bad6c6cdaa599e8a2c5185/urllib3-2.5.0-py3-none-any.whl", hash = "sha256:e6b01673c0fa6a13e374b50871808eb3bf7046c4b125b216f6bf1cc604cff0dc", size = 129795, upload-time = "2025-06-18T14:07:40.39Z" },
]

View File

@ -20,6 +20,7 @@ import logging
import time import time
import threading import threading
import traceback import traceback
import faulthandler
from flask import Flask from flask import Flask
from flask_login import LoginManager from flask_login import LoginManager
@ -37,6 +38,7 @@ from common.versions import get_ragflow_version
stop_event = threading.Event() stop_event = threading.Event()
if __name__ == '__main__': if __name__ == '__main__':
faulthandler.enable()
init_root_logger("admin_service") init_root_logger("admin_service")
logging.info(r""" logging.info(r"""
____ ___ ______________ ___ __ _ ____ ___ ______________ ___ __ _

View File

@ -1289,7 +1289,7 @@ async def update_chunk(tenant_id, dataset_id, document_id, chunk_id):
return get_error_data_result(message=f"You don't own the document {document_id}.") return get_error_data_result(message=f"You don't own the document {document_id}.")
doc = doc[0] doc = doc[0]
req = await request_json() req = await request_json()
if "content" in req: if "content" in req and req["content"] is not None:
content = req["content"] content = req["content"]
else: else:
content = chunk.get("content_with_weight", "") content = chunk.get("content_with_weight", "")

View File

@ -20,7 +20,6 @@
from common.log_utils import init_root_logger from common.log_utils import init_root_logger
from plugin import GlobalPluginManager from plugin import GlobalPluginManager
init_root_logger("ragflow_server")
import logging import logging
import os import os
@ -30,6 +29,7 @@ import time
import traceback import traceback
import threading import threading
import uuid import uuid
import faulthandler
from api.apps import app, smtp_mail_server from api.apps import app, smtp_mail_server
from api.db.runtime_config import RuntimeConfig from api.db.runtime_config import RuntimeConfig
@ -73,6 +73,8 @@ def signal_handler(sig, frame):
sys.exit(0) sys.exit(0)
if __name__ == '__main__': if __name__ == '__main__':
faulthandler.enable()
init_root_logger("ragflow_server")
logging.info(r""" logging.info(r"""
____ ___ ______ ______ __ ____ ___ ______ ______ __
/ __ \ / | / ____// ____// /____ _ __ / __ \ / | / ____// ____// /____ _ __

View File

@ -89,7 +89,8 @@ def get_data_error_result(code=RetCode.DATA_ERROR, message="Sorry! Data missing!
def server_error_response(e): def server_error_response(e):
logging.exception(e) # Quart invokes this handler outside the original except block, so we must pass exc_info manually.
logging.error("Unhandled exception during request", exc_info=(type(e), e, e.__traceback__))
try: try:
msg = repr(e).lower() msg = repr(e).lower()
if getattr(e, "code", None) == 401 or ("unauthorized" in msg) or ("401" in msg): if getattr(e, "code", None) == 401 or ("unauthorized" in msg) or ("401" in msg):

View File

@ -74,6 +74,8 @@ GITHUB_OAUTH = None
FEISHU_OAUTH = None FEISHU_OAUTH = None
OAUTH_CONFIG = None OAUTH_CONFIG = None
DOC_ENGINE = os.getenv('DOC_ENGINE', 'elasticsearch') DOC_ENGINE = os.getenv('DOC_ENGINE', 'elasticsearch')
DOC_ENGINE_INFINITY = (DOC_ENGINE.lower() == "infinity")
docStoreConn = None docStoreConn = None
@ -229,9 +231,9 @@ def init_settings():
FEISHU_OAUTH = get_base_config("oauth", {}).get("feishu") FEISHU_OAUTH = get_base_config("oauth", {}).get("feishu")
OAUTH_CONFIG = get_base_config("oauth", {}) OAUTH_CONFIG = get_base_config("oauth", {})
global DOC_ENGINE, docStoreConn, ES, OB, OS, INFINITY global DOC_ENGINE, DOC_ENGINE_INFINITY, docStoreConn, ES, OB, OS, INFINITY
DOC_ENGINE = os.environ.get("DOC_ENGINE", "elasticsearch") DOC_ENGINE = os.environ.get("DOC_ENGINE", "elasticsearch")
# DOC_ENGINE = os.environ.get('DOC_ENGINE', "opensearch") DOC_ENGINE_INFINITY = (DOC_ENGINE.lower() == "infinity")
lower_case_doc_engine = DOC_ENGINE.lower() lower_case_doc_engine = DOC_ENGINE.lower()
if lower_case_doc_engine == "elasticsearch": if lower_case_doc_engine == "elasticsearch":
ES = get_base_config("es", {}) ES = get_base_config("es", {})

View File

@ -5,20 +5,13 @@
"create_time": {"type": "varchar", "default": ""}, "create_time": {"type": "varchar", "default": ""},
"create_timestamp_flt": {"type": "float", "default": 0.0}, "create_timestamp_flt": {"type": "float", "default": 0.0},
"img_id": {"type": "varchar", "default": ""}, "img_id": {"type": "varchar", "default": ""},
"docnm_kwd": {"type": "varchar", "default": ""}, "docnm": {"type": "varchar", "default": "", "analyzer": ["rag-coarse", "rag-fine"], "comment": "docnm_kwd, title_tks, title_sm_tks"},
"title_tks": {"type": "varchar", "default": "", "analyzer": "whitespace"},
"title_sm_tks": {"type": "varchar", "default": "", "analyzer": "whitespace"},
"name_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"}, "name_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"},
"important_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"},
"tag_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"}, "tag_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"},
"important_tks": {"type": "varchar", "default": "", "analyzer": "whitespace"}, "important_keywords": {"type": "varchar", "default": "", "analyzer": ["rag-coarse", "rag-fine"], "comment": "important_kwd, important_tks"},
"question_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"}, "questions": {"type": "varchar", "default": "", "analyzer": ["rag-coarse", "rag-fine"], "comment": "question_kwd, question_tks"},
"question_tks": {"type": "varchar", "default": "", "analyzer": "whitespace"}, "content": {"type": "varchar", "default": "", "analyzer": ["rag-coarse", "rag-fine"], "comment": "content_with_weight, content_ltks, content_sm_ltks"},
"content_with_weight": {"type": "varchar", "default": ""}, "authors": {"type": "varchar", "default": "", "analyzer": ["rag-coarse", "rag-fine"], "comment": "authors_tks, authors_sm_tks"},
"content_ltks": {"type": "varchar", "default": "", "analyzer": "whitespace"},
"content_sm_ltks": {"type": "varchar", "default": "", "analyzer": "whitespace"},
"authors_tks": {"type": "varchar", "default": "", "analyzer": "whitespace"},
"authors_sm_tks": {"type": "varchar", "default": "", "analyzer": "whitespace"},
"page_num_int": {"type": "varchar", "default": ""}, "page_num_int": {"type": "varchar", "default": ""},
"top_int": {"type": "varchar", "default": ""}, "top_int": {"type": "varchar", "default": ""},
"position_int": {"type": "varchar", "default": ""}, "position_int": {"type": "varchar", "default": ""},

View File

@ -54,4 +54,3 @@ memindex_memory_quota = "1GB"
wal_dir = "/var/infinity/wal" wal_dir = "/var/infinity/wal"
[resource] [resource]
resource_dir = "/var/infinity/resource"

View File

@ -151,6 +151,7 @@ dependencies = [
"moodlepy>=0.23.0", "moodlepy>=0.23.0",
"pypandoc>=1.16", "pypandoc>=1.16",
"pyobvector==0.2.18", "pyobvector==0.2.18",
"exceptiongroup>=1.3.0,<2.0.0"
] ]
[dependency-groups] [dependency-groups]

View File

@ -26,6 +26,7 @@ from hanziconv import HanziConv
from nltk import word_tokenize from nltk import word_tokenize
from nltk.stem import PorterStemmer, WordNetLemmatizer from nltk.stem import PorterStemmer, WordNetLemmatizer
from common.file_utils import get_project_base_directory from common.file_utils import get_project_base_directory
from common import settings
class RagTokenizer: class RagTokenizer:
@ -38,7 +39,7 @@ class RagTokenizer:
def _load_dict(self, fnm): def _load_dict(self, fnm):
logging.info(f"[HUQIE]:Build trie from {fnm}") logging.info(f"[HUQIE]:Build trie from {fnm}")
try: try:
of = open(fnm, "r", encoding='utf-8') of = open(fnm, "r", encoding="utf-8")
while True: while True:
line = of.readline() line = of.readline()
if not line: if not line:
@ -46,7 +47,7 @@ class RagTokenizer:
line = re.sub(r"[\r\n]+", "", line) line = re.sub(r"[\r\n]+", "", line)
line = re.split(r"[ \t]", line) line = re.split(r"[ \t]", line)
k = self.key_(line[0]) k = self.key_(line[0])
F = int(math.log(float(line[1]) / self.DENOMINATOR) + .5) F = int(math.log(float(line[1]) / self.DENOMINATOR) + 0.5)
if k not in self.trie_ or self.trie_[k][0] < F: if k not in self.trie_ or self.trie_[k][0] < F:
self.trie_[self.key_(line[0])] = (F, line[2]) self.trie_[self.key_(line[0])] = (F, line[2])
self.trie_[self.rkey_(line[0])] = 1 self.trie_[self.rkey_(line[0])] = 1
@ -106,8 +107,8 @@ class RagTokenizer:
if inside_code == 0x3000: if inside_code == 0x3000:
inside_code = 0x0020 inside_code = 0x0020
else: else:
inside_code -= 0xfee0 inside_code -= 0xFEE0
if inside_code < 0x0020 or inside_code > 0x7e: # After the conversion, if it's not a half-width character, return the original character. if inside_code < 0x0020 or inside_code > 0x7E: # After the conversion, if it's not a half-width character, return the original character.
rstring += uchar rstring += uchar
else: else:
rstring += chr(inside_code) rstring += chr(inside_code)
@ -124,7 +125,7 @@ class RagTokenizer:
if s < len(chars): if s < len(chars):
copy_pretks = copy.deepcopy(preTks) copy_pretks = copy.deepcopy(preTks)
remaining = "".join(chars[s:]) remaining = "".join(chars[s:])
copy_pretks.append((remaining, (-12, ''))) copy_pretks.append((remaining, (-12, "")))
tkslist.append(copy_pretks) tkslist.append(copy_pretks)
return s return s
@ -155,7 +156,7 @@ class RagTokenizer:
if k in self.trie_: if k in self.trie_:
copy_pretks.append((t, self.trie_[k])) copy_pretks.append((t, self.trie_[k]))
else: else:
copy_pretks.append((t, (-12, ''))) copy_pretks.append((t, (-12, "")))
next_res = self.dfs_(chars, mid, copy_pretks, tkslist, _depth + 1, _memo) next_res = self.dfs_(chars, mid, copy_pretks, tkslist, _depth + 1, _memo)
res = max(res, next_res) res = max(res, next_res)
_memo[state_key] = res _memo[state_key] = res
@ -163,12 +164,12 @@ class RagTokenizer:
S = s + 1 S = s + 1
if s + 2 <= len(chars): if s + 2 <= len(chars):
t1 = "".join(chars[s:s + 1]) t1 = "".join(chars[s : s + 1])
t2 = "".join(chars[s:s + 2]) t2 = "".join(chars[s : s + 2])
if self.trie_.has_keys_with_prefix(self.key_(t1)) and not self.trie_.has_keys_with_prefix(self.key_(t2)): if self.trie_.has_keys_with_prefix(self.key_(t1)) and not self.trie_.has_keys_with_prefix(self.key_(t2)):
S = s + 2 S = s + 2
if len(preTks) > 2 and len(preTks[-1][0]) == 1 and len(preTks[-2][0]) == 1 and len(preTks[-3][0]) == 1: if len(preTks) > 2 and len(preTks[-1][0]) == 1 and len(preTks[-2][0]) == 1 and len(preTks[-3][0]) == 1:
t1 = preTks[-1][0] + "".join(chars[s:s + 1]) t1 = preTks[-1][0] + "".join(chars[s : s + 1])
if self.trie_.has_keys_with_prefix(self.key_(t1)): if self.trie_.has_keys_with_prefix(self.key_(t1)):
S = s + 2 S = s + 2
@ -186,13 +187,13 @@ class RagTokenizer:
_memo[state_key] = res _memo[state_key] = res
return res return res
t = "".join(chars[s:s + 1]) t = "".join(chars[s : s + 1])
k = self.key_(t) k = self.key_(t)
copy_pretks = copy.deepcopy(preTks) copy_pretks = copy.deepcopy(preTks)
if k in self.trie_: if k in self.trie_:
copy_pretks.append((t, self.trie_[k])) copy_pretks.append((t, self.trie_[k]))
else: else:
copy_pretks.append((t, (-12, ''))) copy_pretks.append((t, (-12, "")))
result = self.dfs_(chars, s + 1, copy_pretks, tkslist, _depth + 1, _memo) result = self.dfs_(chars, s + 1, copy_pretks, tkslist, _depth + 1, _memo)
_memo[state_key] = result _memo[state_key] = result
return result return result
@ -216,7 +217,7 @@ class RagTokenizer:
F += freq F += freq
L += 0 if len(tk) < 2 else 1 L += 0 if len(tk) < 2 else 1
tks.append(tk) tks.append(tk)
#F /= len(tks) # F /= len(tks)
L /= len(tks) L /= len(tks)
logging.debug("[SC] {} {} {} {} {}".format(tks, len(tks), L, F, B / len(tks) + L + F)) logging.debug("[SC] {} {} {} {} {}".format(tks, len(tks), L, F, B / len(tks) + L + F))
return tks, B / len(tks) + L + F return tks, B / len(tks) + L + F
@ -252,8 +253,7 @@ class RagTokenizer:
while s < len(line): while s < len(line):
e = s + 1 e = s + 1
t = line[s:e] t = line[s:e]
while e < len(line) and self.trie_.has_keys_with_prefix( while e < len(line) and self.trie_.has_keys_with_prefix(self.key_(t)):
self.key_(t)):
e += 1 e += 1
t = line[s:e] t = line[s:e]
@ -264,7 +264,7 @@ class RagTokenizer:
if self.key_(t) in self.trie_: if self.key_(t) in self.trie_:
res.append((t, self.trie_[self.key_(t)])) res.append((t, self.trie_[self.key_(t)]))
else: else:
res.append((t, (0, ''))) res.append((t, (0, "")))
s = e s = e
@ -287,7 +287,7 @@ class RagTokenizer:
if self.key_(t) in self.trie_: if self.key_(t) in self.trie_:
res.append((t, self.trie_[self.key_(t)])) res.append((t, self.trie_[self.key_(t)]))
else: else:
res.append((t, (0, ''))) res.append((t, (0, "")))
s -= 1 s -= 1
@ -310,28 +310,29 @@ class RagTokenizer:
if _zh == zh: if _zh == zh:
e += 1 e += 1
continue continue
txt_lang_pairs.append((a[s: e], zh)) txt_lang_pairs.append((a[s:e], zh))
s = e s = e
e = s + 1 e = s + 1
zh = _zh zh = _zh
if s >= len(a): if s >= len(a):
continue continue
txt_lang_pairs.append((a[s: e], zh)) txt_lang_pairs.append((a[s:e], zh))
return txt_lang_pairs return txt_lang_pairs
def tokenize(self, line): def tokenize(self, line: str) -> str:
if settings.DOC_ENGINE_INFINITY:
return line
line = re.sub(r"\W+", " ", line) line = re.sub(r"\W+", " ", line)
line = self._strQ2B(line).lower() line = self._strQ2B(line).lower()
line = self._tradi2simp(line) line = self._tradi2simp(line)
arr = self._split_by_lang(line) arr = self._split_by_lang(line)
res = [] res = []
for L,lang in arr: for L, lang in arr:
if not lang: if not lang:
res.extend([self.stemmer.stem(self.lemmatizer.lemmatize(t)) for t in word_tokenize(L)]) res.extend([self.stemmer.stem(self.lemmatizer.lemmatize(t)) for t in word_tokenize(L)])
continue continue
if len(L) < 2 or re.match( if len(L) < 2 or re.match(r"[a-z\.-]+$", L) or re.match(r"[0-9\.-]+$", L):
r"[a-z\.-]+$", L) or re.match(r"[0-9\.-]+$", L):
res.append(L) res.append(L)
continue continue
@ -347,7 +348,7 @@ class RagTokenizer:
while i + same < len(tks1) and j + same < len(tks) and tks1[i + same] == tks[j + same]: while i + same < len(tks1) and j + same < len(tks) and tks1[i + same] == tks[j + same]:
same += 1 same += 1
if same > 0: if same > 0:
res.append(" ".join(tks[j: j + same])) res.append(" ".join(tks[j : j + same]))
_i = i + same _i = i + same
_j = j + same _j = j + same
j = _j + 1 j = _j + 1
@ -374,7 +375,7 @@ class RagTokenizer:
same = 1 same = 1
while i + same < len(tks1) and j + same < len(tks) and tks1[i + same] == tks[j + same]: while i + same < len(tks1) and j + same < len(tks) and tks1[i + same] == tks[j + same]:
same += 1 same += 1
res.append(" ".join(tks[j: j + same])) res.append(" ".join(tks[j : j + same]))
_i = i + same _i = i + same
_j = j + same _j = j + same
j = _j + 1 j = _j + 1
@ -391,7 +392,9 @@ class RagTokenizer:
logging.debug("[TKS] {}".format(self.merge_(res))) logging.debug("[TKS] {}".format(self.merge_(res)))
return self.merge_(res) return self.merge_(res)
def fine_grained_tokenize(self, tks): def fine_grained_tokenize(self, tks: str) -> str:
if settings.DOC_ENGINE_INFINITY:
return tks
tks = tks.split() tks = tks.split()
zh_num = len([1 for c in tks if c and is_chinese(c[0])]) zh_num = len([1 for c in tks if c and is_chinese(c[0])])
if zh_num < len(tks) * 0.2: if zh_num < len(tks) * 0.2:
@ -433,21 +436,21 @@ class RagTokenizer:
def is_chinese(s): def is_chinese(s):
if s >= u'\u4e00' and s <= u'\u9fa5': if s >= "\u4e00" and s <= "\u9fa5":
return True return True
else: else:
return False return False
def is_number(s): def is_number(s):
if s >= u'\u0030' and s <= u'\u0039': if s >= "\u0030" and s <= "\u0039":
return True return True
else: else:
return False return False
def is_alphabet(s): def is_alphabet(s):
if (u'\u0041' <= s <= u'\u005a') or (u'\u0061' <= s <= u'\u007a'): if ("\u0041" <= s <= "\u005a") or ("\u0061" <= s <= "\u007a"):
return True return True
else: else:
return False return False
@ -456,8 +459,7 @@ def is_alphabet(s):
def naive_qie(txt): def naive_qie(txt):
tks = [] tks = []
for t in txt.split(): for t in txt.split():
if tks and re.match(r".*[a-zA-Z]$", tks[-1] if tks and re.match(r".*[a-zA-Z]$", tks[-1]) and re.match(r".*[a-zA-Z]$", t):
) and re.match(r".*[a-zA-Z]$", t):
tks.append(" ") tks.append(" ")
tks.append(t) tks.append(t)
return tks return tks
@ -473,43 +475,35 @@ add_user_dict = tokenizer.add_user_dict
tradi2simp = tokenizer._tradi2simp tradi2simp = tokenizer._tradi2simp
strQ2B = tokenizer._strQ2B strQ2B = tokenizer._strQ2B
if __name__ == '__main__': if __name__ == "__main__":
tknzr = RagTokenizer(debug=True) tknzr = RagTokenizer(debug=True)
# huqie.add_user_dict("/tmp/tmp.new.tks.dict") # huqie.add_user_dict("/tmp/tmp.new.tks.dict")
tks = tknzr.tokenize( texts = [
"哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈") "over_the_past.pdf",
logging.info(tknzr.fine_grained_tokenize(tks)) "哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈",
tks = tknzr.tokenize( "公开征求意见稿提出,境外投资者可使用自有人民币或外汇投资。使用外汇投资的,可通过债券持有人在香港人民币业务清算行及香港地区经批准可进入境内银行间外汇市场进行交易的境外人民币业务参加行(以下统称香港结算行)办理外汇资金兑换。香港结算行由此所产生的头寸可到境内银行间外汇市场平盘。使用外汇投资的,在其投资的债券到期或卖出后,原则上应兑换回外汇。",
"公开征求意见稿提出,境外投资者可使用自有人民币或外汇投资。使用外汇投资的,可通过债券持有人在香港人民币业务清算行及香港地区经批准可进入境内银行间外汇市场进行交易的境外人民币业务参加行(以下统称香港结算行)办理外汇资金兑换。香港结算行由此所产生的头寸可到境内银行间外汇市场平盘。使用外汇投资的,在其投资的债券到期或卖出后,原则上应兑换回外汇。") "多校划片就是一个小区对应多个小学初中,让买了学区房的家庭也不确定到底能上哪个学校。目的是通过这种方式为学区房降温,把就近入学落到实处。南京市长江大桥",
logging.info(tknzr.fine_grained_tokenize(tks)) "实际上当时他们已经将业务中心偏移到安全部门和针对政府企业的部门 Scripts are compiled and cached aaaaaaaaa",
tks = tknzr.tokenize( "虽然我不怎么玩",
"多校划片就是一个小区对应多个小学初中,让买了学区房的家庭也不确定到底能上哪个学校。目的是通过这种方式为学区房降温,把就近入学落到实处。南京市长江大桥") "蓝月亮如何在外资夹击中生存,那是全宇宙最有意思的",
logging.info(tknzr.fine_grained_tokenize(tks)) "涡轮增压发动机num最大功率,不像别的共享买车锁电子化的手段,我们接过来是否有意义,黄黄爱美食,不过,今天阿奇要讲到的这家农贸市场,说实话,还真蛮有特色的!不仅环境好,还打出了",
tks = tknzr.tokenize( "这周日你去吗?这周日你有空吗?",
"实际上当时他们已经将业务中心偏移到安全部门和针对政府企业的部门 Scripts are compiled and cached aaaaaaaaa") "Unity3D开发经验 测试开发工程师 c++双11双11 985 211 ",
logging.info(tknzr.fine_grained_tokenize(tks)) "数据分析项目经理|数据分析挖掘|数据分析方向|商品数据分析|搜索数据分析 sql python hive tableau Cocos2d-",
tks = tknzr.tokenize("虽然我不怎么玩") ]
logging.info(tknzr.fine_grained_tokenize(tks)) for text in texts:
tks = tknzr.tokenize("蓝月亮如何在外资夹击中生存,那是全宇宙最有意思的") print(text)
logging.info(tknzr.fine_grained_tokenize(tks)) tks1 = tknzr.tokenize(text)
tks = tknzr.tokenize( tks2 = tknzr.fine_grained_tokenize(tks1)
"涡轮增压发动机num最大功率,不像别的共享买车锁电子化的手段,我们接过来是否有意义,黄黄爱美食,不过,今天阿奇要讲到的这家农贸市场,说实话,还真蛮有特色的!不仅环境好,还打出了") print(tks1)
logging.info(tknzr.fine_grained_tokenize(tks)) print(tks2)
tks = tknzr.tokenize("这周日你去吗?这周日你有空吗?")
logging.info(tknzr.fine_grained_tokenize(tks))
tks = tknzr.tokenize("Unity3D开发经验 测试开发工程师 c++双11双11 985 211 ")
logging.info(tknzr.fine_grained_tokenize(tks))
tks = tknzr.tokenize(
"数据分析项目经理|数据分析挖掘|数据分析方向|商品数据分析|搜索数据分析 sql python hive tableau Cocos2d-")
logging.info(tknzr.fine_grained_tokenize(tks))
if len(sys.argv) < 2: if len(sys.argv) < 2:
sys.exit() sys.exit()
tknzr.DEBUG = False
tknzr.load_user_dict(sys.argv[1]) tknzr.load_user_dict(sys.argv[1])
of = open(sys.argv[2], "r") of = open(sys.argv[2], "r")
while True: while True:
line = of.readline() line = of.readline()
if not line: if not line:
break break
logging.info(tknzr.tokenize(line)) print(tknzr.tokenize(line))
of.close() of.close()

View File

@ -17,7 +17,6 @@ import json
import logging import logging
import re import re
import math import math
import os
from collections import OrderedDict from collections import OrderedDict
from dataclasses import dataclass from dataclasses import dataclass
@ -28,6 +27,7 @@ from rag.utils.doc_store_conn import DocStoreConnection, MatchDenseExpr, FusionE
from common.string_utils import remove_redundant_spaces from common.string_utils import remove_redundant_spaces
from common.float_utils import get_float from common.float_utils import get_float
from common.constants import PAGERANK_FLD, TAG_FLD from common.constants import PAGERANK_FLD, TAG_FLD
from common import settings
def index_name(uid): return f"ragflow_{uid}" def index_name(uid): return f"ragflow_{uid}"
@ -120,7 +120,8 @@ class Dealer:
else: else:
matchDense = self.get_vector(qst, emb_mdl, topk, req.get("similarity", 0.1)) matchDense = self.get_vector(qst, emb_mdl, topk, req.get("similarity", 0.1))
q_vec = matchDense.embedding_data q_vec = matchDense.embedding_data
src.append(f"q_{len(q_vec)}_vec") if not settings.DOC_ENGINE_INFINITY:
src.append(f"q_{len(q_vec)}_vec")
fusionExpr = FusionExpr("weighted_sum", topk, {"weights": "0.05,0.95"}) fusionExpr = FusionExpr("weighted_sum", topk, {"weights": "0.05,0.95"})
matchExprs = [matchText, matchDense, fusionExpr] matchExprs = [matchText, matchDense, fusionExpr]
@ -405,8 +406,13 @@ class Dealer:
rank_feature=rank_feature, rank_feature=rank_feature,
) )
else: else:
lower_case_doc_engine = os.getenv("DOC_ENGINE", "elasticsearch") if settings.DOC_ENGINE_INFINITY:
if lower_case_doc_engine in ["elasticsearch", "opensearch"]: # Don't need rerank here since Infinity normalizes each way score before fusion.
sim = [sres.field[id].get("_score", 0.0) for id in sres.ids]
sim = [s if s is not None else 0.0 for s in sim]
tsim = sim
vsim = sim
else:
# ElasticSearch doesn't normalize each way score before fusion. # ElasticSearch doesn't normalize each way score before fusion.
sim, tsim, vsim = self.rerank( sim, tsim, vsim = self.rerank(
sres, sres,
@ -415,12 +421,6 @@ class Dealer:
vector_similarity_weight, vector_similarity_weight,
rank_feature=rank_feature, rank_feature=rank_feature,
) )
else:
# Don't need rerank here since Infinity normalizes each way score before fusion.
sim = [sres.field[id].get("_score", 0.0) for id in sres.ids]
sim = [s if s is not None else 0.0 for s in sim]
tsim = sim
vsim = sim
sim_np = np.array(sim, dtype=np.float64) sim_np = np.array(sim, dtype=np.float64)
if sim_np.size == 0: if sim_np.size == 0:

View File

@ -44,11 +44,56 @@ logger = logging.getLogger("ragflow.infinity_conn")
def field_keyword(field_name: str): def field_keyword(field_name: str):
# The "docnm_kwd" field is always a string, not list. # Treat "*_kwd" tag-like columns as keyword lists except knowledge_graph_kwd; source_id is also keyword-like.
if field_name == "source_id" or (field_name.endswith("_kwd") and field_name != "docnm_kwd" and field_name != "knowledge_graph_kwd"): if field_name == "source_id" or (field_name.endswith("_kwd") and field_name not in ["knowledge_graph_kwd", "docnm_kwd", "important_kwd", "question_kwd"]):
return True return True
return False return False
def convert_select_fields(output_fields: list[str]) -> list[str]:
for i, field in enumerate(output_fields):
if field in ["docnm_kwd", "title_tks", "title_sm_tks"]:
output_fields[i] = "docnm"
elif field in ["important_kwd", "important_tks"]:
output_fields[i] = "important_keywords"
elif field in ["question_kwd", "question_tks"]:
output_fields[i] = "questions"
elif field in ["content_with_weight", "content_ltks", "content_sm_ltks"]:
output_fields[i] = "content"
elif field in ["authors_tks", "authors_sm_tks"]:
output_fields[i] = "authors"
return list(set(output_fields))
def convert_matching_field(field_weightstr: str) -> str:
tokens = field_weightstr.split("^")
field = tokens[0]
if field == "docnm_kwd" or field == "title_tks":
field = "docnm@ft_docnm_rag_coarse"
elif field == "title_sm_tks":
field = "docnm@ft_title_rag_fine"
elif field == "important_kwd":
field = "important_keywords@ft_important_keywords_rag_coarse"
elif field == "important_tks":
field = "important_keywords@ft_important_keywords_rag_fine"
elif field == "question_kwd":
field = "questions@ft_questions_rag_coarse"
elif field == "question_tks":
field = "questions@ft_questions_rag_fine"
elif field == "content_with_weight" or field == "content_ltks":
field = "content@ft_content_rag_coarse"
elif field == "content_sm_ltks":
field = "content@ft_content_rag_fine"
elif field == "authors_tks":
field = "authors@ft_authors_rag_coarse"
elif field == "authors_sm_tks":
field = "authors@ft_authors_rag_fine"
tokens[0] = field
return "^".join(tokens)
def list2str(lst: str|list, sep: str = " ") -> str:
if isinstance(lst, str):
return lst
return sep.join(lst)
def equivalent_condition_to_str(condition: dict, table_instance=None) -> str | None: def equivalent_condition_to_str(condition: dict, table_instance=None) -> str | None:
assert "_id" not in condition assert "_id" not in condition
@ -77,13 +122,13 @@ def equivalent_condition_to_str(condition: dict, table_instance=None) -> str | N
for item in v: for item in v:
if isinstance(item, str): if isinstance(item, str):
item = item.replace("'", "''") item = item.replace("'", "''")
inCond.append(f"filter_fulltext('{k}', '{item}')") inCond.append(f"filter_fulltext('{convert_matching_field(k)}', '{item}')")
if inCond: if inCond:
strInCond = " or ".join(inCond) strInCond = " or ".join(inCond)
strInCond = f"({strInCond})" strInCond = f"({strInCond})"
cond.append(strInCond) cond.append(strInCond)
else: else:
cond.append(f"filter_fulltext('{k}', '{v}')") cond.append(f"filter_fulltext('{convert_matching_field(k)}', '{v}')")
elif isinstance(v, list): elif isinstance(v, list):
inCond = list() inCond = list()
for item in v: for item in v:
@ -181,11 +226,15 @@ class InfinityConnection(DocStoreConnection):
logger.info(f"INFINITY added following column to table {table_name}: {field_name} {field_info}") logger.info(f"INFINITY added following column to table {table_name}: {field_name} {field_info}")
if field_info["type"] != "varchar" or "analyzer" not in field_info: if field_info["type"] != "varchar" or "analyzer" not in field_info:
continue continue
inf_table.create_index( analyzers = field_info["analyzer"]
f"text_idx_{field_name}", if isinstance(analyzers, str):
IndexInfo(field_name, IndexType.FullText, {"ANALYZER": field_info["analyzer"]}), analyzers = [analyzers]
ConflictType.Ignore, for analyzer in analyzers:
) inf_table.create_index(
f"ft_{re.sub(r'[^a-zA-Z0-9]', '_', field_name)}_{re.sub(r'[^a-zA-Z0-9]', '_', analyzer)}",
IndexInfo(field_name, IndexType.FullText, {"ANALYZER": analyzer}),
ConflictType.Ignore,
)
""" """
Database operations Database operations
@ -245,11 +294,15 @@ class InfinityConnection(DocStoreConnection):
for field_name, field_info in schema.items(): for field_name, field_info in schema.items():
if field_info["type"] != "varchar" or "analyzer" not in field_info: if field_info["type"] != "varchar" or "analyzer" not in field_info:
continue continue
inf_table.create_index( analyzers = field_info["analyzer"]
f"text_idx_{field_name}", if isinstance(analyzers, str):
IndexInfo(field_name, IndexType.FullText, {"ANALYZER": field_info["analyzer"]}), analyzers = [analyzers]
ConflictType.Ignore, for analyzer in analyzers:
) inf_table.create_index(
f"ft_{re.sub(r'[^a-zA-Z0-9]', '_', field_name)}_{re.sub(r'[^a-zA-Z0-9]', '_', analyzer)}",
IndexInfo(field_name, IndexType.FullText, {"ANALYZER": analyzer}),
ConflictType.Ignore,
)
self.connPool.release_conn(inf_conn) self.connPool.release_conn(inf_conn)
logger.info(f"INFINITY created table {table_name}, vector size {vectorSize}") logger.info(f"INFINITY created table {table_name}, vector size {vectorSize}")
@ -302,6 +355,7 @@ class InfinityConnection(DocStoreConnection):
df_list = list() df_list = list()
table_list = list() table_list = list()
output = selectFields.copy() output = selectFields.copy()
output = convert_select_fields(output)
for essential_field in ["id"] + aggFields: for essential_field in ["id"] + aggFields:
if essential_field not in output: if essential_field not in output:
output.append(essential_field) output.append(essential_field)
@ -352,6 +406,7 @@ class InfinityConnection(DocStoreConnection):
if isinstance(matchExpr, MatchTextExpr): if isinstance(matchExpr, MatchTextExpr):
if filter_cond and "filter" not in matchExpr.extra_options: if filter_cond and "filter" not in matchExpr.extra_options:
matchExpr.extra_options.update({"filter": filter_cond}) matchExpr.extra_options.update({"filter": filter_cond})
matchExpr.fields = [convert_matching_field(field) for field in matchExpr.fields]
fields = ",".join(matchExpr.fields) fields = ",".join(matchExpr.fields)
filter_fulltext = f"filter_fulltext('{fields}', '{matchExpr.matching_text}')" filter_fulltext = f"filter_fulltext('{fields}', '{matchExpr.matching_text}')"
if filter_cond: if filter_cond:
@ -470,7 +525,10 @@ class InfinityConnection(DocStoreConnection):
df_list.append(kb_res) df_list.append(kb_res)
self.connPool.release_conn(inf_conn) self.connPool.release_conn(inf_conn)
res = concat_dataframes(df_list, ["id"]) res = concat_dataframes(df_list, ["id"])
res_fields = self.get_fields(res, res.columns.tolist()) fields = set(res.columns.tolist())
for field in ["docnm_kwd", "title_tks", "title_sm_tks", "important_kwd", "important_tks", "question_kwd", "question_tks","content_with_weight", "content_ltks", "content_sm_ltks", "authors_tks", "authors_sm_tks"]:
fields.add(field)
res_fields = self.get_fields(res, list(fields))
return res_fields.get(chunkId, None) return res_fields.get(chunkId, None)
def insert(self, documents: list[dict], indexName: str, knowledgebaseId: str = None) -> list[str]: def insert(self, documents: list[dict], indexName: str, knowledgebaseId: str = None) -> list[str]:
@ -508,8 +566,39 @@ class InfinityConnection(DocStoreConnection):
for d in docs: for d in docs:
assert "_id" not in d assert "_id" not in d
assert "id" in d assert "id" in d
for k, v in d.items(): for k, v in list(d.items()):
if field_keyword(k): if k == "docnm_kwd":
d["docnm"] = v
elif k == "title_kwd":
if not d.get("docnm_kwd"):
d["docnm"] = list2str(v)
elif k == "title_sm_tks":
if not d.get("docnm_kwd"):
d["docnm"] = list2str(v)
elif k == "important_kwd":
d["important_keywords"] = list2str(v)
elif k == "important_tks":
if not d.get("important_kwd"):
d["important_keywords"] = v
elif k == "content_with_weight":
d["content"] = v
elif k == "content_ltks":
if not d.get("content_with_weight"):
d["content"] = v
elif k == "content_sm_ltks":
if not d.get("content_with_weight"):
d["content"] = v
elif k == "authors_tks":
d["authors"] = v
elif k == "authors_sm_tks":
if not d.get("authors_tks"):
d["authors"] = v
elif k == "question_kwd":
d["questions"] = list2str(v, "\n")
elif k == "question_tks":
if not d.get("question_kwd"):
d["questions"] = list2str(v)
elif field_keyword(k):
if isinstance(v, list): if isinstance(v, list):
d[k] = "###".join(v) d[k] = "###".join(v)
else: else:
@ -528,6 +617,9 @@ class InfinityConnection(DocStoreConnection):
d[k] = "_".join(f"{num:08x}" for num in v) d[k] = "_".join(f"{num:08x}" for num in v)
else: else:
d[k] = v d[k] = v
for k in ["docnm_kwd", "title_tks", "title_sm_tks", "important_kwd", "important_tks", "content_with_weight", "content_ltks", "content_sm_ltks", "authors_tks", "authors_sm_tks", "question_kwd", "question_tks"]:
if k in d:
del d[k]
for n, vs in embedding_clmns: for n, vs in embedding_clmns:
if n in d: if n in d:
@ -562,7 +654,38 @@ class InfinityConnection(DocStoreConnection):
filter = equivalent_condition_to_str(condition, table_instance) filter = equivalent_condition_to_str(condition, table_instance)
removeValue = {} removeValue = {}
for k, v in list(newValue.items()): for k, v in list(newValue.items()):
if field_keyword(k): if k == "docnm_kwd":
newValue["docnm"] = list2str(v)
elif k == "title_kwd":
if not newValue.get("docnm_kwd"):
newValue["docnm"] = list2str(v)
elif k == "title_sm_tks":
if not newValue.get("docnm_kwd"):
newValue["docnm"] = v
elif k == "important_kwd":
newValue["important_keywords"] = list2str(v)
elif k == "important_tks":
if not newValue.get("important_kwd"):
newValue["important_keywords"] = v
elif k == "content_with_weight":
newValue["content"] = v
elif k == "content_ltks":
if not newValue.get("content_with_weight"):
newValue["content"] = v
elif k == "content_sm_ltks":
if not newValue.get("content_with_weight"):
newValue["content"] = v
elif k == "authors_tks":
newValue["authors"] = v
elif k == "authors_sm_tks":
if not newValue.get("authors_tks"):
newValue["authors"] = v
elif k == "question_kwd":
newValue["questions"] = "\n".join(v)
elif k == "question_tks":
if not newValue.get("question_kwd"):
newValue["questions"] = list2str(v)
elif field_keyword(k):
if isinstance(v, list): if isinstance(v, list):
newValue[k] = "###".join(v) newValue[k] = "###".join(v)
else: else:
@ -593,6 +716,9 @@ class InfinityConnection(DocStoreConnection):
del newValue[k] del newValue[k]
else: else:
newValue[k] = v newValue[k] = v
for k in ["docnm_kwd", "title_tks", "title_sm_tks", "important_kwd", "important_tks", "content_with_weight", "content_ltks", "content_sm_ltks", "authors_tks", "authors_sm_tks", "question_kwd", "question_tks"]:
if k in newValue:
del newValue[k]
remove_opt = {} # "[k,new_value]": [id_to_update, ...] remove_opt = {} # "[k,new_value]": [id_to_update, ...]
if removeValue: if removeValue:
@ -656,22 +782,45 @@ class InfinityConnection(DocStoreConnection):
return {} return {}
fieldsAll = fields.copy() fieldsAll = fields.copy()
fieldsAll.append("id") fieldsAll.append("id")
fieldsAll = set(fieldsAll)
if "docnm" in res.columns:
for field in ["docnm_kwd", "title_tks", "title_sm_tks"]:
if field in fieldsAll:
res[field] = res["docnm"]
if "important_keywords" in res.columns:
if "important_kwd" in fieldsAll:
res["important_kwd"] = res["important_keywords"].apply(lambda v: v.split())
if "important_tks" in fieldsAll:
res["important_tks"] = res["important_keywords"]
if "questions" in res.columns:
if "question_kwd" in fieldsAll:
res["question_kwd"] = res["questions"].apply(lambda v: v.splitlines())
if "question_tks" in fieldsAll:
res["question_tks"] = res["questions"]
if "content" in res.columns:
for field in ["content_with_weight", "content_ltks", "content_sm_ltks"]:
if field in fieldsAll:
res[field] = res["content"]
if "authors" in res.columns:
for field in ["authors_tks", "authors_sm_tks"]:
if field in fieldsAll:
res[field] = res["authors"]
column_map = {col.lower(): col for col in res.columns} column_map = {col.lower(): col for col in res.columns}
matched_columns = {column_map[col.lower()]: col for col in set(fieldsAll) if col.lower() in column_map} matched_columns = {column_map[col.lower()]: col for col in fieldsAll if col.lower() in column_map}
none_columns = [col for col in set(fieldsAll) if col.lower() not in column_map] none_columns = [col for col in fieldsAll if col.lower() not in column_map]
res2 = res[matched_columns.keys()] res2 = res[matched_columns.keys()]
res2 = res2.rename(columns=matched_columns) res2 = res2.rename(columns=matched_columns)
res2.drop_duplicates(subset=["id"], inplace=True) res2.drop_duplicates(subset=["id"], inplace=True)
for column in res2.columns: for column in list(res2.columns):
k = column.lower() k = column.lower()
if field_keyword(k): if field_keyword(k):
res2[column] = res2[column].apply(lambda v: [kwd for kwd in v.split("###") if kwd]) res2[column] = res2[column].apply(lambda v: [kwd for kwd in v.split("###") if kwd])
elif re.search(r"_feas$", k): elif re.search(r"_feas$", k):
res2[column] = res2[column].apply(lambda v: json.loads(v) if v else {}) res2[column] = res2[column].apply(lambda v: json.loads(v) if v else {})
elif k == "position_int": elif k == "position_int":
def to_position_int(v): def to_position_int(v):
if v: if v:
arr = [int(hex_val, 16) for hex_val in v.split("_")] arr = [int(hex_val, 16) for hex_val in v.split("_")]
@ -685,6 +834,9 @@ class InfinityConnection(DocStoreConnection):
res2[column] = res2[column].apply(lambda v: [int(hex_val, 16) for hex_val in v.split("_")] if v else []) res2[column] = res2[column].apply(lambda v: [int(hex_val, 16) for hex_val in v.split("_")] if v else [])
else: else:
pass pass
for column in ["docnm", "important_keywords", "questions", "content", "authors"]:
if column in res2:
del res2[column]
for column in none_columns: for column in none_columns:
res2[column] = None res2[column] = None

View File

@ -1,11 +0,0 @@
# ragflow-sdk
# build and publish python SDK to pypi.org
```shell
uv build
uv pip install twine
export TWINE_USERNAME="__token__"
export TWINE_PASSWORD=$YOUR_PYPI_API_TOKEN
twine upload dist/*.whl
```

View File

@ -6,7 +6,7 @@ authors = [{ name = "Zhichang Yu", email = "yuzhichang@gmail.com" }]
license = { text = "Apache License, Version 2.0" } license = { text = "Apache License, Version 2.0" }
readme = "README.md" readme = "README.md"
requires-python = ">=3.10,<3.13" requires-python = ">=3.10,<3.13"
dependencies = ["requests>=2.30.0,<3.0.0", "beartype>=0.18.5,<0.19.0"] dependencies = ["requests>=2.30.0,<3.0.0", "beartype>=0.20.0,<1.0.0"]
[dependency-groups] [dependency-groups]

8
sdk/python/uv.lock generated
View File

@ -13,11 +13,11 @@ wheels = [
[[package]] [[package]]
name = "beartype" name = "beartype"
version = "0.18.5" version = "0.22.6"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/96/15/4e623478a9628ad4cee2391f19aba0b16c1dd6fedcb2a399f0928097b597/beartype-0.18.5.tar.gz", hash = "sha256:264ddc2f1da9ec94ff639141fbe33d22e12a9f75aa863b83b7046ffff1381927", size = 1193506, upload-time = "2024-04-21T07:25:58.64Z" } sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/88/e2/105ceb1704cb80fe4ab3872529ab7b6f365cf7c74f725e6132d0efcf1560/beartype-0.22.6.tar.gz", hash = "sha256:97fbda69c20b48c5780ac2ca60ce3c1bb9af29b3a1a0216898ffabdd523e48f4", size = 1588975, upload-time = "2025-11-20T04:47:14.736Z" }
wheels = [ wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/64/43/7a1259741bd989723272ac7d381a43be932422abcff09a1d9f7ba212cb74/beartype-0.18.5-py3-none-any.whl", hash = "sha256:5301a14f2a9a5540fe47ec6d34d758e9cd8331d36c4760fc7a5499ab86310089", size = 917762, upload-time = "2024-04-21T07:25:55.758Z" }, { url = "https://pypi.tuna.tsinghua.edu.cn/packages/98/c9/ceecc71fe2c9495a1d8e08d44f5f31f5bca1350d5b2e27a4b6265424f59e/beartype-0.22.6-py3-none-any.whl", hash = "sha256:0584bc46a2ea2a871509679278cda992eadde676c01356ab0ac77421f3c9a093", size = 1324807, upload-time = "2025-11-20T04:47:11.837Z" },
] ]
[[package]] [[package]]
@ -375,7 +375,7 @@ test = [
[package.metadata] [package.metadata]
requires-dist = [ requires-dist = [
{ name = "beartype", specifier = ">=0.18.5,<0.19.0" }, { name = "beartype", specifier = ">=0.20.0,<1.0.0" },
{ name = "requests", specifier = ">=2.30.0,<3.0.0" }, { name = "requests", specifier = ">=2.30.0,<3.0.0" },
] ]

View File

@ -93,8 +93,9 @@ class TestChunksList:
({"keywords": None}, 5), ({"keywords": None}, 5),
({"keywords": ""}, 5), ({"keywords": ""}, 5),
({"keywords": "1"}, 1), ({"keywords": "1"}, 1),
pytest.param({"keywords": "chunk"}, 4, marks=pytest.mark.skipif(os.getenv("DOC_ENGINE") == "infinity", reason="issues/6509")), ({"keywords": "chunk"}, 4),
({"keywords": "ragflow"}, 1), pytest.param({"keywords": "ragflow"}, 1, marks=pytest.mark.skipif(os.getenv("DOC_ENGINE") == "infinity", reason="issues/6509")),
pytest.param({"keywords": "ragflow"}, 5, marks=pytest.mark.skipif(os.getenv("DOC_ENGINE") != "infinity", reason="issues/6509")),
({"keywords": "unknown"}, 0), ({"keywords": "unknown"}, 0),
], ],
) )

View File

@ -47,7 +47,7 @@ class TestUpdatedChunk:
@pytest.mark.parametrize( @pytest.mark.parametrize(
"payload, expected_code, expected_message", "payload, expected_code, expected_message",
[ [
({"content": None}, 100, "TypeError('expected string or bytes-like object')"), pytest.param({"content": None}, 0, "", marks=pytest.mark.skipif(os.getenv("DOC_ENGINE") == "infinity", reason="issues/6509")),
pytest.param( pytest.param(
{"content": ""}, {"content": ""},
100, 100,

View File

@ -76,8 +76,9 @@ class TestChunksList:
({"keywords": None}, 5), ({"keywords": None}, 5),
({"keywords": ""}, 5), ({"keywords": ""}, 5),
({"keywords": "1"}, 1), ({"keywords": "1"}, 1),
pytest.param({"keywords": "chunk"}, 4, marks=pytest.mark.skipif(os.getenv("DOC_ENGINE") == "infinity", reason="issues/6509")), ({"keywords": "chunk"}, 4),
({"keywords": "ragflow"}, 1), pytest.param({"keywords": "ragflow"}, 1, marks=pytest.mark.skipif(os.getenv("DOC_ENGINE") == "infinity", reason="issues/6509")),
pytest.param({"keywords": "ragflow"}, 5, marks=pytest.mark.skipif(os.getenv("DOC_ENGINE") != "infinity", reason="issues/6509")),
({"keywords": "unknown"}, 0), ({"keywords": "unknown"}, 0),
], ],
) )

View File

@ -25,7 +25,7 @@ class TestUpdatedChunk:
@pytest.mark.parametrize( @pytest.mark.parametrize(
"payload, expected_message", "payload, expected_message",
[ [
({"content": None}, "TypeError('expected string or bytes-like object')"), ({"content": None}, ""),
pytest.param( pytest.param(
{"content": ""}, {"content": ""},
"""APIRequestFailedError(\'Error code: 400, with error text {"error":{"code":"1213","message":"未正常接收到prompt参数。"}}\')""", """APIRequestFailedError(\'Error code: 400, with error text {"error":{"code":"1213","message":"未正常接收到prompt参数。"}}\')""",

4
uv.lock generated
View File

@ -1494,7 +1494,7 @@ name = "exceptiongroup"
version = "1.3.0" version = "1.3.0"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
dependencies = [ dependencies = [
{ name = "typing-extensions", marker = "python_full_version < '3.11'" }, { name = "typing-extensions" },
] ]
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0b/9f/a65090624ecf468cdca03533906e7c69ed7588582240cfe7cc9e770b50eb/exceptiongroup-1.3.0.tar.gz", hash = "sha256:b241f5885f560bc56a59ee63ca4c6a8bfa46ae4ad651af316d4e81817bb9fd88", size = 29749, upload-time = "2025-05-10T17:42:51.123Z" } sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0b/9f/a65090624ecf468cdca03533906e7c69ed7588582240cfe7cc9e770b50eb/exceptiongroup-1.3.0.tar.gz", hash = "sha256:b241f5885f560bc56a59ee63ca4c6a8bfa46ae4ad651af316d4e81817bb9fd88", size = 29749, upload-time = "2025-05-10T17:42:51.123Z" }
wheels = [ wheels = [
@ -5371,6 +5371,7 @@ dependencies = [
{ name = "elastic-transport" }, { name = "elastic-transport" },
{ name = "elasticsearch" }, { name = "elasticsearch" },
{ name = "elasticsearch-dsl" }, { name = "elasticsearch-dsl" },
{ name = "exceptiongroup" },
{ name = "extract-msg" }, { name = "extract-msg" },
{ name = "filelock" }, { name = "filelock" },
{ name = "flasgger" }, { name = "flasgger" },
@ -5532,6 +5533,7 @@ requires-dist = [
{ name = "elastic-transport", specifier = "==8.12.0" }, { name = "elastic-transport", specifier = "==8.12.0" },
{ name = "elasticsearch", specifier = "==8.12.1" }, { name = "elasticsearch", specifier = "==8.12.1" },
{ name = "elasticsearch-dsl", specifier = "==8.12.0" }, { name = "elasticsearch-dsl", specifier = "==8.12.0" },
{ name = "exceptiongroup", specifier = ">=1.3.0,<2.0.0" },
{ name = "extract-msg", specifier = ">=0.39.0" }, { name = "extract-msg", specifier = ">=0.39.0" },
{ name = "filelock", specifier = "==3.15.4" }, { name = "filelock", specifier = "==3.15.4" },
{ name = "flasgger", specifier = ">=0.9.7.1,<0.10.0" }, { name = "flasgger", specifier = ">=0.9.7.1,<0.10.0" },