Compare commits

...

2 Commits

Author SHA1 Message Date
2fd5ac1031 Feat: Add Webdav storage as data source (#11422)
### What problem does this PR solve?

This PR adds webdav storage as data source for data sync service.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
2025-11-26 14:14:42 +08:00
40e84ca41a Use Infinity single-field-multi-index (#11444)
### What problem does this PR solve?

Use Infinity single-field-multi-index

### Type of change

- [x] Refactoring
- [x] Performance Improvement
2025-11-26 11:06:37 +08:00
30 changed files with 1089 additions and 147 deletions

View File

@ -193,7 +193,7 @@ jobs:
echo "HOST_ADDRESS=http://host.docker.internal:${SVR_HTTP_PORT}" >> ${GITHUB_ENV} echo "HOST_ADDRESS=http://host.docker.internal:${SVR_HTTP_PORT}" >> ${GITHUB_ENV}
sudo docker compose -f docker/docker-compose.yml -p ${GITHUB_RUN_ID} up -d sudo docker compose -f docker/docker-compose.yml -p ${GITHUB_RUN_ID} up -d
uv sync --python 3.10 --only-group test --no-default-groups --frozen && uv pip install sdk/python uv sync --python 3.10 --only-group test --no-default-groups --frozen && uv pip install sdk/python --group test
- name: Run sdk tests against Elasticsearch - name: Run sdk tests against Elasticsearch
run: | run: |

View File

@ -8,7 +8,7 @@ readme = "README.md"
requires-python = ">=3.10,<3.13" requires-python = ">=3.10,<3.13"
dependencies = [ dependencies = [
"requests>=2.30.0,<3.0.0", "requests>=2.30.0,<3.0.0",
"beartype>=0.18.5,<0.19.0", "beartype>=0.20.0,<1.0.0",
"pycryptodomex>=3.10.0", "pycryptodomex>=3.10.0",
"lark>=1.1.0", "lark>=1.1.0",
] ]

298
admin/client/uv.lock generated Normal file
View File

@ -0,0 +1,298 @@
version = 1
revision = 3
requires-python = ">=3.10, <3.13"
[[package]]
name = "beartype"
version = "0.22.6"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/88/e2/105ceb1704cb80fe4ab3872529ab7b6f365cf7c74f725e6132d0efcf1560/beartype-0.22.6.tar.gz", hash = "sha256:97fbda69c20b48c5780ac2ca60ce3c1bb9af29b3a1a0216898ffabdd523e48f4", size = 1588975, upload-time = "2025-11-20T04:47:14.736Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/98/c9/ceecc71fe2c9495a1d8e08d44f5f31f5bca1350d5b2e27a4b6265424f59e/beartype-0.22.6-py3-none-any.whl", hash = "sha256:0584bc46a2ea2a871509679278cda992eadde676c01356ab0ac77421f3c9a093", size = 1324807, upload-time = "2025-11-20T04:47:11.837Z" },
]
[[package]]
name = "certifi"
version = "2025.11.12"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a2/8c/58f469717fa48465e4a50c014a0400602d3c437d7c0c468e17ada824da3a/certifi-2025.11.12.tar.gz", hash = "sha256:d8ab5478f2ecd78af242878415affce761ca6bc54a22a27e026d7c25357c3316", size = 160538, upload-time = "2025-11-12T02:54:51.517Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/70/7d/9bc192684cea499815ff478dfcdc13835ddf401365057044fb721ec6bddb/certifi-2025.11.12-py3-none-any.whl", hash = "sha256:97de8790030bbd5c2d96b7ec782fc2f7820ef8dba6db909ccf95449f2d062d4b", size = 159438, upload-time = "2025-11-12T02:54:49.735Z" },
]
[[package]]
name = "charset-normalizer"
version = "3.4.4"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/13/69/33ddede1939fdd074bce5434295f38fae7136463422fe4fd3e0e89b98062/charset_normalizer-3.4.4.tar.gz", hash = "sha256:94537985111c35f28720e43603b8e7b43a6ecfb2ce1d3058bbe955b73404e21a", size = 129418, upload-time = "2025-10-14T04:42:32.879Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/1f/b8/6d51fc1d52cbd52cd4ccedd5b5b2f0f6a11bbf6765c782298b0f3e808541/charset_normalizer-3.4.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e824f1492727fa856dd6eda4f7cee25f8518a12f3c4a56a74e8095695089cf6d", size = 209709, upload-time = "2025-10-14T04:40:11.385Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/5c/af/1f9d7f7faafe2ddfb6f72a2e07a548a629c61ad510fe60f9630309908fef/charset_normalizer-3.4.4-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4bd5d4137d500351a30687c2d3971758aac9a19208fc110ccb9d7188fbe709e8", size = 148814, upload-time = "2025-10-14T04:40:13.135Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/79/3d/f2e3ac2bbc056ca0c204298ea4e3d9db9b4afe437812638759db2c976b5f/charset_normalizer-3.4.4-cp310-cp310-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:027f6de494925c0ab2a55eab46ae5129951638a49a34d87f4c3eda90f696b4ad", size = 144467, upload-time = "2025-10-14T04:40:14.728Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ec/85/1bf997003815e60d57de7bd972c57dc6950446a3e4ccac43bc3070721856/charset_normalizer-3.4.4-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f820802628d2694cb7e56db99213f930856014862f3fd943d290ea8438d07ca8", size = 162280, upload-time = "2025-10-14T04:40:16.14Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/3e/8e/6aa1952f56b192f54921c436b87f2aaf7c7a7c3d0d1a765547d64fd83c13/charset_normalizer-3.4.4-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:798d75d81754988d2565bff1b97ba5a44411867c0cf32b77a7e8f8d84796b10d", size = 159454, upload-time = "2025-10-14T04:40:17.567Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/36/3b/60cbd1f8e93aa25d1c669c649b7a655b0b5fb4c571858910ea9332678558/charset_normalizer-3.4.4-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9d1bb833febdff5c8927f922386db610b49db6e0d4f4ee29601d71e7c2694313", size = 153609, upload-time = "2025-10-14T04:40:19.08Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/64/91/6a13396948b8fd3c4b4fd5bc74d045f5637d78c9675585e8e9fbe5636554/charset_normalizer-3.4.4-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:9cd98cdc06614a2f768d2b7286d66805f94c48cde050acdbbb7db2600ab3197e", size = 151849, upload-time = "2025-10-14T04:40:20.607Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/b7/7a/59482e28b9981d105691e968c544cc0df3b7d6133152fb3dcdc8f135da7a/charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:077fbb858e903c73f6c9db43374fd213b0b6a778106bc7032446a8e8b5b38b93", size = 151586, upload-time = "2025-10-14T04:40:21.719Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/92/59/f64ef6a1c4bdd2baf892b04cd78792ed8684fbc48d4c2afe467d96b4df57/charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:244bfb999c71b35de57821b8ea746b24e863398194a4014e4c76adc2bbdfeff0", size = 145290, upload-time = "2025-10-14T04:40:23.069Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/6b/63/3bf9f279ddfa641ffa1962b0db6a57a9c294361cc2f5fcac997049a00e9c/charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:64b55f9dce520635f018f907ff1b0df1fdc31f2795a922fb49dd14fbcdf48c84", size = 163663, upload-time = "2025-10-14T04:40:24.17Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ed/09/c9e38fc8fa9e0849b172b581fd9803bdf6e694041127933934184e19f8c3/charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:faa3a41b2b66b6e50f84ae4a68c64fcd0c44355741c6374813a800cd6695db9e", size = 151964, upload-time = "2025-10-14T04:40:25.368Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/d2/d1/d28b747e512d0da79d8b6a1ac18b7ab2ecfd81b2944c4c710e166d8dd09c/charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:6515f3182dbe4ea06ced2d9e8666d97b46ef4c75e326b79bb624110f122551db", size = 161064, upload-time = "2025-10-14T04:40:26.806Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/bb/9a/31d62b611d901c3b9e5500c36aab0ff5eb442043fb3a1c254200d3d397d9/charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:cc00f04ed596e9dc0da42ed17ac5e596c6ccba999ba6bd92b0e0aef2f170f2d6", size = 155015, upload-time = "2025-10-14T04:40:28.284Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/1f/f3/107e008fa2bff0c8b9319584174418e5e5285fef32f79d8ee6a430d0039c/charset_normalizer-3.4.4-cp310-cp310-win32.whl", hash = "sha256:f34be2938726fc13801220747472850852fe6b1ea75869a048d6f896838c896f", size = 99792, upload-time = "2025-10-14T04:40:29.613Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/eb/66/e396e8a408843337d7315bab30dbf106c38966f1819f123257f5520f8a96/charset_normalizer-3.4.4-cp310-cp310-win_amd64.whl", hash = "sha256:a61900df84c667873b292c3de315a786dd8dac506704dea57bc957bd31e22c7d", size = 107198, upload-time = "2025-10-14T04:40:30.644Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/b5/58/01b4f815bf0312704c267f2ccb6e5d42bcc7752340cd487bc9f8c3710597/charset_normalizer-3.4.4-cp310-cp310-win_arm64.whl", hash = "sha256:cead0978fc57397645f12578bfd2d5ea9138ea0fac82b2f63f7f7c6877986a69", size = 100262, upload-time = "2025-10-14T04:40:32.108Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ed/27/c6491ff4954e58a10f69ad90aca8a1b6fe9c5d3c6f380907af3c37435b59/charset_normalizer-3.4.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6e1fcf0720908f200cd21aa4e6750a48ff6ce4afe7ff5a79a90d5ed8a08296f8", size = 206988, upload-time = "2025-10-14T04:40:33.79Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/94/59/2e87300fe67ab820b5428580a53cad894272dbb97f38a7a814a2a1ac1011/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5f819d5fe9234f9f82d75bdfa9aef3a3d72c4d24a6e57aeaebba32a704553aa0", size = 147324, upload-time = "2025-10-14T04:40:34.961Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/07/fb/0cf61dc84b2b088391830f6274cb57c82e4da8bbc2efeac8c025edb88772/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:a59cb51917aa591b1c4e6a43c132f0cdc3c76dbad6155df4e28ee626cc77a0a3", size = 142742, upload-time = "2025-10-14T04:40:36.105Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/62/8b/171935adf2312cd745d290ed93cf16cf0dfe320863ab7cbeeae1dcd6535f/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:8ef3c867360f88ac904fd3f5e1f902f13307af9052646963ee08ff4f131adafc", size = 160863, upload-time = "2025-10-14T04:40:37.188Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/09/73/ad875b192bda14f2173bfc1bc9a55e009808484a4b256748d931b6948442/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d9e45d7faa48ee908174d8fe84854479ef838fc6a705c9315372eacbc2f02897", size = 157837, upload-time = "2025-10-14T04:40:38.435Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/6d/fc/de9cce525b2c5b94b47c70a4b4fb19f871b24995c728e957ee68ab1671ea/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:840c25fb618a231545cbab0564a799f101b63b9901f2569faecd6b222ac72381", size = 151550, upload-time = "2025-10-14T04:40:40.053Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/55/c2/43edd615fdfba8c6f2dfbd459b25a6b3b551f24ea21981e23fb768503ce1/charset_normalizer-3.4.4-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ca5862d5b3928c4940729dacc329aa9102900382fea192fc5e52eb69d6093815", size = 149162, upload-time = "2025-10-14T04:40:41.163Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/03/86/bde4ad8b4d0e9429a4e82c1e8f5c659993a9a863ad62c7df05cf7b678d75/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d9c7f57c3d666a53421049053eaacdd14bbd0a528e2186fcb2e672effd053bb0", size = 150019, upload-time = "2025-10-14T04:40:42.276Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/1f/86/a151eb2af293a7e7bac3a739b81072585ce36ccfb4493039f49f1d3cae8c/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:277e970e750505ed74c832b4bf75dac7476262ee2a013f5574dd49075879e161", size = 143310, upload-time = "2025-10-14T04:40:43.439Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/b5/fe/43dae6144a7e07b87478fdfc4dbe9efd5defb0e7ec29f5f58a55aeef7bf7/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:31fd66405eaf47bb62e8cd575dc621c56c668f27d46a61d975a249930dd5e2a4", size = 162022, upload-time = "2025-10-14T04:40:44.547Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/80/e6/7aab83774f5d2bca81f42ac58d04caf44f0cc2b65fc6db2b3b2e8a05f3b3/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:0d3d8f15c07f86e9ff82319b3d9ef6f4bf907608f53fe9d92b28ea9ae3d1fd89", size = 149383, upload-time = "2025-10-14T04:40:46.018Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/4f/e8/b289173b4edae05c0dde07f69f8db476a0b511eac556dfe0d6bda3c43384/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:9f7fcd74d410a36883701fafa2482a6af2ff5ba96b9a620e9e0721e28ead5569", size = 159098, upload-time = "2025-10-14T04:40:47.081Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/d8/df/fe699727754cae3f8478493c7f45f777b17c3ef0600e28abfec8619eb49c/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ebf3e58c7ec8a8bed6d66a75d7fb37b55e5015b03ceae72a8e7c74495551e224", size = 152991, upload-time = "2025-10-14T04:40:48.246Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/1a/86/584869fe4ddb6ffa3bd9f491b87a01568797fb9bd8933f557dba9771beaf/charset_normalizer-3.4.4-cp311-cp311-win32.whl", hash = "sha256:eecbc200c7fd5ddb9a7f16c7decb07b566c29fa2161a16cf67b8d068bd21690a", size = 99456, upload-time = "2025-10-14T04:40:49.376Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/65/f6/62fdd5feb60530f50f7e38b4f6a1d5203f4d16ff4f9f0952962c044e919a/charset_normalizer-3.4.4-cp311-cp311-win_amd64.whl", hash = "sha256:5ae497466c7901d54b639cf42d5b8c1b6a4fead55215500d2f486d34db48d016", size = 106978, upload-time = "2025-10-14T04:40:50.844Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/7a/9d/0710916e6c82948b3be62d9d398cb4fcf4e97b56d6a6aeccd66c4b2f2bd5/charset_normalizer-3.4.4-cp311-cp311-win_arm64.whl", hash = "sha256:65e2befcd84bc6f37095f5961e68a6f077bf44946771354a28ad434c2cce0ae1", size = 99969, upload-time = "2025-10-14T04:40:52.272Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/f3/85/1637cd4af66fa687396e757dec650f28025f2a2f5a5531a3208dc0ec43f2/charset_normalizer-3.4.4-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0a98e6759f854bd25a58a73fa88833fba3b7c491169f86ce1180c948ab3fd394", size = 208425, upload-time = "2025-10-14T04:40:53.353Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/9d/6a/04130023fef2a0d9c62d0bae2649b69f7b7d8d24ea5536feef50551029df/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b5b290ccc2a263e8d185130284f8501e3e36c5e02750fc6b6bdeb2e9e96f1e25", size = 148162, upload-time = "2025-10-14T04:40:54.558Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/78/29/62328d79aa60da22c9e0b9a66539feae06ca0f5a4171ac4f7dc285b83688/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74bb723680f9f7a6234dcf67aea57e708ec1fbdf5699fb91dfd6f511b0a320ef", size = 144558, upload-time = "2025-10-14T04:40:55.677Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/86/bb/b32194a4bf15b88403537c2e120b817c61cd4ecffa9b6876e941c3ee38fe/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f1e34719c6ed0b92f418c7c780480b26b5d9c50349e9a9af7d76bf757530350d", size = 161497, upload-time = "2025-10-14T04:40:57.217Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/19/89/a54c82b253d5b9b111dc74aca196ba5ccfcca8242d0fb64146d4d3183ff1/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:2437418e20515acec67d86e12bf70056a33abdacb5cb1655042f6538d6b085a8", size = 159240, upload-time = "2025-10-14T04:40:58.358Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/c0/10/d20b513afe03acc89ec33948320a5544d31f21b05368436d580dec4e234d/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:11d694519d7f29d6cd09f6ac70028dba10f92f6cdd059096db198c283794ac86", size = 153471, upload-time = "2025-10-14T04:40:59.468Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/61/fa/fbf177b55bdd727010f9c0a3c49eefa1d10f960e5f09d1d887bf93c2e698/charset_normalizer-3.4.4-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ac1c4a689edcc530fc9d9aa11f5774b9e2f33f9a0c6a57864e90908f5208d30a", size = 150864, upload-time = "2025-10-14T04:41:00.623Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/05/12/9fbc6a4d39c0198adeebbde20b619790e9236557ca59fc40e0e3cebe6f40/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:21d142cc6c0ec30d2efee5068ca36c128a30b0f2c53c1c07bd78cb6bc1d3be5f", size = 150647, upload-time = "2025-10-14T04:41:01.754Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ad/1f/6a9a593d52e3e8c5d2b167daf8c6b968808efb57ef4c210acb907c365bc4/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:5dbe56a36425d26d6cfb40ce79c314a2e4dd6211d51d6d2191c00bed34f354cc", size = 145110, upload-time = "2025-10-14T04:41:03.231Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/30/42/9a52c609e72471b0fc54386dc63c3781a387bb4fe61c20231a4ebcd58bdd/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:5bfbb1b9acf3334612667b61bd3002196fe2a1eb4dd74d247e0f2a4d50ec9bbf", size = 162839, upload-time = "2025-10-14T04:41:04.715Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/c4/5b/c0682bbf9f11597073052628ddd38344a3d673fda35a36773f7d19344b23/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:d055ec1e26e441f6187acf818b73564e6e6282709e9bcb5b63f5b23068356a15", size = 150667, upload-time = "2025-10-14T04:41:05.827Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/e4/24/a41afeab6f990cf2daf6cb8c67419b63b48cf518e4f56022230840c9bfb2/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:af2d8c67d8e573d6de5bc30cdb27e9b95e49115cd9baad5ddbd1a6207aaa82a9", size = 160535, upload-time = "2025-10-14T04:41:06.938Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/2a/e5/6a4ce77ed243c4a50a1fecca6aaaab419628c818a49434be428fe24c9957/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:780236ac706e66881f3b7f2f32dfe90507a09e67d1d454c762cf642e6e1586e0", size = 154816, upload-time = "2025-10-14T04:41:08.101Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/a8/ef/89297262b8092b312d29cdb2517cb1237e51db8ecef2e9af5edbe7b683b1/charset_normalizer-3.4.4-cp312-cp312-win32.whl", hash = "sha256:5833d2c39d8896e4e19b689ffc198f08ea58116bee26dea51e362ecc7cd3ed26", size = 99694, upload-time = "2025-10-14T04:41:09.23Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/3d/2d/1e5ed9dd3b3803994c155cd9aacb60c82c331bad84daf75bcb9c91b3295e/charset_normalizer-3.4.4-cp312-cp312-win_amd64.whl", hash = "sha256:a79cfe37875f822425b89a82333404539ae63dbdddf97f84dcbc3d339aae9525", size = 107131, upload-time = "2025-10-14T04:41:10.467Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/d0/d9/0ed4c7098a861482a7b6a95603edce4c0d9db2311af23da1fb2b75ec26fc/charset_normalizer-3.4.4-cp312-cp312-win_arm64.whl", hash = "sha256:376bec83a63b8021bb5c8ea75e21c4ccb86e7e45ca4eb81146091b56599b80c3", size = 100390, upload-time = "2025-10-14T04:41:11.915Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/0a/4c/925909008ed5a988ccbb72dcc897407e5d6d3bd72410d69e051fc0c14647/charset_normalizer-3.4.4-py3-none-any.whl", hash = "sha256:7a32c560861a02ff789ad905a2fe94e3f840803362c84fecf1851cb4cf3dc37f", size = 53402, upload-time = "2025-10-14T04:42:31.76Z" },
]
[[package]]
name = "colorama"
version = "0.4.6"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload-time = "2022-10-25T02:36:22.414Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" },
]
[[package]]
name = "exceptiongroup"
version = "1.3.1"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
dependencies = [
{ name = "typing-extensions" },
]
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/50/79/66800aadf48771f6b62f7eb014e352e5d06856655206165d775e675a02c9/exceptiongroup-1.3.1.tar.gz", hash = "sha256:8b412432c6055b0b7d14c310000ae93352ed6754f70fa8f7c34141f91c4e3219", size = 30371, upload-time = "2025-11-21T23:01:54.787Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/8a/0e/97c33bf5009bdbac74fd2beace167cab3f978feb69cc36f1ef79360d6c4e/exceptiongroup-1.3.1-py3-none-any.whl", hash = "sha256:a7a39a3bd276781e98394987d3a5701d0c4edffb633bb7a5144577f82c773598", size = 16740, upload-time = "2025-11-21T23:01:53.443Z" },
]
[[package]]
name = "idna"
version = "3.11"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/6f/6d/0703ccc57f3a7233505399edb88de3cbd678da106337b9fcde432b65ed60/idna-3.11.tar.gz", hash = "sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902", size = 194582, upload-time = "2025-10-12T14:55:20.501Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008, upload-time = "2025-10-12T14:55:18.883Z" },
]
[[package]]
name = "iniconfig"
version = "2.3.0"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/72/34/14ca021ce8e5dfedc35312d08ba8bf51fdd999c576889fc2c24cb97f4f10/iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", size = 20503, upload-time = "2025-10-18T21:55:43.219Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" },
]
[[package]]
name = "lark"
version = "1.3.1"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/da/34/28fff3ab31ccff1fd4f6c7c7b0ceb2b6968d8ea4950663eadcb5720591a0/lark-1.3.1.tar.gz", hash = "sha256:b426a7a6d6d53189d318f2b6236ab5d6429eaf09259f1ca33eb716eed10d2905", size = 382732, upload-time = "2025-10-27T18:25:56.653Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/82/3d/14ce75ef66813643812f3093ab17e46d3a206942ce7376d31ec2d36229e7/lark-1.3.1-py3-none-any.whl", hash = "sha256:c629b661023a014c37da873b4ff58a817398d12635d3bbb2c5a03be7fe5d1e12", size = 113151, upload-time = "2025-10-27T18:25:54.882Z" },
]
[[package]]
name = "packaging"
version = "25.0"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a1/d4/1fc4078c65507b51b96ca8f8c3ba19e6a61c8253c72794544580a7b6c24d/packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f", size = 165727, upload-time = "2025-04-19T11:48:59.673Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469, upload-time = "2025-04-19T11:48:57.875Z" },
]
[[package]]
name = "pluggy"
version = "1.6.0"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412, upload-time = "2025-05-15T12:30:07.975Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" },
]
[[package]]
name = "pycryptodomex"
version = "3.23.0"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c9/85/e24bf90972a30b0fcd16c73009add1d7d7cd9140c2498a68252028899e41/pycryptodomex-3.23.0.tar.gz", hash = "sha256:71909758f010c82bc99b0abf4ea12012c98962fbf0583c2164f8b84533c2e4da", size = 4922157, upload-time = "2025-05-17T17:23:41.434Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/dd/9c/1a8f35daa39784ed8adf93a694e7e5dc15c23c741bbda06e1d45f8979e9e/pycryptodomex-3.23.0-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:06698f957fe1ab229a99ba2defeeae1c09af185baa909a31a5d1f9d42b1aaed6", size = 2499240, upload-time = "2025-05-17T17:22:46.953Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/7a/62/f5221a191a97157d240cf6643747558759126c76ee92f29a3f4aee3197a5/pycryptodomex-3.23.0-cp37-abi3-macosx_10_9_x86_64.whl", hash = "sha256:b2c2537863eccef2d41061e82a881dcabb04944c5c06c5aa7110b577cc487545", size = 1644042, upload-time = "2025-05-17T17:22:49.098Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/8c/fd/5a054543c8988d4ed7b612721d7e78a4b9bf36bc3c5ad45ef45c22d0060e/pycryptodomex-3.23.0-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:43c446e2ba8df8889e0e16f02211c25b4934898384c1ec1ec04d7889c0333587", size = 2186227, upload-time = "2025-05-17T17:22:51.139Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/c8/a9/8862616a85cf450d2822dbd4fff1fcaba90877907a6ff5bc2672cafe42f8/pycryptodomex-3.23.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f489c4765093fb60e2edafdf223397bc716491b2b69fe74367b70d6999257a5c", size = 2272578, upload-time = "2025-05-17T17:22:53.676Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/46/9f/bda9c49a7c1842820de674ab36c79f4fbeeee03f8ff0e4f3546c3889076b/pycryptodomex-3.23.0-cp37-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bdc69d0d3d989a1029df0eed67cc5e8e5d968f3724f4519bd03e0ec68df7543c", size = 2312166, upload-time = "2025-05-17T17:22:56.585Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/03/cc/870b9bf8ca92866ca0186534801cf8d20554ad2a76ca959538041b7a7cf4/pycryptodomex-3.23.0-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:6bbcb1dd0f646484939e142462d9e532482bc74475cecf9c4903d4e1cd21f003", size = 2185467, upload-time = "2025-05-17T17:22:59.237Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/96/e3/ce9348236d8e669fea5dd82a90e86be48b9c341210f44e25443162aba187/pycryptodomex-3.23.0-cp37-abi3-musllinux_1_2_i686.whl", hash = "sha256:8a4fcd42ccb04c31268d1efeecfccfd1249612b4de6374205376b8f280321744", size = 2346104, upload-time = "2025-05-17T17:23:02.112Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/a5/e9/e869bcee87beb89040263c416a8a50204f7f7a83ac11897646c9e71e0daf/pycryptodomex-3.23.0-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:55ccbe27f049743a4caf4f4221b166560d3438d0b1e5ab929e07ae1702a4d6fd", size = 2271038, upload-time = "2025-05-17T17:23:04.872Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/8d/67/09ee8500dd22614af5fbaa51a4aee6e342b5fa8aecf0a6cb9cbf52fa6d45/pycryptodomex-3.23.0-cp37-abi3-win32.whl", hash = "sha256:189afbc87f0b9f158386bf051f720e20fa6145975f1e76369303d0f31d1a8d7c", size = 1771969, upload-time = "2025-05-17T17:23:07.115Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/69/96/11f36f71a865dd6df03716d33bd07a67e9d20f6b8d39820470b766af323c/pycryptodomex-3.23.0-cp37-abi3-win_amd64.whl", hash = "sha256:52e5ca58c3a0b0bd5e100a9fbc8015059b05cffc6c66ce9d98b4b45e023443b9", size = 1803124, upload-time = "2025-05-17T17:23:09.267Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/f9/93/45c1cdcbeb182ccd2e144c693eaa097763b08b38cded279f0053ed53c553/pycryptodomex-3.23.0-cp37-abi3-win_arm64.whl", hash = "sha256:02d87b80778c171445d67e23d1caef279bf4b25c3597050ccd2e13970b57fd51", size = 1707161, upload-time = "2025-05-17T17:23:11.414Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/f3/b8/3e76d948c3c4ac71335bbe75dac53e154b40b0f8f1f022dfa295257a0c96/pycryptodomex-3.23.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:ebfff755c360d674306e5891c564a274a47953562b42fb74a5c25b8fc1fb1cb5", size = 1627695, upload-time = "2025-05-17T17:23:17.38Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/6a/cf/80f4297a4820dfdfd1c88cf6c4666a200f204b3488103d027b5edd9176ec/pycryptodomex-3.23.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eca54f4bb349d45afc17e3011ed4264ef1cc9e266699874cdd1349c504e64798", size = 1675772, upload-time = "2025-05-17T17:23:19.202Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/d1/42/1e969ee0ad19fe3134b0e1b856c39bd0b70d47a4d0e81c2a8b05727394c9/pycryptodomex-3.23.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4f2596e643d4365e14d0879dc5aafe6355616c61c2176009270f3048f6d9a61f", size = 1668083, upload-time = "2025-05-17T17:23:21.867Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/6e/c3/1de4f7631fea8a992a44ba632aa40e0008764c0fb9bf2854b0acf78c2cf2/pycryptodomex-3.23.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fdfac7cda115bca3a5abb2f9e43bc2fb66c2b65ab074913643803ca7083a79ea", size = 1706056, upload-time = "2025-05-17T17:23:24.031Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/f2/5f/af7da8e6f1e42b52f44a24d08b8e4c726207434e2593732d39e7af5e7256/pycryptodomex-3.23.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:14c37aaece158d0ace436f76a7bb19093db3b4deade9797abfc39ec6cd6cc2fe", size = 1806478, upload-time = "2025-05-17T17:23:26.066Z" },
]
[[package]]
name = "pygments"
version = "2.19.2"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b0/77/a5b8c569bf593b0140bde72ea885a803b82086995367bf2037de0159d924/pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", size = 4968631, upload-time = "2025-06-21T13:39:12.283Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" },
]
[[package]]
name = "pytest"
version = "9.0.1"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
dependencies = [
{ name = "colorama", marker = "sys_platform == 'win32'" },
{ name = "exceptiongroup", marker = "python_full_version < '3.11'" },
{ name = "iniconfig" },
{ name = "packaging" },
{ name = "pluggy" },
{ name = "pygments" },
{ name = "tomli", marker = "python_full_version < '3.11'" },
]
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/07/56/f013048ac4bc4c1d9be45afd4ab209ea62822fb1598f40687e6bf45dcea4/pytest-9.0.1.tar.gz", hash = "sha256:3e9c069ea73583e255c3b21cf46b8d3c56f6e3a1a8f6da94ccb0fcf57b9d73c8", size = 1564125, upload-time = "2025-11-12T13:05:09.333Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/0b/8b/6300fb80f858cda1c51ffa17075df5d846757081d11ab4aa35cef9e6258b/pytest-9.0.1-py3-none-any.whl", hash = "sha256:67be0030d194df2dfa7b556f2e56fb3c3315bd5c8822c6951162b92b32ce7dad", size = 373668, upload-time = "2025-11-12T13:05:07.379Z" },
]
[[package]]
name = "ragflow-cli"
version = "0.22.1"
source = { virtual = "." }
dependencies = [
{ name = "beartype" },
{ name = "lark" },
{ name = "pycryptodomex" },
{ name = "requests" },
]
[package.dev-dependencies]
test = [
{ name = "pytest" },
{ name = "requests" },
{ name = "requests-toolbelt" },
]
[package.metadata]
requires-dist = [
{ name = "beartype", specifier = ">=0.20.0,<1.0.0" },
{ name = "lark", specifier = ">=1.1.0" },
{ name = "pycryptodomex", specifier = ">=3.10.0" },
{ name = "requests", specifier = ">=2.30.0,<3.0.0" },
]
[package.metadata.requires-dev]
test = [
{ name = "pytest", specifier = ">=8.3.5" },
{ name = "requests", specifier = ">=2.32.3" },
{ name = "requests-toolbelt", specifier = ">=1.0.0" },
]
[[package]]
name = "requests"
version = "2.32.5"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
dependencies = [
{ name = "certifi" },
{ name = "charset-normalizer" },
{ name = "idna" },
{ name = "urllib3" },
]
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c9/74/b3ff8e6c8446842c3f5c837e9c3dfcfe2018ea6ecef224c710c85ef728f4/requests-2.32.5.tar.gz", hash = "sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf", size = 134517, upload-time = "2025-08-18T20:46:02.573Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", size = 64738, upload-time = "2025-08-18T20:46:00.542Z" },
]
[[package]]
name = "requests-toolbelt"
version = "1.0.0"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
dependencies = [
{ name = "requests" },
]
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f3/61/d7545dafb7ac2230c70d38d31cbfe4cc64f7144dc41f6e4e4b78ecd9f5bb/requests-toolbelt-1.0.0.tar.gz", hash = "sha256:7681a0a3d047012b5bdc0ee37d7f8f07ebe76ab08caeccfc3921ce23c88d5bc6", size = 206888, upload-time = "2023-05-01T04:11:33.229Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/3f/51/d4db610ef29373b879047326cbf6fa98b6c1969d6f6dc423279de2b1be2c/requests_toolbelt-1.0.0-py2.py3-none-any.whl", hash = "sha256:cccfdd665f0a24fcf4726e690f65639d272bb0637b9b92dfd91a5568ccf6bd06", size = 54481, upload-time = "2023-05-01T04:11:28.427Z" },
]
[[package]]
name = "tomli"
version = "2.3.0"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/52/ed/3f73f72945444548f33eba9a87fc7a6e969915e7b1acc8260b30e1f76a2f/tomli-2.3.0.tar.gz", hash = "sha256:64be704a875d2a59753d80ee8a533c3fe183e3f06807ff7dc2232938ccb01549", size = 17392, upload-time = "2025-10-08T22:01:47.119Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/b3/2e/299f62b401438d5fe1624119c723f5d877acc86a4c2492da405626665f12/tomli-2.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:88bd15eb972f3664f5ed4b57c1634a97153b4bac4479dcb6a495f41921eb7f45", size = 153236, upload-time = "2025-10-08T22:01:00.137Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/86/7f/d8fffe6a7aefdb61bced88fcb5e280cfd71e08939da5894161bd71bea022/tomli-2.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:883b1c0d6398a6a9d29b508c331fa56adbcdff647f6ace4dfca0f50e90dfd0ba", size = 148084, upload-time = "2025-10-08T22:01:01.63Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/47/5c/24935fb6a2ee63e86d80e4d3b58b222dafaf438c416752c8b58537c8b89a/tomli-2.3.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d1381caf13ab9f300e30dd8feadb3de072aeb86f1d34a8569453ff32a7dea4bf", size = 234832, upload-time = "2025-10-08T22:01:02.543Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/89/da/75dfd804fc11e6612846758a23f13271b76d577e299592b4371a4ca4cd09/tomli-2.3.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a0e285d2649b78c0d9027570d4da3425bdb49830a6156121360b3f8511ea3441", size = 242052, upload-time = "2025-10-08T22:01:03.836Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/70/8c/f48ac899f7b3ca7eb13af73bacbc93aec37f9c954df3c08ad96991c8c373/tomli-2.3.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0a154a9ae14bfcf5d8917a59b51ffd5a3ac1fd149b71b47a3a104ca4edcfa845", size = 239555, upload-time = "2025-10-08T22:01:04.834Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ba/28/72f8afd73f1d0e7829bfc093f4cb98ce0a40ffc0cc997009ee1ed94ba705/tomli-2.3.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:74bf8464ff93e413514fefd2be591c3b0b23231a77f901db1eb30d6f712fc42c", size = 245128, upload-time = "2025-10-08T22:01:05.84Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/b6/eb/a7679c8ac85208706d27436e8d421dfa39d4c914dcf5fa8083a9305f58d9/tomli-2.3.0-cp311-cp311-win32.whl", hash = "sha256:00b5f5d95bbfc7d12f91ad8c593a1659b6387b43f054104cda404be6bda62456", size = 96445, upload-time = "2025-10-08T22:01:06.896Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/0a/fe/3d3420c4cb1ad9cb462fb52967080575f15898da97e21cb6f1361d505383/tomli-2.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:4dc4ce8483a5d429ab602f111a93a6ab1ed425eae3122032db7e9acf449451be", size = 107165, upload-time = "2025-10-08T22:01:08.107Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ff/b7/40f36368fcabc518bb11c8f06379a0fd631985046c038aca08c6d6a43c6e/tomli-2.3.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d7d86942e56ded512a594786a5ba0a5e521d02529b3826e7761a05138341a2ac", size = 154891, upload-time = "2025-10-08T22:01:09.082Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/f9/3f/d9dd692199e3b3aab2e4e4dd948abd0f790d9ded8cd10cbaae276a898434/tomli-2.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:73ee0b47d4dad1c5e996e3cd33b8a76a50167ae5f96a2607cbe8cc773506ab22", size = 148796, upload-time = "2025-10-08T22:01:10.266Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/60/83/59bff4996c2cf9f9387a0f5a3394629c7efa5ef16142076a23a90f1955fa/tomli-2.3.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:792262b94d5d0a466afb5bc63c7daa9d75520110971ee269152083270998316f", size = 242121, upload-time = "2025-10-08T22:01:11.332Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/45/e5/7c5119ff39de8693d6baab6c0b6dcb556d192c165596e9fc231ea1052041/tomli-2.3.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4f195fe57ecceac95a66a75ac24d9d5fbc98ef0962e09b2eddec5d39375aae52", size = 250070, upload-time = "2025-10-08T22:01:12.498Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/45/12/ad5126d3a278f27e6701abde51d342aa78d06e27ce2bb596a01f7709a5a2/tomli-2.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e31d432427dcbf4d86958c184b9bfd1e96b5b71f8eb17e6d02531f434fd335b8", size = 245859, upload-time = "2025-10-08T22:01:13.551Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/fb/a1/4d6865da6a71c603cfe6ad0e6556c73c76548557a8d658f9e3b142df245f/tomli-2.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7b0882799624980785240ab732537fcfc372601015c00f7fc367c55308c186f6", size = 250296, upload-time = "2025-10-08T22:01:14.614Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/a0/b7/a7a7042715d55c9ba6e8b196d65d2cb662578b4d8cd17d882d45322b0d78/tomli-2.3.0-cp312-cp312-win32.whl", hash = "sha256:ff72b71b5d10d22ecb084d345fc26f42b5143c5533db5e2eaba7d2d335358876", size = 97124, upload-time = "2025-10-08T22:01:15.629Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/06/1e/f22f100db15a68b520664eb3328fb0ae4e90530887928558112c8d1f4515/tomli-2.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:1cb4ed918939151a03f33d4242ccd0aa5f11b3547d0cf30f7c74a408a5b99878", size = 107698, upload-time = "2025-10-08T22:01:16.51Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/77/b8/0135fadc89e73be292b473cb820b4f5a08197779206b33191e801feeae40/tomli-2.3.0-py3-none-any.whl", hash = "sha256:e95b1af3c5b07d9e643909b5abbec77cd9f1217e6d0bca72b0234736b9fb1f1b", size = 14408, upload-time = "2025-10-08T22:01:46.04Z" },
]
[[package]]
name = "typing-extensions"
version = "4.15.0"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/72/94/1a15dd82efb362ac84269196e94cf00f187f7ed21c242792a923cdb1c61f/typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466", size = 109391, upload-time = "2025-08-25T13:49:26.313Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614, upload-time = "2025-08-25T13:49:24.86Z" },
]
[[package]]
name = "urllib3"
version = "2.5.0"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/15/22/9ee70a2574a4f4599c47dd506532914ce044817c7752a79b6a51286319bc/urllib3-2.5.0.tar.gz", hash = "sha256:3fc47733c7e419d4bc3f6b3dc2b4f890bb743906a30d56ba4a5bfa4bbff92760", size = 393185, upload-time = "2025-06-18T14:07:41.644Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/a7/c2/fe1e52489ae3122415c51f387e221dd0773709bad6c6cdaa599e8a2c5185/urllib3-2.5.0-py3-none-any.whl", hash = "sha256:e6b01673c0fa6a13e374b50871808eb3bf7046c4b125b216f6bf1cc604cff0dc", size = 129795, upload-time = "2025-06-18T14:07:40.39Z" },
]

View File

@ -20,6 +20,7 @@ import logging
import time import time
import threading import threading
import traceback import traceback
import faulthandler
from flask import Flask from flask import Flask
from flask_login import LoginManager from flask_login import LoginManager
@ -37,6 +38,7 @@ from common.versions import get_ragflow_version
stop_event = threading.Event() stop_event = threading.Event()
if __name__ == '__main__': if __name__ == '__main__':
faulthandler.enable()
init_root_logger("admin_service") init_root_logger("admin_service")
logging.info(r""" logging.info(r"""
____ ___ ______________ ___ __ _ ____ ___ ______________ ___ __ _

View File

@ -1289,7 +1289,7 @@ async def update_chunk(tenant_id, dataset_id, document_id, chunk_id):
return get_error_data_result(message=f"You don't own the document {document_id}.") return get_error_data_result(message=f"You don't own the document {document_id}.")
doc = doc[0] doc = doc[0]
req = await request_json() req = await request_json()
if "content" in req: if "content" in req and req["content"] is not None:
content = req["content"] content = req["content"]
else: else:
content = chunk.get("content_with_weight", "") content = chunk.get("content_with_weight", "")

View File

@ -20,7 +20,6 @@
from common.log_utils import init_root_logger from common.log_utils import init_root_logger
from plugin import GlobalPluginManager from plugin import GlobalPluginManager
init_root_logger("ragflow_server")
import logging import logging
import os import os
@ -30,6 +29,7 @@ import time
import traceback import traceback
import threading import threading
import uuid import uuid
import faulthandler
from api.apps import app, smtp_mail_server from api.apps import app, smtp_mail_server
from api.db.runtime_config import RuntimeConfig from api.db.runtime_config import RuntimeConfig
@ -73,6 +73,8 @@ def signal_handler(sig, frame):
sys.exit(0) sys.exit(0)
if __name__ == '__main__': if __name__ == '__main__':
faulthandler.enable()
init_root_logger("ragflow_server")
logging.info(r""" logging.info(r"""
____ ___ ______ ______ __ ____ ___ ______ ______ __
/ __ \ / | / ____// ____// /____ _ __ / __ \ / | / ____// ____// /____ _ __

View File

@ -89,7 +89,8 @@ def get_data_error_result(code=RetCode.DATA_ERROR, message="Sorry! Data missing!
def server_error_response(e): def server_error_response(e):
logging.exception(e) # Quart invokes this handler outside the original except block, so we must pass exc_info manually.
logging.error("Unhandled exception during request", exc_info=(type(e), e, e.__traceback__))
try: try:
msg = repr(e).lower() msg = repr(e).lower()
if getattr(e, "code", None) == 401 or ("unauthorized" in msg) or ("401" in msg): if getattr(e, "code", None) == 401 or ("unauthorized" in msg) or ("401" in msg):

View File

@ -118,6 +118,7 @@ class FileSource(StrEnum):
SHAREPOINT = "sharepoint" SHAREPOINT = "sharepoint"
SLACK = "slack" SLACK = "slack"
TEAMS = "teams" TEAMS = "teams"
WEBDAV = "webdav"
MOODLE = "moodle" MOODLE = "moodle"
DROPBOX = "dropbox" DROPBOX = "dropbox"

View File

@ -14,6 +14,7 @@ from .google_drive.connector import GoogleDriveConnector
from .jira.connector import JiraConnector from .jira.connector import JiraConnector
from .sharepoint_connector import SharePointConnector from .sharepoint_connector import SharePointConnector
from .teams_connector import TeamsConnector from .teams_connector import TeamsConnector
from .webdav_connector import WebDAVConnector
from .moodle_connector import MoodleConnector from .moodle_connector import MoodleConnector
from .config import BlobType, DocumentSource from .config import BlobType, DocumentSource
from .models import Document, TextSection, ImageSection, BasicExpertInfo from .models import Document, TextSection, ImageSection, BasicExpertInfo
@ -37,6 +38,7 @@ __all__ = [
"JiraConnector", "JiraConnector",
"SharePointConnector", "SharePointConnector",
"TeamsConnector", "TeamsConnector",
"WebDAVConnector",
"MoodleConnector", "MoodleConnector",
"BlobType", "BlobType",
"DocumentSource", "DocumentSource",

View File

@ -48,6 +48,7 @@ class DocumentSource(str, Enum):
GOOGLE_DRIVE = "google_drive" GOOGLE_DRIVE = "google_drive"
GMAIL = "gmail" GMAIL = "gmail"
DISCORD = "discord" DISCORD = "discord"
WEBDAV = "webdav"
MOODLE = "moodle" MOODLE = "moodle"
S3_COMPATIBLE = "s3_compatible" S3_COMPATIBLE = "s3_compatible"
DROPBOX = "dropbox" DROPBOX = "dropbox"

View File

@ -0,0 +1,370 @@
"""WebDAV connector"""
import logging
import os
from datetime import datetime, timezone
from typing import Any, Optional
from webdav4.client import Client as WebDAVClient
from common.data_source.utils import (
get_file_ext,
)
from common.data_source.config import DocumentSource, INDEX_BATCH_SIZE, BLOB_STORAGE_SIZE_THRESHOLD
from common.data_source.exceptions import (
ConnectorMissingCredentialError,
ConnectorValidationError,
CredentialExpiredError,
InsufficientPermissionsError
)
from common.data_source.interfaces import LoadConnector, PollConnector
from common.data_source.models import Document, SecondsSinceUnixEpoch, GenerateDocumentsOutput
class WebDAVConnector(LoadConnector, PollConnector):
"""WebDAV connector for syncing files from WebDAV servers"""
def __init__(
self,
base_url: str,
remote_path: str = "/",
batch_size: int = INDEX_BATCH_SIZE,
) -> None:
"""Initialize WebDAV connector
Args:
base_url: Base URL of the WebDAV server (e.g., "https://webdav.example.com")
remote_path: Remote path to sync from (default: "/")
batch_size: Number of documents per batch
"""
self.base_url = base_url.rstrip("/")
if not remote_path:
remote_path = "/"
if not remote_path.startswith("/"):
remote_path = f"/{remote_path}"
if remote_path.endswith("/") and remote_path != "/":
remote_path = remote_path.rstrip("/")
self.remote_path = remote_path
self.batch_size = batch_size
self.client: Optional[WebDAVClient] = None
self._allow_images: bool | None = None
self.size_threshold: int | None = BLOB_STORAGE_SIZE_THRESHOLD
def set_allow_images(self, allow_images: bool) -> None:
"""Set whether to process images"""
logging.info(f"Setting allow_images to {allow_images}.")
self._allow_images = allow_images
def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
"""Load credentials and initialize WebDAV client
Args:
credentials: Dictionary containing 'username' and 'password'
Returns:
None
Raises:
ConnectorMissingCredentialError: If required credentials are missing
"""
logging.debug(f"Loading credentials for WebDAV server {self.base_url}")
username = credentials.get("username")
password = credentials.get("password")
if not username or not password:
raise ConnectorMissingCredentialError(
"WebDAV requires 'username' and 'password' credentials"
)
try:
# Initialize WebDAV client
self.client = WebDAVClient(
base_url=self.base_url,
auth=(username, password)
)
# Test connection
self.client.exists(self.remote_path)
except Exception as e:
logging.error(f"Failed to connect to WebDAV server: {e}")
raise ConnectorMissingCredentialError(
f"Failed to authenticate with WebDAV server: {e}"
)
return None
def _list_files_recursive(
self,
path: str,
start: datetime,
end: datetime,
) -> list[tuple[str, dict]]:
"""Recursively list all files in the given path
Args:
path: Path to list files from
start: Start datetime for filtering
end: End datetime for filtering
Returns:
List of tuples containing (file_path, file_info)
"""
if self.client is None:
raise ConnectorMissingCredentialError("WebDAV client not initialized")
files = []
try:
logging.debug(f"Listing directory: {path}")
for item in self.client.ls(path, detail=True):
item_path = item['name']
if item_path == path or item_path == path + '/':
continue
logging.debug(f"Found item: {item_path}, type: {item.get('type')}")
if item.get('type') == 'directory':
try:
files.extend(self._list_files_recursive(item_path, start, end))
except Exception as e:
logging.error(f"Error recursing into directory {item_path}: {e}")
continue
else:
try:
modified_time = item.get('modified')
if modified_time:
if isinstance(modified_time, datetime):
modified = modified_time
if modified.tzinfo is None:
modified = modified.replace(tzinfo=timezone.utc)
elif isinstance(modified_time, str):
try:
modified = datetime.strptime(modified_time, '%a, %d %b %Y %H:%M:%S %Z')
modified = modified.replace(tzinfo=timezone.utc)
except (ValueError, TypeError):
try:
modified = datetime.fromisoformat(modified_time.replace('Z', '+00:00'))
except (ValueError, TypeError):
logging.warning(f"Could not parse modified time for {item_path}: {modified_time}")
modified = datetime.now(timezone.utc)
else:
modified = datetime.now(timezone.utc)
else:
modified = datetime.now(timezone.utc)
logging.debug(f"File {item_path}: modified={modified}, start={start}, end={end}, include={start < modified <= end}")
if start < modified <= end:
files.append((item_path, item))
else:
logging.debug(f"File {item_path} filtered out by time range")
except Exception as e:
logging.error(f"Error processing file {item_path}: {e}")
continue
except Exception as e:
logging.error(f"Error listing directory {path}: {e}")
return files
def _yield_webdav_documents(
self,
start: datetime,
end: datetime,
) -> GenerateDocumentsOutput:
"""Generate documents from WebDAV server
Args:
start: Start datetime for filtering
end: End datetime for filtering
Yields:
Batches of documents
"""
if self.client is None:
raise ConnectorMissingCredentialError("WebDAV client not initialized")
logging.info(f"Searching for files in {self.remote_path} between {start} and {end}")
files = self._list_files_recursive(self.remote_path, start, end)
logging.info(f"Found {len(files)} files matching time criteria")
batch: list[Document] = []
for file_path, file_info in files:
file_name = os.path.basename(file_path)
size_bytes = file_info.get('size', 0)
if (
self.size_threshold is not None
and isinstance(size_bytes, int)
and size_bytes > self.size_threshold
):
logging.warning(
f"{file_name} exceeds size threshold of {self.size_threshold}. Skipping."
)
continue
try:
logging.debug(f"Downloading file: {file_path}")
from io import BytesIO
buffer = BytesIO()
self.client.download_fileobj(file_path, buffer)
blob = buffer.getvalue()
if blob is None or len(blob) == 0:
logging.warning(f"Downloaded content is empty for {file_path}")
continue
modified_time = file_info.get('modified')
if modified_time:
if isinstance(modified_time, datetime):
modified = modified_time
if modified.tzinfo is None:
modified = modified.replace(tzinfo=timezone.utc)
elif isinstance(modified_time, str):
try:
modified = datetime.strptime(modified_time, '%a, %d %b %Y %H:%M:%S %Z')
modified = modified.replace(tzinfo=timezone.utc)
except (ValueError, TypeError):
try:
modified = datetime.fromisoformat(modified_time.replace('Z', '+00:00'))
except (ValueError, TypeError):
logging.warning(f"Could not parse modified time for {file_path}: {modified_time}")
modified = datetime.now(timezone.utc)
else:
modified = datetime.now(timezone.utc)
else:
modified = datetime.now(timezone.utc)
batch.append(
Document(
id=f"webdav:{self.base_url}:{file_path}",
blob=blob,
source=DocumentSource.WEBDAV,
semantic_identifier=file_name,
extension=get_file_ext(file_name),
doc_updated_at=modified,
size_bytes=size_bytes if size_bytes else 0
)
)
if len(batch) == self.batch_size:
yield batch
batch = []
except Exception as e:
logging.exception(f"Error downloading file {file_path}: {e}")
if batch:
yield batch
def load_from_state(self) -> GenerateDocumentsOutput:
"""Load all documents from WebDAV server
Yields:
Batches of documents
"""
logging.debug(f"Loading documents from WebDAV server {self.base_url}")
return self._yield_webdav_documents(
start=datetime(1970, 1, 1, tzinfo=timezone.utc),
end=datetime.now(timezone.utc),
)
def poll_source(
self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
) -> GenerateDocumentsOutput:
"""Poll WebDAV server for updated documents
Args:
start: Start timestamp (seconds since Unix epoch)
end: End timestamp (seconds since Unix epoch)
Yields:
Batches of documents
"""
if self.client is None:
raise ConnectorMissingCredentialError("WebDAV client not initialized")
start_datetime = datetime.fromtimestamp(start, tz=timezone.utc)
end_datetime = datetime.fromtimestamp(end, tz=timezone.utc)
for batch in self._yield_webdav_documents(start_datetime, end_datetime):
yield batch
def validate_connector_settings(self) -> None:
"""Validate WebDAV connector settings
Raises:
ConnectorMissingCredentialError: If credentials are not loaded
ConnectorValidationError: If settings are invalid
"""
if self.client is None:
raise ConnectorMissingCredentialError(
"WebDAV credentials not loaded."
)
if not self.base_url:
raise ConnectorValidationError(
"No base URL was provided in connector settings."
)
try:
if not self.client.exists(self.remote_path):
raise ConnectorValidationError(
f"Remote path '{self.remote_path}' does not exist on WebDAV server."
)
except Exception as e:
error_message = str(e)
if "401" in error_message or "unauthorized" in error_message.lower():
raise CredentialExpiredError(
"WebDAV credentials appear invalid or expired."
)
if "403" in error_message or "forbidden" in error_message.lower():
raise InsufficientPermissionsError(
f"Insufficient permissions to access path '{self.remote_path}' on WebDAV server."
)
if "404" in error_message or "not found" in error_message.lower():
raise ConnectorValidationError(
f"Remote path '{self.remote_path}' does not exist on WebDAV server."
)
raise ConnectorValidationError(
f"Unexpected WebDAV client error: {e}"
)
if __name__ == "__main__":
credentials_dict = {
"username": os.environ.get("WEBDAV_USERNAME"),
"password": os.environ.get("WEBDAV_PASSWORD"),
}
connector = WebDAVConnector(
base_url=os.environ.get("WEBDAV_URL") or "https://webdav.example.com",
remote_path=os.environ.get("WEBDAV_PATH") or "/",
)
try:
connector.load_credentials(credentials_dict)
connector.validate_connector_settings()
document_batch_generator = connector.load_from_state()
for document_batch in document_batch_generator:
print("First batch of documents:")
for doc in document_batch:
print(f"Document ID: {doc.id}")
print(f"Semantic Identifier: {doc.semantic_identifier}")
print(f"Source: {doc.source}")
print(f"Updated At: {doc.doc_updated_at}")
print("---")
break
except ConnectorMissingCredentialError as e:
print(f"Error: {e}")
except Exception as e:
print(f"An unexpected error occurred: {e}")

View File

@ -74,6 +74,8 @@ GITHUB_OAUTH = None
FEISHU_OAUTH = None FEISHU_OAUTH = None
OAUTH_CONFIG = None OAUTH_CONFIG = None
DOC_ENGINE = os.getenv('DOC_ENGINE', 'elasticsearch') DOC_ENGINE = os.getenv('DOC_ENGINE', 'elasticsearch')
DOC_ENGINE_INFINITY = (DOC_ENGINE.lower() == "infinity")
docStoreConn = None docStoreConn = None
@ -229,9 +231,9 @@ def init_settings():
FEISHU_OAUTH = get_base_config("oauth", {}).get("feishu") FEISHU_OAUTH = get_base_config("oauth", {}).get("feishu")
OAUTH_CONFIG = get_base_config("oauth", {}) OAUTH_CONFIG = get_base_config("oauth", {})
global DOC_ENGINE, docStoreConn, ES, OB, OS, INFINITY global DOC_ENGINE, DOC_ENGINE_INFINITY, docStoreConn, ES, OB, OS, INFINITY
DOC_ENGINE = os.environ.get("DOC_ENGINE", "elasticsearch") DOC_ENGINE = os.environ.get("DOC_ENGINE", "elasticsearch")
# DOC_ENGINE = os.environ.get('DOC_ENGINE', "opensearch") DOC_ENGINE_INFINITY = (DOC_ENGINE.lower() == "infinity")
lower_case_doc_engine = DOC_ENGINE.lower() lower_case_doc_engine = DOC_ENGINE.lower()
if lower_case_doc_engine == "elasticsearch": if lower_case_doc_engine == "elasticsearch":
ES = get_base_config("es", {}) ES = get_base_config("es", {})

View File

@ -5,20 +5,13 @@
"create_time": {"type": "varchar", "default": ""}, "create_time": {"type": "varchar", "default": ""},
"create_timestamp_flt": {"type": "float", "default": 0.0}, "create_timestamp_flt": {"type": "float", "default": 0.0},
"img_id": {"type": "varchar", "default": ""}, "img_id": {"type": "varchar", "default": ""},
"docnm_kwd": {"type": "varchar", "default": ""}, "docnm": {"type": "varchar", "default": "", "analyzer": ["rag-coarse", "rag-fine"], "comment": "docnm_kwd, title_tks, title_sm_tks"},
"title_tks": {"type": "varchar", "default": "", "analyzer": "whitespace"},
"title_sm_tks": {"type": "varchar", "default": "", "analyzer": "whitespace"},
"name_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"}, "name_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"},
"important_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"},
"tag_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"}, "tag_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"},
"important_tks": {"type": "varchar", "default": "", "analyzer": "whitespace"}, "important_keywords": {"type": "varchar", "default": "", "analyzer": ["rag-coarse", "rag-fine"], "comment": "important_kwd, important_tks"},
"question_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"}, "questions": {"type": "varchar", "default": "", "analyzer": ["rag-coarse", "rag-fine"], "comment": "question_kwd, question_tks"},
"question_tks": {"type": "varchar", "default": "", "analyzer": "whitespace"}, "content": {"type": "varchar", "default": "", "analyzer": ["rag-coarse", "rag-fine"], "comment": "content_with_weight, content_ltks, content_sm_ltks"},
"content_with_weight": {"type": "varchar", "default": ""}, "authors": {"type": "varchar", "default": "", "analyzer": ["rag-coarse", "rag-fine"], "comment": "authors_tks, authors_sm_tks"},
"content_ltks": {"type": "varchar", "default": "", "analyzer": "whitespace"},
"content_sm_ltks": {"type": "varchar", "default": "", "analyzer": "whitespace"},
"authors_tks": {"type": "varchar", "default": "", "analyzer": "whitespace"},
"authors_sm_tks": {"type": "varchar", "default": "", "analyzer": "whitespace"},
"page_num_int": {"type": "varchar", "default": ""}, "page_num_int": {"type": "varchar", "default": ""},
"top_int": {"type": "varchar", "default": ""}, "top_int": {"type": "varchar", "default": ""},
"position_int": {"type": "varchar", "default": ""}, "position_int": {"type": "varchar", "default": ""},

View File

@ -54,4 +54,3 @@ memindex_memory_quota = "1GB"
wal_dir = "/var/infinity/wal" wal_dir = "/var/infinity/wal"
[resource] [resource]
resource_dir = "/var/infinity/resource"

View File

@ -116,6 +116,7 @@ dependencies = [
"google-genai>=1.41.0,<2.0.0", "google-genai>=1.41.0,<2.0.0",
"volcengine==1.0.194", "volcengine==1.0.194",
"voyageai==0.2.3", "voyageai==0.2.3",
"webdav4>=0.10.0,<0.11.0",
"webdriver-manager==4.0.1", "webdriver-manager==4.0.1",
"werkzeug==3.0.6", "werkzeug==3.0.6",
"wikipedia==1.4.0", "wikipedia==1.4.0",
@ -151,6 +152,7 @@ dependencies = [
"moodlepy>=0.23.0", "moodlepy>=0.23.0",
"pypandoc>=1.16", "pypandoc>=1.16",
"pyobvector==0.2.18", "pyobvector==0.2.18",
"exceptiongroup>=1.3.0,<2.0.0"
] ]
[dependency-groups] [dependency-groups]

View File

@ -26,6 +26,7 @@ from hanziconv import HanziConv
from nltk import word_tokenize from nltk import word_tokenize
from nltk.stem import PorterStemmer, WordNetLemmatizer from nltk.stem import PorterStemmer, WordNetLemmatizer
from common.file_utils import get_project_base_directory from common.file_utils import get_project_base_directory
from common import settings
class RagTokenizer: class RagTokenizer:
@ -38,7 +39,7 @@ class RagTokenizer:
def _load_dict(self, fnm): def _load_dict(self, fnm):
logging.info(f"[HUQIE]:Build trie from {fnm}") logging.info(f"[HUQIE]:Build trie from {fnm}")
try: try:
of = open(fnm, "r", encoding='utf-8') of = open(fnm, "r", encoding="utf-8")
while True: while True:
line = of.readline() line = of.readline()
if not line: if not line:
@ -46,7 +47,7 @@ class RagTokenizer:
line = re.sub(r"[\r\n]+", "", line) line = re.sub(r"[\r\n]+", "", line)
line = re.split(r"[ \t]", line) line = re.split(r"[ \t]", line)
k = self.key_(line[0]) k = self.key_(line[0])
F = int(math.log(float(line[1]) / self.DENOMINATOR) + .5) F = int(math.log(float(line[1]) / self.DENOMINATOR) + 0.5)
if k not in self.trie_ or self.trie_[k][0] < F: if k not in self.trie_ or self.trie_[k][0] < F:
self.trie_[self.key_(line[0])] = (F, line[2]) self.trie_[self.key_(line[0])] = (F, line[2])
self.trie_[self.rkey_(line[0])] = 1 self.trie_[self.rkey_(line[0])] = 1
@ -106,8 +107,8 @@ class RagTokenizer:
if inside_code == 0x3000: if inside_code == 0x3000:
inside_code = 0x0020 inside_code = 0x0020
else: else:
inside_code -= 0xfee0 inside_code -= 0xFEE0
if inside_code < 0x0020 or inside_code > 0x7e: # After the conversion, if it's not a half-width character, return the original character. if inside_code < 0x0020 or inside_code > 0x7E: # After the conversion, if it's not a half-width character, return the original character.
rstring += uchar rstring += uchar
else: else:
rstring += chr(inside_code) rstring += chr(inside_code)
@ -124,7 +125,7 @@ class RagTokenizer:
if s < len(chars): if s < len(chars):
copy_pretks = copy.deepcopy(preTks) copy_pretks = copy.deepcopy(preTks)
remaining = "".join(chars[s:]) remaining = "".join(chars[s:])
copy_pretks.append((remaining, (-12, ''))) copy_pretks.append((remaining, (-12, "")))
tkslist.append(copy_pretks) tkslist.append(copy_pretks)
return s return s
@ -155,7 +156,7 @@ class RagTokenizer:
if k in self.trie_: if k in self.trie_:
copy_pretks.append((t, self.trie_[k])) copy_pretks.append((t, self.trie_[k]))
else: else:
copy_pretks.append((t, (-12, ''))) copy_pretks.append((t, (-12, "")))
next_res = self.dfs_(chars, mid, copy_pretks, tkslist, _depth + 1, _memo) next_res = self.dfs_(chars, mid, copy_pretks, tkslist, _depth + 1, _memo)
res = max(res, next_res) res = max(res, next_res)
_memo[state_key] = res _memo[state_key] = res
@ -163,12 +164,12 @@ class RagTokenizer:
S = s + 1 S = s + 1
if s + 2 <= len(chars): if s + 2 <= len(chars):
t1 = "".join(chars[s:s + 1]) t1 = "".join(chars[s : s + 1])
t2 = "".join(chars[s:s + 2]) t2 = "".join(chars[s : s + 2])
if self.trie_.has_keys_with_prefix(self.key_(t1)) and not self.trie_.has_keys_with_prefix(self.key_(t2)): if self.trie_.has_keys_with_prefix(self.key_(t1)) and not self.trie_.has_keys_with_prefix(self.key_(t2)):
S = s + 2 S = s + 2
if len(preTks) > 2 and len(preTks[-1][0]) == 1 and len(preTks[-2][0]) == 1 and len(preTks[-3][0]) == 1: if len(preTks) > 2 and len(preTks[-1][0]) == 1 and len(preTks[-2][0]) == 1 and len(preTks[-3][0]) == 1:
t1 = preTks[-1][0] + "".join(chars[s:s + 1]) t1 = preTks[-1][0] + "".join(chars[s : s + 1])
if self.trie_.has_keys_with_prefix(self.key_(t1)): if self.trie_.has_keys_with_prefix(self.key_(t1)):
S = s + 2 S = s + 2
@ -186,13 +187,13 @@ class RagTokenizer:
_memo[state_key] = res _memo[state_key] = res
return res return res
t = "".join(chars[s:s + 1]) t = "".join(chars[s : s + 1])
k = self.key_(t) k = self.key_(t)
copy_pretks = copy.deepcopy(preTks) copy_pretks = copy.deepcopy(preTks)
if k in self.trie_: if k in self.trie_:
copy_pretks.append((t, self.trie_[k])) copy_pretks.append((t, self.trie_[k]))
else: else:
copy_pretks.append((t, (-12, ''))) copy_pretks.append((t, (-12, "")))
result = self.dfs_(chars, s + 1, copy_pretks, tkslist, _depth + 1, _memo) result = self.dfs_(chars, s + 1, copy_pretks, tkslist, _depth + 1, _memo)
_memo[state_key] = result _memo[state_key] = result
return result return result
@ -216,7 +217,7 @@ class RagTokenizer:
F += freq F += freq
L += 0 if len(tk) < 2 else 1 L += 0 if len(tk) < 2 else 1
tks.append(tk) tks.append(tk)
#F /= len(tks) # F /= len(tks)
L /= len(tks) L /= len(tks)
logging.debug("[SC] {} {} {} {} {}".format(tks, len(tks), L, F, B / len(tks) + L + F)) logging.debug("[SC] {} {} {} {} {}".format(tks, len(tks), L, F, B / len(tks) + L + F))
return tks, B / len(tks) + L + F return tks, B / len(tks) + L + F
@ -252,8 +253,7 @@ class RagTokenizer:
while s < len(line): while s < len(line):
e = s + 1 e = s + 1
t = line[s:e] t = line[s:e]
while e < len(line) and self.trie_.has_keys_with_prefix( while e < len(line) and self.trie_.has_keys_with_prefix(self.key_(t)):
self.key_(t)):
e += 1 e += 1
t = line[s:e] t = line[s:e]
@ -264,7 +264,7 @@ class RagTokenizer:
if self.key_(t) in self.trie_: if self.key_(t) in self.trie_:
res.append((t, self.trie_[self.key_(t)])) res.append((t, self.trie_[self.key_(t)]))
else: else:
res.append((t, (0, ''))) res.append((t, (0, "")))
s = e s = e
@ -287,7 +287,7 @@ class RagTokenizer:
if self.key_(t) in self.trie_: if self.key_(t) in self.trie_:
res.append((t, self.trie_[self.key_(t)])) res.append((t, self.trie_[self.key_(t)]))
else: else:
res.append((t, (0, ''))) res.append((t, (0, "")))
s -= 1 s -= 1
@ -310,28 +310,29 @@ class RagTokenizer:
if _zh == zh: if _zh == zh:
e += 1 e += 1
continue continue
txt_lang_pairs.append((a[s: e], zh)) txt_lang_pairs.append((a[s:e], zh))
s = e s = e
e = s + 1 e = s + 1
zh = _zh zh = _zh
if s >= len(a): if s >= len(a):
continue continue
txt_lang_pairs.append((a[s: e], zh)) txt_lang_pairs.append((a[s:e], zh))
return txt_lang_pairs return txt_lang_pairs
def tokenize(self, line): def tokenize(self, line: str) -> str:
if settings.DOC_ENGINE_INFINITY:
return line
line = re.sub(r"\W+", " ", line) line = re.sub(r"\W+", " ", line)
line = self._strQ2B(line).lower() line = self._strQ2B(line).lower()
line = self._tradi2simp(line) line = self._tradi2simp(line)
arr = self._split_by_lang(line) arr = self._split_by_lang(line)
res = [] res = []
for L,lang in arr: for L, lang in arr:
if not lang: if not lang:
res.extend([self.stemmer.stem(self.lemmatizer.lemmatize(t)) for t in word_tokenize(L)]) res.extend([self.stemmer.stem(self.lemmatizer.lemmatize(t)) for t in word_tokenize(L)])
continue continue
if len(L) < 2 or re.match( if len(L) < 2 or re.match(r"[a-z\.-]+$", L) or re.match(r"[0-9\.-]+$", L):
r"[a-z\.-]+$", L) or re.match(r"[0-9\.-]+$", L):
res.append(L) res.append(L)
continue continue
@ -347,7 +348,7 @@ class RagTokenizer:
while i + same < len(tks1) and j + same < len(tks) and tks1[i + same] == tks[j + same]: while i + same < len(tks1) and j + same < len(tks) and tks1[i + same] == tks[j + same]:
same += 1 same += 1
if same > 0: if same > 0:
res.append(" ".join(tks[j: j + same])) res.append(" ".join(tks[j : j + same]))
_i = i + same _i = i + same
_j = j + same _j = j + same
j = _j + 1 j = _j + 1
@ -374,7 +375,7 @@ class RagTokenizer:
same = 1 same = 1
while i + same < len(tks1) and j + same < len(tks) and tks1[i + same] == tks[j + same]: while i + same < len(tks1) and j + same < len(tks) and tks1[i + same] == tks[j + same]:
same += 1 same += 1
res.append(" ".join(tks[j: j + same])) res.append(" ".join(tks[j : j + same]))
_i = i + same _i = i + same
_j = j + same _j = j + same
j = _j + 1 j = _j + 1
@ -391,7 +392,9 @@ class RagTokenizer:
logging.debug("[TKS] {}".format(self.merge_(res))) logging.debug("[TKS] {}".format(self.merge_(res)))
return self.merge_(res) return self.merge_(res)
def fine_grained_tokenize(self, tks): def fine_grained_tokenize(self, tks: str) -> str:
if settings.DOC_ENGINE_INFINITY:
return tks
tks = tks.split() tks = tks.split()
zh_num = len([1 for c in tks if c and is_chinese(c[0])]) zh_num = len([1 for c in tks if c and is_chinese(c[0])])
if zh_num < len(tks) * 0.2: if zh_num < len(tks) * 0.2:
@ -433,21 +436,21 @@ class RagTokenizer:
def is_chinese(s): def is_chinese(s):
if s >= u'\u4e00' and s <= u'\u9fa5': if s >= "\u4e00" and s <= "\u9fa5":
return True return True
else: else:
return False return False
def is_number(s): def is_number(s):
if s >= u'\u0030' and s <= u'\u0039': if s >= "\u0030" and s <= "\u0039":
return True return True
else: else:
return False return False
def is_alphabet(s): def is_alphabet(s):
if (u'\u0041' <= s <= u'\u005a') or (u'\u0061' <= s <= u'\u007a'): if ("\u0041" <= s <= "\u005a") or ("\u0061" <= s <= "\u007a"):
return True return True
else: else:
return False return False
@ -456,8 +459,7 @@ def is_alphabet(s):
def naive_qie(txt): def naive_qie(txt):
tks = [] tks = []
for t in txt.split(): for t in txt.split():
if tks and re.match(r".*[a-zA-Z]$", tks[-1] if tks and re.match(r".*[a-zA-Z]$", tks[-1]) and re.match(r".*[a-zA-Z]$", t):
) and re.match(r".*[a-zA-Z]$", t):
tks.append(" ") tks.append(" ")
tks.append(t) tks.append(t)
return tks return tks
@ -473,43 +475,35 @@ add_user_dict = tokenizer.add_user_dict
tradi2simp = tokenizer._tradi2simp tradi2simp = tokenizer._tradi2simp
strQ2B = tokenizer._strQ2B strQ2B = tokenizer._strQ2B
if __name__ == '__main__': if __name__ == "__main__":
tknzr = RagTokenizer(debug=True) tknzr = RagTokenizer(debug=True)
# huqie.add_user_dict("/tmp/tmp.new.tks.dict") # huqie.add_user_dict("/tmp/tmp.new.tks.dict")
tks = tknzr.tokenize( texts = [
"哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈") "over_the_past.pdf",
logging.info(tknzr.fine_grained_tokenize(tks)) "哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈",
tks = tknzr.tokenize( "公开征求意见稿提出,境外投资者可使用自有人民币或外汇投资。使用外汇投资的,可通过债券持有人在香港人民币业务清算行及香港地区经批准可进入境内银行间外汇市场进行交易的境外人民币业务参加行(以下统称香港结算行)办理外汇资金兑换。香港结算行由此所产生的头寸可到境内银行间外汇市场平盘。使用外汇投资的,在其投资的债券到期或卖出后,原则上应兑换回外汇。",
"公开征求意见稿提出,境外投资者可使用自有人民币或外汇投资。使用外汇投资的,可通过债券持有人在香港人民币业务清算行及香港地区经批准可进入境内银行间外汇市场进行交易的境外人民币业务参加行(以下统称香港结算行)办理外汇资金兑换。香港结算行由此所产生的头寸可到境内银行间外汇市场平盘。使用外汇投资的,在其投资的债券到期或卖出后,原则上应兑换回外汇。") "多校划片就是一个小区对应多个小学初中,让买了学区房的家庭也不确定到底能上哪个学校。目的是通过这种方式为学区房降温,把就近入学落到实处。南京市长江大桥",
logging.info(tknzr.fine_grained_tokenize(tks)) "实际上当时他们已经将业务中心偏移到安全部门和针对政府企业的部门 Scripts are compiled and cached aaaaaaaaa",
tks = tknzr.tokenize( "虽然我不怎么玩",
"多校划片就是一个小区对应多个小学初中,让买了学区房的家庭也不确定到底能上哪个学校。目的是通过这种方式为学区房降温,把就近入学落到实处。南京市长江大桥") "蓝月亮如何在外资夹击中生存,那是全宇宙最有意思的",
logging.info(tknzr.fine_grained_tokenize(tks)) "涡轮增压发动机num最大功率,不像别的共享买车锁电子化的手段,我们接过来是否有意义,黄黄爱美食,不过,今天阿奇要讲到的这家农贸市场,说实话,还真蛮有特色的!不仅环境好,还打出了",
tks = tknzr.tokenize( "这周日你去吗?这周日你有空吗?",
"实际上当时他们已经将业务中心偏移到安全部门和针对政府企业的部门 Scripts are compiled and cached aaaaaaaaa") "Unity3D开发经验 测试开发工程师 c++双11双11 985 211 ",
logging.info(tknzr.fine_grained_tokenize(tks)) "数据分析项目经理|数据分析挖掘|数据分析方向|商品数据分析|搜索数据分析 sql python hive tableau Cocos2d-",
tks = tknzr.tokenize("虽然我不怎么玩") ]
logging.info(tknzr.fine_grained_tokenize(tks)) for text in texts:
tks = tknzr.tokenize("蓝月亮如何在外资夹击中生存,那是全宇宙最有意思的") print(text)
logging.info(tknzr.fine_grained_tokenize(tks)) tks1 = tknzr.tokenize(text)
tks = tknzr.tokenize( tks2 = tknzr.fine_grained_tokenize(tks1)
"涡轮增压发动机num最大功率,不像别的共享买车锁电子化的手段,我们接过来是否有意义,黄黄爱美食,不过,今天阿奇要讲到的这家农贸市场,说实话,还真蛮有特色的!不仅环境好,还打出了") print(tks1)
logging.info(tknzr.fine_grained_tokenize(tks)) print(tks2)
tks = tknzr.tokenize("这周日你去吗?这周日你有空吗?")
logging.info(tknzr.fine_grained_tokenize(tks))
tks = tknzr.tokenize("Unity3D开发经验 测试开发工程师 c++双11双11 985 211 ")
logging.info(tknzr.fine_grained_tokenize(tks))
tks = tknzr.tokenize(
"数据分析项目经理|数据分析挖掘|数据分析方向|商品数据分析|搜索数据分析 sql python hive tableau Cocos2d-")
logging.info(tknzr.fine_grained_tokenize(tks))
if len(sys.argv) < 2: if len(sys.argv) < 2:
sys.exit() sys.exit()
tknzr.DEBUG = False
tknzr.load_user_dict(sys.argv[1]) tknzr.load_user_dict(sys.argv[1])
of = open(sys.argv[2], "r") of = open(sys.argv[2], "r")
while True: while True:
line = of.readline() line = of.readline()
if not line: if not line:
break break
logging.info(tknzr.tokenize(line)) print(tknzr.tokenize(line))
of.close() of.close()

View File

@ -17,7 +17,6 @@ import json
import logging import logging
import re import re
import math import math
import os
from collections import OrderedDict from collections import OrderedDict
from dataclasses import dataclass from dataclasses import dataclass
@ -28,6 +27,7 @@ from rag.utils.doc_store_conn import DocStoreConnection, MatchDenseExpr, FusionE
from common.string_utils import remove_redundant_spaces from common.string_utils import remove_redundant_spaces
from common.float_utils import get_float from common.float_utils import get_float
from common.constants import PAGERANK_FLD, TAG_FLD from common.constants import PAGERANK_FLD, TAG_FLD
from common import settings
def index_name(uid): return f"ragflow_{uid}" def index_name(uid): return f"ragflow_{uid}"
@ -120,7 +120,8 @@ class Dealer:
else: else:
matchDense = self.get_vector(qst, emb_mdl, topk, req.get("similarity", 0.1)) matchDense = self.get_vector(qst, emb_mdl, topk, req.get("similarity", 0.1))
q_vec = matchDense.embedding_data q_vec = matchDense.embedding_data
src.append(f"q_{len(q_vec)}_vec") if not settings.DOC_ENGINE_INFINITY:
src.append(f"q_{len(q_vec)}_vec")
fusionExpr = FusionExpr("weighted_sum", topk, {"weights": "0.05,0.95"}) fusionExpr = FusionExpr("weighted_sum", topk, {"weights": "0.05,0.95"})
matchExprs = [matchText, matchDense, fusionExpr] matchExprs = [matchText, matchDense, fusionExpr]
@ -405,8 +406,13 @@ class Dealer:
rank_feature=rank_feature, rank_feature=rank_feature,
) )
else: else:
lower_case_doc_engine = os.getenv("DOC_ENGINE", "elasticsearch") if settings.DOC_ENGINE_INFINITY:
if lower_case_doc_engine in ["elasticsearch", "opensearch"]: # Don't need rerank here since Infinity normalizes each way score before fusion.
sim = [sres.field[id].get("_score", 0.0) for id in sres.ids]
sim = [s if s is not None else 0.0 for s in sim]
tsim = sim
vsim = sim
else:
# ElasticSearch doesn't normalize each way score before fusion. # ElasticSearch doesn't normalize each way score before fusion.
sim, tsim, vsim = self.rerank( sim, tsim, vsim = self.rerank(
sres, sres,
@ -415,12 +421,6 @@ class Dealer:
vector_similarity_weight, vector_similarity_weight,
rank_feature=rank_feature, rank_feature=rank_feature,
) )
else:
# Don't need rerank here since Infinity normalizes each way score before fusion.
sim = [sres.field[id].get("_score", 0.0) for id in sres.ids]
sim = [s if s is not None else 0.0 for s in sim]
tsim = sim
vsim = sim
sim_np = np.array(sim, dtype=np.float64) sim_np = np.array(sim, dtype=np.float64)
if sim_np.size == 0: if sim_np.size == 0:

View File

@ -37,7 +37,7 @@ from api.db.services.connector_service import ConnectorService, SyncLogsService
from api.db.services.knowledgebase_service import KnowledgebaseService from api.db.services.knowledgebase_service import KnowledgebaseService
from common import settings from common import settings
from common.config_utils import show_configs from common.config_utils import show_configs
from common.data_source import BlobStorageConnector, NotionConnector, DiscordConnector, GoogleDriveConnector, MoodleConnector, JiraConnector, DropboxConnector from common.data_source import BlobStorageConnector, NotionConnector, DiscordConnector, GoogleDriveConnector, MoodleConnector, JiraConnector, DropboxConnector, WebDAVConnector
from common.constants import FileSource, TaskStatus from common.constants import FileSource, TaskStatus
from common.data_source.config import INDEX_BATCH_SIZE from common.data_source.config import INDEX_BATCH_SIZE
from common.data_source.confluence_connector import ConfluenceConnector from common.data_source.confluence_connector import ConfluenceConnector
@ -67,6 +67,8 @@ class SyncBase:
next_update = datetime(1970, 1, 1, tzinfo=timezone.utc) next_update = datetime(1970, 1, 1, tzinfo=timezone.utc)
if task["poll_range_start"]: if task["poll_range_start"]:
next_update = task["poll_range_start"] next_update = task["poll_range_start"]
failed_docs = 0
for document_batch in document_batch_generator: for document_batch in document_batch_generator:
if not document_batch: if not document_batch:
continue continue
@ -87,13 +89,30 @@ class SyncBase:
for doc in document_batch for doc in document_batch
] ]
e, kb = KnowledgebaseService.get_by_id(task["kb_id"]) try:
err, dids = SyncLogsService.duplicate_and_parse(kb, docs, task["tenant_id"], f"{self.SOURCE_NAME}/{task['connector_id']}", task["auto_parse"]) e, kb = KnowledgebaseService.get_by_id(task["kb_id"])
SyncLogsService.increase_docs(task["id"], min_update, max_update, len(docs), "\n".join(err), len(err)) err, dids = SyncLogsService.duplicate_and_parse(kb, docs, task["tenant_id"], f"{self.SOURCE_NAME}/{task['connector_id']}", task["auto_parse"])
doc_num += len(docs) SyncLogsService.increase_docs(task["id"], min_update, max_update, len(docs), "\n".join(err), len(err))
doc_num += len(docs)
except Exception as batch_ex:
error_msg = str(batch_ex)
error_code = getattr(batch_ex, 'args', (None,))[0] if hasattr(batch_ex, 'args') else None
if error_code == 1267 or "collation" in error_msg.lower():
logging.warning(f"Skipping {len(docs)} document(s) due to database collation conflict (error 1267)")
for doc in docs:
logging.debug(f"Skipped: {doc['semantic_identifier']}")
else:
logging.error(f"Error processing batch of {len(docs)} documents: {error_msg}")
failed_docs += len(docs)
continue
prefix = "[Jira] " if self.SOURCE_NAME == FileSource.JIRA else "" prefix = "[Jira] " if self.SOURCE_NAME == FileSource.JIRA else ""
logging.info(f"{prefix}{doc_num} docs synchronized till {next_update}") if failed_docs > 0:
logging.info(f"{prefix}{doc_num} docs synchronized till {next_update} ({failed_docs} skipped)")
else:
logging.info(f"{prefix}{doc_num} docs synchronized till {next_update}")
SyncLogsService.done(task["id"], task["connector_id"]) SyncLogsService.done(task["id"], task["connector_id"])
task["poll_range_start"] = next_update task["poll_range_start"] = next_update
@ -433,6 +452,36 @@ class Teams(SyncBase):
pass pass
class WebDAV(SyncBase):
SOURCE_NAME: str = FileSource.WEBDAV
async def _generate(self, task: dict):
self.connector = WebDAVConnector(
base_url=self.conf["base_url"],
remote_path=self.conf.get("remote_path", "/")
)
self.connector.load_credentials(self.conf["credentials"])
logging.info(f"Task info: reindex={task['reindex']}, poll_range_start={task['poll_range_start']}")
if task["reindex"]=="1" or not task["poll_range_start"]:
logging.info("Using load_from_state (full sync)")
document_batch_generator = self.connector.load_from_state()
begin_info = "totally"
else:
start_ts = task["poll_range_start"].timestamp()
end_ts = datetime.now(timezone.utc).timestamp()
logging.info(f"Polling WebDAV from {task['poll_range_start']} (ts: {start_ts}) to now (ts: {end_ts})")
document_batch_generator = self.connector.poll_source(start_ts, end_ts)
begin_info = "from {}".format(task["poll_range_start"])
logging.info("Connect to WebDAV: {}(path: {}) {}".format(
self.conf["base_url"],
self.conf.get("remote_path", "/"),
begin_info
))
return document_batch_generator
class Moodle(SyncBase): class Moodle(SyncBase):
SOURCE_NAME: str = FileSource.MOODLE SOURCE_NAME: str = FileSource.MOODLE
@ -477,6 +526,7 @@ func_factory = {
FileSource.TEAMS: Teams, FileSource.TEAMS: Teams,
FileSource.MOODLE: Moodle, FileSource.MOODLE: Moodle,
FileSource.DROPBOX: Dropbox, FileSource.DROPBOX: Dropbox,
FileSource.WEBDAV: WebDAV,
} }

View File

@ -44,11 +44,56 @@ logger = logging.getLogger("ragflow.infinity_conn")
def field_keyword(field_name: str): def field_keyword(field_name: str):
# The "docnm_kwd" field is always a string, not list. # Treat "*_kwd" tag-like columns as keyword lists except knowledge_graph_kwd; source_id is also keyword-like.
if field_name == "source_id" or (field_name.endswith("_kwd") and field_name != "docnm_kwd" and field_name != "knowledge_graph_kwd"): if field_name == "source_id" or (field_name.endswith("_kwd") and field_name not in ["knowledge_graph_kwd", "docnm_kwd", "important_kwd", "question_kwd"]):
return True return True
return False return False
def convert_select_fields(output_fields: list[str]) -> list[str]:
for i, field in enumerate(output_fields):
if field in ["docnm_kwd", "title_tks", "title_sm_tks"]:
output_fields[i] = "docnm"
elif field in ["important_kwd", "important_tks"]:
output_fields[i] = "important_keywords"
elif field in ["question_kwd", "question_tks"]:
output_fields[i] = "questions"
elif field in ["content_with_weight", "content_ltks", "content_sm_ltks"]:
output_fields[i] = "content"
elif field in ["authors_tks", "authors_sm_tks"]:
output_fields[i] = "authors"
return list(set(output_fields))
def convert_matching_field(field_weightstr: str) -> str:
tokens = field_weightstr.split("^")
field = tokens[0]
if field == "docnm_kwd" or field == "title_tks":
field = "docnm@ft_docnm_rag_coarse"
elif field == "title_sm_tks":
field = "docnm@ft_title_rag_fine"
elif field == "important_kwd":
field = "important_keywords@ft_important_keywords_rag_coarse"
elif field == "important_tks":
field = "important_keywords@ft_important_keywords_rag_fine"
elif field == "question_kwd":
field = "questions@ft_questions_rag_coarse"
elif field == "question_tks":
field = "questions@ft_questions_rag_fine"
elif field == "content_with_weight" or field == "content_ltks":
field = "content@ft_content_rag_coarse"
elif field == "content_sm_ltks":
field = "content@ft_content_rag_fine"
elif field == "authors_tks":
field = "authors@ft_authors_rag_coarse"
elif field == "authors_sm_tks":
field = "authors@ft_authors_rag_fine"
tokens[0] = field
return "^".join(tokens)
def list2str(lst: str|list, sep: str = " ") -> str:
if isinstance(lst, str):
return lst
return sep.join(lst)
def equivalent_condition_to_str(condition: dict, table_instance=None) -> str | None: def equivalent_condition_to_str(condition: dict, table_instance=None) -> str | None:
assert "_id" not in condition assert "_id" not in condition
@ -77,13 +122,13 @@ def equivalent_condition_to_str(condition: dict, table_instance=None) -> str | N
for item in v: for item in v:
if isinstance(item, str): if isinstance(item, str):
item = item.replace("'", "''") item = item.replace("'", "''")
inCond.append(f"filter_fulltext('{k}', '{item}')") inCond.append(f"filter_fulltext('{convert_matching_field(k)}', '{item}')")
if inCond: if inCond:
strInCond = " or ".join(inCond) strInCond = " or ".join(inCond)
strInCond = f"({strInCond})" strInCond = f"({strInCond})"
cond.append(strInCond) cond.append(strInCond)
else: else:
cond.append(f"filter_fulltext('{k}', '{v}')") cond.append(f"filter_fulltext('{convert_matching_field(k)}', '{v}')")
elif isinstance(v, list): elif isinstance(v, list):
inCond = list() inCond = list()
for item in v: for item in v:
@ -181,11 +226,15 @@ class InfinityConnection(DocStoreConnection):
logger.info(f"INFINITY added following column to table {table_name}: {field_name} {field_info}") logger.info(f"INFINITY added following column to table {table_name}: {field_name} {field_info}")
if field_info["type"] != "varchar" or "analyzer" not in field_info: if field_info["type"] != "varchar" or "analyzer" not in field_info:
continue continue
inf_table.create_index( analyzers = field_info["analyzer"]
f"text_idx_{field_name}", if isinstance(analyzers, str):
IndexInfo(field_name, IndexType.FullText, {"ANALYZER": field_info["analyzer"]}), analyzers = [analyzers]
ConflictType.Ignore, for analyzer in analyzers:
) inf_table.create_index(
f"ft_{re.sub(r'[^a-zA-Z0-9]', '_', field_name)}_{re.sub(r'[^a-zA-Z0-9]', '_', analyzer)}",
IndexInfo(field_name, IndexType.FullText, {"ANALYZER": analyzer}),
ConflictType.Ignore,
)
""" """
Database operations Database operations
@ -245,11 +294,15 @@ class InfinityConnection(DocStoreConnection):
for field_name, field_info in schema.items(): for field_name, field_info in schema.items():
if field_info["type"] != "varchar" or "analyzer" not in field_info: if field_info["type"] != "varchar" or "analyzer" not in field_info:
continue continue
inf_table.create_index( analyzers = field_info["analyzer"]
f"text_idx_{field_name}", if isinstance(analyzers, str):
IndexInfo(field_name, IndexType.FullText, {"ANALYZER": field_info["analyzer"]}), analyzers = [analyzers]
ConflictType.Ignore, for analyzer in analyzers:
) inf_table.create_index(
f"ft_{re.sub(r'[^a-zA-Z0-9]', '_', field_name)}_{re.sub(r'[^a-zA-Z0-9]', '_', analyzer)}",
IndexInfo(field_name, IndexType.FullText, {"ANALYZER": analyzer}),
ConflictType.Ignore,
)
self.connPool.release_conn(inf_conn) self.connPool.release_conn(inf_conn)
logger.info(f"INFINITY created table {table_name}, vector size {vectorSize}") logger.info(f"INFINITY created table {table_name}, vector size {vectorSize}")
@ -302,6 +355,7 @@ class InfinityConnection(DocStoreConnection):
df_list = list() df_list = list()
table_list = list() table_list = list()
output = selectFields.copy() output = selectFields.copy()
output = convert_select_fields(output)
for essential_field in ["id"] + aggFields: for essential_field in ["id"] + aggFields:
if essential_field not in output: if essential_field not in output:
output.append(essential_field) output.append(essential_field)
@ -352,6 +406,7 @@ class InfinityConnection(DocStoreConnection):
if isinstance(matchExpr, MatchTextExpr): if isinstance(matchExpr, MatchTextExpr):
if filter_cond and "filter" not in matchExpr.extra_options: if filter_cond and "filter" not in matchExpr.extra_options:
matchExpr.extra_options.update({"filter": filter_cond}) matchExpr.extra_options.update({"filter": filter_cond})
matchExpr.fields = [convert_matching_field(field) for field in matchExpr.fields]
fields = ",".join(matchExpr.fields) fields = ",".join(matchExpr.fields)
filter_fulltext = f"filter_fulltext('{fields}', '{matchExpr.matching_text}')" filter_fulltext = f"filter_fulltext('{fields}', '{matchExpr.matching_text}')"
if filter_cond: if filter_cond:
@ -470,7 +525,10 @@ class InfinityConnection(DocStoreConnection):
df_list.append(kb_res) df_list.append(kb_res)
self.connPool.release_conn(inf_conn) self.connPool.release_conn(inf_conn)
res = concat_dataframes(df_list, ["id"]) res = concat_dataframes(df_list, ["id"])
res_fields = self.get_fields(res, res.columns.tolist()) fields = set(res.columns.tolist())
for field in ["docnm_kwd", "title_tks", "title_sm_tks", "important_kwd", "important_tks", "question_kwd", "question_tks","content_with_weight", "content_ltks", "content_sm_ltks", "authors_tks", "authors_sm_tks"]:
fields.add(field)
res_fields = self.get_fields(res, list(fields))
return res_fields.get(chunkId, None) return res_fields.get(chunkId, None)
def insert(self, documents: list[dict], indexName: str, knowledgebaseId: str = None) -> list[str]: def insert(self, documents: list[dict], indexName: str, knowledgebaseId: str = None) -> list[str]:
@ -508,8 +566,39 @@ class InfinityConnection(DocStoreConnection):
for d in docs: for d in docs:
assert "_id" not in d assert "_id" not in d
assert "id" in d assert "id" in d
for k, v in d.items(): for k, v in list(d.items()):
if field_keyword(k): if k == "docnm_kwd":
d["docnm"] = v
elif k == "title_kwd":
if not d.get("docnm_kwd"):
d["docnm"] = list2str(v)
elif k == "title_sm_tks":
if not d.get("docnm_kwd"):
d["docnm"] = list2str(v)
elif k == "important_kwd":
d["important_keywords"] = list2str(v)
elif k == "important_tks":
if not d.get("important_kwd"):
d["important_keywords"] = v
elif k == "content_with_weight":
d["content"] = v
elif k == "content_ltks":
if not d.get("content_with_weight"):
d["content"] = v
elif k == "content_sm_ltks":
if not d.get("content_with_weight"):
d["content"] = v
elif k == "authors_tks":
d["authors"] = v
elif k == "authors_sm_tks":
if not d.get("authors_tks"):
d["authors"] = v
elif k == "question_kwd":
d["questions"] = list2str(v, "\n")
elif k == "question_tks":
if not d.get("question_kwd"):
d["questions"] = list2str(v)
elif field_keyword(k):
if isinstance(v, list): if isinstance(v, list):
d[k] = "###".join(v) d[k] = "###".join(v)
else: else:
@ -528,6 +617,9 @@ class InfinityConnection(DocStoreConnection):
d[k] = "_".join(f"{num:08x}" for num in v) d[k] = "_".join(f"{num:08x}" for num in v)
else: else:
d[k] = v d[k] = v
for k in ["docnm_kwd", "title_tks", "title_sm_tks", "important_kwd", "important_tks", "content_with_weight", "content_ltks", "content_sm_ltks", "authors_tks", "authors_sm_tks", "question_kwd", "question_tks"]:
if k in d:
del d[k]
for n, vs in embedding_clmns: for n, vs in embedding_clmns:
if n in d: if n in d:
@ -562,7 +654,38 @@ class InfinityConnection(DocStoreConnection):
filter = equivalent_condition_to_str(condition, table_instance) filter = equivalent_condition_to_str(condition, table_instance)
removeValue = {} removeValue = {}
for k, v in list(newValue.items()): for k, v in list(newValue.items()):
if field_keyword(k): if k == "docnm_kwd":
newValue["docnm"] = list2str(v)
elif k == "title_kwd":
if not newValue.get("docnm_kwd"):
newValue["docnm"] = list2str(v)
elif k == "title_sm_tks":
if not newValue.get("docnm_kwd"):
newValue["docnm"] = v
elif k == "important_kwd":
newValue["important_keywords"] = list2str(v)
elif k == "important_tks":
if not newValue.get("important_kwd"):
newValue["important_keywords"] = v
elif k == "content_with_weight":
newValue["content"] = v
elif k == "content_ltks":
if not newValue.get("content_with_weight"):
newValue["content"] = v
elif k == "content_sm_ltks":
if not newValue.get("content_with_weight"):
newValue["content"] = v
elif k == "authors_tks":
newValue["authors"] = v
elif k == "authors_sm_tks":
if not newValue.get("authors_tks"):
newValue["authors"] = v
elif k == "question_kwd":
newValue["questions"] = "\n".join(v)
elif k == "question_tks":
if not newValue.get("question_kwd"):
newValue["questions"] = list2str(v)
elif field_keyword(k):
if isinstance(v, list): if isinstance(v, list):
newValue[k] = "###".join(v) newValue[k] = "###".join(v)
else: else:
@ -593,6 +716,9 @@ class InfinityConnection(DocStoreConnection):
del newValue[k] del newValue[k]
else: else:
newValue[k] = v newValue[k] = v
for k in ["docnm_kwd", "title_tks", "title_sm_tks", "important_kwd", "important_tks", "content_with_weight", "content_ltks", "content_sm_ltks", "authors_tks", "authors_sm_tks", "question_kwd", "question_tks"]:
if k in newValue:
del newValue[k]
remove_opt = {} # "[k,new_value]": [id_to_update, ...] remove_opt = {} # "[k,new_value]": [id_to_update, ...]
if removeValue: if removeValue:
@ -656,22 +782,45 @@ class InfinityConnection(DocStoreConnection):
return {} return {}
fieldsAll = fields.copy() fieldsAll = fields.copy()
fieldsAll.append("id") fieldsAll.append("id")
fieldsAll = set(fieldsAll)
if "docnm" in res.columns:
for field in ["docnm_kwd", "title_tks", "title_sm_tks"]:
if field in fieldsAll:
res[field] = res["docnm"]
if "important_keywords" in res.columns:
if "important_kwd" in fieldsAll:
res["important_kwd"] = res["important_keywords"].apply(lambda v: v.split())
if "important_tks" in fieldsAll:
res["important_tks"] = res["important_keywords"]
if "questions" in res.columns:
if "question_kwd" in fieldsAll:
res["question_kwd"] = res["questions"].apply(lambda v: v.splitlines())
if "question_tks" in fieldsAll:
res["question_tks"] = res["questions"]
if "content" in res.columns:
for field in ["content_with_weight", "content_ltks", "content_sm_ltks"]:
if field in fieldsAll:
res[field] = res["content"]
if "authors" in res.columns:
for field in ["authors_tks", "authors_sm_tks"]:
if field in fieldsAll:
res[field] = res["authors"]
column_map = {col.lower(): col for col in res.columns} column_map = {col.lower(): col for col in res.columns}
matched_columns = {column_map[col.lower()]: col for col in set(fieldsAll) if col.lower() in column_map} matched_columns = {column_map[col.lower()]: col for col in fieldsAll if col.lower() in column_map}
none_columns = [col for col in set(fieldsAll) if col.lower() not in column_map] none_columns = [col for col in fieldsAll if col.lower() not in column_map]
res2 = res[matched_columns.keys()] res2 = res[matched_columns.keys()]
res2 = res2.rename(columns=matched_columns) res2 = res2.rename(columns=matched_columns)
res2.drop_duplicates(subset=["id"], inplace=True) res2.drop_duplicates(subset=["id"], inplace=True)
for column in res2.columns: for column in list(res2.columns):
k = column.lower() k = column.lower()
if field_keyword(k): if field_keyword(k):
res2[column] = res2[column].apply(lambda v: [kwd for kwd in v.split("###") if kwd]) res2[column] = res2[column].apply(lambda v: [kwd for kwd in v.split("###") if kwd])
elif re.search(r"_feas$", k): elif re.search(r"_feas$", k):
res2[column] = res2[column].apply(lambda v: json.loads(v) if v else {}) res2[column] = res2[column].apply(lambda v: json.loads(v) if v else {})
elif k == "position_int": elif k == "position_int":
def to_position_int(v): def to_position_int(v):
if v: if v:
arr = [int(hex_val, 16) for hex_val in v.split("_")] arr = [int(hex_val, 16) for hex_val in v.split("_")]
@ -685,6 +834,9 @@ class InfinityConnection(DocStoreConnection):
res2[column] = res2[column].apply(lambda v: [int(hex_val, 16) for hex_val in v.split("_")] if v else []) res2[column] = res2[column].apply(lambda v: [int(hex_val, 16) for hex_val in v.split("_")] if v else [])
else: else:
pass pass
for column in ["docnm", "important_keywords", "questions", "content", "authors"]:
if column in res2:
del res2[column]
for column in none_columns: for column in none_columns:
res2[column] = None res2[column] = None

View File

@ -1,11 +0,0 @@
# ragflow-sdk
# build and publish python SDK to pypi.org
```shell
uv build
uv pip install twine
export TWINE_USERNAME="__token__"
export TWINE_PASSWORD=$YOUR_PYPI_API_TOKEN
twine upload dist/*.whl
```

View File

@ -6,7 +6,7 @@ authors = [{ name = "Zhichang Yu", email = "yuzhichang@gmail.com" }]
license = { text = "Apache License, Version 2.0" } license = { text = "Apache License, Version 2.0" }
readme = "README.md" readme = "README.md"
requires-python = ">=3.10,<3.13" requires-python = ">=3.10,<3.13"
dependencies = ["requests>=2.30.0,<3.0.0", "beartype>=0.18.5,<0.19.0"] dependencies = ["requests>=2.30.0,<3.0.0", "beartype>=0.20.0,<1.0.0"]
[dependency-groups] [dependency-groups]

8
sdk/python/uv.lock generated
View File

@ -13,11 +13,11 @@ wheels = [
[[package]] [[package]]
name = "beartype" name = "beartype"
version = "0.18.5" version = "0.22.6"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/96/15/4e623478a9628ad4cee2391f19aba0b16c1dd6fedcb2a399f0928097b597/beartype-0.18.5.tar.gz", hash = "sha256:264ddc2f1da9ec94ff639141fbe33d22e12a9f75aa863b83b7046ffff1381927", size = 1193506, upload-time = "2024-04-21T07:25:58.64Z" } sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/88/e2/105ceb1704cb80fe4ab3872529ab7b6f365cf7c74f725e6132d0efcf1560/beartype-0.22.6.tar.gz", hash = "sha256:97fbda69c20b48c5780ac2ca60ce3c1bb9af29b3a1a0216898ffabdd523e48f4", size = 1588975, upload-time = "2025-11-20T04:47:14.736Z" }
wheels = [ wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/64/43/7a1259741bd989723272ac7d381a43be932422abcff09a1d9f7ba212cb74/beartype-0.18.5-py3-none-any.whl", hash = "sha256:5301a14f2a9a5540fe47ec6d34d758e9cd8331d36c4760fc7a5499ab86310089", size = 917762, upload-time = "2024-04-21T07:25:55.758Z" }, { url = "https://pypi.tuna.tsinghua.edu.cn/packages/98/c9/ceecc71fe2c9495a1d8e08d44f5f31f5bca1350d5b2e27a4b6265424f59e/beartype-0.22.6-py3-none-any.whl", hash = "sha256:0584bc46a2ea2a871509679278cda992eadde676c01356ab0ac77421f3c9a093", size = 1324807, upload-time = "2025-11-20T04:47:11.837Z" },
] ]
[[package]] [[package]]
@ -375,7 +375,7 @@ test = [
[package.metadata] [package.metadata]
requires-dist = [ requires-dist = [
{ name = "beartype", specifier = ">=0.18.5,<0.19.0" }, { name = "beartype", specifier = ">=0.20.0,<1.0.0" },
{ name = "requests", specifier = ">=2.30.0,<3.0.0" }, { name = "requests", specifier = ">=2.30.0,<3.0.0" },
] ]

View File

@ -93,8 +93,9 @@ class TestChunksList:
({"keywords": None}, 5), ({"keywords": None}, 5),
({"keywords": ""}, 5), ({"keywords": ""}, 5),
({"keywords": "1"}, 1), ({"keywords": "1"}, 1),
pytest.param({"keywords": "chunk"}, 4, marks=pytest.mark.skipif(os.getenv("DOC_ENGINE") == "infinity", reason="issues/6509")), ({"keywords": "chunk"}, 4),
({"keywords": "ragflow"}, 1), pytest.param({"keywords": "ragflow"}, 1, marks=pytest.mark.skipif(os.getenv("DOC_ENGINE") == "infinity", reason="issues/6509")),
pytest.param({"keywords": "ragflow"}, 5, marks=pytest.mark.skipif(os.getenv("DOC_ENGINE") != "infinity", reason="issues/6509")),
({"keywords": "unknown"}, 0), ({"keywords": "unknown"}, 0),
], ],
) )

View File

@ -47,7 +47,7 @@ class TestUpdatedChunk:
@pytest.mark.parametrize( @pytest.mark.parametrize(
"payload, expected_code, expected_message", "payload, expected_code, expected_message",
[ [
({"content": None}, 100, "TypeError('expected string or bytes-like object')"), pytest.param({"content": None}, 0, "", marks=pytest.mark.skipif(os.getenv("DOC_ENGINE") == "infinity", reason="issues/6509")),
pytest.param( pytest.param(
{"content": ""}, {"content": ""},
100, 100,

View File

@ -76,8 +76,9 @@ class TestChunksList:
({"keywords": None}, 5), ({"keywords": None}, 5),
({"keywords": ""}, 5), ({"keywords": ""}, 5),
({"keywords": "1"}, 1), ({"keywords": "1"}, 1),
pytest.param({"keywords": "chunk"}, 4, marks=pytest.mark.skipif(os.getenv("DOC_ENGINE") == "infinity", reason="issues/6509")), ({"keywords": "chunk"}, 4),
({"keywords": "ragflow"}, 1), pytest.param({"keywords": "ragflow"}, 1, marks=pytest.mark.skipif(os.getenv("DOC_ENGINE") == "infinity", reason="issues/6509")),
pytest.param({"keywords": "ragflow"}, 5, marks=pytest.mark.skipif(os.getenv("DOC_ENGINE") != "infinity", reason="issues/6509")),
({"keywords": "unknown"}, 0), ({"keywords": "unknown"}, 0),
], ],
) )

View File

@ -25,7 +25,7 @@ class TestUpdatedChunk:
@pytest.mark.parametrize( @pytest.mark.parametrize(
"payload, expected_message", "payload, expected_message",
[ [
({"content": None}, "TypeError('expected string or bytes-like object')"), ({"content": None}, ""),
pytest.param( pytest.param(
{"content": ""}, {"content": ""},
"""APIRequestFailedError(\'Error code: 400, with error text {"error":{"code":"1213","message":"未正常接收到prompt参数。"}}\')""", """APIRequestFailedError(\'Error code: 400, with error text {"error":{"code":"1213","message":"未正常接收到prompt参数。"}}\')""",

21
uv.lock generated
View File

@ -1,5 +1,5 @@
version = 1 version = 1
revision = 3 revision = 2
requires-python = ">=3.10, <3.13" requires-python = ">=3.10, <3.13"
resolution-markers = [ resolution-markers = [
"python_full_version >= '3.12' and sys_platform == 'darwin'", "python_full_version >= '3.12' and sys_platform == 'darwin'",
@ -1494,7 +1494,7 @@ name = "exceptiongroup"
version = "1.3.0" version = "1.3.0"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
dependencies = [ dependencies = [
{ name = "typing-extensions", marker = "python_full_version < '3.11'" }, { name = "typing-extensions" },
] ]
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0b/9f/a65090624ecf468cdca03533906e7c69ed7588582240cfe7cc9e770b50eb/exceptiongroup-1.3.0.tar.gz", hash = "sha256:b241f5885f560bc56a59ee63ca4c6a8bfa46ae4ad651af316d4e81817bb9fd88", size = 29749, upload-time = "2025-05-10T17:42:51.123Z" } sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0b/9f/a65090624ecf468cdca03533906e7c69ed7588582240cfe7cc9e770b50eb/exceptiongroup-1.3.0.tar.gz", hash = "sha256:b241f5885f560bc56a59ee63ca4c6a8bfa46ae4ad651af316d4e81817bb9fd88", size = 29749, upload-time = "2025-05-10T17:42:51.123Z" }
wheels = [ wheels = [
@ -5371,6 +5371,7 @@ dependencies = [
{ name = "elastic-transport" }, { name = "elastic-transport" },
{ name = "elasticsearch" }, { name = "elasticsearch" },
{ name = "elasticsearch-dsl" }, { name = "elasticsearch-dsl" },
{ name = "exceptiongroup" },
{ name = "extract-msg" }, { name = "extract-msg" },
{ name = "filelock" }, { name = "filelock" },
{ name = "flasgger" }, { name = "flasgger" },
@ -5474,6 +5475,7 @@ dependencies = [
{ name = "vertexai" }, { name = "vertexai" },
{ name = "volcengine" }, { name = "volcengine" },
{ name = "voyageai" }, { name = "voyageai" },
{ name = "webdav4" },
{ name = "webdriver-manager" }, { name = "webdriver-manager" },
{ name = "werkzeug" }, { name = "werkzeug" },
{ name = "wikipedia" }, { name = "wikipedia" },
@ -5532,6 +5534,7 @@ requires-dist = [
{ name = "elastic-transport", specifier = "==8.12.0" }, { name = "elastic-transport", specifier = "==8.12.0" },
{ name = "elasticsearch", specifier = "==8.12.1" }, { name = "elasticsearch", specifier = "==8.12.1" },
{ name = "elasticsearch-dsl", specifier = "==8.12.0" }, { name = "elasticsearch-dsl", specifier = "==8.12.0" },
{ name = "exceptiongroup", specifier = ">=1.3.0,<2.0.0" },
{ name = "extract-msg", specifier = ">=0.39.0" }, { name = "extract-msg", specifier = ">=0.39.0" },
{ name = "filelock", specifier = "==3.15.4" }, { name = "filelock", specifier = "==3.15.4" },
{ name = "flasgger", specifier = ">=0.9.7.1,<0.10.0" }, { name = "flasgger", specifier = ">=0.9.7.1,<0.10.0" },
@ -5635,6 +5638,7 @@ requires-dist = [
{ name = "vertexai", specifier = "==1.70.0" }, { name = "vertexai", specifier = "==1.70.0" },
{ name = "volcengine", specifier = "==1.0.194" }, { name = "volcengine", specifier = "==1.0.194" },
{ name = "voyageai", specifier = "==0.2.3" }, { name = "voyageai", specifier = "==0.2.3" },
{ name = "webdav4", specifier = ">=0.10.0,<0.11.0" },
{ name = "webdriver-manager", specifier = "==4.0.1" }, { name = "webdriver-manager", specifier = "==4.0.1" },
{ name = "werkzeug", specifier = "==3.0.6" }, { name = "werkzeug", specifier = "==3.0.6" },
{ name = "wikipedia", specifier = "==1.4.0" }, { name = "wikipedia", specifier = "==1.4.0" },
@ -7163,6 +7167,19 @@ wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/fd/84/fd2ba7aafacbad3c4201d395674fc6348826569da3c0937e75505ead3528/wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859", size = 34166, upload-time = "2024-01-06T02:10:55.763Z" }, { url = "https://pypi.tuna.tsinghua.edu.cn/packages/fd/84/fd2ba7aafacbad3c4201d395674fc6348826569da3c0937e75505ead3528/wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859", size = 34166, upload-time = "2024-01-06T02:10:55.763Z" },
] ]
[[package]]
name = "webdav4"
version = "0.10.0"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
dependencies = [
{ name = "httpx" },
{ name = "python-dateutil" },
]
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/08/3d/d604f9d5195689e578f124f196a5d7e80f3106c8404f5c19b2181691de19/webdav4-0.10.0.tar.gz", hash = "sha256:387da6f0ee384e77149dddd9bcfd434afa155882f6c440a529a7cb458624407f", size = 229195, upload-time = "2024-07-13T19:42:42.593Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/60/02/1b77232297fa52f7bedcf70f3ebe3817e9295f302389fb57dd0e6c077329/webdav4-0.10.0-py3-none-any.whl", hash = "sha256:8f915d72483e572089a3af0a2ad20c7e12d04eee9b9134eb718dbfa37af221d8", size = 36350, upload-time = "2024-07-13T19:42:41.087Z" },
]
[[package]] [[package]]
name = "webdriver-manager" name = "webdriver-manager"
version = "4.0.1" version = "4.0.1"

View File

@ -0,0 +1,15 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- Uploaded to: SVG Repo, www.svgrepo.com, Generator: SVG Repo Mixer Tools -->
<svg xmlns="http://www.w3.org/2000/svg"
aria-label="NextCloud" role="img"
viewBox="0 0 512 512">
<rect
width="512" height="512"
rx="15%"
fill="#0082c9"/>
<g stroke="#ffffff" stroke-width="33" fill="none">
<circle r="40" cy="256" cx="120"/>

After

Width:  |  Height:  |  Size: 449 B

View File

@ -736,6 +736,9 @@ Example: Virtual Hosted Style`,
'Sync pages and databases from Notion for knowledge retrieval.', 'Sync pages and databases from Notion for knowledge retrieval.',
google_driveDescription: google_driveDescription:
'Connect your Google Drive via OAuth and sync specific folders or drives.', 'Connect your Google Drive via OAuth and sync specific folders or drives.',
webdavDescription: 'Connect to WebDAV servers to sync files.',
webdavRemotePathTip:
'Optional: Specify a folder path on the WebDAV server (e.g., /Documents). Leave empty to sync from root.',
google_driveTokenTip: google_driveTokenTip:
'Upload the OAuth token JSON generated from the OAuth helper or Google Cloud Console. You may also upload a client_secret JSON from an "installed" or "web" application. If this is your first sync, a browser window will open to complete the OAuth consent. If the JSON already contains a refresh token, it will be reused automatically.', 'Upload the OAuth token JSON generated from the OAuth helper or Google Cloud Console. You may also upload a client_secret JSON from an "installed" or "web" application. If this is your first sync, a browser window will open to complete the OAuth consent. If the JSON already contains a refresh token, it will be reused automatically.',
google_drivePrimaryAdminTip: google_drivePrimaryAdminTip:

View File

@ -12,6 +12,7 @@ export enum DataSourceKey {
MOODLE = 'moodle', MOODLE = 'moodle',
// GMAIL = 'gmail', // GMAIL = 'gmail',
JIRA = 'jira', JIRA = 'jira',
WEBDAV = 'webdav',
DROPBOX = 'dropbox', DROPBOX = 'dropbox',
// SHAREPOINT = 'sharepoint', // SHAREPOINT = 'sharepoint',
// SLACK = 'slack', // SLACK = 'slack',
@ -54,6 +55,11 @@ export const DataSourceInfo = {
description: t(`setting.${DataSourceKey.JIRA}Description`), description: t(`setting.${DataSourceKey.JIRA}Description`),
icon: <SvgIcon name={'data-source/jira'} width={38} />, icon: <SvgIcon name={'data-source/jira'} width={38} />,
}, },
[DataSourceKey.WEBDAV]: {
name: 'WebDAV',
description: t(`setting.${DataSourceKey.WEBDAV}Description`),
icon: <SvgIcon name={'data-source/webdav'} width={38} />,
},
[DataSourceKey.DROPBOX]: { [DataSourceKey.DROPBOX]: {
name: 'Dropbox', name: 'Dropbox',
description: t(`setting.${DataSourceKey.DROPBOX}Description`), description: t(`setting.${DataSourceKey.DROPBOX}Description`),
@ -429,6 +435,35 @@ export const DataSourceFormFields = {
tooltip: t('setting.jiraPasswordTip'), tooltip: t('setting.jiraPasswordTip'),
}, },
], ],
[DataSourceKey.WEBDAV]: [
{
label: 'WebDAV Server URL',
name: 'config.base_url',
type: FormFieldType.Text,
required: true,
placeholder: 'https://webdav.example.com',
},
{
label: 'Username',
name: 'config.credentials.username',
type: FormFieldType.Text,
required: true,
},
{
label: 'Password',
name: 'config.credentials.password',
type: FormFieldType.Password,
required: true,
},
{
label: 'Remote Path',
name: 'config.remote_path',
type: FormFieldType.Text,
required: false,
placeholder: '/',
tooltip: t('setting.webdavRemotePathTip'),
},
],
[DataSourceKey.DROPBOX]: [ [DataSourceKey.DROPBOX]: [
{ {
label: 'Access Token', label: 'Access Token',
@ -546,6 +581,18 @@ export const DataSourceFormDefaultValues = {
}, },
}, },
}, },
[DataSourceKey.WEBDAV]: {
name: '',
source: DataSourceKey.WEBDAV,
config: {
base_url: '',
remote_path: '/',
credentials: {
username: '',
password: '',
},
},
},
[DataSourceKey.DROPBOX]: { [DataSourceKey.DROPBOX]: {
name: '', name: '',
source: DataSourceKey.DROPBOX, source: DataSourceKey.DROPBOX,