Import rag_tokenizer from Infinity (#11647)

### What problem does this PR solve?

- The original rag/nlp/rag_tokenizer.py has been added to Infinity and
infinity-sdk via https://github.com/infiniflow/infinity/pull/3117.
The new rag/nlp/rag_tokenizer.py imports rag_tokenizer from infinity and
defines a class that inherits from rag_tokenizer.RagTokenizer.

- Bump infinity-sdk from 0.6.7 to 0.6.8

### Type of change
- [x] Refactoring
This commit is contained in:
qinling0210
2025-12-02 14:59:37 +08:00
committed by GitHub
parent e3987e21b9
commit 2ffe6f7439
7 changed files with 3712 additions and 4082 deletions

View File

@ -49,7 +49,7 @@ dependencies = [
"html-text==0.6.2",
"httpx[socks]>=0.28.1,<0.29.0",
"huggingface-hub>=0.25.0,<0.26.0",
"infinity-sdk==0.6.7",
"infinity-sdk==0.6.8",
"infinity-emb>=0.0.66,<0.0.67",
"itsdangerous==2.1.2",
"json-repair==0.35.0",
@ -170,9 +170,6 @@ test = [
"requests-toolbelt>=1.0.0",
]
[[tool.uv.index]]
url = "https://mirrors.aliyun.com/pypi/simple"
[[tool.uv.index]]
url = "https://pypi.tuna.tsinghua.edu.cn/simple"