diff --git a/Dockerfile b/Dockerfile index 8f01c9014..4b2c922c3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -21,9 +21,7 @@ RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/huggingface.co if [ "$LIGHTEN" != "1" ]; then \ (tar -cf - \ /huggingface.co/BAAI/bge-large-zh-v1.5 \ - /huggingface.co/BAAI/bge-reranker-v2-m3 \ /huggingface.co/maidalun1020/bce-embedding-base_v1 \ - /huggingface.co/maidalun1020/bce-reranker-base_v1 \ | tar -xf - --strip-components=2 -C /root/.ragflow) \ fi diff --git a/api/apps/sdk/dataset.py b/api/apps/sdk/dataset.py index 4b8562ece..c20669cd9 100644 --- a/api/apps/sdk/dataset.py +++ b/api/apps/sdk/dataset.py @@ -147,16 +147,6 @@ def create(tenant_id): else: valid_embedding_models = [ "BAAI/bge-large-zh-v1.5", - "BAAI/bge-base-en-v1.5", - "BAAI/bge-large-en-v1.5", - "BAAI/bge-small-en-v1.5", - "BAAI/bge-small-zh-v1.5", - "jinaai/jina-embeddings-v2-base-en", - "jinaai/jina-embeddings-v2-small-en", - "nomic-ai/nomic-embed-text-v1.5", - "sentence-transformers/all-MiniLM-L6-v2", - "text-embedding-v2", - "text-embedding-v3", "maidalun1020/bce-embedding-base_v1", ] embd_model = LLMService.query( diff --git a/conf/llm_factories.json b/conf/llm_factories.json index 549e4aab3..985a4a333 100644 --- a/conf/llm_factories.json +++ b/conf/llm_factories.json @@ -415,56 +415,7 @@ "logo": "", "tags": "TEXT EMBEDDING", "status": "1", - "llm": [ - { - "llm_name": "BAAI/bge-small-en-v1.5", - "tags": "TEXT EMBEDDING,", - "max_tokens": 512, - "model_type": "embedding" - }, - { - "llm_name": "BAAI/bge-small-zh-v1.5", - "tags": "TEXT EMBEDDING,", - "max_tokens": 512, - "model_type": "embedding" - }, - { - "llm_name": "BAAI/bge-base-en-v1.5", - "tags": "TEXT EMBEDDING,", - "max_tokens": 512, - "model_type": "embedding" - }, - { - "llm_name": "BAAI/bge-large-en-v1.5", - "tags": "TEXT EMBEDDING,", - "max_tokens": 512, - "model_type": "embedding" - }, - { - "llm_name": "sentence-transformers/all-MiniLM-L6-v2", - "tags": "TEXT EMBEDDING,", - "max_tokens": 512, - "model_type": "embedding" - }, - { - "llm_name": "nomic-ai/nomic-embed-text-v1.5", - "tags": "TEXT EMBEDDING,", - "max_tokens": 8192, - "model_type": "embedding" - }, - { - "llm_name": "jinaai/jina-embeddings-v2-small-en", - "tags": "TEXT EMBEDDING,", - "max_tokens": 2147483648, - "model_type": "embedding" - }, - { - "llm_name": "jinaai/jina-embeddings-v2-base-en", - "tags": "TEXT EMBEDDING,", - "max_tokens": 2147483648, - "model_type": "embedding" - } - ] + "llm": [] }, { "name": "Xinference", @@ -484,12 +435,6 @@ "tags": "TEXT EMBEDDING,", "max_tokens": 512, "model_type": "embedding" - }, - { - "llm_name": "maidalun1020/bce-reranker-base_v1", - "tags": "RE-RANK, 512", - "max_tokens": 512, - "model_type": "rerank" } ] }, @@ -679,12 +624,6 @@ "tags": "TEXT EMBEDDING,", "max_tokens": 1024, "model_type": "embedding" - }, - { - "llm_name": "BAAI/bge-reranker-v2-m3", - "tags": "RE-RANK,2k", - "max_tokens": 2048, - "model_type": "rerank" } ] }, diff --git a/docker/README.md b/docker/README.md index 036867136..19f51f9d4 100644 --- a/docker/README.md +++ b/docker/README.md @@ -82,18 +82,8 @@ The [.env](./.env) file contains important environment variables for Docker. - `infiniflow/ragflow:v0.17.2`: The RAGFlow Docker image with embedding models including: - Built-in embedding models: - `BAAI/bge-large-zh-v1.5` - - `BAAI/bge-reranker-v2-m3` - `maidalun1020/bce-embedding-base_v1` - - `maidalun1020/bce-reranker-base_v1` - - Embedding models that will be downloaded once you select them in the RAGFlow UI: - - `BAAI/bge-base-en-v1.5` - - `BAAI/bge-large-en-v1.5` - - `BAAI/bge-small-en-v1.5` - - `BAAI/bge-small-zh-v1.5` - - `jinaai/jina-embeddings-v2-base-en` - - `jinaai/jina-embeddings-v2-small-en` - - `nomic-ai/nomic-embed-text-v1.5` - - `sentence-transformers/all-MiniLM-L6-v2` + > [!TIP] > If you cannot download the RAGFlow Docker image, try the following mirrors. diff --git a/docs/configurations.md b/docs/configurations.md index 7889b01aa..b294c7102 100644 --- a/docs/configurations.md +++ b/docs/configurations.md @@ -101,19 +101,9 @@ The [.env](https://github.com/infiniflow/ragflow/blob/main/docker/.env) file con - `infiniflow/ragflow:v0.17.2`: The RAGFlow Docker image with embedding models including: - Built-in embedding models: - `BAAI/bge-large-zh-v1.5` - - `BAAI/bge-reranker-v2-m3` - `maidalun1020/bce-embedding-base_v1` - - `maidalun1020/bce-reranker-base_v1` - - Embedding models that will be downloaded once you select them in the RAGFlow UI: - - `BAAI/bge-base-en-v1.5` - - `BAAI/bge-large-en-v1.5` - - `BAAI/bge-small-en-v1.5` - - `BAAI/bge-small-zh-v1.5` - - `jinaai/jina-embeddings-v2-base-en` - - `jinaai/jina-embeddings-v2-small-en` - - `nomic-ai/nomic-embed-text-v1.5` - - `sentence-transformers/all-MiniLM-L6-v2` - + + :::tip NOTE If you cannot download the RAGFlow Docker image, try the following mirrors. diff --git a/download_deps.py b/download_deps.py index 7526af48a..baf65303a 100644 --- a/download_deps.py +++ b/download_deps.py @@ -29,9 +29,7 @@ repos = [ "InfiniFlow/deepdoc", "InfiniFlow/huqie", "BAAI/bge-large-zh-v1.5", - "BAAI/bge-reranker-v2-m3", "maidalun1020/bce-embedding-base_v1", - "maidalun1020/bce-reranker-base_v1", ] def download_model(repo_id): diff --git a/sdk/python/test/test_http_api/test_chunk_management_within_dataset/test_retrieval_chunks.py b/sdk/python/test/test_http_api/test_chunk_management_within_dataset/test_retrieval_chunks.py index 833deff50..5b2103640 100644 --- a/sdk/python/test/test_http_api/test_chunk_management_within_dataset/test_retrieval_chunks.py +++ b/sdk/python/test/test_http_api/test_chunk_management_within_dataset/test_retrieval_chunks.py @@ -228,6 +228,7 @@ class TestChunksRetrieval: else: assert expected_message in res["message"] + @pytest.mark.skip @pytest.mark.parametrize( "payload, expected_code, expected_message", [ diff --git a/sdk/python/test/test_http_api/test_dataset_mangement/test_create_dataset.py b/sdk/python/test/test_http_api/test_dataset_mangement/test_create_dataset.py index 38e2d21a1..d20c180cd 100644 --- a/sdk/python/test/test_http_api/test_dataset_mangement/test_create_dataset.py +++ b/sdk/python/test/test_http_api/test_dataset_mangement/test_create_dataset.py @@ -145,28 +145,6 @@ class TestAdvancedConfigurations: "name, embedding_model, expected_code", [ ("BAAI/bge-large-zh-v1.5", "BAAI/bge-large-zh-v1.5", 0), - ("BAAI/bge-base-en-v1.5", "BAAI/bge-base-en-v1.5", 0), - ("BAAI/bge-large-en-v1.5", "BAAI/bge-large-en-v1.5", 0), - ("BAAI/bge-small-en-v1.5", "BAAI/bge-small-en-v1.5", 0), - ("BAAI/bge-small-zh-v1.5", "BAAI/bge-small-zh-v1.5", 0), - ( - "jinaai/jina-embeddings-v2-base-en", - "jinaai/jina-embeddings-v2-base-en", - 0, - ), - ( - "jinaai/jina-embeddings-v2-small-en", - "jinaai/jina-embeddings-v2-small-en", - 0, - ), - ("nomic-ai/nomic-embed-text-v1.5", "nomic-ai/nomic-embed-text-v1.5", 0), - ( - "sentence-transformers/all-MiniLM-L6-v2", - "sentence-transformers/all-MiniLM-L6-v2", - 0, - ), - ("text-embedding-v2", "text-embedding-v2", 0), - ("text-embedding-v3", "text-embedding-v3", 0), ( "maidalun1020/bce-embedding-base_v1", "maidalun1020/bce-embedding-base_v1", diff --git a/sdk/python/test/test_http_api/test_dataset_mangement/test_update_dataset.py b/sdk/python/test/test_http_api/test_dataset_mangement/test_update_dataset.py index dfa3fa388..f160a3b74 100644 --- a/sdk/python/test/test_http_api/test_dataset_mangement/test_update_dataset.py +++ b/sdk/python/test/test_http_api/test_dataset_mangement/test_update_dataset.py @@ -86,16 +86,6 @@ class TestDatasetUpdate: "embedding_model, expected_code, expected_message", [ ("BAAI/bge-large-zh-v1.5", 0, ""), - ("BAAI/bge-base-en-v1.5", 0, ""), - ("BAAI/bge-large-en-v1.5", 0, ""), - ("BAAI/bge-small-en-v1.5", 0, ""), - ("BAAI/bge-small-zh-v1.5", 0, ""), - ("jinaai/jina-embeddings-v2-base-en", 0, ""), - ("jinaai/jina-embeddings-v2-small-en", 0, ""), - ("nomic-ai/nomic-embed-text-v1.5", 0, ""), - ("sentence-transformers/all-MiniLM-L6-v2", 0, ""), - ("text-embedding-v2", 0, ""), - ("text-embedding-v3", 0, ""), ("maidalun1020/bce-embedding-base_v1", 0, ""), ( "other_embedding_model",