Don't release full image (#10654)

### What problem does this PR solve?

Introduced GPU profile in .env
Added Dockerfile_tei
Fixed datrie
Removed LIGHTEN flag

### Type of change

- [x] Documentation Update
- [x] Refactoring
This commit is contained in:
Zhichang Yu
2025-10-23 23:02:27 +08:00
committed by GitHub
parent 92739ea804
commit 73144e278b
67 changed files with 2792 additions and 3608 deletions

View File

@ -141,7 +141,7 @@ def set_tenant_info(auth):
tenant_info = {
"tenant_id": tenant_id,
"llm_id": "glm-4-flash@ZHIPU-AI",
"embd_id": "BAAI/bge-large-zh-v1.5@BAAI",
"embd_id": "BAAI/bge-small-en-v1.5@Builtin",
"img2txt_id": "",
"asr_id": "",
"tts_id": None,

View File

@ -224,11 +224,10 @@ class TestDatasetCreate:
@pytest.mark.parametrize(
"name, embedding_model",
[
("BAAI/bge-large-zh-v1.5@BAAI", "BAAI/bge-large-zh-v1.5@BAAI"),
("maidalun1020/bce-embedding-base_v1@Youdao", "maidalun1020/bce-embedding-base_v1@Youdao"),
("BAAI/bge-small-en-v1.5@Builtin", "BAAI/bge-small-en-v1.5@Builtin"),
("embedding-3@ZHIPU-AI", "embedding-3@ZHIPU-AI"),
],
ids=["builtin_baai", "builtin_youdao", "tenant_zhipu"],
ids=["builtin_baai", "tenant_zhipu"],
)
def test_embedding_model(self, HttpApiAuth, name, embedding_model):
payload = {"name": name, "embedding_model": embedding_model}
@ -262,11 +261,11 @@ class TestDatasetCreate:
[
("empty", ""),
("space", " "),
("missing_at", "BAAI/bge-large-zh-v1.5BAAI"),
("missing_model_name", "@BAAI"),
("missing_provider", "BAAI/bge-large-zh-v1.5@"),
("whitespace_only_model_name", " @BAAI"),
("whitespace_only_provider", "BAAI/bge-large-zh-v1.5@ "),
("missing_at", "BAAI/bge-small-en-v1.5Builtin"),
("missing_model_name", "@Builtin"),
("missing_provider", "BAAI/bge-small-en-v1.5@"),
("whitespace_only_model_name", " @Builtin"),
("whitespace_only_provider", "BAAI/bge-small-en-v1.5@ "),
],
ids=["empty", "space", "missing_at", "empty_model_name", "empty_provider", "whitespace_only_model_name", "whitespace_only_provider"],
)
@ -284,14 +283,14 @@ class TestDatasetCreate:
payload = {"name": "embedding_model_unset"}
res = create_dataset(HttpApiAuth, payload)
assert res["code"] == 0, res
assert res["data"]["embedding_model"] == "BAAI/bge-large-zh-v1.5@BAAI", res
assert res["data"]["embedding_model"] == "BAAI/bge-small-en-v1.5@Builtin", res
@pytest.mark.p2
def test_embedding_model_none(self, HttpApiAuth):
payload = {"name": "embedding_model_none", "embedding_model": None}
res = create_dataset(HttpApiAuth, payload)
assert res["code"] == 0, res
assert res["data"]["embedding_model"] == "BAAI/bge-large-zh-v1.5@BAAI", res
assert res["data"]["embedding_model"] == "BAAI/bge-small-en-v1.5@Builtin", res
@pytest.mark.p1
@pytest.mark.parametrize(

View File

@ -259,11 +259,10 @@ class TestDatasetUpdate:
@pytest.mark.parametrize(
"embedding_model",
[
"BAAI/bge-large-zh-v1.5@BAAI",
"maidalun1020/bce-embedding-base_v1@Youdao",
"BAAI/bge-small-en-v1.5@Builtin",
"embedding-3@ZHIPU-AI",
],
ids=["builtin_baai", "builtin_youdao", "tenant_zhipu"],
ids=["builtin_baai", "tenant_zhipu"],
)
def test_embedding_model(self, HttpApiAuth, add_dataset_func, embedding_model):
dataset_id = add_dataset_func
@ -302,11 +301,11 @@ class TestDatasetUpdate:
[
("empty", ""),
("space", " "),
("missing_at", "BAAI/bge-large-zh-v1.5BAAI"),
("missing_model_name", "@BAAI"),
("missing_provider", "BAAI/bge-large-zh-v1.5@"),
("whitespace_only_model_name", " @BAAI"),
("whitespace_only_provider", "BAAI/bge-large-zh-v1.5@ "),
("missing_at", "BAAI/bge-small-en-v1.5Builtin"),
("missing_model_name", "@Builtin"),
("missing_provider", "BAAI/bge-small-en-v1.5@"),
("whitespace_only_model_name", " @Builtin"),
("whitespace_only_provider", "BAAI/bge-small-en-v1.5@ "),
],
ids=["empty", "space", "missing_at", "empty_model_name", "empty_provider", "whitespace_only_model_name", "whitespace_only_provider"],
)
@ -329,7 +328,7 @@ class TestDatasetUpdate:
res = list_datasets(HttpApiAuth)
assert res["code"] == 0, res
assert res["data"][0]["embedding_model"] == "BAAI/bge-large-zh-v1.5@BAAI", res
assert res["data"][0]["embedding_model"] == "BAAI/bge-small-en-v1.5@Builtin", res
@pytest.mark.p1
@pytest.mark.parametrize(

View File

@ -182,11 +182,10 @@ class TestDatasetCreate:
@pytest.mark.parametrize(
"name, embedding_model",
[
("BAAI/bge-large-zh-v1.5@BAAI", "BAAI/bge-large-zh-v1.5@BAAI"),
("maidalun1020/bce-embedding-base_v1@Youdao", "maidalun1020/bce-embedding-base_v1@Youdao"),
("BAAI/bge-small-en-v1.5@Builtin", "BAAI/bge-small-en-v1.5@Builtin"),
("embedding-3@ZHIPU-AI", "embedding-3@ZHIPU-AI"),
],
ids=["builtin_baai", "builtin_youdao", "tenant_zhipu"],
ids=["builtin_baai", "tenant_zhipu"],
)
def test_embedding_model(self, client, name, embedding_model):
payload = {"name": name, "embedding_model": embedding_model}
@ -219,11 +218,11 @@ class TestDatasetCreate:
[
("empty", ""),
("space", " "),
("missing_at", "BAAI/bge-large-zh-v1.5BAAI"),
("missing_model_name", "@BAAI"),
("missing_provider", "BAAI/bge-large-zh-v1.5@"),
("whitespace_only_model_name", " @BAAI"),
("whitespace_only_provider", "BAAI/bge-large-zh-v1.5@ "),
("missing_at", "BAAI/bge-small-en-v1.5Builtin"),
("missing_model_name", "@Builtin"),
("missing_provider", "BAAI/bge-small-en-v1.5@"),
("whitespace_only_model_name", " @Builtin"),
("whitespace_only_provider", "BAAI/bge-small-en-v1.5@ "),
],
ids=["empty", "space", "missing_at", "empty_model_name", "empty_provider", "whitespace_only_model_name", "whitespace_only_provider"],
)
@ -240,13 +239,13 @@ class TestDatasetCreate:
def test_embedding_model_unset(self, client):
payload = {"name": "embedding_model_unset"}
dataset = client.create_dataset(**payload)
assert dataset.embedding_model == "BAAI/bge-large-zh-v1.5@BAAI", str(dataset)
assert dataset.embedding_model == "BAAI/bge-small-en-v1.5@Builtin", str(dataset)
@pytest.mark.p2
def test_embedding_model_none(self, client):
payload = {"name": "embedding_model_none", "embedding_model": None}
dataset = client.create_dataset(**payload)
assert dataset.embedding_model == "BAAI/bge-large-zh-v1.5@BAAI", str(dataset)
assert dataset.embedding_model == "BAAI/bge-small-en-v1.5@Builtin", str(dataset)
@pytest.mark.p1
@pytest.mark.parametrize(

View File

@ -168,11 +168,10 @@ class TestDatasetUpdate:
@pytest.mark.parametrize(
"embedding_model",
[
"BAAI/bge-large-zh-v1.5@BAAI",
"maidalun1020/bce-embedding-base_v1@Youdao",
"BAAI/bge-small-en-v1.5@Builtin",
"embedding-3@ZHIPU-AI",
],
ids=["builtin_baai", "builtin_youdao", "tenant_zhipu"],
ids=["builtin_baai", "tenant_zhipu"],
)
def test_embedding_model(self, client, add_dataset_func, embedding_model):
dataset = add_dataset_func
@ -209,11 +208,11 @@ class TestDatasetUpdate:
[
("empty", ""),
("space", " "),
("missing_at", "BAAI/bge-large-zh-v1.5BAAI"),
("missing_model_name", "@BAAI"),
("missing_provider", "BAAI/bge-large-zh-v1.5@"),
("whitespace_only_model_name", " @BAAI"),
("whitespace_only_provider", "BAAI/bge-large-zh-v1.5@ "),
("missing_at", "BAAI/bge-small-en-v1.5Builtin"),
("missing_model_name", "@Builtin"),
("missing_provider", "BAAI/bge-small-en-v1.5@"),
("whitespace_only_model_name", " @Builtin"),
("whitespace_only_provider", "BAAI/bge-small-en-v1.5@ "),
],
ids=["empty", "space", "missing_at", "empty_model_name", "empty_provider", "whitespace_only_model_name", "whitespace_only_provider"],
)
@ -231,10 +230,10 @@ class TestDatasetUpdate:
def test_embedding_model_none(self, client, add_dataset_func):
dataset = add_dataset_func
dataset.update({"embedding_model": None})
assert dataset.embedding_model == "BAAI/bge-large-zh-v1.5@BAAI", str(dataset)
assert dataset.embedding_model == "BAAI/bge-small-en-v1.5@Builtin", str(dataset)
retrieved_dataset = client.get_dataset(name=dataset.name)
assert retrieved_dataset.embedding_model == "BAAI/bge-large-zh-v1.5@BAAI", str(retrieved_dataset)
assert retrieved_dataset.embedding_model == "BAAI/bge-small-en-v1.5@Builtin", str(retrieved_dataset)
@pytest.mark.p1
@pytest.mark.parametrize(

View File

@ -57,7 +57,7 @@ class TestAddChunk:
"payload, expected_code, expected_message",
[
({"content_with_weight": None}, 100, """TypeError("unsupported operand type(s) for +: 'NoneType' and 'str'")"""),
({"content_with_weight": ""}, 0, ""),
({"content_with_weight": ""}, 100, """Exception('Error: 413 - {"error":"Input validation error: `inputs` cannot be empty","error_type":"Validation"}')"""),
pytest.param(
{"content_with_weight": 1},
100,
@ -124,7 +124,7 @@ class TestAddChunk:
"payload, expected_code, expected_message",
[
({"content_with_weight": "chunk test", "question_kwd": ["a", "b", "c"]}, 0, ""),
({"content_with_weight": "chunk test", "question_kwd": [""]}, 0, ""),
({"content_with_weight": "chunk test", "question_kwd": [""]}, 100, """Exception('Error: 413 - {"error":"Input validation error: `inputs` cannot be empty","error_type":"Validation"}')"""),
({"content_with_weight": "chunk test", "question_kwd": [1]}, 100, "TypeError('sequence item 0: expected str instance, int found')"),
({"content_with_weight": "chunk test", "question_kwd": ["a", "a"]}, 0, ""),
({"content_with_weight": "chunk test", "question_kwd": "abc"}, 102, "`question_kwd` is required to be a list"),

View File

@ -45,7 +45,7 @@ class TestUpdateChunk:
"payload, expected_code, expected_message",
[
({"content_with_weight": None}, 100, "TypeError('expected string or bytes-like object')"),
({"content_with_weight": ""}, 0, ""),
({"content_with_weight": ""}, 100, """Exception('Error: 413 - {"error":"Input validation error: `inputs` cannot be empty","error_type":"Validation"}')"""),
({"content_with_weight": 1}, 100, "TypeError('expected string or bytes-like object')"),
({"content_with_weight": "update chunk"}, 0, ""),
({"content_with_weight": " "}, 0, ""),
@ -103,7 +103,7 @@ class TestUpdateChunk:
"payload, expected_code, expected_message",
[
({"question_kwd": ["a", "b", "c"]}, 0, ""),
({"question_kwd": [""]}, 0, ""),
({"question_kwd": [""]}, 100, """Exception('Error: 413 - {"error":"Input validation error: `inputs` cannot be empty","error_type":"Validation"}')"""),
({"question_kwd": [1]}, 100, "TypeError('sequence item 0: expected str instance, int found')"),
({"question_kwd": ["a", "a"]}, 0, ""),
({"question_kwd": "abc"}, 102, "`question_kwd` should be a list"),

View File

@ -149,11 +149,10 @@ class TestDatasetUpdate:
@pytest.mark.parametrize(
"embedding_model",
[
"BAAI/bge-large-zh-v1.5@BAAI",
"maidalun1020/bce-embedding-base_v1@Youdao",
"BAAI/bge-small-en-v1.5@Builtin",
"embedding-3@ZHIPU-AI",
],
ids=["builtin_baai", "builtin_youdao", "tenant_zhipu"],
ids=["builtin_baai", "tenant_zhipu"],
)
def test_embedding_model(self, WebApiAuth, add_dataset_func, embedding_model):
kb_id = add_dataset_func