Don't release full image (#10654)

### What problem does this PR solve?

- Introduced a GPU profile in `.env`
- Added `Dockerfile_tei`
- Fixed datrie
- Removed the `LIGHTEN` flag

### Type of change

- [x] Documentation Update
- [x] Refactoring
2026-01-30 23:26:36 +08:00 · 2025-10-23 23:02:27 +08:00
parent 92739ea804
commit 73144e278b
67 changed files with 2792 additions and 3608 deletions
--- a/api/utils/api_utils.py
+++ b/api/utils/api_utils.py
@ -588,7 +588,7 @@ def verify_embedding_availability(embd_id: str, tenant_id: str) -> tuple[bool, R
            llm["llm_name"] == llm_name and llm["llm_factory"] == llm_factory and llm["model_type"] == "embedding" for
            llm in tenant_llms)

-        is_builtin_model = embd_id in settings.BUILTIN_EMBEDDING_MODELS
+        is_builtin_model = llm_factory=='Builtin'
        if not (is_builtin_model or is_tenant_model or in_llm_service):
            return False, get_error_argument_result(f"Unsupported model: <{embd_id}>")

--- a/api/utils/common.py
+++ b/api/utils/common.py
@ -14,6 +14,12 @@
 #  limitations under the License.
 #

+import threading
+import subprocess
+import sys
+import os
+import logging
+
 def string_to_bytes(string):
    return string if isinstance(
        string, bytes) else string.encode(encoding="utf-8")
@ -44,3 +50,48 @@ def convert_bytes(size_in_bytes: int) -> str:
        return f"{size:.1f} {units[i]}"
    else:
        return f"{size:.2f} {units[i]}"
+
+
+def once(func):
+    """
+    Thread-safe decorator that runs the decorated function at most once and
+    returns the cached result of that first call on every later call.
+
+    A lock guards both the "already executed" flag and the call itself, so
+    concurrent callers block until the first invocation has finished and then
+    all observe the same cached value.
+
+    Notes:
+        * Only the arguments of the first call are ever passed to `func`;
+          arguments of later calls are ignored.
+        * The "executed" flag is set before `func` is invoked. If the first
+          call raises, the exception propagates to that caller, `func` is
+          never retried, and later calls return None.
+        * The wrapper keeps the metadata of `func` (`__name__`, `__doc__`,
+          `__wrapped__`) via `functools.wraps`.
+
+    Args:
+        func (callable): The function to be executed only once.
+
+    Returns:
+        callable: A wrapper function that executes `func` on the first call
+                  and returns the cached result thereafter.
+
+    Example:
+        @once
+        def compute_expensive_value():
+            print("Computing...")
+            return 42
+
+        # First call: executes and prints
+        # Subsequent calls: return 42 without executing
+    """
+    # Function-scope import keeps this block self-contained.
+    import functools
+
+    executed = False
+    result = None
+    lock = threading.Lock()
+
+    @functools.wraps(func)
+    def wrapper(*args, **kwargs):
+        nonlocal executed, result
+        with lock:
+            if not executed:
+                # Mark as executed before calling so a failing first call is
+                # not retried by later callers.
+                executed = True
+                result = func(*args, **kwargs)
+        return result
+    return wrapper
+
+@once
+def pip_install_torch():
+    """
+    Install PyTorch with pip when a non-CPU device is requested.
+
+    Reads the `DEVICE` environment variable (defaulting to "cpu"). When it is
+    exactly "cpu" nothing happens. Otherwise an info message is logged and
+    `pip install` is run through the current interpreter (`sys.executable`).
+    `subprocess.check_call` raises `subprocess.CalledProcessError` if pip
+    exits with a non-zero status.
+
+    Because of the `@once` decorator the body is executed at most one time;
+    later calls return the cached result (None).
+    """
+    target_device = os.getenv("DEVICE", "cpu")
+    if target_device != "cpu":
+        logging.info("Installing pytorch")
+        requirements = ("torch>=2.5.0,<3.0.0",)
+        # Use the running interpreter's pip so the package lands in the same environment.
+        subprocess.check_call([sys.executable, "-m", "pip", "install", *requirements])