3 changes: 2 additions & 1 deletion PolyLingua/.env.example
@@ -18,7 +18,8 @@ HF_TOKEN=hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
# LLM model ID from HuggingFace
# Default model supports multilingual translation
LLM_MODEL_ID=swiss-ai/Apertus-8B-Instruct-2509

# Number of Gaudi cards (used for vLLM tensor parallelism)
NUM_CARDS=1
# Directory to cache downloaded models
# Models can be large (several GB), ensure sufficient disk space
MODEL_CACHE=./data
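
On a multi-card Gaudi node, NUM_CARDS can be raised to shard the model across cards: the Gaudi compose file forwards it to vLLM as --tensor-parallel-size. A minimal sketch, assuming a hypothetical four-card system:

```
# Hypothetical four-card setup; vLLM shards the model across cards via tensor parallelism
NUM_CARDS=4
```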
3 changes: 2 additions & 1 deletion PolyLingua/README.md
@@ -111,6 +111,7 @@ The service works with any HuggingFace text generation model. Recommended models

- **swiss-ai/Apertus-8B-Instruct-2509** - Multilingual translation (default)
- **haoranxu/ALMA-7B** - Specialized translation model
- **Qwen/Qwen2.5-7B-Instruct** - Commonly used general-purpose model (Gaudi default)
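
Any of these can be swapped in by setting LLM_MODEL_ID before starting the stack. A sketch, assuming the Gaudi compose file from this PR and that the remaining variables (HF_TOKEN and friends) are already exported:

```
# Hypothetical model override; any HuggingFace text-generation model ID works here
export LLM_MODEL_ID=Qwen/Qwen2.5-7B-Instruct
docker compose -f PolyLingua/docker_compose/intel/hpu/gaudi/compose.yaml up -d
```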

## 🛠️ Development

@@ -206,7 +207,7 @@ Translate text between languages.
docker compose logs -f

# Specific service
docker compose logs -f polylingua-xeon-backend-server
docker compose logs -f polylingua-backend-server
docker compose logs -f polylingua-ui-server
```
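
Beyond the logs, the vLLM service can be probed directly through the same health endpoint its compose healthcheck uses. A quick check, assuming the default Gaudi port mapping:

```
# vLLM readiness; the Gaudi compose file maps container port 80 to host port 8028
curl -f http://localhost:8028/health
```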

2 changes: 1 addition & 1 deletion PolyLingua/deploy/nginx.conf
@@ -26,7 +26,7 @@ http {

    # Backend server
    upstream backend {
        server polylingua-xeon-backend-server:8888;
        server polylingua-backend-server:8888;
    }

    server {
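
The upstream name must match the backend's container_name, since nginx resolves it through Docker's embedded DNS on the compose network. A hypothetical sanity check after the rename, assuming the stack is already up:

```
# nginx -t parses the config and resolves upstream hosts inside the container
docker exec polylingua-nginx-server nginx -t
```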
8 changes: 4 additions & 4 deletions PolyLingua/docker_compose/intel/cpu/xeon/compose.yaml
@@ -45,9 +45,9 @@ services:
      HF_HUB_ENABLE_HF_TRANSFER: 0
    restart: unless-stopped

  polylingua-xeon-backend-server:
  polylingua-backend-server:
    image: ${REGISTRY:-opea}/polylingua:${TAG:-latest}
    container_name: polylingua-xeon-backend-server
    container_name: polylingua-backend-server
    depends_on:
      - vllm-service
      - llm
@@ -68,7 +68,7 @@
    image: ${REGISTRY:-opea}/polylingua-ui:${TAG:-latest}
    container_name: polylingua-ui-server
    depends_on:
      - polylingua-xeon-backend-server
      - polylingua-backend-server
    ports:
      - "5173:5173"
    environment:
@@ -83,7 +83,7 @@
    image: nginx:alpine
    container_name: polylingua-nginx-server
    depends_on:
      - polylingua-xeon-backend-server
      - polylingua-backend-server
      - polylingua-ui-server
    ports:
      - "${NGINX_PORT:-80}:80"
106 changes: 106 additions & 0 deletions PolyLingua/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -0,0 +1,106 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

services:
  vllm-service:
    image: opea/vllm-gaudi:1.22.0
    container_name: vllm-gaudi-server
    ports:
      - "8028:80"
    volumes:
      - "${MODEL_CACHE:-./data}:/root/.cache/huggingface/hub"
    shm_size: 1g
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HF_TOKEN: ${HF_TOKEN}
      HABANA_VISIBLE_DEVICES: all
      OMPI_MCA_btl_vader_single_copy_mechanism: none
      VLLM_SKIP_WARMUP: ${VLLM_SKIP_WARMUP:-true}
      NUM_CARDS: ${NUM_CARDS:-1}
      VLLM_TORCH_PROFILER_DIR: "/mnt"
    healthcheck:
      test: ["CMD-SHELL", "curl -f http://localhost:80/health || exit 1"]
      interval: 10s
      timeout: 10s
      retries: 100
    runtime: habana
    cap_add:
      - SYS_NICE
    ipc: host
    command: --model ${LLM_MODEL_ID} --tensor-parallel-size ${NUM_CARDS} --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256

  llm:
    image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest}
    container_name: llm-textgen-server
    depends_on:
      vllm-service:
        condition: service_healthy
    ports:
      - "9000:9000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      LLM_ENDPOINT: ${VLLM_ENDPOINT}
      LLM_MODEL_ID: ${LLM_MODEL_ID}
      HF_TOKEN: ${HF_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
    restart: unless-stopped

  polylingua-backend-server:
    image: ${REGISTRY:-opea}/polylingua:${TAG:-latest}
    container_name: polylingua-backend-server
    depends_on:
      - vllm-service
      - llm
    ports:
      - "8888:8888"
    environment:
      - no_proxy=${no_proxy}
      - https_proxy=${https_proxy}
      - http_proxy=${http_proxy}
      - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
      - MEGA_SERVICE_PORT=8888
      - LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
      - LLM_SERVICE_PORT=${LLM_SERVICE_PORT}
    ipc: host
    restart: always

  polylingua-ui-server:
    image: ${REGISTRY:-opea}/polylingua-ui:${TAG:-latest}
    container_name: polylingua-ui-server
    depends_on:
      - polylingua-backend-server
    ports:
      - "5173:5173"
    environment:
      - no_proxy=${no_proxy}
      - https_proxy=${https_proxy}
      - http_proxy=${http_proxy}
      - BACKEND_SERVICE_ENDPOINT=${BACKEND_SERVICE_ENDPOINT}
    ipc: host
    restart: always

  polylingua-nginx-server:
    image: nginx:alpine
    container_name: polylingua-nginx-server
    depends_on:
      - polylingua-backend-server
      - polylingua-ui-server
    ports:
      - "${NGINX_PORT:-80}:80"
    volumes:
      - ../../../../deploy/nginx.conf:/etc/nginx/nginx.conf:ro
    environment:
      - no_proxy=${no_proxy}
      - https_proxy=${https_proxy}
      - http_proxy=${http_proxy}
    ipc: host
    restart: always

networks:
  default:
    driver: bridge
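
End to end, the Gaudi stack comes up like the Xeon one, with the llm service gated behind vLLM's healthcheck. A sketch, assuming set_env.sh is sourced from the PolyLingua directory first so its exports reach the shell:

```
# Generate .env and export the deployment variables
source ./set_env.sh

# Start the Gaudi stack and follow vLLM startup (warmup is skipped by default)
docker compose -f docker_compose/intel/hpu/gaudi/compose.yaml up -d
docker compose -f docker_compose/intel/hpu/gaudi/compose.yaml logs -f vllm-service
```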
3 changes: 3 additions & 0 deletions PolyLingua/set_env.sh
@@ -81,6 +81,9 @@ echo "export BACKEND_SERVICE_IP=\"${host_ip}\"" >> .env
export BACKEND_SERVICE_PORT="8888"
echo "export BACKEND_SERVICE_PORT=\"8888\"" >> .env

export NUM_CARDS="1"
echo "export NUM_CARDS=\"1\"" >> .env

# Docker Configuration
echo ""
echo "--- Docker Configuration ---"