3 changes: 2 additions & 1 deletion PolyLingua/.env.example
@@ -18,7 +18,8 @@ HF_TOKEN=hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
# LLM model ID from HuggingFace
# Default model supports multilingual translation
LLM_MODEL_ID=swiss-ai/Apertus-8B-Instruct-2509

# Number of Gaudi cards (used for vLLM tensor parallelism)
NUM_CARDS=1
# Directory to cache downloaded models
# Models can be large (several GB), ensure sufficient disk space
MODEL_CACHE=./data
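
On a multi-card Gaudi node, NUM_CARDS can be raised to shard the model across cards: the Gaudi compose file forwards it to vLLM as --tensor-parallel-size. A minimal sketch, assuming a hypothetical four-card system:

```
# Hypothetical four-card setup; vLLM shards the model across cards via tensor parallelism
NUM_CARDS=4
```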
3 changes: 2 additions & 1 deletion PolyLingua/README.md
@@ -111,6 +111,7 @@ The service works with any HuggingFace text generation model. Recommended models

- **swiss-ai/Apertus-8B-Instruct-2509** - Multilingual translation (default)
- **haoranxu/ALMA-7B** - Specialized translation model
- **Qwen/Qwen2.5-7B-Instruct** - Commonly used general-purpose model (Gaudi default)
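
Any of these can be swapped in by setting LLM_MODEL_ID before starting the stack. A sketch, assuming the Gaudi compose file from this PR and that the remaining variables (HF_TOKEN and friends) are already exported:

```
# Hypothetical model override; any HuggingFace text-generation model ID works here
export LLM_MODEL_ID=Qwen/Qwen2.5-7B-Instruct
docker compose -f PolyLingua/docker_compose/intel/hpu/gaudi/compose.yaml up -d
```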

## 🛠️ Development

@@ -206,7 +207,7 @@ Translate text between languages.
docker compose logs -f

# Specific service
docker compose logs -f polylingua-xeon-backend-server
docker compose logs -f polylingua-backend-server
docker compose logs -f polylingua-ui-server
```
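
Beyond the logs, the vLLM service can be probed directly through the same health endpoint its compose healthcheck uses. A quick check, assuming the default Gaudi port mapping:

```
# vLLM readiness; the Gaudi compose file maps container port 80 to host port 8028
curl -f http://localhost:8028/health
```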

2 changes: 1 addition & 1 deletion PolyLingua/deploy/nginx.conf
@@ -26,7 +26,7 @@ http {

    # Backend server
    upstream backend {
        server polylingua-xeon-backend-server:8888;
        server polylingua-backend-server:8888;
    }

    server {
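
The upstream name must match the backend's container_name, since nginx resolves it through Docker's embedded DNS on the compose network. A hypothetical sanity check after the rename, assuming the stack is already up:

```
# nginx -t parses the config and resolves upstream hosts inside the container
docker exec polylingua-nginx-server nginx -t
```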
8 changes: 4 additions & 4 deletions PolyLingua/docker_compose/intel/cpu/xeon/compose.yaml
@@ -45,9 +45,9 @@ services:
      HF_HUB_ENABLE_HF_TRANSFER: 0
    restart: unless-stopped

  polylingua-xeon-backend-server:
  polylingua-backend-server:
    image: ${REGISTRY:-opea}/polylingua:${TAG:-latest}
    container_name: polylingua-xeon-backend-server
    container_name: polylingua-backend-server
    depends_on:
      - vllm-service
      - llm
@@ -68,7 +68,7 @@
    image: ${REGISTRY:-opea}/polylingua-ui:${TAG:-latest}
    container_name: polylingua-ui-server
    depends_on:
      - polylingua-xeon-backend-server
      - polylingua-backend-server
    ports:
      - "5173:5173"
    environment:
@@ -83,7 +83,7 @@
    image: nginx:alpine
    container_name: polylingua-nginx-server
    depends_on:
      - polylingua-xeon-backend-server
      - polylingua-backend-server
      - polylingua-ui-server
    ports:
      - "${NGINX_PORT:-80}:80"
106 changes: 106 additions & 0 deletions PolyLingua/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -0,0 +1,106 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

services:
  vllm-service:
    image: opea/vllm-gaudi:1.22.0
    container_name: vllm-gaudi-server
    ports:
      - "8028:80"
    volumes:
      - "${MODEL_CACHE:-./data}:/root/.cache/huggingface/hub"
    shm_size: 1g
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HF_TOKEN: ${HF_TOKEN}
      HABANA_VISIBLE_DEVICES: all
      OMPI_MCA_btl_vader_single_copy_mechanism: none
      VLLM_SKIP_WARMUP: ${VLLM_SKIP_WARMUP:-true}
      NUM_CARDS: ${NUM_CARDS:-1}
      VLLM_TORCH_PROFILER_DIR: "/mnt"
    healthcheck:
      test: ["CMD-SHELL", "curl -f http://localhost:80/health || exit 1"]
      interval: 10s
      timeout: 10s
      retries: 100
    runtime: habana
    cap_add:
      - SYS_NICE
    ipc: host
    command: --model ${LLM_MODEL_ID} --tensor-parallel-size ${NUM_CARDS} --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256

  llm:
    image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest}
    container_name: llm-textgen-server
    depends_on:
      vllm-service:
        condition: service_healthy
    ports:
      - "9000:9000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      LLM_ENDPOINT: ${VLLM_ENDPOINT}
      LLM_MODEL_ID: ${LLM_MODEL_ID}
      HF_TOKEN: ${HF_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
    restart: unless-stopped

  polylingua-backend-server:
    image: ${REGISTRY:-opea}/polylingua:${TAG:-latest}
    container_name: polylingua-backend-server
    depends_on:
      - vllm-service
      - llm
    ports:
      - "8888:8888"
    environment:
      - no_proxy=${no_proxy}
      - https_proxy=${https_proxy}
      - http_proxy=${http_proxy}
      - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
      - MEGA_SERVICE_PORT=8888
      - LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
      - LLM_SERVICE_PORT=${LLM_SERVICE_PORT}
    ipc: host
    restart: always

  polylingua-ui-server:
    image: ${REGISTRY:-opea}/polylingua-ui:${TAG:-latest}
    container_name: polylingua-ui-server
    depends_on:
      - polylingua-backend-server
    ports:
      - "5173:5173"
    environment:
      - no_proxy=${no_proxy}
      - https_proxy=${https_proxy}
      - http_proxy=${http_proxy}
      - BACKEND_SERVICE_ENDPOINT=${BACKEND_SERVICE_ENDPOINT}
    ipc: host
    restart: always

  polylingua-nginx-server:
    image: nginx:alpine
    container_name: polylingua-nginx-server
    depends_on:
      - polylingua-backend-server
      - polylingua-ui-server
    ports:
      - "${NGINX_PORT:-80}:80"
    volumes:
      - ../../../../deploy/nginx.conf:/etc/nginx/nginx.conf:ro
    environment:
      - no_proxy=${no_proxy}
      - https_proxy=${https_proxy}
      - http_proxy=${http_proxy}
    ipc: host
    restart: always

networks:
  default:
    driver: bridge
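
End to end, the Gaudi stack comes up like the Xeon one, with the llm service gated behind vLLM's healthcheck. A sketch, assuming set_env.sh is sourced from the PolyLingua directory first so its exports reach the shell:

```
# Generate .env and export the deployment variables
source ./set_env.sh

# Start the Gaudi stack and follow vLLM startup (warmup is skipped by default)
docker compose -f docker_compose/intel/hpu/gaudi/compose.yaml up -d
docker compose -f docker_compose/intel/hpu/gaudi/compose.yaml logs -f vllm-service
```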
3 changes: 3 additions & 0 deletions PolyLingua/set_env.sh
@@ -81,6 +81,9 @@ echo "export BACKEND_SERVICE_IP=\"${host_ip}\"" >> .env
export BACKEND_SERVICE_PORT="8888"
echo "export BACKEND_SERVICE_PORT=\"8888\"" >> .env

export NUM_CARDS="1"
echo "export NUM_CARDS=\"1\"" >> .env

# Docker Configuration
echo ""
echo "--- Docker Configuration ---"