diff --git a/PolyLingua/.env.example b/PolyLingua/.env.example index a48a3de7cd..0c72cc3459 100644 --- a/PolyLingua/.env.example +++ b/PolyLingua/.env.example @@ -18,7 +18,8 @@ HF_TOKEN=hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx # LLM model ID from HuggingFace # Default model supports multilingual translation LLM_MODEL_ID=swiss-ai/Apertus-8B-Instruct-2509 - +# Number of Gaudi cards used for tensor parallelism +NUM_CARDS=1 # Directory to cache downloaded models # Models can be large (several GB), ensure sufficient disk space MODEL_CACHE=./data diff --git a/PolyLingua/README.md b/PolyLingua/README.md index 26bff952f9..22f48a4ee6 100644 --- a/PolyLingua/README.md +++ b/PolyLingua/README.md @@ -111,6 +111,7 @@ The service works with any HuggingFace text generation model. Recommended models - **swiss-ai/Apertus-8B-Instruct-2509** - Multilingual translation (default) - **haoranxu/ALMA-7B** - Specialized translation model +- **Qwen/Qwen2.5-7B-Instruct** - Other common model (Gaudi default) ## 🛠️ Development @@ -206,7 +207,7 @@ Translate text between languages. 
docker compose logs -f # Specific service -docker compose logs -f polylingua-xeon-backend-server +docker compose logs -f polylingua-backend-server docker compose logs -f polylingua-ui-server ``` diff --git a/PolyLingua/deploy/nginx.conf b/PolyLingua/deploy/nginx.conf index 7476021049..cfc152eab4 100644 --- a/PolyLingua/deploy/nginx.conf +++ b/PolyLingua/deploy/nginx.conf @@ -26,7 +26,7 @@ http { # Backend server upstream backend { - server polylingua-xeon-backend-server:8888; + server polylingua-backend-server:8888; } server { diff --git a/PolyLingua/docker_compose/intel/cpu/xeon/compose.yaml b/PolyLingua/docker_compose/intel/cpu/xeon/compose.yaml index 5741acadc6..3aa14418b2 100644 --- a/PolyLingua/docker_compose/intel/cpu/xeon/compose.yaml +++ b/PolyLingua/docker_compose/intel/cpu/xeon/compose.yaml @@ -45,9 +45,9 @@ services: HF_HUB_ENABLE_HF_TRANSFER: 0 restart: unless-stopped - polylingua-xeon-backend-server: + polylingua-backend-server: image: ${REGISTRY:-opea}/polylingua:${TAG:-latest} - container_name: polylingua-xeon-backend-server + container_name: polylingua-backend-server depends_on: - vllm-service - llm @@ -68,7 +68,7 @@ services: image: ${REGISTRY:-opea}/polylingua-ui:${TAG:-latest} container_name: polylingua-ui-server depends_on: - - polylingua-xeon-backend-server + - polylingua-backend-server ports: - "5173:5173" environment: @@ -83,7 +83,7 @@ services: image: nginx:alpine container_name: polylingua-nginx-server depends_on: - - polylingua-xeon-backend-server + - polylingua-backend-server - polylingua-ui-server ports: - "${NGINX_PORT:-80}:80" diff --git a/PolyLingua/docker_compose/intel/hpu/gaudi/compose.yaml b/PolyLingua/docker_compose/intel/hpu/gaudi/compose.yaml new file mode 100644 index 0000000000..6d3ff9bd42 --- /dev/null +++ b/PolyLingua/docker_compose/intel/hpu/gaudi/compose.yaml @@ -0,0 +1,106 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +services: + vllm-service: + image: opea/vllm-gaudi:1.22.0 + 
container_name: vllm-gaudi-server + ports: + - "8028:80" + volumes: + - "${MODEL_CACHE:-./data}:/root/.cache/huggingface/hub" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HF_TOKEN: ${HF_TOKEN} + HABANA_VISIBLE_DEVICES: all + OMPI_MCA_btl_vader_single_copy_mechanism: none + VLLM_SKIP_WARMUP: ${VLLM_SKIP_WARMUP:-true} + NUM_CARDS: ${NUM_CARDS:-1} + VLLM_TORCH_PROFILER_DIR: "/mnt" + healthcheck: + test: ["CMD-SHELL", "curl -f http://localhost:80/health || exit 1"] + interval: 10s + timeout: 10s + retries: 100 + runtime: habana + cap_add: + - SYS_NICE + ipc: host + command: --model ${LLM_MODEL_ID} --tensor-parallel-size ${NUM_CARDS:-1} --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 + llm: + image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest} + container_name: llm-textgen-server + depends_on: + vllm-service: + condition: service_healthy + ports: + - "9000:9000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + LLM_ENDPOINT: ${VLLM_ENDPOINT} + LLM_MODEL_ID: ${LLM_MODEL_ID} + HF_TOKEN: ${HF_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 + restart: unless-stopped + + polylingua-backend-server: + image: ${REGISTRY:-opea}/polylingua:${TAG:-latest} + container_name: polylingua-backend-server + depends_on: + - vllm-service + - llm + ports: + - "8888:8888" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP} + - MEGA_SERVICE_PORT=8888 + - LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP} + - LLM_SERVICE_PORT=${LLM_SERVICE_PORT} + ipc: host + restart: always + + polylingua-ui-server: + image: ${REGISTRY:-opea}/polylingua-ui:${TAG:-latest} + container_name: polylingua-ui-server + depends_on: + - polylingua-backend-server + ports: + - "5173:5173" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - 
http_proxy=${http_proxy} + - BACKEND_SERVICE_ENDPOINT=${BACKEND_SERVICE_ENDPOINT} + ipc: host + restart: always + + polylingua-nginx-server: + image: nginx:alpine + container_name: polylingua-nginx-server + depends_on: + - polylingua-backend-server + - polylingua-ui-server + ports: + - "${NGINX_PORT:-80}:80" + volumes: + - ../../../../deploy/nginx.conf:/etc/nginx/nginx.conf:ro + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + ipc: host + restart: always + +networks: + default: + driver: bridge diff --git a/PolyLingua/set_env.sh b/PolyLingua/set_env.sh index 3a56740ed5..56b6cef019 100755 --- a/PolyLingua/set_env.sh +++ b/PolyLingua/set_env.sh @@ -81,6 +81,9 @@ echo "export BACKEND_SERVICE_IP=\"${host_ip}\"" >> .env export BACKEND_SERVICE_PORT="8888" echo "export BACKEND_SERVICE_PORT=\"8888\"" >> .env +export NUM_CARDS="1" +echo "export NUM_CARDS=\"1\"" >> .env + # Docker Configuration echo "" echo "--- Docker Configuration ---" diff --git a/PolyLingua/tests/test_compose_on_gaudi.sh b/PolyLingua/tests/test_compose_on_gaudi.sh new file mode 100755 index 0000000000..4b4158b420 --- /dev/null +++ b/PolyLingua/tests/test_compose_on_gaudi.sh @@ -0,0 +1,390 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -xe +IMAGE_REPO=${IMAGE_REPO:-"opea"} +IMAGE_TAG=${IMAGE_TAG:-"latest"} +echo "IMAGE_REPO=${IMAGE_REPO}" +echo "IMAGE_TAG=${IMAGE_TAG}" +export REGISTRY=${IMAGE_REPO} +export TAG=${IMAGE_TAG} +export MODEL_CACHE=${MODEL_CACHE:-"./data"} + +# Get the directory where this script is located +SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# PolyLingua root is one level up from tests directory +WORKPATH=$(dirname "$SCRIPT_DIR") +LOG_PATH="$WORKPATH/tests" +ip_address=$(hostname -I | awk '{print $1}') + +echo "Script directory: $SCRIPT_DIR" +echo "Working directory: $WORKPATH" + +function build_docker_images() { + opea_branch=${opea_branch:-"main"} + cd 
$WORKPATH/docker_image_build + + # Clone GenAIComps + git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git + pushd GenAIComps + echo "GenAIComps test commit is $(git rev-parse HEAD)" + docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . + popd && sleep 1s + + # Build all images using build.yaml + echo "Building PolyLingua images with --no-cache, check docker_image_build.log for details..." + service_list="polylingua polylingua-ui llm-textgen" + + if ! docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log 2>&1; then + echo "::error::Docker Compose build failed. Printing build logs..." + cat "${LOG_PATH}/docker_image_build.log" + exit 1 + fi + + echo "Image build completed" + echo "Verifying built images..." + if ! docker images | grep -q "polylingua" || ! docker images | grep -q "polylingua-ui" || ! docker images | grep -q "llm-textgen"; then + echo "::error::One or more required images are missing after build!" + docker images + exit 1 + fi + docker images | grep -E "polylingua|llm-textgen" + sleep 1s +} + +function start_services() { + cd $WORKPATH/docker_compose/intel/hpu/gaudi/ + export host_ip=${ip_address} + export no_proxy="localhost,127.0.0.1,$ip_address" + + # Load environment variables + if [ ! -f .env ]; then + echo "Creating .env file..." 
+ export HF_TOKEN=${HF_TOKEN} + export LLM_MODEL_ID="Qwen/Qwen2.5-7B-Instruct" + export VLLM_ENDPOINT="http://${host_ip}:8028" + export LLM_SERVICE_HOST_IP=${host_ip} + export LLM_SERVICE_PORT=9000 + export MEGA_SERVICE_HOST_IP=${host_ip} + export MEGA_SERVICE_PORT=8888 + export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888" + export BACKEND_SERVICE_NAME="polylingua" + export BACKEND_SERVICE_IP=${host_ip} + export BACKEND_SERVICE_PORT=8888 + export FRONTEND_SERVICE_IP=${host_ip} + export FRONTEND_SERVICE_PORT=5173 + export NGINX_PORT=80 + + cat > .env < ${LOG_PATH}/start_services_with_compose.log 2>&1; then + echo "::error::Docker Compose failed to start. Printing logs..." + cat "${LOG_PATH}/start_services_with_compose.log" + echo "::group::Docker Compose PS" + docker compose -f compose.yaml ps + echo "::endgroup::" + echo "::group::Docker Logs" + docker compose -f compose.yaml logs + echo "::endgroup::" + exit 1 + fi + + # Wait for vLLM service to be ready + echo "Waiting for vLLM service to become healthy (this may take up to 30 minutes)..." + local vllm_health_url="http://${ip_address}:8028/health" + + n=0 + until [[ "$n" -ge 180 ]]; do + http_status=$(curl -s -o /dev/null -w "%{http_code}" "$vllm_health_url") + + if [[ "$http_status" -eq 200 ]]; then + echo "vLLM service is healthy (returned status 200)!" + break + fi + + if ! docker ps --filter "name=vllm-gaudi-server" --filter "status=running" -q | grep -q .; then + echo "::error::vLLM container has stopped unexpectedly!" + docker logs vllm-gaudi-server + exit 1 + fi + + echo "Waiting for vLLM health endpoint... status: $http_status ($n/180)" + sleep 10s + n=$((n+1)) + done + + if [[ "$n" -ge 180 ]]; then + echo "::error::Timeout waiting for vLLM service after 30 minutes." + echo "Final health check status: $http_status" + echo "Dumping container logs:" + docker logs vllm-gaudi-server + exit 1 + fi + + echo "Waiting additional 10s for all services to stabilize..." 
+ sleep 10s +} + +function validate_services() { + local URL="$1" + local EXPECTED_RESULT="$2" + local SERVICE_NAME="$3" + local DOCKER_NAME="$4" + local INPUT_DATA="$5" + local CONTENT_TYPE="${6:-application/json}" + + echo "Testing $SERVICE_NAME at $URL" + + if [[ "$CONTENT_TYPE" == "multipart/form-data" ]]; then + # Handle file upload + local HTTP_STATUS=$(eval curl -s -o /dev/null -w "%{http_code}" -X POST $INPUT_DATA "$URL") + + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + local CONTENT=$(eval curl -s -X POST $INPUT_DATA "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + + if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then + echo "[ $SERVICE_NAME ] ✓ Content is as expected." + else + echo "[ $SERVICE_NAME ] ✗ Content does not match expected result" + echo "Expected: $EXPECTED_RESULT" + echo "Got: $CONTENT" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + else + echo "[ $SERVICE_NAME ] ✗ HTTP status is $HTTP_STATUS (expected 200)" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + else + # Handle JSON request + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H "Content-Type: $CONTENT_TYPE" "$URL") + + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + + local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H "Content-Type: $CONTENT_TYPE" "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + + if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then + echo "[ $SERVICE_NAME ] ✓ Content is as expected." 
+ else + echo "[ $SERVICE_NAME ] ✗ Content does not match expected result" + echo "Expected: $EXPECTED_RESULT" + echo "Got: $CONTENT" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + else + echo "[ $SERVICE_NAME ] ✗ HTTP status is $HTTP_STATUS (expected 200)" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + fi + + sleep 2s +} + +function validate_microservices() { + echo "======================================" + echo "Validating Microservices" + echo "======================================" + + # Test vLLM service health + echo "Testing vLLM service health..." + curl -s http://${ip_address}:8028/health || { + echo "vLLM health check failed" + exit 1 + } + echo "✓ vLLM service health check passed" + + # Test vLLM service chat completions + validate_services \ + "http://${ip_address}:8028/v1/chat/completions" \ + "content" \ + "vllm" \ + "vllm-gaudi-server" \ + '{"model": "Qwen/Qwen2.5-7B-Instruct", "messages": [{"role": "user", "content": "Translate Hello to Spanish"}], "max_tokens": 32}' + + # Test LLM microservice + validate_services \ + "http://${ip_address}:9000/v1/chat/completions" \ + "data: " \ + "llm" \ + "llm-textgen-server" \ + '{"query":"Translate Hello to Spanish", "max_tokens": 32}' +} + +function validate_megaservice() { + echo "======================================" + echo "Validating Megaservice" + echo "======================================" + + # Test 1: Basic text translation (English to Spanish) + echo "Test 1: Basic English to Spanish translation..." + validate_services \ + "http://${ip_address}:8888/v1/translation" \ + "choices" \ + "mega-polylingua-basic" \ + "polylingua-backend-server" \ + '{"language_from": "English", "language_to": "Spanish", "source_language": "Hello, how are you today?"}' + + # Test 2: Language auto-detection + echo "Test 2: Auto-detection test..." 
+ validate_services \ + "http://${ip_address}:8888/v1/translation" \ + "choices" \ + "mega-polylingua-auto" \ + "polylingua-backend-server" \ + '{"language_from": "auto", "language_to": "French", "source_language": "Hello world"}' + + # Test 3: Different language pair (English to German) + echo "Test 3: English to German translation..." + validate_services \ + "http://${ip_address}:8888/v1/translation" \ + "choices" \ + "mega-polylingua-german" \ + "polylingua-backend-server" \ + '{"language_from": "English", "language_to": "German", "source_language": "Good morning"}' +} + +function validate_file_translation() { + echo "======================================" + echo "Validating File Upload Translation" + echo "======================================" + + # Create test file + cd $WORKPATH/tests + mkdir -p test_data + echo "Hello, this is a test document for translation. It contains multiple sentences. We want to test if file upload works correctly." > test_data/sample.txt + + # Test file upload translation + echo "Testing file upload translation..." 
+ validate_services \ + "http://${ip_address}:8888/v1/translation" \ + "choices" \ + "file-translation" \ + "polylingua-backend-server" \ + '-F "file=@test_data/sample.txt" -F "language_from=English" -F "language_to=Spanish"' \ + "multipart/form-data" +} + +function validate_nginx() { + echo "======================================" + echo "Validating Nginx Proxy" + echo "======================================" + + # Test translation via nginx + validate_services \ + "http://${ip_address}:80/v1/translation" \ + "choices" \ + "nginx-proxy" \ + "polylingua-nginx-server" \ + '{"language_from": "English", "language_to": "Italian", "source_language": "Thank you very much"}' +} + +function validate_ui() { + echo "======================================" + echo "Validating UI Service" + echo "======================================" + + # Check if UI is accessible + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" http://${ip_address}:5173) + + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ UI ] ✓ UI service is accessible" + else + echo "[ UI ] ✗ UI service returned HTTP status $HTTP_STATUS" + docker logs polylingua-ui-server + exit 1 + fi +} + +function stop_docker() { + cd $WORKPATH/docker_compose/intel/hpu/gaudi/ + echo "Stopping services..." 
+ docker compose -f compose.yaml down + echo "Services stopped" +} + +function main() { + echo "======================================" + echo "PolyLingua E2E Test Suite" + echo "======================================" + echo "Platform: Intel Gaudi (HPU)" + echo "LLM Backend: vLLM" + echo "IP Address: ${ip_address}" + echo "======================================" + + echo "::group::stop_docker" + stop_docker + echo "::endgroup::" + + echo "::group::build_docker_images" + if [[ "$IMAGE_REPO" == "opea" ]]; then + build_docker_images + else + echo "Skipping image build (using IMAGE_REPO=${IMAGE_REPO})" + fi + echo "::endgroup::" + + echo "::group::start_services" + start_services + echo "::endgroup::" + + echo "::group::validate_microservices" + validate_microservices + echo "::endgroup::" + + echo "::group::validate_megaservice" + validate_megaservice + echo "::endgroup::" + + echo "::group::validate_file_translation" + validate_file_translation + echo "::endgroup::" + + echo "::group::validate_nginx" + validate_nginx + echo "::endgroup::" + + echo "::group::validate_ui" + validate_ui + echo "::endgroup::" + + echo "::group::stop_docker" + stop_docker + echo "::endgroup::" + + docker system prune -f + + echo "======================================" + echo "✓ All tests passed successfully!" 
+ echo "======================================" +} + +main diff --git a/PolyLingua/tests/test_compose_on_xeon.sh b/PolyLingua/tests/test_compose_on_xeon.sh index 7bcd67496f..282300f9b5 100755 --- a/PolyLingua/tests/test_compose_on_xeon.sh +++ b/PolyLingua/tests/test_compose_on_xeon.sh @@ -220,7 +220,7 @@ function validate_megaservice() { "http://${ip_address}:8888/v1/translation" \ "choices" \ "mega-polylingua-basic" \ - "polylingua-xeon-backend-server" \ + "polylingua-backend-server" \ '{"language_from": "English", "language_to": "Spanish", "source_language": "Hello, how are you today?"}' # Test 2: Language auto-detection @@ -229,7 +229,7 @@ function validate_megaservice() { "http://${ip_address}:8888/v1/translation" \ "choices" \ "mega-polylingua-auto" \ - "polylingua-xeon-backend-server" \ + "polylingua-backend-server" \ '{"language_from": "auto", "language_to": "French", "source_language": "Hello world"}' # Test 3: Different language pair (English to German) @@ -238,7 +238,7 @@ function validate_megaservice() { "http://${ip_address}:8888/v1/translation" \ "choices" \ "mega-polylingua-german" \ - "polylingua-xeon-backend-server" \ + "polylingua-backend-server" \ '{"language_from": "English", "language_to": "German", "source_language": "Good morning"}' } @@ -258,7 +258,7 @@ function validate_file_translation() { "http://${ip_address}:8888/v1/translation" \ "choices" \ "file-translation" \ - "polylingua-xeon-backend-server" \ + "polylingua-backend-server" \ '-F "file=@test_data/sample.txt" -F "language_from=English" -F "language_to=Spanish"' \ "multipart/form-data" }