Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions hacks/download-cli.sh
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,14 @@ check_binary_exists() {
return 1
}

# Determine OS and architecture, normalized to the names used in release
# artifact URLs (e.g. linux-amd64, darwin-arm64).
OS=$(uname -s | tr '[:upper:]' '[:lower:]')
ARCH=$(uname -m)
if [[ "$ARCH" == "arm64" || "$ARCH" == "aarch64" ]]; then
  ARCH="arm64"
elif [[ "$ARCH" == "x86_64" ]]; then
  ARCH="amd64"
else
  echo "Unsupported architecture: $ARCH"
  exit 1
fi

# Install JQ (mandatory)
if ! check_binary_exists "${root_directory}/bin/jq"; then
Expand Down Expand Up @@ -148,6 +156,20 @@ if ! check_binary_exists "${root_directory}/bin/tkn"; then
echo "✅ Tekton CLI installed successfully!"
fi

# Install Helm
HELM_VERSION="v3.17.3"
HELM_TARBALL="helm-${HELM_VERSION}-${OS}-${ARCH}.tar.gz"

if ! check_binary_exists "${root_directory}/bin/helm"; then
  # HELM_VERSION already carries the leading "v" — don't prepend another one.
  echo "⬇️ Downloading HELM (${HELM_VERSION})..."
  wget --progress=bar:force:noscroll "https://get.helm.sh/${HELM_TARBALL}"
  # Verify the tarball against Helm's published SHA256 checksum before
  # extracting, so a corrupted or tampered download aborts the install.
  wget -q "https://get.helm.sh/${HELM_TARBALL}.sha256sum"
  if ! sha256sum -c "${HELM_TARBALL}.sha256sum"; then
    echo "❌ Checksum verification failed for ${HELM_TARBALL}" >&2
    rm -f "${HELM_TARBALL}" "${HELM_TARBALL}.sha256sum"
    exit 1
  fi
  tar -zxvf "${HELM_TARBALL}"
  delete_if_exists "${root_directory}/bin/helm"
  mv "${OS}-${ARCH}/helm" "${root_directory}/bin/helm"
  chmod +x "${root_directory}/bin/helm"
  rm -rf "${OS}-${ARCH}" "${HELM_TARBALL}" "${HELM_TARBALL}.sha256sum"
fi
Comment on lines +159 to +171
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Verify Helm download integrity

It’s best practice to validate the Helm tarball (e.g., by checking its SHA256 checksum or GPG signature) before extraction to prevent installation of corrupted or malicious binaries. Consider fetching and verifying the checksum or signature alongside the download.

🤖 Prompt for AI Agents
In hacks/download-cli.sh around lines 159 to 171, the Helm tarball is downloaded
and extracted without verifying its integrity. To fix this, add steps to
download the official SHA256 checksum or GPG signature for the Helm tarball,
then verify the downloaded file against this checksum or signature before
extracting it. If the verification fails, abort the installation to prevent
using corrupted or malicious binaries.


if [[ $TEST_ENV == "local" ]]; then
# Install ROSA
if ! check_binary_exists "${root_directory}/bin/rosa"; then
Expand Down
11 changes: 11 additions & 0 deletions unofficial_components/playbooks/llm-d-on-kind/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
playbook:
name: llm-d-on-kind
description: This playbook is used to install LLM-D on a kind cluster using llm-d-simulator.
steps:
- role:
name: kind-install
- role:
name: llm-d-install-with-manifests
input_env:
TEST_NAMESPACE: llm-d-test

Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
role:
created_date: "20250617"
name: llm-d-install-with-manifests
files:
test_requests_file: files/test-request.sh
manifests:
route_manifests_dir: manifests/populated_manifests/route
kvcache_manifests_dir: manifests/populated_manifests/kvcache
scheduler_manifests_dir: manifests/populated_manifests/scheduler
prefill_manifests_dir: manifests/populated_manifests/prefill
decode_manifests_dir: manifests/populated_manifests/decode
simulator_prefill_manifests_dir: manifests/populated_manifests_using_simulator/prefill
simulator_decode_manifests_dir: manifests/populated_manifests_using_simulator/decode

description: |
This role is used to install a Prefill/Decode type deployment of LLM-D using llm-d-simulator.
It will create all required resources from manifests and verify the deployment by sending a test request to the LLM-D simulator and waiting for the request to be processed.
For now, KIND cluster is supported.

pre-requirements:
- Provide Cluster (KIND) if you want to use your own cluster.
- Cluster should have GPUs If you don't want to use llm-d-simulator.


Input Environment:
The parameters include:
- CLUSTER_TYPE: Set CLUSTER_TYPE(ex "KIND") (default: KIND)
- KUBECONFIG_PATH: Set KUBECONFIG_PATH if you set KIND(ex "/path/to/kubeconfig") (default: ~/.kube/config)

To run it:
./loopy roles run poc-llm-d-install-with-manifests \
-p CLUSTER_TYPE=KIND \
-p MODEL_ID=meta-llama/Llama-3.2-3B-Instruct \
-p TEST_NAMESPACE=llm-d-test \
-p USE_SIMULATOR=true \
-p GIE_BACKEND=istio \
-p ISTIO_HUB_VERSION=1.26-alpha.9befed2f1439d883120f8de70fd70d84ca0ebc3d \
-p ISTIO_HUB=gcr.io/istio-testing \
-p GIE_CRD_MANIFESTS_URL=https://github.com/llm-d/llm-d-inference-scheduler/deploy/components/crds-gie \
-p GATEWAY_API_CRD_MANIFESTS_URL=https://github.com/llm-d/llm-d-inference-scheduler/deploy/components/crds-gateway-api

input_env:
- name: TEST_NAMESPACE
description: |
Set this, if you want to use specific namespace for test (ex "llm-d-test")
default: llm-d-test
- name: USE_SIMULATOR
description: |
Set this, if you want to use llm-d-simulator (ex "true")
default: true
- name: GIE_BACKEND
description: |
Set this, if you want to use a specific GIE implementation (ex "istio")
For now, only istio is supported.
default: istio
- name: ISTIO_HUB_VERSION
description: |
Set this, if you want to use specific ISTIO image tag for helm install(ex "1.26")
default: 1.26-alpha.9befed2f1439d883120f8de70fd70d84ca0ebc3d
- name: ISTIO_HUB
description: |
Set this, if you want to use specific ISTIO image hub for helm install(ex "gcr.io/istio-testing")
default: gcr.io/istio-testing
- name: GIE_CRD_MANIFESTS_URL
description: Set this, if you want to use specific GIE CRD (ex "https://github.com/llm-d/llm-d-inference-scheduler/deploy/components/crds-gie")
default: https://github.com/llm-d/llm-d-inference-scheduler/deploy/components/crds-gie
- name: GATEWAY_API_CRD_MANIFESTS_URL
description: Set this, if you want to use specific GATEWAY API CRD (ex "https://github.com/llm-d/llm-d-inference-scheduler/deploy/components/crds-gateway-api")
default: https://github.com/llm-d/llm-d-inference-scheduler/deploy/components/crds-gateway-api
- name: MODEL_ID
description: Set this, if you want to use specific model id (ex "meta-llama/Llama-3.2-3B-Instruct")
default: meta-llama/Llama-3.2-3B-Instruct








Original file line number Diff line number Diff line change
@@ -0,0 +1,227 @@
#!/bin/bash
# -----------------------------------------------------------------------------
# test-request.sh
#
# Description:
# Quick smoke tests against your llm-d deployment:
# 1) GET /v1/models on the decode pod
# 2) POST /v1/completions on the decode pod
# 3) GET /v1/models via the gateway
# 4) POST /v1/completions via the gateway
#
# -----------------------------------------------------------------------------

set -euo pipefail

# Fail fast when the Kubernetes CLI the script depends on is missing.
command -v kubectl &>/dev/null || {
  echo "Error: 'kubectl' not found in PATH." >&2
  exit 1
}
Comment on lines +16 to +19
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Inconsistent CLI dependency detection

The script checks for kubectl but later invokes oc, leading to potential failures if only one client is available. Consolidate to a single CLI or detect both.

Apply this diff:

- if ! command -v kubectl &>/dev/null; then
-   echo "Error: 'kubectl' not found in PATH." >&2
+ if ! command -v kubectl &>/dev/null && ! command -v oc &>/dev/null; then
+   echo "Error: 'kubectl' or 'oc' not found in PATH." >&2
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
if ! command -v kubectl &>/dev/null; then
echo "Error: 'kubectl' not found in PATH." >&2
exit 1
fi
if ! command -v kubectl &>/dev/null && ! command -v oc &>/dev/null; then
echo "Error: 'kubectl' or 'oc' not found in PATH." >&2
exit 1
fi
🤖 Prompt for AI Agents
In
unofficial_components/roles/poc-llm-d-install-with-manifests/files/test-request.sh
around lines 16 to 19, the script checks only for the presence of 'kubectl' but
later uses 'oc', causing inconsistency. Modify the script to check for both
'kubectl' and 'oc' commands, and ensure it uses only one CLI consistently
throughout. Either consolidate to using only 'kubectl' or only 'oc', or add
detection logic to select the available CLI before proceeding.


# Print usage information to stdout and exit the script successfully.
# Called for -h/--help and (after a diagnostic) for unknown options.
show_help() {
  cat <<EOF
Usage: $(basename "$0") [OPTIONS]

Quick smoke tests against your llm-d deployment.

Options:
-n, --namespace NAMESPACE Kubernetes namespace to use (default: llm-d)
-m, --model MODEL_ID Model to query (optional: served model will be discovered from model listing)
-k, --minikube Run only Minikube DNS gateway tests
-h, --help Show this help message and exit
EOF
  exit 0
}

# ── Parse flags ───────────────────────────────────────────────────────────────
NAMESPACE="llm-d"
CLI_MODEL_ID=""
USE_MINIKUBE=false
# Random suffix so helper-pod names don't collide across concurrent runs.
# Computed inline because gen_id is defined further down the file.
ID=$(( RANDOM % 10000 + 1 ))

while [[ $# -gt 0 ]]; do
  case $1 in
    -n|--namespace)
      NAMESPACE="$2"
      shift 2
      ;;
    -m|--model)
      CLI_MODEL_ID="$2"
      shift 2
      ;;
    -k|--minikube)
      USE_MINIKUBE=true
      shift
      ;;
    -h|--help)
      show_help
      ;;
    *)
      echo "Unknown option: $1"
      show_help
      ;;
  esac
done

# Empty MODEL_ID means "discover the first served model from /v1/models".
MODEL_ID="${CLI_MODEL_ID:-}"

echo "Namespace: $NAMESPACE"
if [[ -n "$MODEL_ID" ]]; then
  echo "Model ID: $MODEL_ID"
else
  echo "Model ID: none; will be discovered from first entry in /v1/models"
fi
echo

# ── Helper to generate a unique suffix (1..10000) ────────────────────────────
gen_id() {
  printf '%s\n' "$(( (RANDOM % 10000) + 1 ))"
}

# ── Extract all model IDs from a compact JSON blob (for display on error) ────
# Relies on the server emitting compact JSON, i.e. "id":"…" with no spaces.
extract_models() {
  grep -o '"id":"[^"]*"' <<<"$1" | cut -d'"' -f4
}

# ── Grab the FIRST model ID from a compact JSON blob ─────────────────────────
infer_first_model() {
  extract_models "$1" | head -n1
}

# ── Full validation: decode-pod and gateway smoke tests ──────────────────────
# Globals: reads NAMESPACE and ID; reads/sets MODEL_ID (discovered from the
# model listing when not supplied via -m).
# Requests are issued from a lightweight in-cluster curl pod (curl-$ID),
# managed with kubectl throughout — the script only guarantees kubectl exists,
# so no `oc` is used here.
validation() {
  # Discover the decode pod IP
  POD_IP=$(kubectl get pods -n "$NAMESPACE" \
    -o jsonpath='{range .items[*]}{.metadata.name}{" "}{.status.podIP}{"\n"}{end}' \
    | grep decode | awk '{print $2}' | head -1)

  if [[ -z "$POD_IP" ]]; then
    echo "Error: no decode pod found in namespace $NAMESPACE" >&2
    exit 1
  fi

  # Create the helper curl pod if it doesn't exist yet.
  # curlimages/curl is far lighter than rhel-tools and works on any cluster.
  if ! kubectl get pod "curl-$ID" -n "$NAMESPACE" &>/dev/null; then
    kubectl run "curl-$ID" -n "$NAMESPACE" \
      --image=curlimages/curl --restart=Never -- sleep infinity
  fi

  # ── 1) GET /v1/models on decode pod ─────────────────────────────────────────
  echo "1 -> Fetching available models from the decode pod at ${POD_IP}…"
  kubectl wait --for=condition=ready pod "curl-$ID" -n "$NAMESPACE" --timeout=10m
  LIST_JSON=$(kubectl exec curl-"$ID" \
    --namespace "$NAMESPACE" -- \
    curl -sS "http://${POD_IP}:8000/v1/models" \
      -H 'accept: application/json' \
      -H 'Content-Type: application/json')
  echo "$LIST_JSON"
  echo

  # Infer the model from the listing, or validate the one supplied via -m.
  if [[ -z "$MODEL_ID" ]]; then
    MODEL_ID=$(infer_first_model "$LIST_JSON")
    echo "Discovered model to use: $MODEL_ID"
  else
    if ! grep -q "\"id\":\"$MODEL_ID\"" <<<"$LIST_JSON"; then
      echo "Error: requested model '$MODEL_ID' not found in available models:" >&2
      # Show the full listing (not just the first entry) to aid debugging,
      # matching the minikube path's behavior.
      extract_models "$LIST_JSON" >&2
      exit 1
    fi
  fi
  echo

  # ── 2) POST /v1/completions on decode pod ──────────────────────────────────
  echo "2 -> Sending a completion request to the decode pod at ${POD_IP}…"
  kubectl exec curl-"$ID" \
    --namespace "$NAMESPACE" -- \
    curl -sS -X POST "http://${POD_IP}:8000/v1/completions" \
      -H 'accept: application/json' \
      -H 'Content-Type: application/json' \
      -d '{
        "model":"'"$MODEL_ID"'",
        "prompt":"Who are you?"
      }'
  echo

  # ── 3) GET /v1/models via the gateway ──────────────────────────────────────
  GATEWAY_ADDR=$(kubectl get gateway -n "$NAMESPACE" | tail -n1 | awk '{print $3}')
  echo "3 -> Fetching available models via the gateway at ${GATEWAY_ADDR}…"

  GW_JSON=$(kubectl exec curl-"$ID" \
    --namespace "$NAMESPACE" -- \
    curl -sS "http://${GATEWAY_ADDR}/v1/models" \
      -H 'accept: application/json' \
      -H 'Content-Type: application/json')
  echo "$GW_JSON"
  echo

  if ! grep -q "\"id\":\"$MODEL_ID\"" <<<"$GW_JSON"; then
    echo "Error: model '$MODEL_ID' not available via gateway:" >&2
    extract_models "$GW_JSON" >&2
    exit 1
  fi
  echo

  # ── 4) POST /v1/completions via gateway ────────────────────────────────────
  echo "4 -> Sending a completion request via the gateway at ${GATEWAY_ADDR} with model '${MODEL_ID}'…"

  GW_JSON=$(kubectl exec curl-"$ID" \
    --namespace "$NAMESPACE" -- \
    curl -sS -X POST "http://${GATEWAY_ADDR}/v1/completions" \
      -H 'accept: application/json' \
      -H 'Content-Type: application/json' \
      -d '{
        "model":"'"$MODEL_ID"'",
        "prompt":"Who are you?"
      }')
  echo "$GW_JSON"
  echo
}

# ── Minikube gateway validation ───────────────────────────────────────────────
# Hits the gateway through its in-cluster DNS name from a throwaway curl pod.
# Globals: reads NAMESPACE and ID; reads/sets MODEL_ID.
minikube_validation() {
  SVC_HOST="llm-d-inference-gateway-istio.${NAMESPACE}.svc.cluster.local:80"
  echo "Minikube validation: hitting gateway DNS at ${SVC_HOST}"

  # 1) GET /v1/models via DNS gateway
  echo "1 -> GET /v1/models via DNS at ${SVC_HOST}…"
  LIST_JSON=$(kubectl run --rm -i curl-"$ID" \
    --namespace "$NAMESPACE" \
    --image=curlimages/curl --restart=Never -- \
    curl -sS "http://${SVC_HOST}/v1/models" \
      -H 'accept: application/json' \
      -H 'Content-Type: application/json')
  echo "$LIST_JSON"
  echo

  # Validate the requested model, or discover one from the listing.
  if [[ -n "$MODEL_ID" ]]; then
    if ! grep -q "\"id\":\"$MODEL_ID\"" <<<"$LIST_JSON"; then
      echo "Error: requested model '$MODEL_ID' not found in available models:"
      extract_models "$LIST_JSON"
      exit 1
    fi
  else
    MODEL_ID=$(infer_first_model "$LIST_JSON")
    echo "Inferred model to use: $MODEL_ID"
  fi
  echo

  # 2) POST /v1/completions via DNS gateway
  echo "2 -> POST /v1/completions via DNS at ${SVC_HOST} with model '${MODEL_ID}'…"
  kubectl run --rm -i curl-"$ID" \
    --namespace "$NAMESPACE" \
    --image=curlimages/curl --restart=Never -- \
    curl -sS -X POST "http://${SVC_HOST}/v1/completions" \
      -H 'accept: application/json' \
      -H 'Content-Type: application/json' \
      -d '{
        "model":"'"$MODEL_ID"'",
        "prompt":"You are a helpful AI assistant."
      }'
  echo
}

# ── Main ───────────────────────────────────────────
if [[ "$USE_MINIKUBE" == true ]]; then
  minikube_validation
else
  validation
fi
# Clean up the helper pod with kubectl (the script only checks for kubectl).
# --ignore-not-found keeps `set -e` from aborting when the pod is already gone
# (the minikube path creates it with `kubectl run --rm`, so it auto-deletes).
kubectl delete pod "curl-$ID" -n "$NAMESPACE" --ignore-not-found --force --grace-period=0
echo "All tests complete."
Loading
Loading