Jooho · Jooho · Jun 18, 2025 · Jun 18, 2025 · coderabbitai · Jun 18, 2025
diff --git a/hacks/download-cli.sh b/hacks/download-cli.sh
@@ -51,6 +51,14 @@ check_binary_exists() {
     return 1
 }
 
+# Normalize OS and CPU architecture to the "<os>-<arch>" naming used in release tarball names
+OS=$(uname | tr '[:upper:]' '[:lower:]')
+ARCH=$(uname -m)
+case "$ARCH" in
+  arm64|aarch64) ARCH="arm64" ;;
+  x86_64) ARCH="amd64" ;;
+  *) echo "Unsupported architecture: $ARCH" >&2; exit 1 ;;
+esac
 
 # Install JQ (mandatory)
 if ! check_binary_exists "${root_directory}/bin/jq"; then
@@ -148,6 +156,20 @@ if ! check_binary_exists "${root_directory}/bin/tkn"; then
     echo "✅ Tekton CLI installed successfully!"
 fi
 
+# Install Helm
+HELM_VERSION="v3.17.3"
+HELM_TARBALL="helm-${HELM_VERSION}-${OS}-${ARCH}.tar.gz"
+
+if ! check_binary_exists "${root_directory}/bin/helm"; then
+  echo "⬇️ Downloading HELM (v${HELM_VERSION})..."
+  wget --progress=bar:force:noscroll "https://get.helm.sh/${HELM_TARBALL}"
+  tar -zxvf "${HELM_TARBALL}"
+  delete_if_exists "${root_directory}/bin/helm"
+  mv "${OS}-${ARCH}/helm" "${root_directory}/bin/helm"
+  chmod +x "${root_directory}/bin/helm"
+  rm -rf "${OS}-${ARCH}" "${HELM_TARBALL}"
+fi
+
 if [[ $TEST_ENV == "local" ]]; then
   # Install ROSA
   if ! check_binary_exists "${root_directory}/bin/rosa"; then

diff --git a/unofficial_components/playbooks/llm-d-on-kind/config.yaml b/unofficial_components/playbooks/llm-d-on-kind/config.yaml
@@ -0,0 +1,11 @@
+playbook:
+  name: llm-d-on-kind
+  description: This playbook is used to install LLM-D on a kind cluster using llm-d-simulator.
+  steps:
+    - role:
+        name: kind-install
+    - role:
+        name: llm-d-install-with-manifests
+        input_env:
+          TEST_NAMESPACE: llm-d-test
+
diff --git a/unofficial_components/roles/poc-llm-d-install-with-manifests/config.yaml b/unofficial_components/roles/poc-llm-d-install-with-manifests/config.yaml
@@ -0,0 +1,80 @@
+role:
+  created_date: "20250617"
+  name: llm-d-install-with-manifests
+  files:
+    test_requests_file: files/test-request.sh
+  manifests:
+    route_manifests_dir: manifests/populated_manifests/route
+    kvcache_manifests_dir: manifests/populated_manifests/kvcache
+    scheduler_manifests_dir: manifests/populated_manifests/scheduler
+    prefill_manifests_dir: manifests/populated_manifests/prefill
+    decode_manifests_dir: manifests/populated_manifests/decode
+    simulator_prefill_manifests_dir: manifests/populated_manifests_using_simulator/prefill
+    simulator_decode_manifests_dir: manifests/populated_manifests_using_simulator/decode    
+
+  description: |
+    This role is used to install a Prefill/Decode type deployment of LLM-D using llm-d-simulator.
+    It will create all required resources from manifests, verify the deployment by sending a test request to the LLM-D simulator, and wait for the request to be processed.
+    For now, KIND cluster is supported.
+
+    pre-requirements:
+      - Provide Cluster (KIND) if you want to use your own cluster.
+      - The cluster should have GPUs if you don't want to use llm-d-simulator.
+
+
+    Input Environment:
+      The parameters include:
+      - CLUSTER_TYPE: Set CLUSTER_TYPE(ex "KIND") (default: KIND)
+      - KUBECONFIG_PATH: Set KUBECONFIG_PATH if you set KIND(ex "/path/to/kubeconfig") (default: ~/.kube/config)
+
+    To run it:
+    ./loopy roles run poc-llm-d-install-with-manifests  \
+    -p CLUSTER_TYPE=KIND \
+    -p MODEL_ID=meta-llama/Llama-3.2-3B-Instruct \
+    -p TEST_NAMESPACE=llm-d-test \
+    -p USE_SIMULATOR=true \
+    -p GIE_BACKEND=istio \
+    -p ISTIO_HUB_VERSION=1.26-alpha.9befed2f1439d883120f8de70fd70d84ca0ebc3d \
+    -p ISTIO_HUB=gcr.io/istio-testing \
+    -p GIE_CRD_MANIFESTS_URL=https://github.com/llm-d/llm-d-inference-scheduler/deploy/components/crds-gie \
+    -p GATEWAY_API_CRD_MANIFESTS_URL=https://github.com/llm-d/llm-d-inference-scheduler/deploy/components/crds-gateway-api
+
+  input_env:
+    - name: TEST_NAMESPACE
+      description: |
+        Set this, if you want to use specific namespace for test (ex "llm-d-test")
+      default: llm-d-test
+    - name: USE_SIMULATOR
+      description: |
+        Set this, if you want to use llm-d-simulator (ex "true")
+      default: true
+    - name: GIE_BACKEND
+      description: |
+        Set this, if you want to use specific GIE implementation (ex "istio")
+        For now, only istio is supported.
+      default: istio
+    - name: ISTIO_HUB_VERSION
+      description: |
+        Set this, if you want to use specific ISTIO image tag for helm install(ex "1.26")
+      default: 1.26-alpha.9befed2f1439d883120f8de70fd70d84ca0ebc3d
+    - name: ISTIO_HUB
+      description: |
+        Set this, if you want to use specific ISTIO image hub for helm install(ex "gcr.io/istio-testing")
+      default: gcr.io/istio-testing        
+    - name: GIE_CRD_MANIFESTS_URL
+      description: Set this, if you want to use specific GIE CRD (ex "https://github.com/llm-d/llm-d-inference-scheduler/deploy/components/crds-gie")
+      default: https://github.com/llm-d/llm-d-inference-scheduler/deploy/components/crds-gie
+    - name: GATEWAY_API_CRD_MANIFESTS_URL
+      description: Set this, if you want to use specific GATEWAY API CRD (ex "https://github.com/llm-d/llm-d-inference-scheduler/deploy/components/crds-gateway-api")
+      default: https://github.com/llm-d/llm-d-inference-scheduler/deploy/components/crds-gateway-api
+    - name: MODEL_ID
+      description: Set this, if you want to use specific model id (ex "meta-llama/Llama-3.2-3B-Instruct")
+      default: meta-llama/Llama-3.2-3B-Instruct
+
+
+
+
+
+
+
+
diff --git a/unofficial_components/roles/poc-llm-d-install-with-manifests/files/test-request.sh b/unofficial_components/roles/poc-llm-d-install-with-manifests/files/test-request.sh
@@ -0,0 +1,227 @@
+#!/bin/bash
+# -----------------------------------------------------------------------------
+# test-request.sh
+#
+# Description:
+#   Quick smoke tests against your llm-d deployment:
+#     1) GET /v1/models on the decode pod
+#     2) POST /v1/completions on the decode pod
+#     3) GET /v1/models via the gateway
+#     4) POST /v1/completions via the gateway
+#
+# -----------------------------------------------------------------------------
+
+set -euo pipefail
+
+# kubectl is mandatory for this script; fail fast with a clear message when it is missing.
+if ! command -v kubectl &>/dev/null; then
+  echo "Error: 'kubectl' not found in PATH." >&2
+  exit 1
+fi
+
+# Print the usage text on stdout, then terminate the script with status 0.
+show_help() {
+  local prog
+  prog=$(basename "$0")
+  cat <<EOF
+Usage: ${prog} [OPTIONS]
+
+Quick smoke tests against your llm-d deployment.
+
+Options:
+  -n, --namespace NAMESPACE   Kubernetes namespace to use (default: llm-d)
+  -m, --model MODEL_ID        Model to query (optional: served model will be discovered from model listing)
+  -k, --minikube              Run only Minikube DNS gateway tests
+  -h, --help                  Show this help message and exit
+EOF
+  exit 0
+}
+
+# ── Parse flags ───────────────────────────────────────────────────────────────
+# Defaults; each may be overridden by the command-line flags handled below.
+NAMESPACE="llm-d"
+CLI_MODEL_ID=""
+USE_MINIKUBE=false
+ID=1234   # numeric suffix of the helper pod name (curl-<ID>)
+
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    -n|--namespace)
+      # Under 'set -u' a missing value would otherwise die with a bare "unbound variable" error.
+      if [[ $# -lt 2 ]]; then
+        echo "Error: $1 requires a value." >&2
+        exit 1
+      fi
+      NAMESPACE="$2"
+      shift 2
+      ;;
+    -m|--model)
+      if [[ $# -lt 2 ]]; then
+        echo "Error: $1 requires a value." >&2
+        exit 1
+      fi
+      CLI_MODEL_ID="$2"
+      shift 2
+      ;;
+    -k|--minikube)
+      USE_MINIKUBE=true
+      shift
+      ;;
+    -h|--help)
+      show_help
+      ;;
+    *)
+      echo "Unknown option: $1" >&2
+      # show_help ends its shell with status 0; run it in a subshell so that a bad
+      # invocation still makes this script fail with a non-zero status.
+      (show_help) >&2
+      exit 1
+      ;;
+  esac
+done
+
+# Without -m/--model, MODEL_ID stays empty and the validation functions pick one from /v1/models.
+MODEL_ID="${CLI_MODEL_ID:-}"
+
+echo "Namespace: $NAMESPACE"
+if [[ -n "$MODEL_ID" ]]; then
+  echo "Model ID:  $MODEL_ID"
+else
+  echo "Model ID:  none; will be discovered from first entry in /v1/models"
+fi
+echo
+
+# ── Helper to generate a unique suffix ───────────────────────────────────────
+gen_id() { echo $(( RANDOM % 10000 + 1 )); }
+
+# ── Extract all model IDs from JSON blob (for display on error) ──────────────
+# Arguments: $1 - JSON text
+# Outputs:   one value per line for every "id" key found, in document order
+# The pattern tolerates whitespace around the colon, so both compact ("id":"x")
+# and pretty-printed ("id": "x") JSON are handled.
+extract_models() {
+  printf '%s' "$1" \
+    | grep -oE '"id"[[:space:]]*:[[:space:]]*"[^"]*"' \
+    | cut -d'"' -f4
+}
+
+# ── Grab the FIRST model ID from JSON blob ───────────────────────────────────
+infer_first_model() {
+  printf '%s' "$1" | grep -o '"id":"[^"]*"' | head -n1 | cut -d'"' -f4
+}
+
+validation() {
+  # Discover the decode pod IP
+  POD_IP=$(kubectl get pods -n "$NAMESPACE" \
+    -o jsonpath='{range .items[*]}{.metadata.name}{" "}{.status.podIP}{"\n"}{end}' \
+    | grep decode | awk '{print $2}' | head -1)
+
+  if [[ -z "$POD_IP" ]]; then
+      echo "Error: no decode pod found in namespace $NAMESPACE"
+      exit 1
+  fi
+
+  # Create curl pod if it doesn't exist
+  if [[ $(oc get pods -n $NAMESPACE | grep curl-$ID | wc -l) -eq 0 ]]; then
+    oc run curl-$ID -n $NAMESPACE --image=registry.access.redhat.com/rhel7/rhel-tools  -- sleep infinity 2> /dev/null    
+  fi
+  # ── 1) GET /v1/models on decode pod ─────────────────────────────────────────
-  # Create curl pod if it doesn't exist
-  if [[ $(oc get pods -n $NAMESPACE | grep curl-$ID | wc -l) -eq 0 ]]; then
-    oc run curl-$ID -n $NAMESPACE --image=registry.access.redhat.com/rhel7/rhel-tools  -- sleep infinity 2> /dev/null    
-  fi
-  # ── 1) GET /v1/models on decode pod ─────────────────────────────────────────
+  # Create curl pod if it doesn't exist
+  if [[ $(kubectl get pods -n "$NAMESPACE" | grep "curl-$ID" | wc -l) -eq 0 ]]; then
+    kubectl run curl-"$ID" -n "$NAMESPACE" --image=curlimages/curl -- sleep infinity
+  fi
+  # ── 1) GET /v1/models on decode pod ─────────────────────────────────────────
-  # Create curl pod if it doesn't exist
-  if [[ $(oc get pods -n $NAMESPACE | grep curl-$ID | wc -l) -eq 0 ]]; then
-    oc run curl-$ID -n $NAMESPACE --image=registry.access.redhat.com/rhel7/rhel-tools  -- sleep infinity 2> /dev/null    
-  fi
-  # ── 1) GET /v1/models on decode pod ─────────────────────────────────────────
+  # Create curl pod if it doesn't exist
+  if [[ $(kubectl get pods -n "$NAMESPACE" | grep "curl-$ID" | wc -l) -eq 0 ]]; then
+    kubectl run curl-"$ID" -n "$NAMESPACE" --image=curlimages/curl -- sleep infinity
+  fi
+  # ── 1) GET /v1/models on decode pod ─────────────────────────────────────────
+  echo "1 -> Fetching available models from the decode pod at ${POD_IP}…"
+  oc wait --for=condition=ready pod curl-$ID -n $NAMESPACE --timeout=10m
+  LIST_JSON=$(kubectl exec curl-"$ID" \
+    --namespace "$NAMESPACE" -- \
+    curl -sS http://${POD_IP}:8000/v1/models \
+      -H 'accept: application/json' \
+      -H 'Content-Type: application/json')
+  echo "$LIST_JSON"
+  echo
+
+  # infer or validate
+  if [[ -z "$MODEL_ID" ]]; then
+    MODEL_ID=$(infer_first_model "$LIST_JSON")
+    echo "Discovered model to use: $MODEL_ID"
+  else
+    if ! grep -q "\"id\":\"$MODEL_ID\"" <<<"$LIST_JSON"; then
+      echo "Error: requested model '$MODEL_ID' not found in available models:"
+      extract_models "$LIST_JSON" | head -n1
+      exit 1
+    fi
+  fi
+  echo
+
+  # ── 2) POST /v1/completions on decode pod ──────────────────────────────────
+  echo "2 -> Sending a completion request to the decode pod at ${POD_IP}…"
+  kubectl exec curl-"$ID" \
+    --namespace "$NAMESPACE" -- \
+    curl -sS -X POST http://${POD_IP}:8000/v1/completions \
+      -H 'accept: application/json' \
+      -H 'Content-Type: application/json' \
+      -d '{
+        "model":"'"$MODEL_ID"'",
+        "prompt":"Who are you?"
+      }'
+  echo
+
+  # 3) GET /v1/models via the gateway
+  GATEWAY_ADDR=$(kubectl get gateway -n "$NAMESPACE" | tail -n1 | awk '{print $3}')
+  echo "3 -> Fetching available models via the gateway at ${GATEWAY_ADDR}…"
+
+  GW_JSON=$(kubectl exec curl-"$ID" \
+    --namespace "$NAMESPACE" -- \
+    curl -sS http://${GATEWAY_ADDR}/v1/models \
+      -H 'accept: application/json' \
+      -H 'Content-Type: application/json')
+  echo "$GW_JSON"
+  echo
+
+  if ! grep -q "\"id\":\"$MODEL_ID\"" <<<"$GW_JSON"; then
+    echo "Error: model '$MODEL_ID' not available via gateway:"
+    extract_models "$GW_JSON"
+    exit 1
+  fi
+  echo
+
+  # ── 4) POST /v1/completions via gateway ────────────────────────────────────
+  echo "4 -> Sending a completion request via the gateway at ${GATEWAY_ADDR} with model '${MODEL_ID}'…"
+
+  GW_JSON=$(kubectl exec curl-"$ID" \
+    --namespace "$NAMESPACE" -- \
+    curl -sS -X POST http://${GATEWAY_ADDR}/v1/completions \
+      -H 'accept: application/json' \
+      -H 'Content-Type: application/json' \
+      -d '{
+        "model":"'"$MODEL_ID"'",
+        "prompt":"Who are you?"
+      }')
+  echo "$GW_JSON"
+  echo
+}
+
+# ── Minikube gateway validation ───────────────────────────────────────────────
+minikube_validation() {
+  SVC_HOST="llm-d-inference-gateway-istio.${NAMESPACE}.svc.cluster.local:80"
+  echo "Minikube validation: hitting gateway DNS at ${SVC_HOST}"
+
+  # 1) GET /v1/models via DNS gateway
+  echo "1 -> GET /v1/models via DNS at ${SVC_HOST}…"
+  LIST_JSON=$(kubectl run --rm -i curl-"$ID" \
+    --namespace "$NAMESPACE" \
+    --image=curlimages/curl --restart=Never -- \
+    curl -sS http://${SVC_HOST}/v1/models \
+      -H 'accept: application/json' \
+      -H 'Content-Type: application/json')
+  echo "$LIST_JSON"
+  echo
+
+  # Discover or validate
+  if [[ -z "$MODEL_ID" ]]; then
+    MODEL_ID=$(infer_first_model "$LIST_JSON")
+    echo "Inferred model to use: $MODEL_ID"
+  else
+    if ! grep -q "\"id\":\"$MODEL_ID\"" <<<"$LIST_JSON"; then
+      echo "Error: requested model '$MODEL_ID' not found in available models:"
+      extract_models "$LIST_JSON"
+      exit 1
+    fi
+  fi
+  echo
+
+  # 2) POST /v1/completions via DNS gateway
+  echo "2 -> POST /v1/completions via DNS at ${SVC_HOST} with model '${MODEL_ID}'…"
+  kubectl run --rm -i curl-"$ID" \
+    --namespace "$NAMESPACE" \
+    --image=curlimages/curl --restart=Never -- \
+    curl -sS -X POST http://${SVC_HOST}/v1/completions \
+      -H 'accept: application/json' \
+      -H 'Content-Type: application/json' \
+      -d '{
+        "model":"'"$MODEL_ID"'",
+        "prompt":"You are a helpful AI assistant."
+      }'
+  echo
+}
+
+# ── Main ───────────────────────────────────────────
+if [[ "$USE_MINIKUBE" == true ]]; then
+  minikube_validation
+else
+  validation
+fi
+oc delete pod curl-$ID -n $NAMESPACE --force --grace-period=0
+echo "All tests complete."