From 30fef116b31ee480239d1f9b2bfdeb0ec54d7b5a Mon Sep 17 00:00:00 2001 From: "devin-ai-integration[bot]" <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Wed, 5 Mar 2025 09:30:27 +0800 Subject: [PATCH 01/13] Add Python CLI package with config import command (#14) # Python CLI Package for On-Prem Stack This PR adds: 1. Python CLI package for on-prem stack - Single-line installation via `pip install liquidai-cli` - YAML-based configuration management - Modular CLI design spanning stack, model, database, and infrastructure operations - Type-safe configuration and Docker container management - Interactive and automated workflows - Advanced model management integrating Hugging Face and vLLM 2. Development tooling: - black for code formatting (like prettier) - flake8 for linting (like eslint) - pre-commit hooks for automated checks (like husky) - Python-specific gitignore 3. Unit Tests and CI: - Tests for core helper functions (config and docker utilities) - GitHub Actions workflow for CI - Coverage reporting with Codecov - Multi-Python version testing (3.10-3.12) Link to Devin run: https://app.devin.ai/sessions/f75525e7bf3744dc9044bf4bab5d187b --------- Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Co-authored-by: liren@liquid.ai --- .flake8 | 4 + .github/workflows/python-ci.yml | 48 +++++ python_cli/.gitignore | 174 ++++++++++++++++ python_cli/.pre-commit-config.yaml | 10 + python_cli/README.md | 163 +++++++++++++++ python_cli/liquidai_cli/__init__.py | 3 + python_cli/liquidai_cli/cli.py | 27 +++ python_cli/liquidai_cli/commands/__init__.py | 9 + python_cli/liquidai_cli/commands/config.py | 93 +++++++++ python_cli/liquidai_cli/commands/db.py | 31 +++ python_cli/liquidai_cli/commands/infra.py | 27 +++ python_cli/liquidai_cli/commands/model.py | 208 +++++++++++++++++++ python_cli/liquidai_cli/commands/stack.py | 136 ++++++++++++ python_cli/liquidai_cli/utils/__init__.py | 1 + python_cli/liquidai_cli/utils/config.py | 108 ++++++++++ python_cli/liquidai_cli/utils/docker.py | 80 +++++++ python_cli/liquidai_cli/utils/prompt.py | 36 ++++ python_cli/pyproject.toml | 49 +++++ python_cli/tests/__init__.py | 1 + python_cli/tests/test_docker.py | 63 ++++++ python_cli/tests/test_utils.py | 61 ++++++ 21 files changed, 1332 insertions(+) create mode 100644 .flake8 create mode 100644 .github/workflows/python-ci.yml create mode 100644 python_cli/.gitignore create mode 100644 python_cli/.pre-commit-config.yaml create mode 100644 python_cli/README.md create mode 100644 python_cli/liquidai_cli/__init__.py create mode 100644 python_cli/liquidai_cli/cli.py create mode 100644 python_cli/liquidai_cli/commands/__init__.py create mode 100644 python_cli/liquidai_cli/commands/config.py create mode 100644 python_cli/liquidai_cli/commands/db.py create mode 100644 python_cli/liquidai_cli/commands/infra.py create mode 100644 python_cli/liquidai_cli/commands/model.py create mode 100644 python_cli/liquidai_cli/commands/stack.py create mode 100644 python_cli/liquidai_cli/utils/__init__.py create mode 100644 python_cli/liquidai_cli/utils/config.py create mode 100644 python_cli/liquidai_cli/utils/docker.py create mode 100644 python_cli/liquidai_cli/utils/prompt.py create mode 100644 python_cli/pyproject.toml create mode 100644 python_cli/tests/__init__.py create mode 100644 python_cli/tests/test_docker.py create mode 100644 python_cli/tests/test_utils.py diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000..acf4482 --- /dev/null +++ b/.flake8 @@ -0,0 +1,4
@@ +[flake8] +max-line-length = 120 +extend-ignore = E203 +exclude = .git,__pycache__,build,dist,*.egg-info,venv diff --git a/.github/workflows/python-ci.yml b/.github/workflows/python-ci.yml new file mode 100644 index 0000000..85a6a44 --- /dev/null +++ b/.github/workflows/python-ci.yml @@ -0,0 +1,48 @@ +name: Python CI + +on: + push: + branches: [ main ] + paths: + - 'python_cli/**' + pull_request: + branches: [ main ] + paths: + - 'python_cli/**' + +jobs: + test: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.10", "3.11", "3.12"] + + steps: + - uses: actions/checkout@v3 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + + - name: Install dependencies + run: | + cd python_cli + python -m pip install --upgrade pip + pip install .[test] + pip install black flake8 + + - name: Run black + run: | + cd python_cli + black . --check + + - name: Run flake8 + run: | + cd python_cli + flake8 . + + - name: Run tests + run: | + cd python_cli + pytest --cov=liquidai_cli --cov-report=xml diff --git a/python_cli/.gitignore b/python_cli/.gitignore new file mode 100644 index 0000000..0a19790 --- /dev/null +++ b/python_cli/.gitignore @@ -0,0 +1,174 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# UV +# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +#uv.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. 
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/latest/usage/project/#working-with-version-control +.pdm.toml +.pdm-python +.pdm-build/ + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +# Ruff stuff: +.ruff_cache/ + +# PyPI configuration file +.pypirc diff --git a/python_cli/.pre-commit-config.yaml b/python_cli/.pre-commit-config.yaml new file mode 100644 index 0000000..5f369aa --- /dev/null +++ b/python_cli/.pre-commit-config.yaml @@ -0,0 +1,10 @@ +repos: +- repo: https://github.com/psf/black + rev: 24.1.1 + hooks: + - id: black + language_version: python3 +- repo: https://github.com/pycqa/flake8 + rev: 7.0.0 + hooks: + - id: flake8 diff --git a/python_cli/README.md b/python_cli/README.md new file mode 100644 index 0000000..ecad7af --- /dev/null +++ b/python_cli/README.md @@ -0,0 +1,163 @@ +# Liquid Labs CLI + +Command line interface for managing Liquid Labs on-prem stack. + +## Installation + +```bash +pip install liquidai-cli +``` + +## Configuration + +The CLI uses a YAML configuration file (`liquid.yaml`) in your working directory. 
A default configuration will be created on first use, but you can customize it: + +```yaml +stack: + version: "c3d7dbacd1" + model_image: "liquidai/lfm-7b-e:0.0.1" + api_secret: "local_api_token" + # Other values will be auto-generated +database: + name: "liquid_labs" + user: "local_user" + password: "local_password" + port: 5432 + schema: "labs" +``` + +## Usage + +### Stack Management + +```bash +# Launch stack +liquidai stack launch + +# Launch with upgrades +liquidai stack launch --upgrade-stack --upgrade-model + +# Shutdown stack +liquidai stack shutdown + +# Test API endpoints +liquidai stack test + +# Purge stack (removes all components) +liquidai stack purge + +# Purge without confirmation +liquidai stack purge --force +``` + +### Model Operations + +```bash +# Run a HuggingFace model +liquidai model run-hf \ + --name llama-7b \ + --path meta-llama/Llama-2-7b-chat-hf \ + --port 9000 \ + --gpu-memory-utilization 0.6 \ + --max-num-seqs 600 \ + --max-model-len 32768 + +# Run a local checkpoint +liquidai model run-checkpoint \ + --path /path/to/checkpoint \ + --port 9000 \ + --gpu-memory-utilization 0.6 \ + --max-num-seqs 600 + +# List running models +liquidai model list + +# Stop a specific model +liquidai model stop llama-7b + +# Stop a model interactively +liquidai model stop +``` + +### Database Operations + +```bash +# Connect to database using pgcli +liquidai db connect +``` + +### Infrastructure + +```bash +# Create Cloudflare tunnel with token +liquidai tunnel create --token YOUR_TOKEN + +# Create tunnel interactively +liquidai tunnel create +``` + +### Configuration Management + +```bash +# Import configuration from .env file +liquidai config import + +# Import from specific .env file +liquidai config import --env-file /path/to/.env + +# Import to specific config file +liquidai config import --config-file /path/to/liquid.yaml + +# Force overwrite existing config +liquidai config import --force +``` + +## Command Reference + +### Stack Commands + +- `launch [--upgrade-stack] [--upgrade-model]`: Launch the stack + - `--upgrade-stack`: Upgrade stack version + - `--upgrade-model`: Upgrade model version +- `shutdown`: Shutdown the stack +- `test`: Test API endpoints +- `purge [--force]`: Remove all components + - `--force`: Skip confirmation prompt + +### Model Commands + +- `run-hf`: Run a HuggingFace model + - `--name`: Name for the model container + - `--path`: HuggingFace model path + - `--port`: Port to expose (default: 9000) + - `--gpu`: GPU index to use (default: "all") + - `--gpu-memory-utilization`: GPU memory fraction (default: 0.6) + - `--max-num-seqs`: Max parallel sequences (default: 600) + - `--max-model-len`: Max model length (default: 32768) + - `--hf-token`: HuggingFace token (or use HUGGING_FACE_TOKEN env var) + +- `run-checkpoint`: Run a local checkpoint + - `--path`: Path to checkpoint directory + - `--port`: Port to expose (default: 9000) + - `--gpu`: GPU index to use (default: "all") + - `--gpu-memory-utilization`: GPU memory fraction (default: 0.6) + - `--max-num-seqs`: Max parallel sequences (default: 600) + +- `list`: List running models +- `stop [NAME]`: Stop a model (interactive if NAME not provided) + +### Database Commands + +- `connect`: Connect to database using pgcli + +### Infrastructure Commands + +- `tunnel create [--token TOKEN]`: Create Cloudflare tunnel + - `--token`: Cloudflare tunnel token + +### Configuration Commands + +- `config import [--env-file PATH] [--config-file PATH] [--force]`: Import .env configuration + - `--env-file`: Path to 
.env file (default: .env) + - `--config-file`: Path to YAML config file (default: liquid.yaml) + - `--force`: Force overwrite existing config diff --git a/python_cli/liquidai_cli/__init__.py b/python_cli/liquidai_cli/__init__.py new file mode 100644 index 0000000..2c265e9 --- /dev/null +++ b/python_cli/liquidai_cli/__init__.py @@ -0,0 +1,3 @@ +"""Liquid Labs CLI tool for managing on-prem stack.""" + +__version__ = "0.1.0" diff --git a/python_cli/liquidai_cli/cli.py b/python_cli/liquidai_cli/cli.py new file mode 100644 index 0000000..5914f1e --- /dev/null +++ b/python_cli/liquidai_cli/cli.py @@ -0,0 +1,27 @@ +"""Main CLI entry point for Liquid Labs CLI.""" + +import typer +from liquidai_cli.commands import stack, model, db, infra, config + +# Create main CLI app +app = typer.Typer( + name="liquidai", + help="Liquid Labs CLI tool for managing on-prem stack", + no_args_is_help=True, +) + +# Register command groups +app.add_typer(stack.app, name="stack", help="Manage the on-prem stack") +app.add_typer(model.app, name="model", help="Manage ML models") +app.add_typer(db.app, name="db", help="Database operations") +app.add_typer(infra.app, name="tunnel", help="Infrastructure operations") +app.add_typer(config.app, name="config", help="Manage configuration") + + +def main(): + """Entry point for the CLI.""" + app() + + +if __name__ == "__main__": + main() diff --git a/python_cli/liquidai_cli/commands/__init__.py b/python_cli/liquidai_cli/commands/__init__.py new file mode 100644 index 0000000..b4bfcc4 --- /dev/null +++ b/python_cli/liquidai_cli/commands/__init__.py @@ -0,0 +1,9 @@ +"""Command implementations for the Liquid Labs CLI.""" + +from . import stack +from . import model +from . import db +from . import infra +from . import config + +__all__ = ["stack", "model", "db", "infra", "config"] diff --git a/python_cli/liquidai_cli/commands/config.py b/python_cli/liquidai_cli/commands/config.py new file mode 100644 index 0000000..3c21987 --- /dev/null +++ b/python_cli/liquidai_cli/commands/config.py @@ -0,0 +1,93 @@ +"""Configuration management commands.""" + +import typer +from pathlib import Path +from typing import Dict, Any +from liquidai_cli.utils.config import ( + save_config, + DEFAULT_CONFIG, + generate_random_string, +) + +app = typer.Typer(help="Manage configuration") + + +def parse_env_file(env_file: Path) -> Dict[str, Any]: + """Parse environment variables from .env file.""" + env_vars = {} + with open(env_file) as f: + for line in f: + line = line.strip() + if line and not line.startswith("#"): + try: + key, value = line.split("=", 1) + env_vars[key.strip()] = value.strip() + except ValueError: + typer.echo(f"Warning: Skipping invalid line in {env_file}: {line}", err=True) + return env_vars + + +@app.command() +def import_env( + env_file: Path = typer.Option( + Path(".env"), + "--env-file", + "-e", + help="Path to .env file to import", + exists=True, + ), + config_file: Path = typer.Option( + Path("liquid.yaml"), + "--config-file", + "-c", + help="Path to YAML config file", + ), + force: bool = typer.Option( + False, + "--force", + "-f", + help="Overwrite existing config file", + ), +): + """Import configuration from .env file into YAML format. + + This command reads an existing .env file and converts it to the new YAML configuration format. + It preserves all environment variables and their relationships while maintaining backward compatibility. + """ + if config_file.exists() and not force: + typer.echo(f"Config file {config_file} already exists. 
Use --force to overwrite.", err=True) + raise typer.Exit(1) + + # Read and parse .env file + env_vars = parse_env_file(env_file) + + # Map to YAML structure + config = DEFAULT_CONFIG.copy() + + # Stack configuration + config["stack"].update( + { + "version": env_vars.get("STACK_VERSION", config["stack"]["version"]), + "model_image": env_vars.get("MODEL_IMAGE", config["stack"]["model_image"]), + "jwt_secret": env_vars.get("JWT_SECRET", generate_random_string(64)), + "api_secret": env_vars.get("API_SECRET", config["stack"]["api_secret"]), + "auth_secret": env_vars.get("AUTH_SECRET", generate_random_string(64)), + # MODEL_NAME is auto-generated from model_image in save_config + } + ) + + # Database configuration + config["database"].update( + { + "name": env_vars.get("POSTGRES_DB", config["database"]["name"]), + "user": env_vars.get("POSTGRES_USER", config["database"]["user"]), + "password": env_vars.get("POSTGRES_PASSWORD", config["database"]["password"]), + "port": int(env_vars.get("POSTGRES_PORT", str(config["database"]["port"]))), + "schema": env_vars.get("POSTGRES_SCHEMA", config["database"]["schema"]), + } + ) + + # Save the configuration + save_config(config, config_file) + typer.echo(f"Successfully imported configuration from {env_file} to {config_file}") + typer.echo("\nNote: MODEL_NAME and DATABASE_URL are auto-generated and were not imported.") diff --git a/python_cli/liquidai_cli/commands/db.py b/python_cli/liquidai_cli/commands/db.py new file mode 100644 index 0000000..a292df5 --- /dev/null +++ b/python_cli/liquidai_cli/commands/db.py @@ -0,0 +1,31 @@ +"""Database management commands.""" + +import typer +import subprocess +from liquidai_cli.utils.config import load_config + +app = typer.Typer(help="Database operations") + + +@app.command() +def connect(): + """Connect to the database using pgcli.""" + try: + subprocess.run(["pgcli", "--version"], check=True, capture_output=True) + except (subprocess.CalledProcessError, FileNotFoundError): + typer.echo("Error: pgcli is not installed. 
Please install it first:", err=True) + typer.echo(" pip install pgcli") + raise typer.Exit(1) + + config = load_config() + db_config = config["database"] + + cmd = [ + "PGOPTIONS=--search_path={}".format(db_config["schema"]), + "pgcli", + "postgresql://{}:{}@0.0.0.0:{}/{}".format( + db_config["user"], db_config["password"], db_config["port"], db_config["name"] + ), + ] + + subprocess.run(" ".join(cmd), shell=True) diff --git a/python_cli/liquidai_cli/commands/infra.py b/python_cli/liquidai_cli/commands/infra.py new file mode 100644 index 0000000..ff63255 --- /dev/null +++ b/python_cli/liquidai_cli/commands/infra.py @@ -0,0 +1,27 @@ +"""Infrastructure management commands.""" + +import typer +from typing import Optional +from liquidai_cli.utils.docker import DockerHelper + +app = typer.Typer(help="Infrastructure operations") +docker_helper = DockerHelper() + + +@app.command() +def create( + token: Optional[str] = typer.Option(None, "--token", help="Cloudflare tunnel token"), +): + """Create a Cloudflare tunnel.""" + if not token: + token = typer.prompt("Enter your Cloudflare tunnel token") + + docker_helper.run_container( + image="cloudflare/cloudflared:latest", + name="liquid-labs-tunnel", + network="liquid_labs_network", + command=["tunnel", "--no-autoupdate", "run", "--protocol", "h2mux", "--token", token], + detach=True, + ) + + typer.echo("Cloudflare tunnel has been created successfully") diff --git a/python_cli/liquidai_cli/commands/model.py b/python_cli/liquidai_cli/commands/model.py new file mode 100644 index 0000000..175e955 --- /dev/null +++ b/python_cli/liquidai_cli/commands/model.py @@ -0,0 +1,208 @@ +"""Model management commands.""" + +import typer +from pathlib import Path +from typing import Optional, Dict, List, cast +from liquidai_cli.utils.docker import DockerHelper +from liquidai_cli.utils.config import load_config + +app = typer.Typer(help="Manage ML models") +docker_helper = DockerHelper() + + +@app.command(name="run-hf") +def run_huggingface( + name: str = typer.Option(..., "--name", help="Name for the model container"), + path: str = typer.Option(..., "--path", help="Hugging Face model path"), + port: int = typer.Option(9000, "--port", help="Port to expose locally"), + gpu: str = typer.Option("all", "--gpu", help="Specific GPU index to use"), + gpu_memory_utilization: float = typer.Option(0.6, "--gpu-memory-utilization", help="Fraction of GPU memory to use"), + max_num_seqs: int = typer.Option(600, "--max-num-seqs", help="Maximum number of sequences to generate in parallel"), + max_model_len: int = typer.Option(32768, "--max-model-len", help="Maximum length of the model"), + hf_token: Optional[str] = typer.Option( + None, "--hf-token", help="Hugging Face access token", envvar="HUGGING_FACE_TOKEN" + ), +): + """Launch a model from Hugging Face.""" + if not hf_token: + typer.echo( + "Error: Hugging Face token not provided. 
Set HUGGING_FACE_TOKEN environment variable or use --hf-token", + err=True, + ) + raise typer.Exit(1) + + docker_helper.run_container( + image="vllm/vllm-openai:latest", + name=name, + environment={"HUGGING_FACE_HUB_TOKEN": hf_token}, + ports={8000: port}, + device_requests=[{"Driver": "nvidia", "Count": -1, "Capabilities": [["gpu"]]}], + command=[ + "--host", + "0.0.0.0", + "--port", + "8000", + "--model", + path, + "--served-model-name", + name, + "--tensor-parallel-size", + "1", + "--max-logprobs", + "0", + "--gpu-memory-utilization", + str(gpu_memory_utilization), + "--max-num-seqs", + str(max_num_seqs), + "--max-model-len", + str(max_model_len), + "--max-seq-len-to-capture", + str(max_model_len), + ], + health_cmd="curl --fail http://localhost:8000/health || exit 1", + health_interval=30, + ) + + typer.echo(f"Model '{name}' started successfully") + typer.echo(f"The vLLM API will be accessible at http://localhost:{port}") + typer.echo("Please wait 1-2 minutes for the model to load before making API calls") + + +@app.command(name="run-checkpoint") +def run_checkpoint( + path: str = typer.Option(..., "--path", help="Path to model checkpoint directory"), + port: int = typer.Option(9000, "--port", help="Port to expose locally"), + gpu: str = typer.Option("all", "--gpu", help="Specific GPU index to use"), + gpu_memory_utilization: float = typer.Option( + 0.60, "--gpu-memory-utilization", help="Fraction of GPU memory to use" + ), + max_num_seqs: int = typer.Option(600, "--max-num-seqs", help="Maximum number of sequences to cache"), +): + """Launch a model from local checkpoint.""" + import json + + checkpoint_path = Path(path).resolve() + if not checkpoint_path.is_dir(): + typer.echo(f"Error: Model checkpoint directory does not exist: {path}", err=True) + raise typer.Exit(1) + + metadata_file = checkpoint_path / "model_metadata.json" + if not metadata_file.is_file(): + typer.echo("Error: model_metadata.json does not exist in the model checkpoint directory", err=True) + raise typer.Exit(1) + + with open(metadata_file) as f: + metadata = json.load(f) + model_name = metadata.get("model_name") + + if not model_name: + typer.echo("Error: model_name is not defined in model_metadata.json", err=True) + raise typer.Exit(1) + + config = load_config() + stack_version = config["stack"]["version"] + image_name = f"liquidai/liquid-labs-vllm:{stack_version}" + + docker_helper.run_container( + image=image_name, + name=model_name, + ports={8000: port}, + device_requests=[{"Driver": "nvidia", "Count": -1, "Capabilities": [["gpu"]]}], + volumes={str(checkpoint_path): {"bind": "/model", "mode": "ro"}}, + command=[ + "--host", + "0.0.0.0", + "--port", + "8000", + "--model", + "/model", + "--served-model-name", + model_name, + "--tensor-parallel-size", + "1", + "--max-logprobs", + "0", + "--dtype", + "bfloat16", + "--enable-chunked-prefill", + "false", + "--gpu-memory-utilization", + str(gpu_memory_utilization), + "--max-num-seqs", + str(max_num_seqs), + "--max-model-len", + "32768", + "--max-seq-len-to-capture", + "32768", + ], + health_cmd="curl --fail http://localhost:8000/health || exit 1", + health_interval=30, + ) + + typer.echo(f"Model '{model_name}' started successfully") + typer.echo(f"The vLLM API will be accessible at http://localhost:{port}") + typer.echo("Please wait 1-2 minutes for the model to load before making API calls") + + +@app.command() +def list(): + """List running models.""" + containers = docker_helper.list_containers("vllm/vllm-openai") + + if not containers: + typer.echo("No running 
vLLM containers found.") + return + + typer.echo("Running vLLM containers:") + typer.echo("----------------------") + + for i, container in enumerate(containers, 1): + ports = container.get("ports", {}) + port = "unknown" + if isinstance(ports, dict): + port_mappings = cast(List[Dict[str, str]], ports.get("8000/tcp", [])) + if port_mappings: + mapping = port_mappings[0] + if isinstance(mapping, dict): + port = mapping.get("HostPort", "unknown") + typer.echo(f"{i}) {container['name']} (Port: {port})") + + +@app.command() +def stop( + name: Optional[str] = typer.Argument(None, help="Name of the model to stop"), +): + """Stop a running model.""" + if name: + docker_helper.stop_container(name) + typer.echo(f"Stopped and removed container: {name}") + return + + # Interactive mode if no name provided + containers = docker_helper.list_containers("vllm/vllm-openai") + if not containers: + typer.echo("No running vLLM containers found.") + return + + typer.echo("Select a container to stop:") + for i, container in enumerate(containers, 1): + ports = container.get("ports", {}) + port = "unknown" + if isinstance(ports, dict): + port_mappings = cast(List[Dict[str, str]], ports.get("8000/tcp", [])) + if port_mappings: + mapping = port_mappings[0] + if isinstance(mapping, dict): + port = mapping.get("HostPort", "unknown") + typer.echo(f"{i}) {container['name']} (Port: {port})") + + try: + choice = typer.prompt("Enter container number", type=int) + if 1 <= choice <= len(containers): + container = containers[choice - 1] + docker_helper.stop_container(container["name"]) + typer.echo(f"Stopped and removed container: {container['name']}") + else: + typer.echo("Invalid selection", err=True) + except typer.Abort: + typer.echo("\nOperation cancelled.") diff --git a/python_cli/liquidai_cli/commands/stack.py b/python_cli/liquidai_cli/commands/stack.py new file mode 100644 index 0000000..24695cc --- /dev/null +++ b/python_cli/liquidai_cli/commands/stack.py @@ -0,0 +1,136 @@ +"""Stack management commands.""" + +import typer +from pathlib import Path +from liquidai_cli.utils.docker import DockerHelper +from liquidai_cli.utils.config import load_config, extract_model_name +from liquidai_cli.utils.prompt import confirm_action + +app = typer.Typer(help="Manage the on-prem stack") +docker_helper = DockerHelper() + + +@app.command() +def launch( + upgrade_stack: bool = typer.Option(False, "--upgrade-stack", help="Upgrade stack version"), + upgrade_model: bool = typer.Option(False, "--upgrade-model", help="Upgrade model version"), +): + """Launch the on-prem stack.""" + config = load_config() + + if upgrade_stack: + config["stack"]["version"] = "c3d7dbacd1" + if upgrade_model: + config["stack"]["model_image"] = "liquidai/lfm-7b-e:0.0.1" + + # Set model name + model_image = config["stack"]["model_image"] + model_name = f"lfm-{extract_model_name(model_image)}" + config["stack"]["model_name"] = model_name + + # Generate environment file for docker-compose + env_vars = { + "JWT_SECRET": config["stack"]["jwt_secret"], + "API_SECRET": config["stack"]["api_secret"], + "AUTH_SECRET": config["stack"]["auth_secret"], + "STACK_VERSION": config["stack"]["version"], + "MODEL_IMAGE": config["stack"]["model_image"], + "MODEL_NAME": config["stack"]["model_name"], + "POSTGRES_DB": config["database"]["name"], + "POSTGRES_USER": config["database"]["user"], + "POSTGRES_PASSWORD": config["database"]["password"], + "POSTGRES_PORT": str(config["database"]["port"]), + "POSTGRES_SCHEMA": config["database"]["schema"], + "DATABASE_URL": ( + 
f"postgresql://{config['database']['user']}:{config['database']['password']}" + f"@liquid-labs-postgres:{config['database']['port']}/{config['database']['name']}" + ), + } + + # Export environment variables for docker-compose + for key, value in env_vars.items(): + typer.echo(f"Setting {key}") + docker_helper.set_env(key, value) + + # Ensure postgres volume exists + docker_helper.ensure_volume("postgres_data") + + # Launch stack + docker_helper.run_compose(Path("docker-compose.yaml"), Path(".env")) + + typer.echo("The on-prem stack is now running.") + typer.echo(f"\nModel '{model_name}' is accessible at http://localhost:8000") + typer.echo("Please wait 1-2 minutes for the model to load before making API calls") + + +@app.command() +def shutdown(): + """Shutdown the on-prem stack.""" + docker_helper.run_compose(Path("docker-compose.yaml"), Path(".env"), action="down") + typer.echo("Stack has been shut down.") + + +@app.command() +def purge( + force: bool = typer.Option(False, "--force", "-f", help="Skip confirmation prompt"), +): + """Remove all Liquid Labs components.""" + message = ( + "This will remove all Liquid Labs components:\n" + " - Stop and remove all containers\n" + " - Delete postgres_data volume\n" + " - Remove liquid_labs_network\n" + " - Delete .env file\n" + "\nAre you sure?" + ) + + if not confirm_action(message, default=False, force=force): + return + + # Shutdown containers + docker_helper.run_compose(Path("docker-compose.yaml"), Path(".env"), action="down") + + # Remove volume and network + docker_helper.remove_volume("postgres_data") + docker_helper.remove_network("liquid_labs_network") + + # Remove .env file + try: + Path(".env").unlink() + except FileNotFoundError: + pass + + typer.echo("Cleanup complete. All Liquid Labs components have been removed.") + + +@app.command() +def test(): + """Test the API endpoints.""" + import requests + from liquidai_cli.utils.config import load_config + + config = load_config() + api_secret = config["stack"]["api_secret"] + model_name = config["stack"]["model_name"] + + if not all([api_secret, model_name]): + typer.echo("Error: API_SECRET or MODEL_NAME not found in configuration", err=True) + raise typer.Exit(1) + + headers = {"Content-Type": "application/json", "Authorization": f"Bearer {api_secret}"} + + # Test models endpoint + typer.echo("Testing API call to get available models...") + response = requests.get("http://0.0.0.0:8000/v1/models", headers=headers) + typer.echo(response.json()) + + # Test chat completion + typer.echo("\nTesting model call...") + data = { + "model": model_name, + "messages": [{"role": "user", "content": "At which temperature does silver melt?"}], + "max_tokens": 128, + "temperature": 0, + } + response = requests.post("http://0.0.0.0:8000/v1/chat/completions", headers=headers, json=data) + typer.echo(response.json()) diff --git a/python_cli/liquidai_cli/utils/__init__.py b/python_cli/liquidai_cli/utils/__init__.py new file mode 100644 index 0000000..23b9dad --- /dev/null +++ b/python_cli/liquidai_cli/utils/__init__.py @@ -0,0 +1 @@ +"""Utility functions for the Liquid Labs CLI.""" diff --git a/python_cli/liquidai_cli/utils/config.py b/python_cli/liquidai_cli/utils/config.py new file mode 100644 index 0000000..53c89df --- /dev/null +++ b/python_cli/liquidai_cli/utils/config.py @@ -0,0 +1,108 @@ +"""Configuration utilities for the Liquid Labs CLI.""" + +import re +import secrets +import string +from pathlib import Path +from typing import Dict, Optional, Any + +import typer +from ruamel.yaml import YAML + 
+yaml = YAML() + +DEFAULT_CONFIG = { + "stack": { + "version": "c3d7dbacd1", + "model_image": "liquidai/lfm-7b-e:0.0.1", + "jwt_secret": None, # Generated on first use + "api_secret": "local_api_token", + "auth_secret": None, # Generated on first use + "model_name": None, # Generated from model_image + }, + "database": { + "name": "liquid_labs", + "user": "local_user", + "password": "local_password", + "port": 5432, + "schema": "labs", + }, +} + + +def generate_random_string(length: int) -> str: + """Generate a random string of specified length.""" + alphabet = string.ascii_letters + string.digits + return "".join(secrets.choice(alphabet) for _ in range(length)) + + +def extract_model_name(image_tag: str) -> Optional[str]: + """Extract model name from image tag.""" + pattern = r"liquidai/[^-]+-([^:]+)" + match = re.search(pattern, image_tag) + return match.group(1) if match else None + + +def load_config(config_file: Path = Path("liquid.yaml")) -> Dict[str, Any]: + """Load configuration from YAML file.""" + if not config_file.exists(): + return create_default_config(config_file) + + with open(config_file) as f: + config = yaml.load(f) + + # Generate secrets if they don't exist + if not config["stack"]["jwt_secret"]: + config["stack"]["jwt_secret"] = generate_random_string(64) + if not config["stack"]["auth_secret"]: + config["stack"]["auth_secret"] = generate_random_string(64) + + save_config(config, config_file) + return config + + +def create_default_config(config_file: Path) -> Dict[str, Any]: + """Create default configuration file.""" + config = DEFAULT_CONFIG.copy() + config["stack"]["jwt_secret"] = generate_random_string(64) + config["stack"]["auth_secret"] = generate_random_string(64) + + save_config(config, config_file) + return config + + +def save_config(config: Dict[str, Any], config_file: Path) -> None: + """Save configuration to YAML file.""" + # Ensure model_name is generated from model_image + if config["stack"].get("model_image"): + model_name = extract_model_name(config["stack"]["model_image"]) + if model_name: + config["stack"]["model_name"] = f"lfm-{model_name}" + + with open(config_file, "w") as f: + yaml.dump(config, f) + + +def get_config_value( + config: Dict[str, Any], + key_path: str, + prompt: Optional[str] = None, + default: Optional[str] = None, + required: bool = False, +) -> str: + """Get configuration value, prompting user if not found and required.""" + keys = key_path.split(".") + value = config + + try: + for key in keys: + value = value[key] + if value is None and required: + if prompt: + return typer.prompt(prompt) + return default if default else "" + return str(value) if value is not None else (default if default else "") + except (KeyError, TypeError): + if required and prompt: + return typer.prompt(prompt) + return default if default else "" diff --git a/python_cli/liquidai_cli/utils/docker.py b/python_cli/liquidai_cli/utils/docker.py new file mode 100644 index 0000000..058ed21 --- /dev/null +++ b/python_cli/liquidai_cli/utils/docker.py @@ -0,0 +1,80 @@ +"""Docker utilities for the Liquid Labs CLI.""" + +import subprocess +from typing import List, Dict, Any +import docker +from docker.errors import NotFound +from pathlib import Path + + +class DockerHelper: + def __init__(self): + self.client = docker.from_env() + + def run_compose(self, compose_file: Path, env_file: Path, action: str = "up") -> None: + """Run docker-compose command.""" + cmd = ["docker", "compose", "--env-file", str(env_file)] + + if action == "up": + cmd.extend(["up", "-d", 
"--wait"]) + elif action == "down": + cmd.extend(["down"]) + + subprocess.run(cmd, check=True) + + def ensure_volume(self, name: str) -> None: + """Ensure a Docker volume exists.""" + try: + self.client.volumes.get(name) + except NotFound: + self.client.volumes.create(name) + + def remove_volume(self, name: str) -> None: + """Remove a Docker volume if it exists.""" + try: + volume = self.client.volumes.get(name) + volume.remove() + except NotFound: + pass + + def remove_network(self, name: str) -> None: + """Remove a Docker network if it exists.""" + try: + network = self.client.networks.get(name) + network.remove() + except NotFound: + pass + + def run_container(self, image: str, name: str, **kwargs) -> None: + """Run a Docker container.""" + try: + container = self.client.containers.get(name) + container.remove(force=True) + except NotFound: + pass + + self.client.containers.run(image, name=name, detach=True, **kwargs) + + def list_containers(self, ancestor: str) -> List[Dict[str, Any]]: + """List containers by ancestor image.""" + containers = self.client.containers.list(filters={"ancestor": ancestor}) + result = [] + for c in containers: + ports = {} + try: + network_settings = c.attrs.get("NetworkSettings", {}) + if isinstance(network_settings, dict): + ports = network_settings.get("Ports", {}) + except (KeyError, TypeError, AttributeError): + pass + result.append({"name": c.name, "ports": ports}) + return result + + def stop_container(self, name: str) -> None: + """Stop and remove a container.""" + try: + container = self.client.containers.get(name) + container.stop() + container.remove() + except NotFound: + pass diff --git a/python_cli/liquidai_cli/utils/prompt.py b/python_cli/liquidai_cli/utils/prompt.py new file mode 100644 index 0000000..8f43e8c --- /dev/null +++ b/python_cli/liquidai_cli/utils/prompt.py @@ -0,0 +1,36 @@ +"""Interactive prompt utilities for the Liquid Labs CLI.""" + +import typer +from typing import Optional + + +def confirm_action(message: str, default: bool = False, abort: bool = True, force: bool = False) -> bool: + """Prompt for confirmation unless force is True.""" + if force: + return True + + try: + return typer.confirm(message, default=default, abort=abort) + except typer.Abort: + typer.echo("\nOperation cancelled.") + raise typer.Exit(1) + + +def prompt_value( + message: str, default: Optional[str] = None, hide_input: bool = False, required: bool = True +) -> Optional[str]: + """Prompt for a value with optional default and input hiding.""" + try: + value = typer.prompt( + message, + default=default, + hide_input=hide_input, + show_default=not hide_input and default is not None, + ) + if not value and required: + typer.echo("Error: This value is required", err=True) + return prompt_value(message, default, hide_input, required) + return value + except typer.Abort: + typer.echo("\nOperation cancelled.") + raise typer.Exit(1) diff --git a/python_cli/pyproject.toml b/python_cli/pyproject.toml new file mode 100644 index 0000000..9e3f2c4 --- /dev/null +++ b/python_cli/pyproject.toml @@ -0,0 +1,49 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "liquidai-cli" +version = "0.1.0" +description = "CLI tool for Liquid Labs on-prem stack" +readme = "README.md" +requires-python = ">=3.10" +license = "MIT" +authors = [ + { name = "Liquid Labs" } +] +dependencies = [ + "typer>=0.9.0", + "python-dotenv>=1.0.0", + "docker>=6.1.0", + "rich>=10.11.0", + "pydantic>=2.0.0", + "ruamel.yaml>=0.17.21" +] + +[project.scripts] 
+liquidai = "liquidai_cli.cli:main" + +[tool.hatch.build.targets.wheel] +packages = ["liquidai_cli"] + +[tool.black] +line-length = 120 +target-version = ['py310'] +include = '\.pyi?$' + +[tool.flake8] +max-line-length = 120 +extend-ignore = ['E203'] + +[tool.pytest.ini_options] +testpaths = ["tests"] +python_files = ["test_*.py"] +addopts = "-v --cov=liquidai_cli" + +[project.optional-dependencies] +test = [ + "pytest>=7.0.0", + "pytest-cov>=4.0.0", + "pytest-mock>=3.10.0", +] diff --git a/python_cli/tests/__init__.py b/python_cli/tests/__init__.py new file mode 100644 index 0000000..307ab04 --- /dev/null +++ b/python_cli/tests/__init__.py @@ -0,0 +1 @@ +"""Test suite for the Liquid Labs CLI.""" diff --git a/python_cli/tests/test_docker.py b/python_cli/tests/test_docker.py new file mode 100644 index 0000000..27435be --- /dev/null +++ b/python_cli/tests/test_docker.py @@ -0,0 +1,63 @@ +"""Tests for Docker helper functions.""" + +import pytest +from unittest.mock import patch, MagicMock +from docker.errors import NotFound +from liquidai_cli.utils.docker import DockerHelper + + +@pytest.fixture +def mock_docker_client(): + """Mock Docker client for testing.""" + with patch("docker.from_env") as mock: + client = MagicMock() + mock.return_value = client + yield client + + +def test_ensure_volume(mock_docker_client): + """Test Docker volume creation.""" + helper = DockerHelper() + + # Test volume exists + volume = MagicMock() + mock_docker_client.volumes.get.return_value = volume + helper.ensure_volume("test_volume") + mock_docker_client.volumes.get.assert_called_once_with("test_volume") + mock_docker_client.volumes.create.assert_not_called() + + # Test volume doesn't exist + mock_docker_client.volumes.get.side_effect = NotFound("not found") + helper.ensure_volume("new_volume") + mock_docker_client.volumes.create.assert_called_once_with("new_volume") + + +def test_remove_volume(mock_docker_client): + """Test Docker volume removal.""" + helper = DockerHelper() + + # Test remove existing volume + volume = MagicMock() + mock_docker_client.volumes.get.return_value = volume + helper.remove_volume("test_volume") + volume.remove.assert_called_once() + + # Test remove non-existent volume + mock_docker_client.volumes.get.side_effect = NotFound("not found") + helper.remove_volume("missing_volume") # Should not raise + + +def test_list_containers(mock_docker_client): + """Test container listing.""" + helper = DockerHelper() + + # Mock container with network settings + container = MagicMock() + container.name = "test-container" + container.attrs = {"NetworkSettings": {"Ports": {"8000/tcp": [{"HostPort": "9000"}]}}} + mock_docker_client.containers.list.return_value = [container] + + containers = helper.list_containers("test-image") + assert len(containers) == 1 + assert containers[0]["name"] == "test-container" + assert containers[0]["ports"]["8000/tcp"][0]["HostPort"] == "9000" diff --git a/python_cli/tests/test_utils.py b/python_cli/tests/test_utils.py new file mode 100644 index 0000000..b0e2be9 --- /dev/null +++ b/python_cli/tests/test_utils.py @@ -0,0 +1,61 @@ +"""Tests for core utility functions.""" + +from liquidai_cli.utils.config import ( + generate_random_string, + extract_model_name, + load_config, + DEFAULT_CONFIG, +) + + +def test_generate_random_string(): + """Test random string generation.""" + # Test length + assert len(generate_random_string(10)) == 10 + assert len(generate_random_string(64)) == 64 + + # Test uniqueness + str1 = generate_random_string(32) + str2 = generate_random_string(32) 
+ assert str1 != str2 + + # Test character set + str_test = generate_random_string(100) + assert all(c.isalnum() for c in str_test) + + +def test_extract_model_name(): + """Test model name extraction from image tag.""" + test_cases = [ + ("liquidai/lfm-7b-e:0.0.1", "7b-e"), + ("liquidai/test-model:latest", "model"), + ("invalid/format", None), + ("liquidai/lfm-13b:latest", "13b"), + ] + + for image_tag, expected in test_cases: + assert extract_model_name(image_tag) == expected + + +def test_load_config(tmp_path): + """Test configuration loading and defaults.""" + config_file = tmp_path / "liquid.yaml" + + # Test default config creation + config = load_config(config_file) + assert config["stack"]["version"] == DEFAULT_CONFIG["stack"]["version"] + assert config["database"]["name"] == DEFAULT_CONFIG["database"]["name"] + + # Test secrets are generated + assert len(config["stack"]["jwt_secret"]) == 64 + assert len(config["stack"]["auth_secret"]) == 64 + + # Test config is persisted + config2 = load_config(config_file) + assert config["stack"]["jwt_secret"] == config2["stack"]["jwt_secret"] + assert config["stack"]["auth_secret"] == config2["stack"]["auth_secret"] + + # Test model name generation + config["stack"]["model_image"] = "liquidai/lfm-7b-e:0.0.1" + config3 = load_config(config_file) + assert config3["stack"]["model_name"] == "lfm-7b-e" From 0cde4f21fcf4496f5eecb8707d27e82a8afc7fb6 Mon Sep 17 00:00:00 2001 From: Mengxiao Lin Date: Mon, 21 Apr 2025 20:22:11 +0000 Subject: [PATCH 02/13] Fix bugs in launch on python-cli --- .gitignore | 1 + python_cli/liquidai_cli/commands/stack.py | 15 +++++------ python_cli/liquidai_cli/utils/docker.py | 33 ++++++++++++++++++++--- 3 files changed, 36 insertions(+), 13 deletions(-) diff --git a/.gitignore b/.gitignore index 7742e64..bcd34ca 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ .env* *.zip +uv.lock diff --git a/python_cli/liquidai_cli/commands/stack.py b/python_cli/liquidai_cli/commands/stack.py index 24695cc..94dd63d 100644 --- a/python_cli/liquidai_cli/commands/stack.py +++ b/python_cli/liquidai_cli/commands/stack.py @@ -7,7 +7,7 @@ from liquidai_cli.utils.prompt import confirm_action app = typer.Typer(help="Manage the on-prem stack") -docker_helper = DockerHelper() +docker_helper = DockerHelper(Path(".env")) @app.command() @@ -50,13 +50,13 @@ def launch( # Export environment variables for docker-compose for key, value in env_vars.items(): typer.echo(f"Setting {key}") - docker_helper.set_env(key, value) + docker_helper.set_and_export_env_var(key, value) # Ensure postgres volume exists docker_helper.ensure_volume("postgres_data") # Launch stack - docker_helper.run_compose(Path("docker-compose.yaml"), Path(".env")) + docker_helper.run_compose(Path("docker-compose.yaml")) typer.echo("The on-prem stack is now running.") typer.echo(f"\nModel '{model_name}' is accessible at http://localhost:8000") @@ -66,7 +66,7 @@ def launch( @app.command() def shutdown(): """Shutdown the on-prem stack.""" - docker_helper.run_compose(Path("docker-compose.yaml"), Path(".env"), action="down") + docker_helper.run_compose(Path("docker-compose.yaml"), action="down") typer.echo("Stack has been shut down.") @@ -88,17 +88,14 @@ def purge( return # Shutdown containers - docker_helper.run_compose(Path("docker-compose.yaml"), Path(".env"), action="down") + docker_helper.run_compose(Path("docker-compose.yaml"), action="down") # Remove volume and network docker_helper.remove_volume("postgres_data") docker_helper.remove_network("liquid_labs_network") # Remove .env file 
- try: - Path(".env").unlink() - except FileNotFoundError: - pass + docker_helper.remove_env_file() typer.echo("Cleanup complete. All Liquid Labs components have been removed.") diff --git a/python_cli/liquidai_cli/utils/docker.py b/python_cli/liquidai_cli/utils/docker.py index 058ed21..660ff67 100644 --- a/python_cli/liquidai_cli/utils/docker.py +++ b/python_cli/liquidai_cli/utils/docker.py @@ -5,22 +5,25 @@ import docker from docker.errors import NotFound from pathlib import Path +import logging +logger = logging.getLogger(__name__) class DockerHelper: - def __init__(self): + def __init__(self, env_file: Path = Path(".env")): self.client = docker.from_env() + self.env_file = env_file - def run_compose(self, compose_file: Path, env_file: Path, action: str = "up") -> None: + def run_compose(self, compose_file: Path, action: str = "up") -> None: """Run docker-compose command.""" - cmd = ["docker", "compose", "--env-file", str(env_file)] + cmd = ["docker", "compose", "--env-file", str(self.env_file)] if action == "up": cmd.extend(["up", "-d", "--wait"]) elif action == "down": cmd.extend(["down"]) - subprocess.run(cmd, check=True) + subprocess.run(cmd) def ensure_volume(self, name: str) -> None: """Ensure a Docker volume exists.""" @@ -78,3 +81,25 @@ def stop_container(self, name: str) -> None: container.remove() except NotFound: pass + + def set_and_export_env_var(self, key: str, value: str) -> None: + """Set and export an environment variable into env_file.""" + with open(self.env_file, "r") as f: + lines = f.readlines() + # Check if the key already exists + for i, line in enumerate(lines): + if line.startswith(key): + lines[i] = f"{key}={value}\n" + break + else: + lines.append(f"{key}={value}\n") + # Write the updated lines back to the file + with open(self.env_file, "w") as f: + f.writelines(lines) + + def remove_env_file(self) -> None: + """Remove the env_file if it exists.""" + try: + self.env_file.unlink() + except FileNotFoundError: + pass \ No newline at end of file From a5750a46849a9dfa7dc9b99f756f08edd1aaeae9 Mon Sep 17 00:00:00 2001 From: Mengxiao Lin Date: Mon, 21 Apr 2025 22:40:37 +0000 Subject: [PATCH 03/13] Fixed listing containers in python-cli --- python_cli/liquidai_cli/commands/model.py | 4 ++-- python_cli/liquidai_cli/utils/docker.py | 14 ++++++++++++-- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/python_cli/liquidai_cli/commands/model.py b/python_cli/liquidai_cli/commands/model.py index 175e955..30f1c22 100644 --- a/python_cli/liquidai_cli/commands/model.py +++ b/python_cli/liquidai_cli/commands/model.py @@ -147,7 +147,7 @@ def run_checkpoint( @app.command() def list(): """List running models.""" - containers = docker_helper.list_containers("vllm/vllm-openai") + containers = docker_helper.list_containers("liquidai/liquid-labs-vllm") if not containers: typer.echo("No running vLLM containers found.") @@ -179,7 +179,7 @@ def stop( return # Interactive mode if no name provided - containers = docker_helper.list_containers("vllm/vllm-openai") + containers = docker_helper.list_containers("liquidai/liquid-labs-vllm") if not containers: typer.echo("No running vLLM containers found.") return diff --git a/python_cli/liquidai_cli/utils/docker.py b/python_cli/liquidai_cli/utils/docker.py index 660ff67..c8ece57 100644 --- a/python_cli/liquidai_cli/utils/docker.py +++ b/python_cli/liquidai_cli/utils/docker.py @@ -60,9 +60,19 @@ def run_container(self, image: str, name: str, **kwargs) -> None: def list_containers(self, ancestor: str) -> List[Dict[str, Any]]: """List 
containers by ancestor image.""" - containers = self.client.containers.list(filters={"ancestor": ancestor}) + matching_containers = set() + matching_containers.update(self.client.containers.list(filters={"ancestor": ancestor})) + + # Get all images that match the ancestor image name and check their containers + image_base_name = ancestor.split(":")[0] + images = self.client.images.list(name=image_base_name) + for image in images: + containers = self.client.containers.list( + filters={"ancestor": image.id} + ) + matching_containers.update(containers) result = [] - for c in containers: + for c in matching_containers: ports = {} try: network_settings = c.attrs.get("NetworkSettings", {}) From fab6aeccf3cb4b2d42cbbe6457d6461e159c2a61 Mon Sep 17 00:00:00 2001 From: Mengxiao Lin Date: Wed, 23 Apr 2025 10:07:39 -0700 Subject: [PATCH 04/13] Launch model containers with model images in python (#24) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Summary A new command `liquidai model run-model-image` is added to launch a model with our model images. It also refactors the stack launch/test/shutdown commands to work with the new model launch approach. The new `docker-compose.yaml` file only describes the python-api, DB and web containers. All model containers will be managed and launched by the Python `model` commands. The end-to-end test flow is listed here: ## Launch ``` (base) ubuntu@192-222-52-66:~/git/on-prem-stack/python_cli$ uv run liquidai stack launch Setting JWT_SECRET Setting API_SECRET Setting AUTH_SECRET Setting STACK_VERSION Setting MODEL_IMAGE Setting MODEL_NAME Setting POSTGRES_DB Setting POSTGRES_USER Setting POSTGRES_PASSWORD Setting POSTGRES_PORT Setting POSTGRES_SCHEMA Setting DATABASE_URL WARN[0000] a network with name liquid_labs_network exists but was not created for project "python_cli". Set `external: true` to use an existing network [+] Running 3/3 ✔ Container liquid-labs-postgres Healthy 12.0s ✔ Container liquid-labs-web Healthy 12.0s ✔ Container liquid-labs-python-api Healthy 17.0s Creating volume for model data: lfm-7b-e Loading model data from image: liquidai/lfm-7b-e:0.0.1 Launching model container: lfm-7b-e Model 'lfm-7b-e' started successfully Please wait 1-2 minutes for the model to load before making API calls The on-prem stack is now running. ``` ## Stop a model ``` (base) ubuntu@192-222-52-66:~/git/on-prem-stack/python_cli$ uv run liquidai model stop lfm-7b-e Stopped and removed container: lfm-7b-e ``` ## Launch a new model ``` (base) ubuntu@192-222-52-66:~/git/on-prem-stack/python_cli$ uv run liquidai model run-model-image --name lfm-3b-e --image "liquidai/lfm-3b-e:0.0.6" Creating volume for model data: lfm-3b-e Loading model data from image: liquidai/lfm-3b-e:0.0.6 Launching model container: lfm-3b-e Model 'lfm-3b-e' started successfully Please wait 1-2 minutes for the model to load before making API calls ``` ## Test a model ``` (base) ubuntu@192-222-52-66:~/git/on-prem-stack/python_cli$ uv run liquidai stack test Testing API call to get available models... {'data': [{'id': 'lfm-3b-e', 'status': 'running'}]} Testing model call... Testing model: lfm-3b-e {'id': 'chatcmpl-59437b10fb3a4a99aa7d279632d3e1f2', 'choices': [{'finish_reason': 'stop', 'index': 0, 'message': {'content': 'Silver melts at approximately 961.78 degrees Celsius (1,763.10 degrees Fahrenheit). This is a key property of the metal, which is used in various applications due to its high melting point and thermal conductivity.', 'role': 'assistant', 'tool_calls': []}}], 'created': 1745346397, 'model': 'lfm-3b-e', 'object': 'chat.completion', 'usage': {'completion_tokens': 51, 'prompt_tokens': 40, 'total_tokens': 91}} ``` ## Shutdown the stack ``` (base) ubuntu@192-222-52-66:~/git/on-prem-stack/python_cli$ uv run liquidai stack shutdown Stopped and removed model container: lfm-3b-e [+] Running 3/3 ✔ Container liquid-labs-python-api Removed 0.4s ✔ Container liquid-labs-web Removed 10.2s ✔ Container liquid-labs-postgres Removed 0.2s Stack has been shut down. ``` --- .github/workflows/python-ci.yml | 4 +- python_cli/README.md | 64 +++---------- python_cli/docker-compose.yaml | 96 +++++++++++++++++++ python_cli/liquidai_cli/commands/model.py | 109 +++++++++++++++++++--- python_cli/liquidai_cli/commands/stack.py | 38 +++++--- python_cli/liquidai_cli/utils/device.py | 24 +++++ python_cli/liquidai_cli/utils/docker.py | 25 +++-- 7 files changed, 276 insertions(+), 84 deletions(-) create mode 100644 python_cli/docker-compose.yaml create mode 100644 python_cli/liquidai_cli/utils/device.py diff --git a/.github/workflows/python-ci.yml b/.github/workflows/python-ci.yml index 85a6a44..08fd981 100644 --- a/.github/workflows/python-ci.yml +++ b/.github/workflows/python-ci.yml @@ -2,11 +2,11 @@ name: Python CI on: push: - branches: [ main ] + branches: [ main, python-cli ] paths: - 'python_cli/**' pull_request: - branches: [ main ] + branches: [ main, python-cli ] paths: - 'python_cli/**' diff --git a/python_cli/README.md b/python_cli/README.md index ecad7af..8ff63a8 100644 --- a/python_cli/README.md +++ b/python_cli/README.md @@ -3,11 +3,19 @@ Command line interface for managing Liquid Labs on-prem stack. ## Installation - ```bash pip install liquidai-cli ``` +A `docker-compose.yaml` file is also shipped together with the package. Any changes to this file may cause unexpected behavior. + +### Run with `uv` +`uv` allows you to run this tool without installing the package into the system. + +```bash +uv run --directory [PATH_TO_THIS_DIRECTORY] liquidai [command] [args] +``` + ## Configuration The CLI uses a YAML configuration file (`liquid.yaml`) in your working directory.
A default configuration will be created on first use, but you can customize it: @@ -53,6 +61,11 @@ liquidai stack purge --force ### Model Operations ```bash +# Run a model in docker container +liquidai model run-model-image \ + --name lfm-3b-e \ + --image "liquidai/lfm-3b-e:0.0.6" + # Run a HuggingFace model liquidai model run-hf \ --name llama-7b \ @@ -113,51 +126,4 @@ liquidai config import --force ``` ## Command Reference - -### Stack Commands - -- `launch [--upgrade-stack] [--upgrade-model]`: Launch the stack - - `--upgrade-stack`: Upgrade stack version - - `--upgrade-model`: Upgrade model version -- `shutdown`: Shutdown the stack -- `test`: Test API endpoints -- `purge [--force]`: Remove all components - - `--force`: Skip confirmation prompt - -### Model Commands - -- `run-hf`: Run a HuggingFace model - - `--name`: Name for the model container - - `--path`: HuggingFace model path - - `--port`: Port to expose (default: 9000) - - `--gpu`: GPU index to use (default: "all") - - `--gpu-memory-utilization`: GPU memory fraction (default: 0.6) - - `--max-num-seqs`: Max parallel sequences (default: 600) - - `--max-model-len`: Max model length (default: 32768) - - `--hf-token`: HuggingFace token (or use HUGGING_FACE_TOKEN env var) - -- `run-checkpoint`: Run a local checkpoint - - `--path`: Path to checkpoint directory - - `--port`: Port to expose (default: 9000) - - `--gpu`: GPU index to use (default: "all") - - `--gpu-memory-utilization`: GPU memory fraction (default: 0.6) - - `--max-num-seqs`: Max parallel sequences (default: 600) - -- `list`: List running models -- `stop [NAME]`: Stop a model (interactive if NAME not provided) - -### Database Commands - -- `connect`: Connect to database using pgcli - -### Infrastructure Commands - -- `tunnel create [--token TOKEN]`: Create Cloudflare tunnel - - `--token`: Cloudflare tunnel token - -### Configuration Commands - -- `config import [--env-file PATH] [--config-file PATH] [--force]`: Import .env configuration - - `--env-file`: Path to .env file (default: .env) - - `--config-file`: Path to YAML config file (default: liquid.yaml) - - `--force`: Force overwrite existing config +Call `liquidai [command] --help` to get the detailed usage reference. \ No newline at end of file diff --git a/python_cli/docker-compose.yaml b/python_cli/docker-compose.yaml new file mode 100644 index 0000000..9ac5539 --- /dev/null +++ b/python_cli/docker-compose.yaml @@ -0,0 +1,96 @@ +# Docker Compose file for on-prem-stack Python CLI. +services: + liquid-labs-python-api: + image: liquidai/liquid-labs-python-api:${STACK_VERSION} + container_name: liquid-labs-python-api + depends_on: + liquid-labs-web: + condition: service_healthy + healthcheck: + test: [ "CMD", "curl", "-f", "http://localhost:8000/health" ] + interval: 60s + timeout: 10s + retries: 3 + start_period: 30s + start_interval: 5s + environment: + # When ENV=production, http requests will be redirected to https + - ENV=internal + - IS_DOCKER=true + - CONTAINER_PORT=9000 + - VLLM_IMAGE_NAME=liquidai/liquid-labs-vllm:${STACK_VERSION} + - JWT_SECRET=${JWT_SECRET} + - API_SECRET=${API_SECRET} + - NVIDIA_VISIBLE_DEVICES=all + - POSTGRES_SCHEMA=labs + - DATABASE_URL=${DATABASE_URL} + ports: + - "8000:8000" + networks: + - liquid_labs_network + volumes: + - /var/run/docker.sock:/var/run/docker.sock + # This is equivalent to "runtime: nvidia", but does not require + # the nvidia-container-runtime to be added in docker config. 
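+ # To expose only specific GPUs to this container, the `count: all` reservation + # below can be replaced with an explicit `device_ids` list.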
+ deploy: + resources: + reservations: + devices: + - driver: nvidia + count: all + capabilities: [ gpu ] + + liquid-labs-postgres: + image: postgres:15 + container_name: liquid-labs-postgres + environment: + POSTGRES_DB: ${POSTGRES_DB} + POSTGRES_USER: ${POSTGRES_USER} + POSTGRES_PASSWORD: ${POSTGRES_PASSWORD} + POSTGRES_PORT: ${POSTGRES_PORT} + POSTGRES_SCHEMA: ${POSTGRES_SCHEMA} + volumes: + - postgres_data:/var/lib/postgresql/data + ports: + - "${POSTGRES_PORT}:5432" + networks: + - liquid_labs_network + healthcheck: + test: [ "CMD-SHELL", "pg_isready -U ${POSTGRES_USER} -d ${POSTGRES_DB}" ] + interval: 5s + timeout: 5s + retries: 5 + + liquid-labs-web: + image: liquidai/liquid-labs-web:${STACK_VERSION} + container_name: liquid-labs-web + depends_on: + liquid-labs-postgres: + condition: service_healthy + healthcheck: + test: [ "CMD", "curl", "-f", "http://localhost:3000/api/health" ] + interval: 60s + timeout: 10s + retries: 3 + start_period: 30s + start_interval: 5s + environment: + - NEXT_PUBLIC_API_BASE_URL=http://liquid-labs-python-api:8000 + - API_SECRET=${API_SECRET} + - AUTH_SECRET=${AUTH_SECRET} + - JWT_SECRET=${JWT_SECRET} + - NEXT_PUBLIC_DEPLOYMENT_MODE=on_prem + - DATABASE_URL=${DATABASE_URL} + networks: + - liquid_labs_network + ports: + - "3000:3000" + +networks: + liquid_labs_network: + name: liquid_labs_network + driver: bridge + +volumes: + postgres_data: + external: true \ No newline at end of file diff --git a/python_cli/liquidai_cli/commands/model.py b/python_cli/liquidai_cli/commands/model.py index 30f1c22..445d571 100644 --- a/python_cli/liquidai_cli/commands/model.py +++ b/python_cli/liquidai_cli/commands/model.py @@ -5,23 +5,103 @@ from typing import Optional, Dict, List, cast from liquidai_cli.utils.docker import DockerHelper from liquidai_cli.utils.config import load_config +from liquidai_cli.utils.device import get_device_requests_from_gpus +from typing_extensions import Annotated app = typer.Typer(help="Manage ML models") docker_helper = DockerHelper() +@app.command(name="run-model-image") +def run_model_image( + name: str = typer.Option(..., "--name", help="Name for the model"), + model_image: str = typer.Option(..., "--image", help="Model image name"), + port: Annotated[int, typer.Option("--port", help="Port to expose locally")] = 9000, + gpu: Annotated[str, typer.Option("--gpu", help="Specific GPU index to use")] = "all", + gpu_memory_utilization: Annotated[ + float, typer.Option("--gpu-memory-utilization", help="Fraction of GPU memory to use") + ] = 0.6, + max_num_seqs: Annotated[ + int, typer.Option("--max-num-seqs", help="Maximum number of sequences to generate in parallel") + ] = 750, + max_model_len: Annotated[int, typer.Option("--max-model-len", help="Maximum length of the model")] = 32768, +): + """ + Launch a model stored in a Docker image. Default Liquid Foundation Models (LFM) are delivered in this approach. 
+ """ + # Create a volume to cache the model data + typer.echo(f"Creating volume for model data: {name}") + model_volume_name = f"model_data_{name}" + docker_helper.ensure_volume(model_volume_name) + model_volume_loader_container_name = f"liquid-labs-model-volume-{name}" + typer.echo(f"Loading model data from image: {model_image}") + model_volume_loader_container = docker_helper.run_container( + image=model_image, + name=model_volume_loader_container_name, + volumes={model_volume_name: {"bind": "/model", "mode": "rw"}}, + network="liquid_labs_network", + ) + result = model_volume_loader_container.wait() + if result["StatusCode"] != 0: + typer.echo(f"Error loading model data: {result['StatusCode']}", err=True) + raise typer.Exit(1) + model_volume_loader_container.remove() + + typer.echo(f"Launching model container: {name}") + stack_version = docker_helper.get_env_var("STACK_VERSION") + docker_helper.run_container( + image=f"liquidai/liquid-labs-vllm:{stack_version}", + name=name, + device_requests=get_device_requests_from_gpus(gpu), + volumes={model_volume_name: {"bind": "/model", "mode": "ro"}}, + network="liquid_labs_network", + command=[ + "--model", + "/model", + "--served-model-name", + name, + "--port", + str(port), + "--max-logprobs", + "0", + "--dtype", + "bfloat16", + "--device", + "cuda", + "--enable-chunked-prefill", + "False", + "--tensor-parallel-size", + "1", + "--gpu-memory-utilization", + str(gpu_memory_utilization), + "--max-model-len", + str(max_model_len), + "--max-num-seqs", + str(max_num_seqs), + "--max-seq-len-to-capture", + str(max_model_len), + ], + ) + typer.echo(f"Model '{name}' started successfully") + typer.echo("Please wait 1-2 minutes for the model to load before making API calls") + + @app.command(name="run-hf") def run_huggingface( name: str = typer.Option(..., "--name", help="Name for the model container"), path: str = typer.Option(..., "--path", help="Hugging Face model path"), - port: int = typer.Option(9000, "--port", help="Port to expose locally"), - gpu: str = typer.Option("all", "--gpu", help="Specific GPU index to use"), - gpu_memory_utilization: float = typer.Option(0.6, "--gpu-memory-utilization", help="Fraction of GPU memory to use"), - max_num_seqs: int = typer.Option(600, "--max-num-seqs", help="Maximum number of sequences to generate in parallel"), - max_model_len: int = typer.Option(32768, "--max-model-len", help="Maximum length of the model"), - hf_token: Optional[str] = typer.Option( - None, "--hf-token", help="Hugging Face access token", envvar="HUGGING_FACE_TOKEN" - ), + port: Annotated[int, typer.Option("--port", help="Port to expose locally")] = 9000, + gpu: Annotated[str, typer.Option("--gpu", help="Specific GPU index to use")] = "all", + gpu_memory_utilization: Annotated[ + float, typer.Option("--gpu-memory-utilization", help="Fraction of GPU memory to use") + ] = 0.6, + max_num_seqs: Annotated[ + int, typer.Option("--max-num-seqs", help="Maximum number of sequences to generate in parallel") + ] = 600, + max_model_len: Annotated[int, typer.Option("--max-model-len", help="Maximum length of the model")] = 32768, + hf_token: Annotated[ + Optional[str], typer.Option("--hf-token", help="Hugging Face access token", envvar="HUGGING_FACE_TOKEN") + ] = None, ): """Launch a model from Hugging Face.""" if not hf_token: @@ -31,17 +111,18 @@ def run_huggingface( ) raise typer.Exit(1) + stack_version = docker_helper.get_env_var("STACK_VERSION") docker_helper.run_container( - image="vllm/vllm-openai:latest", + 
image=f"liquidai/liquid-labs-vllm:{stack_version}", name=name, environment={"HUGGING_FACE_HUB_TOKEN": hf_token}, - ports={8000: port}, - device_requests=[{"Driver": "nvidia", "Count": -1, "Capabilities": [["gpu"]]}], + device_requests=get_device_requests_from_gpus(gpu), + network="liquid_labs_network", command=[ "--host", "0.0.0.0", "--port", - "8000", + str(port), "--model", path, "--served-model-name", @@ -59,8 +140,6 @@ def run_huggingface( "--max-seq-len-to-capture", str(max_model_len), ], - health_cmd="curl --fail http://localhost:8000/health || exit 1", - health_interval=30, ) typer.echo(f"Model '{name}' started successfully") @@ -107,7 +186,7 @@ def run_checkpoint( image=image_name, name=model_name, ports={8000: port}, - device_requests=[{"Driver": "nvidia", "Count": -1, "Capabilities": [["gpu"]]}], + device_requests=get_device_requests_from_gpus(gpu), volumes={str(checkpoint_path): {"bind": "/model", "mode": "ro"}}, command=[ "--host", diff --git a/python_cli/liquidai_cli/commands/stack.py b/python_cli/liquidai_cli/commands/stack.py index 94dd63d..be8b1fd 100644 --- a/python_cli/liquidai_cli/commands/stack.py +++ b/python_cli/liquidai_cli/commands/stack.py @@ -5,6 +5,7 @@ from liquidai_cli.utils.docker import DockerHelper from liquidai_cli.utils.config import load_config, extract_model_name from liquidai_cli.utils.prompt import confirm_action +from liquidai_cli.commands.model import run_model_image app = typer.Typer(help="Manage the on-prem stack") docker_helper = DockerHelper(Path(".env")) @@ -57,15 +58,22 @@ def launch( # Launch stack docker_helper.run_compose(Path("docker-compose.yaml")) + # Run default model image + run_model_image(model_name, config["stack"]["model_image"]) typer.echo("The on-prem stack is now running.") - typer.echo(f"\nModel '{model_name}' is accessible at http://localhost:8000") - typer.echo("Please wait 1-2 minutes for the model to load before making API calls") @app.command() def shutdown(): """Shutdown the on-prem stack.""" + # Shutdown running models + containers = docker_helper.list_containers("liquidai/liquid-labs-vllm") + for container in containers: + container_name = container["name"] + docker_helper.stop_container(container_name) + typer.echo(f"Stopped and removed model container: {container_name}") + docker_helper.run_compose(Path("docker-compose.yaml"), action="down") typer.echo("Stack has been shut down.") @@ -119,15 +127,23 @@ def test(): # Test models endpoint typer.echo("Testing API call to get available models...") response = requests.get("http://0.0.0.0:8000/v1/models", headers=headers) - typer.echo(response.json()) + available_model_json = response.json() + typer.echo(available_model_json) + if not available_model_json.get("data"): + typer.echo("Error: No models found in the response", err=True) + raise typer.Exit(1) # Test chat completion typer.echo("\nTesting model call...") - data = { - "model": model_name, - "messages": [{"role": "user", "content": "At which temperature does silver melt?"}], - "max_tokens": 128, - "temperature": 0, - } - response = requests.post("http://0.0.0.0:8000/v1/chat/completions", headers=headers, json=data) - typer.echo(response.json()) + for model_info in available_model_json["data"]: + model_name = model_info["id"] + if model_info["status"] == "running": + typer.echo(f"Testing model: {model_name}") + data = { + "model": model_name, + "messages": [{"role": "user", "content": "At which temperature does silver melt?"}], + "max_tokens": 128, + "temperature": 0, + } + response = 
requests.post("http://0.0.0.0:8000/v1/chat/completions", headers=headers, json=data) + typer.echo(response.json()) diff --git a/python_cli/liquidai_cli/utils/device.py b/python_cli/liquidai_cli/utils/device.py new file mode 100644 index 0000000..3ae616d --- /dev/null +++ b/python_cli/liquidai_cli/utils/device.py @@ -0,0 +1,24 @@ +""" +Utils on managing devices parameters. +""" + +from docker.types import DeviceRequest +from typing import List + + +def get_device_requests_from_gpus(gpus: str) -> List[DeviceRequest]: + """ + Get device requests for GPUs. + Args: + gpus (str): requested gpus in a comma-separated string, or "all". + Returns: + List[DeviceRequest]: List of device requests for Docker. + """ + if not gpus: + return [] + + if gpus == "all": + return [{"Driver": "nvidia", "Count": -1, "Capabilities": [["gpu"]]}] + else: + gpu_indices = gpus.split(",") + return [{"Driver": "nvidia", "DeviceIDs": gpu_indices, "Capabilities": [["gpu"]]}] diff --git a/python_cli/liquidai_cli/utils/docker.py b/python_cli/liquidai_cli/utils/docker.py index c8ece57..e706bf4 100644 --- a/python_cli/liquidai_cli/utils/docker.py +++ b/python_cli/liquidai_cli/utils/docker.py @@ -5,14 +5,16 @@ import docker from docker.errors import NotFound from pathlib import Path -import logging +import logging logger = logging.getLogger(__name__) + class DockerHelper: def __init__(self, env_file: Path = Path(".env")): self.client = docker.from_env() self.env_file = env_file + self.env_dict = {} def run_compose(self, compose_file: Path, action: str = "up") -> None: """Run docker-compose command.""" @@ -48,7 +50,7 @@ def remove_network(self, name: str) -> None: except NotFound: pass - def run_container(self, image: str, name: str, **kwargs) -> None: + def run_container(self, image: str, name: str, **kwargs) -> docker.models.containers.Container: """Run a Docker container.""" try: container = self.client.containers.get(name) @@ -56,7 +58,7 @@ def run_container(self, image: str, name: str, **kwargs) -> None: except NotFound: pass - self.client.containers.run(image, name=name, detach=True, **kwargs) + return self.client.containers.run(image, name=name, detach=True, **kwargs) def list_containers(self, ancestor: str) -> List[Dict[str, Any]]: """List containers by ancestor image.""" @@ -67,9 +69,7 @@ def list_containers(self, ancestor: str) -> List[Dict[str, Any]]: image_base_name = ancestor.split(":")[0] images = self.client.images.list(name=image_base_name) for image in images: - containers = self.client.containers.list( - filters={"ancestor": image.id} - ) + containers = self.client.containers.list(filters={"ancestor": image.id}) matching_containers.update(containers) result = [] for c in matching_containers: @@ -92,8 +92,19 @@ def stop_container(self, name: str) -> None: except NotFound: pass + def get_env_var(self, key: str) -> str: + """Get an environment variable from the env_file.""" + if key in self.env_dict: + return self.env_dict[key] + with open(self.env_file, "r") as f: + for line in f: + if line.startswith(key): + return line.split("=")[1].strip() + return "" + def set_and_export_env_var(self, key: str, value: str) -> None: """Set and export an environment variable into env_file.""" + self.env_dict[key] = value with open(self.env_file, "r") as f: lines = f.readlines() # Check if the key already exists @@ -112,4 +123,4 @@ def remove_env_file(self) -> None: try: self.env_file.unlink() except FileNotFoundError: - pass \ No newline at end of file + pass From 94c1c5b9dfdcd103d0043fd7830cf746d353730d Mon Sep 17 00:00:00 
2001 From: Mengxiao Lin Date: Thu, 24 Apr 2025 07:50:34 -0700 Subject: [PATCH 05/13] Support run-checkpoint in Python CLI (#25) Addressed the bugs in Devin's original code. The Python CLI now supports running a local checkpoint. ``` $ uv run liquidai model run-checkpoint --path ~/test_checkpoint/ Model 'lfm-3b-e-checkpoint' started successfully Please wait 1-2 minutes for the model to load before making API calls $ uv run liquidai stack test Testing API call to get available models... {'data': [{'id': 'lfm-3b-e-checkpoint', 'status': 'running'}]} Testing model call... Testing model: lfm-3b-e-checkpoint {'id': 'chatcmpl-0e8778b43eef4a3da54b42c79f743995', 'choices': [{'finish_reason': 'stop', 'index': 0, 'message': {'content': 'Silver melts at approximately 961.78 degrees Celsius (1763.20 degrees Fahrenheit). This is a key property of the metal, which is used in various applications due to its high melting point and thermal conductivity.', 'role': 'assistant', 'tool_calls': []}}], 'created': 1745444813, 'model': 'lfm-3b-e-checkpoint', 'object': 'chat.completion', 'usage': {'completion_tokens': 50, 'prompt_tokens': 43, 'total_tokens': 93}} ``` This PR also brings back the docker health check command, which allows `docker ps` to recognize the health status of the model containers. --- python_cli/liquidai_cli/commands/model.py | 63 +++++++++++++++-------- 1 file changed, 42 insertions(+), 21 deletions(-) diff --git a/python_cli/liquidai_cli/commands/model.py b/python_cli/liquidai_cli/commands/model.py index 445d571..4de2456 100644 --- a/python_cli/liquidai_cli/commands/model.py +++ b/python_cli/liquidai_cli/commands/model.py @@ -4,13 +4,15 @@ from pathlib import Path from typing import Optional, Dict, List, cast from liquidai_cli.utils.docker import DockerHelper -from liquidai_cli.utils.config import load_config from liquidai_cli.utils.device import get_device_requests_from_gpus from typing_extensions import Annotated app = typer.Typer(help="Manage ML models") docker_helper = DockerHelper() +NANOSECONDS_IN_SECOND = 1_000_000_000 +HEALTHCHECK_INTERVAL = 30 * NANOSECONDS_IN_SECOND + @app.command(name="run-model-image") def run_model_image( @@ -19,10 +21,12 @@ def run_model_image( port: Annotated[int, typer.Option("--port", help="Port to expose locally")] = 9000, gpu: Annotated[str, typer.Option("--gpu", help="Specific GPU index to use")] = "all", gpu_memory_utilization: Annotated[ - float, typer.Option("--gpu-memory-utilization", help="Fraction of GPU memory to use") + float, + typer.Option("--gpu-memory-utilization", help="Fraction of GPU memory to use"), ] = 0.6, max_num_seqs: Annotated[ - int, typer.Option("--max-num-seqs", help="Maximum number of sequences to generate in parallel") + int, + typer.Option("--max-num-seqs", help="Maximum number of sequences to generate in parallel"), ] = 750, max_model_len: Annotated[int, typer.Option("--max-model-len", help="Maximum length of the model")] = 32768, ): @@ -81,6 +85,11 @@ def run_model_image( "--max-seq-len-to-capture", str(max_model_len), ], + healthcheck={ + "test": f"curl --fail http://localhost:{port}/health || exit 1", + "interval": HEALTHCHECK_INTERVAL, + "start_period": HEALTHCHECK_INTERVAL, + }, ) typer.echo(f"Model '{name}' started successfully") typer.echo("Please wait 1-2 minutes for the model to load before making API calls") @@ -93,14 +102,17 @@ def run_huggingface( name: str = typer.Option(..., "--name", help="Name for the model container"), path: str = typer.Option(..., "--path", help="Hugging Face model path"), port: Annotated[int, typer.Option("--port", help="Port to expose locally")] = 9000, gpu: Annotated[str, typer.Option("--gpu", help="Specific GPU index to use")] = "all",
gpu_memory_utilization: Annotated[ - float, typer.Option("--gpu-memory-utilization", help="Fraction of GPU memory to use") + float, + typer.Option("--gpu-memory-utilization", help="Fraction of GPU memory to use"), ] = 0.6, max_num_seqs: Annotated[ - int, typer.Option("--max-num-seqs", help="Maximum number of sequences to generate in parallel") + int, + typer.Option("--max-num-seqs", help="Maximum number of sequences to generate in parallel"), ] = 600, max_model_len: Annotated[int, typer.Option("--max-model-len", help="Maximum length of the model")] = 32768, hf_token: Annotated[ - Optional[str], typer.Option("--hf-token", help="Hugging Face access token", envvar="HUGGING_FACE_TOKEN") + Optional[str], + typer.Option("--hf-token", help="Hugging Face access token", envvar="HUGGING_FACE_TOKEN"), ] = None, ): """Launch a model from Hugging Face.""" @@ -140,22 +152,27 @@ def run_huggingface( "--max-seq-len-to-capture", str(max_model_len), ], + healthcheck={ + "test": f"curl --fail http://localhost:{port}/health || exit 1", + "interval": HEALTHCHECK_INTERVAL, + "start_period": HEALTHCHECK_INTERVAL, + }, ) typer.echo(f"Model '{name}' started successfully") - typer.echo(f"The vLLM API will be accessible at http://localhost:{port}") typer.echo("Please wait 1-2 minutes for the model to load before making API calls") @app.command(name="run-checkpoint") def run_checkpoint( path: str = typer.Option(..., "--path", help="Path to model checkpoint directory"), - port: int = typer.Option(9000, "--port", help="Port to expose locally"), - gpu: str = typer.Option("all", "--gpu", help="Specific GPU index to use"), - gpu_memory_utilization: float = typer.Option( - 0.60, "--gpu-memory-utilization", help="Fraction of GPU memory to use" - ), - max_num_seqs: int = typer.Option(600, "--max-num-seqs", help="Maximum number of sequences to cache"), + port: Annotated[int, typer.Option("--port", help="Port to expose locally")] = 9000, + gpu: Annotated[str, typer.Option("--gpu", help="Specific GPU index to use")] = "all", + gpu_memory_utilization: Annotated[ + float, + typer.Option("--gpu-memory-utilization", help="Fraction of GPU memory to use"), + ] = 0.6, + max_num_seqs: Annotated[int, typer.Option("--max-num-seqs", help="Maximum number of sequences to cache")] = 600, ): """Launch a model from local checkpoint.""" import json @@ -167,7 +184,10 @@ def run_checkpoint( metadata_file = checkpoint_path / "model_metadata.json" if not metadata_file.is_file(): - typer.echo("Error: model_metadata.json does not exist in the model checkpoint directory", err=True) + typer.echo( + "Error: model_metadata.json does not exist in the model checkpoint directory", + err=True, + ) raise typer.Exit(1) with open(metadata_file) as f: @@ -178,21 +198,20 @@ def run_checkpoint( typer.echo("Error: model_name is not defined in model_metadata.json", err=True) raise typer.Exit(1) - config = load_config() - stack_version = config["stack"]["version"] + stack_version = docker_helper.get_env_var("STACK_VERSION") image_name = f"liquidai/liquid-labs-vllm:{stack_version}" docker_helper.run_container( image=image_name, name=model_name, - ports={8000: port}, device_requests=get_device_requests_from_gpus(gpu), volumes={str(checkpoint_path): {"bind": "/model", "mode": "ro"}}, + network="liquid_labs_network", command=[ "--host", "0.0.0.0", "--port", - "8000", + str(port), "--model", "/model", "--served-model-name", @@ -214,12 +233,14 @@ def run_checkpoint( "--max-seq-len-to-capture", "32768", ], - health_cmd="curl --fail http://localhost:8000/health || exit 1", - 
health_interval=30, + healthcheck={ + "test": f"curl --fail http://localhost:{port}/health || exit 1", + "interval": HEALTHCHECK_INTERVAL, + "start_period": HEALTHCHECK_INTERVAL, + }, ) typer.echo(f"Model '{model_name}' started successfully") - typer.echo(f"The vLLM API will be accessible at http://localhost:{port}") typer.echo("Please wait 1-2 minutes for the model to load before making API calls") From 4083ee4260e8f0cd29be6419eb7ce8ab42a2c24f Mon Sep 17 00:00:00 2001 From: Mengxiao Lin Date: Fri, 25 Apr 2025 10:56:53 -0700 Subject: [PATCH 06/13] Python CLI incremental bug fixes (#27) 1. `env_file` doesn't exist on the first run. Fix this by touching the file when the `DockerHelper` is initialized. 2. Terminate stack operations when the `docker compose` command fails. 3. Remove unnecessary dependencies. --- python_cli/liquidai_cli/utils/docker.py | 5 +++-- python_cli/pyproject.toml | 3 --- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/python_cli/liquidai_cli/utils/docker.py b/python_cli/liquidai_cli/utils/docker.py index e706bf4..458c420 100644 --- a/python_cli/liquidai_cli/utils/docker.py +++ b/python_cli/liquidai_cli/utils/docker.py @@ -14,18 +14,19 @@ class DockerHelper: def __init__(self, env_file: Path = Path(".env")): self.client = docker.from_env() self.env_file = env_file + env_file.touch() self.env_dict = {} def run_compose(self, compose_file: Path, action: str = "up") -> None: """Run docker-compose command.""" - cmd = ["docker", "compose", "--env-file", str(self.env_file)] + cmd = ["docker", "compose", "--env-file", str(self.env_file), "-f", str(compose_file)] if action == "up": cmd.extend(["up", "-d", "--wait"]) elif action == "down": cmd.extend(["down"]) - subprocess.run(cmd) + subprocess.run(cmd, check=True) def ensure_volume(self, name: str) -> None: """Ensure a Docker volume exists.""" diff --git a/python_cli/pyproject.toml b/python_cli/pyproject.toml index 9e3f2c4..5e85f38 100644 --- a/python_cli/pyproject.toml +++ b/python_cli/pyproject.toml @@ -14,10 +14,7 @@ authors = [ ] dependencies = [ "typer>=0.9.0", - "python-dotenv>=1.0.0", "docker>=6.1.0", - "rich>=10.11.0", - "pydantic>=2.0.0", "ruamel.yaml>=0.17.21" ] From de418637bc7c4bf36dbc9e25e479bbabccb8c2ec Mon Sep 17 00:00:00 2001 From: Mengxiao Lin Date: Mon, 28 Apr 2025 12:22:23 -0700 Subject: [PATCH 07/13] Decouple db migration for Python CLI (#29) Following https://github.com/Liquid4All/on-prem-stack/pull/28, the db migration step is moved into a new docker container. Also moved the docker-compose file into the package so it is easier to reference as a package resource.
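For reference, the sketch below shows roughly how the packaged compose file can be resolved at runtime; it mirrors the `get_docker_compose_file` helper added in the diff below, using the `liquidai_cli.docker_compose_files` subpackage introduced here.

```python
# Rough sketch: resolve the docker-compose.yaml that ships inside the wheel,
# so `pip install liquidai-cli` users get it without a repository checkout.
from importlib import resources as impresources
from pathlib import Path

import liquidai_cli.docker_compose_files as docker_compose_files


def get_docker_compose_file() -> Path:
    path = impresources.files(docker_compose_files).joinpath("docker-compose.yaml")
    if not path.exists():
        raise FileNotFoundError(f"Docker compose file not found: {path}")
    return path
```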
--- python_cli/liquidai_cli/commands/stack.py | 16 +++++++++++--- .../docker_compose_files}/docker-compose.yaml | 21 ++++++++++++++++++- 2 files changed, 33 insertions(+), 4 deletions(-) rename python_cli/{ => liquidai_cli/docker_compose_files}/docker-compose.yaml (79%) diff --git a/python_cli/liquidai_cli/commands/stack.py b/python_cli/liquidai_cli/commands/stack.py index be8b1fd..71e3cf1 100644 --- a/python_cli/liquidai_cli/commands/stack.py +++ b/python_cli/liquidai_cli/commands/stack.py @@ -6,6 +6,7 @@ from liquidai_cli.utils.config import load_config, extract_model_name from liquidai_cli.utils.prompt import confirm_action from liquidai_cli.commands.model import run_model_image +from importlib import resources as impresources app = typer.Typer(help="Manage the on-prem stack") docker_helper = DockerHelper(Path(".env")) @@ -57,7 +58,7 @@ def launch( docker_helper.ensure_volume("postgres_data") # Launch stack - docker_helper.run_compose(Path("docker-compose.yaml")) + docker_helper.run_compose(get_docker_compose_file()) # Run default model image run_model_image(model_name, config["stack"]["model_image"]) @@ -74,7 +75,7 @@ def shutdown(): docker_helper.stop_container(container_name) typer.echo(f"Stopped and removed model container: {container_name}") - docker_helper.run_compose(Path("docker-compose.yaml"), action="down") + docker_helper.run_compose(get_docker_compose_file(), action="down") typer.echo("Stack has been shut down.") @@ -96,7 +97,7 @@ def purge( return # Shutdown containers - docker_helper.run_compose(Path("docker-compose.yaml"), action="down") + docker_helper.run_compose(get_docker_compose_file(), action="down") # Remove volume and network docker_helper.remove_volume("postgres_data") @@ -147,3 +148,12 @@ def test(): } response = requests.post("http://0.0.0.0:8000/v1/chat/completions", headers=headers, json=data) typer.echo(response.json()) + + +def get_docker_compose_file() -> Path: + import liquidai_cli.docker_compose_files as docker_compose_files + + path = impresources.files(docker_compose_files).joinpath("docker-compose.yaml") + if not path.exists(): + raise FileNotFoundError(f"Docker compose file not found: {path}") + return path diff --git a/python_cli/docker-compose.yaml b/python_cli/liquidai_cli/docker_compose_files/docker-compose.yaml similarity index 79% rename from python_cli/docker-compose.yaml rename to python_cli/liquidai_cli/docker_compose_files/docker-compose.yaml index 9ac5539..2db15da 100644 --- a/python_cli/docker-compose.yaml +++ b/python_cli/liquidai_cli/docker_compose_files/docker-compose.yaml @@ -4,8 +4,10 @@ services: image: liquidai/liquid-labs-python-api:${STACK_VERSION} container_name: liquid-labs-python-api depends_on: - liquid-labs-web: + liquid-labs-postgres: condition: service_healthy + liquid-labs-db-migration: + condition: service_completed_successfully healthcheck: test: [ "CMD", "curl", "-f", "http://localhost:8000/health" ] interval: 60s @@ -67,6 +69,8 @@ services: depends_on: liquid-labs-postgres: condition: service_healthy + liquid-labs-db-migration: + condition: service_completed_successfully healthcheck: test: [ "CMD", "curl", "-f", "http://localhost:3000/api/health" ] interval: 60s @@ -85,6 +89,21 @@ services: - liquid_labs_network ports: - "3000:3000" + liquid-labs-db-migration: + image: liquidai/liquid-labs-db-migration + container_name: liquid-labs-db-migration + depends_on: + liquid-labs-postgres: + condition: service_healthy + environment: + POSTGRES_DB: ${POSTGRES_DB} + POSTGRES_USER: ${POSTGRES_USER} + POSTGRES_PASSWORD: 
${POSTGRES_PASSWORD} + POSTGRES_PORT: ${POSTGRES_PORT} + POSTGRES_SCHEMA: ${POSTGRES_SCHEMA} + DATABASE_URL: ${DATABASE_URL} + networks: + - liquid_labs_network networks: liquid_labs_network: From bf0aef3681c5b7e810041fefa0d235de4f81e4c7 Mon Sep 17 00:00:00 2001 From: Mengxiao Lin Date: Tue, 29 Apr 2025 08:09:42 -0700 Subject: [PATCH 08/13] Split docker image version in Python CLI New docker images are distributed with different version codes. This PR enables Python CLI to use different version codes for different images to catch up the latest updates. --- python_cli/liquidai_cli/commands/config.py | 5 ++++- python_cli/liquidai_cli/commands/model.py | 12 ++++++------ python_cli/liquidai_cli/commands/stack.py | 5 ++++- .../docker_compose_files/docker-compose.yaml | 8 ++++---- python_cli/liquidai_cli/utils/config.py | 4 ++++ 5 files changed, 22 insertions(+), 12 deletions(-) diff --git a/python_cli/liquidai_cli/commands/config.py b/python_cli/liquidai_cli/commands/config.py index 3c21987..83beacc 100644 --- a/python_cli/liquidai_cli/commands/config.py +++ b/python_cli/liquidai_cli/commands/config.py @@ -67,7 +67,10 @@ def import_env( # Stack configuration config["stack"].update( { - "version": env_vars.get("STACK_VERSION", config["stack"]["version"]), + "vllm_version": env_vars.get("VLLM_VERSION", config["stack"]["vllm_version"]), + "python_api_version": env_vars.get("PYTHON_API_VERSION", config["stack"]["python_api_version"]), + "web_version": env_vars.get("WEB_VERSION", config["stack"]["web_version"]), + "db_migration_version": env_vars.get("DB_MIGRATION_VERSION", config["stack"]["db_migration_version"]), "model_image": env_vars.get("MODEL_IMAGE", config["stack"]["model_image"]), "jwt_secret": env_vars.get("JWT_SECRET", generate_random_string(64)), "api_secret": env_vars.get("API_SECRET", config["stack"]["api_secret"]), diff --git a/python_cli/liquidai_cli/commands/model.py b/python_cli/liquidai_cli/commands/model.py index 4de2456..d837489 100644 --- a/python_cli/liquidai_cli/commands/model.py +++ b/python_cli/liquidai_cli/commands/model.py @@ -52,9 +52,9 @@ def run_model_image( model_volume_loader_container.remove() typer.echo(f"Launching model container: {name}") - stack_version = docker_helper.get_env_var("STACK_VERSION") + vllm_version = docker_helper.get_env_var("VLLM_VERSION") docker_helper.run_container( - image=f"liquidai/liquid-labs-vllm:{stack_version}", + image=f"liquidai/liquid-labs-vllm:{vllm_version}", name=name, device_requests=get_device_requests_from_gpus(gpu), volumes={model_volume_name: {"bind": "/model", "mode": "ro"}}, @@ -123,9 +123,9 @@ def run_huggingface( ) raise typer.Exit(1) - stack_version = docker_helper.get_env_var("STACK_VERSION") + vllm_version = docker_helper.get_env_var("VLLM_VERSION") docker_helper.run_container( - image=f"liquidai/liquid-labs-vllm:{stack_version}", + image=f"liquidai/liquid-labs-vllm:{vllm_version}", name=name, environment={"HUGGING_FACE_HUB_TOKEN": hf_token}, device_requests=get_device_requests_from_gpus(gpu), @@ -198,8 +198,8 @@ def run_checkpoint( typer.echo("Error: model_name is not defined in model_metadata.json", err=True) raise typer.Exit(1) - stack_version = docker_helper.get_env_var("STACK_VERSION") - image_name = f"liquidai/liquid-labs-vllm:{stack_version}" + vllm_version = docker_helper.get_env_var("VLLM_VERSION") + image_name = f"liquidai/liquid-labs-vllm:{vllm_version}" docker_helper.run_container( image=image_name, diff --git a/python_cli/liquidai_cli/commands/stack.py b/python_cli/liquidai_cli/commands/stack.py index 
71e3cf1..62cc733 100644 --- a/python_cli/liquidai_cli/commands/stack.py +++ b/python_cli/liquidai_cli/commands/stack.py @@ -35,7 +35,10 @@ def launch( "JWT_SECRET": config["stack"]["jwt_secret"], "API_SECRET": config["stack"]["api_secret"], "AUTH_SECRET": config["stack"]["auth_secret"], - "STACK_VERSION": config["stack"]["version"], + "VLLM_VERSION": config["stack"]["vllm_version"], + "PYTHON_API_VERSION": config["stack"]["python_api_version"], + "WEB_VERSION": config["stack"]["web_version"], + "DB_MIGRATION_VERSION": config["stack"]["db_migration_version"], "MODEL_IMAGE": config["stack"]["model_image"], "MODEL_NAME": config["stack"]["model_name"], "POSTGRES_DB": config["database"]["name"], diff --git a/python_cli/liquidai_cli/docker_compose_files/docker-compose.yaml b/python_cli/liquidai_cli/docker_compose_files/docker-compose.yaml index 2db15da..0bf0aed 100644 --- a/python_cli/liquidai_cli/docker_compose_files/docker-compose.yaml +++ b/python_cli/liquidai_cli/docker_compose_files/docker-compose.yaml @@ -1,7 +1,7 @@ # Docker Compose file for on-prem-stack Python CLI. services: liquid-labs-python-api: - image: liquidai/liquid-labs-python-api:${STACK_VERSION} + image: liquidai/liquid-labs-python-api:${PYTHON_API_VERSION} container_name: liquid-labs-python-api depends_on: liquid-labs-postgres: @@ -20,7 +20,7 @@ services: - ENV=internal - IS_DOCKER=true - CONTAINER_PORT=9000 - - VLLM_IMAGE_NAME=liquidai/liquid-labs-vllm:${STACK_VERSION} + - VLLM_IMAGE_NAME=liquidai/liquid-labs-vllm:${VLLM_VERSION} - JWT_SECRET=${JWT_SECRET} - API_SECRET=${API_SECRET} - NVIDIA_VISIBLE_DEVICES=all @@ -64,7 +64,7 @@ services: retries: 5 liquid-labs-web: - image: liquidai/liquid-labs-web:${STACK_VERSION} + image: liquidai/liquid-labs-web:${WEB_VERSION} container_name: liquid-labs-web depends_on: liquid-labs-postgres: @@ -90,7 +90,7 @@ services: ports: - "3000:3000" liquid-labs-db-migration: - image: liquidai/liquid-labs-db-migration + image: liquidai/liquid-labs-db-migration:${DB_MIGRATION_VERSION} container_name: liquid-labs-db-migration depends_on: liquid-labs-postgres: diff --git a/python_cli/liquidai_cli/utils/config.py b/python_cli/liquidai_cli/utils/config.py index 53c89df..a08e537 100644 --- a/python_cli/liquidai_cli/utils/config.py +++ b/python_cli/liquidai_cli/utils/config.py @@ -14,6 +14,10 @@ DEFAULT_CONFIG = { "stack": { "version": "c3d7dbacd1", + "vllm_version": "e5bb8474e8", + "python_api_version": "d2501caa69", + "web_version": "1d7d6c7cbb", + "db_migration_version": "2b70027d9a", "model_image": "liquidai/lfm-7b-e:0.0.1", "jwt_secret": None, # Generated on first use "api_secret": "local_api_token", From c61833e8ab0438fd3b3d19e623ff71d42c6ae319 Mon Sep 17 00:00:00 2001 From: Mengxiao Lin Date: Tue, 6 May 2025 12:46:42 -0700 Subject: [PATCH 09/13] Wait for model container to be healthy in running models (#32) During my work, I found that a model launch can easily fail for multiple reasons (not enough memory, vLLM not compatible with the model, etc.), but this is confusing because the Python CLI only prints that the model has started and asks users to wait for it to become ready. This PR makes the run-model commands wait for the vLLM container to become ready, based on the health check command.
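The waiting logic boils down to polling Docker's health status. A simplified sketch of the loop added in this PR (names shortened for illustration; the real helper also supports an optional timeout):

```python
# Simplified sketch of the health polling (not the exact implementation).
# Docker reports State.Health.Status as "starting", "healthy", or "unhealthy"
# once a container is started with a healthcheck.
import time

import docker


def wait_until_healthy(container_id: str, check_period: int = 15) -> bool:
    client = docker.from_env()
    while True:
        state = client.api.inspect_container(container_id).get("State", {})
        status = state.get("Health", {}).get("Status")
        if status == "healthy":
            return True
        if status == "unhealthy":
            return False
        print(f"Container {container_id} is not healthy yet. Status: {status}")
        time.sleep(check_period)
```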
Python CLI will show the command to check container logs if the model container fails: ``` $ liquidai model run-model-image --name lfm-7b-e --image "liquidai/lfm-7b-e:0.0.1" Creating volume for model data: lfm-7b-e Loading model data from image: liquidai/lfm-7b-e:0.0.1 Launching model container: lfm-7b-e Model 'lfm-7b-e' started successfully Waiting for model 'lfm-7b-e' to be healthy. This may take a 1-2 minutes... Container lfm-7b-e is not healthy yet. Status: starting Container lfm-7b-e is not healthy yet. Status: starting Container lfm-7b-e is not healthy yet. Status: starting Error: Model 'lfm-7b-e' failed to start serving requests Use `docker logs 4d6a1c51978b` to obtain container loggings. ``` A successful launch looks like ``` $ liquidai model run-model-image --name "lfm-3b-e" --image "liquidai/lfm-3b-e:0.0.6" Creating volume for model data: lfm-3b-e Loading model data from image: liquidai/lfm-3b-e:0.0.6 Launching model container: lfm-3b-e Model 'lfm-3b-e' started successfully Waiting for model 'lfm-3b-e' to be healthy. This may take a 1-2 minutes... Container lfm-3b-e is not healthy yet. Status: starting Container lfm-3b-e is not healthy yet. Status: starting Container lfm-3b-e is not healthy yet. Status: starting Container lfm-3b-e is not healthy yet. Status: starting Container lfm-3b-e is not healthy yet. Status: starting Container lfm-3b-e is not healthy yet. Status: starting Container lfm-3b-e is not healthy yet. Status: starting Model 'lfm-3b-e' has started serving requests. ``` --- python_cli/liquidai_cli/commands/model.py | 43 +++++++++++++++++------ python_cli/liquidai_cli/utils/docker.py | 34 ++++++++++++++++-- 2 files changed, 65 insertions(+), 12 deletions(-) diff --git a/python_cli/liquidai_cli/commands/model.py b/python_cli/liquidai_cli/commands/model.py index d837489..afb0d41 100644 --- a/python_cli/liquidai_cli/commands/model.py +++ b/python_cli/liquidai_cli/commands/model.py @@ -6,6 +6,8 @@ from liquidai_cli.utils.docker import DockerHelper from liquidai_cli.utils.device import get_device_requests_from_gpus from typing_extensions import Annotated +from docker.models.containers import Container + app = typer.Typer(help="Manage ML models") docker_helper = DockerHelper() @@ -29,6 +31,7 @@ def run_model_image( typer.Option("--max-num-seqs", help="Maximum number of sequences to generate in parallel"), ] = 750, max_model_len: Annotated[int, typer.Option("--max-model-len", help="Maximum length of the model")] = 32768, + wait_for_health: Annotated[bool, typer.Option("--wait", help="Wait for health check to pass")] = True, ): """ Launch a model stored in a Docker image. Default Liquid Foundation Models (LFM) are delivered in this approach. 
@@ -53,7 +56,7 @@ def run_model_image( typer.echo(f"Launching model container: {name}") vllm_version = docker_helper.get_env_var("VLLM_VERSION") - docker_helper.run_container( + container = docker_helper.run_container( image=f"liquidai/liquid-labs-vllm:{vllm_version}", name=name, device_requests=get_device_requests_from_gpus(gpu), @@ -91,8 +94,11 @@ def run_model_image( "start_period": HEALTHCHECK_INTERVAL, }, ) - typer.echo(f"Model '{name}' started successfully") - typer.echo("Please wait 1-2 minutes for the model to load before making API calls") + if not wait_for_health: + typer.echo(f"Model '{name}' started successfully") + typer.echo("Please wait 1-2 minutes for the model to load before making API calls") + else: + wait_for_model_health_or_print_logs_command(name, container) @app.command(name="run-hf") @@ -114,6 +120,7 @@ def run_huggingface( Optional[str], typer.Option("--hf-token", help="Hugging Face access token", envvar="HUGGING_FACE_TOKEN"), ] = None, + wait_for_health: Annotated[bool, typer.Option("--wait", help="Wait for health check to pass")] = True, ): """Launch a model from Hugging Face.""" if not hf_token: @@ -124,7 +131,7 @@ def run_huggingface( raise typer.Exit(1) vllm_version = docker_helper.get_env_var("VLLM_VERSION") - docker_helper.run_container( + container = docker_helper.run_container( image=f"liquidai/liquid-labs-vllm:{vllm_version}", name=name, environment={"HUGGING_FACE_HUB_TOKEN": hf_token}, @@ -158,9 +165,11 @@ def run_huggingface( "start_period": HEALTHCHECK_INTERVAL, }, ) - - typer.echo(f"Model '{name}' started successfully") - typer.echo("Please wait 1-2 minutes for the model to load before making API calls") + if not wait_for_health: + typer.echo(f"Model '{name}' started successfully") + typer.echo("Please wait 1-2 minutes for the model to load before making API calls") + else: + wait_for_model_health_or_print_logs_command(name, container) @app.command(name="run-checkpoint") @@ -173,6 +182,7 @@ def run_checkpoint( typer.Option("--gpu-memory-utilization", help="Fraction of GPU memory to use"), ] = 0.6, max_num_seqs: Annotated[int, typer.Option("--max-num-seqs", help="Maximum number of sequences to cache")] = 600, + wait_for_health: Annotated[bool, typer.Option("--wait", help="Wait for health check to pass")] = True, ): """Launch a model from local checkpoint.""" import json @@ -201,7 +211,7 @@ def run_checkpoint( vllm_version = docker_helper.get_env_var("VLLM_VERSION") image_name = f"liquidai/liquid-labs-vllm:{vllm_version}" - docker_helper.run_container( + container = docker_helper.run_container( image=image_name, name=model_name, device_requests=get_device_requests_from_gpus(gpu), @@ -240,8 +250,11 @@ def run_checkpoint( }, ) - typer.echo(f"Model '{model_name}' started successfully") - typer.echo("Please wait 1-2 minutes for the model to load before making API calls") + if not wait_for_health: + typer.echo(f"Model '{model_name}' started successfully") + typer.echo("Please wait 1-2 minutes for the model to load before making API calls") + else: + wait_for_model_health_or_print_logs_command(model_name, container) @app.command() @@ -306,3 +319,13 @@ def stop( typer.echo("Invalid selection", err=True) except typer.Abort: typer.echo("\nOperation cancelled.") + + +def wait_for_model_health_or_print_logs_command(name: str, container: Container): + typer.echo(f"Model '{name}' started successfully") + typer.echo(f"Waiting for model '{name}' to be healthy. 
This may take a 1-2 minutes...") + if docker_helper.wait_for_container_health_check(container, 15): + typer.echo(f"Model '{name}' has started serving requests.") + else: + typer.echo(f"Error: Model '{name}' failed to start serving requests", err=True) + typer.echo(f"Use `docker logs {container.short_id}` to obtain container loggings.") diff --git a/python_cli/liquidai_cli/utils/docker.py b/python_cli/liquidai_cli/utils/docker.py index 458c420..d3ec28e 100644 --- a/python_cli/liquidai_cli/utils/docker.py +++ b/python_cli/liquidai_cli/utils/docker.py @@ -1,11 +1,13 @@ """Docker utilities for the Liquid Labs CLI.""" import subprocess -from typing import List, Dict, Any +from typing import List, Dict, Any, Optional import docker from docker.errors import NotFound from pathlib import Path import logging +from docker.models.containers import Container +import time logger = logging.getLogger(__name__) @@ -51,7 +53,7 @@ def remove_network(self, name: str) -> None: except NotFound: pass - def run_container(self, image: str, name: str, **kwargs) -> docker.models.containers.Container: + def run_container(self, image: str, name: str, **kwargs) -> Container: """Run a Docker container.""" try: container = self.client.containers.get(name) @@ -125,3 +127,31 @@ def remove_env_file(self) -> None: self.env_file.unlink() except FileNotFoundError: pass + + def wait_for_container_health_check( + self, container: Container, check_period: int, timeout: Optional[int] = None + ) -> bool: + """ + Wait for a container to be healthy. Returns True if healthy, False + if timeout or the container exit with a non-zero code. + + Args: + * container: the container to check + * check_period: the period to wait between checks (in seconds) + * timeout: the maximum time to wait for the container to be healthy (in seconds) + """ + counter = 0 + while True: + inspect_results = self.client.api.inspect_container(container.id) + health_status = inspect_results.get("State", {}).get("Health", {}).get("Status") + if health_status == "healthy": + return True + elif health_status == "unhealthy": + return False + else: + print(f"Container {container.name} is not healthy yet. Status: {health_status}") + if timeout and counter >= timeout: + print(f"Timeout waiting for container {container.name} to be healthy.") + return False + time.sleep(check_period) + counter += check_period From 455ea965af2187466cfc0aeb22422ac23454172e Mon Sep 17 00:00:00 2001 From: Mengxiao Lin Date: Tue, 6 May 2025 13:20:43 -0700 Subject: [PATCH 10/13] Support port forwarding in Python CLI (#33) Before this PR, the `--port` option on the run-model commands only changed the port inside the Docker container. This PR forces vLLM to launch on port 9000 inside the container and forwards it to the port assigned by the user. By default, we don't expose this port because users can access the model through the labs Python API stack.
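Concretely, the container-side port is now pinned to 9000 and the Docker SDK's `ports` mapping handles the host-side forwarding only when `--port` is given. A minimal sketch (assuming the docker SDK and the stack's image and network names; `launch_vllm` is a hypothetical helper, not the CLI's exact code):

```python
# Minimal sketch of the new port handling. vLLM always listens on 9000 inside
# the container; --port only adds an optional host-side mapping.
from typing import Optional

import docker


def launch_vllm(image: str, name: str, host_port: Optional[int] = None):
    client = docker.from_env()
    ports_mapping = {"9000/tcp": host_port} if host_port else None  # not exposed by default
    return client.containers.run(
        image,
        name=name,
        detach=True,
        network="liquid_labs_network",
        ports=ports_mapping,
        command=["--host", "0.0.0.0", "--port", "9000"],
    )
```

With the mapping in place, passing e.g. `--port 9001` makes the model reachable at http://localhost:9001/v1/ while the in-container port stays at 9000.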
--- python_cli/README.md | 8 +++--- python_cli/liquidai_cli/commands/model.py | 35 ++++++++++++++++------- 2 files changed, 28 insertions(+), 15 deletions(-) diff --git a/python_cli/README.md b/python_cli/README.md index 8ff63a8..22917b7 100644 --- a/python_cli/README.md +++ b/python_cli/README.md @@ -66,7 +66,7 @@ liquidai model run-model-image \ --name lfm-3b-e \ --image "liquidai/lfm-3b-e:0.0.6" -# Run a HuggingFace model +# Run a HuggingFace model and expose on port 9000 liquidai model run-hf \ --name llama-7b \ --path meta-llama/Llama-2-7b-chat-hf \ @@ -75,10 +75,10 @@ liquidai model run-hf \ --max-num-seqs 600 \ --max-model-len 32768 -# Run a local checkpoint +# Run a local checkpoint and expose on port 9001 to avoid conflicts liquidai model run-checkpoint \ --path /path/to/checkpoint \ - --port 9000 \ + --port 9001 \ --gpu-memory-utilization 0.6 \ --max-num-seqs 600 @@ -126,4 +126,4 @@ liquidai config import --force ``` ## Command Reference -Call `liquidai [command] --help` to get the detailed usage reference. \ No newline at end of file +Call `liquidai [command] --help` to get the detailed usage reference. diff --git a/python_cli/liquidai_cli/commands/model.py b/python_cli/liquidai_cli/commands/model.py index afb0d41..4bfd706 100644 --- a/python_cli/liquidai_cli/commands/model.py +++ b/python_cli/liquidai_cli/commands/model.py @@ -20,7 +20,7 @@ def run_model_image( name: str = typer.Option(..., "--name", help="Name for the model"), model_image: str = typer.Option(..., "--image", help="Model image name"), - port: Annotated[int, typer.Option("--port", help="Port to expose locally")] = 9000, + port: Annotated[Optional[int], typer.Option("--port", help="Port to expose locally")] = None, gpu: Annotated[str, typer.Option("--gpu", help="Specific GPU index to use")] = "all", gpu_memory_utilization: Annotated[ float, @@ -56,19 +56,22 @@ def run_model_image( typer.echo(f"Launching model container: {name}") vllm_version = docker_helper.get_env_var("VLLM_VERSION") + ports_mapping = {"9000/tcp": port} if port else None + container = docker_helper.run_container( image=f"liquidai/liquid-labs-vllm:{vllm_version}", name=name, device_requests=get_device_requests_from_gpus(gpu), volumes={model_volume_name: {"bind": "/model", "mode": "ro"}}, network="liquid_labs_network", + ports=ports_mapping, command=[ "--model", "/model", "--served-model-name", name, "--port", - str(port), + str(9000), "--max-logprobs", "0", "--dtype", @@ -89,7 +92,7 @@ def run_model_image( str(max_model_len), ], healthcheck={ - "test": f"curl --fail http://localhost:{port}/health || exit 1", + "test": "curl --fail http://localhost:9000/health || exit 1", "interval": HEALTHCHECK_INTERVAL, "start_period": HEALTHCHECK_INTERVAL, }, @@ -99,13 +102,15 @@ def run_model_image( typer.echo("Please wait 1-2 minutes for the model to load before making API calls") else: wait_for_model_health_or_print_logs_command(name, container) + if port: + typer.echo(f"Model is accessible at http://localhost:{port}/v1/") @app.command(name="run-hf") def run_huggingface( name: str = typer.Option(..., "--name", help="Name for the model container"), path: str = typer.Option(..., "--path", help="Hugging Face model path"), - port: Annotated[int, typer.Option("--port", help="Port to expose locally")] = 9000, + port: Annotated[Optional[int], typer.Option("--port", help="Port to expose locally")] = None, gpu: Annotated[str, typer.Option("--gpu", help="Specific GPU index to use")] = "all", gpu_memory_utilization: Annotated[ float, @@ -131,17 +136,19 @@ def 
run_huggingface( raise typer.Exit(1) vllm_version = docker_helper.get_env_var("VLLM_VERSION") + ports_mapping = {"9000/tcp": port} if port else None container = docker_helper.run_container( image=f"liquidai/liquid-labs-vllm:{vllm_version}", name=name, environment={"HUGGING_FACE_HUB_TOKEN": hf_token}, device_requests=get_device_requests_from_gpus(gpu), network="liquid_labs_network", + ports=ports_mapping, command=[ "--host", "0.0.0.0", "--port", - str(port), + "9000", "--model", path, "--served-model-name", @@ -160,7 +167,7 @@ def run_huggingface( str(max_model_len), ], healthcheck={ - "test": f"curl --fail http://localhost:{port}/health || exit 1", + "test": "curl --fail http://localhost:9000/health || exit 1", "interval": HEALTHCHECK_INTERVAL, "start_period": HEALTHCHECK_INTERVAL, }, @@ -170,12 +177,14 @@ def run_huggingface( typer.echo("Please wait 1-2 minutes for the model to load before making API calls") else: wait_for_model_health_or_print_logs_command(name, container) + if port: + typer.echo(f"Model is accessible at http://localhost:{port}/v1/") @app.command(name="run-checkpoint") def run_checkpoint( path: str = typer.Option(..., "--path", help="Path to model checkpoint directory"), - port: Annotated[int, typer.Option("--port", help="Port to expose locally")] = 9000, + port: Annotated[Optional[int], typer.Option("--port", help="Port to expose locally")] = None, gpu: Annotated[str, typer.Option("--gpu", help="Specific GPU index to use")] = "all", gpu_memory_utilization: Annotated[ float, @@ -210,6 +219,7 @@ def run_checkpoint( vllm_version = docker_helper.get_env_var("VLLM_VERSION") image_name = f"liquidai/liquid-labs-vllm:{vllm_version}" + ports_mapping = {"9000/tcp": port} if port else None container = docker_helper.run_container( image=image_name, @@ -217,11 +227,12 @@ def run_checkpoint( device_requests=get_device_requests_from_gpus(gpu), volumes={str(checkpoint_path): {"bind": "/model", "mode": "ro"}}, network="liquid_labs_network", + ports=ports_mapping, command=[ "--host", "0.0.0.0", "--port", - str(port), + "9000", "--model", "/model", "--served-model-name", @@ -244,7 +255,7 @@ def run_checkpoint( "32768", ], healthcheck={ - "test": f"curl --fail http://localhost:{port}/health || exit 1", + "test": "curl --fail http://localhost:9000/health || exit 1", "interval": HEALTHCHECK_INTERVAL, "start_period": HEALTHCHECK_INTERVAL, }, @@ -255,6 +266,8 @@ def run_checkpoint( typer.echo("Please wait 1-2 minutes for the model to load before making API calls") else: wait_for_model_health_or_print_logs_command(model_name, container) + if port: + typer.echo(f"Model is accessible at http://localhost:{port}/v1/") @app.command() @@ -273,7 +286,7 @@ def list(): ports = container.get("ports", {}) port = "unknown" if isinstance(ports, dict): - port_mappings = cast(List[Dict[str, str]], ports.get("8000/tcp", [])) + port_mappings = cast(List[Dict[str, str]], ports.get("9000/tcp", [])) if port_mappings: mapping = port_mappings[0] if isinstance(mapping, dict): @@ -302,7 +315,7 @@ def stop( ports = container.get("ports", {}) port = "unknown" if isinstance(ports, dict): - port_mappings = cast(List[Dict[str, str]], ports.get("8000/tcp", [])) + port_mappings = cast(List[Dict[str, str]], ports.get("9000/tcp", [])) if port_mappings: mapping = port_mappings[0] if isinstance(mapping, dict): From c617b9de91b5049ebea3892719c708caab5126fa Mon Sep 17 00:00:00 2001 From: "devin-ai-integration[bot]" <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Sun, 11 May 2025 23:46:10 -0700 Subject: [PATCH 
11/13] Add GitHub Action to publish Python package to PyPI on release (#23) # Add GitHub Action for PyPI Publishing This PR adds a GitHub Action workflow that automatically publishes the `liquidai-cli` Python package to PyPI when a new release is created. ## Changes: - Add workflow file that triggers on release creation - Set up testing with Python 3.10, 3.11, and 3.12 - Configure build and publish steps using PyPI credentials ## Required Secrets: Before this can work in production, the following secrets need to be added to the repository: - `PYPI_USERNAME`: PyPI username (or `__token__` if using API token) - `PYPI_API_TOKEN`: PyPI API token for authentication Link to Devin run: https://app.devin.ai/sessions/091656c5a70248bd94734475445a4b72 User: liren@liquid.ai --------- Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Co-authored-by: liren@liquid.ai --- .github/workflows/publish-to-pypi.yml | 29 +++++++++++++++++++++++++++ python_cli/liquidai_cli/__init__.py | 2 +- python_cli/pyproject.toml | 4 ++-- 3 files changed, 32 insertions(+), 3 deletions(-) create mode 100644 .github/workflows/publish-to-pypi.yml diff --git a/.github/workflows/publish-to-pypi.yml b/.github/workflows/publish-to-pypi.yml new file mode 100644 index 0000000..237578c --- /dev/null +++ b/.github/workflows/publish-to-pypi.yml @@ -0,0 +1,29 @@ +name: Publish Python Package to PyPI + +on: + release: + types: [created] + +jobs: + build-and-publish: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.10' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install build twine + - name: Build and publish + env: + TWINE_USERNAME: __token__ + TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }} + run: | + cd python_cli + python -m build + twine check dist/* + twine upload dist/* diff --git a/python_cli/liquidai_cli/__init__.py b/python_cli/liquidai_cli/__init__.py index 2c265e9..e958c11 100644 --- a/python_cli/liquidai_cli/__init__.py +++ b/python_cli/liquidai_cli/__init__.py @@ -1,3 +1,3 @@ """Liquid Labs CLI tool for managing on-prem stack.""" -__version__ = "0.1.0" +__version__ = "0.0.1b0" diff --git a/python_cli/pyproject.toml b/python_cli/pyproject.toml index 5e85f38..47fa0eb 100644 --- a/python_cli/pyproject.toml +++ b/python_cli/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "liquidai-cli" -version = "0.1.0" +version = "0.0.1b0" description = "CLI tool for Liquid Labs on-prem stack" readme = "README.md" requires-python = ">=3.10" @@ -19,7 +19,7 @@ dependencies = [ ] [project.scripts] -liquidai = "liquidai_cli.cli:main" +liquid-cli = "liquidai_cli.cli:main" [tool.hatch.build.targets.wheel] packages = ["liquidai_cli"] From c8a6fed023b70d5db46e1885d04d8c7c5db51af8 Mon Sep 17 00:00:00 2001 From: Liren Tu Date: Sun, 11 May 2025 23:53:07 -0700 Subject: [PATCH 12/13] Update ci filename --- .../workflows/{publish-to-pypi.yml => publish-to-pypi.yaml} | 0 .github/workflows/{python-ci.yml => python-ci.yaml} | 4 ++-- 2 files changed, 2 insertions(+), 2 deletions(-) rename .github/workflows/{publish-to-pypi.yml => publish-to-pypi.yaml} (100%) rename .github/workflows/{python-ci.yml => python-ci.yaml} (96%) diff --git a/.github/workflows/publish-to-pypi.yml b/.github/workflows/publish-to-pypi.yaml similarity index 100% rename from .github/workflows/publish-to-pypi.yml rename to .github/workflows/publish-to-pypi.yaml diff --git 
a/.github/workflows/python-ci.yml b/.github/workflows/python-ci.yaml similarity index 96% rename from .github/workflows/python-ci.yml rename to .github/workflows/python-ci.yaml index 08fd981..a3d3d5f 100644 --- a/.github/workflows/python-ci.yml +++ b/.github/workflows/python-ci.yaml @@ -18,8 +18,8 @@ jobs: python-version: ["3.10", "3.11", "3.12"] steps: - - uses: actions/checkout@v3 - + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v4 with: From ac2f51ae26f3f255604be19a9fd8cd35af113850 Mon Sep 17 00:00:00 2001 From: Mengxiao Lin Date: Mon, 12 May 2025 17:00:31 +0000 Subject: [PATCH 13/13] Update vLLM version for python CLI --- python_cli/liquidai_cli/utils/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python_cli/liquidai_cli/utils/config.py b/python_cli/liquidai_cli/utils/config.py index a08e537..2dc4b4d 100644 --- a/python_cli/liquidai_cli/utils/config.py +++ b/python_cli/liquidai_cli/utils/config.py @@ -14,7 +14,7 @@ DEFAULT_CONFIG = { "stack": { "version": "c3d7dbacd1", - "vllm_version": "e5bb8474e8", + "vllm_version": "bdf3162975", "python_api_version": "d2501caa69", "web_version": "1d7d6c7cbb", "db_migration_version": "2b70027d9a",