diff --git a/.gitignore b/.gitignore index a248d4ca..4f282ee0 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,9 @@ *.tfstate.lock. *.terraform.lock.hcl +# Terraform plan files +*.plan + # logs *.log @@ -22,6 +25,8 @@ # Ignored Terraform files *gitignore*.tf +terraform.tfvars +!terraform.tfvars.example # Ignore Mac .DS_Store files .DS_Store diff --git a/README.md b/README.md index 68baa2f6..dcfda364 100644 --- a/README.md +++ b/README.md @@ -51,6 +51,7 @@ The folder `examples` contains the following Terraform implementation examples : | Azure | [adb-uc](examples/adb-uc/) | ADB Unity Catalog Process | | Azure | [adb-unity-catalog-basic-demo](examples/adb-unity-catalog-basic-demo/) | ADB Unity Catalog end-to-end demo including UC metastore setup, Users/groups sync from AAD to databricks account, UC Catalog, External locations, Schemas, & Access Grants | | Azure | [adb-overwatch](examples/adb-overwatch/) | Overwatch multi-workspace deployment on Azure | +| Azure | [adb-coding-assistants-cluster](examples/adb-coding-assistants-cluster/) | Databricks cluster with Claude Code CLI for AI-assisted development | | AWS | [aws-workspace-basic](examples/aws-workspace-basic/) | Provisioning AWS Databricks E2 | | AWS | [aws-workspace-with-firewall](examples/aws-workspace-with-firewall/) | Provisioning AWS Databricks E2 with an AWS Firewall | | AWS | [aws-exfiltration-protection](examples/aws-exfiltration-protection/) | An implementation of [Data Exfiltration Protection on AWS](https://www.databricks.com/blog/2021/02/02/data-exfiltration-protection-with-databricks-on-aws.html) | diff --git a/examples/adb-coding-assistants-cluster/Makefile b/examples/adb-coding-assistants-cluster/Makefile new file mode 100644 index 00000000..653039d8 --- /dev/null +++ b/examples/adb-coding-assistants-cluster/Makefile @@ -0,0 +1,7 @@ +.PHONY: docs test_docs + +docs: + terraform-docs -c ../../.terraform-docs.yml . + +test_docs: + terraform-docs -c ../../.terraform-docs.yml --output-check . 
diff --git a/examples/adb-coding-assistants-cluster/README.md b/examples/adb-coding-assistants-cluster/README.md new file mode 100644 index 00000000..2d0ff7bd --- /dev/null +++ b/examples/adb-coding-assistants-cluster/README.md @@ -0,0 +1,363 @@ +# Provisioning Databricks Cluster with Claude Code CLI + +This template provides a self-contained deployment of a Databricks cluster pre-configured with Claude Code CLI for AI-assisted development directly on the cluster. + +## What Gets Deployed + +* Unity Catalog Volume for init script storage +* Databricks cluster with Claude Code CLI auto-installed on startup +* MLflow experiment for tracing Claude Code sessions +* Bash helper functions for easy usage + +## How to use + +1. Copy `terraform.tfvars.example` to `terraform.tfvars` +2. Update `terraform.tfvars` with your values: + - `databricks_resource_id`: Your Azure Databricks workspace resource ID + - `cluster_name`: Name for your cluster + - `catalog_name`: Unity Catalog name to use +3. (Optional) Customize cluster configuration in `terraform.tfvars` (node type, autoscaling, etc.) +4. (Optional) Configure your [remote backend](https://developer.hashicorp.com/terraform/language/settings/backends/azurerm) +5. Run `terraform init` to initialize terraform and get provider ready +6. Run `terraform plan` to review the resources that will be created +7. Run `terraform apply` to create the resources + +## Prerequisites + +- Databricks workspace with Unity Catalog enabled +- Unity Catalog with an existing catalog and schema +- **Unity Catalog metastore must have a root storage credential configured** (required for volumes) +- Permission to create clusters +- (For Azure) Authenticated via `az login` or environment variables +- Databricks Runtime 14.3 LTS or higher recommended + +> **Note**: If you encounter an error about missing root storage credential, you need to configure the metastore's root storage credential first. 
See [Databricks documentation](https://docs.databricks.com/api-explorer/workspace/metastores/update) for details. + +## Post-Deployment + +After the cluster starts, you can connect via SSH to use Claude Code and other development tools. + +### 1. Configure SSH Tunnel + +Use the Databricks CLI to set up SSH access to your new cluster: + +```bash +# Authenticate if needed +databricks auth login --host https://your-workspace-url.cloud.databricks.com + +# Set up SSH config (replace 'claude-dev' with your preferred alias) +databricks ssh setup --name claude-dev +# Select your cluster from the list when prompted +``` + +This creates an entry in your `~/.ssh/config` file. + +### 2. Connect via VSCode or Cursor + +1. Install the **Remote - SSH** extension in VSCode or Cursor. +2. Open the Command Palette (`Cmd+Shift+P` / `Ctrl+Shift+P`). +3. Select **Remote-SSH: Connect to Host**. +4. Choose `claude-dev` (or the alias you created). +5. Select **Linux** as the platform. +6. Once connected, open your persistent workspace folder: `/Workspace/Users/<your-username>/`. + +> **Important: Work Storage Location** +> ⚠️ **DO NOT use Databricks Repos (`/Repos/...`) for active development work.** Repos folders can be unreliable for persistent storage and may lose uncommitted changes during cluster restarts or sync operations. +> +> ✅ **Use `/Workspace/Users/<your-username>/` instead.** This location provides reliable persistent storage. You can use regular git commands to manage version control (see "Using Git in /Workspace" section below). + +### 3. Launch Claude Code + +Open the terminal in your remote VSCode/Cursor session and run: + +```bash +# 1. Load environment variables and helpers +source ~/.bashrc + +# 2. Enable MLflow tracing (optional but recommended) +claude-tracing-enable + +# 3. Start Claude Code +claude +``` + +**First-time setup tips:** +- Claude will ask for file permissions; use `Shift+Tab` to auto-allow edits in the current directory. 
+- If you need to refresh credentials, run `claude-refresh-token`. + +### 4. Remote Web App Development (Port Forwarding) + +VSCode and Cursor automatically forward ports. For example, to run a Streamlit app: + +1. Create `app.py`: + ```python + import streamlit as st + st.title("Databricks Remote App") + st.write("Running on cluster!") + ``` +2. Run it: + ```bash + streamlit run app.py --server.port 8501 + ``` +3. Click "Open in Browser" in the popup notification to view it at `localhost:8501`. + +### 5. Using the Databricks Python Interpreter + +You don't need to configure a virtual environment. Databricks manages it for you. + +1. In the remote terminal, find the python path: + ```bash + echo $DATABRICKS_VIRTUAL_ENV + # Output example: /local_disk0/.ephemeral_nfs/envs/pythonEnv-xxxx/bin/python + ``` +2. In VSCode/Cursor, open the Command Palette and select **Python: Select Interpreter**. +3. Paste the path from above. + +### 6. Persistent Sessions with tmux + +To keep your agent running even if you disconnect: + +```bash +# Start a new session +tmux new -s claude-session + +# Detach (Ctrl+B, then D) +# Reattach later +tmux attach -t claude-session +``` + +This allows you to leave long-running tasks (like "Build a data pipeline") executing on the cluster while you are offline. + +### 7. Using Git in /Workspace + +Since `/Workspace` doesn't have native Repos integration, use standard git commands: + +```bash +# Navigate to your workspace directory +cd /Workspace/Users/<your-username>/ + +# Option 1: Clone an existing repository +git clone https://github.com/your-org/your-repo.git +cd your-repo + +# Option 2: Initialize a new repository +mkdir my-project && cd my-project +git init +git remote add origin https://github.com/your-org/your-repo.git + +# Configure git (first time only) +git config user.name "Your Name" +git config user.email "your.email@company.com" + +# Regular git workflow +git add . 
+git commit -m "Your commit message" +git push origin main +``` + +**Git Authentication Options:** + +1. **Personal Access Token (PAT)** - Recommended: + ```bash + # GitHub: Create at https://github.com/settings/tokens + # Use token as password when prompted + git clone https://github.com/your-org/repo.git + ``` + +2. **SSH Keys**: + ```bash + # Generate SSH key on the cluster + ssh-keygen -t ed25519 -C "your.email@company.com" + + # Add to GitHub: Copy output and add at https://github.com/settings/keys + cat ~/.ssh/id_ed25519.pub + + # Clone using SSH + git clone git@github.com:your-org/repo.git + ``` + +3. **Git Credential Manager**: + ```bash + # Store credentials to avoid repeated prompts + git config --global credential.helper store + ``` + +## Helper Commands + +### Claude CLI Commands + +| Command | Purpose | +|---------|---------| +| `check-claude` | Verify Claude CLI installation and configuration | +| `claude-debug` | Show detailed Claude configuration | +| `claude-refresh-token` | Regenerate Claude settings from environment | +| `claude-token-status` | Check token freshness and auto-refresh status | +| `claude-tracing-enable` | Enable MLflow tracing for Claude sessions | +| `claude-tracing-status` | Check tracing status | +| `claude-tracing-disable` | Disable tracing | + +### Git Workspace Commands + +| Command | Purpose | +|---------|---------| +| `git-workspace-init` | Interactive setup for git in /Workspace (clone or init) | +| `git-workspace-check` | Verify location and check for uncommitted/unpushed changes | +| `git-workspace-setup-auth` | Configure git authentication (PAT, SSH, or credential helper) | + +These helpers warn you if working in `/Repos` and ensure your work is backed up in git. 
+ +### VS Code/Cursor Remote Commands + +| Command | Purpose | +|---------|---------| +| `claude-vscode-setup` | Show Remote SSH setup instructions | +| `claude-vscode-env` | Get Python interpreter path for IDE | +| `claude-vscode-check` | Verify Remote SSH configuration | +| `claude-vscode-config` | Generate settings.json snippet | + +## Offline Installation + +For air-gapped or restricted network environments, use the separate offline module: [`adb-coding-assistants-cluster-offline`](../../modules/adb-coding-assistants-cluster-offline/README.md). See the [Offline Installation Guide](../../modules/adb-coding-assistants-cluster-offline/scripts/OFFLINE-INSTALLATION.md) for detailed instructions. + +## Configuration Examples + +### Single-Node Development Cluster + +```hcl +cluster_mode = "SINGLE_NODE" +num_workers = 0 +node_type_id = "Standard_D8pds_v6" +``` + +### Autoscaling Production Cluster + +```hcl +cluster_mode = "STANDARD" +num_workers = null # Enable autoscaling +min_workers = 2 +max_workers = 8 +node_type_id = "Standard_D8pds_v6" +``` + +## Authentication + +This example uses Databricks unified authentication. Authentication can be provided via: + +1. **Azure CLI** (recommended for local development): + ```bash + az login + terraform apply + ``` + +2. **Environment Variables** (recommended for CI/CD): + ```bash + export DATABRICKS_HOST="https://adb-xxx.azuredatabricks.net" + export DATABRICKS_TOKEN="dapi..." + terraform apply + ``` + +3. **Configuration Profile**: + ```bash + export DATABRICKS_CONFIG_PROFILE="my-profile" + terraform apply + ``` + +For more details on authentication, see the [Databricks unified authentication documentation](https://docs.databricks.com/dev-tools/auth/unified-auth.html). + +## Troubleshooting + +### Init Script Fails + +Check cluster event logs in the Databricks UI under **Compute** → **Your Cluster** → **Event Log**. 
+ +Common issues: +- Network connectivity to download packages +- Unity Catalog volume permissions +- Insufficient cluster permissions + +### Claude Not Found After Login + +```bash +# Reload bashrc +source ~/.bashrc + +# Verify PATH +check-claude +``` + +### Authentication Issues + +```bash +# Check environment variables +check-claude + +# Regenerate configuration +claude-refresh-token +``` + +## Additional Resources + +- [Scripts Documentation](scripts/README.md) +- [Databricks Init Scripts Documentation](https://docs.databricks.com/clusters/init-scripts.html) +- [Unity Catalog Volumes Documentation](https://docs.databricks.com/data-governance/unity-catalog/volumes.html) + + +## Requirements + +| Name | Version | +|------|---------| +| [terraform](#requirement\_terraform) | >= 1.0 | +| [azurerm](#requirement\_azurerm) | >=4.31.0 | +| [databricks](#requirement\_databricks) | >=1.81.1 | + +## Providers + +| Name | Version | +|------|---------| +| [azurerm](#provider\_azurerm) | 4.57.0 | + +## Modules + +No modules. + +## Resources + +| Name | Type | +|------|------| +| [azurerm_client_config.current](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/data-sources/client_config) | data source | +| [azurerm_databricks_workspace.this](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/data-sources/databricks_workspace) | data source | +| [azurerm_resource_group.this](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/data-sources/resource_group) | data source | + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:--------:| +| [catalog\_name](#input\_catalog\_name) | Unity Catalog name for the volume | `string` | n/a | yes | +| [cluster\_name](#input\_cluster\_name) | Name of the Databricks cluster | `string` | n/a | yes | +| [databricks\_resource\_id](#input\_databricks\_resource\_id) | The Azure resource ID for the Databricks workspace. 
Format: /subscriptions/{subscription-id}/resourceGroups/{resource-group}/providers/Microsoft.Databricks/workspaces/{workspace-name} | `string` | n/a | yes | +| [autotermination\_minutes](#input\_autotermination\_minutes) | Minutes of inactivity before cluster auto-terminates | `number` | `30` | no | +| [cluster\_mode](#input\_cluster\_mode) | Cluster mode: STANDARD or SINGLE\_NODE | `string` | `"STANDARD"` | no | +| [init\_script\_source\_path](#input\_init\_script\_source\_path) | Local path to the init script | `string` | `null` | no | +| [max\_workers](#input\_max\_workers) | Maximum number of workers for autoscaling | `number` | `3` | no | +| [min\_workers](#input\_min\_workers) | Minimum number of workers for autoscaling | `number` | `1` | no | +| [mlflow\_experiment\_name](#input\_mlflow\_experiment\_name) | MLflow experiment name for Claude Code tracing | `string` | `"/Workspace/Shared/claude-code-tracing"` | no | +| [node\_type\_id](#input\_node\_type\_id) | Node type for the cluster. Default is Standard_D8pds_v6 (modern, premium SSD + local NVMe). If unavailable in your region, consider Standard_DS13_v2 as fallback. | `string` | `"Standard_D8pds_v6"` | no | +| [num\_workers](#input\_num\_workers) | Number of worker nodes (null for autoscaling) | `number` | `null` | no | +| [schema\_name](#input\_schema\_name) | Schema name for the volume | `string` | `"default"` | no | +| [spark\_version](#input\_spark\_version) | Databricks Runtime version | `string` | `"17.3.x-cpu-ml-scala2.13"` | no | +| [tags](#input\_tags) | Custom tags for the cluster | `map(string)` |
{
"Environment": "dev",
"Purpose": "coding-assistants"
}
| no | +| [volume\_name](#input\_volume\_name) | Volume name to store init scripts | `string` | `"coding_assistants"` | no | + +## Outputs + +| Name | Description | +|------|-------------| +| [cluster\_id](#output\_cluster\_id) | The ID of the created cluster | +| [cluster\_name](#output\_cluster\_name) | Name of the created cluster | +| [cluster\_url](#output\_cluster\_url) | URL to access the cluster in Databricks UI | +| [init\_script\_path](#output\_init\_script\_path) | Path to the init script in the volume | +| [mlflow\_experiment\_name](#output\_mlflow\_experiment\_name) | MLflow experiment name for tracing | +| [setup\_instructions](#output\_setup\_instructions) | Instructions for using the cluster | +| [volume\_full\_name](#output\_volume\_full\_name) | Full name of the volume | +| [volume\_path](#output\_volume\_path) | Path to the volume containing init scripts | + diff --git a/examples/adb-coding-assistants-cluster/main.tf b/examples/adb-coding-assistants-cluster/main.tf new file mode 100644 index 00000000..80fe58a3 --- /dev/null +++ b/examples/adb-coding-assistants-cluster/main.tf @@ -0,0 +1,86 @@ +# Cluster with Claude Code CLI coding assistant +# Provider configuration is in providers.tf + +# Data source to get current user +data "databricks_current_user" "me" {} + +# Local value for init script path +locals { + init_script_path = var.init_script_source_path != null ? 
var.init_script_source_path : "${path.root}/scripts/install-claude.sh" +} + +# Create or reference the volume for init scripts +resource "databricks_volume" "init_scripts" { + name = var.volume_name + catalog_name = var.catalog_name + schema_name = var.schema_name + volume_type = "MANAGED" + comment = "Volume for Claude Code CLI init scripts" + + lifecycle { + ignore_changes = [owner] + } +} + +# Upload the init script to the volume +resource "databricks_file" "init_script" { + source = local.init_script_path + path = "${databricks_volume.init_scripts.volume_path}/install-claude.sh" +} + +# Create the cluster with init script +resource "databricks_cluster" "coding_assistants" { + cluster_name = var.cluster_name + spark_version = var.spark_version + node_type_id = var.node_type_id + autotermination_minutes = var.autotermination_minutes + data_security_mode = "SINGLE_USER" + single_user_name = data.databricks_current_user.me.user_name + + # Autoscaling or fixed size + # Autoscaling is not supported for single-node clusters + dynamic "autoscale" { + for_each = var.cluster_mode == "STANDARD" && var.num_workers == null ? [1] : [] + content { + min_workers = var.min_workers + max_workers = var.max_workers + } + } + + # For single-node clusters, num_workers must be 0 (driver-only) + # For standard clusters, use the provided num_workers value + num_workers = var.cluster_mode == "SINGLE_NODE" ? 0 : var.num_workers + + # Single node configuration + # According to Databricks docs: single-node clusters run Spark locally with no worker nodes + spark_conf = var.cluster_mode == "SINGLE_NODE" ? { + "spark.databricks.cluster.profile" = "singleNode" + "spark.master" = "local[*]" + } : {} + + custom_tags = merge( + var.tags, + { + "ManagedBy" = "Terraform" + }, + var.cluster_mode == "SINGLE_NODE" ? 
{ + "ResourceClass" = "SingleNode" + } : {} + ) + + # Environment variables for Claude Code CLI + spark_env_vars = { + MLFLOW_EXPERIMENT_NAME = var.mlflow_experiment_name + } + + # Init script configuration + init_scripts { + volumes { + destination = "${databricks_volume.init_scripts.volume_path}/install-claude.sh" + } + } + + depends_on = [ + databricks_file.init_script + ] +} diff --git a/examples/adb-coding-assistants-cluster/outputs.tf b/examples/adb-coding-assistants-cluster/outputs.tf new file mode 100644 index 00000000..b54b8269 --- /dev/null +++ b/examples/adb-coding-assistants-cluster/outputs.tf @@ -0,0 +1,58 @@ +output "cluster_id" { + description = "The ID of the created cluster" + value = databricks_cluster.coding_assistants.id +} + +output "cluster_url" { + description = "URL to access the cluster in Databricks UI" + value = databricks_cluster.coding_assistants.url +} + +output "cluster_name" { + description = "Name of the created cluster" + value = databricks_cluster.coding_assistants.cluster_name +} + +output "volume_path" { + description = "Path to the volume containing init scripts" + value = databricks_volume.init_scripts.volume_path +} + +output "volume_full_name" { + description = "Full name of the volume" + value = "${var.catalog_name}.${var.schema_name}.${var.volume_name}" +} + +output "init_script_path" { + description = "Path to the init script in the volume" + value = databricks_file.init_script.path +} + +output "mlflow_experiment_name" { + description = "MLflow experiment name for tracing" + value = var.mlflow_experiment_name +} + +output "setup_instructions" { + description = "Instructions for using the cluster" + value = <<-EOT + Cluster deployed successfully! + + 1. Access cluster: ${databricks_cluster.coding_assistants.url} + 2. Wait for cluster to start (init script runs automatically) + 3. Open a notebook or terminal + 4. Run: source ~/.bashrc + 5. Verify: check-claude + 6. 
Start using: claude "your question" + + MLflow Experiment: ${var.mlflow_experiment_name} + + Helper commands: + - check-claude: Verify installation status + - claude-debug: Show configuration details + - claude-refresh-token: Update authentication + - claude-tracing-enable: Enable MLflow tracing + - claude-tracing-status: Check tracing status + - claude-tracing-disable: Disable tracing + EOT +} diff --git a/examples/adb-coding-assistants-cluster/providers.tf b/examples/adb-coding-assistants-cluster/providers.tf new file mode 100644 index 00000000..4d7800f3 --- /dev/null +++ b/examples/adb-coding-assistants-cluster/providers.tf @@ -0,0 +1,91 @@ +terraform { + required_providers { + azurerm = { + source = "hashicorp/azurerm" + version = ">=4.31.0" + } + databricks = { + source = "databricks/databricks" + version = ">=1.81.1" + } + external = { + source = "hashicorp/external" + version = ">=2.3.0" + } + } +} + +# Determine authentication approach based on variables provided +locals { + # Use profile-based auth if profile is specified + use_profile_auth = var.databricks_profile != null + + # For Azure resource ID approach + resource_regex = var.databricks_resource_id != null ? "(?i)subscriptions/(.+)/resourceGroups/(.+)/providers/Microsoft.Databricks/workspaces/(.+)" : "" + subscription_id_from_resource = var.databricks_resource_id != null ? regex(local.resource_regex, var.databricks_resource_id)[0] : null + resource_group = var.databricks_resource_id != null ? regex(local.resource_regex, var.databricks_resource_id)[1] : null + databricks_workspace_name = var.databricks_resource_id != null ? regex(local.resource_regex, var.databricks_resource_id)[2] : null +} + +# Get Azure subscription ID from Azure CLI or environment variable when not provided via resource ID +# This is needed for the Azure provider even when using profile-based Databricks auth +data "external" "azure_subscription" { + count = local.subscription_id_from_resource == null ? 
1 : 0 + program = ["bash", "-c", "SUBSCRIPTION_ID=$(az account show --query id -o tsv 2>/dev/null || echo $${ARM_SUBSCRIPTION_ID:-}); echo \"{\\\"id\\\":\\\"$${SUBSCRIPTION_ID:-}\\\"}\""] +} + +locals { + # Use subscription ID from resource ID, or from Azure CLI/environment, or null (provider will try to auto-detect) + subscription_id = coalesce( + local.subscription_id_from_resource, + try(data.external.azure_subscription[0].result.id != "" ? data.external.azure_subscription[0].result.id : null, null) + ) +} + +# Data source to get current Azure client configuration (only for Azure resource ID approach) +data "azurerm_client_config" "current" { + count = local.use_profile_auth ? 0 : 1 +} + +# Data source to get the resource group (only for Azure resource ID approach) +data "azurerm_resource_group" "this" { + count = local.use_profile_auth ? 0 : 1 + name = local.resource_group +} + +# Configure the Azure Provider +# When using profile-based auth, subscription_id is not needed (provider will auto-detect if Azure CLI is configured) +# When using Azure resource ID approach, subscription_id is extracted from the resource ID +provider "azurerm" { + subscription_id = local.subscription_id + features {} + skip_provider_registration = local.use_profile_auth + + # Allow provider to work without explicit subscription_id when using profile auth + # It will attempt to auto-detect from Azure CLI or environment variables +} + +# Data source to get the Databricks workspace (only for Azure resource ID approach) +data "azurerm_databricks_workspace" "this" { + count = local.use_profile_auth ? 0 : 1 + name = local.databricks_workspace_name + resource_group_name = local.resource_group +} + +# Configure the Databricks Provider +# Two authentication approaches supported: +# +# 1. Profile-based (Recommended - Simple and cloud-agnostic): +# Set databricks_profile variable to your ~/.databrickscfg profile name +# Example: databricks_profile = "dok" +# +# 2. 
Azure resource ID (Azure-specific): +# Set databricks_resource_id to your Azure Databricks workspace resource ID +# Requires Azure CLI authentication (az login) +# +# See: https://docs.databricks.com/dev-tools/auth/unified-auth.html +provider "databricks" { + profile = var.databricks_profile + host = local.use_profile_auth ? null : data.azurerm_databricks_workspace.this[0].workspace_url +} + diff --git a/examples/adb-coding-assistants-cluster/scripts/README.md b/examples/adb-coding-assistants-cluster/scripts/README.md new file mode 100644 index 00000000..d4ddcecd --- /dev/null +++ b/examples/adb-coding-assistants-cluster/scripts/README.md @@ -0,0 +1,358 @@ +# Claude Code CLI Installation Scripts + +This directory contains installation scripts for Claude Code CLI on Databricks clusters. + +## Scripts Overview + +| Script | Purpose | Network Required | +|--------|---------|------------------| +| `install-claude.sh` | Full online installation with MLflow tracing | Yes | +| `install-claude-minimal.sh` | Minimal installation (Claude CLI only) | Yes | +| `vscode-setup.sh` | VS Code/Cursor Remote SSH helper | No | +| `check-network-deps.sh` | Network connectivity preflight check | Yes | + +> **Note**: For offline/air-gapped installations, use the separate [`adb-coding-assistants-cluster-offline`](../adb-coding-assistants-cluster-offline/README.md) module. 
+ +## Quick Start + +### Online Installation (Default) + +For clusters with internet access: + +```hcl +resource "databricks_cluster" "claude_cluster" { + cluster_name = "claude-coding-assistant" + spark_version = data.databricks_spark_version.latest_lts.id + node_type_id = "Standard_D8pds_v6" + autotermination_minutes = 60 + num_workers = 0 + + init_scripts { + dbfs { + destination = "dbfs:/init-scripts/install-claude.sh" + } + } +} +``` + + +## What Gets Installed + +The script installs: + +- ✅ **Node.js 20.x** - Required runtime for Claude CLI +- ✅ **Claude Code CLI** - AI coding assistant +- ✅ **MLflow** - For tracing Claude interactions +- ✅ **System tools** - curl, git, jq +- ✅ **Bash helpers** - Convenience functions for using Claude + +## Helper Commands + +After installation, these commands are available: + +```bash +# Verify installation +check-claude + +# Show debug info +claude-debug + +# Refresh authentication +claude-refresh-token + +# Token management +claude-token-status # Check token freshness +claude-setup-token-refresh # Enable automatic hourly refresh +claude-remove-token-refresh # Disable automatic refresh + +# Enable MLflow tracing +claude-tracing-enable + +# Check tracing status +claude-tracing-status + +# Disable tracing +claude-tracing-disable + +# VS Code/Cursor Remote SSH helpers +claude-vscode-setup # Show setup guide +claude-vscode-env # Get Python virtual environment path +claude-vscode-check # Verify VS Code/Cursor setup +claude-vscode-config # Generate VS Code settings.json snippet +``` + +## VS Code/Cursor Remote SSH Setup + +For remote development using VS Code or Cursor, follow these steps: + +### Quick Setup + +1. **Get Python interpreter path** (after SSH connection): + ```bash + claude-vscode-env + # Or manually: echo $DATABRICKS_VIRTUAL_ENV + ``` + +2. **Show complete setup guide**: + ```bash + claude-vscode-setup + ``` + +3. **Generate VS Code settings**: + ```bash + claude-vscode-config + ``` + +### Detailed Steps + +#### 1. 
Install Remote SSH Extension + +- **VS Code**: Install "Remote - SSH" extension from marketplace +- **Cursor**: Built-in Remote SSH extension (already included) + +#### 2. Configure Default Extensions + +Open Command Palette (`Cmd+Shift+P` / `Ctrl+Shift+P`): +- Type: `Remote-SSH: Settings` +- Or manually edit `settings.json`: + +```json +{ + "remote.SSH.defaultExtensions": [ + "ms-python.python", + "ms-toolsai.jupyter" + ] +} +``` + +#### 3. Connect to Cluster + +- Command Palette → `Remote-SSH: Connect to Host` +- Enter your cluster SSH connection details + +#### 4. Select Python Interpreter + +After connecting: + +1. Run `claude-vscode-env` to get the Python path +2. Command Palette → `Python: Select Interpreter` +3. Enter or browse to: `/databricks/python*/pythonEnv-*/bin/python` + +**Important**: Always select the `pythonEnv-xxx` interpreter for full Databricks Runtime library access. + +#### 5. Verify Setup + +```bash +# Check setup status +claude-vscode-check + +# Test in a Python file +import pyspark +import pandas +import mlflow +print("Setup successful!") +``` + +### Important Notes + +- **IPYNB notebooks** and **`*.py` Databricks notebooks** have access to Databricks globals (`dbutils`, `spark`, etc.) 
+- **Regular Python `*.py` files** do NOT have access to Databricks globals +- Always select the `pythonEnv-xxx` interpreter for full Databricks Runtime library access + +### Standalone Helper Script + +A standalone helper script is also available: + +```bash +# Show setup guide +./scripts/vscode-setup.sh --guide + +# Get Python interpreter path +./scripts/vscode-setup.sh --env + +# Check current setup +./scripts/vscode-setup.sh --check + +# Generate settings.json +./scripts/vscode-setup.sh --settings +``` + +## Usage Examples + +```bash +# Interactive mode +claude + +# One-shot query +echo "Write a Python function to reverse a string" | claude --print + +# From file +claude < prompt.txt + +# With streaming +claude --stream < task.md +``` + +## Internet dependencies (online mode) + +The online installer requires access to: + +| Domain | Purpose | +|--------|---------| +| `claude.ai` | Claude CLI installer script | +| `storage.googleapis.com` | Claude CLI binaries (GCS bucket) | +| `deb.nodesource.com` | Node.js repository | +| `archive.ubuntu.com` | APT packages (x86_64) | +| `ports.ubuntu.com` | APT packages (ARM64) | +| `registry.npmjs.org` | NPM packages | +| `pypi.org` | Python package index | +| `files.pythonhosted.org` | Python package downloads | +| `raw.githubusercontent.com` | Databricks skills | +| `${DATABRICKS_HOST}` | Databricks API endpoints | + +> **Tip**: Run `./scripts/check-network-deps.sh` to verify all dependencies are accessible before installation. + +## Firewall configuration + +If using a firewall, allow HTTPS (443) and HTTP (80) to these domains, or use the offline installation method. 
+ +## Environment Variables + +### Standard Variables (Set automatically by Databricks) + +- `DATABRICKS_HOST` - Workspace URL +- `DATABRICKS_TOKEN` - Authentication token + +### Optional Configuration + +- `MLFLOW_EXPERIMENT_NAME` - Custom experiment name (default: `/Workspace/Shared/claude-code-tracing`) + +## Architecture Support + +The installer supports: + +- ✅ **amd64** (x86_64) - Default +- ✅ **arm64** (aarch64) - Auto-detected + +## Network dependency checker + +Before installation, you can verify that all required domains are accessible using the network dependency checker: + +```bash +# Standard check +./scripts/check-network-deps.sh + +# Detailed output with HTTP status codes +./scripts/check-network-deps.sh --verbose +``` + +Example output: +``` +=== Claude Code Network Dependency Check === + +Checking required domains... + +[OK] claude.ai +[OK] storage.googleapis.com +[OK] deb.nodesource.com +[OK] archive.ubuntu.com +[OK] ports.ubuntu.com +[OK] registry.npmjs.org +[OK] pypi.org +[OK] files.pythonhosted.org +[OK] raw.githubusercontent.com + +---------------------------------------- +Result: 9/9 dependencies reachable + +SUCCESS: All dependencies are accessible +``` + +If any dependencies fail, the script provides troubleshooting guidance: +``` +[OK] claude.ai +[FAIL] deb.nodesource.com - Connection timed out +... 
+Result: 8/9 dependencies reachable + +FAILED: Some dependencies are not accessible + +Troubleshooting tips: + - Check firewall rules allow HTTPS (443) to the failed domains + - Verify proxy settings if behind a corporate proxy + - For air-gapped environments, use the offline installation module +``` + +## Troubleshooting + +### Installation fails during cluster startup + +Check the init script logs: +```bash +cat /tmp/init-script-claude.log +``` + +### Claude command not found + +Reload bashrc: +```bash +source ~/.bashrc +``` + +### Authentication errors + +Refresh token: +```bash +claude-refresh-token +``` + +### Installation works but Claude fails + +Check configuration: +```bash +check-claude +claude-debug +``` + +## File structure + +``` +scripts/ +├── install-claude.sh # Full online installer with MLflow +├── install-claude-minimal.sh # Minimal installer (Claude CLI only) +├── vscode-setup.sh # VS Code/Cursor Remote SSH helper +├── check-network-deps.sh # Network dependency checker +└── README.md # This file +``` + +> **Offline Installation**: See the [`adb-coding-assistants-cluster-offline`](../adb-coding-assistants-cluster-offline/README.md) module for offline/air-gapped installation support. 
+ +## Version Compatibility + +- **Databricks Runtime**: 13.0+ LTS recommended +- **Python**: 3.9+ (included in DBR) +- **Node.js**: 20.x (installed by script) +- **MLflow**: 3.4+ (installed by script) + +## Security Notes + +### Authentication +- Uses Databricks personal access tokens (auto-configured) +- Tokens are ephemeral and cluster-scoped +- No long-lived credentials stored + +### Network Security +- All traffic uses HTTPS +- Authentication via `ANTHROPIC_AUTH_TOKEN` environment variable +- Custom headers for Databricks integration + + +## Support + +- **Claude CLI Issues**: [Claude AI Documentation](https://claude.ai/docs) +- **Databricks Issues**: Contact Databricks Support +- **Script Issues**: Open issue in repository + +## License + +See repository LICENSE file. diff --git a/examples/adb-coding-assistants-cluster/scripts/check-network-deps.sh b/examples/adb-coding-assistants-cluster/scripts/check-network-deps.sh new file mode 100755 index 00000000..3ef4bec2 --- /dev/null +++ b/examples/adb-coding-assistants-cluster/scripts/check-network-deps.sh @@ -0,0 +1,238 @@ +#!/bin/bash +# +# Network Dependency Checker for Claude Code Installation +# +# Verifies connectivity to all required domains before running install-claude.sh. +# Run this script to diagnose network/firewall issues in restricted environments. 
+# +# Usage: +# ./check-network-deps.sh # Standard check +# ./check-network-deps.sh --verbose # Detailed output +# + +set -euo pipefail + +# ============================================================================ +# Configuration +# ============================================================================ + +CONNECT_TIMEOUT=5 +VERBOSE=false + +# Color codes (disabled if not a terminal) +if [[ -t 1 ]]; then + GREEN='\033[0;32m' + RED='\033[0;31m' + YELLOW='\033[0;33m' + BOLD='\033[1m' + NC='\033[0m' # No Color +else + GREEN='' + RED='' + YELLOW='' + BOLD='' + NC='' +fi + +# Dependencies to check: "domain|purpose|test_url" +DEPENDENCIES=( + "claude.ai|CLI installer script|https://claude.ai/install.sh" + "storage.googleapis.com|Claude CLI binaries|https://storage.googleapis.com/" + "deb.nodesource.com|Node.js repo|https://deb.nodesource.com/setup_20.x" + "archive.ubuntu.com|APT packages (x86)|http://archive.ubuntu.com/ubuntu/" + "ports.ubuntu.com|APT packages (ARM)|http://ports.ubuntu.com/ubuntu-ports/" + "registry.npmjs.org|NPM packages|https://registry.npmjs.org/" + "pypi.org|Python packages|https://pypi.org/simple/mlflow/" + "files.pythonhosted.org|Package downloads|https://files.pythonhosted.org/" + "raw.githubusercontent.com|Databricks skills|https://raw.githubusercontent.com/databricks-solutions/ai-dev-kit/main/README.md" +) + +# ============================================================================ +# Functions +# ============================================================================ + +usage() { + cat </dev/null; then + host "$domain" &>/dev/null + elif command -v nslookup &>/dev/null; then + nslookup "$domain" &>/dev/null + elif command -v getent &>/dev/null; then + getent hosts "$domain" &>/dev/null + else + # Fall back to ping for DNS resolution + ping -c 1 -W 2 "$domain" &>/dev/null + fi +} + +check_url() { + local url=$1 + local http_code + + http_code=$(curl -s -o /dev/null -w "%{http_code}" \ + --connect-timeout "$CONNECT_TIMEOUT" \ 
+ --max-time $((CONNECT_TIMEOUT * 2)) \ + -L "$url" 2>/dev/null || echo "000") + + echo "$http_code" +} + +check_dependency() { + local entry=$1 + local domain purpose test_url + + IFS='|' read -r domain purpose test_url <<< "$entry" + + # Check DNS first + if ! check_dns "$domain"; then + log_fail "$domain - DNS resolution failed" + log_verbose "Purpose: $purpose" + log_verbose "Test URL: $test_url" + return 1 + fi + + # Check HTTP connectivity + local http_code + http_code=$(check_url "$test_url") + + if [[ "$http_code" =~ ^(2[0-9]{2}|3[0-9]{2})$ ]]; then + log_ok "$domain" + log_verbose "Purpose: $purpose" + log_verbose "HTTP status: $http_code" + log_verbose "Test URL: $test_url" + return 0 + else + case "$http_code" in + 000) + log_fail "$domain - Connection timed out" + ;; + 400) + # 400 is common for API endpoints at root - domain is reachable + log_ok "$domain" + log_verbose "Purpose: $purpose" + log_verbose "HTTP status: $http_code (API endpoint - root returns 400)" + log_verbose "Test URL: $test_url" + return 0 + ;; + 403) + log_fail "$domain - Access forbidden (HTTP 403)" + ;; + 404) + # 404 means domain is reachable, just URL changed + log_ok "$domain" + log_verbose "Purpose: $purpose" + log_verbose "HTTP status: $http_code (domain reachable)" + log_verbose "Test URL: $test_url" + return 0 + ;; + *) + log_fail "$domain - HTTP $http_code" + ;; + esac + log_verbose "Purpose: $purpose" + log_verbose "Test URL: $test_url" + return 1 + fi +} + +# ============================================================================ +# Main +# ============================================================================ + +# Parse arguments +while [[ $# -gt 0 ]]; do + case $1 in + --verbose|-v) + VERBOSE=true + shift + ;; + --help|-h) + usage + exit 0 + ;; + *) + echo "Unknown option: $1" + usage + exit 1 + ;; + esac +done + +# Check for curl +if ! 
command -v curl &>/dev/null; then + echo "Error: curl is required but not installed" + exit 1 +fi + +echo "" +echo -e "${BOLD}=== Claude Code Network Dependency Check ===${NC}" +echo "" +echo "Checking required domains..." +echo "" + +pass_count=0 +fail_count=0 +total=${#DEPENDENCIES[@]} + +for dep in "${DEPENDENCIES[@]}"; do + if check_dependency "$dep"; then + ((pass_count++)) + else + ((fail_count++)) + fi +done + +echo "" +echo "----------------------------------------" +echo -e "Result: ${BOLD}${pass_count}/${total}${NC} dependencies reachable" + +if [[ $fail_count -gt 0 ]]; then + echo "" + echo -e "${RED}FAILED: Some dependencies are not accessible${NC}" + echo "" + echo "Troubleshooting tips:" + echo " - Check firewall rules allow HTTPS (443) to the failed domains" + echo " - Verify proxy settings if behind a corporate proxy" + echo " - For air-gapped environments, use the offline installation module" + echo "" + exit 1 +else + echo "" + echo -e "${GREEN}SUCCESS: All dependencies are accessible${NC}" + echo "" + exit 0 +fi diff --git a/examples/adb-coding-assistants-cluster/scripts/install-claude-minimal.sh b/examples/adb-coding-assistants-cluster/scripts/install-claude-minimal.sh new file mode 100755 index 00000000..3a365444 --- /dev/null +++ b/examples/adb-coding-assistants-cluster/scripts/install-claude-minimal.sh @@ -0,0 +1,68 @@ +#!/bin/bash +# +# Databricks Cluster Init Script - Claude Code CLI (Minimal Version) +# Installs Claude Code CLI with basic configuration only +# + +set -euo pipefail +export DEBIAN_FRONTEND=noninteractive + +LOG_FILE="/tmp/init-script-claude.log" +log() { + echo "[$(date '+%H:%M:%S')] $1" | tee -a "$LOG_FILE" +} + +# Install system dependencies +log "Installing system dependencies..." +sudo apt-get update -qq -y >> "$LOG_FILE" 2>&1 +sudo apt-get install -y -qq curl git >> "$LOG_FILE" 2>&1 || log "Warning: Some packages failed to install" + +# Install Node.js 20.x +if ! 
command -v node >/dev/null 2>&1; then + log "Installing Node.js 20.x..." + curl -fsSL --max-time 300 --retry 3 https://deb.nodesource.com/setup_20.x | sudo -E bash - >> "$LOG_FILE" 2>&1 + sudo apt-get install -y -qq nodejs >> "$LOG_FILE" 2>&1 + log "Node.js installed: $(node --version)" +else + log "Node.js already installed: $(node --version)" +fi + +# Install Claude Code CLI +# Note: Uses official Anthropic installer. For supply-chain verification, +# consider npm install @anthropic-ai/claude-code instead. +if ! command -v claude >/dev/null 2>&1; then + log "Installing Claude Code CLI..." + curl -fsSL https://claude.ai/install.sh | bash >> "$LOG_FILE" 2>&1 + log "Claude Code CLI installed" +else + log "Claude Code CLI already installed" +fi + +# Add basic configuration to bashrc +log "Configuring bashrc..." + +# Remove old Claude section if it exists +if [ -f "$HOME/.bashrc" ]; then + sed -i '/### CLAUDE_CODE_MINIMAL_START ###/,/### CLAUDE_CODE_MINIMAL_END ###/d' "$HOME/.bashrc" || true +fi + +# Add Claude to PATH and set environment variables +cat >> "$HOME/.bashrc" <<'BASHRC_EOF' + +### CLAUDE_CODE_MINIMAL_START ### +# Claude Code CLI - Minimal Setup +export PATH="$HOME/.claude/bin:$HOME/.local/bin:$PATH" + +# Set Anthropic environment variables for Claude CLI +if [ -n "$DATABRICKS_TOKEN" ] && [ -n "$DATABRICKS_HOST" ]; then + export ANTHROPIC_AUTH_TOKEN="$DATABRICKS_TOKEN" + export ANTHROPIC_BASE_URL="${DATABRICKS_HOST}/serving-endpoints/anthropic" + export ANTHROPIC_MODEL="databricks-claude-sonnet-4-5" + export ANTHROPIC_CUSTOM_HEADERS="x-databricks-disable-beta-headers: true" + export CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1 +fi +### CLAUDE_CODE_MINIMAL_END ### +BASHRC_EOF + +log "Configuration complete. 
Log file: $LOG_FILE" +log "After cluster starts, run: source ~/.bashrc" diff --git a/examples/adb-coding-assistants-cluster/scripts/install-claude.sh b/examples/adb-coding-assistants-cluster/scripts/install-claude.sh new file mode 100755 index 00000000..26f12eb1 --- /dev/null +++ b/examples/adb-coding-assistants-cluster/scripts/install-claude.sh @@ -0,0 +1,760 @@ +#!/bin/bash +# +# Databricks Cluster Init Script - Claude Code CLI +# Installs Claude Code CLI with MLflow tracing +# +# Note: For offline/air-gapped installations, use the adb-coding-assistants-cluster-offline module instead +# + +set -uo pipefail +export DEBIAN_FRONTEND=noninteractive NEEDRESTART_MODE=a CI=true + +L="/tmp/init-script-claude.log" +log() { echo "[$(date '+%H:%M:%S')] $1" | tee -a "$L"; } +cmd_exists() { command -v "$1" >/dev/null 2>&1; } + +# Install Claude Code CLI +install_claude() { + if cmd_exists claude; then + log "[OK] Claude Code already installed" + return 0 + fi + + log "Installing Claude Code CLI..." + if curl -fsSL https://claude.ai/install.sh | bash &>>$L; then + log "[OK] Claude Code installation completed" + return 0 + else + log "[WARN] Claude Code installation failed (will be available after manual install)" + return 1 + fi +} + +# Install Node.js (required for Claude Code CLI) +install_nodejs() { + if cmd_exists node && cmd_exists npm; then + log "[OK] Node.js already installed ($(node --version))" + return 0 + fi + + log "Installing Node.js 20.x..." 
+ if curl -fsSL --max-time 300 --retry 3 https://deb.nodesource.com/setup_20.x | sudo -E bash - &>>$L; then + if sudo apt-get update -qq -y &>>$L && sudo apt-get install -y -qq nodejs &>>$L; then + if cmd_exists node && cmd_exists npm; then + log "[OK] Node.js/npm installed successfully ($(node --version))" + return 0 + fi + fi + fi + + log "[WARN] Node.js installation failed (Claude Code CLI will not work)" + return 1 +} + +# Add helper functions to bashrc +setup_bashrc() { + local START_MARKER="### CLAUDE_CODE_HELPERS_START ###" + local END_MARKER="### CLAUDE_CODE_HELPERS_END ###" + + # Backup bashrc + [ -f "$HOME/.bashrc" ] && cp "$HOME/.bashrc" "$HOME/.bashrc.backup-$(date +%s)" + + # Remove any existing Claude sections (between markers) + if [ -f "$HOME/.bashrc" ]; then + if grep -q "$START_MARKER" "$HOME/.bashrc" 2>/dev/null; then + log "Removing old bashrc helpers..." + # Remove everything between START and END markers (inclusive) + sed -i "/$START_MARKER/,/$END_MARKER/d" "$HOME/.bashrc" + fi + fi + + W="${DATABRICKS_HOST:-}" + E="${MLFLOW_EXPERIMENT_NAME:-/Workspace/Shared/claude-code-tracing}" + + log "Adding helpers to bashrc..." + + cat >> "$HOME/.bashrc" <<'EOF' + +### CLAUDE_CODE_HELPERS_START ### +# Claude Code CLI Setup (auto-generated - do not edit manually) +export PATH="$HOME/.claude/bin:$HOME/.local/bin:$PATH" + +# Claude Code MLflow tracing helpers +export DATABRICKS_HOST="${DATABRICKS_HOST:-WS_PH}" +export MLFLOW_EXPERIMENT_NAME="${MLFLOW_EXPERIMENT_NAME:-EXP_PH}" + +# Set Anthropic environment variables for Claude CLI +# NOTE: These env vars are the PRIMARY authentication method and take precedence +# over settings.json. They are always fresh because they're set on every login. +# The settings.json file serves as a fallback for cases where env vars aren't set. +# Using ANTHROPIC_AUTH_TOKEN only (not ANTHROPIC_API_KEY) to avoid auth conflicts. 
+if [ -n "$DATABRICKS_TOKEN" ] && [ -n "$DATABRICKS_HOST" ]; then + export ANTHROPIC_AUTH_TOKEN="$DATABRICKS_TOKEN" + export ANTHROPIC_BASE_URL="${DATABRICKS_HOST}/serving-endpoints/anthropic" + export ANTHROPIC_MODEL="databricks-claude-sonnet-4-5" + export ANTHROPIC_CUSTOM_HEADERS="x-databricks-disable-beta-headers: true" + export CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1 +fi + +# Internal function to generate Claude settings (single source of truth) +_generate_claude_config() { + local config_file="$HOME/.claude/settings.json" + + cat > "$config_file" </dev/null 2>&1; then + if ! jq empty "$config_file" 2>/dev/null; then + echo "[WARN] Claude settings JSON validation failed" >&2 + return 1 + fi + fi + + # Store token hash for change detection + if [ -n "$DATABRICKS_TOKEN" ]; then + echo -n "$DATABRICKS_TOKEN" | sha256sum | cut -d' ' -f1 > "$HOME/.claude/.token_hash" 2>/dev/null || true + fi + + return 0 +} + +# Check if token has changed and refresh if needed +_check_and_refresh_token() { + if [ -z "$DATABRICKS_TOKEN" ] || [ -z "$DATABRICKS_HOST" ]; then + return 0 # Skip if token not available + fi + + local config_file="$HOME/.claude/settings.json" + local token_hash_file="$HOME/.claude/.token_hash" + + # Calculate current token hash + local current_hash + current_hash=$(echo -n "$DATABRICKS_TOKEN" | sha256sum | cut -d' ' -f1 2>/dev/null || echo "") + + if [ -z "$current_hash" ]; then + return 0 # Skip if hash calculation failed + fi + + # Check if token has changed + if [ -f "$token_hash_file" ]; then + local stored_hash + stored_hash=$(cat "$token_hash_file" 2>/dev/null || echo "") + if [ "$current_hash" = "$stored_hash" ]; then + return 0 # Token unchanged, no refresh needed + fi + fi + + # Token changed or first time - refresh config + mkdir -p "$HOME/.claude" + if _generate_claude_config >/dev/null 2>&1; then + # Only show message if in interactive shell (not cron) + if [ -t 0 ]; then + echo "[OK] Claude Code token refreshed automatically" + fi + return 0 + 
fi + + return 1 +} + +# Auto-generate Claude settings from environment on first login +# NOTE: settings.json acts as a FALLBACK - env vars (set above) are the primary method. +# This is only generated if the file doesn't exist, to provide authentication when +# env vars might not be present (e.g., in some non-standard shell environments). +if [ ! -f "$HOME/.claude/settings.json" ] && [ -n "$DATABRICKS_TOKEN" ] && [ -n "$DATABRICKS_HOST" ]; then + mkdir -p "$HOME/.claude" + if _generate_claude_config; then + echo "[OK] Claude Code settings.json created (fallback - env vars take precedence)" + else + echo "[WARN] Failed to generate Claude settings (run claude-refresh-token to retry)" + fi +fi + +# Auto-refresh token on shell login if it has changed +# This ensures settings.json stays in sync with DATABRICKS_TOKEN +if [ -n "$DATABRICKS_TOKEN" ] && [ -n "$DATABRICKS_HOST" ]; then + _check_and_refresh_token +fi + +# Regenerate Claude settings from current environment +claude-refresh-token() { + if [ -z "$DATABRICKS_TOKEN" ] || [ -z "$DATABRICKS_HOST" ]; then + echo "[WARN] DATABRICKS_TOKEN and DATABRICKS_HOST must be set" + echo " On Databricks clusters, these should be automatically available" + return 1 + fi + + mkdir -p "$HOME/.claude" + if _generate_claude_config; then + echo "[OK] Claude Code settings updated with:" + echo " DATABRICKS_HOST: $DATABRICKS_HOST" + echo " DATABRICKS_TOKEN: ${DATABRICKS_TOKEN:0:20}..." 
+ else + echo "[WARN] Failed to update Claude settings" + return 1 + fi +} + +# Setup cron job for periodic token refresh (runs hourly) +claude-setup-token-refresh() { + local cron_file="$HOME/.claude/token-refresh-cron" + + # Create cron wrapper script + mkdir -p "$HOME/.claude" + cat > "$cron_file" <<'CRON_SCRIPT' +#!/bin/bash +# Auto-generated cron script for Claude token refresh +# This script is called by cron to refresh the Claude token periodically + +# Source bashrc to get functions +if [ -f "$HOME/.bashrc" ]; then + source "$HOME/.bashrc" >/dev/null 2>&1 +fi + +# Check and refresh token if needed +_check_and_refresh_token +CRON_SCRIPT + chmod +x "$cron_file" + + # Check if cron job already exists + if crontab -l 2>/dev/null | grep -q "token-refresh-cron"; then + echo "[OK] Token refresh cron job already configured" + return 0 + fi + + # Add cron job + (crontab -l 2>/dev/null; echo "0 * * * * $cron_file") | crontab - + if [ $? -eq 0 ]; then + echo "[OK] Token refresh cron job configured (runs hourly)" + echo " To remove: crontab -e" + else + echo "[WARN] Failed to setup cron job (may require cron service)" + return 1 + fi +} + +# Remove token refresh cron job +claude-remove-token-refresh() { + if crontab -l 2>/dev/null | grep -q "token-refresh-cron"; then + crontab -l 2>/dev/null | grep -v "token-refresh-cron" | crontab - + echo "[OK] Token refresh cron job removed" + else + echo "[INFO] No token refresh cron job found" + fi +} + +# Check token freshness status +claude-token-status() { + if [ -z "$DATABRICKS_TOKEN" ] || [ -z "$DATABRICKS_HOST" ]; then + echo "[WARN] DATABRICKS_TOKEN and DATABRICKS_HOST must be set" + return 1 + fi + + local config_file="$HOME/.claude/settings.json" + local token_hash_file="$HOME/.claude/.token_hash" + + echo "=== Claude Token Status ===" + echo "" + + # Check if config file exists + if [ -f "$config_file" ]; then + echo "[OK] Settings file: $config_file" + local file_age + file_age=$(stat -c %Y "$config_file" 2>/dev/null || 
stat -f %m "$config_file" 2>/dev/null || echo "0") + local current_time + current_time=$(date +%s) + local age_hours + age_hours=$(( (current_time - file_age) / 3600 )) + echo " Last updated: ${age_hours} hour(s) ago" + else + echo "[ERROR] Settings file: missing" + fi + + echo "" + + # Check token hash + if [ -f "$token_hash_file" ]; then + local current_hash + current_hash=$(echo -n "$DATABRICKS_TOKEN" | sha256sum | cut -d' ' -f1 2>/dev/null || echo "") + local stored_hash + stored_hash=$(cat "$token_hash_file" 2>/dev/null || echo "") + if [ "$current_hash" = "$stored_hash" ] && [ -n "$current_hash" ]; then + echo "[OK] Token: matches stored hash (up to date)" + else + echo "[WARN] Token: differs from stored hash (needs refresh)" + echo " Run: claude-refresh-token" + fi + else + echo "[INFO] Token hash: not stored (will be created on next refresh)" + fi + + echo "" + + # Check cron job + if crontab -l 2>/dev/null | grep -q "token-refresh-cron"; then + echo "[OK] Auto-refresh: enabled (hourly cron job)" + else + echo "[INFO] Auto-refresh: disabled" + echo " Enable with: claude-setup-token-refresh" + fi +} + +claude-tracing-enable() { + if [ -z "$DATABRICKS_TOKEN" ] || [ -z "$DATABRICKS_HOST" ]; then + echo "[WARN] DATABRICKS_TOKEN and DATABRICKS_HOST must be set" + echo " On Databricks clusters, these should be automatically available" + return 1 + fi + + if ! 
command -v mlflow >/dev/null 2>&1; then + echo "[WARN] MLflow is not installed" + return 1 + fi + + # Create experiment if it doesn't exist + python3 </dev/null 2>&1; then + echo "[OK] Claude Code CLI: $(which claude)" + claude --version 2>&1 | head -1 || echo " (version check failed)" + else + echo "[ERROR] Claude Code CLI: not found" + [ -f "$HOME/.claude/bin/claude" ] && echo " Binary exists at: $HOME/.claude/bin/claude" + [ -f "$HOME/.local/bin/claude" ] && echo " Binary exists at: $HOME/.local/bin/claude" + fi + echo "" + + # Check configs + echo "Configuration files:" + if [ -f "$HOME/.claude/settings.json" ]; then + echo " [OK] Claude settings: $HOME/.claude/settings.json" + echo " Preview: $(head -3 $HOME/.claude/settings.json | tail -1)" + else + echo " [ERROR] Claude settings: missing" + fi + echo "" + + # Check environment + echo "Environment variables:" + [ -n "$DATABRICKS_HOST" ] && echo " [OK] DATABRICKS_HOST: ${DATABRICKS_HOST}" || echo " [ERROR] DATABRICKS_HOST: not set" + [ -n "$DATABRICKS_TOKEN" ] && echo " [OK] DATABRICKS_TOKEN: ${DATABRICKS_TOKEN:0:20}..." || echo " [ERROR] DATABRICKS_TOKEN: not set" + [ -n "$ANTHROPIC_API_KEY" ] && echo " [OK] ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY:0:20}..." || echo " [ERROR] ANTHROPIC_API_KEY: not set" + [ -n "$ANTHROPIC_AUTH_TOKEN" ] && echo " [OK] ANTHROPIC_AUTH_TOKEN: ${ANTHROPIC_AUTH_TOKEN:0:20}..." 
|| echo " [ERROR] ANTHROPIC_AUTH_TOKEN: not set" + [ -n "$ANTHROPIC_BASE_URL" ] && echo " [OK] ANTHROPIC_BASE_URL: ${ANTHROPIC_BASE_URL}" || echo " [ERROR] ANTHROPIC_BASE_URL: not set" + [ -n "$ANTHROPIC_MODEL" ] && echo " [OK] ANTHROPIC_MODEL: ${ANTHROPIC_MODEL}" || echo " [ERROR] ANTHROPIC_MODEL: not set" + [ -n "$ANTHROPIC_CUSTOM_HEADERS" ] && echo " [OK] ANTHROPIC_CUSTOM_HEADERS: ${ANTHROPIC_CUSTOM_HEADERS}" || echo " [ERROR] ANTHROPIC_CUSTOM_HEADERS: not set" + echo "" + + # Check MLflow + if command -v mlflow >/dev/null 2>&1; then + echo "[OK] MLflow: $(mlflow --version 2>&1)" + else + echo "[ERROR] MLflow: not found" + fi + echo "" + + # Test Claude authentication + echo "Testing Claude CLI authentication:" + if command -v claude >/dev/null 2>&1; then + if [ -n "$ANTHROPIC_API_KEY" ] || [ -n "$ANTHROPIC_AUTH_TOKEN" ]; then + echo " [OK] Authentication configured via environment variables" + echo " Test with: echo 'what is 1+1?' | claude --print" + else + echo " [WARN] ANTHROPIC_API_KEY/ANTHROPIC_AUTH_TOKEN not set" + echo " Run: source ~/.bashrc" + fi + fi + echo "" + + # VS Code/Cursor Remote SSH info + echo "VS Code/Cursor Remote SSH:" + local venv_path + venv_path=$(claude-vscode-env 2>/dev/null) + if [ $? -eq 0 ] && [ -n "$venv_path" ]; then + echo " [OK] Python virtual environment: $venv_path" + echo " Run 'claude-vscode-setup' for setup instructions" + else + echo " [INFO] Run 'claude-vscode-setup' for Remote SSH setup guide" + fi + echo "" + + echo "Run 'source ~/.bashrc' if commands are still not found" +} + +claude-debug() { + echo "=== Claude CLI Debug Info ===" + echo "" + echo "Settings file:" + [ -f "$HOME/.claude/settings.json" ] && cat "$HOME/.claude/settings.json" || echo " Missing!" 
+ echo "" + echo "Environment:" + env | grep -E "ANTHROPIC|DATABRICKS" || echo " No relevant env vars" + echo "" + echo "Claude config directory:" + ls -la "$HOME/.claude/" 2>/dev/null || echo " Directory doesn't exist" +} + +# VS Code/Cursor Remote SSH helpers +claude-vscode-env() { + # Show the Databricks virtual environment path for VS Code/Cursor + if [ -n "$DATABRICKS_VIRTUAL_ENV" ]; then + echo "$DATABRICKS_VIRTUAL_ENV" + else + # Try to find pythonEnv-* directories + local python_envs + python_envs=$(find /databricks/python* -maxdepth 1 -type d -name "pythonEnv-*" 2>/dev/null | head -1) + if [ -n "$python_envs" ]; then + echo "$python_envs" + else + echo "[WARN] DATABRICKS_VIRTUAL_ENV not set and pythonEnv-* not found" + echo " Try: echo \$DATABRICKS_VIRTUAL_ENV" + return 1 + fi + fi +} + +claude-vscode-setup() { + echo "=== VS Code/Cursor Remote SSH Setup Guide ===" + echo "" + echo "1. Install Remote SSH Extension" + echo " - VS Code: Install 'Remote - SSH' extension" + echo " - Cursor: Built-in Remote SSH extension (already included)" + echo "" + echo "2. Configure Default Extensions" + echo " Open Command Palette (Cmd+Shift+P / Ctrl+Shift+P):" + echo " -> Remote-SSH: Settings" + echo "" + echo " Or edit settings.json and add:" + echo "" + cat <<'VSCODE_SETTINGS' + "remote.SSH.defaultExtensions": [ + "ms-Python.python", + "ms-toolsai.jupyter" + ] +VSCODE_SETTINGS + echo "" + echo "3. Connect to Cluster" + echo " - Command Palette -> Remote-SSH: Connect to Host" + echo " - Enter your cluster SSH connection details" + echo "" + echo "4. Select Python Interpreter" + echo " After connecting, run this command to get the Python path:" + echo "" + echo " $ claude-vscode-env" + echo "" + local venv_path + venv_path=$(claude-vscode-env 2>/dev/null) + if [ $? 
-eq 0 ] && [ -n "$venv_path" ]; then + echo " Current virtual environment:" + echo " $venv_path" + echo "" + echo " Then in VS Code/Cursor:" + echo " - Command Palette -> Python: Select Interpreter" + echo " - Paste the path above or browse to it" + else + echo " Run 'echo \$DATABRICKS_VIRTUAL_ENV' to find the path" + fi + echo "" + echo "5. Important Notes" + echo " * IPYNB notebooks and *.py Databricks notebooks have access to" + echo " Databricks globals (dbutils, spark, etc.)" + echo " * Regular Python *.py files do NOT have access to Databricks globals" + echo " * Always select the pythonEnv-xxx interpreter for full Databricks" + echo " Runtime library access" + echo "" + echo "6. Verify Setup" + echo " Run: claude-vscode-check" +} + +claude-vscode-check() { + echo "=== VS Code/Cursor Remote SSH Setup Check ===" + echo "" + + # Check for virtual environment + local venv_path + venv_path=$(claude-vscode-env 2>/dev/null) + if [ $? -eq 0 ] && [ -n "$venv_path" ]; then + echo "[OK] Python Virtual Environment:" + echo " $venv_path" + if [ -d "$venv_path/bin" ]; then + echo " [OK] Virtual environment directory exists" + if [ -f "$venv_path/bin/python" ]; then + echo " [OK] Python executable found" + echo " Python version: $($venv_path/bin/python --version 2>&1 || echo 'unknown')" + else + echo " [WARN] Python executable not found" + fi + else + echo " [WARN] Virtual environment directory not found" + fi + else + echo "[ERROR] Python Virtual Environment: Not found" + echo " Run: echo \$DATABRICKS_VIRTUAL_ENV" + fi + echo "" + + # Check for Python + if command -v python3 >/dev/null 2>&1; then + echo "[OK] Python3 available: $(which python3)" + echo " Version: $(python3 --version 2>&1)" + else + echo "[ERROR] Python3 not found in PATH" + fi + echo "" + + # Check for Databricks runtime libraries + echo "Databricks Runtime Libraries:" + python3 <<'PYTHON_CHECK' +import sys +libraries = ['pyspark', 'pandas', 'numpy', 'mlflow', 'databricks'] +found = [] +missing = [] + 
+for lib in libraries: + try: + __import__(lib) + found.append(lib) + except ImportError: + missing.append(lib) + +if found: + print(f" [OK] Available: {', '.join(found)}") +if missing: + print(f" [WARN] Missing: {', '.join(missing)}") + +# Check for Databricks globals (only available in notebooks) +try: + import dbutils + print(" [OK] dbutils available (notebook context)") +except: + print(" [INFO] dbutils not available (normal for .py files)") +PYTHON_CHECK + + echo "" + echo "VS Code/Cursor Configuration:" + echo " Run 'claude-vscode-setup' for setup instructions" + echo " Run 'claude-vscode-env' to get Python interpreter path" +} + +claude-vscode-config() { + # Generate VS Code settings.json snippet + local venv_path + local venv_rc + venv_path=$(claude-vscode-env 2>/dev/null) + venv_rc=$? + + echo "=== VS Code/Cursor settings.json Configuration ===" + echo "" + echo "Add this to your VS Code/Cursor settings.json:" + echo "" + echo "{" + echo " \"remote.SSH.defaultExtensions\": [" + echo " \"ms-Python.python\"," + echo " \"ms-toolsai.jupyter\"" + echo " ]" + if [ $venv_rc -eq 0 ] && [ -n "$venv_path" ]; then + echo "," + echo " \"python.defaultInterpreterPath\": \"$venv_path/bin/python\"" + fi + echo "}" + echo "" + if [ $venv_rc -eq 0 ] && [ -n "$venv_path" ]; then + echo "Python interpreter path:" + echo " $venv_path/bin/python" + echo "" + echo "To set this in VS Code/Cursor:" + echo " 1. Command Palette -> Python: Select Interpreter" + echo " 2. 
Enter interpreter path: $venv_path/bin/python" + else + echo "To find Python interpreter path, run:" + echo " claude-vscode-env" + fi +} +### CLAUDE_CODE_HELPERS_END ### +EOF + + sed -i "s|WS_PH|$W|g; s|EXP_PH|$E|g" "$HOME/.bashrc" + log "[OK] Bashrc helpers added" + log " Experiment: $E" +} + +# Install Databricks skills for Claude Code +install_databricks_skills() { + local skills_dir="$HOME/.claude/skills" + local repo_url="https://raw.githubusercontent.com/databricks-solutions/ai-dev-kit/main/databricks-skills" + + # Core skills to install (curated list for most common use cases) + local core_skills=( + "databricks-config" + "databricks-python-sdk" + "databricks-unity-catalog" + "databricks-jobs" + "asset-bundles" + "databricks-app-python" + "model-serving" + "mlflow-evaluation" + "aibi-dashboards" + "spark-declarative-pipelines" + ) + + log "Installing Databricks skills for Claude Code..." + + # Create skills directory + mkdir -p "$skills_dir" + + local installed=0 + local failed=0 + + for skill in "${core_skills[@]}"; do + local skill_dir="$skills_dir/$skill" + + # Skip if already exists + if [ -d "$skill_dir" ] && [ -f "$skill_dir/SKILL.md" ]; then + log " [INFO] Skill '$skill' already installed" + installed=$((installed + 1)) + continue + fi + + # Create skill directory + mkdir -p "$skill_dir" + + # Download SKILL.md (required) + if curl -sSL -f "${repo_url}/${skill}/SKILL.md" -o "$skill_dir/SKILL.md" 2>>$L; then + log " [OK] Installed skill: $skill" + installed=$((installed + 1)) + else + log " [WARN] Failed to download skill: $skill" + rm -rf "$skill_dir" + failed=$((failed + 1)) + fi + done + + if [ $installed -gt 0 ]; then + log "[OK] Databricks skills installed: $installed skills" + [ $failed -gt 0 ] && log "[WARN] Failed to install: $failed skills" + return 0 + else + log "[WARN] No Databricks skills installed" + return 1 + fi +} + +# Main installation +main() { + log "Starting installation..." 
+ + # Install system dependencies (curl, git, jq - commonly used by Claude Code) + log "Installing system dependencies..." + if sudo apt-get update -qq -y &>>$L; then + if sudo apt-get install -y -qq curl git jq &>>$L; then + log "[OK] System dependencies installed (curl, git, jq)" + else + log "[WARN] Some system dependencies failed to install" + fi + else + log "[WARN] apt-get update failed" + fi + + # Install MLflow with Databricks support + log "Installing MLflow with Databricks support..." + if python3 -m pip install --quiet --upgrade "mlflow[databricks]>=3.4" &>>$L; then + log "[OK] MLflow installed successfully" + else + log "[WARN] MLflow installation failed (tracing features will not work)" + fi + + # Install tools (continue even if some fail) + install_nodejs || log "[WARN] Node.js installation skipped or failed" + install_claude || log "[WARN] Claude Code installation skipped or failed" + + # Install Databricks skills for Claude Code + install_databricks_skills || log "[WARN] Databricks skills installation incomplete" + + # Configure tools + if setup_bashrc; then + log "[OK] Bashrc configuration completed" + else + log "[WARN] Bashrc configuration failed" + fi + + log "" + log "=== Installation Summary ===" + log "Installation complete. Full log: $L" + log "" + log "Installed components:" + log " - Claude Code CLI" + log " - Node.js runtime" + log " - MLflow with Databricks support" + log " - Databricks skills (patterns and best practices)" + log "" + log "Next steps (on cluster login):" + log " 1. Run: source ~/.bashrc" + log " 2. Verify: check-claude" + log " 3. 
Use: claude command" + log "" + log "Databricks skills installed in: ~/.claude/skills/" + log "Skills available: databricks-config, python-sdk, unity-catalog," + log " jobs, asset-bundles, apps, model-serving, mlflow, dashboards, pipelines" + log "" + log "Helper commands:" + log " - check-claude: Verify installation status" + log " - claude-debug: Show Claude CLI configuration details" + log " - claude-refresh-token: Regenerate Claude settings" + log " - claude-token-status: Check token freshness and auto-refresh status" + log " - claude-setup-token-refresh: Enable hourly automatic token refresh (optional)" + log " - claude-remove-token-refresh: Disable automatic token refresh" + log " - claude-tracing-enable/disable/status: Manage MLflow tracing" + log " - claude-vscode-setup: Show VS Code/Cursor Remote SSH setup guide" + log " - claude-vscode-env: Get Python virtual environment path" + log " - claude-vscode-check: Verify VS Code/Cursor setup" + log " - claude-vscode-config: Generate VS Code settings.json snippet" + return 0 +} + +main +exit 0 diff --git a/examples/adb-coding-assistants-cluster/scripts/vscode-setup.sh b/examples/adb-coding-assistants-cluster/scripts/vscode-setup.sh new file mode 100755 index 00000000..89b21ada --- /dev/null +++ b/examples/adb-coding-assistants-cluster/scripts/vscode-setup.sh @@ -0,0 +1,248 @@ +#!/bin/bash +# +# VS Code/Cursor Remote SSH Setup Helper for Databricks Clusters +# This script helps configure VS Code or Cursor for remote development on Databricks clusters +# + +set -euo pipefail + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +log_info() { echo -e "${BLUE}ℹ${NC} $1"; } +log_success() { echo -e "${GREEN}✓${NC} $1"; } +log_warning() { echo -e "${YELLOW}⚠${NC} $1"; } +log_error() { echo -e "${RED}✗${NC} $1"; } + +# Find Databricks Python virtual environment +find_python_env() { + if [ -n "${DATABRICKS_VIRTUAL_ENV:-}" ]; then + echo 
"$DATABRICKS_VIRTUAL_ENV" + return 0 + fi + + # Try to find pythonEnv-* directories + local python_envs + python_envs=$(find /databricks/python* -maxdepth 1 -type d -name "pythonEnv-*" 2>/dev/null | head -1) + if [ -n "$python_envs" ]; then + echo "$python_envs" + return 0 + fi + + return 1 +} + +# Show setup instructions +show_setup_guide() { + echo "==========================================" + echo "VS Code/Cursor Remote SSH Setup Guide" + echo "==========================================" + echo "" + + echo "1. Install Remote SSH Extension" + echo " • VS Code: Install 'Remote - SSH' extension from marketplace" + echo " • Cursor: Built-in Remote SSH extension (already included)" + echo "" + + echo "2. Configure Default Extensions" + echo " Open Command Palette (Cmd+Shift+P / Ctrl+Shift+P):" + echo " → Type: Remote-SSH: Settings" + echo "" + echo " Or manually edit settings.json:" + echo "" + echo " {" + echo " \"remote.SSH.defaultExtensions\": [" + echo " \"ms-Python.python\"," + echo " \"ms-toolsai.jupyter\"" + echo " ]" + echo " }" + echo "" + + echo "3. Connect to Cluster" + echo " • Command Palette → Remote-SSH: Connect to Host" + echo " • Enter your cluster SSH connection details" + echo " • Format: user@hostname or use SSH config entry" + echo "" + + echo "4. 
Select Python Interpreter" + local venv_path + if venv_path=$(find_python_env 2>/dev/null); then + echo " ✓ Found Python virtual environment:" + echo " $venv_path" + echo "" + echo " In VS Code/Cursor:" + echo " • Command Palette → Python: Select Interpreter" + echo " • Enter interpreter path:" + echo " $venv_path/bin/python" + echo "" + echo " Or copy this path:" + echo " $venv_path/bin/python" + else + echo " ⚠ Could not auto-detect Python virtual environment" + echo " Run this command to find it:" + echo " echo \$DATABRICKS_VIRTUAL_ENV" + echo "" + echo " Then in VS Code/Cursor:" + echo " • Command Palette → Python: Select Interpreter" + echo " • Paste the path from above" + fi + echo "" + + echo "5. Important Notes" + echo " • IPYNB notebooks and *.py Databricks notebooks have access to" + echo " Databricks globals (dbutils, spark, etc.)" + echo " • Regular Python *.py files do NOT have access to Databricks globals" + echo " • Always select the pythonEnv-xxx interpreter for full Databricks" + echo " Runtime library access (pyspark, pandas, numpy, mlflow, etc.)" + echo "" + + echo "6. 
Verify Setup" + echo " After connecting, verify Python interpreter:" + echo " • Command Palette → Python: Select Interpreter" + echo " • Should show: pythonEnv-xxx/bin/python" + echo "" + echo " Test in a Python file:" + echo " import pyspark" + echo " import pandas" + echo " print('Setup successful!')" +} + +# Generate VS Code settings.json snippet +generate_settings() { + local venv_path + venv_path=$(find_python_env 2>/dev/null || echo "") + + echo "{" + echo " \"remote.SSH.defaultExtensions\": [" + echo " \"ms-Python.python\"," + echo " \"ms-toolsai.jupyter\"" + echo " ]" + if [ -n "$venv_path" ]; then + echo "," + echo " \"python.defaultInterpreterPath\": \"$venv_path/bin/python\"" + fi + echo "}" +} + +# Check current setup +check_setup() { + echo "==========================================" + echo "VS Code/Cursor Setup Check" + echo "==========================================" + echo "" + + # Check for virtual environment + local venv_path + if venv_path=$(find_python_env 2>/dev/null); then + log_success "Python Virtual Environment found:" + echo " $venv_path" + + if [ -d "$venv_path/bin" ]; then + log_success "Virtual environment directory exists" + if [ -f "$venv_path/bin/python" ]; then + log_success "Python executable found" + echo " Python version: $($venv_path/bin/python --version 2>&1 || echo 'unknown')" + else + log_warning "Python executable not found" + fi + else + log_warning "Virtual environment directory not found" + fi + else + log_error "Python Virtual Environment not found" + echo " Run: echo \$DATABRICKS_VIRTUAL_ENV" + fi + echo "" + + # Check for Python + if command -v python3 >/dev/null 2>&1; then + log_success "Python3 available: $(which python3)" + echo " Version: $(python3 --version 2>&1)" + else + log_error "Python3 not found in PATH" + fi + echo "" + + # Check for Databricks runtime libraries + echo "Databricks Runtime Libraries:" + python3 <<'PYTHON_CHECK' +import sys +libraries = ['pyspark', 'pandas', 'numpy', 'mlflow'] +found = [] 
+missing = [] + +for lib in libraries: + try: + __import__(lib) + found.append(lib) + except ImportError: + missing.append(lib) + +if found: + print(f" ✓ Available: {', '.join(found)}") +if missing: + print(f" ⚠ Missing: {', '.join(missing)}") + +# Check for Databricks globals (only available in notebooks) +try: + import dbutils + print(" ✓ dbutils available (notebook context)") +except: + print(" ℹ dbutils not available (normal for .py files)") +PYTHON_CHECK + + echo "" + echo "Next steps:" + echo " • Run this script with --guide to see setup instructions" + echo " • Run this script with --settings to generate settings.json" +} + +# Main +main() { + case "${1:-}" in + --guide|-g) + show_setup_guide + ;; + --settings|-s) + generate_settings + ;; + --check|-c) + check_setup + ;; + --env|-e) + find_python_env || { + log_error "Could not find Python virtual environment" + echo "Try: echo \$DATABRICKS_VIRTUAL_ENV" + exit 1 + } + ;; + --help|-h|"") + echo "VS Code/Cursor Remote SSH Setup Helper" + echo "" + echo "Usage: $0 [OPTION]" + echo "" + echo "Options:" + echo " --guide, -g Show complete setup guide" + echo " --settings, -s Generate VS Code settings.json snippet" + echo " --check, -c Check current setup status" + echo " --env, -e Show Python virtual environment path" + echo " --help, -h Show this help message" + echo "" + echo "Examples:" + echo " $0 --guide # Show setup instructions" + echo " $0 --env # Get Python interpreter path" + echo " $0 --check # Verify setup" + ;; + *) + log_error "Unknown option: $1" + echo "Run '$0 --help' for usage information" + exit 1 + ;; + esac +} + +main "$@" diff --git a/examples/adb-coding-assistants-cluster/terraform.tfvars.example b/examples/adb-coding-assistants-cluster/terraform.tfvars.example new file mode 100644 index 00000000..32296df2 --- /dev/null +++ b/examples/adb-coding-assistants-cluster/terraform.tfvars.example @@ -0,0 +1,88 @@ +# Example terraform.tfvars file for Claude Code CLI Cluster +# Copy this to 
terraform.tfvars and customize for your environment + +#============================================================================= +# AUTHENTICATION - Choose ONE approach +#============================================================================= + +# OPTION 1: Profile-based (Recommended - Simple and cloud-agnostic) +# Uses your ~/.databrickscfg profile +databricks_profile = "my-profile" # Replace with your profile name from ~/.databrickscfg + +# OPTION 2: Azure Resource ID (Azure-specific) +# Comment out databricks_profile above and uncomment below to use Azure resource ID +# databricks_resource_id = "/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/my-rg/providers/Microsoft.Databricks/workspaces/my-workspace" + +#============================================================================= +# REQUIRED VARIABLES +#============================================================================= + +cluster_name = "claude-coding-assistant" +catalog_name = "main" + +#============================================================================= +# OPTIONAL VARIABLES +#============================================================================= + +# Unity Catalog configuration +schema_name = "default" +volume_name = "coding_assistants" + +# Cluster configuration +spark_version = "17.3.x-cpu-ml-scala2.13" +node_type_id = "Standard_D8pds_v6" # Azure: Standard_D8pds_v6 (8 vCPU, 32 GB RAM, Premium SSD + local NVMe). 
Fallback: Standard_DS13_v2 if unavailable in region +autotermination_minutes = 30 + +# Cluster mode options: +# - "SINGLE_NODE": Cost-effective for individual development (recommended) +# - "STANDARD": Multi-node for team environments +cluster_mode = "SINGLE_NODE" + +# Worker configuration (ignored if cluster_mode = "SINGLE_NODE") +num_workers = 0 # Set to null to enable autoscaling, or a specific number + +# Autoscaling configuration (only used if num_workers = null) +min_workers = 1 +max_workers = 3 + +# MLflow experiment for Claude Code tracing +mlflow_experiment_name = "/Workspace/Shared/claude-code-tracing" + +# Optional: Custom init script path (defaults to bundled script) +# init_script_source_path = "./custom-install-claude.sh" + +# Custom tags +tags = { + Environment = "development" + Purpose = "ai-coding" + Owner = "data-engineering" + CostCenter = "engineering" +} + +#============================================================================= +# CLOUD-SPECIFIC NODE TYPES REFERENCE +#============================================================================= + +# Azure VM Types (Premium SSD): +# Modern Dpdsv6-series (Cobalt 100 processor, Premium SSD + local NVMe): +# - Standard_D4pds_v6 (4 cores, 16 GB RAM) - Cost-effective +# - Standard_D8pds_v6 (8 cores, 32 GB RAM) - Recommended default (modern) +# - Standard_D16pds_v6 (16 cores, 64 GB RAM) - For larger workloads +# Note: Dpdsv6-series may have limited regional availability +# +# DS-series (Premium SSD, widely available): +# - Standard_DS3_v2 (4 cores, 14 GB RAM) - Cost-effective for development +# - Standard_DS4_v2 (8 cores, 28 GB RAM) - Good for medium workloads +# - Standard_DS13_v2 (8 cores, 56 GB RAM) - Good fallback if Dpdsv6 unavailable +# - Standard_DS5_v2 (16 cores, 56 GB RAM) - More CPU, same RAM as DS13_v2 +# - Standard_DS14_v2 (16 cores, 112 GB RAM) - For large-scale workloads +# +# AWS: +# - i3.xlarge (4 cores, 30.5 GB RAM) - Recommended for single-node +# - i3.2xlarge (8 cores, 61 GB 
RAM) - For larger workloads +# - r5.xlarge (4 cores, 32 GB RAM) - Memory-optimized +# +# GCP: +# - n1-highmem-4 (4 cores, 26 GB RAM) - Recommended for single-node +# - n1-highmem-8 (8 cores, 52 GB RAM) - For larger workloads +# - n2-standard-4 (4 cores, 16 GB RAM) - Cost-optimized diff --git a/examples/adb-coding-assistants-cluster/variables.tf b/examples/adb-coding-assistants-cluster/variables.tf new file mode 100644 index 00000000..4dc0f3ac --- /dev/null +++ b/examples/adb-coding-assistants-cluster/variables.tf @@ -0,0 +1,106 @@ +variable "databricks_profile" { + description = "Databricks CLI profile name from ~/.databrickscfg (recommended for simple, cloud-agnostic authentication). If set, databricks_resource_id is ignored." + type = string + default = null +} + +variable "databricks_resource_id" { + description = "The Azure resource ID for the Databricks workspace (Azure-specific approach). Format: /subscriptions/{subscription-id}/resourceGroups/{resource-group}/providers/Microsoft.Databricks/workspaces/{workspace-name}. Only used if databricks_profile is not set." + type = string + default = null + + validation { + condition = var.databricks_profile != null || var.databricks_resource_id != null + error_message = "Either databricks_profile or databricks_resource_id must be set. Recommended: use databricks_profile for simpler configuration." 
+ } +} + +variable "cluster_name" { + description = "Name of the Databricks cluster" + type = string +} + +variable "catalog_name" { + description = "Unity Catalog name for the volume" + type = string +} + +variable "schema_name" { + description = "Schema name for the volume" + type = string + default = "default" +} + +variable "volume_name" { + description = "Volume name to store init scripts" + type = string + default = "coding_assistants" +} + +variable "init_script_source_path" { + description = "Local path to the init script" + type = string + default = null +} + +variable "spark_version" { + description = "Databricks Runtime version" + type = string + default = "17.3.x-cpu-ml-scala2.13" +} + +variable "node_type_id" { + description = "Node type for the cluster. Default is Standard_D8pds_v6 (modern, premium SSD + local NVMe). If unavailable in your region, consider Standard_DS13_v2 as fallback." + type = string + default = "Standard_D8pds_v6" +} + +variable "autotermination_minutes" { + description = "Minutes of inactivity before cluster auto-terminates" + type = number + default = 30 +} + +variable "num_workers" { + description = "Number of worker nodes (null for autoscaling)" + type = number + default = null +} + +variable "min_workers" { + description = "Minimum number of workers for autoscaling" + type = number + default = 1 +} + +variable "max_workers" { + description = "Maximum number of workers for autoscaling" + type = number + default = 3 +} + +variable "mlflow_experiment_name" { + description = "MLflow experiment name for Claude Code tracing" + type = string + default = "/Workspace/Shared/claude-code-tracing" +} + +variable "cluster_mode" { + description = "Cluster mode: STANDARD or SINGLE_NODE" + type = string + default = "STANDARD" + + validation { + condition = contains(["STANDARD", "SINGLE_NODE"], var.cluster_mode) + error_message = "cluster_mode must be either STANDARD or SINGLE_NODE" + } +} + +variable "tags" { + description = "Custom tags for 
the cluster"
+  type        = map(string)
+  default = {
+    Environment = "dev"
+    Purpose     = "coding-assistants"
+  }
+}
diff --git a/examples/adb-coding-assistants-cluster/versions.tf b/examples/adb-coding-assistants-cluster/versions.tf
new file mode 100644
index 00000000..7117131f
--- /dev/null
+++ b/examples/adb-coding-assistants-cluster/versions.tf
@@ -0,0 +1,6 @@
+terraform {
+  # Cross-variable references inside variable validation blocks (see
+  # variables.tf: the databricks_resource_id validation condition reads
+  # var.databricks_profile) are only supported from Terraform v1.9 onward.
+  required_version = ">= 1.9"
+}