diff --git a/docs/remote_simulation_workflow.md b/docs/remote_simulation_workflow.md new file mode 100644 index 0000000..ef32fc7 --- /dev/null +++ b/docs/remote_simulation_workflow.md @@ -0,0 +1,110 @@ +# Remote Simulation & Training Workflow + +This guide outlines how to develop, train, and test AlohaMini using a remote cloud GPU (e.g., Lambda Labs, AWS) for the heavy simulation, while controlling everything from your local laptop (e.g., MacBook). + +## Architecture + +* **Cloud Server (The "Lab")**: Runs NVIDIA Isaac Sim. Handles physics, rendering, and training. +* **Local Machine (The "Mission Control")**: Runs the Dashboard and Teleoperation scripts. Connects to the cloud via SSH. + +## Prerequisites + +1. **Cloud Instance**: A server with an NVIDIA RTX GPU (A10, A100, RTX 3090/4090). + * Recommended: Lambda Labs or Brev.dev (Ubuntu 20.04/22.04). + * Must have **NVIDIA Drivers** and **Isaac Sim** installed (or use the Isaac Sim Docker container). +2. **Local Machine**: Your laptop (Mac/Windows/Linux). +3. **SSH Access**: You must be able to SSH into the cloud instance. + +## Setup + +### 1. Cloud Server Setup +1. SSH into your cloud instance. +2. Clone this repository: + ```bash + git clone https://github.com/blankey1337/AlohaMini.git + cd AlohaMini + ``` +3. Ensure you are in the python environment that has access to Isaac Sim (often `./python.sh` in the Isaac Sim folder). + +### 2. Local Machine Setup +1. Clone this repository locally. +2. Install dependencies: + ```bash + pip install -r software/requirements.txt + ``` + +## The Workflow + +### Phase 1: Data Collection + +1. **Start the Simulation (Cloud)** + Run the simulation environment script. This listens on ports 5555 (Cmd) and 5556 (Obs). + ```bash + # On Cloud + isaac_sim_python software/examples/alohamini/isaac_sim/isaac_alohamini_env.py + ``` + +2. **Establish Connection (Local)** + Forward the ZMQ ports from the cloud to your localhost. 
+ ```bash + # On Local Mac + ssh -L 5555:localhost:5555 -L 5556:localhost:5556 ubuntu@<CLOUD_IP> + ``` + +3. **Launch Dashboard (Local)** + Start the web dashboard to see what the robot sees. + ```bash + # On Local Mac + python software/dashboard/app.py + ``` + Open `http://localhost:5001` in your browser. + +4. **Teleoperate & Record** + * Use the Dashboard to see the camera feed. + * Run the teleop script in another terminal to control the robot with your keyboard: + ```bash + python software/examples/alohamini/standalone_teleop.py --ip 127.0.0.1 + ``` + * **To Record**: Click the **"Start Recording"** button on the Dashboard. + * Perform the task (e.g., pick up the object). + * Click **"Stop Recording"**. + * Repeat 50-100 times. The data is saved to `AlohaMini/data_sim/` on the **Cloud Server**. + +### Phase 2: Training + +You train the model directly on the Cloud GPU where the data lives. + +1. **Stop the Simulation** (to free up GPU VRAM). +2. **Run Training**: + Use the LeRobot training script (or your custom training script) pointing to the generated dataset. + ```bash + # On Cloud + python software/src/lerobot/scripts/train.py \ + --dataset data_sim \ + --policy act \ + --batch_size 8 \ + --num_epochs 1000 + ``` + *Note: Exact training command depends on the LeRobot configuration.* + +3. **Output**: This produces a model file (e.g., `outputs/train/policy.safetensors`). + +### Phase 3: Evaluation + +Test the trained model in the simulator to see if it works. + +1. **Restart Simulation (Cloud)**: + ```bash + isaac_sim_python software/examples/alohamini/isaac_sim/isaac_alohamini_env.py + ``` +2. **Run Inference Node (Cloud or Local)**: + You need a script that loads the model and closes the loop (reads obs -> runs model -> sends action). + * *Coming Soon: `eval_sim.py` which loads the safetensor and drives the ZMQ robot.* + +3. **Watch (Local)**: + Use the Dashboard to watch the robot perform the task autonomously.
+ +## Troubleshooting + +* **Laggy Video**: ZMQ over SSH tunneling is usually fast enough for 640x480, but if it lags, check your internet connection speed to the cloud server. +* **"Address already in use"**: Ensure no other python scripts are using ports 5555/5556 on either machine. diff --git a/software/dashboard/app.py b/software/dashboard/app.py index 6466d28..8aa2175 100644 --- a/software/dashboard/app.py +++ b/software/dashboard/app.py @@ -5,7 +5,7 @@ import zmq import cv2 import numpy as np -from flask import Flask, render_template, Response, jsonify +from flask import Flask, render_template, Response, jsonify, request app = Flask(__name__) @@ -13,15 +13,24 @@ latest_observation = {} lock = threading.Lock() connected = False +recording = False +cmd_socket = None -def zmq_worker(ip='127.0.0.1', port=5556): - global latest_observation, connected +def zmq_worker(ip='127.0.0.1', port=5556, cmd_port=5555): + global latest_observation, connected, cmd_socket context = zmq.Context() + + # Sub Socket socket = context.socket(zmq.SUB) socket.setsockopt(zmq.SUBSCRIBE, b"") socket.connect(f"tcp://{ip}:{port}") socket.setsockopt(zmq.CONFLATE, 1) + # Cmd Socket (Push to Sim) + cmd_socket = context.socket(zmq.PUSH) + cmd_socket.setsockopt(zmq.CONFLATE, 1) + cmd_socket.connect(f"tcp://{ip}:{cmd_port}") + print(f"Connecting to ZMQ Stream at {ip}:{port}...") while True: @@ -93,6 +102,33 @@ def video_feed(camera_name): return Response(generate_frames(camera_name), mimetype='multipart/x-mixed-replace; boundary=frame') +@app.route('/api/command', methods=['POST']) +def send_command(): + global cmd_socket + if not request.json or 'command' not in request.json: + return jsonify({'error': 'No command provided'}), 400 + + cmd = request.json['command'] + print(f"Received command: {cmd}") + + # Example handling + if cmd == 'reset_sim': + # Send reset command (Isaac Sim needs to handle this logic) + # For now, we can just zero out velocities or send a special flag + if cmd_socket: + 
cmd_socket.send_string(json.dumps({"reset": True})) + + elif cmd == 'start_recording': + # Trigger recording logic + if cmd_socket: + cmd_socket.send_string(json.dumps({"start_recording": True})) + + elif cmd == 'stop_recording': + if cmd_socket: + cmd_socket.send_string(json.dumps({"stop_recording": True})) + + return jsonify({'status': 'ok'}) + @app.route('/api/status') def get_status(): with lock: diff --git a/software/dashboard/templates/index.html b/software/dashboard/templates/index.html index 28130c8..49d8ed2 100644 --- a/software/dashboard/templates/index.html +++ b/software/dashboard/templates/index.html @@ -11,14 +11,27 @@ .camera-box img { max-width: 100%; height: auto; display: block; } .camera-title { text-align: center; font-size: 0.9em; margin-bottom: 5px; } .status-panel { flex: 1; background: #333; padding: 15px; border-radius: 5px; min-width: 300px; } + .controls-panel { width: 100%; background: #333; padding: 15px; border-radius: 5px; margin-top: 20px; } table { width: 100%; border-collapse: collapse; } td, th { padding: 5px; border-bottom: 1px solid #444; font-size: 0.9em; } th { text-align: left; color: #aaa; } .value { font-family: monospace; color: #4f4; } + .btn { padding: 10px 20px; font-size: 16px; margin-right: 10px; cursor: pointer; background: #555; color: white; border: none; border-radius: 4px; } + .btn:hover { background: #777; } + .btn-record { background: #c00; } + .btn-record:hover { background: #e00; } + .status-indicator { display: inline-block; width: 10px; height: 10px; border-radius: 50%; margin-right: 5px; } + .status-active { background: #0f0; } + .status-inactive { background: #555; } -

AlohaMini Dashboard

+
+

AlohaMini Dashboard

+
+ Disconnected +
+
@@ -33,14 +46,63 @@

Robot State

+
+

Simulation Controls

+

Control the remote simulation directly from here.

+ + + +
+