From 31edc4cc0c7c086152be566f14619ad8afefb36b Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-10-232-15-88.ec2.internal>
Date: Mon, 25 Mar 2024 19:30:50 +0000
Subject: [PATCH 1/2] visualize zed depths, quick and dirty

---
 .../recording_readers/svo_reader.py           |  18 ++++
 .../wrappers/recorded_multi_camera_wrapper.py |   2 +-
 scripts/visualizations/visualize_zed_depth.py | 101 ++++++++++++++++++
 3 files changed, 120 insertions(+), 1 deletion(-)
 create mode 100644 scripts/visualizations/visualize_zed_depth.py

diff --git a/droid/camera_utils/recording_readers/svo_reader.py b/droid/camera_utils/recording_readers/svo_reader.py
index becaed0..311b756 100644
--- a/droid/camera_utils/recording_readers/svo_reader.py
+++ b/droid/camera_utils/recording_readers/svo_reader.py
@@ -69,6 +69,24 @@ def get_frame_resolution(self):
         height = camera_info.resolution.height
         return (width, height)
 
+	def get_camera_intrinsics(self):
+		calib_params = self._cam.get_camera_information().camera_configuration.calibration_parameters
+		return {
+			self.serial_number + "_left": self._process_intrinsics(calib_params.left_cam),
+			self.serial_number + "_right": self._process_intrinsics(calib_params.right_cam),
+		}
+
+	def get_camera_baseline(self):
+		# Convert baseline to meters and return
+		return 0.001 * self._cam.get_camera_information().camera_configuration.calibration_parameters.get_camera_baseline()
+
+	### Calibration Utilities ###
+	def _process_intrinsics(self, params):
+		intrinsics = {}
+		intrinsics["cameraMatrix"] = np.array([[params.fx, 0, params.cx], [0, params.fy, params.cy], [0, 0, 1]])
+		intrinsics["distCoeffs"] = np.array(list(params.disto))
+		return intrinsics
+
     def get_frame_count(self):
         if self.skip_reading:
             return 0
diff --git a/droid/camera_utils/wrappers/recorded_multi_camera_wrapper.py b/droid/camera_utils/wrappers/recorded_multi_camera_wrapper.py
index 87e23b1..41e5968 100644
--- a/droid/camera_utils/wrappers/recorded_multi_camera_wrapper.py
+++ b/droid/camera_utils/wrappers/recorded_multi_camera_wrapper.py
@@ -42,7 +42,7 @@ def read_cameras(self, index=None, camera_type_dict={}, timestamp_dict={}):
         for cam_id in all_cam_ids:
             cam_type = camera_type_dict[cam_id]
             curr_cam_kwargs = self.camera_kwargs.get(cam_type, {})
-            self.camera_dict[cam_id].set_reading_parameters(**curr_cam_kwargs)
+            self.camera_dict[cam_id].set_reading_parameters(**curr_cam_kwargs, depth=True)
 
             timestamp = timestamp_dict.get(cam_id + "_frame_received", None)
             if index is not None:
diff --git a/scripts/visualizations/visualize_zed_depth.py b/scripts/visualizations/visualize_zed_depth.py
new file mode 100644
index 0000000..87055cc
--- /dev/null
+++ b/scripts/visualizations/visualize_zed_depth.py
@@ -0,0 +1,101 @@
+import cv2
+import matplotlib.pyplot as plt
+import numpy as np
+import os
+import fnmatch
+
+from r2d2.trajectory_utils.misc import load_trajectory
+from r2d2.camera_utils.recording_readers.svo_reader import SVOReader
+
+def make_cv_disparity_image(disparity, max_disparity):
+    vis_disparity = disparity / max_disparity
+    vis_disparity[vis_disparity < 0.0] = 0.0
+    vis_disparity[vis_disparity > 1.0] = 1.0
+    vis_disparity = vis_disparity
+    np_img = (vis_disparity * 255.0).astype(np.uint8)
+    mapped = cv2.applyColorMap(np_img, cv2.COLORMAP_JET)
+    mapped[vis_disparity < 1e-3, :] = 0
+    mapped[vis_disparity > 1.0 - 1e-3, :] = 0
+    return mapped
+
+filepath = "INSERT PATH TO DATA FOLDER HERE (eg. /path/to/folder/Fri_Jul__7_14:57:48_2023)"
+traj_filepath = os.path.join(filepath, "trajectory.h5")
+recording_folderpath = os.path.join(filepath, "recordings/SVO")
+traj = load_trajectory(traj_filepath, recording_folderpath=recording_folderpath, camera_kwargs={})
+
+svo_files = []
+for root, _, files in os.walk(recording_folderpath):
+    for filename in files:
+        if fnmatch.fnmatch(filename, "*.svo"):
+            svo_files.append(os.path.join(root, filename))
+
+cameras = []
+frame_counts = []
+serial_numbers = []
+cam_matrices = []
+cam_baselines = []
+
+for svo_file in svo_files:
+    # Open SVO Reader
+    serial_number = svo_file.split("/")[-1][:-4]
+    camera = SVOReader(svo_file, serial_number=serial_number)
+    camera.set_reading_parameters(image=True, depth=True, pointcloud=False, concatenate_images=False)
+    im_key = '%s_left' % serial_number
+    # Intrinsics are the same for the left and the right camera
+    cam_matrices.append(camera.get_camera_intrinsics()[im_key]['cameraMatrix'])
+    cam_baselines.append(camera.get_camera_baseline())
+    frame_count = camera.get_frame_count()
+    cameras.append(camera)
+    frame_counts.append(frame_count)
+    serial_numbers.append(serial_number)
+
+# return serial_numbers
+cameras = [x for y, x in sorted(zip(serial_numbers, cameras))]
+cam_matrices = [x for y, x in sorted(zip(serial_numbers, cam_matrices))]
+cam_baselines = [x for y, x in sorted(zip(serial_numbers, cam_baselines))]
+serial_numbers = sorted(serial_numbers)
+
+assert frame_counts.count(frame_counts[0]) == len(frame_counts)
+
+timestep = np.random.randint(frame_counts[0])
+frame = traj[timestep]
+obs = frame["observation"]
+image_obs = obs["image"]
+depth_obs = obs["depth"]
+
+zed_depths = []
+left_rgbs = []
+right_rgbs = []
+
+for i, cam_id in enumerate(serial_numbers):
+    left_key, right_key = f"{cam_id}_left", f"{cam_id}_right"
+    left_rgb, right_rgb = image_obs[left_key], image_obs[right_key] 
+    left_rgbs.append(left_rgb)
+    right_rgbs.append(right_rgb)
+    # Note (Ashwin): depth from the left and right stereo pairs are the same
+    # so I'm just arbitrarily picking one
+    zed_depths.append(depth_obs[left_key])
+
+images = []
+for serial_num, zed_depth, left_rgb, right_rgb in zip(serial_numbers, zed_depths, left_rgbs, right_rgbs):
+    zed_depth[np.isnan(zed_depth)] = 0
+    zed_depth[np.isinf(zed_depth)] = 1_000
+    zed_depth = zed_depth / 1_000
+    zed_depth_vis = make_cv_disparity_image(np.array(zed_depth), 9.0)
+    images.append(left_rgb)
+    images.append(right_rgb)
+    images.append(zed_depth_vis)
+
+# Create a 3x3 subplot grid
+fig, axes = plt.subplots(3, 3, figsize=(12, 5))
+titles = ["Left RGB", "Right RGB", "ZED Depth"] * 3
+
+# Iterate through the images and display them on the subplots
+for i, ax in enumerate(axes.ravel()):
+    if i < len(images):
+        ax.imshow(cv2.cvtColor(images[i], cv2.COLOR_BGR2RGB))
+        if i < 3:
+            ax.set_title(titles[i])
+        ax.axis('off')
+
+plt.savefig('depth_image_grid.png', bbox_inches='tight')
\ No newline at end of file

From b29ac46d41ae6c9ca8555bc4a98fb909e7090ebe Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-10-232-15-88.ec2.internal>
Date: Tue, 26 Mar 2024 02:13:52 +0000
Subject: [PATCH 2/2] clean and fix minor bugs

---
 .../recording_readers/svo_reader.py           | 43 ++++++++--------
 .../wrappers/recorded_multi_camera_wrapper.py |  2 +-
 scripts/visualizations/visualize_zed_depth.py | 49 ++++++++++---------
 3 files changed, 49 insertions(+), 45 deletions(-)

diff --git a/droid/camera_utils/recording_readers/svo_reader.py b/droid/camera_utils/recording_readers/svo_reader.py
index 311b756..7acc447 100644
--- a/droid/camera_utils/recording_readers/svo_reader.py
+++ b/droid/camera_utils/recording_readers/svo_reader.py
@@ -1,6 +1,7 @@
 from copy import deepcopy
 
 import cv2
+import numpy as np
 
 try:
     import pyzed.sl as sl
@@ -69,23 +70,23 @@ def get_frame_resolution(self):
         height = camera_info.resolution.height
         return (width, height)
 
-	def get_camera_intrinsics(self):
-		calib_params = self._cam.get_camera_information().camera_configuration.calibration_parameters
-		return {
-			self.serial_number + "_left": self._process_intrinsics(calib_params.left_cam),
-			self.serial_number + "_right": self._process_intrinsics(calib_params.right_cam),
-		}
+    def get_camera_intrinsics(self):
+        calib_params = self._cam.get_camera_information().camera_configuration.calibration_parameters
+        return {
+            self.serial_number + "_left": self._process_intrinsics(calib_params.left_cam),
+            self.serial_number + "_right": self._process_intrinsics(calib_params.right_cam),
+        }
 
-	def get_camera_baseline(self):
-		# Convert baseline to meters and return
-		return 0.001 * self._cam.get_camera_information().camera_configuration.calibration_parameters.get_camera_baseline()
+    def get_camera_baseline(self):
+        # Convert baseline to meters and return
+        return 0.001 * self._cam.get_camera_information().camera_configuration.calibration_parameters.get_camera_baseline()
 
-	### Calibration Utilities ###
-	def _process_intrinsics(self, params):
-		intrinsics = {}
-		intrinsics["cameraMatrix"] = np.array([[params.fx, 0, params.cx], [0, params.fy, params.cy], [0, 0, 1]])
-		intrinsics["distCoeffs"] = np.array(list(params.disto))
-		return intrinsics
+    ### Calibration Utilities ###
+    def _process_intrinsics(self, params):
+        intrinsics = {}
+        intrinsics["cameraMatrix"] = np.array([[params.fx, 0, params.cx], [0, params.fy, params.cy], [0, 0, 1]])
+        intrinsics["distCoeffs"] = np.array(list(params.disto))
+        return intrinsics
 
     def get_frame_count(self):
         if self.skip_reading:
@@ -144,12 +145,12 @@ def read_camera(self, ignore_data=False, correct_timestamp=None, return_timestam
                     self.serial_number + "_left": self._process_frame(self._left_img),
                     self.serial_number + "_right": self._process_frame(self._right_img),
                 }
-        # if self.depth:
-        # 	self._cam.retrieve_measure(self._left_depth, sl.MEASURE.DEPTH, resolution=self.resolution)
-        # 	self._cam.retrieve_measure(self._right_depth, sl.MEASURE.DEPTH_RIGHT, resolution=self.resolution)
-        # 	data_dict['depth'] = {
-        # 		self.serial_number + '_left': self._left_depth.get_data().copy(),
-        # 		self.serial_number + '_right': self._right_depth.get_data().copy()}
+        if self.depth:
+            self._cam.retrieve_measure(self._left_depth, sl.MEASURE.DEPTH, resolution=self.zed_resolution)
+            self._cam.retrieve_measure(self._right_depth, sl.MEASURE.DEPTH_RIGHT, resolution=self.zed_resolution)
+            data_dict['depth'] = {
+                self.serial_number + '_left': self._left_depth.get_data().copy(),
+                self.serial_number + '_right': self._right_depth.get_data().copy()}
         # if self.pointcloud:
         # 	self._cam.retrieve_measure(self._left_pointcloud, sl.MEASURE.XYZRGBA, resolution=self.resolution)
         # 	self._cam.retrieve_measure(self._right_pointcloud, sl.MEASURE.XYZRGBA_RIGHT, resolution=self.resolution)
diff --git a/droid/camera_utils/wrappers/recorded_multi_camera_wrapper.py b/droid/camera_utils/wrappers/recorded_multi_camera_wrapper.py
index 41e5968..87e23b1 100644
--- a/droid/camera_utils/wrappers/recorded_multi_camera_wrapper.py
+++ b/droid/camera_utils/wrappers/recorded_multi_camera_wrapper.py
@@ -42,7 +42,7 @@ def read_cameras(self, index=None, camera_type_dict={}, timestamp_dict={}):
         for cam_id in all_cam_ids:
             cam_type = camera_type_dict[cam_id]
             curr_cam_kwargs = self.camera_kwargs.get(cam_type, {})
-            self.camera_dict[cam_id].set_reading_parameters(**curr_cam_kwargs, depth=True)
+            self.camera_dict[cam_id].set_reading_parameters(**curr_cam_kwargs)
 
             timestamp = timestamp_dict.get(cam_id + "_frame_received", None)
             if index is not None:
diff --git a/scripts/visualizations/visualize_zed_depth.py b/scripts/visualizations/visualize_zed_depth.py
index 87055cc..eb91c87 100644
--- a/scripts/visualizations/visualize_zed_depth.py
+++ b/scripts/visualizations/visualize_zed_depth.py
@@ -4,59 +4,52 @@
 import os
 import fnmatch
 
-from r2d2.trajectory_utils.misc import load_trajectory
-from r2d2.camera_utils.recording_readers.svo_reader import SVOReader
+from droid.trajectory_utils.misc import load_trajectory
+from droid.camera_utils.recording_readers.svo_reader import SVOReader
 
-def make_cv_disparity_image(disparity, max_disparity):
-    vis_disparity = disparity / max_disparity
-    vis_disparity[vis_disparity < 0.0] = 0.0
-    vis_disparity[vis_disparity > 1.0] = 1.0
-    vis_disparity = vis_disparity
-    np_img = (vis_disparity * 255.0).astype(np.uint8)
-    mapped = cv2.applyColorMap(np_img, cv2.COLORMAP_JET)
-    mapped[vis_disparity < 1e-3, :] = 0
-    mapped[vis_disparity > 1.0 - 1e-3, :] = 0
-    return mapped
 
+# Get paths
 filepath = "INSERT PATH TO DATA FOLDER HERE (eg. /path/to/folder/Fri_Jul__7_14:57:48_2023)"
 traj_filepath = os.path.join(filepath, "trajectory.h5")
 recording_folderpath = os.path.join(filepath, "recordings/SVO")
-traj = load_trajectory(traj_filepath, recording_folderpath=recording_folderpath, camera_kwargs={})
-
 svo_files = []
 for root, _, files in os.walk(recording_folderpath):
     for filename in files:
         if fnmatch.fnmatch(filename, "*.svo"):
             svo_files.append(os.path.join(root, filename))
 
-cameras = []
+
+# Get intrinsics information and camera IDs
 frame_counts = []
 serial_numbers = []
 cam_matrices = []
 cam_baselines = []
-
 for svo_file in svo_files:
     # Open SVO Reader
     serial_number = svo_file.split("/")[-1][:-4]
     camera = SVOReader(svo_file, serial_number=serial_number)
-    camera.set_reading_parameters(image=True, depth=True, pointcloud=False, concatenate_images=False)
+    camera.set_reading_parameters(image=True, pointcloud=False, concatenate_images=False)
     im_key = '%s_left' % serial_number
     # Intrinsics are the same for the left and the right camera
     cam_matrices.append(camera.get_camera_intrinsics()[im_key]['cameraMatrix'])
     cam_baselines.append(camera.get_camera_baseline())
     frame_count = camera.get_frame_count()
-    cameras.append(camera)
     frame_counts.append(frame_count)
     serial_numbers.append(serial_number)
 
-# return serial_numbers
-cameras = [x for y, x in sorted(zip(serial_numbers, cameras))]
+
+# Get camera intrinsics and baselines just in case someone wants to use a learned
+# stereo model which consumes these
 cam_matrices = [x for y, x in sorted(zip(serial_numbers, cam_matrices))]
 cam_baselines = [x for y, x in sorted(zip(serial_numbers, cam_baselines))]
 serial_numbers = sorted(serial_numbers)
 
 assert frame_counts.count(frame_counts[0]) == len(frame_counts)
 
+
+# Load trajectory and associated images
+traj = load_trajectory(traj_filepath, recording_folderpath=recording_folderpath, camera_kwargs={cam_type: {"depth": True} for cam_type in ["varied_camera", "hand_camera"]})
+
 timestep = np.random.randint(frame_counts[0])
 frame = traj[timestep]
 obs = frame["observation"]
@@ -66,7 +59,6 @@ def make_cv_disparity_image(disparity, max_disparity):
 zed_depths = []
 left_rgbs = []
 right_rgbs = []
-
 for i, cam_id in enumerate(serial_numbers):
     left_key, right_key = f"{cam_id}_left", f"{cam_id}_right"
     left_rgb, right_rgb = image_obs[left_key], image_obs[right_key] 
@@ -76,6 +68,19 @@ def make_cv_disparity_image(disparity, max_disparity):
     # so I'm just arbitrarily picking one
     zed_depths.append(depth_obs[left_key])
 
+
+# Do plotting
+def make_cv_disparity_image(disparity, max_disparity):
+    vis_disparity = disparity / max_disparity
+    vis_disparity[vis_disparity < 0.0] = 0.0
+    vis_disparity[vis_disparity > 1.0] = 1.0
+    vis_disparity = vis_disparity
+    np_img = (vis_disparity * 255.0).astype(np.uint8)
+    mapped = cv2.applyColorMap(np_img, cv2.COLORMAP_JET)
+    mapped[vis_disparity < 1e-3, :] = 0
+    mapped[vis_disparity > 1.0 - 1e-3, :] = 0
+    return mapped
+
 images = []
 for serial_num, zed_depth, left_rgb, right_rgb in zip(serial_numbers, zed_depths, left_rgbs, right_rgbs):
     zed_depth[np.isnan(zed_depth)] = 0
@@ -85,11 +90,9 @@ def make_cv_disparity_image(disparity, max_disparity):
     images.append(left_rgb)
     images.append(right_rgb)
     images.append(zed_depth_vis)
-
 # Create a 3x3 subplot grid
 fig, axes = plt.subplots(3, 3, figsize=(12, 5))
 titles = ["Left RGB", "Right RGB", "ZED Depth"] * 3
-
 # Iterate through the images and display them on the subplots
 for i, ax in enumerate(axes.ravel()):
     if i < len(images):