diff --git a/neural-networks/README.md b/neural-networks/README.md index ee0fc1094..237cb6c4b 100644 --- a/neural-networks/README.md +++ b/neural-networks/README.md @@ -199,6 +199,7 @@ LEGEND: ✅: available; ❌: not available; 🚧: work in progress | [generic-example](generic-example/) | [midas-v2-1](https://models.luxonis.com/luxonis/midas-v2-1/be09b09e-053d-4330-a0fc-0c9d16aac007) | ✅ | ✅ | ✅ | [gen2-depth-mbnv2](https://github.com/luxonis/oak-examples/tree/master/gen2-depth-mbnv2), [gen2-fast-depth](https://github.com/luxonis/oak-examples/tree/master/gen2-fast-depth), [gen2-mega-depth](https://github.com/luxonis/oak-examples/tree/master/gen2-mega-depth) | | | [generic-example](generic-example/) | [depth-anything-v2](https://models.luxonis.com/luxonis/depth-anything-v2/c5bf9763-d29d-4b10-8642-fbd032236383) | ❌ | ✅ | ✅ | | Model is only available for RVC4. | | [foundation-stereo](depth-estimation/foundation-stereo) | [foundation-stereo](https://models.luxonis.com/luxonis/foundation-stereo/b8956c24-0b8a-4e49-bd83-ed702252d517) | ✅ | ✅ | ❌ | | Model runs on host and requires a lot of compute. | +| [neural-depth](depth-estimation/neural-depth) | [neural-depth](https://docs.luxonis.com/software-v3/depthai/depthai-components/nodes/neural_depth/) | ❌ | ✅ | ✅ | | Model is only available for RVC4. | ## Line Detection diff --git a/neural-networks/depth-estimation/neural-depth/.gitignore b/neural-networks/depth-estimation/neural-depth/.gitignore new file mode 100644 index 000000000..7a6a22ffe --- /dev/null +++ b/neural-networks/depth-estimation/neural-depth/.gitignore @@ -0,0 +1,2 @@ +host_eval/data +host_eval/outputs_neural_depth_eval \ No newline at end of file diff --git a/neural-networks/depth-estimation/neural-depth/.oakappignore b/neural-networks/depth-estimation/neural-depth/.oakappignore new file mode 100644 index 000000000..b42f14568 --- /dev/null +++ b/neural-networks/depth-estimation/neural-depth/.oakappignore @@ -0,0 +1,35 @@ +host_eval/ + +# Python virtual environments +venv/ +.venv/ + +# Node.js +# ignore node_modules, it will be reinstalled in the container +node_modules/ + +# Multimedia files +media/ + +# Documentation +README.md + +# VCS +.git/ +.github/ +.gitlab/ + +# The following files are ignored by default +# uncomment a line if you explicitly need it + +# !*.oakapp + +# Python +# !**/.mypy_cache/ +# !**/.ruff_cache/ + +# IDE files +# !**/.idea +# !**/.vscode +# !**/.zed + diff --git a/neural-networks/depth-estimation/neural-depth/README.md b/neural-networks/depth-estimation/neural-depth/README.md new file mode 100644 index 000000000..b051c6952 --- /dev/null +++ b/neural-networks/depth-estimation/neural-depth/README.md @@ -0,0 +1,67 @@ +# Neural Depth + +This example showcases [Luxonis NeuralDepth](https://docs.luxonis.com/software-v3/depthai/depthai-components/nodes/neural_depth/) model running on RVC4 device. + +If you are interested in evaluating the model on existing dataset (e.g. Middlebury Stereo dataset) please refer to the [`host_eval`](host_eval/README.md) code. + +> **Note:** RVC4 device only. + +## Demo + +![Image example](media/example.gif) + +## Usage + +Running this example requires a **RVC4 Luxonis device** connected to your computer. Refer to the [documentation](https://docs.luxonis.com/software-v3/) to setup your device if you haven't done it already. + +You can run the example fully on device ([`STANDALONE` mode](#standalone-mode-rvc4-only)) or using your computer as host ([`PERIPHERAL` mode](#peripheral-mode)). 
+ +Here is a list of all available parameters: + +``` +-m {NANO,SMALL,MEDIUM,LARGE}, --model {NANO,SMALL,MEDIUM,LARGE} + Model variant to use. One of `['NANO', 'SMALL', 'MEDIUM', 'LARGE']`. Defaults to 'LARGE'. (default: LARGE) +-d DEVICE, --device DEVICE + Optional name, DeviceID or IP of the camera to connect to. (default: None) +-fps FPS_LIMIT, --fps_limit FPS_LIMIT + FPS limit for the model runtime. (default: None) +``` + +## Peripheral Mode + +### Installation + +You need to first prepare a **Python 3.10** environment with the following packages installed: + +- [DepthAI](https://pypi.org/project/depthai/), +- [DepthAI Nodes](https://pypi.org/project/depthai-nodes/). + +You can simply install them by running: + +```bash +pip install -r requirements.txt +``` + +Running in peripheral mode requires a host computer and there will be communication between device and host which could affect the overall speed of the app. Below are some examples of how to run the example. + +### Examples + +```bash +python3 main.py +``` + +This will run the example with default arguments. + +## Standalone Mode (RVC4 only) + +Running the example in the standalone mode, app runs entirely on the device. +To run the example in this mode, first install the `oakctl` tool using the installation instructions [here](https://docs.luxonis.com/software-v3/oak-apps/oakctl). + +The app can then be run with: + +```bash +oakctl connect +oakctl app run . +``` + +This will run the example with default argument values. If you want to change these values you need to edit the `oakapp.toml` file (refer [here](https://docs.luxonis.com/software-v3/oak-apps/configuration/) for more information about this configuration file). diff --git a/neural-networks/depth-estimation/neural-depth/host_eval/README.md b/neural-networks/depth-estimation/neural-depth/host_eval/README.md new file mode 100644 index 000000000..8db9c1eb6 --- /dev/null +++ b/neural-networks/depth-estimation/neural-depth/host_eval/README.md @@ -0,0 +1,96 @@ +# NeuralDepth Host Evaluation + +This pipeline evaluates NeuralDepth models on stereo datasets by sending image pairs from the host to an OAK device and computing disparity accuracy metrics. + +> **Note:** RVC4 peripheral mode only. + +**Eval Notes:** +Images are resized with preserved aspect ratio and padded to evaluation size 800x1280. + +## Prerequisites + +First you have to make sure evaluation dataset is downloaded. In this example we use the [Middlebury 2024 stereo dataset](https://vision.middlebury.edu/stereo/data/scenes2014/) and you can download it by running: + +```bash +cd utils +python middlebury_download.py --calibration {perfect,imperfect} --max_scenes --output +``` + +This creates a `data/` folder with `perfect/` and `imperfect/` subfolders containing scene directories. + +The `StereoDataSample` class in `utils/utils.py` is designed for the Middlebury dataset format (left: `im0.png`, right: `im1.png`, ground truth: `disp0.pfm`). Modify it to support other stereo dataset formats. + +## Usage + +Running this example requires a **RVC4 Luxonis device** connected to your computer. Refer to the [documentation](https://docs.luxonis.com/software-v3/) to setup your device if you haven't done it already. + +You can run the example using your computer as host in ([`PERIPHERAL` mode](#peripheral-mode)). + +Here is a list of all available parameters: + +```bash + -m {NANO,SMALL,MEDIUM,LARGE}, --model {NANO,SMALL,MEDIUM,LARGE} + Model variant to use. One of `['NANO', 'SMALL', 'MEDIUM', 'LARGE']`. 
(default: LARGE) + --dataset DATASET Path to the dataset folder. (default: data/imperfect) + -o OUTPUT, --output OUTPUT + Output folder for evaluation results. (default: outputs_neural_depth_eval) + -d DEVICE_IP, --device_ip DEVICE_IP + IP address of the device to connect to. (default: None) +``` + +## Peripheral Mode + +### Installation + +You need to first prepare a **Python 3.10** environment with the following packages installed: + +- [DepthAI](https://pypi.org/project/depthai/), +- [DepthAI Nodes](https://pypi.org/project/depthai-nodes/). + +You can simply install them by running: + +```bash +pip install -r requirements.txt +``` + +Running in peripheral mode requires a host computer and there will be communication between device and host which could affect the overall speed of the app. Below are some examples of how to run the example. + +### Examples + +```bash +python main.py +``` + +This will run the example with default arguments. + +```bash +python main.py -m NANO +``` + +This will run the example with `NANO` model variant. + +### Output + +For each scene, the pipeline generates: + +- `disparity.png`: Colorized disparity map with metrics overlay +- Console output: Per-scene and average metrics (EPE, Bad1-4, Density) + +### Metrics + +| - | EPE | Bad2 | Bad4 | +| ------------------------------------------------------------------------- | ----- | ----- | ---- | +| Middlebury 2014 (train 10 scenes) / perfect / SNPE v2.32.0 | 1.55 | 12.45 | 5.56 | +| Middlebury 2014 (train 10 scenes) / imperfect / SNPE v2.32.0 | 1.62 | 13.22 | 5.42 | +| Middlebury 2014 (train + additional 23 scenes) / perfect / SNPE v2.32.0 | 1.374 | 10.36 | 4.29 | +| Middlebury 2014 (train + additional 23 scenes) / imperfect / SNPE v2.32.0 | 1.44 | 11.03 | 4.48 | +| Middlebury 2014 (train + additional 23 scenes) / perfect / SNPE v2.33.6 | 1.15 | 8.09 | 2.9 | + +## Comments on choices of post-processing: + +In practice not all usecase require full density (predictions for 100% of pixels) and a small tradeoff can be acceptable to filter out less reliable pixels and/or occlusions.
+For example, 3D use cases that rely on clean point clouds benefit greatly from robust filtering: suppressing unreliable pixels, low-confidence regions, and border regions yields cleaner point clouds and an accuracy boost.
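+
+A minimal sketch of this optional filtering (the helper name is illustrative; it assumes NumPy arrays for the disparity, confidence, and edge maps and uses the threshold and border values from `host_eval/main.py`):
+
+```python
+import numpy as np
+
+
+def filter_disparity(disp, conf, edge, conf_threshold=0.55, edge_threshold=6.0, border_erase_pixels=10):
+    """Suppress unreliable disparity pixels. Every step is optional."""
+    disp = disp.copy()
+    disp[conf < conf_threshold] = 0.0   # drop low-confidence predictions
+    disp[edge >= edge_threshold] = 0.0  # drop strong-edge / likely occluded regions
+    if border_erase_pixels > 0:         # erase borders, which often lack matches in the other view
+        b = border_erase_pixels
+        disp[:b, :] = 0
+        disp[-b:, :] = 0
+        disp[:, :b] = 0
+        disp[:, -b:] = 0
+    return disp
+```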
+ +The above is the main reason `confidence` and `edge` predictions are provided, and why border-erasure postprocessing is used (border pixel are less likely to have matching points between images and can introduce noise to pointclouds). + +Most importantly all these post-processing choices are __optional__, full disparity map is available with `confidence` and `edge` maps for the user to choose how to use them or not. diff --git a/neural-networks/depth-estimation/neural-depth/host_eval/main.py b/neural-networks/depth-estimation/neural-depth/host_eval/main.py new file mode 100644 index 000000000..b578a4f28 --- /dev/null +++ b/neural-networks/depth-estimation/neural-depth/host_eval/main.py @@ -0,0 +1,198 @@ +import cv2 +import depthai as dai +import numpy as np +from pathlib import Path + +from utils import StereoDataSample +from utils.arguments import initialize_argparser + + +MODEL_VARIANT_MAP = { + "NANO": (dai.DeviceModelZoo.NEURAL_DEPTH_NANO, (240, 384)), + "SMALL": (dai.DeviceModelZoo.NEURAL_DEPTH_SMALL, (300, 480)), + "MEDIUM": (dai.DeviceModelZoo.NEURAL_DEPTH_MEDIUM, (360, 576)), + "LARGE": (dai.DeviceModelZoo.NEURAL_DEPTH_LARGE, (480, 768)), +} + + +def parse_scenes(base_folder): + scenes = [] + base_path = Path(base_folder) + scene_dirs = sorted( + [scene for scene in base_path.iterdir() if scene.is_dir()], + key=lambda scene: scene.name, + ) + for scene_dir in scene_dirs: + left_path = scene_dir / "im0.png" + right_path = scene_dir / "im1.png" + gt_path = scene_dir / "disp0.pfm" + if left_path.exists() and right_path.exists(): + scenes.append( + { + "name": scene_dir.name, + "left": str(left_path), + "right": str(right_path), + "gt": str(gt_path) if gt_path.exists() else None, + } + ) + return scenes + + +def create_img_frame(img, sequence_num=0): + img_frame = dai.ImgFrame() + img_frame.setCvFrame(img, dai.ImgFrame.Type.GRAY8) + img_frame.setSequenceNum(sequence_num) + img_frame.setTimestamp(dai.Clock.now()) + return img_frame + + +def visualize_disparity(disp, max_disparity, metrics=None): + disp_colored = cv2.applyColorMap( + (np.clip(disp, 0, max_disparity) / max_disparity * 255).astype(np.uint8), + cv2.COLORMAP_JET, + ) + if metrics: + text_bg = np.zeros((80, disp_colored.shape[1], 3), dtype=np.uint8) + cv2.putText( + text_bg, + f"EPE: {metrics['EPE']:.2f}", + (10, 20), + cv2.FONT_HERSHEY_SIMPLEX, + 0.6, + (255, 255, 255), + 2, + ) + cv2.putText( + text_bg, + f"Bad2: {metrics['bad2']:.1f}% Bad4: {metrics['bad4']:.1f}%", + (10, 45), + cv2.FONT_HERSHEY_SIMPLEX, + 0.6, + (255, 255, 255), + 2, + ) + cv2.putText( + text_bg, + f"Density: {metrics['density']:.2f}", + (10, 70), + cv2.FONT_HERSHEY_SIMPLEX, + 0.6, + (255, 255, 255), + 2, + ) + disp_colored = np.vstack([text_bg, disp_colored]) + return disp_colored + + +if __name__ == "__main__": + _, args = initialize_argparser() + + eval_size = (800, 1280) # fixed at sensor max resolution + model_zoo_id, inference_size = MODEL_VARIANT_MAP[args.model] + max_disparity = 192.0 + border_erase_pixels = 10 # clean pointcloud border regions + conf_threshold = 0.55 + edge_threshold = 6.0 + + output_dir = Path(args.output) + output_dir.mkdir(parents=True, exist_ok=True) + + scenes = parse_scenes(args.dataset) + print(f"Found {len(scenes)} scenes") + + device = dai.Device(dai.DeviceInfo(args.device_ip)) + all_metrics = [] + + with dai.Pipeline(device) as pipeline: + print("Creating pipeline...") + neural_depth = pipeline.create(dai.node.NeuralNetwork) + neural_depth.setModelFromDeviceZoo(model_zoo_id) + + left_queue = 
neural_depth.inputs["left"].createInputQueue(maxSize=1) + right_queue = neural_depth.inputs["right"].createInputQueue(maxSize=1) + out_queue = neural_depth.out.createOutputQueue(maxSize=1) + print("Pipeline created.") + + pipeline.start() + + for idx, scene in enumerate(scenes): + print(f"\n[{idx + 1}/{len(scenes)}] {scene['name']}") + + sample = StereoDataSample( + left_path=scene["left"], + right_path=scene["right"], + eval_size=eval_size, + inference_size=inference_size, + gt_path=scene["gt"], + to_gray=True, + max_disparity=max_disparity, + padding_mode="center", + border_erase_pixels=border_erase_pixels, + ) + + left_img, right_img = sample.get_inference_inputs() + left_uint8 = left_img.astype(np.uint8).squeeze() + right_uint8 = right_img.astype(np.uint8).squeeze() + + left_frame = create_img_frame(left_uint8, sequence_num=idx) + right_frame = create_img_frame(right_uint8, sequence_num=idx) + + left_queue.send(left_frame) + right_queue.send(right_frame) + + frame = out_queue.get() + + disp = np.array( + frame.getTensor("disparity", dequantize=True), dtype=np.float32 + ) + disp_bchw = disp.reshape(1, 1, disp.shape[1], disp.shape[2]) + + conf = np.array( + frame.getTensor("confidence", dequantize=True), dtype=np.float32 + ) + conf_bchw = conf.reshape(1, 1, conf.shape[1], conf.shape[2]) + + edge = np.array(frame.getTensor("edge", dequantize=True), dtype=np.float32) + edge_bchw = edge.reshape(1, 1, edge.shape[1], edge.shape[2]) + + sample.set_predictions( + disp_bchw, conf_bchw, edge_bchw, conf_threshold, edge_threshold + ) + + disp_vis, _, _ = sample.get_predictions(target="eval", strip_padding=True) + + metrics = None + if scene["gt"]: + metrics = sample.compute_metrics(target="eval", strip_padding=True) + all_metrics.append(metrics) + print( + " EPE={:.3f}, bad2={:.1f}%, bad4={:.1f}%, density={:.2f}".format( + metrics["EPE"], + metrics["bad2"], + metrics["bad4"], + metrics["density"], + ) + ) + + scene_dir = output_dir / scene["name"] + scene_dir.mkdir(exist_ok=True) + + vis = visualize_disparity(disp_vis, max_disparity, metrics) + cv2.imwrite(str(scene_dir / "disparity.png"), vis) + + pipeline.stop() + + if all_metrics: + print(f"\n{'=' * 60}") + print("AVERAGE METRICS") + print(f"{'=' * 60}") + avg = {k: np.mean([m[k] for m in all_metrics]) for k in all_metrics[0].keys()} + print(f"EPE: {avg['EPE']:.3f}") + print(f"Bad1: {avg['bad1']:.2f}%") + print(f"Bad2: {avg['bad2']:.2f}%") + print(f"Bad3: {avg['bad3']:.2f}%") + print(f"Bad4: {avg['bad4']:.2f}%") + print(f"Density: {avg['density']:.2f}") + print(f"{'=' * 60}") + + print(f"\nResults saved to: {output_dir}") diff --git a/neural-networks/depth-estimation/neural-depth/host_eval/requirements.txt b/neural-networks/depth-estimation/neural-depth/host_eval/requirements.txt new file mode 100644 index 000000000..e585b4f7a --- /dev/null +++ b/neural-networks/depth-estimation/neural-depth/host_eval/requirements.txt @@ -0,0 +1,5 @@ +beautifulsoup4==4.12.3 +depthai==3.2.1 +numpy +opencv-python~=4.10.0 +requests diff --git a/neural-networks/depth-estimation/neural-depth/host_eval/utils/__init__.py b/neural-networks/depth-estimation/neural-depth/host_eval/utils/__init__.py new file mode 100644 index 000000000..f150a4ce9 --- /dev/null +++ b/neural-networks/depth-estimation/neural-depth/host_eval/utils/__init__.py @@ -0,0 +1,3 @@ +from .utils import StereoDataSample + +__all__ = ["StereoDataSample"] diff --git a/neural-networks/depth-estimation/neural-depth/host_eval/utils/arguments.py 
b/neural-networks/depth-estimation/neural-depth/host_eval/utils/arguments.py new file mode 100644 index 000000000..e9cfc8138 --- /dev/null +++ b/neural-networks/depth-estimation/neural-depth/host_eval/utils/arguments.py @@ -0,0 +1,46 @@ +import argparse + + +def initialize_argparser(): + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + + parser.add_argument( + "-m", + "--model", + help="Model variant to use. One of `['NANO', 'SMALL', 'MEDIUM', 'LARGE']`.", + required=False, + default="LARGE", + choices=["NANO", "SMALL", "MEDIUM", "LARGE"], + type=str, + ) + + parser.add_argument( + "--dataset", + help="Path to the dataset folder.", + required=False, + default="data/imperfect", + type=str, + ) + + parser.add_argument( + "-o", + "--output", + help="Output folder for evaluation results.", + required=False, + default="outputs_neural_depth_eval", + type=str, + ) + + parser.add_argument( + "-d", + "--device_ip", + help="IP address of the device to connect to.", + required=True, + type=str, + ) + + args = parser.parse_args() + + return parser, args diff --git a/neural-networks/depth-estimation/neural-depth/host_eval/utils/middlebury_download.py b/neural-networks/depth-estimation/neural-depth/host_eval/utils/middlebury_download.py new file mode 100644 index 000000000..3bf14681b --- /dev/null +++ b/neural-networks/depth-estimation/neural-depth/host_eval/utils/middlebury_download.py @@ -0,0 +1,104 @@ +import argparse +import os +import zipfile +from urllib.parse import urljoin + +import requests +from bs4 import BeautifulSoup + + +MIDDLEBURY_ZIP_URL = "https://vision.middlebury.edu/stereo/data/scenes2014/zip/" + + +def download_and_process_zips( + target_url, download_root_folder, calibration=None, max_scenes=None +): + try: + response = requests.get(target_url) + response.raise_for_status() + except requests.exceptions.RequestException as exc: + print(f"Error fetching URL: {exc}") + return + + soup = BeautifulSoup(response.text, "html.parser") + + links = [ + anchor["href"] + for anchor in soup.find_all("a", href=True) + if anchor["href"].endswith(".zip") + ] + + if not links: + print("No zip files found on the page.") + return + + if calibration: + filtered_links = [] + for link in links: + if "imperfect.zip" in link and "imperfect" in calibration: + filtered_links.append(link) + elif "perfect.zip" in link and "perfect" in calibration: + filtered_links.append(link) + links = filtered_links + + if max_scenes: + links = links[:max_scenes] + + print(f"Found {len(links)} zip files to download. 
Starting process...\n") + + for link in links: + filename = os.path.basename(link) + full_url = urljoin(target_url, link) + + if "imperfect.zip" in filename: + subfolder = "imperfect" + elif "perfect.zip" in filename: + subfolder = "perfect" + else: + subfolder = "others" + + target_dir = os.path.join(download_root_folder, subfolder) + os.makedirs(target_dir, exist_ok=True) + + local_zip_path = os.path.join(target_dir, filename) + + try: + with requests.get(full_url, stream=True) as response: + response.raise_for_status() + with open(local_zip_path, "wb") as file_handle: + for chunk in response.iter_content(chunk_size=8192): + file_handle.write(chunk) + + print(" Extracting...") + with zipfile.ZipFile(local_zip_path, "r") as zip_ref: + zip_ref.extractall(target_dir) + + print(" Deleting zip file...") + os.remove(local_zip_path) + + except Exception as exc: + print(f" Error processing {filename}: {exc}") + + print("\nDone!") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + parser.add_argument( + "--calibration", + nargs="+", + choices=["perfect", "imperfect"], + default=["perfect", "imperfect"], + ) + parser.add_argument("--max_scenes", type=int, default=None) + parser.add_argument("--output", type=str, default="../data") + args = parser.parse_args() + + download_and_process_zips( + MIDDLEBURY_ZIP_URL, + args.output, + calibration=args.calibration, + max_scenes=args.max_scenes, + ) diff --git a/neural-networks/depth-estimation/neural-depth/host_eval/utils/utils.py b/neural-networks/depth-estimation/neural-depth/host_eval/utils/utils.py new file mode 100644 index 000000000..3fec31c5f --- /dev/null +++ b/neural-networks/depth-estimation/neural-depth/host_eval/utils/utils.py @@ -0,0 +1,604 @@ +import os +import cv2 +import numpy as np + + +class PFMReader: + @staticmethod + def read(file_path): + with open(file_path, "rb") as file: + header_lines = [] + + while len(header_lines) < 3: + line = file.readline().decode("latin-1").strip() + if not line or line.startswith("#"): + continue + header_lines.append(line) + + color_type = header_lines[0] + if color_type not in ["PF", "Pf"]: + raise ValueError(f"Invalid PFM header: Unknown type '{color_type}'") + + channels = 3 if color_type == "PF" else 1 + + dims = header_lines[1].split() + width, height = int(dims[0]), int(dims[1]) + + scale = float(header_lines[2]) + endian = "<" if scale < 0 else ">" + scale_factor = abs(scale) + + data = np.fromfile(file, dtype=f"{endian}f") + + if channels == 3: + shape = (height, width, 3) + else: + shape = (height, width) + + try: + data = np.reshape(data, shape) + except ValueError: + raise ValueError( + "PFM data mismatch: Header says " + f"{width}x{height}x{channels}, but found {data.size} floats." 
+ ) + + data = np.flipud(data) + data[data == np.inf] = 0 + data[data < 0] = 0 + return data.astype(np.float32) + + @staticmethod + def write(file_path, image, scale=1): + image = image.astype(np.float32) + + if len(image.shape) == 3 and image.shape[2] == 3: + color = True + dtype = "PF" + else: + color = False + dtype = "Pf" + + image = np.flipud(image) + + height, width = image.shape[:2] + + scale_str = f"{-scale}" + + with open(file_path, "wb") as file: + header = f"{dtype}\n{width} {height}\n{scale_str}\n" + file.write(header.encode("latin-1")) + image.tofile(file) + + +class StereoDataSample: + PAD_TOP_RIGHT = "top_right" + PAD_CENTER = "center" + + def __init__( + self, + left_path, + right_path, + eval_size, + inference_size, + gt_path=None, + to_gray=False, + max_disparity=192.0, + padding_mode="top_right", + border_erase_pixels=0, + debug=False, + is_legacy_logic=False, + ): + self.left_path = left_path + self.right_path = right_path + self.gt_path = gt_path + self.eval_size = eval_size + self.inference_size = inference_size + self.to_gray = to_gray + self.max_disparity = max_disparity + self.padding_mode = padding_mode + self.border_erase_pixels = border_erase_pixels + self.debug = debug + self.is_legacy_logic = is_legacy_logic + + if padding_mode not in [self.PAD_TOP_RIGHT, self.PAD_CENTER]: + raise ValueError( + f"Invalid padding_mode: {padding_mode}. Must be " + f"'{self.PAD_TOP_RIGHT}' or '{self.PAD_CENTER}'" + ) + + self.original_left = self._load_image(left_path, is_gt=False) + self.original_right = self._load_image(right_path, is_gt=False) + self.original_gt = self._load_image(gt_path, is_gt=True) if gt_path else None + + self.original_size = self.original_left.shape[:2] + + self.input_left = None + self.input_right = None + self.processed_gt = None + + self._eval_disparity = None + self._eval_confidence = None + self._eval_edge = None + + self.meta_step1 = {} + self.meta_step2 = {} + + self._preprocess(debug=debug) + + def _preprocess(self, debug=False): + left_bgr_u8 = self.original_left.astype(np.uint8) + right_bgr_u8 = self.original_right.astype(np.uint8) + + left_rgb_u8 = cv2.cvtColor(left_bgr_u8, cv2.COLOR_BGR2RGB) + right_rgb_u8 = cv2.cvtColor(right_bgr_u8, cv2.COLOR_BGR2RGB) + + left_rgb_u8_f32 = left_rgb_u8.astype(np.float32) + right_rgb_u8_f32 = right_rgb_u8.astype(np.float32) + + l_eval, meta1 = self._resize_pad_safe(left_rgb_u8_f32, self.eval_size) + r_eval, _ = self._resize_pad_safe(right_rgb_u8_f32, self.eval_size) + + l_eval_u8 = l_eval.astype(np.uint8) + r_eval_u8 = r_eval.astype(np.uint8) + + self.meta_step1 = meta1 + + if debug: + print(f"[DEBUG _preprocess] After resize to eval: shape={l_eval_u8.shape}") + + if self.original_gt is not None: + if debug: + print(f"[DEBUG _preprocess] original_gt shape={self.original_gt.shape}") + gt_eval, _ = self._resize_pad_safe( + self.original_gt, self.eval_size, is_disparity=True + ) + self.processed_gt = gt_eval + if debug: + print(f"[DEBUG _preprocess] processed_gt shape={gt_eval.shape}") + + l_inf_u8_f32 = l_eval_u8.astype(np.float32) + r_inf_u8_f32 = r_eval_u8.astype(np.float32) + + l_inf_resized, meta2 = self._resize_pad_safe(l_inf_u8_f32, self.inference_size) + r_inf_resized, _ = self._resize_pad_safe(r_inf_u8_f32, self.inference_size) + + l_inf_u8 = l_inf_resized.astype(np.uint8) + r_inf_u8 = r_inf_resized.astype(np.uint8) + + if debug: + print( + f"[DEBUG _preprocess] After resize to inference: shape={l_inf_u8.shape}" + ) + + if l_inf_u8.shape[2] == 3: + l_gray_u8 = cv2.cvtColor(l_inf_u8, cv2.COLOR_RGB2GRAY) + 
r_gray_u8 = cv2.cvtColor(r_inf_u8, cv2.COLOR_RGB2GRAY) + else: + l_gray_u8 = l_inf_u8.squeeze() + r_gray_u8 = r_inf_u8.squeeze() + + l_gray_u8 = np.expand_dims(l_gray_u8, axis=2) + r_gray_u8 = np.expand_dims(r_gray_u8, axis=2) + + l_inf = l_gray_u8.astype(np.float32) + r_inf = r_gray_u8.astype(np.float32) + + self.meta_step2 = meta2 + self.input_left = l_inf + self.input_right = r_inf + + @staticmethod + def _border_erase(disp, border_pixels): + if border_pixels <= 0: + return disp + disp = disp.copy() + disp[:border_pixels, :] = 0 + disp[disp.shape[0] - border_pixels :, :] = 0 + disp[:, :border_pixels] = 0 + disp[:, disp.shape[1] - border_pixels :] = 0 + return disp + + def get_inference_inputs(self): + return self.input_left, self.input_right + + def get_eval_images(self, strip_padding=False): + l_img = self._ensure_color_format(self.original_left, self.to_gray) + r_img = self._ensure_color_format(self.original_right, self.to_gray) + + l_eval, _ = self._resize_pad_safe(l_img, self.eval_size) + r_eval, _ = self._resize_pad_safe(r_img, self.eval_size) + + if strip_padding: + pad_top = self.meta_step1["pad_top"] + pad_bottom = self.meta_step1["pad_bottom"] + pad_left = self.meta_step1["pad_left"] + pad_right = self.meta_step1["pad_right"] + + h_end = ( + self.eval_size[0] - pad_bottom if pad_bottom > 0 else self.eval_size[0] + ) + w_end = ( + self.eval_size[1] - pad_right if pad_right > 0 else self.eval_size[1] + ) + + return ( + l_eval[pad_top:h_end, pad_left:w_end], + r_eval[pad_top:h_end, pad_left:w_end], + ) + + return l_eval, r_eval + + def get_ground_truth(self, target="eval", strip_padding=False): + if self.original_gt is None: + return None + + if target == "eval": + if strip_padding: + pad_top = self.meta_step1["pad_top"] + pad_bottom = self.meta_step1["pad_bottom"] + pad_left = self.meta_step1["pad_left"] + pad_right = self.meta_step1["pad_right"] + + h_end = ( + self.eval_size[0] - pad_bottom + if pad_bottom > 0 + else self.eval_size[0] + ) + w_end = ( + self.eval_size[1] - pad_right + if pad_right > 0 + else self.eval_size[1] + ) + + return self.processed_gt[pad_top:h_end, pad_left:w_end] + return self.processed_gt + if target == "original": + return self.original_gt + raise ValueError(f"Unknown target: {target}") + + def set_predictions( + self, disparity, confidence, edge, conf_threshold, edge_threshold, debug=False + ): + pad_top = self.meta_step2["pad_top"] + pad_bottom = self.meta_step2["pad_bottom"] + pad_left = self.meta_step2["pad_left"] + pad_right = self.meta_step2["pad_right"] + + h_end = ( + self.inference_size[0] - pad_bottom + if pad_bottom > 0 + else self.inference_size[0] + ) + w_end = ( + self.inference_size[1] - pad_right + if pad_right > 0 + else self.inference_size[1] + ) + + if debug: + print( + f"[DEBUG set_predictions] inference_size={self.inference_size}, " + f"eval_size={self.eval_size}" + ) + print(f"[DEBUG set_predictions] meta_step2={self.meta_step2}") + print( + "[DEBUG set_predictions] crop region: " + f"[{pad_top}:{h_end}, {pad_left}:{w_end}]" + ) + print( + "[DEBUG set_predictions] disparity input shape=" + f"{disparity.shape}, range=[{disparity.min():.3f}, " + f"{disparity.max():.3f}]" + ) + + disp = np.squeeze(disparity) + cropped = disp[pad_top:h_end, pad_left:w_end] + cropped_w = cropped.shape[1] + + if debug: + print( + f"[DEBUG set_predictions] cropped shape={cropped.shape}, " + f"cropped_w={cropped_w}" + ) + print( + "[DEBUG set_predictions] disp scale factor = " + f"{self.eval_size[1]} / {cropped_w} = " + f"{self.eval_size[1] / 
cropped_w:.6f}" + ) + + disp_eval = cv2.resize( + cropped, + (self.eval_size[1], self.eval_size[0]), + interpolation=cv2.INTER_LINEAR, + ) + disp_eval = disp_eval * (self.eval_size[1] / cropped_w) + + if debug: + print( + "[DEBUG set_predictions] disp_eval BEFORE filtering: " + f"range=[{disp_eval.min():.3f}, {disp_eval.max():.3f}]" + ) + + conf = np.squeeze(confidence) + cropped = conf[pad_top:h_end, pad_left:w_end] + conf_eval = cv2.resize( + cropped, + (self.eval_size[1], self.eval_size[0]), + interpolation=cv2.INTER_LINEAR, + ) + + edg = np.squeeze(edge) + cropped = edg[pad_top:h_end, pad_left:w_end] + edge_eval = cv2.resize( + cropped, + (self.eval_size[1], self.eval_size[0]), + interpolation=cv2.INTER_LINEAR, + ) + + disp_eval[conf_eval < conf_threshold] = 0.0 + disp_eval = disp_eval * (edge_eval < edge_threshold).astype(np.float32) + + self._eval_disparity = disp_eval + self._eval_confidence = conf_eval + self._eval_edge = edge_eval + + def get_predictions(self, target="eval", strip_padding=False): + if self._eval_disparity is None: + raise ValueError("No predictions set yet.") + + if target == "eval": + if strip_padding: + pad_top = self.meta_step1["pad_top"] + pad_bottom = self.meta_step1["pad_bottom"] + pad_left = self.meta_step1["pad_left"] + pad_right = self.meta_step1["pad_right"] + + h_end = ( + self.eval_size[0] - pad_bottom + if pad_bottom > 0 + else self.eval_size[0] + ) + w_end = ( + self.eval_size[1] - pad_right + if pad_right > 0 + else self.eval_size[1] + ) + + disp = self._eval_disparity[pad_top:h_end, pad_left:w_end] + conf = self._eval_confidence[pad_top:h_end, pad_left:w_end] + edge = self._eval_edge[pad_top:h_end, pad_left:w_end] + + if self.border_erase_pixels > 0: + disp = self._border_erase(disp, self.border_erase_pixels) + + return disp, conf, edge + return self._eval_disparity, self._eval_confidence, self._eval_edge + + if target == "original": + pad_top = self.meta_step1["pad_top"] + pad_bottom = self.meta_step1["pad_bottom"] + pad_left = self.meta_step1["pad_left"] + pad_right = self.meta_step1["pad_right"] + + h_end = ( + self.eval_size[0] - pad_bottom if pad_bottom > 0 else self.eval_size[0] + ) + w_end = ( + self.eval_size[1] - pad_right if pad_right > 0 else self.eval_size[1] + ) + + cropped = self._eval_disparity[pad_top:h_end, pad_left:w_end] + cropped_w = cropped.shape[1] + final_disparity = cv2.resize( + cropped, + (self.original_size[1], self.original_size[0]), + interpolation=cv2.INTER_LINEAR, + ) + final_disparity = final_disparity * (self.original_size[1] / cropped_w) + + cropped = self._eval_confidence[pad_top:h_end, pad_left:w_end] + final_confidence = cv2.resize( + cropped, + (self.original_size[1], self.original_size[0]), + interpolation=cv2.INTER_LINEAR, + ) + + cropped = self._eval_edge[pad_top:h_end, pad_left:w_end] + final_edge = cv2.resize( + cropped, + (self.original_size[1], self.original_size[0]), + interpolation=cv2.INTER_LINEAR, + ) + + return final_disparity, final_confidence, final_edge + + raise ValueError(f"Unknown target: {target}") + + def compute_metrics( + self, target="eval", is_legacy_code=False, strip_padding=False, debug=False + ): + if self.original_gt is None: + return {} + if self._eval_disparity is None: + raise ValueError("No predictions available.") + + if target == "eval": + if strip_padding: + pad_top = self.meta_step1["pad_top"] + pad_bottom = self.meta_step1["pad_bottom"] + pad_left = self.meta_step1["pad_left"] + pad_right = self.meta_step1["pad_right"] + + h_end = ( + self.eval_size[0] - pad_bottom + if 
pad_bottom > 0 + else self.eval_size[0] + ) + w_end = ( + self.eval_size[1] - pad_right + if pad_right > 0 + else self.eval_size[1] + ) + + gt = self.processed_gt[pad_top:h_end, pad_left:w_end] + pred = self._eval_disparity[pad_top:h_end, pad_left:w_end] + + if self.border_erase_pixels > 0: + pred = self._border_erase(pred, self.border_erase_pixels) + if debug: + print( + "[DEBUG compute_metrics] Applied border_erase(" + f"{self.border_erase_pixels}) after strip_padding" + ) + else: + gt = self.processed_gt + pred = self._eval_disparity + + elif target == "original": + gt = self.original_gt + pad_top = self.meta_step1["pad_top"] + pad_bottom = self.meta_step1["pad_bottom"] + pad_left = self.meta_step1["pad_left"] + pad_right = self.meta_step1["pad_right"] + + h_end = ( + self.eval_size[0] - pad_bottom if pad_bottom > 0 else self.eval_size[0] + ) + w_end = ( + self.eval_size[1] - pad_right if pad_right > 0 else self.eval_size[1] + ) + + cropped = self._eval_disparity[pad_top:h_end, pad_left:w_end] + cropped_w = cropped.shape[1] + pred = cv2.resize( + cropped, + (self.original_size[1], self.original_size[0]), + interpolation=cv2.INTER_LINEAR, + ) + pred = pred * (self.original_size[1] / cropped_w) + + else: + raise ValueError(f"Unknown target: {target}") + + if debug: + print(f"[DEBUG compute_metrics] target={target}") + print( + "[DEBUG compute_metrics] gt shape=" + f"{gt.shape}, range=[{gt.min():.3f}, {gt.max():.3f}]" + ) + print( + "[DEBUG compute_metrics] pred shape=" + f"{pred.shape}, range=[{pred.min():.3f}, {pred.max():.3f}]" + ) + print( + "[DEBUG compute_metrics] gt>0 count=" + f"{(gt > 0).sum()}, pred>0 count={(pred > 0).sum()}" + ) + + valid_mask = (gt > 0) & (pred > 0) & (gt <= self.max_disparity) + + if valid_mask.sum() == 0: + return {"EPE": 0.0, "valid_pixels": 0, "density": 0.0} + + gt_valid_mask = (gt > 0) & (gt <= self.max_disparity) + total_gt_valid = gt_valid_mask.sum() + pred_valid_in_gt_region = ( + (pred[gt_valid_mask] > 0) & (pred[gt_valid_mask] <= self.max_disparity) + ).sum() + density = ( + pred_valid_in_gt_region / total_gt_valid if total_gt_valid > 0 else 0.0 + ) + + error = np.abs(gt[valid_mask] - pred[valid_mask]) + epe = np.mean(error) + d1 = (error > 3.0) | (error > 0.05 * np.abs(gt[valid_mask])) + bad1 = error > 1.0 + bad2 = error > 2.0 + bad3 = error > 3.0 + bad4 = error > 4.0 + + return { + "EPE": float(epe), + "D1_all": float(d1.mean()) * 100, + "bad1": float(bad1.mean()) * 100, + "bad2": float(bad2.mean()) * 100, + "bad3": float(bad3.mean()) * 100, + "bad4": float(bad4.mean()) * 100, + "density": float(density), + } + + def _resize_pad_safe(self, img, target_size, is_disparity=False): + h, w = img.shape[:2] + target_h, target_w = target_size + + scale = min(target_w / w, target_h / h) + new_w = int(w * scale) + new_h = int(h * scale) + + interp = cv2.INTER_LINEAR + resized = cv2.resize(img, (new_w, new_h), interpolation=interp) + + if is_disparity: + resized = resized * (new_w / w) + + resized = self._restore_channels(resized, img.shape) + + if self.padding_mode == self.PAD_TOP_RIGHT: + pad_top = target_h - new_h + pad_bottom = 0 + pad_left = 0 + pad_right = target_w - new_w + else: + pad_left = (target_w - new_w) // 2 + pad_right = target_w - new_w - pad_left + pad_top = (target_h - new_h) // 2 + pad_bottom = target_h - new_h - pad_top + + padded = cv2.copyMakeBorder( + resized, + pad_top, + pad_bottom, + pad_left, + pad_right, + cv2.BORDER_CONSTANT, + value=0, + ) + padded = self._restore_channels(padded, img.shape) + + meta = { + "scale": scale, + 
"pad_top": pad_top, + "pad_bottom": pad_bottom, + "pad_left": pad_left, + "pad_right": pad_right, + "padding_mode": self.padding_mode, + } + return padded, meta + + def _load_image(self, path, is_gt): + if not os.path.exists(path): + raise FileNotFoundError(f"File not found: {path}") + flags = cv2.IMREAD_UNCHANGED if is_gt else cv2.IMREAD_COLOR + + if is_gt: + img = PFMReader.read(path) + else: + img = cv2.imread(path, flags) + if img is None: + raise ValueError(f"Failed to load {path}") + return img.astype(np.float32) + + def _ensure_color_format(self, img, to_gray): + if len(img.shape) == 2: + img = img[:, :, np.newaxis] + if to_gray and img.shape[2] == 3: + img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)[:, :, np.newaxis] + elif not to_gray and img.shape[2] == 1: + img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) + return img + + def _restore_channels(self, img, original_shape): + if len(img.shape) == 2 and len(original_shape) == 3: + img = img[:, :, np.newaxis] + return img diff --git a/neural-networks/depth-estimation/neural-depth/main.py b/neural-networks/depth-estimation/neural-depth/main.py new file mode 100644 index 000000000..578204e4a --- /dev/null +++ b/neural-networks/depth-estimation/neural-depth/main.py @@ -0,0 +1,70 @@ +import depthai as dai +from depthai_nodes.node import ApplyColormap, ApplyDepthColormap + +from utils.arguments import initialize_argparser +from utils.manual_camera_control import ManualCameraControl + +_, args = initialize_argparser() + +visualizer = dai.RemoteConnection(httpPort=8082) +device = dai.Device(dai.DeviceInfo(args.device)) if args.device else dai.Device() +platform = device.getPlatform().name +print(f"Platform: {platform}") + +if platform != "RVC4": + raise ValueError("This example is supported only on RVC4 platform") + +if args.fps_limit is None: + args.fps_limit = 10 + print( + f"\nFPS limit set to {args.fps_limit} for {platform} platform. 
If you want to set a custom FPS limit, use the --fps_limit flag.\n" + ) + + +with dai.Pipeline(device) as pipeline: + print("Creating pipeline...") + + # Create pipeline + cameraLeft = pipeline.create(dai.node.Camera).build( + dai.CameraBoardSocket.CAM_B, sensorFps=args.fps_limit + ) + cameraRight = pipeline.create(dai.node.Camera).build( + dai.CameraBoardSocket.CAM_C, sensorFps=args.fps_limit + ) + leftOutput = cameraLeft.requestFullResolutionOutput() + rightOutput = cameraRight.requestFullResolutionOutput() + + neuralDepth = pipeline.create(dai.node.NeuralDepth).build( + leftOutput, rightOutput, args.model + ) + + manual_cam_control = pipeline.create(ManualCameraControl).build( + frame=neuralDepth.disparity, + control_queue=neuralDepth.inputConfig.createInputQueue(), + ) + + # Visualizations + disp_out = pipeline.create(ApplyDepthColormap).build(neuralDepth.disparity) + disp_out.setPercentileRange(low=2, high=98) + + conf_out = pipeline.create(ApplyColormap).build(neuralDepth.confidence) + edge_out = pipeline.create(ApplyColormap).build(neuralDepth.edge) + + visualizer.addTopic("Disparity", disp_out.out) + visualizer.addTopic("Confidence", conf_out.out) + visualizer.addTopic("Edge", edge_out.out) + visualizer.addTopic("Controls", manual_cam_control.out) + + print("Pipeline created.") + + pipeline.start() + visualizer.registerPipeline(pipeline) + + while pipeline.isRunning(): + key = visualizer.waitKey(1) + + if key == ord("q"): + print("Got q key from the remote connection!") + break + else: + manual_cam_control.handle_key_press(key) diff --git a/neural-networks/depth-estimation/neural-depth/media/example.gif b/neural-networks/depth-estimation/neural-depth/media/example.gif new file mode 100644 index 000000000..ee651b874 Binary files /dev/null and b/neural-networks/depth-estimation/neural-depth/media/example.gif differ diff --git a/neural-networks/depth-estimation/neural-depth/oakapp.toml b/neural-networks/depth-estimation/neural-depth/oakapp.toml new file mode 100644 index 000000000..1e857e989 --- /dev/null +++ b/neural-networks/depth-estimation/neural-depth/oakapp.toml @@ -0,0 +1,24 @@ +identifier = "com.example.depth-estimation.neural-depth" +app_version="1.0.0" + +prepare_container = [ + { type = "RUN", command = "apt-get update" }, + { type = "RUN", command = "apt-get install -y python3-pip" }, + { type = "COPY", source = "requirements.txt", target = "requirements.txt" }, + { type = "RUN", command = "pip3 install -r /app/requirements.txt --break-system-packages" }, +] + +prepare_build_container = [] + +build_steps = [] + +entrypoint = ["bash", "-c", "python3 -u /app/main.py"] + +[base_image] +api_url = "https://registry-1.docker.io" +service = "registry.docker.io" +oauth_url = "https://auth.docker.io/token" +auth_type = "repository" +auth_name = "luxonis/oakapp-base" +image_name = "luxonis/oakapp-base" +image_tag = "1.2.6" \ No newline at end of file diff --git a/neural-networks/depth-estimation/neural-depth/requirements.txt b/neural-networks/depth-estimation/neural-depth/requirements.txt new file mode 100644 index 000000000..1ef4f866c --- /dev/null +++ b/neural-networks/depth-estimation/neural-depth/requirements.txt @@ -0,0 +1,2 @@ +depthai==3.2.1 +depthai-nodes @ git+https://github.com/luxonis/depthai-nodes.git@1b1dd7953feeaff1ca1a8c2234c532704b167d5f \ No newline at end of file diff --git a/neural-networks/depth-estimation/neural-depth/utils/__init__.py b/neural-networks/depth-estimation/neural-depth/utils/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git 
a/neural-networks/depth-estimation/neural-depth/utils/arguments.py b/neural-networks/depth-estimation/neural-depth/utils/arguments.py new file mode 100644 index 000000000..a64f8170f --- /dev/null +++ b/neural-networks/depth-estimation/neural-depth/utils/arguments.py @@ -0,0 +1,47 @@ +import argparse +import depthai as dai + + +def initialize_argparser(): + """Initialize the argument parser for the script.""" + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + + parser.add_argument( + "-m", + "--model", + help="Model variant to use. One of `['NANO', 'SMALL', 'MEDIUM', 'LARGE']`. Defaults to 'LARGE'.", + required=False, + default="LARGE", + choices=["NANO", "SMALL", "MEDIUM", "LARGE"], + type=str, + ) + + parser.add_argument( + "-d", + "--device", + help="Optional name, DeviceID or IP of the camera to connect to.", + required=False, + default=None, + type=str, + ) + + parser.add_argument( + "-fps", + "--fps_limit", + help="FPS limit for the model runtime.", + required=False, + default=None, + type=int, + ) + + args = parser.parse_args() + MODEL_VARIANT_MAP = { + "NANO": dai.DeviceModelZoo.NEURAL_DEPTH_NANO, + "SMALL": dai.DeviceModelZoo.NEURAL_DEPTH_SMALL, + "MEDIUM": dai.DeviceModelZoo.NEURAL_DEPTH_MEDIUM, + "LARGE": dai.DeviceModelZoo.NEURAL_DEPTH_LARGE, + } + args.model = MODEL_VARIANT_MAP[args.model] + return parser, args diff --git a/neural-networks/depth-estimation/neural-depth/utils/manual_camera_control.py b/neural-networks/depth-estimation/neural-depth/utils/manual_camera_control.py new file mode 100644 index 000000000..16622f895 --- /dev/null +++ b/neural-networks/depth-estimation/neural-depth/utils/manual_camera_control.py @@ -0,0 +1,71 @@ +import depthai as dai + + +class ManualCameraControl(dai.node.HostNode): + def __init__(self) -> None: + super().__init__() + self.control_queue = None + self.current_config = dai.NeuralDepthConfig() + + def build( + self, frame: dai.Node.Output, control_queue: dai.Node.Input + ) -> "ManualCameraControl": + self.link_args(frame) + self.control_queue = control_queue + return self + + def process(self, frame): + img_annotations = dai.ImgAnnotations() + img_annotation = dai.ImgAnnotation() + txts = [ + f"Curr conf thr: {self.current_config.getConfidenceThreshold()} (Use W / S to change)", + f"Curr edge thr: {self.current_config.getEdgeThreshold()} (Use A / D to change)", + ] + + for i, txt in enumerate(txts): + txt_annot = self._get_text_annotation(txt, (0.05, 0.05 + i * 0.03)) + img_annotation.texts.append(txt_annot) + + img_annotations.annotations.append(img_annotation) + img_annotations.setTimestamp(frame.getTimestamp()) + + self.out.send(img_annotations) + + def handle_key_press(self, key: int) -> None: + if key == ord("w"): + currentThreshold = self.current_config.getConfidenceThreshold() + self.current_config.setConfidenceThreshold((currentThreshold + 5) % 255) + print( + "Setting confidence threshold to:", + self.current_config.getConfidenceThreshold(), + ) + self.control_queue.send(self.current_config) + if key == ord("s"): + currentThreshold = self.current_config.getConfidenceThreshold() + self.current_config.setConfidenceThreshold((currentThreshold - 5) % 255) + print( + "Setting confidence threshold to:", + self.current_config.getConfidenceThreshold(), + ) + self.control_queue.send(self.current_config) + if key == ord("d"): + currentThreshold = self.current_config.getEdgeThreshold() + self.current_config.setEdgeThreshold((currentThreshold + 1) % 255) + print("Setting edge threshold to:", 
self.current_config.getEdgeThreshold()) + self.control_queue.send(self.current_config) + if key == ord("a"): + currentThreshold = self.current_config.getEdgeThreshold() + self.current_config.setEdgeThreshold((currentThreshold - 1) % 255) + print("Setting edge threshold to:", self.current_config.getEdgeThreshold()) + self.control_queue.send(self.current_config) + + def _get_text_annotation( + self, txt: str, pos: tuple[float, float] + ) -> dai.TextAnnotation: + txt_annot = dai.TextAnnotation() + txt_annot.fontSize = 15 + txt_annot.text = txt + txt_annot.position = dai.Point2f(pos[0], pos[1]) + txt_annot.backgroundColor = dai.Color(0.0, 0.0, 0.0, 0.2) + txt_annot.textColor = dai.Color(1.0, 1.0, 1.0) + return txt_annot diff --git a/neural-networks/object-detection/barcode-detection-conveyor-belt/utils/__init__.py b/neural-networks/object-detection/barcode-detection-conveyor-belt/utils/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/constants.py b/tests/constants.py index 7551dddb3..f0f169583 100644 --- a/tests/constants.py +++ b/tests/constants.py @@ -93,8 +93,7 @@ }, "integrations/roboflow-workflow": { "reason": "Can't run without arguments Roboflow arguments", - "mode": "all", - "platform": "all", + "rules": {"and": [{"platform": "all"}]}, }, "neural-networks/speech-recognition/whisper-tiny-en": { "reason": "Complex example, works only on RVC4", @@ -116,6 +115,14 @@ "reason": "Requires a lot of host compute to run. No matching distribution found for onnxruntime-gpu>=1.19.0 for MacOS", "rules": {"or": [{"mode": ["standalone"]}, {"os": ["mac"]}]}, }, + "neural-networks/depth-estimation/neural-depth": { + "reason": "Model only available for RVC4", + "rules": {"and": [{"platform": ["rvc2"]}]}, + }, + "neural-networks/depth-estimation/neural-depth/host_eval": { + "reason": "Validation script, not an actual example", + "rules": {"and": [{"platform": "all"}]}, + }, "integrations/hub-snaps-events": { "reason": "Missing token, please set DEPTHAI_HUB_API_KEY environment variable or use setToken method", "rules": {"and": [{"platform": "all"}]},