Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
## Note by redistributor

This source code was modified for the [World Models lecture series](https://deeplearning.jp/lectures/world-model-2023/#) by Matsuo Lab at the University of Tokyo.

Specifically:

- OpenEXR- and Imath-related lines were commented out in `kubric/renderer/blender_utils.py` to avoid build errors on Colab.

# Kubric

[![Blender](https://github.com/google-research/kubric/actions/workflows/blender.yml/badge.svg?branch=main)](https://github.com/google-research/kubric/actions/workflows/blender.yml)
Expand Down
159 changes: 81 additions & 78 deletions kubric/renderer/blender_utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
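## This file has been modified from the original Kubric source code.
## OpenEXR- and Imath-related lines were commented out, so `read_channels_from_exr`
## and `get_render_layers_from_exr` are not available in this version.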

# Copyright 2023 The Kubric Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
Expand All @@ -19,8 +22,8 @@
from typing import Dict, Sequence, Tuple, Union

import numpy as np
import OpenEXR
import Imath
# import OpenEXR
# import Imath
import sklearn.utils
import trimesh

Expand Down Expand Up @@ -197,82 +200,82 @@ def activate_render_passes(
aux_view_layer.cycles.pass_crypto_depth = 2


def read_channels_from_exr(exr: OpenEXR.InputFile, channel_names: Sequence[str]) -> np.ndarray:
  """Reads the given channels from an EXR file and stacks them into one numpy array.

  Args:
    exr: An opened EXR input file.
    channel_names: Names of the channels to read (e.g. ["Image.R", "Image.G"]).

  Returns:
    An array of shape (height, width, len(channel_names)) holding the channels in the
    same order as `channel_names`.

  Raises:
    KeyError: If a channel's pixel type is not one of HALF, FLOAT or UINT.
    ValueError: If `channel_names` is empty (np.stack needs at least one array).
  """
  header = exr.header()
  channels_header = header["channels"]
  window = header["dataWindow"]
  # The data window bounds are inclusive, hence the +1.
  width = window.max.x - window.min.x + 1
  height = window.max.y - window.min.y + 1

  # Maps the EXR pixel type of a channel to the numpy dtype used to decode its raw buffer.
  numpy_types = {
      Imath.PixelType.HALF: np.float16,
      Imath.PixelType.FLOAT: np.float32,
      Imath.PixelType.UINT: np.uint32,
  }

  outputs = []
  for channel_name in channel_names:
    numpy_type = numpy_types[channels_header[channel_name].type.v]
    array = np.frombuffer(exr.channel(channel_name), numpy_type)
    outputs.append(array.reshape([height, width]))
  return np.stack(outputs, axis=-1)


def get_render_layers_from_exr(filename) -> Dict[str, np.ndarray]:
  """Loads the render layers stored in an EXR file into numpy arrays.

  Args:
    filename: Path of the EXR file to read (converted with `str()` before opening).

  Returns:
    A dict that contains an entry for each layer present in the file:
      - "linear_rgba": from the "Image" layer (R, G, B, A).
      - "depth": from the "Depth" layer (V).
      - "backward_flow" / "forward_flow": from the "Vector" layer, as (delta_row, delta_col).
      - "normal": from the "Normal" layer (X, Y, Z).
      - "uv": from the "UV" layer (X, Y, Z).
      - "segmentation_indices" / "segmentation_alphas": from the "CryptoObject*" layers.
      - "object_coordinates": from the "ObjectCoordinates" layer (R, G, B).
    Layers that are missing from the file are simply omitted.
  """
  exr = OpenEXR.InputFile(str(filename))
  try:
    # Channel names have the form "<layer>.<channel>"; collect the distinct layer names.
    layer_names = {name.partition(".")[0] for name, _ in exr.header()["channels"].items()}

    output = {}
    if "Image" in layer_names:
      # Image is in RGBA format with range [0, inf]
      output["linear_rgba"] = read_channels_from_exr(
          exr, ["Image.R", "Image.G", "Image.B", "Image.A"])

    if "Depth" in layer_names:
      # range [0, 10000000000.0]  # the value 1e10 is used for background / infinity
      output["depth"] = read_channels_from_exr(exr, ["Depth.V"])

    if "Vector" in layer_names:
      flow = read_channels_from_exr(exr, ["Vector.R", "Vector.G", "Vector.B", "Vector.A"])
      # Blender exports forward and backward flow in a single image,
      # and uses (-delta_col, delta_row) format, but we prefer (delta_row, delta_col)
      output["backward_flow"] = np.stack([flow[..., 1], -flow[..., 0]], axis=-1)
      output["forward_flow"] = np.stack([flow[..., 3], -flow[..., 2]], axis=-1)

    if "Normal" in layer_names:
      # range: [-1, 1]
      output["normal"] = read_channels_from_exr(exr, ["Normal.X", "Normal.Y", "Normal.Z"])

    if "UV" in layer_names:
      # range [0, 1]
      output["uv"] = read_channels_from_exr(exr, ["UV.X", "UV.Y", "UV.Z"])

    if "CryptoObject00" in layer_names:
      # CryptoMatte stores the segmentation of Objects using two kinds of channels:
      #  - index channels (uint32) specify the object index for a pixel
      #  - alpha channels (float32) specify the corresponding mask value
      # there may be many cryptomatte layers, which allows encoding a pixel as belonging to
      # multiple objects at once (up to a maximum of # of layers many objects per pixel)
      # In the EXR this is stored with 2 layers per RGBA image
      # (CryptoObject00, CryptoObject01, ...) with RG being the first layer and BA being
      # the second.
      # So the R and B channels are uint32 and the G and A channels are float32.
      #
      # layer_names is a set, so its iteration order is arbitrary; sort the layers so the
      # output channels always come out as CryptoObject00, CryptoObject01, ...
      crypto_layers = sorted(n for n in layer_names if n.startswith("CryptoObject"))
      index_channels = [n + "." + c for n in crypto_layers for c in "RB"]
      # Reinterpret the raw bits as uint32 (no value conversion) to recover the indices.
      output["segmentation_indices"] = read_channels_from_exr(
          exr, index_channels).view(np.uint32)
      alpha_channels = [n + "." + c for n in crypto_layers for c in "GA"]
      output["segmentation_alphas"] = read_channels_from_exr(exr, alpha_channels)

    if "ObjectCoordinates" in layer_names:
      output["object_coordinates"] = read_channels_from_exr(
          exr, ["ObjectCoordinates.R", "ObjectCoordinates.G", "ObjectCoordinates.B"])
    return output
  finally:
    # All arrays were copied out of the file by np.stack, so the handle can be released.
    exr.close()
# def read_channels_from_exr(exr: OpenEXR.InputFile, channel_names: Sequence[str]) -> np.ndarray:
# """Reads a single channel from an EXR file and returns it as a numpy array."""
# channels_header = exr.header()["channels"]
# window = exr.header()["dataWindow"]
# width = window.max.x - window.min.x + 1
# height = window.max.y - window.min.y + 1
# outputs = []
# for channel_name in channel_names:
# channel_type = channels_header[channel_name].type.v
# numpy_type = {
# Imath.PixelType.HALF: np.float16,
# Imath.PixelType.FLOAT: np.float32,
# Imath.PixelType.UINT: np.uint32,
# }[channel_type]
# array = np.frombuffer(exr.channel(channel_name), numpy_type)
# array = array.reshape([height, width])
# outputs.append(array)
# return np.stack(outputs, axis=-1)


# def get_render_layers_from_exr(filename) -> Dict[str, np.ndarray]:
# exr = OpenEXR.InputFile(str(filename))
# layer_names = set()
# for n, _ in exr.header()["channels"].items():
# layer_name, _, _ = n.partition(".")
# layer_names.add(layer_name)

# output = {}
# if "Image" in layer_names:
# # Image is in RGBA format with range [0, inf]
# output["linear_rgba"] = read_channels_from_exr(exr, ["Image.R", "Image.G",
# "Image.B", "Image.A"])
# if "Depth" in layer_names:
# # range [0, 10000000000.0] # the value 1e10 is used for background / infinity
# output["depth"] = read_channels_from_exr(exr, ["Depth.V"])
# if "Vector" in layer_names:
# flow = read_channels_from_exr(exr, ["Vector.R", "Vector.G", "Vector.B", "Vector.A"])
# # Blender exports forward and backward flow in a single image,
# # and uses (-delta_col, delta_row) format, but we prefer (delta_row, delta_col)
# output["backward_flow"] = np.zeros_like(flow[..., :2])
# output["backward_flow"][..., 0] = flow[..., 1]
# output["backward_flow"][..., 1] = -flow[..., 0]

# output["forward_flow"] = np.zeros_like(flow[..., 2:])
# output["forward_flow"][..., 0] = flow[..., 3]
# output["forward_flow"][..., 1] = -flow[..., 2]

# if "Normal" in layer_names:
# # range: [-1, 1]
# output["normal"] = read_channels_from_exr(exr, ["Normal.X", "Normal.Y", "Normal.Z"])

# if "UV" in layer_names:
# # range [0, 1]
# output["uv"] = read_channels_from_exr(exr, ["UV.X", "UV.Y", "UV.Z"])

# if "CryptoObject00" in layer_names:
# # CryptoMatte stores the segmentation of Objects using two kinds of channels:
# # - index channels (uint32) specify the object index for a pixel
# # - alpha channels (float32) specify the corresponding mask value
# # there may be many cryptomatte layers, which allows encoding a pixel as belonging to multiple
# # objects at once (up to a maximum of # of layers many objects per pixel)
# # In the EXR this is stored with 2 layers per RGBA image (CryptoObject00, CryptoObject01, ...)
# # with RG being the first layer and BA being the second
# # So the R and B channels are uint32 and the G and A channels are float32.
# crypto_layers = [n for n in layer_names if n.startswith("CryptoObject")]
# index_channels = [n + "." + c for n in crypto_layers for c in "RB"]
# idxs = read_channels_from_exr(exr, index_channels)
# idxs.dtype = np.uint32
# output["segmentation_indices"] = idxs
# alpha_channels = [n + "." + c for n in crypto_layers for c in "GA"]
# alphas = read_channels_from_exr(exr, alpha_channels)
# output["segmentation_alphas"] = alphas
# if "ObjectCoordinates" in layer_names:
# output["object_coordinates"] = read_channels_from_exr(exr,
# ["ObjectCoordinates.R", "ObjectCoordinates.G", "ObjectCoordinates.B"])
# return output


def replace_cryptomatte_hashes_by_asset_index(
Expand Down