From 44b4473c25585afa4e77cc9a3e3720b920bdd6d0 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Mon, 24 Nov 2025 20:55:38 +0000 Subject: [PATCH 01/20] Begin examining how to best add structured array support to Zarr v3 driver --- tensorstore/driver/zarr3/BUILD | 21 +- tensorstore/driver/zarr3/driver.cc | 41 +- tensorstore/driver/zarr3/dtype.cc | 298 +++++++++++++ tensorstore/driver/zarr3/dtype.h | 144 ++++++ tensorstore/driver/zarr3/dtype_test.cc | 293 ++++++++++++ tensorstore/driver/zarr3/metadata.cc | 514 ++++++++++++++++------ tensorstore/driver/zarr3/metadata.h | 51 ++- tensorstore/driver/zarr3/metadata_test.cc | 45 +- 8 files changed, 1251 insertions(+), 156 deletions(-) create mode 100644 tensorstore/driver/zarr3/dtype.cc create mode 100644 tensorstore/driver/zarr3/dtype.h create mode 100644 tensorstore/driver/zarr3/dtype_test.cc diff --git a/tensorstore/driver/zarr3/BUILD b/tensorstore/driver/zarr3/BUILD index 6e0613d5b..d67f58935 100644 --- a/tensorstore/driver/zarr3/BUILD +++ b/tensorstore/driver/zarr3/BUILD @@ -94,8 +94,8 @@ tensorstore_cc_library( tensorstore_cc_library( name = "metadata", - srcs = ["metadata.cc"], - hdrs = ["metadata.h"], + srcs = ["metadata.cc", "dtype.cc"], + hdrs = ["metadata.h", "dtype.h"], deps = [ ":default_nan", ":name_configuration_json_binder", @@ -145,6 +145,23 @@ tensorstore_cc_library( ], ) +tensorstore_cc_test( + name = "dtype_test", + size = "small", + srcs = ["dtype_test.cc"], + deps = [ + ":metadata", + "//tensorstore:data_type", + "//tensorstore:index", + "//tensorstore/internal/testing:json_gtest", + "//tensorstore/util:status_testutil", + "//tensorstore/util:str_cat", + "@abseil-cpp//absl/status", + "@googletest//:gtest_main", + "@nlohmann_json//:json", + ], +) + tensorstore_cc_test( name = "driver_test", size = "small", diff --git a/tensorstore/driver/zarr3/driver.cc b/tensorstore/driver/zarr3/driver.cc index a516c1a7b..15faced0a 100644 --- a/tensorstore/driver/zarr3/driver.cc +++ b/tensorstore/driver/zarr3/driver.cc 
@@ -121,8 +121,19 @@ class ZarrDriverSpec "metadata", jb::Validate( [](const auto& options, auto* obj) { - TENSORSTORE_RETURN_IF_ERROR(obj->schema.Set( - obj->metadata_constraints.data_type.value_or(DataType()))); + if (obj->metadata_constraints.data_type) { + if (auto dtype = GetScalarDataType( + *obj->metadata_constraints.data_type)) { + TENSORSTORE_RETURN_IF_ERROR(obj->schema.Set(*dtype)); + } else if (obj->schema.dtype().valid()) { + return absl::InvalidArgumentError( + "schema dtype must be unspecified for structured " + "zarr3 data types"); + } else { + // Leave dtype unspecified; structured dtypes are handled + // at metadata level only. + } + } TENSORSTORE_RETURN_IF_ERROR(obj->schema.Set( RankConstraint{obj->metadata_constraints.rank})); return absl::OkStatus(); @@ -146,8 +157,8 @@ class ZarrDriverSpec SharedArray fill_value{schema.fill_value()}; const auto& metadata = metadata_constraints; - if (metadata.fill_value) { - fill_value = *metadata.fill_value; + if (metadata.fill_value && !metadata.fill_value->empty()) { + fill_value = (*metadata.fill_value)[0]; } return fill_value; @@ -274,8 +285,10 @@ class DataCacheBase static internal::ChunkGridSpecification GetChunkGridSpecification( const ZarrMetadata& metadata) { - auto fill_value = - BroadcastArray(metadata.fill_value, BoxView<>(metadata.rank)).value(); + assert(!metadata.fill_value.empty()); + auto fill_value = BroadcastArray(metadata.fill_value[0], + BoxView<>(metadata.rank)) + .value(); internal::ChunkGridSpecification::ComponentList components; auto& component = components.emplace_back( internal::AsyncWriteArray::Spec{ @@ -402,9 +415,16 @@ class DataCacheBase const void* metadata_ptr, size_t component_index) override { const auto& metadata = *static_cast(metadata_ptr); ChunkLayout chunk_layout; + SpecRankAndFieldInfo info; + info.chunked_rank = metadata.rank; + if (!metadata.data_type.fields.empty()) { + info.field = &metadata.data_type.fields[0]; + } + std::optional> chunk_shape_span; + 
chunk_shape_span.emplace(metadata.chunk_shape.data(), + metadata.chunk_shape.size()); TENSORSTORE_RETURN_IF_ERROR(SetChunkLayoutFromMetadata( - metadata.data_type, metadata.rank, metadata.chunk_shape, - &metadata.codec_specs, chunk_layout)); + info, chunk_shape_span, &metadata.codec_specs, chunk_layout)); TENSORSTORE_RETURN_IF_ERROR(chunk_layout.Finalize()); return chunk_layout; } @@ -470,7 +490,10 @@ class ZarrDriver : public ZarrDriverBase { Result> GetFillValue( IndexTransformView<> transform) override { const auto& metadata = this->metadata(); - return metadata.fill_value; + if (metadata.fill_value.empty()) { + return SharedArray(); + } + return metadata.fill_value[0]; } Future GetStorageStatistics( diff --git a/tensorstore/driver/zarr3/dtype.cc b/tensorstore/driver/zarr3/dtype.cc new file mode 100644 index 000000000..8d1c9d49e --- /dev/null +++ b/tensorstore/driver/zarr3/dtype.cc @@ -0,0 +1,298 @@ +// Copyright 2020 The TensorStore Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "tensorstore/driver/zarr3/dtype.h" + +#include + +#include + +#include "absl/base/optimization.h" +#include "tensorstore/data_type.h" +#include "tensorstore/internal/json_binding/json_binding.h" +#include "tensorstore/util/endian.h" +#include "tensorstore/util/extents.h" +#include "tensorstore/util/quote_string.h" +#include "tensorstore/util/str_cat.h" + +namespace tensorstore { +namespace internal_zarr3 { + +Result ParseBaseDType(std::string_view dtype) { + using D = ZarrDType::BaseDType; + const auto make_dtype = [&](DataType result_dtype) -> Result { + return D{std::string(dtype), result_dtype, {}}; + }; + + if (dtype == "bool") return make_dtype(dtype_v); + if (dtype == "uint8") return make_dtype(dtype_v); + if (dtype == "uint16") return make_dtype(dtype_v); + if (dtype == "uint32") return make_dtype(dtype_v); + if (dtype == "uint64") return make_dtype(dtype_v); + if (dtype == "int8") return make_dtype(dtype_v); + if (dtype == "int16") return make_dtype(dtype_v); + if (dtype == "int32") return make_dtype(dtype_v); + if (dtype == "int64") return make_dtype(dtype_v); + if (dtype == "bfloat16") + return make_dtype(dtype_v<::tensorstore::dtypes::bfloat16_t>); + if (dtype == "float16") + return make_dtype(dtype_v<::tensorstore::dtypes::float16_t>); + if (dtype == "float32") + return make_dtype(dtype_v<::tensorstore::dtypes::float32_t>); + if (dtype == "float64") + return make_dtype(dtype_v<::tensorstore::dtypes::float64_t>); + if (dtype == "complex64") + return make_dtype(dtype_v<::tensorstore::dtypes::complex64_t>); + if (dtype == "complex128") + return make_dtype(dtype_v<::tensorstore::dtypes::complex128_t>); + + constexpr std::string_view kSupported = + "bool, uint8, uint16, uint32, uint64, int8, int16, int32, int64, " + "bfloat16, float16, float32, float64, complex64, complex128"; + return absl::InvalidArgumentError( + tensorstore::StrCat(dtype, " data type is not one of the supported " + "data types: ", + kSupported)); +} + +namespace { + +/// 
Parses a zarr metadata "dtype" JSON specification, but does not compute any
+/// derived values, and does not check for duplicate field names.
+///
+/// This is called by `ParseDType`.
+///
+/// \param value The zarr metadata "dtype" JSON specification.
+/// \param[out] out Must be non-null. Filled with the parsed dtype on success.
+/// \error `absl::StatusCode::kInvalidArgument` if `value` is invalid.
+Result<ZarrDType> ParseDTypeNoDerived(const nlohmann::json& value) {
+  ZarrDType out;
+  if (value.is_string()) {
+    // Single field.
+    out.has_fields = false;
+    out.fields.resize(1);
+    TENSORSTORE_ASSIGN_OR_RETURN(
+        static_cast<ZarrDType::BaseDType&>(out.fields[0]),
+        ParseBaseDType(value.get<std::string>()));
+    return out;
+  }
+  out.has_fields = true;
+  auto parse_result = internal_json::JsonParseArray(
+      value,
+      [&](ptrdiff_t size) {
+        out.fields.resize(size);
+        return absl::OkStatus();
+      },
+      [&](const ::nlohmann::json& x, ptrdiff_t field_i) {
+        auto& field = out.fields[field_i];
+        return internal_json::JsonParseArray(
+            x,
+            [&](ptrdiff_t size) {
+              if (size < 2 || size > 3) {
+                return absl::InvalidArgumentError(tensorstore::StrCat(
+                    "Expected array of size 2 or 3, but received: ", x.dump()));
+              }
+              return absl::OkStatus();
+            },
+            [&](const ::nlohmann::json& v, ptrdiff_t i) {
+              switch (i) {
+                case 0:
+                  if (internal_json::JsonRequireValueAs(v, &field.name).ok()) {
+                    if (!field.name.empty()) return absl::OkStatus();
+                  }
+                  return absl::InvalidArgumentError(tensorstore::StrCat(
+                      "Expected non-empty string, but received: ", v.dump()));
+                case 1: {
+                  std::string dtype_string;
+                  TENSORSTORE_RETURN_IF_ERROR(
+                      internal_json::JsonRequireValueAs(v, &dtype_string));
+                  TENSORSTORE_ASSIGN_OR_RETURN(
+                      static_cast<ZarrDType::BaseDType&>(field),
+                      ParseBaseDType(dtype_string));
+                  return absl::OkStatus();
+                }
+                case 2: {
+                  return internal_json::JsonParseArray(
+                      v,
+                      [&](ptrdiff_t size) {
+                        field.outer_shape.resize(size);
+                        return absl::OkStatus();
+                      },
+                      [&](const ::nlohmann::json& x, ptrdiff_t j) {
+                        return internal_json::JsonRequireInteger(
+                              x, &field.outer_shape[j], /*strict=*/true, 1,
+                              kInfIndex);
+                        });
+                }
+                default:
+                  ABSL_UNREACHABLE();  // COV_NF_LINE
+              }
+            });
+      });
+  if (!parse_result.ok()) return parse_result;
+  return out;
+}
+
+}  // namespace
+
+absl::Status ValidateDType(ZarrDType& dtype) {
+  dtype.bytes_per_outer_element = 0;
+  for (size_t field_i = 0; field_i < dtype.fields.size(); ++field_i) {
+    auto& field = dtype.fields[field_i];
+    if (std::any_of(
+            dtype.fields.begin(), dtype.fields.begin() + field_i,
+            [&](const ZarrDType::Field& f) { return f.name == field.name; })) {
+      return absl::InvalidArgumentError(tensorstore::StrCat(
+          "Field name ", QuoteString(field.name), " occurs more than once"));
+    }
+    field.field_shape.resize(field.flexible_shape.size() +
+                             field.outer_shape.size());
+    std::copy(field.flexible_shape.begin(), field.flexible_shape.end(),
+              std::copy(field.outer_shape.begin(), field.outer_shape.end(),
+                        field.field_shape.begin()));
+
+    field.num_inner_elements = ProductOfExtents(span(field.field_shape));
+    if (field.num_inner_elements == std::numeric_limits<Index>::max()) {
+      return absl::InvalidArgumentError(tensorstore::StrCat(
+          "Product of dimensions ", span(field.field_shape), " is too large"));
+    }
+    if (internal::MulOverflow(field.num_inner_elements,
+                              static_cast<Index>(field.dtype->size),
+                              &field.num_bytes)) {
+      return absl::InvalidArgumentError("Field size in bytes is too large");
+    }
+    field.byte_offset = dtype.bytes_per_outer_element;
+    if (internal::AddOverflow(dtype.bytes_per_outer_element, field.num_bytes,
+                              &dtype.bytes_per_outer_element)) {
+      return absl::InvalidArgumentError(
+          "Total number of bytes per outer array element is too large");
+    }
+  }
+  return absl::OkStatus();
+}
+
+std::optional<DataType> GetScalarDataType(const ZarrDType& dtype) {
+  if (!dtype.has_fields && !dtype.fields.empty()) {
+    return dtype.fields[0].dtype;
+  }
+  return std::nullopt;
+}
+
+Result<ZarrDType> ParseDType(const nlohmann::json& value) {
+  TENSORSTORE_ASSIGN_OR_RETURN(ZarrDType dtype,
ParseDTypeNoDerived(value)); + TENSORSTORE_RETURN_IF_ERROR(ValidateDType(dtype)); + return dtype; +} + +bool operator==(const ZarrDType::BaseDType& a, + const ZarrDType::BaseDType& b) { + return a.encoded_dtype == b.encoded_dtype && a.dtype == b.dtype && + a.flexible_shape == b.flexible_shape; +} + +bool operator!=(const ZarrDType::BaseDType& a, + const ZarrDType::BaseDType& b) { + return !(a == b); +} + +bool operator==(const ZarrDType::Field& a, const ZarrDType::Field& b) { + return static_cast(a) == + static_cast(b) && + a.outer_shape == b.outer_shape && a.name == b.name && + a.field_shape == b.field_shape && + a.num_inner_elements == b.num_inner_elements && + a.byte_offset == b.byte_offset && a.num_bytes == b.num_bytes; +} + +bool operator!=(const ZarrDType::Field& a, const ZarrDType::Field& b) { + return !(a == b); +} + +bool operator==(const ZarrDType& a, const ZarrDType& b) { + return a.has_fields == b.has_fields && + a.bytes_per_outer_element == b.bytes_per_outer_element && + a.fields == b.fields; +} + +bool operator!=(const ZarrDType& a, const ZarrDType& b) { return !(a == b); } + +void to_json(::nlohmann::json& out, const ZarrDType::Field& field) { + using array_t = ::nlohmann::json::array_t; + if (field.outer_shape.empty()) { + out = array_t{field.name, field.encoded_dtype}; + } else { + out = array_t{field.name, field.encoded_dtype, field.outer_shape}; + } +} + +void to_json(::nlohmann::json& out, // NOLINT + const ZarrDType& dtype) { + if (!dtype.has_fields) { + out = dtype.fields[0].encoded_dtype; + } else { + out = dtype.fields; + } +} + +TENSORSTORE_DEFINE_JSON_DEFAULT_BINDER(ZarrDType, [](auto is_loading, + const auto& options, + auto* obj, auto* j) { + if constexpr (is_loading) { + TENSORSTORE_ASSIGN_OR_RETURN(*obj, ParseDType(*j)); + } else { + to_json(*j, *obj); + } + return absl::OkStatus(); +}) + +namespace { + +Result MakeBaseDType(std::string_view name, + DataType dtype) { + ZarrDType::BaseDType base_dtype; + base_dtype.dtype = dtype; + 
base_dtype.encoded_dtype = std::string(name); + return base_dtype; +} + +} // namespace + +Result ChooseBaseDType(DataType dtype) { + if (dtype == dtype_v) return MakeBaseDType("bool", dtype); + if (dtype == dtype_v) return MakeBaseDType("uint8", dtype); + if (dtype == dtype_v) return MakeBaseDType("uint16", dtype); + if (dtype == dtype_v) return MakeBaseDType("uint32", dtype); + if (dtype == dtype_v) return MakeBaseDType("uint64", dtype); + if (dtype == dtype_v) return MakeBaseDType("int8", dtype); + if (dtype == dtype_v) return MakeBaseDType("int16", dtype); + if (dtype == dtype_v) return MakeBaseDType("int32", dtype); + if (dtype == dtype_v) return MakeBaseDType("int64", dtype); + if (dtype == dtype_v<::tensorstore::dtypes::bfloat16_t>) + return MakeBaseDType("bfloat16", dtype); + if (dtype == dtype_v<::tensorstore::dtypes::float16_t>) + return MakeBaseDType("float16", dtype); + if (dtype == dtype_v<::tensorstore::dtypes::float32_t>) + return MakeBaseDType("float32", dtype); + if (dtype == dtype_v<::tensorstore::dtypes::float64_t>) + return MakeBaseDType("float64", dtype); + if (dtype == dtype_v<::tensorstore::dtypes::complex64_t>) + return MakeBaseDType("complex64", dtype); + if (dtype == dtype_v<::tensorstore::dtypes::complex128_t>) + return MakeBaseDType("complex128", dtype); + return absl::InvalidArgumentError( + tensorstore::StrCat("Data type not supported: ", dtype)); +} + +} // namespace internal_zarr3 +} // namespace tensorstore diff --git a/tensorstore/driver/zarr3/dtype.h b/tensorstore/driver/zarr3/dtype.h new file mode 100644 index 000000000..430dd8849 --- /dev/null +++ b/tensorstore/driver/zarr3/dtype.h @@ -0,0 +1,144 @@ +// Copyright 2020 The TensorStore Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TENSORSTORE_DRIVER_ZARR3_DTYPE_H_ +#define TENSORSTORE_DRIVER_ZARR3_DTYPE_H_ + +/// \file +/// Support for encoding/decoding zarr "dtype" specifications. +/// See: https://zarr-specs.readthedocs.io/en/latest/v3/core/v3.0.html#data-type + +#include +#include +#include "tensorstore/data_type.h" +#include "tensorstore/internal/json_binding/bindable.h" +#include "tensorstore/util/endian.h" +#include "tensorstore/util/result.h" + +namespace tensorstore { +namespace internal_zarr3 { + +/// Decoded representation of a zarr "dtype" specification. +/// +/// A zarr "dtype" is a JSON value that is either: +/// +/// 1. A string, which specifies a single data type (e.g. "int32"). +/// In this case, the zarr array is considered to have a single, unnamed field. +/// +/// 2. An array, where each element of the array is of the form: +/// `[name, type]` or `[name, type, shape]`, where `name` is a JSON +/// string specifying the unique, non-empty field name, `type` is a data type +/// string, and `shape` is an optional "inner" array shape (specified +/// as a JSON array of non-negative integers) which defaults to the rank-0 +/// shape `[]` if not specified. +/// +/// Each field is encoded according to `type` into a fixed-size sequence of +/// bytes. If the optional "inner" array `shape` is specified, the individual +/// elements are encoded in C order. The encoding of each multi-field array +/// element is simply the concatenation of the encodings of each field. +struct ZarrDType { + /// Decoded representation of single value. 
+ struct BaseDType { + /// Data type string. + std::string encoded_dtype; + + /// Corresponding DataType used for in-memory representation. + DataType dtype; + + /// For "flexible" data types that are themselves arrays, this specifies the + /// shape. For regular data types, this is empty. + std::vector flexible_shape; + }; + + /// Decoded representation of a single field. + struct Field : public BaseDType { + /// Optional `shape` dimensions specified by a zarr "dtype" field specified + /// as a JSON array. If the zarr dtype was specified as a single `typestr` + /// value, or as a two-element array, this is empty. + std::vector outer_shape; + + /// Field name. Must be non-empty and unique if the zarr "dtype" was + /// specified as an array. Otherwise, is empty. + std::string name; + + /// The inner array dimensions of this field, equal to the concatenation of + /// `outer_shape` and `flexible_shape` (derived value). + std::vector field_shape; + + /// Product of `field_shape` dimensions (derived value). + Index num_inner_elements; + + /// Byte offset of this field within an "outer" element (derived value). + Index byte_offset; + + /// Number of bytes occupied by this field within an "outer" element + /// (derived value). + Index num_bytes; + }; + + /// Equal to `true` if the zarr "dtype" was specified as an array, in which + /// case all fields must have a unique, non-empty `name`. If `false`, there + /// must be a single field with an empty `name`. + bool has_fields; + + /// Decoded representation of the fields. + std::vector fields; + + /// Bytes per "outer" element (derived value). 
+  Index bytes_per_outer_element;
+
+  TENSORSTORE_DECLARE_JSON_DEFAULT_BINDER(ZarrDType,
+                                          internal_json_binding::NoOptions)
+
+  friend void to_json(::nlohmann::json& out,  // NOLINT
+                      const ZarrDType& dtype);
+};
+
+bool operator==(const ZarrDType::BaseDType& a,
+                const ZarrDType::BaseDType& b);
+bool operator!=(const ZarrDType::BaseDType& a,
+                const ZarrDType::BaseDType& b);
+bool operator==(const ZarrDType::Field& a, const ZarrDType::Field& b);
+bool operator!=(const ZarrDType::Field& a, const ZarrDType::Field& b);
+bool operator==(const ZarrDType& a, const ZarrDType& b);
+bool operator!=(const ZarrDType& a, const ZarrDType& b);
+
+/// Parses a zarr metadata "dtype" JSON specification.
+///
+/// \error `absl::StatusCode::kInvalidArgument` if `value` is not valid.
+Result<ZarrDType> ParseDType(const ::nlohmann::json& value);
+
+/// Validates `dtype` and computes derived values.
+///
+/// \error `absl::StatusCode::kInvalidArgument` if two fields have the same
+///     name.
+/// \error `absl::StatusCode::kInvalidArgument` if the field size is too large.
+absl::Status ValidateDType(ZarrDType& dtype);
+
+/// Returns the underlying TensorStore `DataType` if `dtype` represents an
+/// unstructured scalar array, otherwise `std::nullopt`.
+std::optional<DataType> GetScalarDataType(const ZarrDType& dtype);
+
+/// Parses a Zarr 3 data type string.
+///
+/// \error `absl::StatusCode::kInvalidArgument` if `dtype` is not valid.
+Result<ZarrDType::BaseDType> ParseBaseDType(std::string_view dtype);
+
+/// Chooses a zarr data type corresponding to `dtype`.
+ Result ChooseBaseDType(DataType dtype); + +} // namespace internal_zarr3 +} // namespace tensorstore + +#endif // TENSORSTORE_DRIVER_ZARR3_DTYPE_H_ diff --git a/tensorstore/driver/zarr3/dtype_test.cc b/tensorstore/driver/zarr3/dtype_test.cc new file mode 100644 index 000000000..cbb7acbfb --- /dev/null +++ b/tensorstore/driver/zarr3/dtype_test.cc @@ -0,0 +1,293 @@ +// Copyright 2023 The TensorStore Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tensorstore/driver/zarr3/dtype.h" + +#include +#include + +#include // for std::byte +#include +#include + +#include +#include +#include "absl/status/status.h" +#include +#include "tensorstore/data_type.h" +#include "tensorstore/index.h" +#include "tensorstore/internal/testing/json_gtest.h" +#include "tensorstore/util/status_testutil.h" +#include "tensorstore/util/str_cat.h" + +namespace { + +using ::tensorstore::DataType; +using ::tensorstore::dtype_v; +using ::tensorstore::Index; +using ::tensorstore::kInfIndex; +using ::tensorstore::StatusIs; +using ::tensorstore::internal_zarr3::ChooseBaseDType; +using ::tensorstore::internal_zarr3::ParseBaseDType; +using ::tensorstore::internal_zarr3::ParseDType; +using ::tensorstore::internal_zarr3::ZarrDType; +using ::testing::HasSubstr; +using ::testing::MatchesRegex; + +void CheckBaseDType(std::string dtype, DataType r, + std::vector flexible_shape) { + EXPECT_THAT(ParseBaseDType(dtype), ::testing::Optional(ZarrDType::BaseDType{ + dtype, r, 
flexible_shape})) + << dtype; +} + +TEST(ParseBaseDType, Success) { + CheckBaseDType("bool", dtype_v, {}); + CheckBaseDType("int8", dtype_v, {}); + CheckBaseDType("uint8", dtype_v, {}); + CheckBaseDType("int16", dtype_v, {}); + CheckBaseDType("uint16", dtype_v, {}); + CheckBaseDType("int32", dtype_v, {}); + CheckBaseDType("uint32", dtype_v, {}); + CheckBaseDType("int64", dtype_v, {}); + CheckBaseDType("uint64", dtype_v, {}); + CheckBaseDType("float16", dtype_v, {}); + CheckBaseDType("bfloat16", dtype_v, {}); + CheckBaseDType("float32", dtype_v, {}); + CheckBaseDType("float64", dtype_v, {}); + CheckBaseDType("complex64", dtype_v, {}); + CheckBaseDType("complex128", dtype_v, {}); +} + +TEST(ParseBaseDType, Failure) { + EXPECT_THAT( + ParseBaseDType(""), + StatusIs(absl::StatusCode::kInvalidArgument, + HasSubstr("data type is not one of the supported data types"))); + EXPECT_THAT(ParseBaseDType("float"), + StatusIs(absl::StatusCode::kInvalidArgument)); + EXPECT_THAT(ParseBaseDType("string"), + StatusIs(absl::StatusCode::kInvalidArgument)); + EXPECT_THAT(ParseBaseDType(", + /*.flexible_shape=*/{}, + }, + /*.outer_shape=*/{}, + /*.name=*/"", + /*.field_shape=*/{}, + /*.num_inner_elements=*/1, + /*.byte_offset=*/0, + /*.num_bytes=*/1}, + }, + /*.bytes_per_outer_element=*/1, + }); +} + +TEST(ParseDType, SingleNamedFieldChar) { + // Zarr 3 doesn't support fixed size strings natively in core, so we use uint8 for testing bytes + CheckDType(::nlohmann::json::array_t{{"x", "uint8"}}, + ZarrDType{ + /*.has_fields=*/true, + /*.fields=*/ + { + {{ + /*.encoded_dtype=*/"uint8", + /*.dtype=*/dtype_v, + /*.flexible_shape=*/{}, + }, + /*.outer_shape=*/{}, + /*.name=*/"x", + /*.field_shape=*/{}, + /*.num_inner_elements=*/1, + /*.byte_offset=*/0, + /*.num_bytes=*/1}, + }, + /*.bytes_per_outer_element=*/1, + }); +} + +TEST(ParseDType, TwoNamedFields) { + CheckDType( + ::nlohmann::json::array_t{{"x", "int8", {2, 3}}, {"y", "int16", {5}}}, + ZarrDType{ + /*.has_fields=*/true, + 
/*.fields=*/ + { + {{ + /*.encoded_dtype=*/"int8", + /*.dtype=*/dtype_v, + /*.flexible_shape=*/{}, + }, + /*.outer_shape=*/{2, 3}, + /*.name=*/"x", + /*.field_shape=*/{2, 3}, + /*.num_inner_elements=*/2 * 3, + /*.byte_offset=*/0, + /*.num_bytes=*/1 * 2 * 3}, + {{ + /*.encoded_dtype=*/"int16", + /*.dtype=*/dtype_v, + /*.flexible_shape=*/{}, + }, + /*.outer_shape=*/{5}, + /*.name=*/"y", + /*.field_shape=*/{5}, + /*.num_inner_elements=*/5, + /*.byte_offset=*/1 * 2 * 3, + /*.num_bytes=*/2 * 5}, + }, + /*.bytes_per_outer_element=*/1 * 2 * 3 + 2 * 5, + }); +} + +TEST(ParseDType, FieldSpecTooShort) { + EXPECT_THAT( + ParseDType(::nlohmann::json::array_t{{"x"}}), + StatusIs( + absl::StatusCode::kInvalidArgument, + HasSubstr("Error parsing value at position 0: " + "Expected array of size 2 or 3, but received: [\"x\"]"))); +} + +TEST(ParseDType, FieldSpecTooLong) { + EXPECT_THAT( + ParseDType(::nlohmann::json::array_t{{"x", "int16", {2, 3}, 5}}), + StatusIs( + absl::StatusCode::kInvalidArgument, + HasSubstr("Error parsing value at position 0: " + "Expected array of size 2 or 3, but received: " + "[\"x\",\"int16\",[2,3],5]"))); +} + +TEST(ParseDType, InvalidFieldName) { + EXPECT_THAT( + ParseDType(::nlohmann::json::array_t{{3, "int16"}}), + StatusIs(absl::StatusCode::kInvalidArgument, + HasSubstr("Error parsing value at position 0: " + "Error parsing value at position 0: " + "Expected non-empty string, but received: 3"))); +} + +TEST(ParseDType, EmptyFieldName) { + EXPECT_THAT( + ParseDType(::nlohmann::json::array_t{{"", "int16"}}), + StatusIs(absl::StatusCode::kInvalidArgument, + HasSubstr("Error parsing value at position 0: " + "Error parsing value at position 0: " + "Expected non-empty string, but received: \"\""))); +} + +TEST(ParseDType, DuplicateFieldName) { + EXPECT_THAT( + ParseDType(::nlohmann::json::array_t{{"x", "int16"}, {"x", "uint16"}}), + StatusIs(absl::StatusCode::kInvalidArgument, + HasSubstr("Field name \"x\" occurs more than once"))); +} + +TEST(ParseDType, 
NonStringFieldBaseDType) { + EXPECT_THAT(ParseDType(::nlohmann::json::array_t{{"x", 3}}), + StatusIs(absl::StatusCode::kInvalidArgument, + HasSubstr("Error parsing value at position 0: " + "Error parsing value at position 1: " + "Expected string, but received: 3"))); +} + +TEST(ParseDType, InvalidFieldBaseDType) { + EXPECT_THAT(ParseDType(::nlohmann::json::array_t{{"x", "unknown"}}), + StatusIs(absl::StatusCode::kInvalidArgument, + HasSubstr("Error parsing value at position 0: " + "Error parsing value at position 1: " + "unknown data type is not one of the " + "supported data types"))); +} + +TEST(ParseDType, ProductOfDimensionsOverflow) { + EXPECT_THAT( + ParseDType( + ::nlohmann::json::array_t{{"x", "int8", {kInfIndex, kInfIndex}}}), + StatusIs(absl::StatusCode::kInvalidArgument, + MatchesRegex(".*Product of dimensions .* is too large.*"))); +} + +TEST(ParseDType, FieldSizeInBytesOverflow) { + EXPECT_THAT( + ParseDType(::nlohmann::json::array_t{{"x", "float64", {kInfIndex}}}), + StatusIs(absl::StatusCode::kInvalidArgument, + HasSubstr("Field size in bytes is too large"))); +} + +TEST(ParseDType, BytesPerOuterElementOverflow) { + EXPECT_THAT( + ParseDType(::nlohmann::json::array_t{{"x", "int16", {kInfIndex}}, + {"y", "int16", {kInfIndex}}}), + StatusIs( + absl::StatusCode::kInvalidArgument, + HasSubstr( + "Total number of bytes per outer array element is too large"))); +} + +TEST(ChooseBaseDTypeTest, RoundTrip) { + constexpr tensorstore::DataType kSupportedDataTypes[] = { + dtype_v, dtype_v, dtype_v, dtype_v, + dtype_v, dtype_v, dtype_v, + dtype_v, dtype_v, + dtype_v, + dtype_v, + dtype_v, + dtype_v, + dtype_v, + dtype_v, + }; + for (auto dtype : kSupportedDataTypes) { + SCOPED_TRACE(tensorstore::StrCat("dtype=", dtype)); + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto base_zarr_dtype, + ChooseBaseDType(dtype)); + EXPECT_EQ(dtype, base_zarr_dtype.dtype); + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto parsed, ParseBaseDType(base_zarr_dtype.encoded_dtype)); + EXPECT_EQ(dtype, 
parsed.dtype); + EXPECT_EQ(base_zarr_dtype.flexible_shape, parsed.flexible_shape); + EXPECT_EQ(base_zarr_dtype.encoded_dtype, parsed.encoded_dtype); + } +} + +TEST(ChooseBaseDTypeTest, Invalid) { + struct X {}; + EXPECT_THAT(ChooseBaseDType(dtype_v), + StatusIs(absl::StatusCode::kInvalidArgument, + HasSubstr("Data type not supported"))); + EXPECT_THAT(ChooseBaseDType(dtype_v<::tensorstore::dtypes::string_t>), + StatusIs(absl::StatusCode::kInvalidArgument, + HasSubstr("Data type not supported: string"))); +} + +} // namespace diff --git a/tensorstore/driver/zarr3/metadata.cc b/tensorstore/driver/zarr3/metadata.cc index 528d373ae..c96c31426 100644 --- a/tensorstore/driver/zarr3/metadata.cc +++ b/tensorstore/driver/zarr3/metadata.cc @@ -50,6 +50,7 @@ #include "tensorstore/driver/zarr3/codec/codec_spec.h" #include "tensorstore/driver/zarr3/codec/sharding_indexed.h" #include "tensorstore/driver/zarr3/default_nan.h" +#include "tensorstore/driver/zarr3/dtype.h" #include "tensorstore/driver/zarr3/name_configuration_json_binder.h" #include "tensorstore/index.h" #include "tensorstore/index_space/dimension_units.h" @@ -252,24 +253,110 @@ constexpr std::array } // namespace -absl::Status FillValueJsonBinder::operator()(std::true_type is_loading, - internal_json_binding::NoOptions, - SharedArray* obj, - ::nlohmann::json* j) const { +FillValueJsonBinder::FillValueJsonBinder(ZarrDType dtype, + bool allow_missing_dtype) + : dtype(std::move(dtype)), allow_missing_dtype(allow_missing_dtype) {} + +FillValueJsonBinder::FillValueJsonBinder(DataType data_type, + bool allow_missing_dtype) + : allow_missing_dtype(allow_missing_dtype) { + dtype.has_fields = false; + dtype.fields.resize(1); + auto& field = dtype.fields[0]; + field.name.clear(); + field.outer_shape.clear(); + field.flexible_shape.clear(); + field.field_shape.clear(); + field.num_inner_elements = 1; + field.byte_offset = 0; + field.num_bytes = data_type->size; + field.dtype = data_type; + field.encoded_dtype = 
std::string(data_type.name()); +} + +absl::Status FillValueJsonBinder::operator()( + std::true_type is_loading, internal_json_binding::NoOptions, + std::vector>* obj, ::nlohmann::json* j) const { + obj->resize(dtype.fields.size()); + if (dtype.fields.size() == 1) { + TENSORSTORE_RETURN_IF_ERROR( + DecodeSingle(*j, dtype.fields[0].dtype, (*obj)[0])); + } else { + if (!j->is_array()) { + return internal_json::ExpectedError(*j, "array"); + } + if (j->size() != dtype.fields.size()) { + return internal_json::ExpectedError( + *j, tensorstore::StrCat("array of size ", dtype.fields.size())); + } + for (size_t i = 0; i < dtype.fields.size(); ++i) { + TENSORSTORE_RETURN_IF_ERROR( + DecodeSingle((*j)[i], dtype.fields[i].dtype, (*obj)[i])); + } + } + return absl::OkStatus(); +} + +absl::Status FillValueJsonBinder::operator()( + std::false_type is_loading, internal_json_binding::NoOptions, + const std::vector>* obj, + ::nlohmann::json* j) const { + if (dtype.fields.size() == 1) { + return EncodeSingle((*obj)[0], dtype.fields[0].dtype, *j); + } + // Structured fill value + *j = ::nlohmann::json::array(); + for (size_t i = 0; i < dtype.fields.size(); ++i) { + ::nlohmann::json item; + TENSORSTORE_RETURN_IF_ERROR( + EncodeSingle((*obj)[i], dtype.fields[i].dtype, item)); + j->push_back(std::move(item)); + } + return absl::OkStatus(); +} + +absl::Status FillValueJsonBinder::DecodeSingle(::nlohmann::json& j, + DataType data_type, + SharedArray& out) const { + if (!data_type.valid()) { + if (allow_missing_dtype) { + out = SharedArray(); + return absl::OkStatus(); + } + return absl::InvalidArgumentError( + "data_type must be specified before fill_value"); + } auto arr = AllocateArray(span{}, c_order, default_init, data_type); void* data = arr.data(); - *obj = std::move(arr); - return kFillValueDataTypeFunctions[static_cast(data_type.id())] - .decode(data, *j); + out = std::move(arr); + const auto& functions = + kFillValueDataTypeFunctions[static_cast(data_type.id())]; + if 
(!functions.decode) { + if (allow_missing_dtype) { + out = SharedArray(); + return absl::OkStatus(); + } + return absl::FailedPreconditionError( + "fill_value unsupported for specified data_type"); + } + return functions.decode(data, j); } -absl::Status FillValueJsonBinder::operator()(std::false_type is_loading, - internal_json_binding::NoOptions, - const SharedArray* obj, - ::nlohmann::json* j) const { - return kFillValueDataTypeFunctions[static_cast(data_type.id())] - .encode(obj->data(), *j); +absl::Status FillValueJsonBinder::EncodeSingle( + const SharedArray& arr, DataType data_type, + ::nlohmann::json& j) const { + if (!data_type.valid()) { + return absl::InvalidArgumentError( + "data_type must be specified before fill_value"); + } + const auto& functions = + kFillValueDataTypeFunctions[static_cast(data_type.id())]; + if (!functions.encode) { + return absl::FailedPreconditionError( + "fill_value unsupported for specified data_type"); + } + return functions.encode(arr.data(), j); } TENSORSTORE_DEFINE_JSON_DEFAULT_BINDER(ChunkKeyEncoding, [](auto is_loading, @@ -357,7 +444,7 @@ constexpr auto MetadataJsonBinder = [] { rank = &obj->rank; } - auto ensure_data_type = [&]() -> Result { + auto ensure_data_type = [&]() -> Result { if constexpr (std::is_same_v) { return obj->data_type; } @@ -378,19 +465,18 @@ constexpr auto MetadataJsonBinder = [] { maybe_optional_member("node_type", jb::Constant([] { return "array"; })), jb::Member("data_type", - jb::Projection<&Self::data_type>(maybe_optional(jb::Validate( - [](const auto& options, auto* obj) { - return ValidateDataType(*obj); - }, - jb::DataTypeJsonBinder)))), + jb::Projection<&Self::data_type>(maybe_optional( + jb::DefaultBinder<>))), jb::Member( "fill_value", jb::Projection<&Self::fill_value>(maybe_optional( [&](auto is_loading, const auto& options, auto* obj, auto* j) { TENSORSTORE_ASSIGN_OR_RETURN(auto data_type, ensure_data_type()); - return FillValueJsonBinder{data_type}(is_loading, options, - obj, j); + 
constexpr bool allow_missing_dtype = + std::is_same_v; + return FillValueJsonBinder{data_type, allow_missing_dtype}( + is_loading, options, obj, j); }))), non_compatibility_field( jb::Member("shape", jb::Projection<&Self::shape>( @@ -477,9 +563,28 @@ std::string ZarrMetadata::GetCompatibilityKey() const { absl::Status ValidateMetadata(ZarrMetadata& metadata) { if (!metadata.codecs) { ArrayCodecResolveParameters decoded; - decoded.dtype = metadata.data_type; + if (metadata.data_type.fields.size() == 1 && + metadata.data_type.fields[0].outer_shape.empty()) { + decoded.dtype = metadata.data_type.fields[0].dtype; + } else { + decoded.dtype = dtype_v; + // TODO: Verify this works for structured types. + // Zarr2 uses a "scalar" array concept with byte storage for chunks. + } decoded.rank = metadata.rank; - decoded.fill_value = metadata.fill_value; + // Fill value for codec resolve might be complex. + // Zarr3 codecs usually don't depend on fill value except for some like + // "sharding_indexed"? Sharding uses fill_value for missing chunks. + if (metadata.fill_value.size() == 1) { + decoded.fill_value = metadata.fill_value[0]; + } else { + // How to represent structured fill value for codec? + // Sharding expects a single array. + // If we use structured type, the "array" is bytes. + // We might need to encode the fill value to bytes. + // For now, leave empty if multiple fields. + } + BytesCodecResolveParameters encoded; TENSORSTORE_ASSIGN_OR_RETURN( metadata.codecs, @@ -488,7 +593,14 @@ absl::Status ValidateMetadata(ZarrMetadata& metadata) { // Get codec chunk layout info. ArrayDataTypeAndShapeInfo array_info; - array_info.dtype = metadata.data_type; + // array_info.dtype used here to validate codec compatibility. 
+ if (metadata.data_type.fields.size() == 1 && + metadata.data_type.fields[0].outer_shape.empty()) { + array_info.dtype = metadata.data_type.fields[0].dtype; + } else { + array_info.dtype = dtype_v; + } + array_info.rank = metadata.rank; std::copy_n(metadata.chunk_shape.begin(), metadata.rank, array_info.shape.emplace().begin()); @@ -512,17 +624,34 @@ absl::Status ValidateMetadata(ZarrMetadata& metadata) { absl::Status ValidateMetadata(const ZarrMetadata& metadata, const ZarrMetadataConstraints& constraints) { using internal::MetadataMismatchError; - if (constraints.data_type && *constraints.data_type != metadata.data_type) { - return MetadataMismatchError("data_type", constraints.data_type->name(), - metadata.data_type.name()); - } - if (constraints.fill_value && - !AreArraysIdenticallyEqual(*constraints.fill_value, - metadata.fill_value)) { - auto binder = FillValueJsonBinder{metadata.data_type}; - auto constraint_json = jb::ToJson(*constraints.fill_value, binder).value(); - auto metadata_json = jb::ToJson(metadata.fill_value, binder).value(); - return MetadataMismatchError("fill_value", constraint_json, metadata_json); + if (constraints.data_type) { + // Compare ZarrDType + if (::nlohmann::json(*constraints.data_type) != + ::nlohmann::json(metadata.data_type)) { + return MetadataMismatchError( + "data_type", ::nlohmann::json(*constraints.data_type).dump(), + ::nlohmann::json(metadata.data_type).dump()); + } + } + if (constraints.fill_value) { + // Compare vector of arrays + if (constraints.fill_value->size() != metadata.fill_value.size()) { + return MetadataMismatchError("fill_value size", + constraints.fill_value->size(), + metadata.fill_value.size()); + } + for (size_t i = 0; i < metadata.fill_value.size(); ++i) { + if (!AreArraysIdenticallyEqual((*constraints.fill_value)[i], + metadata.fill_value[i])) { + auto binder = FillValueJsonBinder{metadata.data_type}; + auto constraint_json = + jb::ToJson(*constraints.fill_value, binder).value(); + auto metadata_json 
= + jb::ToJson(metadata.fill_value, binder).value(); + return MetadataMismatchError("fill_value", constraint_json, + metadata_json); + } + } } if (constraints.shape && *constraints.shape != metadata.shape) { return MetadataMismatchError("shape", *constraints.shape, metadata.shape); @@ -574,23 +703,64 @@ absl::Status ValidateMetadata(const ZarrMetadata& metadata, metadata.unknown_extension_attributes); } +namespace { +std::string GetFieldNames(const ZarrDType& dtype) { + std::vector field_names; + for (const auto& field : dtype.fields) { + field_names.push_back(field.name); + } + return ::nlohmann::json(field_names).dump(); +} +} // namespace + +Result GetFieldIndex(const ZarrDType& dtype, + std::string_view selected_field) { + if (selected_field.empty()) { + if (dtype.fields.size() != 1) { + return absl::FailedPreconditionError(tensorstore::StrCat( + "Must specify a \"field\" that is one of: ", GetFieldNames(dtype))); + } + return 0; + } + if (!dtype.has_fields) { + return absl::FailedPreconditionError( + tensorstore::StrCat("Requested field ", QuoteString(selected_field), + " but dtype does not have named fields")); + } + for (size_t field_index = 0; field_index < dtype.fields.size(); + ++field_index) { + if (dtype.fields[field_index].name == selected_field) return field_index; + } + return absl::FailedPreconditionError( + tensorstore::StrCat("Requested field ", QuoteString(selected_field), + " is not one of: ", GetFieldNames(dtype))); +} + +SpecRankAndFieldInfo GetSpecRankAndFieldInfo(const ZarrMetadata& metadata, + size_t field_index) { + SpecRankAndFieldInfo info; + info.chunked_rank = metadata.rank; + info.field = &metadata.data_type.fields[field_index]; + return info; +} + Result> GetEffectiveDomain( - DimensionIndex rank, std::optional> shape, + const SpecRankAndFieldInfo& info, + std::optional> metadata_shape, std::optional>> dimension_names, - const Schema& schema, bool* dimension_names_used = nullptr) { + const Schema& schema, bool* dimension_names_used) 
{ + const DimensionIndex rank = info.chunked_rank; if (dimension_names_used) *dimension_names_used = false; auto domain = schema.domain(); - if (!shape && !dimension_names && !domain.valid()) { + if (!metadata_shape && !dimension_names && !domain.valid()) { if (schema.rank() == 0) return {std::in_place, 0}; - // No information about the domain available. return {std::in_place}; } - // Rank is already validated by caller. assert(RankConstraint::EqualOrUnspecified(schema.rank(), rank)); IndexDomainBuilder builder(std::max(schema.rank().rank, rank)); - if (shape) { - builder.shape(*shape); + if (metadata_shape) { + builder.shape(*metadata_shape); builder.implicit_upper_bounds(true); } else { builder.origin(GetConstantVector(builder.rank())); @@ -602,12 +772,12 @@ Result> GetEffectiveDomain( normalized_dimension_names[i] = *name; } } - // Use dimension_names as labels if they are valid. - if (internal::ValidateDimensionLabelsAreUnique(normalized_dimension_names) + if (internal::ValidateDimensionLabelsAreUnique( + span(&normalized_dimension_names[0], rank)) .ok()) { - if (dimension_names_used) *dimension_names_used = true; builder.labels( span(&normalized_dimension_names[0], rank)); + if (dimension_names_used) *dimension_names_used = true; } } @@ -618,36 +788,53 @@ Result> GetEffectiveDomain( tensorstore::MaybeAnnotateStatus( _, "Mismatch between metadata and schema"))); return WithImplicitDimensions(domain, false, true); - return domain; } Result> GetEffectiveDomain( const ZarrMetadataConstraints& metadata_constraints, const Schema& schema, bool* dimension_names_used) { - return GetEffectiveDomain( - metadata_constraints.rank, metadata_constraints.shape, - metadata_constraints.dimension_names, schema, dimension_names_used); + SpecRankAndFieldInfo info; + info.chunked_rank = metadata_constraints.rank; + if (info.chunked_rank == dynamic_rank && metadata_constraints.shape) { + info.chunked_rank = metadata_constraints.shape->size(); + } + + std::optional> shape_span; + if 
(metadata_constraints.shape) { + shape_span.emplace(metadata_constraints.shape->data(), + metadata_constraints.shape->size()); + } + std::optional>> names_span; + if (metadata_constraints.dimension_names) { + names_span.emplace(metadata_constraints.dimension_names->data(), + metadata_constraints.dimension_names->size()); + } + + return GetEffectiveDomain(info, shape_span, names_span, schema, + dimension_names_used); } absl::Status SetChunkLayoutFromMetadata( - DataType dtype, DimensionIndex rank, + const SpecRankAndFieldInfo& info, std::optional> chunk_shape, const ZarrCodecChainSpec* codecs, ChunkLayout& chunk_layout) { - TENSORSTORE_RETURN_IF_ERROR(chunk_layout.Set(RankConstraint{rank})); - rank = chunk_layout.rank(); - if (rank == dynamic_rank) return absl::OkStatus(); + const DimensionIndex rank = info.chunked_rank; + if (rank == dynamic_rank) { + return absl::OkStatus(); + } + TENSORSTORE_RETURN_IF_ERROR(chunk_layout.Set(RankConstraint(rank))); + TENSORSTORE_RETURN_IF_ERROR(chunk_layout.Set( + ChunkLayout::GridOrigin(GetConstantVector(rank)))); if (chunk_shape) { assert(chunk_shape->size() == rank); TENSORSTORE_RETURN_IF_ERROR( chunk_layout.Set(ChunkLayout::WriteChunkShape(*chunk_shape))); } - TENSORSTORE_RETURN_IF_ERROR(chunk_layout.Set( - ChunkLayout::GridOrigin(GetConstantVector(rank)))); if (codecs) { ArrayDataTypeAndShapeInfo array_info; - array_info.dtype = dtype; + array_info.dtype = info.field ? 
info.field->dtype : dtype_v; array_info.rank = rank; if (chunk_shape) { std::copy_n(chunk_shape->begin(), rank, @@ -669,30 +856,47 @@ absl::Status SetChunkLayoutFromMetadata( span(layout_info.codec_chunk_shape->data(), rank)))); } } + return absl::OkStatus(); } -Result GetEffectiveChunkLayout( +absl::Status SetChunkLayoutFromMetadata( DataType dtype, DimensionIndex rank, std::optional> chunk_shape, - const ZarrCodecChainSpec* codecs, const Schema& schema) { - auto chunk_layout = schema.chunk_layout(); - TENSORSTORE_RETURN_IF_ERROR(SetChunkLayoutFromMetadata( - dtype, rank, chunk_shape, codecs, chunk_layout)); - return chunk_layout; + const ZarrCodecChainSpec* codecs, ChunkLayout& chunk_layout) { + SpecRankAndFieldInfo info; + info.chunked_rank = rank; + info.field = nullptr; + return SetChunkLayoutFromMetadata(info, chunk_shape, codecs, chunk_layout); } Result GetEffectiveChunkLayout( const ZarrMetadataConstraints& metadata_constraints, const Schema& schema) { - assert(RankConstraint::EqualOrUnspecified(metadata_constraints.rank, - schema.rank())); - return GetEffectiveChunkLayout( - metadata_constraints.data_type.value_or(DataType{}), - std::max(metadata_constraints.rank, schema.rank().rank), - metadata_constraints.chunk_shape, + // Approximation: assume whole array access or simple array + SpecRankAndFieldInfo info; + info.chunked_rank = std::max(metadata_constraints.rank, schema.rank().rank); + if (info.chunked_rank == dynamic_rank && metadata_constraints.shape) { + info.chunked_rank = metadata_constraints.shape->size(); + } + if (info.chunked_rank == dynamic_rank && metadata_constraints.chunk_shape) { + info.chunked_rank = metadata_constraints.chunk_shape->size(); + } + // We can't easily know field info from constraints unless we parse data_type. + // If data_type is present and has 1 field, we can check it. + // For now, basic implementation. 
+ + ChunkLayout chunk_layout = schema.chunk_layout(); + std::optional> chunk_shape_span; + if (metadata_constraints.chunk_shape) { + chunk_shape_span.emplace(metadata_constraints.chunk_shape->data(), + metadata_constraints.chunk_shape->size()); + } + TENSORSTORE_RETURN_IF_ERROR(SetChunkLayoutFromMetadata( + info, chunk_shape_span, metadata_constraints.codec_specs ? &*metadata_constraints.codec_specs : nullptr, - schema); + chunk_layout)); + return chunk_layout; } Result GetDimensionUnits( @@ -732,53 +936,63 @@ CodecSpec GetCodecFromMetadata(const ZarrMetadata& metadata) { } absl::Status ValidateMetadataSchema(const ZarrMetadata& metadata, - const Schema& schema) { - if (!RankConstraint::EqualOrUnspecified(metadata.rank, schema.rank())) { + size_t field_index, const Schema& schema) { + auto info = GetSpecRankAndFieldInfo(metadata, field_index); + const auto& field = metadata.data_type.fields[field_index]; + + if (!RankConstraint::EqualOrUnspecified(schema.rank(), info.chunked_rank)) { return absl::FailedPreconditionError(tensorstore::StrCat( "Rank specified by schema (", schema.rank(), - ") does not match rank specified by metadata (", metadata.rank, ")")); + ") does not match rank specified by metadata (", info.chunked_rank, + ")")); } if (schema.domain().valid()) { + std::optional> metadata_shape_span; + metadata_shape_span.emplace(metadata.shape.data(), metadata.shape.size()); + std::optional>> dimension_names_span; + dimension_names_span.emplace(metadata.dimension_names.data(), + metadata.dimension_names.size()); TENSORSTORE_RETURN_IF_ERROR(GetEffectiveDomain( - metadata.rank, metadata.shape, metadata.dimension_names, schema)); + info, metadata_shape_span, dimension_names_span, schema, + /*dimension_names_used=*/nullptr)); } if (auto dtype = schema.dtype(); - !IsPossiblySameDataType(metadata.data_type, dtype)) { + !IsPossiblySameDataType(field.dtype, dtype)) { return absl::FailedPreconditionError( - tensorstore::StrCat("data_type from metadata (", 
metadata.data_type, + tensorstore::StrCat("data_type from metadata (", field.dtype, ") does not match dtype in schema (", dtype, ")")); } if (schema.chunk_layout().rank() != dynamic_rank) { - TENSORSTORE_ASSIGN_OR_RETURN( - auto chunk_layout, - GetEffectiveChunkLayout(metadata.data_type, metadata.rank, - metadata.chunk_shape, &metadata.codec_specs, - schema)); + ChunkLayout chunk_layout = schema.chunk_layout(); + std::optional> chunk_shape_span; + chunk_shape_span.emplace(metadata.chunk_shape.data(), + metadata.chunk_shape.size()); + TENSORSTORE_RETURN_IF_ERROR(SetChunkLayoutFromMetadata( + info, chunk_shape_span, &metadata.codec_specs, chunk_layout)); if (chunk_layout.codec_chunk_shape().hard_constraint) { return absl::InvalidArgumentError("codec_chunk_shape not supported"); } } if (auto schema_fill_value = schema.fill_value(); schema_fill_value.valid()) { - const auto& fill_value = metadata.fill_value; + const auto& fill_value = metadata.fill_value[field_index]; TENSORSTORE_ASSIGN_OR_RETURN( auto broadcast_fill_value, tensorstore::BroadcastArray(schema_fill_value, span{})); TENSORSTORE_ASSIGN_OR_RETURN( SharedArray converted_fill_value, tensorstore::MakeCopy(std::move(broadcast_fill_value), - skip_repeated_elements, metadata.data_type)); + skip_repeated_elements, field.dtype)); if (!AreArraysIdenticallyEqual(converted_fill_value, fill_value)) { auto binder = FillValueJsonBinder{metadata.data_type}; - auto schema_json = jb::ToJson(converted_fill_value, binder).value(); - auto metadata_json = jb::ToJson(metadata.fill_value, binder).value(); + // Error message generation might be tricky with binder return absl::FailedPreconditionError(tensorstore::StrCat( "Invalid fill_value: schema requires fill value of ", - schema_json.dump(), ", but metadata specifies fill value of ", - metadata_json.dump())); + schema_fill_value, ", but metadata specifies fill value of ", + fill_value)); } } @@ -804,8 +1018,14 @@ absl::Status ValidateMetadataSchema(const ZarrMetadata& metadata, 
return absl::OkStatus(); } +absl::Status ValidateMetadataSchema(const ZarrMetadata& metadata, + const Schema& schema) { + return ValidateMetadataSchema(metadata, /*field_index=*/0, schema); +} + Result> GetNewMetadata( - const ZarrMetadataConstraints& metadata_constraints, const Schema& schema) { + const ZarrMetadataConstraints& metadata_constraints, const Schema& schema, + std::string_view selected_field) { auto metadata = std::make_shared(); metadata->zarr_format = metadata_constraints.zarr_format.value_or(3); @@ -813,51 +1033,85 @@ Result> GetNewMetadata( metadata_constraints.chunk_key_encoding.value_or(ChunkKeyEncoding{ /*.kind=*/ChunkKeyEncoding::kDefault, /*.separator=*/'/'}); + // Determine data type first + if (metadata_constraints.data_type) { + metadata->data_type = *metadata_constraints.data_type; + } else if (!selected_field.empty()) { + return absl::InvalidArgumentError( + "\"dtype\" must be specified in \"metadata\" if \"field\" is " + "specified"); + } else if (auto dtype = schema.dtype(); dtype.valid()) { + TENSORSTORE_ASSIGN_OR_RETURN( + static_cast( + metadata->data_type.fields.emplace_back()), + ChooseBaseDType(dtype)); + metadata->data_type.has_fields = false; + TENSORSTORE_RETURN_IF_ERROR(ValidateDType(metadata->data_type)); + } else { + return absl::InvalidArgumentError("dtype must be specified"); + } + + TENSORSTORE_ASSIGN_OR_RETURN( + size_t field_index, GetFieldIndex(metadata->data_type, selected_field)); + SpecRankAndFieldInfo info; + info.field = &metadata->data_type.fields[field_index]; + info.chunked_rank = metadata_constraints.rank; + if (info.chunked_rank == dynamic_rank && metadata_constraints.shape) { + info.chunked_rank = metadata_constraints.shape->size(); + } + if (info.chunked_rank == dynamic_rank && + schema.rank().rank != dynamic_rank) { + info.chunked_rank = schema.rank().rank; + } + // Set domain - bool dimension_names_used; + bool dimension_names_used = false; + std::optional> constraint_shape_span; + if 
(metadata_constraints.shape) { + constraint_shape_span.emplace(metadata_constraints.shape->data(), + metadata_constraints.shape->size()); + } + std::optional>> constraint_names_span; + if (metadata_constraints.dimension_names) { + constraint_names_span.emplace( + metadata_constraints.dimension_names->data(), + metadata_constraints.dimension_names->size()); + } TENSORSTORE_ASSIGN_OR_RETURN( - auto domain, - GetEffectiveDomain(metadata_constraints, schema, &dimension_names_used)); + auto domain, GetEffectiveDomain(info, constraint_shape_span, + constraint_names_span, schema, + &dimension_names_used)); if (!domain.valid() || !IsFinite(domain.box())) { return absl::InvalidArgumentError("domain must be specified"); } - const DimensionIndex rank = metadata->rank = domain.rank(); - metadata->shape.assign(domain.shape().begin(), domain.shape().end()); + const DimensionIndex rank = domain.rank(); + metadata->rank = rank; + info.chunked_rank = rank; + metadata->shape.assign(domain.shape().begin(), + domain.shape().begin() + rank); metadata->dimension_names.assign(domain.labels().begin(), - domain.labels().end()); - // Normalize empty string dimension names to `std::nullopt`. This is more - // consistent with the zarr v3 dimension name semantics, and ensures that the - // `dimension_names` metadata field will be excluded entirely if all dimension - // names are the empty string. - // - // However, if empty string dimension names were specified explicitly in - // `metadata_constraints`, leave them exactly as specified. + domain.labels().begin() + rank); + for (DimensionIndex i = 0; i < rank; ++i) { auto& name = metadata->dimension_names[i]; if (!name || !name->empty()) continue; - // Dimension name equals the empty string. - if (dimension_names_used && (*metadata_constraints.dimension_names)[i]) { - // Empty dimension name was explicitly specified in - // `metadata_constraints`, leave it as is. 
+ if (dimension_names_used && metadata_constraints.dimension_names && + (*metadata_constraints.dimension_names)[i]) { assert((*metadata_constraints.dimension_names)[i]->empty()); continue; } - // Name was not explicitly specified in `metadata_constraints` as an empty - // string. Normalize it to `std::nullopt`. name = std::nullopt; } - // Set dtype - auto dtype = schema.dtype(); - if (!dtype.valid()) { - return absl::InvalidArgumentError("dtype must be specified"); - } - TENSORSTORE_RETURN_IF_ERROR(ValidateDataType(dtype)); - metadata->data_type = dtype; - if (metadata_constraints.fill_value) { metadata->fill_value = *metadata_constraints.fill_value; } else if (auto fill_value = schema.fill_value(); fill_value.valid()) { + // Assuming single field if setting from schema + if (metadata->data_type.fields.size() != 1) { + return absl::InvalidArgumentError( + "Cannot specify fill_value through schema for structured zarr data " + "type"); + } const auto status = [&] { TENSORSTORE_ASSIGN_OR_RETURN( auto broadcast_fill_value, @@ -865,23 +1119,26 @@ Result> GetNewMetadata( TENSORSTORE_ASSIGN_OR_RETURN( auto converted_fill_value, tensorstore::MakeCopy(std::move(broadcast_fill_value), - skip_repeated_elements, metadata->data_type)); - metadata->fill_value = std::move(converted_fill_value); + skip_repeated_elements, + metadata->data_type.fields[0].dtype)); + metadata->fill_value.push_back(std::move(converted_fill_value)); return absl::OkStatus(); }(); TENSORSTORE_RETURN_IF_ERROR( status, tensorstore::MaybeAnnotateStatus(_, "Invalid fill_value")); } else { - metadata->fill_value = tensorstore::AllocateArray( - /*shape=*/span(), c_order, value_init, - metadata->data_type); + metadata->fill_value.resize(metadata->data_type.fields.size()); + for (size_t i = 0; i < metadata->fill_value.size(); ++i) { + metadata->fill_value[i] = tensorstore::AllocateArray( + /*shape=*/span(), c_order, value_init, + metadata->data_type.fields[i].dtype); + } } metadata->user_attributes = 
metadata_constraints.user_attributes; metadata->unknown_extension_attributes = metadata_constraints.unknown_extension_attributes; - // Set dimension units TENSORSTORE_ASSIGN_OR_RETURN( auto dimension_units, GetEffectiveDimensionUnits(rank, metadata_constraints.dimension_units, @@ -895,12 +1152,16 @@ Result> GetNewMetadata( TENSORSTORE_ASSIGN_OR_RETURN(auto codec_spec, GetEffectiveCodec(metadata_constraints, schema)); - // Set chunk shape - ArrayCodecResolveParameters decoded; - decoded.dtype = metadata->data_type; + if (metadata->data_type.fields.size() == 1 && + metadata->data_type.fields[0].outer_shape.empty()) { + decoded.dtype = metadata->data_type.fields[0].dtype; + } else { + decoded.dtype = dtype_v; + } decoded.rank = metadata->rank; - decoded.fill_value = metadata->fill_value; + if (metadata->fill_value.size() == 1) + decoded.fill_value = metadata->fill_value[0]; TENSORSTORE_ASSIGN_OR_RETURN( auto chunk_layout, GetEffectiveChunkLayout(metadata_constraints, schema)); @@ -920,8 +1181,6 @@ Result> GetNewMetadata( if (!internal::RangesEqual(span(metadata->chunk_shape), span(read_chunk_shape))) { - // Read chunk and write chunk shapes differ. Insert sharding codec if there - // is not already one. if (!codec_spec->codecs || codec_spec->codecs->sharding_height() == 0) { auto sharding_codec = internal::MakeIntrusivePtr( @@ -945,7 +1204,8 @@ Result> GetNewMetadata( TENSORSTORE_RETURN_IF_ERROR(set_up_codecs( codec_spec->codecs ? 
*codec_spec->codecs : ZarrCodecChainSpec{})); TENSORSTORE_RETURN_IF_ERROR(ValidateMetadata(*metadata)); - TENSORSTORE_RETURN_IF_ERROR(ValidateMetadataSchema(*metadata, schema)); + TENSORSTORE_RETURN_IF_ERROR( + ValidateMetadataSchema(*metadata, field_index, schema)); return metadata; } diff --git a/tensorstore/driver/zarr3/metadata.h b/tensorstore/driver/zarr3/metadata.h index 05b8c6be3..4c7871b0d 100644 --- a/tensorstore/driver/zarr3/metadata.h +++ b/tensorstore/driver/zarr3/metadata.h @@ -33,6 +33,7 @@ #include "tensorstore/data_type.h" #include "tensorstore/driver/zarr3/codec/codec.h" #include "tensorstore/driver/zarr3/codec/codec_chain_spec.h" +#include "tensorstore/driver/zarr3/dtype.h" #include "tensorstore/index.h" #include "tensorstore/index_space/dimension_units.h" #include "tensorstore/index_space/index_domain.h" @@ -72,19 +73,35 @@ struct ChunkKeyEncoding { }; struct FillValueJsonBinder { - DataType data_type; + ZarrDType dtype; + bool allow_missing_dtype = false; + FillValueJsonBinder() = default; + explicit FillValueJsonBinder(ZarrDType dtype, + bool allow_missing_dtype = false); + explicit FillValueJsonBinder(DataType dtype, + bool allow_missing_dtype = false); absl::Status operator()(std::true_type is_loading, internal_json_binding::NoOptions, - SharedArray* obj, + std::vector>* obj, ::nlohmann::json* j) const; absl::Status operator()(std::false_type is_loading, internal_json_binding::NoOptions, - const SharedArray* obj, + const std::vector>* obj, ::nlohmann::json* j) const; + + private: + absl::Status DecodeSingle(::nlohmann::json& j, DataType data_type, + SharedArray& out) const; + absl::Status EncodeSingle(const SharedArray& arr, + DataType data_type, + ::nlohmann::json& j) const; }; +struct SpecRankAndFieldInfo; + + struct ZarrMetadata { // The following members are common to `ZarrMetadata` and // `ZarrMetadataConstraints`, except that in `ZarrMetadataConstraints` some @@ -94,14 +111,14 @@ struct ZarrMetadata { int zarr_format; std::vector shape; 
- DataType data_type; + ZarrDType data_type; ::nlohmann::json::object_t user_attributes; std::optional dimension_units; std::vector> dimension_names; ChunkKeyEncoding chunk_key_encoding; std::vector chunk_shape; ZarrCodecChainSpec codec_specs; - SharedArray fill_value; + std::vector> fill_value; ::nlohmann::json::object_t unknown_extension_attributes; std::string GetCompatibilityKey() const; @@ -123,14 +140,14 @@ struct ZarrMetadataConstraints { std::optional zarr_format; std::optional> shape; - std::optional data_type; + std::optional data_type; ::nlohmann::json::object_t user_attributes; std::optional dimension_units; std::optional>> dimension_names; std::optional chunk_key_encoding; std::optional> chunk_shape; std::optional codec_specs; - std::optional> fill_value; + std::optional>> fill_value; ::nlohmann::json::object_t unknown_extension_attributes; TENSORSTORE_DECLARE_JSON_DEFAULT_BINDER(ZarrMetadataConstraints, @@ -159,6 +176,10 @@ Result> GetEffectiveDomain( /// Sets chunk layout constraints implied by `dtype`, `rank`, `chunk_shape`, and /// `codecs`. +absl::Status SetChunkLayoutFromMetadata( + const SpecRankAndFieldInfo& info, + std::optional> chunk_shape, + const ZarrCodecChainSpec* codecs, ChunkLayout& chunk_layout); absl::Status SetChunkLayoutFromMetadata( DataType dtype, DimensionIndex rank, std::optional> chunk_shape, @@ -198,6 +219,8 @@ Result> GetEffectiveCodec( CodecSpec GetCodecFromMetadata(const ZarrMetadata& metadata); /// Validates that `schema` is compatible with `metadata`. +absl::Status ValidateMetadataSchema(const ZarrMetadata& metadata, + size_t field_index, const Schema& schema); absl::Status ValidateMetadataSchema(const ZarrMetadata& metadata, const Schema& schema); @@ -206,10 +229,22 @@ absl::Status ValidateMetadataSchema(const ZarrMetadata& metadata, /// \error `absl::StatusCode::kInvalidArgument` if any required fields are /// unspecified. 
Result> GetNewMetadata( - const ZarrMetadataConstraints& metadata_constraints, const Schema& schema); + const ZarrMetadataConstraints& metadata_constraints, + const Schema& schema, std::string_view selected_field = {}); absl::Status ValidateDataType(DataType dtype); +Result GetFieldIndex(const ZarrDType& dtype, + std::string_view selected_field); + +struct SpecRankAndFieldInfo { + DimensionIndex chunked_rank = dynamic_rank; + const ZarrDType::Field* field = nullptr; +}; + +SpecRankAndFieldInfo GetSpecRankAndFieldInfo(const ZarrMetadata& metadata, + size_t field_index); + } // namespace internal_zarr3 } // namespace tensorstore diff --git a/tensorstore/driver/zarr3/metadata_test.cc b/tensorstore/driver/zarr3/metadata_test.cc index 0b140fa80..11c97619f 100644 --- a/tensorstore/driver/zarr3/metadata_test.cc +++ b/tensorstore/driver/zarr3/metadata_test.cc @@ -51,6 +51,7 @@ namespace { namespace jb = ::tensorstore::internal_json_binding; using ::tensorstore::ChunkLayout; +using ::tensorstore::DataType; using ::tensorstore::CodecSpec; using ::tensorstore::dtype_v; using ::tensorstore::Index; @@ -68,6 +69,7 @@ using ::tensorstore::dtypes::float32_t; using ::tensorstore::dtypes::float64_t; using ::tensorstore::internal::uint_t; using ::tensorstore::internal_zarr3::FillValueJsonBinder; +using ::tensorstore::internal_zarr3::ZarrDType; using ::tensorstore::internal_zarr3::ZarrMetadata; using ::tensorstore::internal_zarr3::ZarrMetadataConstraints; using ::testing::HasSubstr; @@ -90,13 +92,30 @@ ::nlohmann::json GetBasicMetadata() { }; } +ZarrDType MakeScalarZarrDType(DataType dtype) { + ZarrDType dtype_info; + dtype_info.has_fields = false; + dtype_info.fields.resize(1); + auto& field = dtype_info.fields[0]; + field.dtype = dtype; + field.encoded_dtype = std::string(dtype.name()); + field.outer_shape.clear(); + field.flexible_shape.clear(); + field.field_shape.clear(); + field.num_inner_elements = 1; + field.byte_offset = 0; + field.num_bytes = dtype->size; + return 
dtype_info; +} + TEST(MetadataTest, ParseValid) { auto json = GetBasicMetadata(); tensorstore::TestJsonBinderRoundTripJsonOnly({json}); TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto metadata, ZarrMetadata::FromJson(json)); EXPECT_THAT(metadata.shape, ::testing::ElementsAre(10, 11, 12)); EXPECT_THAT(metadata.chunk_shape, ::testing::ElementsAre(1, 2, 3)); - EXPECT_THAT(metadata.data_type, tensorstore::dtype_v); + ASSERT_EQ(metadata.data_type.fields.size(), 1); + EXPECT_EQ(tensorstore::dtype_v, metadata.data_type.fields[0].dtype); EXPECT_THAT(metadata.dimension_names, ::testing::ElementsAre("a", std::nullopt, "")); EXPECT_THAT(metadata.user_attributes, MatchesJson({{"a", "b"}, {"c", "d"}})); @@ -115,7 +134,8 @@ TEST(MetadataTest, ParseValidNoDimensionNames) { TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto metadata, ZarrMetadata::FromJson(json)); EXPECT_THAT(metadata.shape, ::testing::ElementsAre(10, 11, 12)); EXPECT_THAT(metadata.chunk_shape, ::testing::ElementsAre(1, 2, 3)); - EXPECT_THAT(metadata.data_type, tensorstore::dtype_v); + ASSERT_EQ(metadata.data_type.fields.size(), 1); + EXPECT_EQ(tensorstore::dtype_v, metadata.data_type.fields[0].dtype); EXPECT_THAT(metadata.dimension_names, ::testing::ElementsAre(std::nullopt, std::nullopt, std::nullopt)); EXPECT_THAT(metadata.user_attributes, MatchesJson({{"a", "b"}, {"c", "d"}})); @@ -486,7 +506,9 @@ TEST(MetadataTest, DataTypes) { } TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto metadata, ZarrMetadata::FromJson(json)); - EXPECT_EQ(tensorstore::GetDataType(data_type_name), metadata.data_type); + ASSERT_FALSE(metadata.data_type.fields.empty()); + EXPECT_EQ(tensorstore::GetDataType(data_type_name), + metadata.data_type.fields[0].dtype); } } @@ -503,18 +525,20 @@ TEST(MetadataTest, InvalidDataType) { template void TestFillValue(std::vector> cases, bool skip_to_json = false) { - auto binder = FillValueJsonBinder{dtype_v}; + FillValueJsonBinder binder(MakeScalarZarrDType(dtype_v)); for (const auto& [value, json] : cases) { SharedArray 
expected_fill_value = tensorstore::MakeScalarArray(value); if (!skip_to_json) { - EXPECT_THAT(jb::ToJson(expected_fill_value, binder), + std::vector> vec{expected_fill_value}; + EXPECT_THAT(jb::ToJson(vec, binder), ::testing::Optional(MatchesJson(json))) << "value=" << value << ", json=" << json; } - EXPECT_THAT(jb::FromJson>(json, binder), - ::testing::Optional( - tensorstore::MatchesArrayIdentically(expected_fill_value))) + EXPECT_THAT( + jb::FromJson>>(json, binder), + ::testing::Optional(::testing::ElementsAre( + tensorstore::MatchesArrayIdentically(expected_fill_value)))) << "json=" << json; } } @@ -522,10 +546,11 @@ void TestFillValue(std::vector> cases, template void TestFillValueInvalid( std::vector> cases) { - auto binder = FillValueJsonBinder{dtype_v}; + FillValueJsonBinder binder(MakeScalarZarrDType(dtype_v)); for (const auto& [json, matcher] : cases) { EXPECT_THAT( - jb::FromJson>(json, binder).status(), + jb::FromJson>>(json, binder) + .status(), StatusIs(absl::StatusCode::kInvalidArgument, MatchesRegex(matcher))) << "json=" << json; } From 187f42452a359bca712a64050176b93e5ce9b145 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Mon, 24 Nov 2025 22:57:11 +0000 Subject: [PATCH 02/20] Updates to have proper reads --- tensorstore/driver/zarr3/chunk_cache.cc | 74 ++++++++++++++---- tensorstore/driver/zarr3/chunk_cache.h | 11 ++- tensorstore/driver/zarr3/driver.cc | 74 ++++++++++++------ tensorstore/driver/zarr3/dtype.cc | 64 +++++++++++---- tensorstore/driver/zarr3/metadata.cc | 100 ++++++++++++++++-------- 5 files changed, 239 insertions(+), 84 deletions(-) diff --git a/tensorstore/driver/zarr3/chunk_cache.cc b/tensorstore/driver/zarr3/chunk_cache.cc index ee1cba9c1..6bfa8c039 100644 --- a/tensorstore/driver/zarr3/chunk_cache.cc +++ b/tensorstore/driver/zarr3/chunk_cache.cc @@ -18,6 +18,8 @@ #include #include +#include +#include #include #include #include @@ -73,15 +75,17 @@ ZarrChunkCache::~ZarrChunkCache() = default; 
ZarrLeafChunkCache::ZarrLeafChunkCache( kvstore::DriverPtr store, ZarrCodecChain::PreparedState::Ptr codec_state, - internal::CachePool::WeakPtr /*data_cache_pool*/) - : Base(std::move(store)), codec_state_(std::move(codec_state)) {} + ZarrDType dtype, internal::CachePool::WeakPtr /*data_cache_pool*/) + : Base(std::move(store)), + codec_state_(std::move(codec_state)), + dtype_(std::move(dtype)) {} void ZarrLeafChunkCache::Read(ZarrChunkCache::ReadRequest request, AnyFlowReceiver>&& receiver) { return internal::ChunkCache::Read( {static_cast(request), - /*component_index=*/0, request.staleness_bound, + request.component_index, request.staleness_bound, request.fill_missing_data_reads}, std::move(receiver)); } @@ -92,7 +96,7 @@ void ZarrLeafChunkCache::Write( receiver) { return internal::ChunkCache::Write( {static_cast(request), - /*component_index=*/0, request.store_data_equal_to_fill_value}, + request.component_index, request.store_data_equal_to_fill_value}, std::move(receiver)); } @@ -149,12 +153,52 @@ std::string ZarrLeafChunkCache::GetChunkStorageKey( Result, 1>> ZarrLeafChunkCache::DecodeChunk(span chunk_indices, absl::Cord data) { + const size_t num_fields = dtype_.fields.size(); + absl::InlinedVector, 1> field_arrays(num_fields); + + + // For single non-structured field, decode directly + if (num_fields == 1 && dtype_.fields[0].outer_shape.empty()) { + TENSORSTORE_ASSIGN_OR_RETURN( + field_arrays[0], codec_state_->DecodeArray(grid().components[0].shape(), + std::move(data))); + return field_arrays; + } + + // For structured types, decode byte array then extract fields + // Build decode shape: [chunk_dims..., bytes_per_outer_element] + const auto& chunk_shape = grid().chunk_shape; + std::vector decode_shape(chunk_shape.begin(), chunk_shape.end()); + decode_shape.push_back(dtype_.bytes_per_outer_element); + TENSORSTORE_ASSIGN_OR_RETURN( - auto array, - codec_state_->DecodeArray(grid().components[0].shape(), std::move(data))); - absl::InlinedVector, 1> 
components; - components.push_back(std::move(array)); - return components; + auto byte_array, codec_state_->DecodeArray(decode_shape, std::move(data))); + + // Extract each field from the byte array + const Index num_elements = byte_array.num_elements() / + dtype_.bytes_per_outer_element; + const auto* src_bytes = static_cast(byte_array.data()); + + for (size_t field_i = 0; field_i < num_fields; ++field_i) { + const auto& field = dtype_.fields[field_i]; + // Use the component's shape (from the grid) for the result array + const auto& component_shape = grid().components[field_i].shape(); + auto result_array = + AllocateArray(component_shape, c_order, default_init, field.dtype); + auto* dst = static_cast(result_array.data()); + const Index field_size = field.dtype->size; + + // Copy field data from each struct element + for (Index i = 0; i < num_elements; ++i) { + std::memcpy(dst + i * field_size, + src_bytes + i * dtype_.bytes_per_outer_element + + field.byte_offset, + field_size); + } + field_arrays[field_i] = std::move(result_array); + } + + return field_arrays; } Result ZarrLeafChunkCache::EncodeChunk( @@ -170,9 +214,10 @@ kvstore::Driver* ZarrLeafChunkCache::GetKvStoreDriver() { ZarrShardedChunkCache::ZarrShardedChunkCache( kvstore::DriverPtr store, ZarrCodecChain::PreparedState::Ptr codec_state, - internal::CachePool::WeakPtr data_cache_pool) + ZarrDType dtype, internal::CachePool::WeakPtr data_cache_pool) : base_kvstore_(std::move(store)), codec_state_(std::move(codec_state)), + dtype_(std::move(dtype)), data_cache_pool_(std::move(data_cache_pool)) {} Result> TranslateCellToSourceTransformForShard( @@ -326,6 +371,7 @@ void ZarrShardedChunkCache::Read( *this, std::move(request.transform), std::move(receiver), [transaction = std::move(request.transaction), batch = std::move(request.batch), + component_index = request.component_index, staleness_bound = request.staleness_bound, fill_missing_data_reads = request.fill_missing_data_reads](auto entry) { Batch 
shard_batch = batch; @@ -339,8 +385,7 @@ void ZarrShardedChunkCache::Read( IndexTransform<>>&& receiver) { entry->sub_chunk_cache.get()->Read( {{transaction, std::move(transform), shard_batch}, - staleness_bound, - fill_missing_data_reads}, + component_index, staleness_bound, fill_missing_data_reads}, std::move(receiver)); }; }); @@ -354,6 +399,7 @@ void ZarrShardedChunkCache::Write( &ZarrArrayToArrayCodec::PreparedState::Write>( *this, std::move(request.transform), std::move(receiver), [transaction = std::move(request.transaction), + component_index = request.component_index, store_data_equal_to_fill_value = request.store_data_equal_to_fill_value](auto entry) { internal::OpenTransactionPtr shard_transaction = transaction; @@ -366,7 +412,7 @@ void ZarrShardedChunkCache::Write( AnyFlowReceiver>&& receiver) { entry->sub_chunk_cache.get()->Write( - {{shard_transaction, std::move(transform)}, + {{shard_transaction, std::move(transform)}, component_index, store_data_equal_to_fill_value}, std::move(receiver)); }; @@ -481,7 +527,7 @@ void ZarrShardedChunkCache::Entry::DoInitialize() { *sharding_state.sub_chunk_codec_chain, std::move(sharding_kvstore), cache.executor(), ZarrShardingCodec::PreparedState::Ptr(&sharding_state), - cache.data_cache_pool_); + cache.dtype_, cache.data_cache_pool_); zarr_chunk_cache = new_cache.release(); return std::unique_ptr(&zarr_chunk_cache->cache()); }) diff --git a/tensorstore/driver/zarr3/chunk_cache.h b/tensorstore/driver/zarr3/chunk_cache.h index dd40e43ac..5933115d7 100644 --- a/tensorstore/driver/zarr3/chunk_cache.h +++ b/tensorstore/driver/zarr3/chunk_cache.h @@ -31,6 +31,7 @@ #include "tensorstore/driver/read_request.h" #include "tensorstore/driver/write_request.h" #include "tensorstore/driver/zarr3/codec/codec.h" +#include "tensorstore/driver/zarr3/dtype.h" #include "tensorstore/index.h" #include "tensorstore/index_space/index_transform.h" #include "tensorstore/internal/cache/cache.h" @@ -72,6 +73,7 @@ class ZarrChunkCache { virtual 
const Executor& executor() const = 0; struct ReadRequest : internal::DriverReadRequest { + size_t component_index = 0; absl::Time staleness_bound; bool fill_missing_data_reads; }; @@ -81,6 +83,7 @@ class ZarrChunkCache { IndexTransform<>>&& receiver) = 0; struct WriteRequest : internal::DriverWriteRequest { + size_t component_index = 0; bool store_data_equal_to_fill_value; }; @@ -154,6 +157,7 @@ class ZarrLeafChunkCache : public internal::KvsBackedChunkCache, explicit ZarrLeafChunkCache(kvstore::DriverPtr store, ZarrCodecChain::PreparedState::Ptr codec_state, + ZarrDType dtype, internal::CachePool::WeakPtr data_cache_pool); void Read(ZarrChunkCache::ReadRequest request, @@ -181,6 +185,7 @@ class ZarrLeafChunkCache : public internal::KvsBackedChunkCache, kvstore::Driver* GetKvStoreDriver() override; ZarrCodecChain::PreparedState::Ptr codec_state_; + ZarrDType dtype_; }; /// Chunk cache for a Zarr array where each chunk is a shard. @@ -190,6 +195,7 @@ class ZarrShardedChunkCache : public internal::Cache, public ZarrChunkCache { public: explicit ZarrShardedChunkCache(kvstore::DriverPtr store, ZarrCodecChain::PreparedState::Ptr codec_state, + ZarrDType dtype, internal::CachePool::WeakPtr data_cache_pool); const ZarrShardingCodec::PreparedState& sharding_codec_state() const { @@ -239,6 +245,7 @@ class ZarrShardedChunkCache : public internal::Cache, public ZarrChunkCache { kvstore::DriverPtr base_kvstore_; ZarrCodecChain::PreparedState::Ptr codec_state_; + ZarrDType dtype_; // Data cache pool, if it differs from `this->pool()` (which is equal to the // metadata cache pool). 
@@ -253,11 +260,11 @@ class ZarrShardSubChunkCache : public ChunkCacheImpl { explicit ZarrShardSubChunkCache( kvstore::DriverPtr store, Executor executor, ZarrShardingCodec::PreparedState::Ptr sharding_state, - internal::CachePool::WeakPtr data_cache_pool) + ZarrDType dtype, internal::CachePool::WeakPtr data_cache_pool) : ChunkCacheImpl(std::move(store), ZarrCodecChain::PreparedState::Ptr( sharding_state->sub_chunk_codec_state), - std::move(data_cache_pool)), + std::move(dtype), std::move(data_cache_pool)), sharding_state_(std::move(sharding_state)), executor_(std::move(executor)) {} diff --git a/tensorstore/driver/zarr3/driver.cc b/tensorstore/driver/zarr3/driver.cc index 15faced0a..1674a1c6d 100644 --- a/tensorstore/driver/zarr3/driver.cc +++ b/tensorstore/driver/zarr3/driver.cc @@ -103,9 +103,11 @@ class ZarrDriverSpec /*Parent=*/KvsDriverSpec>; ZarrMetadataConstraints metadata_constraints; + std::string selected_field; constexpr static auto ApplyMembers = [](auto& x, auto f) { - return f(internal::BaseCast(x), x.metadata_constraints); + return f(internal::BaseCast(x), x.metadata_constraints, + x.selected_field); }; static inline const auto default_json_binder = jb::Sequence( @@ -139,7 +141,10 @@ class ZarrDriverSpec return absl::OkStatus(); }, jb::Projection<&ZarrDriverSpec::metadata_constraints>( - jb::DefaultInitializedValue())))); + jb::DefaultInitializedValue()))), + jb::Member("field", jb::Projection<&ZarrDriverSpec::selected_field>( + jb::DefaultValue( + [](auto* obj) { *obj = std::string{}; })))); absl::Status ApplyOptions(SpecOptions&& options) override { if (options.minimal_spec) { @@ -286,21 +291,33 @@ class DataCacheBase static internal::ChunkGridSpecification GetChunkGridSpecification( const ZarrMetadata& metadata) { assert(!metadata.fill_value.empty()); - auto fill_value = BroadcastArray(metadata.fill_value[0], - BoxView<>(metadata.rank)) - .value(); internal::ChunkGridSpecification::ComponentList components; - auto& component = 
components.emplace_back( - internal::AsyncWriteArray::Spec{ - std::move(fill_value), - // Since all dimensions are resizable, just - // specify unbounded `valid_data_bounds`. - Box<>(metadata.rank), - ContiguousLayoutPermutation<>( - span(metadata.inner_order.data(), metadata.rank))}, - metadata.chunk_shape); - component.array_spec.fill_value_comparison_kind = - EqualityComparisonKind::identical; + + // Create one component per field (like zarr v2) + for (size_t field_i = 0; field_i < metadata.data_type.fields.size(); + ++field_i) { + const auto& field = metadata.data_type.fields[field_i]; + auto fill_value = metadata.fill_value[field_i]; + if (!fill_value.valid()) { + // Use value-initialized rank-0 fill value (like zarr v2) + fill_value = AllocateArray(span{}, c_order, value_init, + field.dtype); + } + auto chunk_fill_value = + BroadcastArray(fill_value, BoxView<>(metadata.rank)).value(); + + auto& component = components.emplace_back( + internal::AsyncWriteArray::Spec{ + std::move(chunk_fill_value), + // Since all dimensions are resizable, just + // specify unbounded `valid_data_bounds`. 
+ Box<>(metadata.rank), + ContiguousLayoutPermutation<>( + span(metadata.inner_order.data(), metadata.rank))}, + metadata.chunk_shape); + component.array_spec.fill_value_comparison_kind = + EqualityComparisonKind::identical; + } return internal::ChunkGridSpecification(std::move(components)); } @@ -381,7 +398,7 @@ class DataCacheBase Result> GetExternalToInternalTransform( const void* metadata_ptr, size_t component_index) override { - assert(component_index == 0); + // component_index corresponds to the selected field index const auto& metadata = *static_cast(metadata_ptr); const DimensionIndex rank = metadata.rank; std::string_view normalized_dimension_names[kMaxRank]; @@ -404,10 +421,16 @@ class DataCacheBase absl::Status GetBoundSpecData(KvsDriverSpec& spec_base, const void* metadata_ptr, size_t component_index) override { - assert(component_index == 0); auto& spec = static_cast(spec_base); const auto& metadata = *static_cast(metadata_ptr); spec.metadata_constraints = ZarrMetadataConstraints(metadata); + // Encode selected_field from component_index + if (metadata.data_type.has_fields && + component_index < metadata.data_type.fields.size()) { + spec.selected_field = metadata.data_type.fields[component_index].name; + } else { + spec.selected_field.clear(); + } return absl::OkStatus(); } @@ -513,7 +536,8 @@ class ZarrDriver : public ZarrDriverBase { AnyFlowReceiver> receiver) override { return cache()->zarr_chunk_cache().Read( - {std::move(request), GetCurrentDataStalenessBound(), + {std::move(request), this->component_index(), + GetCurrentDataStalenessBound(), this->fill_value_mode_.fill_missing_data_reads}, std::move(receiver)); } @@ -523,7 +547,7 @@ class ZarrDriver : public ZarrDriverBase { AnyFlowReceiver> receiver) override { return cache()->zarr_chunk_cache().Write( - {std::move(request), + {std::move(request), this->component_index(), this->fill_value_mode_.store_data_equal_to_fill_value}, std::move(receiver)); } @@ -621,7 +645,8 @@ class 
ZarrDriver::OpenState : public ZarrDriver::OpenStateBase { *static_cast(initializer.metadata.get()); return internal_zarr3::MakeZarrChunkCache( *metadata.codecs, std::move(initializer), spec().store.path, - metadata.codec_state, /*data_cache_pool=*/*cache_pool()); + metadata.codec_state, metadata.data_type, + /*data_cache_pool=*/*cache_pool()); } Result GetComponentIndex(const void* metadata_ptr, @@ -629,9 +654,12 @@ class ZarrDriver::OpenState : public ZarrDriver::OpenStateBase { const auto& metadata = *static_cast(metadata_ptr); TENSORSTORE_RETURN_IF_ERROR( ValidateMetadata(metadata, spec().metadata_constraints)); + TENSORSTORE_ASSIGN_OR_RETURN( + auto field_index, + GetFieldIndex(metadata.data_type, spec().selected_field)); TENSORSTORE_RETURN_IF_ERROR( - ValidateMetadataSchema(metadata, spec().schema)); - return 0; + ValidateMetadataSchema(metadata, field_index, spec().schema)); + return field_index; } }; diff --git a/tensorstore/driver/zarr3/dtype.cc b/tensorstore/driver/zarr3/dtype.cc index 8d1c9d49e..281b9c98b 100644 --- a/tensorstore/driver/zarr3/dtype.cc +++ b/tensorstore/driver/zarr3/dtype.cc @@ -76,20 +76,12 @@ namespace { /// \param value The zarr metadata "dtype" JSON specification. /// \param out[out] Must be non-null. Filled with the parsed dtype on success. /// \error `absl::StatusCode::kInvalidArgument' if `value` is invalid. -Result ParseDTypeNoDerived(const nlohmann::json& value) { - ZarrDType out; - if (value.is_string()) { - // Single field. 
- out.has_fields = false; - out.fields.resize(1); - TENSORSTORE_ASSIGN_OR_RETURN( - static_cast(out.fields[0]), - ParseBaseDType(value.get())); - return out; - } +// Helper to parse fields array (used by both array format and object format) +absl::Status ParseFieldsArray(const nlohmann::json& fields_json, + ZarrDType& out) { out.has_fields = true; - auto parse_result = internal_json::JsonParseArray( - value, + return internal_json::JsonParseArray( + fields_json, [&](ptrdiff_t size) { out.fields.resize(size); return absl::OkStatus(); @@ -140,7 +132,51 @@ Result ParseDTypeNoDerived(const nlohmann::json& value) { } }); }); - if (!parse_result.ok()) return parse_result; +} + +Result ParseDTypeNoDerived(const nlohmann::json& value) { + ZarrDType out; + if (value.is_string()) { + // Single field. + out.has_fields = false; + out.fields.resize(1); + TENSORSTORE_ASSIGN_OR_RETURN( + static_cast(out.fields[0]), + ParseBaseDType(value.get())); + return out; + } + // Handle extended object format: + // {"name": "structured", "configuration": {"fields": [...]}} + if (value.is_object()) { + if (value.contains("name") && value.contains("configuration")) { + std::string type_name; + TENSORSTORE_RETURN_IF_ERROR( + internal_json::JsonRequireValueAs(value["name"], &type_name)); + if (type_name == "structured") { + const auto& config = value["configuration"]; + if (!config.is_object() || !config.contains("fields")) { + return absl::InvalidArgumentError( + "Structured data type requires 'configuration' object with " + "'fields' array"); + } + TENSORSTORE_RETURN_IF_ERROR(ParseFieldsArray(config["fields"], out)); + return out; + } + // For other named types, try to parse as a base dtype + out.has_fields = false; + out.fields.resize(1); + TENSORSTORE_ASSIGN_OR_RETURN( + static_cast(out.fields[0]), + ParseBaseDType(type_name)); + return out; + } + return absl::InvalidArgumentError(tensorstore::StrCat( + "Expected string, array, or object with 'name' and 'configuration', " + "but received: 
", + value.dump())); + } + // Handle array format: [["field1", "type1"], ["field2", "type2"], ...] + TENSORSTORE_RETURN_IF_ERROR(ParseFieldsArray(value, out)); return out; } diff --git a/tensorstore/driver/zarr3/metadata.cc b/tensorstore/driver/zarr3/metadata.cc index c96c31426..880991e8c 100644 --- a/tensorstore/driver/zarr3/metadata.cc +++ b/tensorstore/driver/zarr3/metadata.cc @@ -31,7 +31,10 @@ #include #include +#include + #include "absl/algorithm/container.h" +#include "absl/strings/escaping.h" #include "absl/base/casts.h" #include "absl/base/optimization.h" #include "absl/meta/type_traits.h" @@ -282,16 +285,44 @@ absl::Status FillValueJsonBinder::operator()( TENSORSTORE_RETURN_IF_ERROR( DecodeSingle(*j, dtype.fields[0].dtype, (*obj)[0])); } else { - if (!j->is_array()) { - return internal_json::ExpectedError(*j, "array"); - } - if (j->size() != dtype.fields.size()) { - return internal_json::ExpectedError( - *j, tensorstore::StrCat("array of size ", dtype.fields.size())); - } - for (size_t i = 0; i < dtype.fields.size(); ++i) { - TENSORSTORE_RETURN_IF_ERROR( - DecodeSingle((*j)[i], dtype.fields[i].dtype, (*obj)[i])); + // For structured types, handle both array format and base64-encoded string + if (j->is_string()) { + // Decode base64-encoded fill value for entire struct + std::string b64_decoded; + if (!absl::Base64Unescape(j->get(), &b64_decoded)) { + return absl::InvalidArgumentError(tensorstore::StrCat( + "Expected valid base64-encoded fill value, but received: ", + j->dump())); + } + // Verify size matches expected struct size + if (static_cast(b64_decoded.size()) != + dtype.bytes_per_outer_element) { + return absl::InvalidArgumentError(tensorstore::StrCat( + "Expected ", dtype.bytes_per_outer_element, + " base64-encoded bytes for fill_value, but received ", + b64_decoded.size(), " bytes")); + } + // Extract per-field fill values from decoded bytes + for (size_t i = 0; i < dtype.fields.size(); ++i) { + const auto& field = dtype.fields[i]; + auto arr = 
AllocateArray(span{}, c_order, default_init, + field.dtype); + std::memcpy(arr.data(), b64_decoded.data() + field.byte_offset, + field.dtype->size); + (*obj)[i] = std::move(arr); + } + } else if (j->is_array()) { + if (j->size() != dtype.fields.size()) { + return internal_json::ExpectedError( + *j, tensorstore::StrCat("array of size ", dtype.fields.size())); + } + for (size_t i = 0; i < dtype.fields.size(); ++i) { + TENSORSTORE_RETURN_IF_ERROR( + DecodeSingle((*j)[i], dtype.fields[i].dtype, (*obj)[i])); + } + } else { + return internal_json::ExpectedError(*j, + "array or base64-encoded string"); } } return absl::OkStatus(); @@ -561,28 +592,33 @@ std::string ZarrMetadata::GetCompatibilityKey() const { } absl::Status ValidateMetadata(ZarrMetadata& metadata) { + // Determine if this is a structured type with multiple fields + const bool is_structured = + metadata.data_type.fields.size() > 1 || + (metadata.data_type.fields.size() == 1 && + !metadata.data_type.fields[0].outer_shape.empty()); + + // Build the codec shape - for structured types, include bytes dimension + std::vector codec_shape(metadata.chunk_shape.begin(), + metadata.chunk_shape.end()); + if (is_structured) { + codec_shape.push_back(metadata.data_type.bytes_per_outer_element); + } + if (!metadata.codecs) { ArrayCodecResolveParameters decoded; - if (metadata.data_type.fields.size() == 1 && - metadata.data_type.fields[0].outer_shape.empty()) { + if (!is_structured) { decoded.dtype = metadata.data_type.fields[0].dtype; + decoded.rank = metadata.rank; } else { + // For structured types, use byte dtype with extra dimension decoded.dtype = dtype_v; - // TODO: Verify this works for structured types. - // Zarr2 uses a "scalar" array concept with byte storage for chunks. + decoded.rank = metadata.rank + 1; } - decoded.rank = metadata.rank; // Fill value for codec resolve might be complex. - // Zarr3 codecs usually don't depend on fill value except for some like - // "sharding_indexed"? 
Sharding uses fill_value for missing chunks. - if (metadata.fill_value.size() == 1) { + // For structured types, create a byte fill value + if (metadata.fill_value.size() == 1 && !is_structured) { decoded.fill_value = metadata.fill_value[0]; - } else { - // How to represent structured fill value for codec? - // Sharding expects a single array. - // If we use structured type, the "array" is bytes. - // We might need to encode the fill value to bytes. - // For now, leave empty if multiple fields. } BytesCodecResolveParameters encoded; @@ -593,17 +629,19 @@ absl::Status ValidateMetadata(ZarrMetadata& metadata) { // Get codec chunk layout info. ArrayDataTypeAndShapeInfo array_info; - // array_info.dtype used here to validate codec compatibility. - if (metadata.data_type.fields.size() == 1 && - metadata.data_type.fields[0].outer_shape.empty()) { + if (!is_structured) { array_info.dtype = metadata.data_type.fields[0].dtype; + array_info.rank = metadata.rank; + std::copy_n(metadata.chunk_shape.begin(), metadata.rank, + array_info.shape.emplace().begin()); } else { array_info.dtype = dtype_v; + array_info.rank = metadata.rank + 1; + auto& shape = array_info.shape.emplace(); + std::copy_n(metadata.chunk_shape.begin(), metadata.rank, shape.begin()); + shape[metadata.rank] = metadata.data_type.bytes_per_outer_element; } - array_info.rank = metadata.rank; - std::copy_n(metadata.chunk_shape.begin(), metadata.rank, - array_info.shape.emplace().begin()); ArrayCodecChunkLayoutInfo layout_info; TENSORSTORE_RETURN_IF_ERROR( metadata.codec_specs.GetDecodedChunkLayout(array_info, layout_info)); @@ -617,7 +655,7 @@ absl::Status ValidateMetadata(ZarrMetadata& metadata) { } TENSORSTORE_ASSIGN_OR_RETURN(metadata.codec_state, - metadata.codecs->Prepare(metadata.chunk_shape)); + metadata.codecs->Prepare(codec_shape)); return absl::OkStatus(); } From c2e73cd6b1a2dcd5499522dce0bacd378af43279 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Mon, 24 Nov 2025 22:57:22 +0000 Subject: [PATCH 
03/20] Local testing and examples --- examples/BUILD | 23 +++ examples/CMakeLists.txt | 163 ++++++++++++++++++ examples/read_structured_zarr3.cc | 271 ++++++++++++++++++++++++++++++ 3 files changed, 457 insertions(+) create mode 100644 examples/CMakeLists.txt create mode 100644 examples/read_structured_zarr3.cc diff --git a/examples/BUILD b/examples/BUILD index 94acdba14..4dcb2d604 100644 --- a/examples/BUILD +++ b/examples/BUILD @@ -122,3 +122,26 @@ tensorstore_cc_binary( "@riegeli//riegeli/bytes:writer", ], ) + +tensorstore_cc_binary( + name = "read_structured_zarr3", + srcs = ["read_structured_zarr3.cc"], + deps = [ + "//tensorstore", + "//tensorstore:array", + "//tensorstore:context", + "//tensorstore:data_type", + "//tensorstore:index", + "//tensorstore:open", + "//tensorstore:open_mode", + "//tensorstore:spec", + "//tensorstore/driver/zarr3", + "//tensorstore/kvstore/file", + "//tensorstore/util:result", + "//tensorstore/util:status", + "@abseil-cpp//absl/flags:flag", + "@abseil-cpp//absl/flags:parse", + "@abseil-cpp//absl/status", + "@nlohmann_json//:json", + ], +) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt new file mode 100644 index 000000000..92e9857fa --- /dev/null +++ b/examples/CMakeLists.txt @@ -0,0 +1,163 @@ +# Standalone CMakeLists.txt for read_structured_zarr3 example +# +# Build instructions: +# mkdir -p /home/ubuntu/source/tensorstore/examples/build +# cd /home/ubuntu/source/tensorstore/examples/build +# cmake .. 
+# make +# +# Run: +# ./read_structured_zarr3 --zarr_path=/home/ubuntu/source/tensorstore/filt_mig.mdio/headers + +cmake_minimum_required(VERSION 3.24) +project(read_structured_zarr3 LANGUAGES CXX) + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + +# Path to the tensorstore build directory +set(TENSORSTORE_BUILD_DIR "/home/ubuntu/source/tensorstore/build" CACHE PATH "Path to tensorstore build directory") +set(TENSORSTORE_SOURCE_DIR "/home/ubuntu/source/tensorstore" CACHE PATH "Path to tensorstore source directory") +set(DEPS_DIR "${TENSORSTORE_BUILD_DIR}/_deps") + +# Include paths (matching what tensorstore tests use) +include_directories( + ${TENSORSTORE_SOURCE_DIR} + ${DEPS_DIR}/absl-src + ${DEPS_DIR}/re2-src + ${DEPS_DIR}/riegeli-src +) + +include_directories(SYSTEM + ${DEPS_DIR}/half-build/include + ${DEPS_DIR}/half-src/include + ${DEPS_DIR}/nlohmann_json-build/include + ${DEPS_DIR}/nlohmann_json-src/include + ${TENSORSTORE_BUILD_DIR} +) + +# Compiler flags +add_compile_options( + -fPIE + -Wno-deprecated-declarations + -Wno-sign-compare + -Wno-unused-but-set-parameter + -Wno-maybe-uninitialized + -Wno-sequence-point + -Wno-unknown-warning-option + -Wno-stringop-overflow + -fsized-deallocation +) + +# Find all the static libraries we need from the tensorstore build +file(GLOB TENSORSTORE_LIBS "${TENSORSTORE_BUILD_DIR}/libtensorstore*.a") +file(GLOB_RECURSE ABSEIL_LIBS "${DEPS_DIR}/absl-build/absl/*.a") +file(GLOB_RECURSE RIEGELI_LIBS "${DEPS_DIR}/riegeli-build/*.a") + +# Additional dependency libraries - corrected paths +file(GLOB_RECURSE BLOSC_LIBS "${DEPS_DIR}/blosc-build/*.a") +file(GLOB_RECURSE ZSTD_LIBS "${DEPS_DIR}/zstd-build/*.a") +file(GLOB_RECURSE RE2_LIBS "${DEPS_DIR}/re2-build/*.a") +file(GLOB_RECURSE SNAPPY_LIBS "${DEPS_DIR}/snappy-build/*.a") +file(GLOB_RECURSE BROTLI_LIBS "${DEPS_DIR}/brotli-build/*.a") +file(GLOB_RECURSE LZ4_LIBS "${DEPS_DIR}/lz4-build/*.a") +file(GLOB_RECURSE ZLIB_LIBS "${DEPS_DIR}/zlib-build/*.a") 
+file(GLOB_RECURSE PROTOBUF_LIBS "${DEPS_DIR}/protobuf-build/*.a") +file(GLOB_RECURSE GRPC_LIBS "${DEPS_DIR}/grpc-build/*.a") +file(GLOB_RECURSE CARES_LIBS "${DEPS_DIR}/c-ares-build/*.a") +file(GLOB_RECURSE SSL_LIBS "${DEPS_DIR}/boringssl-build/ssl/*.a") +file(GLOB_RECURSE CRYPTO_LIBS "${DEPS_DIR}/boringssl-build/crypto/*.a") +file(GLOB_RECURSE LIBLZMA_LIBS "${DEPS_DIR}/liblzma-build/*.a") +file(GLOB_RECURSE BZIP2_LIBS "${DEPS_DIR}/bzip2-build/*.a") +file(GLOB_RECURSE JPEG_LIBS "${DEPS_DIR}/jpeg-build/*.a") +file(GLOB_RECURSE PNG_LIBS "${DEPS_DIR}/png-build/*.a") +file(GLOB_RECURSE TIFF_LIBS "${DEPS_DIR}/tiff-build/*.a") +file(GLOB_RECURSE AVIF_LIBS "${DEPS_DIR}/avif-build/*.a") +file(GLOB_RECURSE AOM_LIBS "${DEPS_DIR}/aom-build/*.a") +file(GLOB_RECURSE WEBP_LIBS "${DEPS_DIR}/webp-build/*.a") +file(GLOB_RECURSE CURL_LIBS "${DEPS_DIR}/curl-build/*.a") + +# Create executable +add_executable(read_structured_zarr3 read_structured_zarr3.cc) + +# Link libraries - use whole-archive for libraries that use static registration +# These include drivers, codecs, kvstores, and context resource providers +target_link_libraries(read_structured_zarr3 PRIVATE + # Force inclusion of libraries with static registrations + -Wl,--whole-archive + + # Context resource providers + ${TENSORSTORE_BUILD_DIR}/libtensorstore_internal_data_copy_concurrency_resource.a + ${TENSORSTORE_BUILD_DIR}/libtensorstore_internal_file_io_concurrency_resource.a + ${TENSORSTORE_BUILD_DIR}/libtensorstore_internal_cache_cache_pool_resource.a + ${TENSORSTORE_BUILD_DIR}/libtensorstore_internal_concurrency_resource.a + + # Zarr3 driver and codecs + ${TENSORSTORE_BUILD_DIR}/libtensorstore_driver_zarr3_driver.a + ${TENSORSTORE_BUILD_DIR}/libtensorstore_driver_zarr3_codec_blosc.a + ${TENSORSTORE_BUILD_DIR}/libtensorstore_driver_zarr3_codec_bytes.a + ${TENSORSTORE_BUILD_DIR}/libtensorstore_driver_zarr3_codec_crc32c.a + ${TENSORSTORE_BUILD_DIR}/libtensorstore_driver_zarr3_codec_gzip.a + 
${TENSORSTORE_BUILD_DIR}/libtensorstore_driver_zarr3_codec_transpose.a + ${TENSORSTORE_BUILD_DIR}/libtensorstore_driver_zarr3_codec_zstd.a + ${TENSORSTORE_BUILD_DIR}/libtensorstore_driver_zarr3_codec_sharding_indexed.a + ${TENSORSTORE_BUILD_DIR}/libtensorstore_driver_zarr3_codec_codec_chain_spec.a + + # File kvstore and its resource providers + ${TENSORSTORE_BUILD_DIR}/libtensorstore_kvstore_file.a + ${TENSORSTORE_BUILD_DIR}/libtensorstore_kvstore_file_file_resource.a + + -Wl,--no-whole-archive + + -Wl,--start-group + + # Tensorstore libs + ${TENSORSTORE_LIBS} + + # Riegeli + ${RIEGELI_LIBS} + + # Abseil + ${ABSEIL_LIBS} + + # Compression libs + ${BLOSC_LIBS} + ${ZSTD_LIBS} + ${LZ4_LIBS} + ${SNAPPY_LIBS} + ${BROTLI_LIBS} + ${ZLIB_LIBS} + ${LIBLZMA_LIBS} + ${BZIP2_LIBS} + + # Regex + ${RE2_LIBS} + + # Protocol buffers and gRPC + ${PROTOBUF_LIBS} + ${GRPC_LIBS} + ${CARES_LIBS} + + # SSL/TLS + ${SSL_LIBS} + ${CRYPTO_LIBS} + + # Image libraries + ${JPEG_LIBS} + ${PNG_LIBS} + ${TIFF_LIBS} + ${AVIF_LIBS} + ${AOM_LIBS} + ${WEBP_LIBS} + + # HTTP + ${CURL_LIBS} + + -Wl,--end-group + + # System libraries + pthread + dl + m + rt +) diff --git a/examples/read_structured_zarr3.cc b/examples/read_structured_zarr3.cc new file mode 100644 index 000000000..1caacd8f5 --- /dev/null +++ b/examples/read_structured_zarr3.cc @@ -0,0 +1,271 @@ +// Copyright 2024 The TensorStore Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Standalone test for reading structured data from a Zarr v3 array. +// +// This test opens an existing zarr3 array with structured data type, +// reads the "inline" field, and prints all values. +// +// Usage: +// bazel run //examples:read_structured_zarr3 -- /path/to/zarr/array +// +// Or with cmake: +// cd examples/build && ./read_structured_zarr3 + +#include + +#include +#include +#include +#include + +#include "absl/flags/flag.h" +#include "absl/flags/parse.h" +#include "absl/status/status.h" +#include +#include "tensorstore/array.h" +#include "tensorstore/context.h" +#include "tensorstore/data_type.h" +#include "tensorstore/index.h" +#include "tensorstore/open.h" +#include "tensorstore/open_mode.h" +#include "tensorstore/spec.h" +#include "tensorstore/tensorstore.h" +#include "tensorstore/util/result.h" +#include "tensorstore/util/status.h" + +ABSL_FLAG(std::string, zarr_path, + "/home/ubuntu/source/tensorstore/filt_mig.mdio/headers", + "Path to the zarr3 array directory"); + +namespace { + +using ::tensorstore::Index; + +// Field layout from the zarr.json metadata: +// The structured dtype has the following fields with their byte offsets: +// trace_seq_num_line: int32 @ 0 +// trace_seq_num_reel: int32 @ 4 +// ... (many more fields) ... 
+// inline: int32 @ 180 +// crossline: int32 @ 184 +// cdp_x: int32 @ 188 +// cdp_y: int32 @ 192 +// +// Total struct size: 196 bytes (matches blosc typesize) + +constexpr size_t kInlineFieldOffset = 180; +constexpr size_t kStructSize = 196; + +// Read and parse the zarr.json metadata to display info about structured type +void PrintZarrMetadata(const std::string& zarr_path) { + std::string metadata_path = zarr_path + "/zarr.json"; + std::ifstream file(metadata_path); + if (!file.is_open()) { + std::cerr << "Could not open " << metadata_path << std::endl; + return; + } + + nlohmann::json metadata; + try { + file >> metadata; + } catch (const nlohmann::json::parse_error& e) { + std::cerr << "Failed to parse zarr.json: " << e.what() << std::endl; + return; + } + + std::cout << "\n=== Zarr Metadata ===" << std::endl; + std::cout << "Shape: " << metadata["shape"].dump() << std::endl; + std::cout << "Dimension names: " << metadata["dimension_names"].dump() + << std::endl; + + if (metadata.contains("data_type")) { + auto& dt = metadata["data_type"]; + std::cout << "\nData type format:" << std::endl; + if (dt.is_object()) { + std::cout << " Type: object with name=\"" << dt["name"].get() + << "\"" << std::endl; + if (dt.contains("configuration") && + dt["configuration"].contains("fields")) { + auto& fields = dt["configuration"]["fields"]; + std::cout << " Number of fields: " << fields.size() << std::endl; + std::cout << " Fields:" << std::endl; + size_t byte_offset = 0; + for (const auto& field : fields) { + std::string name = field[0].get(); + std::string type = field[1].get(); + size_t size = (type == "int32" || type == "uint32" || type == "float32") + ? 
4 + : 2; // int16/uint16 + std::cout << " " << name << ": " << type << " @ byte " << byte_offset + << std::endl; + byte_offset += size; + } + std::cout << " Total struct size: " << byte_offset << " bytes" + << std::endl; + } + } else if (dt.is_string()) { + std::cout << " Type: simple \"" << dt.get() << "\"" + << std::endl; + } else if (dt.is_array()) { + std::cout << " Type: array with " << dt.size() << " fields" << std::endl; + } + } + + if (metadata.contains("codecs")) { + std::cout << "\nCodecs: " << metadata["codecs"].dump(2) << std::endl; + } +} + +absl::Status Run(const std::string& zarr_path) { + std::cout << "=== Zarr v3 Structured Data Type Test ===" << std::endl; + std::cout << "Opening zarr3 array at: " << zarr_path << std::endl; + + // First, display metadata information + PrintZarrMetadata(zarr_path); + + auto context = tensorstore::Context::Default(); + + // Create spec for opening the zarr3 array + // Note: "field" is at the driver level, not inside kvstore (same as zarr v2) + ::nlohmann::json spec_json = { + {"driver", "zarr3"}, + {"kvstore", + { + {"driver", "file"}, + {"path", zarr_path + "/"}, + }}, + {"field", "inline"}, // Field at byte offset 180 + }; + + std::cout << "\n=== Opening TensorStore ===" << std::endl; + std::cout << "Spec: " << spec_json.dump(2) << std::endl; + + // Open the TensorStore + auto open_result = + tensorstore::Open(spec_json, context, tensorstore::OpenMode::open, + tensorstore::ReadWriteMode::read) + .result(); + + if (!open_result.ok()) { + std::cout << "\n=== Open Failed ===" << std::endl; + std::cout << "Status: " << open_result.status() << std::endl; + std::cout << "\nThis error is expected if the zarr3 driver's dtype parsing\n" + << "does not yet support the extended structured data type format:\n" + << " {\"name\": \"structured\", \"configuration\": {\"fields\": [...]}}\n" + << std::endl; + std::cout << "The dtype.cc ParseDTypeNoDerived() function currently handles:\n" + << " 1. 
String format: \"int32\"\n" + << " 2. Array format: [[\"field1\", \"int32\"], ...]\n" + << "\nBut the zarr.json uses the extended object format shown above." + << std::endl; + return open_result.status(); + } + + auto store = std::move(open_result).value(); + + // Get information about the array + auto domain = store.domain(); + std::cout << "\n=== Array Info ===" << std::endl; + std::cout << "Domain: " << domain << std::endl; + std::cout << "Dtype: " << store.dtype() << std::endl; + std::cout << "Rank: " << store.rank() << std::endl; + + auto shape = domain.shape(); + std::cout << "Shape: ["; + for (int i = 0; i < shape.size(); ++i) { + if (i > 0) std::cout << ", "; + std::cout << shape[i]; + } + std::cout << "]" << std::endl; + + // Read all data + std::cout << "\n=== Reading Data ===" << std::endl; + TENSORSTORE_ASSIGN_OR_RETURN( + auto array, tensorstore::Read(store).result()); + + std::cout << "Read complete. Array size: " << array.num_elements() + << " elements" << std::endl; + std::cout << "Data type: " << array.dtype() << std::endl; + + // Since field="inline" was specified, the array contains just int32 values + // directly - no struct extraction needed! 
+ Index num_inline = shape[0]; + Index num_crossline = shape[1]; + + std::cout << "\n=== Inline field values (shape: " << num_inline << " x " + << num_crossline << ") ===" << std::endl; + + // Cast to int32 pointer since the data is already the inline field values + auto int_ptr = reinterpret_cast(array.data()); + + // Print first 10 rows (or fewer if less data) + Index rows_to_print = std::min(num_inline, Index{10}); + Index cols_to_print = std::min(num_crossline, Index{10}); + + for (Index i = 0; i < rows_to_print; ++i) { + for (Index j = 0; j < cols_to_print; ++j) { + std::cout << int_ptr[i * num_crossline + j]; + if (j < cols_to_print - 1) { + std::cout << "\t"; + } + } + if (num_crossline > cols_to_print) { + std::cout << "\t..."; + } + std::cout << std::endl; + } + if (num_inline > rows_to_print) { + std::cout << "... (" << (num_inline - rows_to_print) << " more rows)" + << std::endl; + } + + std::cout << "\n=== Summary ===" << std::endl; + std::cout << "Successfully read " << (num_inline * num_crossline) + << " inline values" << std::endl; + + // Show some statistics + int32_t min_val = int_ptr[0], max_val = int_ptr[0]; + int64_t sum = 0; + for (Index i = 0; i < num_inline * num_crossline; ++i) { + min_val = std::min(min_val, int_ptr[i]); + max_val = std::max(max_val, int_ptr[i]); + sum += int_ptr[i]; + } + std::cout << "Min value: " << min_val << std::endl; + std::cout << "Max value: " << max_val << std::endl; + std::cout << "Mean value: " << (static_cast(sum) / (num_inline * num_crossline)) << std::endl; + + return absl::OkStatus(); +} + +} // namespace + +int main(int argc, char** argv) { + absl::ParseCommandLine(argc, argv); + + std::string zarr_path = absl::GetFlag(FLAGS_zarr_path); + if (zarr_path.empty()) { + std::cerr << "Error: --zarr_path is required" << std::endl; + return 1; + } + + auto status = Run(zarr_path); + if (!status.ok()) { + std::cerr << "\nFinal status: " << status << std::endl; + return 1; + } + + return 0; +} From 
9e8ed947f5912394ca715d36d6fd1eb630d04e8a Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Tue, 25 Nov 2025 18:12:58 +0000 Subject: [PATCH 04/20] Begin adding support for opening struct arrays as void and add support for raw bits dtype --- examples/read_structured_zarr3.cc | 324 +++++++++++++++++++----- tensorstore/driver/zarr3/chunk_cache.cc | 7 + tensorstore/driver/zarr3/driver.cc | 180 +++++++++++-- tensorstore/driver/zarr3/dtype.cc | 52 +++- tensorstore/driver/zarr3/dtype_test.cc | 14 + tensorstore/driver/zarr3/metadata.cc | 89 ++++++- 6 files changed, 565 insertions(+), 101 deletions(-) diff --git a/examples/read_structured_zarr3.cc b/examples/read_structured_zarr3.cc index 1caacd8f5..259eade34 100644 --- a/examples/read_structured_zarr3.cc +++ b/examples/read_structured_zarr3.cc @@ -12,16 +12,23 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Standalone test for reading structured data from a Zarr v3 array. +// Standalone test for reading structured data from Zarr v3 arrays. // -// This test opens an existing zarr3 array with structured data type, -// reads the "inline" field, and prints all values. +// This test opens two Zarr v3 arrays: +// 1. A structured array with named fields (headers/) +// 2. A raw bytes array containing struct data (raw_headers/) +// +// Both arrays should contain the same data, allowing comparison of: +// - Field-based access vs manual byte extraction +// - Structured dtype parsing vs raw byte handling // // Usage: -// bazel run //examples:read_structured_zarr3 -- /path/to/zarr/array +// bazel run //examples:read_structured_zarr3 -- /path/to/parent/dir // // Or with cmake: -// cd examples/build && ./read_structured_zarr3 +// cd examples/build && ./read_structured_zarr3 --zarr_path=/path/to/parent/dir +// +// Where the parent dir contains both 'headers/' and 'raw_headers/' subdirs. 
#include @@ -45,9 +52,15 @@ #include "tensorstore/util/result.h" #include "tensorstore/util/status.h" +// Internal headers for testing dtype parsing +#include "tensorstore/driver/zarr3/dtype.h" + +// Additional headers for string operations +#include "absl/strings/str_join.h" + ABSL_FLAG(std::string, zarr_path, - "/home/ubuntu/source/tensorstore/filt_mig.mdio/headers", - "Path to the zarr3 array directory"); + "/home/ubuntu/source/tensorstore/filt_mig.mdio", + "Path to the parent .mdio directory containing headers/ and raw_headers/"); namespace { @@ -128,56 +141,13 @@ void PrintZarrMetadata(const std::string& zarr_path) { } } -absl::Status Run(const std::string& zarr_path) { - std::cout << "=== Zarr v3 Structured Data Type Test ===" << std::endl; - std::cout << "Opening zarr3 array at: " << zarr_path << std::endl; - - // First, display metadata information - PrintZarrMetadata(zarr_path); - - auto context = tensorstore::Context::Default(); - - // Create spec for opening the zarr3 array - // Note: "field" is at the driver level, not inside kvstore (same as zarr v2) - ::nlohmann::json spec_json = { - {"driver", "zarr3"}, - {"kvstore", - { - {"driver", "file"}, - {"path", zarr_path + "/"}, - }}, - {"field", "inline"}, // Field at byte offset 180 - }; - - std::cout << "\n=== Opening TensorStore ===" << std::endl; - std::cout << "Spec: " << spec_json.dump(2) << std::endl; - - // Open the TensorStore - auto open_result = - tensorstore::Open(spec_json, context, tensorstore::OpenMode::open, - tensorstore::ReadWriteMode::read) - .result(); - - if (!open_result.ok()) { - std::cout << "\n=== Open Failed ===" << std::endl; - std::cout << "Status: " << open_result.status() << std::endl; - std::cout << "\nThis error is expected if the zarr3 driver's dtype parsing\n" - << "does not yet support the extended structured data type format:\n" - << " {\"name\": \"structured\", \"configuration\": {\"fields\": [...]}}\n" - << std::endl; - std::cout << "The dtype.cc ParseDTypeNoDerived() 
function currently handles:\n" - << " 1. String format: \"int32\"\n" - << " 2. Array format: [[\"field1\", \"int32\"], ...]\n" - << "\nBut the zarr.json uses the extended object format shown above." - << std::endl; - return open_result.status(); - } - - auto store = std::move(open_result).value(); - +// Helper function to read and display inline field from an array +absl::Status ReadInlineField(const tensorstore::TensorStore<>& store, + const std::string& array_name, + bool is_raw_bytes = false) { // Get information about the array auto domain = store.domain(); - std::cout << "\n=== Array Info ===" << std::endl; + std::cout << "\n=== " << array_name << " Array Info ===" << std::endl; std::cout << "Domain: " << domain << std::endl; std::cout << "Dtype: " << store.dtype() << std::endl; std::cout << "Rank: " << store.rank() << std::endl; @@ -191,7 +161,7 @@ absl::Status Run(const std::string& zarr_path) { std::cout << "]" << std::endl; // Read all data - std::cout << "\n=== Reading Data ===" << std::endl; + std::cout << "\n=== Reading " << array_name << " Data ===" << std::endl; TENSORSTORE_ASSIGN_OR_RETURN( auto array, tensorstore::Read(store).result()); @@ -199,16 +169,46 @@ absl::Status Run(const std::string& zarr_path) { << " elements" << std::endl; std::cout << "Data type: " << array.dtype() << std::endl; - // Since field="inline" was specified, the array contains just int32 values - // directly - no struct extraction needed! - Index num_inline = shape[0]; - Index num_crossline = shape[1]; + Index num_inline, num_crossline; + const int32_t* int_ptr; + + if (is_raw_bytes) { + // For raw bytes, we need to extract the inline field manually + // Shape is [inline, crossline, struct_size] + num_inline = shape[0]; + num_crossline = shape[1]; + Index struct_size = shape[2]; + if (struct_size != kStructSize) { + std::cout << "Warning: Raw struct size (" << struct_size + << ") differs from expected header struct size (" << kStructSize + << "). Assuming padding." 
<< std::endl; + } - std::cout << "\n=== Inline field values (shape: " << num_inline << " x " - << num_crossline << ") ===" << std::endl; + // Extract inline field (4 bytes starting at offset 180) + auto byte_ptr = reinterpret_cast(array.data()); + std::vector inline_values(num_inline * num_crossline); - // Cast to int32 pointer since the data is already the inline field values - auto int_ptr = reinterpret_cast(array.data()); + for (Index i = 0; i < num_inline; ++i) { + for (Index j = 0; j < num_crossline; ++j) { + Index struct_offset = (i * num_crossline + j) * struct_size; + Index field_offset = struct_offset + kInlineFieldOffset; + std::memcpy(&inline_values[i * num_crossline + j], + byte_ptr + field_offset, 4); + } + } + + std::cout << "Extracted inline field from raw bytes at offset " + << kInlineFieldOffset << std::endl; + int_ptr = inline_values.data(); + } else { + // For structured array, field access already gave us int32 values + num_inline = shape[0]; + num_crossline = shape[1]; + int_ptr = reinterpret_cast(array.data()); + } + + std::cout << "\n=== Inline field values from " << array_name + << " (shape: " << num_inline << " x " << num_crossline << ") ===" << std::endl; // Print first 10 rows (or fewer if less data) Index rows_to_print = std::min(num_inline, Index{10}); @@ -231,10 +231,10 @@ absl::Status Run(const std::string& zarr_path) { << std::endl; } - std::cout << "\n=== Summary ===" << std::endl; + std::cout << "\n=== " << array_name << " Summary ===" << std::endl; std::cout << "Successfully read " << (num_inline * num_crossline) << " inline values" << std::endl; - + // Show some statistics int32_t min_val = int_ptr[0], max_val = int_ptr[0]; int64_t sum = 0; @@ -250,6 +250,189 @@ absl::Status Run(const std::string& zarr_path) { return absl::OkStatus(); } +absl::Status Run(const std::string& zarr_path) { + std::cout << "=== Zarr v3 Structured Data Type Test ===" << std::endl; + std::cout << "Opening zarr3 arrays in: " << zarr_path << std::endl; + + 
auto context = tensorstore::Context::Default(); + + // First, display metadata information for structured array + std::string headers_path = zarr_path + "/headers"; + PrintZarrMetadata(headers_path); + + // Test raw_bytes parsing by reading and parsing the raw_headers zarr.json + std::cout << "\n" << std::string(60, '=') << std::endl; + std::cout << "TESTING RAW_BYTES PARSING" << std::endl; + std::cout << std::string(60, '=') << std::endl; + + std::string raw_metadata_path = zarr_path + "/raw_headers/zarr.json"; + std::ifstream raw_file(raw_metadata_path); + if (!raw_file.is_open()) { + std::cout << "Could not open " << raw_metadata_path << std::endl; + return absl::NotFoundError("Raw headers metadata not found"); + } + + nlohmann::json raw_metadata; + try { + raw_file >> raw_metadata; + } catch (const nlohmann::json::parse_error& e) { + std::cout << "Failed to parse raw zarr.json: " << e.what() << std::endl; + return absl::DataLossError("Invalid raw metadata JSON"); + } + + std::cout << "Raw headers data_type: " << raw_metadata["data_type"].dump(2) << std::endl; + + // Test parsing the raw_bytes data type + std::cout << "Testing raw_bytes dtype parsing..." 
<< std::endl; + + // For now, just verify the JSON structure is what we expect + if (!raw_metadata.contains("data_type")) { + std::cout << "FAILED: No data_type in metadata" << std::endl; + return absl::NotFoundError("Missing data_type"); + } + + auto& dt = raw_metadata["data_type"]; + if (!dt.is_object() || !dt.contains("name") || dt["name"] != "raw_bytes") { + std::cout << "FAILED: data_type is not raw_bytes extension" << std::endl; + return absl::InvalidArgumentError("Not raw_bytes extension"); + } + + if (!dt.contains("configuration") || !dt["configuration"].contains("length_bytes")) { + std::cout << "FAILED: Missing length_bytes in configuration" << std::endl; + return absl::InvalidArgumentError("Missing length_bytes"); + } + + int length_bytes = dt["configuration"]["length_bytes"]; + std::cout << "SUCCESS: Found raw_bytes extension with length_bytes = " << length_bytes << std::endl; + std::cout << "This should parse to:" << std::endl; + std::cout << " - Single field with byte_t dtype" << std::endl; + std::cout << " - Field shape: [" << length_bytes << "]" << std::endl; + std::cout << " - Bytes per outer element: " << length_bytes << std::endl; + + // Now actually test the parsing implementation + std::cout << "\n=== Testing ParseDType Implementation ===" << std::endl; + auto dtype_result = tensorstore::internal_zarr3::ParseDType(dt); + if (!dtype_result.ok()) { + std::cout << "FAILED: Could not parse raw_bytes data type: " << dtype_result.status() << std::endl; + return dtype_result.status(); + } + + auto dtype = std::move(dtype_result).value(); + std::cout << "SUCCESS: ParseDType worked!" 
<< std::endl; + std::cout << " Fields: " << dtype.fields.size() << std::endl; + std::cout << " Has fields: " << dtype.has_fields << std::endl; + std::cout << " Bytes per outer element: " << dtype.bytes_per_outer_element << std::endl; + + if (!dtype.fields.empty()) { + const auto& field = dtype.fields[0]; + std::cout << " Field name: '" << field.name << "'" << std::endl; + std::cout << " Field dtype: " << field.dtype << std::endl; + std::cout << " Field shape: [" << absl::StrJoin(field.field_shape, ", ") << "]" << std::endl; + std::cout << " Field num_inner_elements: " << field.num_inner_elements << std::endl; + std::cout << " Field num_bytes: " << field.num_bytes << std::endl; + } + + // Verify the parsing is correct + bool parsing_correct = true; + if (dtype.fields.size() != 1) { + std::cout << "ERROR: Expected 1 field, got " << dtype.fields.size() << std::endl; + parsing_correct = false; + } + if (dtype.fields[0].name != "") { + std::cout << "ERROR: Expected empty field name, got '" << dtype.fields[0].name << "'" << std::endl; + parsing_correct = false; + } + if (dtype.fields[0].dtype != tensorstore::dtype_v) { + std::cout << "ERROR: Expected byte_t dtype, got " << dtype.fields[0].dtype << std::endl; + parsing_correct = false; + } + if (dtype.fields[0].field_shape != std::vector{length_bytes}) { + std::cout << "ERROR: Expected field shape [" << length_bytes << "], got [" + << absl::StrJoin(dtype.fields[0].field_shape, ", ") << "]" << std::endl; + parsing_correct = false; + } + if (dtype.bytes_per_outer_element != length_bytes) { + std::cout << "ERROR: Expected " << length_bytes << " bytes per element, got " + << dtype.bytes_per_outer_element << std::endl; + parsing_correct = false; + } + + if (parsing_correct) { + std::cout << "\n✅ PARSING VERIFICATION: All checks passed!" << std::endl; + std::cout << "The raw_bytes extension is correctly parsed." << std::endl; + } else { + std::cout << "\n❌ PARSING VERIFICATION: Some checks failed!" 
<< std::endl; + return absl::InternalError("Parsing verification failed"); + } + + // Test 1: Read from structured array using field access + std::cout << "\n" << std::string(60, '=') << std::endl; + std::cout << "TEST 1: Reading from structured 'headers' array" << std::endl; + std::cout << std::string(60, '=') << std::endl; + + ::nlohmann::json headers_spec = ::nlohmann::json::object(); + headers_spec["driver"] = "zarr3"; + headers_spec["kvstore"] = ::nlohmann::json::object(); + headers_spec["kvstore"]["driver"] = "file"; + headers_spec["kvstore"]["path"] = headers_path + "/"; + headers_spec["field"] = "inline"; // Extract inline field (int32 at byte offset 180) + + std::cout << "Spec: " << headers_spec.dump(2) << std::endl; + + auto headers_open_result = + tensorstore::Open(headers_spec, context, tensorstore::OpenMode::open, + tensorstore::ReadWriteMode::read) + .result(); + + if (!headers_open_result.ok()) { + std::cout << "\n=== Headers Open Failed ===" << std::endl; + std::cout << "Status: " << headers_open_result.status() << std::endl; + return headers_open_result.status(); + } + + auto headers_store = std::move(headers_open_result).value(); + TENSORSTORE_RETURN_IF_ERROR(ReadInlineField(headers_store, "headers")); + + // Test 2: Read from raw bytes array (no special void access needed) + std::cout << "\n" << std::string(60, '=') << std::endl; + std::cout << "TEST 2: Reading from raw 'raw_headers' array" << std::endl; + std::cout << std::string(60, '=') << std::endl; + + std::string raw_headers_path = zarr_path + "/raw_headers"; + ::nlohmann::json raw_spec = ::nlohmann::json::object(); + raw_spec["driver"] = "zarr3"; + raw_spec["kvstore"] = ::nlohmann::json::object(); + raw_spec["kvstore"]["driver"] = "file"; + raw_spec["kvstore"]["path"] = raw_headers_path + "/"; + // No field specified - raw_bytes has a single anonymous field + + std::cout << "Spec: " << raw_spec.dump(2) << std::endl; + + auto raw_open_result = + tensorstore::Open(raw_spec, context, 
tensorstore::OpenMode::open, + tensorstore::ReadWriteMode::read) + .result(); + + if (!raw_open_result.ok()) { + std::cout << "\n=== Raw Headers Open Failed ===" << std::endl; + std::cout << "Status: " << raw_open_result.status() << std::endl; + return raw_open_result.status(); + } + + auto raw_store = std::move(raw_open_result).value(); + TENSORSTORE_RETURN_IF_ERROR(ReadInlineField(raw_store, "raw_headers", /*is_raw_bytes=*/true)); + + std::cout << "\n" << std::string(60, '=') << std::endl; + std::cout << "COMPARISON: Both methods should give identical inline field values" << std::endl; + std::cout << std::string(60, '=') << std::endl; + std::cout << "The structured 'headers' array provides field access convenience,\n" + << "while the raw 'raw_headers' array provides direct byte access.\n" + << "Both extract the inline field from byte offset " << kInlineFieldOffset + << " in " << kStructSize << "-byte structs." << std::endl; + + return absl::OkStatus(); +} + } // namespace int main(int argc, char** argv) { @@ -261,6 +444,15 @@ int main(int argc, char** argv) { return 1; } + // Verify the path structure + std::string headers_path = zarr_path + "/headers"; + std::string raw_headers_path = zarr_path + "/raw_headers"; + + std::cout << "Expecting arrays at:" << std::endl; + std::cout << " Structured: " << headers_path << std::endl; + std::cout << " Raw bytes: " << raw_headers_path << std::endl; + std::cout << std::endl; + auto status = Run(zarr_path); if (!status.ok()) { std::cerr << "\nFinal status: " << status << std::endl; diff --git a/tensorstore/driver/zarr3/chunk_cache.cc b/tensorstore/driver/zarr3/chunk_cache.cc index 6bfa8c039..64b6d69fd 100644 --- a/tensorstore/driver/zarr3/chunk_cache.cc +++ b/tensorstore/driver/zarr3/chunk_cache.cc @@ -156,6 +156,13 @@ ZarrLeafChunkCache::DecodeChunk(span chunk_indices, const size_t num_fields = dtype_.fields.size(); absl::InlinedVector, 1> field_arrays(num_fields); + // Special case: void access - return raw bytes directly 
+ if (num_fields == 1 && dtype_.fields[0].name == "") { + TENSORSTORE_ASSIGN_OR_RETURN( + field_arrays[0], codec_state_->DecodeArray(grid().components[0].shape(), + std::move(data))); + return field_arrays; + } // For single non-structured field, decode directly if (num_fields == 1 && dtype_.fields[0].outer_shape.empty()) { diff --git a/tensorstore/driver/zarr3/driver.cc b/tensorstore/driver/zarr3/driver.cc index 1674a1c6d..b4d96da1f 100644 --- a/tensorstore/driver/zarr3/driver.cc +++ b/tensorstore/driver/zarr3/driver.cc @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -79,6 +80,8 @@ namespace tensorstore { namespace internal_zarr3 { +constexpr size_t kVoidFieldIndex = size_t(-1); + // Avoid anonymous namespace to workaround MSVC bug. // // https://developercommunity.visualstudio.com/t/Bug-involving-virtual-functions-templat/10424129 @@ -263,12 +266,29 @@ class DataCacheBase DimensionSet& implicit_lower_bounds, DimensionSet& implicit_upper_bounds) override { const auto& metadata = *static_cast(metadata_ptr); - assert(bounds.rank() == static_cast(metadata.shape.size())); - std::fill(bounds.origin().begin(), bounds.origin().end(), Index(0)); + assert(bounds.rank() >= static_cast(metadata.shape.size())); + std::fill(bounds.origin().begin(), + bounds.origin().begin() + metadata.shape.size(), Index(0)); std::copy(metadata.shape.begin(), metadata.shape.end(), bounds.shape().begin()); implicit_lower_bounds = false; - implicit_upper_bounds = true; + implicit_upper_bounds = false; + for (DimensionIndex i = 0; + i < static_cast(metadata.shape.size()); ++i) { + implicit_upper_bounds[i] = true; + } + if (bounds.rank() > static_cast(metadata.shape.size()) && + metadata.data_type.fields.size() == 1) { + const auto& field = metadata.data_type.fields[0]; + if (static_cast(metadata.shape.size() + + field.field_shape.size()) == + bounds.rank()) { + for (size_t i = 0; i < field.field_shape.size(); ++i) { + bounds.shape()[metadata.shape.size() + i] 
= field.field_shape[i]; + bounds.origin()[metadata.shape.size() + i] = 0; + } + } + } } Result> GetResizedMetadata( @@ -289,10 +309,47 @@ class DataCacheBase } static internal::ChunkGridSpecification GetChunkGridSpecification( - const ZarrMetadata& metadata) { + const ZarrMetadata& metadata, size_t field_index = 0) { assert(!metadata.fill_value.empty()); internal::ChunkGridSpecification::ComponentList components; + // Special case: void access - create single component for entire struct + if (field_index == kVoidFieldIndex) { + // For void access, use the fill_value from the single raw_bytes field + auto& fill_value = metadata.fill_value[0]; + std::cout << "[DEBUG] Void access fill_value: shape=" << fill_value.shape() + << ", dtype=" << fill_value.dtype() << std::endl; + + // Broadcast to shape [unbounded, unbounded, ..., struct_size] + std::vector target_shape(metadata.rank, kInfIndex); + target_shape.push_back(metadata.data_type.bytes_per_outer_element); + std::cout << "[DEBUG] Void access target_shape: ["; + for (size_t i = 0; i < target_shape.size(); ++i) { + if (i > 0) std::cout << ", "; + std::cout << target_shape[i]; + } + std::cout << "]" << std::endl; + auto chunk_fill_value = + BroadcastArray(fill_value, BoxView<>(target_shape)).value(); + + // Add extra dimension for struct size in bytes + std::vector chunk_shape_with_bytes = metadata.chunk_shape; + chunk_shape_with_bytes.push_back(metadata.data_type.bytes_per_outer_element); + + auto& component = components.emplace_back( + internal::AsyncWriteArray::Spec{ + std::move(chunk_fill_value), + // Since all dimensions are resizable, just + // specify unbounded `valid_data_bounds`. 
+ Box<>(metadata.rank + 1), + ContiguousLayoutPermutation<>( + span(metadata.inner_order.data(), metadata.rank + 1))}, + chunk_shape_with_bytes); + component.array_spec.fill_value_comparison_kind = + EqualityComparisonKind::identical; + return internal::ChunkGridSpecification(std::move(components)); + } + // Create one component per field (like zarr v2) for (size_t field_i = 0; field_i < metadata.data_type.fields.size(); ++field_i) { @@ -303,18 +360,47 @@ class DataCacheBase fill_value = AllocateArray(span{}, c_order, value_init, field.dtype); } + + // Handle fields with shape (e.g. raw_bytes) + const size_t field_rank = field.field_shape.size(); + + // 1. Construct target shape for broadcasting + std::vector target_shape(metadata.rank, kInfIndex); + target_shape.insert(target_shape.end(), field.field_shape.begin(), + field.field_shape.end()); + auto chunk_fill_value = - BroadcastArray(fill_value, BoxView<>(metadata.rank)).value(); + BroadcastArray(fill_value, BoxView<>(target_shape)).value(); + + // 2. Construct component chunk shape + std::vector component_chunk_shape = metadata.chunk_shape; + component_chunk_shape.insert(component_chunk_shape.end(), + field.field_shape.begin(), + field.field_shape.end()); + + // 3. Construct permutation + std::vector component_permutation(metadata.rank + + field_rank); + std::copy_n(metadata.inner_order.data(), metadata.rank, + component_permutation.begin()); + std::iota(component_permutation.begin() + metadata.rank, + component_permutation.end(), metadata.rank); + + // 4. Construct bounds + Box<> valid_data_bounds(metadata.rank + field_rank); + for (size_t i = 0; i < field_rank; ++i) { + valid_data_bounds[metadata.rank + i] = + IndexInterval::UncheckedSized(0, field.field_shape[i]); + } auto& component = components.emplace_back( internal::AsyncWriteArray::Spec{ std::move(chunk_fill_value), // Since all dimensions are resizable, just // specify unbounded `valid_data_bounds`. 
- Box<>(metadata.rank), - ContiguousLayoutPermutation<>( - span(metadata.inner_order.data(), metadata.rank))}, - metadata.chunk_shape); + std::move(valid_data_bounds), + ContiguousLayoutPermutation<>(component_permutation)}, + component_chunk_shape); component.array_spec.fill_value_comparison_kind = EqualityComparisonKind::identical; } @@ -342,7 +428,7 @@ class DataCacheBase [](std::string& out, DimensionIndex dim, Index grid_index) { absl::StrAppend(&out, grid_index); }, - rank, grid_indices); + rank, grid_indices.subspan(0, rank)); return key; } @@ -355,17 +441,21 @@ class DataCacheBase key_prefix_.size() + (metadata.chunk_key_encoding.kind == ChunkKeyEncoding::kDefault ? 2 : 0)); - return internal::ParseGridIndexKeyWithDimensionSeparator( - metadata.chunk_key_encoding.separator, - [](std::string_view part, DimensionIndex dim, Index& grid_index) { - if (part.empty() || !absl::ascii_isdigit(part.front()) || - !absl::ascii_isdigit(part.back()) || - !absl::SimpleAtoi(part, &grid_index)) { - return false; - } - return true; - }, - key, grid_indices); + if (!internal::ParseGridIndexKeyWithDimensionSeparator( + metadata.chunk_key_encoding.separator, + [](std::string_view part, DimensionIndex dim, Index& grid_index) { + if (part.empty() || !absl::ascii_isdigit(part.front()) || + !absl::ascii_isdigit(part.back()) || + !absl::SimpleAtoi(part, &grid_index)) { + return false; + } + return true; + }, + key, grid_indices.subspan(0, metadata.rank))) { + return false; + } + std::fill(grid_indices.begin() + metadata.rank, grid_indices.end(), 0); + return true; } Index MinGridIndexForLexicographicalOrder( @@ -378,7 +468,7 @@ class DataCacheBase *static_cast(initial_metadata().get()); if (metadata.chunk_key_encoding.kind == ChunkKeyEncoding::kDefault) { std::string key = tensorstore::StrCat(key_prefix_, "c"); - for (DimensionIndex i = 0; i < cell_indices.size(); ++i) { + for (DimensionIndex i = 0; i < metadata.rank; ++i) { tensorstore::StrAppend( &key, 
std::string_view(&metadata.chunk_key_encoding.separator, 1), cell_indices[i]); @@ -388,7 +478,7 @@ class DataCacheBase // Use "0" for rank 0 as a special case. std::string key = tensorstore::StrCat( key_prefix_, cell_indices.empty() ? 0 : cell_indices[0]); - for (DimensionIndex i = 1; i < cell_indices.size(); ++i) { + for (DimensionIndex i = 1; i < metadata.rank; ++i) { tensorstore::StrAppend( &key, std::string_view(&metadata.chunk_key_encoding.separator, 1), cell_indices[i]); @@ -400,7 +490,11 @@ class DataCacheBase const void* metadata_ptr, size_t component_index) override { // component_index corresponds to the selected field index const auto& metadata = *static_cast(metadata_ptr); + const auto& field = metadata.data_type.fields[component_index]; const DimensionIndex rank = metadata.rank; + const DimensionIndex field_rank = field.field_shape.size(); + const DimensionIndex total_rank = rank + field_rank; + std::string_view normalized_dimension_names[kMaxRank]; for (DimensionIndex i = 0; i < rank; ++i) { if (const auto& name = metadata.dimension_names[i]; name.has_value()) { @@ -408,11 +502,20 @@ class DataCacheBase } } auto builder = - tensorstore::IndexTransformBuilder<>(rank, rank) - .input_shape(metadata.shape) - .input_labels(span(&normalized_dimension_names[0], rank)); - builder.implicit_upper_bounds(true); + tensorstore::IndexTransformBuilder<>(total_rank, total_rank); + std::vector full_shape = metadata.shape; + full_shape.insert(full_shape.end(), field.field_shape.begin(), + field.field_shape.end()); + builder.input_shape(full_shape); + builder.input_labels(span(&normalized_dimension_names[0], total_rank)); + + DimensionSet implicit_upper_bounds(false); for (DimensionIndex i = 0; i < rank; ++i) { + implicit_upper_bounds[i] = true; + } + builder.implicit_upper_bounds(implicit_upper_bounds); + + for (DimensionIndex i = 0; i < total_rank; ++i) { builder.output_single_input_dimension(i, i); } return builder.Finalize(); @@ -643,9 +746,26 @@ class 
ZarrDriver::OpenState : public ZarrDriver::OpenStateBase { DataCacheInitializer&& initializer) override { const auto& metadata = *static_cast(initializer.metadata.get()); + // For void access, modify the dtype to indicate special handling + ZarrDType dtype = metadata.data_type; + if (spec().selected_field == "") { + // Create a synthetic dtype for void access + dtype = ZarrDType{ + /*.has_fields=*/false, + /*.fields=*/{ZarrDType::Field{ + ZarrDType::BaseDType{"", dtype_v, + {metadata.data_type.bytes_per_outer_element}}, + /*.outer_shape=*/{}, + /*.name=*/"", + /*.field_shape=*/{metadata.data_type.bytes_per_outer_element}, + /*.num_inner_elements=*/metadata.data_type.bytes_per_outer_element, + /*.byte_offset=*/0, + /*.num_bytes=*/metadata.data_type.bytes_per_outer_element}}, + /*.bytes_per_outer_element=*/metadata.data_type.bytes_per_outer_element}; + } return internal_zarr3::MakeZarrChunkCache( *metadata.codecs, std::move(initializer), spec().store.path, - metadata.codec_state, metadata.data_type, + metadata.codec_state, dtype, /*data_cache_pool=*/*cache_pool()); } @@ -657,6 +777,10 @@ class ZarrDriver::OpenState : public ZarrDriver::OpenStateBase { TENSORSTORE_ASSIGN_OR_RETURN( auto field_index, GetFieldIndex(metadata.data_type, spec().selected_field)); + // For void access, map to component index 0 + if (field_index == kVoidFieldIndex) { + field_index = 0; + } TENSORSTORE_RETURN_IF_ERROR( ValidateMetadataSchema(metadata, field_index, spec().schema)); return field_index; diff --git a/tensorstore/driver/zarr3/dtype.cc b/tensorstore/driver/zarr3/dtype.cc index 281b9c98b..116712d70 100644 --- a/tensorstore/driver/zarr3/dtype.cc +++ b/tensorstore/driver/zarr3/dtype.cc @@ -19,6 +19,7 @@ #include #include "absl/base/optimization.h" +#include "absl/strings/ascii.h" #include "tensorstore/data_type.h" #include "tensorstore/internal/json_binding/json_binding.h" #include "tensorstore/util/endian.h" @@ -57,9 +58,26 @@ Result ParseBaseDType(std::string_view dtype) { if (dtype 
== "complex128") return make_dtype(dtype_v<::tensorstore::dtypes::complex128_t>); + // Handle r raw bits type where N is number of bits (must be multiple of 8) + if (dtype.size() > 1 && dtype[0] == 'r' && absl::ascii_isdigit(dtype[1])) { + std::string_view suffix = dtype.substr(1); + Index num_bits = 0; + if (!absl::SimpleAtoi(suffix, &num_bits) || + num_bits == 0 || + num_bits % 8 != 0) { + return absl::InvalidArgumentError(tensorstore::StrCat( + dtype, " data type is invalid; expected r where N is a positive " + "multiple of 8")); + } + Index num_bytes = num_bits / 8; + return ZarrDType::BaseDType{std::string(dtype), + dtype_v<::tensorstore::dtypes::byte_t>, + {num_bytes}}; + } + constexpr std::string_view kSupported = "bool, uint8, uint16, uint32, uint64, int8, int16, int32, int64, " - "bfloat16, float16, float32, float64, complex64, complex128"; + "bfloat16, float16, float32, float64, complex64, complex128, r"; return absl::InvalidArgumentError( tensorstore::StrCat(dtype, " data type is not one of the supported " "data types: ", @@ -162,6 +180,34 @@ Result ParseDTypeNoDerived(const nlohmann::json& value) { TENSORSTORE_RETURN_IF_ERROR(ParseFieldsArray(config["fields"], out)); return out; } + if (type_name == "raw_bytes") { + const auto& config = value["configuration"]; + if (!config.is_object() || !config.contains("length_bytes")) { + return absl::InvalidArgumentError( + "raw_bytes data type requires 'configuration' object with " + "'length_bytes' field"); + } + Index length_bytes; + TENSORSTORE_RETURN_IF_ERROR( + internal_json::JsonRequireValueAs(config["length_bytes"], &length_bytes)); + if (length_bytes <= 0) { + return absl::InvalidArgumentError( + "raw_bytes length_bytes must be positive"); + } + out.has_fields = false; + out.fields.resize(1); + out.fields[0].encoded_dtype = "raw_bytes"; + out.fields[0].dtype = dtype_v; + out.fields[0].flexible_shape = {length_bytes}; + out.fields[0].outer_shape = {}; + out.fields[0].name = ""; + out.fields[0].field_shape = 
{length_bytes}; + out.fields[0].num_inner_elements = length_bytes; + out.fields[0].byte_offset = 0; + out.fields[0].num_bytes = length_bytes; + out.bytes_per_outer_element = length_bytes; + return out; + } // For other named types, try to parse as a base dtype out.has_fields = false; out.fields.resize(1); @@ -326,6 +372,10 @@ Result ChooseBaseDType(DataType dtype) { return MakeBaseDType("complex64", dtype); if (dtype == dtype_v<::tensorstore::dtypes::complex128_t>) return MakeBaseDType("complex128", dtype); + if (dtype == dtype_v<::tensorstore::dtypes::byte_t>) + return MakeBaseDType("r8", dtype); + if (dtype == dtype_v<::tensorstore::dtypes::char_t>) + return MakeBaseDType("r8", dtype); return absl::InvalidArgumentError( tensorstore::StrCat("Data type not supported: ", dtype)); } diff --git a/tensorstore/driver/zarr3/dtype_test.cc b/tensorstore/driver/zarr3/dtype_test.cc index cbb7acbfb..e1c5b444c 100644 --- a/tensorstore/driver/zarr3/dtype_test.cc +++ b/tensorstore/driver/zarr3/dtype_test.cc @@ -68,6 +68,9 @@ TEST(ParseBaseDType, Success) { CheckBaseDType("float64", dtype_v, {}); CheckBaseDType("complex64", dtype_v, {}); CheckBaseDType("complex128", dtype_v, {}); + CheckBaseDType("r8", dtype_v, {1}); + CheckBaseDType("r16", dtype_v, {2}); + CheckBaseDType("r64", dtype_v, {8}); } TEST(ParseBaseDType, Failure) { @@ -81,6 +84,15 @@ TEST(ParseBaseDType, Failure) { StatusIs(absl::StatusCode::kInvalidArgument)); EXPECT_THAT(ParseBaseDType(""))); + EXPECT_THAT(ParseBaseDType("r7"), + StatusIs(absl::StatusCode::kInvalidArgument, + HasSubstr("data type is invalid; expected r"))); + EXPECT_THAT(ParseBaseDType("r0"), + StatusIs(absl::StatusCode::kInvalidArgument, + HasSubstr("data type is invalid; expected r"))); } void CheckDType(const ::nlohmann::json& json, const ZarrDType& expected) { @@ -266,6 +278,8 @@ TEST(ChooseBaseDTypeTest, RoundTrip) { dtype_v, dtype_v, dtype_v, + dtype_v, + dtype_v, }; for (auto dtype : kSupportedDataTypes) { 
SCOPED_TRACE(tensorstore::StrCat("dtype=", dtype)); diff --git a/tensorstore/driver/zarr3/metadata.cc b/tensorstore/driver/zarr3/metadata.cc index 880991e8c..6a83cdbec 100644 --- a/tensorstore/driver/zarr3/metadata.cc +++ b/tensorstore/driver/zarr3/metadata.cc @@ -250,6 +250,10 @@ constexpr std::array FillValueDataTypeFunctions::Make<::tensorstore::dtypes::T>(); \ /**/ TENSORSTORE_ZARR3_FOR_EACH_DATA_TYPE(TENSORSTORE_INTERNAL_DO_DEF) + // Add char_t support for string data types + functions[static_cast(DataTypeId::char_t)] = + FillValueDataTypeFunctions::Make<::tensorstore::dtypes::char_t>(); + // byte_t is handled specially to use uint8_t functions #undef TENSORSTORE_INTERNAL_DO_DEF return functions; }(); @@ -282,8 +286,39 @@ absl::Status FillValueJsonBinder::operator()( std::vector>* obj, ::nlohmann::json* j) const { obj->resize(dtype.fields.size()); if (dtype.fields.size() == 1) { - TENSORSTORE_RETURN_IF_ERROR( - DecodeSingle(*j, dtype.fields[0].dtype, (*obj)[0])); + // Special case: raw_bytes (single field with byte_t and flexible shape) + if (dtype.fields[0].dtype.id() == DataTypeId::byte_t && + !dtype.fields[0].flexible_shape.empty()) { + // Handle base64-encoded fill value for raw_bytes + if (!j->is_string()) { + return absl::InvalidArgumentError( + "Expected base64-encoded string for raw_bytes fill_value"); + } + std::string b64_decoded; + if (!absl::Base64Unescape(j->get(), &b64_decoded)) { + return absl::InvalidArgumentError(tensorstore::StrCat( + "Expected valid base64-encoded fill value, but received: ", + j->dump())); + } + // Verify size matches expected byte array size + Index expected_size = dtype.fields[0].num_inner_elements; + if (static_cast(b64_decoded.size()) != expected_size) { + return absl::InvalidArgumentError(tensorstore::StrCat( + "Expected ", expected_size, + " base64-encoded bytes for fill_value, but received ", + b64_decoded.size(), " bytes")); + } + // Create fill value array + auto fill_arr = 
AllocateArray(dtype.fields[0].field_shape, c_order, + default_init, dtype.fields[0].dtype); + std::memcpy(fill_arr.data(), b64_decoded.data(), b64_decoded.size()); + std::cout << "[DEBUG] Raw bytes fill_value parsed: shape=" << fill_arr.shape() + << ", dtype=" << dtype.fields[0].dtype << std::endl; + (*obj)[0] = std::move(fill_arr); + } else { + TENSORSTORE_RETURN_IF_ERROR( + DecodeSingle(*j, dtype.fields[0].dtype, (*obj)[0])); + } } else { // For structured types, handle both array format and base64-encoded string if (j->is_string()) { @@ -361,8 +396,14 @@ absl::Status FillValueJsonBinder::DecodeSingle(::nlohmann::json& j, AllocateArray(span{}, c_order, default_init, data_type); void* data = arr.data(); out = std::move(arr); + // Special handling for byte_t: use uint8_t functions since they're binary compatible + auto type_id = data_type.id(); + if (type_id == DataTypeId::byte_t) { + type_id = DataTypeId::uint8_t; + } + const auto& functions = - kFillValueDataTypeFunctions[static_cast(data_type.id())]; + kFillValueDataTypeFunctions[static_cast(type_id)]; if (!functions.decode) { if (allow_missing_dtype) { out = SharedArray(); @@ -381,8 +422,14 @@ absl::Status FillValueJsonBinder::EncodeSingle( return absl::InvalidArgumentError( "data_type must be specified before fill_value"); } + // Special handling for byte_t: use uint8_t functions since they're binary compatible + auto type_id = data_type.id(); + if (type_id == DataTypeId::byte_t) { + type_id = DataTypeId::uint8_t; + } + const auto& functions = - kFillValueDataTypeFunctions[static_cast(data_type.id())]; + kFillValueDataTypeFunctions[static_cast(type_id)]; if (!functions.encode) { return absl::FailedPreconditionError( "fill_value unsupported for specified data_type"); @@ -751,8 +798,19 @@ std::string GetFieldNames(const ZarrDType& dtype) { } } // namespace +constexpr size_t kVoidFieldIndex = size_t(-1); + Result GetFieldIndex(const ZarrDType& dtype, std::string_view selected_field) { + // Special case: "" 
requests raw byte access (works for any dtype) + if (selected_field == "") { + if (dtype.fields.empty()) { + return absl::FailedPreconditionError( + "Requested field \"\" but dtype has no fields"); + } + return kVoidFieldIndex; + } + if (selected_field.empty()) { if (dtype.fields.size() != 1) { return absl::FailedPreconditionError(tensorstore::StrCat( @@ -779,6 +837,9 @@ SpecRankAndFieldInfo GetSpecRankAndFieldInfo(const ZarrMetadata& metadata, SpecRankAndFieldInfo info; info.chunked_rank = metadata.rank; info.field = &metadata.data_type.fields[field_index]; + if (!info.field->field_shape.empty()) { + info.chunked_rank += info.field->field_shape.size(); + } return info; } @@ -798,8 +859,24 @@ Result> GetEffectiveDomain( assert(RankConstraint::EqualOrUnspecified(schema.rank(), rank)); IndexDomainBuilder builder(std::max(schema.rank().rank, rank)); if (metadata_shape) { - builder.shape(*metadata_shape); - builder.implicit_upper_bounds(true); + if (static_cast(metadata_shape->size()) < rank && + info.field && !info.field->field_shape.empty() && + static_cast(metadata_shape->size() + + info.field->field_shape.size()) == rank) { + std::vector full_shape(metadata_shape->begin(), + metadata_shape->end()); + full_shape.insert(full_shape.end(), info.field->field_shape.begin(), + info.field->field_shape.end()); + builder.shape(full_shape); + DimensionSet implicit_upper_bounds(false); + for (size_t i = 0; i < metadata_shape->size(); ++i) { + implicit_upper_bounds[i] = true; + } + builder.implicit_upper_bounds(implicit_upper_bounds); + } else { + builder.shape(*metadata_shape); + builder.implicit_upper_bounds(true); + } } else { builder.origin(GetConstantVector(builder.rank())); } From 44c765ec04e0492cd8ba9aa9f5b43cf97834359b Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Tue, 25 Nov 2025 18:28:09 +0000 Subject: [PATCH 05/20] Fix failing tests --- tensorstore/driver/zarr3/dtype.cc | 26 ++++++++++++++++++++++---- tensorstore/driver/zarr3/dtype_test.cc | 9 +++++++-- 2 files 
changed, 29 insertions(+), 6 deletions(-) diff --git a/tensorstore/driver/zarr3/dtype.cc b/tensorstore/driver/zarr3/dtype.cc index 116712d70..5b3261812 100644 --- a/tensorstore/driver/zarr3/dtype.cc +++ b/tensorstore/driver/zarr3/dtype.cc @@ -75,6 +75,13 @@ Result ParseBaseDType(std::string_view dtype) { {num_bytes}}; } + // Handle bare "r" - must have a number after it + if (dtype.size() >= 1 && dtype[0] == 'r') { + return absl::InvalidArgumentError(tensorstore::StrCat( + dtype, " data type is invalid; expected r where N is a positive " + "multiple of 8")); + } + constexpr std::string_view kSupported = "bool, uint8, uint16, uint32, uint64, int8, int16, int32, int64, " "bfloat16, float16, float32, float64, complex64, complex128, r"; @@ -372,10 +379,21 @@ Result ChooseBaseDType(DataType dtype) { return MakeBaseDType("complex64", dtype); if (dtype == dtype_v<::tensorstore::dtypes::complex128_t>) return MakeBaseDType("complex128", dtype); - if (dtype == dtype_v<::tensorstore::dtypes::byte_t>) - return MakeBaseDType("r8", dtype); - if (dtype == dtype_v<::tensorstore::dtypes::char_t>) - return MakeBaseDType("r8", dtype); + if (dtype == dtype_v<::tensorstore::dtypes::byte_t>) { + ZarrDType::BaseDType base_dtype; + base_dtype.dtype = dtype; + base_dtype.encoded_dtype = "r8"; + base_dtype.flexible_shape = {1}; + return base_dtype; + } + if (dtype == dtype_v<::tensorstore::dtypes::char_t>) { + // char_t encodes as r8, which parses back to byte_t + ZarrDType::BaseDType base_dtype; + base_dtype.dtype = dtype_v<::tensorstore::dtypes::byte_t>; + base_dtype.encoded_dtype = "r8"; + base_dtype.flexible_shape = {1}; + return base_dtype; + } return absl::InvalidArgumentError( tensorstore::StrCat("Data type not supported: ", dtype)); } diff --git a/tensorstore/driver/zarr3/dtype_test.cc b/tensorstore/driver/zarr3/dtype_test.cc index e1c5b444c..ef55aba09 100644 --- a/tensorstore/driver/zarr3/dtype_test.cc +++ b/tensorstore/driver/zarr3/dtype_test.cc @@ -285,10 +285,15 @@ 
TEST(ChooseBaseDTypeTest, RoundTrip) { SCOPED_TRACE(tensorstore::StrCat("dtype=", dtype)); TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto base_zarr_dtype, ChooseBaseDType(dtype)); - EXPECT_EQ(dtype, base_zarr_dtype.dtype); + // byte_t and char_t both encode as r8, which parses back to byte_t + DataType expected_dtype = dtype; + if (dtype == dtype_v) { + expected_dtype = dtype_v; + } + EXPECT_EQ(expected_dtype, base_zarr_dtype.dtype); TENSORSTORE_ASSERT_OK_AND_ASSIGN( auto parsed, ParseBaseDType(base_zarr_dtype.encoded_dtype)); - EXPECT_EQ(dtype, parsed.dtype); + EXPECT_EQ(expected_dtype, parsed.dtype); EXPECT_EQ(base_zarr_dtype.flexible_shape, parsed.flexible_shape); EXPECT_EQ(base_zarr_dtype.encoded_dtype, parsed.encoded_dtype); } From 547642d819aa5ac878300530e9d049018de27db8 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Tue, 25 Nov 2025 20:10:09 +0000 Subject: [PATCH 06/20] Resolve issues with opening struct as void --- examples/read_structured_zarr3.cc | 40 ++++++++++++-- tensorstore/driver/zarr3/driver.cc | 83 ++++++++++++++++++++++++------ 2 files changed, 104 insertions(+), 19 deletions(-) diff --git a/examples/read_structured_zarr3.cc b/examples/read_structured_zarr3.cc index 259eade34..bf12ced1b 100644 --- a/examples/read_structured_zarr3.cc +++ b/examples/read_structured_zarr3.cc @@ -422,12 +422,44 @@ absl::Status Run(const std::string& zarr_path) { auto raw_store = std::move(raw_open_result).value(); TENSORSTORE_RETURN_IF_ERROR(ReadInlineField(raw_store, "raw_headers", /*is_raw_bytes=*/true)); + // Test 3: Read from headers array as void (field="") + // Use a fresh context to avoid cache sharing with Test 1 std::cout << "\n" << std::string(60, '=') << std::endl; - std::cout << "COMPARISON: Both methods should give identical inline field values" << std::endl; + std::cout << "TEST 3: Reading from 'headers' array as void (field=\"\")" << std::endl; std::cout << std::string(60, '=') << std::endl; - std::cout << "The structured 'headers' array provides field access 
convenience,\n" - << "while the raw 'raw_headers' array provides direct byte access.\n" - << "Both extract the inline field from byte offset " << kInlineFieldOffset + + auto context_void = tensorstore::Context::Default(); + + ::nlohmann::json headers_void_spec = ::nlohmann::json::object(); + headers_void_spec["driver"] = "zarr3"; + headers_void_spec["kvstore"] = ::nlohmann::json::object(); + headers_void_spec["kvstore"]["driver"] = "file"; + headers_void_spec["kvstore"]["path"] = headers_path + "/"; + headers_void_spec["field"] = ""; // Special field for raw byte access + + std::cout << "Spec: " << headers_void_spec.dump(2) << std::endl; + + auto headers_void_open_result = + tensorstore::Open(headers_void_spec, context_void, tensorstore::OpenMode::open, + tensorstore::ReadWriteMode::read) + .result(); + + if (!headers_void_open_result.ok()) { + std::cout << "\n=== Headers (void) Open Failed ===" << std::endl; + std::cout << "Status: " << headers_void_open_result.status() << std::endl; + return headers_void_open_result.status(); + } + + auto headers_void_store = std::move(headers_void_open_result).value(); + TENSORSTORE_RETURN_IF_ERROR(ReadInlineField(headers_void_store, "headers (void)", /*is_raw_bytes=*/true)); + + std::cout << "\n" << std::string(60, '=') << std::endl; + std::cout << "COMPARISON: All three methods should give identical inline field values" << std::endl; + std::cout << std::string(60, '=') << std::endl; + std::cout << "- Test 1: 'headers' with field=\"inline\" provides field access convenience\n" + << "- Test 2: 'raw_headers' (raw_bytes type) provides direct byte access\n" + << "- Test 3: 'headers' with field=\"\" provides raw byte access to structured data\n" + << "All three extract the inline field from byte offset " << kInlineFieldOffset << " in " << kStructSize << "-byte structs." 
<< std::endl; return absl::OkStatus(); diff --git a/tensorstore/driver/zarr3/driver.cc b/tensorstore/driver/zarr3/driver.cc index b4d96da1f..bed1171d2 100644 --- a/tensorstore/driver/zarr3/driver.cc +++ b/tensorstore/driver/zarr3/driver.cc @@ -315,26 +315,27 @@ class DataCacheBase // Special case: void access - create single component for entire struct if (field_index == kVoidFieldIndex) { - // For void access, use the fill_value from the single raw_bytes field - auto& fill_value = metadata.fill_value[0]; - std::cout << "[DEBUG] Void access fill_value: shape=" << fill_value.shape() - << ", dtype=" << fill_value.dtype() << std::endl; + // For void access, create a zero-filled byte array as the fill value + const Index bytes_per_element = metadata.data_type.bytes_per_outer_element; + auto base_fill_value = AllocateArray( + span({bytes_per_element}), c_order, value_init, + dtype_v); // Broadcast to shape [unbounded, unbounded, ..., struct_size] std::vector target_shape(metadata.rank, kInfIndex); - target_shape.push_back(metadata.data_type.bytes_per_outer_element); - std::cout << "[DEBUG] Void access target_shape: ["; - for (size_t i = 0; i < target_shape.size(); ++i) { - if (i > 0) std::cout << ", "; - std::cout << target_shape[i]; - } - std::cout << "]" << std::endl; + target_shape.push_back(bytes_per_element); auto chunk_fill_value = - BroadcastArray(fill_value, BoxView<>(target_shape)).value(); + BroadcastArray(base_fill_value, BoxView<>(target_shape)).value(); // Add extra dimension for struct size in bytes std::vector chunk_shape_with_bytes = metadata.chunk_shape; - chunk_shape_with_bytes.push_back(metadata.data_type.bytes_per_outer_element); + chunk_shape_with_bytes.push_back(bytes_per_element); + + // Create permutation: copy existing inner_order and add the new dimension + std::vector void_permutation(metadata.rank + 1); + std::copy_n(metadata.inner_order.data(), metadata.rank, + void_permutation.begin()); + void_permutation[metadata.rank] = metadata.rank; // 
Add the bytes dimension auto& component = components.emplace_back( internal::AsyncWriteArray::Spec{ @@ -343,7 +344,7 @@ class DataCacheBase // specify unbounded `valid_data_bounds`. Box<>(metadata.rank + 1), ContiguousLayoutPermutation<>( - span(metadata.inner_order.data(), metadata.rank + 1))}, + span(void_permutation.data(), metadata.rank + 1))}, chunk_shape_with_bytes); component.array_spec.fill_value_comparison_kind = EqualityComparisonKind::identical; @@ -570,7 +571,13 @@ class ZarrDataCache : public ChunkCacheImpl, public DataCacheBase { std::string key_prefix, U&&... arg) : ChunkCacheImpl(std::move(initializer.store), std::forward(arg)...), DataCacheBase(std::move(initializer), std::move(key_prefix)), - grid_(DataCacheBase::GetChunkGridSpecification(metadata())) {} + grid_(DataCacheBase::GetChunkGridSpecification( + metadata(), + // Check if this is void access by examining the dtype + (ChunkCacheImpl::dtype_.fields.size() == 1 && + ChunkCacheImpl::dtype_.fields[0].name == "") + ? 
kVoidFieldIndex + : 0)) {} const internal::LexicographicalGridIndexKeyParser& GetChunkStorageKeyParser() final { @@ -596,6 +603,52 @@ class ZarrDataCache : public ChunkCacheImpl, public DataCacheBase { return DataCacheBase::executor(); } + // Override to handle void access - check the dtype to see if this is void + Result> GetExternalToInternalTransform( + const void* metadata_ptr, size_t component_index) override { + const auto& metadata = *static_cast(metadata_ptr); + + // Check if this is void access by examining the cache's dtype + const bool is_void_access = (ChunkCacheImpl::dtype_.fields.size() == 1 && + ChunkCacheImpl::dtype_.fields[0].name == ""); + + if (is_void_access) { + // For void access, create transform with extra bytes dimension + const DimensionIndex rank = metadata.rank; + const Index bytes_per_element = metadata.data_type.bytes_per_outer_element; + const DimensionIndex total_rank = rank + 1; + + std::string_view normalized_dimension_names[kMaxRank]; + for (DimensionIndex i = 0; i < rank; ++i) { + if (const auto& name = metadata.dimension_names[i]; name.has_value()) { + normalized_dimension_names[i] = *name; + } + } + + auto builder = + tensorstore::IndexTransformBuilder<>(total_rank, total_rank); + std::vector full_shape = metadata.shape; + full_shape.push_back(bytes_per_element); + builder.input_shape(full_shape); + builder.input_labels(span(&normalized_dimension_names[0], total_rank)); + + DimensionSet implicit_upper_bounds(false); + for (DimensionIndex i = 0; i < rank; ++i) { + implicit_upper_bounds[i] = true; + } + builder.implicit_upper_bounds(implicit_upper_bounds); + + for (DimensionIndex i = 0; i < total_rank; ++i) { + builder.output_single_input_dimension(i, i); + } + return builder.Finalize(); + } + + // Not void access - delegate to base implementation + return DataCacheBase::GetExternalToInternalTransform(metadata_ptr, + component_index); + } + internal::ChunkGridSpecification grid_; }; From 2a4c3d852e0f38b5601dd43482ae878d86a6d7b6 
Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Wed, 26 Nov 2025 15:03:55 +0000 Subject: [PATCH 07/20] Remove debug print --- tensorstore/driver/zarr3/metadata.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/tensorstore/driver/zarr3/metadata.cc b/tensorstore/driver/zarr3/metadata.cc index 6a83cdbec..9aef7bd0b 100644 --- a/tensorstore/driver/zarr3/metadata.cc +++ b/tensorstore/driver/zarr3/metadata.cc @@ -312,8 +312,6 @@ absl::Status FillValueJsonBinder::operator()( auto fill_arr = AllocateArray(dtype.fields[0].field_shape, c_order, default_init, dtype.fields[0].dtype); std::memcpy(fill_arr.data(), b64_decoded.data(), b64_decoded.size()); - std::cout << "[DEBUG] Raw bytes fill_value parsed: shape=" << fill_arr.shape() - << ", dtype=" << dtype.fields[0].dtype << std::endl; (*obj)[0] = std::move(fill_arr); } else { TENSORSTORE_RETURN_IF_ERROR( From b0abb94070f7be7337e7a30b90802ee8617801dd Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Tue, 2 Dec 2025 22:01:10 +0000 Subject: [PATCH 08/20] Add field for open as void --- .gitignore | 5 +++++ examples/read_structured_zarr3.cc | 11 ++++++----- tensorstore/driver/zarr3/driver.cc | 31 +++++++++++++++++++----------- 3 files changed, 31 insertions(+), 16 deletions(-) diff --git a/.gitignore b/.gitignore index e4737363c..7c75044c5 100644 --- a/.gitignore +++ b/.gitignore @@ -21,3 +21,8 @@ __pycache__ *.pyc /python/tensorstore/*.so /python/tensorstore/*.pyd + +build/ +bootstrap.sh +filt_mig.mdio +generate_test.py \ No newline at end of file diff --git a/examples/read_structured_zarr3.cc b/examples/read_structured_zarr3.cc index bf12ced1b..720ef1330 100644 --- a/examples/read_structured_zarr3.cc +++ b/examples/read_structured_zarr3.cc @@ -21,6 +21,7 @@ // Both arrays should contain the same data, allowing comparison of: // - Field-based access vs manual byte extraction // - Structured dtype parsing vs raw byte handling +// - New open_as_void option for raw byte access to structured data // // Usage: // bazel run 
//examples:read_structured_zarr3 -- /path/to/parent/dir @@ -422,10 +423,10 @@ absl::Status Run(const std::string& zarr_path) { auto raw_store = std::move(raw_open_result).value(); TENSORSTORE_RETURN_IF_ERROR(ReadInlineField(raw_store, "raw_headers", /*is_raw_bytes=*/true)); - // Test 3: Read from headers array as void (field="") + // Test 3: Read from headers array as void (open_as_void=true) // Use a fresh context to avoid cache sharing with Test 1 std::cout << "\n" << std::string(60, '=') << std::endl; - std::cout << "TEST 3: Reading from 'headers' array as void (field=\"\")" << std::endl; + std::cout << "TEST 3: Reading from 'headers' array as void (open_as_void=true)" << std::endl; std::cout << std::string(60, '=') << std::endl; auto context_void = tensorstore::Context::Default(); @@ -435,7 +436,7 @@ absl::Status Run(const std::string& zarr_path) { headers_void_spec["kvstore"] = ::nlohmann::json::object(); headers_void_spec["kvstore"]["driver"] = "file"; headers_void_spec["kvstore"]["path"] = headers_path + "/"; - headers_void_spec["field"] = ""; // Special field for raw byte access + headers_void_spec["open_as_void"] = true; // New option for raw byte access std::cout << "Spec: " << headers_void_spec.dump(2) << std::endl; @@ -451,14 +452,14 @@ absl::Status Run(const std::string& zarr_path) { } auto headers_void_store = std::move(headers_void_open_result).value(); - TENSORSTORE_RETURN_IF_ERROR(ReadInlineField(headers_void_store, "headers (void)", /*is_raw_bytes=*/true)); + TENSORSTORE_RETURN_IF_ERROR(ReadInlineField(headers_void_store, "headers (open_as_void)", /*is_raw_bytes=*/true)); std::cout << "\n" << std::string(60, '=') << std::endl; std::cout << "COMPARISON: All three methods should give identical inline field values" << std::endl; std::cout << std::string(60, '=') << std::endl; std::cout << "- Test 1: 'headers' with field=\"inline\" provides field access convenience\n" << "- Test 2: 'raw_headers' (raw_bytes type) provides direct byte access\n" - << "- 
Test 3: 'headers' with field=\"\" provides raw byte access to structured data\n" + << "- Test 3: 'headers' with open_as_void=true provides raw byte access to structured data\n" << "All three extract the inline field from byte offset " << kInlineFieldOffset << " in " << kStructSize << "-byte structs." << std::endl; diff --git a/tensorstore/driver/zarr3/driver.cc b/tensorstore/driver/zarr3/driver.cc index bed1171d2..f4aad10d7 100644 --- a/tensorstore/driver/zarr3/driver.cc +++ b/tensorstore/driver/zarr3/driver.cc @@ -107,10 +107,11 @@ class ZarrDriverSpec ZarrMetadataConstraints metadata_constraints; std::string selected_field; + bool open_as_void; constexpr static auto ApplyMembers = [](auto& x, auto f) { return f(internal::BaseCast(x), x.metadata_constraints, - x.selected_field); + x.selected_field, x.open_as_void); }; static inline const auto default_json_binder = jb::Sequence( @@ -145,9 +146,17 @@ class ZarrDriverSpec }, jb::Projection<&ZarrDriverSpec::metadata_constraints>( jb::DefaultInitializedValue()))), - jb::Member("field", jb::Projection<&ZarrDriverSpec::selected_field>( - jb::DefaultValue( - [](auto* obj) { *obj = std::string{}; })))); + jb::Member( + "field", + jb::Projection<&ZarrDriverSpec::selected_field>( + jb::DefaultValue( + [](auto* obj) { *obj = std::string{}; }))), + jb::Member( + "open_as_void", + jb::Projection<&ZarrDriverSpec::open_as_void>( + jb::DefaultValue( + [](auto* v) { *v = false; /*selected_field = "";*/ })))); + absl::Status ApplyOptions(SpecOptions&& options) override { if (options.minimal_spec) { @@ -607,43 +616,43 @@ class ZarrDataCache : public ChunkCacheImpl, public DataCacheBase { Result> GetExternalToInternalTransform( const void* metadata_ptr, size_t component_index) override { const auto& metadata = *static_cast(metadata_ptr); - + // Check if this is void access by examining the cache's dtype const bool is_void_access = (ChunkCacheImpl::dtype_.fields.size() == 1 && ChunkCacheImpl::dtype_.fields[0].name == ""); - + if 
(is_void_access) { // For void access, create transform with extra bytes dimension const DimensionIndex rank = metadata.rank; const Index bytes_per_element = metadata.data_type.bytes_per_outer_element; const DimensionIndex total_rank = rank + 1; - + std::string_view normalized_dimension_names[kMaxRank]; for (DimensionIndex i = 0; i < rank; ++i) { if (const auto& name = metadata.dimension_names[i]; name.has_value()) { normalized_dimension_names[i] = *name; } } - + auto builder = tensorstore::IndexTransformBuilder<>(total_rank, total_rank); std::vector full_shape = metadata.shape; full_shape.push_back(bytes_per_element); builder.input_shape(full_shape); builder.input_labels(span(&normalized_dimension_names[0], total_rank)); - + DimensionSet implicit_upper_bounds(false); for (DimensionIndex i = 0; i < rank; ++i) { implicit_upper_bounds[i] = true; } builder.implicit_upper_bounds(implicit_upper_bounds); - + for (DimensionIndex i = 0; i < total_rank; ++i) { builder.output_single_input_dimension(i, i); } return builder.Finalize(); } - + // Not void access - delegate to base implementation return DataCacheBase::GetExternalToInternalTransform(metadata_ptr, component_index); From fff0a5be9ce8fa1baed0a2db5503b852f3fb5184 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Wed, 3 Dec 2025 15:38:36 +0000 Subject: [PATCH 09/20] Add a shim for new open_as_void flag open option --- tensorstore/driver/zarr3/driver.cc | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/tensorstore/driver/zarr3/driver.cc b/tensorstore/driver/zarr3/driver.cc index f4aad10d7..18c8f3a77 100644 --- a/tensorstore/driver/zarr3/driver.cc +++ b/tensorstore/driver/zarr3/driver.cc @@ -140,8 +140,9 @@ class ZarrDriverSpec // at metadata level only. 
} } - TENSORSTORE_RETURN_IF_ERROR(obj->schema.Set( - RankConstraint{obj->metadata_constraints.rank})); + TENSORSTORE_RETURN_IF_ERROR( + obj->schema.Set( + RankConstraint{obj->metadata_constraints.rank})); return absl::OkStatus(); }, jb::Projection<&ZarrDriverSpec::metadata_constraints>( @@ -151,11 +152,23 @@ class ZarrDriverSpec jb::Projection<&ZarrDriverSpec::selected_field>( jb::DefaultValue( [](auto* obj) { *obj = std::string{}; }))), + + // NEW: wrap the open_as_void projection in a Validate jb::Member( "open_as_void", - jb::Projection<&ZarrDriverSpec::open_as_void>( - jb::DefaultValue( - [](auto* v) { *v = false; /*selected_field = "";*/ })))); + jb::Validate( + [](const auto& options, ZarrDriverSpec* obj) -> absl::Status { + // At this point, Projection has already set obj->open_as_void + if (obj->open_as_void) { + obj->selected_field = ""; + } + return absl::OkStatus(); + }, + jb::Projection<&ZarrDriverSpec::open_as_void>( + jb::DefaultValue( + [](auto* v) { *v = false; }))))); + + absl::Status ApplyOptions(SpecOptions&& options) override { From b6c24f96289a523d14cd6dc9a173f70e10690e15 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Wed, 3 Dec 2025 15:55:02 +0000 Subject: [PATCH 10/20] Revert some formatting changes --- tensorstore/driver/zarr3/driver.cc | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/tensorstore/driver/zarr3/driver.cc b/tensorstore/driver/zarr3/driver.cc index 18c8f3a77..dd95c711b 100644 --- a/tensorstore/driver/zarr3/driver.cc +++ b/tensorstore/driver/zarr3/driver.cc @@ -140,22 +140,18 @@ class ZarrDriverSpec // at metadata level only. 
} } - TENSORSTORE_RETURN_IF_ERROR( - obj->schema.Set( - RankConstraint{obj->metadata_constraints.rank})); + TENSORSTORE_RETURN_IF_ERROR(obj->schema.Set( + RankConstraint{obj->metadata_constraints.rank})); return absl::OkStatus(); }, jb::Projection<&ZarrDriverSpec::metadata_constraints>( jb::DefaultInitializedValue()))), - jb::Member( - "field", - jb::Projection<&ZarrDriverSpec::selected_field>( + jb::Member("field", jb::Projection<&ZarrDriverSpec::selected_field>( jb::DefaultValue( [](auto* obj) { *obj = std::string{}; }))), // NEW: wrap the open_as_void projection in a Validate - jb::Member( - "open_as_void", + jb::Member("open_as_void", jb::Validate( [](const auto& options, ZarrDriverSpec* obj) -> absl::Status { // At this point, Projection has already set obj->open_as_void From 488b1605c1f15f322e4b39f03b02d6cd8b29900b Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Wed, 3 Dec 2025 15:56:34 +0000 Subject: [PATCH 11/20] revert gitignore changes --- .gitignore | 5 ----- 1 file changed, 5 deletions(-) diff --git a/.gitignore b/.gitignore index 7c75044c5..e4737363c 100644 --- a/.gitignore +++ b/.gitignore @@ -21,8 +21,3 @@ __pycache__ *.pyc /python/tensorstore/*.so /python/tensorstore/*.pyd - -build/ -bootstrap.sh -filt_mig.mdio -generate_test.py \ No newline at end of file From 54941a09cf5e057e9c32d20512c0bb114b6f9b83 Mon Sep 17 00:00:00 2001 From: Brian Michell Date: Wed, 3 Dec 2025 13:06:22 -0600 Subject: [PATCH 12/20] V3 structs remove shim (#2) * Begin removing void field shim * Fully removed void string shim * Cleanup debug prints * Remove shimmed validation * Remove unnecessary comment * Prefer false over zero for ternary clarity --- tensorstore/driver/zarr3/chunk_cache.cc | 16 ++++++---- tensorstore/driver/zarr3/chunk_cache.h | 14 ++++++--- tensorstore/driver/zarr3/driver.cc | 38 +++++++---------------- tensorstore/driver/zarr3/metadata.cc | 14 +++++---- tensorstore/driver/zarr3/metadata.h | 6 ++-- tensorstore/driver/zarr3/metadata_test.cc | 2 +- 6 files 
changed, 45 insertions(+), 45 deletions(-) diff --git a/tensorstore/driver/zarr3/chunk_cache.cc b/tensorstore/driver/zarr3/chunk_cache.cc index 64b6d69fd..f14efd607 100644 --- a/tensorstore/driver/zarr3/chunk_cache.cc +++ b/tensorstore/driver/zarr3/chunk_cache.cc @@ -75,10 +75,12 @@ ZarrChunkCache::~ZarrChunkCache() = default; ZarrLeafChunkCache::ZarrLeafChunkCache( kvstore::DriverPtr store, ZarrCodecChain::PreparedState::Ptr codec_state, - ZarrDType dtype, internal::CachePool::WeakPtr /*data_cache_pool*/) + ZarrDType dtype, internal::CachePool::WeakPtr /*data_cache_pool*/, + bool open_as_void) : Base(std::move(store)), codec_state_(std::move(codec_state)), - dtype_(std::move(dtype)) {} + dtype_(std::move(dtype)), + open_as_void_(open_as_void) {} void ZarrLeafChunkCache::Read(ZarrChunkCache::ReadRequest request, AnyFlowReceiver chunk_indices, absl::InlinedVector, 1> field_arrays(num_fields); // Special case: void access - return raw bytes directly - if (num_fields == 1 && dtype_.fields[0].name == "") { + if (open_as_void_) { TENSORSTORE_ASSIGN_OR_RETURN( field_arrays[0], codec_state_->DecodeArray(grid().components[0].shape(), std::move(data))); @@ -221,11 +223,13 @@ kvstore::Driver* ZarrLeafChunkCache::GetKvStoreDriver() { ZarrShardedChunkCache::ZarrShardedChunkCache( kvstore::DriverPtr store, ZarrCodecChain::PreparedState::Ptr codec_state, - ZarrDType dtype, internal::CachePool::WeakPtr data_cache_pool) + ZarrDType dtype, internal::CachePool::WeakPtr data_cache_pool, + bool open_as_void) : base_kvstore_(std::move(store)), codec_state_(std::move(codec_state)), dtype_(std::move(dtype)), - data_cache_pool_(std::move(data_cache_pool)) {} + data_cache_pool_(std::move(data_cache_pool)), + open_as_void_(open_as_void) {} Result> TranslateCellToSourceTransformForShard( IndexTransform<> transform, span grid_cell_indices, @@ -534,7 +538,7 @@ void ZarrShardedChunkCache::Entry::DoInitialize() { *sharding_state.sub_chunk_codec_chain, std::move(sharding_kvstore), 
cache.executor(), ZarrShardingCodec::PreparedState::Ptr(&sharding_state), - cache.dtype_, cache.data_cache_pool_); + cache.dtype_, cache.data_cache_pool_, cache.open_as_void_); zarr_chunk_cache = new_cache.release(); return std::unique_ptr(&zarr_chunk_cache->cache()); }) diff --git a/tensorstore/driver/zarr3/chunk_cache.h b/tensorstore/driver/zarr3/chunk_cache.h index 5933115d7..a39eb1dc8 100644 --- a/tensorstore/driver/zarr3/chunk_cache.h +++ b/tensorstore/driver/zarr3/chunk_cache.h @@ -158,7 +158,8 @@ class ZarrLeafChunkCache : public internal::KvsBackedChunkCache, explicit ZarrLeafChunkCache(kvstore::DriverPtr store, ZarrCodecChain::PreparedState::Ptr codec_state, ZarrDType dtype, - internal::CachePool::WeakPtr data_cache_pool); + internal::CachePool::WeakPtr data_cache_pool, + bool open_as_void = false); void Read(ZarrChunkCache::ReadRequest request, AnyFlowReceiver( @@ -246,6 +249,7 @@ class ZarrShardedChunkCache : public internal::Cache, public ZarrChunkCache { kvstore::DriverPtr base_kvstore_; ZarrCodecChain::PreparedState::Ptr codec_state_; ZarrDType dtype_; + bool open_as_void_; // Data cache pool, if it differs from `this->pool()` (which is equal to the // metadata cache pool). 
@@ -260,11 +264,13 @@ class ZarrShardSubChunkCache : public ChunkCacheImpl { explicit ZarrShardSubChunkCache( kvstore::DriverPtr store, Executor executor, ZarrShardingCodec::PreparedState::Ptr sharding_state, - ZarrDType dtype, internal::CachePool::WeakPtr data_cache_pool) + ZarrDType dtype, internal::CachePool::WeakPtr data_cache_pool, + bool open_as_void = false) : ChunkCacheImpl(std::move(store), ZarrCodecChain::PreparedState::Ptr( sharding_state->sub_chunk_codec_state), - std::move(dtype), std::move(data_cache_pool)), + std::move(dtype), std::move(data_cache_pool), + open_as_void), sharding_state_(std::move(sharding_state)), executor_(std::move(executor)) {} diff --git a/tensorstore/driver/zarr3/driver.cc b/tensorstore/driver/zarr3/driver.cc index dd95c711b..f4c0ad9d7 100644 --- a/tensorstore/driver/zarr3/driver.cc +++ b/tensorstore/driver/zarr3/driver.cc @@ -149,20 +149,9 @@ class ZarrDriverSpec jb::Member("field", jb::Projection<&ZarrDriverSpec::selected_field>( jb::DefaultValue( [](auto* obj) { *obj = std::string{}; }))), - - // NEW: wrap the open_as_void projection in a Validate - jb::Member("open_as_void", - jb::Validate( - [](const auto& options, ZarrDriverSpec* obj) -> absl::Status { - // At this point, Projection has already set obj->open_as_void - if (obj->open_as_void) { - obj->selected_field = ""; - } - return absl::OkStatus(); - }, - jb::Projection<&ZarrDriverSpec::open_as_void>( + jb::Member("open_as_void", jb::Projection<&ZarrDriverSpec::open_as_void>( jb::DefaultValue( - [](auto* v) { *v = false; }))))); + [](auto* v) { *v = false; })))); @@ -592,10 +581,7 @@ class ZarrDataCache : public ChunkCacheImpl, public DataCacheBase { grid_(DataCacheBase::GetChunkGridSpecification( metadata(), // Check if this is void access by examining the dtype - (ChunkCacheImpl::dtype_.fields.size() == 1 && - ChunkCacheImpl::dtype_.fields[0].name == "") - ? kVoidFieldIndex - : 0)) {} + ChunkCacheImpl::open_as_void_ ? 
kVoidFieldIndex : false)) {} const internal::LexicographicalGridIndexKeyParser& GetChunkStorageKeyParser() final { @@ -626,9 +612,8 @@ class ZarrDataCache : public ChunkCacheImpl, public DataCacheBase { const void* metadata_ptr, size_t component_index) override { const auto& metadata = *static_cast(metadata_ptr); - // Check if this is void access by examining the cache's dtype - const bool is_void_access = (ChunkCacheImpl::dtype_.fields.size() == 1 && - ChunkCacheImpl::dtype_.fields[0].name == ""); + // Check if this is void access by examining the stored flag + const bool is_void_access = ChunkCacheImpl::open_as_void_; if (is_void_access) { // For void access, create transform with extra bytes dimension @@ -802,7 +787,7 @@ class ZarrDriver::OpenState : public ZarrDriver::OpenStateBase { TENSORSTORE_ASSIGN_OR_RETURN( auto metadata, internal_zarr3::GetNewMetadata(spec().metadata_constraints, - spec().schema), + spec().schema, spec().selected_field, spec().open_as_void), tensorstore::MaybeAnnotateStatus( _, "Cannot create using specified \"metadata\" and schema")); return metadata; @@ -819,15 +804,15 @@ class ZarrDriver::OpenState : public ZarrDriver::OpenStateBase { *static_cast(initializer.metadata.get()); // For void access, modify the dtype to indicate special handling ZarrDType dtype = metadata.data_type; - if (spec().selected_field == "") { + if (spec().open_as_void) { // Create a synthetic dtype for void access dtype = ZarrDType{ /*.has_fields=*/false, /*.fields=*/{ZarrDType::Field{ - ZarrDType::BaseDType{"", dtype_v, + ZarrDType::BaseDType{"", dtype_v, {metadata.data_type.bytes_per_outer_element}}, /*.outer_shape=*/{}, - /*.name=*/"", + /*.name=*/"", /*.field_shape=*/{metadata.data_type.bytes_per_outer_element}, /*.num_inner_elements=*/metadata.data_type.bytes_per_outer_element, /*.byte_offset=*/0, @@ -837,7 +822,8 @@ class ZarrDriver::OpenState : public ZarrDriver::OpenStateBase { return internal_zarr3::MakeZarrChunkCache( *metadata.codecs, 
std::move(initializer), spec().store.path, metadata.codec_state, dtype, - /*data_cache_pool=*/*cache_pool()); + /*data_cache_pool=*/*cache_pool(), + spec().open_as_void); } Result GetComponentIndex(const void* metadata_ptr, @@ -847,7 +833,7 @@ class ZarrDriver::OpenState : public ZarrDriver::OpenStateBase { ValidateMetadata(metadata, spec().metadata_constraints)); TENSORSTORE_ASSIGN_OR_RETURN( auto field_index, - GetFieldIndex(metadata.data_type, spec().selected_field)); + GetFieldIndex(metadata.data_type, spec().selected_field, spec().open_as_void)); // For void access, map to component index 0 if (field_index == kVoidFieldIndex) { field_index = 0; diff --git a/tensorstore/driver/zarr3/metadata.cc b/tensorstore/driver/zarr3/metadata.cc index 9aef7bd0b..ba4454de4 100644 --- a/tensorstore/driver/zarr3/metadata.cc +++ b/tensorstore/driver/zarr3/metadata.cc @@ -799,12 +799,14 @@ std::string GetFieldNames(const ZarrDType& dtype) { constexpr size_t kVoidFieldIndex = size_t(-1); Result GetFieldIndex(const ZarrDType& dtype, - std::string_view selected_field) { - // Special case: "" requests raw byte access (works for any dtype) - if (selected_field == "") { + std::string_view selected_field, + bool open_as_void) { + // Special case: open_as_void requests raw byte access (works for any dtype) + + if (open_as_void) { if (dtype.fields.empty()) { return absl::FailedPreconditionError( - "Requested field \"\" but dtype has no fields"); + "Requested void access but dtype has no fields"); } return kVoidFieldIndex; } @@ -1138,7 +1140,7 @@ absl::Status ValidateMetadataSchema(const ZarrMetadata& metadata, Result> GetNewMetadata( const ZarrMetadataConstraints& metadata_constraints, const Schema& schema, - std::string_view selected_field) { + std::string_view selected_field, bool open_as_void) { auto metadata = std::make_shared(); metadata->zarr_format = metadata_constraints.zarr_format.value_or(3); @@ -1165,7 +1167,7 @@ Result> GetNewMetadata( } TENSORSTORE_ASSIGN_OR_RETURN( - size_t 
field_index, GetFieldIndex(metadata->data_type, selected_field)); + size_t field_index, GetFieldIndex(metadata->data_type, selected_field, open_as_void)); SpecRankAndFieldInfo info; info.field = &metadata->data_type.fields[field_index]; info.chunked_rank = metadata_constraints.rank; diff --git a/tensorstore/driver/zarr3/metadata.h b/tensorstore/driver/zarr3/metadata.h index 4c7871b0d..857210546 100644 --- a/tensorstore/driver/zarr3/metadata.h +++ b/tensorstore/driver/zarr3/metadata.h @@ -230,12 +230,14 @@ absl::Status ValidateMetadataSchema(const ZarrMetadata& metadata, /// unspecified. Result> GetNewMetadata( const ZarrMetadataConstraints& metadata_constraints, - const Schema& schema, std::string_view selected_field = {}); + const Schema& schema, std::string_view selected_field = {}, + bool open_as_void = false); absl::Status ValidateDataType(DataType dtype); Result GetFieldIndex(const ZarrDType& dtype, - std::string_view selected_field); + std::string_view selected_field, + bool open_as_void = false); struct SpecRankAndFieldInfo { DimensionIndex chunked_rank = dynamic_rank; diff --git a/tensorstore/driver/zarr3/metadata_test.cc b/tensorstore/driver/zarr3/metadata_test.cc index 11c97619f..ba7a26593 100644 --- a/tensorstore/driver/zarr3/metadata_test.cc +++ b/tensorstore/driver/zarr3/metadata_test.cc @@ -438,7 +438,7 @@ Result> TestGetNewMetadata( TENSORSTORE_RETURN_IF_ERROR(status); TENSORSTORE_ASSIGN_OR_RETURN( auto constraints, ZarrMetadataConstraints::FromJson(constraints_json)); - return GetNewMetadata(constraints, schema); + return GetNewMetadata(constraints, schema, /*selected_field=*/{}, /*open_as_void=*/false); } TEST(GetNewMetadataTest, DuplicateDimensionNames) { From c9f58f9eae12c236c1398619c0c43a298fc58dfc Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Wed, 3 Dec 2025 19:38:40 +0000 Subject: [PATCH 13/20] Fix structured fill value population --- tensorstore/driver/zarr3/driver.cc | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git 
a/tensorstore/driver/zarr3/driver.cc b/tensorstore/driver/zarr3/driver.cc index f4c0ad9d7..51cc17f42 100644 --- a/tensorstore/driver/zarr3/driver.cc +++ b/tensorstore/driver/zarr3/driver.cc @@ -675,7 +675,13 @@ class ZarrDriver : public ZarrDriverBase { if (metadata.fill_value.empty()) { return SharedArray(); } - return metadata.fill_value[0]; + // return metadata.fill_value[0]; + // TODO: Doe we actually need to validate this or can we trust that component_index will return a valid index? + size_t index = this->component_index(); + if (index >= metadata.fill_value.size()) { + return absl::OutOfRangeError("Component index out of bounds"); + } + return metadata.fill_value[index]; } Future GetStorageStatistics( From 7655cfd4cf435e90a1b468929c344de1300a0aa1 Mon Sep 17 00:00:00 2001 From: Brian Michell Date: Thu, 4 Dec 2025 10:03:47 -0600 Subject: [PATCH 14/20] V3 examples merge (#3) * Implement a more general and portable example set * Fix driver cache bug * Update example for template * Cleanup example * Remove testing examples from source --- examples/CMakeLists.txt | 163 ---------- examples/read_structured_zarr3.cc | 496 ----------------------------- tensorstore/driver/zarr3/driver.cc | 8 +- 3 files changed, 6 insertions(+), 661 deletions(-) delete mode 100644 examples/CMakeLists.txt delete mode 100644 examples/read_structured_zarr3.cc diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt deleted file mode 100644 index 92e9857fa..000000000 --- a/examples/CMakeLists.txt +++ /dev/null @@ -1,163 +0,0 @@ -# Standalone CMakeLists.txt for read_structured_zarr3 example -# -# Build instructions: -# mkdir -p /home/ubuntu/source/tensorstore/examples/build -# cd /home/ubuntu/source/tensorstore/examples/build -# cmake .. 
-# make -# -# Run: -# ./read_structured_zarr3 --zarr_path=/home/ubuntu/source/tensorstore/filt_mig.mdio/headers - -cmake_minimum_required(VERSION 3.24) -project(read_structured_zarr3 LANGUAGES CXX) - -set(CMAKE_CXX_STANDARD 17) -set(CMAKE_CXX_STANDARD_REQUIRED ON) - -# Path to the tensorstore build directory -set(TENSORSTORE_BUILD_DIR "/home/ubuntu/source/tensorstore/build" CACHE PATH "Path to tensorstore build directory") -set(TENSORSTORE_SOURCE_DIR "/home/ubuntu/source/tensorstore" CACHE PATH "Path to tensorstore source directory") -set(DEPS_DIR "${TENSORSTORE_BUILD_DIR}/_deps") - -# Include paths (matching what tensorstore tests use) -include_directories( - ${TENSORSTORE_SOURCE_DIR} - ${DEPS_DIR}/absl-src - ${DEPS_DIR}/re2-src - ${DEPS_DIR}/riegeli-src -) - -include_directories(SYSTEM - ${DEPS_DIR}/half-build/include - ${DEPS_DIR}/half-src/include - ${DEPS_DIR}/nlohmann_json-build/include - ${DEPS_DIR}/nlohmann_json-src/include - ${TENSORSTORE_BUILD_DIR} -) - -# Compiler flags -add_compile_options( - -fPIE - -Wno-deprecated-declarations - -Wno-sign-compare - -Wno-unused-but-set-parameter - -Wno-maybe-uninitialized - -Wno-sequence-point - -Wno-unknown-warning-option - -Wno-stringop-overflow - -fsized-deallocation -) - -# Find all the static libraries we need from the tensorstore build -file(GLOB TENSORSTORE_LIBS "${TENSORSTORE_BUILD_DIR}/libtensorstore*.a") -file(GLOB_RECURSE ABSEIL_LIBS "${DEPS_DIR}/absl-build/absl/*.a") -file(GLOB_RECURSE RIEGELI_LIBS "${DEPS_DIR}/riegeli-build/*.a") - -# Additional dependency libraries - corrected paths -file(GLOB_RECURSE BLOSC_LIBS "${DEPS_DIR}/blosc-build/*.a") -file(GLOB_RECURSE ZSTD_LIBS "${DEPS_DIR}/zstd-build/*.a") -file(GLOB_RECURSE RE2_LIBS "${DEPS_DIR}/re2-build/*.a") -file(GLOB_RECURSE SNAPPY_LIBS "${DEPS_DIR}/snappy-build/*.a") -file(GLOB_RECURSE BROTLI_LIBS "${DEPS_DIR}/brotli-build/*.a") -file(GLOB_RECURSE LZ4_LIBS "${DEPS_DIR}/lz4-build/*.a") -file(GLOB_RECURSE ZLIB_LIBS "${DEPS_DIR}/zlib-build/*.a") 
-file(GLOB_RECURSE PROTOBUF_LIBS "${DEPS_DIR}/protobuf-build/*.a") -file(GLOB_RECURSE GRPC_LIBS "${DEPS_DIR}/grpc-build/*.a") -file(GLOB_RECURSE CARES_LIBS "${DEPS_DIR}/c-ares-build/*.a") -file(GLOB_RECURSE SSL_LIBS "${DEPS_DIR}/boringssl-build/ssl/*.a") -file(GLOB_RECURSE CRYPTO_LIBS "${DEPS_DIR}/boringssl-build/crypto/*.a") -file(GLOB_RECURSE LIBLZMA_LIBS "${DEPS_DIR}/liblzma-build/*.a") -file(GLOB_RECURSE BZIP2_LIBS "${DEPS_DIR}/bzip2-build/*.a") -file(GLOB_RECURSE JPEG_LIBS "${DEPS_DIR}/jpeg-build/*.a") -file(GLOB_RECURSE PNG_LIBS "${DEPS_DIR}/png-build/*.a") -file(GLOB_RECURSE TIFF_LIBS "${DEPS_DIR}/tiff-build/*.a") -file(GLOB_RECURSE AVIF_LIBS "${DEPS_DIR}/avif-build/*.a") -file(GLOB_RECURSE AOM_LIBS "${DEPS_DIR}/aom-build/*.a") -file(GLOB_RECURSE WEBP_LIBS "${DEPS_DIR}/webp-build/*.a") -file(GLOB_RECURSE CURL_LIBS "${DEPS_DIR}/curl-build/*.a") - -# Create executable -add_executable(read_structured_zarr3 read_structured_zarr3.cc) - -# Link libraries - use whole-archive for libraries that use static registration -# These include drivers, codecs, kvstores, and context resource providers -target_link_libraries(read_structured_zarr3 PRIVATE - # Force inclusion of libraries with static registrations - -Wl,--whole-archive - - # Context resource providers - ${TENSORSTORE_BUILD_DIR}/libtensorstore_internal_data_copy_concurrency_resource.a - ${TENSORSTORE_BUILD_DIR}/libtensorstore_internal_file_io_concurrency_resource.a - ${TENSORSTORE_BUILD_DIR}/libtensorstore_internal_cache_cache_pool_resource.a - ${TENSORSTORE_BUILD_DIR}/libtensorstore_internal_concurrency_resource.a - - # Zarr3 driver and codecs - ${TENSORSTORE_BUILD_DIR}/libtensorstore_driver_zarr3_driver.a - ${TENSORSTORE_BUILD_DIR}/libtensorstore_driver_zarr3_codec_blosc.a - ${TENSORSTORE_BUILD_DIR}/libtensorstore_driver_zarr3_codec_bytes.a - ${TENSORSTORE_BUILD_DIR}/libtensorstore_driver_zarr3_codec_crc32c.a - ${TENSORSTORE_BUILD_DIR}/libtensorstore_driver_zarr3_codec_gzip.a - 
${TENSORSTORE_BUILD_DIR}/libtensorstore_driver_zarr3_codec_transpose.a - ${TENSORSTORE_BUILD_DIR}/libtensorstore_driver_zarr3_codec_zstd.a - ${TENSORSTORE_BUILD_DIR}/libtensorstore_driver_zarr3_codec_sharding_indexed.a - ${TENSORSTORE_BUILD_DIR}/libtensorstore_driver_zarr3_codec_codec_chain_spec.a - - # File kvstore and its resource providers - ${TENSORSTORE_BUILD_DIR}/libtensorstore_kvstore_file.a - ${TENSORSTORE_BUILD_DIR}/libtensorstore_kvstore_file_file_resource.a - - -Wl,--no-whole-archive - - -Wl,--start-group - - # Tensorstore libs - ${TENSORSTORE_LIBS} - - # Riegeli - ${RIEGELI_LIBS} - - # Abseil - ${ABSEIL_LIBS} - - # Compression libs - ${BLOSC_LIBS} - ${ZSTD_LIBS} - ${LZ4_LIBS} - ${SNAPPY_LIBS} - ${BROTLI_LIBS} - ${ZLIB_LIBS} - ${LIBLZMA_LIBS} - ${BZIP2_LIBS} - - # Regex - ${RE2_LIBS} - - # Protocol buffers and gRPC - ${PROTOBUF_LIBS} - ${GRPC_LIBS} - ${CARES_LIBS} - - # SSL/TLS - ${SSL_LIBS} - ${CRYPTO_LIBS} - - # Image libraries - ${JPEG_LIBS} - ${PNG_LIBS} - ${TIFF_LIBS} - ${AVIF_LIBS} - ${AOM_LIBS} - ${WEBP_LIBS} - - # HTTP - ${CURL_LIBS} - - -Wl,--end-group - - # System libraries - pthread - dl - m - rt -) diff --git a/examples/read_structured_zarr3.cc b/examples/read_structured_zarr3.cc deleted file mode 100644 index 720ef1330..000000000 --- a/examples/read_structured_zarr3.cc +++ /dev/null @@ -1,496 +0,0 @@ -// Copyright 2024 The TensorStore Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -// Standalone test for reading structured data from Zarr v3 arrays. -// -// This test opens two Zarr v3 arrays: -// 1. A structured array with named fields (headers/) -// 2. A raw bytes array containing struct data (raw_headers/) -// -// Both arrays should contain the same data, allowing comparison of: -// - Field-based access vs manual byte extraction -// - Structured dtype parsing vs raw byte handling -// - New open_as_void option for raw byte access to structured data -// -// Usage: -// bazel run //examples:read_structured_zarr3 -- /path/to/parent/dir -// -// Or with cmake: -// cd examples/build && ./read_structured_zarr3 --zarr_path=/path/to/parent/dir -// -// Where the parent dir contains both 'headers/' and 'raw_headers/' subdirs. - -#include - -#include -#include -#include -#include - -#include "absl/flags/flag.h" -#include "absl/flags/parse.h" -#include "absl/status/status.h" -#include -#include "tensorstore/array.h" -#include "tensorstore/context.h" -#include "tensorstore/data_type.h" -#include "tensorstore/index.h" -#include "tensorstore/open.h" -#include "tensorstore/open_mode.h" -#include "tensorstore/spec.h" -#include "tensorstore/tensorstore.h" -#include "tensorstore/util/result.h" -#include "tensorstore/util/status.h" - -// Internal headers for testing dtype parsing -#include "tensorstore/driver/zarr3/dtype.h" - -// Additional headers for string operations -#include "absl/strings/str_join.h" - -ABSL_FLAG(std::string, zarr_path, - "/home/ubuntu/source/tensorstore/filt_mig.mdio", - "Path to the parent .mdio directory containing headers/ and raw_headers/"); - -namespace { - -using ::tensorstore::Index; - -// Field layout from the zarr.json metadata: -// The structured dtype has the following fields with their byte offsets: -// trace_seq_num_line: int32 @ 0 -// trace_seq_num_reel: int32 @ 4 -// ... (many more fields) ... 
-// inline: int32 @ 180 -// crossline: int32 @ 184 -// cdp_x: int32 @ 188 -// cdp_y: int32 @ 192 -// -// Total struct size: 196 bytes (matches blosc typesize) - -constexpr size_t kInlineFieldOffset = 180; -constexpr size_t kStructSize = 196; - -// Read and parse the zarr.json metadata to display info about structured type -void PrintZarrMetadata(const std::string& zarr_path) { - std::string metadata_path = zarr_path + "/zarr.json"; - std::ifstream file(metadata_path); - if (!file.is_open()) { - std::cerr << "Could not open " << metadata_path << std::endl; - return; - } - - nlohmann::json metadata; - try { - file >> metadata; - } catch (const nlohmann::json::parse_error& e) { - std::cerr << "Failed to parse zarr.json: " << e.what() << std::endl; - return; - } - - std::cout << "\n=== Zarr Metadata ===" << std::endl; - std::cout << "Shape: " << metadata["shape"].dump() << std::endl; - std::cout << "Dimension names: " << metadata["dimension_names"].dump() - << std::endl; - - if (metadata.contains("data_type")) { - auto& dt = metadata["data_type"]; - std::cout << "\nData type format:" << std::endl; - if (dt.is_object()) { - std::cout << " Type: object with name=\"" << dt["name"].get() - << "\"" << std::endl; - if (dt.contains("configuration") && - dt["configuration"].contains("fields")) { - auto& fields = dt["configuration"]["fields"]; - std::cout << " Number of fields: " << fields.size() << std::endl; - std::cout << " Fields:" << std::endl; - size_t byte_offset = 0; - for (const auto& field : fields) { - std::string name = field[0].get(); - std::string type = field[1].get(); - size_t size = (type == "int32" || type == "uint32" || type == "float32") - ? 
4 - : 2; // int16/uint16 - std::cout << " " << name << ": " << type << " @ byte " << byte_offset - << std::endl; - byte_offset += size; - } - std::cout << " Total struct size: " << byte_offset << " bytes" - << std::endl; - } - } else if (dt.is_string()) { - std::cout << " Type: simple \"" << dt.get() << "\"" - << std::endl; - } else if (dt.is_array()) { - std::cout << " Type: array with " << dt.size() << " fields" << std::endl; - } - } - - if (metadata.contains("codecs")) { - std::cout << "\nCodecs: " << metadata["codecs"].dump(2) << std::endl; - } -} - -// Helper function to read and display inline field from an array -absl::Status ReadInlineField(const tensorstore::TensorStore<>& store, - const std::string& array_name, - bool is_raw_bytes = false) { - // Get information about the array - auto domain = store.domain(); - std::cout << "\n=== " << array_name << " Array Info ===" << std::endl; - std::cout << "Domain: " << domain << std::endl; - std::cout << "Dtype: " << store.dtype() << std::endl; - std::cout << "Rank: " << store.rank() << std::endl; - - auto shape = domain.shape(); - std::cout << "Shape: ["; - for (int i = 0; i < shape.size(); ++i) { - if (i > 0) std::cout << ", "; - std::cout << shape[i]; - } - std::cout << "]" << std::endl; - - // Read all data - std::cout << "\n=== Reading " << array_name << " Data ===" << std::endl; - TENSORSTORE_ASSIGN_OR_RETURN( - auto array, tensorstore::Read(store).result()); - - std::cout << "Read complete. 
Array size: " << array.num_elements() - << " elements" << std::endl; - std::cout << "Data type: " << array.dtype() << std::endl; - - Index num_inline, num_crossline; - const int32_t* int_ptr; - - if (is_raw_bytes) { - // For raw bytes, we need to extract the inline field manually - // Shape is [inline, crossline, struct_size] - num_inline = shape[0]; - num_crossline = shape[1]; - Index struct_size = shape[2]; - if (struct_size != kStructSize) { - std::cout << "Warning: Raw struct size (" << struct_size - << ") differs from expected header struct size (" << kStructSize - << "). Assuming padding." << std::endl; - } - - // Extract inline field (4 bytes starting at offset 180) - auto byte_ptr = reinterpret_cast(array.data()); - std::vector inline_values(num_inline * num_crossline); - - for (Index i = 0; i < num_inline; ++i) { - for (Index j = 0; j < num_crossline; ++j) { - Index struct_offset = (i * num_crossline + j) * struct_size; - Index field_offset = struct_offset + kInlineFieldOffset; - std::memcpy(&inline_values[i * num_crossline + j], - byte_ptr + field_offset, 4); - } - } - - std::cout << "Extracted inline field from raw bytes at offset " - << kInlineFieldOffset << std::endl; - int_ptr = inline_values.data(); - } else { - // For structured array, field access already gave us int32 values - num_inline = shape[0]; - num_crossline = shape[1]; - int_ptr = reinterpret_cast(array.data()); - } - - std::cout << "\n=== Inline field values from " << array_name - << " (shape: " << num_inline << " x " << num_crossline << ") ===" << std::endl; - - // Print first 10 rows (or fewer if less data) - Index rows_to_print = std::min(num_inline, Index{10}); - Index cols_to_print = std::min(num_crossline, Index{10}); - - for (Index i = 0; i < rows_to_print; ++i) { - for (Index j = 0; j < cols_to_print; ++j) { - std::cout << int_ptr[i * num_crossline + j]; - if (j < cols_to_print - 1) { - std::cout << "\t"; - } - } - if (num_crossline > cols_to_print) { - std::cout << "\t..."; - } - 
std::cout << std::endl; - } - if (num_inline > rows_to_print) { - std::cout << "... (" << (num_inline - rows_to_print) << " more rows)" - << std::endl; - } - - std::cout << "\n=== " << array_name << " Summary ===" << std::endl; - std::cout << "Successfully read " << (num_inline * num_crossline) - << " inline values" << std::endl; - - // Show some statistics - int32_t min_val = int_ptr[0], max_val = int_ptr[0]; - int64_t sum = 0; - for (Index i = 0; i < num_inline * num_crossline; ++i) { - min_val = std::min(min_val, int_ptr[i]); - max_val = std::max(max_val, int_ptr[i]); - sum += int_ptr[i]; - } - std::cout << "Min value: " << min_val << std::endl; - std::cout << "Max value: " << max_val << std::endl; - std::cout << "Mean value: " << (static_cast(sum) / (num_inline * num_crossline)) << std::endl; - - return absl::OkStatus(); -} - -absl::Status Run(const std::string& zarr_path) { - std::cout << "=== Zarr v3 Structured Data Type Test ===" << std::endl; - std::cout << "Opening zarr3 arrays in: " << zarr_path << std::endl; - - auto context = tensorstore::Context::Default(); - - // First, display metadata information for structured array - std::string headers_path = zarr_path + "/headers"; - PrintZarrMetadata(headers_path); - - // Test raw_bytes parsing by reading and parsing the raw_headers zarr.json - std::cout << "\n" << std::string(60, '=') << std::endl; - std::cout << "TESTING RAW_BYTES PARSING" << std::endl; - std::cout << std::string(60, '=') << std::endl; - - std::string raw_metadata_path = zarr_path + "/raw_headers/zarr.json"; - std::ifstream raw_file(raw_metadata_path); - if (!raw_file.is_open()) { - std::cout << "Could not open " << raw_metadata_path << std::endl; - return absl::NotFoundError("Raw headers metadata not found"); - } - - nlohmann::json raw_metadata; - try { - raw_file >> raw_metadata; - } catch (const nlohmann::json::parse_error& e) { - std::cout << "Failed to parse raw zarr.json: " << e.what() << std::endl; - return absl::DataLossError("Invalid 
raw metadata JSON"); - } - - std::cout << "Raw headers data_type: " << raw_metadata["data_type"].dump(2) << std::endl; - - // Test parsing the raw_bytes data type - std::cout << "Testing raw_bytes dtype parsing..." << std::endl; - - // For now, just verify the JSON structure is what we expect - if (!raw_metadata.contains("data_type")) { - std::cout << "FAILED: No data_type in metadata" << std::endl; - return absl::NotFoundError("Missing data_type"); - } - - auto& dt = raw_metadata["data_type"]; - if (!dt.is_object() || !dt.contains("name") || dt["name"] != "raw_bytes") { - std::cout << "FAILED: data_type is not raw_bytes extension" << std::endl; - return absl::InvalidArgumentError("Not raw_bytes extension"); - } - - if (!dt.contains("configuration") || !dt["configuration"].contains("length_bytes")) { - std::cout << "FAILED: Missing length_bytes in configuration" << std::endl; - return absl::InvalidArgumentError("Missing length_bytes"); - } - - int length_bytes = dt["configuration"]["length_bytes"]; - std::cout << "SUCCESS: Found raw_bytes extension with length_bytes = " << length_bytes << std::endl; - std::cout << "This should parse to:" << std::endl; - std::cout << " - Single field with byte_t dtype" << std::endl; - std::cout << " - Field shape: [" << length_bytes << "]" << std::endl; - std::cout << " - Bytes per outer element: " << length_bytes << std::endl; - - // Now actually test the parsing implementation - std::cout << "\n=== Testing ParseDType Implementation ===" << std::endl; - auto dtype_result = tensorstore::internal_zarr3::ParseDType(dt); - if (!dtype_result.ok()) { - std::cout << "FAILED: Could not parse raw_bytes data type: " << dtype_result.status() << std::endl; - return dtype_result.status(); - } - - auto dtype = std::move(dtype_result).value(); - std::cout << "SUCCESS: ParseDType worked!" 
<< std::endl; - std::cout << " Fields: " << dtype.fields.size() << std::endl; - std::cout << " Has fields: " << dtype.has_fields << std::endl; - std::cout << " Bytes per outer element: " << dtype.bytes_per_outer_element << std::endl; - - if (!dtype.fields.empty()) { - const auto& field = dtype.fields[0]; - std::cout << " Field name: '" << field.name << "'" << std::endl; - std::cout << " Field dtype: " << field.dtype << std::endl; - std::cout << " Field shape: [" << absl::StrJoin(field.field_shape, ", ") << "]" << std::endl; - std::cout << " Field num_inner_elements: " << field.num_inner_elements << std::endl; - std::cout << " Field num_bytes: " << field.num_bytes << std::endl; - } - - // Verify the parsing is correct - bool parsing_correct = true; - if (dtype.fields.size() != 1) { - std::cout << "ERROR: Expected 1 field, got " << dtype.fields.size() << std::endl; - parsing_correct = false; - } - if (dtype.fields[0].name != "") { - std::cout << "ERROR: Expected empty field name, got '" << dtype.fields[0].name << "'" << std::endl; - parsing_correct = false; - } - if (dtype.fields[0].dtype != tensorstore::dtype_v) { - std::cout << "ERROR: Expected byte_t dtype, got " << dtype.fields[0].dtype << std::endl; - parsing_correct = false; - } - if (dtype.fields[0].field_shape != std::vector{length_bytes}) { - std::cout << "ERROR: Expected field shape [" << length_bytes << "], got [" - << absl::StrJoin(dtype.fields[0].field_shape, ", ") << "]" << std::endl; - parsing_correct = false; - } - if (dtype.bytes_per_outer_element != length_bytes) { - std::cout << "ERROR: Expected " << length_bytes << " bytes per element, got " - << dtype.bytes_per_outer_element << std::endl; - parsing_correct = false; - } - - if (parsing_correct) { - std::cout << "\n✅ PARSING VERIFICATION: All checks passed!" << std::endl; - std::cout << "The raw_bytes extension is correctly parsed." << std::endl; - } else { - std::cout << "\n❌ PARSING VERIFICATION: Some checks failed!" 
<< std::endl; - return absl::InternalError("Parsing verification failed"); - } - - // Test 1: Read from structured array using field access - std::cout << "\n" << std::string(60, '=') << std::endl; - std::cout << "TEST 1: Reading from structured 'headers' array" << std::endl; - std::cout << std::string(60, '=') << std::endl; - - ::nlohmann::json headers_spec = ::nlohmann::json::object(); - headers_spec["driver"] = "zarr3"; - headers_spec["kvstore"] = ::nlohmann::json::object(); - headers_spec["kvstore"]["driver"] = "file"; - headers_spec["kvstore"]["path"] = headers_path + "/"; - headers_spec["field"] = "inline"; // Extract inline field (int32 at byte offset 180) - - std::cout << "Spec: " << headers_spec.dump(2) << std::endl; - - auto headers_open_result = - tensorstore::Open(headers_spec, context, tensorstore::OpenMode::open, - tensorstore::ReadWriteMode::read) - .result(); - - if (!headers_open_result.ok()) { - std::cout << "\n=== Headers Open Failed ===" << std::endl; - std::cout << "Status: " << headers_open_result.status() << std::endl; - return headers_open_result.status(); - } - - auto headers_store = std::move(headers_open_result).value(); - TENSORSTORE_RETURN_IF_ERROR(ReadInlineField(headers_store, "headers")); - - // Test 2: Read from raw bytes array (no special void access needed) - std::cout << "\n" << std::string(60, '=') << std::endl; - std::cout << "TEST 2: Reading from raw 'raw_headers' array" << std::endl; - std::cout << std::string(60, '=') << std::endl; - - std::string raw_headers_path = zarr_path + "/raw_headers"; - ::nlohmann::json raw_spec = ::nlohmann::json::object(); - raw_spec["driver"] = "zarr3"; - raw_spec["kvstore"] = ::nlohmann::json::object(); - raw_spec["kvstore"]["driver"] = "file"; - raw_spec["kvstore"]["path"] = raw_headers_path + "/"; - // No field specified - raw_bytes has a single anonymous field - - std::cout << "Spec: " << raw_spec.dump(2) << std::endl; - - auto raw_open_result = - tensorstore::Open(raw_spec, context, 
tensorstore::OpenMode::open, - tensorstore::ReadWriteMode::read) - .result(); - - if (!raw_open_result.ok()) { - std::cout << "\n=== Raw Headers Open Failed ===" << std::endl; - std::cout << "Status: " << raw_open_result.status() << std::endl; - return raw_open_result.status(); - } - - auto raw_store = std::move(raw_open_result).value(); - TENSORSTORE_RETURN_IF_ERROR(ReadInlineField(raw_store, "raw_headers", /*is_raw_bytes=*/true)); - - // Test 3: Read from headers array as void (open_as_void=true) - // Use a fresh context to avoid cache sharing with Test 1 - std::cout << "\n" << std::string(60, '=') << std::endl; - std::cout << "TEST 3: Reading from 'headers' array as void (open_as_void=true)" << std::endl; - std::cout << std::string(60, '=') << std::endl; - - auto context_void = tensorstore::Context::Default(); - - ::nlohmann::json headers_void_spec = ::nlohmann::json::object(); - headers_void_spec["driver"] = "zarr3"; - headers_void_spec["kvstore"] = ::nlohmann::json::object(); - headers_void_spec["kvstore"]["driver"] = "file"; - headers_void_spec["kvstore"]["path"] = headers_path + "/"; - headers_void_spec["open_as_void"] = true; // New option for raw byte access - - std::cout << "Spec: " << headers_void_spec.dump(2) << std::endl; - - auto headers_void_open_result = - tensorstore::Open(headers_void_spec, context_void, tensorstore::OpenMode::open, - tensorstore::ReadWriteMode::read) - .result(); - - if (!headers_void_open_result.ok()) { - std::cout << "\n=== Headers (void) Open Failed ===" << std::endl; - std::cout << "Status: " << headers_void_open_result.status() << std::endl; - return headers_void_open_result.status(); - } - - auto headers_void_store = std::move(headers_void_open_result).value(); - TENSORSTORE_RETURN_IF_ERROR(ReadInlineField(headers_void_store, "headers (open_as_void)", /*is_raw_bytes=*/true)); - - std::cout << "\n" << std::string(60, '=') << std::endl; - std::cout << "COMPARISON: All three methods should give identical inline field values" 
<< std::endl; - std::cout << std::string(60, '=') << std::endl; - std::cout << "- Test 1: 'headers' with field=\"inline\" provides field access convenience\n" - << "- Test 2: 'raw_headers' (raw_bytes type) provides direct byte access\n" - << "- Test 3: 'headers' with open_as_void=true provides raw byte access to structured data\n" - << "All three extract the inline field from byte offset " << kInlineFieldOffset - << " in " << kStructSize << "-byte structs." << std::endl; - - return absl::OkStatus(); -} - -} // namespace - -int main(int argc, char** argv) { - absl::ParseCommandLine(argc, argv); - - std::string zarr_path = absl::GetFlag(FLAGS_zarr_path); - if (zarr_path.empty()) { - std::cerr << "Error: --zarr_path is required" << std::endl; - return 1; - } - - // Verify the path structure - std::string headers_path = zarr_path + "/headers"; - std::string raw_headers_path = zarr_path + "/raw_headers"; - - std::cout << "Expecting arrays at:" << std::endl; - std::cout << " Structured: " << headers_path << std::endl; - std::cout << " Raw bytes: " << raw_headers_path << std::endl; - std::cout << std::endl; - - auto status = Run(zarr_path); - if (!status.ok()) { - std::cerr << "\nFinal status: " << status << std::endl; - return 1; - } - - return 0; -} diff --git a/tensorstore/driver/zarr3/driver.cc b/tensorstore/driver/zarr3/driver.cc index 51cc17f42..ec30edd82 100644 --- a/tensorstore/driver/zarr3/driver.cc +++ b/tensorstore/driver/zarr3/driver.cc @@ -779,12 +779,16 @@ class ZarrDriver::OpenState : public ZarrDriver::OpenStateBase { std::string GetDataCacheKey(const void* metadata) override { std::string result; + const auto& zarr_metadata = *static_cast(metadata); internal::EncodeCacheKey( - &result, spec().store.path, - static_cast(metadata)->GetCompatibilityKey()); + &result, + spec().store.path, + zarr_metadata.GetCompatibilityKey(), + spec().open_as_void ? 
"void" : "normal"); return result; } + Result> Create(const void* existing_metadata, CreateOptions options) override { if (existing_metadata) { From 8c4c4cafe2b33df06131d985c2574c973f817b3d Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Thu, 4 Dec 2025 16:07:26 +0000 Subject: [PATCH 15/20] Remove vestigial example build --- examples/BUILD | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/examples/BUILD b/examples/BUILD index 4dcb2d604..94acdba14 100644 --- a/examples/BUILD +++ b/examples/BUILD @@ -122,26 +122,3 @@ tensorstore_cc_binary( "@riegeli//riegeli/bytes:writer", ], ) - -tensorstore_cc_binary( - name = "read_structured_zarr3", - srcs = ["read_structured_zarr3.cc"], - deps = [ - "//tensorstore", - "//tensorstore:array", - "//tensorstore:context", - "//tensorstore:data_type", - "//tensorstore:index", - "//tensorstore:open", - "//tensorstore:open_mode", - "//tensorstore:spec", - "//tensorstore/driver/zarr3", - "//tensorstore/kvstore/file", - "//tensorstore/util:result", - "//tensorstore/util:status", - "@abseil-cpp//absl/flags:flag", - "@abseil-cpp//absl/flags:parse", - "@abseil-cpp//absl/status", - "@nlohmann_json//:json", - ], -) From 4b590f855adc963fe20940bd704693d81190483a Mon Sep 17 00:00:00 2001 From: Brian Michell Date: Thu, 4 Dec 2025 11:11:14 -0600 Subject: [PATCH 16/20] V3 structs fix fills (#4) * Use the appropriate fill value for open_as_void structured data * Cleanup --- tensorstore/driver/zarr3/driver.cc | 70 ++++++++++++++++++++++++++++-- 1 file changed, 66 insertions(+), 4 deletions(-) diff --git a/tensorstore/driver/zarr3/driver.cc b/tensorstore/driver/zarr3/driver.cc index ec30edd82..f86e4ad88 100644 --- a/tensorstore/driver/zarr3/driver.cc +++ b/tensorstore/driver/zarr3/driver.cc @@ -171,12 +171,74 @@ class ZarrDriverSpec IndexTransformView<> transform) const override { SharedArray fill_value{schema.fill_value()}; - const auto& metadata = metadata_constraints; - if (metadata.fill_value && 
!metadata.fill_value->empty()) { - fill_value = (*metadata.fill_value)[0]; + const auto& constraints = metadata_constraints; + + // If constraints don't specify a fill value, just use the schema's. + if (!constraints.fill_value || constraints.fill_value->empty()) { + return fill_value; + } + + const auto& vec = *constraints.fill_value; + + // If we don't have dtype information, we can't do field-aware logic. + if (!constraints.data_type) { + if (!vec.empty()) return vec[0]; + return fill_value; + } + + const ZarrDType& dtype = *constraints.data_type; + + // Determine which field this spec refers to (or void access). + TENSORSTORE_ASSIGN_OR_RETURN( + size_t field_index, + GetFieldIndex(dtype, selected_field, open_as_void)); + + // ── Normal field access: just return that field's fill_value ─────────────── + if (field_index != kVoidFieldIndex) { + if (field_index < vec.size()) { + return vec[field_index]; + } + // Fallback to "no fill". + return SharedArray(); + } + + // ── Void access: synthesize a byte-level fill value ──────────────────────── + // + // We want a 1D byte array of length bytes_per_outer_element whose contents + // are exactly the Zarr-defined struct layout built from per-field fills. + + // Special case: "raw bytes" field (single byte_t field with flexible shape). + // In that case the existing fill array already has the correct bytes. + if (dtype.fields.size() == 1 && + dtype.fields[0].dtype.id() == DataTypeId::byte_t && + !dtype.fields[0].flexible_shape.empty()) { + // vec[0] should be a byte array of size bytes_per_outer_element. + return vec[0]; + } + + const Index nbytes = dtype.bytes_per_outer_element; + + auto byte_arr = AllocateArray( + span({nbytes}), c_order, default_init, + dtype_v); + auto* dst = static_cast(byte_arr.data()); + std::memset(dst, 0, static_cast(nbytes)); + + // Pack each field's scalar fill into its byte_offset region. 
+ for (size_t i = 0; i < dtype.fields.size() && i < vec.size(); ++i) { + const auto& field = dtype.fields[i]; + const auto& field_fill = vec[i]; + if (!field_fill.valid()) continue; + + // We assume a single outer element per field here (which is exactly how + // FillValueJsonBinder constructs per-field fill values). + std::memcpy( + dst + field.byte_offset, + static_cast(field_fill.data()), + static_cast(field.num_bytes)); } - return fill_value; + return byte_arr; } Result GetDimensionUnits() const override { From c0082a0f09c4537bed65aaaf17939f8825204985 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Thu, 4 Dec 2025 17:22:51 +0000 Subject: [PATCH 17/20] Add new options to schema --- tensorstore/driver/zarr3/schema.yml | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/tensorstore/driver/zarr3/schema.yml b/tensorstore/driver/zarr3/schema.yml index 4f9733415..9491027b1 100644 --- a/tensorstore/driver/zarr3/schema.yml +++ b/tensorstore/driver/zarr3/schema.yml @@ -17,6 +17,31 @@ allOf: automatically. When creating a new array, the new metadata is obtained by combining these metadata constraints with any `Schema` constraints. $ref: driver/zarr3/Metadata + field: + type: string + title: Field selection for structured arrays. + description: | + Name of the field to select from a structured array. When specified, + the tensorstore will provide access to only the specified field of + each element in the structured array. + open_as_void: + type: boolean + default: false + title: Raw byte access mode. + description: | + When true, opens the array as raw bytes instead of interpreting it + as structured data. The resulting array will have an additional + dimension representing the byte layout of each element. 
+ # NOTE(review): the previous "oneOf" here was unsatisfiable — both of + # its branches reduced to "neither key present", so a spec with + # neither key matched two branches and a spec with either key matched + # none. "field" and "open_as_void" are mutually exclusive ("field" + # selects one struct member, "open_as_void" exposes the raw bytes of + # the whole struct), so forbid only the combination of both keys. + not: + allOf: + - required: ["field"] + - required: ["open_as_void"] examples: - driver: zarr3 kvstore: From 9a46c82968f1e70e1cb14e3b827dcf627b80463 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Thu, 4 Dec 2025 17:31:17 +0000 Subject: [PATCH 18/20] Fix copyright header date --- tensorstore/driver/zarr3/dtype.cc | 2 +- tensorstore/driver/zarr3/dtype.h | 2 +- tensorstore/driver/zarr3/dtype_test.cc | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorstore/driver/zarr3/dtype.cc b/tensorstore/driver/zarr3/dtype.cc index 5b3261812..b8aacaa68 100644 --- a/tensorstore/driver/zarr3/dtype.cc +++ b/tensorstore/driver/zarr3/dtype.cc @@ -1,4 +1,4 @@ -// Copyright 2020 The TensorStore Authors +// Copyright 2025 The TensorStore Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/tensorstore/driver/zarr3/dtype.h b/tensorstore/driver/zarr3/dtype.h index 430dd8849..73a6b0961 100644 --- a/tensorstore/driver/zarr3/dtype.h +++ b/tensorstore/driver/zarr3/dtype.h @@ -1,4 +1,4 @@ -// Copyright 2020 The TensorStore Authors +// Copyright 2025 The TensorStore Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/tensorstore/driver/zarr3/dtype_test.cc b/tensorstore/driver/zarr3/dtype_test.cc index ef55aba09..709178bc3 100644 --- a/tensorstore/driver/zarr3/dtype_test.cc +++ b/tensorstore/driver/zarr3/dtype_test.cc @@ -1,4 +1,4 @@ -// Copyright 2023 The TensorStore Authors +// Copyright 2025 The TensorStore Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License.
From b9b5e41db3266155aa47323249f18687a1e2e45b Mon Sep 17 00:00:00 2001 From: Brian Michell Date: Thu, 4 Dec 2025 12:52:30 -0600 Subject: [PATCH 19/20] Cleanup (#5) --- tensorstore/driver/zarr3/driver.cc | 2 -- tensorstore/driver/zarr3/dtype_test.cc | 1 - 2 files changed, 3 deletions(-) diff --git a/tensorstore/driver/zarr3/driver.cc b/tensorstore/driver/zarr3/driver.cc index f86e4ad88..f65533197 100644 --- a/tensorstore/driver/zarr3/driver.cc +++ b/tensorstore/driver/zarr3/driver.cc @@ -737,8 +737,6 @@ class ZarrDriver : public ZarrDriverBase { if (metadata.fill_value.empty()) { return SharedArray(); } - // return metadata.fill_value[0]; - // TODO: Doe we actually need to validate this or can we trust that component_index will return a valid index? size_t index = this->component_index(); if (index >= metadata.fill_value.size()) { return absl::OutOfRangeError("Component index out of bounds"); diff --git a/tensorstore/driver/zarr3/dtype_test.cc b/tensorstore/driver/zarr3/dtype_test.cc index 709178bc3..a41830069 100644 --- a/tensorstore/driver/zarr3/dtype_test.cc +++ b/tensorstore/driver/zarr3/dtype_test.cc @@ -17,7 +17,6 @@ #include #include -#include // for std::byte #include #include From 4e12b633eb5624f209f7bd3d4e441fb468ef677c Mon Sep 17 00:00:00 2001 From: Brian Michell Date: Thu, 4 Dec 2025 14:46:38 -0600 Subject: [PATCH 20/20] Add open_as_void option to zarr v2 driver (#6) --- tensorstore/driver/zarr/driver.cc | 211 ++++++++++++++-- tensorstore/driver/zarr/driver_impl.h | 8 +- tensorstore/driver/zarr/driver_test.cc | 322 +++++++++++++++++++++++++ tensorstore/driver/zarr/schema.yml | 8 + tensorstore/driver/zarr/spec.cc | 22 +- tensorstore/driver/zarr/spec.h | 13 +- 6 files changed, 561 insertions(+), 23 deletions(-) diff --git a/tensorstore/driver/zarr/driver.cc b/tensorstore/driver/zarr/driver.cc index 69164648e..8a0943ae5 100644 --- a/tensorstore/driver/zarr/driver.cc +++ b/tensorstore/driver/zarr/driver.cc @@ -29,6 +29,10 @@ #include "absl/status/status.h" 
#include "absl/strings/cord.h" #include +#include "riegeli/bytes/cord_reader.h" +#include "riegeli/bytes/cord_writer.h" +#include "riegeli/bytes/read_all.h" +#include "riegeli/bytes/write.h" #include "tensorstore/array.h" #include "tensorstore/array_storage_statistics.h" #include "tensorstore/box.h" @@ -137,6 +141,20 @@ absl::Status ZarrDriverSpec::ApplyOptions(SpecOptions&& options) { } Result ZarrDriverSpec::GetSpecInfo() const { + // For open_as_void, we don't use normal field resolution + // Note: When opening an existing array, dtype may not be known yet, + // so we can't determine the exact rank until metadata is loaded. + if (open_as_void && partial_metadata.dtype) { + SpecRankAndFieldInfo info; + info.full_rank = schema.rank(); + info.chunked_rank = partial_metadata.rank; + // For void access, add one dimension for the bytes + info.field_rank = 1; // The bytes dimension + if (info.chunked_rank != dynamic_rank) { + info.full_rank = info.chunked_rank + 1; + } + return info; + } return GetSpecRankAndFieldInfo(partial_metadata, selected_field, schema); } @@ -171,6 +189,10 @@ TENSORSTORE_DEFINE_JSON_DEFAULT_BINDER( jb::Member("field", jb::Projection<&ZarrDriverSpec::selected_field>( jb::DefaultValue( [](auto* obj) { *obj = std::string{}; }))), + jb::Member("open_as_void", + jb::Projection<&ZarrDriverSpec::open_as_void>( + jb::DefaultValue( + [](auto* v) { *v = false; }))), jb::Initialize([](auto* obj) { TENSORSTORE_ASSIGN_OR_RETURN(auto info, obj->GetSpecInfo()); if (info.full_rank != dynamic_rank) { @@ -210,8 +232,19 @@ Result> ZarrDriverSpec::GetFillValue( const auto& metadata = partial_metadata; if (metadata.dtype && metadata.fill_value) { TENSORSTORE_ASSIGN_OR_RETURN( - size_t field_index, GetFieldIndex(*metadata.dtype, selected_field)); - fill_value = (*metadata.fill_value)[field_index]; + size_t field_index, + GetFieldIndex(*metadata.dtype, selected_field, open_as_void)); + + // For void access, synthesize a byte-level fill value + if (field_index == 
kVoidFieldIndex) { + const Index nbytes = metadata.dtype->bytes_per_outer_element; + auto byte_arr = AllocateArray( + span({nbytes}), c_order, value_init, + dtype_v); + fill_value = byte_arr; + } else { + fill_value = (*metadata.fill_value)[field_index]; + } } if (!fill_value.valid() || !transform.valid()) { @@ -238,13 +271,15 @@ Result> ZarrDriverSpec::GetFillValue( DataCache::DataCache(Initializer&& initializer, std::string key_prefix, DimensionSeparator dimension_separator, - std::string metadata_key) + std::string metadata_key, bool open_as_void) : Base(std::move(initializer), GetChunkGridSpecification( - *static_cast(initializer.metadata.get()))), + *static_cast(initializer.metadata.get()), + open_as_void)), key_prefix_(std::move(key_prefix)), dimension_separator_(dimension_separator), - metadata_key_(std::move(metadata_key)) {} + metadata_key_(std::move(metadata_key)), + open_as_void_(open_as_void) {} absl::Status DataCache::ValidateMetadataCompatibility( const void* existing_metadata_ptr, const void* new_metadata_ptr) { @@ -268,12 +303,40 @@ void DataCache::GetChunkGridBounds(const void* metadata_ptr, DimensionSet& implicit_lower_bounds, DimensionSet& implicit_upper_bounds) { const auto& metadata = *static_cast(metadata_ptr); - assert(bounds.rank() == static_cast(metadata.shape.size())); - std::fill(bounds.origin().begin(), bounds.origin().end(), Index(0)); + // Use >= assertion like zarr3 to allow for extra dimensions + assert(bounds.rank() >= static_cast(metadata.shape.size())); + std::fill(bounds.origin().begin(), + bounds.origin().begin() + metadata.shape.size(), Index(0)); std::copy(metadata.shape.begin(), metadata.shape.end(), bounds.shape().begin()); implicit_lower_bounds = false; - implicit_upper_bounds = true; + implicit_upper_bounds = false; + for (DimensionIndex i = 0; + i < static_cast(metadata.shape.size()); ++i) { + implicit_upper_bounds[i] = true; + } + // Handle extra dimensions for void access or field shapes + if (bounds.rank() > 
static_cast(metadata.shape.size())) { + if (open_as_void_) { + // For void access, the extra dimension is the bytes_per_outer_element + if (static_cast(metadata.shape.size() + 1) == + bounds.rank()) { + bounds.shape()[metadata.rank] = metadata.dtype.bytes_per_outer_element; + bounds.origin()[metadata.rank] = 0; + } + } else if (metadata.dtype.fields.size() == 1) { + // Handle single field with field_shape (like zarr3) + const auto& field = metadata.dtype.fields[0]; + if (static_cast(metadata.shape.size() + + field.field_shape.size()) == + bounds.rank()) { + for (size_t i = 0; i < field.field_shape.size(); ++i) { + bounds.shape()[metadata.shape.size() + i] = field.field_shape[i]; + bounds.origin()[metadata.shape.size() + i] = 0; + } + } + } + } } Result> DataCache::GetResizedMetadata( @@ -294,13 +357,61 @@ Result> DataCache::GetResizedMetadata( } internal::ChunkGridSpecification DataCache::GetChunkGridSpecification( - const ZarrMetadata& metadata) { + const ZarrMetadata& metadata, bool open_as_void) { internal::ChunkGridSpecification::ComponentList components; - components.reserve(metadata.dtype.fields.size()); std::vector chunked_to_cell_dimensions( metadata.chunks.size()); std::iota(chunked_to_cell_dimensions.begin(), chunked_to_cell_dimensions.end(), static_cast(0)); + + // Special case: void access - create single component for raw bytes + if (open_as_void) { + const Index bytes_per_element = metadata.dtype.bytes_per_outer_element; + + // Create a zero-filled byte array as the fill value + auto base_fill_value = AllocateArray( + span({bytes_per_element}), c_order, value_init, + dtype_v); + + // The full chunk shape includes the extra bytes dimension + std::vector chunk_shape_with_bytes = metadata.chunks; + chunk_shape_with_bytes.push_back(bytes_per_element); + + const DimensionIndex cell_rank = metadata.rank + 1; + + // Broadcast fill value to target shape [unbounded, ..., bytes_per_element] + // like zarr3 does + std::vector target_shape(metadata.rank, 
kInfIndex); + target_shape.push_back(bytes_per_element); + auto chunk_fill_value = + BroadcastArray(base_fill_value, BoxView<>(target_shape)).value(); + + // Create valid data bounds - unbounded for chunked dimensions, + // explicit for bytes dimension + Box<> valid_data_bounds(cell_rank); + for (DimensionIndex i = 0; i < metadata.rank; ++i) { + valid_data_bounds[i] = IndexInterval::Infinite(); + } + valid_data_bounds[metadata.rank] = + IndexInterval::UncheckedSized(0, bytes_per_element); + + // Create permutation: copy existing order and add the bytes dimension + DimensionIndex layout_order_buffer[kMaxRank]; + GetChunkInnerOrder(metadata.rank, metadata.order, + span(layout_order_buffer, metadata.rank)); + layout_order_buffer[metadata.rank] = metadata.rank; // Add bytes dimension + + components.emplace_back( + internal::AsyncWriteArray::Spec{ + std::move(chunk_fill_value), std::move(valid_data_bounds), + ContiguousLayoutPermutation<>(span(layout_order_buffer, cell_rank))}, + std::move(chunk_shape_with_bytes), chunked_to_cell_dimensions); + + return internal::ChunkGridSpecification{std::move(components)}; + } + + // Normal field-based access + components.reserve(metadata.dtype.fields.size()); for (size_t field_i = 0; field_i < metadata.dtype.fields.size(); ++field_i) { const auto& field = metadata.dtype.fields[field_i]; const auto& field_layout = metadata.chunk_layout.fields[field_i]; @@ -335,12 +446,70 @@ internal::ChunkGridSpecification DataCache::GetChunkGridSpecification( Result, 1>> DataCache::DecodeChunk( span chunk_indices, absl::Cord data) { + if (open_as_void_) { + // For void access, return raw bytes as a single component + const auto& md = metadata(); + + // Decompress the data first (if compressed) + absl::Cord decompressed = std::move(data); + if (md.compressor) { + riegeli::CordReader base_reader(std::move(decompressed)); + auto compressed_reader = md.compressor->GetReader( + base_reader, md.dtype.bytes_per_outer_element); + absl::Cord uncompressed; + 
TENSORSTORE_RETURN_IF_ERROR( + riegeli::ReadAll(std::move(compressed_reader), uncompressed)); + if (!base_reader.VerifyEndAndClose()) return base_reader.status(); + decompressed = std::move(uncompressed); + } + + // Build the shape: chunk_shape + bytes_per_element + std::vector shape = md.chunks; + shape.push_back(md.dtype.bytes_per_outer_element); + + // Create a byte array from the decompressed data + auto flat_data = decompressed.Flatten(); + auto byte_array = AllocateArray(shape, c_order, default_init, + dtype_v); + std::memcpy(byte_array.data(), flat_data.data(), + std::min(static_cast(byte_array.num_elements()), + flat_data.size())); + + absl::InlinedVector, 1> result; + result.push_back(std::move(byte_array)); + return result; + } return internal_zarr::DecodeChunk(metadata(), std::move(data)); } Result DataCache::EncodeChunk( span chunk_indices, span> component_arrays) { + if (open_as_void_) { + // For void access, encode raw bytes directly + const auto& md = metadata(); + if (component_arrays.size() != 1) { + return absl::InvalidArgumentError( + "Expected exactly one component array for void access"); + } + const auto& byte_array = component_arrays[0]; + absl::Cord uncompressed( + std::string_view(static_cast(byte_array.data()), + byte_array.num_elements())); + + // Compress if needed + if (md.compressor) { + absl::Cord encoded; + riegeli::CordWriter base_writer(&encoded); + auto writer = md.compressor->GetWriter( + base_writer, md.dtype.bytes_per_outer_element); + TENSORSTORE_RETURN_IF_ERROR( + riegeli::Write(std::move(uncompressed), std::move(writer))); + if (!base_writer.Close()) return base_writer.status(); + return encoded; + } + return uncompressed; + } return internal_zarr::EncodeChunk(metadata(), component_arrays); } @@ -356,6 +525,7 @@ absl::Status DataCache::GetBoundSpecData( const auto& metadata = *static_cast(metadata_ptr); spec.selected_field = EncodeSelectedField(component_index, metadata.dtype); spec.metadata_key = metadata_key_; + 
spec.open_as_void = open_as_void_; auto& pm = spec.partial_metadata; pm.rank = metadata.rank; pm.zarr_format = metadata.zarr_format; @@ -416,6 +586,10 @@ Result ZarrDriverSpec::ToUrl() const { return absl::InvalidArgumentError( "zarr2 URL syntax not supported with selected_field specified"); } + if (open_as_void) { + return absl::InvalidArgumentError( + "zarr2 URL syntax not supported with open_as_void specified"); + } TENSORSTORE_ASSIGN_OR_RETURN(auto base_url, store.ToUrl()); return tensorstore::StrCat(base_url, "|", kUrlScheme, ":"); } @@ -483,7 +657,8 @@ class ZarrDriver::OpenState : public ZarrDriver::OpenStateBase { TENSORSTORE_ASSIGN_OR_RETURN( auto metadata, internal_zarr::GetNewMetadata(spec().partial_metadata, - spec().selected_field, spec().schema), + spec().selected_field, spec().schema, + spec().open_as_void), tensorstore::MaybeAnnotateStatus( _, "Cannot create using specified \"metadata\" and schema")); return metadata; @@ -496,7 +671,8 @@ class ZarrDriver::OpenState : public ZarrDriver::OpenStateBase { internal::EncodeCacheKey( &result, spec.store.path, GetDimensionSeparator(spec.partial_metadata, zarr_metadata), - zarr_metadata, spec.metadata_key); + zarr_metadata, spec.metadata_key, + spec.open_as_void ? 
"void" : "normal"); return result; } @@ -507,7 +683,7 @@ class ZarrDriver::OpenState : public ZarrDriver::OpenStateBase { return std::make_unique( std::move(initializer), spec().store.path, GetDimensionSeparator(spec().partial_metadata, metadata), - spec().metadata_key); + spec().metadata_key, spec().open_as_void); } Result GetComponentIndex(const void* metadata_ptr, @@ -516,7 +692,14 @@ class ZarrDriver::OpenState : public ZarrDriver::OpenStateBase { TENSORSTORE_RETURN_IF_ERROR( ValidateMetadata(metadata, spec().partial_metadata)); TENSORSTORE_ASSIGN_OR_RETURN( - auto field_index, GetFieldIndex(metadata.dtype, spec().selected_field)); + auto field_index, + GetFieldIndex(metadata.dtype, spec().selected_field, + spec().open_as_void)); + // For void access, map to component index 0 since we create a special + // component for raw byte access + if (field_index == kVoidFieldIndex) { + field_index = 0; + } TENSORSTORE_RETURN_IF_ERROR( ValidateMetadataSchema(metadata, field_index, spec().schema)); return field_index; diff --git a/tensorstore/driver/zarr/driver_impl.h b/tensorstore/driver/zarr/driver_impl.h index df3c3930f..c2933dd90 100644 --- a/tensorstore/driver/zarr/driver_impl.h +++ b/tensorstore/driver/zarr/driver_impl.h @@ -63,10 +63,11 @@ class ZarrDriverSpec ZarrPartialMetadata partial_metadata; SelectedField selected_field; std::string metadata_key; + bool open_as_void = false; constexpr static auto ApplyMembers = [](auto& x, auto f) { return f(internal::BaseCast(x), x.partial_metadata, - x.selected_field, x.metadata_key); + x.selected_field, x.metadata_key, x.open_as_void); }; absl::Status ApplyOptions(SpecOptions&& options) override; @@ -98,7 +99,7 @@ class DataCache : public internal_kvs_backed_chunk_driver::DataCache { public: explicit DataCache(Initializer&& initializer, std::string key_prefix, DimensionSeparator dimension_separator, - std::string metadata_key); + std::string metadata_key, bool open_as_void = false); const ZarrMetadata& metadata() { return 
*static_cast(initial_metadata().get()); @@ -117,7 +118,7 @@ class DataCache : public internal_kvs_backed_chunk_driver::DataCache { /// Returns the ChunkCache grid to use for the given metadata. static internal::ChunkGridSpecification GetChunkGridSpecification( - const ZarrMetadata& metadata); + const ZarrMetadata& metadata, bool open_as_void = false); Result, 1>> DecodeChunk( span chunk_indices, absl::Cord data) override; @@ -140,6 +141,7 @@ class DataCache : public internal_kvs_backed_chunk_driver::DataCache { std::string key_prefix_; DimensionSeparator dimension_separator_; std::string metadata_key_; + bool open_as_void_; }; class ZarrDriver; diff --git a/tensorstore/driver/zarr/driver_test.cc b/tensorstore/driver/zarr/driver_test.cc index 92c5be48a..a5014987d 100644 --- a/tensorstore/driver/zarr/driver_test.cc +++ b/tensorstore/driver/zarr/driver_test.cc @@ -3499,4 +3499,326 @@ TEST(DriverTest, UrlSchemeRoundtrip) { {"kvstore", {{"driver", "memory"}, {"path", "abc.zarr/def/"}}}}); } +// Tests for open_as_void functionality + +TEST(ZarrDriverTest, OpenAsVoidSimpleType) { + // Test open_as_void with a simple data type (int16) + auto context = Context::Default(); + + // First create a normal array + ::nlohmann::json create_spec{ + {"driver", "zarr"}, + {"kvstore", {{"driver", "memory"}, {"path", "prefix/"}}}, + {"metadata", + { + {"compressor", nullptr}, + {"dtype", "({{1, 2}, {3, 4}}); + TENSORSTORE_EXPECT_OK( + tensorstore::Write(data, store | tensorstore::Dims(0, 1).SizedInterval( + {0, 0}, {2, 2})) + .result()); + + // Now open with open_as_void=true + ::nlohmann::json void_spec{ + {"driver", "zarr"}, + {"kvstore", {{"driver", "memory"}, {"path", "prefix/"}}}, + {"open_as_void", true}, + }; + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto void_store, + tensorstore::Open(void_spec, context, tensorstore::OpenMode::open, + tensorstore::ReadWriteMode::read) + .result()); + + // The void store should have rank = original_rank + 1 (for bytes dimension) + EXPECT_EQ(3, 
void_store.rank()); + + // The last dimension should be the size of the data type (2 bytes for int16) + EXPECT_EQ(2, void_store.domain().shape()[2]); + + // The data type should be byte + EXPECT_EQ(tensorstore::dtype_v, + void_store.dtype()); +} + +TEST(ZarrDriverTest, OpenAsVoidStructuredType) { + // Test open_as_void with a structured data type + auto context = Context::Default(); + + // Create an array with a structured dtype + ::nlohmann::json create_spec{ + {"driver", "zarr"}, + {"kvstore", {{"driver", "memory"}, {"path", "prefix/"}}}, + {"field", "y"}, + {"metadata", + { + {"compressor", nullptr}, + {"dtype", ::nlohmann::json::array_t{{"x", "|u1"}, {"y", "({{100, 200}, {300, 400}}); + TENSORSTORE_EXPECT_OK( + tensorstore::Write(data, store | tensorstore::Dims(0, 1).SizedInterval( + {0, 0}, {2, 2})) + .result()); + + // Now open with open_as_void=true - this should give raw access to the entire + // struct + ::nlohmann::json void_spec{ + {"driver", "zarr"}, + {"kvstore", {{"driver", "memory"}, {"path", "prefix/"}}}, + {"open_as_void", true}, + }; + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto void_store, + tensorstore::Open(void_spec, context, tensorstore::OpenMode::open, + tensorstore::ReadWriteMode::read) + .result()); + + // The void store should have rank = original_rank + 1 (for bytes dimension) + EXPECT_EQ(3, void_store.rank()); + + // The last dimension should be 3 bytes (1 byte for u1 + 2 bytes for i2) + EXPECT_EQ(3, void_store.domain().shape()[2]); + + // The data type should be byte + EXPECT_EQ(tensorstore::dtype_v, + void_store.dtype()); +} + +TEST(ZarrDriverTest, OpenAsVoidWithCompression) { + // Test open_as_void with compression enabled + auto context = Context::Default(); + + // Create an array with blosc compression + ::nlohmann::json create_spec{ + {"driver", "zarr"}, + {"kvstore", {{"driver", "memory"}, {"path", "prefix/"}}}, + {"metadata", + { + {"compressor", {{"id", "blosc"}}}, + {"dtype", "({{0x01020304, 0x05060708}, + {0x090a0b0c, 
0x0d0e0f10}}); + TENSORSTORE_EXPECT_OK( + tensorstore::Write(data, store | tensorstore::Dims(0, 1).SizedInterval( + {0, 0}, {2, 2})) + .result()); + + // Now open with open_as_void=true + ::nlohmann::json void_spec{ + {"driver", "zarr"}, + {"kvstore", {{"driver", "memory"}, {"path", "prefix/"}}}, + {"open_as_void", true}, + }; + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto void_store, + tensorstore::Open(void_spec, context, tensorstore::OpenMode::open, + tensorstore::ReadWriteMode::read) + .result()); + + // The void store should have rank = original_rank + 1 (for bytes dimension) + EXPECT_EQ(3, void_store.rank()); + + // The last dimension should be 4 bytes for int32 + EXPECT_EQ(4, void_store.domain().shape()[2]); + + // The data type should be byte + EXPECT_EQ(tensorstore::dtype_v, + void_store.dtype()); + + // Read the raw bytes and verify decompression works + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto read_result, + tensorstore::Read(void_store | tensorstore::Dims(0, 1).SizedInterval( + {0, 0}, {2, 2})) + .result()); + EXPECT_EQ(read_result.shape()[0], 2); + EXPECT_EQ(read_result.shape()[1], 2); + EXPECT_EQ(read_result.shape()[2], 4); +} + +TEST(ZarrDriverTest, OpenAsVoidSpecRoundtrip) { + // Test that open_as_void is properly preserved in spec round-trips + ::nlohmann::json json_spec{ + {"driver", "zarr"}, + {"kvstore", {{"driver", "memory"}, {"path", "prefix/"}}}, + {"open_as_void", true}, + {"metadata", + { + {"compressor", nullptr}, + {"dtype", ", + void_store.dtype()); +} + +TEST(ZarrDriverTest, OpenAsVoidUrlNotSupported) { + // Test that open_as_void is not supported with URL syntax + ::nlohmann::json json_spec{ + {"driver", "zarr"}, + {"kvstore", {{"driver", "memory"}, {"path", "prefix/"}}}, + {"open_as_void", true}, + {"metadata", + { + {"dtype", "({{0x0102, 0x0304}, + {0x0506, 0x0708}}); + TENSORSTORE_EXPECT_OK(tensorstore::Write(data, store).result()); + + // Open as void and read + ::nlohmann::json void_spec{ + {"driver", "zarr"}, + {"kvstore", 
{{"driver", "memory"}, {"path", "prefix/"}}}, + {"open_as_void", true}, + }; + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto void_store, + tensorstore::Open(void_spec, context, tensorstore::OpenMode::open, + tensorstore::ReadWriteMode::read_write) + .result()); + + // Read the raw bytes + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto bytes_read, + tensorstore::Read(void_store).result()); + + // Verify shape: [2, 2, 2] where last dim is 2 bytes per uint16 + EXPECT_EQ(bytes_read.shape()[0], 2); + EXPECT_EQ(bytes_read.shape()[1], 2); + EXPECT_EQ(bytes_read.shape()[2], 2); + + // Verify the raw bytes (little endian) + auto bytes_ptr = static_cast(bytes_read.data()); + // First element: 0x0102 -> bytes 0x02, 0x01 (little endian) + EXPECT_EQ(bytes_ptr[0], 0x02); + EXPECT_EQ(bytes_ptr[1], 0x01); +} + } // namespace diff --git a/tensorstore/driver/zarr/schema.yml b/tensorstore/driver/zarr/schema.yml index 45711648c..a90fb7e3a 100644 --- a/tensorstore/driver/zarr/schema.yml +++ b/tensorstore/driver/zarr/schema.yml @@ -17,6 +17,14 @@ allOf: Must be specified if the `.metadata.dtype` specified in the array metadata has more than one field. default: null + open_as_void: + type: boolean + default: false + title: Raw byte access mode. + description: | + When true, opens the array as raw bytes instead of interpreting it + as structured data. The resulting array will have an additional + dimension representing the byte layout of each element. metadata: title: Zarr array metadata. 
description: | diff --git a/tensorstore/driver/zarr/spec.cc b/tensorstore/driver/zarr/spec.cc index 34a2825f9..4857d045b 100644 --- a/tensorstore/driver/zarr/spec.cc +++ b/tensorstore/driver/zarr/spec.cc @@ -151,7 +151,8 @@ absl::Status ValidateMetadata(const ZarrMetadata& metadata, Result GetNewMetadata( const ZarrPartialMetadata& partial_metadata, - const SelectedField& selected_field, const Schema& schema) { + const SelectedField& selected_field, const Schema& schema, + bool open_as_void) { ZarrMetadataPtr metadata = std::make_shared(); metadata->zarr_format = partial_metadata.zarr_format.value_or(2); metadata->dimension_separator = partial_metadata.dimension_separator.value_or( @@ -172,7 +173,12 @@ Result GetNewMetadata( // multi-field zarr dtype is desired, it must be specified explicitly. TENSORSTORE_ASSIGN_OR_RETURN( selected_field_index, - GetFieldIndex(*partial_metadata.dtype, selected_field)); + GetFieldIndex(*partial_metadata.dtype, selected_field, open_as_void)); + // For void access, use field 0 for metadata creation since we use all + // fields as raw bytes + if (selected_field_index == kVoidFieldIndex) { + selected_field_index = 0; + } metadata->dtype = *partial_metadata.dtype; } else { if (!selected_field.empty()) { @@ -527,7 +533,17 @@ std::string GetFieldNames(const ZarrDType& dtype) { } // namespace Result GetFieldIndex(const ZarrDType& dtype, - const SelectedField& selected_field) { + const SelectedField& selected_field, + bool open_as_void) { + // Special case: open_as_void requests raw byte access (works for any dtype) + if (open_as_void) { + if (dtype.fields.empty()) { + return absl::FailedPreconditionError( + "Requested void access but dtype has no fields"); + } + return kVoidFieldIndex; + } + if (selected_field.empty()) { if (dtype.fields.size() != 1) { return absl::FailedPreconditionError(tensorstore::StrCat( diff --git a/tensorstore/driver/zarr/spec.h b/tensorstore/driver/zarr/spec.h index 0ef3ab9d3..597fc32f0 100644 --- 
a/tensorstore/driver/zarr/spec.h +++ b/tensorstore/driver/zarr/spec.h @@ -70,9 +70,11 @@ using SelectedField = std::string; /// \param partial_metadata Constraints in the form of partial zarr metadata. /// \param selected_field The field to which `schema` applies. /// \param schema Schema constraints for the `selected_field`. +/// \param open_as_void If true, opens the array as raw bytes. Result GetNewMetadata( const ZarrPartialMetadata& partial_metadata, - const SelectedField& selected_field, const Schema& schema); + const SelectedField& selected_field, const Schema& schema, + bool open_as_void = false); struct SpecRankAndFieldInfo { /// Full rank of the TensorStore, if known. Equal to the chunked rank plus @@ -134,11 +136,16 @@ Result ParseSelectedField(const ::nlohmann::json& value); /// \param dtype The parsed zarr "dtype" specification. /// \param selected_field The label of the field, or an empty string to indicate /// that the zarr array must have only a single field. -/// \returns The field index. +/// \param open_as_void If true, returns kVoidFieldIndex for raw byte access. +/// \returns The field index, or kVoidFieldIndex if open_as_void is true. /// \error `absl::StatusCode::kFailedPrecondition` if `selected_field` is not /// valid. Result GetFieldIndex(const ZarrDType& dtype, - const SelectedField& selected_field); + const SelectedField& selected_field, + bool open_as_void = false); + +/// Special field index indicating void (raw byte) access. +constexpr size_t kVoidFieldIndex = size_t(-1); /// Encodes a field index as a `SelectedField` JSON specification. ///