From 97d18cb426fc43241f824add1352725f04470498 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Thu, 25 Dec 2025 13:12:11 +0900 Subject: [PATCH] GH-48642: [Ruby] Add support for reading decimal128 array --- .../lib/arrow-format/array.rb | 14 ++ .../lib/arrow-format/readable.rb | 6 + .../red-arrow-format/lib/arrow-format/type.rb | 206 ++++++++++++------ ruby/red-arrow-format/test/test-reader.rb | 12 + 4 files changed, 175 insertions(+), 63 deletions(-) diff --git a/ruby/red-arrow-format/lib/arrow-format/array.rb b/ruby/red-arrow-format/lib/arrow-format/array.rb index ac96038f194..7df305b28f0 100644 --- a/ruby/red-arrow-format/lib/arrow-format/array.rb +++ b/ruby/red-arrow-format/lib/arrow-format/array.rb @@ -306,6 +306,20 @@ def to_a end end + class DecimalArray < FixedSizeBinaryArray + def to_a + byte_width = @type.byte_width + values = 0.step(@size * byte_width - 1, byte_width).collect do |offset| + # TODO: How to represent in Ruby? + @values_buffer.get_string(offset, byte_width) + end + apply_validity(values) + end + end + + class Decimal128Array < DecimalArray + end + class VariableSizeListArray < Array def initialize(type, size, validity_buffer, offsets_buffer, child) super(type, size, validity_buffer) diff --git a/ruby/red-arrow-format/lib/arrow-format/readable.rb b/ruby/red-arrow-format/lib/arrow-format/readable.rb index 2d64d5387ff..5a247c822a4 100644 --- a/ruby/red-arrow-format/lib/arrow-format/readable.rb +++ b/ruby/red-arrow-format/lib/arrow-format/readable.rb @@ -25,6 +25,7 @@ require_relative "org/apache/arrow/flatbuf/bool" require_relative "org/apache/arrow/flatbuf/date" require_relative "org/apache/arrow/flatbuf/date_unit" +require_relative "org/apache/arrow/flatbuf/decimal" require_relative "org/apache/arrow/flatbuf/duration" require_relative "org/apache/arrow/flatbuf/fixed_size_binary" require_relative "org/apache/arrow/flatbuf/floating_point" @@ -166,6 +167,11 @@ def read_field(fb_field) type = LargeUTF8Type.singleton when Org::Apache::Arrow::Flatbuf::FixedSizeBinary type = FixedSizeBinaryType.new(fb_type.byte_width) + when Org::Apache::Arrow::Flatbuf::Decimal + case fb_type.bit_width + when 128 + type = Decimal128Type.new(fb_type.precision, fb_type.scale) + end end Field.new(fb_field.name, type, fb_field.nullable?) end diff --git a/ruby/red-arrow-format/lib/arrow-format/type.rb b/ruby/red-arrow-format/lib/arrow-format/type.rb index c6679660122..d6d8b7bb81a 100644 --- a/ruby/red-arrow-format/lib/arrow-format/type.rb +++ b/ruby/red-arrow-format/lib/arrow-format/type.rb @@ -16,10 +16,6 @@ module ArrowFormat class Type - attr_reader :name - def initialize(name) - @name = name - end end class NullType < Type @@ -29,8 +25,8 @@ def singleton end end - def initialize - super("Null") + def name + "Null" end def build_array(size) @@ -45,8 +41,8 @@ def singleton end end - def initialize - super("Boolean") + def name + "Boolean" end def build_array(size, validity_buffer, values_buffer) @@ -60,8 +56,8 @@ class NumberType < Type class IntType < NumberType attr_reader :bit_width attr_reader :signed - def initialize(name, bit_width, signed) - super(name) + def initialize(bit_width, signed) + super() @bit_width = bit_width @signed = signed end @@ -75,7 +71,11 @@ def singleton end def initialize - super("Int8", 8, true) + super(8, true) + end + + def name + "Int8" end def build_array(size, validity_buffer, values_buffer) @@ -91,7 +91,11 @@ def singleton end def initialize - super("UInt8", 8, false) + super(8, false) + end + + def name + "UInt8" end def build_array(size, validity_buffer, values_buffer) @@ -107,7 +111,11 @@ def singleton end def initialize - super("Int16", 16, true) + super(16, true) + end + + def name + "Int16" end def build_array(size, validity_buffer, values_buffer) @@ -123,7 +131,11 @@ def singleton end def initialize - super("UInt16", 16, false) + super(16, false) + end + + def name + "UInt16" end def build_array(size, validity_buffer, values_buffer) @@ -139,7 +151,11 @@ def singleton end def initialize - super("Int32", 32, true) + super(32, true) + end + + def name + "Int32" end def build_array(size, validity_buffer, values_buffer) @@ -155,7 +171,11 @@ def singleton end def initialize - super("UInt32", 32, false) + super(32, false) + end + + def name + "UInt32" end def build_array(size, validity_buffer, values_buffer) @@ -171,7 +191,11 @@ def singleton end def initialize - super("Int64", 64, true) + super(64, true) + end + + def name + "Int64" end def build_array(size, validity_buffer, values_buffer) @@ -187,7 +211,11 @@ def singleton end def initialize - super("UInt64", 64, false) + super(64, false) + end + + def name + "UInt64" end def build_array(size, validity_buffer, values_buffer) @@ -197,8 +225,8 @@ def build_array(size, validity_buffer, values_buffer) class FloatingPointType < NumberType attr_reader :precision - def initialize(name, precision) - super(name) + def initialize(precision) + super() @precision = precision end end @@ -211,7 +239,11 @@ def singleton end def initialize - super("Float32", :single) + super(:single) + end + + def name + "Float32" end def build_array(size, validity_buffer, values_buffer) @@ -227,7 +259,11 @@ def singleton end def initialize - super("Float64", :double) + super(:double) + end + + def name + "Float64" end def build_array(size, validity_buffer, values_buffer) @@ -248,8 +284,8 @@ def singleton end end - def initialize - super("Date32") + def name + "Date32" end def build_array(size, validity_buffer, values_buffer) @@ -264,8 +300,8 @@ def singleton end end - def initialize - super("Date64") + def name + "Date64" end def build_array(size, validity_buffer, values_buffer) @@ -275,15 +311,15 @@ def build_array(size, validity_buffer, values_buffer) class TimeType < TemporalType attr_reader :unit - def initialize(name, unit) - super(name) + def initialize(unit) + super() @unit = unit end end class Time32Type < TimeType - def initialize(unit) - super("Time32", unit) + def name + "Time32" end def build_array(size, validity_buffer, values_buffer) @@ -292,8 +328,8 @@ def build_array(size, validity_buffer, values_buffer) end class Time64Type < TimeType - def initialize(unit) - super("Time64", unit) + def name + "Time64" end def build_array(size, validity_buffer, values_buffer) @@ -305,11 +341,15 @@ class TimestampType < TemporalType attr_reader :unit attr_reader :timezone def initialize(unit, timezone) - super("Timestamp") + super() @unit = unit @timezone = timezone end + def name + "Timestamp" + end + def build_array(size, validity_buffer, values_buffer) TimestampArray.new(self, size, validity_buffer, values_buffer) end @@ -319,8 +359,8 @@ class IntervalType < TemporalType end class YearMonthIntervalType < IntervalType - def initialize - super("YearMonthInterval") + def name + "YearMonthInterval" end def build_array(size, validity_buffer, values_buffer) @@ -329,8 +369,8 @@ def build_array(size, validity_buffer, values_buffer) end class DayTimeIntervalType < IntervalType - def initialize - super("DayTimeInterval") + def name + "DayTimeInterval" end def build_array(size, validity_buffer, values_buffer) @@ -339,8 +379,8 @@ def build_array(size, validity_buffer, values_buffer) end class MonthDayNanoIntervalType < IntervalType - def initialize - super("MonthDayNanoInterval") + def name + "MonthDayNanoInterval" end def build_array(size, validity_buffer, values_buffer) @@ -354,10 +394,14 @@ def build_array(size, validity_buffer, values_buffer) class DurationType < TemporalType attr_reader :unit def initialize(unit) - super("Duration") + super() @unit = unit end + def name + "Duration" + end + def build_array(size, validity_buffer, values_buffer) DurationArray.new(self, size, validity_buffer, values_buffer) end @@ -373,8 +417,8 @@ def singleton end end - def initialize - super("Binary") + def name + "Binary" end def build_array(size, validity_buffer, offsets_buffer, values_buffer) @@ -389,8 +433,8 @@ def singleton end end - def initialize - super("LargeBinary") + def name + "LargeBinary" end def build_array(size, validity_buffer, offsets_buffer, values_buffer) @@ -409,8 +453,8 @@ def singleton end end - def initialize - super("UTF8") + def name + "UTF8" end def build_array(size, validity_buffer, offsets_buffer, values_buffer) @@ -425,8 +469,8 @@ def singleton end end - def initialize - super("LargeUTF8") + def name + "LargeUTF8" end def build_array(size, validity_buffer, offsets_buffer, values_buffer) @@ -441,27 +485,55 @@ def build_array(size, validity_buffer, offsets_buffer, values_buffer) class FixedSizeBinaryType < Type attr_reader :byte_width def initialize(byte_width) - super("FixedSizeBinary") + super() @byte_width = byte_width end + def name + "FixedSizeBinary" + end + def build_array(size, validity_buffer, values_buffer) FixedSizeBinaryArray.new(self, size, validity_buffer, values_buffer) end end + class DecimalType < FixedSizeBinaryType + attr_reader :precision + attr_reader :scale + def initialize(byte_width, precision, scale) + super(byte_width) + @precision = precision + @scale = scale + end + end + + class Decimal128Type < DecimalType + def initialize(precision, scale) + super(16, precision, scale) + end + + def name + "Decimal128" + end + + def build_array(size, validity_buffer, values_buffer) + Decimal128Array.new(self, size, validity_buffer, values_buffer) + end + end + class VariableSizeListType < Type attr_reader :child - def initialize(name, child) - super(name) + def initialize(child) + super() @child = child end end class ListType < VariableSizeListType - def initialize(child) - super("List", child) + def name + "List" end def build_array(size, validity_buffer, offsets_buffer, child) @@ -470,8 +542,8 @@ def build_array(size, validity_buffer, offsets_buffer, child) end class LargeListType < VariableSizeListType - def initialize(child) - super("LargeList", child) + def name + "LargeList" end def build_array(size, validity_buffer, offsets_buffer, child) @@ -482,10 +554,14 @@ def build_array(size, validity_buffer, offsets_buffer, child) class StructType < Type attr_reader :children def initialize(children) - super("Struct") + super() @children = children end + def name + "Struct" + end + def build_array(size, validity_buffer, children) StructArray.new(self, size, validity_buffer, children) end @@ -509,7 +585,11 @@ def initialize(child) raise TypeError.new("Map key field must not be nullable: " + type.children[0].inspect) end - super("Map", child) + super(child) + end + + def name + "Map" end def build_array(size, validity_buffer, offsets_buffer, child) @@ -520,8 +600,8 @@ def build_array(size, validity_buffer, offsets_buffer, child) class UnionType < Type attr_reader :children attr_reader :type_ids - def initialize(name, children, type_ids) - super(name) + def initialize(children, type_ids) + super() @children = children @type_ids = type_ids @type_indexes = {} @@ -533,8 +613,8 @@ def resolve_type_index(type) end class DenseUnionType < UnionType - def initialize(children, type_ids) - super("DenseUnion", children, type_ids) + def name + "DenseUnion" end def build_array(size, types_buffer, offsets_buffer, children) @@ -543,8 +623,8 @@ def build_array(size, types_buffer, offsets_buffer, children) end class SparseUnionType < UnionType - def initialize(children, type_ids) - super("SparseUnion", children, type_ids) + def name + "SparseUnion" end def build_array(size, types_buffer, children) diff --git a/ruby/red-arrow-format/test/test-reader.rb b/ruby/red-arrow-format/test/test-reader.rb index 8095adfd50f..01013bd69ac 100644 --- a/ruby/red-arrow-format/test/test-reader.rb +++ b/ruby/red-arrow-format/test/test-reader.rb @@ -674,6 +674,18 @@ def test_read end end + sub_test_case("Decimal128") do + def build_array + Arrow::Decimal128Array.new({precision: 3, scale: 1}, + ["10.1", nil, "1.11"]) + end + + def test_read + assert_equal([{"value" => ["TODO", nil, "TODO"]}], + read) + end + end + sub_test_case("List") do def build_array data_type = Arrow::ListDataType.new(name: "count", type: :int8)