Skip to content

Commit b5793e6

Browse files
Merge branch 'main' into friendlymatthew/compare-union-fields-eq
2 parents b43a742 + c2bd7d9 commit b5793e6

File tree

47 files changed

+3671
-514
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

47 files changed

+3671
-514
lines changed

.github/pull_request_template.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,38 @@
11
# Which issue does this PR close?
22

3+
<!--
34
We generally require a GitHub issue to be filed for all bug fixes and enhancements and this helps us generate change logs for our releases. You can link an issue to this PR using the GitHub syntax.
5+
-->
46

57
- Closes #NNN.
68

79
# Rationale for this change
810

11+
<!--
912
Why are you proposing this change? If this is already explained clearly in the issue then this section is not needed.
1013
Explaining clearly why changes are proposed helps reviewers understand your changes and offer better suggestions for fixes.
14+
-->
1115

1216
# What changes are included in this PR?
1317

18+
<!--
1419
There is no need to duplicate the description in the issue here but it is sometimes worth providing a summary of the individual changes in this PR.
20+
-->
1521

1622
# Are these changes tested?
1723

24+
<!--
1825
We typically require tests for all PRs in order to:
1926
1. Prevent the code from being accidentally broken by subsequent changes
2027
2. Serve as another way to document the expected behavior of the code
2128
2229
If tests are not included in your PR, please explain why (for example, are they covered by existing tests)?
30+
-->
2331

2432
# Are there any user-facing changes?
2533

34+
<!--
2635
If there are user-facing changes then we may require documentation to be updated before approving the PR.
2736
2837
If there are any breaking changes to public APIs, please call them out.
38+
-->

.github/workflows/docs.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ jobs:
7171
steps:
7272
- uses: actions/checkout@v6
7373
- name: Download crate docs
74-
uses: actions/download-artifact@v6
74+
uses: actions/download-artifact@v7
7575
with:
7676
name: crate-docs
7777
path: website/build

.github/workflows/integration.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -149,12 +149,12 @@ jobs:
149149
rustup default ${{ matrix.rust }}
150150
rustup component add rustfmt clippy
151151
- name: Cache Cargo
152-
uses: actions/cache@v4
152+
uses: actions/cache@v5
153153
with:
154154
path: /home/runner/.cargo
155155
key: cargo-maturin-cache-
156156
- name: Cache Rust dependencies
157-
uses: actions/cache@v4
157+
uses: actions/cache@v5
158158
with:
159159
path: /home/runner/target
160160
# this key is not equal because maturin uses different compilation flags.

arrow-array/benches/union_array.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,10 +54,10 @@ fn criterion_benchmark(c: &mut Criterion) {
5454
|b| {
5555
let type_ids = 0..with_nulls+without_nulls;
5656

57-
let fields = UnionFields::new(
57+
let fields = UnionFields::try_new(
5858
type_ids.clone(),
5959
type_ids.clone().map(|i| Field::new(format!("f{i}"), DataType::Int32, true)),
60-
);
60+
).unwrap();
6161

6262
let array = UnionArray::try_new(
6363
fields,

arrow-array/src/array/list_view_array.rs

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@ use crate::array::{make_array, print_long_array};
2626
use crate::builder::{GenericListViewBuilder, PrimitiveBuilder};
2727
use crate::iterator::GenericListViewArrayIter;
2828
use crate::{
29-
Array, ArrayAccessor, ArrayRef, ArrowPrimitiveType, FixedSizeListArray, OffsetSizeTrait,
30-
new_empty_array,
29+
Array, ArrayAccessor, ArrayRef, ArrowPrimitiveType, FixedSizeListArray, GenericListArray,
30+
OffsetSizeTrait, new_empty_array,
3131
};
3232

3333
/// A [`GenericListViewArray`] of variable size lists, storing offsets as `i32`.
@@ -498,6 +498,29 @@ impl<OffsetSize: OffsetSizeTrait> std::fmt::Debug for GenericListViewArray<Offse
498498
}
499499
}
500500

501+
/// Converts a [`GenericListArray`] into a [`GenericListViewArray`] by deriving
/// an explicit `(offset, size)` pair for every list from the consecutive
/// entries of the source offsets.
impl<OffsetSize: OffsetSizeTrait> From<GenericListArray<OffsetSize>>
502+
for GenericListViewArray<OffsetSize>
503+
{
504+
fn from(value: GenericListArray<OffsetSize>) -> Self {
505+
let (field, offsets, values, nulls) = value.into_parts();
506+
// One list per adjacent pair of offsets.
// NOTE(review): assumes `offsets` is never empty, otherwise this
// subtraction underflows — confirm the offsets type guarantees it.
let len = offsets.len() - 1;
507+
let mut sizes = Vec::with_capacity(len);
508+
let mut view_offsets = Vec::with_capacity(len);
509+
// `take(len)` skips the final offset, which only marks the end of the last list.
for (i, offset) in offsets.iter().enumerate().take(len) {
510+
view_offsets.push(*offset);
511+
// Size of list `i` is the distance from its start offset to the next offset.
sizes.push(offsets[i + 1] - offsets[i]);
512+
}
513+
514+
// `field`, `values` and `nulls` are passed through unchanged from `into_parts()`.
Self::new(
515+
field,
516+
ScalarBuffer::from(view_offsets),
517+
ScalarBuffer::from(sizes),
518+
values,
519+
nulls,
520+
)
521+
}
522+
}
523+
501524
impl<OffsetSize: OffsetSizeTrait> From<GenericListViewArray<OffsetSize>> for ArrayData {
502525
fn from(array: GenericListViewArray<OffsetSize>) -> Self {
503526
let len = array.len();

arrow-array/src/array/mod.rs

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -620,10 +620,11 @@ impl<'a> StringArrayType<'a> for &'a StringViewArray {
620620
}
621621
}
622622

623-
/// A trait for Arrow String Arrays, currently three types are supported:
623+
/// A trait for Arrow Binary Arrays, currently four types are supported:
624624
/// - `BinaryArray`
625625
/// - `LargeBinaryArray`
626626
/// - `BinaryViewArray`
627+
/// - `FixedSizeBinaryArray`
627628
///
628629
/// This trait helps to abstract over the different types of binary arrays
629630
/// so that we don't need to duplicate the implementation for each type.
@@ -642,6 +643,11 @@ impl<'a> BinaryArrayType<'a> for &'a BinaryViewArray {
642643
BinaryViewArray::iter(self)
643644
}
644645
}
646+
// Lets a borrowed `FixedSizeBinaryArray` be used through `BinaryArrayType`.
impl<'a> BinaryArrayType<'a> for &'a FixedSizeBinaryArray {
647+
fn iter(&self) -> ArrayIter<Self> {
648+
// Delegates through an explicit path, the same form the `BinaryViewArray` impl uses.
// NOTE(review): presumably the explicit path keeps the call from resolving
// back to this trait method (as `self.iter()` might) — confirm before restyling.
FixedSizeBinaryArray::iter(self)
649+
}
650+
}
645651

646652
impl PartialEq for dyn Array + '_ {
647653
fn eq(&self, other: &Self) -> bool {
@@ -1067,13 +1073,14 @@ mod tests {
10671073
fn test_null_union() {
10681074
for mode in [UnionMode::Sparse, UnionMode::Dense] {
10691075
let data_type = DataType::Union(
1070-
UnionFields::new(
1076+
UnionFields::try_new(
10711077
vec![2, 1],
10721078
vec![
10731079
Field::new("foo", DataType::Int32, true),
10741080
Field::new("bar", DataType::Int64, true),
10751081
],
1076-
),
1082+
)
1083+
.unwrap(),
10771084
mode,
10781085
);
10791086
let array = new_null_array(&data_type, 4);

arrow-array/src/array/union_array.rs

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1682,14 +1682,15 @@ mod tests {
16821682
#[test]
16831683
fn test_custom_type_ids() {
16841684
let data_type = DataType::Union(
1685-
UnionFields::new(
1685+
UnionFields::try_new(
16861686
vec![8, 4, 9],
16871687
vec![
16881688
Field::new("strings", DataType::Utf8, false),
16891689
Field::new("integers", DataType::Int32, false),
16901690
Field::new("floats", DataType::Float64, false),
16911691
],
1692-
),
1692+
)
1693+
.unwrap(),
16931694
UnionMode::Dense,
16941695
);
16951696

@@ -1796,14 +1797,15 @@ mod tests {
17961797
fn into_parts_custom_type_ids() {
17971798
let set_field_type_ids: [i8; 3] = [8, 4, 9];
17981799
let data_type = DataType::Union(
1799-
UnionFields::new(
1800+
UnionFields::try_new(
18001801
set_field_type_ids,
18011802
[
18021803
Field::new("strings", DataType::Utf8, false),
18031804
Field::new("integers", DataType::Int32, false),
18041805
Field::new("floats", DataType::Float64, false),
18051806
],
1806-
),
1807+
)
1808+
.unwrap(),
18071809
UnionMode::Dense,
18081810
);
18091811
let string_array = StringArray::from(vec!["foo", "bar", "baz"]);
@@ -1836,13 +1838,14 @@ mod tests {
18361838

18371839
#[test]
18381840
fn test_invalid() {
1839-
let fields = UnionFields::new(
1841+
let fields = UnionFields::try_new(
18401842
[3, 2],
18411843
[
18421844
Field::new("a", DataType::Utf8, false),
18431845
Field::new("b", DataType::Utf8, false),
18441846
],
1845-
);
1847+
)
1848+
.unwrap();
18461849
let children = vec![
18471850
Arc::new(StringArray::from_iter_values(["a", "b"])) as _,
18481851
Arc::new(StringArray::from_iter_values(["c", "d"])) as _,
@@ -1912,13 +1915,14 @@ mod tests {
19121915

19131916
assert_eq!(array.logical_nulls(), None);
19141917

1915-
let fields = UnionFields::new(
1918+
let fields = UnionFields::try_new(
19161919
[1, 3],
19171920
[
19181921
Field::new("a", DataType::Int8, false), // non nullable
19191922
Field::new("b", DataType::Int8, false), // non nullable
19201923
],
1921-
);
1924+
)
1925+
.unwrap();
19221926
let array = UnionArray::try_new(
19231927
fields,
19241928
vec![1].into(),
@@ -1932,13 +1936,14 @@ mod tests {
19321936

19331937
assert_eq!(array.logical_nulls(), None);
19341938

1935-
let nullable_fields = UnionFields::new(
1939+
let nullable_fields = UnionFields::try_new(
19361940
[1, 3],
19371941
[
19381942
Field::new("a", DataType::Int8, true), // nullable but without nulls
19391943
Field::new("b", DataType::Int8, true), // nullable but without nulls
19401944
],
1941-
);
1945+
)
1946+
.unwrap();
19421947
let array = UnionArray::try_new(
19431948
nullable_fields.clone(),
19441949
vec![1, 1].into(),

arrow-avro/benches/avro_writer.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -688,14 +688,15 @@ static ENUM_DATA: Lazy<Vec<RecordBatch>> = Lazy::new(|| {
688688

689689
static UNION_DATA: Lazy<Vec<RecordBatch>> = Lazy::new(|| {
690690
// Basic Dense Union of three types: Utf8, Int32, Float64
691-
let union_fields = UnionFields::new(
691+
let union_fields = UnionFields::try_new(
692692
vec![0, 1, 2],
693693
vec![
694694
Field::new("u_str", DataType::Utf8, true),
695695
Field::new("u_int", DataType::Int32, true),
696696
Field::new("u_f64", DataType::Float64, true),
697697
],
698-
);
698+
)
699+
.expect("UnionFields should be valid");
699700
let union_dt = DataType::Union(union_fields.clone(), UnionMode::Dense);
700701
let schema = schema_single("field1", union_dt);
701702

arrow-avro/src/codec.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -993,13 +993,13 @@ fn union_branch_name(dt: &AvroDataType) -> String {
993993
dt.codec.union_field_name()
994994
}
995995

996-
fn build_union_fields(encodings: &[AvroDataType]) -> UnionFields {
996+
/// Builds the Arrow `UnionFields` for a list of Avro union branch encodings.
///
/// Each encoding becomes one field named by `union_branch_name`, and type ids
/// are assigned sequentially starting at 0 in branch order. The result is
/// whatever `UnionFields::try_new` returns for those ids and fields.
fn build_union_fields(encodings: &[AvroDataType]) -> Result<UnionFields, ArrowError> {
997997
let arrow_fields: Vec<Field> = encodings
998998
.iter()
999999
.map(|encoding| encoding.field_with_name(&union_branch_name(encoding)))
10001000
.collect();
10011001
// NOTE(review): `i as i8` is a truncating cast, so an index of 128 or more
// wraps to a negative/duplicate id; presumably `try_new` rejects such ids,
// but confirm, or switch to a checked conversion for a clearer error.
let type_ids: Vec<i8> = (0..arrow_fields.len()).map(|i| i as i8).collect();
1002-
UnionFields::new(type_ids, arrow_fields)
1002+
UnionFields::try_new(type_ids, arrow_fields)
10031003
}
10041004

10051005
/// Resolves Avro type names to [`AvroDataType`]
@@ -1267,7 +1267,7 @@ impl<'a> Maker<'a> {
12671267
.map(|s| self.parse_type(s, namespace))
12681268
.collect::<Result<_, _>>()?;
12691269
// Build Arrow layout once here
1270-
let union_fields = build_union_fields(&children);
1270+
let union_fields = build_union_fields(&children)?;
12711271
Ok(AvroDataType::new(
12721272
Codec::Union(Arc::from(children), union_fields, UnionMode::Dense),
12731273
Default::default(),
@@ -1620,7 +1620,7 @@ impl<'a> Maker<'a> {
16201620
for writer in writer_variants {
16211621
writer_to_reader.push(self.find_best_promotion(writer, reader_variants, namespace));
16221622
}
1623-
let union_fields = build_union_fields(&reader_encodings);
1623+
let union_fields = build_union_fields(&reader_encodings)?;
16241624
let mut dt = AvroDataType::new(
16251625
Codec::Union(reader_encodings.into(), union_fields, UnionMode::Dense),
16261626
Default::default(),

0 commit comments

Comments
 (0)