Skip to content

Commit 2c558de

Browse files
committed
Follow previous zip benchmarks in design
1 parent 166b093 commit 2c558de

File tree

2 files changed

+81
-109
lines changed

2 files changed

+81
-109
lines changed

arrow/benches/zip_kernels.rs

Lines changed: 34 additions & 108 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,35 @@ where
133133
}
134134
}
135135

136+
struct GenerateStringView {
137+
str_len: usize,
138+
description: String,
139+
_marker: std::marker::PhantomData<StringViewType>,
140+
}
141+
142+
impl InputGenerator for GenerateStringView {
143+
fn name(&self) -> &str {
144+
self.description.as_str()
145+
}
146+
fn generate_scalar_with_null_value(&self) -> ArrayRef {
147+
new_null_array(&DataType::Utf8View, 1)
148+
}
149+
150+
fn generate_non_null_scalars(&self, seed: u64, number_of_scalars: usize) -> Vec<ArrayRef> {
151+
let array = self.generate_array(seed, number_of_scalars, 0.0);
152+
(0..number_of_scalars).map(|i| array.slice(i, 1)).collect()
153+
}
154+
155+
fn generate_array(&self, seed: u64, array_length: usize, null_percentage: f32) -> ArrayRef {
156+
Arc::new(create_string_view_array_with_fixed_len_with_seed(
157+
array_length,
158+
null_percentage,
159+
self.str_len,
160+
seed,
161+
))
162+
}
163+
}
164+
136165
fn mask_cases(len: usize) -> Vec<(&'static str, BooleanArray)> {
137166
vec![
138167
("all_true", create_boolean_array(len, 0.0, 1.0)),
@@ -145,9 +174,10 @@ fn mask_cases(len: usize) -> Vec<(&'static str, BooleanArray)> {
145174
("50pct_nulls", create_boolean_array(len, 0.5, 0.5)),
146175
]
147176
}
148-
const ARRAY_LEN: usize = 8192;
149177

150178
fn bench_zip_on_input_generator(c: &mut Criterion, input_generator: &impl InputGenerator) {
179+
const ARRAY_LEN: usize = 8192;
180+
151181
let mut group =
152182
c.benchmark_group(format!("zip_{ARRAY_LEN}_from_{}", input_generator.name()).as_str());
153183

@@ -223,68 +253,6 @@ fn bench_zip_input_on_all_masks(
223253
}
224254
}
225255

226-
fn bench_zip_on_string_view_scalar(c: &mut Criterion, input_generator: &GenerateStringView) {
227-
bench_zip_on_string_view_scalars(c, input_generator, input_generator);
228-
}
229-
230-
fn bench_zip_on_string_view_scalars(
231-
c: &mut Criterion,
232-
input_generator_1: &GenerateStringView,
233-
input_generator_2: &GenerateStringView,
234-
) {
235-
let mut group = c.benchmark_group(
236-
format!(
237-
"zip_{ARRAY_LEN}_from_{} and {}",
238-
input_generator_1.name(),
239-
input_generator_2.name()
240-
)
241-
.as_str(),
242-
);
243-
244-
let null_scalar = input_generator_1.generate_null_scalar();
245-
246-
let non_null_scalar_1 = input_generator_1.generate_scalar();
247-
let non_null_scalar_2 = input_generator_2.generate_scalar();
248-
249-
let masks = mask_cases(ARRAY_LEN);
250-
251-
for (description, truthy, falsy) in [
252-
("null_vs_non_null_scalar", &null_scalar, &non_null_scalar_1),
253-
(
254-
"non_null_scalar_vs_null_scalar",
255-
&non_null_scalar_1,
256-
&null_scalar,
257-
),
258-
("non_nulls_scalars", &non_null_scalar_1, &non_null_scalar_2),
259-
] {
260-
bench_zip_input_on_all_masks(description, &mut group, &masks, truthy, falsy);
261-
}
262-
group.finish();
263-
}
264-
265-
struct GenerateStringView {
266-
str_len: usize,
267-
description: String,
268-
_marker: std::marker::PhantomData<StringViewType>,
269-
}
270-
271-
impl GenerateStringView {
272-
fn name(&self) -> &str {
273-
self.description.as_str()
274-
}
275-
fn generate_null_scalar(&self) -> Scalar<ArrayRef> {
276-
Scalar::new(new_null_array(&DataType::Utf8View, 1))
277-
}
278-
279-
fn generate_scalar(&self) -> Scalar<ArrayRef> {
280-
Scalar::new(Arc::new(create_string_view_array_with_fixed_len(
281-
1,
282-
0.0,
283-
self.str_len,
284-
)))
285-
}
286-
}
287-
288256
fn add_benchmark(c: &mut Criterion) {
289257
// Primitive
290258
bench_zip_on_input_generator(
@@ -335,7 +303,7 @@ fn add_benchmark(c: &mut Criterion) {
335303
},
336304
);
337305

338-
bench_zip_on_string_view_scalar(
306+
bench_zip_on_input_generator(
339307
c,
340308
&GenerateStringView {
341309
description: "string_views size 3".to_string(),
@@ -344,59 +312,17 @@ fn add_benchmark(c: &mut Criterion) {
344312
},
345313
);
346314

347-
bench_zip_on_string_view_scalar(
348-
c,
349-
&GenerateStringView {
350-
description: "string_views size 10".to_string(),
351-
str_len: 10,
352-
_marker: std::marker::PhantomData,
353-
},
354-
);
355-
356-
bench_zip_on_string_view_scalar(
357-
c,
358-
&GenerateStringView {
359-
description: "string_views size 100".to_string(),
360-
str_len: 100,
361-
_marker: std::marker::PhantomData,
362-
},
363-
);
364-
365-
bench_zip_on_string_view_scalars(
315+
bench_zip_on_input_generator(
366316
c,
367-
&GenerateStringView {
368-
description: "string_views size 3".to_string(),
369-
str_len: 3,
370-
_marker: std::marker::PhantomData,
371-
},
372317
&GenerateStringView {
373318
description: "string_views size 10".to_string(),
374319
str_len: 10,
375320
_marker: std::marker::PhantomData,
376321
},
377322
);
378323

379-
bench_zip_on_string_view_scalars(
380-
c,
381-
&GenerateStringView {
382-
description: "string_views size 3".to_string(),
383-
str_len: 3,
384-
_marker: std::marker::PhantomData,
385-
},
386-
&GenerateStringView {
387-
description: "string_views size 100".to_string(),
388-
str_len: 100,
389-
_marker: std::marker::PhantomData,
390-
},
391-
);
392-
393-
bench_zip_on_string_view_scalars(
324+
bench_zip_on_input_generator(
394325
c,
395-
&GenerateStringView {
396-
description: "string_views size 10".to_string(),
397-
str_len: 10,
398-
_marker: std::marker::PhantomData,
399-
},
400326
&GenerateStringView {
401327
description: "string_views size 100".to_string(),
402328
str_len: 100,

arrow/src/util/bench_util.rs

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -319,12 +319,58 @@ pub fn create_string_view_array_with_max_len(
319319
}
320320

321321
/// Creates a random (but fixed-seeded) array of a given size, null density and length
322+
///
323+
/// Arguments:
324+
/// - `size`: number of elements in the string view array
325+
/// - `null_density`: density of nulls in the string view array
326+
/// - `str_len`: size of each string in the string view array
322327
pub fn create_string_view_array_with_fixed_len(
323328
size: usize,
324329
null_density: f32,
325330
str_len: usize,
326331
) -> StringViewArray {
327-
let rng = &mut seedable_rng();
332+
create_string_view_array_with_fixed_len_with_rng(
333+
size,
334+
null_density,
335+
str_len,
336+
&mut seedable_rng(),
337+
)
338+
}
339+
340+
/// Creates a string view array of a given size, null density and length
341+
///
342+
/// Arguments:
343+
/// - `size`: number of elements in the string view array
344+
/// - `null_density`: density of nulls in the string view array
345+
/// - `str_len`: size of each string in the string view array
346+
/// - `seed`: seed for the random number generator
347+
pub fn create_string_view_array_with_fixed_len_with_seed(
348+
size: usize,
349+
null_density: f32,
350+
str_len: usize,
351+
seed: u64,
352+
) -> StringViewArray {
353+
create_string_view_array_with_fixed_len_with_rng(
354+
size,
355+
null_density,
356+
str_len,
357+
&mut StdRng::seed_from_u64(seed),
358+
)
359+
}
360+
361+
/// Creates a string view array of a given size, null density and length
362+
///
363+
/// Arguments:
364+
/// - `size`: number of elements in the string view array
365+
/// - `null_density`: density of nulls in the string view array
366+
/// - `str_len`: size of each string in the string view array
367+
/// - `rng`: random number generator
368+
fn create_string_view_array_with_fixed_len_with_rng(
369+
size: usize,
370+
null_density: f32,
371+
str_len: usize,
372+
rng: &mut StdRng,
373+
) -> StringViewArray {
328374
(0..size)
329375
.map(|_| {
330376
if rng.random::<f32>() < null_density {

0 commit comments

Comments
 (0)