@@ -133,6 +133,35 @@ where
133133 }
134134}
135135
/// Benchmark input generator that produces `Utf8View` arrays and scalars.
/// Its `InputGenerator` impl forwards `str_len` to
/// `create_string_view_array_with_fixed_len_with_seed`.
136+ struct GenerateStringView {
/// String length argument handed to the string-view array helper.
137+ str_len : usize ,
/// Label returned by `name()`; the benchmark code uses it in the group name.
138+ description : String ,
/// Zero-sized marker mentioning `StringViewType`; carries no runtime data.
139+ _marker : std:: marker:: PhantomData < StringViewType > ,
140+ }
141+
/// `InputGenerator` implementation backed by string-view (`Utf8View`) arrays,
/// so this generator can be passed to `bench_zip_on_input_generator`.
142+ impl InputGenerator for GenerateStringView {
/// Returns the generator's `description` as a string slice.
143+ fn name ( & self ) -> & str {
144+ self . description . as_str ( )
145+ }
/// Returns a one-element array of type `DataType::Utf8View` built with
/// `new_null_array`, used as the "null scalar" input.
146+ fn generate_scalar_with_null_value ( & self ) -> ArrayRef {
147+ new_null_array ( & DataType :: Utf8View , 1 )
148+ }
149+
/// Returns `number_of_scalars` one-element arrays.
///
/// A single array of `number_of_scalars` values is generated from `seed` and
/// then cut into length-1 slices, one per index.
150+ fn generate_non_null_scalars ( & self , seed : u64 , number_of_scalars : usize ) -> Vec < ArrayRef > {
// A null percentage of 0.0 is requested so the slices should hold no nulls
// (assumes the helper treats 0.0 as "no nulls" — confirm against the helper).
151+ let array = self . generate_array ( seed, number_of_scalars, 0.0 ) ;
// `slice(i, 1)` takes the single element at offset `i`.
152+ ( 0 ..number_of_scalars) . map ( |i| array. slice ( i, 1 ) ) . collect ( )
153+ }
154+
/// Builds an array of `array_length` entries by delegating to
/// `create_string_view_array_with_fixed_len_with_seed`, forwarding
/// `null_percentage`, this generator's `str_len`, and `seed`, then wraps the
/// result in an `Arc` to return it as an `ArrayRef`.
155+ fn generate_array ( & self , seed : u64 , array_length : usize , null_percentage : f32 ) -> ArrayRef {
156+ Arc :: new ( create_string_view_array_with_fixed_len_with_seed (
157+ array_length,
158+ null_percentage,
159+ self . str_len ,
160+ seed,
161+ ) )
162+ }
163+ }
164+
136165fn mask_cases ( len : usize ) -> Vec < ( & ' static str , BooleanArray ) > {
137166 vec ! [
138167 ( "all_true" , create_boolean_array( len, 0.0 , 1.0 ) ) ,
@@ -145,9 +174,10 @@ fn mask_cases(len: usize) -> Vec<(&'static str, BooleanArray)> {
145174 ( "50pct_nulls" , create_boolean_array( len, 0.5 , 0.5 ) ) ,
146175 ]
147176}
148- const ARRAY_LEN : usize = 8192 ;
149177
150178fn bench_zip_on_input_generator ( c : & mut Criterion , input_generator : & impl InputGenerator ) {
179+ const ARRAY_LEN : usize = 8192 ;
180+
151181 let mut group =
152182 c. benchmark_group ( format ! ( "zip_{ARRAY_LEN}_from_{}" , input_generator. name( ) ) . as_str ( ) ) ;
153183
@@ -223,68 +253,6 @@ fn bench_zip_input_on_all_masks(
223253 }
224254}
225255
/// Convenience wrapper (removed by this diff): runs
/// `bench_zip_on_string_view_scalars` with the same generator supplied for both
/// generator arguments.
226- fn bench_zip_on_string_view_scalar ( c : & mut Criterion , input_generator : & GenerateStringView ) {
227- bench_zip_on_string_view_scalars ( c, input_generator, input_generator) ;
228- }
229-
/// Scalar-only string-view benchmark driver (removed by this diff).
///
/// Opens a Criterion group named after `ARRAY_LEN` and both generators' names,
/// builds one null scalar and two non-null scalars, and hands three
/// truthy/falsy pairings to `bench_zip_input_on_all_masks` along with the masks
/// from `mask_cases(ARRAY_LEN)`.
230- fn bench_zip_on_string_view_scalars (
231- c : & mut Criterion ,
232- input_generator_1 : & GenerateStringView ,
233- input_generator_2 : & GenerateStringView ,
234- ) {
// The group name embeds the array length and the two generator descriptions.
235- let mut group = c. benchmark_group (
236- format ! (
237- "zip_{ARRAY_LEN}_from_{} and {}" ,
238- input_generator_1. name( ) ,
239- input_generator_2. name( )
240- )
241- . as_str ( ) ,
242- ) ;
243-
// The null scalar is taken from the first generator only.
244- let null_scalar = input_generator_1. generate_null_scalar ( ) ;
245-
// One non-null scalar per generator.
246- let non_null_scalar_1 = input_generator_1. generate_scalar ( ) ;
247- let non_null_scalar_2 = input_generator_2. generate_scalar ( ) ;
248-
249- let masks = mask_cases ( ARRAY_LEN ) ;
250-
// Each tuple is (benchmark description, truthy input, falsy input).
251- for ( description, truthy, falsy) in [
252- ( "null_vs_non_null_scalar" , & null_scalar, & non_null_scalar_1) ,
253- (
254- "non_null_scalar_vs_null_scalar" ,
255- & non_null_scalar_1,
256- & null_scalar,
257- ) ,
258- ( "non_nulls_scalars" , & non_null_scalar_1, & non_null_scalar_2) ,
259- ] {
260- bench_zip_input_on_all_masks ( description, & mut group, & masks, truthy, falsy) ;
261- }
262- group. finish ( ) ;
263- }
264-
/// Previous location of the string-view generator struct (removed here by this
/// diff; the same fields are re-added earlier in the file).
265- struct GenerateStringView {
/// String length argument handed to the string-view array helper.
266- str_len : usize ,
/// Label returned by `name()`.
267- description : String ,
/// Zero-sized marker mentioning `StringViewType`; carries no runtime data.
268- _marker : std:: marker:: PhantomData < StringViewType > ,
269- }
270-
/// Inherent helper methods of the string-view generator (removed by this diff).
271- impl GenerateStringView {
/// Returns the generator's `description` as a string slice.
272- fn name ( & self ) -> & str {
273- self . description . as_str ( )
274- }
/// Wraps a one-element `DataType::Utf8View` array from `new_null_array` in a
/// `Scalar`.
275- fn generate_null_scalar ( & self ) -> Scalar < ArrayRef > {
276- Scalar :: new ( new_null_array ( & DataType :: Utf8View , 1 ) )
277- }
278-
/// Wraps a one-element array from `create_string_view_array_with_fixed_len`
/// in a `Scalar`; the helper is called with length 1, a 0.0 second argument
/// (presumably the null fraction — confirm against the helper), and `str_len`.
279- fn generate_scalar ( & self ) -> Scalar < ArrayRef > {
280- Scalar :: new ( Arc :: new ( create_string_view_array_with_fixed_len (
281- 1 ,
282- 0.0 ,
283- self . str_len ,
284- ) ) )
285- }
286- }
287-
288256fn add_benchmark ( c : & mut Criterion ) {
289257 // Primitive
290258 bench_zip_on_input_generator (
@@ -335,7 +303,7 @@ fn add_benchmark(c: &mut Criterion) {
335303 } ,
336304 ) ;
337305
338- bench_zip_on_string_view_scalar (
306+ bench_zip_on_input_generator (
339307 c,
340308 & GenerateStringView {
341309 description : "string_views size 3" . to_string ( ) ,
@@ -344,59 +312,17 @@ fn add_benchmark(c: &mut Criterion) {
344312 } ,
345313 ) ;
346314
347- bench_zip_on_string_view_scalar (
348- c,
349- & GenerateStringView {
350- description : "string_views size 10" . to_string ( ) ,
351- str_len : 10 ,
352- _marker : std:: marker:: PhantomData ,
353- } ,
354- ) ;
355-
356- bench_zip_on_string_view_scalar (
357- c,
358- & GenerateStringView {
359- description : "string_views size 100" . to_string ( ) ,
360- str_len : 100 ,
361- _marker : std:: marker:: PhantomData ,
362- } ,
363- ) ;
364-
365- bench_zip_on_string_view_scalars (
315+ bench_zip_on_input_generator (
366316 c,
367- & GenerateStringView {
368- description : "string_views size 3" . to_string ( ) ,
369- str_len : 3 ,
370- _marker : std:: marker:: PhantomData ,
371- } ,
372317 & GenerateStringView {
373318 description : "string_views size 10" . to_string ( ) ,
374319 str_len : 10 ,
375320 _marker : std:: marker:: PhantomData ,
376321 } ,
377322 ) ;
378323
379- bench_zip_on_string_view_scalars (
380- c,
381- & GenerateStringView {
382- description : "string_views size 3" . to_string ( ) ,
383- str_len : 3 ,
384- _marker : std:: marker:: PhantomData ,
385- } ,
386- & GenerateStringView {
387- description : "string_views size 100" . to_string ( ) ,
388- str_len : 100 ,
389- _marker : std:: marker:: PhantomData ,
390- } ,
391- ) ;
392-
393- bench_zip_on_string_view_scalars (
324+ bench_zip_on_input_generator (
394325 c,
395- & GenerateStringView {
396- description : "string_views size 10" . to_string ( ) ,
397- str_len : 10 ,
398- _marker : std:: marker:: PhantomData ,
399- } ,
400326 & GenerateStringView {
401327 description : "string_views size 100" . to_string ( ) ,
402328 str_len : 100 ,
0 commit comments