-
Notifications
You must be signed in to change notification settings - Fork 0
/
ListUtil_OMP.pl
351 lines (280 loc) · 9.44 KB
/
ListUtil_OMP.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
#!/home/chrisarg/perl5/perlbrew/perls/current/bin/perl
use v5.38;
=head1 NAME
ListUtil_OMP.pl - Benchmark List::Util and list reduction operations with OMP
=head1 VERSION
Version 0.01 - created for London Perl & Raku Workshop 2024
=head1 USAGE
perl ListUtil_OMP.pl-m 1 -s .3 -n 30 -r 100 -t 8 -e guided,1 -o ListUtil_OMP.csv
Simulates a large number of rounded doubles and times a number of reduction
operations (sum, product), using for loops in base Perl, List::Util, and
OpenMP parallelized operations using Inline.
=head1 OPTIONS
Numerous options can control the scenarios to be tested (and none of them are
required, since sensible defaults are provided):
=over 4
=item B<-n> I<number of elements>
Number of elements of the numeric array (the "size" of the problem)
=item B<-m> I<mean>
Mean of the lognormal distribution used to draw random numbers from
=item B<-s> I<standard deviation>
Standard deviation of the lognormal distribution used to draw random numbers from
=item B<-r> I<number of repetitions>
Number of repetitions for the benchmarking
=item B<-t> I<number of threads>
Maximum number of threads to use for the OpenMP operations (default is 2); the program
will test all scenarios from 1 to C<$c> threads.
=item B<-e> I<OpenMP schedule>
OpenMP schedule to use for the parallelized operations (default is guided,1). The
schedule is a comma-separated string with the first element being the schedule kind
(static, dynamic, guided, auto) and the second element being the chunk size. This
will be set up through the OMP_SCHEDULE environment variable at runtime.
=item B<-o> I<output file>
Output file for the benchmarking results (default is ListUtil_OMP.csv)
=back
=head1 DEPENDENCIES
The script depends on the following modules:
=over 4
=item Benchmark::CSV
=item File::Copy
=item Getopt::Long
=item Inline::C
=item List::Util
=item Math::GSL::RNG
=item Math::GSL::Randist
=item OpenMP::Environment
=item PDL::Lite
=item PDL::IO::CSV
=item PDL::Stats::Basic
=back
=head1 TODO
Expand the script to include more operations (e.g. min and max) and more scenarios
including different types of data (e.g. text, integers, floating point numbers),
and OMP tasks.
=head1 AUTHOR
Christos Argyropoulos, C<< <chrisarg at cpan.org> >>
=cut
###############################################################################
## dependencies
use Benchmark::CSV; # for proper statistical benchmarking
use File::Copy; # for copying files
use Getopt::Long; # for parsing command line options
use List::Util qw(min max sum product)
; # for finding the minimum and maximum values
use Math::GSL::RNG;
use Math::GSL::Randist qw/:all/; # for generating random numbers
use OpenMP::Environment; # for controlling the OpenMP environment
use PDL::Lite;
use PDL::IO::CSV ':all';
use PDL::Stats::Basic;
use Inline (
C => 'DATA',
ccflagsex => q{-fopenmp},
lddlflags => join( q{ }, $Config::Config{lddlflags}, q{-fopenmp} ),
myextlib => ''
);
###############################################################################
## process command line options : number of elements, mean and standard deviation
## We will generate text length and integer values with a lognormal distribution
my $n = 1_000; # number of elements
my $m = 1; # mean
my $s = 1; # standard deviation
my $r = 20; # number of repetitions
my $t = 2; # maximum number of threads to test for scalability
my $e = 'guided,1'; # OpenMP schedule
my $o = 'ListUtil_OMP.csv'; # output file
GetOptions(
'n=i' => \$n,
'm=f' => \$m,
's=f' => \$s,
'r=i' => \$r,
't=i' => \$t,
'e=s' => \$e,
'o=s' => \$o,
);
my $env = OpenMP::Environment->new(); ## initialize the OpenMP environment
## simulate the text and numeric arrays
my $rng = Math::GSL::RNG->new();
my @random_numbers = (undef) x $n;
for my $i ( 0 .. $n - 1 ) {
$random_numbers[$i] = int( gsl_ran_lognormal( $rng->raw(), $m, $s ) );
}
## to avoid overflow issues, invert every other element in @random_numbers
## and zeros to ones
for my $i ( 0 .. $#random_numbers ) {
$random_numbers[$i] = 1 if $random_numbers[$i] == 0;
$random_numbers[$i] = 1.0/$random_numbers[$i] if $i % 2;
}
## initialize and set the benchmarks
my $benchmark = Benchmark::CSV->new(
output => $o,
sample_size => 1,
);
$benchmark->add_instance( 'ListUtil_sum_num_1' => sub { sum(@random_numbers) },
);
$benchmark->add_instance(
'ForLoop_sum_num_1' => sub {
my $sum = 0.0;
foreach my $i (@random_numbers) {
$sum += $i;
}
},
);
$benchmark->add_instance( 'StC_sum_num_1' => sub { sum_with_C(\@random_numbers) },
);
for my $num_of_threads ( 1 .. $t ) {
my $bench_name = sprintf "OMP_sum_num_%02d", $num_of_threads;
$benchmark->add_instance(
$bench_name => sub {
$env->omp_num_threads($num_of_threads);
$env->omp_schedule($e);
set_openmp_schedule_from_env();
set_openmp_num_threads_from_env();
sum_with_OMP( \@random_numbers );
},
);
} ## add the benchmarks for the OpenMP parallelized sum
$benchmark->add_instance(
'ListUtil_prod_num_1' => sub { product(@random_numbers) }, );
$benchmark->add_instance(
'ForLoop_prod_num_1' => sub {
my $prod = 1.0;
foreach my $i (@random_numbers) {
$prod *= $i;
}
},
);
$benchmark->add_instance( 'StC_prod_num_1' => sub { prod_with_C(\@random_numbers) },
);
for my $num_of_threads ( 1 .. $t ) {
my $bench_name = sprintf "OMP_prod_num_%02d", $num_of_threads;
$benchmark->add_instance(
$bench_name => sub {
$env->omp_num_threads($num_of_threads);
$env->omp_schedule($e);
set_openmp_schedule_from_env();
set_openmp_num_threads_from_env();
prod_with_OMP( \@random_numbers );
},
);
} ## add the benchmarks for the OpenMP parallelized sum
$benchmark->run_iterations($r);
# Load the CSV file
my @data = rcsv1D( $o, { text2bad => 1, header => 1 } );
my %summary_stats = ();
foreach my $col ( 0 .. $#data ) {
my $pdl = pdl( $data[$col] );
my $mean = $pdl->average;
my $stddev = $pdl->stdv_unbiased;
my $median = $pdl->median;
$summary_stats{ $data[$col]->hdr->{col_name} } =
{ mean => $mean, stddev => $stddev, median => $median };
}
# Get the column names from the first row
my @column_names = sort keys %{ $summary_stats{ ( keys %summary_stats )[0] } };
# Define the width for each column
my $width_name = 24;
my $width_col = 10;
# Print the column names
printf "%-${width_name}s", '';
printf "%${width_col}s", $_ for @column_names;
print "\n";
# Print each row
foreach my $row_name ( sort keys %summary_stats ) {
printf "%-${width_name}s", $row_name;
printf "%${width_col}.1e", $summary_stats{$row_name}{$_} for @column_names;
print "\n";
}
## copy the $o to a file in which the size of the problem is included prior to the
## extension .csv
my $o_new = $o;
$o_new =~ s/\.csv/_$n.csv/;
copy( $o, $o_new );
unlink $o;
__DATA__
__C__
#include <omp.h>
#include <stdlib.h>
#include <string.h>
void set_openmp_schedule_from_env();
void set_openmp_num_threads_from_env();
SV* sum_with_OMP(AV *array);
SV* prod_with_OMP(AV *array);
SV* sum_with_C(AV *array);
SV* prod_with_C(AV *array);
void set_openmp_schedule_from_env() {
char *schedule_env = getenv("OMP_SCHEDULE");
if (schedule_env != NULL) {
char *kind_str = strtok(schedule_env, ",");
char *chunk_size_str = strtok(NULL, ",");
omp_sched_t kind;
if (strcmp(kind_str, "static") == 0) {
kind = omp_sched_static;
} else if (strcmp(kind_str, "dynamic") == 0) {
kind = omp_sched_dynamic;
} else if (strcmp(kind_str, "guided") == 0) {
kind = omp_sched_guided;
} else {
kind = omp_sched_auto;
}
int chunk_size = atoi(chunk_size_str);
omp_set_schedule(kind, chunk_size);
}
}
void set_openmp_num_threads_from_env() {
char *num;
num = getenv("OMP_NUM_THREADS");
omp_set_num_threads(atoi(num));
}
SV* sum_with_OMP(AV *array) {
int len = av_len(array) + 1;
double retval = 0.0;
#pragma omp parallel
{
#pragma omp for schedule(runtime) reduction(+:retval) nowait
for (int i = 0; i < len; i++) {
SV **elem = av_fetch_simple(array, i, 0); // perl 5.36 and above
if (elem != NULL) {
retval += (double) SvNV(*elem);
}
}
}
return newSVnv(retval);
}
SV* sum_with_C(AV *array) {
int len = av_len(array) + 1;
double retval = 0.0;
for (int i = 0; i < len; i++) {
SV **elem = av_fetch_simple(array, i, 0); // perl 5.36 and above
if (elem != NULL) {
retval += (double) SvNV(*elem);
}
}
return newSVnv(retval);
}
SV* prod_with_OMP(AV *array) {
int len = av_len(array) + 1;
double retval = 1.0;
#pragma omp parallel
{
#pragma omp for schedule(runtime) reduction(*:retval) nowait
for (int i = 0; i < len; i++) {
SV **elem = av_fetch_simple(array, i, 0); // perl 5.36 and above
if (elem != NULL) {
retval *= (double) SvNV(*elem);
}
}
}
return newSVnv(retval);
}
SV* prod_with_C(AV *array) {
int len = av_len(array) + 1;
double retval = 1.0;
for (int i = 0; i < len; i++) {
SV **elem = av_fetch_simple(array, i, 0); // perl 5.36 and above
if (elem != NULL) {
retval *= (double) SvNV(*elem);
}
}
return newSVnv(retval);
}