xref: /aosp_15_r20/external/icu/icu4j/perf-tests/ucharacterperf.pl (revision 0e209d3975ff4a8c132096b14b0e9364a753506e)
1#!/usr/local/bin/perl
2# * © 2016 and later: Unicode, Inc. and others.
3# * License & terms of use: http://www.unicode.org/copyright.html
4# *******************************************************************************
5# * Copyright (C) 2002-2007 International Business Machines Corporation and     *
6# * others. All Rights Reserved.                                                *
7# *******************************************************************************
8
9use strict;
10
11# Assume we are running within the icu4j root directory
12use lib 'src/com/ibm/icu/dev/test/perf';
13use Dataset;
14
15#---------------------------------------------------------------------
16# Test class
17my $TESTCLASS = 'com.ibm.icu.dev.test.perf.UCharacterPerf';
18
19my $CLASSES = './out/bin:../tools/misc/out/bin/:../icu4j.jar';
20
21# Methods to be tested.  Each pair represents a test method and
22# a baseline method which is used for comparison.
23my @METHODS  = (['JDKDigit',                    'Digit'],
24                ['JDKGetNumericValue',          'GetNumericValue'],
25                ['JDKGetType',                  'GetType'],
26                ['JDKIsDefined',                'IsDefined'],
27                ['JDKIsDigit',                  'IsDigit'],
28                ['JDKIsIdentifierIgnorable',    'IsIdentifierIgnorable'],
29                ['JDKIsISOControl',             'IsISOControl'],
30                ['JDKIsLetter',                 'IsLetter'],
31                ['JDKIsLetterOrDigit',          'IsLetterOrDigit'],
32                ['JDKIsLowerCase',              'IsLowerCase'],
33                ['JDKIsSpaceChar',              'IsSpaceChar'],
34                ['JDKIsTitleCase',              'IsTitleCase'],
35                ['JDKIsUnicodeIdentifierPart',  'IsUnicodeIdentifierPart'],
36                ['JDKIsUnicodeIdentifierStart', 'IsUnicodeIdentifierStart'],
37                ['JDKIsUpperCase',              'IsUpperCase'],
38                ['JDKIsWhiteSpace',             'IsWhiteSpace'],
39               );
40
41# Patterns which define the set of characters used for testing.
42my @PATTERNS = ('0 ffff');
43
44my $CALIBRATE = 2;  # duration in seconds for initial calibration
45my $DURATION  = 10; # duration in seconds for each pass
46my $NUMPASSES = 4;  # number of passes.  If > 1 then the first pass
47                    # is discarded as a JIT warm-up pass.
48
49my $TABLEATTR = 'BORDER="1" CELLPADDING="4" CELLSPACING="0"';
50
51my $PLUS_MINUS = "±";
52
53if ($NUMPASSES < 3) {
54    die "Need at least 3 passes.  One is discarded (JIT warmup) and need two to have 1 degree of freedom (t distribution).";
55}
56
57my $OUT; # see out()
58
59main();
60
61#---------------------------------------------------------------------
62# ...
63sub main {
64    my $date = localtime;
65    my $title = "ICU4J Performance Test $date";
66
67    my $html = $date;
68    $html =~ s/://g; # ':' illegal
69    $html =~ s/\s*\d+$//; # delete year
70    $html =~ s/^\w+\s*//; # delete dow
71    $html = "perf $html.html";
72
73    open(HTML,">$html") or die "Can't write to $html: $!";
74
75    print HTML <<EOF;
76<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
77   "http://www.w3.org/TR/html4/strict.dtd">
78<HTML>
79   <HEAD>
80      <TITLE>$title</TITLE>
81   </HEAD>
82   <BODY>
83EOF
84    print HTML "<H1>$title</H1>\n";
85
86    print HTML "<H2>$TESTCLASS</H2>\n";
87
88    my $raw = "";
89
90    for my $methodPair (@METHODS) {
91
92        my $testMethod = $methodPair->[0];
93        my $baselineMethod = $methodPair->[1];
94
95        print HTML "<P><TABLE $TABLEATTR><TR><TD>\n";
96        print HTML "<P><B>$testMethod vs. $baselineMethod</B></P>\n";
97
98        print HTML "<P><TABLE $TABLEATTR BGCOLOR=\"#CCFFFF\">\n";
99        print HTML "<TR><TD>Pattern</TD><TD>$testMethod</TD>";
100        print HTML "<TD>$baselineMethod</TD><TD>Ratio</TD></TR>\n";
101
102        $OUT = '';
103
104        for my $pat (@PATTERNS) {
105            print HTML "<TR><TD>$pat</TD>\n";
106
107            out("<P><TABLE $TABLEATTR WIDTH=\"100%\">");
108
109            # measure the test method
110            out("<TR><TD>");
111            print "\n$testMethod $pat\n";
112            my $t = measure2($testMethod, $pat, -$DURATION);
113            out("</TD></TR>");
114            print HTML "<TD>", formatSeconds(4, $t->getMean(), $t->getError);
115            print HTML "/event</TD>\n";
116
117            # measure baseline method
118            out("<TR><TD>");
119            print "\nBegin $baselineMethod $pat\n";
120            my $b = measure2($baselineMethod, $pat, -$DURATION);
121            out("</TD></TR>");
122            print HTML "<TD>", formatSeconds(4, $b->getMean(), $t->getError);
123            print HTML "/event</TD>\n";
124
125            out("</TABLE></P>");
126
127            # output ratio
128            my $r = $t->divide($b);
129            my $mean = $r->getMean() - 1;
130            my $color = $mean < 0 ? "RED" : "BLACK";
131            print HTML "<TD><B><FONT COLOR=\"$color\">", formatPercent(3, $mean, $r->getError);
132            print HTML "</FONT></B></TD></TR>\n";
133        }
134
135        print HTML "</TABLE></P>\n";
136
137        print HTML "<P>Raw data:</P>\n";
138        print HTML $OUT;
139        print HTML "</TABLE></P>\n";
140    }
141
142    print HTML <<EOF;
143   </BODY>
144</HTML>
145EOF
146    close(HTML) or die "Can't close $html: $!";
147}
148
149#---------------------------------------------------------------------
150# Append text to the global variable $OUT
151sub out {
152    $OUT .= join('', @_);
153}
154
155#---------------------------------------------------------------------
156# Append text to the global variable $OUT
157sub outln {
158    $OUT .= join('', @_) . "\n";
159}
160
161#---------------------------------------------------------------------
162# Measure a given test method with a give test pattern using the
163# global run parameters.
164#
165# @param the method to run
166# @param the pattern defining characters to test
167# @param if >0 then the number of iterations per pass.  If <0 then
168#        (negative of) the number of seconds per pass.
169#
170# @return a Dataset object, scaled by iterations per pass and
171#         events per iteration, to give time per event
172#
173sub measure2 {
174    my @data = measure1(@_);
175    my $iterPerPass = shift(@data);
176    my $eventPerIter = shift(@data);
177
178    shift(@data) if (@data > 1); # discard first run
179
180    my $ds = Dataset->new(@data);
181    $ds->setScale(1.0e-3 / ($iterPerPass * $eventPerIter));
182    $ds;
183}
184
185#---------------------------------------------------------------------
186# Measure a given test method with a give test pattern using the
187# global run parameters.
188#
189# @param the method to run
190# @param the pattern defining characters to test
191# @param if >0 then the number of iterations per pass.  If <0 then
192#        (negative of) the number of seconds per pass.
193#
194# @return array of:
195#         [0] iterations per pass
196#         [1] events per iteration
197#         [2..] ms reported for each pass, in order
198#
199sub measure1 {
200    my $method = shift;
201    my $pat = shift;
202    my $iterCount = shift; # actually might be -seconds/pass
203
204    out("<P>Measuring $method using $pat, ");
205    if ($iterCount > 0) {
206        out("$iterCount iterations/pass, $NUMPASSES passes</P>\n");
207    } else {
208        out(-$iterCount, " seconds/pass, $NUMPASSES passes</P>\n");
209    }
210
211    # is $iterCount actually -seconds/pass?
212    if ($iterCount < 0) {
213
214        # calibrate: estimate ms/iteration
215        print "Calibrating...";
216        my @t = callJava($method, $pat, -$CALIBRATE, 1);
217        print "done.\n";
218
219        my @data = split(/\s+/, $t[0]->[2]);
220        $data[0] *= 1.0e+3;
221
222        my $timePerIter = 1.0e-3 * $data[0] / $data[1];
223
224        # determine iterations/pass
225        $iterCount = int(-$iterCount / $timePerIter + 0.5);
226
227        out("<P>Calibration pass ($CALIBRATE sec): ");
228        out("$data[0] ms, ");
229        out("$data[1] iterations = ");
230        out(formatSeconds(4, $timePerIter), "/iteration<BR>\n");
231    }
232
233    # run passes
234    print "Measuring $iterCount iterations x $NUMPASSES passes...";
235    my @t = callJava($method, $pat, $iterCount, $NUMPASSES);
236    print "done.\n";
237    my @ms = ();
238    my @b; # scratch
239    for my $a (@t) {
240        # $a->[0]: method name, corresponds to $method
241        # $a->[1]: 'begin' data, == $iterCount
242        # $a->[2]: 'end' data, of the form <ms> <loops> <eventsPerIter>
243        # $a->[3...]: gc messages from JVM during pass
244        @b = split(/\s+/, $a->[2]);
245        push(@ms, $b[0] * 1.0e+3);
246    }
247    my $eventsPerIter = $b[2];
248
249    out("Iterations per pass: $iterCount<BR>\n");
250    out("Events per iteration: $eventsPerIter<BR>\n");
251
252    my @ms_str = @ms;
253    $ms_str[0] .= " (discarded)" if (@ms_str > 1);
254    out("Raw times (ms/pass): ", join(", ", @ms_str), "<BR>\n");
255
256    ($iterCount, $eventsPerIter, @ms);
257}
258
259#---------------------------------------------------------------------
260# Invoke java to run $TESTCLASS, passing it the given parameters.
261#
262# @param the method to run
263# @param the number of iterations, or if negative, the duration
264#        in seconds.  If more than on pass is desired, pass in
265#        a string, e.g., "100 100 100".
266# @param the pattern defining characters to test, values in hex digits without 0x
267#
268# @return an array of results.  Each result is an array REF
269#         describing one pass.  The array REF contains:
270#         ->[0]: The method name as reported
271#         ->[1]: The params on the '= <meth> begin ...' line
272#         ->[2]: The params on the '= <meth> end ...' line
273#         ->[3..]: GC messages from the JVM, if any
274#
275sub callJava {
276    my $method = shift;
277    my $pat = shift;
278    my $n = shift;
279    my $passes = shift;
280
281    my $n = ($n < 0) ? "-t ".(-$n) : "-i ".$n;
282
283    my $cmd = "java -cp $CLASSES $TESTCLASS $method $n -p $passes $pat";
284    print "[$cmd]\n"; # for debugging
285    open(PIPE, "$cmd|") or die "Can't run \"$cmd\"";
286    my @out;
287    while (<PIPE>) {
288        push(@out, $_);
289    }
290    close(PIPE) or die "Java failed: \"$cmd\"";
291
292    @out = grep(!/^\#/, @out);  # filter out comments
293
294    #print "[", join("\n", @out), "]\n";
295
296    my @results;
297    my $method = '';
298    my $data = [];
299    foreach (@out) {
300        next unless (/\S/);
301
302        if (/^=\s*(\w+)\s*(\w+)\s*(.*)/) {
303            my ($m, $state, $d) = ($1, $2, $3);
304            #print "$_ => [[$m $state $data]]\n";
305            if ($state eq 'begin') {
306                die "$method was begun but not finished" if ($method);
307                $method = $m;
308                push(@$data, $d);
309                push(@$data, ''); # placeholder for end data
310            } elsif ($state eq 'end') {
311                if ($m ne $method) {
312                    die "$method end does not match: $_";
313                }
314                $data->[1] = $d; # insert end data at [1]
315                #print "#$method:", join(";",@$data), "\n";
316                unshift(@$data, $method); # add method to start
317
318                push(@results, $data);
319                $method = '';
320                $data = [];
321            } else {
322                die "Can't parse: $_";
323            }
324        }
325
326        elsif (/^\[/) {
327            if ($method) {
328                push(@$data, $_);
329            } else {
330                # ignore extraneous GC notices
331            }
332        }
333
334        else {
335            die "Can't parse: $_";
336        }
337    }
338
339    die "$method was begun but not finished" if ($method);
340
341    @results;
342}
343
344#|#---------------------------------------------------------------------
345#|# Format a confidence interval, as given by a Dataset.  Output is as
346#|# as follows:
347#|#   241.23 - 241.98 => 241.5 +/- 0.3
348#|#   241.2 - 243.8 => 242 +/- 1
349#|#   211.0 - 241.0 => 226 +/- 15 or? 230 +/- 20
350#|#   220.3 - 234.3 => 227 +/- 7
351#|#   220.3 - 300.3 => 260 +/- 40
352#|#   220.3 - 1000 => 610 +/- 390 or? 600 +/- 400
353#|#   0.022 - 0.024 => 0.023 +/- 0.001
354#|#   0.022 - 0.032 => 0.027 +/- 0.005
355#|#   0.022 - 1.000 => 0.5 +/- 0.5
356#|# In other words, take one significant digit of the error value and
357#|# display the mean to the same precision.
358#|sub formatDataset {
359#|    my $ds = shift;
360#|    my $lower = $ds->getMean() - $ds->getError();
361#|    my $upper = $ds->getMean() + $ds->getError();
362#|    my $scale = 0;
363#|    # Find how many initial digits are the same
364#|    while ($lower < 1 ||
365#|           int($lower) == int($upper)) {
366#|        $lower *= 10;
367#|        $upper *= 10;
368#|        $scale++;
369#|    }
370#|    while ($lower >= 10 &&
371#|           int($lower) == int($upper)) {
372#|        $lower /= 10;
373#|        $upper /= 10;
374#|        $scale--;
375#|    }
376#|}
377
378#---------------------------------------------------------------------
379# Format a number, optionally with a +/- delta, to n significant
380# digits.
381#
382# @param significant digit, a value >= 1
383# @param multiplier
384# @param time in seconds to be formatted
385# @optional delta in seconds
386#
387# @return string of the form "23" or "23 +/- 10".
388#
389sub formatNumber {
390    my $sigdig = shift;
391    my $mult = shift;
392    my $a = shift;
393    my $delta = shift; # may be undef
394
395    my $result = formatSigDig($sigdig, $a*$mult);
396    if (defined($delta)) {
397        my $d = formatSigDig($sigdig, $delta*$mult);
398        # restrict PRECISION of delta to that of main number
399        if ($result =~ /\.(\d+)/) {
400            # TODO make this work for values with all significant
401            # digits to the left of the decimal, e.g., 1234000.
402
403            # TODO the other thing wrong with this is that it
404            # isn't rounding the $delta properly.  Have to put
405            # this logic into formatSigDig().
406            my $x = length($1);
407            $d =~ s/\.(\d{$x})\d+/.$1/;
408        }
409        $result .= " $PLUS_MINUS " . $d;
410    }
411    $result;
412}
413
414#---------------------------------------------------------------------
415# Format a time, optionally with a +/- delta, to n significant
416# digits.
417#
418# @param significant digit, a value >= 1
419# @param time in seconds to be formatted
420# @optional delta in seconds
421#
422# @return string of the form "23 ms" or "23 +/- 10 ms".
423#
424sub formatSeconds {
425    my $sigdig = shift;
426    my $a = shift;
427    my $delta = shift; # may be undef
428
429    my @MULT = (1   , 1e3,  1e6,  1e9);
430    my @SUFF = ('s' , 'ms', 'us', 'ns');
431
432    # Determine our scale
433    my $i = 0;
434    ++$i while ($a*$MULT[$i] < 1 && $i < @MULT);
435
436    formatNumber($sigdig, $MULT[$i], $a, $delta) . ' ' . $SUFF[$i];
437}
438
439#---------------------------------------------------------------------
440# Format a percentage, optionally with a +/- delta, to n significant
441# digits.
442#
443# @param significant digit, a value >= 1
444# @param value to be formatted, as a fraction, e.g. 0.5 for 50%
445# @optional delta, as a fraction
446#
447# @return string of the form "23 %" or "23 +/- 10 %".
448#
449sub formatPercent {
450    my $sigdig = shift;
451    my $a = shift;
452    my $delta = shift; # may be undef
453
454    formatNumber($sigdig, 100, $a, $delta) . ' %';
455}
456
457#---------------------------------------------------------------------
458# Format a number to n significant digits without using exponential
459# notation.
460#
461# @param significant digit, a value >= 1
462# @param number to be formatted
463#
464# @return string of the form "1234" "12.34" or "0.001234".  If
465#         number was negative, prefixed by '-'.
466#
467sub formatSigDig {
468    my $n = shift() - 1;
469    my $a = shift;
470
471    local $_ = sprintf("%.${n}e", $a);
472    my $sign = (s/^-//) ? '-' : '';
473
474    my $a_e;
475    my $result;
476    if (/^(\d)\.(\d+)e([-+]\d+)$/) {
477        my ($d, $dn, $e) = ($1, $2, $3);
478        $a_e = $e;
479        $d .= $dn;
480        $e++;
481        $d .= '0' while ($e > length($d));
482        while ($e < 1) {
483            $e++;
484            $d = '0' . $d;
485        }
486        if ($e == length($d)) {
487            $result = $sign . $d;
488        } else {
489            $result = $sign . substr($d, 0, $e) . '.' . substr($d, $e);
490        }
491    } else {
492        die "Can't parse $_";
493    }
494    $result;
495}
496
497#eof
498