1#!/usr/local/bin/perl 2# * © 2016 and later: Unicode, Inc. and others. 3# * License & terms of use: http://www.unicode.org/copyright.html 4# ******************************************************************************* 5# * Copyright (C) 2002-2007 International Business Machines Corporation and * 6# * others. All Rights Reserved. * 7# ******************************************************************************* 8 9use strict; 10 11# Assume we are running within the icu4j root directory 12use lib 'src/com/ibm/icu/dev/test/perf'; 13use Dataset; 14 15#--------------------------------------------------------------------- 16# Test class 17my $TESTCLASS = 'com.ibm.icu.dev.test.perf.UCharacterPerf'; 18 19my $CLASSES = './out/bin:../tools/misc/out/bin/:../icu4j.jar'; 20 21# Methods to be tested. Each pair represents a test method and 22# a baseline method which is used for comparison. 23my @METHODS = (['JDKDigit', 'Digit'], 24 ['JDKGetNumericValue', 'GetNumericValue'], 25 ['JDKGetType', 'GetType'], 26 ['JDKIsDefined', 'IsDefined'], 27 ['JDKIsDigit', 'IsDigit'], 28 ['JDKIsIdentifierIgnorable', 'IsIdentifierIgnorable'], 29 ['JDKIsISOControl', 'IsISOControl'], 30 ['JDKIsLetter', 'IsLetter'], 31 ['JDKIsLetterOrDigit', 'IsLetterOrDigit'], 32 ['JDKIsLowerCase', 'IsLowerCase'], 33 ['JDKIsSpaceChar', 'IsSpaceChar'], 34 ['JDKIsTitleCase', 'IsTitleCase'], 35 ['JDKIsUnicodeIdentifierPart', 'IsUnicodeIdentifierPart'], 36 ['JDKIsUnicodeIdentifierStart', 'IsUnicodeIdentifierStart'], 37 ['JDKIsUpperCase', 'IsUpperCase'], 38 ['JDKIsWhiteSpace', 'IsWhiteSpace'], 39 ); 40 41# Patterns which define the set of characters used for testing. 42my @PATTERNS = ('0 ffff'); 43 44my $CALIBRATE = 2; # duration in seconds for initial calibration 45my $DURATION = 10; # duration in seconds for each pass 46my $NUMPASSES = 4; # number of passes. If > 1 then the first pass 47 # is discarded as a JIT warm-up pass. 48 49my $TABLEATTR = 'BORDER="1" CELLPADDING="4" CELLSPACING="0"'; 50 51my $PLUS_MINUS = "±"; 52 53if ($NUMPASSES < 3) { 54 die "Need at least 3 passes. One is discarded (JIT warmup) and need two to have 1 degree of freedom (t distribution)."; 55} 56 57my $OUT; # see out() 58 59main(); 60 61#--------------------------------------------------------------------- 62# ... 63sub main { 64 my $date = localtime; 65 my $title = "ICU4J Performance Test $date"; 66 67 my $html = $date; 68 $html =~ s/://g; # ':' illegal 69 $html =~ s/\s*\d+$//; # delete year 70 $html =~ s/^\w+\s*//; # delete dow 71 $html = "perf $html.html"; 72 73 open(HTML,">$html") or die "Can't write to $html: $!"; 74 75 print HTML <<EOF; 76<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" 77 "http://www.w3.org/TR/html4/strict.dtd"> 78<HTML> 79 <HEAD> 80 <TITLE>$title</TITLE> 81 </HEAD> 82 <BODY> 83EOF 84 print HTML "<H1>$title</H1>\n"; 85 86 print HTML "<H2>$TESTCLASS</H2>\n"; 87 88 my $raw = ""; 89 90 for my $methodPair (@METHODS) { 91 92 my $testMethod = $methodPair->[0]; 93 my $baselineMethod = $methodPair->[1]; 94 95 print HTML "<P><TABLE $TABLEATTR><TR><TD>\n"; 96 print HTML "<P><B>$testMethod vs. $baselineMethod</B></P>\n"; 97 98 print HTML "<P><TABLE $TABLEATTR BGCOLOR=\"#CCFFFF\">\n"; 99 print HTML "<TR><TD>Pattern</TD><TD>$testMethod</TD>"; 100 print HTML "<TD>$baselineMethod</TD><TD>Ratio</TD></TR>\n"; 101 102 $OUT = ''; 103 104 for my $pat (@PATTERNS) { 105 print HTML "<TR><TD>$pat</TD>\n"; 106 107 out("<P><TABLE $TABLEATTR WIDTH=\"100%\">"); 108 109 # measure the test method 110 out("<TR><TD>"); 111 print "\n$testMethod $pat\n"; 112 my $t = measure2($testMethod, $pat, -$DURATION); 113 out("</TD></TR>"); 114 print HTML "<TD>", formatSeconds(4, $t->getMean(), $t->getError); 115 print HTML "/event</TD>\n"; 116 117 # measure baseline method 118 out("<TR><TD>"); 119 print "\nBegin $baselineMethod $pat\n"; 120 my $b = measure2($baselineMethod, $pat, -$DURATION); 121 out("</TD></TR>"); 122 print HTML "<TD>", formatSeconds(4, $b->getMean(), $t->getError); 123 print HTML "/event</TD>\n"; 124 125 out("</TABLE></P>"); 126 127 # output ratio 128 my $r = $t->divide($b); 129 my $mean = $r->getMean() - 1; 130 my $color = $mean < 0 ? "RED" : "BLACK"; 131 print HTML "<TD><B><FONT COLOR=\"$color\">", formatPercent(3, $mean, $r->getError); 132 print HTML "</FONT></B></TD></TR>\n"; 133 } 134 135 print HTML "</TABLE></P>\n"; 136 137 print HTML "<P>Raw data:</P>\n"; 138 print HTML $OUT; 139 print HTML "</TABLE></P>\n"; 140 } 141 142 print HTML <<EOF; 143 </BODY> 144</HTML> 145EOF 146 close(HTML) or die "Can't close $html: $!"; 147} 148 149#--------------------------------------------------------------------- 150# Append text to the global variable $OUT 151sub out { 152 $OUT .= join('', @_); 153} 154 155#--------------------------------------------------------------------- 156# Append text to the global variable $OUT 157sub outln { 158 $OUT .= join('', @_) . "\n"; 159} 160 161#--------------------------------------------------------------------- 162# Measure a given test method with a give test pattern using the 163# global run parameters. 164# 165# @param the method to run 166# @param the pattern defining characters to test 167# @param if >0 then the number of iterations per pass. If <0 then 168# (negative of) the number of seconds per pass. 169# 170# @return a Dataset object, scaled by iterations per pass and 171# events per iteration, to give time per event 172# 173sub measure2 { 174 my @data = measure1(@_); 175 my $iterPerPass = shift(@data); 176 my $eventPerIter = shift(@data); 177 178 shift(@data) if (@data > 1); # discard first run 179 180 my $ds = Dataset->new(@data); 181 $ds->setScale(1.0e-3 / ($iterPerPass * $eventPerIter)); 182 $ds; 183} 184 185#--------------------------------------------------------------------- 186# Measure a given test method with a give test pattern using the 187# global run parameters. 188# 189# @param the method to run 190# @param the pattern defining characters to test 191# @param if >0 then the number of iterations per pass. If <0 then 192# (negative of) the number of seconds per pass. 193# 194# @return array of: 195# [0] iterations per pass 196# [1] events per iteration 197# [2..] ms reported for each pass, in order 198# 199sub measure1 { 200 my $method = shift; 201 my $pat = shift; 202 my $iterCount = shift; # actually might be -seconds/pass 203 204 out("<P>Measuring $method using $pat, "); 205 if ($iterCount > 0) { 206 out("$iterCount iterations/pass, $NUMPASSES passes</P>\n"); 207 } else { 208 out(-$iterCount, " seconds/pass, $NUMPASSES passes</P>\n"); 209 } 210 211 # is $iterCount actually -seconds/pass? 212 if ($iterCount < 0) { 213 214 # calibrate: estimate ms/iteration 215 print "Calibrating..."; 216 my @t = callJava($method, $pat, -$CALIBRATE, 1); 217 print "done.\n"; 218 219 my @data = split(/\s+/, $t[0]->[2]); 220 $data[0] *= 1.0e+3; 221 222 my $timePerIter = 1.0e-3 * $data[0] / $data[1]; 223 224 # determine iterations/pass 225 $iterCount = int(-$iterCount / $timePerIter + 0.5); 226 227 out("<P>Calibration pass ($CALIBRATE sec): "); 228 out("$data[0] ms, "); 229 out("$data[1] iterations = "); 230 out(formatSeconds(4, $timePerIter), "/iteration<BR>\n"); 231 } 232 233 # run passes 234 print "Measuring $iterCount iterations x $NUMPASSES passes..."; 235 my @t = callJava($method, $pat, $iterCount, $NUMPASSES); 236 print "done.\n"; 237 my @ms = (); 238 my @b; # scratch 239 for my $a (@t) { 240 # $a->[0]: method name, corresponds to $method 241 # $a->[1]: 'begin' data, == $iterCount 242 # $a->[2]: 'end' data, of the form <ms> <loops> <eventsPerIter> 243 # $a->[3...]: gc messages from JVM during pass 244 @b = split(/\s+/, $a->[2]); 245 push(@ms, $b[0] * 1.0e+3); 246 } 247 my $eventsPerIter = $b[2]; 248 249 out("Iterations per pass: $iterCount<BR>\n"); 250 out("Events per iteration: $eventsPerIter<BR>\n"); 251 252 my @ms_str = @ms; 253 $ms_str[0] .= " (discarded)" if (@ms_str > 1); 254 out("Raw times (ms/pass): ", join(", ", @ms_str), "<BR>\n"); 255 256 ($iterCount, $eventsPerIter, @ms); 257} 258 259#--------------------------------------------------------------------- 260# Invoke java to run $TESTCLASS, passing it the given parameters. 261# 262# @param the method to run 263# @param the number of iterations, or if negative, the duration 264# in seconds. If more than on pass is desired, pass in 265# a string, e.g., "100 100 100". 266# @param the pattern defining characters to test, values in hex digits without 0x 267# 268# @return an array of results. Each result is an array REF 269# describing one pass. The array REF contains: 270# ->[0]: The method name as reported 271# ->[1]: The params on the '= <meth> begin ...' line 272# ->[2]: The params on the '= <meth> end ...' line 273# ->[3..]: GC messages from the JVM, if any 274# 275sub callJava { 276 my $method = shift; 277 my $pat = shift; 278 my $n = shift; 279 my $passes = shift; 280 281 my $n = ($n < 0) ? "-t ".(-$n) : "-i ".$n; 282 283 my $cmd = "java -cp $CLASSES $TESTCLASS $method $n -p $passes $pat"; 284 print "[$cmd]\n"; # for debugging 285 open(PIPE, "$cmd|") or die "Can't run \"$cmd\""; 286 my @out; 287 while (<PIPE>) { 288 push(@out, $_); 289 } 290 close(PIPE) or die "Java failed: \"$cmd\""; 291 292 @out = grep(!/^\#/, @out); # filter out comments 293 294 #print "[", join("\n", @out), "]\n"; 295 296 my @results; 297 my $method = ''; 298 my $data = []; 299 foreach (@out) { 300 next unless (/\S/); 301 302 if (/^=\s*(\w+)\s*(\w+)\s*(.*)/) { 303 my ($m, $state, $d) = ($1, $2, $3); 304 #print "$_ => [[$m $state $data]]\n"; 305 if ($state eq 'begin') { 306 die "$method was begun but not finished" if ($method); 307 $method = $m; 308 push(@$data, $d); 309 push(@$data, ''); # placeholder for end data 310 } elsif ($state eq 'end') { 311 if ($m ne $method) { 312 die "$method end does not match: $_"; 313 } 314 $data->[1] = $d; # insert end data at [1] 315 #print "#$method:", join(";",@$data), "\n"; 316 unshift(@$data, $method); # add method to start 317 318 push(@results, $data); 319 $method = ''; 320 $data = []; 321 } else { 322 die "Can't parse: $_"; 323 } 324 } 325 326 elsif (/^\[/) { 327 if ($method) { 328 push(@$data, $_); 329 } else { 330 # ignore extraneous GC notices 331 } 332 } 333 334 else { 335 die "Can't parse: $_"; 336 } 337 } 338 339 die "$method was begun but not finished" if ($method); 340 341 @results; 342} 343 344#|#--------------------------------------------------------------------- 345#|# Format a confidence interval, as given by a Dataset. Output is as 346#|# as follows: 347#|# 241.23 - 241.98 => 241.5 +/- 0.3 348#|# 241.2 - 243.8 => 242 +/- 1 349#|# 211.0 - 241.0 => 226 +/- 15 or? 230 +/- 20 350#|# 220.3 - 234.3 => 227 +/- 7 351#|# 220.3 - 300.3 => 260 +/- 40 352#|# 220.3 - 1000 => 610 +/- 390 or? 600 +/- 400 353#|# 0.022 - 0.024 => 0.023 +/- 0.001 354#|# 0.022 - 0.032 => 0.027 +/- 0.005 355#|# 0.022 - 1.000 => 0.5 +/- 0.5 356#|# In other words, take one significant digit of the error value and 357#|# display the mean to the same precision. 358#|sub formatDataset { 359#| my $ds = shift; 360#| my $lower = $ds->getMean() - $ds->getError(); 361#| my $upper = $ds->getMean() + $ds->getError(); 362#| my $scale = 0; 363#| # Find how many initial digits are the same 364#| while ($lower < 1 || 365#| int($lower) == int($upper)) { 366#| $lower *= 10; 367#| $upper *= 10; 368#| $scale++; 369#| } 370#| while ($lower >= 10 && 371#| int($lower) == int($upper)) { 372#| $lower /= 10; 373#| $upper /= 10; 374#| $scale--; 375#| } 376#|} 377 378#--------------------------------------------------------------------- 379# Format a number, optionally with a +/- delta, to n significant 380# digits. 381# 382# @param significant digit, a value >= 1 383# @param multiplier 384# @param time in seconds to be formatted 385# @optional delta in seconds 386# 387# @return string of the form "23" or "23 +/- 10". 388# 389sub formatNumber { 390 my $sigdig = shift; 391 my $mult = shift; 392 my $a = shift; 393 my $delta = shift; # may be undef 394 395 my $result = formatSigDig($sigdig, $a*$mult); 396 if (defined($delta)) { 397 my $d = formatSigDig($sigdig, $delta*$mult); 398 # restrict PRECISION of delta to that of main number 399 if ($result =~ /\.(\d+)/) { 400 # TODO make this work for values with all significant 401 # digits to the left of the decimal, e.g., 1234000. 402 403 # TODO the other thing wrong with this is that it 404 # isn't rounding the $delta properly. Have to put 405 # this logic into formatSigDig(). 406 my $x = length($1); 407 $d =~ s/\.(\d{$x})\d+/.$1/; 408 } 409 $result .= " $PLUS_MINUS " . $d; 410 } 411 $result; 412} 413 414#--------------------------------------------------------------------- 415# Format a time, optionally with a +/- delta, to n significant 416# digits. 417# 418# @param significant digit, a value >= 1 419# @param time in seconds to be formatted 420# @optional delta in seconds 421# 422# @return string of the form "23 ms" or "23 +/- 10 ms". 423# 424sub formatSeconds { 425 my $sigdig = shift; 426 my $a = shift; 427 my $delta = shift; # may be undef 428 429 my @MULT = (1 , 1e3, 1e6, 1e9); 430 my @SUFF = ('s' , 'ms', 'us', 'ns'); 431 432 # Determine our scale 433 my $i = 0; 434 ++$i while ($a*$MULT[$i] < 1 && $i < @MULT); 435 436 formatNumber($sigdig, $MULT[$i], $a, $delta) . ' ' . $SUFF[$i]; 437} 438 439#--------------------------------------------------------------------- 440# Format a percentage, optionally with a +/- delta, to n significant 441# digits. 442# 443# @param significant digit, a value >= 1 444# @param value to be formatted, as a fraction, e.g. 0.5 for 50% 445# @optional delta, as a fraction 446# 447# @return string of the form "23 %" or "23 +/- 10 %". 448# 449sub formatPercent { 450 my $sigdig = shift; 451 my $a = shift; 452 my $delta = shift; # may be undef 453 454 formatNumber($sigdig, 100, $a, $delta) . ' %'; 455} 456 457#--------------------------------------------------------------------- 458# Format a number to n significant digits without using exponential 459# notation. 460# 461# @param significant digit, a value >= 1 462# @param number to be formatted 463# 464# @return string of the form "1234" "12.34" or "0.001234". If 465# number was negative, prefixed by '-'. 466# 467sub formatSigDig { 468 my $n = shift() - 1; 469 my $a = shift; 470 471 local $_ = sprintf("%.${n}e", $a); 472 my $sign = (s/^-//) ? '-' : ''; 473 474 my $a_e; 475 my $result; 476 if (/^(\d)\.(\d+)e([-+]\d+)$/) { 477 my ($d, $dn, $e) = ($1, $2, $3); 478 $a_e = $e; 479 $d .= $dn; 480 $e++; 481 $d .= '0' while ($e > length($d)); 482 while ($e < 1) { 483 $e++; 484 $d = '0' . $d; 485 } 486 if ($e == length($d)) { 487 $result = $sign . $d; 488 } else { 489 $result = $sign . substr($d, 0, $e) . '.' . substr($d, $e); 490 } 491 } else { 492 die "Can't parse $_"; 493 } 494 $result; 495} 496 497#eof 498