1 //===-- Performance test for nearest integer functions --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "src/__support/FPUtil/FPBits.h"
10 #include "src/math/ceilf.h"
11 #include "src/math/ceilf16.h"
12 #include "src/math/floorf.h"
13 #include "src/math/floorf16.h"
14 #include "src/math/rintf.h"
15 #include "src/math/rintf16.h"
16 #include "src/math/roundevenf.h"
17 #include "src/math/roundevenf16.h"
18 #include "src/math/roundf.h"
19 #include "src/math/roundf16.h"
20 #include "src/math/truncf.h"
21 #include "src/math/truncf16.h"
22 #include "test/UnitTest/RoundingModeUtils.h"
23 #include "test/src/math/performance_testing/Timer.h"
24
25 #include <fstream>
26 #include <math.h>
27
28 using LIBC_NAMESPACE::fputil::testing::ForceRoundingMode;
29 using LIBC_NAMESPACE::fputil::testing::RoundingMode;
30
31 namespace LIBC_NAMESPACE::testing {
32
33 template <typename T> class NearestIntegerPerf {
34 using FPBits = fputil::FPBits<T>;
35 using StorageType = typename FPBits::StorageType;
36
37 public:
38 typedef T Func(T);
39
run_perf_in_range(Func my_func,Func other_func,StorageType starting_bit,StorageType ending_bit,StorageType step,size_t rounds,std::ofstream & log)40 static void run_perf_in_range(Func my_func, Func other_func,
41 StorageType starting_bit,
42 StorageType ending_bit, StorageType step,
43 size_t rounds, std::ofstream &log) {
44 auto runner = [=](Func func) {
45 [[maybe_unused]] volatile T result;
46 for (size_t i = 0; i < rounds; i++) {
47 for (StorageType bits = starting_bit; bits <= ending_bit;
48 bits += step) {
49 T x = FPBits(bits).get_val();
50 result = func(x);
51 }
52 }
53 };
54
55 Timer timer;
56 timer.start();
57 runner(my_func);
58 timer.stop();
59
60 size_t number_of_runs = (ending_bit - starting_bit) / step + 1;
61 double my_average =
62 static_cast<double>(timer.nanoseconds()) / number_of_runs / rounds;
63 log << "-- My function --\n";
64 log << " Total time : " << timer.nanoseconds() << " ns \n";
65 log << " Average runtime : " << my_average << " ns/op \n";
66 log << " Ops per second : "
67 << static_cast<uint64_t>(1'000'000'000.0 / my_average) << " op/s \n";
68
69 timer.start();
70 runner(other_func);
71 timer.stop();
72
73 double other_average =
74 static_cast<double>(timer.nanoseconds()) / number_of_runs / rounds;
75 log << "-- Other function --\n";
76 log << " Total time : " << timer.nanoseconds() << " ns \n";
77 log << " Average runtime : " << other_average << " ns/op \n";
78 log << " Ops per second : "
79 << static_cast<uint64_t>(1'000'000'000.0 / other_average) << " op/s \n";
80
81 log << "-- Average runtime ratio --\n";
82 log << " Mine / Other's : " << my_average / other_average << " \n";
83 }
84
run_perf(Func my_func,Func other_func,size_t rounds,const char * log_file)85 static void run_perf(Func my_func, Func other_func, size_t rounds,
86 const char *log_file) {
87 std::ofstream log(log_file);
88 log << "Performance tests with inputs in normal integral range:\n";
89 run_perf_in_range(
90 my_func, other_func,
91 /*starting_bit=*/StorageType((FPBits::EXP_BIAS + 1) << FPBits::SIG_LEN),
92 /*ending_bit=*/
93 StorageType((FPBits::EXP_BIAS + FPBits::FRACTION_LEN - 1)
94 << FPBits::SIG_LEN),
95 /*step=*/StorageType(1 << FPBits::SIG_LEN),
96 rounds * FPBits::EXP_BIAS * FPBits::EXP_BIAS * 2, log);
97 log << "\n Performance tests with inputs in low integral range:\n";
98 run_perf_in_range(
99 my_func, other_func,
100 /*starting_bit=*/StorageType(1 << FPBits::SIG_LEN),
101 /*ending_bit=*/StorageType((FPBits::EXP_BIAS - 1) << FPBits::SIG_LEN),
102 /*step_bit=*/StorageType(1 << FPBits::SIG_LEN),
103 rounds * FPBits::EXP_BIAS * FPBits::EXP_BIAS * 2, log);
104 log << "\n Performance tests with inputs in high integral range:\n";
105 run_perf_in_range(
106 my_func, other_func,
107 /*starting_bit=*/
108 StorageType((FPBits::EXP_BIAS + FPBits::FRACTION_LEN)
109 << FPBits::SIG_LEN),
110 /*ending_bit=*/
111 StorageType(FPBits::MAX_BIASED_EXPONENT << FPBits::SIG_LEN),
112 /*step=*/StorageType(1 << FPBits::SIG_LEN),
113 rounds * FPBits::EXP_BIAS * FPBits::EXP_BIAS * 2, log);
114 log << "\n Performance tests with inputs in normal fractional range:\n";
115 run_perf_in_range(
116 my_func, other_func,
117 /*starting_bit=*/
118 StorageType(((FPBits::EXP_BIAS + 1) << FPBits::SIG_LEN) + 1),
119 /*ending_bit=*/
120 StorageType(((FPBits::EXP_BIAS + 2) << FPBits::SIG_LEN) - 1),
121 /*step=*/StorageType(1), rounds * 2, log);
122 log << "\n Performance tests with inputs in subnormal fractional range:\n";
123 run_perf_in_range(my_func, other_func, /*starting_bit=*/StorageType(1),
124 /*ending_bit=*/StorageType(FPBits::SIG_MASK),
125 /*step=*/StorageType(1), rounds, log);
126 }
127 };
128
129 } // namespace LIBC_NAMESPACE::testing
130
// Benchmarks `my_func` against `other_func` for floating-point type `T` via
// NearestIntegerPerf<T>::run_perf, logging to `filename`. The benchmark is
// executed twice in a row with identical arguments; run_perf opens the log
// with a fresh std::ofstream each time, so the second pass replaces the
// output of the first.
// NOTE(review): the first pass presumably acts as a warm-up - confirm.
#define NEAREST_INTEGER_PERF(T, my_func, other_func, rounds, filename)         \
  {                                                                            \
    using PerfRunner = LIBC_NAMESPACE::testing::NearestIntegerPerf<T>;         \
    for (int pass = 0; pass < 2; ++pass)                                       \
      PerfRunner::run_perf(&my_func, &other_func, rounds, filename);           \
  }
138
// Base repetition counts passed as `rounds` to NEAREST_INTEGER_PERF for the
// float16 and the float benchmarks respectively.
static constexpr size_t FLOAT16_ROUNDS{20'000};
static constexpr size_t FLOAT_ROUNDS{40};
141
142 // LLVM libc might be the only libc implementation with support for float16 math
143 // functions currently. We can't compare our float16 functions against the
144 // system libc, so we compare them against this placeholder function.
placeholderf16(float16 x)145 float16 placeholderf16(float16 x) { return x; }
146
// The C23 roundeven* functions may also be missing from the system libc, so
// this identity function stands in as the "other" side for those benchmarks.
float placeholderf(float x) {
  // Hand the argument back untouched.
  return x;
}
149
main()150 int main() {
151 NEAREST_INTEGER_PERF(float16, LIBC_NAMESPACE::ceilf16, ::placeholderf16,
152 FLOAT16_ROUNDS, "ceilf16_perf.log")
153 NEAREST_INTEGER_PERF(float16, LIBC_NAMESPACE::floorf16, ::placeholderf16,
154 FLOAT16_ROUNDS, "floorf16_perf.log")
155 NEAREST_INTEGER_PERF(float16, LIBC_NAMESPACE::roundf16, ::placeholderf16,
156 FLOAT16_ROUNDS, "roundf16_perf.log")
157 NEAREST_INTEGER_PERF(float16, LIBC_NAMESPACE::roundevenf16, ::placeholderf16,
158 FLOAT16_ROUNDS, "roundevenf16_perf.log")
159 NEAREST_INTEGER_PERF(float16, LIBC_NAMESPACE::truncf16, ::placeholderf16,
160 FLOAT16_ROUNDS, "truncf16_perf.log")
161
162 NEAREST_INTEGER_PERF(float, LIBC_NAMESPACE::ceilf, ::ceilf, FLOAT_ROUNDS,
163 "ceilf_perf.log")
164 NEAREST_INTEGER_PERF(float, LIBC_NAMESPACE::floorf, ::floorf, FLOAT_ROUNDS,
165 "floorf_perf.log")
166 NEAREST_INTEGER_PERF(float, LIBC_NAMESPACE::roundf, ::roundf, FLOAT_ROUNDS,
167 "roundf_perf.log")
168 NEAREST_INTEGER_PERF(float, LIBC_NAMESPACE::roundevenf, ::placeholderf,
169 FLOAT_ROUNDS, "roundevenf_perf.log")
170 NEAREST_INTEGER_PERF(float, LIBC_NAMESPACE::truncf, ::truncf, FLOAT_ROUNDS,
171 "truncf_perf.log")
172
173 if (ForceRoundingMode r(RoundingMode::Upward); r.success) {
174 NEAREST_INTEGER_PERF(float16, LIBC_NAMESPACE::rintf16, ::placeholderf16,
175 FLOAT16_ROUNDS, "rintf16_upward_perf.log")
176 NEAREST_INTEGER_PERF(float, LIBC_NAMESPACE::rintf, ::rintf, FLOAT_ROUNDS,
177 "rintf_upward_perf.log")
178 }
179 if (ForceRoundingMode r(RoundingMode::Downward); r.success) {
180 NEAREST_INTEGER_PERF(float16, LIBC_NAMESPACE::rintf16, ::placeholderf16,
181 FLOAT16_ROUNDS, "rintf16_downward_perf.log")
182 NEAREST_INTEGER_PERF(float, LIBC_NAMESPACE::rintf, ::rintf, FLOAT_ROUNDS,
183 "rintf_downward_perf.log")
184 }
185 if (ForceRoundingMode r(RoundingMode::TowardZero); r.success) {
186 NEAREST_INTEGER_PERF(float16, LIBC_NAMESPACE::rintf16, ::placeholderf16,
187 FLOAT16_ROUNDS, "rintf16_towardzero_perf.log")
188 NEAREST_INTEGER_PERF(float, LIBC_NAMESPACE::rintf, ::rintf, FLOAT_ROUNDS,
189 "rintf_towardzero_perf.log")
190 }
191 if (ForceRoundingMode r(RoundingMode::Nearest); r.success) {
192 NEAREST_INTEGER_PERF(float16, LIBC_NAMESPACE::rintf16, ::placeholderf16,
193 FLOAT16_ROUNDS, "rintf16_nearest_perf.log")
194 NEAREST_INTEGER_PERF(float, LIBC_NAMESPACE::rintf, ::rintf, FLOAT_ROUNDS,
195 "rintf_nearest_perf.log")
196 }
197
198 return 0;
199 }
200