1*b095b053SXin Li #include <benchmark/benchmark.h>
2*b095b053SXin Li
3*b095b053SXin Li #include <pthreadpool.h>
4*b095b053SXin Li
5*b095b053SXin Li #include <thread>
6*b095b053SXin Li
SetNumberOfThreads(benchmark::internal::Benchmark * benchmark)7*b095b053SXin Li static void SetNumberOfThreads(benchmark::internal::Benchmark* benchmark) {
8*b095b053SXin Li const int max_threads = std::thread::hardware_concurrency();
9*b095b053SXin Li for (int t = 1; t <= max_threads; t++) {
10*b095b053SXin Li benchmark->Arg(t);
11*b095b053SXin Li }
12*b095b053SXin Li }
13*b095b053SXin Li
14*b095b053SXin Li
// No-op task passed to pthreadpool_parallelize_1d by the benchmark below.
// Both arguments are deliberately ignored; they are left unnamed (like the
// other no-op tasks in this file) so no unused-parameter warning is emitted.
static void compute_1d(void*, size_t) {
}
17*b095b053SXin Li
pthreadpool_parallelize_1d(benchmark::State & state)18*b095b053SXin Li static void pthreadpool_parallelize_1d(benchmark::State& state) {
19*b095b053SXin Li const uint32_t threads = static_cast<uint32_t>(state.range(0));
20*b095b053SXin Li pthreadpool_t threadpool = pthreadpool_create(threads);
21*b095b053SXin Li while (state.KeepRunning()) {
22*b095b053SXin Li pthreadpool_parallelize_1d(
23*b095b053SXin Li threadpool,
24*b095b053SXin Li compute_1d,
25*b095b053SXin Li nullptr /* context */,
26*b095b053SXin Li threads,
27*b095b053SXin Li 0 /* flags */);
28*b095b053SXin Li }
29*b095b053SXin Li pthreadpool_destroy(threadpool);
30*b095b053SXin Li }
31*b095b053SXin Li BENCHMARK(pthreadpool_parallelize_1d)->UseRealTime()->Apply(SetNumberOfThreads);
32*b095b053SXin Li
33*b095b053SXin Li
// No-op task passed to pthreadpool_parallelize_1d_tile_1d by the benchmark
// below. All three arguments are ignored.
static void compute_1d_tile_1d(void*, size_t, size_t) {
  // Intentionally empty.
}
36*b095b053SXin Li
pthreadpool_parallelize_1d_tile_1d(benchmark::State & state)37*b095b053SXin Li static void pthreadpool_parallelize_1d_tile_1d(benchmark::State& state) {
38*b095b053SXin Li const uint32_t threads = static_cast<uint32_t>(state.range(0));
39*b095b053SXin Li pthreadpool_t threadpool = pthreadpool_create(threads);
40*b095b053SXin Li while (state.KeepRunning()) {
41*b095b053SXin Li pthreadpool_parallelize_1d_tile_1d(
42*b095b053SXin Li threadpool,
43*b095b053SXin Li compute_1d_tile_1d,
44*b095b053SXin Li nullptr /* context */,
45*b095b053SXin Li threads, 1,
46*b095b053SXin Li 0 /* flags */);
47*b095b053SXin Li }
48*b095b053SXin Li pthreadpool_destroy(threadpool);
49*b095b053SXin Li }
50*b095b053SXin Li BENCHMARK(pthreadpool_parallelize_1d_tile_1d)->UseRealTime()->Apply(SetNumberOfThreads);
51*b095b053SXin Li
52*b095b053SXin Li
// No-op task passed to pthreadpool_parallelize_2d by the benchmark below.
// All three arguments are ignored.
static void compute_2d(void*, size_t, size_t) {
  // Intentionally empty.
}
55*b095b053SXin Li
pthreadpool_parallelize_2d(benchmark::State & state)56*b095b053SXin Li static void pthreadpool_parallelize_2d(benchmark::State& state) {
57*b095b053SXin Li const uint32_t threads = static_cast<uint32_t>(state.range(0));
58*b095b053SXin Li pthreadpool_t threadpool = pthreadpool_create(threads);
59*b095b053SXin Li while (state.KeepRunning()) {
60*b095b053SXin Li pthreadpool_parallelize_2d(
61*b095b053SXin Li threadpool,
62*b095b053SXin Li compute_2d,
63*b095b053SXin Li nullptr /* context */,
64*b095b053SXin Li 1, threads,
65*b095b053SXin Li 0 /* flags */);
66*b095b053SXin Li }
67*b095b053SXin Li pthreadpool_destroy(threadpool);
68*b095b053SXin Li }
69*b095b053SXin Li BENCHMARK(pthreadpool_parallelize_2d)->UseRealTime()->Apply(SetNumberOfThreads);
70*b095b053SXin Li
71*b095b053SXin Li
// No-op task passed to pthreadpool_parallelize_2d_tile_2d by the benchmark
// below. All five arguments are ignored.
static void compute_2d_tile_2d(void*, size_t, size_t, size_t, size_t) {
  // Intentionally empty.
}
74*b095b053SXin Li
pthreadpool_parallelize_2d_tile_2d(benchmark::State & state)75*b095b053SXin Li static void pthreadpool_parallelize_2d_tile_2d(benchmark::State& state) {
76*b095b053SXin Li const uint32_t threads = static_cast<uint32_t>(state.range(0));
77*b095b053SXin Li pthreadpool_t threadpool = pthreadpool_create(threads);
78*b095b053SXin Li while (state.KeepRunning()) {
79*b095b053SXin Li pthreadpool_parallelize_2d_tile_2d(
80*b095b053SXin Li threadpool,
81*b095b053SXin Li compute_2d_tile_2d,
82*b095b053SXin Li nullptr /* context */,
83*b095b053SXin Li 1, threads,
84*b095b053SXin Li 1, 1,
85*b095b053SXin Li 0 /* flags */);
86*b095b053SXin Li }
87*b095b053SXin Li pthreadpool_destroy(threadpool);
88*b095b053SXin Li }
89*b095b053SXin Li BENCHMARK(pthreadpool_parallelize_2d_tile_2d)->UseRealTime()->Apply(SetNumberOfThreads);
90*b095b053SXin Li
91*b095b053SXin Li
92*b095b053SXin Li BENCHMARK_MAIN();
93