xref: /aosp_15_r20/external/tensorflow/tensorflow/lite/tools/benchmark/benchmark_model.cc (revision b6fb3261f9314811a0f4371741dbb8839866f948)
1 /* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/lite/tools/benchmark/benchmark_model.h"
17 
18 #include <unistd.h>
19 
20 #include <iostream>
21 #include <memory>
22 #include <sstream>
23 #include <string>
24 
25 #include "tensorflow/lite/profiling/memory_info.h"
26 #include "tensorflow/lite/profiling/time.h"
27 #include "tensorflow/lite/tools/benchmark/benchmark_utils.h"
28 #include "tensorflow/lite/tools/logging.h"
29 
30 namespace tflite {
31 namespace benchmark {
using tensorflow::Stat;

// Default interval (in ms) between two consecutive memory-footprint checks;
// also used as the fallback when an invalid interval is supplied on the
// command line (see ValidateParams).
constexpr int kMemoryCheckIntervalMs = 50;
35 
#ifdef __linux__
// Reads the current process' memory statistics from /proc/self/statm and
// returns them in MB.
//   *vsize  - total program (virtual) size.
//   *rss    - resident set size.
//   *shared - resident shared pages (per proc(5): RssFile + RssShmem).
//   *code   - text (code) size.
// All outputs are set to 0 if the file cannot be opened or fully parsed.
void GetRssStats(size_t* vsize, size_t* rss, size_t* shared, size_t* code) {
  *vsize = 0;
  *rss = 0;
  *shared = 0;
  *code = 0;
  FILE* fp = fopen("/proc/self/statm", "rt");
  if (fp == nullptr) return;
  // Fix: the fscanf result was previously ignored, so a short read could
  // report partially-parsed page counts as valid MB values.
  const int num_parsed =
      fscanf(fp, "%zu %zu %zu %zu", vsize, rss, shared, code);
  fclose(fp);
  if (num_parsed != 4) {
    *vsize = 0;
    *rss = 0;
    *shared = 0;
    *code = 0;
    return;
  }
  // /proc/self/statm reports sizes in pages; convert to MB
  // (pages * page size in bytes, then >> 20).
  *vsize = *vsize * getpagesize() >> 20;
  *rss = *rss * getpagesize() >> 20;
  *shared = *shared * getpagesize() >> 20;
  *code = *code * getpagesize() >> 20;
}
#endif  // __linux__
52 
DefaultParams()53 BenchmarkParams BenchmarkModel::DefaultParams() {
54   BenchmarkParams params;
55   params.AddParam("num_runs", BenchmarkParam::Create<int32_t>(50));
56   params.AddParam("min_secs", BenchmarkParam::Create<float>(1.0f));
57   params.AddParam("max_secs", BenchmarkParam::Create<float>(150.0f));
58   params.AddParam("run_delay", BenchmarkParam::Create<float>(-1.0f));
59   params.AddParam("run_frequency", BenchmarkParam::Create<float>(-1.0f));
60   params.AddParam("num_threads", BenchmarkParam::Create<int32_t>(-1));
61   params.AddParam("use_caching", BenchmarkParam::Create<bool>(false));
62   params.AddParam("benchmark_name", BenchmarkParam::Create<std::string>(""));
63   params.AddParam("output_prefix", BenchmarkParam::Create<std::string>(""));
64   params.AddParam("warmup_runs", BenchmarkParam::Create<int32_t>(1));
65   params.AddParam("warmup_min_secs", BenchmarkParam::Create<float>(0.5f));
66   params.AddParam("verbose", BenchmarkParam::Create<bool>(false));
67   params.AddParam("dry_run", BenchmarkParam::Create<bool>(false));
68   params.AddParam("report_peak_memory_footprint",
69                   BenchmarkParam::Create<bool>(false));
70   params.AddParam("memory_footprint_check_interval_ms",
71                   BenchmarkParam::Create<int32_t>(kMemoryCheckIntervalMs));
72   return params;
73 }
74 
BenchmarkModel()75 BenchmarkModel::BenchmarkModel() : params_(DefaultParams()) {}
76 
OnBenchmarkEnd(const BenchmarkResults & results)77 void BenchmarkLoggingListener::OnBenchmarkEnd(const BenchmarkResults& results) {
78   auto inference_us = results.inference_time_us();
79   auto init_us = results.startup_latency_us();
80   auto warmup_us = results.warmup_time_us();
81   auto init_mem_usage = results.init_mem_usage();
82   auto overall_mem_usage = results.overall_mem_usage();
83   TFLITE_LOG(INFO) << "Inference timings in us: "
84                    << "Init: " << init_us << ", "
85                    << "First inference: " << warmup_us.first() << ", "
86                    << "Warmup (avg): " << warmup_us.avg() << ", "
87                    << "Inference (avg): " << inference_us.avg();
88 
89   if (!init_mem_usage.IsSupported()) return;
90   TFLITE_LOG(INFO)
91       << "Note: as the benchmark tool itself affects memory footprint, the "
92          "following is only APPROXIMATE to the actual memory footprint of the "
93          "model at runtime. Take the information at your discretion.";
94   TFLITE_LOG(INFO) << "Memory footprint delta from the start of the tool (MB): "
95                    << "init=" << init_mem_usage.max_rss_kb / 1024.0
96                    << " overall=" << overall_mem_usage.max_rss_kb / 1024.0;
97 
98   auto peak_mem_mb = results.peak_mem_mb();
99   if (peak_mem_mb > 0) {
100     TFLITE_LOG(INFO)
101         << "Overall peak memory footprint (MB) via periodic monitoring: "
102         << peak_mem_mb;
103 #ifdef __linux__
104     size_t vsize, rss, shared, code;
105     GetRssStats(&vsize, &rss, &shared, &code);
106     TFLITE_LOG(INFO) << "Memory status at the end of exeution:";
107     TFLITE_LOG(INFO) << "- VmRSS              : " << rss << " MB";
108     TFLITE_LOG(INFO) << "+ RssAnnon           : " << rss - shared << " MB";
109     TFLITE_LOG(INFO) << "+ RssFile + RssShmem : " << shared << " MB";
110 #endif  // __linux_
111   }
112 }
113 
GetFlags()114 std::vector<Flag> BenchmarkModel::GetFlags() {
115   return {
116       CreateFlag<int32_t>(
117           "num_runs", &params_,
118           "expected number of runs, see also min_secs, max_secs"),
119       CreateFlag<float>(
120           "min_secs", &params_,
121           "minimum number of seconds to rerun for, potentially making the "
122           "actual number of runs to be greater than num_runs"),
123       CreateFlag<float>(
124           "max_secs", &params_,
125           "maximum number of seconds to rerun for, potentially making the "
126           "actual number of runs to be less than num_runs. Note if --max-secs "
127           "is exceeded in the middle of a run, the benchmark will continue to "
128           "the end of the run but will not start the next run."),
129       CreateFlag<float>("run_delay", &params_, "delay between runs in seconds"),
130       CreateFlag<float>(
131           "run_frequency", &params_,
132           "Execute at a fixed frequency, instead of a fixed delay."
133           "Note if the targeted rate per second cannot be reached, the "
134           "benchmark would start the next run immediately, trying its best to "
135           "catch up. If set, this will override run_delay."),
136       CreateFlag<int32_t>("num_threads", &params_, "number of threads"),
137       CreateFlag<bool>(
138           "use_caching", &params_,
139           "Enable caching of prepacked weights matrices in matrix "
140           "multiplication routines. Currently implies the use of the Ruy "
141           "library."),
142       CreateFlag<std::string>("benchmark_name", &params_, "benchmark name"),
143       CreateFlag<std::string>("output_prefix", &params_,
144                               "benchmark output prefix"),
145       CreateFlag<int32_t>(
146           "warmup_runs", &params_,
147           "minimum number of runs performed on initialization, to "
148           "allow performance characteristics to settle, see also "
149           "warmup_min_secs"),
150       CreateFlag<float>(
151           "warmup_min_secs", &params_,
152           "minimum number of seconds to rerun for, potentially making the "
153           "actual number of warm-up runs to be greater than warmup_runs"),
154       CreateFlag<bool>("verbose", &params_,
155                        "Whether to log parameters whose values are not set. "
156                        "By default, only log those parameters that are set by "
157                        "parsing their values from the commandline flags."),
158       CreateFlag<bool>("dry_run", &params_,
159                        "Whether to run the tool just with simply loading the "
160                        "model, allocating tensors etc. but without actually "
161                        "invoking any op kernels."),
162       CreateFlag<bool>(
163           "report_peak_memory_footprint", &params_,
164           "Report the peak memory footprint by periodically checking the "
165           "memory footprint. Internally, a separate thread will be spawned for "
166           "this periodic check. Therefore, the performance benchmark result "
167           "could be affected."),
168       CreateFlag<int32_t>("memory_footprint_check_interval_ms", &params_,
169                           "The interval in millisecond between two consecutive "
170                           "memory footprint checks. This is only used when "
171                           "--report_peak_memory_footprint is set to true.")};
172 }
173 
LogParams()174 void BenchmarkModel::LogParams() {
175   const bool verbose = params_.Get<bool>("verbose");
176   TFLITE_LOG(INFO) << "Log parameter values verbosely: [" << verbose << "]";
177 
178   LOG_BENCHMARK_PARAM(int32_t, "num_runs", "Min num runs", verbose);
179   LOG_BENCHMARK_PARAM(float, "min_secs", "Min runs duration (seconds)",
180                       verbose);
181   LOG_BENCHMARK_PARAM(float, "max_secs", "Max runs duration (seconds)",
182                       verbose);
183   LOG_BENCHMARK_PARAM(float, "run_delay", "Inter-run delay (seconds)", verbose);
184   LOG_BENCHMARK_PARAM(float, "run_frequency",
185                       "Number of prorated runs per second", verbose);
186   LOG_BENCHMARK_PARAM(int32_t, "num_threads", "Num threads", verbose);
187   LOG_BENCHMARK_PARAM(bool, "use_caching", "Use caching", verbose);
188   LOG_BENCHMARK_PARAM(std::string, "benchmark_name", "Benchmark name", verbose);
189   LOG_BENCHMARK_PARAM(std::string, "output_prefix", "Output prefix", verbose);
190   LOG_BENCHMARK_PARAM(int32_t, "warmup_runs", "Min warmup runs", verbose);
191   LOG_BENCHMARK_PARAM(float, "warmup_min_secs",
192                       "Min warmup runs duration (seconds)", verbose);
193   LOG_BENCHMARK_PARAM(bool, "dry_run", "Run w/o invoking kernels", verbose);
194   LOG_BENCHMARK_PARAM(bool, "report_peak_memory_footprint",
195                       "Report the peak memory footprint", verbose);
196   LOG_BENCHMARK_PARAM(int32_t, "memory_footprint_check_interval_ms",
197                       "Memory footprint check interval (ms)", verbose);
198 }
199 
PrepareInputData()200 TfLiteStatus BenchmarkModel::PrepareInputData() { return kTfLiteOk; }
201 
ResetInputsAndOutputs()202 TfLiteStatus BenchmarkModel::ResetInputsAndOutputs() { return kTfLiteOk; }
203 
Run(int min_num_times,float min_secs,float max_secs,RunType run_type,TfLiteStatus * invoke_status)204 Stat<int64_t> BenchmarkModel::Run(int min_num_times, float min_secs,
205                                   float max_secs, RunType run_type,
206                                   TfLiteStatus* invoke_status) {
207   Stat<int64_t> run_stats;
208   TFLITE_LOG(INFO) << "Running benchmark for at least " << min_num_times
209                    << " iterations and at least " << min_secs << " seconds but"
210                    << " terminate if exceeding " << max_secs << " seconds.";
211   int64_t now_us = profiling::time::NowMicros();
212   int64_t min_finish_us = now_us + static_cast<int64_t>(min_secs * 1.e6f);
213   int64_t max_finish_us = now_us + static_cast<int64_t>(max_secs * 1.e6f);
214 
215   *invoke_status = kTfLiteOk;
216   float inter_run_sleep_time = params_.Get<float>("run_delay");
217   auto run_frequency = params_.Get<float>("run_frequency");
218   double manual_inter_run_gap = 1.0 / run_frequency;
219   // float doesn't have sufficient precision for storing this number
220   double next_run_finish_time = now_us * 1e-6 + manual_inter_run_gap;
221   for (int run = 0; (run < min_num_times || now_us < min_finish_us) &&
222                     now_us <= max_finish_us;
223        run++) {
224     ResetInputsAndOutputs();
225     listeners_.OnSingleRunStart(run_type);
226     int64_t start_us = profiling::time::NowMicros();
227     TfLiteStatus status = RunImpl();
228     int64_t end_us = profiling::time::NowMicros();
229     listeners_.OnSingleRunEnd();
230 
231     run_stats.UpdateStat(end_us - start_us);
232     if (run_frequency > 0) {
233       inter_run_sleep_time =
234           next_run_finish_time - profiling::time::NowMicros() * 1e-6;
235       next_run_finish_time += manual_inter_run_gap;
236     }
237     // Note when "inter_run_sleep_time" is negative or 0.0,
238     // the function will return immediately.
239     util::SleepForSeconds(inter_run_sleep_time);
240     now_us = profiling::time::NowMicros();
241 
242     if (status != kTfLiteOk) {
243       *invoke_status = status;
244     }
245   }
246 
247   std::stringstream stream;
248   run_stats.OutputToStream(&stream);
249   TFLITE_LOG(INFO) << stream.str() << std::endl;
250 
251   return run_stats;
252 }
253 
ValidateParams()254 TfLiteStatus BenchmarkModel::ValidateParams() {
255   if (params_.Get<bool>("report_peak_memory_footprint")) {
256     const int32_t interval =
257         params_.Get<int32_t>("memory_footprint_check_interval_ms");
258     if (interval <= 0) {
259       TFLITE_LOG(WARN) << "--memory_footprint_check_interval_ms is set to "
260                        << interval
261                        << " (ms), This value is invalid, and it will be set to "
262                           "the default value "
263                        << kMemoryCheckIntervalMs << " (ms).";
264       params_.Set<int32_t>("memory_footprint_check_interval_ms",
265                            kMemoryCheckIntervalMs);
266     }
267   }
268   return kTfLiteOk;
269 }
270 
Run(int argc,char ** argv)271 TfLiteStatus BenchmarkModel::Run(int argc, char** argv) {
272   TF_LITE_ENSURE_STATUS(ParseFlags(argc, argv));
273   return Run();
274 }
275 
Run()276 TfLiteStatus BenchmarkModel::Run() {
277   TF_LITE_ENSURE_STATUS(ValidateParams());
278 
279   LogParams();
280 
281   auto peak_memory_reporter = MayCreateMemoryUsageMonitor();
282   if (peak_memory_reporter != nullptr) peak_memory_reporter->Start();
283   const double model_size_mb = MayGetModelFileSize() / 1e6;
284   const auto start_mem_usage = profiling::memory::GetMemoryUsage();
285   int64_t initialization_start_us = profiling::time::NowMicros();
286   TF_LITE_ENSURE_STATUS(Init());
287   const auto init_end_mem_usage = profiling::memory::GetMemoryUsage();
288   int64_t initialization_end_us = profiling::time::NowMicros();
289   int64_t startup_latency_us = initialization_end_us - initialization_start_us;
290   const auto init_mem_usage = init_end_mem_usage - start_mem_usage;
291 
292   if (model_size_mb > 0) {
293     TFLITE_LOG(INFO) << "The input model file size (MB): " << model_size_mb;
294   }
295   TFLITE_LOG(INFO) << "Initialized session in " << startup_latency_us / 1e3
296                    << "ms.";
297 
298   TF_LITE_ENSURE_STATUS(PrepareInputData());
299 
300   TfLiteStatus status = kTfLiteOk;
301   uint64_t input_bytes = ComputeInputBytes();
302 
303   // Overwrite certain parameters when --dry_run=true is set.
304   if (params_.Get<bool>("dry_run")) {
305     params_.Set("warmup_runs", 0);
306     params_.Set("warmup_min_secs", -1.0f);
307     params_.Set("num_runs", 0);
308     params_.Set("min_secs", -1.0f);
309   }
310 
311   listeners_.OnBenchmarkStart(params_);
312   Stat<int64_t> warmup_time_us =
313       Run(params_.Get<int32_t>("warmup_runs"),
314           params_.Get<float>("warmup_min_secs"), params_.Get<float>("max_secs"),
315           WARMUP, &status);
316   if (status != kTfLiteOk) {
317     return status;
318   }
319 
320   Stat<int64_t> inference_time_us =
321       Run(params_.Get<int32_t>("num_runs"), params_.Get<float>("min_secs"),
322           params_.Get<float>("max_secs"), REGULAR, &status);
323   const auto overall_mem_usage =
324       profiling::memory::GetMemoryUsage() - start_mem_usage;
325 
326   float peak_mem_mb = profiling::memory::MemoryUsageMonitor::kInvalidMemUsageMB;
327   if (peak_memory_reporter != nullptr) {
328     peak_memory_reporter->Stop();
329     peak_mem_mb = peak_memory_reporter->GetPeakMemUsageInMB();
330   }
331 
332   listeners_.OnBenchmarkEnd({model_size_mb, startup_latency_us, input_bytes,
333                              warmup_time_us, inference_time_us, init_mem_usage,
334                              overall_mem_usage, peak_mem_mb});
335   return status;
336 }
337 
ParseFlags(int * argc,char ** argv)338 TfLiteStatus BenchmarkModel::ParseFlags(int* argc, char** argv) {
339   auto flag_list = GetFlags();
340   const bool parse_result =
341       Flags::Parse(argc, const_cast<const char**>(argv), flag_list);
342   // "--help" flag is added in tools/delegates/default_execution_provider.cc. As
343   // this is an optional dependency, we need to check whether "--help" exists or
344   // not first.
345   if (!parse_result ||
346       (params_.HasParam("help") && params_.Get<bool>("help"))) {
347     std::string usage = Flags::Usage(argv[0], flag_list);
348     TFLITE_LOG(ERROR) << usage;
349     // Returning kTfLiteError intentionally when "--help=true" is specified so
350     // that the caller could check the return value to decide stopping the
351     // execution.
352     return kTfLiteError;
353   }
354 
355   std::string unconsumed_args =
356       Flags::ArgsToString(*argc, const_cast<const char**>(argv));
357   if (!unconsumed_args.empty()) {
358     TFLITE_LOG(WARN) << "Unconsumed cmdline flags: " << unconsumed_args;
359   }
360 
361   return kTfLiteOk;
362 }
363 
364 std::unique_ptr<profiling::memory::MemoryUsageMonitor>
MayCreateMemoryUsageMonitor() const365 BenchmarkModel::MayCreateMemoryUsageMonitor() const {
366   if (!params_.Get<bool>("report_peak_memory_footprint")) return nullptr;
367 
368   return std::make_unique<profiling::memory::MemoryUsageMonitor>(
369 
370       params_.Get<int32_t>("memory_footprint_check_interval_ms"));
371 }
372 
373 }  // namespace benchmark
374 }  // namespace tflite
375