/*
 * Copyright (C) 2018 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <ExecutionBurstServer.h>
#include <HalInterfaces.h>
#include <SampleDriver.h>
#include <ValidateHal.h>
#include <gtest/gtest.h>

#include <algorithm>
#include <cassert>
#include <chrono>
#include <iterator>
#include <map>
#include <queue>
#include <set>
#include <string>
#include <thread>
#include <tuple>
#include <utility>
#include <vector>

#include "CompilationBuilder.h"
#include "HalUtils.h"
#include "Manager.h"
#include "NeuralNetworks.h"
#include "NeuralNetworksOEM.h"
#include "TestNeuralNetworksWrapper.h"

namespace {

using namespace ::android;
namespace V1_0 = ::android::hardware::neuralnetworks::V1_0;
namespace V1_1 = ::android::hardware::neuralnetworks::V1_1;
namespace V1_2 = ::android::hardware::neuralnetworks::V1_2;
namespace V1_3 = ::android::hardware::neuralnetworks::V1_3;

using CompilationBuilder = nn::CompilationBuilder;
using Device = nn::Device;
using DeviceManager = nn::DeviceManager;
using ExecutePreference = nn::test_wrapper::ExecutePreference;
using ExecutionBurstServer = nn::ExecutionBurstServer;
using HidlModel = V1_3::Model;
using Result = nn::test_wrapper::Result;
using SampleDriver = nn::sample_driver::SampleDriver;
using SamplePreparedModel = nn::sample_driver::SamplePreparedModel;
using SampleFencedExecutionCallback = nn::sample_driver::SampleFencedExecutionCallback;
using WrapperModel = nn::test_wrapper::Model;
using WrapperOperandType = nn::test_wrapper::OperandType;
using WrapperType = nn::test_wrapper::Type;
using nn::convertToV1_0;
using nn::convertToV1_3;

template <typename T>
using MQDescriptorSync = hardware::MQDescriptorSync<T>;

constexpr V1_2::Timing kBadTiming = {.timeOnDevice = UINT64_MAX, .timeInDriver = UINT64_MAX};
constexpr V1_2::Timing kGoodUnfencedTiming = {.timeOnDevice = 123, .timeInDriver = 456};
constexpr V1_2::Timing kGoodFencedTiming = {.timeOnDevice = 23, .timeInDriver = 56};
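
// A note on conventions: per the V1_2 HAL, Timing fields are reported in
// microseconds, and a field equal to UINT64_MAX means the measurement is
// unavailable -- hence kBadTiming above. The NDK surfaces these values via
// ANeuralNetworksExecution_getDuration() in nanoseconds (see microsToNanos in
// the timing tests below).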

// This is an IDevice for testing purposes. The test driver has customized
// getCapabilities_1_3 and getSupportedOperations_1_3.
class TestDriver : public SampleDriver {
   public:
    TestDriver(const char* name, V1_3::Capabilities capabilities,
               const std::vector<bool>& supportedOps)
        : SampleDriver(name), mCapabilities(capabilities), mSupportedOps(supportedOps) {}
    ~TestDriver() override {}

    hardware::Return<void> getCapabilities_1_3(getCapabilities_1_3_cb cb) override {
        cb(V1_3::ErrorStatus::NONE, mCapabilities);
        return hardware::Void();
    }

    hardware::Return<void> getSupportedOperations_1_3(const V1_3::Model& model,
                                                      getSupportedOperations_1_3_cb cb) override {
        if (!android::nn::validateModel(model)) {
            cb(V1_3::ErrorStatus::INVALID_ARGUMENT, std::vector<bool>());
            return hardware::Void();
        }
        const size_t count = model.main.operations.size();
        std::vector<bool> supported(count);
        std::transform(model.main.operations.begin(), model.main.operations.end(),
                       supported.begin(), [this](V1_3::Operation op) {
                           return mSupportedOps[static_cast<int32_t>(op.type)];
                       });
        cb(V1_3::ErrorStatus::NONE, supported);
        return hardware::Void();
    }

   private:
    V1_3::Capabilities mCapabilities;
    std::vector<bool> mSupportedOps;
};

class IntrospectionControlTest : public ::testing::Test {
   protected:
    void SetUp() override {}
    void TearDown() override {
        if (mEvent) {
            ANeuralNetworksEvent_free(mEvent);
        }
        if (mExecution) {
            ANeuralNetworksExecution_free(mExecution);
        }
        if (mCompilation) {
            ANeuralNetworksCompilation_free(mCompilation);
        }
        DeviceManager::get()->forTest_reInitializeDeviceList();
    }

    struct DeviceSpecification {
        DeviceSpecification(const std::string& name, float perf, std::vector<bool>& supportedOps)
            : mName(name), mSupportedOps(supportedOps) {
            V1_0::PerformanceInfo perfInfo = {.execTime = perf, .powerUsage = perf};
            mCapabilities = {
                    .relaxedFloat32toFloat16PerformanceScalar = perfInfo,
                    .relaxedFloat32toFloat16PerformanceTensor = perfInfo,
                    .operandPerformance =
                            nn::nonExtensionOperandPerformance<nn::HalVersion::V1_3>(perfInfo),
                    .ifPerformance = perfInfo,
                    .whilePerformance = perfInfo};
        }
        std::string mName;
        V1_3::Capabilities mCapabilities;
        std::vector<bool> mSupportedOps;
    };

    // From a vector of DeviceSpecification, register new Devices.
    void registerDevices(std::vector<DeviceSpecification> specifications) {
        for (const auto& specification : specifications) {
            DeviceManager::get()->forTest_registerDevice(nn::makeSharedDevice(
                    specification.mName.c_str(),
                    new TestDriver(specification.mName.c_str(), specification.mCapabilities,
                                   specification.mSupportedOps)));
        }
    }

    bool selectDeviceByName(const std::string& name) {
        uint32_t numDevices = 0;
        EXPECT_EQ(ANeuralNetworks_getDeviceCount(&numDevices), ANEURALNETWORKS_NO_ERROR);
        EXPECT_GE(numDevices, (uint32_t)1);

        for (uint32_t i = 0; i < numDevices; i++) {
            ANeuralNetworksDevice* device = nullptr;
            EXPECT_EQ(ANeuralNetworks_getDevice(i, &device), ANEURALNETWORKS_NO_ERROR);
            const char* buffer = nullptr;
            int result = ANeuralNetworksDevice_getName(device, &buffer);
            if (result == ANEURALNETWORKS_NO_ERROR && name.compare(buffer) == 0) {
                mDevices.push_back(device);
                return true;
            }
        }
        return false;
    }

    bool isSupportedOpListExpected(const std::vector<bool>& expected) {
        const uint32_t kMaxNumberOperations = 256;
        EXPECT_LE(expected.size(), kMaxNumberOperations);
        ANeuralNetworksModel* modelHandle = mModel.getHandle();
        bool supported[kMaxNumberOperations] = {false};
        EXPECT_EQ(ANeuralNetworksModel_getSupportedOperationsForDevices(
                          modelHandle, mDevices.data(), mDevices.size(), supported),
                  ANEURALNETWORKS_NO_ERROR);
        return std::equal(expected.begin(), expected.end(), supported);
    }

    int prepareForExecution(bool measureTiming = false) {
        ANeuralNetworksModel* modelHandle = mModel.getHandle();
        int result = ANeuralNetworksCompilation_createForDevices(modelHandle, mDevices.data(),
                                                                 mDevices.size(), &mCompilation);
        if (result != ANEURALNETWORKS_NO_ERROR) {
            return result;
        }
        EXPECT_EQ(ANeuralNetworksCompilation_finish(mCompilation), ANEURALNETWORKS_NO_ERROR);
        EXPECT_EQ(ANeuralNetworksExecution_create(mCompilation, &mExecution),
                  ANEURALNETWORKS_NO_ERROR);
        if (measureTiming) {
            // Don't call setMeasureTiming unless we need to -- cannot call this
            // API unless there is exactly one device.
            EXPECT_EQ(ANeuralNetworksExecution_setMeasureTiming(mExecution, true),
                      ANEURALNETWORKS_NO_ERROR);
        }
        return ANEURALNETWORKS_NO_ERROR;
    }

    std::vector<ANeuralNetworksDevice*> mDevices;
    ANeuralNetworksEvent* mEvent = nullptr;
    ANeuralNetworksExecution* mExecution = nullptr;
    ANeuralNetworksCompilation* mCompilation = nullptr;
    WrapperModel mModel;
};

void createSimpleAddModel(WrapperModel* model) {
    WrapperOperandType type0(WrapperType::TENSOR_FLOAT32, {2});
    WrapperOperandType type1(WrapperType::INT32, {});
    // Phase 1, operands
    auto op1 = model->addOperand(&type0);
    auto op2 = model->addOperand(&type0);
    auto act = model->addOperand(&type1);
    auto op3 = model->addOperand(&type0);
    // Phase 2, operations
    static int32_t act_init[] = {0};
    model->setOperandValue(act, act_init, sizeof(act_init));
    model->addOperation(ANEURALNETWORKS_ADD, {op1, op2, act}, {op3});
    // Phase 3, inputs and outputs
    model->identifyInputsAndOutputs({op1, op2}, {op3});
    model->finish();
    ASSERT_TRUE(model->isValid());
}
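
// For reference: with the activation operand fixed at 0
// (ANEURALNETWORKS_FUSED_NONE), the model computes op3 = op1 + op2
// elementwise, e.g. {1, 2} + {3, 4} == {4, 6}, which is exactly what the
// SimpleAddModel test below verifies.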

// This test verifies that a simple ADD model can run on a single device that
// claims to support all operations.
TEST_F(IntrospectionControlTest, SimpleAddModel) {
    // This is needed before we have the CPU fallback path being treated as a Device.
    // TODO(miaowang): remove once b/72506261 is fixed.
    if (DeviceManager::get()->getUseCpuOnly()) {
        GTEST_SKIP();
    }

    createSimpleAddModel(&mModel);

    std::string driverName = "test-all";
    std::vector<bool> ops(android::nn::kNumberOfOperationTypes, true);
    registerDevices({{driverName, 0.9, ops}});

    EXPECT_TRUE(selectDeviceByName(driverName));
    EXPECT_TRUE(isSupportedOpListExpected({true}));
    EXPECT_EQ(prepareForExecution(), ANEURALNETWORKS_NO_ERROR);

    // Verify that the mCompilation is actually using the "test-all" device.
    CompilationBuilder* c = reinterpret_cast<CompilationBuilder*>(mCompilation);
    const std::string& deviceNameBuffer =
            c->forTest_getExecutionPlan().forTest_simpleGetDevice()->getName();
    EXPECT_EQ(driverName, deviceNameBuffer);

    float input1[2] = {1.0f, 2.0f};
    float input2[2] = {3.0f, 4.0f};
    float output[2];
    EXPECT_EQ(ANeuralNetworksExecution_setInput(mExecution, 0, nullptr, input1, sizeof(input1)),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_EQ(ANeuralNetworksExecution_setInput(mExecution, 1, nullptr, input2, sizeof(input2)),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_EQ(ANeuralNetworksExecution_setOutput(mExecution, 0, nullptr, output, sizeof(output)),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_EQ(ANeuralNetworksExecution_setMeasureTiming(mExecution, true),
              ANEURALNETWORKS_NO_ERROR);

    EXPECT_EQ(ANeuralNetworksExecution_startCompute(mExecution, &mEvent), ANEURALNETWORKS_NO_ERROR);
    EXPECT_EQ(ANeuralNetworksEvent_wait(mEvent), ANEURALNETWORKS_NO_ERROR);
    EXPECT_EQ(output[0], input1[0] + input2[0]);
    EXPECT_EQ(output[1], input1[1] + input2[1]);

    uint64_t timeOnHardware, timeInDriver;
    EXPECT_EQ(ANeuralNetworksExecution_getDuration(mExecution, ANEURALNETWORKS_DURATION_ON_HARDWARE,
                                                   &timeOnHardware),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_EQ(ANeuralNetworksExecution_getDuration(mExecution, ANEURALNETWORKS_DURATION_IN_DRIVER,
                                                   &timeInDriver),
              ANEURALNETWORKS_NO_ERROR);
    if (timeOnHardware != UINT64_MAX && timeInDriver != UINT64_MAX) {
        EXPECT_LE(timeOnHardware, timeInDriver);
    }
}

/*-- Begin test drivers -------------------------------------------------------------------------*/

namespace test_drivers {

enum class Success : uint32_t {
    // ASYNC: Return ErrorStatus::GENERAL_FAILURE; notify ErrorStatus::GENERAL_FAILURE and
    // kBadTiming
    // SYNC, BURST: Return ErrorStatus::GENERAL_FAILURE and kBadTiming
    // FENCED: Return ErrorStatus::GENERAL_FAILURE, empty hidl_handle, and a nullptr callback
    FAIL_LAUNCH,

    // ASYNC: Return ErrorStatus::NONE; notify ErrorStatus::GENERAL_FAILURE and kBadTiming
    FAIL_WAIT,

    // Bit representation for PASS: One bit set to indicate PASS rather than
    // FAIL, one bit for each of the four timing fields (Unfenced, Fenced) x
    // (OnDevice, InDriver) to distinguish between unavailable timing (bit is
    // clear) and available timing (bit is set), and one bit to call out the
    // special case of CPU.
    PASS_BIT = 1 << 4,
    PASS_UNFENCED_DEVICE_BIT = 1 << 5,
    PASS_UNFENCED_DRIVER_BIT = 1 << 6,
    PASS_FENCED_DEVICE_BIT = 1 << 7,
    PASS_FENCED_DRIVER_BIT = 1 << 8,
    PASS_CPU_BIT = 1 << 9,

    // Each of the four timing fields may be either unavailable or 0
    PASS_CPU = PASS_BIT | PASS_CPU_BIT,

    // ASYNC: Return ErrorStatus::NONE; notify ErrorStatus::NONE and timing
    // SYNC, BURST: Return ErrorStatus::NONE and timing
    // FENCED: Return ErrorStatus::NONE, empty hidl_handle, and a callback with timing.
    //
    // For each PASS other than PASS_CPU, an enum name has the form
    // PASS_${UNFENCED_TIME}_${FENCED_TIME}. For example, PASS_NEITHER_BOTH
    // means that only fenced timing is available (both timeOnDevice and
    // timeInDriver). If _${FENCED_TIME} is omitted, it is equivalent to
    // _NEITHER; so PASS_BOTH means that only unfenced timing is available (both
    // timeOnDevice and timeInDriver).
    PASS_NEITHER = PASS_BIT,
    PASS_DEVICE = PASS_BIT | PASS_UNFENCED_DEVICE_BIT,
    PASS_DRIVER = PASS_BIT | PASS_UNFENCED_DRIVER_BIT,
    PASS_BOTH = PASS_BIT | PASS_UNFENCED_DEVICE_BIT | PASS_UNFENCED_DRIVER_BIT,
    PASS_NEITHER_DEVICE = PASS_BIT | PASS_FENCED_DEVICE_BIT,
    PASS_NEITHER_DRIVER = PASS_BIT | PASS_FENCED_DRIVER_BIT,
    PASS_NEITHER_BOTH = PASS_BIT | PASS_FENCED_DEVICE_BIT | PASS_FENCED_DRIVER_BIT,
    PASS_DEVICE_DEVICE = PASS_DEVICE | PASS_NEITHER_DEVICE,
    PASS_DEVICE_DRIVER = PASS_DEVICE | PASS_NEITHER_DRIVER,
    PASS_DEVICE_BOTH = PASS_DEVICE | PASS_NEITHER_BOTH,
    PASS_DRIVER_DEVICE = PASS_DRIVER | PASS_NEITHER_DEVICE,
    PASS_DRIVER_DRIVER = PASS_DRIVER | PASS_NEITHER_DRIVER,
    PASS_DRIVER_BOTH = PASS_DRIVER | PASS_NEITHER_BOTH,
    PASS_BOTH_DEVICE = PASS_BOTH | PASS_NEITHER_DEVICE,
    PASS_BOTH_DRIVER = PASS_BOTH | PASS_NEITHER_DRIVER,
    PASS_BOTH_BOTH = PASS_BOTH | PASS_NEITHER_BOTH,
};
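
// A quick illustration of how the composite PASS values above decompose into
// bits. This is purely a sanity sketch of the encoding; both facts follow
// directly from the definitions above.
static_assert(static_cast<uint32_t>(Success::PASS_DEVICE_BOTH) ==
                      (static_cast<uint32_t>(Success::PASS_BIT) |
                       static_cast<uint32_t>(Success::PASS_UNFENCED_DEVICE_BIT) |
                       static_cast<uint32_t>(Success::PASS_FENCED_DEVICE_BIT) |
                       static_cast<uint32_t>(Success::PASS_FENCED_DRIVER_BIT)),
              "PASS_DEVICE_BOTH == PASS + unfenced device timing + both fenced timings");
static_assert(static_cast<uint32_t>(Success::PASS_NEITHER) ==
                      static_cast<uint32_t>(Success::PASS_BIT),
              "PASS_NEITHER == PASS with no timing bits set");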

bool hasBit(Success mask, Success bit) {
    const uint32_t bitAsInt = static_cast<uint32_t>(bit);
    CHECK(bitAsInt && (bitAsInt & (bitAsInt - 1)) == 0)
            << "second argument must be a single bit rather than " << static_cast<uint32_t>(bit);
    return static_cast<uint32_t>(mask) & bitAsInt;
}

Success clearBit(Success mask, Success bit) {
    const uint32_t bitAsInt = static_cast<uint32_t>(bit);
    CHECK(bitAsInt && (bitAsInt & (bitAsInt - 1)) == 0)
            << "second argument must be a single bit rather than " << static_cast<uint32_t>(bit);
    return static_cast<Success>(static_cast<uint32_t>(mask) & ~bitAsInt);
}
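
// Example usage: hasBit(Success::PASS_BOTH, Success::PASS_UNFENCED_DEVICE_BIT)
// is true, and clearBit(Success::PASS_BOTH, Success::PASS_UNFENCED_DEVICE_BIT)
// yields Success::PASS_DRIVER. Note that the second argument must be a single
// bit; passing a composite value such as Success::PASS_BOTH trips the CHECK.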

std::ostream& operator<<(std::ostream& os, Success success) {
    switch (success) {
        case Success::FAIL_LAUNCH:
            return os << "FAIL_LAUNCH";
        case Success::FAIL_WAIT:
            return os << "FAIL_WAIT";
        case Success::PASS_CPU:
            return os << "PASS_CPU";
        default:
            break;
    }

    static const std::vector<std::pair<Success, const char*>> bits = {
            {Success::PASS_BIT, "PASS"},
            {Success::PASS_UNFENCED_DEVICE_BIT, "UNFENCED_DEVICE"},
            {Success::PASS_UNFENCED_DRIVER_BIT, "UNFENCED_DRIVER"},
            {Success::PASS_FENCED_DEVICE_BIT, "FENCED_DEVICE"},
            {Success::PASS_FENCED_DRIVER_BIT, "FENCED_DRIVER"},
    };
    bool gotOutput = false;
    for (const auto& b : bits) {
        if (hasBit(success, b.first)) {
            if (gotOutput) {
                os << '|';
            } else {
                gotOutput = true;
            }
            os << b.second;
            success = clearBit(success, b.first);
        }
    }
    if (uint32_t successAsInt = static_cast<uint32_t>(success)) {
        if (gotOutput) {
            os << '|';
        }
        os << successAsInt;
    }
    return os;
}

// Returns (unfenced timing, fenced timing).
// Not for PASS_CPU.
std::pair<V1_2::Timing, V1_2::Timing> getExpectedTiming(Success s, bool fencedExecution) {
    CHECK_NE(s, Success::PASS_CPU);

    if (!hasBit(s, Success::PASS_BIT)) {
        return {kBadTiming, kBadTiming};
    }

    std::pair<V1_2::Timing, V1_2::Timing> result;
    result.first.timeOnDevice = hasBit(s, Success::PASS_UNFENCED_DEVICE_BIT)
                                        ? kGoodUnfencedTiming.timeOnDevice
                                        : UINT64_MAX;
    result.first.timeInDriver = hasBit(s, Success::PASS_UNFENCED_DRIVER_BIT)
                                        ? kGoodUnfencedTiming.timeInDriver
                                        : UINT64_MAX;
    if (fencedExecution) {
        result.second.timeOnDevice = hasBit(s, Success::PASS_FENCED_DEVICE_BIT)
                                             ? kGoodFencedTiming.timeOnDevice
                                             : UINT64_MAX;
        result.second.timeInDriver = hasBit(s, Success::PASS_FENCED_DRIVER_BIT)
                                             ? kGoodFencedTiming.timeInDriver
                                             : UINT64_MAX;
    } else {
        result.second = result.first;
    }
    return result;
}
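
// For instance, getExpectedTiming(Success::PASS_DEVICE, /*fencedExecution=*/false)
// yields {{123, UINT64_MAX}, {123, UINT64_MAX}}: the unfenced timing takes
// timeOnDevice from kGoodUnfencedTiming with timeInDriver unavailable, and the
// "fenced" pair simply mirrors the unfenced one for non-fenced execution.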

// For these tests we don't care about actually running an inference -- we
// just want to stub out execution status and timing results, and control
// when the execution finishes.
class TestPreparedModelLatest : public SamplePreparedModel {
   public:
    TestPreparedModelLatest(const HidlModel& model, const SampleDriver* driver, Success success)
        : SamplePreparedModel(model, driver, V1_1::ExecutionPreference::FAST_SINGLE_ANSWER, uid_t{},
                              nn::kDefaultPriority13),
          mSuccess(success) {}

    hardware::Return<V1_0::ErrorStatus> execute(
            const V1_0::Request&, const sp<V1_0::IExecutionCallback>& callback) override {
        switch (mSuccess) {
            case Success::PASS_NEITHER:
                std::thread([callback] {
                    dummyExecution();
                    callback->notify(V1_0::ErrorStatus::NONE);
                }).detach();
                return V1_0::ErrorStatus::NONE;
            case Success::FAIL_LAUNCH:
                dummyExecution();
                callback->notify(V1_0::ErrorStatus::GENERAL_FAILURE);
                return V1_0::ErrorStatus::GENERAL_FAILURE;
            case Success::FAIL_WAIT:
                std::thread([callback] {
                    dummyExecution();
                    callback->notify(V1_0::ErrorStatus::GENERAL_FAILURE);
                }).detach();
                return V1_0::ErrorStatus::NONE;
            default:
                ADD_FAILURE() << "Unexpected Success kind";
                return V1_0::ErrorStatus::GENERAL_FAILURE;
        }
    }

    hardware::Return<V1_0::ErrorStatus> execute_1_2(
            const V1_0::Request&, V1_2::MeasureTiming measure,
            const sp<V1_2::IExecutionCallback>& callback) override {
        EXPECT_EQ(measure, V1_2::MeasureTiming::YES);
        switch (mSuccess) {
            case Success::PASS_NEITHER:
            case Success::PASS_DEVICE:
            case Success::PASS_DRIVER:
            case Success::PASS_BOTH:
                std::thread([this, callback] {
                    dummyExecution();
                    callback->notify_1_2(V1_0::ErrorStatus::NONE, {},
                                         getExpectedTiming(mSuccess, false).first);
                }).detach();
                return V1_0::ErrorStatus::NONE;
            case Success::FAIL_LAUNCH:
                dummyExecution();
                callback->notify(V1_0::ErrorStatus::GENERAL_FAILURE);
                return V1_0::ErrorStatus::GENERAL_FAILURE;
            case Success::FAIL_WAIT:
                std::thread([callback] {
                    dummyExecution();
                    callback->notify(V1_0::ErrorStatus::GENERAL_FAILURE);
                }).detach();
                return V1_0::ErrorStatus::NONE;
            default:
                ADD_FAILURE() << "Unexpected Success kind";
                return V1_0::ErrorStatus::GENERAL_FAILURE;
        }
    }

    hardware::Return<V1_3::ErrorStatus> execute_1_3(
            const V1_3::Request&, V1_2::MeasureTiming measure, const V1_3::OptionalTimePoint&,
            const V1_3::OptionalTimeoutDuration&,
            const sp<V1_3::IExecutionCallback>& callback) override {
        // Use a placeholder V1_0::Request because execute_1_2 ignores request entirely.
        const V1_0::ErrorStatus status = execute_1_2(V1_0::Request{}, measure, callback);
        return convertToV1_3(status);
    }

    hardware::Return<void> executeSynchronously(const V1_0::Request&, V1_2::MeasureTiming measure,
                                                executeSynchronously_cb cb) override {
        EXPECT_EQ(measure, V1_2::MeasureTiming::YES);
        switch (mSuccess) {
            case Success::PASS_NEITHER:
            case Success::PASS_DEVICE:
            case Success::PASS_DRIVER:
            case Success::PASS_BOTH:
                dummyExecution();
                cb(V1_0::ErrorStatus::NONE, {}, getExpectedTiming(mSuccess, false).first);
                return hardware::Void();
            case Success::FAIL_WAIT:
                // While this is a synchronous execution method, the NNAPI
                // runtime may call it even for asynchronous execution, so we
                // need to tolerate Success::FAIL_WAIT here, not just
                // Success::FAIL_LAUNCH.
                FALLTHROUGH_INTENDED;
            case Success::FAIL_LAUNCH:
                dummyExecution();
                cb(V1_0::ErrorStatus::GENERAL_FAILURE, {}, kBadTiming);
                return hardware::Void();
            default:
                ADD_FAILURE() << "Unexpected Success kind";
                cb(V1_0::ErrorStatus::GENERAL_FAILURE, {}, kBadTiming);
                return hardware::Void();
        }
    }

    hardware::Return<void> executeSynchronously_1_3(const V1_3::Request&,
                                                    V1_2::MeasureTiming measure,
                                                    const V1_3::OptionalTimePoint&,
                                                    const V1_3::OptionalTimeoutDuration&,
                                                    executeSynchronously_1_3_cb cb) override {
        const auto wrappedCb = [&cb](V1_0::ErrorStatus status,
                                     const hardware::hidl_vec<V1_2::OutputShape>& outputShapes,
                                     V1_2::Timing timing) {
            cb(convertToV1_3(status), outputShapes, timing);
        };
        // Use a placeholder V1_0::Request because executeSynchronously ignores request entirely.
        return executeSynchronously(V1_0::Request{}, measure, wrappedCb);
    }

    // ExecutionBurstServer::create has an overload that will use
    // IPreparedModel::executeSynchronously(), so we can rely on that, rather
    // than having to implement ExecutionBurstServer::IExecutorWithCache.
    hardware::Return<void> configureExecutionBurst(
            const sp<V1_2::IBurstCallback>& callback,
            const MQDescriptorSync<V1_2::FmqRequestDatum>& requestChannel,
            const MQDescriptorSync<V1_2::FmqResultDatum>& resultChannel,
            configureExecutionBurst_cb cb) override {
        const sp<V1_2::IBurstContext> burst = ExecutionBurstServer::create(
                callback, requestChannel, resultChannel, this, std::chrono::microseconds{0});

        cb(burst == nullptr ? V1_0::ErrorStatus::GENERAL_FAILURE : V1_0::ErrorStatus::NONE, burst);
        return hardware::Void();
    }

    hardware::Return<void> executeFenced(const V1_3::Request&,
                                         const hardware::hidl_vec<hardware::hidl_handle>&,
                                         V1_2::MeasureTiming measure,
                                         const V1_3::OptionalTimePoint&,
                                         const V1_3::OptionalTimeoutDuration&,
                                         const V1_3::OptionalTimeoutDuration&,
                                         executeFenced_cb callback) override {
        EXPECT_EQ(measure, V1_2::MeasureTiming::YES);
        if (hasBit(mSuccess, Success::PASS_BIT)) {
            dummyExecution();
            const auto expectedTiming = getExpectedTiming(mSuccess, true);
            sp<SampleFencedExecutionCallback> fencedExecutionCallback =
                    new SampleFencedExecutionCallback(expectedTiming.first, expectedTiming.second,
                                                      V1_3::ErrorStatus::NONE);
            callback(V1_3::ErrorStatus::NONE, hardware::hidl_handle(nullptr),
                     fencedExecutionCallback);
            return hardware::Void();
        }
        switch (mSuccess) {
            case Success::FAIL_WAIT:
                // Due to the limitation of the SampleDriver,
                // FAIL_WAIT behaves the same as FAIL_LAUNCH.
                // If the SampleDriver is updated to return real
                // sync fences, this must be updated.
                FALLTHROUGH_INTENDED;
            case Success::FAIL_LAUNCH:
                dummyExecution();
                callback(V1_3::ErrorStatus::GENERAL_FAILURE, hardware::hidl_handle(nullptr),
                         nullptr);
                return hardware::Void();
            default:
                ADD_FAILURE() << "Unexpected Success kind";
                return hardware::Void();
        }
    }

    // We can place the TestPreparedModelLatest system in a "pause" mode where
    // no execution will complete until the system is taken out of that mode.
    // Initially, the system is not in that mode.
    static void pauseExecutions(bool v) { mPauseExecutions.store(v); }

    // This function is only guaranteed to work in the following pattern:
    // Consider thread A as primary thread
    // - thread A: pauseExecutions(true);
    // - thread A: launch execution (as thread B)
    // - thread A: waitForExecutionToBegin(), block until call to dummyExecution by
    //             thread B makes mExecutionsInFlight nonzero
    // - thread B: dummyExecution(), which makes mExecutionsInFlight nonzero and blocks
    //             until thread A calls pauseExecutions(false)
    // - thread A: waitForExecutionToBegin() returns
    // - thread A: pauseExecutions(false), allowing dummyExecution() on thread B to continue
    // - thread B: dummyExecution() zeroes mExecutionsInFlight and returns
    // - thread B: thread exits
    static void waitForExecutionToBegin() {
        CHECK(mPauseExecutions.load());
        while (mExecutionsInFlight.load() == 0) {
        }
    }

   private:
    Success mSuccess;

    static std::atomic<bool> mPauseExecutions;
    static std::atomic<unsigned int> mExecutionsInFlight;

    static void dummyExecution() {
        CHECK_EQ(mExecutionsInFlight.fetch_add(1), 0u) << "We do not support concurrent executions";
        while (mPauseExecutions.load()) {
        }
        mExecutionsInFlight.fetch_sub(1);
    }
};
std::atomic<bool> TestPreparedModelLatest::mPauseExecutions = false;
std::atomic<unsigned int> TestPreparedModelLatest::mExecutionsInFlight = 0;
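
// Illustrative only -- a minimal sketch of the pause/wait protocol above, as
// the timing tests below use it (everything except the statics is
// hypothetical pseudo-usage):
//
//     TestPreparedModelLatest::pauseExecutions(true);
//     std::thread worker([&] { /* launch an execution; it enters dummyExecution() */ });
//     TestPreparedModelLatest::waitForExecutionToBegin();
//     // ... while paused, ANeuralNetworksExecution_getDuration() must return
//     // ANEURALNETWORKS_BAD_STATE ...
//     TestPreparedModelLatest::pauseExecutions(false);
//     worker.join();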

using TestPreparedModel13 = TestPreparedModelLatest;

// Like TestPreparedModelLatest, but implementing 1.2
class TestPreparedModel12 : public V1_2::IPreparedModel {
   public:
    TestPreparedModel12(const HidlModel& model, const SampleDriver* driver, Success success)
        : mLatestPreparedModel(new TestPreparedModelLatest(model, driver, success)) {}

    hardware::Return<V1_0::ErrorStatus> execute(
            const V1_0::Request& request, const sp<V1_0::IExecutionCallback>& callback) override {
        return mLatestPreparedModel->execute(request, callback);
    }

    hardware::Return<V1_0::ErrorStatus> execute_1_2(
            const V1_0::Request& request, V1_2::MeasureTiming measure,
            const sp<V1_2::IExecutionCallback>& callback) override {
        return mLatestPreparedModel->execute_1_2(request, measure, callback);
    }

    hardware::Return<void> executeSynchronously(const V1_0::Request& request,
                                                V1_2::MeasureTiming measure,
                                                executeSynchronously_cb cb) override {
        return mLatestPreparedModel->executeSynchronously(request, measure, cb);
    }

    hardware::Return<void> configureExecutionBurst(
            const sp<V1_2::IBurstCallback>& callback,
            const MQDescriptorSync<V1_2::FmqRequestDatum>& requestChannel,
            const MQDescriptorSync<V1_2::FmqResultDatum>& resultChannel,
            configureExecutionBurst_cb cb) override {
        return mLatestPreparedModel->configureExecutionBurst(callback, requestChannel,
                                                             resultChannel, cb);
    }

   private:
    const sp<V1_3::IPreparedModel> mLatestPreparedModel;
};

// Like TestPreparedModelLatest, but implementing 1.0
class TestPreparedModel10 : public V1_0::IPreparedModel {
   public:
    TestPreparedModel10(const HidlModel& model, const SampleDriver* driver, Success success)
        : mLatestPreparedModel(new TestPreparedModelLatest(model, driver, success)) {}

    hardware::Return<V1_0::ErrorStatus> execute(
            const V1_0::Request& request, const sp<V1_0::IExecutionCallback>& callback) override {
        return mLatestPreparedModel->execute(request, callback);
    }

   private:
    const sp<V1_3::IPreparedModel> mLatestPreparedModel;
};

// Behaves like SampleDriver, except that it produces a customized IPreparedModel.
class TestDriver13 : public SampleDriver {
   public:
    TestDriver13(const std::string& name, Success success)
        : SampleDriver(name.c_str()), mSuccess(success) {}

    hardware::Return<void> getCapabilities_1_3(getCapabilities_1_3_cb _hidl_cb) override {
        android::nn::initVLogMask();
        V1_3::Capabilities capabilities = nn::makeCapabilities(0.75f);
        _hidl_cb(V1_3::ErrorStatus::NONE, capabilities);
        return hardware::Void();
    }

    hardware::Return<void> getSupportedOperations_1_3(const HidlModel& model,
                                                      getSupportedOperations_1_3_cb cb) override {
        if (nn::validateModel(model)) {
            std::vector<bool> supported(model.main.operations.size(), true);
            cb(V1_3::ErrorStatus::NONE, supported);
        } else {
            cb(V1_3::ErrorStatus::INVALID_ARGUMENT, {});
        }
        return hardware::Void();
    }

    hardware::Return<void> getSupportedOperations_1_2(const V1_2::Model& model,
                                                      getSupportedOperations_1_2_cb cb) override {
        if (nn::validateModel(model)) {
            std::vector<bool> supported(model.operations.size(), true);
            cb(V1_0::ErrorStatus::NONE, supported);
        } else {
            std::vector<bool> supported;
            cb(V1_0::ErrorStatus::INVALID_ARGUMENT, supported);
        }
        return hardware::Void();
    }

    hardware::Return<V1_3::ErrorStatus> prepareModel_1_3(
            const HidlModel& model, V1_1::ExecutionPreference, V1_3::Priority,
            const V1_3::OptionalTimePoint&, const hardware::hidl_vec<hardware::hidl_handle>&,
            const hardware::hidl_vec<hardware::hidl_handle>&, const nn::HalCacheToken&,
            const sp<V1_3::IPreparedModelCallback>& callback) override {
        callback->notify_1_3(V1_3::ErrorStatus::NONE,
                             new TestPreparedModel13(model, this, mSuccess));
        return V1_3::ErrorStatus::NONE;
    }

    hardware::Return<V1_0::ErrorStatus> prepareModel_1_2(
            const V1_2::Model& model, V1_1::ExecutionPreference,
            const hardware::hidl_vec<hardware::hidl_handle>&,
            const hardware::hidl_vec<hardware::hidl_handle>&, const nn::HalCacheToken&,
            const sp<V1_2::IPreparedModelCallback>& callback) override {
        callback->notify_1_2(V1_0::ErrorStatus::NONE,
                             new TestPreparedModel12(nn::convertToV1_3(model), this, mSuccess));
        return V1_0::ErrorStatus::NONE;
    }

    hardware::Return<V1_0::ErrorStatus> prepareModel_1_1(
            const V1_1::Model& model, V1_1::ExecutionPreference,
            const sp<V1_0::IPreparedModelCallback>& callback) override {
        callback->notify(V1_0::ErrorStatus::NONE,
                         new TestPreparedModel10(nn::convertToV1_3(model), this, mSuccess));
        return V1_0::ErrorStatus::NONE;
    }

    hardware::Return<V1_0::ErrorStatus> prepareModel(
            const V1_0::Model& model, const sp<V1_0::IPreparedModelCallback>& callback) override {
        return prepareModel_1_1(nn::convertToV1_1(model),
                                V1_1::ExecutionPreference::FAST_SINGLE_ANSWER, callback);
    }

   private:
    Success mSuccess;
};

// Like TestDriver, but implementing 1.1
class TestDriver11 : public V1_1::IDevice {
   public:
    TestDriver11(const std::string& name, Success success)
        : mLatestDriver(new TestDriver13(name, success)) {}
    hardware::Return<void> getCapabilities_1_1(getCapabilities_1_1_cb _hidl_cb) override {
        return mLatestDriver->getCapabilities_1_1(_hidl_cb);
    }
    hardware::Return<void> getSupportedOperations_1_1(
            const V1_1::Model& model, getSupportedOperations_1_1_cb _hidl_cb) override {
        return mLatestDriver->getSupportedOperations_1_1(model, _hidl_cb);
    }
    hardware::Return<V1_0::ErrorStatus> prepareModel_1_1(
            const V1_1::Model& model, V1_1::ExecutionPreference preference,
            const sp<V1_0::IPreparedModelCallback>& actualCallback) override {
        return mLatestDriver->prepareModel_1_1(model, preference, actualCallback);
    }
    hardware::Return<V1_0::DeviceStatus> getStatus() override { return mLatestDriver->getStatus(); }
    hardware::Return<void> getCapabilities(getCapabilities_cb _hidl_cb) override {
        return mLatestDriver->getCapabilities(_hidl_cb);
    }
    hardware::Return<void> getSupportedOperations(const V1_0::Model& model,
                                                  getSupportedOperations_cb _hidl_cb) override {
        return mLatestDriver->getSupportedOperations(model, _hidl_cb);
    }
    hardware::Return<V1_0::ErrorStatus> prepareModel(
            const V1_0::Model& model,
            const sp<V1_0::IPreparedModelCallback>& actualCallback) override {
        return mLatestDriver->prepareModel(model, actualCallback);
    }

   private:
    const sp<V1_3::IDevice> mLatestDriver;
};

}  // namespace test_drivers

/*-- End test drivers -------------------------------------------------------------------------*/

/*-- Begin timing tests -------------------------------------------------------------------------*/

namespace timing_tests {

using namespace test_drivers;

enum class DriverKind {
    CPU,
    OLD,  // too old to support timing (1.1 or earlier)
    NEW   // new enough to support timing (1.2 or later)
};

std::ostream& operator<<(std::ostream& os, DriverKind kind) {
    const char* names[] = {"CPU", "OLD", "NEW"};
    const uint32_t index = static_cast<uint32_t>(kind);
    CHECK(index < std::size(names));
    return os << names[index];
}

enum class Compute { ASYNC, SYNC, BURST, FENCED };

std::ostream& operator<<(std::ostream& os, Compute compute) {
    const char* names[] = {"ASYNC", "SYNC", "BURST", "FENCED"};
    const uint32_t index = static_cast<uint32_t>(compute);
    CHECK(index < std::size(names));
    return os << names[index];
}

class TimingTest : public IntrospectionControlTest,
                   public ::testing::WithParamInterface<std::tuple<DriverKind, Success, Compute>> {
   public:
    TimingTest()
        : kDriverKind(std::get<0>(GetParam())),
          kSuccess(std::get<1>(GetParam())),
          kCompute(std::get<2>(GetParam())) {}

   protected:
    const DriverKind kDriverKind;
    const Success kSuccess;
    const Compute kCompute;
};

TEST_P(TimingTest, Test) {
    // There's no straightforward way to force CPU execution to fail.
    ASSERT_EQ(kDriverKind == DriverKind::CPU, kSuccess == Success::PASS_CPU);

    // FAIL_WAIT only makes sense for ASYNC and FENCED.
    ASSERT_TRUE(kCompute == Compute::ASYNC || kCompute == Compute::FENCED ||
                kSuccess != Success::FAIL_WAIT);

    if (DeviceManager::get()->getUseCpuOnly() != (kDriverKind == DriverKind::CPU)) {
        // We don't have an elegant way to request the CPU driver. Therefore,
        // we rely on our test framework to make the choice between CPU and
        // non-CPU.
        GTEST_SKIP();
    }

    createSimpleAddModel(&mModel);

    switch (kDriverKind) {
        case DriverKind::CPU: {
            // There should be only one driver -- the CPU
            const std::string& name = DeviceManager::get()->getDrivers()[0]->getName();
            ASSERT_TRUE(selectDeviceByName(name));
            break;
        }
        case DriverKind::OLD: {
            static const char name[] = "old";
            DeviceManager::get()->forTest_registerDevice(
                    nn::makeSharedDevice(name, new TestDriver11(name, kSuccess)));
            ASSERT_TRUE(selectDeviceByName(name));
            break;
        }
        case DriverKind::NEW: {
            static const char name[] = "new";
            DeviceManager::get()->forTest_registerDevice(
                    nn::makeSharedDevice(name, new TestDriver13(name, kSuccess)));
            ASSERT_TRUE(selectDeviceByName(name));
            break;
        }
        default:
            FAIL() << "Unexpected DriverKind";
    }

    EXPECT_EQ(prepareForExecution(true /*measureTiming*/), ANEURALNETWORKS_NO_ERROR);

    float input1[2] = {1.0f, 2.0f};
    float input2[2] = {3.0f, 4.0f};
    float output[2];
    EXPECT_EQ(ANeuralNetworksExecution_setInput(mExecution, 0, nullptr, input1, sizeof(input1)),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_EQ(ANeuralNetworksExecution_setInput(mExecution, 1, nullptr, input2, sizeof(input2)),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_EQ(ANeuralNetworksExecution_setOutput(mExecution, 0, nullptr, output, sizeof(output)),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_EQ(ANeuralNetworksExecution_setMeasureTiming(mExecution, true),
              ANEURALNETWORKS_NO_ERROR);

    auto Check = [](bool expectPass, int result) {
        if (expectPass) {
            ASSERT_EQ(result, ANEURALNETWORKS_NO_ERROR);
        } else {
            ASSERT_NE(result, ANEURALNETWORKS_NO_ERROR);
        }
    };

    const bool isPass = hasBit(kSuccess, Success::PASS_BIT);
    const int expectedGetDurationResultCode =
            isPass ? ANEURALNETWORKS_NO_ERROR : ANEURALNETWORKS_BAD_STATE;

    const auto getDurationWhileRunning = [this] {
        if (kDriverKind == DriverKind::CPU) {
            // Testing DriverKind::CPU would require modifying the CPU execution
            // path to control execution completion, similarly to how this test
            // case does with TestPreparedModel::dummyExecution(). This does not
            // seem worthwhile -- it's intrusive into the runtime code solely
            // for the sake of testing, and we do not expect that the code paths
            // needed to ensure correct behavior of
            // ANeuralNetworksExecution_getDuration() on a running execution
            // would be any different for CPU than for actual drivers.
            return;
        }
        TestPreparedModelLatest::waitForExecutionToBegin();
        for (int durationCode :
             std::vector{ANEURALNETWORKS_DURATION_ON_HARDWARE, ANEURALNETWORKS_DURATION_IN_DRIVER,
                         ANEURALNETWORKS_FENCED_DURATION_ON_HARDWARE,
                         ANEURALNETWORKS_FENCED_DURATION_IN_DRIVER}) {
            uint64_t time;
            // Cannot query duration while execution is running
            EXPECT_EQ(ANeuralNetworksExecution_getDuration(mExecution, durationCode, &time),
                      ANEURALNETWORKS_BAD_STATE);
        }
    };

    switch (kCompute) {
        case Compute::ASYNC: {
            // Ideally what we'd like to do here is
            //
            //     Check(kSuccess != Success::FAIL_LAUNCH,
            //           ANeuralNetworksExecution_startCompute(mExecution, &mEvent));
            //     Check(isPass, ANeuralNetworksEvent_wait(mEvent));
            //
            // However, in the current implementation of the runtime, a launch
            // failure at the HAL level does not show up as a launch failure at
            // the NDK level ("startCompute"): The NNAPI runtime does not call a
            // driver until it (the runtime) begins execution, so a launch
            // failure at the HAL level looks like an execution failure at the
            // NDK level ("wait").
            SCOPED_TRACE("ASYNC startCompute");
            TestPreparedModelLatest::pauseExecutions(true);
            Check(true,  // rather than kSuccess != Success::FAIL_LAUNCH
                  ANeuralNetworksExecution_startCompute(mExecution, &mEvent));
            getDurationWhileRunning();
            TestPreparedModelLatest::pauseExecutions(false);
            SCOPED_TRACE("ASYNC wait");
            Check(isPass, ANeuralNetworksEvent_wait(mEvent));
            break;
        }
        case Compute::SYNC: {
            SCOPED_TRACE("SYNC");
            TestPreparedModelLatest::pauseExecutions(true);
            std::thread run([this, Check, isPass] {
                Check(isPass, ANeuralNetworksExecution_compute(mExecution));
            });
            getDurationWhileRunning();
            TestPreparedModelLatest::pauseExecutions(false);
            run.join();
            break;
        }
        case Compute::BURST: {
            SCOPED_TRACE("BURST");
            ANeuralNetworksBurst* burst;
            ASSERT_EQ(ANeuralNetworksBurst_create(mCompilation, &burst), ANEURALNETWORKS_NO_ERROR);
            TestPreparedModelLatest::pauseExecutions(true);
            std::thread run([this, Check, isPass, burst] {
                Check(isPass, ANeuralNetworksExecution_burstCompute(mExecution, burst));
            });
            getDurationWhileRunning();
            TestPreparedModelLatest::pauseExecutions(false);
            run.join();
            ANeuralNetworksBurst_free(burst);
            break;
        }
        case Compute::FENCED: {
            SCOPED_TRACE("FENCED startComputeWithDependencies");
            TestPreparedModelLatest::pauseExecutions(true);

            // Note, due to the limitation of the SampleDriver implementation, the call is
            // synchronous. If the SampleDriver is updated to return a real sync fence, this
            // must be updated.
            std::thread run([this, Check, isPass] {
                Check(isPass, ANeuralNetworksExecution_startComputeWithDependencies(
                                      mExecution, nullptr, 0, 0, &mEvent));
            });
            getDurationWhileRunning();
            TestPreparedModelLatest::pauseExecutions(false);
            run.join();
            SCOPED_TRACE("FENCED wait");
            Check(isPass, ANeuralNetworksEvent_wait(mEvent));
            break;
        }
        default:
            FAIL() << "unreachable";
    }

    uint64_t timeOnHardware, timeInDriver, timeOnHardwareFenced, timeInDriverFenced;
    EXPECT_EQ(ANeuralNetworksExecution_getDuration(mExecution, ANEURALNETWORKS_DURATION_ON_HARDWARE,
                                                   &timeOnHardware),
              expectedGetDurationResultCode);
    EXPECT_EQ(ANeuralNetworksExecution_getDuration(mExecution, ANEURALNETWORKS_DURATION_IN_DRIVER,
                                                   &timeInDriver),
              expectedGetDurationResultCode);
    EXPECT_EQ(
            ANeuralNetworksExecution_getDuration(
                    mExecution, ANEURALNETWORKS_FENCED_DURATION_ON_HARDWARE, &timeOnHardwareFenced),
            expectedGetDurationResultCode);
    EXPECT_EQ(ANeuralNetworksExecution_getDuration(
                      mExecution, ANEURALNETWORKS_FENCED_DURATION_IN_DRIVER, &timeInDriverFenced),
              expectedGetDurationResultCode);
    switch (kDriverKind) {
        case DriverKind::CPU: {
            // TODO: Should we require timing to be reported as 0?
            EXPECT_TRUE(timeOnHardware == 0 || timeOnHardware == UINT64_MAX)
                    << "timeOnHardware = " << timeOnHardware;
            EXPECT_TRUE(timeInDriver == 0 || timeInDriver == UINT64_MAX)
                    << "timeInDriver = " << timeInDriver;
            EXPECT_TRUE(timeOnHardwareFenced == 0 || timeOnHardwareFenced == UINT64_MAX)
                    << "timeOnHardwareFenced = " << timeOnHardwareFenced;
            EXPECT_TRUE(timeInDriverFenced == 0 || timeInDriverFenced == UINT64_MAX)
                    << "timeInDriverFenced = " << timeInDriverFenced;
            break;
        }
        case DriverKind::OLD: {
            EXPECT_EQ(timeOnHardware, UINT64_MAX);
            EXPECT_EQ(timeInDriver, UINT64_MAX);
            EXPECT_EQ(timeOnHardwareFenced, UINT64_MAX);
            EXPECT_EQ(timeInDriverFenced, UINT64_MAX);
            break;
        }
        case DriverKind::NEW: {
            auto microsToNanos = [](uint64_t micros) {
                constexpr uint64_t kNanosPerMicro = 1000;
                return micros == UINT64_MAX ? UINT64_MAX : kNanosPerMicro * micros;
            };
            auto expectedTiming = getExpectedTiming(kSuccess, kCompute == Compute::FENCED);
            EXPECT_EQ(timeOnHardware, microsToNanos(expectedTiming.first.timeOnDevice));
            EXPECT_EQ(timeInDriver, microsToNanos(expectedTiming.first.timeInDriver));
            EXPECT_EQ(timeOnHardwareFenced, microsToNanos(expectedTiming.second.timeOnDevice));
            EXPECT_EQ(timeInDriverFenced, microsToNanos(expectedTiming.second.timeInDriver));
            break;
        }
        default:
            FAIL() << "unreachable";
    }
    if (kCompute != Compute::FENCED) {
        EXPECT_EQ(timeOnHardware, timeOnHardwareFenced);
        EXPECT_EQ(timeInDriver, timeInDriverFenced);
    }
    auto expectTimingLe = [](uint64_t a, const char* aName, uint64_t b, const char* bName) {
        if (a != UINT64_MAX && b != UINT64_MAX) {
            EXPECT_LE(a, b) << aName << " exceeds " << bName;
        }
    };
#define EXPECT_TIMING_LE(a, b) expectTimingLe(a, #a, b, #b)
    EXPECT_TIMING_LE(timeOnHardware, timeInDriver);
    EXPECT_TIMING_LE(timeOnHardwareFenced, timeInDriverFenced);

    EXPECT_TIMING_LE(timeOnHardwareFenced, timeOnHardware);
    EXPECT_TIMING_LE(timeInDriverFenced, timeInDriver);
#undef EXPECT_TIMING_LE
}

auto kTimingTestUnfencedValues = ::testing::Values(
        // NOTE: We cannot force CPU execution to fail
        std::make_tuple(DriverKind::CPU, Success::PASS_CPU, Compute::ASYNC),
        std::make_tuple(DriverKind::CPU, Success::PASS_CPU, Compute::SYNC),
        std::make_tuple(DriverKind::CPU, Success::PASS_CPU, Compute::BURST),

        // NOTE: OLD driver does not provide timing
        std::make_tuple(DriverKind::OLD, Success::PASS_NEITHER, Compute::ASYNC),
        std::make_tuple(DriverKind::OLD, Success::PASS_NEITHER, Compute::SYNC),
        std::make_tuple(DriverKind::OLD, Success::PASS_NEITHER, Compute::BURST),

        std::make_tuple(DriverKind::OLD, Success::FAIL_LAUNCH, Compute::ASYNC),
        std::make_tuple(DriverKind::OLD, Success::FAIL_LAUNCH, Compute::SYNC),
        std::make_tuple(DriverKind::OLD, Success::FAIL_LAUNCH, Compute::BURST),

        // NOTE: Only ASYNC is paired with a wait
        std::make_tuple(DriverKind::OLD, Success::FAIL_WAIT, Compute::ASYNC),

        std::make_tuple(DriverKind::NEW, Success::PASS_NEITHER, Compute::ASYNC),
        std::make_tuple(DriverKind::NEW, Success::PASS_NEITHER, Compute::SYNC),
        std::make_tuple(DriverKind::NEW, Success::PASS_NEITHER, Compute::BURST),

        std::make_tuple(DriverKind::NEW, Success::PASS_DEVICE, Compute::ASYNC),
        std::make_tuple(DriverKind::NEW, Success::PASS_DEVICE, Compute::SYNC),
        std::make_tuple(DriverKind::NEW, Success::PASS_DEVICE, Compute::BURST),

        std::make_tuple(DriverKind::NEW, Success::PASS_DRIVER, Compute::ASYNC),
        std::make_tuple(DriverKind::NEW, Success::PASS_DRIVER, Compute::SYNC),
        std::make_tuple(DriverKind::NEW, Success::PASS_DRIVER, Compute::BURST),

        std::make_tuple(DriverKind::NEW, Success::PASS_BOTH, Compute::ASYNC),
        std::make_tuple(DriverKind::NEW, Success::PASS_BOTH, Compute::SYNC),
        std::make_tuple(DriverKind::NEW, Success::PASS_BOTH, Compute::BURST),

        std::make_tuple(DriverKind::NEW, Success::FAIL_LAUNCH, Compute::ASYNC),
        std::make_tuple(DriverKind::NEW, Success::FAIL_LAUNCH, Compute::SYNC),
        std::make_tuple(DriverKind::NEW, Success::FAIL_LAUNCH, Compute::BURST),

        // NOTE: Only ASYNC is paired with a wait
        std::make_tuple(DriverKind::NEW, Success::FAIL_WAIT, Compute::ASYNC));

auto kTimingTestFencedValues = ::testing::Values(
        // NOTE: We cannot force CPU execution to fail
        std::make_tuple(DriverKind::CPU, Success::PASS_CPU, Compute::FENCED),

        // NOTE: OLD driver does not provide timing
        std::make_tuple(DriverKind::OLD, Success::PASS_NEITHER, Compute::FENCED),

        std::make_tuple(DriverKind::OLD, Success::FAIL_LAUNCH, Compute::FENCED),

        std::make_tuple(DriverKind::NEW, Success::PASS_NEITHER, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_DEVICE, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_DRIVER, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_BOTH, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_NEITHER_DEVICE, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_NEITHER_DRIVER, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_NEITHER_BOTH, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_DEVICE_DEVICE, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_DEVICE_DRIVER, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_DEVICE_BOTH, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_DRIVER_DEVICE, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_DRIVER_DRIVER, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_DRIVER_BOTH, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_BOTH_DEVICE, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_BOTH_DRIVER, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_BOTH_BOTH, Compute::FENCED),

        std::make_tuple(DriverKind::NEW, Success::FAIL_LAUNCH, Compute::FENCED));

INSTANTIATE_TEST_SUITE_P(Unfenced, TimingTest, kTimingTestUnfencedValues);
INSTANTIATE_TEST_SUITE_P(Fenced, TimingTest, kTimingTestFencedValues);

}  // namespace timing_tests

/*-- End timing tests -------------------------------------------------------------------------*/

const float kSimpleCeiling = 2.0f;

void createAddMaxModel(WrapperModel* model, bool reverseOrder) {
    WrapperOperandType type0(WrapperType::TENSOR_FLOAT32, {2});
    WrapperOperandType type1(WrapperType::INT32, {});
    // Phase 1, operands
    auto op1 = model->addOperand(&type0);
    auto op2 = model->addOperand(&type0);
    auto act = model->addOperand(&type1);
    auto op3 = model->addOperand(&type0);
    auto op4 = model->addOperand(&type0);
    auto op5 = model->addOperand(&type0);
    // Phase 2, operations
    static int32_t act_init[] = {0};
    model->setOperandValue(act, act_init, sizeof(act_init));
    static float ceiling[] = {kSimpleCeiling, kSimpleCeiling};
    model->setOperandValue(op4, ceiling, sizeof(ceiling));
    if (reverseOrder) {
        // In this case, add MAXIMUM first, but the execution order is still ADD -> MAXIMUM.
        model->addOperation(ANEURALNETWORKS_MAXIMUM, {op3, op4}, {op5});
        model->addOperation(ANEURALNETWORKS_ADD, {op1, op2, act}, {op3});
    } else {
        model->addOperation(ANEURALNETWORKS_ADD, {op1, op2, act}, {op3});
        model->addOperation(ANEURALNETWORKS_MAXIMUM, {op3, op4}, {op5});
    }
    // Phase 3, inputs and outputs
    model->identifyInputsAndOutputs({op1, op2}, {op5});
    model->finish();
    ASSERT_TRUE(model->isValid());
}
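
// Note: the TestDriver11 registered in the slicing tests below only implements
// the 1.1 HAL, while ANEURALNETWORKS_MAXIMUM is a 1.2+ operation. So model
// slicing should report ADD as supported and MAXIMUM as unsupported,
// regardless of the order in which the two operations were added.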
1169
TEST_F(IntrospectionControlTest,SlicingAddMax)1170 TEST_F(IntrospectionControlTest, SlicingAddMax) {
1171 // This is needed before we have the CPU fallback path being treated as a Device.
1172 if (DeviceManager::get()->getUseCpuOnly()) {
1173 GTEST_SKIP();
1174 }
1175
1176 using namespace test_drivers;
1177
1178 static const char name[] = "driver11";
1179 DeviceManager::get()->forTest_registerDevice(
1180 nn::makeSharedDevice(name, new TestDriver11(name, Success::PASS_BOTH)));
1181 ASSERT_TRUE(selectDeviceByName(name));
1182
1183 createAddMaxModel(&mModel, false);
1184 EXPECT_TRUE(isSupportedOpListExpected({true, false}));
1185 }
1186
TEST_F(IntrospectionControlTest,SlicingMaxAdd)1187 TEST_F(IntrospectionControlTest, SlicingMaxAdd) {
1188 // This is needed before we have the CPU fallback path being treated as a Device.
1189 if (DeviceManager::get()->getUseCpuOnly()) {
1190 GTEST_SKIP();
1191 }
1192
1193 using namespace test_drivers;
1194
1195 static const char name[] = "driver11";
1196 DeviceManager::get()->forTest_registerDevice(
1197 nn::makeSharedDevice(name, new TestDriver11(name, Success::PASS_BOTH)));
1198 ASSERT_TRUE(selectDeviceByName(name));
1199
1200 createAddMaxModel(&mModel, true);
1201 EXPECT_TRUE(isSupportedOpListExpected({false, true}));
1202 }
1203
1204 const float kSimpleMultiplier = 2.0f;
1205
createAddMulModel(WrapperModel * model,bool reverseOrder)1206 void createAddMulModel(WrapperModel* model, bool reverseOrder) {
1207 WrapperOperandType type0(WrapperType::TENSOR_FLOAT32, {2});
1208 WrapperOperandType type1(WrapperType::INT32, {});
1209 // Phase 1, operands
1210 auto op1 = model->addOperand(&type0);
1211 auto op2 = model->addOperand(&type0);
1212 auto act = model->addOperand(&type1);
1213 auto op3 = model->addOperand(&type0);
1214 auto op4 = model->addOperand(&type0);
1215 auto op5 = model->addOperand(&type0);
1216 // Phase 2, operations
1217 static int32_t act_init[] = {0};
1218 model->setOperandValue(act, act_init, sizeof(act_init));
1219 static float multiplier[] = {kSimpleMultiplier, kSimpleMultiplier};
1220 model->setOperandValue(op4, multiplier, sizeof(multiplier));
1221 if (reverseOrder) {
1222 // In this case, add MUL first, but the execution order is still ADD -> MUL.
1223 model->addOperation(ANEURALNETWORKS_MUL, {op3, op4, act}, {op5});
1224 model->addOperation(ANEURALNETWORKS_ADD, {op1, op2, act}, {op3});
1225 } else {
1226 model->addOperation(ANEURALNETWORKS_ADD, {op1, op2, act}, {op3});
1227 model->addOperation(ANEURALNETWORKS_MUL, {op3, op4, act}, {op5});
1228 }
1229 // Phase 3, inputs and outputs
1230 model->identifyInputsAndOutputs({op1, op2}, {op5});
1231 model->finish();
1232 ASSERT_TRUE(model->isValid());
1233 }
1234
TEST_F(IntrospectionControlTest,SlicingFullySupported)1235 TEST_F(IntrospectionControlTest, SlicingFullySupported) {
1236 // This is needed before we have the CPU fallback path being treated as a Device.
1237 if (DeviceManager::get()->getUseCpuOnly()) {
1238 GTEST_SKIP();
1239 }
1240
1241 using namespace test_drivers;
1242
1243 static const char name[] = "driver11";
1244 DeviceManager::get()->forTest_registerDevice(
1245 nn::makeSharedDevice(name, new TestDriver11(name, Success::PASS_BOTH)));
1246 ASSERT_TRUE(selectDeviceByName(name));
1247
1248 createAddMulModel(&mModel, false);
1249 EXPECT_TRUE(isSupportedOpListExpected({true, true}));
1250 }
1251
createCondModel(WrapperModel * model,bool dynamicRank)1252 void createCondModel(WrapperModel* model, bool dynamicRank) {
1253 const auto dimensions = dynamicRank ? std::vector<uint32_t>{} : std::vector<uint32_t>{1};
1254 WrapperOperandType floatType(WrapperType::TENSOR_FLOAT32, dimensions);
1255 WrapperOperandType boolType(WrapperType::TENSOR_BOOL8, {1});
1256 // Phase 1, operands
1257 auto op1 = model->addOperand(&floatType);
1258 auto op2 = model->addOperand(&boolType);
1259 // Phase 2, operations
1260 model->addOperation(ANEURALNETWORKS_LESS, {op1, op1}, {op2});
1261 // Phase 3, inputs and outputs
1262 model->identifyInputsAndOutputs({op1}, {op2});
1263 model->finish();
1264 }
1265
void addReluOperation(WrapperModel* model, std::vector<uint32_t>* modelInputIndexes,
                      std::vector<uint32_t>* modelOutputIndexes, bool dynamicRank) {
    const auto dimensions = dynamicRank ? std::vector<uint32_t>{} : std::vector<uint32_t>{1};
    WrapperOperandType type(WrapperType::TENSOR_FLOAT32, dimensions);
    // Phase 1, operands
    auto op1 = model->addOperand(&type);
    auto op2 = model->addOperand(&type);
    // Phase 2, operations
    model->addOperation(ANEURALNETWORKS_RELU, {op1}, {op2});
    // Phase 3, inputs and outputs
    modelInputIndexes->push_back(op1);
    modelOutputIndexes->push_back(op2);
}

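// Creates a single-operation RELU model, used below as a WHILE body.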
void createReluModel(WrapperModel* model, bool dynamicRank) {
    std::vector<uint32_t> modelInputIndexes, modelOutputIndexes;
    addReluOperation(model, &modelInputIndexes, &modelOutputIndexes, dynamicRank);
    model->identifyInputsAndOutputs(modelInputIndexes, modelOutputIndexes);
    model->finish();
}

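// Appends a WHILE operation to `mainModel`, building its condition graph (LESS) and
// body graph (RELU) in `extraModels`. The WHILE inputs are the condition model, the
// body model, and an initial state operand; its output is the final state. Because
// the main model references the extra models, `extraModels` must outlive `mainModel`.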
void addWhileOperation(std::vector<WrapperModel>* extraModels, WrapperModel* mainModel,
                       std::vector<uint32_t>* modelInputIndexes,
                       std::vector<uint32_t>* modelOutputIndexes, bool dynamicRank) {
    const auto dimensions = dynamicRank ? std::vector<uint32_t>{} : std::vector<uint32_t>{1};
    WrapperOperandType floatType(WrapperType::TENSOR_FLOAT32, dimensions);
    WrapperOperandType modelType(WrapperType::MODEL, {});

    extraModels->emplace_back();
    extraModels->emplace_back();
    WrapperModel* condModel = &extraModels->at(extraModels->size() - 2);
    WrapperModel* bodyModel = &extraModels->at(extraModels->size() - 1);
    createCondModel(condModel, dynamicRank);
    createReluModel(bodyModel, dynamicRank);
    ASSERT_TRUE(condModel->isValid());
    ASSERT_TRUE(bodyModel->isValid());

    // Phase 1, operands
    const uint32_t op1 = mainModel->addOperand(&modelType);
    const uint32_t op2 = mainModel->addOperand(&modelType);
    const uint32_t op3 = mainModel->addOperand(&floatType);
    const uint32_t op4 = mainModel->addOperand(&floatType);
    mainModel->setOperandValueFromModel(op1, condModel);
    mainModel->setOperandValueFromModel(op2, bodyModel);
    // Phase 2, operations
    mainModel->addOperation(ANEURALNETWORKS_WHILE, {op1, op2, op3}, {op4});
    // Phase 3, inputs and outputs
    modelInputIndexes->push_back(op3);
    modelOutputIndexes->push_back(op4);
}

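// Creates a model with two independent operations: a RELU (supported since Android
// API level 27) and a static-rank WHILE (supported since Android API level 30).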
void createReluStaticWhileModel(std::vector<WrapperModel>* extraModels, WrapperModel* mainModel) {
    std::vector<uint32_t> modelInputIndexes, modelOutputIndexes;

    // Operation supported in Android API level 27
    addReluOperation(mainModel, &modelInputIndexes, &modelOutputIndexes, /*dynamicRank=*/false);
    // Operation supported in Android API level 30
    addWhileOperation(extraModels, mainModel, &modelInputIndexes, &modelOutputIndexes,
                      /*dynamicRank=*/false);

    mainModel->identifyInputsAndOutputs(modelInputIndexes, modelOutputIndexes);
    mainModel->finish();
    ASSERT_TRUE(mainModel->isValid());
}

TEST_F(IntrospectionControlTest, ControlFlowNotSupported) {
    // This is needed until the CPU fallback path is treated as a Device.
    if (DeviceManager::get()->getUseCpuOnly()) {
        GTEST_SKIP();
    }

    using namespace test_drivers;

    static const char name[] = "driver11";
    DeviceManager::get()->forTest_registerDevice(
            nn::makeSharedDevice(name, new TestDriver11(name, Success::PASS_BOTH)));
    ASSERT_TRUE(selectDeviceByName(name));

    std::vector<WrapperModel> extraModels;
    createReluStaticWhileModel(&extraModels, &mModel);
    EXPECT_TRUE(isSupportedOpListExpected({true, false}));

    // Clear mModel early because it may reference `extraModels`.
    mModel = WrapperModel{};
}

TEST_F(IntrospectionControlTest, ControlFlowSupported) {
    // This is needed until the CPU fallback path is treated as a Device.
    if (DeviceManager::get()->getUseCpuOnly()) {
        GTEST_SKIP();
    }

    using namespace test_drivers;

    static const char name[] = "driver13";
    DeviceManager::get()->forTest_registerDevice(
            nn::makeSharedDevice(name, new TestDriver13(name, Success::PASS_BOTH)));
    ASSERT_TRUE(selectDeviceByName(name));

    std::vector<WrapperModel> extraModels;
    createReluStaticWhileModel(&extraModels, &mModel);
    EXPECT_TRUE(isSupportedOpListExpected({true, true}));

    // Clear mModel early because it may reference `extraModels`.
    mModel = WrapperModel{};
}

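// Creates a model with two independent WHILE operations: one whose operands have
// static rank (supported by a 1.3 driver) and one whose operands have dynamic rank
// (supported only by the NNAPI runtime).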
void createStaticWhileDynamicWhileModel(std::vector<WrapperModel>* extraModels,
                                        WrapperModel* mainModel) {
    std::vector<uint32_t> modelInputIndexes, modelOutputIndexes;

    // Operation supported in Android API level 30
    addWhileOperation(extraModels, mainModel, &modelInputIndexes, &modelOutputIndexes,
                      /*dynamicRank=*/false);
    // Operation supported only by the NNAPI runtime
    addWhileOperation(extraModels, mainModel, &modelInputIndexes, &modelOutputIndexes,
                      /*dynamicRank=*/true);

    mainModel->identifyInputsAndOutputs(modelInputIndexes, modelOutputIndexes);
    mainModel->finish();
    ASSERT_TRUE(mainModel->isValid());
}

TEST_F(IntrospectionControlTest, ControlFlowFailedToSlice) {
    // This is needed until the CPU fallback path is treated as a Device.
    if (DeviceManager::get()->getUseCpuOnly()) {
        GTEST_SKIP();
    }

    using namespace test_drivers;

    static const char name[] = "driver13";
    DeviceManager::get()->forTest_registerDevice(
            nn::makeSharedDevice(name, new TestDriver13(name, Success::PASS_BOTH)));
    ASSERT_TRUE(selectDeviceByName(name));

    std::vector<WrapperModel> extraModels;
    createStaticWhileDynamicWhileModel(&extraModels, &mModel);
    EXPECT_TRUE(isSupportedOpListExpected({false, false}));

    // Clear mModel early because it may reference `extraModels`.
    mModel = WrapperModel{};
}

// TODO(miaowang): add a test to make sure ANNCompilation_create() has CPU fallback.
// This test verifies that a device that can only handle ADD correctly reports that an
// ADD->MUL model cannot be fully supported.
TEST_F(IntrospectionControlTest, PartialModelNotSupported) {
    // This is needed until the CPU fallback path is treated as a Device.
    if (DeviceManager::get()->getUseCpuOnly()) {
        GTEST_SKIP();
    }

    createAddMulModel(&mModel, false);

    std::string addOnlyDriver = "test-onlyAdd";
    std::vector<bool> addOnlyOp(android::nn::kNumberOfOperationTypes, false);
    addOnlyOp[ANEURALNETWORKS_ADD] = true;

    registerDevices({{addOnlyDriver, 0.9, addOnlyOp}});

    EXPECT_TRUE(selectDeviceByName(addOnlyDriver));
    EXPECT_TRUE(isSupportedOpListExpected({true, false}));

    ANeuralNetworksModel* modelHandle = mModel.getHandle();
    EXPECT_EQ(ANeuralNetworksCompilation_createForDevices(modelHandle, mDevices.data(),
                                                          mDevices.size(), &mCompilation),
              ANEURALNETWORKS_NO_ERROR);
    // The compilation must fail because there is no fallback when using the
    // Introspection API.
    EXPECT_NE(ANeuralNetworksCompilation_finish(mCompilation), ANEURALNETWORKS_NO_ERROR);
}

// This test verifies that a device that can only handle ADD correctly reports that an
// ADD->MUL model cannot be fully supported. It also verifies that the indices in the
// returned supported-op list map to the order in which the user added the operations.
TEST_F(IntrospectionControlTest, PartialModelNotSupportedOrder) {
    // This is needed until the CPU fallback path is treated as a Device.
    if (DeviceManager::get()->getUseCpuOnly()) {
        GTEST_SKIP();
    }

    createAddMulModel(&mModel, true);

    std::string addOnlyDriver = "test-onlyAdd";
    std::vector<bool> addOnlyOp(android::nn::kNumberOfOperationTypes, false);
    addOnlyOp[ANEURALNETWORKS_ADD] = true;

    registerDevices({{addOnlyDriver, 0.9, addOnlyOp}});

    EXPECT_TRUE(selectDeviceByName(addOnlyDriver));
    EXPECT_TRUE(isSupportedOpListExpected({false, true}));
}

// TODO(miaowang): update the test to make sure the model actually runs on the test devices.
// This test verifies that an ADD->MUL model can run on two selected devices that together
// handle all of its operations.
TEST_F(IntrospectionControlTest, ModelNeedTwoDevices) {
    // This is needed until the CPU fallback path is treated as a Device.
    if (DeviceManager::get()->getUseCpuOnly()) {
        GTEST_SKIP();
    }

    createAddMulModel(&mModel, false);

    std::string addOnlyDriver = "test-onlyAdd";
    std::vector<bool> addOnlyOp(android::nn::kNumberOfOperationTypes, false);
    addOnlyOp[ANEURALNETWORKS_ADD] = true;

    std::string mulOnlyDriver = "test-onlyMul";
    std::vector<bool> mulOnlyOp(android::nn::kNumberOfOperationTypes, false);
    mulOnlyOp[ANEURALNETWORKS_MUL] = true;

    registerDevices({
            {addOnlyDriver, 0.9, addOnlyOp},
            {mulOnlyDriver, 0.9, mulOnlyOp},
    });

    EXPECT_TRUE(selectDeviceByName(addOnlyDriver));
    EXPECT_TRUE(selectDeviceByName(mulOnlyDriver));
    EXPECT_TRUE(isSupportedOpListExpected({true, true}));
    EXPECT_EQ(prepareForExecution(), ANEURALNETWORKS_NO_ERROR);

    float input1[2] = {1.0f, 2.0f};
    float input2[2] = {3.0f, 4.0f};
    float output[2];
    EXPECT_EQ(ANeuralNetworksExecution_setInput(mExecution, 0, nullptr, input1, sizeof(input1)),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_EQ(ANeuralNetworksExecution_setInput(mExecution, 1, nullptr, input2, sizeof(input2)),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_EQ(ANeuralNetworksExecution_setOutput(mExecution, 0, nullptr, output, sizeof(output)),
              ANEURALNETWORKS_NO_ERROR);

    EXPECT_EQ(ANeuralNetworksExecution_startCompute(mExecution, &mEvent), ANEURALNETWORKS_NO_ERROR);
    EXPECT_EQ(ANeuralNetworksEvent_wait(mEvent), ANEURALNETWORKS_NO_ERROR);
    EXPECT_EQ(output[0], kSimpleMultiplier * (input1[0] + input2[0]));
    EXPECT_EQ(output[1], kSimpleMultiplier * (input1[1] + input2[1]));
}
}  // namespace

#if defined(NN_DEBUGGABLE) && !defined(NNTEST_ONLY_PUBLIC_API)

void forTest_setRuntimeFeatureLevel(int64_t level);  // defined in NeuralNetworks.cpp

namespace {
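// Verifies how an override of the runtime feature level (via
// forTest_setRuntimeFeatureLevel) affects the feature level reported for a device.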
class WhiteboxFeatureLevelTest : public IntrospectionControlTest {
   protected:
    void TearDown() override {
        forTest_setRuntimeFeatureLevel(0);
        IntrospectionControlTest::TearDown();
    }

   public:
    enum DeviceLevel { V1_1, V1_3 };
    void trial(int64_t setRuntimeFeatureLevel, DeviceLevel deviceLevel,
               int64_t expectDeviceFeatureLevel);
};

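// Registers a test driver of the given HAL version, overrides the runtime feature
// level, and checks that ANeuralNetworksDevice_getFeatureLevel reports
// expectDeviceFeatureLevel for the registered device.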
void WhiteboxFeatureLevelTest::trial(int64_t setRuntimeFeatureLevel, DeviceLevel deviceLevel,
                                     int64_t expectDeviceFeatureLevel) {
    // This is needed until the CPU fallback path is treated as a Device.
    if (DeviceManager::get()->getUseCpuOnly()) {
        GTEST_SKIP();
    }

    using namespace test_drivers;

    forTest_setRuntimeFeatureLevel(setRuntimeFeatureLevel);

    static const char deviceName[] = "trial";
    auto newTestDriver = [deviceLevel]() -> V1_0::IDevice* {
        switch (deviceLevel) {
            case DeviceLevel::V1_1:
                return new TestDriver11(deviceName, Success::PASS_BOTH_BOTH);
            case DeviceLevel::V1_3:
                return new TestDriver13(deviceName, Success::PASS_BOTH_BOTH);
            default:
                assert(!"Unrecognized deviceLevel");
                return nullptr;
        }
    };
    DeviceManager::get()->forTest_registerDevice(nn::makeSharedDevice(deviceName, newTestDriver()));

    ASSERT_TRUE(selectDeviceByName(deviceName));
    int64_t deviceFeatureLevel;
    ASSERT_EQ(mDevices.size(), size_t(1));
    ASSERT_EQ(ANeuralNetworksDevice_getFeatureLevel(mDevices.front(), &deviceFeatureLevel),
              ANEURALNETWORKS_NO_ERROR);
    ASSERT_EQ(deviceFeatureLevel, expectDeviceFeatureLevel);
}

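// The trials below are consistent with the reported device feature level being the
// device's native level (FL2 for a V1_1 driver, FL4 for a V1_3 driver) capped by the
// overridden runtime feature level, where a runtime level of 0 keeps the default.
// A plausible formulation, inferred from these cases rather than asserted elsewhere
// in this file:
//   reported = (runtimeLevel == 0) ? nativeLevel : std::min(runtimeLevel, nativeLevel);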
TEST_F(WhiteboxFeatureLevelTest, Default_V1_1) {
    trial(0, DeviceLevel::V1_1, ANEURALNETWORKS_FEATURE_LEVEL_2);
}

TEST_F(WhiteboxFeatureLevelTest, FL3_V1_1) {
    trial(ANEURALNETWORKS_FEATURE_LEVEL_3, DeviceLevel::V1_1, ANEURALNETWORKS_FEATURE_LEVEL_2);
}

TEST_F(WhiteboxFeatureLevelTest, FL2_V1_1) {
    trial(ANEURALNETWORKS_FEATURE_LEVEL_2, DeviceLevel::V1_1, ANEURALNETWORKS_FEATURE_LEVEL_2);
}

TEST_F(WhiteboxFeatureLevelTest, FL1_V1_1) {
    trial(ANEURALNETWORKS_FEATURE_LEVEL_1, DeviceLevel::V1_1, ANEURALNETWORKS_FEATURE_LEVEL_1);
}

TEST_F(WhiteboxFeatureLevelTest, Default_V1_3) {
    trial(0, DeviceLevel::V1_3, ANEURALNETWORKS_FEATURE_LEVEL_4);
}

TEST_F(WhiteboxFeatureLevelTest, FL5_V1_3) {
    trial(ANEURALNETWORKS_FEATURE_LEVEL_5, DeviceLevel::V1_3, ANEURALNETWORKS_FEATURE_LEVEL_4);
}

TEST_F(WhiteboxFeatureLevelTest, FL4_V1_3) {
    trial(ANEURALNETWORKS_FEATURE_LEVEL_4, DeviceLevel::V1_3, ANEURALNETWORKS_FEATURE_LEVEL_4);
}

TEST_F(WhiteboxFeatureLevelTest, FL3_V1_3) {
    trial(ANEURALNETWORKS_FEATURE_LEVEL_3, DeviceLevel::V1_3, ANEURALNETWORKS_FEATURE_LEVEL_3);
}

TEST_F(WhiteboxFeatureLevelTest, FL2_V1_3) {
    trial(ANEURALNETWORKS_FEATURE_LEVEL_2, DeviceLevel::V1_3, ANEURALNETWORKS_FEATURE_LEVEL_2);
}
}  // namespace

#endif  // defined(NN_DEBUGGABLE) && !defined(NNTEST_ONLY_PUBLIC_API)