/*
 * Copyright (C) 2018 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 
17 #include <ExecutionBurstServer.h>
18 #include <HalInterfaces.h>
19 #include <SampleDriver.h>
20 #include <ValidateHal.h>
21 #include <gtest/gtest.h>
22 
23 #include <algorithm>
24 #include <cassert>
25 #include <chrono>
26 #include <iterator>
27 #include <map>
28 #include <queue>
29 #include <set>
30 #include <string>
31 #include <thread>
32 #include <tuple>
33 #include <utility>
34 #include <vector>
35 
36 #include "CompilationBuilder.h"
37 #include "HalUtils.h"
38 #include "Manager.h"
39 #include "NeuralNetworks.h"
40 #include "NeuralNetworksOEM.h"
41 #include "TestNeuralNetworksWrapper.h"
42 
43 namespace {
44 
45 using namespace ::android;
46 namespace V1_0 = ::android::hardware::neuralnetworks::V1_0;
47 namespace V1_1 = ::android::hardware::neuralnetworks::V1_1;
48 namespace V1_2 = ::android::hardware::neuralnetworks::V1_2;
49 namespace V1_3 = ::android::hardware::neuralnetworks::V1_3;
50 
51 using CompilationBuilder = nn::CompilationBuilder;
52 using Device = nn::Device;
53 using DeviceManager = nn::DeviceManager;
54 using ExecutePreference = nn::test_wrapper::ExecutePreference;
55 using ExecutionBurstServer = nn::ExecutionBurstServer;
56 using HidlModel = V1_3::Model;
57 using Result = nn::test_wrapper::Result;
58 using SampleDriver = nn::sample_driver::SampleDriver;
59 using SamplePreparedModel = nn::sample_driver::SamplePreparedModel;
60 using SampleFencedExecutionCallback = nn::sample_driver::SampleFencedExecutionCallback;
61 using WrapperModel = nn::test_wrapper::Model;
62 using WrapperOperandType = nn::test_wrapper::OperandType;
63 using WrapperType = nn::test_wrapper::Type;
64 using nn::convertToV1_0;
65 using nn::convertToV1_3;
66 
67 template <typename T>
68 using MQDescriptorSync = hardware::MQDescriptorSync<T>;
69 
// Sentinel timing: UINT64_MAX in both fields means "timing not available".
constexpr V1_2::Timing kBadTiming = {.timeOnDevice = UINT64_MAX, .timeInDriver = UINT64_MAX};
// Canned timings that the test drivers report for successful unfenced/fenced executions.
constexpr V1_2::Timing kGoodUnfencedTiming = {.timeOnDevice = 123, .timeInDriver = 456};
constexpr V1_2::Timing kGoodFencedTiming = {.timeOnDevice = 23, .timeInDriver = 56};
73 
74 // This is an IDevice for testing purposes. The test driver has customized
75 // getCapabilities_1_3 and getSupportedOperations_1_3.
76 class TestDriver : public SampleDriver {
77    public:
TestDriver(const char * name,V1_3::Capabilities capabilities,const std::vector<bool> & supportedOps)78     TestDriver(const char* name, V1_3::Capabilities capabilities,
79                const std::vector<bool>& supportedOps)
80         : SampleDriver(name), mCapabilities(capabilities), mSupportedOps(supportedOps) {}
~TestDriver()81     ~TestDriver() override {}
82 
getCapabilities_1_3(getCapabilities_1_3_cb cb)83     hardware::Return<void> getCapabilities_1_3(getCapabilities_1_3_cb cb) override {
84         cb(V1_3::ErrorStatus::NONE, mCapabilities);
85         return hardware::Void();
86     }
87 
getSupportedOperations_1_3(const V1_3::Model & model,getSupportedOperations_1_3_cb cb)88     hardware::Return<void> getSupportedOperations_1_3(const V1_3::Model& model,
89                                                       getSupportedOperations_1_3_cb cb) override {
90         if (!android::nn::validateModel(model)) {
91             cb(V1_3::ErrorStatus::INVALID_ARGUMENT, std::vector<bool>());
92             return hardware::Void();
93         }
94         const size_t count = model.main.operations.size();
95         std::vector<bool> supported(count);
96         std::transform(model.main.operations.begin(), model.main.operations.end(),
97                        supported.begin(), [this](V1_3::Operation op) {
98                            return mSupportedOps[static_cast<int32_t>(op.type)];
99                        });
100         cb(V1_3::ErrorStatus::NONE, supported);
101         return hardware::Void();
102     }
103 
104    private:
105     V1_3::Capabilities mCapabilities;
106     std::vector<bool> mSupportedOps;
107 };
108 
109 class IntrospectionControlTest : public ::testing::Test {
110    protected:
SetUp()111     void SetUp() override {}
TearDown()112     void TearDown() override {
113         if (mEvent) {
114             ANeuralNetworksEvent_free(mEvent);
115         }
116         if (mExecution) {
117             ANeuralNetworksExecution_free(mExecution);
118         }
119         if (mCompilation) {
120             ANeuralNetworksCompilation_free(mCompilation);
121         }
122         DeviceManager::get()->forTest_reInitializeDeviceList();
123     }
124 
125     struct DeviceSpecification {
DeviceSpecification__anon3a0b5fa30111::IntrospectionControlTest::DeviceSpecification126         DeviceSpecification(const std::string& name, float perf, std::vector<bool>& supportedOps)
127             : mName(name), mSupportedOps(supportedOps) {
128             V1_0::PerformanceInfo perfInfo = {.execTime = perf, .powerUsage = perf};
129             mCapabilities = {
130                     .relaxedFloat32toFloat16PerformanceScalar = perfInfo,
131                     .relaxedFloat32toFloat16PerformanceTensor = perfInfo,
132                     .operandPerformance =
133                             nn::nonExtensionOperandPerformance<nn::HalVersion::V1_3>(perfInfo),
134                     .ifPerformance = perfInfo,
135                     .whilePerformance = perfInfo};
136         }
137         std::string mName;
138         V1_3::Capabilities mCapabilities;
139         std::vector<bool> mSupportedOps;
140     };
141 
142     // From a vector of DeviceSpecification, register new Devices.
registerDevices(std::vector<DeviceSpecification> specifications)143     void registerDevices(std::vector<DeviceSpecification> specifications) {
144         for (const auto& specification : specifications) {
145             DeviceManager::get()->forTest_registerDevice(nn::makeSharedDevice(
146                     specification.mName.c_str(),
147                     new TestDriver(specification.mName.c_str(), specification.mCapabilities,
148                                    specification.mSupportedOps)));
149         }
150     }
151 
selectDeviceByName(const std::string & name)152     bool selectDeviceByName(const std::string& name) {
153         uint32_t numDevices = 0;
154         EXPECT_EQ(ANeuralNetworks_getDeviceCount(&numDevices), ANEURALNETWORKS_NO_ERROR);
155         EXPECT_GE(numDevices, (uint32_t)1);
156 
157         for (uint32_t i = 0; i < numDevices; i++) {
158             ANeuralNetworksDevice* device = nullptr;
159             EXPECT_EQ(ANeuralNetworks_getDevice(i, &device), ANEURALNETWORKS_NO_ERROR);
160             const char* buffer = nullptr;
161             int result = ANeuralNetworksDevice_getName(device, &buffer);
162             if (result == ANEURALNETWORKS_NO_ERROR && name.compare(buffer) == 0) {
163                 mDevices.push_back(device);
164                 return true;
165             }
166         }
167         return false;
168     }
169 
isSupportedOpListExpected(const std::vector<bool> & expected)170     bool isSupportedOpListExpected(const std::vector<bool>& expected) {
171         const uint32_t kMaxNumberOperations = 256;
172         EXPECT_LE(expected.size(), kMaxNumberOperations);
173         ANeuralNetworksModel* modelHandle = mModel.getHandle();
174         bool supported[kMaxNumberOperations] = {false};
175         EXPECT_EQ(ANeuralNetworksModel_getSupportedOperationsForDevices(
176                           modelHandle, mDevices.data(), mDevices.size(), supported),
177                   ANEURALNETWORKS_NO_ERROR);
178         return std::equal(expected.begin(), expected.end(), supported);
179     }
180 
prepareForExecution(bool measureTiming=false)181     int prepareForExecution(bool measureTiming = false) {
182         ANeuralNetworksModel* modelHandle = mModel.getHandle();
183         int result = ANeuralNetworksCompilation_createForDevices(modelHandle, mDevices.data(),
184                                                                  mDevices.size(), &mCompilation);
185         if (result != ANEURALNETWORKS_NO_ERROR) {
186             return result;
187         }
188         EXPECT_EQ(ANeuralNetworksCompilation_finish(mCompilation), ANEURALNETWORKS_NO_ERROR);
189         EXPECT_EQ(ANeuralNetworksExecution_create(mCompilation, &mExecution),
190                   ANEURALNETWORKS_NO_ERROR);
191         if (measureTiming) {
192             // Don't call setMeasureTiming unless we need to -- cannot call this
193             // API unless there is exactly one device.
194             EXPECT_EQ(ANeuralNetworksExecution_setMeasureTiming(mExecution, true),
195                       ANEURALNETWORKS_NO_ERROR);
196         }
197         return ANEURALNETWORKS_NO_ERROR;
198     }
199 
200     std::vector<ANeuralNetworksDevice*> mDevices;
201     ANeuralNetworksEvent* mEvent = nullptr;
202     ANeuralNetworksExecution* mExecution = nullptr;
203     ANeuralNetworksCompilation* mCompilation = nullptr;
204     WrapperModel mModel;
205 };
206 
createSimpleAddModel(WrapperModel * model)207 void createSimpleAddModel(WrapperModel* model) {
208     WrapperOperandType type0(WrapperType::TENSOR_FLOAT32, {2});
209     WrapperOperandType type1(WrapperType::INT32, {});
210     // Phase 1, operands
211     auto op1 = model->addOperand(&type0);
212     auto op2 = model->addOperand(&type0);
213     auto act = model->addOperand(&type1);
214     auto op3 = model->addOperand(&type0);
215     // Phase 2, operations
216     static int32_t act_init[] = {0};
217     model->setOperandValue(act, act_init, sizeof(act_init));
218     model->addOperation(ANEURALNETWORKS_ADD, {op1, op2, act}, {op3});
219     // Phase 3, inputs and outputs
220     model->identifyInputsAndOutputs({op1, op2}, {op3});
221     model->finish();
222     ASSERT_TRUE(model->isValid());
223 }
224 
// This test verifies that a simple ADD model is able to run on a single device that claims being
// able to handle all operations.
TEST_F(IntrospectionControlTest, SimpleAddModel) {
    // This is needed before we have the CPU fallback path being treated as a Device.
    // TODO(miaowang): remove once b/72506261 is fixed.
    if (DeviceManager::get()->getUseCpuOnly()) {
        GTEST_SKIP();
    }

    createSimpleAddModel(&mModel);

    // Register one driver that claims to support every operation type.
    std::string driverName = "test-all";
    std::vector<bool> ops(android::nn::kNumberOfOperationTypes, true);
    registerDevices({{driverName, 0.9, ops}});

    EXPECT_TRUE(selectDeviceByName(driverName));
    EXPECT_TRUE(isSupportedOpListExpected({true}));
    EXPECT_EQ(prepareForExecution(), ANEURALNETWORKS_NO_ERROR);

    // Verify that the mCompilation is actually using the "test-all" device.
    CompilationBuilder* c = reinterpret_cast<CompilationBuilder*>(mCompilation);
    const std::string& deviceNameBuffer =
            c->forTest_getExecutionPlan().forTest_simpleGetDevice()->getName();
    EXPECT_EQ(driverName, deviceNameBuffer);

    // Bind inputs/outputs and request timing measurement.
    float input1[2] = {1.0f, 2.0f};
    float input2[2] = {3.0f, 4.0f};
    float output[2];
    EXPECT_EQ(ANeuralNetworksExecution_setInput(mExecution, 0, nullptr, input1, sizeof(input1)),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_EQ(ANeuralNetworksExecution_setInput(mExecution, 1, nullptr, input2, sizeof(input2)),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_EQ(ANeuralNetworksExecution_setOutput(mExecution, 0, nullptr, output, sizeof(output)),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_EQ(ANeuralNetworksExecution_setMeasureTiming(mExecution, true),
              ANEURALNETWORKS_NO_ERROR);

    // Run asynchronously, wait, and check the element-wise sums.
    EXPECT_EQ(ANeuralNetworksExecution_startCompute(mExecution, &mEvent), ANEURALNETWORKS_NO_ERROR);
    EXPECT_EQ(ANeuralNetworksEvent_wait(mEvent), ANEURALNETWORKS_NO_ERROR);
    EXPECT_EQ(output[0], input1[0] + input2[0]);
    EXPECT_EQ(output[1], input1[1] + input2[1]);

    // Timing queries: UINT64_MAX means "not available". When both durations
    // are reported, time on hardware cannot exceed time in the driver.
    uint64_t timeOnHardware, timeInDriver;
    EXPECT_EQ(ANeuralNetworksExecution_getDuration(mExecution, ANEURALNETWORKS_DURATION_ON_HARDWARE,
                                                   &timeOnHardware),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_EQ(ANeuralNetworksExecution_getDuration(mExecution, ANEURALNETWORKS_DURATION_IN_DRIVER,
                                                   &timeInDriver),
              ANEURALNETWORKS_NO_ERROR);
    if (timeOnHardware != UINT64_MAX && timeInDriver != UINT64_MAX) {
        EXPECT_LE(timeOnHardware, timeInDriver);
    }
}
278 
279 /*-- Begin test drivers -------------------------------------------------------------------------*/
280 
281 namespace test_drivers {
282 
// Encodes how a test driver behaves for each execution path (ASYNC, SYNC,
// BURST, FENCED) and which of the four timing fields it reports.
enum class Success : uint32_t {
    // ASYNC: Return ErrorStatus::GENERAL_FAILURE; notify ErrorStatus::GENERAL_FAILURE and
    // kBadTiming
    // SYNC, BURST: Return ErrorStatus::GENERAL_FAILURE and kBadTiming
    // FENCED: Return ErrorStatus::GENERAL_FAILURE, empty hidl_handle, and a nullptr callback
    FAIL_LAUNCH,

    // ASYNC: Return ErrorStatus::NONE; notify ErrorStatus::GENERAL_FAILURE and kBadTiming
    FAIL_WAIT,

    // Bit representation for PASS: One bit set to indicate PASS rather than
    // FAIL, one bit for each of the four timing fields (Unfenced, Fenced) x
    // (OnDevice, InDriver) to distinguish between unavailable timing (bit is
    // clear) and available timing (bit is set), and one bit to call out the
    // special case of CPU.
    PASS_BIT = 1 << 4,
    PASS_UNFENCED_DEVICE_BIT = 1 << 5,
    PASS_UNFENCED_DRIVER_BIT = 1 << 6,
    PASS_FENCED_DEVICE_BIT = 1 << 7,
    PASS_FENCED_DRIVER_BIT = 1 << 8,
    PASS_CPU_BIT = 1 << 9,

    // Each of the four timing fields may be either unavailable or 0
    PASS_CPU = PASS_BIT | PASS_CPU_BIT,

    // ASYNC: Return ErrorStatus::NONE; notify ErrorStatus::NONE and timing
    // SYNC, BURST: Return ErrorStatus::NONE and timing
    // FENCED: Return ErrorStatus::NONE, empty hidl_handle, and a callback with timing.
    //
    // For each PASS other than PASS_CPU, an enum name has the form
    // PASS_${UNFENCED_TIME}_${FENCED_TIME}.  For example, PASS_NEITHER_BOTH
    // means that only fenced timing is available (both timeOnDevice and
    // timeInDriver).  If _${FENCED_TIME} is omitted, it is equivalent to
    // _NEITHER; so PASS_BOTH means that only unfenced timing is available (both
    // timeOnDevice and timeInDriver).
    PASS_NEITHER = PASS_BIT,
    PASS_DEVICE = PASS_BIT | PASS_UNFENCED_DEVICE_BIT,
    PASS_DRIVER = PASS_BIT | PASS_UNFENCED_DRIVER_BIT,
    PASS_BOTH = PASS_BIT | PASS_UNFENCED_DEVICE_BIT | PASS_UNFENCED_DRIVER_BIT,
    PASS_NEITHER_DEVICE = PASS_BIT | PASS_FENCED_DEVICE_BIT,
    PASS_NEITHER_DRIVER = PASS_BIT | PASS_FENCED_DRIVER_BIT,
    PASS_NEITHER_BOTH = PASS_BIT | PASS_FENCED_DEVICE_BIT | PASS_FENCED_DRIVER_BIT,
    PASS_DEVICE_DEVICE = PASS_DEVICE | PASS_NEITHER_DEVICE,
    PASS_DEVICE_DRIVER = PASS_DEVICE | PASS_NEITHER_DRIVER,
    PASS_DEVICE_BOTH = PASS_DEVICE | PASS_NEITHER_BOTH,
    PASS_DRIVER_DEVICE = PASS_DRIVER | PASS_NEITHER_DEVICE,
    PASS_DRIVER_DRIVER = PASS_DRIVER | PASS_NEITHER_DRIVER,
    PASS_DRIVER_BOTH = PASS_DRIVER | PASS_NEITHER_BOTH,
    PASS_BOTH_DEVICE = PASS_BOTH | PASS_NEITHER_DEVICE,
    PASS_BOTH_DRIVER = PASS_BOTH | PASS_NEITHER_DRIVER,
    PASS_BOTH_BOTH = PASS_BOTH | PASS_NEITHER_BOTH,
};
335 
hasBit(Success mask,Success bit)336 bool hasBit(Success mask, Success bit) {
337     const uint32_t bitAsInt = static_cast<uint32_t>(bit);
338     CHECK(bitAsInt && (bitAsInt & (bitAsInt - 1)) == 0)
339             << "second argument must be a single bit rather than " << static_cast<uint32_t>(bit);
340     return static_cast<uint32_t>(mask) & bitAsInt;
341 }
342 
clearBit(Success mask,Success bit)343 Success clearBit(Success mask, Success bit) {
344     const uint32_t bitAsInt = static_cast<uint32_t>(bit);
345     CHECK(bitAsInt && (bitAsInt & (bitAsInt - 1)) == 0)
346             << "second argument must be a single bit rather than " << static_cast<uint32_t>(bit);
347     return static_cast<Success>(static_cast<uint32_t>(mask) & ~bitAsInt);
348 }
349 
operator <<(std::ostream & os,Success success)350 std::ostream& operator<<(std::ostream& os, Success success) {
351     switch (success) {
352         case Success::FAIL_LAUNCH:
353             return os << "FAIL_LAUNCH";
354         case Success::FAIL_WAIT:
355             return os << "FAIL_WAIT";
356         case Success::PASS_CPU:
357             return os << "PASS_CPU";
358         default:
359             break;
360     }
361 
362     static const std::vector<std::pair<Success, const char*>> bits = {
363             {Success::PASS_BIT, "PASS"},
364             {Success::PASS_UNFENCED_DEVICE_BIT, "UNFENCED_DEVICE"},
365             {Success::PASS_UNFENCED_DRIVER_BIT, "UNFENCED_DRIVER"},
366             {Success::PASS_FENCED_DEVICE_BIT, "FENCED_DEVICE"},
367             {Success::PASS_FENCED_DRIVER_BIT, "FENCED_DRIVER"},
368     };
369     bool gotOutput = false;
370     for (const auto& b : bits) {
371         if (hasBit(success, b.first)) {
372             if (gotOutput) {
373                 os << '|';
374             } else {
375                 gotOutput = true;
376             }
377             os << b.second;
378             success = clearBit(success, b.first);
379         }
380     }
381     if (uint32_t successAsInt = static_cast<uint32_t>(success)) {
382         if (gotOutput) {
383             os << '|';
384         }
385         os << successAsInt;
386     }
387     return os;
388 }
389 
390 // Returns (unfenced timing, fenced timing).
391 // Not for PASS_CPU.
getExpectedTiming(Success s,bool fencedExecution)392 std::pair<V1_2::Timing, V1_2::Timing> getExpectedTiming(Success s, bool fencedExecution) {
393     CHECK_NE(s, Success::PASS_CPU);
394 
395     if (!hasBit(s, Success::PASS_BIT)) {
396         return {kBadTiming, kBadTiming};
397     }
398 
399     std::pair<V1_2::Timing, V1_2::Timing> result;
400     result.first.timeOnDevice = hasBit(s, Success::PASS_UNFENCED_DEVICE_BIT)
401                                         ? kGoodUnfencedTiming.timeOnDevice
402                                         : UINT64_MAX;
403     result.first.timeInDriver = hasBit(s, Success::PASS_UNFENCED_DRIVER_BIT)
404                                         ? kGoodUnfencedTiming.timeInDriver
405                                         : UINT64_MAX;
406     if (fencedExecution) {
407         result.second.timeOnDevice = hasBit(s, Success::PASS_FENCED_DEVICE_BIT)
408                                              ? kGoodFencedTiming.timeOnDevice
409                                              : UINT64_MAX;
410         result.second.timeInDriver = hasBit(s, Success::PASS_FENCED_DRIVER_BIT)
411                                              ? kGoodFencedTiming.timeInDriver
412                                              : UINT64_MAX;
413     } else {
414         result.second = result.first;
415     }
416     return result;
417 }
418 
// For these tests we don't care about actually running an inference -- we
// just want to placeholder up execution status and timing results, and control
// when the execution finishes.
class TestPreparedModelLatest : public SamplePreparedModel {
   public:
    TestPreparedModelLatest(const HidlModel& model, const SampleDriver* driver, Success success)
        : SamplePreparedModel(model, driver, V1_1::ExecutionPreference::FAST_SINGLE_ANSWER, uid_t{},
                              nn::kDefaultPriority13),
          mSuccess(success) {}

    // V1_0 asynchronous execution. The launch return status and the callback
    // notification are driven by mSuccess; see the Success enum for the contract.
    hardware::Return<V1_0::ErrorStatus> execute(
            const V1_0::Request&, const sp<V1_0::IExecutionCallback>& callback) override {
        switch (mSuccess) {
            case Success::PASS_NEITHER:
                // Succeed: notify NONE from a detached worker thread.
                std::thread([callback] {
                    dummyExecution();
                    callback->notify(V1_0::ErrorStatus::NONE);
                }).detach();
                return V1_0::ErrorStatus::NONE;
            case Success::FAIL_LAUNCH:
                // Fail at launch: both return value and notification report failure.
                dummyExecution();
                callback->notify(V1_0::ErrorStatus::GENERAL_FAILURE);
                return V1_0::ErrorStatus::GENERAL_FAILURE;
            case Success::FAIL_WAIT:
                // Launch succeeds, but the asynchronous work reports failure.
                std::thread([callback] {
                    dummyExecution();
                    callback->notify(V1_0::ErrorStatus::GENERAL_FAILURE);
                }).detach();
                return V1_0::ErrorStatus::NONE;
            default:
                ADD_FAILURE() << "Unexpected Success kind";
                return V1_0::ErrorStatus::GENERAL_FAILURE;
        }
    }

    // V1_2 asynchronous execution with timing. On PASS, notifies with the
    // unfenced timing computed from mSuccess by getExpectedTiming().
    hardware::Return<V1_0::ErrorStatus> execute_1_2(
            const V1_0::Request&, V1_2::MeasureTiming measure,
            const sp<V1_2::IExecutionCallback>& callback) override {
        EXPECT_EQ(measure, V1_2::MeasureTiming::YES);
        switch (mSuccess) {
            case Success::PASS_NEITHER:
            case Success::PASS_DEVICE:
            case Success::PASS_DRIVER:
            case Success::PASS_BOTH:
                std::thread([this, callback] {
                    dummyExecution();
                    callback->notify_1_2(V1_0::ErrorStatus::NONE, {},
                                         getExpectedTiming(mSuccess, false).first);
                }).detach();
                return V1_0::ErrorStatus::NONE;
            case Success::FAIL_LAUNCH:
                dummyExecution();
                callback->notify(V1_0::ErrorStatus::GENERAL_FAILURE);
                return V1_0::ErrorStatus::GENERAL_FAILURE;
            case Success::FAIL_WAIT:
                std::thread([callback] {
                    dummyExecution();
                    callback->notify(V1_0::ErrorStatus::GENERAL_FAILURE);
                }).detach();
                return V1_0::ErrorStatus::NONE;
            default:
                ADD_FAILURE() << "Unexpected Success kind";
                return V1_0::ErrorStatus::GENERAL_FAILURE;
        }
    }

    // V1_3 asynchronous execution: delegates to execute_1_2, converting the status.
    hardware::Return<V1_3::ErrorStatus> execute_1_3(
            const V1_3::Request&, V1_2::MeasureTiming measure, const V1_3::OptionalTimePoint&,
            const V1_3::OptionalTimeoutDuration&,
            const sp<V1_3::IExecutionCallback>& callback) override {
        // Use a placeholder V1_0::Request because execute_1_2 ignores request entirely.
        const V1_0::ErrorStatus status = execute_1_2(V1_0::Request{}, measure, callback);
        return convertToV1_3(status);
    }

    // V1_2 synchronous execution with timing, driven by mSuccess.
    hardware::Return<void> executeSynchronously(const V1_0::Request&, V1_2::MeasureTiming measure,
                                                executeSynchronously_cb cb) override {
        EXPECT_EQ(measure, V1_2::MeasureTiming::YES);
        switch (mSuccess) {
            case Success::PASS_NEITHER:
            case Success::PASS_DEVICE:
            case Success::PASS_DRIVER:
            case Success::PASS_BOTH:
                dummyExecution();
                cb(V1_0::ErrorStatus::NONE, {}, getExpectedTiming(mSuccess, false).first);
                return hardware::Void();
            case Success::FAIL_WAIT:
                // While this is a synchronous execution method, the NNAPI
                // runtime may call it even for asynchronous execution, so we
                // need to tolerate Success::FAIL_WAIT here, not just
                // Success::FAIL_LAUNCH.
                FALLTHROUGH_INTENDED;
            case Success::FAIL_LAUNCH:
                dummyExecution();
                cb(V1_0::ErrorStatus::GENERAL_FAILURE, {}, kBadTiming);
                return hardware::Void();
            default:
                ADD_FAILURE() << "Unexpected Success kind";
                cb(V1_0::ErrorStatus::GENERAL_FAILURE, {}, kBadTiming);
                return hardware::Void();
        }
    }

    // V1_3 synchronous execution: wraps the callback to convert the status and
    // delegates to executeSynchronously.
    hardware::Return<void> executeSynchronously_1_3(const V1_3::Request&,
                                                    V1_2::MeasureTiming measure,
                                                    const V1_3::OptionalTimePoint&,
                                                    const V1_3::OptionalTimeoutDuration&,
                                                    executeSynchronously_1_3_cb cb) override {
        const auto wrappedCb = [&cb](V1_0::ErrorStatus status,
                                     const hardware::hidl_vec<V1_2::OutputShape>& outputShapes,
                                     V1_2::Timing timing) {
            cb(convertToV1_3(status), outputShapes, timing);
        };
        // Use a placeholder V1_0::Request because executeSynchronously ignores request entirely.
        return executeSynchronously(V1_0::Request{}, measure, wrappedCb);
    }

    // ExecutionBurstServer::create has an overload that will use
    // IPreparedModel::executeSynchronously(), so we can rely on that, rather
    // than having to implement ExecutionBurstServer::IExecutorWithCache.
    hardware::Return<void> configureExecutionBurst(
            const sp<V1_2::IBurstCallback>& callback,
            const MQDescriptorSync<V1_2::FmqRequestDatum>& requestChannel,
            const MQDescriptorSync<V1_2::FmqResultDatum>& resultChannel,
            configureExecutionBurst_cb cb) override {
        const sp<V1_2::IBurstContext> burst = ExecutionBurstServer::create(
                callback, requestChannel, resultChannel, this, std::chrono::microseconds{0});

        cb(burst == nullptr ? V1_0::ErrorStatus::GENERAL_FAILURE : V1_0::ErrorStatus::NONE, burst);
        return hardware::Void();
    }

    // Fenced execution: on PASS, returns an empty handle plus a callback that
    // reports both the unfenced and fenced expected timings.
    hardware::Return<void> executeFenced(const V1_3::Request&,
                                         const hardware::hidl_vec<hardware::hidl_handle>&,
                                         V1_2::MeasureTiming measure,
                                         const V1_3::OptionalTimePoint&,
                                         const V1_3::OptionalTimeoutDuration&,
                                         const V1_3::OptionalTimeoutDuration&,
                                         executeFenced_cb callback) override {
        EXPECT_EQ(measure, V1_2::MeasureTiming::YES);
        if (hasBit(mSuccess, Success::PASS_BIT)) {
            dummyExecution();
            const auto expectedTiming = getExpectedTiming(mSuccess, true);
            sp<SampleFencedExecutionCallback> fencedExecutionCallback =
                    new SampleFencedExecutionCallback(expectedTiming.first, expectedTiming.second,
                                                      V1_3::ErrorStatus::NONE);
            callback(V1_3::ErrorStatus::NONE, hardware::hidl_handle(nullptr),
                     fencedExecutionCallback);
            return hardware::Void();
        }
        switch (mSuccess) {
            case Success::FAIL_WAIT:
                // Due to the limitation of the SampleDriver,
                // FAIL_WAIT behaves the same as FAIL_LAUNCH.
                // If the SampleDriver is updated to return real
                // sync fences, this must be updated.
                FALLTHROUGH_INTENDED;
            case Success::FAIL_LAUNCH:
                dummyExecution();
                callback(V1_3::ErrorStatus::GENERAL_FAILURE, hardware::hidl_handle(nullptr),
                         nullptr);
                return hardware::Void();
            default:
                ADD_FAILURE() << "Unexpected Success kind";
                return hardware::Void();
        }
    }

    // We can place the TestPreparedModelLatest system in a "pause" mode where
    // no execution will complete until the system is taken out of that mode.
    // Initially, the system is not in that mode.
    static void pauseExecutions(bool v) { mPauseExecutions.store(v); }

    // This function is only guaranteed to work in the following pattern:
    // Consider thread A as primary thread
    // - thread A: pauseExecutions(true);
    // - thread A: launch execution (as thread B)
    // - thread A: waitForExecutionToBegin(), block until call to dummyExecution by
    //                                        thread B makes mExecutionsInFlight nonzero
    // - thread B: dummyExecution(), which makes mExecutionsInFlight nonzero and blocks
    //                               until thread A calls pauseExecutions(false)
    // - thread A: waitForExecutionToBegin() returns
    // - thread A: pauseExecutions(false), allowing dummyExecution() on thread B to continue
    // - thread B: dummyExecution() zeroes mExecutionsInFlight and returns
    // - thread B: thread exits
    static void waitForExecutionToBegin() {
        CHECK(mPauseExecutions.load());
        // Busy-wait until the worker thread enters dummyExecution().
        while (mExecutionsInFlight.load() == 0) {
        }
    }

   private:
    Success mSuccess;

    static std::atomic<bool> mPauseExecutions;
    static std::atomic<unsigned int> mExecutionsInFlight;

    // Marks an execution in flight, then busy-waits while paused. Only one
    // execution may be in flight at a time.
    static void dummyExecution() {
        CHECK_EQ(mExecutionsInFlight.fetch_add(1), 0u) << "We do not support concurrent executions";
        while (mPauseExecutions.load()) {
        }
        mExecutionsInFlight.fetch_sub(1);
    }
};
// Definitions of the static state shared by all TestPreparedModelLatest
// instances: initially unpaused, with no execution in flight.
std::atomic<bool> TestPreparedModelLatest::mPauseExecutions = false;
std::atomic<unsigned int> TestPreparedModelLatest::mExecutionsInFlight = 0;
625 
// The latest prepared model implements HAL 1.3, so it doubles as the 1.3 model.
using TestPreparedModel13 = TestPreparedModelLatest;
627 
628 // Like TestPreparedModelLatest, but implementing 1.2
629 class TestPreparedModel12 : public V1_2::IPreparedModel {
630    public:
TestPreparedModel12(const HidlModel & model,const SampleDriver * driver,Success success)631     TestPreparedModel12(const HidlModel& model, const SampleDriver* driver, Success success)
632         : mLatestPreparedModel(new TestPreparedModelLatest(model, driver, success)) {}
633 
execute(const V1_0::Request & request,const sp<V1_0::IExecutionCallback> & callback)634     hardware::Return<V1_0::ErrorStatus> execute(
635             const V1_0::Request& request, const sp<V1_0::IExecutionCallback>& callback) override {
636         return mLatestPreparedModel->execute(request, callback);
637     }
638 
execute_1_2(const V1_0::Request & request,V1_2::MeasureTiming measure,const sp<V1_2::IExecutionCallback> & callback)639     hardware::Return<V1_0::ErrorStatus> execute_1_2(
640             const V1_0::Request& request, V1_2::MeasureTiming measure,
641             const sp<V1_2::IExecutionCallback>& callback) override {
642         return mLatestPreparedModel->execute_1_2(request, measure, callback);
643     }
644 
executeSynchronously(const V1_0::Request & request,V1_2::MeasureTiming measure,executeSynchronously_cb cb)645     hardware::Return<void> executeSynchronously(const V1_0::Request& request,
646                                                 V1_2::MeasureTiming measure,
647                                                 executeSynchronously_cb cb) override {
648         return mLatestPreparedModel->executeSynchronously(request, measure, cb);
649     }
650 
configureExecutionBurst(const sp<V1_2::IBurstCallback> & callback,const MQDescriptorSync<V1_2::FmqRequestDatum> & requestChannel,const MQDescriptorSync<V1_2::FmqResultDatum> & resultChannel,configureExecutionBurst_cb cb)651     hardware::Return<void> configureExecutionBurst(
652             const sp<V1_2::IBurstCallback>& callback,
653             const MQDescriptorSync<V1_2::FmqRequestDatum>& requestChannel,
654             const MQDescriptorSync<V1_2::FmqResultDatum>& resultChannel,
655             configureExecutionBurst_cb cb) override {
656         return mLatestPreparedModel->configureExecutionBurst(callback, requestChannel,
657                                                              resultChannel, cb);
658     }
659 
660    private:
661     const sp<V1_3::IPreparedModel> mLatestPreparedModel;
662 };
663 
664 // Like TestPreparedModelLatest, but implementing 1.0
665 class TestPreparedModel10 : public V1_0::IPreparedModel {
666    public:
TestPreparedModel10(const HidlModel & model,const SampleDriver * driver,Success success)667     TestPreparedModel10(const HidlModel& model, const SampleDriver* driver, Success success)
668         : mLatestPreparedModel(new TestPreparedModelLatest(model, driver, success)) {}
669 
execute(const V1_0::Request & request,const sp<V1_0::IExecutionCallback> & callback)670     hardware::Return<V1_0::ErrorStatus> execute(
671             const V1_0::Request& request, const sp<V1_0::IExecutionCallback>& callback) override {
672         return mLatestPreparedModel->execute(request, callback);
673     }
674 
675    private:
676     const sp<V1_3::IPreparedModel> mLatestPreparedModel;
677 };
678 
679 // Behaves like SampleDriver, except that it produces customized IPrepareModel.
680 class TestDriver13 : public SampleDriver {
681    public:
TestDriver13(const std::string & name,Success success)682     TestDriver13(const std::string& name, Success success)
683         : SampleDriver(name.c_str()), mSuccess(success) {}
684 
getCapabilities_1_3(getCapabilities_1_3_cb _hidl_cb)685     hardware::Return<void> getCapabilities_1_3(getCapabilities_1_3_cb _hidl_cb) override {
686         android::nn::initVLogMask();
687         V1_3::Capabilities capabilities = nn::makeCapabilities(0.75f);
688         _hidl_cb(V1_3::ErrorStatus::NONE, capabilities);
689         return hardware::Void();
690     }
691 
getSupportedOperations_1_3(const HidlModel & model,getSupportedOperations_1_3_cb cb)692     hardware::Return<void> getSupportedOperations_1_3(const HidlModel& model,
693                                                       getSupportedOperations_1_3_cb cb) override {
694         if (nn::validateModel(model)) {
695             std::vector<bool> supported(model.main.operations.size(), true);
696             cb(V1_3::ErrorStatus::NONE, supported);
697         } else {
698             cb(V1_3::ErrorStatus::INVALID_ARGUMENT, {});
699         }
700         return hardware::Void();
701     }
702 
getSupportedOperations_1_2(const V1_2::Model & model,getSupportedOperations_1_2_cb cb)703     hardware::Return<void> getSupportedOperations_1_2(const V1_2::Model& model,
704                                                       getSupportedOperations_1_2_cb cb) override {
705         if (nn::validateModel(model)) {
706             std::vector<bool> supported(model.operations.size(), true);
707             cb(V1_0::ErrorStatus::NONE, supported);
708         } else {
709             std::vector<bool> supported;
710             cb(V1_0::ErrorStatus::INVALID_ARGUMENT, supported);
711         }
712         return hardware::Void();
713     }
714 
prepareModel_1_3(const HidlModel & model,V1_1::ExecutionPreference,V1_3::Priority,const V1_3::OptionalTimePoint &,const hardware::hidl_vec<hardware::hidl_handle> &,const hardware::hidl_vec<hardware::hidl_handle> &,const nn::HalCacheToken &,const sp<V1_3::IPreparedModelCallback> & callback)715     hardware::Return<V1_3::ErrorStatus> prepareModel_1_3(
716             const HidlModel& model, V1_1::ExecutionPreference, V1_3::Priority,
717             const V1_3::OptionalTimePoint&, const hardware::hidl_vec<hardware::hidl_handle>&,
718             const hardware::hidl_vec<hardware::hidl_handle>&, const nn::HalCacheToken&,
719             const sp<V1_3::IPreparedModelCallback>& callback) override {
720         callback->notify_1_3(V1_3::ErrorStatus::NONE,
721                              new TestPreparedModel13(model, this, mSuccess));
722         return V1_3::ErrorStatus::NONE;
723     }
724 
prepareModel_1_2(const V1_2::Model & model,V1_1::ExecutionPreference,const hardware::hidl_vec<hardware::hidl_handle> &,const hardware::hidl_vec<hardware::hidl_handle> &,const nn::HalCacheToken &,const sp<V1_2::IPreparedModelCallback> & callback)725     hardware::Return<V1_0::ErrorStatus> prepareModel_1_2(
726             const V1_2::Model& model, V1_1::ExecutionPreference,
727             const hardware::hidl_vec<hardware::hidl_handle>&,
728             const hardware::hidl_vec<hardware::hidl_handle>&, const nn::HalCacheToken&,
729             const sp<V1_2::IPreparedModelCallback>& callback) override {
730         callback->notify_1_2(V1_0::ErrorStatus::NONE,
731                              new TestPreparedModel12(nn::convertToV1_3(model), this, mSuccess));
732         return V1_0::ErrorStatus::NONE;
733     }
734 
prepareModel_1_1(const V1_1::Model & model,V1_1::ExecutionPreference,const sp<V1_0::IPreparedModelCallback> & callback)735     hardware::Return<V1_0::ErrorStatus> prepareModel_1_1(
736             const V1_1::Model& model, V1_1::ExecutionPreference,
737             const sp<V1_0::IPreparedModelCallback>& callback) override {
738         callback->notify(V1_0::ErrorStatus::NONE,
739                          new TestPreparedModel10(nn::convertToV1_3(model), this, mSuccess));
740         return V1_0::ErrorStatus::NONE;
741     }
742 
prepareModel(const V1_0::Model & model,const sp<V1_0::IPreparedModelCallback> & callback)743     hardware::Return<V1_0::ErrorStatus> prepareModel(
744             const V1_0::Model& model, const sp<V1_0::IPreparedModelCallback>& callback) override {
745         return prepareModel_1_1(nn::convertToV1_1(model),
746                                 V1_1::ExecutionPreference::FAST_SINGLE_ANSWER, callback);
747     }
748 
749    private:
750     Success mSuccess;
751 };
752 
753 // Like TestDriver, but implementing 1.1
754 class TestDriver11 : public V1_1::IDevice {
755    public:
TestDriver11(const std::string & name,Success success)756     TestDriver11(const std::string& name, Success success)
757         : mLatestDriver(new TestDriver13(name, success)) {}
getCapabilities_1_1(getCapabilities_1_1_cb _hidl_cb)758     hardware::Return<void> getCapabilities_1_1(getCapabilities_1_1_cb _hidl_cb) override {
759         return mLatestDriver->getCapabilities_1_1(_hidl_cb);
760     }
getSupportedOperations_1_1(const V1_1::Model & model,getSupportedOperations_1_1_cb _hidl_cb)761     hardware::Return<void> getSupportedOperations_1_1(
762             const V1_1::Model& model, getSupportedOperations_1_1_cb _hidl_cb) override {
763         return mLatestDriver->getSupportedOperations_1_1(model, _hidl_cb);
764     }
prepareModel_1_1(const V1_1::Model & model,V1_1::ExecutionPreference preference,const sp<V1_0::IPreparedModelCallback> & actualCallback)765     hardware::Return<V1_0::ErrorStatus> prepareModel_1_1(
766             const V1_1::Model& model, V1_1::ExecutionPreference preference,
767             const sp<V1_0::IPreparedModelCallback>& actualCallback) override {
768         return mLatestDriver->prepareModel_1_1(model, preference, actualCallback);
769     }
getStatus()770     hardware::Return<V1_0::DeviceStatus> getStatus() override { return mLatestDriver->getStatus(); }
getCapabilities(getCapabilities_cb _hidl_cb)771     hardware::Return<void> getCapabilities(getCapabilities_cb _hidl_cb) override {
772         return mLatestDriver->getCapabilities(_hidl_cb);
773     }
getSupportedOperations(const V1_0::Model & model,getSupportedOperations_cb _hidl_cb)774     hardware::Return<void> getSupportedOperations(const V1_0::Model& model,
775                                                   getSupportedOperations_cb _hidl_cb) override {
776         return mLatestDriver->getSupportedOperations(model, _hidl_cb);
777     }
prepareModel(const V1_0::Model & model,const sp<V1_0::IPreparedModelCallback> & actualCallback)778     hardware::Return<V1_0::ErrorStatus> prepareModel(
779             const V1_0::Model& model,
780             const sp<V1_0::IPreparedModelCallback>& actualCallback) override {
781         return mLatestDriver->prepareModel(model, actualCallback);
782     }
783 
784    private:
785     const sp<V1_3::IDevice> mLatestDriver;
786 };
787 
788 }  // namespace test_drivers
789 
790 /*-- End   test drivers -------------------------------------------------------------------------*/
791 
792 /*-- Begin timing tests -------------------------------------------------------------------------*/
793 
794 namespace timing_tests {
795 
796 using namespace test_drivers;
797 
// Which kind of driver a TimingTest instance exercises.
enum class DriverKind {
    CPU,
    OLD,  // too old to support timing (1.1 or earlier)
    NEW   // new enough to support timing (1.2 or later)
};
803 
operator <<(std::ostream & os,DriverKind kind)804 std::ostream& operator<<(std::ostream& os, DriverKind kind) {
805     const char* names[] = {"CPU", "OLD", "NEW"};
806     const uint32_t index = static_cast<uint32_t>(kind);
807     CHECK(index < std::size(names));
808     return os << names[index];
809 }
810 
// Which NNAPI compute path a TimingTest instance uses.
enum class Compute { ASYNC, SYNC, BURST, FENCED };
812 
operator <<(std::ostream & os,Compute compute)813 std::ostream& operator<<(std::ostream& os, Compute compute) {
814     const char* names[] = {"ASYNC", "SYNC", "BURST", "FENCED"};
815     const uint32_t index = static_cast<uint32_t>(compute);
816     CHECK(index < std::size(names));
817     return os << names[index];
818 }
819 
// Fixture parameterized over (driver kind, expected success pattern, compute path).
class TimingTest : public IntrospectionControlTest,
                   public ::testing::WithParamInterface<std::tuple<DriverKind, Success, Compute>> {
   public:
    TimingTest()
        : kDriverKind(std::get<0>(GetParam())),
          kSuccess(std::get<1>(GetParam())),
          kCompute(std::get<2>(GetParam())) {}

   protected:
    // Unpacked copies of the test parameters, for readability in the test body.
    const DriverKind kDriverKind;
    const Success kSuccess;
    const Compute kCompute;
};
833 
TEST_P(TimingTest,Test)834 TEST_P(TimingTest, Test) {
835     // There's no straightforward way to force CPU execution to fail.
836     ASSERT_EQ(kDriverKind == DriverKind::CPU, kSuccess == Success::PASS_CPU);
837 
838     // FAIL_WAIT only makes sense for ASYNC and FENCED.
839     ASSERT_TRUE(kCompute == Compute::ASYNC || kCompute == Compute::FENCED ||
840                 kSuccess != Success::FAIL_WAIT);
841 
842     if (DeviceManager::get()->getUseCpuOnly() != (kDriverKind == DriverKind::CPU)) {
843         // We don't have an elegant way to request the CPU driver.  Therefore,
844         // we rely on our test framework to make the choice between CPU and
845         // non-CPU.
846         GTEST_SKIP();
847     }
848 
849     createSimpleAddModel(&mModel);
850 
851     switch (kDriverKind) {
852         case DriverKind::CPU: {
853             // There should be only one driver -- the CPU
854             const std::string& name = DeviceManager::get()->getDrivers()[0]->getName();
855             ASSERT_TRUE(selectDeviceByName(name));
856             break;
857         }
858         case DriverKind::OLD: {
859             static const char name[] = "old";
860             DeviceManager::get()->forTest_registerDevice(
861                     nn::makeSharedDevice(name, new TestDriver11(name, kSuccess)));
862             ASSERT_TRUE(selectDeviceByName(name));
863             break;
864         }
865         case DriverKind::NEW: {
866             static const char name[] = "new";
867             DeviceManager::get()->forTest_registerDevice(
868                     nn::makeSharedDevice(name, new TestDriver13(name, kSuccess)));
869             ASSERT_TRUE(selectDeviceByName(name));
870             break;
871         }
872         default:
873             FAIL() << "Unexpected DriverKind";
874     }
875 
876     EXPECT_EQ(prepareForExecution(true /*measureTiming*/), ANEURALNETWORKS_NO_ERROR);
877 
878     float input1[2] = {1.0f, 2.0f};
879     float input2[2] = {3.0f, 4.0f};
880     float output[2];
881     EXPECT_EQ(ANeuralNetworksExecution_setInput(mExecution, 0, nullptr, input1, sizeof(input1)),
882               ANEURALNETWORKS_NO_ERROR);
883     EXPECT_EQ(ANeuralNetworksExecution_setInput(mExecution, 1, nullptr, input2, sizeof(input2)),
884               ANEURALNETWORKS_NO_ERROR);
885     EXPECT_EQ(ANeuralNetworksExecution_setOutput(mExecution, 0, nullptr, output, sizeof(output)),
886               ANEURALNETWORKS_NO_ERROR);
887     EXPECT_EQ(ANeuralNetworksExecution_setMeasureTiming(mExecution, true),
888               ANEURALNETWORKS_NO_ERROR);
889 
890     auto Check = [](bool expectPass, int result) {
891         if (expectPass) {
892             ASSERT_EQ(result, ANEURALNETWORKS_NO_ERROR);
893         } else {
894             ASSERT_NE(result, ANEURALNETWORKS_NO_ERROR);
895         }
896     };
897 
898     const bool isPass = hasBit(kSuccess, Success::PASS_BIT);
899     const int expectedGetDurationResultCode =
900             isPass ? ANEURALNETWORKS_NO_ERROR : ANEURALNETWORKS_BAD_STATE;
901 
902     const auto getDurationWhileRunning = [this] {
903         if (kDriverKind == DriverKind::CPU) {
904             // Testing DriverKind::CPU would require modifying the CPU execution
905             // path to control execution completion, similarly to how this test
906             // case does with TestPreparedModel::dummyExecution(). This does not
907             // seem worthwhile -- it's intrusive into the runtime code solely
908             // for the sake of testing, and we do not expect that the code paths
909             // needed to ensure correct behavior of
910             // ANeuralNetworksExecution_getDuration() on a running execution
911             // would be any different for CPU than for actual drivers.
912             return;
913         }
914         TestPreparedModelLatest::waitForExecutionToBegin();
915         for (int durationCode :
916              std::vector{ANEURALNETWORKS_DURATION_ON_HARDWARE, ANEURALNETWORKS_DURATION_IN_DRIVER,
917                          ANEURALNETWORKS_FENCED_DURATION_ON_HARDWARE,
918                          ANEURALNETWORKS_FENCED_DURATION_IN_DRIVER}) {
919             uint64_t time;
920             // Cannot query duration while execution is running
921             EXPECT_EQ(ANeuralNetworksExecution_getDuration(mExecution, durationCode, &time),
922                       ANEURALNETWORKS_BAD_STATE);
923         }
924     };
925 
926     switch (kCompute) {
927         case Compute::ASYNC: {
928             // Ideally what we'd like to do here is
929             //
930             //     Check(kSuccess != Success::FAIL_LAUNCH,
931             //         ANeuralNetworksExecution_startCompute(mExecution, &mEvent));
932             //     Check(isPass, ANeuralNetworksEvent_wait(mEvent));
933             //
934             // However, in the current implementation of the runtime, a launch
935             // failure at the HAL level does not show up as a launch failure at
936             // the NDK level ("startCompute"): The NNAPI runtime does not call a
937             // driver until it (the runtime) begins execution, so a launch
938             // failure at the HAL level looks like an execution failure at the
939             // NDK level ("wait").
940             SCOPED_TRACE("ASYNC startCompute");
941             TestPreparedModelLatest::pauseExecutions(true);
942             Check(true,  // rather than kSuccess != Success::FAIL_LAUNCH
943                   ANeuralNetworksExecution_startCompute(mExecution, &mEvent));
944             getDurationWhileRunning();
945             TestPreparedModelLatest::pauseExecutions(false);
946             SCOPED_TRACE("ASYNC wait");
947             Check(isPass, ANeuralNetworksEvent_wait(mEvent));
948             break;
949         }
950         case Compute::SYNC: {
951             SCOPED_TRACE("SYNC");
952             TestPreparedModelLatest::pauseExecutions(true);
953             std::thread run([this, Check, isPass] {
954                 Check(isPass, ANeuralNetworksExecution_compute(mExecution));
955             });
956             getDurationWhileRunning();
957             TestPreparedModelLatest::pauseExecutions(false);
958             run.join();
959             break;
960         }
961         case Compute::BURST: {
962             SCOPED_TRACE("BURST");
963             ANeuralNetworksBurst* burst;
964             ASSERT_EQ(ANeuralNetworksBurst_create(mCompilation, &burst), ANEURALNETWORKS_NO_ERROR);
965             TestPreparedModelLatest::pauseExecutions(true);
966             std::thread run([this, Check, isPass, burst] {
967                 Check(isPass, ANeuralNetworksExecution_burstCompute(mExecution, burst));
968             });
969             getDurationWhileRunning();
970             TestPreparedModelLatest::pauseExecutions(false);
971             run.join();
972             ANeuralNetworksBurst_free(burst);
973             break;
974         }
975         case Compute::FENCED: {
976             SCOPED_TRACE("FENCED startComputeWithDependencies");
977             TestPreparedModelLatest::pauseExecutions(true);
978 
979             // Note, due to the limitation of SampleDriver implementation, the call is synchronous.
980             // If the SampleDriver is updated to return real sync fence, this must be updated.
981             std::thread run([this, Check, isPass] {
982                 Check(isPass, ANeuralNetworksExecution_startComputeWithDependencies(
983                                       mExecution, nullptr, 0, 0, &mEvent));
984             });
985             getDurationWhileRunning();
986             TestPreparedModelLatest::pauseExecutions(false);
987             run.join();
988             SCOPED_TRACE("FENCED wait");
989             Check(isPass, ANeuralNetworksEvent_wait(mEvent));
990             break;
991         }
992         default:
993             FAIL() << "unreachable";
994     }
995 
996     uint64_t timeOnHardware, timeInDriver, timeOnHardwareFenced, timeInDriverFenced;
997     EXPECT_EQ(ANeuralNetworksExecution_getDuration(mExecution, ANEURALNETWORKS_DURATION_ON_HARDWARE,
998                                                    &timeOnHardware),
999               expectedGetDurationResultCode);
1000     EXPECT_EQ(ANeuralNetworksExecution_getDuration(mExecution, ANEURALNETWORKS_DURATION_IN_DRIVER,
1001                                                    &timeInDriver),
1002               expectedGetDurationResultCode);
1003     EXPECT_EQ(
1004             ANeuralNetworksExecution_getDuration(
1005                     mExecution, ANEURALNETWORKS_FENCED_DURATION_ON_HARDWARE, &timeOnHardwareFenced),
1006             expectedGetDurationResultCode);
1007     EXPECT_EQ(ANeuralNetworksExecution_getDuration(
1008                       mExecution, ANEURALNETWORKS_FENCED_DURATION_IN_DRIVER, &timeInDriverFenced),
1009               expectedGetDurationResultCode);
1010     switch (kDriverKind) {
1011         case DriverKind::CPU: {
1012             // TODO: Should we require timing to be reported as 0?
1013             EXPECT_TRUE(timeOnHardware == 0 || timeOnHardware == UINT64_MAX)
1014                     << "timeOnHardware = " << timeOnHardware;
1015             EXPECT_TRUE(timeInDriver == 0 || timeInDriver == UINT64_MAX)
1016                     << "timeInDriver = " << timeOnHardware;
1017             EXPECT_TRUE(timeOnHardwareFenced == 0 || timeOnHardwareFenced == UINT64_MAX)
1018                     << "timeOnHardwareFenced = " << timeOnHardwareFenced;
1019             EXPECT_TRUE(timeInDriverFenced == 0 || timeInDriverFenced == UINT64_MAX)
1020                     << "timeInDriver = " << timeInDriverFenced;
1021             break;
1022         }
1023         case DriverKind::OLD: {
1024             EXPECT_EQ(timeOnHardware, UINT64_MAX);
1025             EXPECT_EQ(timeInDriver, UINT64_MAX);
1026             EXPECT_EQ(timeOnHardwareFenced, UINT64_MAX);
1027             EXPECT_EQ(timeInDriverFenced, UINT64_MAX);
1028             break;
1029         }
1030         case DriverKind::NEW: {
1031             auto microsToNanos = [](uint64_t micros) {
1032                 constexpr uint64_t kNanosPerMicro = 1000;
1033                 return micros == UINT64_MAX ? UINT64_MAX : kNanosPerMicro * micros;
1034             };
1035             auto expectedTiming = getExpectedTiming(kSuccess, kCompute == Compute::FENCED);
1036             EXPECT_EQ(timeOnHardware, microsToNanos(expectedTiming.first.timeOnDevice));
1037             EXPECT_EQ(timeInDriver, microsToNanos(expectedTiming.first.timeInDriver));
1038             EXPECT_EQ(timeOnHardwareFenced, microsToNanos(expectedTiming.second.timeOnDevice));
1039             EXPECT_EQ(timeInDriverFenced, microsToNanos(expectedTiming.second.timeInDriver));
1040             break;
1041         }
1042         default:
1043             FAIL() << "unreachable";
1044     }
1045     if (kCompute != Compute::FENCED) {
1046         EXPECT_EQ(timeOnHardware, timeOnHardwareFenced);
1047         EXPECT_EQ(timeInDriver, timeInDriverFenced);
1048     }
1049     auto expectTimingLe = [](uint64_t a, const char* aName, uint64_t b, const char* bName) {
1050         if (a != UINT64_MAX && b != UINT64_MAX) {
1051             EXPECT_LE(a, b) << aName << " exceeds " << bName;
1052         }
1053     };
1054 #define EXPECT_TIMING_LE(a, b) expectTimingLe(a, #a, b, #b)
1055     EXPECT_TIMING_LE(timeOnHardware, timeInDriver);
1056     EXPECT_TIMING_LE(timeOnHardwareFenced, timeInDriverFenced);
1057 
1058     EXPECT_TIMING_LE(timeOnHardwareFenced, timeOnHardware);
1059     EXPECT_TIMING_LE(timeInDriverFenced, timeInDriver);
1060 #undef EXPECT_TIMING_LE
1061 }
1062 
// Parameter sets for the unfenced (ASYNC/SYNC/BURST) TimingTest instantiation.
auto kTimingTestUnfencedValues = ::testing::Values(
        // NOTE: We cannot force CPU execution to fail
        std::make_tuple(DriverKind::CPU, Success::PASS_CPU, Compute::ASYNC),
        std::make_tuple(DriverKind::CPU, Success::PASS_CPU, Compute::SYNC),
        std::make_tuple(DriverKind::CPU, Success::PASS_CPU, Compute::BURST),

        // NOTE: OLD driver does not provide timing
        std::make_tuple(DriverKind::OLD, Success::PASS_NEITHER, Compute::ASYNC),
        std::make_tuple(DriverKind::OLD, Success::PASS_NEITHER, Compute::SYNC),
        std::make_tuple(DriverKind::OLD, Success::PASS_NEITHER, Compute::BURST),

        std::make_tuple(DriverKind::OLD, Success::FAIL_LAUNCH, Compute::ASYNC),
        std::make_tuple(DriverKind::OLD, Success::FAIL_LAUNCH, Compute::SYNC),
        std::make_tuple(DriverKind::OLD, Success::FAIL_LAUNCH, Compute::BURST),

        // NOTE: Only ASYNC is paired with a wait
        std::make_tuple(DriverKind::OLD, Success::FAIL_WAIT, Compute::ASYNC),

        std::make_tuple(DriverKind::NEW, Success::PASS_NEITHER, Compute::ASYNC),
        std::make_tuple(DriverKind::NEW, Success::PASS_NEITHER, Compute::SYNC),
        std::make_tuple(DriverKind::NEW, Success::PASS_NEITHER, Compute::BURST),

        std::make_tuple(DriverKind::NEW, Success::PASS_DEVICE, Compute::ASYNC),
        std::make_tuple(DriverKind::NEW, Success::PASS_DEVICE, Compute::SYNC),
        std::make_tuple(DriverKind::NEW, Success::PASS_DEVICE, Compute::BURST),

        std::make_tuple(DriverKind::NEW, Success::PASS_DRIVER, Compute::ASYNC),
        std::make_tuple(DriverKind::NEW, Success::PASS_DRIVER, Compute::SYNC),
        std::make_tuple(DriverKind::NEW, Success::PASS_DRIVER, Compute::BURST),

        std::make_tuple(DriverKind::NEW, Success::PASS_BOTH, Compute::ASYNC),
        std::make_tuple(DriverKind::NEW, Success::PASS_BOTH, Compute::SYNC),
        std::make_tuple(DriverKind::NEW, Success::PASS_BOTH, Compute::BURST),

        std::make_tuple(DriverKind::NEW, Success::FAIL_LAUNCH, Compute::ASYNC),
        std::make_tuple(DriverKind::NEW, Success::FAIL_LAUNCH, Compute::SYNC),
        std::make_tuple(DriverKind::NEW, Success::FAIL_LAUNCH, Compute::BURST),

        // NOTE: Only ASYNC is paired with a wait
        std::make_tuple(DriverKind::NEW, Success::FAIL_WAIT, Compute::ASYNC));
1103 
// Parameter sets for the FENCED TimingTest instantiation.
auto kTimingTestFencedValues = ::testing::Values(
        // NOTE: We cannot force CPU execution to fail
        std::make_tuple(DriverKind::CPU, Success::PASS_CPU, Compute::FENCED),

        // NOTE: OLD driver does not provide timing
        std::make_tuple(DriverKind::OLD, Success::PASS_NEITHER, Compute::FENCED),

        std::make_tuple(DriverKind::OLD, Success::FAIL_LAUNCH, Compute::FENCED),

        std::make_tuple(DriverKind::NEW, Success::PASS_NEITHER, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_DEVICE, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_DRIVER, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_BOTH, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_NEITHER_DEVICE, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_NEITHER_DRIVER, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_NEITHER_BOTH, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_DEVICE_DEVICE, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_DEVICE_DRIVER, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_DEVICE_BOTH, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_DRIVER_DEVICE, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_DRIVER_DRIVER, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_DRIVER_BOTH, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_BOTH_DEVICE, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_BOTH_DRIVER, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_BOTH_BOTH, Compute::FENCED),

        std::make_tuple(DriverKind::NEW, Success::FAIL_LAUNCH, Compute::FENCED));
1131 
// Instantiate TimingTest once for the unfenced paths and once for the fenced path.
INSTANTIATE_TEST_SUITE_P(Unfenced, TimingTest, kTimingTestUnfencedValues);
INSTANTIATE_TEST_SUITE_P(Fenced, TimingTest, kTimingTestFencedValues);
1134 
1135 }  // namespace timing_tests
1136 
1137 /*-- End   timing tests -------------------------------------------------------------------------*/
1138 
// Constant operand fed to the MAXIMUM operation in createAddMaxModel().
const float kSimpleCeiling = 2.0f;
1140 
createAddMaxModel(WrapperModel * model,bool reverseOrder)1141 void createAddMaxModel(WrapperModel* model, bool reverseOrder) {
1142     WrapperOperandType type0(WrapperType::TENSOR_FLOAT32, {2});
1143     WrapperOperandType type1(WrapperType::INT32, {});
1144     // Phase 1, operands
1145     auto op1 = model->addOperand(&type0);
1146     auto op2 = model->addOperand(&type0);
1147     auto act = model->addOperand(&type1);
1148     auto op3 = model->addOperand(&type0);
1149     auto op4 = model->addOperand(&type0);
1150     auto op5 = model->addOperand(&type0);
1151     // Phase 2, operations
1152     static int32_t act_init[] = {0};
1153     model->setOperandValue(act, act_init, sizeof(act_init));
1154     static float ceiling[] = {kSimpleCeiling, kSimpleCeiling};
1155     model->setOperandValue(op4, ceiling, sizeof(ceiling));
1156     if (reverseOrder) {
1157         // In this case, add MAXIMUM first, but the execution order is still ADD -> MAXIMUM.
1158         model->addOperation(ANEURALNETWORKS_MAXIMUM, {op3, op4}, {op5});
1159         model->addOperation(ANEURALNETWORKS_ADD, {op1, op2, act}, {op3});
1160     } else {
1161         model->addOperation(ANEURALNETWORKS_ADD, {op1, op2, act}, {op3});
1162         model->addOperation(ANEURALNETWORKS_MAXIMUM, {op3, op4}, {op5});
1163     }
1164     // Phase 3, inputs and outputs
1165     model->identifyInputsAndOutputs({op1, op2}, {op5});
1166     model->finish();
1167     ASSERT_TRUE(model->isValid());
1168 }
1169 
// Verifies model slicing against a 1.1 driver: for an ADD -> MAXIMUM model,
// the expected supported-operation list is {ADD: true, MAXIMUM: false}
// (MAXIMUM presumably postdates HAL 1.1 -- confirm against the NNAPI spec).
TEST_F(IntrospectionControlTest, SlicingAddMax) {
    // This is needed before we have the CPU fallback path being treated as a Device.
    if (DeviceManager::get()->getUseCpuOnly()) {
        GTEST_SKIP();
    }

    using namespace test_drivers;

    static const char name[] = "driver11";
    DeviceManager::get()->forTest_registerDevice(
            nn::makeSharedDevice(name, new TestDriver11(name, Success::PASS_BOTH)));
    ASSERT_TRUE(selectDeviceByName(name));

    createAddMaxModel(&mModel, false);
    EXPECT_TRUE(isSupportedOpListExpected({true, false}));
}
1186 
// Same as SlicingAddMax, but with the operations added to the model in the
// reverse order: the supported list follows addition order, so the expectation
// flips to {MAXIMUM: false, ADD: true}.
TEST_F(IntrospectionControlTest, SlicingMaxAdd) {
    // This is needed before we have the CPU fallback path being treated as a Device.
    if (DeviceManager::get()->getUseCpuOnly()) {
        GTEST_SKIP();
    }

    using namespace test_drivers;

    static const char name[] = "driver11";
    DeviceManager::get()->forTest_registerDevice(
            nn::makeSharedDevice(name, new TestDriver11(name, Success::PASS_BOTH)));
    ASSERT_TRUE(selectDeviceByName(name));

    createAddMaxModel(&mModel, true);
    EXPECT_TRUE(isSupportedOpListExpected({false, true}));
}
1203 
// Constant operand fed to the MUL operation in createAddMulModel().
const float kSimpleMultiplier = 2.0f;
1205 
createAddMulModel(WrapperModel * model,bool reverseOrder)1206 void createAddMulModel(WrapperModel* model, bool reverseOrder) {
1207     WrapperOperandType type0(WrapperType::TENSOR_FLOAT32, {2});
1208     WrapperOperandType type1(WrapperType::INT32, {});
1209     // Phase 1, operands
1210     auto op1 = model->addOperand(&type0);
1211     auto op2 = model->addOperand(&type0);
1212     auto act = model->addOperand(&type1);
1213     auto op3 = model->addOperand(&type0);
1214     auto op4 = model->addOperand(&type0);
1215     auto op5 = model->addOperand(&type0);
1216     // Phase 2, operations
1217     static int32_t act_init[] = {0};
1218     model->setOperandValue(act, act_init, sizeof(act_init));
1219     static float multiplier[] = {kSimpleMultiplier, kSimpleMultiplier};
1220     model->setOperandValue(op4, multiplier, sizeof(multiplier));
1221     if (reverseOrder) {
1222         // In this case, add MUL first, but the execution order is still ADD -> MUL.
1223         model->addOperation(ANEURALNETWORKS_MUL, {op3, op4, act}, {op5});
1224         model->addOperation(ANEURALNETWORKS_ADD, {op1, op2, act}, {op3});
1225     } else {
1226         model->addOperation(ANEURALNETWORKS_ADD, {op1, op2, act}, {op3});
1227         model->addOperation(ANEURALNETWORKS_MUL, {op3, op4, act}, {op5});
1228     }
1229     // Phase 3, inputs and outputs
1230     model->identifyInputsAndOutputs({op1, op2}, {op5});
1231     model->finish();
1232     ASSERT_TRUE(model->isValid());
1233 }
1234 
// Verifies that a model whose every operation is claimed by the selected
// driver is reported as fully supported.
TEST_F(IntrospectionControlTest, SlicingFullySupported) {
    // Until the CPU fallback path is treated as a Device, skip under cpu-only mode.
    if (DeviceManager::get()->getUseCpuOnly()) {
        GTEST_SKIP();
    }

    using namespace test_drivers;

    // Register a V1_1 test driver and route the compilation to it.
    static const char kDriverName[] = "driver11";
    DeviceManager::get()->forTest_registerDevice(nn::makeSharedDevice(
            kDriverName, new TestDriver11(kDriverName, Success::PASS_BOTH)));
    ASSERT_TRUE(selectDeviceByName(kDriverName));

    // Both ADD and MUL should be reported supported by this driver.
    createAddMulModel(&mModel, /*reverseOrder=*/false);
    EXPECT_TRUE(isSupportedOpListExpected({true, true}));
}
1251 
createCondModel(WrapperModel * model,bool dynamicRank)1252 void createCondModel(WrapperModel* model, bool dynamicRank) {
1253     const auto dimensions = dynamicRank ? std::vector<uint32_t>{} : std::vector<uint32_t>{1};
1254     WrapperOperandType floatType(WrapperType::TENSOR_FLOAT32, dimensions);
1255     WrapperOperandType boolType(WrapperType::TENSOR_BOOL8, {1});
1256     // Phase 1, operands
1257     auto op1 = model->addOperand(&floatType);
1258     auto op2 = model->addOperand(&boolType);
1259     // Phase 2, operations
1260     model->addOperation(ANEURALNETWORKS_LESS, {op1, op1}, {op2});
1261     // Phase 3, inputs and outputs
1262     model->identifyInputsAndOutputs({op1}, {op2});
1263     model->finish();
1264 }
1265 
addReluOperation(WrapperModel * model,std::vector<uint32_t> * modelInputIndexes,std::vector<uint32_t> * modelOutputIndexes,bool dynamicRank)1266 void addReluOperation(WrapperModel* model, std::vector<uint32_t>* modelInputIndexes,
1267                       std::vector<uint32_t>* modelOutputIndexes, bool dynamicRank) {
1268     const auto dimensions = dynamicRank ? std::vector<uint32_t>{} : std::vector<uint32_t>{1};
1269     WrapperOperandType type(WrapperType::TENSOR_FLOAT32, dimensions);
1270     // Phase 1, operands
1271     auto op1 = model->addOperand(&type);
1272     auto op2 = model->addOperand(&type);
1273     // Phase 2, operations
1274     model->addOperation(ANEURALNETWORKS_RELU, {op1}, {op2});
1275     // Phase 3, inputs and outputs
1276     modelInputIndexes->push_back(op1);
1277     modelOutputIndexes->push_back(op2);
1278 }
1279 
createReluModel(WrapperModel * model,bool dynamicRank)1280 void createReluModel(WrapperModel* model, bool dynamicRank) {
1281     std::vector<uint32_t> modelInputIndexes, modelOutputIndexes;
1282     addReluOperation(model, &modelInputIndexes, &modelOutputIndexes, dynamicRank);
1283     model->identifyInputsAndOutputs(modelInputIndexes, modelOutputIndexes);
1284     model->finish();
1285 }
1286 
addWhileOperation(std::vector<WrapperModel> * extraModels,WrapperModel * mainModel,std::vector<uint32_t> * modelInputIndexes,std::vector<uint32_t> * modelOutputIndexes,bool dynamicRank)1287 void addWhileOperation(std::vector<WrapperModel>* extraModels, WrapperModel* mainModel,
1288                        std::vector<uint32_t>* modelInputIndexes,
1289                        std::vector<uint32_t>* modelOutputIndexes, bool dynamicRank) {
1290     const auto dimensions = dynamicRank ? std::vector<uint32_t>{} : std::vector<uint32_t>{1};
1291     WrapperOperandType floatType(WrapperType::TENSOR_FLOAT32, dimensions);
1292     WrapperOperandType modelType(WrapperType::MODEL, {});
1293 
1294     extraModels->emplace_back();
1295     extraModels->emplace_back();
1296     WrapperModel* condModel = &extraModels->at(extraModels->size() - 2);
1297     WrapperModel* bodyModel = &extraModels->at(extraModels->size() - 1);
1298     createCondModel(condModel, dynamicRank);
1299     createReluModel(bodyModel, dynamicRank);
1300     ASSERT_TRUE(condModel->isValid());
1301     ASSERT_TRUE(bodyModel->isValid());
1302 
1303     // Phase 1, operands
1304     const uint32_t op1 = mainModel->addOperand(&modelType);
1305     const uint32_t op2 = mainModel->addOperand(&modelType);
1306     const uint32_t op3 = mainModel->addOperand(&floatType);
1307     const uint32_t op4 = mainModel->addOperand(&floatType);
1308     mainModel->setOperandValueFromModel(op1, condModel);
1309     mainModel->setOperandValueFromModel(op2, bodyModel);
1310     // Phase 2, operations
1311     mainModel->addOperation(ANEURALNETWORKS_WHILE, {op1, op2, op3}, {op4});
1312     // Phase 3, inputs and outputs
1313     modelInputIndexes->push_back(op3);
1314     modelOutputIndexes->push_back(op4);
1315 }
1316 
createReluStaticWhileModel(std::vector<WrapperModel> * extraModels,WrapperModel * mainModel)1317 void createReluStaticWhileModel(std::vector<WrapperModel>* extraModels, WrapperModel* mainModel) {
1318     std::vector<uint32_t> modelInputIndexes, modelOutputIndexes;
1319 
1320     // Operation supported in Android API level 27
1321     addReluOperation(mainModel, &modelInputIndexes, &modelOutputIndexes, /*dynamicRank=*/false);
1322     // Operation supported in Android API level 30
1323     addWhileOperation(extraModels, mainModel, &modelInputIndexes, &modelOutputIndexes,
1324                       /*dynamicRank=*/false);
1325 
1326     mainModel->identifyInputsAndOutputs(modelInputIndexes, modelOutputIndexes);
1327     mainModel->finish();
1328     ASSERT_TRUE(mainModel->isValid());
1329 }
1330 
// Verifies that a V1_1 driver reports the WHILE operation as unsupported while
// still supporting the plain RELU.
TEST_F(IntrospectionControlTest, ControlFlowNotSupported) {
    // Until the CPU fallback path is treated as a Device, skip under cpu-only mode.
    if (DeviceManager::get()->getUseCpuOnly()) {
        GTEST_SKIP();
    }

    using namespace test_drivers;

    static const char kDriverName[] = "driver11";
    DeviceManager::get()->forTest_registerDevice(nn::makeSharedDevice(
            kDriverName, new TestDriver11(kDriverName, Success::PASS_BOTH)));
    ASSERT_TRUE(selectDeviceByName(kDriverName));

    std::vector<WrapperModel> extraModels;
    createReluStaticWhileModel(&extraModels, &mModel);
    EXPECT_TRUE(isSupportedOpListExpected({true, false}));

    // mModel may reference models owned by `extraModels`; destroy it first.
    mModel = WrapperModel{};
}
1351 
// Verifies that a V1_3 driver reports both the RELU and the fixed-rank WHILE
// as supported.
TEST_F(IntrospectionControlTest, ControlFlowSupported) {
    // Until the CPU fallback path is treated as a Device, skip under cpu-only mode.
    if (DeviceManager::get()->getUseCpuOnly()) {
        GTEST_SKIP();
    }

    using namespace test_drivers;

    static const char kDriverName[] = "driver13";
    DeviceManager::get()->forTest_registerDevice(nn::makeSharedDevice(
            kDriverName, new TestDriver13(kDriverName, Success::PASS_BOTH)));
    ASSERT_TRUE(selectDeviceByName(kDriverName));

    std::vector<WrapperModel> extraModels;
    createReluStaticWhileModel(&extraModels, &mModel);
    EXPECT_TRUE(isSupportedOpListExpected({true, true}));

    // mModel may reference models owned by `extraModels`; destroy it first.
    mModel = WrapperModel{};
}
1372 
createStaticWhileDynamicWhileModel(std::vector<WrapperModel> * extraModels,WrapperModel * mainModel)1373 void createStaticWhileDynamicWhileModel(std::vector<WrapperModel>* extraModels,
1374                                         WrapperModel* mainModel) {
1375     std::vector<uint32_t> modelInputIndexes, modelOutputIndexes;
1376 
1377     // Operation supported in Android API level 30
1378     addWhileOperation(extraModels, mainModel, &modelInputIndexes, &modelOutputIndexes,
1379                       /*dynamicRank=*/false);
1380     // Operation supported only by NNAPI runtime
1381     addWhileOperation(extraModels, mainModel, &modelInputIndexes, &modelOutputIndexes,
1382                       /*dynamicRank=*/true);
1383 
1384     mainModel->identifyInputsAndOutputs(modelInputIndexes, modelOutputIndexes);
1385     mainModel->finish();
1386     ASSERT_TRUE(mainModel->isValid());
1387 }
1388 
// Verifies that when the model contains a dynamic-rank WHILE, slicing fails
// and no operation is reported supported — even on a V1_3 driver.
TEST_F(IntrospectionControlTest, ControlFlowFailedToSlice) {
    // Until the CPU fallback path is treated as a Device, skip under cpu-only mode.
    if (DeviceManager::get()->getUseCpuOnly()) {
        GTEST_SKIP();
    }

    using namespace test_drivers;

    static const char kDriverName[] = "driver13";
    DeviceManager::get()->forTest_registerDevice(nn::makeSharedDevice(
            kDriverName, new TestDriver13(kDriverName, Success::PASS_BOTH)));
    ASSERT_TRUE(selectDeviceByName(kDriverName));

    std::vector<WrapperModel> extraModels;
    createStaticWhileDynamicWhileModel(&extraModels, &mModel);
    EXPECT_TRUE(isSupportedOpListExpected({false, false}));

    // mModel may reference models owned by `extraModels`; destroy it first.
    mModel = WrapperModel{};
}
1409 
1410 // TODO(miaowang): add a test to make sure ANNCompilation_create() has CPU
1411 // fallback.
1412 // This test verifies that a device that could only handle ADD would correctly report that an
1413 // ADD->MUL model could not be fully supported.
TEST_F(IntrospectionControlTest, PartialModelNotSupported) {
    // Until the CPU fallback path is treated as a Device, skip under cpu-only mode.
    if (DeviceManager::get()->getUseCpuOnly()) {
        GTEST_SKIP();
    }

    createAddMulModel(&mModel, /*reverseOrder=*/false);

    // Register a device that advertises support for ADD only.
    const std::string addOnlyDriver = "test-onlyAdd";
    std::vector<bool> addOnlyOp(android::nn::kNumberOfOperationTypes, false);
    addOnlyOp[ANEURALNETWORKS_ADD] = true;
    registerDevices({{addOnlyDriver, 0.9, addOnlyOp}});

    EXPECT_TRUE(selectDeviceByName(addOnlyDriver));
    EXPECT_TRUE(isSupportedOpListExpected({true, false}));

    // Creating the compilation succeeds, but finishing it must fail: with the
    // Introspection API there is no CPU fallback for the unsupported MUL.
    ANeuralNetworksModel* modelHandle = mModel.getHandle();
    EXPECT_EQ(ANeuralNetworksCompilation_createForDevices(modelHandle, mDevices.data(),
                                                          mDevices.size(), &mCompilation),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_NE(ANeuralNetworksCompilation_finish(mCompilation), ANEURALNETWORKS_NO_ERROR);
}
1439 
1440 // This test verifies that a device that could only handle ADD would correctly report that an
1441 // ADD->MUL model could not be fully supported. Also verifies that the indices of returned
1442 // supported op list correctly map to the order of operations being added by the user.
TEST_F(IntrospectionControlTest, PartialModelNotSupportedOrder) {
    // Until the CPU fallback path is treated as a Device, skip under cpu-only mode.
    if (DeviceManager::get()->getUseCpuOnly()) {
        GTEST_SKIP();
    }

    // MUL is registered with the model first this time.
    createAddMulModel(&mModel, /*reverseOrder=*/true);

    // Register a device that advertises support for ADD only.
    const std::string addOnlyDriver = "test-onlyAdd";
    std::vector<bool> addOnlyOp(android::nn::kNumberOfOperationTypes, false);
    addOnlyOp[ANEURALNETWORKS_ADD] = true;
    registerDevices({{addOnlyDriver, 0.9, addOnlyOp}});

    EXPECT_TRUE(selectDeviceByName(addOnlyDriver));
    // The flags must follow the user's registration order: MUL (unsupported)
    // first, ADD (supported) second.
    EXPECT_TRUE(isSupportedOpListExpected({false, true}));
}
1460 
1461 // TODO(miaowang): update the test to make sure the model is actually running on the test devices.
1462 // This test verifies that an ADD->MUL model is able to run on two selected devices that together
1463 // can handle all operations.
TEST_F(IntrospectionControlTest, ModelNeedTwoDevices) {
    // Until the CPU fallback path is treated as a Device, skip under cpu-only mode.
    if (DeviceManager::get()->getUseCpuOnly()) {
        GTEST_SKIP();
    }

    createAddMulModel(&mModel, /*reverseOrder=*/false);

    // Two complementary devices: one handles only ADD, the other only MUL.
    const std::string addOnlyDriver = "test-onlyAdd";
    std::vector<bool> addOnlyOp(android::nn::kNumberOfOperationTypes, false);
    addOnlyOp[ANEURALNETWORKS_ADD] = true;

    const std::string mulOnlyDriver = "test-onlyMul";
    std::vector<bool> mulOnlyOp(android::nn::kNumberOfOperationTypes, false);
    mulOnlyOp[ANEURALNETWORKS_MUL] = true;

    registerDevices({
            {addOnlyDriver, 0.9, addOnlyOp},
            {mulOnlyDriver, 0.9, mulOnlyOp},
    });

    EXPECT_TRUE(selectDeviceByName(addOnlyDriver));
    EXPECT_TRUE(selectDeviceByName(mulOnlyDriver));
    // Together the two devices cover the whole model.
    EXPECT_TRUE(isSupportedOpListExpected({true, true}));
    EXPECT_EQ(prepareForExecution(), ANEURALNETWORKS_NO_ERROR);

    const float input1[2] = {1.0f, 2.0f};
    const float input2[2] = {3.0f, 4.0f};
    float output[2];
    EXPECT_EQ(ANeuralNetworksExecution_setInput(mExecution, 0, nullptr, input1, sizeof(input1)),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_EQ(ANeuralNetworksExecution_setInput(mExecution, 1, nullptr, input2, sizeof(input2)),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_EQ(ANeuralNetworksExecution_setOutput(mExecution, 0, nullptr, output, sizeof(output)),
              ANEURALNETWORKS_NO_ERROR);

    EXPECT_EQ(ANeuralNetworksExecution_startCompute(mExecution, &mEvent), ANEURALNETWORKS_NO_ERROR);
    EXPECT_EQ(ANeuralNetworksEvent_wait(mEvent), ANEURALNETWORKS_NO_ERROR);
    // Expected result of the ADD -> MUL pipeline: (a + b) * kSimpleMultiplier.
    EXPECT_EQ(output[0], kSimpleMultiplier * (input1[0] + input2[0]));
    EXPECT_EQ(output[1], kSimpleMultiplier * (input1[1] + input2[1]));
}
1505 }  // namespace
1506 
1507 #if defined(NN_DEBUGGABLE) && !defined(NNTEST_ONLY_PUBLIC_API)
1508 
1509 void forTest_setRuntimeFeatureLevel(int64_t level);  // defined in NeuralNetworks.cpp
1510 
1511 namespace {
// Fixture for forTest_setRuntimeFeatureLevel(): checks how an overridden
// runtime feature level affects the feature level reported for a device.
class WhiteboxFeatureLevelTest : public IntrospectionControlTest {
   protected:
    void TearDown() override {
        // Reset the override (0) so subsequent tests see the real runtime level.
        forTest_setRuntimeFeatureLevel(0);
        IntrospectionControlTest::TearDown();
    }

   public:
    // Which HAL version the registered test driver implements.
    enum DeviceLevel { V1_1, V1_3 };
    // Sets the runtime feature level override, registers a test driver of
    // `deviceLevel`, and expects ANeuralNetworksDevice_getFeatureLevel to
    // report `expectDeviceFeatureLevel`. Defined below.
    void trial(int64_t setRuntimeFeatureLevel, DeviceLevel deviceLevel,
               int64_t expectDeviceFeatureLevel);
};
1524 
trial(int64_t setRuntimeFeatureLevel,DeviceLevel deviceLevel,int64_t expectDeviceFeatureLevel)1525 void WhiteboxFeatureLevelTest::trial(int64_t setRuntimeFeatureLevel, DeviceLevel deviceLevel,
1526                                      int64_t expectDeviceFeatureLevel) {
1527     // This is needed before we have the CPU fallback path being treated as a Device.
1528     if (DeviceManager::get()->getUseCpuOnly()) {
1529         GTEST_SKIP();
1530     }
1531 
1532     using namespace test_drivers;
1533 
1534     forTest_setRuntimeFeatureLevel(setRuntimeFeatureLevel);
1535 
1536     static const char deviceName[] = "trial";
1537     auto newTestDriver = [deviceLevel]() -> V1_0::IDevice* {
1538         switch (deviceLevel) {
1539             case DeviceLevel::V1_1:
1540                 return new TestDriver11(deviceName, Success::PASS_BOTH_BOTH);
1541             case DeviceLevel::V1_3:
1542                 return new TestDriver13(deviceName, Success::PASS_BOTH_BOTH);
1543             default:
1544                 assert(!"Unrecognized deviceLevel");
1545                 return nullptr;
1546         }
1547     };
1548     DeviceManager::get()->forTest_registerDevice(nn::makeSharedDevice(deviceName, newTestDriver()));
1549 
1550     ASSERT_TRUE(selectDeviceByName(deviceName));
1551     int64_t deviceFeatureLevel;
1552     ASSERT_EQ(mDevices.size(), size_t(1));
1553     ASSERT_EQ(ANeuralNetworksDevice_getFeatureLevel(mDevices.front(), &deviceFeatureLevel),
1554               ANEURALNETWORKS_NO_ERROR);
1555     ASSERT_EQ(deviceFeatureLevel, expectDeviceFeatureLevel);
1556 }
1557 
// No runtime override (0): a V1_1 driver reports feature level 2.
TEST_F(WhiteboxFeatureLevelTest, Default_V1_1) {
    trial(0, DeviceLevel::V1_1, ANEURALNETWORKS_FEATURE_LEVEL_2);
}
1561 
// Runtime capped at FL3: a V1_1 (FL2) driver still reports FL2.
TEST_F(WhiteboxFeatureLevelTest, FL3_V1_1) {
    trial(ANEURALNETWORKS_FEATURE_LEVEL_3, DeviceLevel::V1_1, ANEURALNETWORKS_FEATURE_LEVEL_2);
}
1565 
// Runtime cap equal to the driver's own level: still FL2.
TEST_F(WhiteboxFeatureLevelTest, FL2_V1_1) {
    trial(ANEURALNETWORKS_FEATURE_LEVEL_2, DeviceLevel::V1_1, ANEURALNETWORKS_FEATURE_LEVEL_2);
}
1569 
// Runtime capped at FL1: the reported level for a V1_1 driver drops to FL1.
TEST_F(WhiteboxFeatureLevelTest, FL1_V1_1) {
    trial(ANEURALNETWORKS_FEATURE_LEVEL_1, DeviceLevel::V1_1, ANEURALNETWORKS_FEATURE_LEVEL_1);
}
1573 
// No runtime override (0): a V1_3 driver reports feature level 4.
TEST_F(WhiteboxFeatureLevelTest, Default_V1_3) {
    trial(0, DeviceLevel::V1_3, ANEURALNETWORKS_FEATURE_LEVEL_4);
}
1577 
// A runtime cap above the driver's level (FL5) does not raise it past FL4.
TEST_F(WhiteboxFeatureLevelTest, FL5_V1_3) {
    trial(ANEURALNETWORKS_FEATURE_LEVEL_5, DeviceLevel::V1_3, ANEURALNETWORKS_FEATURE_LEVEL_4);
}
1581 
// Runtime cap equal to the driver's own level: still FL4.
TEST_F(WhiteboxFeatureLevelTest, FL4_V1_3) {
    trial(ANEURALNETWORKS_FEATURE_LEVEL_4, DeviceLevel::V1_3, ANEURALNETWORKS_FEATURE_LEVEL_4);
}
1585 
// Runtime capped at FL3: the reported level for a V1_3 driver drops to FL3.
TEST_F(WhiteboxFeatureLevelTest, FL3_V1_3) {
    trial(ANEURALNETWORKS_FEATURE_LEVEL_3, DeviceLevel::V1_3, ANEURALNETWORKS_FEATURE_LEVEL_3);
}
1589 
// Runtime capped at FL2: the reported level for a V1_3 driver drops to FL2.
TEST_F(WhiteboxFeatureLevelTest, FL2_V1_3) {
    trial(ANEURALNETWORKS_FEATURE_LEVEL_2, DeviceLevel::V1_3, ANEURALNETWORKS_FEATURE_LEVEL_2);
}
1593 }  // namespace
1594 
1595 #endif  // defined(NN_DEBUGGABLE) && !defined(NNTEST_ONLY_PUBLIC_API)
1596