xref: /aosp_15_r20/external/ComputeLibrary/arm_compute/runtime/NEON/functions/NEAddMulAdd.h (revision c217d954acce2dbc11938adb493fc0abd69584f3)
1 /*
2  * Copyright (c) 2023 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEADDMULADD
25 #define ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEADDMULADD
26 
27 #include "arm_compute/core/Types.h"
28 #include "arm_compute/runtime/IFunction.h"
29 #include "arm_compute/runtime/IMemoryManager.h"
30 
31 #include <memory>
32 
33 namespace arm_compute
34 {
35 class ITensor;
36 class ITensorInfo;
37 
38 /** Function to compute Add+Mul+Add fused operation */
/** Function to compute Add+Mul+Add fused operation */
class NEAddMulAdd : public IFunction
{
public:
    /** Constructor
     *
     * @param[in] memory_manager (Optional) Memory manager to be used by the function's internals.
     */
    NEAddMulAdd(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEAddMulAdd(const NEAddMulAdd &) = delete;
    /** Prevent instances of this class from being moved (As this class contains non movable objects) */
    NEAddMulAdd(NEAddMulAdd &&) = delete;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEAddMulAdd &operator=(const NEAddMulAdd &) = delete;
    /** Prevent instances of this class from being moved (As this class contains non movable objects) */
    NEAddMulAdd &operator=(NEAddMulAdd &&) = delete;
    /** Destructor */
    ~NEAddMulAdd();
    /** Initialize the function's inputs and outputs.
     *
     * Valid data layouts:
     * - Any
     *
     * Valid data type configurations:
     * |input1         |input2         |bn_mul         |bn_add         |add_output     |final_output   |
     * |:--------------|:--------------|:--------------|:--------------|:--------------|:--------------|
     * |QASYMM8        |QASYMM8        |QASYMM8        |QASYMM8        |QASYMM8        |QASYMM8        |
     * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |
     * |F16            |F16            |F16            |F16            |F16            |F16            |
     * |F32            |F32            |F32            |F32            |F32            |F32            |
     *
     * This is what this composite function (tailored for add followed by a batch norm operation) does:
     *      add_output <- input1 + input2 (add)
     *      final_output <- add_output * bn_mul + bn_add  (batch norm = mul+add)
     *
     * @param[in]  input1       First tensor input. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
     * @param[in]  input2       Second tensor input. Data types supported: Same as @p input1.
     * @param[in]  bn_mul       The multiplication coefficient on the feature dimension. Data types supported: Same as @p input1.
     *                          It's one dimensional tensor with size equal to the feature maps [FM]
     * @param[in]  bn_add       The addition coefficient on the feature dimension. Data types supported: Same as @p input1.
     *                          It's one dimensional tensor with size equal to the feature maps [FM]
     * @param[out] add_output   Output of the first add. Data type supported: Same as @p input1.
     * @param[out] final_output Output of the add+mul+add+act composite operation. Data type supported: Same as @p input1.
     * @param[in]  policy       Policy to handle overflow
     * @param[in]  act_info     (Optional) Activation layer information in case of a fused activation.
     *
     */
    void configure(ITensor *input1, ITensor *input2, ITensor *bn_mul, ITensor *bn_add,
                   ITensor *add_output, ITensor *final_output,
                   ConvertPolicy policy, const ActivationLayerInfo &act_info);
    /** Static function to check if given info will lead to a valid configuration of @ref NEAddMulAdd
     *
     * Similar to @ref NEAddMulAdd::configure() except the arguments are @ref ITensorInfo * instead of @ref ITensor *
     *
     * @param[in] input1       First tensor input info. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
     * @param[in] input2       Second tensor input info. Data types supported: Same as @p input1.
     * @param[in] bn_mul       Multiplication coefficient tensor info. Data types supported: Same as @p input1.
     * @param[in] bn_add       Addition coefficient tensor info. Data types supported: Same as @p input1.
     * @param[in] add_output   Tensor info for the output of the first add. Data type supported: Same as @p input1.
     * @param[in] final_output Tensor info for the output of the composite operation. Data type supported: Same as @p input1.
     * @param[in] policy       Policy to handle overflow
     * @param[in] act_info     (Optional) Activation layer information in case of a fused activation.
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input1, const ITensorInfo *input2,
                           const ITensorInfo *bn_mul, const ITensorInfo *bn_add,
                           const ITensorInfo *add_output, const ITensorInfo *final_output,
                           ConvertPolicy policy, const ActivationLayerInfo &act_info);

    // Inherited methods overridden:
    /** Run the function's underlying kernels. Must be called after @ref NEAddMulAdd::configure(). */
    void run() override;

private:
    // Pimpl idiom: implementation details are hidden in the .cpp to keep this header light.
    struct Impl;
    std::unique_ptr<Impl> _impl;
};
104 } // namespace arm_compute
105 #endif /* ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEADDMULADD */
106