xref: /aosp_15_r20/external/ComputeLibrary/src/cpu/operators/CpuFullyConnected.h (revision c217d954acce2dbc11938adb493fc0abd69584f3)
1*c217d954SCole Faust /*
2*c217d954SCole Faust  * Copyright (c) 2021-2023 Arm Limited.
3*c217d954SCole Faust  *
4*c217d954SCole Faust  * SPDX-License-Identifier: MIT
5*c217d954SCole Faust  *
6*c217d954SCole Faust  * Permission is hereby granted, free of charge, to any person obtaining a copy
7*c217d954SCole Faust  * of this software and associated documentation files (the "Software"), to
8*c217d954SCole Faust  * deal in the Software without restriction, including without limitation the
9*c217d954SCole Faust  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10*c217d954SCole Faust  * sell copies of the Software, and to permit persons to whom the Software is
11*c217d954SCole Faust  * furnished to do so, subject to the following conditions:
12*c217d954SCole Faust  *
13*c217d954SCole Faust  * The above copyright notice and this permission notice shall be included in all
14*c217d954SCole Faust  * copies or substantial portions of the Software.
15*c217d954SCole Faust  *
16*c217d954SCole Faust  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17*c217d954SCole Faust  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18*c217d954SCole Faust  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19*c217d954SCole Faust  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20*c217d954SCole Faust  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21*c217d954SCole Faust  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22*c217d954SCole Faust  * SOFTWARE.
23*c217d954SCole Faust  */
24*c217d954SCole Faust #ifndef ARM_COMPUTE_CPU_FULLY_CONNECTED_H
25*c217d954SCole Faust #define ARM_COMPUTE_CPU_FULLY_CONNECTED_H
26*c217d954SCole Faust 
27*c217d954SCole Faust #include "src/cpu/ICpuOperator.h"
28*c217d954SCole Faust 
29*c217d954SCole Faust #include "arm_compute/core/TensorInfo.h"
30*c217d954SCole Faust 
31*c217d954SCole Faust #include <memory>
32*c217d954SCole Faust 
33*c217d954SCole Faust namespace arm_compute
34*c217d954SCole Faust {
35*c217d954SCole Faust namespace cpu
36*c217d954SCole Faust {
37*c217d954SCole Faust // Forward declarations
38*c217d954SCole Faust class CpuConvertFullyConnectedWeights;
39*c217d954SCole Faust class CpuFlatten;
40*c217d954SCole Faust class CpuGemm;
41*c217d954SCole Faust class CpuGemmLowpMatrixMultiplyCore;
42*c217d954SCole Faust namespace kernels
43*c217d954SCole Faust {
44*c217d954SCole Faust class CpuTransposeKernel;
45*c217d954SCole Faust } // namespace kernels
46*c217d954SCole Faust /** Basic function to compute a Fully Connected layer. This function calls the following kernels:
47*c217d954SCole Faust  *  -# @ref kernels::CpuIm2ColKernel (called when the input comes from a convolutional layer)
48*c217d954SCole Faust  *  -# @ref kernels::CpuTransposeKernel (if @p are_weights_reshaped is set to false and transpose_weights is set to true ) (called once)
49*c217d954SCole Faust  *  -# @ref CpuGemm or @ref CpuGemmLowpMatrixMultiplyCore (if quantized asymmetric)
50*c217d954SCole Faust  *  -# @ref kernels::CpuGemmMatrixAdditionKernel or @ref CpuGemmLowpOutputStage (if quantized asymmetric) (if @p biases is not equal to nullptr)
51*c217d954SCole Faust  *
52*c217d954SCole Faust  * @note  The fully connected layer accepts "weights" tensors only with 2 dimensions.
53*c217d954SCole Faust  */
54*c217d954SCole Faust class CpuFullyConnected : public ICpuOperator
55*c217d954SCole Faust {
56*c217d954SCole Faust public:
57*c217d954SCole Faust     /** Constructor */
58*c217d954SCole Faust     CpuFullyConnected();
59*c217d954SCole Faust     /** Destructor */
60*c217d954SCole Faust     ~CpuFullyConnected();
61*c217d954SCole Faust     /** Set the input and output tensors.
62*c217d954SCole Faust      *
63*c217d954SCole Faust      * Valid data layouts:
64*c217d954SCole Faust      * - NHWC
65*c217d954SCole Faust      * - NCHW
66*c217d954SCole Faust      *
67*c217d954SCole Faust      * Valid data type configurations:
68*c217d954SCole Faust      * |src0           |src1               |src2   |dst            |
69*c217d954SCole Faust      * |:--------------|:------------------|:------|:--------------|
70*c217d954SCole Faust      * |F16            |F16                |F16    |F16            |
71*c217d954SCole Faust      * |F32            |F32                |F32    |F32            |
72*c217d954SCole Faust      * |QASYMM8        |QASYMM8            |S32    |QASYMM8        |
73*c217d954SCole Faust      * |QASYMM8_SIGNED |QASYMM8_SIGNED     |S32    |QASYMM8_SIGNED |
74*c217d954SCole Faust      *
75*c217d954SCole Faust      * @param[in]  src          Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
76*c217d954SCole Faust      * @param[in]  weights      Weights tensor info. The weights must be 2 dimensional.
77*c217d954SCole Faust      *                          If this function is called after a Convolution Layer, the (transposed) weights will have as many rows as the product of the first 3 input's dimensions.
78*c217d954SCole Faust      *                          If it is called after another FullyConnected Layer, the (transposed) weights will have as many rows as the input's first dimension.
79*c217d954SCole Faust      *                          Data type supported: Same as @p src.
80*c217d954SCole Faust      * @param[in]  biases       Bias tensor info. Can be nullptr. Data type supported: Same as @p weights, S32 if @p weights is QASYMM8/QASYMM8_SIGNED.
81*c217d954SCole Faust      * @param[out] dst          Destination tensor info. Its shape should be equal to the output of a matrix multiplication between:
82*c217d954SCole Faust      *                          - The output of im2col on the input and the (transposed) 2D weights, if the function is called after a Convolution Layer
83*c217d954SCole Faust      *                          - The input tensor and the (transposed) 2D weights, if the function is called after another FullyConnected Layer.
84*c217d954SCole Faust      *                          Data type supported: Same as @p src.
85*c217d954SCole Faust      * @param[in]  fc_info      (Optional) Fully connected layer additional info
86*c217d954SCole Faust      * @param[in]  weights_info (Optional) Stores neccessary compute information when weights are already reshaped
87*c217d954SCole Faust      */
88*c217d954SCole Faust     void configure(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *dst,
89*c217d954SCole Faust                    FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo(), const WeightsInfo &weights_info = WeightsInfo());
90*c217d954SCole Faust     /** Static function to check if given info will lead to a valid configuration of @ref CpuFullyConnected
91*c217d954SCole Faust      *
92*c217d954SCole Faust      * Similar to @ref CpuFullyConnected::configure()
93*c217d954SCole Faust      *
94*c217d954SCole Faust      * @return a status
95*c217d954SCole Faust      */
96*c217d954SCole Faust     static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst,
97*c217d954SCole Faust                            FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo(), const WeightsInfo &weights_info = WeightsInfo());
98*c217d954SCole Faust 
99*c217d954SCole Faust     /** Static function that queries whether there exists fixed-format kernel and if it exists it will return in the first argument in what format
100*c217d954SCole Faust      * weights are expected to be reshaped as defined by WeightFormat class. Apart from the first argument the rest of the arguments are the same
101*c217d954SCole Faust      * as in @ref CpuFullyConnectedLayer::validate() except that all arguments are required.
102*c217d954SCole Faust      *
103*c217d954SCole Faust      * @return a status
104*c217d954SCole Faust      */
105*c217d954SCole Faust     static Status has_opt_impl(arm_compute::WeightFormat &expected_weight_format, const ITensorInfo *src, const ITensorInfo *weights,
106*c217d954SCole Faust                                const ITensorInfo *biases, const ITensorInfo *dst,
107*c217d954SCole Faust                                FullyConnectedLayerInfo fc_info, WeightsInfo weights_info);
108*c217d954SCole Faust 
109*c217d954SCole Faust     //Inherited methods override
110*c217d954SCole Faust     void                             run(ITensorPack &tensors) override;
111*c217d954SCole Faust     void                             prepare(ITensorPack &tensors) override;
112*c217d954SCole Faust     experimental::MemoryRequirements workspace() const override;
113*c217d954SCole Faust 
114*c217d954SCole Faust private:
115*c217d954SCole Faust     void configure_fc_fc(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *dst, const ActivationLayerInfo &act);
116*c217d954SCole Faust     void configure_conv_fc(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *dst, const ActivationLayerInfo &act);
117*c217d954SCole Faust     void configure_mm(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *dst, const ActivationLayerInfo &act);
118*c217d954SCole Faust 
119*c217d954SCole Faust     enum AuxTensorIdx
120*c217d954SCole Faust     {
121*c217d954SCole Faust         AsmGemmWorkspace = 0,
122*c217d954SCole Faust         Pretranspose,
123*c217d954SCole Faust         GemmTemp1, // Both CpuGemm and CpuGemmLowpMatrixMultiplyCore
124*c217d954SCole Faust         GemmTemp2, // Both CpuGemm and CpuGemmLowpMatrixMultiplyCore
125*c217d954SCole Faust         GemmTemp3, // Both CpuGemm and CpuGemmLowpMatrixMultiplyCore
126*c217d954SCole Faust         GemmTemp4, // CpuGemmLowpMatrixMultiplyCore only
127*c217d954SCole Faust         GemmTemp5, // CpuGemmLowpMatrixMultiplyCore only
128*c217d954SCole Faust         GemmTemp6, // CpuGemmLowpMatrixMultiplyCore only
129*c217d954SCole Faust         GemmTemp7, // CpuGemmLowpMatrixMultiplyCore only
130*c217d954SCole Faust         TransposedWeights,
131*c217d954SCole Faust         ConvertedWeights,
132*c217d954SCole Faust         FlattenedSrc,
133*c217d954SCole Faust         Count
134*c217d954SCole Faust     };
135*c217d954SCole Faust 
136*c217d954SCole Faust     std::unique_ptr<CpuFlatten>                      _flatten;
137*c217d954SCole Faust     std::unique_ptr<CpuConvertFullyConnectedWeights> _convert_weights;
138*c217d954SCole Faust     std::unique_ptr<kernels::CpuTransposeKernel>     _transpose_weights;
139*c217d954SCole Faust     std::unique_ptr<CpuGemm>                         _mm_gemm;
140*c217d954SCole Faust     std::unique_ptr<CpuGemmLowpMatrixMultiplyCore>   _mm_gemmlowp;
141*c217d954SCole Faust 
142*c217d954SCole Faust     TensorInfo   _flattened_src;
143*c217d954SCole Faust     TensorInfo   _converted_weights;
144*c217d954SCole Faust     TensorInfo   _reshaped_weights;
145*c217d954SCole Faust     TensorInfo   _trans_weights;
146*c217d954SCole Faust     AuxTensorIdx _trans_weights_idx;
147*c217d954SCole Faust 
148*c217d954SCole Faust     experimental::MemoryRequirements _aux_mem;
149*c217d954SCole Faust 
150*c217d954SCole Faust     bool                      _needs_weights_conversion;
151*c217d954SCole Faust     bool                      _needs_weights_reshape;
152*c217d954SCole Faust     bool                      _is_fc_after_conv;
153*c217d954SCole Faust     bool                      _is_quantized_asymmetric;
154*c217d954SCole Faust     bool                      _is_prepared;
155*c217d954SCole Faust     bool                      _enable_fast_math;
156*c217d954SCole Faust     bool                      _fixed_format;
157*c217d954SCole Faust     arm_compute::WeightFormat _weight_format;
158*c217d954SCole Faust };
159*c217d954SCole Faust } // namespace cpu
160*c217d954SCole Faust } // namespace arm_compute
161*c217d954SCole Faust #endif /* ARM_COMPUTE_CPU_FULLY_CONNECTED_H */
162