1*c217d954SCole Faust /* 2*c217d954SCole Faust * Copyright (c) 2016-2022 Arm Limited. 3*c217d954SCole Faust * 4*c217d954SCole Faust * SPDX-License-Identifier: MIT 5*c217d954SCole Faust * 6*c217d954SCole Faust * Permission is hereby granted, free of charge, to any person obtaining a copy 7*c217d954SCole Faust * of this software and associated documentation files (the "Software"), to 8*c217d954SCole Faust * deal in the Software without restriction, including without limitation the 9*c217d954SCole Faust * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 10*c217d954SCole Faust * sell copies of the Software, and to permit persons to whom the Software is 11*c217d954SCole Faust * furnished to do so, subject to the following conditions: 12*c217d954SCole Faust * 13*c217d954SCole Faust * The above copyright notice and this permission notice shall be included in all 14*c217d954SCole Faust * copies or substantial portions of the Software. 15*c217d954SCole Faust * 16*c217d954SCole Faust * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17*c217d954SCole Faust * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18*c217d954SCole Faust * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19*c217d954SCole Faust * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20*c217d954SCole Faust * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21*c217d954SCole Faust * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22*c217d954SCole Faust * SOFTWARE. 23*c217d954SCole Faust */ 24*c217d954SCole Faust #ifndef ARM_COMPUTE_CPU_MUL_KERNEL_H 25*c217d954SCole Faust #define ARM_COMPUTE_CPU_MUL_KERNEL_H 26*c217d954SCole Faust 27*c217d954SCole Faust #include "src/core/common/Macros.h" 28*c217d954SCole Faust #include "src/cpu/ICpuKernel.h" 29*c217d954SCole Faust 30*c217d954SCole Faust namespace arm_compute 31*c217d954SCole Faust { 32*c217d954SCole Faust namespace cpu 33*c217d954SCole Faust { 34*c217d954SCole Faust namespace kernels 35*c217d954SCole Faust { 36*c217d954SCole Faust /** Interface for the kernel to perform multiplication between two tensors */ 37*c217d954SCole Faust class CpuMulKernel : public ICpuKernel<CpuMulKernel> 38*c217d954SCole Faust { 39*c217d954SCole Faust public: 40*c217d954SCole Faust CpuMulKernel() = default; 41*c217d954SCole Faust ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuMulKernel); 42*c217d954SCole Faust /** Initialise the kernel's input, dst and border mode. 43*c217d954SCole Faust * 44*c217d954SCole Faust * Valid configurations (Src1,Src2) -> Dst : 45*c217d954SCole Faust * 46*c217d954SCole Faust * Support: Broadcast? Scale=1/255? 47*c217d954SCole Faust * - (U8,U8) -> U8, S16 N Y 48*c217d954SCole Faust * - (U8,S16) -> S16 N Y 49*c217d954SCole Faust * - (S16,U8) -> S16 N Y 50*c217d954SCole Faust * - (S16,S16) -> S16 N Y 51*c217d954SCole Faust * - (S32,S32) -> S32 Y N 52*c217d954SCole Faust * - (F16,F16) -> F16 N Y 53*c217d954SCole Faust * - (F32,F32) -> F32 Y Y 54*c217d954SCole Faust * - (QASYMM8,QASYMM8) -> QASYMM8 Y Y 55*c217d954SCole Faust * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED Y Y 56*c217d954SCole Faust * - (QSYMM16,QSYMM16) -> QSYMM16, S32 N Y 57*c217d954SCole Faust * 58*c217d954SCole Faust * @note For @p scale equal to 1/255 only round to nearest even (implemented as round half up) is supported. 59*c217d954SCole Faust * For all other scale values only round to zero (implemented as round towards minus infinity) is supported. 60*c217d954SCole Faust * 61*c217d954SCole Faust * @param[in] src1 First input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/QSYMM16/F16/F32 62*c217d954SCole Faust * @param[in] src2 Second input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/QSYMM16/F16/F32 63*c217d954SCole Faust * @param[out] dst Dst tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/QSYMM16/F16/F32 64*c217d954SCole Faust * @param[in] scale Scale to apply after multiplication. 65*c217d954SCole Faust * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15. 66*c217d954SCole Faust * If both @p src1, @p src2 and @p dst are of datatype S32, scale cannot be 1/255 67*c217d954SCole Faust * @param[in] overflow_policy Overflow policy. ConvertPolicy cannot be WRAP if any of the inputs is of quantized datatype 68*c217d954SCole Faust * @param[in] rounding_policy Rounding policy. 69*c217d954SCole Faust */ 70*c217d954SCole Faust void configure(ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy); 71*c217d954SCole Faust /** Static function to check if given info will lead to a valid configuration 72*c217d954SCole Faust * 73*c217d954SCole Faust * Similar to @ref CpuMulKernel::configure() 74*c217d954SCole Faust * 75*c217d954SCole Faust * @return a status 76*c217d954SCole Faust */ 77*c217d954SCole Faust static Status validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy); 78*c217d954SCole Faust 79*c217d954SCole Faust // Inherited methods overridden 80*c217d954SCole Faust void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; 81*c217d954SCole Faust const char *name() const override; 82*c217d954SCole Faust 83*c217d954SCole Faust /** Return minimum workload size of the relevant kernel 84*c217d954SCole Faust * 85*c217d954SCole Faust * @param[in] platform The CPU platform used to create the context. 86*c217d954SCole Faust * @param[in] thread_count Number of threads in the execution. 87*c217d954SCole Faust * 88*c217d954SCole Faust * @return[out] mws Minimum workload size for requested configuration. 89*c217d954SCole Faust */ 90*c217d954SCole Faust size_t get_mws(const CPUInfo &platform, size_t thread_count) const override; 91*c217d954SCole Faust 92*c217d954SCole Faust /** Get the preferred dimension in which the scheduler splits the work into multiple jobs. 93*c217d954SCole Faust * 94*c217d954SCole Faust * @return The split dimension hint. 95*c217d954SCole Faust */ get_split_dimension_hint()96*c217d954SCole Faust size_t get_split_dimension_hint() const 97*c217d954SCole Faust { 98*c217d954SCole Faust return _split_dimension; 99*c217d954SCole Faust } 100*c217d954SCole Faust 101*c217d954SCole Faust private: 102*c217d954SCole Faust /** Common signature for all the specialised multiplication functions with integer scaling factor 103*c217d954SCole Faust * 104*c217d954SCole Faust * @param[in] src1 Src1 tensor object. 105*c217d954SCole Faust * @param[in] src2 Src2 tensor object. 106*c217d954SCole Faust * @param[out] dst Dst tensor object. 107*c217d954SCole Faust * @param[in] window Region on which to execute the kernel 108*c217d954SCole Faust * @param[in] scale Integer scale factor. 109*c217d954SCole Faust */ 110*c217d954SCole Faust using MulFunctionInt = void(const ITensor *src1, const ITensor *src2, ITensor *dst, const Window &window, int scale); 111*c217d954SCole Faust /** Common signature for all the specialised multiplication functions with float scaling factor 112*c217d954SCole Faust * 113*c217d954SCole Faust * @param[in] src1 Src1 tensor object. 114*c217d954SCole Faust * @param[in] src2 Src2 tensor object. 115*c217d954SCole Faust * @param[out] dst Dst tensor object. 116*c217d954SCole Faust * @param[in] window Region on which to execute the kernel 117*c217d954SCole Faust * @param[in] scale Float scale factor. 118*c217d954SCole Faust */ 119*c217d954SCole Faust using MulFunctionFloat = void(const ITensor *src1, const ITensor *src2, ITensor *dst, const Window &window, float scale); 120*c217d954SCole Faust /** Common signature for all the specialised QASYMM8 multiplication functions with float scaling factor 121*c217d954SCole Faust * 122*c217d954SCole Faust * @param[in] src1 Src1 tensor object. 123*c217d954SCole Faust * @param[in] src2 Src2 tensor object. 124*c217d954SCole Faust * @param[out] dst Dst tensor object. 125*c217d954SCole Faust * @param[in] window Region on which to execute the kernel 126*c217d954SCole Faust * @param[in] scale Float scale factor. 127*c217d954SCole Faust * 128*c217d954SCole Faust */ 129*c217d954SCole Faust using MulFunctionQuantized = void(const ITensor *src1, const ITensor *src2, ITensor *dst, const Window &window, float scale); 130*c217d954SCole Faust 131*c217d954SCole Faust MulFunctionFloat *_func_float{ nullptr }; 132*c217d954SCole Faust MulFunctionInt *_func_int{ nullptr }; 133*c217d954SCole Faust MulFunctionQuantized *_func_quantized{ nullptr }; 134*c217d954SCole Faust float _scale{ 0 }; 135*c217d954SCole Faust int _scale_exponent{ 0 }; 136*c217d954SCole Faust size_t _split_dimension{ Window::DimY }; 137*c217d954SCole Faust }; 138*c217d954SCole Faust 139*c217d954SCole Faust /** Interface for the complex pixelwise multiplication kernel. */ 140*c217d954SCole Faust class CpuComplexMulKernel : public ICpuKernel<CpuComplexMulKernel> 141*c217d954SCole Faust { 142*c217d954SCole Faust public: 143*c217d954SCole Faust CpuComplexMulKernel() = default; 144*c217d954SCole Faust ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuComplexMulKernel); 145*c217d954SCole Faust /** Initialise the kernel's src, dst and border mode. 146*c217d954SCole Faust * 147*c217d954SCole Faust * @param[in] src1 An src tensor. Data types supported: F32. Number of channels supported: 2 (complex tensor). 148*c217d954SCole Faust * @param[in] src2 An src tensor. Data types supported: same as @p src1. Number of channels supported: same as @p src1. 149*c217d954SCole Faust * @param[out] dst The dst tensor, Data types supported: same as @p src1. Number of channels supported: same as @p src1. 150*c217d954SCole Faust */ 151*c217d954SCole Faust void configure(ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst); 152*c217d954SCole Faust /** Static function to check if given info will lead to a valid configuration 153*c217d954SCole Faust * 154*c217d954SCole Faust * Similar to @ref CpuComplexMulKernel::configure() 155*c217d954SCole Faust * 156*c217d954SCole Faust * @return a status 157*c217d954SCole Faust */ 158*c217d954SCole Faust static Status validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst); 159*c217d954SCole Faust 160*c217d954SCole Faust // Inherited methods overridden: 161*c217d954SCole Faust void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; 162*c217d954SCole Faust const char *name() const override; 163*c217d954SCole Faust }; 164*c217d954SCole Faust } // namespace kernels 165*c217d954SCole Faust } // namespace cpu 166*c217d954SCole Faust } // namespace arm_compute 167*c217d954SCole Faust #endif /* ARM_COMPUTE_CPU_MUL_KERNEL_H */ 168