1 /*
2  * Copyright (c) Meta Platforms, Inc. and affiliates.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD-style license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 /*******************************************************************************
10 * Copyright (c) 2018-2023 Cadence Design Systems, Inc.
11 *
12 * Permission is hereby granted, free of charge, to any person obtaining
13 * a copy of this software and associated documentation files (the
14 * "Software"), to use this Software with Cadence processor cores only and
15 * not with any other processors and platforms, subject to
16 * the following conditions:
17 *
18 * The above copyright notice and this permission notice shall be included
19 * in all copies or substantial portions of the Software.
20 *
21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
24 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
25 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
26 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
27 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28 
29 ******************************************************************************/
30 
31 #pragma once
32 
/*
This file modifies the macros defined in
nnlib-hifi4/xa_nnlib/algo/common/include/xa_nnlib_common_macros.h
to adjust the accumulated value for the per-channel quantized scheme.
1. The ADJUST_ACC_BATCH_ASYM8b in nnlib multiplies the accumulated value in
matmul with the requantized scale. The requantized scale is an fp32 value
(in_scale*weight_scale/out_scale). This fp32 requantized_scale is represented
as a fixed-point computation with an int32 out_multiplier and an int32
out_shift. The weight_scale can be an array for per-channel quantized weights,
so we allow the left_shift/right_shift and out_multiplier to be arrays in
ADJUST_ACC_BATCH_ASYM8b.
2. The new macros SETUP_SHIFT and UNROLL_ROW_SETUP_SHIFT are not present in
nnlib. We add these to allow finding the correct out_shift for each channel
(i.e., unrolled row of p_mat1).
*/
48 
49 #include "xa_nnlib_matmul_unroll_macros.h"
50 
/*
 * UNROLL_ROW_SETUP_SHIFT(idx_row)
 *
 * Picks the quantization channel for unrolled row `idx_row` and splits that
 * channel's out_shift into a non-negative left/right shift pair.
 *
 * Reads from the expansion scope:  m_itr, per_channel_quantized, out_shift.
 * Writes in the expansion scope:   left_shift[idx_row], right_shift[idx_row].
 * Declares in the expansion scope: _ae_int32x2_ch_idx_<idx_row> (scratch) and
 *                                  _ch_idx_<idx_row> (int channel index).
 *
 * The channel index starts at 0 and is overwritten with (m_itr + idx_row) by
 * AE_MOVT32X2 under control of per_channel_quantized (presumably a
 * move-if-true; confirm against the HiFi ISA reference). A positive out_shift
 * ends up in left_shift, a negative one (negated) in right_shift, and the
 * other entry is 0.
 *
 * The expansion is intentionally NOT wrapped in do { } while (0):
 * ADJUST_ACC_BATCH_ASYM8b reads _ch_idx_<idx_row>, so the declaration must
 * remain visible after this macro. `idx_row` is token-pasted into identifiers
 * and must therefore be a single token such as a literal row number.
 */
#define UNROLL_ROW_SETUP_SHIFT(idx_row)                               \
  /* Channel index: 0 unless per-channel quantization is selected. */ \
  ae_int32x2 _ae_int32x2_ch_idx_##idx_row = 0;                        \
  AE_MOVT32X2(                                                        \
      _ae_int32x2_ch_idx_##idx_row,                                   \
      AE_MOVDA32X2(m_itr + idx_row, m_itr + idx_row),                 \
      per_channel_quantized);                                         \
  int _ch_idx_##idx_row = AE_MOVAD32_L(_ae_int32x2_ch_idx_##idx_row); \
  /* Split the signed shift into left/right amounts, each >= 0. */    \
  left_shift[idx_row] = AE_MAX32(0, out_shift[_ch_idx_##idx_row]);    \
  right_shift[idx_row] = AE_MAX32(0, -out_shift[_ch_idx_##idx_row]);
60 
/*
 * ADJUST_ACC_BATCH_ASYM8b(idx_row, idx_vec)
 *
 * Requantizes the accumulator of unrolled row `idx_row` / vector `idx_vec`:
 * the 64-bit accumulator _ae_int64_acc_<idx_row>_<idx_vec> is reinterpreted
 * as ae_int32x2, passed through MPY_BY_QUANT_MULT_X2_OUT32 with the row's
 * multiplier and shifts, and out_zero_bias is then added with AE_ADD32S.
 *
 * Unlike a single shared multiplier/shift, this variant indexes
 *   out_multiplier[_ch_idx_<idx_row>], left_shift[idx_row],
 *   right_shift[idx_row],
 * so UNROLL_ROW_SETUP_SHIFT(idx_row) must have been expanded earlier in the
 * same scope to declare _ch_idx_<idx_row> and fill the shift arrays.
 *
 * Declares in the expansion scope: _ae_int32x2_acc_<idx_row>_<idx_vec>.
 * Both arguments are token-pasted and must be single tokens.
 */
/* Drop any earlier definition of the same name so that this per-channel
 * variant replaces it instead of triggering an incompatible-redefinition
 * diagnostic. #undef of an undefined name is a no-op. */
#undef ADJUST_ACC_BATCH_ASYM8b
#define ADJUST_ACC_BATCH_ASYM8b(idx_row, idx_vec)                      \
  /* Multiply accumulator with 'out_multiplier', same as Tensorflow */ \
  ae_int32x2 _ae_int32x2_acc_##idx_row##_##idx_vec;                    \
  MPY_BY_QUANT_MULT_X2_OUT32(                                          \
      _ae_int32x2_acc_##idx_row##_##idx_vec,                           \
      AE_MOVINT32X2_FROMINT64(_ae_int64_acc_##idx_row##_##idx_vec),    \
      out_multiplier[_ch_idx_##idx_row],                               \
      left_shift[idx_row],                                             \
      right_shift[idx_row]);                                           \
  /* Add output zero point */                                          \
  (_ae_int32x2_acc_##idx_row##_##idx_vec) = AE_ADD32S(                 \
      _ae_int32x2_acc_##idx_row##_##idx_vec, AE_MOVDA32(out_zero_bias));
73 
/*
 * SETUP_SHIFT
 *
 * Expands UNROLL_ROW_SETUP_SHIFT once for every unrolled row, i.e. for row
 * indices 0 .. ROW_UNROLL-1. Only ROW_UNROLL values of 2 and 4 are handled
 * here; for any other value SETUP_SHIFT is left undefined.
 */
#if (ROW_UNROLL == 2)

#define SETUP_SHIFT         \
  UNROLL_ROW_SETUP_SHIFT(0) \
  UNROLL_ROW_SETUP_SHIFT(1)

#elif (ROW_UNROLL == 4)

#define SETUP_SHIFT         \
  UNROLL_ROW_SETUP_SHIFT(0) \
  UNROLL_ROW_SETUP_SHIFT(1) \
  UNROLL_ROW_SETUP_SHIFT(2) \
  UNROLL_ROW_SETUP_SHIFT(3)

#endif /* ROW_UNROLL */
89