xref: /aosp_15_r20/external/XNNPACK/src/f32-dwconv/up-scalar.c.in (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
1*4bdc9457SAndroid Build Coastguard Worker// Copyright 2019 Google LLC
2*4bdc9457SAndroid Build Coastguard Worker//
3*4bdc9457SAndroid Build Coastguard Worker// This source code is licensed under the BSD-style license found in the
4*4bdc9457SAndroid Build Coastguard Worker// LICENSE file in the root directory of this source tree.
5*4bdc9457SAndroid Build Coastguard Worker
6*4bdc9457SAndroid Build Coastguard Worker$assert CHANNEL_TILE >= 1
7*4bdc9457SAndroid Build Coastguard Worker$assert KERNEL_TILE >= 2
8*4bdc9457SAndroid Build Coastguard Worker$assert ACCUMULATORS >= 1
9*4bdc9457SAndroid Build Coastguard Worker$assert ACTIVATION in ["LINEAR", "MINMAX"]
10*4bdc9457SAndroid Build Coastguard Worker$assert ACTIVATION != "LINEAR" or not WASM
11*4bdc9457SAndroid Build Coastguard Worker$ABC = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
12*4bdc9457SAndroid Build Coastguard Worker#include <assert.h>
13*4bdc9457SAndroid Build Coastguard Worker
14*4bdc9457SAndroid Build Coastguard Worker#include <xnnpack/dwconv.h>
15*4bdc9457SAndroid Build Coastguard Worker#include <xnnpack/math.h>
16*4bdc9457SAndroid Build Coastguard Worker
17*4bdc9457SAndroid Build Coastguard Worker
18*4bdc9457SAndroid Build Coastguard Worker$MIN_F32 = "__builtin_wasm_min_f32" if WASM else "math_min_f32"
19*4bdc9457SAndroid Build Coastguard Worker$MAX_F32 = "__builtin_wasm_max_f32" if WASM else "math_max_f32"
20*4bdc9457SAndroid Build Coastguard Worker$SUFFIX = {"LINEAR": "", "MINMAX": "_minmax"}[ACTIVATION]
21*4bdc9457SAndroid Build Coastguard Worker$PARAMS = {"LINEAR": "xnn_f32_default_params", "MINMAX": "xnn_f32_minmax_params"}[ACTIVATION]
// f32 dwconv microkernel, ${"WAsm" if WASM else "scalar"} flavour: ${CHANNEL_TILE} channel(s) per main-loop step,
// ${KERNEL_TILE} kernel taps, ${ACCUMULATORS} partial accumulator(s) per channel.
//
// For each of `output_width` output pixels the kernel reads ${KERNEL_TILE} row
// pointers from `input`, and for every channel computes
//   acc = w_init + sum over taps k of (row_k[channel] * w_k[channel])
// The MINMAX variant then clamps acc to [params->scalar.min, params->scalar.max]
// before storing; the LINEAR variant stores acc unchanged.
//
// Parameters:
//   channels         - channels computed per output pixel; must be non-zero.
//   output_width     - number of output pixels; must be non-zero.
//   input            - array of row pointers; ${KERNEL_TILE} entries are consumed per
//                      pixel, then the array pointer advances by `input_stride`.
//   weights          - packed as groups of ${CHANNEL_TILE} initial accumulator values
//                      (presumably biases - confirm against the packing code)
//                      followed by ${KERNEL_TILE} x ${CHANNEL_TILE} tap weights; re-read from the
//                      start for every output pixel.
//   output           - receives `channels` floats per pixel.
//   input_stride     - byte step applied to `input` after each pixel.
//   output_increment - byte step applied to `output` after each pixel's
//                      channels have been written.
//   input_offset     - byte offset added to every row pointer that differs
//                      from `zero`.
//   zero             - row pointers equal to this are used without
//                      `input_offset` (presumably a shared zero-filled
//                      padding row - confirm against caller).
//   params           - clamping bounds; only read by the MINMAX variant.
22*4bdc9457SAndroid Build Coastguard Workervoid xnn_f32_dwconv${SUFFIX}_ukernel_up${CHANNEL_TILE}x${KERNEL_TILE}__${"wasm" if WASM else "scalar"}${"" if ACCUMULATORS == 1 else "_acc%d" % ACCUMULATORS}(
23*4bdc9457SAndroid Build Coastguard Worker    size_t channels,
24*4bdc9457SAndroid Build Coastguard Worker    size_t output_width,
25*4bdc9457SAndroid Build Coastguard Worker    const float** input,
26*4bdc9457SAndroid Build Coastguard Worker    const float* weights,
27*4bdc9457SAndroid Build Coastguard Worker    float* output,
28*4bdc9457SAndroid Build Coastguard Worker    size_t input_stride,
29*4bdc9457SAndroid Build Coastguard Worker    size_t output_increment,
30*4bdc9457SAndroid Build Coastguard Worker    size_t input_offset,
31*4bdc9457SAndroid Build Coastguard Worker    const float* zero,
32*4bdc9457SAndroid Build Coastguard Worker    const union ${PARAMS} params[restrict XNN_MIN_ELEMENTS(1)])
33*4bdc9457SAndroid Build Coastguard Worker{
34*4bdc9457SAndroid Build Coastguard Worker  assert(channels != 0);
35*4bdc9457SAndroid Build Coastguard Worker  assert(output_width != 0);
36*4bdc9457SAndroid Build Coastguard Worker
37*4bdc9457SAndroid Build Coastguard Worker  $if ACTIVATION == "MINMAX":
    // Output clamping bounds, loaded once for the whole call.
38*4bdc9457SAndroid Build Coastguard Worker    const float vmin = params->scalar.min;
39*4bdc9457SAndroid Build Coastguard Worker    const float vmax = params->scalar.max;
40*4bdc9457SAndroid Build Coastguard Worker  do {
    // Fetch this pixel's row pointers. A row equal to `zero` is used as-is;
    // every other row is shifted by input_offset bytes.
41*4bdc9457SAndroid Build Coastguard Worker    $for K in range(KERNEL_TILE):
42*4bdc9457SAndroid Build Coastguard Worker      const float* i${K} = input[${K}];
43*4bdc9457SAndroid Build Coastguard Worker      assert(i${K} != NULL);
44*4bdc9457SAndroid Build Coastguard Worker      if XNN_UNPREDICTABLE(i${K} != zero) {
45*4bdc9457SAndroid Build Coastguard Worker        i${K} = (const float*) ((uintptr_t) i${K} + input_offset);
46*4bdc9457SAndroid Build Coastguard Worker      }
    // Step to the next pixel's row pointers; input_stride is in bytes.
47*4bdc9457SAndroid Build Coastguard Worker    input = (const float**) ((uintptr_t) input + input_stride);
48*4bdc9457SAndroid Build Coastguard Worker
    // The packed weights are walked from the beginning for every pixel.
49*4bdc9457SAndroid Build Coastguard Worker    size_t c = channels;
50*4bdc9457SAndroid Build Coastguard Worker    const float* w = weights;
51*4bdc9457SAndroid Build Coastguard Worker    $if CHANNEL_TILE > 1:
      // Main loop: ${CHANNEL_TILE} channels per iteration.
52*4bdc9457SAndroid Build Coastguard Worker      for (; c >= ${CHANNEL_TILE}; c -= ${CHANNEL_TILE}) {
        // Seed the accumulators from the first ${CHANNEL_TILE} packed values of this group.
53*4bdc9457SAndroid Build Coastguard Worker        $for C in range(CHANNEL_TILE):
54*4bdc9457SAndroid Build Coastguard Worker          float vacc${C}p0 = w[${C}];
55*4bdc9457SAndroid Build Coastguard Worker
56*4bdc9457SAndroid Build Coastguard Worker        $for K in range(KERNEL_TILE):
57*4bdc9457SAndroid Build Coastguard Worker
58*4bdc9457SAndroid Build Coastguard Worker          $for C in range(CHANNEL_TILE):
59*4bdc9457SAndroid Build Coastguard Worker            const float vi${K}x${C} = i${K}[${C}];
60*4bdc9457SAndroid Build Coastguard Worker          i${K} += ${CHANNEL_TILE};
61*4bdc9457SAndroid Build Coastguard Worker
62*4bdc9457SAndroid Build Coastguard Worker          $for C in range(CHANNEL_TILE):
63*4bdc9457SAndroid Build Coastguard Worker            const float vk${K}x${C} = w[${(K + 1) * CHANNEL_TILE + C}];
64*4bdc9457SAndroid Build Coastguard Worker            $if 1 <= K < ACCUMULATORS:
65*4bdc9457SAndroid Build Coastguard Worker              float vacc${C}p${K} = vi${K}x${C} * vk${K}x${C};
66*4bdc9457SAndroid Build Coastguard Worker            $else:
67*4bdc9457SAndroid Build Coastguard Worker              vacc${C}p${K % ACCUMULATORS} = math_muladd_f32(vi${K}x${C}, vk${K}x${C}, vacc${C}p${K % ACCUMULATORS});
68*4bdc9457SAndroid Build Coastguard Worker
        // Skip this group's ${CHANNEL_TILE} initial values and its ${KERNEL_TILE} x ${CHANNEL_TILE} tap weights.
69*4bdc9457SAndroid Build Coastguard Worker        w += ${(KERNEL_TILE + 1) * CHANNEL_TILE};
70*4bdc9457SAndroid Build Coastguard Worker
71*4bdc9457SAndroid Build Coastguard Worker        $if ACCUMULATORS > 1:
72*4bdc9457SAndroid Build Coastguard Worker          // Add up all accumulators to vacc${ABC[0:CHANNEL_TILE]}p0
73*4bdc9457SAndroid Build Coastguard Worker          $ACC_SLICE = 1
74*4bdc9457SAndroid Build Coastguard Worker          $while ACC_SLICE < ACCUMULATORS:
75*4bdc9457SAndroid Build Coastguard Worker            $for A in range(0, ACCUMULATORS, ACC_SLICE * 2):
76*4bdc9457SAndroid Build Coastguard Worker              $if A + ACC_SLICE < ACCUMULATORS:
77*4bdc9457SAndroid Build Coastguard Worker                $for C in range(CHANNEL_TILE):
78*4bdc9457SAndroid Build Coastguard Worker                  vacc${C}p${A} = vacc${C}p${A} + vacc${C}p${A + ACC_SLICE};
79*4bdc9457SAndroid Build Coastguard Worker            $ACC_SLICE *= 2
80*4bdc9457SAndroid Build Coastguard Worker
81*4bdc9457SAndroid Build Coastguard Worker        $if ACTIVATION == "MINMAX":
          // Clamp: lower bound first, then upper bound.
82*4bdc9457SAndroid Build Coastguard Worker          $for C in range(CHANNEL_TILE):
83*4bdc9457SAndroid Build Coastguard Worker            float vacc${C} = ${MAX_F32}(vacc${C}p0, vmin);
84*4bdc9457SAndroid Build Coastguard Worker
85*4bdc9457SAndroid Build Coastguard Worker          $for C in range(CHANNEL_TILE):
86*4bdc9457SAndroid Build Coastguard Worker            vacc${C} = ${MIN_F32}(vacc${C}, vmax);
87*4bdc9457SAndroid Build Coastguard Worker
88*4bdc9457SAndroid Build Coastguard Worker          $for C in range(CHANNEL_TILE):
89*4bdc9457SAndroid Build Coastguard Worker            output[${C}] = vacc${C};
90*4bdc9457SAndroid Build Coastguard Worker        $else:
91*4bdc9457SAndroid Build Coastguard Worker          $for C in range(CHANNEL_TILE):
92*4bdc9457SAndroid Build Coastguard Worker            output[${C}] = vacc${C}p0;
93*4bdc9457SAndroid Build Coastguard Worker        output += ${CHANNEL_TILE};
94*4bdc9457SAndroid Build Coastguard Worker      }
      // Remainder loop: the channels left over after the main loop (fewer
      // than ${CHANNEL_TILE}) are handled one at a time.
95*4bdc9457SAndroid Build Coastguard Worker      for (; c >= 1; c -= 1) {
        // w is post-incremented past this channel's initial value, so the
        // weight for tap K of the same channel sits at
        // w[(K + 1) * ${CHANNEL_TILE} - 1] relative to the updated pointer.
96*4bdc9457SAndroid Build Coastguard Worker        float vacc0p0 = *w++;
97*4bdc9457SAndroid Build Coastguard Worker
98*4bdc9457SAndroid Build Coastguard Worker        $for K in range(KERNEL_TILE):
99*4bdc9457SAndroid Build Coastguard Worker          const float vi${K} = *i${K}++;
100*4bdc9457SAndroid Build Coastguard Worker          const float vk${K} = w[${(K + 1) * CHANNEL_TILE - 1}];
101*4bdc9457SAndroid Build Coastguard Worker          $if 1 <= K < ACCUMULATORS:
102*4bdc9457SAndroid Build Coastguard Worker            float vacc0p${K} = vi${K} * vk${K};
103*4bdc9457SAndroid Build Coastguard Worker          $else:
104*4bdc9457SAndroid Build Coastguard Worker            vacc0p${K % ACCUMULATORS} = math_muladd_f32(vi${K}, vk${K}, vacc0p${K % ACCUMULATORS});
105*4bdc9457SAndroid Build Coastguard Worker
106*4bdc9457SAndroid Build Coastguard Worker        $if ACCUMULATORS > 1:
107*4bdc9457SAndroid Build Coastguard Worker          // Add up all accumulators to vacc0p0
108*4bdc9457SAndroid Build Coastguard Worker          $ACC_SLICE = 1
109*4bdc9457SAndroid Build Coastguard Worker          $while ACC_SLICE < ACCUMULATORS:
110*4bdc9457SAndroid Build Coastguard Worker            $for A in range(0, ACCUMULATORS, ACC_SLICE * 2):
111*4bdc9457SAndroid Build Coastguard Worker              $if A + ACC_SLICE < ACCUMULATORS:
112*4bdc9457SAndroid Build Coastguard Worker                vacc0p${A} = vacc0p${A} + vacc0p${A + ACC_SLICE};
113*4bdc9457SAndroid Build Coastguard Worker            $ACC_SLICE *= 2
114*4bdc9457SAndroid Build Coastguard Worker
115*4bdc9457SAndroid Build Coastguard Worker        $if ACTIVATION == "MINMAX":
116*4bdc9457SAndroid Build Coastguard Worker          float vacc0 = ${MAX_F32}(vacc0p0, vmin);
117*4bdc9457SAndroid Build Coastguard Worker          vacc0 = ${MIN_F32}(vacc0, vmax);
118*4bdc9457SAndroid Build Coastguard Worker          *output++ = vacc0;
119*4bdc9457SAndroid Build Coastguard Worker        $else:
120*4bdc9457SAndroid Build Coastguard Worker          *output++ = vacc0p0;
121*4bdc9457SAndroid Build Coastguard Worker      }
122*4bdc9457SAndroid Build Coastguard Worker    $else:
      // One channel per iteration. The do-while runs at least once, which
      // relies on channels != 0 (asserted at function entry).
123*4bdc9457SAndroid Build Coastguard Worker      do {
124*4bdc9457SAndroid Build Coastguard Worker        float vacc0p0 = w[0];
125*4bdc9457SAndroid Build Coastguard Worker        $for K in range(KERNEL_TILE):
126*4bdc9457SAndroid Build Coastguard Worker
127*4bdc9457SAndroid Build Coastguard Worker          const float vi${K} = *i${K}++;
128*4bdc9457SAndroid Build Coastguard Worker          const float vk${K} = w[${K+1}];
129*4bdc9457SAndroid Build Coastguard Worker          $if 1 <= K < ACCUMULATORS:
130*4bdc9457SAndroid Build Coastguard Worker            float vacc0p${K} = vi${K} * vk${K};
131*4bdc9457SAndroid Build Coastguard Worker          $else:
132*4bdc9457SAndroid Build Coastguard Worker            vacc0p${K % ACCUMULATORS} = math_muladd_f32(vi${K}, vk${K}, vacc0p${K % ACCUMULATORS});
133*4bdc9457SAndroid Build Coastguard Worker
        // Skip this channel's initial value and its ${KERNEL_TILE} tap weights.
134*4bdc9457SAndroid Build Coastguard Worker        w += ${KERNEL_TILE + 1};
135*4bdc9457SAndroid Build Coastguard Worker
136*4bdc9457SAndroid Build Coastguard Worker        $ACC_STEP = 1
137*4bdc9457SAndroid Build Coastguard Worker        $while ACC_STEP < ACCUMULATORS:
138*4bdc9457SAndroid Build Coastguard Worker          $for A in range(0, ACCUMULATORS, ACC_STEP * 2):
139*4bdc9457SAndroid Build Coastguard Worker            $if A + ACC_STEP < ACCUMULATORS:
140*4bdc9457SAndroid Build Coastguard Worker              vacc0p${A} += vacc0p${A + ACC_STEP};
141*4bdc9457SAndroid Build Coastguard Worker          $ACC_STEP *= 2
142*4bdc9457SAndroid Build Coastguard Worker
143*4bdc9457SAndroid Build Coastguard Worker        $if ACTIVATION == "MINMAX":
144*4bdc9457SAndroid Build Coastguard Worker          float vacc0 = ${MAX_F32}(vacc0p0, vmin);
145*4bdc9457SAndroid Build Coastguard Worker          vacc0 = ${MIN_F32}(vacc0, vmax);
146*4bdc9457SAndroid Build Coastguard Worker          *output++ = vacc0;
147*4bdc9457SAndroid Build Coastguard Worker        $else:
148*4bdc9457SAndroid Build Coastguard Worker          *output++ = vacc0p0;
149*4bdc9457SAndroid Build Coastguard Worker      } while (--c != 0);
150*4bdc9457SAndroid Build Coastguard Worker
    // Move to the next pixel's output location; output_increment is in bytes.
151*4bdc9457SAndroid Build Coastguard Worker    output = (float*) ((uintptr_t) output + output_increment);
152*4bdc9457SAndroid Build Coastguard Worker  } while (--output_width != 0);
153*4bdc9457SAndroid Build Coastguard Worker}
154