xref: /aosp_15_r20/external/XNNPACK/src/x32-transposec/scalar.c.in (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
1*4bdc9457SAndroid Build Coastguard Worker// Copyright 2021 Google LLC
2*4bdc9457SAndroid Build Coastguard Worker//
3*4bdc9457SAndroid Build Coastguard Worker// This source code is licensed under the BSD-style license found in the
4*4bdc9457SAndroid Build Coastguard Worker// LICENSE file in the root directory of this source tree.
5*4bdc9457SAndroid Build Coastguard Worker
6*4bdc9457SAndroid Build Coastguard Worker$assert ((TILE_HEIGHT & (TILE_HEIGHT-1) == 0) and TILE_HEIGHT != 0)
7*4bdc9457SAndroid Build Coastguard Worker$assert ((TILE_WIDTH & (TILE_WIDTH-1) == 0) and TILE_WIDTH != 0)
8*4bdc9457SAndroid Build Coastguard Worker$assert SIZE in [8, 16, 32, 64]
9*4bdc9457SAndroid Build Coastguard Worker$assert TYPE in ["int8_t", "int16_t", "int", "float", "int64_t", "double"]
10*4bdc9457SAndroid Build Coastguard Worker$assert (TILE_WIDTH * SIZE <= 128)
11*4bdc9457SAndroid Build Coastguard Worker$SUFFIX = "float" if TYPE in ["float", "double"] else "int"
12*4bdc9457SAndroid Build Coastguard Worker
13*4bdc9457SAndroid Build Coastguard Worker#include <assert.h>
14*4bdc9457SAndroid Build Coastguard Worker
15*4bdc9457SAndroid Build Coastguard Worker#include <xnnpack/common.h>
16*4bdc9457SAndroid Build Coastguard Worker#include <xnnpack/math.h>
17*4bdc9457SAndroid Build Coastguard Worker#include <xnnpack/transpose.h>
18*4bdc9457SAndroid Build Coastguard Worker
19*4bdc9457SAndroid Build Coastguard Workervoid xnn_x${SIZE}_transposec_ukernel__${TILE_HEIGHT}x${TILE_WIDTH}_scalar_${SUFFIX}(
20*4bdc9457SAndroid Build Coastguard Worker    const uint${SIZE}_t *input,
21*4bdc9457SAndroid Build Coastguard Worker    uint${SIZE}_t * output,
22*4bdc9457SAndroid Build Coastguard Worker    size_t input_stride,
23*4bdc9457SAndroid Build Coastguard Worker    size_t output_stride,
24*4bdc9457SAndroid Build Coastguard Worker    size_t block_width,
25*4bdc9457SAndroid Build Coastguard Worker    size_t block_height) XNN_OOB_READS
26*4bdc9457SAndroid Build Coastguard Worker{
27*4bdc9457SAndroid Build Coastguard Worker  assert(output_stride >= block_height * sizeof(${TYPE}));
28*4bdc9457SAndroid Build Coastguard Worker  assert(input_stride >= block_width * sizeof(${TYPE}));
29*4bdc9457SAndroid Build Coastguard Worker
30*4bdc9457SAndroid Build Coastguard Worker  const size_t tile_height = ${TILE_HEIGHT};
31*4bdc9457SAndroid Build Coastguard Worker  const size_t tile_width = ${TILE_WIDTH};
32*4bdc9457SAndroid Build Coastguard Worker  const size_t tile_wbytes = tile_width * sizeof(${TYPE});
33*4bdc9457SAndroid Build Coastguard Worker  $if TILE_HEIGHT == 1:
34*4bdc9457SAndroid Build Coastguard Worker    const size_t input_reset = tile_wbytes - block_height * input_stride;
35*4bdc9457SAndroid Build Coastguard Worker    const size_t output_reset = tile_width * output_stride - block_height * sizeof(${TYPE});
36*4bdc9457SAndroid Build Coastguard Worker  $else:
37*4bdc9457SAndroid Build Coastguard Worker    const size_t input_reset = tile_wbytes - round_down_po2(block_height, tile_height) * input_stride;
38*4bdc9457SAndroid Build Coastguard Worker    const size_t output_reset = tile_width * output_stride - round_down_po2(block_height, 2) * sizeof(${TYPE});
39*4bdc9457SAndroid Build Coastguard Worker  const size_t input_offset = tile_height * input_stride;
40*4bdc9457SAndroid Build Coastguard Worker
41*4bdc9457SAndroid Build Coastguard Worker  const ${TYPE}* i0 = (const ${TYPE}*) input;
42*4bdc9457SAndroid Build Coastguard Worker  $for N in range(1, TILE_HEIGHT):
43*4bdc9457SAndroid Build Coastguard Worker    const ${TYPE}* i${N} = (const ${TYPE}*) ((uintptr_t) i${N-1} + input_stride);
44*4bdc9457SAndroid Build Coastguard Worker
45*4bdc9457SAndroid Build Coastguard Worker  ${TYPE}* o0 = (${TYPE}*) output;
46*4bdc9457SAndroid Build Coastguard Worker  $for N in range(1, TILE_WIDTH):
47*4bdc9457SAndroid Build Coastguard Worker    ${TYPE}* o${N} = (${TYPE}*) ((uintptr_t) o${N-1} + output_stride);
48*4bdc9457SAndroid Build Coastguard Worker
49*4bdc9457SAndroid Build Coastguard Worker  do {
50*4bdc9457SAndroid Build Coastguard Worker    $if TILE_WIDTH > 1:
51*4bdc9457SAndroid Build Coastguard Worker      if XNN_UNPREDICTABLE(block_width < 2) {
52*4bdc9457SAndroid Build Coastguard Worker        o1 = o0;
53*4bdc9457SAndroid Build Coastguard Worker      }
54*4bdc9457SAndroid Build Coastguard Worker    $for N in range(2, TILE_WIDTH, 2):
55*4bdc9457SAndroid Build Coastguard Worker      if XNN_UNPREDICTABLE(block_width <= ${N}) {
56*4bdc9457SAndroid Build Coastguard Worker        o${N} = o0;
57*4bdc9457SAndroid Build Coastguard Worker      }
58*4bdc9457SAndroid Build Coastguard Worker      if XNN_UNPREDICTABLE(block_width < ${N+2}) {
59*4bdc9457SAndroid Build Coastguard Worker        o${N+1} = o0;
60*4bdc9457SAndroid Build Coastguard Worker      }
61*4bdc9457SAndroid Build Coastguard Worker    size_t bh = block_height;
62*4bdc9457SAndroid Build Coastguard Worker    for (; bh >= ${TILE_HEIGHT}; bh -= ${TILE_HEIGHT}) {
63*4bdc9457SAndroid Build Coastguard Worker      $for M in reversed(range(TILE_WIDTH)):
64*4bdc9457SAndroid Build Coastguard Worker        $for N in range(TILE_HEIGHT):
65*4bdc9457SAndroid Build Coastguard Worker          *o${M}++ = i${N}[${M}];
66*4bdc9457SAndroid Build Coastguard Worker      $for N in range(TILE_HEIGHT):
67*4bdc9457SAndroid Build Coastguard Worker        i${N} = (const ${TYPE}*) ((uintptr_t) i${N} + input_offset);
68*4bdc9457SAndroid Build Coastguard Worker    }
69*4bdc9457SAndroid Build Coastguard Worker    $if TILE_HEIGHT > 2:
70*4bdc9457SAndroid Build Coastguard Worker      const ${TYPE}* i = i0;
71*4bdc9457SAndroid Build Coastguard Worker      if (bh & 2) {
72*4bdc9457SAndroid Build Coastguard Worker        $for M in reversed(range(TILE_WIDTH)):
73*4bdc9457SAndroid Build Coastguard Worker          o${M}[0] = i0[${M}];
74*4bdc9457SAndroid Build Coastguard Worker          o${M}[1] = i1[${M}];
75*4bdc9457SAndroid Build Coastguard Worker          o${M} += 2;
76*4bdc9457SAndroid Build Coastguard Worker        i = i2;
77*4bdc9457SAndroid Build Coastguard Worker      }
78*4bdc9457SAndroid Build Coastguard Worker      if (bh & 1) {
79*4bdc9457SAndroid Build Coastguard Worker        $for M in reversed(range(TILE_WIDTH)):
80*4bdc9457SAndroid Build Coastguard Worker          o${M}[0] = i[${M}];
81*4bdc9457SAndroid Build Coastguard Worker      }
82*4bdc9457SAndroid Build Coastguard Worker    $elif TILE_HEIGHT > 1:
83*4bdc9457SAndroid Build Coastguard Worker      if (bh & 1) {
84*4bdc9457SAndroid Build Coastguard Worker        $for M in reversed(range(TILE_WIDTH)):
85*4bdc9457SAndroid Build Coastguard Worker          o${M}[0] = i0[${M}];
86*4bdc9457SAndroid Build Coastguard Worker      }
87*4bdc9457SAndroid Build Coastguard Worker
88*4bdc9457SAndroid Build Coastguard Worker    i0 = (const ${TYPE}*) ((uintptr_t) i0 + input_reset);
89*4bdc9457SAndroid Build Coastguard Worker    $for N in range(1, TILE_HEIGHT):
90*4bdc9457SAndroid Build Coastguard Worker      i${N} = (const ${TYPE}*) ((uintptr_t) i${N-1} + input_stride);
91*4bdc9457SAndroid Build Coastguard Worker    $for N in range(TILE_WIDTH):
92*4bdc9457SAndroid Build Coastguard Worker      o${N} = (${TYPE}*) ((uintptr_t) o${N} + output_reset);
93*4bdc9457SAndroid Build Coastguard Worker    block_width = doz(block_width, tile_width);
94*4bdc9457SAndroid Build Coastguard Worker  } while (block_width != 0);
95*4bdc9457SAndroid Build Coastguard Worker}
96