1*4bdc9457SAndroid Build Coastguard Worker// Copyright 2021 Google LLC 2*4bdc9457SAndroid Build Coastguard Worker// 3*4bdc9457SAndroid Build Coastguard Worker// This source code is licensed under the BSD-style license found in the 4*4bdc9457SAndroid Build Coastguard Worker// LICENSE file in the root directory of this source tree. 5*4bdc9457SAndroid Build Coastguard Worker 6*4bdc9457SAndroid Build Coastguard Worker$assert ((TILE_HEIGHT & (TILE_HEIGHT-1) == 0) and TILE_HEIGHT != 0) 7*4bdc9457SAndroid Build Coastguard Worker$assert ((TILE_WIDTH & (TILE_WIDTH-1) == 0) and TILE_WIDTH != 0) 8*4bdc9457SAndroid Build Coastguard Worker$assert SIZE in [8, 16, 32, 64] 9*4bdc9457SAndroid Build Coastguard Worker$assert TYPE in ["int8_t", "int16_t", "int", "float", "int64_t", "double"] 10*4bdc9457SAndroid Build Coastguard Worker$assert (TILE_WIDTH * SIZE <= 128) 11*4bdc9457SAndroid Build Coastguard Worker$SUFFIX = "float" if TYPE in ["float", "double"] else "int" 12*4bdc9457SAndroid Build Coastguard Worker 13*4bdc9457SAndroid Build Coastguard Worker#include <assert.h> 14*4bdc9457SAndroid Build Coastguard Worker 15*4bdc9457SAndroid Build Coastguard Worker#include <xnnpack/common.h> 16*4bdc9457SAndroid Build Coastguard Worker#include <xnnpack/math.h> 17*4bdc9457SAndroid Build Coastguard Worker#include <xnnpack/transpose.h> 18*4bdc9457SAndroid Build Coastguard Worker 19*4bdc9457SAndroid Build Coastguard Workervoid xnn_x${SIZE}_transposec_ukernel__${TILE_HEIGHT}x${TILE_WIDTH}_scalar_${SUFFIX}( 20*4bdc9457SAndroid Build Coastguard Worker const uint${SIZE}_t *input, 21*4bdc9457SAndroid Build Coastguard Worker uint${SIZE}_t * output, 22*4bdc9457SAndroid Build Coastguard Worker size_t input_stride, 23*4bdc9457SAndroid Build Coastguard Worker size_t output_stride, 24*4bdc9457SAndroid Build Coastguard Worker size_t block_width, 25*4bdc9457SAndroid Build Coastguard Worker size_t block_height) XNN_OOB_READS 26*4bdc9457SAndroid Build Coastguard Worker{ 27*4bdc9457SAndroid Build Coastguard Worker assert(output_stride >= block_height * sizeof(${TYPE})); 28*4bdc9457SAndroid Build Coastguard Worker assert(input_stride >= block_width * sizeof(${TYPE})); 29*4bdc9457SAndroid Build Coastguard Worker 30*4bdc9457SAndroid Build Coastguard Worker const size_t tile_height = ${TILE_HEIGHT}; 31*4bdc9457SAndroid Build Coastguard Worker const size_t tile_width = ${TILE_WIDTH}; 32*4bdc9457SAndroid Build Coastguard Worker const size_t tile_wbytes = tile_width * sizeof(${TYPE}); 33*4bdc9457SAndroid Build Coastguard Worker $if TILE_HEIGHT == 1: 34*4bdc9457SAndroid Build Coastguard Worker const size_t input_reset = tile_wbytes - block_height * input_stride; 35*4bdc9457SAndroid Build Coastguard Worker const size_t output_reset = tile_width * output_stride - block_height * sizeof(${TYPE}); 36*4bdc9457SAndroid Build Coastguard Worker $else: 37*4bdc9457SAndroid Build Coastguard Worker const size_t input_reset = tile_wbytes - round_down_po2(block_height, tile_height) * input_stride; 38*4bdc9457SAndroid Build Coastguard Worker const size_t output_reset = tile_width * output_stride - round_down_po2(block_height, 2) * sizeof(${TYPE}); 39*4bdc9457SAndroid Build Coastguard Worker const size_t input_offset = tile_height * input_stride; 40*4bdc9457SAndroid Build Coastguard Worker 41*4bdc9457SAndroid Build Coastguard Worker const ${TYPE}* i0 = (const ${TYPE}*) input; 42*4bdc9457SAndroid Build Coastguard Worker $for N in range(1, TILE_HEIGHT): 43*4bdc9457SAndroid Build Coastguard Worker const ${TYPE}* i${N} = (const ${TYPE}*) ((uintptr_t) i${N-1} + input_stride); 44*4bdc9457SAndroid Build Coastguard Worker 45*4bdc9457SAndroid Build Coastguard Worker ${TYPE}* o0 = (${TYPE}*) output; 46*4bdc9457SAndroid Build Coastguard Worker $for N in range(1, TILE_WIDTH): 47*4bdc9457SAndroid Build Coastguard Worker ${TYPE}* o${N} = (${TYPE}*) ((uintptr_t) o${N-1} + output_stride); 48*4bdc9457SAndroid Build Coastguard Worker 49*4bdc9457SAndroid Build Coastguard Worker do { 50*4bdc9457SAndroid Build Coastguard Worker $if TILE_WIDTH > 1: 51*4bdc9457SAndroid Build Coastguard Worker if XNN_UNPREDICTABLE(block_width < 2) { 52*4bdc9457SAndroid Build Coastguard Worker o1 = o0; 53*4bdc9457SAndroid Build Coastguard Worker } 54*4bdc9457SAndroid Build Coastguard Worker $for N in range(2, TILE_WIDTH, 2): 55*4bdc9457SAndroid Build Coastguard Worker if XNN_UNPREDICTABLE(block_width <= ${N}) { 56*4bdc9457SAndroid Build Coastguard Worker o${N} = o0; 57*4bdc9457SAndroid Build Coastguard Worker } 58*4bdc9457SAndroid Build Coastguard Worker if XNN_UNPREDICTABLE(block_width < ${N+2}) { 59*4bdc9457SAndroid Build Coastguard Worker o${N+1} = o0; 60*4bdc9457SAndroid Build Coastguard Worker } 61*4bdc9457SAndroid Build Coastguard Worker size_t bh = block_height; 62*4bdc9457SAndroid Build Coastguard Worker for (; bh >= ${TILE_HEIGHT}; bh -= ${TILE_HEIGHT}) { 63*4bdc9457SAndroid Build Coastguard Worker $for M in reversed(range(TILE_WIDTH)): 64*4bdc9457SAndroid Build Coastguard Worker $for N in range(TILE_HEIGHT): 65*4bdc9457SAndroid Build Coastguard Worker *o${M}++ = i${N}[${M}]; 66*4bdc9457SAndroid Build Coastguard Worker $for N in range(TILE_HEIGHT): 67*4bdc9457SAndroid Build Coastguard Worker i${N} = (const ${TYPE}*) ((uintptr_t) i${N} + input_offset); 68*4bdc9457SAndroid Build Coastguard Worker } 69*4bdc9457SAndroid Build Coastguard Worker $if TILE_HEIGHT > 2: 70*4bdc9457SAndroid Build Coastguard Worker const ${TYPE}* i = i0; 71*4bdc9457SAndroid Build Coastguard Worker if (bh & 2) { 72*4bdc9457SAndroid Build Coastguard Worker $for M in reversed(range(TILE_WIDTH)): 73*4bdc9457SAndroid Build Coastguard Worker o${M}[0] = i0[${M}]; 74*4bdc9457SAndroid Build Coastguard Worker o${M}[1] = i1[${M}]; 75*4bdc9457SAndroid Build Coastguard Worker o${M} += 2; 76*4bdc9457SAndroid Build Coastguard Worker i = i2; 77*4bdc9457SAndroid Build Coastguard Worker } 78*4bdc9457SAndroid Build Coastguard Worker if (bh & 1) { 79*4bdc9457SAndroid Build Coastguard Worker $for M in reversed(range(TILE_WIDTH)): 80*4bdc9457SAndroid Build Coastguard Worker o${M}[0] = i[${M}]; 81*4bdc9457SAndroid Build Coastguard Worker } 82*4bdc9457SAndroid Build Coastguard Worker $elif TILE_HEIGHT > 1: 83*4bdc9457SAndroid Build Coastguard Worker if (bh & 1) { 84*4bdc9457SAndroid Build Coastguard Worker $for M in reversed(range(TILE_WIDTH)): 85*4bdc9457SAndroid Build Coastguard Worker o${M}[0] = i0[${M}]; 86*4bdc9457SAndroid Build Coastguard Worker } 87*4bdc9457SAndroid Build Coastguard Worker 88*4bdc9457SAndroid Build Coastguard Worker i0 = (const ${TYPE}*) ((uintptr_t) i0 + input_reset); 89*4bdc9457SAndroid Build Coastguard Worker $for N in range(1, TILE_HEIGHT): 90*4bdc9457SAndroid Build Coastguard Worker i${N} = (const ${TYPE}*) ((uintptr_t) i${N-1} + input_stride); 91*4bdc9457SAndroid Build Coastguard Worker $for N in range(TILE_WIDTH): 92*4bdc9457SAndroid Build Coastguard Worker o${N} = (${TYPE}*) ((uintptr_t) o${N} + output_reset); 93*4bdc9457SAndroid Build Coastguard Worker block_width = doz(block_width, tile_width); 94*4bdc9457SAndroid Build Coastguard Worker } while (block_width != 0); 95*4bdc9457SAndroid Build Coastguard Worker} 96