1*4bdc9457SAndroid Build Coastguard Worker// Copyright 2022 Google LLC 2*4bdc9457SAndroid Build Coastguard Worker// 3*4bdc9457SAndroid Build Coastguard Worker// This source code is licensed under the BSD-style license found in the 4*4bdc9457SAndroid Build Coastguard Worker// LICENSE file in the root directory of this source tree. 5*4bdc9457SAndroid Build Coastguard Worker 6*4bdc9457SAndroid Build Coastguard Worker$assert TILE_HEIGHT & (TILE_HEIGHT-1) == 0 and TILE_HEIGHT != 0 7*4bdc9457SAndroid Build Coastguard Worker$assert TILE_WIDTH & (TILE_WIDTH-1) == 0 and TILE_WIDTH != 0 8*4bdc9457SAndroid Build Coastguard Worker$assert TILE_HEIGHT in [1, 2, 4] 9*4bdc9457SAndroid Build Coastguard Worker$assert TILE_WIDTH in [1, 2, 4] 10*4bdc9457SAndroid Build Coastguard Worker 11*4bdc9457SAndroid Build Coastguard Worker#include <assert.h> 12*4bdc9457SAndroid Build Coastguard Worker 13*4bdc9457SAndroid Build Coastguard Worker#include <xnnpack/common.h> 14*4bdc9457SAndroid Build Coastguard Worker#include <xnnpack/math.h> 15*4bdc9457SAndroid Build Coastguard Worker#include <xnnpack/transpose.h> 16*4bdc9457SAndroid Build Coastguard Worker 17*4bdc9457SAndroid Build Coastguard Workervoid xnn_x24_transposec_ukernel__${TILE_HEIGHT}x${TILE_WIDTH}_scalar( 18*4bdc9457SAndroid Build Coastguard Worker const void *input, 19*4bdc9457SAndroid Build Coastguard Worker void * output, 20*4bdc9457SAndroid Build Coastguard Worker size_t input_stride, 21*4bdc9457SAndroid Build Coastguard Worker size_t output_stride, 22*4bdc9457SAndroid Build Coastguard Worker size_t block_width, 23*4bdc9457SAndroid Build Coastguard Worker size_t block_height) 24*4bdc9457SAndroid Build Coastguard Worker{ 25*4bdc9457SAndroid Build Coastguard Worker assert(output_stride >= block_height * 3); 26*4bdc9457SAndroid Build Coastguard Worker assert(input_stride >= block_width * 3); 27*4bdc9457SAndroid Build Coastguard Worker 28*4bdc9457SAndroid Build Coastguard Worker $if TILE_HEIGHT == 1: 29*4bdc9457SAndroid Build Coastguard Worker const size_t input_reset = ${TILE_WIDTH * 3} - block_height * input_stride; 30*4bdc9457SAndroid Build Coastguard Worker const size_t output_reset = ${TILE_WIDTH} * output_stride - block_height * 3; 31*4bdc9457SAndroid Build Coastguard Worker $else: 32*4bdc9457SAndroid Build Coastguard Worker const size_t input_reset = ${TILE_WIDTH * 3} - round_down_po2(block_height, ${TILE_HEIGHT}) * input_stride; 33*4bdc9457SAndroid Build Coastguard Worker const size_t output_reset = ${TILE_WIDTH} * output_stride - block_height * 3; 34*4bdc9457SAndroid Build Coastguard Worker const size_t input_offset = ${TILE_HEIGHT} * input_stride; 35*4bdc9457SAndroid Build Coastguard Worker 36*4bdc9457SAndroid Build Coastguard Worker const uint8_t* i0 = (const uint8_t*) input; 37*4bdc9457SAndroid Build Coastguard Worker $for N in range(1, TILE_HEIGHT): 38*4bdc9457SAndroid Build Coastguard Worker const uint8_t* i${N} = (const uint8_t*) ((uintptr_t) i${N-1} + input_stride); 39*4bdc9457SAndroid Build Coastguard Worker 40*4bdc9457SAndroid Build Coastguard Worker uint8_t* o0 = (uint8_t*) output; 41*4bdc9457SAndroid Build Coastguard Worker $for N in range(1, TILE_WIDTH): 42*4bdc9457SAndroid Build Coastguard Worker uint8_t* o${N} = (uint8_t*) ((uintptr_t) o${N-1} + output_stride); 43*4bdc9457SAndroid Build Coastguard Worker 44*4bdc9457SAndroid Build Coastguard Worker do { 45*4bdc9457SAndroid Build Coastguard Worker $if TILE_WIDTH > 1: 46*4bdc9457SAndroid Build Coastguard Worker if XNN_UNPREDICTABLE(block_width < 2) { 47*4bdc9457SAndroid Build Coastguard Worker o1 = o0; 48*4bdc9457SAndroid Build Coastguard Worker } 49*4bdc9457SAndroid Build Coastguard Worker $for N in range(2, TILE_WIDTH, 2): 50*4bdc9457SAndroid Build Coastguard Worker if XNN_UNPREDICTABLE(block_width <= ${N}) { 51*4bdc9457SAndroid Build Coastguard Worker o${N} = o0; 52*4bdc9457SAndroid Build Coastguard Worker } 53*4bdc9457SAndroid Build Coastguard Worker if XNN_UNPREDICTABLE(block_width < ${N+2}) { 54*4bdc9457SAndroid Build Coastguard Worker o${N+1} = o0; 55*4bdc9457SAndroid Build Coastguard Worker } 56*4bdc9457SAndroid Build Coastguard Worker size_t bh = block_height; 57*4bdc9457SAndroid Build Coastguard Worker for (; bh >= ${TILE_HEIGHT}; bh -= ${TILE_HEIGHT}) { 58*4bdc9457SAndroid Build Coastguard Worker $for M in reversed(range(TILE_WIDTH)): 59*4bdc9457SAndroid Build Coastguard Worker $POS = 0 60*4bdc9457SAndroid Build Coastguard Worker $for N in range(TILE_HEIGHT): 61*4bdc9457SAndroid Build Coastguard Worker o${M}[${POS}] = i${N}[${M * 3}]; 62*4bdc9457SAndroid Build Coastguard Worker o${M}[${POS + 1}] = i${N}[${M * 3 + 1}]; 63*4bdc9457SAndroid Build Coastguard Worker o${M}[${POS + 2}] = i${N}[${M * 3 + 2}]; 64*4bdc9457SAndroid Build Coastguard Worker $POS += 3 65*4bdc9457SAndroid Build Coastguard Worker o${M} += ${POS}; 66*4bdc9457SAndroid Build Coastguard Worker $for N in range(TILE_HEIGHT): 67*4bdc9457SAndroid Build Coastguard Worker i${N} = (const uint8_t*) ((uintptr_t) i${N} + input_offset); 68*4bdc9457SAndroid Build Coastguard Worker } 69*4bdc9457SAndroid Build Coastguard Worker $if TILE_HEIGHT > 1: 70*4bdc9457SAndroid Build Coastguard Worker const uint8_t* i = i0; 71*4bdc9457SAndroid Build Coastguard Worker $if TILE_HEIGHT > 2: 72*4bdc9457SAndroid Build Coastguard Worker if (bh & 2) { 73*4bdc9457SAndroid Build Coastguard Worker $for M in reversed(range(TILE_WIDTH)): 74*4bdc9457SAndroid Build Coastguard Worker o${M}[0] = i0[${M * 3}]; 75*4bdc9457SAndroid Build Coastguard Worker o${M}[1] = i0[${M * 3 + 1}]; 76*4bdc9457SAndroid Build Coastguard Worker o${M}[2] = i0[${M * 3 + 2}]; 77*4bdc9457SAndroid Build Coastguard Worker o${M}[3] = i1[${M * 3}]; 78*4bdc9457SAndroid Build Coastguard Worker o${M}[4] = i1[${M * 3 + 1}]; 79*4bdc9457SAndroid Build Coastguard Worker o${M}[5] = i1[${M * 3 + 2}]; 80*4bdc9457SAndroid Build Coastguard Worker o${M} += 6; 81*4bdc9457SAndroid Build Coastguard Worker i = i2; 82*4bdc9457SAndroid Build Coastguard Worker } 83*4bdc9457SAndroid Build Coastguard Worker if (bh & 1) { 84*4bdc9457SAndroid Build Coastguard Worker $for M in reversed(range(TILE_WIDTH)): 85*4bdc9457SAndroid Build Coastguard Worker o${M}[0] = i[${M * 3}]; 86*4bdc9457SAndroid Build Coastguard Worker o${M}[1] = i[${M * 3 + 1}]; 87*4bdc9457SAndroid Build Coastguard Worker o${M}[2] = i[${M * 3 + 2}]; 88*4bdc9457SAndroid Build Coastguard Worker o${M} += 3; 89*4bdc9457SAndroid Build Coastguard Worker } 90*4bdc9457SAndroid Build Coastguard Worker 91*4bdc9457SAndroid Build Coastguard Worker i0 = (const uint8_t*) ((uintptr_t) i0 + input_reset); 92*4bdc9457SAndroid Build Coastguard Worker $for N in range(1, TILE_HEIGHT): 93*4bdc9457SAndroid Build Coastguard Worker i${N} = (const uint8_t*) ((uintptr_t) i${N-1} + input_stride); 94*4bdc9457SAndroid Build Coastguard Worker $for N in range(TILE_WIDTH): 95*4bdc9457SAndroid Build Coastguard Worker o${N} = (uint8_t*) ((uintptr_t) o${N} + output_reset); 96*4bdc9457SAndroid Build Coastguard Worker block_width = doz(block_width, ${TILE_WIDTH}); 97*4bdc9457SAndroid Build Coastguard Worker } while (block_width != 0); 98*4bdc9457SAndroid Build Coastguard Worker} 99