// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

$assert ACTIVATION in ["LINEAR", "RELU", "MINMAX"]
$assert ACTIVATION != "LINEAR" or not WASM
#include <assert.h>

#include <xnnpack/igemm.h>
#include <xnnpack/math.h>


$MIN_F32 = "__builtin_wasm_min_f32" if WASM else "math_min_f32"
$MAX_F32 = "__builtin_wasm_max_f32" if WASM else "math_max_f32"
$SUFFIX = {"LINEAR": "", "RELU": "_relu", "MINMAX": "_minmax"}[ACTIVATION]
$PARAMS = {"LINEAR": "xnn_f32_default_params", "RELU": "xnn_f32_relu_params", "MINMAX": "xnn_f32_minmax_params"}[ACTIVATION]
void xnn_f32_igemm${SUFFIX}_ukernel_${MR}x${NR}__${"wasm" if WASM else "scalar"}(
    size_t mr,
    size_t nc,
    size_t kc,
    size_t ks,
    const float**restrict a,
    const float*restrict w,
    float*restrict c,
    size_t cm_stride,
    size_t cn_stride,
    size_t a_offset,
    const float* zero,
    const union ${PARAMS} params[restrict XNN_MIN_ELEMENTS(1)])
{
  assert(mr != 0);
  assert(mr <= ${MR});
  assert(nc != 0);
  assert(kc != 0);
  assert(kc % sizeof(float) == 0);
  assert(ks != 0);
  assert(ks % (${MR} * sizeof(void*)) == 0);
  assert(a_offset % sizeof(float) == 0);
  assert(a != NULL);
  assert(w != NULL);
  assert(c != NULL);

  // Set up the output row pointers. When mr < MR, the extra pointers alias the
  // previous row; rows are stored from the highest index down, so the aliased
  // stores are overwritten by the valid row.
  float* c0 = c;
  $for M in range(1, MR):
    float* c${M} = (float*) ((uintptr_t) c${M-1} + cm_stride);
    $if M % 2 == 0:
      if XNN_UNPREDICTABLE(mr <= ${M}) {
        c${M} = c${M-1};
      }
    $elif M + 1 == MR:
      if XNN_UNPREDICTABLE(mr != ${M+1}) {
        c${M} = c${M-1};
      }
    $else:
      if XNN_UNPREDICTABLE(mr < ${M+1}) {
        c${M} = c${M-1};
      }

  $if ACTIVATION == "MINMAX":
    const float vmin = params->scalar.min;
    const float vmax = params->scalar.max;
  do {
    // Initialize the accumulators with the bias, stored as the first NR elements
    // of each packed-weights block.
    $for N in range(NR):
      float vacc0${N} = w[${N}];
    $for M in range(1, MR):
      $for N in range(NR):
        float vacc${M}${N} = vacc0${N};
    w += ${NR};

    // Iterate over the indirection buffer, MR input pointers at a time.
    size_t p = ks;
    do {
      // Pointers equal to the zero buffer mark padding rows and are not offset.
      $for M in range(MR):
        const float* restrict a${M} = a[${M}];
        assert(a${M} != NULL);
        if XNN_UNPREDICTABLE(a${M} != zero) {
          a${M} = (const float*) ((uintptr_t) a${M} + a_offset);
        }
      a += ${MR};

      // Multiply-accumulate along the reduction dimension.
      size_t k = kc;
      do {
        $for M in range(MR):
          const float va${M} = *a${M}++;

        $for N in range(NR):
          const float vb${N} = w[${N}];
        w += ${NR};

        $for M in range(MR):
          $for N in range(NR):
            vacc${M}${N} = math_muladd_f32(va${M}, vb${N}, vacc${M}${N});

        k -= sizeof(float);
      } while (k != 0);
      p -= ${MR} * sizeof(void*);
    } while (p != 0);

    $if ACTIVATION == "MINMAX":
      $for M in range(MR):
        $for N in range(NR):
          vacc${M}${N} = ${MAX_F32}(vacc${M}${N}, vmin);

      $for M in range(MR):
        $for N in range(NR):
          vacc${M}${N} = ${MIN_F32}(vacc${M}${N}, vmax);
    $elif ACTIVATION == "RELU":
      $for M in range(MR):
        $for N in range(NR):
          vacc${M}${N} = ${MAX_F32}(vacc${M}${N}, 0.0f);

    if XNN_LIKELY(nc >= ${NR}) {
      $for M in reversed(range(MR)):
        $for N in range(NR):
          c${M}[${N}] = vacc${M}${N};
        c${M} = (float*) ((uintptr_t) c${M} + cn_stride);

      // Rewind the indirection buffer so the same input pointers are reused for
      // the next column block.
      a = (const float**restrict) ((uintptr_t) a - ks);
      nc -= ${NR};
    } else {
      // Store the nc remainder one power-of-two group of columns at a time.
      $for LOG2N in reversed(range(NR.bit_length() - 1)):
        if (nc & ${1 << LOG2N}) {
          $for M in reversed(range(MR)):
            $for N in range(1 << LOG2N):
              c${M}[${N}] = vacc${M}${N};
            $if LOG2N != 0:
              $for N in range(1 << (LOG2N - 1)):
                vacc${M}${N} = vacc${M}${N + (1 << LOG2N)};
              c${M} += ${1 << LOG2N};
        }

      nc = 0;
    }
  } while (nc != 0);
}