1# Copyright 2021 Google LLC 2# 3# This source code is licensed under the BSD-style license found in the 4# LICENSE file in the root directory of this source tree. 5 6# ARMv6 SIMD 7- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32 8 init: xnn_init_qs8_conv_minmax_fp32_armsimd32_params 9 k-block: 4 10- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32 11 init: xnn_init_qs8_conv_minmax_fp32_armsimd32_params 12 k-block: 4 13- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32 14 init: xnn_init_qs8_conv_minmax_fp32_armsimd32_params 15 k-block: 4 16- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32 17 init: xnn_init_qs8_conv_minmax_fp32_armsimd32_params 18 k-block: 4 19 20# ARM NEON 21- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup 22 init: xnn_init_qs8_conv_minmax_fp32_neon_params 23 k-block: 16 24- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r 25 init: xnn_init_qs8_conv_minmax_fp32_neon_params 26 k-block: 16 27- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r 28 init: xnn_init_qs8_conv_minmax_fp32_neon_params 29 k-block: 16 30- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r 31 init: xnn_init_qs8_conv_minmax_fp32_neon_params 32 k-block: 16 33- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup 34 init: xnn_init_qs8_conv_minmax_fp32_neonv8_params 35 k-block: 16 36- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r 37 init: xnn_init_qs8_conv_minmax_fp32_neonv8_params 38 k-block: 16 39- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r 40 init: xnn_init_qs8_conv_minmax_fp32_neonv8_params 41 k-block: 16 42- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r 43 init: xnn_init_qs8_conv_minmax_fp32_neonv8_params 44 k-block: 16 45- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal 46 init: xnn_init_qs8_conv_minmax_fp32_neon_params 47 k-block: 16 48- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal 49 init: xnn_init_qs8_conv_minmax_fp32_neonv8_params 50 k-block: 16 51- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup 52 init: xnn_init_qs8_conv_minmax_fp32_neon_params 53 k-block: 16 54- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r 55 init: xnn_init_qs8_conv_minmax_fp32_neon_params 56 k-block: 16 57- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r 58 init: xnn_init_qs8_conv_minmax_fp32_neon_params 59 k-block: 16 60- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup 61 init: xnn_init_qs8_conv_minmax_fp32_neonv8_params 62 k-block: 16 63- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r 64 init: xnn_init_qs8_conv_minmax_fp32_neonv8_params 65 k-block: 16 66- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r 67 init: xnn_init_qs8_conv_minmax_fp32_neonv8_params 68 k-block: 16 69- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal 70 init: xnn_init_qs8_conv_minmax_fp32_neon_params 71 k-block: 16 72- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal 73 init: xnn_init_qs8_conv_minmax_fp32_neonv8_params 74 k-block: 16 75- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup 76 init: xnn_init_qs8_conv_minmax_fp32_neon_params 77 k-block: 16 78- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r 79 init: xnn_init_qs8_conv_minmax_fp32_neon_params 80 k-block: 16 81- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r 82 init: xnn_init_qs8_conv_minmax_fp32_neon_params 83 k-block: 16 84- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r 85 init: xnn_init_qs8_conv_minmax_fp32_neon_params 86 k-block: 16 87- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup 88 init: xnn_init_qs8_conv_minmax_fp32_neonv8_params 89 k-block: 16 90- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r 91 init: xnn_init_qs8_conv_minmax_fp32_neonv8_params 92 k-block: 16 93- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r 94 init: xnn_init_qs8_conv_minmax_fp32_neonv8_params 95 k-block: 16 96- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r 97 init: xnn_init_qs8_conv_minmax_fp32_neonv8_params 98 k-block: 16 99- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal 100 init: xnn_init_qs8_conv_minmax_fp32_neon_params 101 k-block: 16 102- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal 103 init: xnn_init_qs8_conv_minmax_fp32_neonv8_params 104 k-block: 16 105- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup 106 init: xnn_init_qs8_conv_minmax_fp32_neon_params 107 k-block: 16 108- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r 109 init: xnn_init_qs8_conv_minmax_fp32_neon_params 110 k-block: 16 111- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r 112 init: xnn_init_qs8_conv_minmax_fp32_neon_params 113 k-block: 16 114- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup 115 init: xnn_init_qs8_conv_minmax_fp32_neonv8_params 116 k-block: 16 117- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r 118 init: xnn_init_qs8_conv_minmax_fp32_neonv8_params 119 k-block: 16 120- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r 121 init: xnn_init_qs8_conv_minmax_fp32_neonv8_params 122 k-block: 16 123- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal 124 init: xnn_init_qs8_conv_minmax_fp32_neon_params 125 k-block: 16 126- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal 127 init: xnn_init_qs8_conv_minmax_fp32_neonv8_params 128 k-block: 16 129# AArch64 assembly 130- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal 131 init: xnn_init_qs8_conv_minmax_fp32_neonv8_params 132 k-block: 16 133 assembly: true 134- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53 135 init: xnn_init_qs8_conv_minmax_fp32_neonv8_params 136 k-block: 16 137 assembly: true 138- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm 139 init: xnn_init_qs8_conv_minmax_fp32_neonv8_params 140 k-block: 16 141 assembly: true 142- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53 143 init: xnn_init_qs8_conv_minmax_fp32_neonv8_params 144 k-block: 16 145 assembly: true 146- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal 147 init: xnn_init_qs8_conv_minmax_fp32_neonv8_params 148 k-block: 16 149 assembly: true 150- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53 151 init: xnn_init_qs8_conv_minmax_fp32_neonv8_params 152 k-block: 16 153 assembly: true 154- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm 155 init: xnn_init_qs8_conv_minmax_fp32_neonv8_params 156 k-block: 16 157 assembly: true 158- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53 159 init: xnn_init_qs8_conv_minmax_fp32_neonv8_params 160 k-block: 16 161 assembly: true 162- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal 163 init: xnn_init_qs8_conv_minmax_fp32_neonv8_params 164 k-block: 16 165 assembly: true 166- name: xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53 167 init: xnn_init_qs8_conv_minmax_fp32_neonv8_params 168 k-block: 8 169 assembly: true 170- name: xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64 171 init: xnn_init_qs8_conv_minmax_fp32_neonv8_params 172 k-block: 8 173 assembly: true 174- name: xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53 175 init: xnn_init_qs8_conv_minmax_fp32_neonv8_params 176 k-block: 8 177 assembly: true 178- name: xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64 179 init: xnn_init_qs8_conv_minmax_fp32_neonv8_params 180 k-block: 8 181 assembly: true 182- name: xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55 183 init: xnn_init_qs8_conv_minmax_fp32_neonv8_params 184 k-block: 16 185 assembly: true 186- name: xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64 187 init: xnn_init_qs8_conv_minmax_fp32_neonv8_params 188 k-block: 8 189 assembly: true 190- name: xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128 191 init: xnn_init_qs8_conv_minmax_fp32_neonv8_params 192 k-block: 16 193 assembly: true 194# ARM NEON 195- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot 196 init: xnn_init_qs8_conv_minmax_fp32_neonv8_params 197 k-block: 8 198- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal 199 init: xnn_init_qs8_conv_minmax_fp32_neon_params 200 k-block: 16 201- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal 202 init: xnn_init_qs8_conv_minmax_fp32_neonv8_params 203 k-block: 16 204- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane 205 init: xnn_init_qs8_conv_minmax_fp32_neon_params 206 k-block: 8 207- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane 208 init: xnn_init_qs8_conv_minmax_fp32_neonv8_params 209 k-block: 8 210- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal 211 init: xnn_init_qs8_conv_minmax_fp32_neon_params 212 k-block: 16 213- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal 214 init: xnn_init_qs8_conv_minmax_fp32_neonv8_params 215 k-block: 16 216- name: xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane 217 init: xnn_init_qs8_conv_minmax_fp32_neon_params 218 k-block: 8 219- name: xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane 220 init: xnn_init_qs8_conv_minmax_fp32_neonv8_params 221 k-block: 8 222# x86 SSE 223- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64 224 init: xnn_init_qs8_conv_minmax_fp32_sse2_params 225 k-block: 8 226- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64 227 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 228 k-block: 8 229- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64 230 init: xnn_init_qs8_conv_minmax_fp32_sse2_params 231 k-block: 8 232- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64 233 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 234 k-block: 8 235- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64 236 init: xnn_init_qs8_conv_minmax_fp32_sse2_params 237 k-block: 8 238- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64 239 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 240 k-block: 8 241- name: xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64 242 init: xnn_init_qs8_conv_minmax_fp32_sse2_params 243 k-block: 8 244- name: xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64 245 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 246 k-block: 8 247# x86 AVX 248- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64 249 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 250 k-block: 8 251- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64 252 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 253 k-block: 8 254- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64 255 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 256 k-block: 8 257- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64 258 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 259 k-block: 8 260- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64 261 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 262 k-block: 8 263- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64 264 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 265 k-block: 8 266- name: xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64 267 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 268 k-block: 8 269- name: xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64 270 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 271 k-block: 8 272# x86 SSE 273- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128 274 init: xnn_init_qs8_conv_minmax_fp32_sse2_params 275 k-block: 8 276- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128 277 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 278 k-block: 8 279- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128 280 init: xnn_init_qs8_conv_minmax_fp32_sse2_params 281 k-block: 8 282- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128 283 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 284 k-block: 8 285- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128 286 init: xnn_init_qs8_conv_minmax_fp32_sse2_params 287 k-block: 8 288- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128 289 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 290 k-block: 8 291- name: xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128 292 init: xnn_init_qs8_conv_minmax_fp32_sse2_params 293 k-block: 8 294- name: xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128 295 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 296 k-block: 8 297# x86 AVX 298- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128 299 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 300 k-block: 8 301- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128 302 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 303 k-block: 8 304- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128 305 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 306 k-block: 8 307- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128 308 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 309 k-block: 8 310- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128 311 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 312 k-block: 8 313- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128 314 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 315 k-block: 8 316- name: xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128 317 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 318 k-block: 8 319- name: xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128 320 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 321 k-block: 8 322# x86 SSE 323- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64 324 init: xnn_init_qs8_conv_minmax_fp32_sse2_params 325 k-block: 8 326- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64 327 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 328 k-block: 8 329- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64 330 init: xnn_init_qs8_conv_minmax_fp32_sse2_params 331 k-block: 8 332- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64 333 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 334 k-block: 8 335- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64 336 init: xnn_init_qs8_conv_minmax_fp32_sse2_params 337 k-block: 8 338- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64 339 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 340 k-block: 8 341- name: xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64 342 init: xnn_init_qs8_conv_minmax_fp32_sse2_params 343 k-block: 8 344- name: xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64 345 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 346 k-block: 8 347# x86 AVX 348- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64 349 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 350 k-block: 8 351- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64 352 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 353 k-block: 8 354- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64 355 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 356 k-block: 8 357- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64 358 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 359 k-block: 8 360- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64 361 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 362 k-block: 8 363- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64 364 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 365 k-block: 8 366- name: xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64 367 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 368 k-block: 8 369- name: xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64 370 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 371 k-block: 8 372# x86 SSE 373- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128 374 init: xnn_init_qs8_conv_minmax_fp32_sse2_params 375 k-block: 8 376- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128 377 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 378 k-block: 8 379- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128 380 init: xnn_init_qs8_conv_minmax_fp32_sse2_params 381 k-block: 8 382- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128 383 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 384 k-block: 8 385- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128 386 init: xnn_init_qs8_conv_minmax_fp32_sse2_params 387 k-block: 8 388- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128 389 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 390 k-block: 8 391- name: xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128 392 init: xnn_init_qs8_conv_minmax_fp32_sse2_params 393 k-block: 8 394- name: xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128 395 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 396 k-block: 8 397# x86 AVX 398- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128 399 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 400 k-block: 8 401- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128 402 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 403 k-block: 8 404- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128 405 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 406 k-block: 8 407- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128 408 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 409 k-block: 8 410- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128 411 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 412 k-block: 8 413- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128 414 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 415 k-block: 8 416- name: xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128 417 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 418 k-block: 8 419- name: xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128 420 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 421 k-block: 8 422# x86 SSE 423- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64 424 init: xnn_init_qs8_conv_minmax_fp32_sse2_params 425 k-block: 8 426- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__ssse3_ld64 427 init: xnn_init_qs8_conv_minmax_fp32_sse2_params 428 k-block: 8 429- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64 430 init: xnn_init_qs8_conv_minmax_fp32_sse2_params 431 k-block: 8 432- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__ssse3_ld64 433 init: xnn_init_qs8_conv_minmax_fp32_sse2_params 434 k-block: 8 435- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64 436 init: xnn_init_qs8_conv_minmax_fp32_sse2_params 437 k-block: 8 438- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64 439 init: xnn_init_qs8_conv_minmax_fp32_sse2_params 440 k-block: 8 441# x86 SSE 442- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64 443 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 444 k-block: 8 445- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64 446 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 447 k-block: 8 448- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64 449 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 450 k-block: 8 451# x86 AVX 452- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64 453 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 454 k-block: 8 455- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64 456 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 457 k-block: 8 458- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64 459 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 460 k-block: 8 461- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64 462 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 463 k-block: 8 464- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64 465 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 466 k-block: 8 467- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64 468 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 469 k-block: 8 470# x86 SSE 471- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128 472 init: xnn_init_qs8_conv_minmax_fp32_sse2_params 473 k-block: 8 474- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__ssse3_ld128 475 init: xnn_init_qs8_conv_minmax_fp32_sse2_params 476 k-block: 8 477- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128 478 init: xnn_init_qs8_conv_minmax_fp32_sse2_params 479 k-block: 8 480- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__ssse3_ld128 481 init: xnn_init_qs8_conv_minmax_fp32_sse2_params 482 k-block: 8 483- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128 484 init: xnn_init_qs8_conv_minmax_fp32_sse2_params 485 k-block: 8 486- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128 487 init: xnn_init_qs8_conv_minmax_fp32_sse2_params 488 k-block: 8 489# x86 SSE 490- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128 491 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 492 k-block: 8 493- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128 494 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 495 k-block: 8 496- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128 497 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 498 k-block: 8 499# x86 AVX 500- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128 501 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 502 k-block: 8 503- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128 504 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 505 k-block: 8 506- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128 507 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 508 k-block: 8 509- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128 510 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 511 k-block: 8 512- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128 513 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 514 k-block: 8 515- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128 516 init: xnn_init_qs8_conv_minmax_fp32_sse4_params 517 k-block: 8 518# x86 AVX 519- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2 520 init: xnn_init_qs8_conv_minmax_fp32_avx2_params 521 k-block: 8 522- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__avx2 523 init: xnn_init_qs8_conv_minmax_fp32_avx2_params 524 k-block: 8 525- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x8c8__avx2 526 init: xnn_init_qs8_conv_minmax_fp32_avx2_params 527 k-block: 8 528# x86 AVX512 529- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx 530 init: xnn_init_qs8_conv_minmax_fp32_avx512_params 531 k-block: 8 532- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx 533 init: xnn_init_qs8_conv_minmax_fp32_avx512_params 534 k-block: 8 535- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx 536 init: xnn_init_qs8_conv_minmax_fp32_avx512_params 537 k-block: 8 538- name: xnn_qs8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx 539 init: xnn_init_qs8_conv_minmax_fp32_avx512_params 540 k-block: 8 541# WAsm SIMD 542- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64 543 init: xnn_init_qs8_conv_minmax_fp32_wasmsimd_params 544 k-block: 8 545- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128 546 init: xnn_init_qs8_conv_minmax_fp32_wasmsimd_params 547 k-block: 8 548- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64 549 init: xnn_init_qs8_conv_minmax_fp32_wasmsimd_params 550 k-block: 8 551- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128 552 init: xnn_init_qs8_conv_minmax_fp32_wasmsimd_params 553 k-block: 8 554- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64 555 init: xnn_init_qs8_conv_minmax_fp32_wasmsimd_params 556 k-block: 8 557- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128 558 init: xnn_init_qs8_conv_minmax_fp32_wasmsimd_params 559 k-block: 8 560- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64 561 init: xnn_init_qs8_conv_minmax_fp32_wasmsimd_params 562 k-block: 8 563- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128 564 init: xnn_init_qs8_conv_minmax_fp32_wasmsimd_params 565 k-block: 8 566- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64 567 init: xnn_init_qs8_conv_minmax_fp32_wasmsimd_params 568 k-block: 8 569- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128 570 init: xnn_init_qs8_conv_minmax_fp32_wasmsimd_params 571 k-block: 8 572- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64 573 init: xnn_init_qs8_conv_minmax_fp32_wasmsimd_params 574 k-block: 8 575- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128 576 init: xnn_init_qs8_conv_minmax_fp32_wasmsimd_params 577 k-block: 8 578- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64 579 init: xnn_init_qs8_conv_minmax_fp32_wasmsimd_params 580 k-block: 8 581- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128 582 init: xnn_init_qs8_conv_minmax_fp32_wasmsimd_params 583 k-block: 8 584- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64 585 init: xnn_init_qs8_conv_minmax_fp32_wasmsimd_params 586 k-block: 8 587- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128 588 init: xnn_init_qs8_conv_minmax_fp32_wasmsimd_params 589 k-block: 8 590- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64 591 init: xnn_init_qs8_conv_minmax_fp32_wasmsimd_params 592 k-block: 8 593- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128 594 init: xnn_init_qs8_conv_minmax_fp32_wasmsimd_params 595 k-block: 8 596- name: xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64 597 init: xnn_init_qs8_conv_minmax_fp32_wasmsimd_params 598 k-block: 8 599- name: xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128 600 init: xnn_init_qs8_conv_minmax_fp32_wasmsimd_params 601 k-block: 8 602- name: xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64 603 init: xnn_init_qs8_conv_minmax_fp32_wasmsimd_params 604 k-block: 8 605- name: xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128 606 init: xnn_init_qs8_conv_minmax_fp32_wasmsimd_params 607 k-block: 8 608- name: xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64 609 init: xnn_init_qs8_conv_minmax_fp32_wasmsimd_params 610 k-block: 8 611- name: xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128 612 init: xnn_init_qs8_conv_minmax_fp32_wasmsimd_params 613 k-block: 8 614# WAsm 615- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic 616 init: xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params 617 k-block: 1 618- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic 619 init: xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params 620 k-block: 1 621- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic 622 init: xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params 623 k-block: 1 624- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic 625 init: xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params 626 k-block: 1 627- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic 628 init: xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params 629 k-block: 1 630- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic 631 init: xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params 632 k-block: 1 633- name: xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic 634 init: xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params 635 k-block: 1 636- name: xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic 637 init: xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params 638 k-block: 1 639# Scalar 640- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic 641 init: xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params 642 k-block: 1 643- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic 644 init: xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params 645 k-block: 1 646- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf 647 init: xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params 648 k-block: 1 649- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic 650 init: xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params 651 k-block: 1 652- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic 653 init: xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params 654 k-block: 1 655- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf 656 init: xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params 657 k-block: 1 658- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic 659 init: xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params 660 k-block: 1 661- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic 662 init: xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params 663 k-block: 1 664- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf 665 init: xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params 666 k-block: 1 667- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic 668 init: xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params 669 k-block: 1 670- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic 671 init: xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params 672 k-block: 1 673- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf 674 init: xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params 675 k-block: 1 676- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic 677 init: xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params 678 k-block: 1 679- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic 680 init: xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params 681 k-block: 1 682- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf 683 init: xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params 684 k-block: 1 685- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic 686 init: xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params 687 k-block: 1 688- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic 689 init: xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params 690 k-block: 1 691- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf 692 init: xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params 693 k-block: 1 694- name: xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic 695 init: xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params 696 k-block: 1 697- name: xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic 698 init: xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params 699 k-block: 1 700- name: xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf 701 init: xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params 702 k-block: 1 703- name: xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic 704 init: xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params 705 k-block: 1 706- name: xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic 707 init: xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params 708 k-block: 1 709- name: xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf 710 init: xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params 711 k-block: 1 712