/aosp_15_r20/external/XNNPACK/src/qs8-gemm/gen/ |
H A D | 1x16c8-minmax-rndnu-neon-mull.c | 127 const int32x4_t vsum0xEF = vpaddq_s32(vacc0x14, vacc0x15); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mull() local 160 const int32x2_t vsum0xEF = vpadd_s32(vpsum0xE, vpsum0xF); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mull() local
|
H A D | 1x16c16-minmax-rndnu-neon-mlal.c | 143 const int32x4_t vsum0xEF = vpaddq_s32(vacc0x14, vacc0x15); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c16__neon_mlal() local 175 const int32x2_t vsum0xEF = vpadd_s32(vpsum0xE, vpsum0xF); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c16__neon_mlal() local
|
H A D | 1x16c8-minmax-rndnu-neon-mlal.c | 216 const int32x4_t vsum0xEF = vpaddq_s32(vacc0x14, vacc0x15); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mlal() local 249 const int32x2_t vsum0xEF = vpadd_s32(vpsum0xE, vpsum0xF); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mlal() local
|
H A D | 2x16c16-minmax-rndnu-neon-mlal.c | 214 const int32x4_t vsum0xEF = vpaddq_s32(vacc0x14, vacc0x15); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal() local 258 const int32x2_t vsum0xEF = vpadd_s32(vpsum0xE, vpsum0xF); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal() local
|
H A D | 2x16c8-minmax-rndnu-neon-mull.c | 182 const int32x4_t vsum0xEF = vpaddq_s32(vacc0x14, vacc0x15); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mull() local 227 const int32x2_t vsum0xEF = vpadd_s32(vpsum0xE, vpsum0xF); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mull() local
|
H A D | 2x16c8-minmax-rndnu-neon-mlal.c | 321 const int32x4_t vsum0xEF = vpaddq_s32(vacc0x14, vacc0x15); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mlal() local 366 const int32x2_t vsum0xEF = vpadd_s32(vpsum0xE, vpsum0xF); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mlal() local
|
H A D | 3x16c16-minmax-rndnu-neon-mlal.c | 285 const int32x4_t vsum0xEF = vpaddq_s32(vacc0x14, vacc0x15); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() local 341 const int32x2_t vsum0xEF = vpadd_s32(vpsum0xE, vpsum0xF); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() local
|
H A D | 3x16c8-minmax-rndnu-neon-mull.c | 237 const int32x4_t vsum0xEF = vpaddq_s32(vacc0x14, vacc0x15); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull() local 294 const int32x2_t vsum0xEF = vpadd_s32(vpsum0xE, vpsum0xF); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull() local
|
H A D | 1x16c4s2-minmax-rndnu-neon-mull.c | 127 const int32x2_t vsum0xEF = vpadd_s32(vget_low_s32(vacc0xEF), vget_high_s32(vacc0xEF)); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull() local
|
H A D | 4x16c8-minmax-rndnu-neon-mull.c | 292 const int32x4_t vsum0xEF = vpaddq_s32(vacc0x14, vacc0x15); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull() local 361 const int32x2_t vsum0xEF = vpadd_s32(vpsum0xE, vpsum0xF); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull() local
|
H A D | 3x16c8-minmax-rndnu-neon-mlal.c | 426 const int32x4_t vsum0xEF = vpaddq_s32(vacc0x14, vacc0x15); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() local 483 const int32x2_t vsum0xEF = vpadd_s32(vpsum0xE, vpsum0xF); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() local
|
H A D | 4x16c16-minmax-rndnu-neon-mlal.c | 356 const int32x4_t vsum0xEF = vpaddq_s32(vacc0x14, vacc0x15); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() local 424 const int32x2_t vsum0xEF = vpadd_s32(vpsum0xE, vpsum0xF); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() local
|
H A D | 1x16c4-minmax-rndnu-neon-mull-dup.c | 163 const int32x2_t vsum0xEF = vpadd_s32(vget_low_s32(vacc0xEF), vget_high_s32(vacc0xEF)); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup() local
|
/aosp_15_r20/external/XNNPACK/src/qs8-igemm/gen/ |
H A D | 1x16c8-minmax-rndnu-neon-mull.c | 141 const int32x4_t vsum0xEF = vpaddq_s32(vacc0x14, vacc0x15); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull() local 174 const int32x2_t vsum0xEF = vpadd_s32(vpsum0xE, vpsum0xF); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull() local
|
H A D | 1x16c16-minmax-rndnu-neon-mlal.c | 157 const int32x4_t vsum0xEF = vpaddq_s32(vacc0x14, vacc0x15); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal() local 189 const int32x2_t vsum0xEF = vpadd_s32(vpsum0xE, vpsum0xF); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal() local
|
H A D | 1x16c8-minmax-rndnu-neon-mlal.c | 230 const int32x4_t vsum0xEF = vpaddq_s32(vacc0x14, vacc0x15); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mlal() local 263 const int32x2_t vsum0xEF = vpadd_s32(vpsum0xE, vpsum0xF); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mlal() local
|
H A D | 2x16c8-minmax-rndnu-neon-mull.c | 198 const int32x4_t vsum0xEF = vpaddq_s32(vacc0x14, vacc0x15); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mull() local 243 const int32x2_t vsum0xEF = vpadd_s32(vpsum0xE, vpsum0xF); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mull() local
|
H A D | 2x16c16-minmax-rndnu-neon-mlal.c | 230 const int32x4_t vsum0xEF = vpaddq_s32(vacc0x14, vacc0x15); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal() local 274 const int32x2_t vsum0xEF = vpadd_s32(vpsum0xE, vpsum0xF); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal() local
|
H A D | 2x16c8-minmax-rndnu-neon-mlal.c | 337 const int32x4_t vsum0xEF = vpaddq_s32(vacc0x14, vacc0x15); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mlal() local 382 const int32x2_t vsum0xEF = vpadd_s32(vpsum0xE, vpsum0xF); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mlal() local
|
H A D | 3x16c8-minmax-rndnu-neon-mull.c | 255 const int32x4_t vsum0xEF = vpaddq_s32(vacc0x14, vacc0x15); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull() local 312 const int32x2_t vsum0xEF = vpadd_s32(vpsum0xE, vpsum0xF); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull() local
|
H A D | 3x16c16-minmax-rndnu-neon-mlal.c | 303 const int32x4_t vsum0xEF = vpaddq_s32(vacc0x14, vacc0x15); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() local 359 const int32x2_t vsum0xEF = vpadd_s32(vpsum0xE, vpsum0xF); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() local
|
H A D | 1x16c4s2-minmax-rndnu-neon-mull.c | 141 const int32x2_t vsum0xEF = vpadd_s32(vget_low_s32(vacc0xEF), vget_high_s32(vacc0xEF)); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull() local
|
H A D | 4x16c8-minmax-rndnu-neon-mull.c | 312 const int32x4_t vsum0xEF = vpaddq_s32(vacc0x14, vacc0x15); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull() local 381 const int32x2_t vsum0xEF = vpadd_s32(vpsum0xE, vpsum0xF); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull() local
|
H A D | 4x16c16-minmax-rndnu-neon-mlal.c | 376 const int32x4_t vsum0xEF = vpaddq_s32(vacc0x14, vacc0x15); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() local 444 const int32x2_t vsum0xEF = vpadd_s32(vpsum0xE, vpsum0xF); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() local
|
H A D | 3x16c8-minmax-rndnu-neon-mlal.c | 444 const int32x4_t vsum0xEF = vpaddq_s32(vacc0x14, vacc0x15); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() local 501 const int32x2_t vsum0xEF = vpadd_s32(vpsum0xE, vpsum0xF); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() local
|