/aosp_15_r20/external/XNNPACK/src/qs8-gemm/gen/ |
H A D | 3x16c16-minmax-rndnu-neon-mlal.c | 298 const int32x4_t vsum2x89 = vpaddq_s32(vacc2x8, vacc2x9); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() local 389 const int32x2_t vsum2x89 = vpadd_s32(vpsum2x8, vpsum2x9); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() local
|
H A D | 3x16c8-minmax-rndnu-neon-mull.c | 250 const int32x4_t vsum2x89 = vpaddq_s32(vacc2x8, vacc2x9); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull() local 342 const int32x2_t vsum2x89 = vpadd_s32(vpsum2x8, vpsum2x9); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull() local
|
H A D | 4x16c8-minmax-rndnu-neon-mull.c | 305 const int32x4_t vsum2x89 = vpaddq_s32(vacc2x8, vacc2x9); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull() local 409 const int32x2_t vsum2x89 = vpadd_s32(vpsum2x8, vpsum2x9); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull() local
|
H A D | 3x16c8-minmax-rndnu-neon-mlal.c | 439 const int32x4_t vsum2x89 = vpaddq_s32(vacc2x8, vacc2x9); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() local 531 const int32x2_t vsum2x89 = vpadd_s32(vpsum2x8, vpsum2x9); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() local
|
H A D | 4x16c16-minmax-rndnu-neon-mlal.c | 369 const int32x4_t vsum2x89 = vpaddq_s32(vacc2x8, vacc2x9); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() local 472 const int32x2_t vsum2x89 = vpadd_s32(vpsum2x8, vpsum2x9); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() local
|
H A D | 4x16c8-minmax-rndnu-neon-mlal.c | 544 const int32x4_t vsum2x89 = vpaddq_s32(vacc2x8, vacc2x9); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() local 648 const int32x2_t vsum2x89 = vpadd_s32(vpsum2x8, vpsum2x9); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() local
|
H A D | 3x16c4s2-minmax-rndnu-neon-mull.c | 251 const int32x2_t vsum2x89 = vpadd_s32(vget_low_s32(vacc2x89), vget_high_s32(vacc2x89)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull() local
|
H A D | 3x16c4-minmax-rndnu-neon-mull-ld2r.c | 325 const int32x2_t vsum2x89 = vpadd_s32(vget_low_s32(vacc2x89), vget_high_s32(vacc2x89)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r() local
|
H A D | 3x16c4-minmax-rndnu-neon-mull-ld1r.c | 328 const int32x2_t vsum2x89 = vpadd_s32(vget_low_s32(vacc2x89), vget_high_s32(vacc2x89)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r() local
|
H A D | 3x16c4-minmax-rndnu-neon-mull-dup.c | 325 const int32x2_t vsum2x89 = vpadd_s32(vget_low_s32(vacc2x89), vget_high_s32(vacc2x89)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup() local
|
H A D | 4x16c4s2-minmax-rndnu-neon-mull.c | 303 const int32x2_t vsum2x89 = vpadd_s32(vget_low_s32(vacc2x89), vget_high_s32(vacc2x89)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull() local
|
H A D | 3x16c4s2-minmax-rndnu-neon-mlal.c | 444 const int32x2_t vsum2x89 = vpadd_s32(vget_low_s32(vacc2x89), vget_high_s32(vacc2x89)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal() local
|
/aosp_15_r20/external/XNNPACK/src/qs8-igemm/gen/ |
H A D | 3x16c8-minmax-rndnu-neon-mull.c | 268 const int32x4_t vsum2x89 = vpaddq_s32(vacc2x8, vacc2x9); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull() local 360 const int32x2_t vsum2x89 = vpadd_s32(vpsum2x8, vpsum2x9); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull() local
|
H A D | 3x16c16-minmax-rndnu-neon-mlal.c | 316 const int32x4_t vsum2x89 = vpaddq_s32(vacc2x8, vacc2x9); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() local 407 const int32x2_t vsum2x89 = vpadd_s32(vpsum2x8, vpsum2x9); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() local
|
H A D | 4x16c8-minmax-rndnu-neon-mull.c | 325 const int32x4_t vsum2x89 = vpaddq_s32(vacc2x8, vacc2x9); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull() local 429 const int32x2_t vsum2x89 = vpadd_s32(vpsum2x8, vpsum2x9); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull() local
|
H A D | 4x16c16-minmax-rndnu-neon-mlal.c | 389 const int32x4_t vsum2x89 = vpaddq_s32(vacc2x8, vacc2x9); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() local 492 const int32x2_t vsum2x89 = vpadd_s32(vpsum2x8, vpsum2x9); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() local
|
H A D | 3x16c8-minmax-rndnu-neon-mlal.c | 457 const int32x4_t vsum2x89 = vpaddq_s32(vacc2x8, vacc2x9); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() local 549 const int32x2_t vsum2x89 = vpadd_s32(vpsum2x8, vpsum2x9); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() local
|
H A D | 4x16c8-minmax-rndnu-neon-mlal.c | 564 const int32x4_t vsum2x89 = vpaddq_s32(vacc2x8, vacc2x9); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() local 668 const int32x2_t vsum2x89 = vpadd_s32(vpsum2x8, vpsum2x9); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() local
|
H A D | 3x16c4s2-minmax-rndnu-neon-mull.c | 269 const int32x2_t vsum2x89 = vpadd_s32(vget_low_s32(vacc2x89), vget_high_s32(vacc2x89)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull() local
|
H A D | 3x16c4-minmax-rndnu-neon-mull-ld2r.c | 342 const int32x2_t vsum2x89 = vpadd_s32(vget_low_s32(vacc2x89), vget_high_s32(vacc2x89)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r() local
|
H A D | 3x16c4-minmax-rndnu-neon-mull-dup.c | 342 const int32x2_t vsum2x89 = vpadd_s32(vget_low_s32(vacc2x89), vget_high_s32(vacc2x89)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup() local
|
H A D | 3x16c4-minmax-rndnu-neon-mull-ld1r.c | 345 const int32x2_t vsum2x89 = vpadd_s32(vget_low_s32(vacc2x89), vget_high_s32(vacc2x89)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r() local
|
H A D | 4x16c4s2-minmax-rndnu-neon-mull.c | 323 const int32x2_t vsum2x89 = vpadd_s32(vget_low_s32(vacc2x89), vget_high_s32(vacc2x89)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull() local
|
H A D | 3x16c4s2-minmax-rndnu-neon-mlal.c | 462 const int32x2_t vsum2x89 = vpadd_s32(vget_low_s32(vacc2x89), vget_high_s32(vacc2x89)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal() local
|
H A D | 4x16c4-minmax-rndnu-neon-mull-ld1r.c | 419 const int32x2_t vsum2x89 = vpadd_s32(vget_low_s32(vacc2x89), vget_high_s32(vacc2x89)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r() local
|