/aosp_15_r20/external/XNNPACK/src/f32-avgpool/ |
H A D | 9x-minmax-sse-c4.c | 127 const __m128 vsum018 = _mm_add_ps(_mm_add_ps(vi0, vi1), vi8); in xnn_f32_avgpool_minmax_ukernel_9x__sse_c4() local 159 const __m128 vsum018 = _mm_add_ps(vsum01, vi8); in xnn_f32_avgpool_minmax_ukernel_9x__sse_c4() local
|
H A D | 9x-minmax-wasmsimd-arm-c4.c | 127 const v128_t vsum018 = wasm_f32x4_add(wasm_f32x4_add(vi0, vi1), vi8); in xnn_f32_avgpool_minmax_ukernel_9x__wasmsimd_arm_c4() local 160 const v128_t vsum018 = wasm_f32x4_add(vsum01, vi8); in xnn_f32_avgpool_minmax_ukernel_9x__wasmsimd_arm_c4() local
|
H A D | 9x-minmax-wasmsimd-x86-c4.c | 127 const v128_t vsum018 = wasm_f32x4_add(wasm_f32x4_add(vi0, vi1), vi8); in xnn_f32_avgpool_minmax_ukernel_9x__wasmsimd_x86_c4() local 160 const v128_t vsum018 = wasm_f32x4_add(vsum01, vi8); in xnn_f32_avgpool_minmax_ukernel_9x__wasmsimd_x86_c4() local
|
H A D | 9x-minmax-neon-c4.c | 122 const float32x4_t vsum018 = vaddq_f32(vsum01, vi8); in xnn_f32_avgpool_minmax_ukernel_9x__neon_c4() local 150 const float32x4_t vsum018 = vaddq_f32(vsum01, vi8); in xnn_f32_avgpool_minmax_ukernel_9x__neon_c4() local
|
H A D | 9x-minmax-wasm-c1.c | 121 const float vsum018 = vsum01 + vi8; in xnn_f32_avgpool_minmax_ukernel_9x__wasm_c1() local
|
H A D | 9x-minmax-scalar-c1.c | 121 const float vsum018 = vsum01 + vi8; in xnn_f32_avgpool_minmax_ukernel_9x__scalar_c1() local
|
/aosp_15_r20/external/XNNPACK/src/f16-avgpool/ |
H A D | 9x-minmax-f16c-c8.c | 133 …const __m256 vsum018 = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vsum01, vi8), _MM_FROUND_NO_E… in xnn_f16_avgpool_minmax_ukernel_9x__f16c_c8() local 162 …const __m256 vsum018 = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vsum01, vi8), _MM_FROUND_NO_E… in xnn_f16_avgpool_minmax_ukernel_9x__f16c_c8() local
|
H A D | 9x-minmax-neonfp16arith-c8.c | 122 const float16x8_t vsum018 = vaddq_f16(vsum01, vi8); in xnn_f16_avgpool_minmax_ukernel_9x__neonfp16arith_c8() local 150 const float16x8_t vsum018 = vaddq_f16(vsum01, vi8); in xnn_f16_avgpool_minmax_ukernel_9x__neonfp16arith_c8() local
|
/aosp_15_r20/external/XNNPACK/src/f32-pavgpool/ |
H A D | 9x-minmax-wasmsimd-x86-c4.c | 130 const v128_t vsum018 = wasm_f32x4_add(wasm_f32x4_add(vi0, vi1), vi8); in xnn_f32_pavgpool_minmax_ukernel_9x__wasmsimd_x86_c4() local 163 const v128_t vsum018 = wasm_f32x4_add(vsum01, vi8); in xnn_f32_pavgpool_minmax_ukernel_9x__wasmsimd_x86_c4() local
|
H A D | 9x-minmax-neon-c4.c | 124 const float32x4_t vsum018 = vaddq_f32(vsum01, vi8); in xnn_f32_pavgpool_minmax_ukernel_9x__neon_c4() local 152 const float32x4_t vsum018 = vaddq_f32(vsum01, vi8); in xnn_f32_pavgpool_minmax_ukernel_9x__neon_c4() local
|
H A D | 9x-minmax-wasmsimd-arm-c4.c | 130 const v128_t vsum018 = wasm_f32x4_add(wasm_f32x4_add(vi0, vi1), vi8); in xnn_f32_pavgpool_minmax_ukernel_9x__wasmsimd_arm_c4() local 163 const v128_t vsum018 = wasm_f32x4_add(vsum01, vi8); in xnn_f32_pavgpool_minmax_ukernel_9x__wasmsimd_arm_c4() local
|
H A D | 9x-minmax-sse-c4.c | 130 const __m128 vsum018 = _mm_add_ps(_mm_add_ps(vi0, vi1), vi8); in xnn_f32_pavgpool_minmax_ukernel_9x__sse_c4() local 162 const __m128 vsum018 = _mm_add_ps(vsum01, vi8); in xnn_f32_pavgpool_minmax_ukernel_9x__sse_c4() local
|
H A D | 9x-minmax-wasm-c1.c | 123 const float vsum018 = vsum01 + vi8; in xnn_f32_pavgpool_minmax_ukernel_9x__wasm_c1() local
|
H A D | 9x-minmax-scalar-c1.c | 123 const float vsum018 = vsum01 + vi8; in xnn_f32_pavgpool_minmax_ukernel_9x__scalar_c1() local
|
H A D | 9p8x-minmax-wasm-c1.c | 98 const float vsum018 = vsum01 + vi8; in xnn_f32_pavgpool_minmax_ukernel_9p8x__wasm_c1() local
|
H A D | 9p8x-minmax-scalar-c1.c | 98 const float vsum018 = vsum01 + vi8; in xnn_f32_pavgpool_minmax_ukernel_9p8x__scalar_c1() local
|
/aosp_15_r20/external/XNNPACK/src/f16-pavgpool/ |
H A D | 9x-minmax-avx2-c8.c | 136 …const __m256 vsum018 = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vsum01, vi8), _MM_FROUND_NO_E… in xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8() local 165 …const __m256 vsum018 = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vsum01, vi8), _MM_FROUND_NO_E… in xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8() local
|
H A D | 9x-minmax-neonfp16arith-c8.c | 124 const float16x8_t vsum018 = vaddq_f16(vsum01, vi8); in xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8() local 152 const float16x8_t vsum018 = vaddq_f16(vsum01, vi8); in xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8() local
|
/aosp_15_r20/external/pytorch/aten/src/ATen/native/quantized/cpu/qnnpack/src/q8avgpool/ |
H A D | up8x9-neon.c | 104 const uint16x8_t vsum018 = vaddw_u8(vaddl_u8(vi0, vi1), vi8); in pytorch_q8avgpool_ukernel_up8x9__neon() local 182 const uint16x8_t vsum018 = vaddw_u8(vaddl_u8(vi0, vi1), vi8); in pytorch_q8avgpool_ukernel_up8x9__neon() local
|
H A D | up8x9-sse2.c | 102 const __m128i vsum018 = _mm_add_epi16(_mm_add_epi16(vxi0, vxi1), vxi8); in pytorch_q8avgpool_ukernel_up8x9__sse2() local 184 const __m128i vsum018 = _mm_add_epi16(_mm_add_epi16(vxi0, vxi1), vxi8); in pytorch_q8avgpool_ukernel_up8x9__sse2() local
|
H A D | mp8x9p8q-neon.c | 81 const uint16x8_t vsum018 = vaddw_u8(vaddl_u8(vi0, vi1), vi8); in pytorch_q8avgpool_ukernel_mp8x9p8q__neon() local 134 const uint16x8_t vsum018 = vaddw_u8(vaddl_u8(vi0, vi1), vi8); in pytorch_q8avgpool_ukernel_mp8x9p8q__neon() local
|
H A D | mp8x9p8q-sse2.c | 80 const __m128i vsum018 = _mm_add_epi16(_mm_add_epi16(vxi0, vxi1), vxi8); in pytorch_q8avgpool_ukernel_mp8x9p8q__sse2() local 142 const __m128i vsum018 = _mm_add_epi16(_mm_add_epi16(vxi0, vxi1), vxi8); in pytorch_q8avgpool_ukernel_mp8x9p8q__sse2() local
|
/aosp_15_r20/external/XNNPACK/src/qu8-avgpool/ |
H A D | 9x-minmax-neon-c8.c | 129 const uint16x8_t vsum018 = vaddw_u8(vaddl_u8(vi0, vi1), vi8); in xnn_qu8_avgpool_minmax_ukernel_9x__neon_c8() local 202 const uint16x8_t vsum018 = vaddw_u8(vaddl_u8(vi0, vi1), vi8); in xnn_qu8_avgpool_minmax_ukernel_9x__neon_c8() local
|
H A D | 9x-minmax-sse2-c8.c | 134 const __m128i vsum018 = _mm_add_epi16(_mm_add_epi16(vxi0, vxi1), vxi8); in xnn_qu8_avgpool_minmax_ukernel_9x__sse2_c8() local 209 const __m128i vsum018 = _mm_add_epi16(_mm_add_epi16(vxi0, vxi1), vxi8); in xnn_qu8_avgpool_minmax_ukernel_9x__sse2_c8() local
|
H A D | 9x-minmax-scalar-c1.c | 127 const uint32_t vsum018 = vsum01 + vi8; in xnn_qu8_avgpool_minmax_ukernel_9x__scalar_c1() local
|