/aosp_15_r20/external/pytorch/aten/src/ATen/native/quantized/cpu/qnnpack/src/q8avgpool/ |
H A D | mp8x9p8q-neon.c | 82 const uint16x8_t vsum23 = vaddl_u8(vi2, vi3); in pytorch_q8avgpool_ukernel_mp8x9p8q__neon() local 135 const uint16x8_t vsum23 = vaddl_u8(vi2, vi3); in pytorch_q8avgpool_ukernel_mp8x9p8q__neon() local 188 const uint16x8_t vsum23 = vaddl_u8(vi2, vi3); in pytorch_q8avgpool_ukernel_mp8x9p8q__neon() local 238 const uint16x8_t vsum23 = vaddl_u8(vi2, vi3); in pytorch_q8avgpool_ukernel_mp8x9p8q__neon() local 312 const int16x8_t vsum23 = vreinterpretq_s16_u16(vaddl_u8(vi2, vi3)); in pytorch_q8avgpool_ukernel_mp8x9p8q__neon() local 388 const int16x8_t vsum23 = vreinterpretq_s16_u16(vaddl_u8(vi2, vi3)); in pytorch_q8avgpool_ukernel_mp8x9p8q__neon() local
|
H A D | mp8x9p8q-sse2.c | 81 const __m128i vsum23 = _mm_add_epi16(vxi2, vxi3); in pytorch_q8avgpool_ukernel_mp8x9p8q__sse2() local 143 const __m128i vsum23 = _mm_add_epi16(vxi2, vxi3); in pytorch_q8avgpool_ukernel_mp8x9p8q__sse2() local 204 const __m128i vsum23 = _mm_add_epi16(vxi2, vxi3); in pytorch_q8avgpool_ukernel_mp8x9p8q__sse2() local 262 const __m128i vsum23 = _mm_add_epi16(vxi2, vxi3); in pytorch_q8avgpool_ukernel_mp8x9p8q__sse2() local 343 const __m128i vsum23 = _mm_add_epi16(vxi2, vxi3); in pytorch_q8avgpool_ukernel_mp8x9p8q__sse2() local 421 const __m128i vsum23 = _mm_add_epi16(vxi2, vxi3); in pytorch_q8avgpool_ukernel_mp8x9p8q__sse2() local
|
/aosp_15_r20/external/XNNPACK/src/f32-gavgpool/ |
H A D | 7p7x-minmax-neon-c4.c | 48 const float32x4_t vsum23 = vaddq_f32(vi2, vi3); in xnn_f32_gavgpool_minmax_ukernel_7p7x__neon_c4() local 80 const float32x4_t vsum23 = vaddq_f32(vi2, vi3); in xnn_f32_gavgpool_minmax_ukernel_7p7x__neon_c4() local 134 const float32x4_t vsum23 = vaddq_f32(vi2, vi3); in xnn_f32_gavgpool_minmax_ukernel_7p7x__neon_c4() local 162 const float32x4_t vsum23 = vaddq_f32(vi2, vi3); in xnn_f32_gavgpool_minmax_ukernel_7p7x__neon_c4() local
|
H A D | 7p7x-minmax-wasmsimd-x86-c4.c | 55 const v128_t vsum23 = wasm_f32x4_add(vi2, vi3); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_x86_c4() local 95 const v128_t vsum23 = wasm_f32x4_add(vi2, vi3); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_x86_c4() local 157 const v128_t vsum23 = wasm_f32x4_add(vi2, vi3); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_x86_c4() local 186 const v128_t vsum23 = wasm_f32x4_add(vi2, vi3); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_x86_c4() local
|
H A D | 7p7x-minmax-wasmsimd-arm-c4.c | 55 const v128_t vsum23 = wasm_f32x4_add(vi2, vi3); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_arm_c4() local 95 const v128_t vsum23 = wasm_f32x4_add(vi2, vi3); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_arm_c4() local 157 const v128_t vsum23 = wasm_f32x4_add(vi2, vi3); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_arm_c4() local 186 const v128_t vsum23 = wasm_f32x4_add(vi2, vi3); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_arm_c4() local
|
H A D | 7p7x-minmax-sse-c4.c | 55 const __m128 vsum23 = _mm_add_ps(vi2, vi3); in xnn_f32_gavgpool_minmax_ukernel_7p7x__sse_c4() local 94 const __m128 vsum23 = _mm_add_ps(vi2, vi3); in xnn_f32_gavgpool_minmax_ukernel_7p7x__sse_c4() local 156 const __m128 vsum23 = _mm_add_ps(vi2, vi3); in xnn_f32_gavgpool_minmax_ukernel_7p7x__sse_c4() local 185 const __m128 vsum23 = _mm_add_ps(vi2, vi3); in xnn_f32_gavgpool_minmax_ukernel_7p7x__sse_c4() local
|
H A D | 7p7x-minmax-wasm-c1.c | 46 const float vsum23 = vi2 + vi3; in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasm_c1() local 79 const float vsum23 = vi2 + vi3; in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasm_c1() local 133 const float vsum23 = vi2 + vi3; in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasm_c1() local
|
H A D | 7p7x-minmax-scalar-c1.c | 46 const float vsum23 = vi2 + vi3; in xnn_f32_gavgpool_minmax_ukernel_7p7x__scalar_c1() local 79 const float vsum23 = vi2 + vi3; in xnn_f32_gavgpool_minmax_ukernel_7p7x__scalar_c1() local 133 const float vsum23 = vi2 + vi3; in xnn_f32_gavgpool_minmax_ukernel_7p7x__scalar_c1() local
|
/aosp_15_r20/external/pytorch/aten/src/ATen/native/quantized/cpu/qnnpack/src/q8gavgpool/ |
H A D | mp8x7p7q-neon.c | 59 const int16x8_t vsum23 = vreinterpretq_s16_u16(vaddl_u8(vi2, vi3)); in pytorch_q8gavgpool_ukernel_mp8x7p7q__neon() local 104 const int16x8_t vsum23 = vreinterpretq_s16_u16(vaddl_u8(vi2, vi3)); in pytorch_q8gavgpool_ukernel_mp8x7p7q__neon() local 185 const int16x8_t vsum23 = vreinterpretq_s16_u16(vaddl_u8(vi2, vi3)); in pytorch_q8gavgpool_ukernel_mp8x7p7q__neon() local 258 const int16x8_t vsum23 = vreinterpretq_s16_u16(vaddl_u8(vi2, vi3)); in pytorch_q8gavgpool_ukernel_mp8x7p7q__neon() local
|
/aosp_15_r20/external/XNNPACK/src/f32-avgpool/ |
H A D | 9p8x-minmax-wasmsimd-arm-c4.c | 104 const v128_t vsum23 = wasm_f32x4_add(vi2, vi3); in xnn_f32_avgpool_minmax_ukernel_9p8x__wasmsimd_arm_c4() local 181 const v128_t vsum23 = wasm_f32x4_add(vi2, vi3); in xnn_f32_avgpool_minmax_ukernel_9p8x__wasmsimd_arm_c4() local 281 const v128_t vsum23 = wasm_f32x4_add(vi2, vi3); in xnn_f32_avgpool_minmax_ukernel_9p8x__wasmsimd_arm_c4() local 310 const v128_t vsum23 = wasm_f32x4_add(vi2, vi3); in xnn_f32_avgpool_minmax_ukernel_9p8x__wasmsimd_arm_c4() local
|
H A D | 9p8x-minmax-wasmsimd-x86-c4.c | 104 const v128_t vsum23 = wasm_f32x4_add(vi2, vi3); in xnn_f32_avgpool_minmax_ukernel_9p8x__wasmsimd_x86_c4() local 181 const v128_t vsum23 = wasm_f32x4_add(vi2, vi3); in xnn_f32_avgpool_minmax_ukernel_9p8x__wasmsimd_x86_c4() local 281 const v128_t vsum23 = wasm_f32x4_add(vi2, vi3); in xnn_f32_avgpool_minmax_ukernel_9p8x__wasmsimd_x86_c4() local 310 const v128_t vsum23 = wasm_f32x4_add(vi2, vi3); in xnn_f32_avgpool_minmax_ukernel_9p8x__wasmsimd_x86_c4() local
|
H A D | 9p8x-minmax-sse-c4.c | 104 const __m128 vsum23 = _mm_add_ps(vi2, vi3); in xnn_f32_avgpool_minmax_ukernel_9p8x__sse_c4() local 180 const __m128 vsum23 = _mm_add_ps(vi2, vi3); in xnn_f32_avgpool_minmax_ukernel_9p8x__sse_c4() local 279 const __m128 vsum23 = _mm_add_ps(vi2, vi3); in xnn_f32_avgpool_minmax_ukernel_9p8x__sse_c4() local 308 const __m128 vsum23 = _mm_add_ps(vi2, vi3); in xnn_f32_avgpool_minmax_ukernel_9p8x__sse_c4() local
|
H A D | 9p8x-minmax-neon-c4.c | 95 const float32x4_t vsum23 = vaddq_f32(vi2, vi3); in xnn_f32_avgpool_minmax_ukernel_9p8x__neon_c4() local 163 const float32x4_t vsum23 = vaddq_f32(vi2, vi3); in xnn_f32_avgpool_minmax_ukernel_9p8x__neon_c4() local 254 const float32x4_t vsum23 = vaddq_f32(vi2, vi3); in xnn_f32_avgpool_minmax_ukernel_9p8x__neon_c4() local 282 const float32x4_t vsum23 = vaddq_f32(vi2, vi3); in xnn_f32_avgpool_minmax_ukernel_9p8x__neon_c4() local
|
/aosp_15_r20/external/XNNPACK/src/f16-avgpool/ |
H A D | 9p8x-minmax-neonfp16arith-c8.c | 95 const float16x8_t vsum23 = vaddq_f16(vi2, vi3); in xnn_f16_avgpool_minmax_ukernel_9p8x__neonfp16arith_c8() local 163 const float16x8_t vsum23 = vaddq_f16(vi2, vi3); in xnn_f16_avgpool_minmax_ukernel_9p8x__neonfp16arith_c8() local 254 const float16x8_t vsum23 = vaddq_f16(vi2, vi3); in xnn_f16_avgpool_minmax_ukernel_9p8x__neonfp16arith_c8() local 282 const float16x8_t vsum23 = vaddq_f16(vi2, vi3); in xnn_f16_avgpool_minmax_ukernel_9p8x__neonfp16arith_c8() local
|
H A D | 9p8x-minmax-f16c-c8.c | 106 …const __m256 vsum23 = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vi2, vi3), _MM_FROUND_NO_EXC)); in xnn_f16_avgpool_minmax_ukernel_9p8x__f16c_c8() local 183 …const __m256 vsum23 = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vi2, vi3), _MM_FROUND_NO_EXC)); in xnn_f16_avgpool_minmax_ukernel_9p8x__f16c_c8() local 284 …const __m256 vsum23 = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vi2, vi3), _MM_FROUND_NO_EXC)); in xnn_f16_avgpool_minmax_ukernel_9p8x__f16c_c8() local 313 …const __m256 vsum23 = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vi2, vi3), _MM_FROUND_NO_EXC)); in xnn_f16_avgpool_minmax_ukernel_9p8x__f16c_c8() local
|
/aosp_15_r20/external/XNNPACK/src/f32-pavgpool/ |
H A D | 9p8x-minmax-wasmsimd-x86-c4.c | 104 const v128_t vsum23 = wasm_f32x4_add(vi2, vi3); in xnn_f32_pavgpool_minmax_ukernel_9p8x__wasmsimd_x86_c4() local 181 const v128_t vsum23 = wasm_f32x4_add(vi2, vi3); in xnn_f32_pavgpool_minmax_ukernel_9p8x__wasmsimd_x86_c4() local 284 const v128_t vsum23 = wasm_f32x4_add(vi2, vi3); in xnn_f32_pavgpool_minmax_ukernel_9p8x__wasmsimd_x86_c4() local 313 const v128_t vsum23 = wasm_f32x4_add(vi2, vi3); in xnn_f32_pavgpool_minmax_ukernel_9p8x__wasmsimd_x86_c4() local
|
H A D | 9p8x-minmax-wasmsimd-arm-c4.c | 104 const v128_t vsum23 = wasm_f32x4_add(vi2, vi3); in xnn_f32_pavgpool_minmax_ukernel_9p8x__wasmsimd_arm_c4() local 181 const v128_t vsum23 = wasm_f32x4_add(vi2, vi3); in xnn_f32_pavgpool_minmax_ukernel_9p8x__wasmsimd_arm_c4() local 284 const v128_t vsum23 = wasm_f32x4_add(vi2, vi3); in xnn_f32_pavgpool_minmax_ukernel_9p8x__wasmsimd_arm_c4() local 313 const v128_t vsum23 = wasm_f32x4_add(vi2, vi3); in xnn_f32_pavgpool_minmax_ukernel_9p8x__wasmsimd_arm_c4() local
|
H A D | 9p8x-minmax-sse-c4.c | 104 const __m128 vsum23 = _mm_add_ps(vi2, vi3); in xnn_f32_pavgpool_minmax_ukernel_9p8x__sse_c4() local 180 const __m128 vsum23 = _mm_add_ps(vi2, vi3); in xnn_f32_pavgpool_minmax_ukernel_9p8x__sse_c4() local 282 const __m128 vsum23 = _mm_add_ps(vi2, vi3); in xnn_f32_pavgpool_minmax_ukernel_9p8x__sse_c4() local 311 const __m128 vsum23 = _mm_add_ps(vi2, vi3); in xnn_f32_pavgpool_minmax_ukernel_9p8x__sse_c4() local
|
H A D | 9p8x-minmax-neon-c4.c | 95 const float32x4_t vsum23 = vaddq_f32(vi2, vi3); in xnn_f32_pavgpool_minmax_ukernel_9p8x__neon_c4() local 163 const float32x4_t vsum23 = vaddq_f32(vi2, vi3); in xnn_f32_pavgpool_minmax_ukernel_9p8x__neon_c4() local 255 const float32x4_t vsum23 = vaddq_f32(vi2, vi3); in xnn_f32_pavgpool_minmax_ukernel_9p8x__neon_c4() local 283 const float32x4_t vsum23 = vaddq_f32(vi2, vi3); in xnn_f32_pavgpool_minmax_ukernel_9p8x__neon_c4() local
|
H A D | 9p8x-minmax-wasm-c1.c | 95 const float vsum23 = vi2 + vi3; in xnn_f32_pavgpool_minmax_ukernel_9p8x__wasm_c1() local 164 const float vsum23 = vi2 + vi3; in xnn_f32_pavgpool_minmax_ukernel_9p8x__wasm_c1() local 256 const float vsum23 = vi2 + vi3; in xnn_f32_pavgpool_minmax_ukernel_9p8x__wasm_c1() local
|
H A D | 9p8x-minmax-scalar-c1.c | 95 const float vsum23 = vi2 + vi3; in xnn_f32_pavgpool_minmax_ukernel_9p8x__scalar_c1() local 164 const float vsum23 = vi2 + vi3; in xnn_f32_pavgpool_minmax_ukernel_9p8x__scalar_c1() local 256 const float vsum23 = vi2 + vi3; in xnn_f32_pavgpool_minmax_ukernel_9p8x__scalar_c1() local
|
/aosp_15_r20/external/XNNPACK/src/f16-pavgpool/ |
H A D | 9p8x-minmax-neonfp16arith-c8.c | 95 const float16x8_t vsum23 = vaddq_f16(vi2, vi3); in xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8() local 163 const float16x8_t vsum23 = vaddq_f16(vi2, vi3); in xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8() local 255 const float16x8_t vsum23 = vaddq_f16(vi2, vi3); in xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8() local 283 const float16x8_t vsum23 = vaddq_f16(vi2, vi3); in xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8() local
|
H A D | 9p8x-minmax-avx2-c8.c | 106 …const __m256 vsum23 = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vi2, vi3), _MM_FROUND_NO_EXC)); in xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8() local 183 …const __m256 vsum23 = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vi2, vi3), _MM_FROUND_NO_EXC)); in xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8() local 286 …const __m256 vsum23 = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vi2, vi3), _MM_FROUND_NO_EXC)); in xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8() local 315 …const __m256 vsum23 = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vi2, vi3), _MM_FROUND_NO_EXC)); in xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8() local
|
/aosp_15_r20/external/XNNPACK/src/qu8-avgpool/ |
H A D | 9p8x-minmax-neon-c8.c | 106 const uint16x8_t vsum23 = vaddl_u8(vi2, vi3); in xnn_qu8_avgpool_minmax_ukernel_9p8x__neon_c8() local 179 const uint16x8_t vsum23 = vaddl_u8(vi2, vi3); in xnn_qu8_avgpool_minmax_ukernel_9p8x__neon_c8() local 274 const int16x8_t vsum23 = vreinterpretq_s16_u16(vaddl_u8(vi2, vi3)); in xnn_qu8_avgpool_minmax_ukernel_9p8x__neon_c8() local 348 const int16x8_t vsum23 = vreinterpretq_s16_u16(vaddl_u8(vi2, vi3)); in xnn_qu8_avgpool_minmax_ukernel_9p8x__neon_c8() local
|
H A D | 9p8x-minmax-sse2-c8.c | 111 const __m128i vsum23 = _mm_add_epi16(vxi2, vxi3); in xnn_qu8_avgpool_minmax_ukernel_9p8x__sse2_c8() local 194 const __m128i vsum23 = _mm_add_epi16(vxi2, vxi3); in xnn_qu8_avgpool_minmax_ukernel_9p8x__sse2_c8() local 300 const __m128i vsum23 = _mm_add_epi16(vxi2, vxi3); in xnn_qu8_avgpool_minmax_ukernel_9p8x__sse2_c8() local 375 const __m128i vsum23 = _mm_add_epi16(vxi2, vxi3); in xnn_qu8_avgpool_minmax_ukernel_9p8x__sse2_c8() local
|