/aosp_15_r20/external/XNNPACK/src/f32-avgpool/ |
H A D | 9p8x-minmax-wasmsimd-arm-c4.c | 186 const v128_t vsum0167a = wasm_f32x4_add(vsum01a, vsum67); in xnn_f32_avgpool_minmax_ukernel_9p8x__wasmsimd_arm_c4() local 187 const v128_t vsum = wasm_f32x4_add(vsum2345, vsum0167a); in xnn_f32_avgpool_minmax_ukernel_9p8x__wasmsimd_arm_c4() 286 const v128_t vsum0167a = wasm_f32x4_add(vsum01a, vsum67); in xnn_f32_avgpool_minmax_ukernel_9p8x__wasmsimd_arm_c4() local 287 const v128_t vsum = wasm_f32x4_add(vsum2345, vsum0167a); in xnn_f32_avgpool_minmax_ukernel_9p8x__wasmsimd_arm_c4() 315 const v128_t vsum0167a = wasm_f32x4_add(vsum01a, vsum67); in xnn_f32_avgpool_minmax_ukernel_9p8x__wasmsimd_arm_c4() local 316 const v128_t vsum = wasm_f32x4_add(vsum2345, vsum0167a); in xnn_f32_avgpool_minmax_ukernel_9p8x__wasmsimd_arm_c4()
|
H A D | 9p8x-minmax-wasmsimd-x86-c4.c | 186 const v128_t vsum0167a = wasm_f32x4_add(vsum01a, vsum67); in xnn_f32_avgpool_minmax_ukernel_9p8x__wasmsimd_x86_c4() local 187 const v128_t vsum = wasm_f32x4_add(vsum2345, vsum0167a); in xnn_f32_avgpool_minmax_ukernel_9p8x__wasmsimd_x86_c4() 286 const v128_t vsum0167a = wasm_f32x4_add(vsum01a, vsum67); in xnn_f32_avgpool_minmax_ukernel_9p8x__wasmsimd_x86_c4() local 287 const v128_t vsum = wasm_f32x4_add(vsum2345, vsum0167a); in xnn_f32_avgpool_minmax_ukernel_9p8x__wasmsimd_x86_c4() 315 const v128_t vsum0167a = wasm_f32x4_add(vsum01a, vsum67); in xnn_f32_avgpool_minmax_ukernel_9p8x__wasmsimd_x86_c4() local 316 const v128_t vsum = wasm_f32x4_add(vsum2345, vsum0167a); in xnn_f32_avgpool_minmax_ukernel_9p8x__wasmsimd_x86_c4()
|
H A D | 9p8x-minmax-sse-c4.c | 185 const __m128 vsum0167a = _mm_add_ps(vsum01a, vsum67); in xnn_f32_avgpool_minmax_ukernel_9p8x__sse_c4() local 186 const __m128 vsum = _mm_add_ps(vsum2345, vsum0167a); in xnn_f32_avgpool_minmax_ukernel_9p8x__sse_c4() 284 const __m128 vsum0167a = _mm_add_ps(vsum01a, vsum67); in xnn_f32_avgpool_minmax_ukernel_9p8x__sse_c4() local 285 const __m128 vsum = _mm_add_ps(vsum2345, vsum0167a); in xnn_f32_avgpool_minmax_ukernel_9p8x__sse_c4() 313 const __m128 vsum0167a = _mm_add_ps(vsum01a, vsum67); in xnn_f32_avgpool_minmax_ukernel_9p8x__sse_c4() local 314 const __m128 vsum = _mm_add_ps(vsum2345, vsum0167a); in xnn_f32_avgpool_minmax_ukernel_9p8x__sse_c4()
|
H A D | 9p8x-minmax-neon-c4.c | 168 const float32x4_t vsum0167a = vaddq_f32(vsum01a, vsum67); in xnn_f32_avgpool_minmax_ukernel_9p8x__neon_c4() local 169 const float32x4_t vsum = vaddq_f32(vsum2345, vsum0167a); in xnn_f32_avgpool_minmax_ukernel_9p8x__neon_c4() 259 const float32x4_t vsum0167a = vaddq_f32(vsum01a, vsum67); in xnn_f32_avgpool_minmax_ukernel_9p8x__neon_c4() local 260 const float32x4_t vsum = vaddq_f32(vsum2345, vsum0167a); in xnn_f32_avgpool_minmax_ukernel_9p8x__neon_c4() 287 const float32x4_t vsum0167a = vaddq_f32(vsum01a, vsum67); in xnn_f32_avgpool_minmax_ukernel_9p8x__neon_c4() local 288 const float32x4_t vsum = vaddq_f32(vsum2345, vsum0167a); in xnn_f32_avgpool_minmax_ukernel_9p8x__neon_c4()
|
H A D | 9p8x-minmax-scalar-c1.c | 169 const float vsum0167a = vsum01a + vsum67; in xnn_f32_avgpool_minmax_ukernel_9p8x__scalar_c1() local 170 const float vsum = vsum2345 + vsum0167a; in xnn_f32_avgpool_minmax_ukernel_9p8x__scalar_c1() 259 const float vsum0167a = vsum01a + vsum67; in xnn_f32_avgpool_minmax_ukernel_9p8x__scalar_c1() local 260 const float vsum = vsum2345 + vsum0167a; in xnn_f32_avgpool_minmax_ukernel_9p8x__scalar_c1()
|
H A D | 9p8x-minmax-wasm-c1.c | 169 const float vsum0167a = vsum01a + vsum67; in xnn_f32_avgpool_minmax_ukernel_9p8x__wasm_c1() local 170 const float vsum = vsum2345 + vsum0167a; in xnn_f32_avgpool_minmax_ukernel_9p8x__wasm_c1() 259 const float vsum0167a = vsum01a + vsum67; in xnn_f32_avgpool_minmax_ukernel_9p8x__wasm_c1() local 260 const float vsum = vsum2345 + vsum0167a; in xnn_f32_avgpool_minmax_ukernel_9p8x__wasm_c1()
|
/aosp_15_r20/external/XNNPACK/src/f32-pavgpool/ |
H A D | 9p8x-minmax-wasmsimd-x86-c4.c | 186 const v128_t vsum0167a = wasm_f32x4_add(vsum01a, vsum67); in xnn_f32_pavgpool_minmax_ukernel_9p8x__wasmsimd_x86_c4() local 187 const v128_t vsum = wasm_f32x4_add(vsum2345, vsum0167a); in xnn_f32_pavgpool_minmax_ukernel_9p8x__wasmsimd_x86_c4() 289 const v128_t vsum0167a = wasm_f32x4_add(vsum01a, vsum67); in xnn_f32_pavgpool_minmax_ukernel_9p8x__wasmsimd_x86_c4() local 290 const v128_t vsum = wasm_f32x4_add(vsum2345, vsum0167a); in xnn_f32_pavgpool_minmax_ukernel_9p8x__wasmsimd_x86_c4() 318 const v128_t vsum0167a = wasm_f32x4_add(vsum01a, vsum67); in xnn_f32_pavgpool_minmax_ukernel_9p8x__wasmsimd_x86_c4() local 319 const v128_t vsum = wasm_f32x4_add(vsum2345, vsum0167a); in xnn_f32_pavgpool_minmax_ukernel_9p8x__wasmsimd_x86_c4()
|
H A D | 9p8x-minmax-wasmsimd-arm-c4.c | 186 const v128_t vsum0167a = wasm_f32x4_add(vsum01a, vsum67); in xnn_f32_pavgpool_minmax_ukernel_9p8x__wasmsimd_arm_c4() local 187 const v128_t vsum = wasm_f32x4_add(vsum2345, vsum0167a); in xnn_f32_pavgpool_minmax_ukernel_9p8x__wasmsimd_arm_c4() 289 const v128_t vsum0167a = wasm_f32x4_add(vsum01a, vsum67); in xnn_f32_pavgpool_minmax_ukernel_9p8x__wasmsimd_arm_c4() local 290 const v128_t vsum = wasm_f32x4_add(vsum2345, vsum0167a); in xnn_f32_pavgpool_minmax_ukernel_9p8x__wasmsimd_arm_c4() 318 const v128_t vsum0167a = wasm_f32x4_add(vsum01a, vsum67); in xnn_f32_pavgpool_minmax_ukernel_9p8x__wasmsimd_arm_c4() local 319 const v128_t vsum = wasm_f32x4_add(vsum2345, vsum0167a); in xnn_f32_pavgpool_minmax_ukernel_9p8x__wasmsimd_arm_c4()
|
H A D | 9p8x-minmax-sse-c4.c | 185 const __m128 vsum0167a = _mm_add_ps(vsum01a, vsum67); in xnn_f32_pavgpool_minmax_ukernel_9p8x__sse_c4() local 186 const __m128 vsum = _mm_add_ps(vsum2345, vsum0167a); in xnn_f32_pavgpool_minmax_ukernel_9p8x__sse_c4() 287 const __m128 vsum0167a = _mm_add_ps(vsum01a, vsum67); in xnn_f32_pavgpool_minmax_ukernel_9p8x__sse_c4() local 288 const __m128 vsum = _mm_add_ps(vsum2345, vsum0167a); in xnn_f32_pavgpool_minmax_ukernel_9p8x__sse_c4() 316 const __m128 vsum0167a = _mm_add_ps(vsum01a, vsum67); in xnn_f32_pavgpool_minmax_ukernel_9p8x__sse_c4() local 317 const __m128 vsum = _mm_add_ps(vsum2345, vsum0167a); in xnn_f32_pavgpool_minmax_ukernel_9p8x__sse_c4()
|
H A D | 9p8x-minmax-neon-c4.c | 168 const float32x4_t vsum0167a = vaddq_f32(vsum01a, vsum67); in xnn_f32_pavgpool_minmax_ukernel_9p8x__neon_c4() local 169 const float32x4_t vsum = vaddq_f32(vsum2345, vsum0167a); in xnn_f32_pavgpool_minmax_ukernel_9p8x__neon_c4() 260 const float32x4_t vsum0167a = vaddq_f32(vsum01a, vsum67); in xnn_f32_pavgpool_minmax_ukernel_9p8x__neon_c4() local 261 const float32x4_t vsum = vaddq_f32(vsum2345, vsum0167a); in xnn_f32_pavgpool_minmax_ukernel_9p8x__neon_c4() 288 const float32x4_t vsum0167a = vaddq_f32(vsum01a, vsum67); in xnn_f32_pavgpool_minmax_ukernel_9p8x__neon_c4() local 289 const float32x4_t vsum = vaddq_f32(vsum2345, vsum0167a); in xnn_f32_pavgpool_minmax_ukernel_9p8x__neon_c4()
|
H A D | 9p8x-minmax-wasm-c1.c | 169 const float vsum0167a = vsum01a + vsum67; in xnn_f32_pavgpool_minmax_ukernel_9p8x__wasm_c1() local 170 const float vsum = vsum2345 + vsum0167a; in xnn_f32_pavgpool_minmax_ukernel_9p8x__wasm_c1() 261 const float vsum0167a = vsum01a + vsum67; in xnn_f32_pavgpool_minmax_ukernel_9p8x__wasm_c1() local 262 const float vsum = vsum2345 + vsum0167a; in xnn_f32_pavgpool_minmax_ukernel_9p8x__wasm_c1()
|
H A D | 9p8x-minmax-scalar-c1.c | 169 const float vsum0167a = vsum01a + vsum67; in xnn_f32_pavgpool_minmax_ukernel_9p8x__scalar_c1() local 170 const float vsum = vsum2345 + vsum0167a; in xnn_f32_pavgpool_minmax_ukernel_9p8x__scalar_c1() 261 const float vsum0167a = vsum01a + vsum67; in xnn_f32_pavgpool_minmax_ukernel_9p8x__scalar_c1() local 262 const float vsum = vsum2345 + vsum0167a; in xnn_f32_pavgpool_minmax_ukernel_9p8x__scalar_c1()
|
/aosp_15_r20/external/XNNPACK/src/f16-avgpool/ |
H A D | 9p8x-minmax-neonfp16arith-c8.c | 168 const float16x8_t vsum0167a = vaddq_f16(vsum01a, vsum67); in xnn_f16_avgpool_minmax_ukernel_9p8x__neonfp16arith_c8() local 169 const float16x8_t vsum = vaddq_f16(vsum2345, vsum0167a); in xnn_f16_avgpool_minmax_ukernel_9p8x__neonfp16arith_c8() 259 const float16x8_t vsum0167a = vaddq_f16(vsum01a, vsum67); in xnn_f16_avgpool_minmax_ukernel_9p8x__neonfp16arith_c8() local 260 const float16x8_t vsum = vaddq_f16(vsum2345, vsum0167a); in xnn_f16_avgpool_minmax_ukernel_9p8x__neonfp16arith_c8() 287 const float16x8_t vsum0167a = vaddq_f16(vsum01a, vsum67); in xnn_f16_avgpool_minmax_ukernel_9p8x__neonfp16arith_c8() local 288 const float16x8_t vsum = vaddq_f16(vsum2345, vsum0167a); in xnn_f16_avgpool_minmax_ukernel_9p8x__neonfp16arith_c8()
|
H A D | 9p8x-minmax-f16c-c8.c | 188 …const __m256 vsum0167a = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vsum01a, vsum67), _MM_FROUN… in xnn_f16_avgpool_minmax_ukernel_9p8x__f16c_c8() local 189 const __m128i vsum = _mm256_cvtps_ph(_mm256_add_ps(vsum2345, vsum0167a), _MM_FROUND_NO_EXC); in xnn_f16_avgpool_minmax_ukernel_9p8x__f16c_c8() 289 …const __m256 vsum0167a = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vsum01a, vsum67), _MM_FROUN… in xnn_f16_avgpool_minmax_ukernel_9p8x__f16c_c8() local 290 …const __m256 vsum = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vsum2345, vsum0167a), _MM_FROUND… in xnn_f16_avgpool_minmax_ukernel_9p8x__f16c_c8() 318 …const __m256 vsum0167a = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vsum01a, vsum67), _MM_FROUN… in xnn_f16_avgpool_minmax_ukernel_9p8x__f16c_c8() local 319 …const __m256 vsum = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vsum2345, vsum0167a), _MM_FROUND… in xnn_f16_avgpool_minmax_ukernel_9p8x__f16c_c8()
|
/aosp_15_r20/external/XNNPACK/src/f16-pavgpool/ |
H A D | 9p8x-minmax-neonfp16arith-c8.c | 168 const float16x8_t vsum0167a = vaddq_f16(vsum01a, vsum67); in xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8() local 169 const float16x8_t vsum = vaddq_f16(vsum2345, vsum0167a); in xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8() 260 const float16x8_t vsum0167a = vaddq_f16(vsum01a, vsum67); in xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8() local 261 const float16x8_t vsum = vaddq_f16(vsum2345, vsum0167a); in xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8() 288 const float16x8_t vsum0167a = vaddq_f16(vsum01a, vsum67); in xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8() local 289 const float16x8_t vsum = vaddq_f16(vsum2345, vsum0167a); in xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8()
|
H A D | 9p8x-minmax-avx2-c8.c | 188 …const __m256 vsum0167a = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vsum01a, vsum67), _MM_FROUN… in xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8() local 189 …const __m256 vsum = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vsum2345, vsum0167a), _MM_FROUND… in xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8() 291 …const __m256 vsum0167a = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vsum01a, vsum67), _MM_FROUN… in xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8() local 292 …const __m256 vsum = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vsum2345, vsum0167a), _MM_FROUND… in xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8() 320 …const __m256 vsum0167a = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vsum01a, vsum67), _MM_FROUN… in xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8() local 321 …const __m256 vsum = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vsum2345, vsum0167a), _MM_FROUND… in xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8()
|
/aosp_15_r20/external/XNNPACK/src/amalgam/ |
H A D | f16c.c | 197 …const __m256 vsum0167a = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vsum01a, vsum67), _MM_FROUN… in xnn_f16_avgpool_minmax_ukernel_9p8x__f16c_c8() local 198 const __m128i vsum = _mm256_cvtps_ph(_mm256_add_ps(vsum2345, vsum0167a), _MM_FROUND_NO_EXC); in xnn_f16_avgpool_minmax_ukernel_9p8x__f16c_c8() 298 …const __m256 vsum0167a = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vsum01a, vsum67), _MM_FROUN… in xnn_f16_avgpool_minmax_ukernel_9p8x__f16c_c8() local 299 …const __m256 vsum = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vsum2345, vsum0167a), _MM_FROUND… in xnn_f16_avgpool_minmax_ukernel_9p8x__f16c_c8() 327 …const __m256 vsum0167a = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vsum01a, vsum67), _MM_FROUN… in xnn_f16_avgpool_minmax_ukernel_9p8x__f16c_c8() local 328 …const __m256 vsum = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vsum2345, vsum0167a), _MM_FROUND… in xnn_f16_avgpool_minmax_ukernel_9p8x__f16c_c8()
|
H A D | sse.c | 203 const __m128 vsum0167a = _mm_add_ps(vsum01a, vsum67); in xnn_f32_avgpool_minmax_ukernel_9p8x__sse_c4() local 204 const __m128 vsum = _mm_add_ps(vsum2345, vsum0167a); in xnn_f32_avgpool_minmax_ukernel_9p8x__sse_c4() 302 const __m128 vsum0167a = _mm_add_ps(vsum01a, vsum67); in xnn_f32_avgpool_minmax_ukernel_9p8x__sse_c4() local 303 const __m128 vsum = _mm_add_ps(vsum2345, vsum0167a); in xnn_f32_avgpool_minmax_ukernel_9p8x__sse_c4() 331 const __m128 vsum0167a = _mm_add_ps(vsum01a, vsum67); in xnn_f32_avgpool_minmax_ukernel_9p8x__sse_c4() local 332 const __m128 vsum = _mm_add_ps(vsum2345, vsum0167a); in xnn_f32_avgpool_minmax_ukernel_9p8x__sse_c4() 6176 const __m128 vsum0167a = _mm_add_ps(vsum01a, vsum67); in xnn_f32_pavgpool_minmax_ukernel_9p8x__sse_c4() local 6177 const __m128 vsum = _mm_add_ps(vsum2345, vsum0167a); in xnn_f32_pavgpool_minmax_ukernel_9p8x__sse_c4() 6278 const __m128 vsum0167a = _mm_add_ps(vsum01a, vsum67); in xnn_f32_pavgpool_minmax_ukernel_9p8x__sse_c4() local 6279 const __m128 vsum = _mm_add_ps(vsum2345, vsum0167a); in xnn_f32_pavgpool_minmax_ukernel_9p8x__sse_c4() [all …]
|
H A D | avx2.c | 788 …const __m256 vsum0167a = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vsum01a, vsum67), _MM_FROUN… in xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8() local 789 …const __m256 vsum = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vsum2345, vsum0167a), _MM_FROUND… in xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8() 891 …const __m256 vsum0167a = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vsum01a, vsum67), _MM_FROUN… in xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8() local 892 …const __m256 vsum = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vsum2345, vsum0167a), _MM_FROUND… in xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8() 920 …const __m256 vsum0167a = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vsum01a, vsum67), _MM_FROUN… in xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8() local 921 …const __m256 vsum = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vsum2345, vsum0167a), _MM_FROUND… in xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8()
|