Home
last modified time | relevance | path

Searched refs:vreinterpret_u32_u16 (Results 1 – 25 of 63) sorted by relevance

123

/aosp_15_r20/external/XNNPACK/src/x8-zip/
H A Dxm-neon.c47 vst1_lane_u32((void*) output, vreinterpret_u32_u16(vxyzw_lo.val[0]), 0); in xnn_x8_zip_xm_ukernel__neon()
50 vst1_lane_u32((void*) output, vreinterpret_u32_u16(vxyzw_lo.val[0]), 1); in xnn_x8_zip_xm_ukernel__neon()
53 vst1_lane_u32((void*) output, vreinterpret_u32_u16(vxyzw_lo.val[1]), 0); in xnn_x8_zip_xm_ukernel__neon()
56 vst1_lane_u32((void*) output, vreinterpret_u32_u16(vxyzw_lo.val[1]), 1); in xnn_x8_zip_xm_ukernel__neon()
59 vst1_lane_u32((void*) output, vreinterpret_u32_u16(vxyzw_hi.val[0]), 0); in xnn_x8_zip_xm_ukernel__neon()
62 vst1_lane_u32((void*) output, vreinterpret_u32_u16(vxyzw_hi.val[0]), 1); in xnn_x8_zip_xm_ukernel__neon()
65 vst1_lane_u32((void*) output, vreinterpret_u32_u16(vxyzw_hi.val[1]), 0); in xnn_x8_zip_xm_ukernel__neon()
68 vst1_lane_u32((void*) output, vreinterpret_u32_u16(vxyzw_hi.val[1]), 1); in xnn_x8_zip_xm_ukernel__neon()
90 uint32x2_t vxyzw0 = vreinterpret_u32_u16(vxyzw_lo.val[0]); in xnn_x8_zip_xm_ukernel__neon()
91 uint32x2_t vxyzw1 = vreinterpret_u32_u16(vxyzw_lo.val[1]); in xnn_x8_zip_xm_ukernel__neon()
[all …]
/aosp_15_r20/external/pytorch/aten/src/ATen/native/quantized/cpu/qnnpack/src/x8zip/
H A Dxm-neon.c53 vreinterpret_u32_u16(vxyzw_lo.val[0]), in pytorch_qnnp_x8zip_xm__neon()
59 vreinterpret_u32_u16(vxyzw_lo.val[0]), in pytorch_qnnp_x8zip_xm__neon()
65 vreinterpret_u32_u16(vxyzw_lo.val[1]), in pytorch_qnnp_x8zip_xm__neon()
71 vreinterpret_u32_u16(vxyzw_lo.val[1]), in pytorch_qnnp_x8zip_xm__neon()
77 vreinterpret_u32_u16(vxyzw_hi.val[0]), in pytorch_qnnp_x8zip_xm__neon()
83 vreinterpret_u32_u16(vxyzw_hi.val[0]), in pytorch_qnnp_x8zip_xm__neon()
89 vreinterpret_u32_u16(vxyzw_hi.val[1]), in pytorch_qnnp_x8zip_xm__neon()
95 vreinterpret_u32_u16(vxyzw_hi.val[1]), in pytorch_qnnp_x8zip_xm__neon()
123 uint32x2_t vxyzw0 = vreinterpret_u32_u16(vxyzw_lo.val[0]); in pytorch_qnnp_x8zip_xm__neon()
124 uint32x2_t vxyzw1 = vreinterpret_u32_u16(vxyzw_lo.val[1]); in pytorch_qnnp_x8zip_xm__neon()
[all …]
/aosp_15_r20/external/libaom/aom_dsp/arm/
H A Dtranspose_neon.h111 uint32x2x2_t w6 = vzip_u32(vreinterpret_u32_u16(w4.val[0]), in transpose_arrays_u8_8x16()
112 vreinterpret_u32_u16(w5.val[0])); in transpose_arrays_u8_8x16()
113 uint32x2x2_t w7 = vzip_u32(vreinterpret_u32_u16(w4.val[1]), in transpose_arrays_u8_8x16()
114 vreinterpret_u32_u16(w5.val[1])); in transpose_arrays_u8_8x16()
115 uint32x2x2_t w14 = vzip_u32(vreinterpret_u32_u16(w12.val[0]), in transpose_arrays_u8_8x16()
116 vreinterpret_u32_u16(w13.val[0])); in transpose_arrays_u8_8x16()
117 uint32x2x2_t w15 = vzip_u32(vreinterpret_u32_u16(w12.val[1]), in transpose_arrays_u8_8x16()
118 vreinterpret_u32_u16(w13.val[1])); in transpose_arrays_u8_8x16()
133 w6 = vzip_u32(vreinterpret_u32_u16(w4.val[0]), in transpose_arrays_u8_8x16()
134 vreinterpret_u32_u16(w5.val[0])); in transpose_arrays_u8_8x16()
[all …]
/aosp_15_r20/external/ComputeLibrary/src/cpu/kernels/
H A DCpuTransposeKernel.cpp130 …const uint32x2x2_t k0_u32 = vtrn_u32(vreinterpret_u32_u16(k0_u16.val[0]), vreinterpret_u32_u16(k2_… in transpose_8bit_elements()
131 …const uint32x2x2_t k1_u32 = vtrn_u32(vreinterpret_u32_u16(k0_u16.val[1]), vreinterpret_u32_u16(k2_… in transpose_8bit_elements()
132 …const uint32x2x2_t k2_u32 = vtrn_u32(vreinterpret_u32_u16(k1_u16.val[0]), vreinterpret_u32_u16(k3_… in transpose_8bit_elements()
133 …const uint32x2x2_t k3_u32 = vtrn_u32(vreinterpret_u32_u16(k1_u16.val[1]), vreinterpret_u32_u16(k3_… in transpose_8bit_elements()
257 …const uint32x2x2_t k0_u32 = vtrn_u32(vreinterpret_u32_u16(k0_u16.val[0]), vreinterpret_u32_u16(k1_… in transpose_16bit_elements()
258 …const uint32x2x2_t k1_u32 = vtrn_u32(vreinterpret_u32_u16(k0_u16.val[1]), vreinterpret_u32_u16(k1_… in transpose_16bit_elements()
/aosp_15_r20/external/XNNPACK/src/x16-transposec/gen/
H A D8x8-reuse-multi-zip-neon.c180 vst1_lane_u32((void*) o7, vreinterpret_u32_u16(v7_low), 0); o7 += 2; in xnn_x16_transposec_ukernel__8x8_reuse_multi_zip_neon()
181 vst1_lane_u32((void*) o6, vreinterpret_u32_u16(v6_low), 0); o6 += 2; in xnn_x16_transposec_ukernel__8x8_reuse_multi_zip_neon()
182 vst1_lane_u32((void*) o5, vreinterpret_u32_u16(v5_low), 0); o5 += 2; in xnn_x16_transposec_ukernel__8x8_reuse_multi_zip_neon()
183 vst1_lane_u32((void*) o4, vreinterpret_u32_u16(v4_low), 0); o4 += 2; in xnn_x16_transposec_ukernel__8x8_reuse_multi_zip_neon()
184 vst1_lane_u32((void*) o3, vreinterpret_u32_u16(v3_low), 0); o3 += 2; in xnn_x16_transposec_ukernel__8x8_reuse_multi_zip_neon()
185 vst1_lane_u32((void*) o2, vreinterpret_u32_u16(v2_low), 0); o2 += 2; in xnn_x16_transposec_ukernel__8x8_reuse_multi_zip_neon()
186 vst1_lane_u32((void*) o1, vreinterpret_u32_u16(v1_low), 0); o1 += 2; in xnn_x16_transposec_ukernel__8x8_reuse_multi_zip_neon()
187 vst1_lane_u32((void*) o0, vreinterpret_u32_u16(v0_low), 0); o0 += 2; in xnn_x16_transposec_ukernel__8x8_reuse_multi_zip_neon()
H A D8x8-reuse-switch-zip-neon.c186 …vst1_lane_u32((void*) oN, vreinterpret_u32_u16(v7_low), 0); oN = (uint16_t*) ((uintptr_t) oN + min… in xnn_x16_transposec_ukernel__8x8_reuse_switch_zip_neon()
188 …vst1_lane_u32((void*) oN, vreinterpret_u32_u16(v6_low), 0); oN = (uint16_t*) ((uintptr_t) oN + min… in xnn_x16_transposec_ukernel__8x8_reuse_switch_zip_neon()
190 …vst1_lane_u32((void*) oN, vreinterpret_u32_u16(v5_low), 0); oN = (uint16_t*) ((uintptr_t) oN + min… in xnn_x16_transposec_ukernel__8x8_reuse_switch_zip_neon()
192 …vst1_lane_u32((void*) oN, vreinterpret_u32_u16(v4_low), 0); oN = (uint16_t*) ((uintptr_t) oN + min… in xnn_x16_transposec_ukernel__8x8_reuse_switch_zip_neon()
194 …vst1_lane_u32((void*) oN, vreinterpret_u32_u16(v3_low), 0); oN = (uint16_t*) ((uintptr_t) oN + min… in xnn_x16_transposec_ukernel__8x8_reuse_switch_zip_neon()
196 …vst1_lane_u32((void*) oN, vreinterpret_u32_u16(v2_low), 0); oN = (uint16_t*) ((uintptr_t) oN + min… in xnn_x16_transposec_ukernel__8x8_reuse_switch_zip_neon()
198 vst1_lane_u32((void*) oN, vreinterpret_u32_u16(v1_low), 0); in xnn_x16_transposec_ukernel__8x8_reuse_switch_zip_neon()
200 vst1_lane_u32((void*) o, vreinterpret_u32_u16(v0_low), 0); o += 2; in xnn_x16_transposec_ukernel__8x8_reuse_switch_zip_neon()
H A D8x8-reuse-dec-zip-neon.c202 vst1_lane_u32((void*) o, vreinterpret_u32_u16(v7_low), 0); in xnn_x16_transposec_ukernel__8x8_reuse_dec_zip_neon()
206 vst1_lane_u32((void*) o, vreinterpret_u32_u16(v6_low), 0); in xnn_x16_transposec_ukernel__8x8_reuse_dec_zip_neon()
210 vst1_lane_u32((void*) o, vreinterpret_u32_u16(v5_low), 0); in xnn_x16_transposec_ukernel__8x8_reuse_dec_zip_neon()
214 vst1_lane_u32((void*) o, vreinterpret_u32_u16(v4_low), 0); in xnn_x16_transposec_ukernel__8x8_reuse_dec_zip_neon()
218 vst1_lane_u32((void*) o, vreinterpret_u32_u16(v3_low), 0); in xnn_x16_transposec_ukernel__8x8_reuse_dec_zip_neon()
222 vst1_lane_u32((void*) o, vreinterpret_u32_u16(v2_low), 0); in xnn_x16_transposec_ukernel__8x8_reuse_dec_zip_neon()
226 vst1_lane_u32((void*) o, vreinterpret_u32_u16(v1_low), 0); in xnn_x16_transposec_ukernel__8x8_reuse_dec_zip_neon()
230 vst1_lane_u32((void*) o, vreinterpret_u32_u16(v0_low), 0); o += 2; in xnn_x16_transposec_ukernel__8x8_reuse_dec_zip_neon()
H A D8x8-multi-switch-zip-neon.c188 …vst1_lane_u32((void*) oN, vreinterpret_u32_u16(v7_low), 0); oN = (uint16_t*) ((uintptr_t) oN + min… in xnn_x16_transposec_ukernel__8x8_multi_switch_zip_neon()
190 …vst1_lane_u32((void*) oN, vreinterpret_u32_u16(v6_low), 0); oN = (uint16_t*) ((uintptr_t) oN + min… in xnn_x16_transposec_ukernel__8x8_multi_switch_zip_neon()
192 …vst1_lane_u32((void*) oN, vreinterpret_u32_u16(v5_low), 0); oN = (uint16_t*) ((uintptr_t) oN + min… in xnn_x16_transposec_ukernel__8x8_multi_switch_zip_neon()
194 …vst1_lane_u32((void*) oN, vreinterpret_u32_u16(v4_low), 0); oN = (uint16_t*) ((uintptr_t) oN + min… in xnn_x16_transposec_ukernel__8x8_multi_switch_zip_neon()
196 …vst1_lane_u32((void*) oN, vreinterpret_u32_u16(v3_low), 0); oN = (uint16_t*) ((uintptr_t) oN + min… in xnn_x16_transposec_ukernel__8x8_multi_switch_zip_neon()
198 …vst1_lane_u32((void*) oN, vreinterpret_u32_u16(v2_low), 0); oN = (uint16_t*) ((uintptr_t) oN + min… in xnn_x16_transposec_ukernel__8x8_multi_switch_zip_neon()
200 vst1_lane_u32((void*) oN, vreinterpret_u32_u16(v1_low), 0); in xnn_x16_transposec_ukernel__8x8_multi_switch_zip_neon()
202 vst1_lane_u32((void*) o, vreinterpret_u32_u16(v0_low), 0); o += 2; in xnn_x16_transposec_ukernel__8x8_multi_switch_zip_neon()
H A D8x8-reuse-mov-zip-neon.c216 vst1_lane_u32((void*) o, vreinterpret_u32_u16(v7_low), 0); in xnn_x16_transposec_ukernel__8x8_reuse_mov_zip_neon()
221 vst1_lane_u32((void*) o, vreinterpret_u32_u16(v6_low), 0); in xnn_x16_transposec_ukernel__8x8_reuse_mov_zip_neon()
226 vst1_lane_u32((void*) o, vreinterpret_u32_u16(v5_low), 0); in xnn_x16_transposec_ukernel__8x8_reuse_mov_zip_neon()
231 vst1_lane_u32((void*) o, vreinterpret_u32_u16(v4_low), 0); in xnn_x16_transposec_ukernel__8x8_reuse_mov_zip_neon()
236 vst1_lane_u32((void*) o, vreinterpret_u32_u16(v3_low), 0); in xnn_x16_transposec_ukernel__8x8_reuse_mov_zip_neon()
241 vst1_lane_u32((void*) o, vreinterpret_u32_u16(v2_low), 0); in xnn_x16_transposec_ukernel__8x8_reuse_mov_zip_neon()
246 vst1_lane_u32((void*) o, vreinterpret_u32_u16(v1_low), 0); in xnn_x16_transposec_ukernel__8x8_reuse_mov_zip_neon()
251 vst1_lane_u32((void*) o, vreinterpret_u32_u16(v0_low), 0); o += 2; in xnn_x16_transposec_ukernel__8x8_reuse_mov_zip_neon()
H A D8x8-multi-dec-zip-neon.c204 vst1_lane_u32((void*) o, vreinterpret_u32_u16(v7_low), 0); in xnn_x16_transposec_ukernel__8x8_multi_dec_zip_neon()
208 vst1_lane_u32((void*) o, vreinterpret_u32_u16(v6_low), 0); in xnn_x16_transposec_ukernel__8x8_multi_dec_zip_neon()
212 vst1_lane_u32((void*) o, vreinterpret_u32_u16(v5_low), 0); in xnn_x16_transposec_ukernel__8x8_multi_dec_zip_neon()
216 vst1_lane_u32((void*) o, vreinterpret_u32_u16(v4_low), 0); in xnn_x16_transposec_ukernel__8x8_multi_dec_zip_neon()
220 vst1_lane_u32((void*) o, vreinterpret_u32_u16(v3_low), 0); in xnn_x16_transposec_ukernel__8x8_multi_dec_zip_neon()
224 vst1_lane_u32((void*) o, vreinterpret_u32_u16(v2_low), 0); in xnn_x16_transposec_ukernel__8x8_multi_dec_zip_neon()
228 vst1_lane_u32((void*) o, vreinterpret_u32_u16(v1_low), 0); in xnn_x16_transposec_ukernel__8x8_multi_dec_zip_neon()
232 vst1_lane_u32((void*) o, vreinterpret_u32_u16(v0_low), 0); o += 2; in xnn_x16_transposec_ukernel__8x8_multi_dec_zip_neon()
H A D8x8-multi-mov-zip-neon.c218 vst1_lane_u32((void*) o, vreinterpret_u32_u16(v7_low), 0); in xnn_x16_transposec_ukernel__8x8_multi_mov_zip_neon()
223 vst1_lane_u32((void*) o, vreinterpret_u32_u16(v6_low), 0); in xnn_x16_transposec_ukernel__8x8_multi_mov_zip_neon()
228 vst1_lane_u32((void*) o, vreinterpret_u32_u16(v5_low), 0); in xnn_x16_transposec_ukernel__8x8_multi_mov_zip_neon()
233 vst1_lane_u32((void*) o, vreinterpret_u32_u16(v4_low), 0); in xnn_x16_transposec_ukernel__8x8_multi_mov_zip_neon()
238 vst1_lane_u32((void*) o, vreinterpret_u32_u16(v3_low), 0); in xnn_x16_transposec_ukernel__8x8_multi_mov_zip_neon()
243 vst1_lane_u32((void*) o, vreinterpret_u32_u16(v2_low), 0); in xnn_x16_transposec_ukernel__8x8_multi_mov_zip_neon()
248 vst1_lane_u32((void*) o, vreinterpret_u32_u16(v1_low), 0); in xnn_x16_transposec_ukernel__8x8_multi_mov_zip_neon()
253 vst1_lane_u32((void*) o, vreinterpret_u32_u16(v0_low), 0); o += 2; in xnn_x16_transposec_ukernel__8x8_multi_mov_zip_neon()
H A D4x4-reuse-multi-zip-neon.c97 vst1_lane_u32((void*) o3, vreinterpret_u32_u16(v3_low), 0); o3 += 2; in xnn_x16_transposec_ukernel__4x4_reuse_multi_zip_neon()
98 vst1_lane_u32((void*) o2, vreinterpret_u32_u16(v2_low), 0); o2 += 2; in xnn_x16_transposec_ukernel__4x4_reuse_multi_zip_neon()
99 vst1_lane_u32((void*) o1, vreinterpret_u32_u16(v1_low), 0); o1 += 2; in xnn_x16_transposec_ukernel__4x4_reuse_multi_zip_neon()
100 vst1_lane_u32((void*) o0, vreinterpret_u32_u16(v0_low), 0); o0 += 2; in xnn_x16_transposec_ukernel__4x4_reuse_multi_zip_neon()
H A D4x4-reuse-switch-zip-neon.c101 …vst1_lane_u32((void*) oN, vreinterpret_u32_u16(v3_low), 0); oN = (uint16_t*) ((uintptr_t) oN + min… in xnn_x16_transposec_ukernel__4x4_reuse_switch_zip_neon()
103 …vst1_lane_u32((void*) oN, vreinterpret_u32_u16(v2_low), 0); oN = (uint16_t*) ((uintptr_t) oN + min… in xnn_x16_transposec_ukernel__4x4_reuse_switch_zip_neon()
105 vst1_lane_u32((void*) oN, vreinterpret_u32_u16(v1_low), 0); in xnn_x16_transposec_ukernel__4x4_reuse_switch_zip_neon()
107 vst1_lane_u32((void*) o, vreinterpret_u32_u16(v0_low), 0); o += 2; in xnn_x16_transposec_ukernel__4x4_reuse_switch_zip_neon()
H A D4x4-multi-multi-zip-neon.c99 vst1_lane_u32((void*) o3, vreinterpret_u32_u16(v3_low), 0); o3 += 2; in xnn_x16_transposec_ukernel__4x4_multi_multi_zip_neon()
100 vst1_lane_u32((void*) o2, vreinterpret_u32_u16(v2_low), 0); o2 += 2; in xnn_x16_transposec_ukernel__4x4_multi_multi_zip_neon()
101 vst1_lane_u32((void*) o1, vreinterpret_u32_u16(v1_low), 0); o1 += 2; in xnn_x16_transposec_ukernel__4x4_multi_multi_zip_neon()
102 vst1_lane_u32((void*) o0, vreinterpret_u32_u16(v0_low), 0); o0 += 2; in xnn_x16_transposec_ukernel__4x4_multi_multi_zip_neon()
H A D4x4-reuse-dec-zip-neon.c101 vst1_lane_u32((void*) o, vreinterpret_u32_u16(v3_low), 0); in xnn_x16_transposec_ukernel__4x4_reuse_dec_zip_neon()
105 vst1_lane_u32((void*) o, vreinterpret_u32_u16(v2_low), 0); in xnn_x16_transposec_ukernel__4x4_reuse_dec_zip_neon()
109 vst1_lane_u32((void*) o, vreinterpret_u32_u16(v1_low), 0); in xnn_x16_transposec_ukernel__4x4_reuse_dec_zip_neon()
113 vst1_lane_u32((void*) o, vreinterpret_u32_u16(v0_low), 0); o += 2; in xnn_x16_transposec_ukernel__4x4_reuse_dec_zip_neon()
H A D4x4-multi-switch-zip-neon.c103 …vst1_lane_u32((void*) oN, vreinterpret_u32_u16(v3_low), 0); oN = (uint16_t*) ((uintptr_t) oN + min… in xnn_x16_transposec_ukernel__4x4_multi_switch_zip_neon()
105 …vst1_lane_u32((void*) oN, vreinterpret_u32_u16(v2_low), 0); oN = (uint16_t*) ((uintptr_t) oN + min… in xnn_x16_transposec_ukernel__4x4_multi_switch_zip_neon()
107 vst1_lane_u32((void*) oN, vreinterpret_u32_u16(v1_low), 0); in xnn_x16_transposec_ukernel__4x4_multi_switch_zip_neon()
109 vst1_lane_u32((void*) o, vreinterpret_u32_u16(v0_low), 0); o += 2; in xnn_x16_transposec_ukernel__4x4_multi_switch_zip_neon()
H A D4x4-multi-dec-zip-neon.c103 vst1_lane_u32((void*) o, vreinterpret_u32_u16(v3_low), 0); in xnn_x16_transposec_ukernel__4x4_multi_dec_zip_neon()
107 vst1_lane_u32((void*) o, vreinterpret_u32_u16(v2_low), 0); in xnn_x16_transposec_ukernel__4x4_multi_dec_zip_neon()
111 vst1_lane_u32((void*) o, vreinterpret_u32_u16(v1_low), 0); in xnn_x16_transposec_ukernel__4x4_multi_dec_zip_neon()
115 vst1_lane_u32((void*) o, vreinterpret_u32_u16(v0_low), 0); o += 2; in xnn_x16_transposec_ukernel__4x4_multi_dec_zip_neon()
H A D4x4-reuse-mov-zip-neon.c104 vst1_lane_u32((void*) o, vreinterpret_u32_u16(v3_low), 0); in xnn_x16_transposec_ukernel__4x4_reuse_mov_zip_neon()
109 vst1_lane_u32((void*) o, vreinterpret_u32_u16(v2_low), 0); in xnn_x16_transposec_ukernel__4x4_reuse_mov_zip_neon()
114 vst1_lane_u32((void*) o, vreinterpret_u32_u16(v1_low), 0); in xnn_x16_transposec_ukernel__4x4_reuse_mov_zip_neon()
119 vst1_lane_u32((void*) o, vreinterpret_u32_u16(v0_low), 0); o += 2; in xnn_x16_transposec_ukernel__4x4_reuse_mov_zip_neon()
H A D4x4-multi-mov-zip-neon.c106 vst1_lane_u32((void*) o, vreinterpret_u32_u16(v3_low), 0); in xnn_x16_transposec_ukernel__4x4_multi_mov_zip_neon()
111 vst1_lane_u32((void*) o, vreinterpret_u32_u16(v2_low), 0); in xnn_x16_transposec_ukernel__4x4_multi_mov_zip_neon()
116 vst1_lane_u32((void*) o, vreinterpret_u32_u16(v1_low), 0); in xnn_x16_transposec_ukernel__4x4_multi_mov_zip_neon()
121 vst1_lane_u32((void*) o, vreinterpret_u32_u16(v0_low), 0); o += 2; in xnn_x16_transposec_ukernel__4x4_multi_mov_zip_neon()
/aosp_15_r20/external/libgav1/src/dsp/arm/
H A Dcommon_neon.h236 vld1_lane_u32(&temp, vreinterpret_u32_u16(val), lane)); in Load2()
384 ValueToMem<uint32_t>(buf, vget_lane_u32(vreinterpret_u32_u16(val), lane)); in Store2()
782 vtrn_u32(vreinterpret_u32_u16(b.val[0]), vreinterpret_u32_u16(c.val[0])); in Transpose4x4()
787 vtrn_u32(vreinterpret_u32_u16(b.val[1]), vreinterpret_u32_u16(c.val[1])); in Transpose4x4()
805 vtrn_u32(vreinterpret_u32_u16(c.val[0]), vreinterpret_u32_u16(c.val[1])); in Transpose4x4()
/aosp_15_r20/external/gemmlowp/internal/
H A Doutput_neon.h675 c[0] = vtrn_u32(vreinterpret_u32_u16(b[0].val[0]),
676 vreinterpret_u32_u16(b[2].val[0]));
677 c[1] = vtrn_u32(vreinterpret_u32_u16(b[1].val[0]),
678 vreinterpret_u32_u16(b[3].val[0]));
679 c[2] = vtrn_u32(vreinterpret_u32_u16(b[0].val[1]),
680 vreinterpret_u32_u16(b[2].val[1]));
681 c[3] = vtrn_u32(vreinterpret_u32_u16(b[1].val[1]),
682 vreinterpret_u32_u16(b[3].val[1]));
/aosp_15_r20/external/XNNPACK/src/bf16-gemm/gen/
H A D5x4c8-minmax-neonfma-zip.c387 vst1_lane_u32((void*) c0, vreinterpret_u32_u16(vout0x0123), 0); c0 += 2; in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_zip()
388 vst1_lane_u32((void*) c1, vreinterpret_u32_u16(vout1x0123), 0); c1 += 2; in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_zip()
389 vst1_lane_u32((void*) c2, vreinterpret_u32_u16(vout2x0123), 0); c2 += 2; in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_zip()
390 vst1_lane_u32((void*) c3, vreinterpret_u32_u16(vout3x0123), 0); c3 += 2; in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_zip()
391 vst1_lane_u32((void*) c4, vreinterpret_u32_u16(vout4x0123), 0); c4 += 2; in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_zip()
H A D5x4c8-minmax-neonfma-shland.c387 vst1_lane_u32((void*) c0, vreinterpret_u32_u16(vout0x0123), 0); c0 += 2; in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_shland()
388 vst1_lane_u32((void*) c1, vreinterpret_u32_u16(vout1x0123), 0); c1 += 2; in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_shland()
389 vst1_lane_u32((void*) c2, vreinterpret_u32_u16(vout2x0123), 0); c2 += 2; in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_shland()
390 vst1_lane_u32((void*) c3, vreinterpret_u32_u16(vout3x0123), 0); c3 += 2; in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_shland()
391 vst1_lane_u32((void*) c4, vreinterpret_u32_u16(vout4x0123), 0); c4 += 2; in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonfma_shland()
H A D4x4c8-minmax-neonfma-shland.c331 vst1_lane_u32((void*) c0, vreinterpret_u32_u16(vout0x0123), 0); c0 += 2; in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_shland()
332 vst1_lane_u32((void*) c1, vreinterpret_u32_u16(vout1x0123), 0); c1 += 2; in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_shland()
333 vst1_lane_u32((void*) c2, vreinterpret_u32_u16(vout2x0123), 0); c2 += 2; in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_shland()
334 vst1_lane_u32((void*) c3, vreinterpret_u32_u16(vout3x0123), 0); c3 += 2; in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_shland()
H A D4x4c8-minmax-neonfma-zip.c331 vst1_lane_u32((void*) c0, vreinterpret_u32_u16(vout0x0123), 0); c0 += 2; in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_zip()
332 vst1_lane_u32((void*) c1, vreinterpret_u32_u16(vout1x0123), 0); c1 += 2; in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_zip()
333 vst1_lane_u32((void*) c2, vreinterpret_u32_u16(vout2x0123), 0); c2 += 2; in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_zip()
334 vst1_lane_u32((void*) c3, vreinterpret_u32_u16(vout3x0123), 0); c3 += 2; in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_zip()

123