c2-neon-mull-dup.c.in - OpenGrok cross reference for /aosp_15_r20/external/XNNPACK/src/qs8-igemm/c2-neon-mull-dup.c.in

Lines Matching full:va
89               const int16x4x4_t va${M}x0 = vld4_dup_s16((const void*)a${M}); a${M} += 8;
90               const int16x4x4_t va${M}x1 = vld4_dup_s16((const void*)a${M}); a${M} += 8;
92               const int16x4x2_t va${M}0x0 = vld2_dup_s16((const void*)a${M});
93               const int16x4x2_t va${M}1x0 = vld2_dup_s16((const void*)(a${M} + 4)); a${M} += 8;
94               const int16x4x2_t va${M}0x1 = vld2_dup_s16((const void*)a${M});
95               const int16x4x2_t va${M}1x1 = vld2_dup_s16((const void*)(a${M} + 4)); a${M} += 8;
97               const int16x4_t va${M}0x0 = vld1_dup_s16((const void*)a${M});
98               const int16x4_t va${M}1x0 = vld1_dup_s16((const void*)(a${M} + 2));
99               const int16x4_t va${M}2x0 = vld1_dup_s16((const void*)(a${M} + 4));
100               const int16x4_t va${M}3x0 = vld1_dup_s16((const void*)(a${M} + 6)); a${M} += 8;
101               const int16x4_t va${M}0x1 = vld1_dup_s16((const void*)a${M});
102               const int16x4_t va${M}1x1 = vld1_dup_s16((const void*)(a${M} + 2));
103               const int16x4_t va${M}2x1 = vld1_dup_s16((const void*)(a${M} + 4));
104               const int16x4_t va${M}3x1 = vld1_dup_s16((const void*)(a${M} + 6)); a${M} += 8;
106               const int8x8_t va${M}x0 = vld1_s8(a${M}); a${M} += 8;
107               const int8x8_t va${M}x1 = vld1_s8(a${M}); a${M} += 8;
116                 const int8x8_t va${M}c${K}x0 = vreinterpret_s8_s16(va${M}x0.val[${K}]);
117                 const int8x8_t va${M}c${K}x1 = vreinterpret_s8_s16(va${M}x1.val[${K}]);
119                 const int8x8_t va${M}c${K}x0 = vreinterpret_s8_s16(va${M}${int(K/2)}x0.val[${K%2}]);
120                 const int8x8_t va${M}c${K}x1 = vreinterpret_s8_s16(va${M}${int(K/2)}x1.val[${K%2}]);
122                 const int8x8_t va${M}c${K}x0 = vreinterpret_s8_s16(va${M}${K}x0);
123                 const int8x8_t va${M}c${K}x1 = vreinterpret_s8_s16(va${M}${K}x1);
125 …const int8x8_t va${M}c${K}x0 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va${M}x0), ${…
126 …const int8x8_t va${M}c${K}x1 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va${M}x1), ${…
130 …          int16x8_t vprod${M}x${ABC[N:N+4]}c${K} = vmull_s8(vb${ABC[N:N+4]}c${K}x0, va${M}c${K}x0);
133 …${ABC[N:N+4]}c${K} = vmlal_s8(vprod${M}x${ABC[N:N+4]}c${K}, vb${ABC[N:N+4]}c${K}x1, va${M}c${K}x1);
143             const int16x4x4_t va${M} = vld4_dup_s16((const void*)a${M}); a${M} += 8;
145             const int16x4x2_t va${M}0 = vld2_dup_s16((const void*)a${M});
146             const int16x4x2_t va${M}1 = vld2_dup_s16((const void*)(a${M} + 4)); a${M} += 8;
148             const int16x4_t va${M}0 = vld1_dup_s16((const void*)a${M});
149             const int16x4_t va${M}1 = vld1_dup_s16((const void*)(a${M} + 2));
150             const int16x4_t va${M}2 = vld1_dup_s16((const void*)(a${M} + 4));
151             const int16x4_t va${M}3 = vld1_dup_s16((const void*)(a${M} + 6)); a${M} += 8;
153             const int8x8_t va${M} = vld1_s8(a${M}); a${M} += 8;
162               const int8x8_t va${M}c${K} = vreinterpret_s8_s16(va${M}.val[${K}]);
164               const int8x8_t va${M}c${K} = vreinterpret_s8_s16(va${M}${int(K/2)}.val[${K%2}]);
166               const int8x8_t va${M}c${K} = vreinterpret_s8_s16(va${M}${K});
168 …const int8x8_t va${M}c${K} = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va${M}), ${K}));
172 …        const int16x8_t vprod${M}x${ABC[N:N+4]}c${K} = vmull_s8(vb${ABC[N:N+4]}c${K}, va${M}c${K});
181           const int8x8_t va${M} = vld1_s8(a${M}); a${M} = (const int8_t*) ((uintptr_t) a${M} + k);
187 …      const int8x8_t va${M}c0 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va${M}), 0));
189             const int16x8_t vprod${M}x${ABC[N:N+4]}c0 = vmull_s8(vb${ABC[N:N+4]}c0, va${M}c0);
197 …      const int8x8_t va${M}c1 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va${M}), 1));
199               const int16x8_t vprod${M}x${ABC[N:N+4]}c1 = vmull_s8(vb${ABC[N:N+4]}c1, va${M}c1);
207 …      const int8x8_t va${M}c2 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va${M}), 2));
209                 const int16x8_t vprod${M}x${ABC[N:N+4]}c2 = vmull_s8(vb${ABC[N:N+4]}c2, va${M}c2);