xref: /aosp_15_r20/external/clang/test/CodeGen/aarch64-neon-tbl.c (revision 67e74705e28f6214e480b399dd47ea732279e315)
1*67e74705SXin Li // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
2*67e74705SXin Li // RUN:   -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
3*67e74705SXin Li 
4*67e74705SXin Li // Test new aarch64 intrinsics and types
5*67e74705SXin Li 
6*67e74705SXin Li #include <arm_neon.h>
7*67e74705SXin Li 
8*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vtbl1_s8(<8 x i8> %a, <8 x i8> %b) #0 {
9*67e74705SXin Li // CHECK:   [[VTBL1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10*67e74705SXin Li // CHECK:   [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %b) #2
11*67e74705SXin Li // CHECK:   ret <8 x i8> [[VTBL11_I]]
test_vtbl1_s8(int8x8_t a,int8x8_t b)12*67e74705SXin Li int8x8_t test_vtbl1_s8(int8x8_t a, int8x8_t b) {
13*67e74705SXin Li   return vtbl1_s8(a, b);
14*67e74705SXin Li }
15*67e74705SXin Li 
16*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vqtbl1_s8(<16 x i8> %a, <8 x i8> %b) #0 {
17*67e74705SXin Li // CHECK:   [[VTBL1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %a, <8 x i8> %b) #2
18*67e74705SXin Li // CHECK:   ret <8 x i8> [[VTBL1_I]]
test_vqtbl1_s8(int8x16_t a,int8x8_t b)19*67e74705SXin Li int8x8_t test_vqtbl1_s8(int8x16_t a, int8x8_t b) {
20*67e74705SXin Li   return vqtbl1_s8(a, b);
21*67e74705SXin Li }
22*67e74705SXin Li 
23*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vtbl2_s8([2 x <8 x i8>] %a.coerce, <8 x i8> %b) #0 {
24*67e74705SXin Li // CHECK:   [[__P0_I:%.*]] = alloca %struct.int8x8x2_t, align 8
25*67e74705SXin Li // CHECK:   [[A:%.*]] = alloca %struct.int8x8x2_t, align 8
26*67e74705SXin Li // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[A]], i32 0, i32 0
27*67e74705SXin Li // CHECK:   store [2 x <8 x i8>] [[A]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
28*67e74705SXin Li // CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[A]], i32 0, i32 0
29*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = load [2 x <8 x i8>], [2 x <8 x i8>]* [[COERCE_DIVE1]], align 8
30*67e74705SXin Li // CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__P0_I]], i32 0, i32 0
31*67e74705SXin Li // CHECK:   store [2 x <8 x i8>] [[TMP0]], [2 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
32*67e74705SXin Li // CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__P0_I]], i32 0, i32 0
33*67e74705SXin Li // CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
34*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
35*67e74705SXin Li // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__P0_I]], i32 0, i32 0
36*67e74705SXin Li // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
37*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
38*67e74705SXin Li // CHECK:   [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
39*67e74705SXin Li // CHECK:   [[VTBL13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %b) #2
40*67e74705SXin Li // CHECK:   ret <8 x i8> [[VTBL13_I]]
test_vtbl2_s8(int8x8x2_t a,int8x8_t b)41*67e74705SXin Li int8x8_t test_vtbl2_s8(int8x8x2_t a, int8x8_t b) {
42*67e74705SXin Li   return vtbl2_s8(a, b);
43*67e74705SXin Li }
44*67e74705SXin Li 
45*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vqtbl2_s8([2 x <16 x i8>] %a.coerce, <8 x i8> %b) #0 {
46*67e74705SXin Li // CHECK:   [[__P0_I:%.*]] = alloca %struct.int8x16x2_t, align 16
47*67e74705SXin Li // CHECK:   [[A:%.*]] = alloca %struct.int8x16x2_t, align 16
48*67e74705SXin Li // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[A]], i32 0, i32 0
49*67e74705SXin Li // CHECK:   store [2 x <16 x i8>] [[A]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
50*67e74705SXin Li // CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[A]], i32 0, i32 0
51*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16
52*67e74705SXin Li // CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__P0_I]], i32 0, i32 0
53*67e74705SXin Li // CHECK:   store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
54*67e74705SXin Li // CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__P0_I]], i32 0, i32 0
55*67e74705SXin Li // CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
56*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
57*67e74705SXin Li // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__P0_I]], i32 0, i32 0
58*67e74705SXin Li // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
59*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
60*67e74705SXin Li // CHECK:   [[VTBL2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> %b) #2
61*67e74705SXin Li // CHECK:   ret <8 x i8> [[VTBL2_I]]
test_vqtbl2_s8(int8x16x2_t a,int8x8_t b)62*67e74705SXin Li int8x8_t test_vqtbl2_s8(int8x16x2_t a, int8x8_t b) {
63*67e74705SXin Li   return vqtbl2_s8(a, b);
64*67e74705SXin Li }
65*67e74705SXin Li 
66*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vtbl3_s8([3 x <8 x i8>] %a.coerce, <8 x i8> %b) #0 {
67*67e74705SXin Li // CHECK:   [[__P0_I:%.*]] = alloca %struct.int8x8x3_t, align 8
68*67e74705SXin Li // CHECK:   [[A:%.*]] = alloca %struct.int8x8x3_t, align 8
69*67e74705SXin Li // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[A]], i32 0, i32 0
70*67e74705SXin Li // CHECK:   store [3 x <8 x i8>] [[A]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
71*67e74705SXin Li // CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[A]], i32 0, i32 0
72*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = load [3 x <8 x i8>], [3 x <8 x i8>]* [[COERCE_DIVE1]], align 8
73*67e74705SXin Li // CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P0_I]], i32 0, i32 0
74*67e74705SXin Li // CHECK:   store [3 x <8 x i8>] [[TMP0]], [3 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
75*67e74705SXin Li // CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P0_I]], i32 0, i32 0
76*67e74705SXin Li // CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
77*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
78*67e74705SXin Li // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P0_I]], i32 0, i32 0
79*67e74705SXin Li // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
80*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
81*67e74705SXin Li // CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P0_I]], i32 0, i32 0
82*67e74705SXin Li // CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i64 0, i64 2
83*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
84*67e74705SXin Li // CHECK:   [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
85*67e74705SXin Li // CHECK:   [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
86*67e74705SXin Li // CHECK:   [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> %b) #2
87*67e74705SXin Li // CHECK:   ret <8 x i8> [[VTBL26_I]]
test_vtbl3_s8(int8x8x3_t a,int8x8_t b)88*67e74705SXin Li int8x8_t test_vtbl3_s8(int8x8x3_t a, int8x8_t b) {
89*67e74705SXin Li   return vtbl3_s8(a, b);
90*67e74705SXin Li }
91*67e74705SXin Li 
92*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vqtbl3_s8([3 x <16 x i8>] %a.coerce, <8 x i8> %b) #0 {
93*67e74705SXin Li // CHECK:   [[__P0_I:%.*]] = alloca %struct.int8x16x3_t, align 16
94*67e74705SXin Li // CHECK:   [[A:%.*]] = alloca %struct.int8x16x3_t, align 16
95*67e74705SXin Li // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[A]], i32 0, i32 0
96*67e74705SXin Li // CHECK:   store [3 x <16 x i8>] [[A]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
97*67e74705SXin Li // CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[A]], i32 0, i32 0
98*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16
99*67e74705SXin Li // CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__P0_I]], i32 0, i32 0
100*67e74705SXin Li // CHECK:   store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
101*67e74705SXin Li // CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__P0_I]], i32 0, i32 0
102*67e74705SXin Li // CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
103*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
104*67e74705SXin Li // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__P0_I]], i32 0, i32 0
105*67e74705SXin Li // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
106*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
107*67e74705SXin Li // CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__P0_I]], i32 0, i32 0
108*67e74705SXin Li // CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
109*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
110*67e74705SXin Li // CHECK:   [[VTBL3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> %b) #2
111*67e74705SXin Li // CHECK:   ret <8 x i8> [[VTBL3_I]]
test_vqtbl3_s8(int8x16x3_t a,int8x8_t b)112*67e74705SXin Li int8x8_t test_vqtbl3_s8(int8x16x3_t a, int8x8_t b) {
113*67e74705SXin Li   return vqtbl3_s8(a, b);
114*67e74705SXin Li }
115*67e74705SXin Li 
116*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vtbl4_s8([4 x <8 x i8>] %a.coerce, <8 x i8> %b) #0 {
117*67e74705SXin Li // CHECK:   [[__P0_I:%.*]] = alloca %struct.int8x8x4_t, align 8
118*67e74705SXin Li // CHECK:   [[A:%.*]] = alloca %struct.int8x8x4_t, align 8
119*67e74705SXin Li // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[A]], i32 0, i32 0
120*67e74705SXin Li // CHECK:   store [4 x <8 x i8>] [[A]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
121*67e74705SXin Li // CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[A]], i32 0, i32 0
122*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = load [4 x <8 x i8>], [4 x <8 x i8>]* [[COERCE_DIVE1]], align 8
123*67e74705SXin Li // CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P0_I]], i32 0, i32 0
124*67e74705SXin Li // CHECK:   store [4 x <8 x i8>] [[TMP0]], [4 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
125*67e74705SXin Li // CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P0_I]], i32 0, i32 0
126*67e74705SXin Li // CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
127*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
128*67e74705SXin Li // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P0_I]], i32 0, i32 0
129*67e74705SXin Li // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
130*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
131*67e74705SXin Li // CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P0_I]], i32 0, i32 0
132*67e74705SXin Li // CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i64 0, i64 2
133*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
134*67e74705SXin Li // CHECK:   [[VAL5_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P0_I]], i32 0, i32 0
135*67e74705SXin Li // CHECK:   [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i64 0, i64 3
136*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
137*67e74705SXin Li // CHECK:   [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
138*67e74705SXin Li // CHECK:   [[VTBL27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
139*67e74705SXin Li // CHECK:   [[VTBL28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL27_I]], <8 x i8> %b) #2
140*67e74705SXin Li // CHECK:   ret <8 x i8> [[VTBL28_I]]
test_vtbl4_s8(int8x8x4_t a,int8x8_t b)141*67e74705SXin Li int8x8_t test_vtbl4_s8(int8x8x4_t a, int8x8_t b) {
142*67e74705SXin Li   return vtbl4_s8(a, b);
143*67e74705SXin Li }
144*67e74705SXin Li 
145*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vqtbl4_s8([4 x <16 x i8>] %a.coerce, <8 x i8> %b) #0 {
146*67e74705SXin Li // CHECK:   [[__P0_I:%.*]] = alloca %struct.int8x16x4_t, align 16
147*67e74705SXin Li // CHECK:   [[A:%.*]] = alloca %struct.int8x16x4_t, align 16
148*67e74705SXin Li // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[A]], i32 0, i32 0
149*67e74705SXin Li // CHECK:   store [4 x <16 x i8>] [[A]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
150*67e74705SXin Li // CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[A]], i32 0, i32 0
151*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16
152*67e74705SXin Li // CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__P0_I]], i32 0, i32 0
153*67e74705SXin Li // CHECK:   store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
154*67e74705SXin Li // CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__P0_I]], i32 0, i32 0
155*67e74705SXin Li // CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
156*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
157*67e74705SXin Li // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__P0_I]], i32 0, i32 0
158*67e74705SXin Li // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
159*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
160*67e74705SXin Li // CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__P0_I]], i32 0, i32 0
161*67e74705SXin Li // CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
162*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
163*67e74705SXin Li // CHECK:   [[VAL5_I:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__P0_I]], i32 0, i32 0
164*67e74705SXin Li // CHECK:   [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
165*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
166*67e74705SXin Li // CHECK:   [[VTBL4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> %b) #2
167*67e74705SXin Li // CHECK:   ret <8 x i8> [[VTBL4_I]]
test_vqtbl4_s8(int8x16x4_t a,int8x8_t b)168*67e74705SXin Li int8x8_t test_vqtbl4_s8(int8x16x4_t a, int8x8_t b) {
169*67e74705SXin Li   return vqtbl4_s8(a, b);
170*67e74705SXin Li }
171*67e74705SXin Li 
172*67e74705SXin Li // CHECK-LABEL: define <16 x i8> @test_vqtbl1q_s8(<16 x i8> %a, <16 x i8> %b) #0 {
173*67e74705SXin Li // CHECK:   [[VTBL1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> %a, <16 x i8> %b) #2
174*67e74705SXin Li // CHECK:   ret <16 x i8> [[VTBL1_I]]
test_vqtbl1q_s8(int8x16_t a,int8x16_t b)175*67e74705SXin Li int8x16_t test_vqtbl1q_s8(int8x16_t a, int8x16_t b) {
176*67e74705SXin Li   return vqtbl1q_s8(a, b);
177*67e74705SXin Li }
178*67e74705SXin Li 
179*67e74705SXin Li // CHECK-LABEL: define <16 x i8> @test_vqtbl2q_s8([2 x <16 x i8>] %a.coerce, <16 x i8> %b) #0 {
180*67e74705SXin Li // CHECK:   [[__P0_I:%.*]] = alloca %struct.int8x16x2_t, align 16
181*67e74705SXin Li // CHECK:   [[A:%.*]] = alloca %struct.int8x16x2_t, align 16
182*67e74705SXin Li // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[A]], i32 0, i32 0
183*67e74705SXin Li // CHECK:   store [2 x <16 x i8>] [[A]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
184*67e74705SXin Li // CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[A]], i32 0, i32 0
185*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16
186*67e74705SXin Li // CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__P0_I]], i32 0, i32 0
187*67e74705SXin Li // CHECK:   store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
188*67e74705SXin Li // CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__P0_I]], i32 0, i32 0
189*67e74705SXin Li // CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
190*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
191*67e74705SXin Li // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__P0_I]], i32 0, i32 0
192*67e74705SXin Li // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
193*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
194*67e74705SXin Li // CHECK:   [[VTBL2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> %b) #2
195*67e74705SXin Li // CHECK:   ret <16 x i8> [[VTBL2_I]]
test_vqtbl2q_s8(int8x16x2_t a,int8x16_t b)196*67e74705SXin Li int8x16_t test_vqtbl2q_s8(int8x16x2_t a, int8x16_t b) {
197*67e74705SXin Li   return vqtbl2q_s8(a, b);
198*67e74705SXin Li }
199*67e74705SXin Li 
200*67e74705SXin Li // CHECK-LABEL: define <16 x i8> @test_vqtbl3q_s8([3 x <16 x i8>] %a.coerce, <16 x i8> %b) #0 {
201*67e74705SXin Li // CHECK:   [[__P0_I:%.*]] = alloca %struct.int8x16x3_t, align 16
202*67e74705SXin Li // CHECK:   [[A:%.*]] = alloca %struct.int8x16x3_t, align 16
203*67e74705SXin Li // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[A]], i32 0, i32 0
204*67e74705SXin Li // CHECK:   store [3 x <16 x i8>] [[A]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
205*67e74705SXin Li // CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[A]], i32 0, i32 0
206*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16
207*67e74705SXin Li // CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__P0_I]], i32 0, i32 0
208*67e74705SXin Li // CHECK:   store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
209*67e74705SXin Li // CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__P0_I]], i32 0, i32 0
210*67e74705SXin Li // CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
211*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
212*67e74705SXin Li // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__P0_I]], i32 0, i32 0
213*67e74705SXin Li // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
214*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
215*67e74705SXin Li // CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__P0_I]], i32 0, i32 0
216*67e74705SXin Li // CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
217*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
218*67e74705SXin Li // CHECK:   [[VTBL3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> %b) #2
219*67e74705SXin Li // CHECK:   ret <16 x i8> [[VTBL3_I]]
test_vqtbl3q_s8(int8x16x3_t a,int8x16_t b)220*67e74705SXin Li int8x16_t test_vqtbl3q_s8(int8x16x3_t a, int8x16_t b) {
221*67e74705SXin Li   return vqtbl3q_s8(a, b);
222*67e74705SXin Li }
223*67e74705SXin Li 
224*67e74705SXin Li // CHECK-LABEL: define <16 x i8> @test_vqtbl4q_s8([4 x <16 x i8>] %a.coerce, <16 x i8> %b) #0 {
225*67e74705SXin Li // CHECK:   [[__P0_I:%.*]] = alloca %struct.int8x16x4_t, align 16
226*67e74705SXin Li // CHECK:   [[A:%.*]] = alloca %struct.int8x16x4_t, align 16
227*67e74705SXin Li // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[A]], i32 0, i32 0
228*67e74705SXin Li // CHECK:   store [4 x <16 x i8>] [[A]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
229*67e74705SXin Li // CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[A]], i32 0, i32 0
230*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16
231*67e74705SXin Li // CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__P0_I]], i32 0, i32 0
232*67e74705SXin Li // CHECK:   store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
233*67e74705SXin Li // CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__P0_I]], i32 0, i32 0
234*67e74705SXin Li // CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
235*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
236*67e74705SXin Li // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__P0_I]], i32 0, i32 0
237*67e74705SXin Li // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
238*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
239*67e74705SXin Li // CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__P0_I]], i32 0, i32 0
240*67e74705SXin Li // CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
241*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
242*67e74705SXin Li // CHECK:   [[VAL5_I:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__P0_I]], i32 0, i32 0
243*67e74705SXin Li // CHECK:   [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
244*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
245*67e74705SXin Li // CHECK:   [[VTBL4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> %b) #2
246*67e74705SXin Li // CHECK:   ret <16 x i8> [[VTBL4_I]]
test_vqtbl4q_s8(int8x16x4_t a,int8x16_t b)247*67e74705SXin Li int8x16_t test_vqtbl4q_s8(int8x16x4_t a, int8x16_t b) {
248*67e74705SXin Li   return vqtbl4q_s8(a, b);
249*67e74705SXin Li }
250*67e74705SXin Li 
251*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vtbx1_s8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
252*67e74705SXin Li // CHECK:   [[VTBL1_I:%.*]] = shufflevector <8 x i8> %b, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
253*67e74705SXin Li // CHECK:   [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %c) #2
254*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = icmp uge <8 x i8> %c, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
255*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
256*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = and <8 x i8> [[TMP1]], %a
257*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = xor <8 x i8> [[TMP1]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
258*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = and <8 x i8> [[TMP3]], [[VTBL11_I]]
259*67e74705SXin Li // CHECK:   [[VTBX_I:%.*]] = or <8 x i8> [[TMP2]], [[TMP4]]
260*67e74705SXin Li // CHECK:   ret <8 x i8> [[VTBX_I]]
test_vtbx1_s8(int8x8_t a,int8x8_t b,int8x8_t c)261*67e74705SXin Li int8x8_t test_vtbx1_s8(int8x8_t a, int8x8_t b, int8x8_t c) {
262*67e74705SXin Li   return vtbx1_s8(a, b, c);
263*67e74705SXin Li }
264*67e74705SXin Li 
265*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vtbx2_s8(<8 x i8> %a, [2 x <8 x i8>] %b.coerce, <8 x i8> %c) #0 {
266*67e74705SXin Li // CHECK:   [[__P1_I:%.*]] = alloca %struct.int8x8x2_t, align 8
267*67e74705SXin Li // CHECK:   [[B:%.*]] = alloca %struct.int8x8x2_t, align 8
268*67e74705SXin Li // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0
269*67e74705SXin Li // CHECK:   store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
270*67e74705SXin Li // CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0
271*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = load [2 x <8 x i8>], [2 x <8 x i8>]* [[COERCE_DIVE1]], align 8
272*67e74705SXin Li // CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__P1_I]], i32 0, i32 0
273*67e74705SXin Li // CHECK:   store [2 x <8 x i8>] [[TMP0]], [2 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
274*67e74705SXin Li // CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__P1_I]], i32 0, i32 0
275*67e74705SXin Li // CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
276*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
277*67e74705SXin Li // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__P1_I]], i32 0, i32 0
278*67e74705SXin Li // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
279*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
280*67e74705SXin Li // CHECK:   [[VTBX1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
281*67e74705SXin Li // CHECK:   [[VTBX13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %a, <16 x i8> [[VTBX1_I]], <8 x i8> %c) #2
282*67e74705SXin Li // CHECK:   ret <8 x i8> [[VTBX13_I]]
test_vtbx2_s8(int8x8_t a,int8x8x2_t b,int8x8_t c)283*67e74705SXin Li int8x8_t test_vtbx2_s8(int8x8_t a, int8x8x2_t b, int8x8_t c) {
284*67e74705SXin Li   return vtbx2_s8(a, b, c);
285*67e74705SXin Li }
286*67e74705SXin Li 
287*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vtbx3_s8(<8 x i8> %a, [3 x <8 x i8>] %b.coerce, <8 x i8> %c) #0 {
288*67e74705SXin Li // CHECK:   [[__P1_I:%.*]] = alloca %struct.int8x8x3_t, align 8
289*67e74705SXin Li // CHECK:   [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
290*67e74705SXin Li // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0
291*67e74705SXin Li // CHECK:   store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
292*67e74705SXin Li // CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0
293*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = load [3 x <8 x i8>], [3 x <8 x i8>]* [[COERCE_DIVE1]], align 8
294*67e74705SXin Li // CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P1_I]], i32 0, i32 0
295*67e74705SXin Li // CHECK:   store [3 x <8 x i8>] [[TMP0]], [3 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
296*67e74705SXin Li // CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P1_I]], i32 0, i32 0
297*67e74705SXin Li // CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
298*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
299*67e74705SXin Li // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P1_I]], i32 0, i32 0
300*67e74705SXin Li // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
301*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
302*67e74705SXin Li // CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P1_I]], i32 0, i32 0
303*67e74705SXin Li // CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i64 0, i64 2
304*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
305*67e74705SXin Li // CHECK:   [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
306*67e74705SXin Li // CHECK:   [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
307*67e74705SXin Li // CHECK:   [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> %c) #2
308*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = icmp uge <8 x i8> %c, <i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24>
309*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i8>
310*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = and <8 x i8> [[TMP5]], %a
311*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = xor <8 x i8> [[TMP5]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
312*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = and <8 x i8> [[TMP7]], [[VTBL26_I]]
313*67e74705SXin Li // CHECK:   [[VTBX_I:%.*]] = or <8 x i8> [[TMP6]], [[TMP8]]
314*67e74705SXin Li // CHECK:   ret <8 x i8> [[VTBX_I]]
test_vtbx3_s8(int8x8_t a,int8x8x3_t b,int8x8_t c)315*67e74705SXin Li int8x8_t test_vtbx3_s8(int8x8_t a, int8x8x3_t b, int8x8_t c) {
316*67e74705SXin Li   return vtbx3_s8(a, b, c);
317*67e74705SXin Li }
318*67e74705SXin Li 
319*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vtbx4_s8(<8 x i8> %a, [4 x <8 x i8>] %b.coerce, <8 x i8> %c) #0 {
320*67e74705SXin Li // CHECK:   [[__P1_I:%.*]] = alloca %struct.int8x8x4_t, align 8
321*67e74705SXin Li // CHECK:   [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
322*67e74705SXin Li // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0
323*67e74705SXin Li // CHECK:   store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
324*67e74705SXin Li // CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0
325*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = load [4 x <8 x i8>], [4 x <8 x i8>]* [[COERCE_DIVE1]], align 8
326*67e74705SXin Li // CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P1_I]], i32 0, i32 0
327*67e74705SXin Li // CHECK:   store [4 x <8 x i8>] [[TMP0]], [4 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
328*67e74705SXin Li // CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P1_I]], i32 0, i32 0
329*67e74705SXin Li // CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
330*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
331*67e74705SXin Li // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P1_I]], i32 0, i32 0
332*67e74705SXin Li // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
333*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
334*67e74705SXin Li // CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P1_I]], i32 0, i32 0
335*67e74705SXin Li // CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i64 0, i64 2
336*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
337*67e74705SXin Li // CHECK:   [[VAL5_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P1_I]], i32 0, i32 0
338*67e74705SXin Li // CHECK:   [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i64 0, i64 3
339*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
340*67e74705SXin Li // CHECK:   [[VTBX2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
341*67e74705SXin Li // CHECK:   [[VTBX27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
342*67e74705SXin Li // CHECK:   [[VTBX28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %a, <16 x i8> [[VTBX2_I]], <16 x i8> [[VTBX27_I]], <8 x i8> %c) #2
343*67e74705SXin Li // CHECK:   ret <8 x i8> [[VTBX28_I]]
test_vtbx4_s8(int8x8_t a,int8x8x4_t b,int8x8_t c)344*67e74705SXin Li int8x8_t test_vtbx4_s8(int8x8_t a, int8x8x4_t b, int8x8_t c) {
345*67e74705SXin Li   return vtbx4_s8(a, b, c);
346*67e74705SXin Li }
347*67e74705SXin Li 
348*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vqtbx1_s8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) #0 {
349*67e74705SXin Li // CHECK:   [[VTBX1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) #2
350*67e74705SXin Li // CHECK:   ret <8 x i8> [[VTBX1_I]]
test_vqtbx1_s8(int8x8_t a,int8x16_t b,int8x8_t c)351*67e74705SXin Li int8x8_t test_vqtbx1_s8(int8x8_t a, int8x16_t b, int8x8_t c) {
352*67e74705SXin Li   return vqtbx1_s8(a, b, c);
353*67e74705SXin Li }
354*67e74705SXin Li 
355*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vqtbx2_s8(<8 x i8> %a, [2 x <16 x i8>] %b.coerce, <8 x i8> %c) #0 {
356*67e74705SXin Li // CHECK:   [[__P1_I:%.*]] = alloca %struct.int8x16x2_t, align 16
357*67e74705SXin Li // CHECK:   [[B:%.*]] = alloca %struct.int8x16x2_t, align 16
358*67e74705SXin Li // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[B]], i32 0, i32 0
359*67e74705SXin Li // CHECK:   store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
360*67e74705SXin Li // CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[B]], i32 0, i32 0
361*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16
362*67e74705SXin Li // CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__P1_I]], i32 0, i32 0
363*67e74705SXin Li // CHECK:   store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
364*67e74705SXin Li // CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__P1_I]], i32 0, i32 0
365*67e74705SXin Li // CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
366*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
367*67e74705SXin Li // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__P1_I]], i32 0, i32 0
368*67e74705SXin Li // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
369*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
370*67e74705SXin Li // CHECK:   [[VTBX2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> %c) #2
371*67e74705SXin Li // CHECK:   ret <8 x i8> [[VTBX2_I]]
test_vqtbx2_s8(int8x8_t a,int8x16x2_t b,int8x8_t c)372*67e74705SXin Li int8x8_t test_vqtbx2_s8(int8x8_t a, int8x16x2_t b, int8x8_t c) {
373*67e74705SXin Li   return vqtbx2_s8(a, b, c);
374*67e74705SXin Li }
375*67e74705SXin Li 
376*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vqtbx3_s8(<8 x i8> %a, [3 x <16 x i8>] %b.coerce, <8 x i8> %c) #0 {
377*67e74705SXin Li // CHECK:   [[__P1_I:%.*]] = alloca %struct.int8x16x3_t, align 16
378*67e74705SXin Li // CHECK:   [[B:%.*]] = alloca %struct.int8x16x3_t, align 16
379*67e74705SXin Li // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[B]], i32 0, i32 0
380*67e74705SXin Li // CHECK:   store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
381*67e74705SXin Li // CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[B]], i32 0, i32 0
382*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16
383*67e74705SXin Li // CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__P1_I]], i32 0, i32 0
384*67e74705SXin Li // CHECK:   store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
385*67e74705SXin Li // CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__P1_I]], i32 0, i32 0
386*67e74705SXin Li // CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
387*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
388*67e74705SXin Li // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__P1_I]], i32 0, i32 0
389*67e74705SXin Li // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
390*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
391*67e74705SXin Li // CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__P1_I]], i32 0, i32 0
392*67e74705SXin Li // CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
393*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
394*67e74705SXin Li // CHECK:   [[VTBX3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> %c) #2
395*67e74705SXin Li // CHECK:   ret <8 x i8> [[VTBX3_I]]
test_vqtbx3_s8(int8x8_t a,int8x16x3_t b,int8x8_t c)396*67e74705SXin Li int8x8_t test_vqtbx3_s8(int8x8_t a, int8x16x3_t b, int8x8_t c) {
397*67e74705SXin Li   return vqtbx3_s8(a, b, c);
398*67e74705SXin Li }
399*67e74705SXin Li 
400*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vqtbx4_s8(<8 x i8> %a, [4 x <16 x i8>] %b.coerce, <8 x i8> %c) #0 {
401*67e74705SXin Li // CHECK:   [[__P1_I:%.*]] = alloca %struct.int8x16x4_t, align 16
402*67e74705SXin Li // CHECK:   [[B:%.*]] = alloca %struct.int8x16x4_t, align 16
403*67e74705SXin Li // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[B]], i32 0, i32 0
404*67e74705SXin Li // CHECK:   store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
405*67e74705SXin Li // CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[B]], i32 0, i32 0
406*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16
407*67e74705SXin Li // CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__P1_I]], i32 0, i32 0
408*67e74705SXin Li // CHECK:   store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
409*67e74705SXin Li // CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__P1_I]], i32 0, i32 0
410*67e74705SXin Li // CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
411*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
412*67e74705SXin Li // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__P1_I]], i32 0, i32 0
413*67e74705SXin Li // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
414*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
415*67e74705SXin Li // CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__P1_I]], i32 0, i32 0
416*67e74705SXin Li // CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
417*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
418*67e74705SXin Li // CHECK:   [[VAL5_I:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__P1_I]], i32 0, i32 0
419*67e74705SXin Li // CHECK:   [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
420*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
421*67e74705SXin Li // CHECK:   [[VTBX4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> %c) #2
422*67e74705SXin Li // CHECK:   ret <8 x i8> [[VTBX4_I]]
test_vqtbx4_s8(int8x8_t a,int8x16x4_t b,int8x8_t c)423*67e74705SXin Li int8x8_t test_vqtbx4_s8(int8x8_t a, int8x16x4_t b, int8x8_t c) {
424*67e74705SXin Li   return vqtbx4_s8(a, b, c);
425*67e74705SXin Li }
426*67e74705SXin Li 
427*67e74705SXin Li // CHECK-LABEL: define <16 x i8> @test_vqtbx1q_s8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 {
428*67e74705SXin Li // CHECK:   [[VTBX1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #2
429*67e74705SXin Li // CHECK:   ret <16 x i8> [[VTBX1_I]]
test_vqtbx1q_s8(int8x16_t a,int8x16_t b,int8x16_t c)430*67e74705SXin Li int8x16_t test_vqtbx1q_s8(int8x16_t a, int8x16_t b, int8x16_t c) {
431*67e74705SXin Li   return vqtbx1q_s8(a, b, c);
432*67e74705SXin Li }
433*67e74705SXin Li 
434*67e74705SXin Li // CHECK-LABEL: define <16 x i8> @test_vqtbx2q_s8(<16 x i8> %a, [2 x <16 x i8>] %b.coerce, <16 x i8> %c) #0 {
435*67e74705SXin Li // CHECK:   [[__P1_I:%.*]] = alloca %struct.int8x16x2_t, align 16
436*67e74705SXin Li // CHECK:   [[B:%.*]] = alloca %struct.int8x16x2_t, align 16
437*67e74705SXin Li // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[B]], i32 0, i32 0
438*67e74705SXin Li // CHECK:   store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
439*67e74705SXin Li // CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[B]], i32 0, i32 0
440*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16
441*67e74705SXin Li // CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__P1_I]], i32 0, i32 0
442*67e74705SXin Li // CHECK:   store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
443*67e74705SXin Li // CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__P1_I]], i32 0, i32 0
444*67e74705SXin Li // CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
445*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
446*67e74705SXin Li // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__P1_I]], i32 0, i32 0
447*67e74705SXin Li // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
448*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
449*67e74705SXin Li // CHECK:   [[VTBX2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> %c) #2
450*67e74705SXin Li // CHECK:   ret <16 x i8> [[VTBX2_I]]
test_vqtbx2q_s8(int8x16_t a,int8x16x2_t b,int8x16_t c)451*67e74705SXin Li int8x16_t test_vqtbx2q_s8(int8x16_t a, int8x16x2_t b, int8x16_t c) {
452*67e74705SXin Li   return vqtbx2q_s8(a, b, c);
453*67e74705SXin Li }
454*67e74705SXin Li 
455*67e74705SXin Li // CHECK-LABEL: define <16 x i8> @test_vqtbx3q_s8(<16 x i8> %a, [3 x <16 x i8>] %b.coerce, <16 x i8> %c) #0 {
456*67e74705SXin Li // CHECK:   [[__P1_I:%.*]] = alloca %struct.int8x16x3_t, align 16
457*67e74705SXin Li // CHECK:   [[B:%.*]] = alloca %struct.int8x16x3_t, align 16
458*67e74705SXin Li // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[B]], i32 0, i32 0
459*67e74705SXin Li // CHECK:   store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
460*67e74705SXin Li // CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[B]], i32 0, i32 0
461*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16
462*67e74705SXin Li // CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__P1_I]], i32 0, i32 0
463*67e74705SXin Li // CHECK:   store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
464*67e74705SXin Li // CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__P1_I]], i32 0, i32 0
465*67e74705SXin Li // CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
466*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
467*67e74705SXin Li // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__P1_I]], i32 0, i32 0
468*67e74705SXin Li // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
469*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
470*67e74705SXin Li // CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__P1_I]], i32 0, i32 0
471*67e74705SXin Li // CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
472*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
473*67e74705SXin Li // CHECK:   [[VTBX3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> %c) #2
474*67e74705SXin Li // CHECK:   ret <16 x i8> [[VTBX3_I]]
test_vqtbx3q_s8(int8x16_t a,int8x16x3_t b,int8x16_t c)475*67e74705SXin Li int8x16_t test_vqtbx3q_s8(int8x16_t a, int8x16x3_t b, int8x16_t c) {
476*67e74705SXin Li   return vqtbx3q_s8(a, b, c);
477*67e74705SXin Li }
478*67e74705SXin Li 
479*67e74705SXin Li // CHECK-LABEL: define <16 x i8> @test_vqtbx4q_s8(<16 x i8> %a, [4 x <16 x i8>] %b.coerce, <16 x i8> %c) #0 {
480*67e74705SXin Li // CHECK:   [[__P1_I:%.*]] = alloca %struct.int8x16x4_t, align 16
481*67e74705SXin Li // CHECK:   [[B:%.*]] = alloca %struct.int8x16x4_t, align 16
482*67e74705SXin Li // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[B]], i32 0, i32 0
483*67e74705SXin Li // CHECK:   store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
484*67e74705SXin Li // CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[B]], i32 0, i32 0
485*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16
486*67e74705SXin Li // CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__P1_I]], i32 0, i32 0
487*67e74705SXin Li // CHECK:   store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
488*67e74705SXin Li // CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__P1_I]], i32 0, i32 0
489*67e74705SXin Li // CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
490*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
491*67e74705SXin Li // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__P1_I]], i32 0, i32 0
492*67e74705SXin Li // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
493*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
494*67e74705SXin Li // CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__P1_I]], i32 0, i32 0
495*67e74705SXin Li // CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
496*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
497*67e74705SXin Li // CHECK:   [[VAL5_I:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__P1_I]], i32 0, i32 0
498*67e74705SXin Li // CHECK:   [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
499*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
500*67e74705SXin Li // CHECK:   [[VTBX4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> %c) #2
501*67e74705SXin Li // CHECK:   ret <16 x i8> [[VTBX4_I]]
test_vqtbx4q_s8(int8x16_t a,int8x16x4_t b,int8x16_t c)502*67e74705SXin Li int8x16_t test_vqtbx4q_s8(int8x16_t a, int8x16x4_t b, int8x16_t c) {
503*67e74705SXin Li   return vqtbx4q_s8(a, b, c);
504*67e74705SXin Li }
505*67e74705SXin Li 
506*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vtbl1_u8(<8 x i8> %a, <8 x i8> %b) #0 {
507*67e74705SXin Li // CHECK:   [[VTBL1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
508*67e74705SXin Li // CHECK:   [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %b) #2
509*67e74705SXin Li // CHECK:   ret <8 x i8> [[VTBL11_I]]
test_vtbl1_u8(uint8x8_t a,uint8x8_t b)510*67e74705SXin Li uint8x8_t test_vtbl1_u8(uint8x8_t a, uint8x8_t b) {
511*67e74705SXin Li   return vtbl1_u8(a, b);
512*67e74705SXin Li }
513*67e74705SXin Li 
514*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vqtbl1_u8(<16 x i8> %a, <8 x i8> %b) #0 {
515*67e74705SXin Li // CHECK:   [[VTBL1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %a, <8 x i8> %b) #2
516*67e74705SXin Li // CHECK:   ret <8 x i8> [[VTBL1_I]]
test_vqtbl1_u8(uint8x16_t a,uint8x8_t b)517*67e74705SXin Li uint8x8_t test_vqtbl1_u8(uint8x16_t a, uint8x8_t b) {
518*67e74705SXin Li   return vqtbl1_u8(a, b);
519*67e74705SXin Li }
520*67e74705SXin Li 
521*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vtbl2_u8([2 x <8 x i8>] %a.coerce, <8 x i8> %b) #0 {
522*67e74705SXin Li // CHECK:   [[__P0_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
523*67e74705SXin Li // CHECK:   [[A:%.*]] = alloca %struct.uint8x8x2_t, align 8
524*67e74705SXin Li // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[A]], i32 0, i32 0
525*67e74705SXin Li // CHECK:   store [2 x <8 x i8>] [[A]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
526*67e74705SXin Li // CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[A]], i32 0, i32 0
527*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = load [2 x <8 x i8>], [2 x <8 x i8>]* [[COERCE_DIVE1]], align 8
528*67e74705SXin Li // CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__P0_I]], i32 0, i32 0
529*67e74705SXin Li // CHECK:   store [2 x <8 x i8>] [[TMP0]], [2 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
530*67e74705SXin Li // CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__P0_I]], i32 0, i32 0
531*67e74705SXin Li // CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
532*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
533*67e74705SXin Li // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__P0_I]], i32 0, i32 0
534*67e74705SXin Li // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
535*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
536*67e74705SXin Li // CHECK:   [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
537*67e74705SXin Li // CHECK:   [[VTBL13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %b) #2
538*67e74705SXin Li // CHECK:   ret <8 x i8> [[VTBL13_I]]
test_vtbl2_u8(uint8x8x2_t a,uint8x8_t b)539*67e74705SXin Li uint8x8_t test_vtbl2_u8(uint8x8x2_t a, uint8x8_t b) {
540*67e74705SXin Li   return vtbl2_u8(a, b);
541*67e74705SXin Li }
542*67e74705SXin Li 
543*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vqtbl2_u8([2 x <16 x i8>] %a.coerce, <8 x i8> %b) #0 {
544*67e74705SXin Li // CHECK:   [[__P0_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
545*67e74705SXin Li // CHECK:   [[A:%.*]] = alloca %struct.uint8x16x2_t, align 16
546*67e74705SXin Li // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[A]], i32 0, i32 0
547*67e74705SXin Li // CHECK:   store [2 x <16 x i8>] [[A]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
548*67e74705SXin Li // CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[A]], i32 0, i32 0
549*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16
550*67e74705SXin Li // CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__P0_I]], i32 0, i32 0
551*67e74705SXin Li // CHECK:   store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
552*67e74705SXin Li // CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__P0_I]], i32 0, i32 0
553*67e74705SXin Li // CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
554*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
555*67e74705SXin Li // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__P0_I]], i32 0, i32 0
556*67e74705SXin Li // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
557*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
558*67e74705SXin Li // CHECK:   [[VTBL2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> %b) #2
559*67e74705SXin Li // CHECK:   ret <8 x i8> [[VTBL2_I]]
test_vqtbl2_u8(uint8x16x2_t a,uint8x8_t b)560*67e74705SXin Li uint8x8_t test_vqtbl2_u8(uint8x16x2_t a, uint8x8_t b) {
561*67e74705SXin Li   return vqtbl2_u8(a, b);
562*67e74705SXin Li }
563*67e74705SXin Li 
564*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vtbl3_u8([3 x <8 x i8>] %a.coerce, <8 x i8> %b) #0 {
565*67e74705SXin Li // CHECK:   [[__P0_I:%.*]] = alloca %struct.uint8x8x3_t, align 8
566*67e74705SXin Li // CHECK:   [[A:%.*]] = alloca %struct.uint8x8x3_t, align 8
567*67e74705SXin Li // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[A]], i32 0, i32 0
568*67e74705SXin Li // CHECK:   store [3 x <8 x i8>] [[A]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
569*67e74705SXin Li // CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[A]], i32 0, i32 0
570*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = load [3 x <8 x i8>], [3 x <8 x i8>]* [[COERCE_DIVE1]], align 8
571*67e74705SXin Li // CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P0_I]], i32 0, i32 0
572*67e74705SXin Li // CHECK:   store [3 x <8 x i8>] [[TMP0]], [3 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
573*67e74705SXin Li // CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P0_I]], i32 0, i32 0
574*67e74705SXin Li // CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
575*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
576*67e74705SXin Li // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P0_I]], i32 0, i32 0
577*67e74705SXin Li // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
578*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
579*67e74705SXin Li // CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P0_I]], i32 0, i32 0
580*67e74705SXin Li // CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i64 0, i64 2
581*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
582*67e74705SXin Li // CHECK:   [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
583*67e74705SXin Li // CHECK:   [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
584*67e74705SXin Li // CHECK:   [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> %b) #2
585*67e74705SXin Li // CHECK:   ret <8 x i8> [[VTBL26_I]]
test_vtbl3_u8(uint8x8x3_t a,uint8x8_t b)586*67e74705SXin Li uint8x8_t test_vtbl3_u8(uint8x8x3_t a, uint8x8_t b) {
587*67e74705SXin Li   return vtbl3_u8(a, b);
588*67e74705SXin Li }
589*67e74705SXin Li 
590*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vqtbl3_u8([3 x <16 x i8>] %a.coerce, <8 x i8> %b) #0 {
591*67e74705SXin Li // CHECK:   [[__P0_I:%.*]] = alloca %struct.uint8x16x3_t, align 16
592*67e74705SXin Li // CHECK:   [[A:%.*]] = alloca %struct.uint8x16x3_t, align 16
593*67e74705SXin Li // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[A]], i32 0, i32 0
594*67e74705SXin Li // CHECK:   store [3 x <16 x i8>] [[A]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
595*67e74705SXin Li // CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[A]], i32 0, i32 0
596*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16
597*67e74705SXin Li // CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__P0_I]], i32 0, i32 0
598*67e74705SXin Li // CHECK:   store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
599*67e74705SXin Li // CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__P0_I]], i32 0, i32 0
600*67e74705SXin Li // CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
601*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
602*67e74705SXin Li // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__P0_I]], i32 0, i32 0
603*67e74705SXin Li // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
604*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
605*67e74705SXin Li // CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__P0_I]], i32 0, i32 0
606*67e74705SXin Li // CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
607*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
608*67e74705SXin Li // CHECK:   [[VTBL3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> %b) #2
609*67e74705SXin Li // CHECK:   ret <8 x i8> [[VTBL3_I]]
test_vqtbl3_u8(uint8x16x3_t a,uint8x8_t b)610*67e74705SXin Li uint8x8_t test_vqtbl3_u8(uint8x16x3_t a, uint8x8_t b) {
611*67e74705SXin Li   return vqtbl3_u8(a, b);
612*67e74705SXin Li }
613*67e74705SXin Li 
614*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vtbl4_u8([4 x <8 x i8>] %a.coerce, <8 x i8> %b) #0 {
615*67e74705SXin Li // CHECK:   [[__P0_I:%.*]] = alloca %struct.uint8x8x4_t, align 8
616*67e74705SXin Li // CHECK:   [[A:%.*]] = alloca %struct.uint8x8x4_t, align 8
617*67e74705SXin Li // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[A]], i32 0, i32 0
618*67e74705SXin Li // CHECK:   store [4 x <8 x i8>] [[A]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
619*67e74705SXin Li // CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[A]], i32 0, i32 0
620*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = load [4 x <8 x i8>], [4 x <8 x i8>]* [[COERCE_DIVE1]], align 8
621*67e74705SXin Li // CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0
622*67e74705SXin Li // CHECK:   store [4 x <8 x i8>] [[TMP0]], [4 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
623*67e74705SXin Li // CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0
624*67e74705SXin Li // CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
625*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
626*67e74705SXin Li // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0
627*67e74705SXin Li // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
628*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
629*67e74705SXin Li // CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0
630*67e74705SXin Li // CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i64 0, i64 2
631*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
632*67e74705SXin Li // CHECK:   [[VAL5_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0
633*67e74705SXin Li // CHECK:   [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i64 0, i64 3
634*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
635*67e74705SXin Li // CHECK:   [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
636*67e74705SXin Li // CHECK:   [[VTBL27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
637*67e74705SXin Li // CHECK:   [[VTBL28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL27_I]], <8 x i8> %b) #2
638*67e74705SXin Li // CHECK:   ret <8 x i8> [[VTBL28_I]]
test_vtbl4_u8(uint8x8x4_t a,uint8x8_t b)639*67e74705SXin Li uint8x8_t test_vtbl4_u8(uint8x8x4_t a, uint8x8_t b) {
640*67e74705SXin Li   return vtbl4_u8(a, b);
641*67e74705SXin Li }
642*67e74705SXin Li 
643*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vqtbl4_u8([4 x <16 x i8>] %a.coerce, <8 x i8> %b) #0 {
644*67e74705SXin Li // CHECK:   [[__P0_I:%.*]] = alloca %struct.uint8x16x4_t, align 16
645*67e74705SXin Li // CHECK:   [[A:%.*]] = alloca %struct.uint8x16x4_t, align 16
646*67e74705SXin Li // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[A]], i32 0, i32 0
647*67e74705SXin Li // CHECK:   store [4 x <16 x i8>] [[A]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
648*67e74705SXin Li // CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[A]], i32 0, i32 0
649*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16
650*67e74705SXin Li // CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0
651*67e74705SXin Li // CHECK:   store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
652*67e74705SXin Li // CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0
653*67e74705SXin Li // CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
654*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
655*67e74705SXin Li // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0
656*67e74705SXin Li // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
657*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
658*67e74705SXin Li // CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0
659*67e74705SXin Li // CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
660*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
661*67e74705SXin Li // CHECK:   [[VAL5_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0
662*67e74705SXin Li // CHECK:   [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
663*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
664*67e74705SXin Li // CHECK:   [[VTBL4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> %b) #2
665*67e74705SXin Li // CHECK:   ret <8 x i8> [[VTBL4_I]]
test_vqtbl4_u8(uint8x16x4_t a,uint8x8_t b)666*67e74705SXin Li uint8x8_t test_vqtbl4_u8(uint8x16x4_t a, uint8x8_t b) {
667*67e74705SXin Li   return vqtbl4_u8(a, b);
668*67e74705SXin Li }
669*67e74705SXin Li 
670*67e74705SXin Li // CHECK-LABEL: define <16 x i8> @test_vqtbl1q_u8(<16 x i8> %a, <16 x i8> %b) #0 {
671*67e74705SXin Li // CHECK:   [[VTBL1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> %a, <16 x i8> %b) #2
672*67e74705SXin Li // CHECK:   ret <16 x i8> [[VTBL1_I]]
test_vqtbl1q_u8(uint8x16_t a,uint8x16_t b)673*67e74705SXin Li uint8x16_t test_vqtbl1q_u8(uint8x16_t a, uint8x16_t b) {
674*67e74705SXin Li   return vqtbl1q_u8(a, b);
675*67e74705SXin Li }
676*67e74705SXin Li 
677*67e74705SXin Li // CHECK-LABEL: define <16 x i8> @test_vqtbl2q_u8([2 x <16 x i8>] %a.coerce, <16 x i8> %b) #0 {
678*67e74705SXin Li // CHECK:   [[__P0_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
679*67e74705SXin Li // CHECK:   [[A:%.*]] = alloca %struct.uint8x16x2_t, align 16
680*67e74705SXin Li // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[A]], i32 0, i32 0
681*67e74705SXin Li // CHECK:   store [2 x <16 x i8>] [[A]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
682*67e74705SXin Li // CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[A]], i32 0, i32 0
683*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16
684*67e74705SXin Li // CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__P0_I]], i32 0, i32 0
685*67e74705SXin Li // CHECK:   store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
686*67e74705SXin Li // CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__P0_I]], i32 0, i32 0
687*67e74705SXin Li // CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
688*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
689*67e74705SXin Li // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__P0_I]], i32 0, i32 0
690*67e74705SXin Li // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
691*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
692*67e74705SXin Li // CHECK:   [[VTBL2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> %b) #2
693*67e74705SXin Li // CHECK:   ret <16 x i8> [[VTBL2_I]]
test_vqtbl2q_u8(uint8x16x2_t a,uint8x16_t b)694*67e74705SXin Li uint8x16_t test_vqtbl2q_u8(uint8x16x2_t a, uint8x16_t b) {
695*67e74705SXin Li   return vqtbl2q_u8(a, b);
696*67e74705SXin Li }
697*67e74705SXin Li 
698*67e74705SXin Li // CHECK-LABEL: define <16 x i8> @test_vqtbl3q_u8([3 x <16 x i8>] %a.coerce, <16 x i8> %b) #0 {
699*67e74705SXin Li // CHECK:   [[__P0_I:%.*]] = alloca %struct.uint8x16x3_t, align 16
700*67e74705SXin Li // CHECK:   [[A:%.*]] = alloca %struct.uint8x16x3_t, align 16
701*67e74705SXin Li // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[A]], i32 0, i32 0
702*67e74705SXin Li // CHECK:   store [3 x <16 x i8>] [[A]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
703*67e74705SXin Li // CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[A]], i32 0, i32 0
704*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16
705*67e74705SXin Li // CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__P0_I]], i32 0, i32 0
706*67e74705SXin Li // CHECK:   store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
707*67e74705SXin Li // CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__P0_I]], i32 0, i32 0
708*67e74705SXin Li // CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
709*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
710*67e74705SXin Li // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__P0_I]], i32 0, i32 0
711*67e74705SXin Li // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
712*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
713*67e74705SXin Li // CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__P0_I]], i32 0, i32 0
714*67e74705SXin Li // CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
715*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
716*67e74705SXin Li // CHECK:   [[VTBL3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> %b) #2
717*67e74705SXin Li // CHECK:   ret <16 x i8> [[VTBL3_I]]
test_vqtbl3q_u8(uint8x16x3_t a,uint8x16_t b)718*67e74705SXin Li uint8x16_t test_vqtbl3q_u8(uint8x16x3_t a, uint8x16_t b) {
719*67e74705SXin Li   return vqtbl3q_u8(a, b);
720*67e74705SXin Li }
721*67e74705SXin Li 
722*67e74705SXin Li // CHECK-LABEL: define <16 x i8> @test_vqtbl4q_u8([4 x <16 x i8>] %a.coerce, <16 x i8> %b) #0 {
723*67e74705SXin Li // CHECK:   [[__P0_I:%.*]] = alloca %struct.uint8x16x4_t, align 16
724*67e74705SXin Li // CHECK:   [[A:%.*]] = alloca %struct.uint8x16x4_t, align 16
725*67e74705SXin Li // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[A]], i32 0, i32 0
726*67e74705SXin Li // CHECK:   store [4 x <16 x i8>] [[A]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
727*67e74705SXin Li // CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[A]], i32 0, i32 0
728*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16
729*67e74705SXin Li // CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0
730*67e74705SXin Li // CHECK:   store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
731*67e74705SXin Li // CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0
732*67e74705SXin Li // CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
733*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
734*67e74705SXin Li // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0
735*67e74705SXin Li // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
736*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
737*67e74705SXin Li // CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0
738*67e74705SXin Li // CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
739*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
740*67e74705SXin Li // CHECK:   [[VAL5_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0
741*67e74705SXin Li // CHECK:   [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
742*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
743*67e74705SXin Li // CHECK:   [[VTBL4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> %b) #2
744*67e74705SXin Li // CHECK:   ret <16 x i8> [[VTBL4_I]]
test_vqtbl4q_u8(uint8x16x4_t a,uint8x16_t b)745*67e74705SXin Li uint8x16_t test_vqtbl4q_u8(uint8x16x4_t a, uint8x16_t b) {
746*67e74705SXin Li   return vqtbl4q_u8(a, b);
747*67e74705SXin Li }
748*67e74705SXin Li 
749*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vtbx1_u8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
750*67e74705SXin Li // CHECK:   [[VTBL1_I:%.*]] = shufflevector <8 x i8> %b, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
751*67e74705SXin Li // CHECK:   [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %c) #2
752*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = icmp uge <8 x i8> %c, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
753*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
754*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = and <8 x i8> [[TMP1]], %a
755*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = xor <8 x i8> [[TMP1]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
756*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = and <8 x i8> [[TMP3]], [[VTBL11_I]]
757*67e74705SXin Li // CHECK:   [[VTBX_I:%.*]] = or <8 x i8> [[TMP2]], [[TMP4]]
758*67e74705SXin Li // CHECK:   ret <8 x i8> [[VTBX_I]]
test_vtbx1_u8(uint8x8_t a,uint8x8_t b,uint8x8_t c)759*67e74705SXin Li uint8x8_t test_vtbx1_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
760*67e74705SXin Li   return vtbx1_u8(a, b, c);
761*67e74705SXin Li }
762*67e74705SXin Li 
763*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vtbx2_u8(<8 x i8> %a, [2 x <8 x i8>] %b.coerce, <8 x i8> %c) #0 {
764*67e74705SXin Li // CHECK:   [[__P1_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
765*67e74705SXin Li // CHECK:   [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8
766*67e74705SXin Li // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0
767*67e74705SXin Li // CHECK:   store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
768*67e74705SXin Li // CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0
769*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = load [2 x <8 x i8>], [2 x <8 x i8>]* [[COERCE_DIVE1]], align 8
770*67e74705SXin Li // CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__P1_I]], i32 0, i32 0
771*67e74705SXin Li // CHECK:   store [2 x <8 x i8>] [[TMP0]], [2 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
772*67e74705SXin Li // CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__P1_I]], i32 0, i32 0
773*67e74705SXin Li // CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
774*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
775*67e74705SXin Li // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__P1_I]], i32 0, i32 0
776*67e74705SXin Li // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
777*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
778*67e74705SXin Li // CHECK:   [[VTBX1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
779*67e74705SXin Li // CHECK:   [[VTBX13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %a, <16 x i8> [[VTBX1_I]], <8 x i8> %c) #2
780*67e74705SXin Li // CHECK:   ret <8 x i8> [[VTBX13_I]]
test_vtbx2_u8(uint8x8_t a,uint8x8x2_t b,uint8x8_t c)781*67e74705SXin Li uint8x8_t test_vtbx2_u8(uint8x8_t a, uint8x8x2_t b, uint8x8_t c) {
782*67e74705SXin Li   return vtbx2_u8(a, b, c);
783*67e74705SXin Li }
784*67e74705SXin Li 
785*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vtbx3_u8(<8 x i8> %a, [3 x <8 x i8>] %b.coerce, <8 x i8> %c) #0 {
786*67e74705SXin Li // CHECK:   [[__P1_I:%.*]] = alloca %struct.uint8x8x3_t, align 8
787*67e74705SXin Li // CHECK:   [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8
788*67e74705SXin Li // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0
789*67e74705SXin Li // CHECK:   store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
790*67e74705SXin Li // CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0
791*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = load [3 x <8 x i8>], [3 x <8 x i8>]* [[COERCE_DIVE1]], align 8
792*67e74705SXin Li // CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P1_I]], i32 0, i32 0
793*67e74705SXin Li // CHECK:   store [3 x <8 x i8>] [[TMP0]], [3 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
794*67e74705SXin Li // CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P1_I]], i32 0, i32 0
795*67e74705SXin Li // CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
796*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
797*67e74705SXin Li // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P1_I]], i32 0, i32 0
798*67e74705SXin Li // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
799*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
800*67e74705SXin Li // CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P1_I]], i32 0, i32 0
801*67e74705SXin Li // CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i64 0, i64 2
802*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
803*67e74705SXin Li // CHECK:   [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
804*67e74705SXin Li // CHECK:   [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
805*67e74705SXin Li // CHECK:   [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> %c) #2
806*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = icmp uge <8 x i8> %c, <i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24>
807*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i8>
808*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = and <8 x i8> [[TMP5]], %a
809*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = xor <8 x i8> [[TMP5]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
810*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = and <8 x i8> [[TMP7]], [[VTBL26_I]]
811*67e74705SXin Li // CHECK:   [[VTBX_I:%.*]] = or <8 x i8> [[TMP6]], [[TMP8]]
812*67e74705SXin Li // CHECK:   ret <8 x i8> [[VTBX_I]]
test_vtbx3_u8(uint8x8_t a,uint8x8x3_t b,uint8x8_t c)813*67e74705SXin Li uint8x8_t test_vtbx3_u8(uint8x8_t a, uint8x8x3_t b, uint8x8_t c) {
814*67e74705SXin Li   return vtbx3_u8(a, b, c);
815*67e74705SXin Li }
816*67e74705SXin Li 
817*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vtbx4_u8(<8 x i8> %a, [4 x <8 x i8>] %b.coerce, <8 x i8> %c) #0 {
818*67e74705SXin Li // CHECK:   [[__P1_I:%.*]] = alloca %struct.uint8x8x4_t, align 8
819*67e74705SXin Li // CHECK:   [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
820*67e74705SXin Li // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0
821*67e74705SXin Li // CHECK:   store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
822*67e74705SXin Li // CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0
823*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = load [4 x <8 x i8>], [4 x <8 x i8>]* [[COERCE_DIVE1]], align 8
824*67e74705SXin Li // CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P1_I]], i32 0, i32 0
825*67e74705SXin Li // CHECK:   store [4 x <8 x i8>] [[TMP0]], [4 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
826*67e74705SXin Li // CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P1_I]], i32 0, i32 0
827*67e74705SXin Li // CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
828*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
829*67e74705SXin Li // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P1_I]], i32 0, i32 0
830*67e74705SXin Li // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
831*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
832*67e74705SXin Li // CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P1_I]], i32 0, i32 0
833*67e74705SXin Li // CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i64 0, i64 2
834*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
835*67e74705SXin Li // CHECK:   [[VAL5_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P1_I]], i32 0, i32 0
836*67e74705SXin Li // CHECK:   [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i64 0, i64 3
837*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
838*67e74705SXin Li // CHECK:   [[VTBX2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
839*67e74705SXin Li // CHECK:   [[VTBX27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
840*67e74705SXin Li // CHECK:   [[VTBX28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %a, <16 x i8> [[VTBX2_I]], <16 x i8> [[VTBX27_I]], <8 x i8> %c) #2
841*67e74705SXin Li // CHECK:   ret <8 x i8> [[VTBX28_I]]
test_vtbx4_u8(uint8x8_t a,uint8x8x4_t b,uint8x8_t c)842*67e74705SXin Li uint8x8_t test_vtbx4_u8(uint8x8_t a, uint8x8x4_t b, uint8x8_t c) {
843*67e74705SXin Li   return vtbx4_u8(a, b, c);
844*67e74705SXin Li }
845*67e74705SXin Li 
846*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vqtbx1_u8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) #0 {
847*67e74705SXin Li // CHECK:   [[VTBX1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) #2
848*67e74705SXin Li // CHECK:   ret <8 x i8> [[VTBX1_I]]
test_vqtbx1_u8(uint8x8_t a,uint8x16_t b,uint8x8_t c)849*67e74705SXin Li uint8x8_t test_vqtbx1_u8(uint8x8_t a, uint8x16_t b, uint8x8_t c) {
850*67e74705SXin Li   return vqtbx1_u8(a, b, c);
851*67e74705SXin Li }
852*67e74705SXin Li 
853*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vqtbx2_u8(<8 x i8> %a, [2 x <16 x i8>] %b.coerce, <8 x i8> %c) #0 {
854*67e74705SXin Li // CHECK:   [[__P1_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
855*67e74705SXin Li // CHECK:   [[B:%.*]] = alloca %struct.uint8x16x2_t, align 16
856*67e74705SXin Li // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[B]], i32 0, i32 0
857*67e74705SXin Li // CHECK:   store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
858*67e74705SXin Li // CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[B]], i32 0, i32 0
859*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16
860*67e74705SXin Li // CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__P1_I]], i32 0, i32 0
861*67e74705SXin Li // CHECK:   store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
862*67e74705SXin Li // CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__P1_I]], i32 0, i32 0
863*67e74705SXin Li // CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
864*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
865*67e74705SXin Li // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__P1_I]], i32 0, i32 0
866*67e74705SXin Li // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
867*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
868*67e74705SXin Li // CHECK:   [[VTBX2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> %c) #2
869*67e74705SXin Li // CHECK:   ret <8 x i8> [[VTBX2_I]]
test_vqtbx2_u8(uint8x8_t a,uint8x16x2_t b,uint8x8_t c)870*67e74705SXin Li uint8x8_t test_vqtbx2_u8(uint8x8_t a, uint8x16x2_t b, uint8x8_t c) {
871*67e74705SXin Li   return vqtbx2_u8(a, b, c);
872*67e74705SXin Li }
873*67e74705SXin Li 
874*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vqtbx3_u8(<8 x i8> %a, [3 x <16 x i8>] %b.coerce, <8 x i8> %c) #0 {
875*67e74705SXin Li // CHECK:   [[__P1_I:%.*]] = alloca %struct.uint8x16x3_t, align 16
876*67e74705SXin Li // CHECK:   [[B:%.*]] = alloca %struct.uint8x16x3_t, align 16
877*67e74705SXin Li // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[B]], i32 0, i32 0
878*67e74705SXin Li // CHECK:   store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
879*67e74705SXin Li // CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[B]], i32 0, i32 0
880*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16
881*67e74705SXin Li // CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__P1_I]], i32 0, i32 0
882*67e74705SXin Li // CHECK:   store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
883*67e74705SXin Li // CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__P1_I]], i32 0, i32 0
884*67e74705SXin Li // CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
885*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
886*67e74705SXin Li // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__P1_I]], i32 0, i32 0
887*67e74705SXin Li // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
888*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
889*67e74705SXin Li // CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__P1_I]], i32 0, i32 0
890*67e74705SXin Li // CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
891*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
892*67e74705SXin Li // CHECK:   [[VTBX3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> %c) #2
893*67e74705SXin Li // CHECK:   ret <8 x i8> [[VTBX3_I]]
test_vqtbx3_u8(uint8x8_t a,uint8x16x3_t b,uint8x8_t c)894*67e74705SXin Li uint8x8_t test_vqtbx3_u8(uint8x8_t a, uint8x16x3_t b, uint8x8_t c) {
895*67e74705SXin Li   return vqtbx3_u8(a, b, c);
896*67e74705SXin Li }
897*67e74705SXin Li 
898*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vqtbx4_u8(<8 x i8> %a, [4 x <16 x i8>] %b.coerce, <8 x i8> %c) #0 {
899*67e74705SXin Li // CHECK:   [[__P1_I:%.*]] = alloca %struct.uint8x16x4_t, align 16
900*67e74705SXin Li // CHECK:   [[B:%.*]] = alloca %struct.uint8x16x4_t, align 16
901*67e74705SXin Li // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[B]], i32 0, i32 0
902*67e74705SXin Li // CHECK:   store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
903*67e74705SXin Li // CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[B]], i32 0, i32 0
904*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16
905*67e74705SXin Li // CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P1_I]], i32 0, i32 0
906*67e74705SXin Li // CHECK:   store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
907*67e74705SXin Li // CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P1_I]], i32 0, i32 0
908*67e74705SXin Li // CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
909*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
910*67e74705SXin Li // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P1_I]], i32 0, i32 0
911*67e74705SXin Li // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
912*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
913*67e74705SXin Li // CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P1_I]], i32 0, i32 0
914*67e74705SXin Li // CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
915*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
916*67e74705SXin Li // CHECK:   [[VAL5_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P1_I]], i32 0, i32 0
917*67e74705SXin Li // CHECK:   [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
918*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
919*67e74705SXin Li // CHECK:   [[VTBX4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> %c) #2
920*67e74705SXin Li // CHECK:   ret <8 x i8> [[VTBX4_I]]
test_vqtbx4_u8(uint8x8_t a,uint8x16x4_t b,uint8x8_t c)921*67e74705SXin Li uint8x8_t test_vqtbx4_u8(uint8x8_t a, uint8x16x4_t b, uint8x8_t c) {
922*67e74705SXin Li   return vqtbx4_u8(a, b, c);
923*67e74705SXin Li }
924*67e74705SXin Li 
925*67e74705SXin Li // CHECK-LABEL: define <16 x i8> @test_vqtbx1q_u8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 {
926*67e74705SXin Li // CHECK:   [[VTBX1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #2
927*67e74705SXin Li // CHECK:   ret <16 x i8> [[VTBX1_I]]
test_vqtbx1q_u8(uint8x16_t a,uint8x16_t b,uint8x16_t c)928*67e74705SXin Li uint8x16_t test_vqtbx1q_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
929*67e74705SXin Li   return vqtbx1q_u8(a, b, c);
930*67e74705SXin Li }
931*67e74705SXin Li 
932*67e74705SXin Li // CHECK-LABEL: define <16 x i8> @test_vqtbx2q_u8(<16 x i8> %a, [2 x <16 x i8>] %b.coerce, <16 x i8> %c) #0 {
933*67e74705SXin Li // CHECK:   [[__P1_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
934*67e74705SXin Li // CHECK:   [[B:%.*]] = alloca %struct.uint8x16x2_t, align 16
935*67e74705SXin Li // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[B]], i32 0, i32 0
936*67e74705SXin Li // CHECK:   store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
937*67e74705SXin Li // CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[B]], i32 0, i32 0
938*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16
939*67e74705SXin Li // CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__P1_I]], i32 0, i32 0
940*67e74705SXin Li // CHECK:   store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
941*67e74705SXin Li // CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__P1_I]], i32 0, i32 0
942*67e74705SXin Li // CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
943*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
944*67e74705SXin Li // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__P1_I]], i32 0, i32 0
945*67e74705SXin Li // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
946*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
947*67e74705SXin Li // CHECK:   [[VTBX2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> %c) #2
948*67e74705SXin Li // CHECK:   ret <16 x i8> [[VTBX2_I]]
test_vqtbx2q_u8(uint8x16_t a,uint8x16x2_t b,uint8x16_t c)949*67e74705SXin Li uint8x16_t test_vqtbx2q_u8(uint8x16_t a, uint8x16x2_t b, uint8x16_t c) {
950*67e74705SXin Li   return vqtbx2q_u8(a, b, c);
951*67e74705SXin Li }
952*67e74705SXin Li 
953*67e74705SXin Li // CHECK-LABEL: define <16 x i8> @test_vqtbx3q_u8(<16 x i8> %a, [3 x <16 x i8>] %b.coerce, <16 x i8> %c) #0 {
954*67e74705SXin Li // CHECK:   [[__P1_I:%.*]] = alloca %struct.uint8x16x3_t, align 16
955*67e74705SXin Li // CHECK:   [[B:%.*]] = alloca %struct.uint8x16x3_t, align 16
956*67e74705SXin Li // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[B]], i32 0, i32 0
957*67e74705SXin Li // CHECK:   store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
958*67e74705SXin Li // CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[B]], i32 0, i32 0
959*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16
960*67e74705SXin Li // CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__P1_I]], i32 0, i32 0
961*67e74705SXin Li // CHECK:   store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
962*67e74705SXin Li // CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__P1_I]], i32 0, i32 0
963*67e74705SXin Li // CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
964*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
965*67e74705SXin Li // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__P1_I]], i32 0, i32 0
966*67e74705SXin Li // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
967*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
968*67e74705SXin Li // CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__P1_I]], i32 0, i32 0
969*67e74705SXin Li // CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
970*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
971*67e74705SXin Li // CHECK:   [[VTBX3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> %c) #2
972*67e74705SXin Li // CHECK:   ret <16 x i8> [[VTBX3_I]]
test_vqtbx3q_u8(uint8x16_t a,uint8x16x3_t b,uint8x16_t c)973*67e74705SXin Li uint8x16_t test_vqtbx3q_u8(uint8x16_t a, uint8x16x3_t b, uint8x16_t c) {
974*67e74705SXin Li   return vqtbx3q_u8(a, b, c);
975*67e74705SXin Li }
976*67e74705SXin Li 
977*67e74705SXin Li // CHECK-LABEL: define <16 x i8> @test_vqtbx4q_u8(<16 x i8> %a, [4 x <16 x i8>] %b.coerce, <16 x i8> %c) #0 {
978*67e74705SXin Li // CHECK:   [[__P1_I:%.*]] = alloca %struct.uint8x16x4_t, align 16
979*67e74705SXin Li // CHECK:   [[B:%.*]] = alloca %struct.uint8x16x4_t, align 16
980*67e74705SXin Li // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[B]], i32 0, i32 0
981*67e74705SXin Li // CHECK:   store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
982*67e74705SXin Li // CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[B]], i32 0, i32 0
983*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16
984*67e74705SXin Li // CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P1_I]], i32 0, i32 0
985*67e74705SXin Li // CHECK:   store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
986*67e74705SXin Li // CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P1_I]], i32 0, i32 0
987*67e74705SXin Li // CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
988*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
989*67e74705SXin Li // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P1_I]], i32 0, i32 0
990*67e74705SXin Li // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
991*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
992*67e74705SXin Li // CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P1_I]], i32 0, i32 0
993*67e74705SXin Li // CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
994*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
995*67e74705SXin Li // CHECK:   [[VAL5_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P1_I]], i32 0, i32 0
996*67e74705SXin Li // CHECK:   [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
997*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
998*67e74705SXin Li // CHECK:   [[VTBX4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> %c) #2
999*67e74705SXin Li // CHECK:   ret <16 x i8> [[VTBX4_I]]
test_vqtbx4q_u8(uint8x16_t a,uint8x16x4_t b,uint8x16_t c)1000*67e74705SXin Li uint8x16_t test_vqtbx4q_u8(uint8x16_t a, uint8x16x4_t b, uint8x16_t c) {
1001*67e74705SXin Li   return vqtbx4q_u8(a, b, c);
1002*67e74705SXin Li }
1003*67e74705SXin Li 
1004*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vtbl1_p8(<8 x i8> %a, <8 x i8> %b) #0 {
1005*67e74705SXin Li // CHECK:   [[VTBL1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1006*67e74705SXin Li // CHECK:   [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %b) #2
1007*67e74705SXin Li // CHECK:   ret <8 x i8> [[VTBL11_I]]
test_vtbl1_p8(poly8x8_t a,uint8x8_t b)1008*67e74705SXin Li poly8x8_t test_vtbl1_p8(poly8x8_t a, uint8x8_t b) {
1009*67e74705SXin Li   return vtbl1_p8(a, b);
1010*67e74705SXin Li }
1011*67e74705SXin Li 
1012*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vqtbl1_p8(<16 x i8> %a, <8 x i8> %b) #0 {
1013*67e74705SXin Li // CHECK:   [[VTBL1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %a, <8 x i8> %b) #2
1014*67e74705SXin Li // CHECK:   ret <8 x i8> [[VTBL1_I]]
test_vqtbl1_p8(poly8x16_t a,uint8x8_t b)1015*67e74705SXin Li poly8x8_t test_vqtbl1_p8(poly8x16_t a, uint8x8_t b) {
1016*67e74705SXin Li   return vqtbl1_p8(a, b);
1017*67e74705SXin Li }
1018*67e74705SXin Li 
1019*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vtbl2_p8([2 x <8 x i8>] %a.coerce, <8 x i8> %b) #0 {
1020*67e74705SXin Li // CHECK:   [[__P0_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
1021*67e74705SXin Li // CHECK:   [[A:%.*]] = alloca %struct.poly8x8x2_t, align 8
1022*67e74705SXin Li // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[A]], i32 0, i32 0
1023*67e74705SXin Li // CHECK:   store [2 x <8 x i8>] [[A]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
1024*67e74705SXin Li // CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[A]], i32 0, i32 0
1025*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = load [2 x <8 x i8>], [2 x <8 x i8>]* [[COERCE_DIVE1]], align 8
1026*67e74705SXin Li // CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__P0_I]], i32 0, i32 0
1027*67e74705SXin Li // CHECK:   store [2 x <8 x i8>] [[TMP0]], [2 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
1028*67e74705SXin Li // CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__P0_I]], i32 0, i32 0
1029*67e74705SXin Li // CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
1030*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
1031*67e74705SXin Li // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__P0_I]], i32 0, i32 0
1032*67e74705SXin Li // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
1033*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
1034*67e74705SXin Li // CHECK:   [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1035*67e74705SXin Li // CHECK:   [[VTBL13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %b) #2
1036*67e74705SXin Li // CHECK:   ret <8 x i8> [[VTBL13_I]]
test_vtbl2_p8(poly8x8x2_t a,uint8x8_t b)1037*67e74705SXin Li poly8x8_t test_vtbl2_p8(poly8x8x2_t a, uint8x8_t b) {
1038*67e74705SXin Li   return vtbl2_p8(a, b);
1039*67e74705SXin Li }
1040*67e74705SXin Li 
1041*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vqtbl2_p8([2 x <16 x i8>] %a.coerce, <8 x i8> %b) #0 {
1042*67e74705SXin Li // CHECK:   [[__P0_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
1043*67e74705SXin Li // CHECK:   [[A:%.*]] = alloca %struct.poly8x16x2_t, align 16
1044*67e74705SXin Li // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[A]], i32 0, i32 0
1045*67e74705SXin Li // CHECK:   store [2 x <16 x i8>] [[A]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
1046*67e74705SXin Li // CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[A]], i32 0, i32 0
1047*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16
1048*67e74705SXin Li // CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__P0_I]], i32 0, i32 0
1049*67e74705SXin Li // CHECK:   store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
1050*67e74705SXin Li // CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__P0_I]], i32 0, i32 0
1051*67e74705SXin Li // CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
1052*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
1053*67e74705SXin Li // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__P0_I]], i32 0, i32 0
1054*67e74705SXin Li // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
1055*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
1056*67e74705SXin Li // CHECK:   [[VTBL2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> %b) #2
1057*67e74705SXin Li // CHECK:   ret <8 x i8> [[VTBL2_I]]
test_vqtbl2_p8(poly8x16x2_t a,uint8x8_t b)1058*67e74705SXin Li poly8x8_t test_vqtbl2_p8(poly8x16x2_t a, uint8x8_t b) {
1059*67e74705SXin Li   return vqtbl2_p8(a, b);
1060*67e74705SXin Li }
1061*67e74705SXin Li 
1062*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vtbl3_p8([3 x <8 x i8>] %a.coerce, <8 x i8> %b) #0 {
1063*67e74705SXin Li // CHECK:   [[__P0_I:%.*]] = alloca %struct.poly8x8x3_t, align 8
1064*67e74705SXin Li // CHECK:   [[A:%.*]] = alloca %struct.poly8x8x3_t, align 8
1065*67e74705SXin Li // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[A]], i32 0, i32 0
1066*67e74705SXin Li // CHECK:   store [3 x <8 x i8>] [[A]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
1067*67e74705SXin Li // CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[A]], i32 0, i32 0
1068*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = load [3 x <8 x i8>], [3 x <8 x i8>]* [[COERCE_DIVE1]], align 8
1069*67e74705SXin Li // CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P0_I]], i32 0, i32 0
1070*67e74705SXin Li // CHECK:   store [3 x <8 x i8>] [[TMP0]], [3 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
1071*67e74705SXin Li // CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P0_I]], i32 0, i32 0
1072*67e74705SXin Li // CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
1073*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
1074*67e74705SXin Li // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P0_I]], i32 0, i32 0
1075*67e74705SXin Li // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
1076*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
1077*67e74705SXin Li // CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P0_I]], i32 0, i32 0
1078*67e74705SXin Li // CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i64 0, i64 2
1079*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
1080*67e74705SXin Li // CHECK:   [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1081*67e74705SXin Li // CHECK:   [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1082*67e74705SXin Li // CHECK:   [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> %b) #2
1083*67e74705SXin Li // CHECK:   ret <8 x i8> [[VTBL26_I]]
test_vtbl3_p8(poly8x8x3_t a,uint8x8_t b)1084*67e74705SXin Li poly8x8_t test_vtbl3_p8(poly8x8x3_t a, uint8x8_t b) {
1085*67e74705SXin Li   return vtbl3_p8(a, b);
1086*67e74705SXin Li }
1087*67e74705SXin Li 
1088*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vqtbl3_p8([3 x <16 x i8>] %a.coerce, <8 x i8> %b) #0 {
1089*67e74705SXin Li // CHECK:   [[__P0_I:%.*]] = alloca %struct.poly8x16x3_t, align 16
1090*67e74705SXin Li // CHECK:   [[A:%.*]] = alloca %struct.poly8x16x3_t, align 16
1091*67e74705SXin Li // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[A]], i32 0, i32 0
1092*67e74705SXin Li // CHECK:   store [3 x <16 x i8>] [[A]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
1093*67e74705SXin Li // CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[A]], i32 0, i32 0
1094*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16
1095*67e74705SXin Li // CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__P0_I]], i32 0, i32 0
1096*67e74705SXin Li // CHECK:   store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
1097*67e74705SXin Li // CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__P0_I]], i32 0, i32 0
1098*67e74705SXin Li // CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
1099*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
1100*67e74705SXin Li // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__P0_I]], i32 0, i32 0
1101*67e74705SXin Li // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
1102*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
1103*67e74705SXin Li // CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__P0_I]], i32 0, i32 0
1104*67e74705SXin Li // CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
1105*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
1106*67e74705SXin Li // CHECK:   [[VTBL3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> %b) #2
1107*67e74705SXin Li // CHECK:   ret <8 x i8> [[VTBL3_I]]
test_vqtbl3_p8(poly8x16x3_t a,uint8x8_t b)1108*67e74705SXin Li poly8x8_t test_vqtbl3_p8(poly8x16x3_t a, uint8x8_t b) {
1109*67e74705SXin Li   return vqtbl3_p8(a, b);
1110*67e74705SXin Li }
1111*67e74705SXin Li 
1112*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vtbl4_p8([4 x <8 x i8>] %a.coerce, <8 x i8> %b) #0 {
1113*67e74705SXin Li // CHECK:   [[__P0_I:%.*]] = alloca %struct.poly8x8x4_t, align 8
1114*67e74705SXin Li // CHECK:   [[A:%.*]] = alloca %struct.poly8x8x4_t, align 8
1115*67e74705SXin Li // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[A]], i32 0, i32 0
1116*67e74705SXin Li // CHECK:   store [4 x <8 x i8>] [[A]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
1117*67e74705SXin Li // CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[A]], i32 0, i32 0
1118*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = load [4 x <8 x i8>], [4 x <8 x i8>]* [[COERCE_DIVE1]], align 8
1119*67e74705SXin Li // CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0
1120*67e74705SXin Li // CHECK:   store [4 x <8 x i8>] [[TMP0]], [4 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
1121*67e74705SXin Li // CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0
1122*67e74705SXin Li // CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
1123*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
1124*67e74705SXin Li // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0
1125*67e74705SXin Li // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
1126*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
1127*67e74705SXin Li // CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0
1128*67e74705SXin Li // CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i64 0, i64 2
1129*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
1130*67e74705SXin Li // CHECK:   [[VAL5_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0
1131*67e74705SXin Li // CHECK:   [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i64 0, i64 3
1132*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
1133*67e74705SXin Li // CHECK:   [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1134*67e74705SXin Li // CHECK:   [[VTBL27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1135*67e74705SXin Li // CHECK:   [[VTBL28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL27_I]], <8 x i8> %b) #2
1136*67e74705SXin Li // CHECK:   ret <8 x i8> [[VTBL28_I]]
test_vtbl4_p8(poly8x8x4_t a,uint8x8_t b)1137*67e74705SXin Li poly8x8_t test_vtbl4_p8(poly8x8x4_t a, uint8x8_t b) {
1138*67e74705SXin Li   return vtbl4_p8(a, b);
1139*67e74705SXin Li }
1140*67e74705SXin Li 
1141*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vqtbl4_p8([4 x <16 x i8>] %a.coerce, <8 x i8> %b) #0 {
1142*67e74705SXin Li // CHECK:   [[__P0_I:%.*]] = alloca %struct.poly8x16x4_t, align 16
1143*67e74705SXin Li // CHECK:   [[A:%.*]] = alloca %struct.poly8x16x4_t, align 16
1144*67e74705SXin Li // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[A]], i32 0, i32 0
1145*67e74705SXin Li // CHECK:   store [4 x <16 x i8>] [[A]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
1146*67e74705SXin Li // CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[A]], i32 0, i32 0
1147*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16
1148*67e74705SXin Li // CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
1149*67e74705SXin Li // CHECK:   store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
1150*67e74705SXin Li // CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
1151*67e74705SXin Li // CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
1152*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
1153*67e74705SXin Li // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
1154*67e74705SXin Li // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
1155*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
1156*67e74705SXin Li // CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
1157*67e74705SXin Li // CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
1158*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
1159*67e74705SXin Li // CHECK:   [[VAL5_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
1160*67e74705SXin Li // CHECK:   [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
1161*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
1162*67e74705SXin Li // CHECK:   [[VTBL4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> %b) #2
1163*67e74705SXin Li // CHECK:   ret <8 x i8> [[VTBL4_I]]
test_vqtbl4_p8(poly8x16x4_t a,uint8x8_t b)1164*67e74705SXin Li poly8x8_t test_vqtbl4_p8(poly8x16x4_t a, uint8x8_t b) {
1165*67e74705SXin Li   return vqtbl4_p8(a, b);
1166*67e74705SXin Li }
1167*67e74705SXin Li 
1168*67e74705SXin Li // CHECK-LABEL: define <16 x i8> @test_vqtbl1q_p8(<16 x i8> %a, <16 x i8> %b) #0 {
1169*67e74705SXin Li // CHECK:   [[VTBL1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> %a, <16 x i8> %b) #2
1170*67e74705SXin Li // CHECK:   ret <16 x i8> [[VTBL1_I]]
test_vqtbl1q_p8(poly8x16_t a,uint8x16_t b)1171*67e74705SXin Li poly8x16_t test_vqtbl1q_p8(poly8x16_t a, uint8x16_t b) {
1172*67e74705SXin Li   return vqtbl1q_p8(a, b);
1173*67e74705SXin Li }
1174*67e74705SXin Li 
1175*67e74705SXin Li // CHECK-LABEL: define <16 x i8> @test_vqtbl2q_p8([2 x <16 x i8>] %a.coerce, <16 x i8> %b) #0 {
1176*67e74705SXin Li // CHECK:   [[__P0_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
1177*67e74705SXin Li // CHECK:   [[A:%.*]] = alloca %struct.poly8x16x2_t, align 16
1178*67e74705SXin Li // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[A]], i32 0, i32 0
1179*67e74705SXin Li // CHECK:   store [2 x <16 x i8>] [[A]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
1180*67e74705SXin Li // CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[A]], i32 0, i32 0
1181*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16
1182*67e74705SXin Li // CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__P0_I]], i32 0, i32 0
1183*67e74705SXin Li // CHECK:   store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
1184*67e74705SXin Li // CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__P0_I]], i32 0, i32 0
1185*67e74705SXin Li // CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
1186*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
1187*67e74705SXin Li // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__P0_I]], i32 0, i32 0
1188*67e74705SXin Li // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
1189*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
1190*67e74705SXin Li // CHECK:   [[VTBL2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> %b) #2
1191*67e74705SXin Li // CHECK:   ret <16 x i8> [[VTBL2_I]]
test_vqtbl2q_p8(poly8x16x2_t a,uint8x16_t b)1192*67e74705SXin Li poly8x16_t test_vqtbl2q_p8(poly8x16x2_t a, uint8x16_t b) {
1193*67e74705SXin Li   return vqtbl2q_p8(a, b);
1194*67e74705SXin Li }
1195*67e74705SXin Li 
1196*67e74705SXin Li // CHECK-LABEL: define <16 x i8> @test_vqtbl3q_p8([3 x <16 x i8>] %a.coerce, <16 x i8> %b) #0 {
1197*67e74705SXin Li // CHECK:   [[__P0_I:%.*]] = alloca %struct.poly8x16x3_t, align 16
1198*67e74705SXin Li // CHECK:   [[A:%.*]] = alloca %struct.poly8x16x3_t, align 16
1199*67e74705SXin Li // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[A]], i32 0, i32 0
1200*67e74705SXin Li // CHECK:   store [3 x <16 x i8>] [[A]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
1201*67e74705SXin Li // CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[A]], i32 0, i32 0
1202*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16
1203*67e74705SXin Li // CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__P0_I]], i32 0, i32 0
1204*67e74705SXin Li // CHECK:   store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
1205*67e74705SXin Li // CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__P0_I]], i32 0, i32 0
1206*67e74705SXin Li // CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
1207*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
1208*67e74705SXin Li // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__P0_I]], i32 0, i32 0
1209*67e74705SXin Li // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
1210*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
1211*67e74705SXin Li // CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__P0_I]], i32 0, i32 0
1212*67e74705SXin Li // CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
1213*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
1214*67e74705SXin Li // CHECK:   [[VTBL3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> %b) #2
1215*67e74705SXin Li // CHECK:   ret <16 x i8> [[VTBL3_I]]
test_vqtbl3q_p8(poly8x16x3_t a,uint8x16_t b)1216*67e74705SXin Li poly8x16_t test_vqtbl3q_p8(poly8x16x3_t a, uint8x16_t b) {
1217*67e74705SXin Li   return vqtbl3q_p8(a, b);
1218*67e74705SXin Li }
1219*67e74705SXin Li 
1220*67e74705SXin Li // CHECK-LABEL: define <16 x i8> @test_vqtbl4q_p8([4 x <16 x i8>] %a.coerce, <16 x i8> %b) #0 {
1221*67e74705SXin Li // CHECK:   [[__P0_I:%.*]] = alloca %struct.poly8x16x4_t, align 16
1222*67e74705SXin Li // CHECK:   [[A:%.*]] = alloca %struct.poly8x16x4_t, align 16
1223*67e74705SXin Li // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[A]], i32 0, i32 0
1224*67e74705SXin Li // CHECK:   store [4 x <16 x i8>] [[A]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
1225*67e74705SXin Li // CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[A]], i32 0, i32 0
1226*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16
1227*67e74705SXin Li // CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
1228*67e74705SXin Li // CHECK:   store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
1229*67e74705SXin Li // CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
1230*67e74705SXin Li // CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
1231*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
1232*67e74705SXin Li // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
1233*67e74705SXin Li // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
1234*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
1235*67e74705SXin Li // CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
1236*67e74705SXin Li // CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
1237*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
1238*67e74705SXin Li // CHECK:   [[VAL5_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
1239*67e74705SXin Li // CHECK:   [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
1240*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
1241*67e74705SXin Li // CHECK:   [[VTBL4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> %b) #2
1242*67e74705SXin Li // CHECK:   ret <16 x i8> [[VTBL4_I]]
test_vqtbl4q_p8(poly8x16x4_t a,uint8x16_t b)1243*67e74705SXin Li poly8x16_t test_vqtbl4q_p8(poly8x16x4_t a, uint8x16_t b) {
1244*67e74705SXin Li   return vqtbl4q_p8(a, b);
1245*67e74705SXin Li }
1246*67e74705SXin Li 
1247*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vtbx1_p8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
1248*67e74705SXin Li // CHECK:   [[VTBL1_I:%.*]] = shufflevector <8 x i8> %b, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1249*67e74705SXin Li // CHECK:   [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %c) #2
1250*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = icmp uge <8 x i8> %c, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
1251*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
1252*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = and <8 x i8> [[TMP1]], %a
1253*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = xor <8 x i8> [[TMP1]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
1254*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = and <8 x i8> [[TMP3]], [[VTBL11_I]]
1255*67e74705SXin Li // CHECK:   [[VTBX_I:%.*]] = or <8 x i8> [[TMP2]], [[TMP4]]
1256*67e74705SXin Li // CHECK:   ret <8 x i8> [[VTBX_I]]
test_vtbx1_p8(poly8x8_t a,poly8x8_t b,uint8x8_t c)1257*67e74705SXin Li poly8x8_t test_vtbx1_p8(poly8x8_t a, poly8x8_t b, uint8x8_t c) {
1258*67e74705SXin Li   return vtbx1_p8(a, b, c);
1259*67e74705SXin Li }
1260*67e74705SXin Li 
1261*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vtbx2_p8(<8 x i8> %a, [2 x <8 x i8>] %b.coerce, <8 x i8> %c) #0 {
1262*67e74705SXin Li // CHECK:   [[__P1_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
1263*67e74705SXin Li // CHECK:   [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8
1264*67e74705SXin Li // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0
1265*67e74705SXin Li // CHECK:   store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
1266*67e74705SXin Li // CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0
1267*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = load [2 x <8 x i8>], [2 x <8 x i8>]* [[COERCE_DIVE1]], align 8
1268*67e74705SXin Li // CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__P1_I]], i32 0, i32 0
1269*67e74705SXin Li // CHECK:   store [2 x <8 x i8>] [[TMP0]], [2 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
1270*67e74705SXin Li // CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__P1_I]], i32 0, i32 0
1271*67e74705SXin Li // CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
1272*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
1273*67e74705SXin Li // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__P1_I]], i32 0, i32 0
1274*67e74705SXin Li // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
1275*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
1276*67e74705SXin Li // CHECK:   [[VTBX1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1277*67e74705SXin Li // CHECK:   [[VTBX13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %a, <16 x i8> [[VTBX1_I]], <8 x i8> %c) #2
1278*67e74705SXin Li // CHECK:   ret <8 x i8> [[VTBX13_I]]
test_vtbx2_p8(poly8x8_t a,poly8x8x2_t b,uint8x8_t c)1279*67e74705SXin Li poly8x8_t test_vtbx2_p8(poly8x8_t a, poly8x8x2_t b, uint8x8_t c) {
1280*67e74705SXin Li   return vtbx2_p8(a, b, c);
1281*67e74705SXin Li }
1282*67e74705SXin Li 
1283*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vtbx3_p8(<8 x i8> %a, [3 x <8 x i8>] %b.coerce, <8 x i8> %c) #0 {
1284*67e74705SXin Li // CHECK:   [[__P1_I:%.*]] = alloca %struct.poly8x8x3_t, align 8
1285*67e74705SXin Li // CHECK:   [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
1286*67e74705SXin Li // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0
1287*67e74705SXin Li // CHECK:   store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
1288*67e74705SXin Li // CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0
1289*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = load [3 x <8 x i8>], [3 x <8 x i8>]* [[COERCE_DIVE1]], align 8
1290*67e74705SXin Li // CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P1_I]], i32 0, i32 0
1291*67e74705SXin Li // CHECK:   store [3 x <8 x i8>] [[TMP0]], [3 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
1292*67e74705SXin Li // CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P1_I]], i32 0, i32 0
1293*67e74705SXin Li // CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
1294*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
1295*67e74705SXin Li // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P1_I]], i32 0, i32 0
1296*67e74705SXin Li // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
1297*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
1298*67e74705SXin Li // CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P1_I]], i32 0, i32 0
1299*67e74705SXin Li // CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i64 0, i64 2
1300*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
1301*67e74705SXin Li // CHECK:   [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1302*67e74705SXin Li // CHECK:   [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1303*67e74705SXin Li // CHECK:   [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> %c) #2
1304*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = icmp uge <8 x i8> %c, <i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24>
1305*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i8>
1306*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = and <8 x i8> [[TMP5]], %a
1307*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = xor <8 x i8> [[TMP5]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
1308*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = and <8 x i8> [[TMP7]], [[VTBL26_I]]
1309*67e74705SXin Li // CHECK:   [[VTBX_I:%.*]] = or <8 x i8> [[TMP6]], [[TMP8]]
1310*67e74705SXin Li // CHECK:   ret <8 x i8> [[VTBX_I]]
test_vtbx3_p8(poly8x8_t a,poly8x8x3_t b,uint8x8_t c)1311*67e74705SXin Li poly8x8_t test_vtbx3_p8(poly8x8_t a, poly8x8x3_t b, uint8x8_t c) {
1312*67e74705SXin Li   return vtbx3_p8(a, b, c);
1313*67e74705SXin Li }
1314*67e74705SXin Li 
1315*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vtbx4_p8(<8 x i8> %a, [4 x <8 x i8>] %b.coerce, <8 x i8> %c) #0 {
1316*67e74705SXin Li // CHECK:   [[__P1_I:%.*]] = alloca %struct.poly8x8x4_t, align 8
1317*67e74705SXin Li // CHECK:   [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
1318*67e74705SXin Li // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0
1319*67e74705SXin Li // CHECK:   store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
1320*67e74705SXin Li // CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0
1321*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = load [4 x <8 x i8>], [4 x <8 x i8>]* [[COERCE_DIVE1]], align 8
1322*67e74705SXin Li // CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P1_I]], i32 0, i32 0
1323*67e74705SXin Li // CHECK:   store [4 x <8 x i8>] [[TMP0]], [4 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
1324*67e74705SXin Li // CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P1_I]], i32 0, i32 0
1325*67e74705SXin Li // CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
1326*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
1327*67e74705SXin Li // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P1_I]], i32 0, i32 0
1328*67e74705SXin Li // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
1329*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
1330*67e74705SXin Li // CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P1_I]], i32 0, i32 0
1331*67e74705SXin Li // CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i64 0, i64 2
1332*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
1333*67e74705SXin Li // CHECK:   [[VAL5_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P1_I]], i32 0, i32 0
1334*67e74705SXin Li // CHECK:   [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i64 0, i64 3
1335*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
1336*67e74705SXin Li // CHECK:   [[VTBX2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1337*67e74705SXin Li // CHECK:   [[VTBX27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1338*67e74705SXin Li // CHECK:   [[VTBX28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %a, <16 x i8> [[VTBX2_I]], <16 x i8> [[VTBX27_I]], <8 x i8> %c) #2
1339*67e74705SXin Li // CHECK:   ret <8 x i8> [[VTBX28_I]]
test_vtbx4_p8(poly8x8_t a,poly8x8x4_t b,uint8x8_t c)1340*67e74705SXin Li poly8x8_t test_vtbx4_p8(poly8x8_t a, poly8x8x4_t b, uint8x8_t c) {
1341*67e74705SXin Li   return vtbx4_p8(a, b, c);
1342*67e74705SXin Li }
1343*67e74705SXin Li 
1344*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vqtbx1_p8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) #0 {
1345*67e74705SXin Li // CHECK:   [[VTBX1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) #2
1346*67e74705SXin Li // CHECK:   ret <8 x i8> [[VTBX1_I]]
test_vqtbx1_p8(poly8x8_t a,uint8x16_t b,uint8x8_t c)1347*67e74705SXin Li poly8x8_t test_vqtbx1_p8(poly8x8_t a, uint8x16_t b, uint8x8_t c) {
1348*67e74705SXin Li   return vqtbx1_p8(a, b, c);
1349*67e74705SXin Li }
1350*67e74705SXin Li 
1351*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vqtbx2_p8(<8 x i8> %a, [2 x <16 x i8>] %b.coerce, <8 x i8> %c) #0 {
1352*67e74705SXin Li // CHECK:   [[__P1_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
1353*67e74705SXin Li // CHECK:   [[B:%.*]] = alloca %struct.poly8x16x2_t, align 16
1354*67e74705SXin Li // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[B]], i32 0, i32 0
1355*67e74705SXin Li // CHECK:   store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
1356*67e74705SXin Li // CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[B]], i32 0, i32 0
1357*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16
1358*67e74705SXin Li // CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__P1_I]], i32 0, i32 0
1359*67e74705SXin Li // CHECK:   store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
1360*67e74705SXin Li // CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__P1_I]], i32 0, i32 0
1361*67e74705SXin Li // CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
1362*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
1363*67e74705SXin Li // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__P1_I]], i32 0, i32 0
1364*67e74705SXin Li // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
1365*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
1366*67e74705SXin Li // CHECK:   [[VTBX2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> %c) #2
1367*67e74705SXin Li // CHECK:   ret <8 x i8> [[VTBX2_I]]
test_vqtbx2_p8(poly8x8_t a,poly8x16x2_t b,uint8x8_t c)1368*67e74705SXin Li poly8x8_t test_vqtbx2_p8(poly8x8_t a, poly8x16x2_t b, uint8x8_t c) {
1369*67e74705SXin Li   return vqtbx2_p8(a, b, c);
1370*67e74705SXin Li }
1371*67e74705SXin Li 
1372*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vqtbx3_p8(<8 x i8> %a, [3 x <16 x i8>] %b.coerce, <8 x i8> %c) #0 {
1373*67e74705SXin Li // CHECK:   [[__P1_I:%.*]] = alloca %struct.poly8x16x3_t, align 16
1374*67e74705SXin Li // CHECK:   [[B:%.*]] = alloca %struct.poly8x16x3_t, align 16
1375*67e74705SXin Li // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[B]], i32 0, i32 0
1376*67e74705SXin Li // CHECK:   store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
1377*67e74705SXin Li // CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[B]], i32 0, i32 0
1378*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16
1379*67e74705SXin Li // CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__P1_I]], i32 0, i32 0
1380*67e74705SXin Li // CHECK:   store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
1381*67e74705SXin Li // CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__P1_I]], i32 0, i32 0
1382*67e74705SXin Li // CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
1383*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
1384*67e74705SXin Li // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__P1_I]], i32 0, i32 0
1385*67e74705SXin Li // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
1386*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
1387*67e74705SXin Li // CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__P1_I]], i32 0, i32 0
1388*67e74705SXin Li // CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
1389*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
1390*67e74705SXin Li // CHECK:   [[VTBX3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> %c) #2
1391*67e74705SXin Li // CHECK:   ret <8 x i8> [[VTBX3_I]]
test_vqtbx3_p8(poly8x8_t a,poly8x16x3_t b,uint8x8_t c)1392*67e74705SXin Li poly8x8_t test_vqtbx3_p8(poly8x8_t a, poly8x16x3_t b, uint8x8_t c) {
1393*67e74705SXin Li   return vqtbx3_p8(a, b, c);
1394*67e74705SXin Li }
1395*67e74705SXin Li 
1396*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vqtbx4_p8(<8 x i8> %a, [4 x <16 x i8>] %b.coerce, <8 x i8> %c) #0 {
1397*67e74705SXin Li // CHECK:   [[__P1_I:%.*]] = alloca %struct.poly8x16x4_t, align 16
1398*67e74705SXin Li // CHECK:   [[B:%.*]] = alloca %struct.poly8x16x4_t, align 16
1399*67e74705SXin Li // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[B]], i32 0, i32 0
1400*67e74705SXin Li // CHECK:   store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
1401*67e74705SXin Li // CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[B]], i32 0, i32 0
1402*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16
1403*67e74705SXin Li // CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P1_I]], i32 0, i32 0
1404*67e74705SXin Li // CHECK:   store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
1405*67e74705SXin Li // CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P1_I]], i32 0, i32 0
1406*67e74705SXin Li // CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
1407*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
1408*67e74705SXin Li // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P1_I]], i32 0, i32 0
1409*67e74705SXin Li // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
1410*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
1411*67e74705SXin Li // CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P1_I]], i32 0, i32 0
1412*67e74705SXin Li // CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
1413*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
1414*67e74705SXin Li // CHECK:   [[VAL5_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P1_I]], i32 0, i32 0
1415*67e74705SXin Li // CHECK:   [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
1416*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
1417*67e74705SXin Li // CHECK:   [[VTBX4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> %c) #2
1418*67e74705SXin Li // CHECK:   ret <8 x i8> [[VTBX4_I]]
test_vqtbx4_p8(poly8x8_t a,poly8x16x4_t b,uint8x8_t c)1419*67e74705SXin Li poly8x8_t test_vqtbx4_p8(poly8x8_t a, poly8x16x4_t b, uint8x8_t c) {
1420*67e74705SXin Li   return vqtbx4_p8(a, b, c);
1421*67e74705SXin Li }
1422*67e74705SXin Li 
1423*67e74705SXin Li // CHECK-LABEL: define <16 x i8> @test_vqtbx1q_p8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 {
1424*67e74705SXin Li // CHECK:   [[VTBX1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #2
1425*67e74705SXin Li // CHECK:   ret <16 x i8> [[VTBX1_I]]
test_vqtbx1q_p8(poly8x16_t a,uint8x16_t b,uint8x16_t c)1426*67e74705SXin Li poly8x16_t test_vqtbx1q_p8(poly8x16_t a, uint8x16_t b, uint8x16_t c) {
1427*67e74705SXin Li   return vqtbx1q_p8(a, b, c);
1428*67e74705SXin Li }
1429*67e74705SXin Li 
1430*67e74705SXin Li // CHECK-LABEL: define <16 x i8> @test_vqtbx2q_p8(<16 x i8> %a, [2 x <16 x i8>] %b.coerce, <16 x i8> %c) #0 {
1431*67e74705SXin Li // CHECK:   [[__P1_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
1432*67e74705SXin Li // CHECK:   [[B:%.*]] = alloca %struct.poly8x16x2_t, align 16
1433*67e74705SXin Li // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[B]], i32 0, i32 0
1434*67e74705SXin Li // CHECK:   store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
1435*67e74705SXin Li // CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[B]], i32 0, i32 0
1436*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16
1437*67e74705SXin Li // CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__P1_I]], i32 0, i32 0
1438*67e74705SXin Li // CHECK:   store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
1439*67e74705SXin Li // CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__P1_I]], i32 0, i32 0
1440*67e74705SXin Li // CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
1441*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
1442*67e74705SXin Li // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__P1_I]], i32 0, i32 0
1443*67e74705SXin Li // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
1444*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
1445*67e74705SXin Li // CHECK:   [[VTBX2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> %c) #2
1446*67e74705SXin Li // CHECK:   ret <16 x i8> [[VTBX2_I]]
test_vqtbx2q_p8(poly8x16_t a,poly8x16x2_t b,uint8x16_t c)1447*67e74705SXin Li poly8x16_t test_vqtbx2q_p8(poly8x16_t a, poly8x16x2_t b, uint8x16_t c) {
1448*67e74705SXin Li   return vqtbx2q_p8(a, b, c);
1449*67e74705SXin Li }
1450*67e74705SXin Li 
1451*67e74705SXin Li // CHECK-LABEL: define <16 x i8> @test_vqtbx3q_p8(<16 x i8> %a, [3 x <16 x i8>] %b.coerce, <16 x i8> %c) #0 {
1452*67e74705SXin Li // CHECK:   [[__P1_I:%.*]] = alloca %struct.poly8x16x3_t, align 16
1453*67e74705SXin Li // CHECK:   [[B:%.*]] = alloca %struct.poly8x16x3_t, align 16
1454*67e74705SXin Li // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[B]], i32 0, i32 0
1455*67e74705SXin Li // CHECK:   store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
1456*67e74705SXin Li // CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[B]], i32 0, i32 0
1457*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16
1458*67e74705SXin Li // CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__P1_I]], i32 0, i32 0
1459*67e74705SXin Li // CHECK:   store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
1460*67e74705SXin Li // CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__P1_I]], i32 0, i32 0
1461*67e74705SXin Li // CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
1462*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
1463*67e74705SXin Li // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__P1_I]], i32 0, i32 0
1464*67e74705SXin Li // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
1465*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
1466*67e74705SXin Li // CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__P1_I]], i32 0, i32 0
1467*67e74705SXin Li // CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
1468*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
1469*67e74705SXin Li // CHECK:   [[VTBX3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> %c) #2
1470*67e74705SXin Li // CHECK:   ret <16 x i8> [[VTBX3_I]]
test_vqtbx3q_p8(poly8x16_t a,poly8x16x3_t b,uint8x16_t c)1471*67e74705SXin Li poly8x16_t test_vqtbx3q_p8(poly8x16_t a, poly8x16x3_t b, uint8x16_t c) {
1472*67e74705SXin Li   return vqtbx3q_p8(a, b, c);
1473*67e74705SXin Li }
1474*67e74705SXin Li 
1475*67e74705SXin Li // CHECK-LABEL: define <16 x i8> @test_vqtbx4q_p8(<16 x i8> %a, [4 x <16 x i8>] %b.coerce, <16 x i8> %c) #0 {
1476*67e74705SXin Li // CHECK:   [[__P1_I:%.*]] = alloca %struct.poly8x16x4_t, align 16
1477*67e74705SXin Li // CHECK:   [[B:%.*]] = alloca %struct.poly8x16x4_t, align 16
1478*67e74705SXin Li // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[B]], i32 0, i32 0
1479*67e74705SXin Li // CHECK:   store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
1480*67e74705SXin Li // CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[B]], i32 0, i32 0
1481*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16
1482*67e74705SXin Li // CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P1_I]], i32 0, i32 0
1483*67e74705SXin Li // CHECK:   store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
1484*67e74705SXin Li // CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P1_I]], i32 0, i32 0
1485*67e74705SXin Li // CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
1486*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
1487*67e74705SXin Li // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P1_I]], i32 0, i32 0
1488*67e74705SXin Li // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
1489*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
1490*67e74705SXin Li // CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P1_I]], i32 0, i32 0
1491*67e74705SXin Li // CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
1492*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
1493*67e74705SXin Li // CHECK:   [[VAL5_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P1_I]], i32 0, i32 0
1494*67e74705SXin Li // CHECK:   [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
1495*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
1496*67e74705SXin Li // CHECK:   [[VTBX4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> %c) #2
1497*67e74705SXin Li // CHECK:   ret <16 x i8> [[VTBX4_I]]
test_vqtbx4q_p8(poly8x16_t a,poly8x16x4_t b,uint8x16_t c)1498*67e74705SXin Li poly8x16_t test_vqtbx4q_p8(poly8x16_t a, poly8x16x4_t b, uint8x16_t c) {
1499*67e74705SXin Li   return vqtbx4q_p8(a, b, c);
1500*67e74705SXin Li }
1501