; xref: /aosp_15_r20/external/llvm/test/CodeGen/X86/vector-shuffle-sse41.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX

; Two llvm.x86.sse41.packusdw calls (%a0/%a1 and %a2/%a3) feed a shuffle that
; keeps only i16 lanes 0-3 of each result. The expected output below is a
; single pack of xmm0 with xmm2, i.e. the shuffle is folded into one pack of
; the two first operands (assuming the usual x86-64 argument-to-xmm mapping
; for %a0 and %a2).
define <8 x i16> @blend_packusdw(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2, <4 x i32> %a3) {
; SSE41-LABEL: blend_packusdw:
; SSE41:       # BB#0:
; SSE41-NEXT:    packusdw %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: blend_packusdw:
; AVX:       # BB#0:
; AVX-NEXT:    vpackusdw %xmm2, %xmm0, %xmm0
; AVX-NEXT:    retq
  %p0 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1)
  %p1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a2, <4 x i32> %a3)
  ; Mask indices 0-3 select the low four lanes of %p0, 8-11 the low four of %p1.
  %s0 = shufflevector <8 x i16> %p0, <8 x i16> %p1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
  ret <8 x i16> %s0
}

; Byte-sized counterpart of the packusdw blend: two
; llvm.x86.sse2.packuswb.128 calls feed a shuffle that keeps only i8 lanes
; 0-7 of each result. The expected output below is a single pack of xmm0 with
; xmm2 (assuming the usual x86-64 argument-to-xmm mapping for %a0 and %a2).
define <16 x i8> @blend_packuswb(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2, <8 x i16> %a3) {
; SSE41-LABEL: blend_packuswb:
; SSE41:       # BB#0:
; SSE41-NEXT:    packuswb %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: blend_packuswb:
; AVX:       # BB#0:
; AVX-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX-NEXT:    retq
  %p0 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1)
  %p1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a2, <8 x i16> %a3)
  ; Mask indices 0-7 select the low eight lanes of %p0, 16-23 the low eight of %p1.
  %s0 = shufflevector <16 x i8> %p0, <16 x i8> %p1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
  ret <16 x i8> %s0
}

; Mixed case: one operand of the shuffle is a packusdw result and the other is
; a packuswb result bitcast to <8 x i16>. The expected output below keeps both
; pack instructions and joins their low 64-bit halves with an unpack, i.e. no
; folding into a single pack is expected here.
define <8 x i16> @blend_packusdw_packuswb(<4 x i32> %a0, <4 x i32> %a1, <8 x i16> %a2, <8 x i16> %a3) {
; SSE41-LABEL: blend_packusdw_packuswb:
; SSE41:       # BB#0:
; SSE41-NEXT:    packusdw %xmm1, %xmm0
; SSE41-NEXT:    packuswb %xmm3, %xmm2
; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE41-NEXT:    retq
;
; AVX-LABEL: blend_packusdw_packuswb:
; AVX:       # BB#0:
; AVX-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpackuswb %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT:    retq
  %p0 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1)
  %p1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a2, <8 x i16> %a3)
  ; Reinterpret the byte pack as i16 lanes so both shuffle operands share a type.
  %b1 = bitcast <16 x i8> %p1 to <8 x i16>
  ; Mask indices 0-3 select the low four lanes of %p0, 8-11 the low four of %b1.
  %s0 = shufflevector <8 x i16> %p0, <8 x i16> %b1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
  ret <8 x i16> %s0
}

; Declarations of the X86 pack intrinsics called by the test functions in this
; file.
declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>)