; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX

; Both shuffle inputs are packusdw results and the mask picks elements 0-3 of
; each one. The checks below expect a single pack of xmm0 and xmm2.
define <8 x i16> @blend_packusdw(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2, <4 x i32> %a3) {
; SSE41-LABEL: blend_packusdw:
; SSE41:       # BB#0:
; SSE41-NEXT:    packusdw %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: blend_packusdw:
; AVX:       # BB#0:
; AVX-NEXT:    vpackusdw %xmm2, %xmm0, %xmm0
; AVX-NEXT:    retq
  %p0 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1)
  %p1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a2, <4 x i32> %a3)
  %s0 = shufflevector <8 x i16> %p0, <8 x i16> %p1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
  ret <8 x i16> %s0
}

; Same pattern with packuswb: the mask picks elements 0-7 of each pack result.
; The checks below expect a single pack of xmm0 and xmm2.
define <16 x i8> @blend_packuswb(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2, <8 x i16> %a3) {
; SSE41-LABEL: blend_packuswb:
; SSE41:       # BB#0:
; SSE41-NEXT:    packuswb %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: blend_packuswb:
; AVX:       # BB#0:
; AVX-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX-NEXT:    retq
  %p0 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1)
  %p1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a2, <8 x i16> %a3)
  %s0 = shufflevector <16 x i8> %p0, <16 x i8> %p1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
  ret <16 x i8> %s0
}

; Mixed case: %p0 is a packusdw result while %b1 is a packuswb result bitcast
; to <8 x i16>. The checks below expect both packs to remain, with their low
; quadwords joined by (v)punpcklqdq.
define <8 x i16> @blend_packusdw_packuswb(<4 x i32> %a0, <4 x i32> %a1, <8 x i16> %a2, <8 x i16> %a3) {
; SSE41-LABEL: blend_packusdw_packuswb:
; SSE41:       # BB#0:
; SSE41-NEXT:    packusdw %xmm1, %xmm0
; SSE41-NEXT:    packuswb %xmm3, %xmm2
; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE41-NEXT:    retq
;
; AVX-LABEL: blend_packusdw_packuswb:
; AVX:       # BB#0:
; AVX-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpackuswb %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT:    retq
  %p0 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1)
  %p1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a2, <8 x i16> %a3)
  %b1 = bitcast <16 x i8> %p1 to <8 x i16>
  %s0 = shufflevector <8 x i16> %p0, <8 x i16> %b1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
  ret <8 x i16> %s0
}

; Declarations of the x86 pack intrinsics called above.
declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>)