; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse2 < %s | FileCheck %s --check-prefix=SSE
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s --check-prefix=AVX

; Verify that we're folding the load into the math instruction.
; This pattern is generated out of the simplest intrinsics usage:
;  _mm_add_ss(a, _mm_load_ss(b));
;
; Every function below has the same shape: extract element 0 of the vector
; argument, load a scalar through the pointer argument, apply one scalar FP
; operation, and insert the result back into element 0 of the original vector.
; The expected code for each run configuration is a single scalar math
; instruction that takes its second operand directly from memory, followed by
; the return.

; Scalar float add: the load should fold into addss / vaddss.
define <4 x float> @addss(<4 x float> %va, float* %pb) {
; SSE-LABEL: addss:
; SSE:       # BB#0:
; SSE-NEXT:    addss (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: addss:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = extractelement <4 x float> %va, i32 0
  %b = load float, float* %pb
  %r = fadd float %a, %b
  %vr = insertelement <4 x float> %va, float %r, i32 0
  ret <4 x float> %vr
}

; Scalar double add: the load should fold into addsd / vaddsd.
define <2 x double> @addsd(<2 x double> %va, double* %pb) {
; SSE-LABEL: addsd:
; SSE:       # BB#0:
; SSE-NEXT:    addsd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: addsd:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = extractelement <2 x double> %va, i32 0
  %b = load double, double* %pb
  %r = fadd double %a, %b
  %vr = insertelement <2 x double> %va, double %r, i32 0
  ret <2 x double> %vr
}

; Scalar float subtract: the load should fold into subss / vsubss.
define <4 x float> @subss(<4 x float> %va, float* %pb) {
; SSE-LABEL: subss:
; SSE:       # BB#0:
; SSE-NEXT:    subss (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: subss:
; AVX:       # BB#0:
; AVX-NEXT:    vsubss (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = extractelement <4 x float> %va, i32 0
  %b = load float, float* %pb
  %r = fsub float %a, %b
  %vr = insertelement <4 x float> %va, float %r, i32 0
  ret <4 x float> %vr
}

; Scalar double subtract: the load should fold into subsd / vsubsd.
define <2 x double> @subsd(<2 x double> %va, double* %pb) {
; SSE-LABEL: subsd:
; SSE:       # BB#0:
; SSE-NEXT:    subsd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: subsd:
; AVX:       # BB#0:
; AVX-NEXT:    vsubsd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = extractelement <2 x double> %va, i32 0
  %b = load double, double* %pb
  %r = fsub double %a, %b
  %vr = insertelement <2 x double> %va, double %r, i32 0
  ret <2 x double> %vr
}

; Scalar float multiply: the load should fold into mulss / vmulss.
define <4 x float> @mulss(<4 x float> %va, float* %pb) {
; SSE-LABEL: mulss:
; SSE:       # BB#0:
; SSE-NEXT:    mulss (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: mulss:
; AVX:       # BB#0:
; AVX-NEXT:    vmulss (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = extractelement <4 x float> %va, i32 0
  %b = load float, float* %pb
  %r = fmul float %a, %b
  %vr = insertelement <4 x float> %va, float %r, i32 0
  ret <4 x float> %vr
}

; Scalar double multiply: the load should fold into mulsd / vmulsd.
define <2 x double> @mulsd(<2 x double> %va, double* %pb) {
; SSE-LABEL: mulsd:
; SSE:       # BB#0:
; SSE-NEXT:    mulsd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: mulsd:
; AVX:       # BB#0:
; AVX-NEXT:    vmulsd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = extractelement <2 x double> %va, i32 0
  %b = load double, double* %pb
  %r = fmul double %a, %b
  %vr = insertelement <2 x double> %va, double %r, i32 0
  ret <2 x double> %vr
}

; Scalar float divide: the load should fold into divss / vdivss.
define <4 x float> @divss(<4 x float> %va, float* %pb) {
; SSE-LABEL: divss:
; SSE:       # BB#0:
; SSE-NEXT:    divss (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: divss:
; AVX:       # BB#0:
; AVX-NEXT:    vdivss (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = extractelement <4 x float> %va, i32 0
  %b = load float, float* %pb
  %r = fdiv float %a, %b
  %vr = insertelement <4 x float> %va, float %r, i32 0
  ret <4 x float> %vr
}

; Scalar double divide: the load should fold into divsd / vdivsd.
define <2 x double> @divsd(<2 x double> %va, double* %pb) {
; SSE-LABEL: divsd:
; SSE:       # BB#0:
; SSE-NEXT:    divsd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: divsd:
; AVX:       # BB#0:
; AVX-NEXT:    vdivsd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = extractelement <2 x double> %va, i32 0
  %b = load double, double* %pb
  %r = fdiv double %a, %b
  %vr = insertelement <2 x double> %va, double %r, i32 0
  ret <2 x double> %vr
}