; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=ALL --check-prefix=X32
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=ALL --check-prefix=X64

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse42-builtins.c

; Layout of this file: every test is a small function that bitcasts its
; <2 x i64> arguments to <16 x i8> where needed, calls one SSE4.2 intrinsic
; (or uses plain IR for the 64-bit compare), and returns the result. Each
; function carries two groups of assertions, one for the i386 invocation and
; one for the x86_64 invocation above.
;
; NOTE(review): only the explicit-length flag tests whose i386 output saves and
; restores %ebx are marked nounwind - presumably to keep unwind directives out
; of the checked output; confirm before changing the attribute.

; Explicit-length compare, "above" result: pcmpestri with immediate 7, then
; seta. The two i32 arguments are expected in %eax and %edx.
define i32 @test_mm_cmpestra(<2 x i64> %a0, i32 %a1, <2 x i64> %a2, i32 %a3) nounwind {
; X32-LABEL: test_mm_cmpestra:
; X32:       # BB#0:
; X32-NEXT:    pushl %ebx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    xorl %ebx, %ebx
; X32-NEXT:    pcmpestri $7, %xmm1, %xmm0
; X32-NEXT:    seta %bl
; X32-NEXT:    movl %ebx, %eax
; X32-NEXT:    popl %ebx
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpestra:
; X64:       # BB#0:
; X64-NEXT:    xorl %r8d, %r8d
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    movl %esi, %edx
; X64-NEXT:    pcmpestri $7, %xmm1, %xmm0
; X64-NEXT:    seta %r8b
; X64-NEXT:    movl %r8d, %eax
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg2 = bitcast <2 x i64> %a2 to <16 x i8>
  %res = call i32 @llvm.x86.sse42.pcmpestria128(<16 x i8> %arg0, i32 %a1, <16 x i8> %arg2, i32 %a3, i8 7)
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpestria128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone

; Explicit-length compare, carry result: pcmpestri with immediate 7, then the
; carry flag is materialised with sbbl/andl.
define i32 @test_mm_cmpestrc(<2 x i64> %a0, i32 %a1, <2 x i64> %a2, i32 %a3) {
; X32-LABEL: test_mm_cmpestrc:
; X32:       # BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    pcmpestri $7, %xmm1, %xmm0
; X32-NEXT:    sbbl %eax, %eax
; X32-NEXT:    andl $1, %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpestrc:
; X64:       # BB#0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    movl %esi, %edx
; X64-NEXT:    pcmpestri $7, %xmm1, %xmm0
; X64-NEXT:    sbbl %eax, %eax
; X64-NEXT:    andl $1, %eax
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg2 = bitcast <2 x i64> %a2 to <16 x i8>
  %res = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %arg0, i32 %a1, <16 x i8> %arg2, i32 %a3, i8 7)
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpestric128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone

; Explicit-length compare, index result: pcmpestri with immediate 7, result
; copied from %ecx to %eax.
define i32 @test_mm_cmpestri(<2 x i64> %a0, i32 %a1, <2 x i64> %a2, i32 %a3) {
; X32-LABEL: test_mm_cmpestri:
; X32:       # BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    pcmpestri $7, %xmm1, %xmm0
; X32-NEXT:    movl %ecx, %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpestri:
; X64:       # BB#0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    movl %esi, %edx
; X64-NEXT:    pcmpestri $7, %xmm1, %xmm0
; X64-NEXT:    movl %ecx, %eax
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg2 = bitcast <2 x i64> %a2 to <16 x i8>
  %res = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %arg0, i32 %a1, <16 x i8> %arg2, i32 %a3, i8 7)
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpestri128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone

; Explicit-length compare, mask result: pcmpestrm with immediate 7; the
; <16 x i8> result is bitcast back to <2 x i64> before returning.
define <2 x i64> @test_mm_cmpestrm(<2 x i64> %a0, i32 %a1, <2 x i64> %a2, i32 %a3) {
; X32-LABEL: test_mm_cmpestrm:
; X32:       # BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    pcmpestrm $7, %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpestrm:
; X64:       # BB#0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    movl %esi, %edx
; X64-NEXT:    pcmpestrm $7, %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg2 = bitcast <2 x i64> %a2 to <16 x i8>
  %res = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %arg0, i32 %a1, <16 x i8> %arg2, i32 %a3, i8 7)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone

; Explicit-length compare, overflow result: pcmpestri with immediate 7, then
; seto.
define i32 @test_mm_cmpestro(<2 x i64> %a0, i32 %a1, <2 x i64> %a2, i32 %a3) nounwind {
; X32-LABEL: test_mm_cmpestro:
; X32:       # BB#0:
; X32-NEXT:    pushl %ebx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    xorl %ebx, %ebx
; X32-NEXT:    pcmpestri $7, %xmm1, %xmm0
; X32-NEXT:    seto %bl
; X32-NEXT:    movl %ebx, %eax
; X32-NEXT:    popl %ebx
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpestro:
; X64:       # BB#0:
; X64-NEXT:    xorl %r8d, %r8d
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    movl %esi, %edx
; X64-NEXT:    pcmpestri $7, %xmm1, %xmm0
; X64-NEXT:    seto %r8b
; X64-NEXT:    movl %r8d, %eax
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg2 = bitcast <2 x i64> %a2 to <16 x i8>
  %res = call i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8> %arg0, i32 %a1, <16 x i8> %arg2, i32 %a3, i8 7)
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone

; Explicit-length compare, sign result: pcmpestri with immediate 7, then sets.
define i32 @test_mm_cmpestrs(<2 x i64> %a0, i32 %a1, <2 x i64> %a2, i32 %a3) nounwind {
; X32-LABEL: test_mm_cmpestrs:
; X32:       # BB#0:
; X32-NEXT:    pushl %ebx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    xorl %ebx, %ebx
; X32-NEXT:    pcmpestri $7, %xmm1, %xmm0
; X32-NEXT:    sets %bl
; X32-NEXT:    movl %ebx, %eax
; X32-NEXT:    popl %ebx
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpestrs:
; X64:       # BB#0:
; X64-NEXT:    xorl %r8d, %r8d
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    movl %esi, %edx
; X64-NEXT:    pcmpestri $7, %xmm1, %xmm0
; X64-NEXT:    sets %r8b
; X64-NEXT:    movl %r8d, %eax
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg2 = bitcast <2 x i64> %a2 to <16 x i8>
  %res = call i32 @llvm.x86.sse42.pcmpestris128(<16 x i8> %arg0, i32 %a1, <16 x i8> %arg2, i32 %a3, i8 7)
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpestris128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone

; Explicit-length compare, zero result: pcmpestri with immediate 7, then sete.
define i32 @test_mm_cmpestrz(<2 x i64> %a0, i32 %a1, <2 x i64> %a2, i32 %a3) nounwind {
; X32-LABEL: test_mm_cmpestrz:
; X32:       # BB#0:
; X32-NEXT:    pushl %ebx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    xorl %ebx, %ebx
; X32-NEXT:    pcmpestri $7, %xmm1, %xmm0
; X32-NEXT:    sete %bl
; X32-NEXT:    movl %ebx, %eax
; X32-NEXT:    popl %ebx
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpestrz:
; X64:       # BB#0:
; X64-NEXT:    xorl %r8d, %r8d
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    movl %esi, %edx
; X64-NEXT:    pcmpestri $7, %xmm1, %xmm0
; X64-NEXT:    sete %r8b
; X64-NEXT:    movl %r8d, %eax
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg2 = bitcast <2 x i64> %a2 to <16 x i8>
  %res = call i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8> %arg0, i32 %a1, <16 x i8> %arg2, i32 %a3, i8 7)
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone

; Signed 64-bit element compare written as plain IR (icmp sgt + sext); both
; invocations expect a single pcmpgtq.
define <2 x i64> @test_mm_cmpgt_epi64(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_cmpgt_epi64:
; X32:       # BB#0:
; X32-NEXT:    pcmpgtq %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpgt_epi64:
; X64:       # BB#0:
; X64-NEXT:    pcmpgtq %xmm1, %xmm0
; X64-NEXT:    retq
  %cmp = icmp sgt <2 x i64> %a0, %a1
  %res = sext <2 x i1> %cmp to <2 x i64>
  ret <2 x i64> %res
}

; Implicit-length compare, "above" result: pcmpistri with immediate 7, then
; seta.
define i32 @test_mm_cmpistra(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_cmpistra:
; X32:       # BB#0:
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    pcmpistri $7, %xmm1, %xmm0
; X32-NEXT:    seta %al
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpistra:
; X64:       # BB#0:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    pcmpistri $7, %xmm1, %xmm0
; X64-NEXT:    seta %al
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call i32 @llvm.x86.sse42.pcmpistria128(<16 x i8> %arg0, <16 x i8> %arg1, i8 7)
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpistria128(<16 x i8>, <16 x i8>, i8) nounwind readnone

; Implicit-length compare, carry result: pcmpistri with immediate 7, then
; sbbl/andl.
define i32 @test_mm_cmpistrc(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_cmpistrc:
; X32:       # BB#0:
; X32-NEXT:    pcmpistri $7, %xmm1, %xmm0
; X32-NEXT:    sbbl %eax, %eax
; X32-NEXT:    andl $1, %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpistrc:
; X64:       # BB#0:
; X64-NEXT:    pcmpistri $7, %xmm1, %xmm0
; X64-NEXT:    sbbl %eax, %eax
; X64-NEXT:    andl $1, %eax
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %arg0, <16 x i8> %arg1, i8 7)
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpistric128(<16 x i8>, <16 x i8>, i8) nounwind readnone

; Implicit-length compare, index result: pcmpistri with immediate 7, result
; copied from %ecx to %eax.
define i32 @test_mm_cmpistri(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_cmpistri:
; X32:       # BB#0:
; X32-NEXT:    pcmpistri $7, %xmm1, %xmm0
; X32-NEXT:    movl %ecx, %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpistri:
; X64:       # BB#0:
; X64-NEXT:    pcmpistri $7, %xmm1, %xmm0
; X64-NEXT:    movl %ecx, %eax
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %arg0, <16 x i8> %arg1, i8 7)
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8>, <16 x i8>, i8) nounwind readnone

; Implicit-length compare, mask result: pcmpistrm with immediate 7; the
; <16 x i8> result is bitcast back to <2 x i64> before returning.
define <2 x i64> @test_mm_cmpistrm(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_cmpistrm:
; X32:       # BB#0:
; X32-NEXT:    pcmpistrm $7, %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpistrm:
; X64:       # BB#0:
; X64-NEXT:    pcmpistrm $7, %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %arg0, <16 x i8> %arg1, i8 7)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8>, <16 x i8>, i8) nounwind readnone

; Implicit-length compare, overflow result: pcmpistri with immediate 7, then
; seto.
define i32 @test_mm_cmpistro(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_cmpistro:
; X32:       # BB#0:
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    pcmpistri $7, %xmm1, %xmm0
; X32-NEXT:    seto %al
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpistro:
; X64:       # BB#0:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    pcmpistri $7, %xmm1, %xmm0
; X64-NEXT:    seto %al
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8> %arg0, <16 x i8> %arg1, i8 7)
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8>, <16 x i8>, i8) nounwind readnone

; Implicit-length compare, sign result: pcmpistri with immediate 7, then sets.
define i32 @test_mm_cmpistrs(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_cmpistrs:
; X32:       # BB#0:
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    pcmpistri $7, %xmm1, %xmm0
; X32-NEXT:    sets %al
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpistrs:
; X64:       # BB#0:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    pcmpistri $7, %xmm1, %xmm0
; X64-NEXT:    sets %al
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call i32 @llvm.x86.sse42.pcmpistris128(<16 x i8> %arg0, <16 x i8> %arg1, i8 7)
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpistris128(<16 x i8>, <16 x i8>, i8) nounwind readnone

; Implicit-length compare, zero result: pcmpistri with immediate 7, then sete.
define i32 @test_mm_cmpistrz(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_cmpistrz:
; X32:       # BB#0:
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    pcmpistri $7, %xmm1, %xmm0
; X32-NEXT:    sete %al
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpistrz:
; X64:       # BB#0:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    pcmpistri $7, %xmm1, %xmm0
; X64-NEXT:    sete %al
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8> %arg0, <16 x i8> %arg1, i8 7)
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8>, <16 x i8>, i8) nounwind readnone

; CRC32 accumulate with an i8 operand: expects crc32b.
define i32 @test_mm_crc32_u8(i32 %a0, i8 %a1) {
; X32-LABEL: test_mm_crc32_u8:
; X32:       # BB#0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    crc32b %cl, %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_crc32_u8:
; X64:       # BB#0:
; X64-NEXT:    crc32b %sil, %edi
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    retq
  %res = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a0, i8 %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind readnone

; CRC32 accumulate with an i16 operand: expects crc32w.
define i32 @test_mm_crc32_u16(i32 %a0, i16 %a1) {
; X32-LABEL: test_mm_crc32_u16:
; X32:       # BB#0:
; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    crc32w %cx, %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_crc32_u16:
; X64:       # BB#0:
; X64-NEXT:    crc32w %si, %edi
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    retq
  %res = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a0, i16 %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind readnone

; CRC32 accumulate with an i32 operand: expects crc32l (with the operand read
; straight from the stack in the i386 output).
define i32 @test_mm_crc32_u32(i32 %a0, i32 %a1) {
; X32-LABEL: test_mm_crc32_u32:
; X32:       # BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    crc32l {{[0-9]+}}(%esp), %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_crc32_u32:
; X64:       # BB#0:
; X64-NEXT:    crc32l %esi, %edi
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    retq
  %res = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a0, i32 %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind readnone