;*****************************************************************************
;* x86inc.asm: x86 abstraction layer
;*****************************************************************************
;* Copyright (C) 2005-2024 x264 project
;*
;* Authors: Loren Merritt <lorenm@u.washington.edu>
;*          Henrik Gramner <henrik@gramner.com>
;*          Anton Mitrofanov <BugMaster@narod.ru>
;*          Fiona Glaser <fiona@x264.com>
;*
;* Permission to use, copy, modify, and/or distribute this software for any
;* purpose with or without fee is hereby granted, provided that the above
;* copyright notice and this permission notice appear in all copies.
;*
;* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
;* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
;* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
;* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
;* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
;* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
;* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
;*****************************************************************************

; This is a header file for the x86inc.asm assembly language, which uses
; NASM/YASM syntax combined with a large number of macros to provide easy
; abstraction between different calling conventions (x86_32, win64, linux64).
; It also has various other useful features to simplify writing the kind of
; DSP functions that are most often used.
;=============================================================================
; Build configuration
;=============================================================================

; private_prefix must be supplied by the including file/build system.
; public_prefix falls back to private_prefix when it is not supplied.
%ifndef private_prefix
    %error private_prefix not defined
%endif

%ifndef public_prefix
    %define public_prefix private_prefix
%endif

; Stack alignment that may be assumed on function entry unless the build
; overrides it: 16 bytes on x86-64, 4 bytes otherwise.
%ifndef STACK_ALIGNMENT
    %if ARCH_X86_64
        %define STACK_ALIGNMENT 16
    %else
        %define STACK_ALIGNMENT 4
    %endif
%endif

; On x86-64 exactly one of WIN64/UNIX64 is set to 1, selected by the output
; format. Both remain 0 when ARCH_X86_64 is 0.
%define WIN64  0
%define UNIX64 0
%if ARCH_X86_64
    %ifidn __OUTPUT_FORMAT__,win32
        %define WIN64  1
    %elifidn __OUTPUT_FORMAT__,win64
        %define WIN64  1
    %elifidn __OUTPUT_FORMAT__,x64
        %define WIN64  1
    %else
        %define UNIX64 1
    %endif
%endif

; Object file format flags. Both remain 0 for any other output format.
%define FORMAT_ELF 0
%define FORMAT_MACHO 0
%ifidn __OUTPUT_FORMAT__,elf
    %define FORMAT_ELF 1
%elifidn __OUTPUT_FORMAT__,elf32
    %define FORMAT_ELF 1
%elifidn __OUTPUT_FORMAT__,elf64
    %define FORMAT_ELF 1
%elifidn __OUTPUT_FORMAT__,macho
    %define FORMAT_MACHO 1
%elifidn __OUTPUT_FORMAT__,macho32
    %define FORMAT_MACHO 1
%elifidn __OUTPUT_FORMAT__,macho64
    %define FORMAT_MACHO 1
%endif

; mangle(x): prepends an underscore to x when PREFIX is defined, otherwise
; leaves x unchanged.
%ifdef PREFIX
    %define mangle(x) _ %+ x
%else
    %define mangle(x) x
%endif

; Use VEX-encoding even in non-AVX functions
%ifndef FORCE_VEX_ENCODING
    %define FORCE_VEX_ENCODING 0
%endif

; SECTION_RODATA:
; %1 = (optional) section alignment, 16 if omitted.
; Switches to the read-only data section: .rdata for the win32 output format
; and for WIN64, .rodata for everything else.
%macro SECTION_RODATA 0-1 16
    %ifidn __OUTPUT_FORMAT__,win32
        SECTION .rdata align=%1
    %elif WIN64
        SECTION .rdata align=%1
    %else
        SECTION .rodata align=%1
    %endif
%endmacro

; PIC is forced to 1 on x86-64 (together with RIP-relative default addressing)
; and to 0 for the win32 output format. Elsewhere a PIC value supplied by the
; build is kept, defaulting to 0.
%if ARCH_X86_64
    %define PIC 1 ; always use PIC on x86-64
    default rel
%elifidn __OUTPUT_FORMAT__,win32
    %define PIC 0 ; PIC isn't used on 32-bit Windows
%elifndef PIC
    %define PIC 0
%endif

; HAVE_PRIVATE_EXTERN is cleared when assembling with a NASM older than 2.14.
; The smartalign package is enabled whenever __NASM_VERSION_ID__ is defined.
%define HAVE_PRIVATE_EXTERN 1
%ifdef __NASM_VERSION_ID__
    %use smartalign
    %if __NASM_VERSION_ID__ < 0x020e0000 ; 2.14
        %define HAVE_PRIVATE_EXTERN 0
    %endif
%endif

; Macros to eliminate most code duplication between x86_32 and x86_64:
; Currently this works only for leaf functions which load all their arguments
; into registers at the start, and make no other use of the stack. Luckily that
; covers most use cases.

; PROLOGUE:
; %1 = number of arguments. loads them from stack if needed.
; %2 = number of registers used. pushes callee-saved regs if needed.
; %3 = number of xmm registers used. pushes callee-saved xmm regs if needed.
; %4 = (optional) stack size to be allocated. The stack will be aligned before
;      allocating the specified stack size. If the required stack alignment is
;      larger than the known stack alignment the stack will be manually aligned
;      and an extra register will be allocated to hold the original stack
;      pointer (to not invalidate r0m etc.). To prevent the use of an extra
;      register as stack pointer, request a negative stack size.
; %4+/%5+ = list of names to define to registers
; PROLOGUE can also be invoked by adding the same options to cglobal

; e.g.
; cglobal foo, 2,3,7,0x40, dst, src, tmp
; declares a function (foo) that automatically loads two arguments (dst and
; src) into registers, uses one additional register (tmp) plus 7 vector
; registers (m0-m6) and allocates 0x40 bytes of stack space.

; TODO Some functions can use some args directly from the stack. If they're the
; last args then you can just not declare them, but if they're in the middle
; we need a more flexible macro.
; RET:
; Pops anything that was pushed by PROLOGUE, and returns.

; REP_RET:
; Use this instead of RET if it's a branch target.

; registers:
; rN and rNq are the native-size register holding function argument N
; rNd, rNw, rNb are dword, word, and byte size
; rNh is the high 8 bits of the word size
; rNm is the original location of arg N (a register or on the stack), dword
; rNmp is native size

; DECLARE_REG:
; %1 = argument/register index N
; %2 = name of the register backing rN
; %3 = (optional) offset of argument N relative to rstk + stack_offset.
;      When omitted, rNm/rNmp refer to the register itself, otherwise they
;      refer to the memory location (qword on x86-64, dword on x86-32).
; Also defines <%2>q as an alias for %2.
%macro DECLARE_REG 2-3
    %define r%1q %2
    %define r%1d %2d
    %define r%1w %2w
    %define r%1b %2b
    %define r%1h %2h
    %define %2q %2
    %if %0 == 2
        %define r%1m  %2d
        %define r%1mp %2
    %elif ARCH_X86_64 ; memory
        %define r%1m [rstk + stack_offset + %3]
        %define r%1mp qword r %+ %1 %+ m
    %else
        %define r%1m [rstk + stack_offset + %3]
        %define r%1mp dword r %+ %1 %+ m
    %endif
    %define r%1  %2
%endmacro

; DECLARE_REG_SIZE:
; %1 = 16-bit name of a legacy register (e.g. ax)
; %2 = name of its low byte (e.g. al)
; %3 = name of its high byte (e.g. ah), passed as null in some invocations below
; Defines q/d/w/h/b suffixed aliases for both the r- and e-prefixed names.
; On x86-32 the r-prefixed name itself is mapped to the e-prefixed register.
%macro DECLARE_REG_SIZE 3
    %define r%1q r%1
    %define e%1q r%1
    %define r%1d e%1
    %define e%1d e%1
    %define r%1w %1
    %define e%1w %1
    %define r%1h %3
    %define e%1h %3
    %define r%1b %2
    %define e%1b %2
    %if ARCH_X86_64 == 0
        %define r%1 e%1
    %endif
%endmacro

DECLARE_REG_SIZE ax, al, ah
DECLARE_REG_SIZE bx, bl, bh
DECLARE_REG_SIZE cx, cl, ch
DECLARE_REG_SIZE dx, dl, dh
DECLARE_REG_SIZE si, sil, null
DECLARE_REG_SIZE di, dil, null
DECLARE_REG_SIZE bp, bpl, null

; t# defines for when per-arch register allocation is more complex than just function arguments
201*c0909341SAndroid Build Coastguard Worker%macro DECLARE_REG_TMP 1-* 202*c0909341SAndroid Build Coastguard Worker %assign %%i 0 203*c0909341SAndroid Build Coastguard Worker %rep %0 204*c0909341SAndroid Build Coastguard Worker CAT_XDEFINE t, %%i, r%1 205*c0909341SAndroid Build Coastguard Worker %assign %%i %%i+1 206*c0909341SAndroid Build Coastguard Worker %rotate 1 207*c0909341SAndroid Build Coastguard Worker %endrep 208*c0909341SAndroid Build Coastguard Worker%endmacro 209*c0909341SAndroid Build Coastguard Worker 210*c0909341SAndroid Build Coastguard Worker%macro DECLARE_REG_TMP_SIZE 0-* 211*c0909341SAndroid Build Coastguard Worker %rep %0 212*c0909341SAndroid Build Coastguard Worker %define t%1q t%1 %+ q 213*c0909341SAndroid Build Coastguard Worker %define t%1d t%1 %+ d 214*c0909341SAndroid Build Coastguard Worker %define t%1w t%1 %+ w 215*c0909341SAndroid Build Coastguard Worker %define t%1h t%1 %+ h 216*c0909341SAndroid Build Coastguard Worker %define t%1b t%1 %+ b 217*c0909341SAndroid Build Coastguard Worker %rotate 1 218*c0909341SAndroid Build Coastguard Worker %endrep 219*c0909341SAndroid Build Coastguard Worker%endmacro 220*c0909341SAndroid Build Coastguard Worker 221*c0909341SAndroid Build Coastguard WorkerDECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14 222*c0909341SAndroid Build Coastguard Worker 223*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 224*c0909341SAndroid Build Coastguard Worker %define gprsize 8 225*c0909341SAndroid Build Coastguard Worker%else 226*c0909341SAndroid Build Coastguard Worker %define gprsize 4 227*c0909341SAndroid Build Coastguard Worker%endif 228*c0909341SAndroid Build Coastguard Worker 229*c0909341SAndroid Build Coastguard Worker%macro LEA 2 230*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 231*c0909341SAndroid Build Coastguard Worker lea %1, [%2] 232*c0909341SAndroid Build Coastguard Worker%elif PIC 233*c0909341SAndroid Build Coastguard Worker call $+5 ; special-cased to not affect the RSB on most 
CPU:s 234*c0909341SAndroid Build Coastguard Worker pop %1 235*c0909341SAndroid Build Coastguard Worker add %1, -$+1+%2 236*c0909341SAndroid Build Coastguard Worker%else 237*c0909341SAndroid Build Coastguard Worker mov %1, %2 238*c0909341SAndroid Build Coastguard Worker%endif 239*c0909341SAndroid Build Coastguard Worker%endmacro 240*c0909341SAndroid Build Coastguard Worker 241*c0909341SAndroid Build Coastguard Worker; Repeats an instruction/operation for multiple arguments. 242*c0909341SAndroid Build Coastguard Worker; Example usage: "REPX {psrlw x, 8}, m0, m1, m2, m3" 243*c0909341SAndroid Build Coastguard Worker%macro REPX 2-* ; operation, args 244*c0909341SAndroid Build Coastguard Worker %xdefine %%f(x) %1 245*c0909341SAndroid Build Coastguard Worker %rep %0 - 1 246*c0909341SAndroid Build Coastguard Worker %rotate 1 247*c0909341SAndroid Build Coastguard Worker %%f(%1) 248*c0909341SAndroid Build Coastguard Worker %endrep 249*c0909341SAndroid Build Coastguard Worker%endmacro 250*c0909341SAndroid Build Coastguard Worker 251*c0909341SAndroid Build Coastguard Worker%macro PUSH 1 252*c0909341SAndroid Build Coastguard Worker push %1 253*c0909341SAndroid Build Coastguard Worker %ifidn rstk, rsp 254*c0909341SAndroid Build Coastguard Worker %assign stack_offset stack_offset+gprsize 255*c0909341SAndroid Build Coastguard Worker %endif 256*c0909341SAndroid Build Coastguard Worker%endmacro 257*c0909341SAndroid Build Coastguard Worker 258*c0909341SAndroid Build Coastguard Worker%macro POP 1 259*c0909341SAndroid Build Coastguard Worker pop %1 260*c0909341SAndroid Build Coastguard Worker %ifidn rstk, rsp 261*c0909341SAndroid Build Coastguard Worker %assign stack_offset stack_offset-gprsize 262*c0909341SAndroid Build Coastguard Worker %endif 263*c0909341SAndroid Build Coastguard Worker%endmacro 264*c0909341SAndroid Build Coastguard Worker 265*c0909341SAndroid Build Coastguard Worker%macro PUSH_IF_USED 1-* 266*c0909341SAndroid Build Coastguard Worker %rep %0 267*c0909341SAndroid 
Build Coastguard Worker %if %1 < regs_used 268*c0909341SAndroid Build Coastguard Worker PUSH r%1 269*c0909341SAndroid Build Coastguard Worker %endif 270*c0909341SAndroid Build Coastguard Worker %rotate 1 271*c0909341SAndroid Build Coastguard Worker %endrep 272*c0909341SAndroid Build Coastguard Worker%endmacro 273*c0909341SAndroid Build Coastguard Worker 274*c0909341SAndroid Build Coastguard Worker%macro POP_IF_USED 1-* 275*c0909341SAndroid Build Coastguard Worker %rep %0 276*c0909341SAndroid Build Coastguard Worker %if %1 < regs_used 277*c0909341SAndroid Build Coastguard Worker pop r%1 278*c0909341SAndroid Build Coastguard Worker %endif 279*c0909341SAndroid Build Coastguard Worker %rotate 1 280*c0909341SAndroid Build Coastguard Worker %endrep 281*c0909341SAndroid Build Coastguard Worker%endmacro 282*c0909341SAndroid Build Coastguard Worker 283*c0909341SAndroid Build Coastguard Worker%macro LOAD_IF_USED 1-* 284*c0909341SAndroid Build Coastguard Worker %rep %0 285*c0909341SAndroid Build Coastguard Worker %if %1 < num_args 286*c0909341SAndroid Build Coastguard Worker mov r%1, r %+ %1 %+ mp 287*c0909341SAndroid Build Coastguard Worker %endif 288*c0909341SAndroid Build Coastguard Worker %rotate 1 289*c0909341SAndroid Build Coastguard Worker %endrep 290*c0909341SAndroid Build Coastguard Worker%endmacro 291*c0909341SAndroid Build Coastguard Worker 292*c0909341SAndroid Build Coastguard Worker%macro SUB 2 293*c0909341SAndroid Build Coastguard Worker sub %1, %2 294*c0909341SAndroid Build Coastguard Worker %ifidn %1, rstk 295*c0909341SAndroid Build Coastguard Worker %assign stack_offset stack_offset+(%2) 296*c0909341SAndroid Build Coastguard Worker %endif 297*c0909341SAndroid Build Coastguard Worker%endmacro 298*c0909341SAndroid Build Coastguard Worker 299*c0909341SAndroid Build Coastguard Worker%macro ADD 2 300*c0909341SAndroid Build Coastguard Worker add %1, %2 301*c0909341SAndroid Build Coastguard Worker %ifidn %1, rstk 302*c0909341SAndroid Build Coastguard Worker %assign 
stack_offset stack_offset-(%2) 303*c0909341SAndroid Build Coastguard Worker %endif 304*c0909341SAndroid Build Coastguard Worker%endmacro 305*c0909341SAndroid Build Coastguard Worker 306*c0909341SAndroid Build Coastguard Worker%macro movifnidn 2 307*c0909341SAndroid Build Coastguard Worker %ifnidn %1, %2 308*c0909341SAndroid Build Coastguard Worker mov %1, %2 309*c0909341SAndroid Build Coastguard Worker %endif 310*c0909341SAndroid Build Coastguard Worker%endmacro 311*c0909341SAndroid Build Coastguard Worker 312*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 == 0 313*c0909341SAndroid Build Coastguard Worker %define movsxd movifnidn 314*c0909341SAndroid Build Coastguard Worker%endif 315*c0909341SAndroid Build Coastguard Worker 316*c0909341SAndroid Build Coastguard Worker%macro movsxdifnidn 2 317*c0909341SAndroid Build Coastguard Worker %ifnidn %1, %2 318*c0909341SAndroid Build Coastguard Worker movsxd %1, %2 319*c0909341SAndroid Build Coastguard Worker %endif 320*c0909341SAndroid Build Coastguard Worker%endmacro 321*c0909341SAndroid Build Coastguard Worker 322*c0909341SAndroid Build Coastguard Worker%macro ASSERT 1 323*c0909341SAndroid Build Coastguard Worker %if (%1) == 0 324*c0909341SAndroid Build Coastguard Worker %error assertion ``%1'' failed 325*c0909341SAndroid Build Coastguard Worker %endif 326*c0909341SAndroid Build Coastguard Worker%endmacro 327*c0909341SAndroid Build Coastguard Worker 328*c0909341SAndroid Build Coastguard Worker%macro DEFINE_ARGS 0-* 329*c0909341SAndroid Build Coastguard Worker %ifdef n_arg_names 330*c0909341SAndroid Build Coastguard Worker %assign %%i 0 331*c0909341SAndroid Build Coastguard Worker %rep n_arg_names 332*c0909341SAndroid Build Coastguard Worker CAT_UNDEF arg_name %+ %%i, q 333*c0909341SAndroid Build Coastguard Worker CAT_UNDEF arg_name %+ %%i, d 334*c0909341SAndroid Build Coastguard Worker CAT_UNDEF arg_name %+ %%i, w 335*c0909341SAndroid Build Coastguard Worker CAT_UNDEF arg_name %+ %%i, h 336*c0909341SAndroid Build 
Coastguard Worker CAT_UNDEF arg_name %+ %%i, b 337*c0909341SAndroid Build Coastguard Worker CAT_UNDEF arg_name %+ %%i, m 338*c0909341SAndroid Build Coastguard Worker CAT_UNDEF arg_name %+ %%i, mp 339*c0909341SAndroid Build Coastguard Worker CAT_UNDEF arg_name, %%i 340*c0909341SAndroid Build Coastguard Worker %assign %%i %%i+1 341*c0909341SAndroid Build Coastguard Worker %endrep 342*c0909341SAndroid Build Coastguard Worker %endif 343*c0909341SAndroid Build Coastguard Worker 344*c0909341SAndroid Build Coastguard Worker %xdefine %%stack_offset stack_offset 345*c0909341SAndroid Build Coastguard Worker %undef stack_offset ; so that the current value of stack_offset doesn't get baked in by xdefine 346*c0909341SAndroid Build Coastguard Worker %assign %%i 0 347*c0909341SAndroid Build Coastguard Worker %rep %0 348*c0909341SAndroid Build Coastguard Worker %xdefine %1q r %+ %%i %+ q 349*c0909341SAndroid Build Coastguard Worker %xdefine %1d r %+ %%i %+ d 350*c0909341SAndroid Build Coastguard Worker %xdefine %1w r %+ %%i %+ w 351*c0909341SAndroid Build Coastguard Worker %xdefine %1h r %+ %%i %+ h 352*c0909341SAndroid Build Coastguard Worker %xdefine %1b r %+ %%i %+ b 353*c0909341SAndroid Build Coastguard Worker %xdefine %1m r %+ %%i %+ m 354*c0909341SAndroid Build Coastguard Worker %xdefine %1mp r %+ %%i %+ mp 355*c0909341SAndroid Build Coastguard Worker CAT_XDEFINE arg_name, %%i, %1 356*c0909341SAndroid Build Coastguard Worker %assign %%i %%i+1 357*c0909341SAndroid Build Coastguard Worker %rotate 1 358*c0909341SAndroid Build Coastguard Worker %endrep 359*c0909341SAndroid Build Coastguard Worker %xdefine stack_offset %%stack_offset 360*c0909341SAndroid Build Coastguard Worker %assign n_arg_names %0 361*c0909341SAndroid Build Coastguard Worker%endmacro 362*c0909341SAndroid Build Coastguard Worker 363*c0909341SAndroid Build Coastguard Worker%define required_stack_alignment ((mmsize + 15) & ~15) 364*c0909341SAndroid Build Coastguard Worker%define vzeroupper_required (mmsize > 16 
&& (ARCH_X86_64 == 0 || xmm_regs_used > 16 || notcpuflag(avx512))) 365*c0909341SAndroid Build Coastguard Worker%define high_mm_regs (16*cpuflag(avx512)) 366*c0909341SAndroid Build Coastguard Worker 367*c0909341SAndroid Build Coastguard Worker; Large stack allocations on Windows need to use stack probing in order 368*c0909341SAndroid Build Coastguard Worker; to guarantee that all stack memory is committed before accessing it. 369*c0909341SAndroid Build Coastguard Worker; This is done by ensuring that the guard page(s) at the end of the 370*c0909341SAndroid Build Coastguard Worker; currently committed pages are touched prior to any pages beyond that. 371*c0909341SAndroid Build Coastguard Worker%if WIN64 372*c0909341SAndroid Build Coastguard Worker %assign STACK_PROBE_SIZE 8192 373*c0909341SAndroid Build Coastguard Worker%elifidn __OUTPUT_FORMAT__, win32 374*c0909341SAndroid Build Coastguard Worker %assign STACK_PROBE_SIZE 4096 375*c0909341SAndroid Build Coastguard Worker%else 376*c0909341SAndroid Build Coastguard Worker %assign STACK_PROBE_SIZE 0 377*c0909341SAndroid Build Coastguard Worker%endif 378*c0909341SAndroid Build Coastguard Worker 379*c0909341SAndroid Build Coastguard Worker%macro PROBE_STACK 1 ; stack_size 380*c0909341SAndroid Build Coastguard Worker %if STACK_PROBE_SIZE 381*c0909341SAndroid Build Coastguard Worker %assign %%i STACK_PROBE_SIZE 382*c0909341SAndroid Build Coastguard Worker %rep %1 / STACK_PROBE_SIZE 383*c0909341SAndroid Build Coastguard Worker mov eax, [rsp-%%i] 384*c0909341SAndroid Build Coastguard Worker %assign %%i %%i+STACK_PROBE_SIZE 385*c0909341SAndroid Build Coastguard Worker %endrep 386*c0909341SAndroid Build Coastguard Worker %endif 387*c0909341SAndroid Build Coastguard Worker%endmacro 388*c0909341SAndroid Build Coastguard Worker 389*c0909341SAndroid Build Coastguard Worker%macro RESET_STACK_STATE 0 390*c0909341SAndroid Build Coastguard Worker %ifidn rstk, rsp 391*c0909341SAndroid Build Coastguard Worker %assign stack_offset 
stack_offset - stack_size_padded 392*c0909341SAndroid Build Coastguard Worker %else 393*c0909341SAndroid Build Coastguard Worker %xdefine rstk rsp 394*c0909341SAndroid Build Coastguard Worker %endif 395*c0909341SAndroid Build Coastguard Worker %assign stack_size 0 396*c0909341SAndroid Build Coastguard Worker %assign stack_size_padded 0 397*c0909341SAndroid Build Coastguard Worker %assign xmm_regs_used 0 398*c0909341SAndroid Build Coastguard Worker%endmacro 399*c0909341SAndroid Build Coastguard Worker 400*c0909341SAndroid Build Coastguard Worker%macro ALLOC_STACK 0-2 0, 0 ; stack_size, n_xmm_regs 401*c0909341SAndroid Build Coastguard Worker RESET_STACK_STATE 402*c0909341SAndroid Build Coastguard Worker %ifnum %2 403*c0909341SAndroid Build Coastguard Worker %if mmsize != 8 404*c0909341SAndroid Build Coastguard Worker %assign xmm_regs_used %2 405*c0909341SAndroid Build Coastguard Worker %endif 406*c0909341SAndroid Build Coastguard Worker %endif 407*c0909341SAndroid Build Coastguard Worker %ifnum %1 408*c0909341SAndroid Build Coastguard Worker %if %1 != 0 409*c0909341SAndroid Build Coastguard Worker %assign %%pad 0 410*c0909341SAndroid Build Coastguard Worker %assign stack_size %1 411*c0909341SAndroid Build Coastguard Worker %if stack_size < 0 412*c0909341SAndroid Build Coastguard Worker %assign stack_size -stack_size 413*c0909341SAndroid Build Coastguard Worker %endif 414*c0909341SAndroid Build Coastguard Worker %if WIN64 415*c0909341SAndroid Build Coastguard Worker %assign %%pad %%pad + 32 ; shadow space 416*c0909341SAndroid Build Coastguard Worker %if xmm_regs_used > 8 417*c0909341SAndroid Build Coastguard Worker %assign %%pad %%pad + (xmm_regs_used-8)*16 ; callee-saved xmm registers 418*c0909341SAndroid Build Coastguard Worker %endif 419*c0909341SAndroid Build Coastguard Worker %endif 420*c0909341SAndroid Build Coastguard Worker %if required_stack_alignment <= STACK_ALIGNMENT 421*c0909341SAndroid Build Coastguard Worker ; maintain the current stack alignment 
422*c0909341SAndroid Build Coastguard Worker %assign stack_size_padded stack_size + %%pad + ((-%%pad-stack_offset-gprsize) & (STACK_ALIGNMENT-1)) 423*c0909341SAndroid Build Coastguard Worker PROBE_STACK stack_size_padded 424*c0909341SAndroid Build Coastguard Worker SUB rsp, stack_size_padded 425*c0909341SAndroid Build Coastguard Worker %else 426*c0909341SAndroid Build Coastguard Worker %assign %%reg_num (regs_used - 1) 427*c0909341SAndroid Build Coastguard Worker %xdefine rstk r %+ %%reg_num 428*c0909341SAndroid Build Coastguard Worker ; align stack, and save original stack location directly above 429*c0909341SAndroid Build Coastguard Worker ; it, i.e. in [rsp+stack_size_padded], so we can restore the 430*c0909341SAndroid Build Coastguard Worker ; stack in a single instruction (i.e. mov rsp, rstk or mov 431*c0909341SAndroid Build Coastguard Worker ; rsp, [rsp+stack_size_padded]) 432*c0909341SAndroid Build Coastguard Worker %if %1 < 0 ; need to store rsp on stack 433*c0909341SAndroid Build Coastguard Worker %xdefine rstkm [rsp + stack_size + %%pad] 434*c0909341SAndroid Build Coastguard Worker %assign %%pad %%pad + gprsize 435*c0909341SAndroid Build Coastguard Worker %else ; can keep rsp in rstk during whole function 436*c0909341SAndroid Build Coastguard Worker %xdefine rstkm rstk 437*c0909341SAndroid Build Coastguard Worker %endif 438*c0909341SAndroid Build Coastguard Worker %assign stack_size_padded stack_size + ((%%pad + required_stack_alignment-1) & ~(required_stack_alignment-1)) 439*c0909341SAndroid Build Coastguard Worker PROBE_STACK stack_size_padded 440*c0909341SAndroid Build Coastguard Worker mov rstk, rsp 441*c0909341SAndroid Build Coastguard Worker and rsp, ~(required_stack_alignment-1) 442*c0909341SAndroid Build Coastguard Worker sub rsp, stack_size_padded 443*c0909341SAndroid Build Coastguard Worker movifnidn rstkm, rstk 444*c0909341SAndroid Build Coastguard Worker %endif 445*c0909341SAndroid Build Coastguard Worker WIN64_PUSH_XMM 446*c0909341SAndroid 
; (Tail of a macro whose opening lines precede this part of the file.)
        %endif
    %endif
%endmacro

; SETUP_STACK_POINTER [stack_size]
; Adjusts regs_used before PROLOGUE pushes any general-purpose registers.
; It only acts when the argument is a non-zero number and
; required_stack_alignment is larger than STACK_ALIGNMENT; otherwise
; regs_used is left untouched.
%macro SETUP_STACK_POINTER 0-1 0
    %ifnum %1
        %if %1 != 0 && required_stack_alignment > STACK_ALIGNMENT
            %if %1 > 0
                ; Reserve an additional register for storing the original stack pointer, but avoid using
                ; eax/rax for this purpose since it can potentially get overwritten as a return value.
                %assign regs_used (regs_used + 1)
                ; r6 is rax on x86-64 and r0 is eax on x86-32, so step past them.
                %if ARCH_X86_64 && regs_used == 7
                    %assign regs_used 8
                %elif ARCH_X86_64 == 0 && regs_used == 1
                    %assign regs_used 2
                %endif
            %endif
            %if ARCH_X86_64 && regs_used < 5 + UNIX64 * 3
                ; Ensure that we don't clobber any registers containing arguments. For UNIX64 we also preserve r6 (rax)
                ; since it's used as a hidden argument in vararg functions to specify the number of vector registers used.
                %assign regs_used 5 + UNIX64 * 3
            %endif
        %endif
    %endif
%endmacro

%if WIN64 ; Windows x64 ;=================================================

; Register numbering for WIN64. r0-r3 are declared without a third operand,
; r4 and up are declared with one that grows by 8 per register.
; NOTE(review): the third operand looks like the stack offset of the matching
; argument - confirm against DECLARE_REG, which is defined elsewhere.
DECLARE_REG 0,  rcx
DECLARE_REG 1,  rdx
DECLARE_REG 2,  R8
DECLARE_REG 3,  R9
DECLARE_REG 4,  R10, 40
DECLARE_REG 5,  R11, 48
DECLARE_REG 6,  rax, 56
DECLARE_REG 7,  rdi, 64
DECLARE_REG 8,  rsi, 72
DECLARE_REG 9,  rbx, 80
DECLARE_REG 10, rbp, 88
DECLARE_REG 11, R14, 96
DECLARE_REG 12, R15, 104
DECLARE_REG 13, R12, 112
DECLARE_REG 14, R13, 120

; WIN64 function prologue.
; Records num_args/regs_used, lets SETUP_STACK_POINTER adjust regs_used, then
; expands PUSH_IF_USED, ALLOC_STACK and LOAD_IF_USED (all defined elsewhere)
; and finally names the arguments through DEFINE_ARGS.
%macro PROLOGUE 2-5+ 0, 0 ; #args, #regs, #xmm_regs, [stack_size,] arg_names...
    %assign num_args %1
    %assign regs_used %2
    ASSERT regs_used >= num_args
    SETUP_STACK_POINTER %4
    ASSERT regs_used <= 15
    ; r7-r14 are rdi, rsi, rbx, rbp, r14, r15, r12, r13 in the table above.
    PUSH_IF_USED 7, 8, 9, 10, 11, 12, 13, 14
    ALLOC_STACK %4, %3
    ; With no stack space requested, the XMM registers are spilled here instead.
    %if mmsize != 8 && stack_size == 0
        WIN64_SPILL_XMM %3
    %endif
    LOAD_IF_USED 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14
    ; The fourth parameter is either a numeric stack size or the first argument name.
    %if %0 > 4
        %ifnum %4
            DEFINE_ARGS %5
        %else
            DEFINE_ARGS %4, %5
        %endif
    %elifnnum %4
        DEFINE_ARGS %4
    %endif
%endmacro

; Push XMM registers to the stack. If no argument is specified all used registers
; will be pushed, otherwise only push previously unpushed registers.
; WIN64_PUSH_XMM [new_xmm_regs_used [, xmm_regs_pushed]]
;   no arguments:  nothing is treated as already stored
;   one argument:  xmm_regs_used becomes the new count and the previous
;                  xmm_regs_used is treated as the number already stored
;   two arguments: xmm_regs_used becomes the first argument and the second
;                  argument is the number already stored
; Emits nothing when mmsize is 8.
%macro WIN64_PUSH_XMM 0-2 ; new_xmm_regs_used, xmm_regs_pushed
    %if mmsize != 8
        %if %0 == 2
            %assign %%pushed %2
            %assign xmm_regs_used %1
        %elif %0 == 1
            %assign %%pushed xmm_regs_used
            %assign xmm_regs_used %1
        %else
            %assign %%pushed 0
        %endif
        ; Use the shadow space to store XMM6 and XMM7, the rest needs stack space allocated.
        %if %%pushed <= 6 + high_mm_regs && xmm_regs_used > 6 + high_mm_regs
            movaps [rstk + stack_offset +  8], xmm6
        %endif
        %if %%pushed <= 7 + high_mm_regs && xmm_regs_used > 7 + high_mm_regs
            movaps [rstk + stack_offset + 24], xmm7
        %endif
        ; From here on the counter only covers registers kept in the allocated area (xmm8 and up).
        %assign %%pushed %%pushed - high_mm_regs - 8
        %if %%pushed < 0
            %assign %%pushed 0
        %endif
        %assign %%regs_to_push xmm_regs_used - %%pushed - high_mm_regs - 8
        %if %%regs_to_push > 0
            ; The save area sits above stack_size plus 32 bytes and must fit in the padding.
            ASSERT (%%regs_to_push + %%pushed) * 16 <= stack_size_padded - stack_size - 32
            %assign %%i %%pushed + 8
            %rep %%regs_to_push
                movaps [rsp + (%%i-8)*16 + stack_size + 32], xmm %+ %%i
                %assign %%i %%i+1
            %endrep
        %endif
    %endif
%endmacro

; Allocate stack space for XMM registers and push all, or a subset, of those
%macro WIN64_SPILL_XMM 1-2 ; xmm_regs_used, xmm_regs_reserved
    RESET_STACK_STATE
    %if mmsize != 8
        %assign xmm_regs_used %1
        ASSERT xmm_regs_used <= 16 + high_mm_regs
        ; The optional second argument sizes the save area; it may not be smaller than the first.
        %if %0 == 2
            ASSERT %2 >= %1
            %assign %%xmm_regs_on_stack %2 - high_mm_regs - 8
        %else
            %assign %%xmm_regs_on_stack %1 - high_mm_regs - 8
        %endif
        %if %%xmm_regs_on_stack > 0
            ; Allocate stack space for callee-saved xmm registers plus shadow space and align the stack.
            %assign %%pad %%xmm_regs_on_stack*16 + 32
            %assign stack_size_padded %%pad + ((-%%pad-stack_offset-gprsize) & (STACK_ALIGNMENT-1))
            SUB rsp, stack_size_padded
        %endif
        WIN64_PUSH_XMM
    %endif
%endmacro

; Reloads the XMM registers stored by WIN64_PUSH_XMM and releases the stack
; allocation. State variables are left as they are, so this can be expanded
; once per return path.
%macro WIN64_RESTORE_XMM_INTERNAL 0
    %assign %%pad_size 0
    %assign %%xmm_regs_on_stack xmm_regs_used - high_mm_regs - 8
    %if %%xmm_regs_on_stack > 0
        ; Walk downwards from the highest used register to xmm8.
        %assign %%i xmm_regs_used - high_mm_regs
        %rep %%xmm_regs_on_stack
            %assign %%i %%i-1
            movaps xmm %+ %%i, [rsp + (%%i-8)*16 + stack_size + 32]
        %endrep
    %endif
    %if stack_size_padded > 0
        %if stack_size > 0 && required_stack_alignment > STACK_ALIGNMENT
            mov rsp, rstkm
        %else
            add rsp, stack_size_padded
            ; rsp has just moved by this amount; the xmm6/xmm7 addresses below subtract it again.
            %assign %%pad_size stack_size_padded
        %endif
    %endif
    %if xmm_regs_used > 7 + high_mm_regs
        movaps xmm7, [rsp + stack_offset - %%pad_size + 24]
    %endif
    %if xmm_regs_used > 6 + high_mm_regs
        movaps xmm6, [rsp + stack_offset - %%pad_size +  8]
    %endif
%endmacro

; Restores the XMM registers and then resets the stack bookkeeping.
%macro WIN64_RESTORE_XMM 0
    WIN64_RESTORE_XMM_INTERNAL
    RESET_STACK_STATE
%endmacro

; Non-zero when any of the conditions below holds; tested by REP_RET and TAIL_CALL.
%define has_epilogue regs_used > 7 || stack_size > 0 || vzeroupper_required || xmm_regs_used > 6+high_mm_regs

; WIN64 function return: restore XMM registers and the stack, pop r14 down to
; r7 (the reverse of the order given to PUSH_IF_USED in PROLOGUE), then return.
%macro RET 0
    WIN64_RESTORE_XMM_INTERNAL
    POP_IF_USED 14, 13, 12, 11, 10, 9, 8, 7
    %if vzeroupper_required
        vzeroupper
    %endif
    AUTO_REP_RET
%endmacro

%elif ARCH_X86_64 ; *nix x64 ;=============================================

; Register numbering for *nix x64 (continues below with r5).
DECLARE_REG 0,  rdi
DECLARE_REG 1,  rsi
DECLARE_REG 2,  rdx
DECLARE_REG 3,  rcx
DECLARE_REG 4,  R8
; Remaining *nix x64 register declarations. r6 and up are declared with a
; third operand that grows by 8 per register.
DECLARE_REG 5,  R9
DECLARE_REG 6,  rax, 8
DECLARE_REG 7,  R10, 16
DECLARE_REG 8,  R11, 24
DECLARE_REG 9,  rbx, 32
DECLARE_REG 10, rbp, 40
DECLARE_REG 11, R14, 48
DECLARE_REG 12, R15, 56
DECLARE_REG 13, R12, 64
DECLARE_REG 14, R13, 72

; *nix x64 function prologue.
; Same sequence as the other PROLOGUE variants in this file, with r9-r14
; (rbx, rbp, r14, r15, r12, r13) handed to PUSH_IF_USED and r6-r14 handed to
; LOAD_IF_USED.
%macro PROLOGUE 2-5+ 0, 0 ; #args, #regs, #xmm_regs, [stack_size,] arg_names...
    %assign num_args %1
    %assign regs_used %2
    ASSERT regs_used >= num_args
    SETUP_STACK_POINTER %4
    ASSERT regs_used <= 15
    PUSH_IF_USED 9, 10, 11, 12, 13, 14
    ALLOC_STACK %4, %3
    LOAD_IF_USED 6, 7, 8, 9, 10, 11, 12, 13, 14
    ; The fourth parameter is either a numeric stack size or the first argument name.
    %if %0 > 4
        %ifnum %4
            DEFINE_ARGS %5
        %else
            DEFINE_ARGS %4, %5
        %endif
    %elifnnum %4
        DEFINE_ARGS %4
    %endif
%endmacro

; Non-zero when any of the conditions below holds; tested by REP_RET and TAIL_CALL.
%define has_epilogue regs_used > 9 || stack_size > 0 || vzeroupper_required

; *nix x64 function return: release the stack allocation, pop r14 down to r9
; (the reverse of the order given to PUSH_IF_USED in PROLOGUE), then return.
%macro RET 0
    %if stack_size_padded > 0
        %if required_stack_alignment > STACK_ALIGNMENT
            mov rsp, rstkm
        %else
            add rsp, stack_size_padded
        %endif
    %endif
    POP_IF_USED 14, 13, 12, 11, 10, 9
    %if vzeroupper_required
        vzeroupper
    %endif
    AUTO_REP_RET
%endmacro

%else ; X86_32 ;==============================================================

; Register numbering for x86-32. Every register is declared with a third
; operand that grows by 4 per register.
DECLARE_REG 0, eax, 4
DECLARE_REG 1, ecx, 8
DECLARE_REG 2, edx, 12
DECLARE_REG 3, ebx, 16
DECLARE_REG 4, esi, 20
DECLARE_REG 5, edi, 24
DECLARE_REG 6, ebp, 28
%define rsp esp

; For each listed index N, defines rNm as the memory operand
; [rstk + stack_offset + 4*N + 4] and rNmp as its dword-sized form.
%macro DECLARE_ARG 1-*
    %rep %0
        %define r%1m [rstk + stack_offset + 4*%1 + 4]
        %define r%1mp dword r%1m
        %rotate 1
    %endrep
%endmacro

DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14

; x86-32 function prologue.
; num_args and regs_used are clamped to 7, the number of registers declared
; above, after checking that regs_used is at least num_args.
%macro PROLOGUE 2-5+ 0, 0 ; #args, #regs, #xmm_regs, [stack_size,] arg_names...
    %assign num_args %1
    %assign regs_used %2
    ASSERT regs_used >= num_args
    %if num_args > 7
        %assign num_args 7
    %endif
    %if regs_used > 7
        %assign regs_used 7
    %endif
    SETUP_STACK_POINTER %4
    ASSERT regs_used <= 7
    ; r3-r6 are ebx, esi, edi, ebp in the table above.
    PUSH_IF_USED 3, 4, 5, 6
    ALLOC_STACK %4, %3
    LOAD_IF_USED 0, 1, 2, 3, 4, 5, 6
    ; The fourth parameter is either a numeric stack size or the first argument name.
    %if %0 > 4
        %ifnum %4
            DEFINE_ARGS %5
        %else
            DEFINE_ARGS %4, %5
        %endif
    %elifnnum %4
        DEFINE_ARGS %4
    %endif
%endmacro

; Non-zero when any of the conditions below holds; tested by REP_RET and TAIL_CALL.
%define has_epilogue regs_used > 3 || stack_size > 0 || vzeroupper_required

; x86-32 function return: release the stack allocation, pop r6 down to r3
; (the reverse of the order given to PUSH_IF_USED in PROLOGUE), then return.
%macro RET 0
    %if stack_size_padded > 0
        %if required_stack_alignment > STACK_ALIGNMENT
            mov rsp, rstkm
        %else
            add rsp, stack_size_padded
        %endif
    %endif
    POP_IF_USED 6, 5, 4, 3
    %if vzeroupper_required
        vzeroupper
    %endif
    AUTO_REP_RET
%endmacro

%endif ;======================================================================

; Outside WIN64 the WIN64_* macros emit no instructions. They only keep the
; stack state and xmm_regs_used bookkeeping up to date.
%if WIN64 == 0
    %macro WIN64_SPILL_XMM 1-2
        RESET_STACK_STATE
        %if mmsize != 8
            %assign xmm_regs_used %1
        %endif
    %endmacro
    %macro WIN64_RESTORE_XMM 0
        RESET_STACK_STATE
    %endmacro
; Non-WIN64 counterpart of WIN64_PUSH_XMM: emits no instructions and only
; records the new xmm_regs_used value when one is supplied.
    %macro WIN64_PUSH_XMM 0-2
        %if mmsize != 8 && %0 >= 1
            %assign xmm_regs_used %1
        %endif
    %endmacro
%endif

; On AMD cpus <=K10, an ordinary ret is slow if it immediately follows either
; a branch or a branch target. So switch to a 2-byte form of ret in that case.
; We can automatically detect "follows a branch", but not a branch target.
; (SSSE3 is a sufficient condition to know that your cpu doesn't have this problem.)
%macro REP_RET 0
    ; With an epilogue (or with SSSE3) the regular RET is used; otherwise emit the 2-byte form.
    %if has_epilogue || cpuflag(ssse3)
        RET
    %else
        rep ret
    %endif
    annotate_function_size
%endmacro

; Address just past the most recent conditional branch emitted through the
; wrappers created by BRANCH_INSTR; starts out as the beginning of the section.
%define last_branch_adr $$
; Emits ret, preceded by a rep prefix when the ret directly follows such a
; branch and SSSE3 is not enabled.
%macro AUTO_REP_RET 0
    %if notcpuflag(ssse3)
        times ((last_branch_adr-$)>>31)+1 rep ; times 1 iff $ == last_branch_adr.
    %endif
    ret
    annotate_function_size
%endmacro

; For every mnemonic listed, defines a macro of the same name that emits the
; instruction and then records the address following it in last_branch_adr.
%macro BRANCH_INSTR 0-*
    %rep %0
        %macro %1 1-2 %1
            %2 %1
            %if notcpuflag(ssse3)
                %%branch_instr equ $
                %xdefine last_branch_adr %%branch_instr
            %endif
        %endmacro
        %rotate 1
    %endrep
%endmacro

BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae, jna, jnae, jb, jbe, jnb, jnbe, jc, jnc, js, jns, jo, jno, jp, jnp

; Transfers control to the callee at the end of a function.
; With an epilogue: call followed by RET. Without one: a jmp, unless the
; second argument is 0, in which case no instruction is emitted.
%macro TAIL_CALL 1-2 1 ; callee, is_nonadjacent
    %if has_epilogue
        call %1
        RET
    %elif %2
        jmp %1
    %endif
    annotate_function_size
%endmacro

;=============================================================================
; arch-independent part
;=============================================================================

; Alignment applied to every function entry point emitted by cglobal_internal.
%assign function_align 16

; Begin a function.
; Applies any symbol mangling needed for C linkage, and sets up a define such that
; subsequent uses of the function name automatically refer to the mangled version.
; Appends cpuflags to the function name if cpuflags has been specified.
; The "" empty default parameter is a workaround for nasm, which fails if SUFFIX
; is empty and we call cglobal_internal with just %1 %+ SUFFIX (without %2).
; cglobal: declare a function whose symbol uses private_prefix.
%macro cglobal 1-2+ "" ; name, [PROLOGUE args]
    cglobal_internal 1, %1 %+ SUFFIX, %2
%endmacro
; cvisible: like cglobal, but the symbol uses public_prefix and is exported
; without the hidden/private_extern qualifier.
%macro cvisible 1-2+ "" ; name, [PROLOGUE args]
    cglobal_internal 0, %1 %+ SUFFIX, %2
%endmacro
; Shared implementation of cglobal and cvisible.
; Parameters: is_private (1 or 0), suffixed name, optional PROLOGUE arguments.
%macro cglobal_internal 2-3+
    annotate_function_size
    ; Only set up the mangled name the first time a given name is seen.
    %ifndef cglobaled_%2
        %if %1
            %xdefine %2 mangle(private_prefix %+ _ %+ %2)
        %else
            %xdefine %2 mangle(public_prefix %+ _ %+ %2)
        %endif
        %xdefine %2.skip_prologue %2 %+ .skip_prologue
        CAT_XDEFINE cglobaled_, %2, 1
    %endif
    %xdefine current_function %2
    %xdefine current_function_section __SECT__
    %if FORMAT_ELF
        %if %1
            global %2:function hidden
        %else
            global %2:function
        %endif
    %elif FORMAT_MACHO && HAVE_PRIVATE_EXTERN && %1
        global %2:private_extern
    %else
        global %2
    %endif
    align function_align
    %2:
    RESET_MM_PERMUTATION        ; needed for x86-64, also makes disassembly somewhat nicer
    %xdefine rstk rsp           ; copy of the original stack pointer, used when greater alignment than the known stack alignment is required
    %assign stack_offset 0      ; stack pointer offset relative to the return address
    %assign stack_size 0        ; amount of stack space that can be freely used inside a function
    %assign stack_size_padded 0 ; total amount of allocated stack space, including space for callee-saved xmm registers on WIN64 and alignment padding
    %assign xmm_regs_used 0     ; number of XMM registers requested, used for dealing with callee-saved registers on WIN64 and vzeroupper
    ; The prologue is only emitted when PROLOGUE arguments were passed along.
    %ifnidn %3, ""
        PROLOGUE %3
    %endif
%endmacro

; Create a global symbol from a local label with the correct name mangling and type
%macro cglobal_label 1
    %if FORMAT_ELF
        global current_function %+ %1:function hidden
    %elif FORMAT_MACHO && HAVE_PRIVATE_EXTERN
        global current_function %+ %1:private_extern
    %else
        global current_function %+ %1
    %endif
    %1:
%endmacro

; Declare an external symbol that carries private_prefix.
%macro cextern 1
    %xdefine %1 mangle(private_prefix %+ _ %+ %1)
    CAT_XDEFINE cglobaled_, %1, 2
    extern %1
%endmacro

; Like cextern, but without the prefix. This should be used for symbols from external libraries.
%macro cextern_naked 1
    %ifdef PREFIX
        %xdefine %1 mangle(%1)
    %endif
    CAT_XDEFINE cglobaled_, %1, 3
    extern %1
%endmacro

; Define a global data symbol that carries private_prefix.
; Parameters: name, followed by the data definition placed after the label.
%macro const 1-2+
    %xdefine %1 mangle(private_prefix %+ _ %+ %1)
    %if FORMAT_ELF
        global %1:data hidden
    %elif FORMAT_MACHO && HAVE_PRIVATE_EXTERN
        global %1:private_extern
    %else
        global %1
    %endif
    %1: %2
%endmacro

%if FORMAT_ELF
    ; The GNU linker assumes the stack is executable by default.
    [SECTION .note.GNU-stack noalloc noexec nowrite progbits]

    %ifdef __NASM_VERSION_ID__
        %if __NASM_VERSION_ID__ >= 0x020e0300 ; 2.14.03
            %if ARCH_X86_64
                ; Control-flow Enforcement Technology (CET) properties.
                [SECTION .note.gnu.property alloc noexec nowrite note align=gprsize]
                dd 0x00000004  ; n_namesz
                dd gprsize + 8 ; n_descsz
                dd 0x00000005  ; n_type = NT_GNU_PROPERTY_TYPE_0
                db "GNU",0     ; n_name
                dd 0xc0000002  ; pr_type = GNU_PROPERTY_X86_FEATURE_1_AND
                dd 0x00000004  ; pr_datasz
                dd 0x00000002  ; pr_data = GNU_PROPERTY_X86_FEATURE_1_SHSTK
                dd 0x00000000  ; pr_padding
            %endif
        %endif
    %endif
%endif

; Tell debuggers how large the function was.
; This may be invoked multiple times per function; we rely on later instances overriding earlier ones.
; This is invoked by RET and similar macros, and also cglobal does it for the previous function,
; but if the last function in a source file doesn't use any of the standard macros for its epilogue,
; then its size might be unspecified.
%macro annotate_function_size 0
    ; Only emitted when assembling with yasm, inside a function, for ELF output.
    %ifdef __YASM_VER__
        %ifdef current_function
            %if FORMAT_ELF
                ; Switch to the section the function was started in, emit the size, switch back.
                current_function_section
                %%ecf equ $
                size current_function %%ecf - current_function
                __SECT__
            %endif
        %endif
    %endif
%endmacro

; cpuflags
; Each value is one dedicated bit OR-ed with the values of the flags it builds
; on, so a single mask test also covers every flag it depends on.

%assign cpuflags_mmx       (1<<0)
%assign cpuflags_mmx2      (1<<1)  | cpuflags_mmx
%assign cpuflags_3dnow     (1<<2)  | cpuflags_mmx
%assign cpuflags_3dnowext  (1<<3)  | cpuflags_3dnow
%assign cpuflags_sse       (1<<4)  | cpuflags_mmx2
%assign cpuflags_sse2      (1<<5)  | cpuflags_sse
%assign cpuflags_sse2slow  (1<<6)  | cpuflags_sse2
%assign cpuflags_lzcnt     (1<<7)  | cpuflags_sse2
Coastguard Worker%assign cpuflags_sse3 (1<<8) | cpuflags_sse2 943*c0909341SAndroid Build Coastguard Worker%assign cpuflags_ssse3 (1<<9) | cpuflags_sse3 944*c0909341SAndroid Build Coastguard Worker%assign cpuflags_sse4 (1<<10) | cpuflags_ssse3 945*c0909341SAndroid Build Coastguard Worker%assign cpuflags_sse42 (1<<11) | cpuflags_sse4 946*c0909341SAndroid Build Coastguard Worker%assign cpuflags_aesni (1<<12) | cpuflags_sse42 947*c0909341SAndroid Build Coastguard Worker%assign cpuflags_clmul (1<<13) | cpuflags_sse42 948*c0909341SAndroid Build Coastguard Worker%assign cpuflags_gfni (1<<14) | cpuflags_aesni|cpuflags_clmul 949*c0909341SAndroid Build Coastguard Worker%assign cpuflags_avx (1<<15) | cpuflags_sse42 950*c0909341SAndroid Build Coastguard Worker%assign cpuflags_xop (1<<16) | cpuflags_avx 951*c0909341SAndroid Build Coastguard Worker%assign cpuflags_fma4 (1<<17) | cpuflags_avx 952*c0909341SAndroid Build Coastguard Worker%assign cpuflags_fma3 (1<<18) | cpuflags_avx 953*c0909341SAndroid Build Coastguard Worker%assign cpuflags_bmi1 (1<<19) | cpuflags_avx|cpuflags_lzcnt 954*c0909341SAndroid Build Coastguard Worker%assign cpuflags_bmi2 (1<<20) | cpuflags_bmi1 955*c0909341SAndroid Build Coastguard Worker%assign cpuflags_avx2 (1<<21) | cpuflags_fma3|cpuflags_bmi2 956*c0909341SAndroid Build Coastguard Worker%assign cpuflags_avx512 (1<<22) | cpuflags_avx2 ; F, CD, BW, DQ, VL 957*c0909341SAndroid Build Coastguard Worker%assign cpuflags_avx512icl (1<<23) | cpuflags_avx512|cpuflags_gfni ; VNNI, IFMA, VBMI, VBMI2, VPOPCNTDQ, BITALG, VAES, VPCLMULQDQ 958*c0909341SAndroid Build Coastguard Worker 959*c0909341SAndroid Build Coastguard Worker%assign cpuflags_cache32 (1<<24) 960*c0909341SAndroid Build Coastguard Worker%assign cpuflags_cache64 (1<<25) 961*c0909341SAndroid Build Coastguard Worker%assign cpuflags_aligned (1<<26) ; not a cpu feature, but a function variant 962*c0909341SAndroid Build Coastguard Worker%assign cpuflags_atom (1<<27) 963*c0909341SAndroid Build Coastguard 
; Returns a boolean value expressing whether or not the specified cpuflag is enabled.
; ((cpuflags & f) ^ f) is zero only when every bit of f = cpuflags_<x> is set in
; cpuflags. Subtracting 1 turns that zero into -1, whose bit 31 is set; with the
; flag values used in this file (bits 0-27) any non-zero result minus 1 keeps
; bit 31 clear. The final ">> 31 & 1" extracts that bit as 1 or 0.
%define    cpuflag(x) (((((cpuflags & (cpuflags_ %+ x)) ^ (cpuflags_ %+ x)) - 1) >> 31) & 1)
; Logical negation of cpuflag(x).
%define notcpuflag(x) (cpuflag(x) ^ 1)

; Takes an arbitrary number of cpuflags from the above list.
; All subsequent functions (up to the next INIT_CPUFLAGS) are built for the specified cpu.
; You shouldn't need to invoke this macro directly, it's a subroutine for INIT_MMX &co.
;
; State written by this macro:
;   SUFFIX      - empty when called without arguments, otherwise _<cpuname>
;   cpuname     - undefined without arguments, otherwise the arguments joined by '_'
;   cpuflags    - OR of cpuflags_<arg> over all arguments (0 without arguments)
;   avx_enabled - forced to 1 when the avx flag is enabled (otherwise left untouched)
;   mova/movu/movnta - may be redefined, see below
; It reads mmsize, so mmsize must already be defined by the caller.
%macro INIT_CPUFLAGS 0-*
    ; Start from a clean slate so nothing leaks over from the previous invocation.
    %xdefine SUFFIX
    %undef cpuname
    %assign cpuflags 0

    %if %0 >= 1
        ; %rep %0 combined with %rotate 1 visits every argument exactly once.
        %rep %0
            %ifdef cpuname
                %xdefine cpuname cpuname %+ _%1
            %else
                %xdefine cpuname %1
            %endif
            %assign cpuflags cpuflags | cpuflags_%1
            %rotate 1
        %endrep
        %xdefine SUFFIX _ %+ cpuname

        %if cpuflag(avx)
            %assign avx_enabled 1
        %endif
        ; 16-byte registers without sse2, or 32-byte registers without avx2:
        ; use the packed-single forms of the move mnemonics.
        %if (mmsize == 16 && notcpuflag(sse2)) || (mmsize == 32 && notcpuflag(avx2))
            %define mova movaps
            %define movu movups
            %define movnta movntps
        %endif
        ; The "aligned" function variant maps unaligned moves onto aligned ones;
        ; otherwise sse3-without-ssse3 builds use lddqu for unaligned moves.
        %if cpuflag(aligned)
            %define movu mova
        %elif cpuflag(sse3) && notcpuflag(ssse3)
            %define movu lddqu
        %endif
    %endif

    ; Select the NOP flavour via ALIGNMODE when __NASM_VERSION_ID__ is defined,
    ; and via the CPU directive otherwise (e.g. yasm). x86-64 or sse2 builds get
    ; p6/amdnop, everything else gets nop/basicnop.
    %if ARCH_X86_64 || cpuflag(sse2)
        %ifdef __NASM_VERSION_ID__
            ALIGNMODE p6
        %else
            CPU amdnop
        %endif
    %else
        %ifdef __NASM_VERSION_ID__
            ALIGNMODE nop
        %else
            CPU basicnop
        %endif
    %endif
%endmacro

; Merge mmx, sse*, and avx*
; m# is a simd register of the currently selected size
; xm# is the corresponding xmm register if mmsize >= 16, otherwise the same as m#
; ym# is the corresponding ymm register if mmsize >= 32, otherwise the same as m#
; zm# is the corresponding zmm register if mmsize >= 64, otherwise the same as m#
; (All 4 remain in sync through SWAP.)

; %xdefine the identifier formed by pasting %1 and %2 together, with value %3.
; Being a macro call, all three arguments are expanded before the definition.
%macro CAT_XDEFINE 3
    %xdefine %1%2 %3
%endmacro

; %undef the identifier formed by pasting %1 and %2 together.
%macro CAT_UNDEF 2
    %undef %1%2
%endmacro

; (Re)creates the identity register mapping for register type %1 (mm/xmm/ymm/zmm):
;   m<N>       -> <%1><N>
;   nn<%1><N>  -> N          (reverse lookup: register name to number)
; for N in 0..num_mmregs-1, where num_mmregs is 8, or 16 on x86-64 with
; mmsize >= 16, or 32 when additionally avx512 is enabled or mmsize == 64.
; Reads mmsize and cpuflags; writes num_mmregs and mmtype.
%macro DEFINE_MMREGS 1 ; mmtype
    ; Remember how many registers the previous invocation defined (0 if none).
    %assign %%prev_mmregs 0
    %ifdef num_mmregs
        %assign %%prev_mmregs num_mmregs
    %endif

    %assign num_mmregs 8
    %if ARCH_X86_64 && mmsize >= 16
        %assign num_mmregs 16
        %if cpuflag(avx512) || mmsize == 64
            %assign num_mmregs 32
        %endif
    %endif

    %assign %%i 0
    %rep num_mmregs
        CAT_XDEFINE m, %%i, %1 %+ %%i
        CAT_XDEFINE nn%1, %%i, %%i
        %assign %%i %%i+1
    %endrep
    ; %%i now equals num_mmregs. If the previous mode exposed more registers,
    ; remove the leftover m<N> names and the old type's nn<type><N> entries.
    ; mmtype still names the previous register type here; it is only updated
    ; on the last line of this macro.
    %if %%prev_mmregs > num_mmregs
        %rep %%prev_mmregs - num_mmregs
            CAT_UNDEF m, %%i
            CAT_UNDEF nn %+ mmtype, %%i
            %assign %%i %%i+1
        %endrep
    %endif
    %xdefine mmtype %1
%endmacro

; Prefer registers 16-31 over 0-15 to avoid having to use vzeroupper
; On x86-64 with avx512 enabled, SWAPs m<i> with m<i+16> for every i from
; start_reg (default 0) up to 15. Does nothing otherwise.
%macro AVX512_MM_PERMUTATION 0-1 0 ; start_reg
    %if ARCH_X86_64 && cpuflag(avx512)
        %assign %%i %1
        %rep 16-%1
            %assign %%i_high %%i+16
            SWAP %%i, %%i_high
            %assign %%i %%i+1
        %endrep
    %endif
%endmacro

; INIT_MMX / INIT_XMM / INIT_YMM / INIT_ZMM [cpuflags...]
; Select the SIMD register width for the code that follows. Each one sets
; avx_enabled, mmsize and the mova/movu/movh/movnta mnemonics, records itself
; in RESET_MM_PERMUTATION (so that macro re-runs the same INIT_* with the same
; arguments), then calls INIT_CPUFLAGS with the given flags and DEFINE_MMREGS
; for the matching register type.

; 8-byte mm registers; avx_enabled is cleared.
%macro INIT_MMX 0-1+
    %assign avx_enabled 0
    %define RESET_MM_PERMUTATION INIT_MMX %1
    %define mmsize 8
    %define mova movq
    %define movu movq
    %define movh movd
    %define movnta movntq
    INIT_CPUFLAGS %1
    DEFINE_MMREGS mm
%endmacro

; 16-byte xmm registers; avx_enabled starts out as FORCE_VEX_ENCODING (defined
; elsewhere). bcstw/bcstd/bcstq become the 1toN operand decorators with
; N = 16 bytes / element size.
%macro INIT_XMM 0-1+
    %assign avx_enabled FORCE_VEX_ENCODING
    %define RESET_MM_PERMUTATION INIT_XMM %1
    %define mmsize 16
    %define mova movdqa
    %define movu movdqu
    %define movh movq
    %define movnta movntdq
    INIT_CPUFLAGS %1
    DEFINE_MMREGS xmm
    %if WIN64
        AVX512_MM_PERMUTATION 6 ; Swap callee-saved registers with volatile registers
    %endif
    %xdefine bcstw 1to8
    %xdefine bcstd 1to4
    %xdefine bcstq 1to2
%endmacro

; 32-byte ymm registers; avx_enabled is set and movh is undefined.
; bcst* use N = 32 bytes / element size.
%macro INIT_YMM 0-1+
    %assign avx_enabled 1
    %define RESET_MM_PERMUTATION INIT_YMM %1
    %define mmsize 32
    %define mova movdqa
    %define movu movdqu
    %undef movh
    %define movnta movntdq
    INIT_CPUFLAGS %1
    DEFINE_MMREGS ymm
    AVX512_MM_PERMUTATION
    %xdefine bcstw 1to16
    %xdefine bcstd 1to8
    %xdefine bcstq 1to4
%endmacro

; 64-byte zmm registers; avx_enabled is set and movh is undefined.
; bcst* use N = 64 bytes / element size.
%macro INIT_ZMM 0-1+
    %assign avx_enabled 1
    %define RESET_MM_PERMUTATION INIT_ZMM %1
    %define mmsize 64
    %define mova movdqa
    %define movu movdqu
    %undef movh
    %define movnta movntdq
    INIT_CPUFLAGS %1
    DEFINE_MMREGS zmm
    AVX512_MM_PERMUTATION
    %xdefine bcstw 1to32
    %xdefine bcstd 1to16
    %xdefine bcstq 1to8
%endmacro

; Default mode: xmm registers with no cpuflags.
INIT_XMM

; Declares the register "cast" names for register number %1.
; <a><b>N (a, b in mm/xmm/ymm/zmm) resolves to register N of the narrower of
; the two types. xmN/ymN/zmN are plain %defines, so they are expanded at the
; point of use: e.g. xmN becomes xmm pasted onto whatever mN currently
; expands to, which is then resolved through the table above.
%macro DECLARE_MMCAST 1
    %define  mmmm%1   mm%1
    %define  mmxmm%1  mm%1
    %define  mmymm%1  mm%1
    %define  mmzmm%1  mm%1
    %define xmmmm%1   mm%1
    %define xmmxmm%1 xmm%1
    %define xmmymm%1 xmm%1
    %define xmmzmm%1 xmm%1
    %define ymmmm%1   mm%1
    %define ymmxmm%1 xmm%1
    %define ymmymm%1 ymm%1
    %define ymmzmm%1 ymm%1
    %define zmmmm%1   mm%1
    %define zmmxmm%1 xmm%1
    %define zmmymm%1 ymm%1
    %define zmmzmm%1 zmm%1
    %define xm%1 xmm %+ m%1
    %define ym%1 ymm %+ m%1
    %define zm%1 zmm %+ m%1
%endmacro

; Instantiate the cast names for register numbers 0..31.
%assign i 0
%rep 32
    DECLARE_MMCAST i
    %assign i i+1
%endrep

; I often want to use macros that permute their arguments. e.g. there's no
; efficient way to implement butterfly or transpose or dct without swapping some
; arguments.
;
; I would like to not have to manually keep track of the permutations:
; If I insert a permutation in the middle of a function, it should automatically
; change everything that follows. For more complex macros I may also have multiple
; implementations, e.g. the SSE2 and SSSE3 versions may have different permutations.
;
; Hence these macros. Insert a PERMUTE or some SWAPs at the end of a macro that
; permutes its arguments. It's equivalent to exchanging the contents of the
; registers, except that this way you exchange the register names instead, so it
; doesn't cost any cycles.
; PERMUTE dst0, src0 [, dst1, src1 ...]
; Renames registers in bulk: after the call m<dst> names what m<src> named
; before it. Two passes are used so that every source is read before any
; destination is overwritten.
%macro PERMUTE 2-* ; takes a list of pairs to swap
    ; Pass 1: snapshot the current expansion of every source m<src>.
    %rep %0/2
        %xdefine %%tmp%2 m%2
        %rotate 2
    %endrep
    ; Pass 2: assign the snapshots to the destinations and update the
    ; nn<register> reverse mapping to the destination number.
    %rep %0/2
        %xdefine m%1 %%tmp%2
        CAT_XDEFINE nn, m%1, %1
        %rotate 2
    %endrep
%endmacro

; SWAP a, b [, c ...]
; Accepts either register numbers or register names (m0, m1, ...); the form
; is chosen from the first argument only.
%macro SWAP 2+ ; swaps a single chain (sometimes more concise than pairs)
    %ifnum %1 ; SWAP 0, 1, ...
        SWAP_INTERNAL_NUM %1, %2
    %else ; SWAP m0, m1, ...
        SWAP_INTERNAL_NAME %1, %2
    %endif
%endmacro

; Numeric worker for SWAP: exchanges m<%1> and m<%2>, refreshes both nn<...>
; reverse-mapping entries, then rotates the argument list by one and repeats,
; so each adjacent pair in the list is swapped in turn.
%macro SWAP_INTERNAL_NUM 2-*
    %rep %0-1
        %xdefine %%tmp m%1
        %xdefine m%1 m%2
        %xdefine m%2 %%tmp
        CAT_XDEFINE nn, m%1, %1
        CAT_XDEFINE nn, m%2, %2
        %rotate 1
    %endrep
%endmacro

; Name worker for SWAP: translates every argument to its number by pasting
; "nn" in front of it, collects the results in a comma-separated list and
; hands that list to SWAP_INTERNAL_NUM.
%macro SWAP_INTERNAL_NAME 2-*
    %xdefine %%args nn %+ %1
    %rep %0-1
        %xdefine %%args %%args, nn %+ %2
        %rotate 1
    %endrep
    SWAP_INTERNAL_NUM %%args
%endmacro

; If SAVE_MM_PERMUTATION is placed at the end of a function, then any later
; calls to that function will automatically load the permutation, so values can
; be returned in mmregs.
1224*c0909341SAndroid Build Coastguard Worker%macro SAVE_MM_PERMUTATION 0-1 1225*c0909341SAndroid Build Coastguard Worker %if %0 1226*c0909341SAndroid Build Coastguard Worker %xdefine %%f %1_m 1227*c0909341SAndroid Build Coastguard Worker %else 1228*c0909341SAndroid Build Coastguard Worker %xdefine %%f current_function %+ _m 1229*c0909341SAndroid Build Coastguard Worker %endif 1230*c0909341SAndroid Build Coastguard Worker %assign %%i 0 1231*c0909341SAndroid Build Coastguard Worker %rep num_mmregs 1232*c0909341SAndroid Build Coastguard Worker %xdefine %%tmp m %+ %%i 1233*c0909341SAndroid Build Coastguard Worker CAT_XDEFINE %%f, %%i, regnumof %+ %%tmp 1234*c0909341SAndroid Build Coastguard Worker %assign %%i %%i+1 1235*c0909341SAndroid Build Coastguard Worker %endrep 1236*c0909341SAndroid Build Coastguard Worker%endmacro 1237*c0909341SAndroid Build Coastguard Worker 1238*c0909341SAndroid Build Coastguard Worker%macro LOAD_MM_PERMUTATION 0-1 ; name to load from 1239*c0909341SAndroid Build Coastguard Worker %if %0 1240*c0909341SAndroid Build Coastguard Worker %xdefine %%f %1_m 1241*c0909341SAndroid Build Coastguard Worker %else 1242*c0909341SAndroid Build Coastguard Worker %xdefine %%f current_function %+ _m 1243*c0909341SAndroid Build Coastguard Worker %endif 1244*c0909341SAndroid Build Coastguard Worker %xdefine %%tmp %%f %+ 0 1245*c0909341SAndroid Build Coastguard Worker %ifnum %%tmp 1246*c0909341SAndroid Build Coastguard Worker DEFINE_MMREGS mmtype 1247*c0909341SAndroid Build Coastguard Worker %assign %%i 0 1248*c0909341SAndroid Build Coastguard Worker %rep num_mmregs 1249*c0909341SAndroid Build Coastguard Worker %xdefine %%tmp %%f %+ %%i 1250*c0909341SAndroid Build Coastguard Worker CAT_XDEFINE %%m, %%i, m %+ %%tmp 1251*c0909341SAndroid Build Coastguard Worker %assign %%i %%i+1 1252*c0909341SAndroid Build Coastguard Worker %endrep 1253*c0909341SAndroid Build Coastguard Worker %rep num_mmregs 1254*c0909341SAndroid Build Coastguard Worker %assign %%i %%i-1 
1255*c0909341SAndroid Build Coastguard Worker CAT_XDEFINE m, %%i, %%m %+ %%i 1256*c0909341SAndroid Build Coastguard Worker CAT_XDEFINE nn, m %+ %%i, %%i 1257*c0909341SAndroid Build Coastguard Worker %endrep 1258*c0909341SAndroid Build Coastguard Worker %endif 1259*c0909341SAndroid Build Coastguard Worker%endmacro 1260*c0909341SAndroid Build Coastguard Worker 1261*c0909341SAndroid Build Coastguard Worker; Append cpuflags to the callee's name iff the appended name is known and the plain name isn't 1262*c0909341SAndroid Build Coastguard Worker%macro call 1 1263*c0909341SAndroid Build Coastguard Worker %ifid %1 1264*c0909341SAndroid Build Coastguard Worker call_internal %1 %+ SUFFIX, %1 1265*c0909341SAndroid Build Coastguard Worker %else 1266*c0909341SAndroid Build Coastguard Worker call %1 1267*c0909341SAndroid Build Coastguard Worker %endif 1268*c0909341SAndroid Build Coastguard Worker%endmacro 1269*c0909341SAndroid Build Coastguard Worker%macro call_internal 2 1270*c0909341SAndroid Build Coastguard Worker %xdefine %%i %2 1271*c0909341SAndroid Build Coastguard Worker %define %%j %%i 1272*c0909341SAndroid Build Coastguard Worker %ifndef cglobaled_%2 1273*c0909341SAndroid Build Coastguard Worker %ifdef cglobaled_%1 1274*c0909341SAndroid Build Coastguard Worker %xdefine %%i %1 1275*c0909341SAndroid Build Coastguard Worker %endif 1276*c0909341SAndroid Build Coastguard Worker %elif FORMAT_ELF 1277*c0909341SAndroid Build Coastguard Worker %if ARCH_X86_64 1278*c0909341SAndroid Build Coastguard Worker %if cglobaled_%2 >= 2 1279*c0909341SAndroid Build Coastguard Worker ; Always emit PLT relocations when calling external functions, 1280*c0909341SAndroid Build Coastguard Worker ; the linker will eliminate unnecessary PLT indirections anyway. 
1281*c0909341SAndroid Build Coastguard Worker %define %%j %%i wrt ..plt 1282*c0909341SAndroid Build Coastguard Worker %endif 1283*c0909341SAndroid Build Coastguard Worker %elif PIC && cglobaled_%2 == 3 1284*c0909341SAndroid Build Coastguard Worker ; Go through the GOT for functions declared using cextern_naked with 1285*c0909341SAndroid Build Coastguard Worker ; PIC, as such functions presumably exists in external libraries. 1286*c0909341SAndroid Build Coastguard Worker extern _GLOBAL_OFFSET_TABLE_ 1287*c0909341SAndroid Build Coastguard Worker LEA eax, $$+_GLOBAL_OFFSET_TABLE_ wrt ..gotpc 1288*c0909341SAndroid Build Coastguard Worker %define %%j [eax+%%i wrt ..got] 1289*c0909341SAndroid Build Coastguard Worker %endif 1290*c0909341SAndroid Build Coastguard Worker %endif 1291*c0909341SAndroid Build Coastguard Worker call %%j 1292*c0909341SAndroid Build Coastguard Worker LOAD_MM_PERMUTATION %%i 1293*c0909341SAndroid Build Coastguard Worker%endmacro 1294*c0909341SAndroid Build Coastguard Worker 1295*c0909341SAndroid Build Coastguard Worker; Substitutions that reduce instruction size but are functionally equivalent 1296*c0909341SAndroid Build Coastguard Worker%macro add 2 1297*c0909341SAndroid Build Coastguard Worker %ifnum %2 1298*c0909341SAndroid Build Coastguard Worker %if %2==128 1299*c0909341SAndroid Build Coastguard Worker sub %1, -128 1300*c0909341SAndroid Build Coastguard Worker %else 1301*c0909341SAndroid Build Coastguard Worker add %1, %2 1302*c0909341SAndroid Build Coastguard Worker %endif 1303*c0909341SAndroid Build Coastguard Worker %else 1304*c0909341SAndroid Build Coastguard Worker add %1, %2 1305*c0909341SAndroid Build Coastguard Worker %endif 1306*c0909341SAndroid Build Coastguard Worker%endmacro 1307*c0909341SAndroid Build Coastguard Worker 1308*c0909341SAndroid Build Coastguard Worker%macro sub 2 1309*c0909341SAndroid Build Coastguard Worker %ifnum %2 1310*c0909341SAndroid Build Coastguard Worker %if %2==128 1311*c0909341SAndroid Build Coastguard 
Worker add %1, -128 1312*c0909341SAndroid Build Coastguard Worker %else 1313*c0909341SAndroid Build Coastguard Worker sub %1, %2 1314*c0909341SAndroid Build Coastguard Worker %endif 1315*c0909341SAndroid Build Coastguard Worker %else 1316*c0909341SAndroid Build Coastguard Worker sub %1, %2 1317*c0909341SAndroid Build Coastguard Worker %endif 1318*c0909341SAndroid Build Coastguard Worker%endmacro 1319*c0909341SAndroid Build Coastguard Worker 1320*c0909341SAndroid Build Coastguard Worker;============================================================================= 1321*c0909341SAndroid Build Coastguard Worker; AVX abstraction layer 1322*c0909341SAndroid Build Coastguard Worker;============================================================================= 1323*c0909341SAndroid Build Coastguard Worker 1324*c0909341SAndroid Build Coastguard Worker%assign i 0 1325*c0909341SAndroid Build Coastguard Worker%rep 32 1326*c0909341SAndroid Build Coastguard Worker %if i < 8 1327*c0909341SAndroid Build Coastguard Worker CAT_XDEFINE sizeofmm, i, 8 1328*c0909341SAndroid Build Coastguard Worker CAT_XDEFINE regnumofmm, i, i 1329*c0909341SAndroid Build Coastguard Worker %endif 1330*c0909341SAndroid Build Coastguard Worker CAT_XDEFINE sizeofxmm, i, 16 1331*c0909341SAndroid Build Coastguard Worker CAT_XDEFINE sizeofymm, i, 32 1332*c0909341SAndroid Build Coastguard Worker CAT_XDEFINE sizeofzmm, i, 64 1333*c0909341SAndroid Build Coastguard Worker CAT_XDEFINE regnumofxmm, i, i 1334*c0909341SAndroid Build Coastguard Worker CAT_XDEFINE regnumofymm, i, i 1335*c0909341SAndroid Build Coastguard Worker CAT_XDEFINE regnumofzmm, i, i 1336*c0909341SAndroid Build Coastguard Worker %assign i i+1 1337*c0909341SAndroid Build Coastguard Worker%endrep 1338*c0909341SAndroid Build Coastguard Worker%undef i 1339*c0909341SAndroid Build Coastguard Worker 1340*c0909341SAndroid Build Coastguard Worker%macro CHECK_AVX_INSTR_EMU 3-* 1341*c0909341SAndroid Build Coastguard Worker %xdefine %%opcode %1 
1342*c0909341SAndroid Build Coastguard Worker %xdefine %%dst %2 1343*c0909341SAndroid Build Coastguard Worker %rep %0-2 1344*c0909341SAndroid Build Coastguard Worker %ifidn %%dst, %3 1345*c0909341SAndroid Build Coastguard Worker %error non-avx emulation of ``%%opcode'' is not supported 1346*c0909341SAndroid Build Coastguard Worker %endif 1347*c0909341SAndroid Build Coastguard Worker %rotate 1 1348*c0909341SAndroid Build Coastguard Worker %endrep 1349*c0909341SAndroid Build Coastguard Worker%endmacro 1350*c0909341SAndroid Build Coastguard Worker 1351*c0909341SAndroid Build Coastguard Worker;%1 == instruction 1352*c0909341SAndroid Build Coastguard Worker;%2 == minimal instruction set 1353*c0909341SAndroid Build Coastguard Worker;%3 == 1 if float, 0 if int 1354*c0909341SAndroid Build Coastguard Worker;%4 == 1 if 4-operand emulation, 0 if 3-operand emulation, 255 otherwise (no emulation) 1355*c0909341SAndroid Build Coastguard Worker;%5 == 1 if commutative (i.e. doesn't matter which src arg is which), 0 if not 1356*c0909341SAndroid Build Coastguard Worker;%6+: operands 1357*c0909341SAndroid Build Coastguard Worker%macro RUN_AVX_INSTR 6-9+ 1358*c0909341SAndroid Build Coastguard Worker %ifnum sizeof%7 1359*c0909341SAndroid Build Coastguard Worker %assign __sizeofreg sizeof%7 1360*c0909341SAndroid Build Coastguard Worker %elifnum sizeof%6 1361*c0909341SAndroid Build Coastguard Worker %assign __sizeofreg sizeof%6 1362*c0909341SAndroid Build Coastguard Worker %else 1363*c0909341SAndroid Build Coastguard Worker %assign __sizeofreg mmsize 1364*c0909341SAndroid Build Coastguard Worker %endif 1365*c0909341SAndroid Build Coastguard Worker %assign __emulate_avx 0 1366*c0909341SAndroid Build Coastguard Worker %if avx_enabled && __sizeofreg >= 16 1367*c0909341SAndroid Build Coastguard Worker %xdefine __instr v%1 1368*c0909341SAndroid Build Coastguard Worker %else 1369*c0909341SAndroid Build Coastguard Worker %xdefine __instr %1 1370*c0909341SAndroid Build Coastguard Worker %if %0 >= 
8+%4 1371*c0909341SAndroid Build Coastguard Worker %assign __emulate_avx 1 1372*c0909341SAndroid Build Coastguard Worker %endif 1373*c0909341SAndroid Build Coastguard Worker %endif 1374*c0909341SAndroid Build Coastguard Worker %ifnidn %2, fnord 1375*c0909341SAndroid Build Coastguard Worker %ifdef cpuname 1376*c0909341SAndroid Build Coastguard Worker %if notcpuflag(%2) 1377*c0909341SAndroid Build Coastguard Worker %error use of ``%1'' %2 instruction in cpuname function: current_function 1378*c0909341SAndroid Build Coastguard Worker %elif %3 == 0 && __sizeofreg == 16 && notcpuflag(sse2) 1379*c0909341SAndroid Build Coastguard Worker %error use of ``%1'' sse2 instruction in cpuname function: current_function 1380*c0909341SAndroid Build Coastguard Worker %elif %3 == 0 && __sizeofreg == 32 && notcpuflag(avx2) 1381*c0909341SAndroid Build Coastguard Worker %error use of ``%1'' avx2 instruction in cpuname function: current_function 1382*c0909341SAndroid Build Coastguard Worker %elif __sizeofreg == 16 && notcpuflag(sse) 1383*c0909341SAndroid Build Coastguard Worker %error use of ``%1'' sse instruction in cpuname function: current_function 1384*c0909341SAndroid Build Coastguard Worker %elif __sizeofreg == 32 && notcpuflag(avx) 1385*c0909341SAndroid Build Coastguard Worker %error use of ``%1'' avx instruction in cpuname function: current_function 1386*c0909341SAndroid Build Coastguard Worker %elif __sizeofreg == 64 && notcpuflag(avx512) 1387*c0909341SAndroid Build Coastguard Worker %error use of ``%1'' avx512 instruction in cpuname function: current_function 1388*c0909341SAndroid Build Coastguard Worker %elifidn %1, pextrw ; special case because the base instruction is mmx2, 1389*c0909341SAndroid Build Coastguard Worker %ifnid %6 ; but sse4 is required for memory operands 1390*c0909341SAndroid Build Coastguard Worker %if notcpuflag(sse4) 1391*c0909341SAndroid Build Coastguard Worker %error use of ``%1'' sse4 instruction in cpuname function: current_function 
1392*c0909341SAndroid Build Coastguard Worker %endif 1393*c0909341SAndroid Build Coastguard Worker %endif 1394*c0909341SAndroid Build Coastguard Worker %endif 1395*c0909341SAndroid Build Coastguard Worker %endif 1396*c0909341SAndroid Build Coastguard Worker %endif 1397*c0909341SAndroid Build Coastguard Worker 1398*c0909341SAndroid Build Coastguard Worker %if __emulate_avx 1399*c0909341SAndroid Build Coastguard Worker %xdefine __src1 %7 1400*c0909341SAndroid Build Coastguard Worker %xdefine __src2 %8 1401*c0909341SAndroid Build Coastguard Worker %if %5 && %4 == 0 1402*c0909341SAndroid Build Coastguard Worker %ifnidn %6, %7 1403*c0909341SAndroid Build Coastguard Worker %ifidn %6, %8 1404*c0909341SAndroid Build Coastguard Worker %xdefine __src1 %8 1405*c0909341SAndroid Build Coastguard Worker %xdefine __src2 %7 1406*c0909341SAndroid Build Coastguard Worker %elifnnum sizeof%8 1407*c0909341SAndroid Build Coastguard Worker ; 3-operand AVX instructions with a memory arg can only have it in src2, 1408*c0909341SAndroid Build Coastguard Worker ; whereas SSE emulation prefers to have it in src1 (i.e. the mov). 1409*c0909341SAndroid Build Coastguard Worker ; So, if the instruction is commutative with a memory arg, swap them. 
1410*c0909341SAndroid Build Coastguard Worker %xdefine __src1 %8 1411*c0909341SAndroid Build Coastguard Worker %xdefine __src2 %7 1412*c0909341SAndroid Build Coastguard Worker %endif 1413*c0909341SAndroid Build Coastguard Worker %endif 1414*c0909341SAndroid Build Coastguard Worker %endif 1415*c0909341SAndroid Build Coastguard Worker %ifnidn %6, __src1 1416*c0909341SAndroid Build Coastguard Worker %if %0 >= 9 1417*c0909341SAndroid Build Coastguard Worker CHECK_AVX_INSTR_EMU {%1 %6, %7, %8, %9}, %6, __src2, %9 1418*c0909341SAndroid Build Coastguard Worker %else 1419*c0909341SAndroid Build Coastguard Worker CHECK_AVX_INSTR_EMU {%1 %6, %7, %8}, %6, __src2 1420*c0909341SAndroid Build Coastguard Worker %endif 1421*c0909341SAndroid Build Coastguard Worker %if __sizeofreg == 8 1422*c0909341SAndroid Build Coastguard Worker MOVQ %6, __src1 1423*c0909341SAndroid Build Coastguard Worker %elif %3 1424*c0909341SAndroid Build Coastguard Worker MOVAPS %6, __src1 1425*c0909341SAndroid Build Coastguard Worker %else 1426*c0909341SAndroid Build Coastguard Worker MOVDQA %6, __src1 1427*c0909341SAndroid Build Coastguard Worker %endif 1428*c0909341SAndroid Build Coastguard Worker %endif 1429*c0909341SAndroid Build Coastguard Worker %if %0 >= 9 1430*c0909341SAndroid Build Coastguard Worker %1 %6, __src2, %9 1431*c0909341SAndroid Build Coastguard Worker %else 1432*c0909341SAndroid Build Coastguard Worker %1 %6, __src2 1433*c0909341SAndroid Build Coastguard Worker %endif 1434*c0909341SAndroid Build Coastguard Worker %elif %0 >= 9 1435*c0909341SAndroid Build Coastguard Worker %if avx_enabled && __sizeofreg >= 16 && %4 == 1 1436*c0909341SAndroid Build Coastguard Worker %ifnnum regnumof%7 1437*c0909341SAndroid Build Coastguard Worker %if %3 1438*c0909341SAndroid Build Coastguard Worker vmovaps %6, %7 1439*c0909341SAndroid Build Coastguard Worker %else 1440*c0909341SAndroid Build Coastguard Worker vmovdqa %6, %7 1441*c0909341SAndroid Build Coastguard Worker %endif 1442*c0909341SAndroid Build 
Coastguard Worker __instr %6, %6, %8, %9 1443*c0909341SAndroid Build Coastguard Worker %else 1444*c0909341SAndroid Build Coastguard Worker __instr %6, %7, %8, %9 1445*c0909341SAndroid Build Coastguard Worker %endif 1446*c0909341SAndroid Build Coastguard Worker %else 1447*c0909341SAndroid Build Coastguard Worker __instr %6, %7, %8, %9 1448*c0909341SAndroid Build Coastguard Worker %endif 1449*c0909341SAndroid Build Coastguard Worker %elif %0 == 8 1450*c0909341SAndroid Build Coastguard Worker %if avx_enabled && __sizeofreg >= 16 && %4 == 0 1451*c0909341SAndroid Build Coastguard Worker %xdefine __src1 %7 1452*c0909341SAndroid Build Coastguard Worker %xdefine __src2 %8 1453*c0909341SAndroid Build Coastguard Worker %if %5 1454*c0909341SAndroid Build Coastguard Worker %ifnum regnumof%7 1455*c0909341SAndroid Build Coastguard Worker %ifnum regnumof%8 1456*c0909341SAndroid Build Coastguard Worker %if regnumof%7 < 8 && regnumof%8 >= 8 && regnumof%8 < 16 && sizeof%8 <= 32 1457*c0909341SAndroid Build Coastguard Worker ; Most VEX-encoded instructions require an additional byte to encode when 1458*c0909341SAndroid Build Coastguard Worker ; src2 is a high register (e.g. m8..15). If the instruction is commutative 1459*c0909341SAndroid Build Coastguard Worker ; we can swap src1 and src2 when doing so reduces the instruction length. 
1460*c0909341SAndroid Build Coastguard Worker %xdefine __src1 %8 1461*c0909341SAndroid Build Coastguard Worker %xdefine __src2 %7 1462*c0909341SAndroid Build Coastguard Worker %endif 1463*c0909341SAndroid Build Coastguard Worker %endif 1464*c0909341SAndroid Build Coastguard Worker %elifnum regnumof%8 ; put memory operands in src2 when possible 1465*c0909341SAndroid Build Coastguard Worker %xdefine __src1 %8 1466*c0909341SAndroid Build Coastguard Worker %xdefine __src2 %7 1467*c0909341SAndroid Build Coastguard Worker %else 1468*c0909341SAndroid Build Coastguard Worker %assign __emulate_avx 1 1469*c0909341SAndroid Build Coastguard Worker %endif 1470*c0909341SAndroid Build Coastguard Worker %elifnnum regnumof%7 1471*c0909341SAndroid Build Coastguard Worker ; EVEX allows imm8 shift instructions to be used with memory operands, 1472*c0909341SAndroid Build Coastguard Worker ; but VEX does not. This handles those special cases. 1473*c0909341SAndroid Build Coastguard Worker %ifnnum %8 1474*c0909341SAndroid Build Coastguard Worker %assign __emulate_avx 1 1475*c0909341SAndroid Build Coastguard Worker %elif notcpuflag(avx512) 1476*c0909341SAndroid Build Coastguard Worker %assign __emulate_avx 1 1477*c0909341SAndroid Build Coastguard Worker %endif 1478*c0909341SAndroid Build Coastguard Worker %endif 1479*c0909341SAndroid Build Coastguard Worker %if __emulate_avx ; a separate load is required 1480*c0909341SAndroid Build Coastguard Worker %if %3 1481*c0909341SAndroid Build Coastguard Worker vmovaps %6, %7 1482*c0909341SAndroid Build Coastguard Worker %else 1483*c0909341SAndroid Build Coastguard Worker vmovdqa %6, %7 1484*c0909341SAndroid Build Coastguard Worker %endif 1485*c0909341SAndroid Build Coastguard Worker __instr %6, %6, %8 1486*c0909341SAndroid Build Coastguard Worker %else 1487*c0909341SAndroid Build Coastguard Worker __instr %6, __src1, __src2 1488*c0909341SAndroid Build Coastguard Worker %endif 1489*c0909341SAndroid Build Coastguard Worker %else 1490*c0909341SAndroid 
Build Coastguard Worker __instr %6, %7, %8 1491*c0909341SAndroid Build Coastguard Worker %endif 1492*c0909341SAndroid Build Coastguard Worker %elif %0 == 7 1493*c0909341SAndroid Build Coastguard Worker %if avx_enabled && __sizeofreg >= 16 && %5 1494*c0909341SAndroid Build Coastguard Worker %xdefine __src1 %6 1495*c0909341SAndroid Build Coastguard Worker %xdefine __src2 %7 1496*c0909341SAndroid Build Coastguard Worker %ifnum regnumof%6 1497*c0909341SAndroid Build Coastguard Worker %ifnum regnumof%7 1498*c0909341SAndroid Build Coastguard Worker %if regnumof%6 < 8 && regnumof%7 >= 8 && regnumof%7 < 16 && sizeof%7 <= 32 1499*c0909341SAndroid Build Coastguard Worker %xdefine __src1 %7 1500*c0909341SAndroid Build Coastguard Worker %xdefine __src2 %6 1501*c0909341SAndroid Build Coastguard Worker %endif 1502*c0909341SAndroid Build Coastguard Worker %endif 1503*c0909341SAndroid Build Coastguard Worker %endif 1504*c0909341SAndroid Build Coastguard Worker __instr %6, __src1, __src2 1505*c0909341SAndroid Build Coastguard Worker %else 1506*c0909341SAndroid Build Coastguard Worker __instr %6, %7 1507*c0909341SAndroid Build Coastguard Worker %endif 1508*c0909341SAndroid Build Coastguard Worker %else 1509*c0909341SAndroid Build Coastguard Worker __instr %6 1510*c0909341SAndroid Build Coastguard Worker %endif 1511*c0909341SAndroid Build Coastguard Worker%endmacro 1512*c0909341SAndroid Build Coastguard Worker 1513*c0909341SAndroid Build Coastguard Worker;%1 == instruction 1514*c0909341SAndroid Build Coastguard Worker;%2 == minimal instruction set 1515*c0909341SAndroid Build Coastguard Worker;%3 == 1 if float, 0 if int 1516*c0909341SAndroid Build Coastguard Worker;%4 == 1 if 4-operand emulation, 0 if 3-operand emulation, 255 otherwise (no emulation) 1517*c0909341SAndroid Build Coastguard Worker;%5 == 1 if commutative (i.e. 
;      doesn't matter which src arg is which), 0 if not
;
; AVX_INSTR defines a wrapper macro named after the instruction itself.
; Arguments that are left out default to: minimal instruction set = fnord,
; float = 0, emulation = 255, commutative = 0.
%macro AVX_INSTR 1-5 fnord, 0, 255, 0
    ; Wrapper parameters 1-5 are the operands written by the caller (missing
    ; ones read as fnord); parameters 6-10 default to the AVX_INSTR arguments.
    %macro %1 1-10 fnord, fnord, fnord, fnord, %1, %2, %3, %4, %5
        ; Hand RUN_AVX_INSTR the instruction description first, followed by
        ; exactly as many operands as were supplied.
        %ifidn %2, fnord
            RUN_AVX_INSTR %6, %7, %8, %9, %10, %1
        %elifidn %3, fnord
            RUN_AVX_INSTR %6, %7, %8, %9, %10, %1, %2
        %elifidn %4, fnord
            RUN_AVX_INSTR %6, %7, %8, %9, %10, %1, %2, %3
        %elifidn %5, fnord
            RUN_AVX_INSTR %6, %7, %8, %9, %10, %1, %2, %3, %4
        %else
            RUN_AVX_INSTR %6, %7, %8, %9, %10, %1, %2, %3, %4, %5
        %endif
    %endmacro
%endmacro

; Instructions with both VEX/EVEX and legacy encodings
; Non-destructive instructions are written without parameters
; Columns: instruction, minimal instruction set, float, emulation, commutative
AVX_INSTR addpd,            sse2,  1, 0, 1
AVX_INSTR addps,            sse,   1, 0, 1
AVX_INSTR addsd,            sse2,  1, 0, 0
AVX_INSTR addss,            sse,   1, 0, 0
AVX_INSTR addsubpd,         sse3,  1, 0, 0
AVX_INSTR addsubps,         sse3,  1, 0, 0
AVX_INSTR aesdec,           aesni, 0, 0, 0
; AVX_INSTR table rows, aesdeclast through comisd.
; Columns: instruction, minimal instruction set, float (1) or int (0),
; emulation (1 = 4-operand, 0 = 3-operand, omitted = none), commutative (1/0).
; Every column is a separate comma-delimited macro argument.
AVX_INSTR aesdeclast,       aesni, 0, 0, 0
AVX_INSTR aesenc,           aesni, 0, 0, 0
AVX_INSTR aesenclast,       aesni, 0, 0, 0
AVX_INSTR aesimc,           aesni
AVX_INSTR aeskeygenassist,  aesni
AVX_INSTR andnpd,           sse2,  1, 0, 0
AVX_INSTR andnps,           sse,   1, 0, 0
AVX_INSTR andpd,            sse2,  1, 0, 1
AVX_INSTR andps,            sse,   1, 0, 1
AVX_INSTR blendpd,          sse4,  1, 1, 0
AVX_INSTR blendps,          sse4,  1, 1, 0
AVX_INSTR blendvpd,         sse4,  1, 1, 0 ; last operand must be xmm0 with legacy encoding
AVX_INSTR blendvps,         sse4,  1, 1, 0 ; last operand must be xmm0 with legacy encoding
AVX_INSTR cmpeqpd,          sse2,  1, 0, 1
AVX_INSTR cmpeqps,          sse,   1, 0, 1
AVX_INSTR cmpeqsd,          sse2,  1, 0, 0
AVX_INSTR cmpeqss,          sse,   1, 0, 0
AVX_INSTR cmplepd,          sse2,  1, 0, 0
AVX_INSTR cmpleps,          sse,   1, 0, 0
AVX_INSTR cmplesd,          sse2,  1, 0, 0
AVX_INSTR cmpless,          sse,   1, 0, 0
AVX_INSTR cmpltpd,          sse2,  1, 0, 0
AVX_INSTR cmpltps,          sse,   1, 0, 0
AVX_INSTR cmpltsd,          sse2,  1, 0, 0
AVX_INSTR cmpltss,          sse,   1, 0, 0
AVX_INSTR cmpneqpd,         sse2,  1, 0, 1
AVX_INSTR cmpneqps,         sse,   1, 0, 1
AVX_INSTR cmpneqsd,         sse2,  1, 0, 0
AVX_INSTR cmpneqss,         sse,   1, 0, 0
AVX_INSTR cmpnlepd,         sse2,  1, 0, 0
AVX_INSTR cmpnleps,         sse,   1, 0, 0
AVX_INSTR cmpnlesd,         sse2,  1, 0, 0
AVX_INSTR cmpnless,         sse,   1, 0, 0
AVX_INSTR cmpnltpd,         sse2,  1, 0, 0
AVX_INSTR cmpnltps,         sse,   1, 0, 0
AVX_INSTR cmpnltsd,         sse2,  1, 0, 0
AVX_INSTR cmpnltss,         sse,   1, 0, 0
AVX_INSTR cmpordpd,         sse2,  1, 0, 1
AVX_INSTR cmpordps,         sse,   1, 0, 1
AVX_INSTR cmpordsd,         sse2,  1, 0, 0
AVX_INSTR cmpordss,         sse,   1, 0, 0
AVX_INSTR cmppd,            sse2,  1, 1, 0
AVX_INSTR cmpps,            sse,   1, 1, 0
AVX_INSTR cmpsd,            sse2,  1, 1, 0
AVX_INSTR cmpss,            sse,   1, 1, 0
AVX_INSTR cmpunordpd,       sse2,  1, 0, 1
AVX_INSTR cmpunordps,       sse,   1, 0, 1
AVX_INSTR cmpunordsd,       sse2,  1, 0, 0
AVX_INSTR cmpunordss,       sse,   1, 0, 0
AVX_INSTR comisd,           sse2,  1
; AVX_INSTR table rows, comiss through packusdw.
; Columns: instruction, minimal instruction set, float, emulation, commutative
AVX_INSTR comiss,           sse,   1
AVX_INSTR cvtdq2pd,         sse2,  1
AVX_INSTR cvtdq2ps,         sse2,  1
AVX_INSTR cvtpd2dq,         sse2,  1
AVX_INSTR cvtpd2ps,         sse2,  1
AVX_INSTR cvtps2dq,         sse2,  1
AVX_INSTR cvtps2pd,         sse2,  1
AVX_INSTR cvtsd2si,         sse2,  1
AVX_INSTR cvtsd2ss,         sse2,  1, 0, 0
AVX_INSTR cvtsi2sd,         sse2,  1, 0, 0
AVX_INSTR cvtsi2ss,         sse,   1, 0, 0
AVX_INSTR cvtss2sd,         sse2,  1, 0, 0
AVX_INSTR cvtss2si,         sse,   1
AVX_INSTR cvttpd2dq,        sse2,  1
AVX_INSTR cvttps2dq,        sse2,  1
AVX_INSTR cvttsd2si,        sse2,  1
AVX_INSTR cvttss2si,        sse,   1
AVX_INSTR divpd,            sse2,  1, 0, 0
AVX_INSTR divps,            sse,   1, 0, 0
AVX_INSTR divsd,            sse2,  1, 0, 0
AVX_INSTR divss,            sse,   1, 0, 0
AVX_INSTR dppd,             sse4,  1, 1, 0
AVX_INSTR dpps,             sse4,  1, 1, 0
AVX_INSTR extractps,        sse4,  1
AVX_INSTR gf2p8affineinvqb, gfni,  0, 1, 0
AVX_INSTR gf2p8affineqb,    gfni,  0, 1, 0
AVX_INSTR gf2p8mulb,        gfni,  0, 0, 0
AVX_INSTR haddpd,           sse3,  1, 0, 0
AVX_INSTR haddps,           sse3,  1, 0, 0
AVX_INSTR hsubpd,           sse3,  1, 0, 0
AVX_INSTR hsubps,           sse3,  1, 0, 0
AVX_INSTR insertps,         sse4,  1, 1, 0
AVX_INSTR lddqu,            sse3
AVX_INSTR ldmxcsr,          sse,   1
AVX_INSTR maskmovdqu,       sse2
AVX_INSTR maxpd,            sse2,  1, 0, 1
AVX_INSTR maxps,            sse,   1, 0, 1
AVX_INSTR maxsd,            sse2,  1, 0, 0
AVX_INSTR maxss,            sse,   1, 0, 0
AVX_INSTR minpd,            sse2,  1, 0, 1
AVX_INSTR minps,            sse,   1, 0, 1
AVX_INSTR minsd,            sse2,  1, 0, 0
AVX_INSTR minss,            sse,   1, 0, 0
AVX_INSTR movapd,           sse2,  1
AVX_INSTR movaps,           sse,   1
AVX_INSTR movd,             mmx
AVX_INSTR movddup,          sse3,  1
AVX_INSTR movdqa,           sse2
AVX_INSTR movdqu,           sse2
AVX_INSTR movhlps,          sse,   1, 0, 0
AVX_INSTR movhpd,           sse2,  1, 0, 0
AVX_INSTR movhps,           sse,   1, 0, 0
AVX_INSTR movlhps,          sse,   1, 0, 0
AVX_INSTR movlpd,           sse2,  1, 0, 0
AVX_INSTR movlps,           sse,   1, 0, 0
AVX_INSTR movmskpd,         sse2,  1
AVX_INSTR movmskps,         sse,   1
AVX_INSTR movntdq,          sse2
AVX_INSTR movntdqa,         sse4
AVX_INSTR movntpd,          sse2,  1
AVX_INSTR movntps,          sse,   1
AVX_INSTR movq,             mmx
AVX_INSTR movsd,            sse2,  1, 0, 0
AVX_INSTR movshdup,         sse3,  1
AVX_INSTR movsldup,         sse3,  1
AVX_INSTR movss,            sse,   1, 0, 0
AVX_INSTR movupd,           sse2,  1
AVX_INSTR movups,           sse,   1
AVX_INSTR mpsadbw,          sse4,  0, 1, 0
AVX_INSTR mulpd,            sse2,  1, 0, 1
AVX_INSTR mulps,            sse,   1, 0, 1
AVX_INSTR mulsd,            sse2,  1, 0, 0
AVX_INSTR mulss,            sse,   1, 0, 0
AVX_INSTR orpd,             sse2,  1, 0, 1
AVX_INSTR orps,             sse,   1, 0, 1
AVX_INSTR pabsb,            ssse3
AVX_INSTR pabsd,            ssse3
AVX_INSTR pabsw,            ssse3
AVX_INSTR packssdw,         mmx,   0, 0, 0
AVX_INSTR packsswb,         mmx,   0, 0, 0
AVX_INSTR packusdw,         sse4,  0, 0, 0
; AVX_INSTR table rows, packuswb through pcmpeqw.
; Columns: instruction, minimal instruction set, float, emulation, commutative
AVX_INSTR packuswb,         mmx,   0, 0, 0
AVX_INSTR paddb,            mmx,   0, 0, 1
AVX_INSTR paddd,            mmx,   0, 0, 1
AVX_INSTR paddq,            sse2,  0, 0, 1
AVX_INSTR paddsb,           mmx,   0, 0, 1
AVX_INSTR paddsw,           mmx,   0, 0, 1
AVX_INSTR paddusb,          mmx,   0, 0, 1
AVX_INSTR paddusw,          mmx,   0, 0, 1
AVX_INSTR paddw,            mmx,   0, 0, 1
AVX_INSTR palignr,          ssse3, 0, 1, 0
AVX_INSTR pand,             mmx,   0, 0, 1
AVX_INSTR pandn,            mmx,   0, 0, 0
AVX_INSTR pavgb,            mmx2,  0, 0, 1
AVX_INSTR pavgw,            mmx2,  0, 0, 1
AVX_INSTR pblendvb,         sse4,  0, 1, 0 ; last operand must be xmm0 with legacy encoding
AVX_INSTR pblendw,          sse4,  0, 1, 0
AVX_INSTR pclmulhqhqdq,     clmul, 0, 0, 0
AVX_INSTR pclmulhqlqdq,     clmul, 0, 0, 0
AVX_INSTR pclmullqhqdq,     clmul, 0, 0, 0
AVX_INSTR pclmullqlqdq,     clmul, 0, 0, 0
AVX_INSTR pclmulqdq,        clmul, 0, 1, 0
AVX_INSTR pcmpeqb,          mmx,   0, 0, 1
AVX_INSTR pcmpeqd,          mmx,   0, 0, 1
AVX_INSTR pcmpeqq,          sse4,  0, 0, 1
AVX_INSTR pcmpeqw,          mmx,   0, 0, 1
; AVX_INSTR table rows, pcmpestri through psubsb.
; Columns: instruction, minimal instruction set, float, emulation, commutative
AVX_INSTR pcmpestri,        sse42
AVX_INSTR pcmpestrm,        sse42
AVX_INSTR pcmpgtb,          mmx,   0, 0, 0
AVX_INSTR pcmpgtd,          mmx,   0, 0, 0
AVX_INSTR pcmpgtq,          sse42, 0, 0, 0
AVX_INSTR pcmpgtw,          mmx,   0, 0, 0
AVX_INSTR pcmpistri,        sse42
AVX_INSTR pcmpistrm,        sse42
AVX_INSTR pextrb,           sse4
AVX_INSTR pextrd,           sse4
AVX_INSTR pextrq,           sse4
AVX_INSTR pextrw,           mmx2
AVX_INSTR phaddd,           ssse3, 0, 0, 0
AVX_INSTR phaddsw,          ssse3, 0, 0, 0
AVX_INSTR phaddw,           ssse3, 0, 0, 0
AVX_INSTR phminposuw,       sse4
AVX_INSTR phsubd,           ssse3, 0, 0, 0
AVX_INSTR phsubsw,          ssse3, 0, 0, 0
AVX_INSTR phsubw,           ssse3, 0, 0, 0
AVX_INSTR pinsrb,           sse4,  0, 1, 0
AVX_INSTR pinsrd,           sse4,  0, 1, 0
AVX_INSTR pinsrq,           sse4,  0, 1, 0
AVX_INSTR pinsrw,           mmx2,  0, 1, 0
AVX_INSTR pmaddubsw,        ssse3, 0, 0, 0
AVX_INSTR pmaddwd,          mmx,   0, 0, 1
AVX_INSTR pmaxsb,           sse4,  0, 0, 1
AVX_INSTR pmaxsd,           sse4,  0, 0, 1
AVX_INSTR pmaxsw,           mmx2,  0, 0, 1
AVX_INSTR pmaxub,           mmx2,  0, 0, 1
AVX_INSTR pmaxud,           sse4,  0, 0, 1
AVX_INSTR pmaxuw,           sse4,  0, 0, 1
AVX_INSTR pminsb,           sse4,  0, 0, 1
AVX_INSTR pminsd,           sse4,  0, 0, 1
AVX_INSTR pminsw,           mmx2,  0, 0, 1
AVX_INSTR pminub,           mmx2,  0, 0, 1
AVX_INSTR pminud,           sse4,  0, 0, 1
AVX_INSTR pminuw,           sse4,  0, 0, 1
AVX_INSTR pmovmskb,         mmx2
AVX_INSTR pmovsxbd,         sse4
AVX_INSTR pmovsxbq,         sse4
AVX_INSTR pmovsxbw,         sse4
AVX_INSTR pmovsxdq,         sse4
AVX_INSTR pmovsxwd,         sse4
AVX_INSTR pmovsxwq,         sse4
AVX_INSTR pmovzxbd,         sse4
AVX_INSTR pmovzxbq,         sse4
AVX_INSTR pmovzxbw,         sse4
AVX_INSTR pmovzxdq,         sse4
AVX_INSTR pmovzxwd,         sse4
AVX_INSTR pmovzxwq,         sse4
AVX_INSTR pmuldq,           sse4,  0, 0, 1
AVX_INSTR pmulhrsw,         ssse3, 0, 0, 1
AVX_INSTR pmulhuw,          mmx2,  0, 0, 1
AVX_INSTR pmulhw,           mmx,   0, 0, 1
AVX_INSTR pmulld,           sse4,  0, 0, 1
AVX_INSTR pmullw,           mmx,   0, 0, 1
AVX_INSTR pmuludq,          sse2,  0, 0, 1
AVX_INSTR por,              mmx,   0, 0, 1
AVX_INSTR psadbw,           mmx2,  0, 0, 1
AVX_INSTR pshufb,           ssse3, 0, 0, 0
AVX_INSTR pshufd,           sse2
AVX_INSTR pshufhw,          sse2
AVX_INSTR pshuflw,          sse2
AVX_INSTR psignb,           ssse3, 0, 0, 0
AVX_INSTR psignd,           ssse3, 0, 0, 0
AVX_INSTR psignw,           ssse3, 0, 0, 0
AVX_INSTR pslld,            mmx,   0, 0, 0
AVX_INSTR pslldq,           sse2,  0, 0, 0
AVX_INSTR psllq,            mmx,   0, 0, 0
AVX_INSTR psllw,            mmx,   0, 0, 0
AVX_INSTR psrad,            mmx,   0, 0, 0
AVX_INSTR psraw,            mmx,   0, 0, 0
AVX_INSTR psrld,            mmx,   0, 0, 0
AVX_INSTR psrldq,           sse2,  0, 0, 0
AVX_INSTR psrlq,            mmx,   0, 0, 0
AVX_INSTR psrlw,            mmx,   0, 0, 0
AVX_INSTR psubb,            mmx,   0, 0, 0
AVX_INSTR psubd,            mmx,   0, 0, 0
AVX_INSTR psubq,            sse2,  0, 0, 0
AVX_INSTR psubsb,           mmx,   0, 0, 0
; AVX_INSTR table rows, psubsw through xorps.
; Columns: instruction, minimal instruction set, float, emulation, commutative
AVX_INSTR psubsw,           mmx,   0, 0, 0
AVX_INSTR psubusb,          mmx,   0, 0, 0
AVX_INSTR psubusw,          mmx,   0, 0, 0
AVX_INSTR psubw,            mmx,   0, 0, 0
AVX_INSTR ptest,            sse4
AVX_INSTR punpckhbw,        mmx,   0, 0, 0
AVX_INSTR punpckhdq,        mmx,   0, 0, 0
AVX_INSTR punpckhqdq,       sse2,  0, 0, 0
AVX_INSTR punpckhwd,        mmx,   0, 0, 0
AVX_INSTR punpcklbw,        mmx,   0, 0, 0
AVX_INSTR punpckldq,        mmx,   0, 0, 0
AVX_INSTR punpcklqdq,       sse2,  0, 0, 0
AVX_INSTR punpcklwd,        mmx,   0, 0, 0
AVX_INSTR pxor,             mmx,   0, 0, 1
AVX_INSTR rcpps,            sse,   1
AVX_INSTR rcpss,            sse,   1, 0, 0
AVX_INSTR roundpd,          sse4,  1
AVX_INSTR roundps,          sse4,  1
AVX_INSTR roundsd,          sse4,  1, 1, 0
AVX_INSTR roundss,          sse4,  1, 1, 0
AVX_INSTR rsqrtps,          sse,   1
AVX_INSTR rsqrtss,          sse,   1, 0, 0
AVX_INSTR shufpd,           sse2,  1, 1, 0
AVX_INSTR shufps,           sse,   1, 1, 0
AVX_INSTR sqrtpd,           sse2,  1
AVX_INSTR sqrtps,           sse,   1
AVX_INSTR sqrtsd,           sse2,  1, 0, 0
AVX_INSTR sqrtss,           sse,   1, 0, 0
AVX_INSTR stmxcsr,          sse,   1
AVX_INSTR subpd,            sse2,  1, 0, 0
AVX_INSTR subps,            sse,   1, 0, 0
AVX_INSTR subsd,            sse2,  1, 0, 0
AVX_INSTR subss,            sse,   1, 0, 0
AVX_INSTR ucomisd,          sse2,  1
AVX_INSTR ucomiss,          sse,   1
AVX_INSTR unpckhpd,         sse2,  1, 0, 0
AVX_INSTR unpckhps,         sse,   1, 0, 0
AVX_INSTR unpcklpd,         sse2,  1, 0, 0
AVX_INSTR unpcklps,         sse,   1, 0, 0
AVX_INSTR xorpd,            sse2,  1, 0, 1
AVX_INSTR xorps,            sse,   1, 0, 1

; 3DNow instructions, for sharing code between AVX, SSE and 3DN
AVX_INSTR pfadd,            3dnow, 1, 0, 1
AVX_INSTR pfmul,            3dnow, 1, 0, 1
AVX_INSTR pfsub,            3dnow, 1, 0, 0

; GPR_INSTR wraps a general-purpose-register instruction in a macro of the
; same name that checks the required cpuflag before emitting the instruction.
;%1 == instruction
;%2 == minimal instruction set
%macro GPR_INSTR 2
    ; The wrapper takes two or three operands (a missing third one reads as
    ; fnord); its parameters 4 and 5 default to the instruction name and the
    ; minimal instruction set given to GPR_INSTR.
    %macro %1 2-5 fnord, %1, %2
        %ifdef cpuname
            %if notcpuflag(%5)
                %error use of ``%4'' %5 instruction in cpuname function: current_function
            %endif
        %endif
        %ifidn %3, fnord
            %4 %1, %2
        %else
            %4 %1, %2, %3
        %endif
    %endmacro
%endmacro

GPR_INSTR andn,   bmi1
GPR_INSTR bextr,  bmi1
GPR_INSTR blsi,   bmi1
GPR_INSTR blsmsk, bmi1
GPR_INSTR blsr,   bmi1
GPR_INSTR bzhi,   bmi2
GPR_INSTR crc32,  sse42
GPR_INSTR mulx,   bmi2
GPR_INSTR pdep,   bmi2
GPR_INSTR pext,   bmi2
GPR_INSTR popcnt, sse42
GPR_INSTR rorx,   bmi2
GPR_INSTR sarx,   bmi2
GPR_INSTR shlx,   bmi2
GPR_INSTR shrx,   bmi2

; base-4 constants for shuffles
; For every byte value i, j is the decimal number whose four digits are the
; 2-bit fields of i, most significant field first. The q prefix is padded with
; zeros so that the prefix and j together always contribute four digits.
; NOTE(review): CAT_XDEFINE is defined elsewhere in this file; presumably it
; joins its first two arguments into a name and defines it as the third.
%assign i 0
%rep 256
    %assign j ((i>>6)&3)*1000 + ((i>>4)&3)*100 + ((i>>2)&3)*10 + (i&3)
    %if j < 10
        CAT_XDEFINE q000, j, i
    %elif j < 100
        CAT_XDEFINE q00, j, i
    %elif j < 1000
        CAT_XDEFINE q0, j, i
    %else
        CAT_XDEFINE q, j, i
    %endif
    %assign i i+1
%endrep
%undef i
%undef j

; FMA_INSTR defines a 4-operand multiply-accumulate macro.
;%1 == instruction, emitted with a v prefix when the xop cpuflag is set
;%2 == multiply instruction used for emulation
;%3 == add instruction used for emulation
%macro FMA_INSTR 3
    ; The wrapper takes four operands; its parameters 5-7 default to the three
    ; FMA_INSTR arguments.
    %macro %1 4-7 %1, %2, %3
        %if cpuflag(xop)
            v%5 %1, %2, %3, %4
        %elifnidn %1, %4
            ; Emulation writes the destination before the last operand is
            ; read, so it is only possible when the two differ.
            %6 %1, %2, %3
            %7 %1, %4
        %else
            %error non-xop emulation of ``%5 %1, %2, %3, %4'' is not supported
        %endif
    %endmacro
%endmacro

FMA_INSTR pmacsdd,  pmulld,  paddd ; sse4 emulation
FMA_INSTR pmacsdql, pmuldq,  paddq ; sse4 emulation
FMA_INSTR pmacsww,  pmullw,  paddw
FMA_INSTR pmadcswd, pmaddwd, paddd

;
; Macros for consolidating FMA3 and FMA4 using 4-operand (dst, src1, src2, src3) syntax.
; FMA3 is only possible if dst is the same as one of the src registers.
; Either src2 or src3 can be a memory operand.
;
; FMA4_INSTR prefix, suffix [, suffix ...]
;   %1    == mnemonic prefix (kept in the context-local %$prefix so that it
;            stays available while the suffix list is rotated)
;   %2... == mnemonic suffixes; one macro named <prefix><suffix> is defined
;            per suffix
; Parameters of each generated macro:
;   %1-%4 == dst, src1, src2, src3
;   %5    == prefix
;   %6    == suffix
; Selection performed by the generated macro:
;   neither fma3 nor fma4 available -> %error
;   fma4                            -> v<prefix><suffix> with all four operands
;   dst == src1                     -> 213 form, or 132 form when sizeof%3 is
;                                      not a number
;   dst == src2                     -> 213 form
;   dst == src3                     -> 231 form
;   anything else                   -> %error
%macro FMA4_INSTR 2-*
    %push fma4_instr
    %xdefine %$prefix %1
    %rep %0 - 1
        %macro %$prefix%2 4-6 %$prefix, %2
            %if notcpuflag(fma3) && notcpuflag(fma4)
                %error use of ``%5%6'' fma instruction in cpuname function: current_function
            %elif cpuflag(fma4)
                v%5%6 %1, %2, %3, %4
            %elifidn %1, %2
                ; If %3 or %4 is a memory operand it needs to be encoded as the last operand.
                %ifnum sizeof%3
                    v%{5}213%6 %2, %3, %4
                %else
                    v%{5}132%6 %2, %4, %3
                %endif
            %elifidn %1, %3
                v%{5}213%6 %3, %2, %4
            %elifidn %1, %4
                v%{5}231%6 %4, %2, %3
            %else
                %error fma3 emulation of ``%5%6 %1, %2, %3, %4'' is not supported
            %endif
        %endmacro
        %rotate 1
    %endrep
    %pop
%endmacro

FMA4_INSTR fmadd,    pd, ps, sd, ss
FMA4_INSTR fmaddsub, pd, ps
FMA4_INSTR fmsub,    pd, ps, sd, ss
FMA4_INSTR fmsubadd, pd, ps
FMA4_INSTR fnmadd,   pd, ps, sd, ss
FMA4_INSTR fnmsub,   pd, ps, sd, ss

; Macros for converting VEX instructions to equivalent EVEX ones.
; EVEX_INSTR vex, evex [, prefer_evex]
;   %1 == VEX mnemonic; also the name of the macro that gets defined
;   %2 == EVEX mnemonic
;   %3 == prefer_evex (defaults to 0)
; Parameters of the generated macro:
;   %1-%4 == operands; unused trailing ones default to the placeholder fnord
;   %5    == VEX mnemonic
;   %6    == EVEX mnemonic
;   %7    == prefer_evex
; The EVEX mnemonic is emitted when (cpuflag(avx512) & prefer_evex) is nonzero,
; or when any of the first three operands has a numeric regnumof value that is
; >= 16 or a sizeof value greater than 32. Otherwise the VEX mnemonic is used.
%macro EVEX_INSTR 2-3 0 ; vex, evex, prefer_evex
    %macro %1 2-7 fnord, fnord, %1, %2, %3
        %ifidn %3, fnord
            %define %%args %1, %2
        %elifidn %4, fnord
            %define %%args %1, %2, %3
        %else
            %define %%args %1, %2, %3, %4
        %endif
        %assign %%evex_required cpuflag(avx512) & %7
        %ifnum regnumof%1
            %if regnumof%1 >= 16 || sizeof%1 > 32
                %assign %%evex_required 1
            %endif
        %endif
        %ifnum regnumof%2
            %if regnumof%2 >= 16 || sizeof%2 > 32
                %assign %%evex_required 1
            %endif
        %endif
        %ifnum regnumof%3
            %if regnumof%3 >= 16 || sizeof%3 > 32
                %assign %%evex_required 1
            %endif
        %endif
        %if %%evex_required
            %6 %%args
        %else
            %5 %%args ; Prefer VEX over EVEX due to shorter instruction length
        %endif
    %endmacro
%endmacro

EVEX_INSTR vbroadcastf128, vbroadcastf32x4
EVEX_INSTR vbroadcasti128, vbroadcasti32x4
EVEX_INSTR vextractf128,   vextractf32x4
EVEX_INSTR vextracti128,   vextracti32x4
EVEX_INSTR vinsertf128,    vinsertf32x4
EVEX_INSTR vinserti128,    vinserti32x4
EVEX_INSTR vmovdqa,        vmovdqa32
EVEX_INSTR vmovdqu,        vmovdqu32
EVEX_INSTR vpand,          vpandd
EVEX_INSTR vpandn,         vpandnd
EVEX_INSTR vpor,           vpord
EVEX_INSTR vpxor,          vpxord
EVEX_INSTR vrcpps,         vrcp14ps,   1 ; EVEX versions have higher precision
EVEX_INSTR vrcpss,         vrcp14ss,   1
EVEX_INSTR vrsqrtps,       vrsqrt14ps, 1
EVEX_INSTR vrsqrtss,       vrsqrt14ss, 1