# PeachPy kernel definitions: thin test wrappers around the fp16 "alt" -> fp32
# conversion helpers from the project's fp16.avx / fp16.avx2 modules.
#
# Each kernel takes a pointer to the fp16 input and a pointer to the fp32
# output, loads one XMM register from the input, runs the helper, and stores
# the helper's result register to the output.
#
# NOTE(review): "alt" presumably refers to the alternative half-precision
# representation handled by the fp16 helpers -- confirm against fp16.avx*.

# Star imports are the PeachPy DSL convention: they bring Argument, Function,
# the register classes, the instruction mnemonics, uarch/isa, etc. into scope.
from peachpy import *
from peachpy.x86_64 import *

import fp16.avx
import fp16.avx2


# Kernel arguments, shared by both functions below.
arg_fp16 = Argument(ptr(const_uint16_t), name="fp16")  # read-only input pointer
arg_fp32 = Argument(ptr(uint32_t), name="fp32")        # output pointer

# AVX2 variant: XMM of fp16 input -> YMM of fp32 output.
with Function(
    "fp16_alt_xmm_to_fp32_ymm_peachpy__avx2",
    (arg_fp16, arg_fp32),
    target=uarch.default + isa.avx2,
):

    # Bring both pointer arguments into general-purpose registers.
    reg_fp16 = GeneralPurposeRegister64()
    LOAD.ARGUMENT(reg_fp16, arg_fp16)

    reg_fp32 = GeneralPurposeRegister64()
    LOAD.ARGUMENT(reg_fp32, arg_fp32)

    # Unaligned load of one XMM register from the input pointer.
    xmm_fp16 = XMMRegister()
    VMOVUPS(xmm_fp16, [reg_fp16])
    # The helper emits the conversion instructions and returns the register
    # holding the result (a YMM register, per the helper's name).
    ymm_fp32 = fp16.avx2.fp16_alt_xmm_to_fp32_ymm(xmm_fp16)
    # Unaligned store of the result register to the output pointer.
    VMOVUPS([reg_fp32], ymm_fp32)

    RETURN()

# AVX variant: XMM of fp16 input -> XMM of fp32 output.
with Function(
    "fp16_alt_xmm_to_fp32_xmm_peachpy__avx",
    (arg_fp16, arg_fp32),
    target=uarch.default + isa.avx,
):

    # Bring both pointer arguments into general-purpose registers.
    reg_fp16 = GeneralPurposeRegister64()
    LOAD.ARGUMENT(reg_fp16, arg_fp16)

    reg_fp32 = GeneralPurposeRegister64()
    LOAD.ARGUMENT(reg_fp32, arg_fp32)

    # Unaligned load of one XMM register from the input pointer.
    xmm_fp16 = XMMRegister()
    VMOVUPS(xmm_fp16, [reg_fp16])
    # The helper emits the conversion instructions and returns the register
    # holding the result (an XMM register, per the helper's name).
    # NOTE(review): which input lanes end up in the XMM result is decided by
    # the helper and is not visible here -- confirm against fp16.avx.
    xmm_fp32 = fp16.avx.fp16_alt_xmm_to_fp32_xmm(xmm_fp16)
    # Unaligned store of the result register to the output pointer.
    VMOVUPS([reg_fp32], xmm_fp32)

    RETURN()