xref: /aosp_15_r20/external/FP16/test/peachpy/stubs.py (revision 5f32b7105932ea8520a0e8811c640f936367d707)
1*5f32b710SXin Lifrom peachpy import *
2*5f32b710SXin Lifrom peachpy.x86_64 import *
3*5f32b710SXin Li
4*5f32b710SXin Liimport fp16.avx, fp16.avx2
5*5f32b710SXin Li
6*5f32b710SXin Li
# Argument descriptors shared by the generated stub functions in this file.
# "fp16" is a pointer to const uint16_t; the stubs load their input through it.
arg_fp16 = Argument(
    ptr(const_uint16_t),
    name="fp16",
)
# "fp32" is a pointer to uint32_t; the stubs store their result through it.
arg_fp32 = Argument(
    ptr(uint32_t),
    name="fp32",
)
9*5f32b710SXin Li
# Stub that wraps fp16.avx2.fp16_alt_xmm_to_fp32_ymm in a callable function:
# load one XMM register from the "fp16" pointer, run the conversion helper,
# and store the resulting YMM register through the "fp32" pointer.
with Function(
    "fp16_alt_xmm_to_fp32_ymm_peachpy__avx2",
    (arg_fp16, arg_fp32),
    target=uarch.default + isa.avx2,
):

    # Bring both pointer arguments into general-purpose registers.
    # Emission order is kept as-is: the DSL records instructions in call order.
    reg_src = GeneralPurposeRegister64()
    LOAD.ARGUMENT(reg_src, arg_fp16)

    reg_dst = GeneralPurposeRegister64()
    LOAD.ARGUMENT(reg_dst, arg_fp32)

    # Unaligned load of the input vector from the source pointer.
    xmm_half = XMMRegister()
    VMOVUPS(xmm_half, [reg_src])

    # The helper emits the conversion and returns the register holding
    # the result, which is then written out with an unaligned store.
    ymm_single = fp16.avx2.fp16_alt_xmm_to_fp32_ymm(xmm_half)
    VMOVUPS([reg_dst], ymm_single)

    RETURN()
24*5f32b710SXin Li
# Stub that wraps fp16.avx.fp16_alt_xmm_to_fp32_xmm in a callable function:
# load one XMM register from the "fp16" pointer, run the conversion helper,
# and store the resulting XMM register through the "fp32" pointer.
with Function(
    "fp16_alt_xmm_to_fp32_xmm_peachpy__avx",
    (arg_fp16, arg_fp32),
    target=uarch.default + isa.avx,
):

    # Bring both pointer arguments into general-purpose registers.
    # Emission order is kept as-is: the DSL records instructions in call order.
    reg_src = GeneralPurposeRegister64()
    LOAD.ARGUMENT(reg_src, arg_fp16)

    reg_dst = GeneralPurposeRegister64()
    LOAD.ARGUMENT(reg_dst, arg_fp32)

    # Unaligned load of a full XMM register from the source pointer.
    # NOTE(review): presumably the helper only consumes part of this
    # register (output is a single XMM) - confirm callers provide a source
    # buffer large enough for the whole 128-bit read.
    xmm_half = XMMRegister()
    VMOVUPS(xmm_half, [reg_src])

    # The helper emits the conversion and returns the register holding
    # the result, which is then written out with an unaligned store.
    xmm_single = fp16.avx.fp16_alt_xmm_to_fp32_xmm(xmm_half)
    VMOVUPS([reg_dst], xmm_single)

    RETURN()
39