1*5f32b710SXin Li #include <gtest/gtest.h>
2*5f32b710SXin Li
3*5f32b710SXin Li #include <cstdint>
4*5f32b710SXin Li
5*5f32b710SXin Li #include <fp16.h>
6*5f32b710SXin Li
7*5f32b710SXin Li
// Number of elements placed in each input/output array handed to the kernel
// by the tests in this file.
const size_t vector_elements = 8;

// Conversion kernel under test. It has C linkage and is defined outside this
// file. The tests pass `vector_elements` 16-bit inputs in `fp16` and read back
// `vector_elements` 32-bit results from `fp32`.
// NOTE(review): the name suggests a PeachPy-generated AVX2 routine operating on
// one XMM register of inputs and one YMM register of outputs - confirm against
// the kernel source.
extern "C" {
	void fp16_alt_xmm_to_fp32_ymm_peachpy__avx2(const uint16_t* fp16, uint32_t* fp32);
}
10*5f32b710SXin Li
11*5f32b710SXin Li
// Sweeps every 16-bit pattern whose top bit is clear and whose biased exponent
// field is in [1, 31] (unbiased -14..16), eight consecutive patterns per kernel
// call, and checks each output lane against the scalar reference
// fp16_alt_to_fp32_bits() from <fp16.h>.
TEST(FP16_ALT_XMM_TO_FP32_YMM, positive_normalized_values) {
	const uint32_t exponentBias = 15;
	for (int32_t exponent = -14; exponent <= 16; exponent++) {
		// Exponent field already shifted above the 10 low (mantissa) bits.
		const uint32_t exponentBits = (exponent + exponentBias) << 10;
		for (uint32_t mantissaBase = 0; mantissaBase < 0x0400; mantissaBase += vector_elements) {
			uint16_t fp16[vector_elements];
			uint32_t fp32[vector_elements];

			// Consecutive mantissa values, one per lane.
			for (size_t lane = 0; lane < vector_elements; lane++) {
				fp16[lane] = static_cast<uint16_t>(mantissaBase + exponentBits + lane);
			}

			fp16_alt_xmm_to_fp32_ymm_peachpy__avx2(fp16, fp32);

			// The streamed message is shown only when a lane mismatches.
			for (size_t i = 0; i < vector_elements; i++) {
				EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[i]), fp32[i])
					<< std::hex << std::uppercase << std::setfill('0')
					<< "F16 = 0x" << std::setw(4) << fp16[i] << ", "
					<< "F32(F16) = 0x" << std::setw(8) << fp32[i] << ", "
					<< "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[i])
					<< ", lane " << i << "/" << vector_elements;
			}
		}
	}
}
34*5f32b710SXin Li
// Same sweep as the positive normalized test, but with bit 15 (0x8000) set in
// every input: biased exponent field in [1, 31], all 1024 mantissa values,
// eight consecutive patterns per kernel call. Each output lane is compared with
// the scalar reference fp16_alt_to_fp32_bits() from <fp16.h>.
TEST(FP16_ALT_XMM_TO_FP32_YMM, negative_normalized_values) {
	const uint32_t exponentBias = 15;
	const uint32_t topBit = 0x8000;
	for (int32_t exponent = -14; exponent <= 16; exponent++) {
		// Exponent field already shifted above the 10 low (mantissa) bits.
		const uint32_t exponentBits = (exponent + exponentBias) << 10;
		for (uint32_t mantissaBase = 0; mantissaBase < 0x0400; mantissaBase += vector_elements) {
			uint16_t fp16[vector_elements];
			uint32_t fp32[vector_elements];

			// Consecutive mantissa values, one per lane.
			for (size_t lane = 0; lane < vector_elements; lane++) {
				fp16[lane] = static_cast<uint16_t>(topBit + mantissaBase + exponentBits + lane);
			}

			fp16_alt_xmm_to_fp32_ymm_peachpy__avx2(fp16, fp32);

			// The streamed message is shown only when a lane mismatches.
			for (size_t i = 0; i < vector_elements; i++) {
				EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[i]), fp32[i])
					<< std::hex << std::uppercase << std::setfill('0')
					<< "F16 = 0x" << std::setw(4) << fp16[i] << ", "
					<< "F32(F16) = 0x" << std::setw(8) << fp32[i] << ", "
					<< "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[i])
					<< ", lane " << i << "/" << vector_elements;
			}
		}
	}
}
57*5f32b710SXin Li
// Feeds the kernel every 16-bit pattern in [0x0000, 0x03FF] (only the low 10
// bits may be non-zero), eight consecutive patterns per call, and checks each
// output lane against the scalar reference fp16_alt_to_fp32_bits() from
// <fp16.h>.
TEST(FP16_ALT_XMM_TO_FP32_YMM, positive_denormalized_values) {
	for (uint32_t mantissaBase = 0; mantissaBase < 0x0400; mantissaBase += vector_elements) {
		uint16_t fp16[vector_elements];
		uint32_t fp32[vector_elements];

		// Consecutive mantissa values, one per lane.
		for (size_t lane = 0; lane < vector_elements; lane++) {
			fp16[lane] = static_cast<uint16_t>(mantissaBase + lane);
		}

		fp16_alt_xmm_to_fp32_ymm_peachpy__avx2(fp16, fp32);

		// The streamed message is shown only when a lane mismatches.
		for (size_t i = 0; i < vector_elements; i++) {
			EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[i]), fp32[i])
				<< std::hex << std::uppercase << std::setfill('0')
				<< "F16 = 0x" << std::setw(4) << fp16[i] << ", "
				<< "F32(F16) = 0x" << std::setw(8) << fp32[i] << ", "
				<< "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[i])
				<< ", lane " << i << "/" << vector_elements;
		}
	}
}
77*5f32b710SXin Li
// Feeds the kernel every 16-bit pattern in [0x8000, 0x83FF] (bit 15 set, zero
// exponent field, all 1024 mantissa values), eight consecutive patterns per
// call, and checks each output lane against the scalar reference
// fp16_alt_to_fp32_bits() from <fp16.h>.
TEST(FP16_ALT_XMM_TO_FP32_YMM, negative_denormalized_values) {
	const uint32_t topBit = 0x8000;
	for (uint32_t mantissaBase = 0; mantissaBase < 0x0400; mantissaBase += vector_elements) {
		uint16_t fp16[vector_elements];
		uint32_t fp32[vector_elements];

		// Consecutive mantissa values, one per lane.
		for (size_t lane = 0; lane < vector_elements; lane++) {
			fp16[lane] = static_cast<uint16_t>(topBit + mantissaBase + lane);
		}

		fp16_alt_xmm_to_fp32_ymm_peachpy__avx2(fp16, fp32);

		// The streamed message is shown only when a lane mismatches.
		for (size_t i = 0; i < vector_elements; i++) {
			EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[i]), fp32[i])
				<< std::hex << std::uppercase << std::setfill('0')
				<< "F16 = 0x" << std::setw(4) << fp16[i] << ", "
				<< "F32(F16) = 0x" << std::setw(8) << fp32[i] << ", "
				<< "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[i])
				<< ", lane " << i << "/" << vector_elements;
		}
	}
}
97