xref: /aosp_15_r20/external/ComputeLibrary/examples/neon_sgemm.cpp (revision c217d954acce2dbc11938adb493fc0abd69584f3)
1 /*
2  * Copyright (c) 2018-2019 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #include "arm_compute/core/Types.h"
25 #include "arm_compute/runtime/NEON/NEFunctions.h"
26 #include "arm_compute/runtime/NEON/NEScheduler.h"
27 #include "utils/Utils.h"
28 
29 #include <cstdlib>
30 
31 using namespace arm_compute;
32 using namespace utils;
33 
34 class NESGEMMExample : public Example
35 {
36 public:
do_setup(int argc,char ** argv)37     bool do_setup(int argc, char **argv) override
38     {
39         NPYLoader npy0;
40         NPYLoader npy1;
41         NPYLoader npy2;
42         alpha = 1.0f;
43         beta  = 0.0f;
44 
45         std::ifstream stream;
46         if(argc > 1)
47         {
48             stream.open(argv[1], std::fstream::in);
49         }
50 
51         if(argc < 3 || (argc < 4 && stream.bad()))
52         {
53             // Print help
54             std::cout << "Usage: 1) ./build/neon_sgemm input_matrix_1.npy input_matrix_2.npy [input_matrix_3.npy] [alpha = 1] [beta = 0]\n";
55             std::cout << "       2) ./build/neon_sgemm M N K [alpha = 1.0f] [beta = 0.0f]\n\n";
56             std::cout << "Too few or no input_matrices provided. Using M=7, N=3, K=5, alpha=1.0f and beta=0.0f\n\n";
57 
58             src0.allocator()->init(TensorInfo(TensorShape(5U, 7U), 1, DataType::F32));
59             src1.allocator()->init(TensorInfo(TensorShape(3U, 5U), 1, DataType::F32));
60             src2.allocator()->init(TensorInfo(TensorShape(3U, 7U), 1, DataType::F32));
61         }
62         else
63         {
64             if(stream.good()) /* case file1.npy file2.npy [file3.npy] [alpha = 1.0f] [beta = 0.0f] */
65             {
66                 npy0.open(argv[1]);
67                 npy0.init_tensor(src0, DataType::F32);
68                 npy1.open(argv[2]);
69                 npy1.init_tensor(src1, DataType::F32);
70 
71                 if(argc > 3)
72                 {
73                     stream.close();
74                     stream.clear();
75                     stream.open(argv[3], std::fstream::in);
76                     if(stream.good()) /* case with third file */
77                     {
78                         npy2.open(argv[3]);
79                         npy2.init_tensor(src2, DataType::F32);
80 
81                         if(argc > 4)
82                         {
83                             // Convert string to float
84                             alpha = strtof(argv[4], nullptr);
85 
86                             if(argc > 5)
87                             {
88                                 // Convert string to float
89                                 beta = strtof(argv[5], nullptr);
90                             }
91                         }
92                     }
93                     else /* case without third file */
94                     {
95                         alpha = strtof(argv[3], nullptr);
96 
97                         if(argc > 4)
98                         {
99                             beta = strtof(argv[4], nullptr);
100                         }
101                     }
102                 }
103             }
104             else /* case M N K [alpha = 1.0f] [beta = 0.0f] */
105             {
106                 size_t M = strtol(argv[1], nullptr, 10);
107                 size_t N = strtol(argv[2], nullptr, 10);
108                 size_t K = strtol(argv[3], nullptr, 10);
109 
110                 src0.allocator()->init(TensorInfo(TensorShape(K, M), 1, DataType::F32));
111                 src1.allocator()->init(TensorInfo(TensorShape(N, K), 1, DataType::F32));
112                 src2.allocator()->init(TensorInfo(TensorShape(N, M), 1, DataType::F32));
113 
114                 if(argc > 4)
115                 {
116                     alpha = strtof(argv[4], nullptr);
117 
118                     if(argc > 5)
119                     {
120                         beta = strtof(argv[5], nullptr);
121                     }
122                 }
123             }
124         }
125 
126         init_sgemm_output(dst, src0, src1, DataType::F32);
127 
128         // Configure function
129         sgemm.configure(&src0, &src1, nullptr, &dst, alpha, beta);
130 
131         // Allocate all the images
132         src0.allocator()->allocate();
133         src1.allocator()->allocate();
134         dst.allocator()->allocate();
135 
136         // Fill the input images with either the data provided or random data
137         if(npy0.is_open())
138         {
139             npy0.fill_tensor(src0);
140             npy1.fill_tensor(src1);
141 
142             output_filename = "sgemm_out.npy";
143             is_fortran      = npy0.is_fortran();
144 
145             if(npy2.is_open())
146             {
147                 src2.allocator()->allocate();
148                 npy2.fill_tensor(src2);
149             }
150         }
151         else
152         {
153             src2.allocator()->allocate();
154 
155             fill_random_tensor(src0, -1.f, 1.f);
156             fill_random_tensor(src1, -1.f, 1.f);
157             fill_random_tensor(src2, -1.f, 1.f);
158         }
159 
160         // Dummy run for CLTuner
161         sgemm.run();
162 
163         return true;
164     }
do_run()165     void do_run() override
166     {
167         // Execute the function
168         sgemm.run();
169     }
do_teardown()170     void do_teardown() override
171     {
172         if(!output_filename.empty()) /* Save to .npy file */
173         {
174             save_to_npy(dst, output_filename, is_fortran);
175         }
176     }
177 
178 private:
179     Tensor      src0{}, src1{}, src2{}, dst{};
180     NEGEMM      sgemm{};
181     float       alpha{}, beta{};
182     bool        is_fortran{};
183     std::string output_filename{};
184 };
185 
186 /** Main program for sgemm test
187  *
188  * @param[in] argc Number of arguments
189  * @param[in] argv Arguments ( [optional] Matrix A, [optional] Matrix B, [optional] Matrix C, [optional] alpha, [optional] beta )
190  */
main(int argc,char ** argv)191 int main(int argc, char **argv)
192 {
193     return utils::run_example<NESGEMMExample>(argc, argv);
194 }
195