xref: /aosp_15_r20/external/eigen/bench/sparse_product.cpp (revision bf2c37156dfe67e5dfebd6d394bad8b2ab5804d4)
1*bf2c3715SXin Li 
2*bf2c3715SXin Li //g++ -O3 -g0 -DNDEBUG  sparse_product.cpp -I.. -I/home/gael/Coding/LinearAlgebra/mtl4/ -DDENSITY=0.005 -DSIZE=10000 && ./a.out
3*bf2c3715SXin Li //g++ -O3 -g0 -DNDEBUG  sparse_product.cpp -I.. -I/home/gael/Coding/LinearAlgebra/mtl4/ -DDENSITY=0.05 -DSIZE=2000 && ./a.out
4*bf2c3715SXin Li // -DNOGMM -DNOMTL -DCSPARSE
5*bf2c3715SXin Li // -I /home/gael/Coding/LinearAlgebra/CSparse/Include/ /home/gael/Coding/LinearAlgebra/CSparse/Lib/libcsparse.a
6*bf2c3715SXin Li 
7*bf2c3715SXin Li #include <typeinfo>
8*bf2c3715SXin Li 
9*bf2c3715SXin Li #ifndef SIZE
10*bf2c3715SXin Li #define SIZE 1000000
11*bf2c3715SXin Li #endif
12*bf2c3715SXin Li 
13*bf2c3715SXin Li #ifndef NNZPERCOL
14*bf2c3715SXin Li #define NNZPERCOL 6
15*bf2c3715SXin Li #endif
16*bf2c3715SXin Li 
17*bf2c3715SXin Li #ifndef REPEAT
18*bf2c3715SXin Li #define REPEAT 1
19*bf2c3715SXin Li #endif
20*bf2c3715SXin Li 
21*bf2c3715SXin Li #include <algorithm>
22*bf2c3715SXin Li #include "BenchTimer.h"
23*bf2c3715SXin Li #include "BenchUtil.h"
24*bf2c3715SXin Li #include "BenchSparseUtil.h"
25*bf2c3715SXin Li 
26*bf2c3715SXin Li #ifndef NBTRIES
27*bf2c3715SXin Li #define NBTRIES 1
28*bf2c3715SXin Li #endif
29*bf2c3715SXin Li 
30*bf2c3715SXin Li #define BENCH(X) \
31*bf2c3715SXin Li   timer.reset(); \
32*bf2c3715SXin Li   for (int _j=0; _j<NBTRIES; ++_j) { \
33*bf2c3715SXin Li     timer.start(); \
34*bf2c3715SXin Li     for (int _k=0; _k<REPEAT; ++_k) { \
35*bf2c3715SXin Li         X  \
36*bf2c3715SXin Li   } timer.stop(); }
37*bf2c3715SXin Li 
38*bf2c3715SXin Li // #ifdef MKL
39*bf2c3715SXin Li //
40*bf2c3715SXin Li // #include "mkl_types.h"
41*bf2c3715SXin Li // #include "mkl_spblas.h"
42*bf2c3715SXin Li //
43*bf2c3715SXin Li // template<typename Lhs,typename Rhs,typename Res>
44*bf2c3715SXin Li // void mkl_multiply(const Lhs& lhs, const Rhs& rhs, Res& res)
45*bf2c3715SXin Li // {
46*bf2c3715SXin Li //   char n = 'N';
47*bf2c3715SXin Li //   float alpha = 1;
48*bf2c3715SXin Li //   char matdescra[6];
49*bf2c3715SXin Li //   matdescra[0] = 'G';
50*bf2c3715SXin Li //   matdescra[1] = 0;
51*bf2c3715SXin Li //   matdescra[2] = 0;
52*bf2c3715SXin Li //   matdescra[3] = 'C';
53*bf2c3715SXin Li //   mkl_scscmm(&n, lhs.rows(), rhs.cols(), lhs.cols(), &alpha, matdescra,
54*bf2c3715SXin Li //              lhs._valuePtr(), lhs._innerIndexPtr(), lhs.outerIndexPtr(),
55*bf2c3715SXin Li //              pntre, b, &ldb, &beta, c, &ldc);
56*bf2c3715SXin Li // //   mkl_somatcopy('C', 'T', lhs.rows(), lhs.cols(), 1,
57*bf2c3715SXin Li // //                 lhs._valuePtr(), lhs.rows(), DST, dst_stride);
58*bf2c3715SXin Li // }
59*bf2c3715SXin Li //
60*bf2c3715SXin Li // #endif
61*bf2c3715SXin Li 
62*bf2c3715SXin Li 
63*bf2c3715SXin Li #ifdef CSPARSE
cs_sorted_multiply(const cs * a,const cs * b)64*bf2c3715SXin Li cs* cs_sorted_multiply(const cs* a, const cs* b)
65*bf2c3715SXin Li {
66*bf2c3715SXin Li //   return cs_multiply(a,b);
67*bf2c3715SXin Li 
68*bf2c3715SXin Li   cs* A = cs_transpose(a, 1);
69*bf2c3715SXin Li   cs* B = cs_transpose(b, 1);
70*bf2c3715SXin Li   cs* D = cs_multiply(B,A);   /* D = B'*A' */
71*bf2c3715SXin Li   cs_spfree (A) ;
72*bf2c3715SXin Li   cs_spfree (B) ;
73*bf2c3715SXin Li   cs_dropzeros (D) ;      /* drop zeros from D */
74*bf2c3715SXin Li   cs* C = cs_transpose (D, 1) ;   /* C = D', so that C is sorted */
75*bf2c3715SXin Li   cs_spfree (D) ;
76*bf2c3715SXin Li   return C;
77*bf2c3715SXin Li 
78*bf2c3715SXin Li //   cs* A = cs_transpose(a, 1);
79*bf2c3715SXin Li //   cs* C = cs_transpose(A, 1);
80*bf2c3715SXin Li //   return C;
81*bf2c3715SXin Li }
82*bf2c3715SXin Li 
cs_sorted_multiply2(const cs * a,const cs * b)83*bf2c3715SXin Li cs* cs_sorted_multiply2(const cs* a, const cs* b)
84*bf2c3715SXin Li {
85*bf2c3715SXin Li   cs* D = cs_multiply(a,b);
86*bf2c3715SXin Li   cs* E = cs_transpose(D,1);
87*bf2c3715SXin Li   cs_spfree(D);
88*bf2c3715SXin Li   cs* C = cs_transpose(E,1);
89*bf2c3715SXin Li   cs_spfree(E);
90*bf2c3715SXin Li   return C;
91*bf2c3715SXin Li }
92*bf2c3715SXin Li #endif
93*bf2c3715SXin Li 
94*bf2c3715SXin Li void bench_sort();
95*bf2c3715SXin Li 
main(int argc,char * argv[])96*bf2c3715SXin Li int main(int argc, char *argv[])
97*bf2c3715SXin Li {
98*bf2c3715SXin Li //   bench_sort();
99*bf2c3715SXin Li 
100*bf2c3715SXin Li   int rows = SIZE;
101*bf2c3715SXin Li   int cols = SIZE;
102*bf2c3715SXin Li   float density = DENSITY;
103*bf2c3715SXin Li 
104*bf2c3715SXin Li   EigenSparseMatrix sm1(rows,cols), sm2(rows,cols), sm3(rows,cols), sm4(rows,cols);
105*bf2c3715SXin Li 
106*bf2c3715SXin Li   BenchTimer timer;
107*bf2c3715SXin Li   for (int nnzPerCol = NNZPERCOL; nnzPerCol>1; nnzPerCol/=1.1)
108*bf2c3715SXin Li   {
109*bf2c3715SXin Li     sm1.setZero();
110*bf2c3715SXin Li     sm2.setZero();
111*bf2c3715SXin Li     fillMatrix2(nnzPerCol, rows, cols, sm1);
112*bf2c3715SXin Li     fillMatrix2(nnzPerCol, rows, cols, sm2);
113*bf2c3715SXin Li //     std::cerr << "filling OK\n";
114*bf2c3715SXin Li 
115*bf2c3715SXin Li     // dense matrices
116*bf2c3715SXin Li     #ifdef DENSEMATRIX
117*bf2c3715SXin Li     {
118*bf2c3715SXin Li       std::cout << "Eigen Dense\t" << nnzPerCol << "%\n";
119*bf2c3715SXin Li       DenseMatrix m1(rows,cols), m2(rows,cols), m3(rows,cols);
120*bf2c3715SXin Li       eiToDense(sm1, m1);
121*bf2c3715SXin Li       eiToDense(sm2, m2);
122*bf2c3715SXin Li 
123*bf2c3715SXin Li       timer.reset();
124*bf2c3715SXin Li       timer.start();
125*bf2c3715SXin Li       for (int k=0; k<REPEAT; ++k)
126*bf2c3715SXin Li         m3 = m1 * m2;
127*bf2c3715SXin Li       timer.stop();
128*bf2c3715SXin Li       std::cout << "   a * b:\t" << timer.value() << endl;
129*bf2c3715SXin Li 
130*bf2c3715SXin Li       timer.reset();
131*bf2c3715SXin Li       timer.start();
132*bf2c3715SXin Li       for (int k=0; k<REPEAT; ++k)
133*bf2c3715SXin Li         m3 = m1.transpose() * m2;
134*bf2c3715SXin Li       timer.stop();
135*bf2c3715SXin Li       std::cout << "   a' * b:\t" << timer.value() << endl;
136*bf2c3715SXin Li 
137*bf2c3715SXin Li       timer.reset();
138*bf2c3715SXin Li       timer.start();
139*bf2c3715SXin Li       for (int k=0; k<REPEAT; ++k)
140*bf2c3715SXin Li         m3 = m1.transpose() * m2.transpose();
141*bf2c3715SXin Li       timer.stop();
142*bf2c3715SXin Li       std::cout << "   a' * b':\t" << timer.value() << endl;
143*bf2c3715SXin Li 
144*bf2c3715SXin Li       timer.reset();
145*bf2c3715SXin Li       timer.start();
146*bf2c3715SXin Li       for (int k=0; k<REPEAT; ++k)
147*bf2c3715SXin Li         m3 = m1 * m2.transpose();
148*bf2c3715SXin Li       timer.stop();
149*bf2c3715SXin Li       std::cout << "   a * b':\t" << timer.value() << endl;
150*bf2c3715SXin Li     }
151*bf2c3715SXin Li     #endif
152*bf2c3715SXin Li 
153*bf2c3715SXin Li     // eigen sparse matrices
154*bf2c3715SXin Li     {
155*bf2c3715SXin Li       std::cout << "Eigen sparse\t" << sm1.nonZeros()/(float(sm1.rows())*float(sm1.cols()))*100 << "% * "
156*bf2c3715SXin Li                 << sm2.nonZeros()/(float(sm2.rows())*float(sm2.cols()))*100 << "%\n";
157*bf2c3715SXin Li 
158*bf2c3715SXin Li       BENCH(sm3 = sm1 * sm2; )
159*bf2c3715SXin Li       std::cout << "   a * b:\t" << timer.value() << endl;
160*bf2c3715SXin Li 
161*bf2c3715SXin Li //       BENCH(sm3 = sm1.transpose() * sm2; )
162*bf2c3715SXin Li //       std::cout << "   a' * b:\t" << timer.value() << endl;
163*bf2c3715SXin Li // //
164*bf2c3715SXin Li //       BENCH(sm3 = sm1.transpose() * sm2.transpose(); )
165*bf2c3715SXin Li //       std::cout << "   a' * b':\t" << timer.value() << endl;
166*bf2c3715SXin Li // //
167*bf2c3715SXin Li //       BENCH(sm3 = sm1 * sm2.transpose(); )
168*bf2c3715SXin Li //       std::cout << "   a * b' :\t" << timer.value() << endl;
169*bf2c3715SXin Li 
170*bf2c3715SXin Li 
171*bf2c3715SXin Li //       std::cout << "\n";
172*bf2c3715SXin Li //
173*bf2c3715SXin Li //       BENCH( sm3._experimentalNewProduct(sm1, sm2); )
174*bf2c3715SXin Li //       std::cout << "   a * b:\t" << timer.value() << endl;
175*bf2c3715SXin Li //
176*bf2c3715SXin Li //       BENCH(sm3._experimentalNewProduct(sm1.transpose(),sm2); )
177*bf2c3715SXin Li //       std::cout << "   a' * b:\t" << timer.value() << endl;
178*bf2c3715SXin Li // //
179*bf2c3715SXin Li //       BENCH(sm3._experimentalNewProduct(sm1.transpose(),sm2.transpose()); )
180*bf2c3715SXin Li //       std::cout << "   a' * b':\t" << timer.value() << endl;
181*bf2c3715SXin Li // //
182*bf2c3715SXin Li //       BENCH(sm3._experimentalNewProduct(sm1, sm2.transpose());)
183*bf2c3715SXin Li //       std::cout << "   a * b' :\t" << timer.value() << endl;
184*bf2c3715SXin Li     }
185*bf2c3715SXin Li 
186*bf2c3715SXin Li     // eigen dyn-sparse matrices
187*bf2c3715SXin Li     /*{
188*bf2c3715SXin Li       DynamicSparseMatrix<Scalar> m1(sm1), m2(sm2), m3(sm3);
189*bf2c3715SXin Li       std::cout << "Eigen dyn-sparse\t" << m1.nonZeros()/(float(m1.rows())*float(m1.cols()))*100 << "% * "
190*bf2c3715SXin Li                 << m2.nonZeros()/(float(m2.rows())*float(m2.cols()))*100 << "%\n";
191*bf2c3715SXin Li 
192*bf2c3715SXin Li //       timer.reset();
193*bf2c3715SXin Li //       timer.start();
194*bf2c3715SXin Li       BENCH(for (int k=0; k<REPEAT; ++k) m3 = m1 * m2;)
195*bf2c3715SXin Li //       timer.stop();
196*bf2c3715SXin Li       std::cout << "   a * b:\t" << timer.value() << endl;
197*bf2c3715SXin Li //       std::cout << sm3 << "\n";
198*bf2c3715SXin Li 
199*bf2c3715SXin Li       timer.reset();
200*bf2c3715SXin Li       timer.start();
201*bf2c3715SXin Li //       std::cerr << "transpose...\n";
202*bf2c3715SXin Li //       EigenSparseMatrix sm4 = sm1.transpose();
203*bf2c3715SXin Li //       std::cout << sm4.nonZeros() << " == " << sm1.nonZeros() << "\n";
204*bf2c3715SXin Li //       exit(1);
205*bf2c3715SXin Li //       std::cerr << "transpose OK\n";
206*bf2c3715SXin Li //       std::cout << sm1 << "\n\n" << sm1.transpose() << "\n\n" << sm4.transpose() << "\n\n";
207*bf2c3715SXin Li       BENCH(for (int k=0; k<REPEAT; ++k) m3 = m1.transpose() * m2;)
208*bf2c3715SXin Li //       timer.stop();
209*bf2c3715SXin Li       std::cout << "   a' * b:\t" << timer.value() << endl;
210*bf2c3715SXin Li 
211*bf2c3715SXin Li //       timer.reset();
212*bf2c3715SXin Li //       timer.start();
213*bf2c3715SXin Li       BENCH( for (int k=0; k<REPEAT; ++k) m3 = m1.transpose() * m2.transpose(); )
214*bf2c3715SXin Li //       timer.stop();
215*bf2c3715SXin Li       std::cout << "   a' * b':\t" << timer.value() << endl;
216*bf2c3715SXin Li 
217*bf2c3715SXin Li //       timer.reset();
218*bf2c3715SXin Li //       timer.start();
219*bf2c3715SXin Li       BENCH( for (int k=0; k<REPEAT; ++k) m3 = m1 * m2.transpose(); )
220*bf2c3715SXin Li //       timer.stop();
221*bf2c3715SXin Li       std::cout << "   a * b' :\t" << timer.value() << endl;
222*bf2c3715SXin Li     }*/
223*bf2c3715SXin Li 
224*bf2c3715SXin Li     // CSparse
225*bf2c3715SXin Li     #ifdef CSPARSE
226*bf2c3715SXin Li     {
227*bf2c3715SXin Li       std::cout << "CSparse \t" << nnzPerCol << "%\n";
228*bf2c3715SXin Li       cs *m1, *m2, *m3;
229*bf2c3715SXin Li       eiToCSparse(sm1, m1);
230*bf2c3715SXin Li       eiToCSparse(sm2, m2);
231*bf2c3715SXin Li 
232*bf2c3715SXin Li       BENCH(
233*bf2c3715SXin Li       {
234*bf2c3715SXin Li         m3 = cs_sorted_multiply(m1, m2);
235*bf2c3715SXin Li         if (!m3)
236*bf2c3715SXin Li         {
237*bf2c3715SXin Li           std::cerr << "cs_multiply failed\n";
238*bf2c3715SXin Li         }
239*bf2c3715SXin Li //         cs_print(m3, 0);
240*bf2c3715SXin Li         cs_spfree(m3);
241*bf2c3715SXin Li       }
242*bf2c3715SXin Li       );
243*bf2c3715SXin Li //       timer.stop();
244*bf2c3715SXin Li       std::cout << "   a * b:\t" << timer.value() << endl;
245*bf2c3715SXin Li 
246*bf2c3715SXin Li //       BENCH( { m3 = cs_sorted_multiply2(m1, m2); cs_spfree(m3); } );
247*bf2c3715SXin Li //       std::cout << "   a * b:\t" << timer.value() << endl;
248*bf2c3715SXin Li     }
249*bf2c3715SXin Li     #endif
250*bf2c3715SXin Li 
251*bf2c3715SXin Li     #ifndef NOUBLAS
252*bf2c3715SXin Li     {
253*bf2c3715SXin Li       std::cout << "ublas\t" << nnzPerCol << "%\n";
254*bf2c3715SXin Li       UBlasSparse m1(rows,cols), m2(rows,cols), m3(rows,cols);
255*bf2c3715SXin Li       eiToUblas(sm1, m1);
256*bf2c3715SXin Li       eiToUblas(sm2, m2);
257*bf2c3715SXin Li 
258*bf2c3715SXin Li       BENCH(boost::numeric::ublas::prod(m1, m2, m3););
259*bf2c3715SXin Li       std::cout << "   a * b:\t" << timer.value() << endl;
260*bf2c3715SXin Li     }
261*bf2c3715SXin Li     #endif
262*bf2c3715SXin Li 
263*bf2c3715SXin Li     // GMM++
264*bf2c3715SXin Li     #ifndef NOGMM
265*bf2c3715SXin Li     {
266*bf2c3715SXin Li       std::cout << "GMM++ sparse\t" << nnzPerCol << "%\n";
267*bf2c3715SXin Li       GmmDynSparse  gmmT3(rows,cols);
268*bf2c3715SXin Li       GmmSparse m1(rows,cols), m2(rows,cols), m3(rows,cols);
269*bf2c3715SXin Li       eiToGmm(sm1, m1);
270*bf2c3715SXin Li       eiToGmm(sm2, m2);
271*bf2c3715SXin Li 
272*bf2c3715SXin Li       BENCH(gmm::mult(m1, m2, gmmT3););
273*bf2c3715SXin Li       std::cout << "   a * b:\t" << timer.value() << endl;
274*bf2c3715SXin Li 
275*bf2c3715SXin Li //       BENCH(gmm::mult(gmm::transposed(m1), m2, gmmT3););
276*bf2c3715SXin Li //       std::cout << "   a' * b:\t" << timer.value() << endl;
277*bf2c3715SXin Li //
278*bf2c3715SXin Li //       if (rows<500)
279*bf2c3715SXin Li //       {
280*bf2c3715SXin Li //         BENCH(gmm::mult(gmm::transposed(m1), gmm::transposed(m2), gmmT3););
281*bf2c3715SXin Li //         std::cout << "   a' * b':\t" << timer.value() << endl;
282*bf2c3715SXin Li //
283*bf2c3715SXin Li //         BENCH(gmm::mult(m1, gmm::transposed(m2), gmmT3););
284*bf2c3715SXin Li //         std::cout << "   a * b':\t" << timer.value() << endl;
285*bf2c3715SXin Li //       }
286*bf2c3715SXin Li //       else
287*bf2c3715SXin Li //       {
288*bf2c3715SXin Li //         std::cout << "   a' * b':\t" << "forever" << endl;
289*bf2c3715SXin Li //         std::cout << "   a * b':\t" << "forever" << endl;
290*bf2c3715SXin Li //       }
291*bf2c3715SXin Li     }
292*bf2c3715SXin Li     #endif
293*bf2c3715SXin Li 
294*bf2c3715SXin Li     // MTL4
295*bf2c3715SXin Li     #ifndef NOMTL
296*bf2c3715SXin Li     {
297*bf2c3715SXin Li       std::cout << "MTL4\t" << nnzPerCol << "%\n";
298*bf2c3715SXin Li       MtlSparse m1(rows,cols), m2(rows,cols), m3(rows,cols);
299*bf2c3715SXin Li       eiToMtl(sm1, m1);
300*bf2c3715SXin Li       eiToMtl(sm2, m2);
301*bf2c3715SXin Li 
302*bf2c3715SXin Li       BENCH(m3 = m1 * m2;);
303*bf2c3715SXin Li       std::cout << "   a * b:\t" << timer.value() << endl;
304*bf2c3715SXin Li 
305*bf2c3715SXin Li //       BENCH(m3 = trans(m1) * m2;);
306*bf2c3715SXin Li //       std::cout << "   a' * b:\t" << timer.value() << endl;
307*bf2c3715SXin Li //
308*bf2c3715SXin Li //       BENCH(m3 = trans(m1) * trans(m2););
309*bf2c3715SXin Li //       std::cout << "  a' * b':\t" << timer.value() << endl;
310*bf2c3715SXin Li //
311*bf2c3715SXin Li //       BENCH(m3 = m1 * trans(m2););
312*bf2c3715SXin Li //       std::cout << "   a * b' :\t" << timer.value() << endl;
313*bf2c3715SXin Li     }
314*bf2c3715SXin Li     #endif
315*bf2c3715SXin Li 
316*bf2c3715SXin Li     std::cout << "\n\n";
317*bf2c3715SXin Li   }
318*bf2c3715SXin Li 
319*bf2c3715SXin Li   return 0;
320*bf2c3715SXin Li }
321*bf2c3715SXin Li 
322*bf2c3715SXin Li 
323*bf2c3715SXin Li 
324