xref: /aosp_15_r20/cts/tests/simplecpu/jni/CpuNativeJni.cpp (revision b7c941bb3fa97aba169d73cee0bed2de8ac964bf)
1 /*
2  * Copyright (C) 2012 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <jni.h>
18 #include <stdlib.h>
19 #include <string.h>
20 #include <sys/time.h>
21 
22 #include <new>
23 
/*
 * OPTNONE marks a function that the compiler must not optimize.
 * clang spells this optnone; other compilers get the GCC optimize("O0") form.
 */
#if defined(__clang__)
#  define OPTNONE __attribute__((optnone))
#else
#  define OPTNONE __attribute__((optimize("O0")))
#endif
29 
/* The code from here through qsort_local is copied from the bionic source.
 * It is duplicated here so that this file does not depend on an optimized
 * bionic implementation.
 */
/* Forward declarations of the sort helpers defined further down. */
static __inline char *med3(char *a, char *b, char *c,
                           int (*cmp)(const void *, const void *));
static __inline void swapfunc(char *a, char *b, int n, int swaptype);
35 
36 #define min(a, b)   (a) < (b) ? a : b
37 
38 /*
39  * Qsort routine from Bentley & McIlroy's "Engineering a Sort Function".
40  */
/*
 * Exchange n bytes between parmi and parmj, moving one TYPE-sized unit at a
 * time. The body always runs at least once, so one unit is exchanged even
 * when n / sizeof(TYPE) is zero.
 */
#define swapcode(TYPE, parmi, parmj, n) {           \
    long remaining = (n) / sizeof (TYPE);           \
    TYPE *lhs = (TYPE *) (parmi);                   \
    TYPE *rhs = (TYPE *) (parmj);                   \
    for (;;) {                                      \
        TYPE held = *lhs;                           \
        *lhs++ = *rhs;                              \
        *rhs++ = held;                              \
        if (--remaining <= 0)                       \
            break;                                  \
    }                                               \
}
51 
/*
 * Choose the swap strategy for elements of es bytes starting at a and store
 * it in the variable `swaptype`, which must be in scope at the point of use:
 *   2 - a is not aligned to sizeof(long) or es is not a multiple of it,
 *   0 - aligned and es is exactly sizeof(long),
 *   1 - aligned and es is some other multiple of sizeof(long).
 * Both arguments are parenthesized so that compound expressions classify
 * correctly. The macro carries no trailing semicolon; the caller supplies it.
 */
#define SWAPINIT(a, es) swaptype =                                      \
    (((uintptr_t)(a)) % sizeof(long) || (es) % sizeof(long)) ? 2 :      \
    ((es) == sizeof(long) ? 0 : 1)
54 
55 static __inline void
swapfunc(char * a,char * b,int n,int swaptype)56 swapfunc(char *a, char *b, int n, int swaptype)
57 {
58     if (swaptype <= 1)
59         swapcode(long, a, b, n)
60     else
61         swapcode(char, a, b, n)
62 }
63 
/*
 * Exchange the elements at a and b. Uses the variables `es` and `swaptype`
 * from the enclosing scope: when swaptype is 0 a single long is exchanged
 * directly, otherwise swapfunc exchanges es bytes.
 * Wrapped in do { } while (0) so the expansion is one statement that needs
 * the caller's trailing semicolon.
 */
#define swap(a, b)                              \
    do {                                        \
        if (swaptype == 0) {                    \
            long t = *(long *)(a);              \
            *(long *)(a) = *(long *)(b);        \
            *(long *)(b) = t;                   \
        } else {                                \
            swapfunc(a, b, es, swaptype);       \
        }                                       \
    } while (0)
71 
/*
 * Exchange n bytes between a and b when n is positive; do nothing otherwise.
 * Uses `swaptype` from the enclosing scope. Wrapped in do { } while (0) so
 * that an `else` written after the macro cannot attach to the internal `if`.
 * n is evaluated more than once.
 */
#define vecswap(a, b, n)                        \
    do {                                        \
        if ((n) > 0)                            \
            swapfunc((a), (b), (n), swaptype);  \
    } while (0)
73 
/*
 * Return whichever of a, b, c points at the median element according to cmp.
 * cmp is called two or three times, always in the order (a,b), (b,c), (a,c).
 */
static __inline char *
med3(char *a, char *b, char *c, int (*cmp)(const void *, const void *))
{
    if (cmp(a, b) < 0) {
        /* a sorts before b */
        if (cmp(b, c) < 0)
            return b;
        return cmp(a, c) < 0 ? c : a;
    }

    /* a does not sort before b */
    if (cmp(b, c) > 0)
        return b;
    return cmp(a, c) < 0 ? a : c;
}
81 
/*
 * Sort, in place, the n elements of es bytes each that start at aa, using cmp
 * to order them (cmp > 0 means its first argument belongs after its second).
 *
 * Fewer than 7 elements are handled by insertion sort. Otherwise a pivot is
 * chosen, the array is partitioned around it with elements equal to the pivot
 * parked at both ends, those equal runs are moved to the middle, and the two
 * remaining regions are sorted: the first by recursion, the second by jumping
 * back to `loop`.
 *
 * The SWAPINIT, swap and vecswap macros read and write the locals `swaptype`
 * and `es` implicitly.
 *
 * NOTE(review): d and r are int, so byte offsets and sizes are narrowed to
 * int; confirm callers never sort arrays large enough for that to matter.
 */
void
qsort_local(void *aa, size_t n, size_t es, int (*cmp)(const void *, const void *))
{
    char *pa, *pb, *pc, *pd, *pl, *pm, *pn;
    int d, r, swaptype, swap_cnt;
    char *a = (char*)aa;

    /* Re-entered through goto with a new (a, n) for the second partition. */
loop:   SWAPINIT(a, es);
    swap_cnt = 0;
    if (n < 7) {
        /* Small input: plain insertion sort. */
        for (pm = (char *)a + es; pm < (char *) a + n * es; pm += es)
            for (pl = pm; pl > (char *) a && cmp(pl - es, pl) > 0;
                 pl -= es)
                swap(pl, pl - es);
        return;
    }
    /* Pivot candidate: the middle element (used as-is when n == 7). */
    pm = (char *)a + (n / 2) * es;
    if (n > 7) {
        pl = (char *)a;
        pn = (char *)a + (n - 1) * es;
        if (n > 40) {
            /* Large input: each candidate becomes a median of three samples
             * spaced d bytes apart. */
            d = (n / 8) * es;
            pl = med3(pl, pl + d, pl + 2 * d, cmp);
            pm = med3(pm - d, pm, pm + d, cmp);
            pn = med3(pn - 2 * d, pn - d, pn, cmp);
        }
        /* Median of the first, middle and last candidates. */
        pm = med3(pl, pm, pn, cmp);
    }
    /* Move the pivot to the front; it is compared against through `a`. */
    swap(a, pm);
    pa = pb = (char *)a + es;

    pc = pd = (char *)a + (n - 1) * es;
    for (;;) {
        /* Advance pb from the left over elements <= pivot; elements equal to
         * the pivot are swapped down to the left end, tracked by pa. */
        while (pb <= pc && (r = cmp(pb, a)) <= 0) {
            if (r == 0) {
                swap_cnt = 1;
                swap(pa, pb);
                pa += es;
            }
            pb += es;
        }
        /* Retreat pc from the right over elements >= pivot; elements equal to
         * the pivot are swapped up to the right end, tracked by pd. */
        while (pb <= pc && (r = cmp(pc, a)) >= 0) {
            if (r == 0) {
                swap_cnt = 1;
                swap(pc, pd);
                pd -= es;
            }
            pc -= es;
        }
        if (pb > pc)
            break;
        /* pb points at an element > pivot and pc at one < pivot: exchange. */
        swap(pb, pc);
        swap_cnt = 1;
        pb += es;
        pc -= es;
    }
    if (swap_cnt == 0) {  /* Switch to insertion sort */
        for (pm = (char *) a + es; pm < (char *) a + n * es; pm += es)
            for (pl = pm; pl > (char *) a && cmp(pl - es, pl) > 0;
                 pl -= es)
                swap(pl, pl - es);
        return;
    }

    /* Move the equal-to-pivot runs from both ends into the middle. */
    pn = (char *)a + n * es;
    r = min(pa - (char *)a, pb - pa);
    vecswap(a, pb - r, r);
    r = min(pd - pc, pn - pd - (int)es);
    vecswap(pb, pn - r, r);
    /* Region that compared below the pivot: pb - pa bytes at the start. */
    if ((r = pb - pa) > (int)es)
        qsort_local(a, r / es, es, cmp);
    /* Region that compared above the pivot: pd - pc bytes at the end. */
    if ((r = pd - pc) > (int)es) {
        /* Iterate rather than recurse to save stack space */
        a = pn - r;
        n = r / es;
        goto loop;
    }
    /* The goto above stands in for: qsort(pn - r, r / es, es, cmp); */
}
161 
162 /* code duplication ends here */
163 
/**
 * Read the current time with gettimeofday().
 * @return the timestamp in milliseconds, including the fractional part
 *         contributed by the microsecond field
 */
double currentTimeMillis()
{
    struct timeval now;
    gettimeofday(&now, NULL);
    const double wholeSecondsMs = now.tv_sec * 1000.0;
    const double microsecondsMs = now.tv_usec / 1000.0;
    return wholeSecondsMs + microsecondsMs;
}
173 
/**
 * Fill an array with pseudo-random values.
 * Seeds the C library generator with srand(seed) and then stores one rand()
 * result, converted to T, in each of the first len slots.
 * @param array destination buffer with room for at least len elements
 * @param len   number of elements to write; nothing is written when len <= 0
 * @param seed  value passed to srand(), so equal seeds give equal contents
 */
template <typename T> void randomInitArray(T* array, int len, unsigned int seed)
{
    srand(seed);
    int slot = 0;
    while (slot < len) {
        array[slot] = T(rand());
        ++slot;
    }
}
184 
/**
 * Comparison function for int, for qsort.
 * @param p1 points to the first int
 * @param p2 points to the second int
 * @return -1 if *p1 < *p2, 0 if they are equal, 1 if *p1 > *p2
 *
 * The result is built from two comparisons instead of a subtraction:
 * subtracting can overflow int when the operands are far apart, which is
 * undefined behavior and can report the wrong order.
 */
int cmpint(const void* p1, const void* p2)
{
    const int lhs = *(const int*)p1;
    const int rhs = *(const int*)p2;
    return (lhs > rhs) - (lhs < rhs);
}
192 
Java_android_simplecpu_cts_CpuNative_runSort(JNIEnv * env,jclass clazz,jint numberElements,jint repetition)193 extern "C" JNIEXPORT jdouble JNICALL Java_android_simplecpu_cts_CpuNative_runSort(JNIEnv* env,
194         jclass clazz, jint numberElements, jint repetition)
195 {
196     int* data = new(std::nothrow) int[numberElements];
197     if (data == NULL) {
198         env->ThrowNew(env->FindClass("java/lang/OutOfMemoryError"), "No memory");
199         return -1;
200     }
201     double totalTime = 0;
202     for (int i = 0; i < repetition; i++) {
203         randomInitArray<int>(data, numberElements, 0);
204         double start = currentTimeMillis();
205         qsort_local(data, numberElements, sizeof(int), cmpint);
206         double end = currentTimeMillis();
207         totalTime += (end - start);
208     }
209     delete[] data;
210     return totalTime;
211 }
212 
213 
/**
 * Matrix multiplication C = A x B, where all three matrices are n x n and
 * stored row-major.
 * The implementation is not the most efficient one, but it is good enough for
 * benchmarking.
 *
 * Output columns are produced in batches of 8. When n is a multiple of 8 every
 * batch takes the unrolled path. For any other n the last batch of each row is
 * narrower and is handled by a bounded loop, so B is never read and C is never
 * written past the end of a row.
 *
 * @param A left operand, n * n floats
 * @param B right operand, n * n floats
 * @param C result, n * n floats; every element is overwritten
 * @param n matrix dimension; nothing is done when n <= 0
 */
void doMatrixMultiplication(float* A, float* B, float* C, int n)
{
    // batch size
    const int M = 8;
    for (int i = 0; i < n; i++) {
        for (int j = 0; j < n; j += M) {
            // number of real columns in this batch (less than M only at the row tail)
            const int width = (n - j < M) ? (n - j) : M;
            float sum[M];
            for (int k = 0; k < M; k++) {
                sum[k] = 0;
            }
            if (width == M) {
                // re-use the whole cache line for accessing B.
                // otherwise, the whole line will be read and only one value will be used.
                for (int k = 0; k < n; k++) {
                    float a = A[i * n + k];
                    sum[0] += a * B[k * n + j];
                    sum[1] += a * B[k * n + j + 1];
                    sum[2] += a * B[k * n + j + 2];
                    sum[3] += a * B[k * n + j + 3];
                    sum[4] += a * B[k * n + j + 4];
                    sum[5] += a * B[k * n + j + 5];
                    sum[6] += a * B[k * n + j + 6];
                    sum[7] += a * B[k * n + j + 7];
                }
            } else {
                // partial batch: touch only the columns that exist
                for (int k = 0; k < n; k++) {
                    float a = A[i * n + k];
                    for (int c = 0; c < width; c++) {
                        sum[c] += a * B[k * n + j + c];
                    }
                }
            }
            for (int k = 0; k < width; k++) {
                C[i * n + j + k] = sum[k];
            }
        }
    }
}
249 
Java_android_simplecpu_cts_CpuNative_runMatrixMultiplication(JNIEnv * env,jclass clazz,jint n,jint repetition)250 extern "C" JNIEXPORT jdouble JNICALL Java_android_simplecpu_cts_CpuNative_runMatrixMultiplication(
251         JNIEnv* env, jclass clazz, jint n, jint repetition) OPTNONE
252 {
253     // C = A x B
254     float* A = new(std::nothrow) float[n * n];
255     float* B = new(std::nothrow) float[n * n];
256     float* C = new(std::nothrow) float[n * n];
257     if ((A == NULL) || (B == NULL) || (C == NULL)) {
258         delete[] A;
259         delete[] B;
260         delete[] C;
261         env->ThrowNew(env->FindClass("java/lang/OutOfMemoryError"), "No memory");
262         return -1;
263     }
264     double totalTime = 0;
265     for (int i = 0; i < repetition; i++) {
266         randomInitArray<float>(A, n * n, 0);
267         randomInitArray<float>(B, n * n, 1);
268         double start = currentTimeMillis();
269         doMatrixMultiplication(A, B, C, n);
270         double end = currentTimeMillis();
271         totalTime += (end - start);
272     }
273     delete[] A;
274     delete[] B;
275     delete[] C;
276     return totalTime;
277 }
278 
279