/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <jni.h>
#include <limits.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>

#include <new>

/*
 * OPTNONE: function attribute that asks the compiler not to optimize the
 * function it is attached to.  Clang and GCC spell this differently; any other
 * compiler gets an empty definition so the file still builds.
 */
#if defined(__clang__)
#define OPTNONE __attribute__((optnone))
#elif defined(__GNUC__)
#define OPTNONE __attribute__((optimize("O0")))
#else
#define OPTNONE
#endif

/*
 * Everything from here to the end of qsort_local() is adapted from the bionic
 * sources.  The code is duplicated here to remove the dependency on an
 * optimized bionic.
 *
 * Qsort routine from Bentley & McIlroy's "Engineering a Sort Function".
 *
 * All byte counts are kept in size_t so that large arrays are not truncated.
 */

/* Smaller of two byte counts. */
static inline size_t
qsort_min(size_t x, size_t y)
{
    return x < y ? x : y;
}

/* Exchange n bytes between parmi and parmj, one TYPE-sized unit at a time. */
#define QSORT_SWAPCODE(TYPE, parmi, parmj, n) do {          \
        size_t units_ = (n) / sizeof(TYPE);                 \
        TYPE *pi_ = (TYPE *)(parmi);                        \
        TYPE *pj_ = (TYPE *)(parmj);                        \
        for (; units_ > 0; units_--) {                      \
            TYPE tmp_ = *pi_;                               \
            *pi_++ = *pj_;                                  \
            *pj_++ = tmp_;                                  \
        }                                                   \
    } while (0)

/*
 * Choose how elements are exchanged:
 *   0 - base is long-aligned and an element is exactly one long;
 *   1 - base is long-aligned and the element size is a multiple of long;
 *   2 - anything else, swap byte by byte.
 */
static inline int
qsort_swaptype(const char *base, size_t es)
{
    if ((uintptr_t)base % sizeof(long) != 0 || es % sizeof(long) != 0)
        return 2;
    return es == sizeof(long) ? 0 : 1;
}

/* Exchange n bytes between a and b using the strategy selected by swaptype. */
static inline void
swapfunc(char *a, char *b, size_t n, int swaptype)
{
    if (swaptype <= 1) {
        QSORT_SWAPCODE(long, a, b, n);
    } else {
        QSORT_SWAPCODE(char, a, b, n);
    }
}

/* Exchange the two es-byte elements at a and b. */
static inline void
swap_elems(char *a, char *b, size_t es, int swaptype)
{
    if (swaptype == 0) {
        long tmp = *(long *)a;
        *(long *)a = *(long *)b;
        *(long *)b = tmp;
    } else {
        swapfunc(a, b, es, swaptype);
    }
}

/* Exchange two n-byte regions; does nothing when n is zero. */
static inline void
vecswap(char *a, char *b, size_t n, int swaptype)
{
    if (n > 0)
        swapfunc(a, b, n, swaptype);
}

/* Return whichever of a, b, c holds the median value according to cmp. */
static inline char *
med3(char *a, char *b, char *c, int (*cmp)(const void *, const void *))
{
    return cmp(a, b) < 0 ?
        (cmp(b, c) < 0 ? b : (cmp(a, c) < 0 ? c : a))
        : (cmp(b, c) > 0 ? b : (cmp(a, c) < 0 ? a : c));
}

/* Straight insertion sort over n elements of es bytes starting at a. */
static void
insertion_sort(char *a, size_t n, size_t es, int swaptype,
    int (*cmp)(const void *, const void *))
{
    char *end = a + n * es;

    for (char *pm = a + es; pm < end; pm += es)
        for (char *pl = pm; pl > a && cmp(pl - es, pl) > 0; pl -= es)
            swap_elems(pl, pl - es, es, swaptype);
}

/*
 * Sort the n elements of es bytes each that start at aa, in ascending order
 * as defined by cmp (negative / zero / positive, as for qsort()).
 *
 * The left partition is handled by recursion and the right one by looping.
 */
void
qsort_local(void *aa, size_t n, size_t es, int (*cmp)(const void *, const void *))
{
    char *pa, *pb, *pc, *pd, *pl, *pm, *pn;
    size_t d, r;
    int order, swaptype, swap_cnt;
    char *a = (char *)aa;

loop:
    swaptype = qsort_swaptype(a, es);
    swap_cnt = 0;
    if (n < 7) {
        insertion_sort(a, n, es, swaptype, cmp);
        return;
    }

    /* Pivot: middle element, median of 3, or median of three medians. */
    pm = a + (n / 2) * es;
    if (n > 7) {
        pl = a;
        pn = a + (n - 1) * es;
        if (n > 40) {
            d = (n / 8) * es;
            pl = med3(pl, pl + d, pl + 2 * d, cmp);
            pm = med3(pm - d, pm, pm + d, cmp);
            pn = med3(pn - 2 * d, pn - d, pn, cmp);
        }
        pm = med3(pl, pm, pn, cmp);
    }
    swap_elems(a, pm, es, swaptype);    /* pivot now lives at a[0] */

    /*
     * Partition.  Elements equal to the pivot are parked at both ends
     * ([a, pa) and (pd, end)) and moved to the middle afterwards.
     */
    pa = pb = a + es;
    pc = pd = a + (n - 1) * es;
    for (;;) {
        while (pb <= pc && (order = cmp(pb, a)) <= 0) {
            if (order == 0) {
                swap_cnt = 1;
                swap_elems(pa, pb, es, swaptype);
                pa += es;
            }
            pb += es;
        }
        while (pb <= pc && (order = cmp(pc, a)) >= 0) {
            if (order == 0) {
                swap_cnt = 1;
                swap_elems(pc, pd, es, swaptype);
                pd -= es;
            }
            pc -= es;
        }
        if (pb > pc)
            break;
        swap_elems(pb, pc, es, swaptype);
        swap_cnt = 1;
        pb += es;
        pc -= es;
    }
    if (swap_cnt == 0) {    /* Switch to insertion sort */
        insertion_sort(a, n, es, swaptype, cmp);
        return;
    }

    /* Move the parked pivot-equal elements from the ends into the middle. */
    pn = a + n * es;
    r = qsort_min((size_t)(pa - a), (size_t)(pb - pa));
    vecswap(a, pb - r, r, swaptype);
    r = qsort_min((size_t)(pd - pc), (size_t)(pn - pd) - es);
    vecswap(pb, pn - r, r, swaptype);

    r = (size_t)(pb - pa);
    if (r > es)
        qsort_local(a, r / es, es, cmp);
    r = (size_t)(pd - pc);
    if (r > es) {
        /* Iterate rather than recurse to save stack space */
        a = pn - r;
        n = r / es;
        goto loop;
    }
}

/* code duplication ends here */

/**
 * Util for getting a time stamp.
 *
 * @return the time reported by gettimeofday(), expressed in milliseconds
 *         (seconds and microseconds are both folded into one double)
 */
double currentTimeMillis()
{
    struct timeval now;
    gettimeofday(&now, NULL);

    const double wholeSecondsMs = now.tv_sec * 1000.0;
    const double fractionMs = now.tv_usec / 1000.0;
    return wholeSecondsMs + fractionMs;
}

/**
 * Fill an array with pseudo-random values.
 *
 * The generator is re-seeded with srand(seed) on every call, so the same seed
 * always produces the same contents.
 *
 * @param array destination buffer with room for at least len elements
 * @param len   number of elements to write; nothing is written when len <= 0
 * @param seed  value handed to srand() before the first rand() call
 */
template <typename T> void randomInitArray(T* array, int len, unsigned int seed)
{
    srand(seed);
    int filled = 0;
    while (filled < len) {
        array[filled] = T(rand());
        ++filled;
    }
}

/**
 * Comparison function for int, for qsort.
 *
 * @param p1 pointer to the first int
 * @param p2 pointer to the second int
 * @return -1, 0 or 1 when *p1 is less than, equal to or greater than *p2
 */
int cmpint(const void* p1, const void* p2)
{
    const int lhs = *(const int*)p1;
    const int rhs = *(const int*)p2;
    // Compare rather than subtract: lhs - rhs overflows for operands of
    // opposite sign and large magnitude, which would flip the result's sign.
    return (lhs > rhs) - (lhs < rhs);
}

Java_android_simplecpu_cts_CpuNative_runSort(JNIEnv * env,jclass clazz,jint numberElements,jint repetition)193*b7c941bbSAndroid Build Coastguard Worker extern "C" JNIEXPORT jdouble JNICALL Java_android_simplecpu_cts_CpuNative_runSort(JNIEnv* env,
194*b7c941bbSAndroid Build Coastguard Worker jclass clazz, jint numberElements, jint repetition)
195*b7c941bbSAndroid Build Coastguard Worker {
196*b7c941bbSAndroid Build Coastguard Worker int* data = new(std::nothrow) int[numberElements];
197*b7c941bbSAndroid Build Coastguard Worker if (data == NULL) {
198*b7c941bbSAndroid Build Coastguard Worker env->ThrowNew(env->FindClass("java/lang/OutOfMemoryError"), "No memory");
199*b7c941bbSAndroid Build Coastguard Worker return -1;
200*b7c941bbSAndroid Build Coastguard Worker }
201*b7c941bbSAndroid Build Coastguard Worker double totalTime = 0;
202*b7c941bbSAndroid Build Coastguard Worker for (int i = 0; i < repetition; i++) {
203*b7c941bbSAndroid Build Coastguard Worker randomInitArray<int>(data, numberElements, 0);
204*b7c941bbSAndroid Build Coastguard Worker double start = currentTimeMillis();
205*b7c941bbSAndroid Build Coastguard Worker qsort_local(data, numberElements, sizeof(int), cmpint);
206*b7c941bbSAndroid Build Coastguard Worker double end = currentTimeMillis();
207*b7c941bbSAndroid Build Coastguard Worker totalTime += (end - start);
208*b7c941bbSAndroid Build Coastguard Worker }
209*b7c941bbSAndroid Build Coastguard Worker delete[] data;
210*b7c941bbSAndroid Build Coastguard Worker return totalTime;
211*b7c941bbSAndroid Build Coastguard Worker }
212*b7c941bbSAndroid Build Coastguard Worker
213*b7c941bbSAndroid Build Coastguard Worker
/**
 * Do matrix multiplication, C = A x B, with all matrices being n x n and
 * stored row by row.
 * The implementation is not the most efficient one, but it is good enough for
 * benchmarking purposes.
 *
 * Columns are processed in batches of 8 with an unrolled kernel; when n is not
 * a multiple of 8 the remaining columns are computed one at a time, so no
 * element outside the n x n matrices is read or written.
 *
 * @param A left operand, n * n floats
 * @param B right operand, n * n floats
 * @param C result, n * n floats, overwritten
 * @param n matrix dimension; nothing is done when n <= 0
 */
void doMatrixMultiplication(float* A, float* B, float* C, int n)
{
    // batch size
    enum { kBatch = 8 };
    for (int i = 0; i < n; i++) {
        int j = 0;
        for (; j <= n - kBatch; j += kBatch) {
            float sum[kBatch];
            for (int k = 0; k < kBatch; k++) {
                sum[k] = 0;
            }
            // re-use the whole cache line for accessing B.
            // otherwise, the whole line will be read and only one value will be used.
            for (int k = 0; k < n; k++) {
                float a = A[i * n + k];
                sum[0] += a * B[k * n + j];
                sum[1] += a * B[k * n + j + 1];
                sum[2] += a * B[k * n + j + 2];
                sum[3] += a * B[k * n + j + 3];
                sum[4] += a * B[k * n + j + 4];
                sum[5] += a * B[k * n + j + 5];
                sum[6] += a * B[k * n + j + 6];
                sum[7] += a * B[k * n + j + 7];
            }
            for (int k = 0; k < kBatch; k++) {
                C[i * n + j + k] = sum[k];
            }
        }
        // Leftover columns (fewer than one batch): plain dot products.
        for (; j < n; j++) {
            float sum = 0;
            for (int k = 0; k < n; k++) {
                sum += A[i * n + k] * B[k * n + j];
            }
            C[i * n + j] = sum;
        }
    }
}

Java_android_simplecpu_cts_CpuNative_runMatrixMultiplication(JNIEnv * env,jclass clazz,jint n,jint repetition)250*b7c941bbSAndroid Build Coastguard Worker extern "C" JNIEXPORT jdouble JNICALL Java_android_simplecpu_cts_CpuNative_runMatrixMultiplication(
251*b7c941bbSAndroid Build Coastguard Worker JNIEnv* env, jclass clazz, jint n, jint repetition) OPTNONE
252*b7c941bbSAndroid Build Coastguard Worker {
253*b7c941bbSAndroid Build Coastguard Worker // C = A x B
254*b7c941bbSAndroid Build Coastguard Worker float* A = new(std::nothrow) float[n * n];
255*b7c941bbSAndroid Build Coastguard Worker float* B = new(std::nothrow) float[n * n];
256*b7c941bbSAndroid Build Coastguard Worker float* C = new(std::nothrow) float[n * n];
257*b7c941bbSAndroid Build Coastguard Worker if ((A == NULL) || (B == NULL) || (C == NULL)) {
258*b7c941bbSAndroid Build Coastguard Worker delete[] A;
259*b7c941bbSAndroid Build Coastguard Worker delete[] B;
260*b7c941bbSAndroid Build Coastguard Worker delete[] C;
261*b7c941bbSAndroid Build Coastguard Worker env->ThrowNew(env->FindClass("java/lang/OutOfMemoryError"), "No memory");
262*b7c941bbSAndroid Build Coastguard Worker return -1;
263*b7c941bbSAndroid Build Coastguard Worker }
264*b7c941bbSAndroid Build Coastguard Worker double totalTime = 0;
265*b7c941bbSAndroid Build Coastguard Worker for (int i = 0; i < repetition; i++) {
266*b7c941bbSAndroid Build Coastguard Worker randomInitArray<float>(A, n * n, 0);
267*b7c941bbSAndroid Build Coastguard Worker randomInitArray<float>(B, n * n, 1);
268*b7c941bbSAndroid Build Coastguard Worker double start = currentTimeMillis();
269*b7c941bbSAndroid Build Coastguard Worker doMatrixMultiplication(A, B, C, n);
270*b7c941bbSAndroid Build Coastguard Worker double end = currentTimeMillis();
271*b7c941bbSAndroid Build Coastguard Worker totalTime += (end - start);
272*b7c941bbSAndroid Build Coastguard Worker }
273*b7c941bbSAndroid Build Coastguard Worker delete[] A;
274*b7c941bbSAndroid Build Coastguard Worker delete[] B;
275*b7c941bbSAndroid Build Coastguard Worker delete[] C;
276*b7c941bbSAndroid Build Coastguard Worker return totalTime;
277*b7c941bbSAndroid Build Coastguard Worker }
278*b7c941bbSAndroid Build Coastguard Worker
279