#include <sys/time.h>

#include <algorithm>
#include <cassert>
#include <chrono>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <vector>
9*cc02d7e2SAndroid Build Coastguard Worker
/*
 * Reference implementation built on the standard algorithms.
 *
 * Returns 1 when every one of the `len` bytes starting at `data` has its
 * top bit clear (i.e. is 7-bit ASCII), 0 otherwise. An empty range counts
 * as ASCII.
 */
static inline int ascii_std(const uint8_t *data, int len) {
  const uint8_t *const last = data + len;
  return std::all_of(data, last, [](uint8_t byte) { return byte < 0x80; });
}
13*cc02d7e2SAndroid Build Coastguard Worker
/*
 * ASCII check that consumes the input 16 bytes (two 64-bit words) at a time.
 *
 * data: first byte of the buffer; no alignment is required.
 * len:  number of bytes to examine; values <= 0 are treated as empty.
 *
 * Returns 1 when no examined byte has its top bit set, 0 otherwise.
 */
static inline int ascii_u64(const uint8_t *data, int len) {
  uint8_t orall = 0;

  if (len >= 16) {
    uint64_t or1 = 0, or2 = 0;

    do {
      /*
       * memcpy is the portable way to read a word from an arbitrarily
       * aligned byte buffer: dereferencing a casted uint64_t pointer is
       * undefined behaviour for misaligned addresses and breaks the
       * aliasing rules. Compilers lower these copies to plain loads.
       */
      uint64_t word1, word2;
      std::memcpy(&word1, data, sizeof(word1));
      std::memcpy(&word2, data + 8, sizeof(word2));

      or1 |= word1;
      or2 |= word2;

      data += 16;
      len -= 16;
    } while (len >= 16);

    /*
     * Idea from Benny Halevy: collapse the two accumulators into a single
     * byte. Any top bit set in any lane ==> 0xFF, otherwise 0x00.
     */
    orall = ((or1 | or2) & 0x8080808080808080ULL) ? 0xFF : 0x00;
  }

  /* Tail: fewer than 16 bytes remain; `len > 0` also rejects negative len. */
  for (; len > 0; --len) orall |= *data++;

  return orall < 0x80;
}
41*cc02d7e2SAndroid Build Coastguard Worker
42*cc02d7e2SAndroid Build Coastguard Worker #if defined(__x86_64__)
43*cc02d7e2SAndroid Build Coastguard Worker #include <x86intrin.h>
44*cc02d7e2SAndroid Build Coastguard Worker
ascii_simd(const uint8_t * data,int len)45*cc02d7e2SAndroid Build Coastguard Worker static inline int ascii_simd(const uint8_t *data, int len) {
46*cc02d7e2SAndroid Build Coastguard Worker if (len >= 32) {
47*cc02d7e2SAndroid Build Coastguard Worker const uint8_t *data2 = data + 16;
48*cc02d7e2SAndroid Build Coastguard Worker
49*cc02d7e2SAndroid Build Coastguard Worker __m128i or1 = _mm_set1_epi8(0), or2 = or1;
50*cc02d7e2SAndroid Build Coastguard Worker
51*cc02d7e2SAndroid Build Coastguard Worker while (len >= 32) {
52*cc02d7e2SAndroid Build Coastguard Worker __m128i input1 = _mm_loadu_si128((const __m128i *)data);
53*cc02d7e2SAndroid Build Coastguard Worker __m128i input2 = _mm_loadu_si128((const __m128i *)data2);
54*cc02d7e2SAndroid Build Coastguard Worker
55*cc02d7e2SAndroid Build Coastguard Worker or1 = _mm_or_si128(or1, input1);
56*cc02d7e2SAndroid Build Coastguard Worker or2 = _mm_or_si128(or2, input2);
57*cc02d7e2SAndroid Build Coastguard Worker
58*cc02d7e2SAndroid Build Coastguard Worker data += 32;
59*cc02d7e2SAndroid Build Coastguard Worker data2 += 32;
60*cc02d7e2SAndroid Build Coastguard Worker len -= 32;
61*cc02d7e2SAndroid Build Coastguard Worker }
62*cc02d7e2SAndroid Build Coastguard Worker
63*cc02d7e2SAndroid Build Coastguard Worker or1 = _mm_or_si128(or1, or2);
64*cc02d7e2SAndroid Build Coastguard Worker if (_mm_movemask_epi8(_mm_cmplt_epi8(or1, _mm_set1_epi8(0)))) return 0;
65*cc02d7e2SAndroid Build Coastguard Worker }
66*cc02d7e2SAndroid Build Coastguard Worker
67*cc02d7e2SAndroid Build Coastguard Worker return ascii_u64(data, len);
68*cc02d7e2SAndroid Build Coastguard Worker }
69*cc02d7e2SAndroid Build Coastguard Worker
70*cc02d7e2SAndroid Build Coastguard Worker #elif defined(__aarch64__)
71*cc02d7e2SAndroid Build Coastguard Worker #include <arm_neon.h>
72*cc02d7e2SAndroid Build Coastguard Worker
/*
 * NEON ASCII check: ORs the input together 32 bytes per iteration using two
 * independent 128-bit accumulators, then hands whatever is left (fewer than
 * 32 bytes) to ascii_u64().
 *
 * Returns 1 when no byte has its top bit set, 0 otherwise.
 */
static inline int ascii_simd(const uint8_t *data, int len) {
  if (len >= 32) {
    uint8x16_t acc_lo = vdupq_n_u8(0);
    uint8x16_t acc_hi = vdupq_n_u8(0);

    for (; len >= 32; data += 32, len -= 32) {
      acc_lo = vorrq_u8(acc_lo, vld1q_u8(data));
      acc_hi = vorrq_u8(acc_hi, vld1q_u8(data + 16));
    }

    /* The largest lane of the OR has its top bit set iff any byte did. */
    const uint8_t widest = vmaxvq_u8(vorrq_u8(acc_lo, acc_hi));
    if (widest & 0x80) return 0;
  }

  return ascii_u64(data, len);
}
97*cc02d7e2SAndroid Build Coastguard Worker
98*cc02d7e2SAndroid Build Coastguard Worker #endif
99*cc02d7e2SAndroid Build Coastguard Worker
/* One row of the implementation table: a display label plus the checker. */
struct ftab {
  /* Signature shared by every ASCII checker: (buffer, byte count) -> 0/1. */
  using checker_fn = int (*)(const uint8_t *, int);

  const char *name; /* label printed in reports and matched against argv */
  checker_fn func;  /* implementation under test */
};
104*cc02d7e2SAndroid Build Coastguard Worker
105*cc02d7e2SAndroid Build Coastguard Worker static const std::vector<ftab> _f = {
106*cc02d7e2SAndroid Build Coastguard Worker {
107*cc02d7e2SAndroid Build Coastguard Worker .name = "std",
108*cc02d7e2SAndroid Build Coastguard Worker .func = ascii_std,
109*cc02d7e2SAndroid Build Coastguard Worker },
110*cc02d7e2SAndroid Build Coastguard Worker {
111*cc02d7e2SAndroid Build Coastguard Worker .name = "u64",
112*cc02d7e2SAndroid Build Coastguard Worker .func = ascii_u64,
113*cc02d7e2SAndroid Build Coastguard Worker },
114*cc02d7e2SAndroid Build Coastguard Worker {
115*cc02d7e2SAndroid Build Coastguard Worker .name = "simd",
116*cc02d7e2SAndroid Build Coastguard Worker .func = ascii_simd,
117*cc02d7e2SAndroid Build Coastguard Worker },
118*cc02d7e2SAndroid Build Coastguard Worker };
119*cc02d7e2SAndroid Build Coastguard Worker
/*
 * Fills data[0..len) with the repeating sequence 0, 1, ..., 127 so that the
 * whole buffer is valid 7-bit ASCII. Does nothing when len <= 0.
 */
static void load_test_buf(uint8_t *data, int len) {
  for (int pos = 0; pos < len; ++pos) {
    /* Masking the index wraps the pattern every 128 bytes. */
    data[pos] = static_cast<uint8_t>(pos & 0x7F);
  }
}
128*cc02d7e2SAndroid Build Coastguard Worker
bench(const struct ftab & f,const uint8_t * data,int len)129*cc02d7e2SAndroid Build Coastguard Worker static void bench(const struct ftab &f, const uint8_t *data, int len) {
130*cc02d7e2SAndroid Build Coastguard Worker const int loops = 1024 * 1024 * 1024 / len;
131*cc02d7e2SAndroid Build Coastguard Worker int ret = 1;
132*cc02d7e2SAndroid Build Coastguard Worker double time_aligned, time_unaligned, size;
133*cc02d7e2SAndroid Build Coastguard Worker struct timeval tv1, tv2;
134*cc02d7e2SAndroid Build Coastguard Worker
135*cc02d7e2SAndroid Build Coastguard Worker fprintf(stderr, "bench %s (%d bytes)... ", f.name, len);
136*cc02d7e2SAndroid Build Coastguard Worker
137*cc02d7e2SAndroid Build Coastguard Worker /* aligned */
138*cc02d7e2SAndroid Build Coastguard Worker gettimeofday(&tv1, 0);
139*cc02d7e2SAndroid Build Coastguard Worker for (int i = 0; i < loops; ++i) ret &= f.func(data, len);
140*cc02d7e2SAndroid Build Coastguard Worker gettimeofday(&tv2, 0);
141*cc02d7e2SAndroid Build Coastguard Worker time_aligned = tv2.tv_usec - tv1.tv_usec;
142*cc02d7e2SAndroid Build Coastguard Worker time_aligned = time_aligned / 1000000 + tv2.tv_sec - tv1.tv_sec;
143*cc02d7e2SAndroid Build Coastguard Worker
144*cc02d7e2SAndroid Build Coastguard Worker /* unaligned */
145*cc02d7e2SAndroid Build Coastguard Worker gettimeofday(&tv1, 0);
146*cc02d7e2SAndroid Build Coastguard Worker for (int i = 0; i < loops; ++i) ret &= f.func(data + 1, len);
147*cc02d7e2SAndroid Build Coastguard Worker gettimeofday(&tv2, 0);
148*cc02d7e2SAndroid Build Coastguard Worker time_unaligned = tv2.tv_usec - tv1.tv_usec;
149*cc02d7e2SAndroid Build Coastguard Worker time_unaligned = time_unaligned / 1000000 + tv2.tv_sec - tv1.tv_sec;
150*cc02d7e2SAndroid Build Coastguard Worker
151*cc02d7e2SAndroid Build Coastguard Worker printf("%s ", ret ? "pass" : "FAIL");
152*cc02d7e2SAndroid Build Coastguard Worker
153*cc02d7e2SAndroid Build Coastguard Worker size = ((double)len * loops) / (1024 * 1024);
154*cc02d7e2SAndroid Build Coastguard Worker printf("%.0f/%.0f MB/s\n", size / time_aligned, size / time_unaligned);
155*cc02d7e2SAndroid Build Coastguard Worker }
156*cc02d7e2SAndroid Build Coastguard Worker
test(const struct ftab & f,uint8_t * data,int len)157*cc02d7e2SAndroid Build Coastguard Worker static void test(const struct ftab &f, uint8_t *data, int len) {
158*cc02d7e2SAndroid Build Coastguard Worker int error = 0;
159*cc02d7e2SAndroid Build Coastguard Worker
160*cc02d7e2SAndroid Build Coastguard Worker fprintf(stderr, "test %s (%d bytes)... ", f.name, len);
161*cc02d7e2SAndroid Build Coastguard Worker
162*cc02d7e2SAndroid Build Coastguard Worker /* positive */
163*cc02d7e2SAndroid Build Coastguard Worker error |= !f.func(data, len);
164*cc02d7e2SAndroid Build Coastguard Worker
165*cc02d7e2SAndroid Build Coastguard Worker /* negative */
166*cc02d7e2SAndroid Build Coastguard Worker if (len < 100 * 1024) {
167*cc02d7e2SAndroid Build Coastguard Worker for (int i = 0; i < len; ++i) {
168*cc02d7e2SAndroid Build Coastguard Worker data[i] += 0x80;
169*cc02d7e2SAndroid Build Coastguard Worker error |= f.func(data, len);
170*cc02d7e2SAndroid Build Coastguard Worker data[i] -= 0x80;
171*cc02d7e2SAndroid Build Coastguard Worker }
172*cc02d7e2SAndroid Build Coastguard Worker }
173*cc02d7e2SAndroid Build Coastguard Worker
174*cc02d7e2SAndroid Build Coastguard Worker printf("%s\n", error ? "FAIL" : "pass");
175*cc02d7e2SAndroid Build Coastguard Worker }
176*cc02d7e2SAndroid Build Coastguard Worker
177*cc02d7e2SAndroid Build Coastguard Worker /* ./ascii [test|bench] [alg] */
main(int argc,const char * argv[])178*cc02d7e2SAndroid Build Coastguard Worker int main(int argc, const char *argv[]) {
179*cc02d7e2SAndroid Build Coastguard Worker int do_test = 1, do_bench = 1;
180*cc02d7e2SAndroid Build Coastguard Worker const char *alg = NULL;
181*cc02d7e2SAndroid Build Coastguard Worker
182*cc02d7e2SAndroid Build Coastguard Worker if (argc > 1) {
183*cc02d7e2SAndroid Build Coastguard Worker do_bench &= !!strcmp(argv[1], "test");
184*cc02d7e2SAndroid Build Coastguard Worker do_test &= !!strcmp(argv[1], "bench");
185*cc02d7e2SAndroid Build Coastguard Worker }
186*cc02d7e2SAndroid Build Coastguard Worker
187*cc02d7e2SAndroid Build Coastguard Worker if (do_bench && argc > 2) alg = argv[2];
188*cc02d7e2SAndroid Build Coastguard Worker
189*cc02d7e2SAndroid Build Coastguard Worker const std::vector<int> size = {
190*cc02d7e2SAndroid Build Coastguard Worker 9, 16 + 1, 32 - 1, 128 + 1,
191*cc02d7e2SAndroid Build Coastguard Worker 1024 + 15, 16 * 1024 + 1, 64 * 1024 + 15, 1024 * 1024};
192*cc02d7e2SAndroid Build Coastguard Worker
193*cc02d7e2SAndroid Build Coastguard Worker int max_size = *std::max_element(size.begin(), size.end());
194*cc02d7e2SAndroid Build Coastguard Worker uint8_t *_data = new uint8_t[max_size + 1];
195*cc02d7e2SAndroid Build Coastguard Worker assert(((uintptr_t)_data & 7) == 0);
196*cc02d7e2SAndroid Build Coastguard Worker uint8_t *data = _data + 1; /* Unalign buffer address */
197*cc02d7e2SAndroid Build Coastguard Worker
198*cc02d7e2SAndroid Build Coastguard Worker _data[0] = 0;
199*cc02d7e2SAndroid Build Coastguard Worker load_test_buf(data, max_size);
200*cc02d7e2SAndroid Build Coastguard Worker
201*cc02d7e2SAndroid Build Coastguard Worker if (do_test) {
202*cc02d7e2SAndroid Build Coastguard Worker printf("==================== Test ====================\n");
203*cc02d7e2SAndroid Build Coastguard Worker for (int sz : size) {
204*cc02d7e2SAndroid Build Coastguard Worker for (auto &f : _f) {
205*cc02d7e2SAndroid Build Coastguard Worker test(f, data, sz);
206*cc02d7e2SAndroid Build Coastguard Worker }
207*cc02d7e2SAndroid Build Coastguard Worker }
208*cc02d7e2SAndroid Build Coastguard Worker }
209*cc02d7e2SAndroid Build Coastguard Worker
210*cc02d7e2SAndroid Build Coastguard Worker if (do_bench) {
211*cc02d7e2SAndroid Build Coastguard Worker printf("==================== Bench ====================\n");
212*cc02d7e2SAndroid Build Coastguard Worker for (int sz : size) {
213*cc02d7e2SAndroid Build Coastguard Worker for (auto &f : _f) {
214*cc02d7e2SAndroid Build Coastguard Worker if (!alg || strcmp(alg, f.name) == 0) bench(f, _data, sz);
215*cc02d7e2SAndroid Build Coastguard Worker }
216*cc02d7e2SAndroid Build Coastguard Worker printf("-----------------------------------------------\n");
217*cc02d7e2SAndroid Build Coastguard Worker }
218*cc02d7e2SAndroid Build Coastguard Worker }
219*cc02d7e2SAndroid Build Coastguard Worker
220*cc02d7e2SAndroid Build Coastguard Worker delete _data;
221*cc02d7e2SAndroid Build Coastguard Worker return 0;
222*cc02d7e2SAndroid Build Coastguard Worker }
223