1*61046927SAndroid Build Coastguard Worker /*
2*61046927SAndroid Build Coastguard Worker * Copyright © 2014 Timothy Arceri
3*61046927SAndroid Build Coastguard Worker *
4*61046927SAndroid Build Coastguard Worker * Permission is hereby granted, free of charge, to any person obtaining a
5*61046927SAndroid Build Coastguard Worker * copy of this software and associated documentation files (the "Software"),
6*61046927SAndroid Build Coastguard Worker * to deal in the Software without restriction, including without limitation
7*61046927SAndroid Build Coastguard Worker * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8*61046927SAndroid Build Coastguard Worker * and/or sell copies of the Software, and to permit persons to whom the
9*61046927SAndroid Build Coastguard Worker * Software is furnished to do so, subject to the following conditions:
10*61046927SAndroid Build Coastguard Worker *
11*61046927SAndroid Build Coastguard Worker * The above copyright notice and this permission notice (including the next
12*61046927SAndroid Build Coastguard Worker * paragraph) shall be included in all copies or substantial portions of the
13*61046927SAndroid Build Coastguard Worker * Software.
14*61046927SAndroid Build Coastguard Worker *
15*61046927SAndroid Build Coastguard Worker * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16*61046927SAndroid Build Coastguard Worker * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17*61046927SAndroid Build Coastguard Worker * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18*61046927SAndroid Build Coastguard Worker * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19*61046927SAndroid Build Coastguard Worker * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20*61046927SAndroid Build Coastguard Worker * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21*61046927SAndroid Build Coastguard Worker * IN THE SOFTWARE.
22*61046927SAndroid Build Coastguard Worker *
23*61046927SAndroid Build Coastguard Worker * Author:
24*61046927SAndroid Build Coastguard Worker * Timothy Arceri <[email protected]>
25*61046927SAndroid Build Coastguard Worker *
26*61046927SAndroid Build Coastguard Worker */
27*61046927SAndroid Build Coastguard Worker
28*61046927SAndroid Build Coastguard Worker #include "main/sse_minmax.h"
29*61046927SAndroid Build Coastguard Worker #include "util/macros.h"
30*61046927SAndroid Build Coastguard Worker #include <smmintrin.h>
31*61046927SAndroid Build Coastguard Worker #include <stdint.h>
32*61046927SAndroid Build Coastguard Worker
/*
 * Compute the smallest and largest value stored in an array of unsigned
 * integers.
 *
 * The scan runs in three phases:
 *   1. a scalar prologue that consumes elements until the read pointer sits
 *      on a 16-byte boundary (or the input is exhausted),
 *   2. an SSE4.1 loop handling four elements per iteration, entered only
 *      when at least 8 elements remain after the prologue,
 *   3. a scalar epilogue for whatever the vector loop did not cover.
 *
 * ui_indices: array to scan
 * min_index:  receives the smallest value found
 * max_index:  receives the largest value found
 * count:      number of elements in ui_indices
 *
 * With count == 0 nothing is read; *min_index is set to ~0U and *max_index
 * to 0.
 */
void
_mesa_uint_array_min_max(const unsigned *ui_indices, unsigned *min_index,
                         unsigned *max_index, const unsigned count)
{
   unsigned lowest = ~0U;
   unsigned highest = 0;
   unsigned remaining = count;
   unsigned pos = 0;

   /* Scalar prologue: step one element at a time until the pointer is
    * 16-byte aligned, as required by the aligned vector loads below.
    */
   for (; remaining != 0 && ((uintptr_t)ui_indices & 15) != 0;
        remaining--, ui_indices++) {
      const unsigned value = *ui_indices;

      highest = value > highest ? value : highest;
      lowest = value < lowest ? value : lowest;
   }

   /* TODO: The actual threshold for SSE being useful may be higher than 8.
    * Some careful microbenchmarks and measurement are required to
    * find the actual tipping point.
    */
   if (remaining >= 8) {
      alignas(16) unsigned highest_lanes[4];
      alignas(16) unsigned lowest_lanes[4];
      const __m128i *const vectors = (const __m128i *)ui_indices;
      /* Largest multiple of four that fits in the remaining elements. */
      const unsigned vector_elems = remaining & ~0x3;
      const unsigned vector_iters = vector_elems / 4;
      __m128i highest4 = _mm_setzero_si128();
      __m128i lowest4 = _mm_set1_epi32(~0U);

      /* Track a running per-lane maximum and minimum. */
      for (unsigned v = 0; v < vector_iters; v++) {
         const __m128i chunk = _mm_load_si128(&vectors[v]);

         highest4 = _mm_max_epu32(chunk, highest4);
         lowest4 = _mm_min_epu32(chunk, lowest4);
      }

      /* Spill the four lanes and fold them into the scalar results. */
      _mm_store_si128((__m128i *)highest_lanes, highest4);
      _mm_store_si128((__m128i *)lowest_lanes, lowest4);

      for (unsigned lane = 0; lane < 4; lane++) {
         highest = highest_lanes[lane] > highest ? highest_lanes[lane]
                                                 : highest;
         lowest = lowest_lanes[lane] < lowest ? lowest_lanes[lane] : lowest;
      }

      /* The epilogue resumes right after the vectorised elements. */
      pos = vector_elems;
   }

   /* Scalar epilogue: leftover elements (or everything, when the vector
    * path was skipped).
    */
   for (; pos < remaining; pos++) {
      const unsigned value = ui_indices[pos];

      highest = value > highest ? value : highest;
      lowest = value < lowest ? value : lowest;
   }

   *min_index = lowest;
   *max_index = highest;
}
96