xref: /aosp_15_r20/external/mesa3d/src/mesa/main/sse_minmax.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
/*
 * Copyright © 2014 Timothy Arceri
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Author:
 *    Timothy Arceri <[email protected]>
 *
 */
27*61046927SAndroid Build Coastguard Worker 
#include "main/sse_minmax.h"
#include "util/macros.h"
#include <smmintrin.h>
#include <stdint.h>
32*61046927SAndroid Build Coastguard Worker 
/**
 * Find the smallest and largest value in an array of unsigned integers.
 *
 * The array is scanned in three phases: leading elements are handled one at
 * a time until the pointer reaches a 16-byte boundary, the aligned middle is
 * handled four elements per iteration with the SSE4.1 unsigned min/max
 * instructions, and any leftover elements are handled one at a time again.
 *
 * \param ui_indices  array of values to scan
 * \param min_index   receives the smallest value found; ~0U if \p count is 0
 * \param max_index   receives the largest value found; 0 if \p count is 0
 * \param count       number of elements in \p ui_indices
 */
void
_mesa_uint_array_min_max(const unsigned *ui_indices, unsigned *min_index,
                         unsigned *max_index, const unsigned count)
{
   unsigned max_ui = 0;
   unsigned min_ui = ~0U;
   unsigned remaining = count;

   /* Handle the first few values without SSE until the pointer is aligned,
    * because the vector loop below uses aligned 16-byte loads.
    */
   while (((uintptr_t)ui_indices & 15) && remaining) {
      if (*ui_indices > max_ui)
         max_ui = *ui_indices;
      if (*ui_indices < min_ui)
         min_ui = *ui_indices;

      remaining--;
      ui_indices++;
   }

   /* TODO: The actual threshold for SSE being useful may be higher than 8.
    * Some careful microbenchmarks and measurement are required to
    * find the actual tipping point.
    */
   if (remaining >= 8) {
      /* Largest multiple of four that fits in what is left. */
      const unsigned vec_count = remaining & ~3u;
      const __m128i *vec_ptr = (const __m128i *)ui_indices;
      __m128i max_ui4 = _mm_setzero_si128();
      __m128i min_ui4 = _mm_set1_epi32(-1);   /* all lanes 0xffffffff */
      unsigned max_lanes[4];
      unsigned min_lanes[4];

      for (unsigned v = 0; v < vec_count / 4; v++) {
         const __m128i ui_indices4 = _mm_load_si128(&vec_ptr[v]);

         max_ui4 = _mm_max_epu32(ui_indices4, max_ui4);
         min_ui4 = _mm_min_epu32(ui_indices4, min_ui4);
      }

      /* Spill the per-lane results; unaligned stores place no alignment
       * requirement on the local arrays.
       */
      _mm_storeu_si128((__m128i *)max_lanes, max_ui4);
      _mm_storeu_si128((__m128i *)min_lanes, min_ui4);

      /* Fold the four lanes into the running scalar results. */
      for (unsigned lane = 0; lane < 4; lane++) {
         if (max_lanes[lane] > max_ui)
            max_ui = max_lanes[lane];
         if (min_lanes[lane] < min_ui)
            min_ui = min_lanes[lane];
      }

      /* Step past everything the vector loop consumed. */
      ui_indices += vec_count;
      remaining -= vec_count;
   }

   /* Scalar tail: fewer than four elements after the vector loop, or the
    * whole remainder when it was too short for the vector path.
    */
   for (unsigned i = 0; i < remaining; i++) {
      if (ui_indices[i] > max_ui)
         max_ui = ui_indices[i];
      if (ui_indices[i] < min_ui)
         min_ui = ui_indices[i];
   }

   *min_index = min_ui;
   *max_index = max_ui;
}
96