xref: /aosp_15_r20/external/deqp/framework/delibs/decpp/deSpinBarrier.cpp (revision 35238bce31c2a825756842865a792f8cf7f89930)
1 /*-------------------------------------------------------------------------
2  * drawElements C++ Base Library
3  * -----------------------------
4  *
5  * Copyright 2015 The Android Open Source Project
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief Cross-thread barrier.
22  *//*--------------------------------------------------------------------*/
23 
24 #include "deSpinBarrier.hpp"
25 #include "deThread.hpp"
26 #include "deRandom.hpp"
27 #include "deInt32.h"
28 
29 #include <vector>
30 
31 namespace de
32 {
33 
SpinBarrier(int32_t numThreads)34 SpinBarrier::SpinBarrier(int32_t numThreads)
35     : m_numCores(deGetNumAvailableLogicalCores())
36     , m_numThreads(numThreads)
37     , m_numEntered(0)
38     , m_numLeaving(0)
39     , m_numRemoved(0)
40 {
41     DE_ASSERT(numThreads > 0);
42 }
43 
~SpinBarrier(void)44 SpinBarrier::~SpinBarrier(void)
45 {
46     DE_ASSERT(m_numEntered == 0 && m_numLeaving == 0);
47 }
48 
reset(uint32_t numThreads)49 void SpinBarrier::reset(uint32_t numThreads)
50 {
51     // If last threads were removed, m_numEntered > 0 && m_numRemoved > 0
52     DE_ASSERT(m_numLeaving == 0);
53     DE_ASSERT(numThreads > 0);
54     m_numThreads = numThreads;
55     m_numEntered = 0;
56     m_numLeaving = 0;
57     m_numRemoved = 0;
58 }
59 
getWaitMode(SpinBarrier::WaitMode requested,uint32_t numCores,int32_t numThreads)60 inline SpinBarrier::WaitMode getWaitMode(SpinBarrier::WaitMode requested, uint32_t numCores, int32_t numThreads)
61 {
62     if (requested == SpinBarrier::WAIT_MODE_AUTO)
63         return ((uint32_t)numThreads <= numCores) ? SpinBarrier::WAIT_MODE_BUSY : SpinBarrier::WAIT_MODE_YIELD;
64     else
65         return requested;
66 }
67 
wait(SpinBarrier::WaitMode mode)68 inline void wait(SpinBarrier::WaitMode mode)
69 {
70     DE_ASSERT(mode == SpinBarrier::WAIT_MODE_YIELD || mode == SpinBarrier::WAIT_MODE_BUSY);
71 
72     if (mode == SpinBarrier::WAIT_MODE_YIELD)
73         deYield();
74 }
75 
sync(WaitMode requestedMode)76 void SpinBarrier::sync(WaitMode requestedMode)
77 {
78     const int32_t cachedNumThreads = m_numThreads;
79     const WaitMode waitMode        = getWaitMode(requestedMode, m_numCores, cachedNumThreads);
80 
81     deMemoryReadWriteFence();
82 
83     // m_numEntered must not be touched until all threads have had
84     // a chance to observe it being 0.
85     if (m_numLeaving > 0)
86     {
87         for (;;)
88         {
89             if (m_numLeaving == 0)
90                 break;
91 
92             wait(waitMode);
93         }
94     }
95 
96     // If m_numRemoved > 0, m_numThreads will decrease. If m_numThreads is decreased
97     // just after atomicOp and before comparison, the branch could be taken by multiple
98     // threads. Since m_numThreads only changes if all threads are inside the spinbarrier,
99     // cached value at snapshotted at the beginning of the function will be equal for
100     // all threads.
101     if (deAtomicIncrement32(&m_numEntered) == cachedNumThreads)
102     {
103         // Release all waiting threads. Since this thread has not been removed, m_numLeaving will
104         // be >= 1 until m_numLeaving is decremented at the end of this function.
105         m_numThreads = m_numThreads - m_numRemoved;
106         m_numLeaving = m_numThreads;
107         m_numRemoved = 0;
108 
109         deMemoryReadWriteFence();
110         m_numEntered = 0;
111     }
112     else
113     {
114         for (;;)
115         {
116             if (m_numEntered == 0)
117                 break;
118 
119             wait(waitMode);
120         }
121     }
122 
123     deAtomicDecrement32(&m_numLeaving);
124     deMemoryReadWriteFence();
125 }
126 
removeThread(WaitMode requestedMode)127 void SpinBarrier::removeThread(WaitMode requestedMode)
128 {
129     const int32_t cachedNumThreads = m_numThreads;
130     const WaitMode waitMode        = getWaitMode(requestedMode, m_numCores, cachedNumThreads);
131 
132     // Wait for other threads exiting previous barrier
133     if (m_numLeaving > 0)
134     {
135         for (;;)
136         {
137             if (m_numLeaving == 0)
138                 break;
139 
140             wait(waitMode);
141         }
142     }
143 
144     // Ask for last thread entering barrier to adjust thread count
145     deAtomicIncrement32(&m_numRemoved);
146 
147     // See sync() - use cached value
148     if (deAtomicIncrement32(&m_numEntered) == cachedNumThreads)
149     {
150         // Release all waiting threads.
151         m_numThreads = m_numThreads - m_numRemoved;
152         m_numLeaving = m_numThreads;
153         m_numRemoved = 0;
154 
155         deMemoryReadWriteFence();
156         m_numEntered = 0;
157     }
158 }
159 
160 namespace
161 {
162 
singleThreadTest(SpinBarrier::WaitMode mode)163 void singleThreadTest(SpinBarrier::WaitMode mode)
164 {
165     SpinBarrier barrier(1);
166 
167     barrier.sync(mode);
168     barrier.sync(mode);
169     barrier.sync(mode);
170 }
171 
172 class TestThread : public de::Thread
173 {
174 public:
TestThread(SpinBarrier & barrier,volatile int32_t * sharedVar,int numThreads,int threadNdx)175     TestThread(SpinBarrier &barrier, volatile int32_t *sharedVar, int numThreads, int threadNdx)
176         : m_barrier(barrier)
177         , m_sharedVar(sharedVar)
178         , m_numThreads(numThreads)
179         , m_threadNdx(threadNdx)
180         , m_busyOk((uint32_t)m_numThreads <= deGetNumAvailableLogicalCores())
181     {
182     }
183 
run(void)184     void run(void)
185     {
186         const int numIters = 10000;
187         de::Random rnd(deInt32Hash(m_numThreads) ^ deInt32Hash(m_threadNdx));
188 
189         for (int iterNdx = 0; iterNdx < numIters; iterNdx++)
190         {
191             // Phase 1: count up
192             deAtomicIncrement32(m_sharedVar);
193 
194             // Verify
195             m_barrier.sync(getWaitMode(rnd));
196 
197             DE_TEST_ASSERT(*m_sharedVar == m_numThreads);
198 
199             m_barrier.sync(getWaitMode(rnd));
200 
201             // Phase 2: count down
202             deAtomicDecrement32(m_sharedVar);
203 
204             // Verify
205             m_barrier.sync(getWaitMode(rnd));
206 
207             DE_TEST_ASSERT(*m_sharedVar == 0);
208 
209             m_barrier.sync(getWaitMode(rnd));
210         }
211     }
212 
213 private:
214     SpinBarrier &m_barrier;
215     volatile int32_t *const m_sharedVar;
216     const int m_numThreads;
217     const int m_threadNdx;
218     const bool m_busyOk;
219 
getWaitMode(de::Random & rnd)220     SpinBarrier::WaitMode getWaitMode(de::Random &rnd)
221     {
222         static const SpinBarrier::WaitMode s_allModes[] = {
223             SpinBarrier::WAIT_MODE_YIELD,
224             SpinBarrier::WAIT_MODE_AUTO,
225             SpinBarrier::WAIT_MODE_BUSY,
226         };
227         const int numModes = DE_LENGTH_OF_ARRAY(s_allModes) - (m_busyOk ? 0 : 1);
228 
229         return rnd.choose<SpinBarrier::WaitMode>(DE_ARRAY_BEGIN(s_allModes), DE_ARRAY_BEGIN(s_allModes) + numModes);
230     }
231 };
232 
multiThreadTest(int numThreads)233 void multiThreadTest(int numThreads)
234 {
235     SpinBarrier barrier(numThreads);
236     volatile int32_t sharedVar = 0;
237     std::vector<TestThread *> threads(numThreads, static_cast<TestThread *>(DE_NULL));
238 
239     for (int ndx = 0; ndx < numThreads; ndx++)
240     {
241         threads[ndx] = new TestThread(barrier, &sharedVar, numThreads, ndx);
242         DE_TEST_ASSERT(threads[ndx]);
243         threads[ndx]->start();
244     }
245 
246     for (int ndx = 0; ndx < numThreads; ndx++)
247     {
248         threads[ndx]->join();
249         delete threads[ndx];
250     }
251 
252     DE_TEST_ASSERT(sharedVar == 0);
253 }
254 
singleThreadRemoveTest(SpinBarrier::WaitMode mode)255 void singleThreadRemoveTest(SpinBarrier::WaitMode mode)
256 {
257     SpinBarrier barrier(3);
258 
259     barrier.removeThread(mode);
260     barrier.removeThread(mode);
261     barrier.sync(mode);
262     barrier.removeThread(mode);
263 
264     barrier.reset(1);
265     barrier.sync(mode);
266 
267     barrier.reset(2);
268     barrier.removeThread(mode);
269     barrier.sync(mode);
270 }
271 
272 class TestExitThread : public de::Thread
273 {
274 public:
TestExitThread(SpinBarrier & barrier,int numThreads,int threadNdx,SpinBarrier::WaitMode waitMode)275     TestExitThread(SpinBarrier &barrier, int numThreads, int threadNdx, SpinBarrier::WaitMode waitMode)
276         : m_barrier(barrier)
277         , m_numThreads(numThreads)
278         , m_threadNdx(threadNdx)
279         , m_waitMode(waitMode)
280     {
281     }
282 
run(void)283     void run(void)
284     {
285         const int numIters = 10000;
286         de::Random rnd(deInt32Hash(m_numThreads) ^ deInt32Hash(m_threadNdx) ^ deInt32Hash((int32_t)m_waitMode));
287         const int invExitProb = 1000;
288 
289         for (int iterNdx = 0; iterNdx < numIters; iterNdx++)
290         {
291             if (rnd.getInt(0, invExitProb) == 0)
292             {
293                 m_barrier.removeThread(m_waitMode);
294                 break;
295             }
296             else
297                 m_barrier.sync(m_waitMode);
298         }
299     }
300 
301 private:
302     SpinBarrier &m_barrier;
303     const int m_numThreads;
304     const int m_threadNdx;
305     const SpinBarrier::WaitMode m_waitMode;
306 };
307 
multiThreadRemoveTest(int numThreads,SpinBarrier::WaitMode waitMode)308 void multiThreadRemoveTest(int numThreads, SpinBarrier::WaitMode waitMode)
309 {
310     SpinBarrier barrier(numThreads);
311     std::vector<TestExitThread *> threads(numThreads, static_cast<TestExitThread *>(DE_NULL));
312 
313     for (int ndx = 0; ndx < numThreads; ndx++)
314     {
315         threads[ndx] = new TestExitThread(barrier, numThreads, ndx, waitMode);
316         DE_TEST_ASSERT(threads[ndx]);
317         threads[ndx]->start();
318     }
319 
320     for (int ndx = 0; ndx < numThreads; ndx++)
321     {
322         threads[ndx]->join();
323         delete threads[ndx];
324     }
325 }
326 
327 } // namespace
328 
SpinBarrier_selfTest(void)329 void SpinBarrier_selfTest(void)
330 {
331     singleThreadTest(SpinBarrier::WAIT_MODE_YIELD);
332     singleThreadTest(SpinBarrier::WAIT_MODE_BUSY);
333     singleThreadTest(SpinBarrier::WAIT_MODE_AUTO);
334     multiThreadTest(1);
335     multiThreadTest(2);
336     multiThreadTest(4);
337     multiThreadTest(8);
338     multiThreadTest(16);
339 
340     singleThreadRemoveTest(SpinBarrier::WAIT_MODE_YIELD);
341     singleThreadRemoveTest(SpinBarrier::WAIT_MODE_BUSY);
342     singleThreadRemoveTest(SpinBarrier::WAIT_MODE_AUTO);
343     multiThreadRemoveTest(1, SpinBarrier::WAIT_MODE_BUSY);
344     multiThreadRemoveTest(2, SpinBarrier::WAIT_MODE_AUTO);
345     multiThreadRemoveTest(4, SpinBarrier::WAIT_MODE_AUTO);
346     multiThreadRemoveTest(8, SpinBarrier::WAIT_MODE_AUTO);
347     multiThreadRemoveTest(16, SpinBarrier::WAIT_MODE_AUTO);
348     multiThreadRemoveTest(1, SpinBarrier::WAIT_MODE_YIELD);
349     multiThreadRemoveTest(2, SpinBarrier::WAIT_MODE_YIELD);
350     multiThreadRemoveTest(4, SpinBarrier::WAIT_MODE_YIELD);
351     multiThreadRemoveTest(8, SpinBarrier::WAIT_MODE_YIELD);
352     multiThreadRemoveTest(16, SpinBarrier::WAIT_MODE_YIELD);
353 }
354 
355 } // namespace de
356