1 /*-------------------------------------------------------------------------
2 * drawElements C++ Base Library
3 * -----------------------------
4 *
5 * Copyright 2015 The Android Open Source Project
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 *
19 *//*!
20 * \file
21 * \brief Cross-thread barrier.
22 *//*--------------------------------------------------------------------*/
23
24 #include "deSpinBarrier.hpp"
25 #include "deThread.hpp"
26 #include "deRandom.hpp"
27 #include "deInt32.h"
28
29 #include <vector>
30
31 namespace de
32 {
33
SpinBarrier(int32_t numThreads)34 SpinBarrier::SpinBarrier(int32_t numThreads)
35 : m_numCores(deGetNumAvailableLogicalCores())
36 , m_numThreads(numThreads)
37 , m_numEntered(0)
38 , m_numLeaving(0)
39 , m_numRemoved(0)
40 {
41 DE_ASSERT(numThreads > 0);
42 }
43
~SpinBarrier(void)44 SpinBarrier::~SpinBarrier(void)
45 {
46 DE_ASSERT(m_numEntered == 0 && m_numLeaving == 0);
47 }
48
reset(uint32_t numThreads)49 void SpinBarrier::reset(uint32_t numThreads)
50 {
51 // If last threads were removed, m_numEntered > 0 && m_numRemoved > 0
52 DE_ASSERT(m_numLeaving == 0);
53 DE_ASSERT(numThreads > 0);
54 m_numThreads = numThreads;
55 m_numEntered = 0;
56 m_numLeaving = 0;
57 m_numRemoved = 0;
58 }
59
getWaitMode(SpinBarrier::WaitMode requested,uint32_t numCores,int32_t numThreads)60 inline SpinBarrier::WaitMode getWaitMode(SpinBarrier::WaitMode requested, uint32_t numCores, int32_t numThreads)
61 {
62 if (requested == SpinBarrier::WAIT_MODE_AUTO)
63 return ((uint32_t)numThreads <= numCores) ? SpinBarrier::WAIT_MODE_BUSY : SpinBarrier::WAIT_MODE_YIELD;
64 else
65 return requested;
66 }
67
wait(SpinBarrier::WaitMode mode)68 inline void wait(SpinBarrier::WaitMode mode)
69 {
70 DE_ASSERT(mode == SpinBarrier::WAIT_MODE_YIELD || mode == SpinBarrier::WAIT_MODE_BUSY);
71
72 if (mode == SpinBarrier::WAIT_MODE_YIELD)
73 deYield();
74 }
75
sync(WaitMode requestedMode)76 void SpinBarrier::sync(WaitMode requestedMode)
77 {
78 const int32_t cachedNumThreads = m_numThreads;
79 const WaitMode waitMode = getWaitMode(requestedMode, m_numCores, cachedNumThreads);
80
81 deMemoryReadWriteFence();
82
83 // m_numEntered must not be touched until all threads have had
84 // a chance to observe it being 0.
85 if (m_numLeaving > 0)
86 {
87 for (;;)
88 {
89 if (m_numLeaving == 0)
90 break;
91
92 wait(waitMode);
93 }
94 }
95
96 // If m_numRemoved > 0, m_numThreads will decrease. If m_numThreads is decreased
97 // just after atomicOp and before comparison, the branch could be taken by multiple
98 // threads. Since m_numThreads only changes if all threads are inside the spinbarrier,
99 // cached value at snapshotted at the beginning of the function will be equal for
100 // all threads.
101 if (deAtomicIncrement32(&m_numEntered) == cachedNumThreads)
102 {
103 // Release all waiting threads. Since this thread has not been removed, m_numLeaving will
104 // be >= 1 until m_numLeaving is decremented at the end of this function.
105 m_numThreads = m_numThreads - m_numRemoved;
106 m_numLeaving = m_numThreads;
107 m_numRemoved = 0;
108
109 deMemoryReadWriteFence();
110 m_numEntered = 0;
111 }
112 else
113 {
114 for (;;)
115 {
116 if (m_numEntered == 0)
117 break;
118
119 wait(waitMode);
120 }
121 }
122
123 deAtomicDecrement32(&m_numLeaving);
124 deMemoryReadWriteFence();
125 }
126
removeThread(WaitMode requestedMode)127 void SpinBarrier::removeThread(WaitMode requestedMode)
128 {
129 const int32_t cachedNumThreads = m_numThreads;
130 const WaitMode waitMode = getWaitMode(requestedMode, m_numCores, cachedNumThreads);
131
132 // Wait for other threads exiting previous barrier
133 if (m_numLeaving > 0)
134 {
135 for (;;)
136 {
137 if (m_numLeaving == 0)
138 break;
139
140 wait(waitMode);
141 }
142 }
143
144 // Ask for last thread entering barrier to adjust thread count
145 deAtomicIncrement32(&m_numRemoved);
146
147 // See sync() - use cached value
148 if (deAtomicIncrement32(&m_numEntered) == cachedNumThreads)
149 {
150 // Release all waiting threads.
151 m_numThreads = m_numThreads - m_numRemoved;
152 m_numLeaving = m_numThreads;
153 m_numRemoved = 0;
154
155 deMemoryReadWriteFence();
156 m_numEntered = 0;
157 }
158 }
159
160 namespace
161 {
162
singleThreadTest(SpinBarrier::WaitMode mode)163 void singleThreadTest(SpinBarrier::WaitMode mode)
164 {
165 SpinBarrier barrier(1);
166
167 barrier.sync(mode);
168 barrier.sync(mode);
169 barrier.sync(mode);
170 }
171
172 class TestThread : public de::Thread
173 {
174 public:
TestThread(SpinBarrier & barrier,volatile int32_t * sharedVar,int numThreads,int threadNdx)175 TestThread(SpinBarrier &barrier, volatile int32_t *sharedVar, int numThreads, int threadNdx)
176 : m_barrier(barrier)
177 , m_sharedVar(sharedVar)
178 , m_numThreads(numThreads)
179 , m_threadNdx(threadNdx)
180 , m_busyOk((uint32_t)m_numThreads <= deGetNumAvailableLogicalCores())
181 {
182 }
183
run(void)184 void run(void)
185 {
186 const int numIters = 10000;
187 de::Random rnd(deInt32Hash(m_numThreads) ^ deInt32Hash(m_threadNdx));
188
189 for (int iterNdx = 0; iterNdx < numIters; iterNdx++)
190 {
191 // Phase 1: count up
192 deAtomicIncrement32(m_sharedVar);
193
194 // Verify
195 m_barrier.sync(getWaitMode(rnd));
196
197 DE_TEST_ASSERT(*m_sharedVar == m_numThreads);
198
199 m_barrier.sync(getWaitMode(rnd));
200
201 // Phase 2: count down
202 deAtomicDecrement32(m_sharedVar);
203
204 // Verify
205 m_barrier.sync(getWaitMode(rnd));
206
207 DE_TEST_ASSERT(*m_sharedVar == 0);
208
209 m_barrier.sync(getWaitMode(rnd));
210 }
211 }
212
213 private:
214 SpinBarrier &m_barrier;
215 volatile int32_t *const m_sharedVar;
216 const int m_numThreads;
217 const int m_threadNdx;
218 const bool m_busyOk;
219
getWaitMode(de::Random & rnd)220 SpinBarrier::WaitMode getWaitMode(de::Random &rnd)
221 {
222 static const SpinBarrier::WaitMode s_allModes[] = {
223 SpinBarrier::WAIT_MODE_YIELD,
224 SpinBarrier::WAIT_MODE_AUTO,
225 SpinBarrier::WAIT_MODE_BUSY,
226 };
227 const int numModes = DE_LENGTH_OF_ARRAY(s_allModes) - (m_busyOk ? 0 : 1);
228
229 return rnd.choose<SpinBarrier::WaitMode>(DE_ARRAY_BEGIN(s_allModes), DE_ARRAY_BEGIN(s_allModes) + numModes);
230 }
231 };
232
multiThreadTest(int numThreads)233 void multiThreadTest(int numThreads)
234 {
235 SpinBarrier barrier(numThreads);
236 volatile int32_t sharedVar = 0;
237 std::vector<TestThread *> threads(numThreads, static_cast<TestThread *>(DE_NULL));
238
239 for (int ndx = 0; ndx < numThreads; ndx++)
240 {
241 threads[ndx] = new TestThread(barrier, &sharedVar, numThreads, ndx);
242 DE_TEST_ASSERT(threads[ndx]);
243 threads[ndx]->start();
244 }
245
246 for (int ndx = 0; ndx < numThreads; ndx++)
247 {
248 threads[ndx]->join();
249 delete threads[ndx];
250 }
251
252 DE_TEST_ASSERT(sharedVar == 0);
253 }
254
singleThreadRemoveTest(SpinBarrier::WaitMode mode)255 void singleThreadRemoveTest(SpinBarrier::WaitMode mode)
256 {
257 SpinBarrier barrier(3);
258
259 barrier.removeThread(mode);
260 barrier.removeThread(mode);
261 barrier.sync(mode);
262 barrier.removeThread(mode);
263
264 barrier.reset(1);
265 barrier.sync(mode);
266
267 barrier.reset(2);
268 barrier.removeThread(mode);
269 barrier.sync(mode);
270 }
271
272 class TestExitThread : public de::Thread
273 {
274 public:
TestExitThread(SpinBarrier & barrier,int numThreads,int threadNdx,SpinBarrier::WaitMode waitMode)275 TestExitThread(SpinBarrier &barrier, int numThreads, int threadNdx, SpinBarrier::WaitMode waitMode)
276 : m_barrier(barrier)
277 , m_numThreads(numThreads)
278 , m_threadNdx(threadNdx)
279 , m_waitMode(waitMode)
280 {
281 }
282
run(void)283 void run(void)
284 {
285 const int numIters = 10000;
286 de::Random rnd(deInt32Hash(m_numThreads) ^ deInt32Hash(m_threadNdx) ^ deInt32Hash((int32_t)m_waitMode));
287 const int invExitProb = 1000;
288
289 for (int iterNdx = 0; iterNdx < numIters; iterNdx++)
290 {
291 if (rnd.getInt(0, invExitProb) == 0)
292 {
293 m_barrier.removeThread(m_waitMode);
294 break;
295 }
296 else
297 m_barrier.sync(m_waitMode);
298 }
299 }
300
301 private:
302 SpinBarrier &m_barrier;
303 const int m_numThreads;
304 const int m_threadNdx;
305 const SpinBarrier::WaitMode m_waitMode;
306 };
307
multiThreadRemoveTest(int numThreads,SpinBarrier::WaitMode waitMode)308 void multiThreadRemoveTest(int numThreads, SpinBarrier::WaitMode waitMode)
309 {
310 SpinBarrier barrier(numThreads);
311 std::vector<TestExitThread *> threads(numThreads, static_cast<TestExitThread *>(DE_NULL));
312
313 for (int ndx = 0; ndx < numThreads; ndx++)
314 {
315 threads[ndx] = new TestExitThread(barrier, numThreads, ndx, waitMode);
316 DE_TEST_ASSERT(threads[ndx]);
317 threads[ndx]->start();
318 }
319
320 for (int ndx = 0; ndx < numThreads; ndx++)
321 {
322 threads[ndx]->join();
323 delete threads[ndx];
324 }
325 }
326
327 } // namespace
328
SpinBarrier_selfTest(void)329 void SpinBarrier_selfTest(void)
330 {
331 singleThreadTest(SpinBarrier::WAIT_MODE_YIELD);
332 singleThreadTest(SpinBarrier::WAIT_MODE_BUSY);
333 singleThreadTest(SpinBarrier::WAIT_MODE_AUTO);
334 multiThreadTest(1);
335 multiThreadTest(2);
336 multiThreadTest(4);
337 multiThreadTest(8);
338 multiThreadTest(16);
339
340 singleThreadRemoveTest(SpinBarrier::WAIT_MODE_YIELD);
341 singleThreadRemoveTest(SpinBarrier::WAIT_MODE_BUSY);
342 singleThreadRemoveTest(SpinBarrier::WAIT_MODE_AUTO);
343 multiThreadRemoveTest(1, SpinBarrier::WAIT_MODE_BUSY);
344 multiThreadRemoveTest(2, SpinBarrier::WAIT_MODE_AUTO);
345 multiThreadRemoveTest(4, SpinBarrier::WAIT_MODE_AUTO);
346 multiThreadRemoveTest(8, SpinBarrier::WAIT_MODE_AUTO);
347 multiThreadRemoveTest(16, SpinBarrier::WAIT_MODE_AUTO);
348 multiThreadRemoveTest(1, SpinBarrier::WAIT_MODE_YIELD);
349 multiThreadRemoveTest(2, SpinBarrier::WAIT_MODE_YIELD);
350 multiThreadRemoveTest(4, SpinBarrier::WAIT_MODE_YIELD);
351 multiThreadRemoveTest(8, SpinBarrier::WAIT_MODE_YIELD);
352 multiThreadRemoveTest(16, SpinBarrier::WAIT_MODE_YIELD);
353 }
354
355 } // namespace de
356