1*6467f958SSadaf Ebrahimi //
2*6467f958SSadaf Ebrahimi // Copyright (c) 2017 The Khronos Group Inc.
3*6467f958SSadaf Ebrahimi //
4*6467f958SSadaf Ebrahimi // Licensed under the Apache License, Version 2.0 (the "License");
5*6467f958SSadaf Ebrahimi // you may not use this file except in compliance with the License.
6*6467f958SSadaf Ebrahimi // You may obtain a copy of the License at
7*6467f958SSadaf Ebrahimi //
8*6467f958SSadaf Ebrahimi // http://www.apache.org/licenses/LICENSE-2.0
9*6467f958SSadaf Ebrahimi //
10*6467f958SSadaf Ebrahimi // Unless required by applicable law or agreed to in writing, software
11*6467f958SSadaf Ebrahimi // distributed under the License is distributed on an "AS IS" BASIS,
12*6467f958SSadaf Ebrahimi // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13*6467f958SSadaf Ebrahimi // See the License for the specific language governing permissions and
14*6467f958SSadaf Ebrahimi // limitations under the License.
15*6467f958SSadaf Ebrahimi //
16*6467f958SSadaf Ebrahimi #include "ThreadPool.h"
17*6467f958SSadaf Ebrahimi #include "errorHelpers.h"
18*6467f958SSadaf Ebrahimi #include "fpcontrol.h"
19*6467f958SSadaf Ebrahimi #include <stdio.h>
20*6467f958SSadaf Ebrahimi #include <stdlib.h>
21*6467f958SSadaf Ebrahimi
22*6467f958SSadaf Ebrahimi #if defined(__APPLE__) || defined(__linux__) || defined(_WIN32)
23*6467f958SSadaf Ebrahimi // or any other POSIX system
24*6467f958SSadaf Ebrahimi
25*6467f958SSadaf Ebrahimi #include <atomic>
26*6467f958SSadaf Ebrahimi #include <vector>
27*6467f958SSadaf Ebrahimi
28*6467f958SSadaf Ebrahimi #if defined(_WIN32)
29*6467f958SSadaf Ebrahimi #include <windows.h>
30*6467f958SSadaf Ebrahimi #if defined(_MSC_VER)
31*6467f958SSadaf Ebrahimi #include <intrin.h>
32*6467f958SSadaf Ebrahimi #endif
33*6467f958SSadaf Ebrahimi #include "mingw_compat.h"
34*6467f958SSadaf Ebrahimi #include <process.h>
35*6467f958SSadaf Ebrahimi #else // !_WIN32
36*6467f958SSadaf Ebrahimi #include <pthread.h>
37*6467f958SSadaf Ebrahimi #include <unistd.h>
38*6467f958SSadaf Ebrahimi #include <sys/errno.h>
39*6467f958SSadaf Ebrahimi #ifdef __linux__
40*6467f958SSadaf Ebrahimi #include <sched.h>
41*6467f958SSadaf Ebrahimi #endif
42*6467f958SSadaf Ebrahimi #endif // !_WIN32
43*6467f958SSadaf Ebrahimi
// Forward declarations.
// The worker entry point has a different signature per platform: it returns
// nothing on Windows and a void pointer on pthread platforms.
#ifdef _WIN32
void ThreadPool_WorkerFunc(void *p);
#else
void *ThreadPool_WorkerFunc(void *p);
#endif
void ThreadPool_Init(void);
void ThreadPool_Exit(void);

// Lock backing the "super heavy" emulated atomic operations. It is needed on
// MinGW32, and on any toolchain that is neither GCC-compatible nor MSVC;
// GCC and MSVC use compiler intrinsics instead and need no lock.
#if defined(__MINGW32__)
CRITICAL_SECTION gAtomicLock;
#elif !defined(__GNUC__) && !defined(_MSC_VER)
pthread_mutex_t gAtomicLock;
#endif

#if !defined(_WIN32)
// pthread_t handles created in ThreadPool_Init(), remembered so that
// ThreadPool_Exit() can join them and avoid leaking threads.
static std::vector<pthread_t> pthreads;
#endif
67*6467f958SSadaf Ebrahimi
68*6467f958SSadaf Ebrahimi // Atomic add operator with mem barrier. Mem barrier needed to protect state
69*6467f958SSadaf Ebrahimi // modified by the worker functions.
ThreadPool_AtomicAdd(volatile cl_int * a,cl_int b)70*6467f958SSadaf Ebrahimi cl_int ThreadPool_AtomicAdd(volatile cl_int *a, cl_int b)
71*6467f958SSadaf Ebrahimi {
72*6467f958SSadaf Ebrahimi #if defined(__MINGW32__)
73*6467f958SSadaf Ebrahimi // No atomics on Mingw32
74*6467f958SSadaf Ebrahimi EnterCriticalSection(&gAtomicLock);
75*6467f958SSadaf Ebrahimi cl_int old = *a;
76*6467f958SSadaf Ebrahimi *a = old + b;
77*6467f958SSadaf Ebrahimi LeaveCriticalSection(&gAtomicLock);
78*6467f958SSadaf Ebrahimi return old;
79*6467f958SSadaf Ebrahimi #elif defined(__GNUC__)
80*6467f958SSadaf Ebrahimi // GCC extension:
81*6467f958SSadaf Ebrahimi // http://gcc.gnu.org/onlinedocs/gcc/Atomic-Builtins.html#Atomic-Builtins
82*6467f958SSadaf Ebrahimi return __sync_fetch_and_add(a, b);
83*6467f958SSadaf Ebrahimi // do we need __sync_synchronize() here, too? GCC docs are unclear whether
84*6467f958SSadaf Ebrahimi // __sync_fetch_and_add does a synchronize
85*6467f958SSadaf Ebrahimi #elif defined(_MSC_VER)
86*6467f958SSadaf Ebrahimi return (cl_int)_InterlockedExchangeAdd((volatile LONG *)a, (LONG)b);
87*6467f958SSadaf Ebrahimi #else
88*6467f958SSadaf Ebrahimi #warning Please add a atomic add implementation here, with memory barrier. Fallback code is slow.
89*6467f958SSadaf Ebrahimi if (pthread_mutex_lock(&gAtomicLock))
90*6467f958SSadaf Ebrahimi log_error("Atomic operation failed. pthread_mutex_lock(&gAtomicLock) "
91*6467f958SSadaf Ebrahimi "returned an error\n");
92*6467f958SSadaf Ebrahimi cl_int old = *a;
93*6467f958SSadaf Ebrahimi *a = old + b;
94*6467f958SSadaf Ebrahimi if (pthread_mutex_unlock(&gAtomicLock))
95*6467f958SSadaf Ebrahimi log_error("Failed to release gAtomicLock. Further atomic operations "
96*6467f958SSadaf Ebrahimi "may deadlock!\n");
97*6467f958SSadaf Ebrahimi return old;
98*6467f958SSadaf Ebrahimi #endif
99*6467f958SSadaf Ebrahimi }
100*6467f958SSadaf Ebrahimi
101*6467f958SSadaf Ebrahimi #if defined(_WIN32)
102*6467f958SSadaf Ebrahimi // Uncomment the following line if Windows XP support is not required.
103*6467f958SSadaf Ebrahimi // #define HAS_INIT_ONCE_EXECUTE_ONCE 1
104*6467f958SSadaf Ebrahimi
105*6467f958SSadaf Ebrahimi #if defined(HAS_INIT_ONCE_EXECUTE_ONCE)
106*6467f958SSadaf Ebrahimi #define _INIT_ONCE INIT_ONCE
107*6467f958SSadaf Ebrahimi #define _PINIT_ONCE PINIT_ONCE
108*6467f958SSadaf Ebrahimi #define _InitOnceExecuteOnce InitOnceExecuteOnce
109*6467f958SSadaf Ebrahimi #else // !HAS_INIT_ONCE_EXECUTE_ONCE
110*6467f958SSadaf Ebrahimi
111*6467f958SSadaf Ebrahimi typedef volatile LONG _INIT_ONCE;
112*6467f958SSadaf Ebrahimi typedef _INIT_ONCE *_PINIT_ONCE;
113*6467f958SSadaf Ebrahimi typedef BOOL(CALLBACK *_PINIT_ONCE_FN)(_PINIT_ONCE, PVOID, PVOID *);
114*6467f958SSadaf Ebrahimi
115*6467f958SSadaf Ebrahimi #define _INIT_ONCE_UNINITIALIZED 0
116*6467f958SSadaf Ebrahimi #define _INIT_ONCE_IN_PROGRESS 1
117*6467f958SSadaf Ebrahimi #define _INIT_ONCE_DONE 2
118*6467f958SSadaf Ebrahimi
_InitOnceExecuteOnce(_PINIT_ONCE InitOnce,_PINIT_ONCE_FN InitFn,PVOID Parameter,LPVOID * Context)119*6467f958SSadaf Ebrahimi static BOOL _InitOnceExecuteOnce(_PINIT_ONCE InitOnce, _PINIT_ONCE_FN InitFn,
120*6467f958SSadaf Ebrahimi PVOID Parameter, LPVOID *Context)
121*6467f958SSadaf Ebrahimi {
122*6467f958SSadaf Ebrahimi while (*InitOnce != _INIT_ONCE_DONE)
123*6467f958SSadaf Ebrahimi {
124*6467f958SSadaf Ebrahimi if (*InitOnce != _INIT_ONCE_IN_PROGRESS
125*6467f958SSadaf Ebrahimi && _InterlockedCompareExchange(InitOnce, _INIT_ONCE_IN_PROGRESS,
126*6467f958SSadaf Ebrahimi _INIT_ONCE_UNINITIALIZED)
127*6467f958SSadaf Ebrahimi == _INIT_ONCE_UNINITIALIZED)
128*6467f958SSadaf Ebrahimi {
129*6467f958SSadaf Ebrahimi InitFn(InitOnce, Parameter, Context);
130*6467f958SSadaf Ebrahimi *InitOnce = _INIT_ONCE_DONE;
131*6467f958SSadaf Ebrahimi return TRUE;
132*6467f958SSadaf Ebrahimi }
133*6467f958SSadaf Ebrahimi Sleep(1);
134*6467f958SSadaf Ebrahimi }
135*6467f958SSadaf Ebrahimi return TRUE;
136*6467f958SSadaf Ebrahimi }
137*6467f958SSadaf Ebrahimi #endif // !HAS_INIT_ONCE_EXECUTE_ONCE
138*6467f958SSadaf Ebrahimi
139*6467f958SSadaf Ebrahimi // Uncomment the following line if Windows XP support is not required.
140*6467f958SSadaf Ebrahimi // #define HAS_CONDITION_VARIABLE 1
141*6467f958SSadaf Ebrahimi
142*6467f958SSadaf Ebrahimi #if defined(HAS_CONDITION_VARIABLE)
143*6467f958SSadaf Ebrahimi #define _CONDITION_VARIABLE CONDITION_VARIABLE
144*6467f958SSadaf Ebrahimi #define _InitializeConditionVariable InitializeConditionVariable
145*6467f958SSadaf Ebrahimi #define _SleepConditionVariableCS SleepConditionVariableCS
146*6467f958SSadaf Ebrahimi #define _WakeAllConditionVariable WakeAllConditionVariable
147*6467f958SSadaf Ebrahimi #else // !HAS_CONDITION_VARIABLE
148*6467f958SSadaf Ebrahimi typedef struct
149*6467f958SSadaf Ebrahimi {
150*6467f958SSadaf Ebrahimi HANDLE mEvent; // Used to park the thread.
151*6467f958SSadaf Ebrahimi // Used to protect mWaiters, mGeneration and mReleaseCount:
152*6467f958SSadaf Ebrahimi CRITICAL_SECTION mLock[1];
153*6467f958SSadaf Ebrahimi volatile cl_int mWaiters; // Number of threads waiting on this cond var.
154*6467f958SSadaf Ebrahimi volatile cl_int mGeneration; // Wait generation count.
155*6467f958SSadaf Ebrahimi volatile cl_int mReleaseCount; // Number of releases to execute before
156*6467f958SSadaf Ebrahimi // reseting the event.
157*6467f958SSadaf Ebrahimi } _CONDITION_VARIABLE;
158*6467f958SSadaf Ebrahimi
159*6467f958SSadaf Ebrahimi typedef _CONDITION_VARIABLE *_PCONDITION_VARIABLE;
160*6467f958SSadaf Ebrahimi
_InitializeConditionVariable(_PCONDITION_VARIABLE cond_var)161*6467f958SSadaf Ebrahimi static void _InitializeConditionVariable(_PCONDITION_VARIABLE cond_var)
162*6467f958SSadaf Ebrahimi {
163*6467f958SSadaf Ebrahimi cond_var->mEvent = CreateEvent(NULL, TRUE, FALSE, NULL);
164*6467f958SSadaf Ebrahimi InitializeCriticalSection(cond_var->mLock);
165*6467f958SSadaf Ebrahimi cond_var->mWaiters = 0;
166*6467f958SSadaf Ebrahimi cond_var->mGeneration = 0;
167*6467f958SSadaf Ebrahimi #if !defined(NDEBUG)
168*6467f958SSadaf Ebrahimi cond_var->mReleaseCount = 0;
169*6467f958SSadaf Ebrahimi #endif // !NDEBUG
170*6467f958SSadaf Ebrahimi }
171*6467f958SSadaf Ebrahimi
_SleepConditionVariableCS(_PCONDITION_VARIABLE cond_var,PCRITICAL_SECTION cond_lock,DWORD ignored)172*6467f958SSadaf Ebrahimi static void _SleepConditionVariableCS(_PCONDITION_VARIABLE cond_var,
173*6467f958SSadaf Ebrahimi PCRITICAL_SECTION cond_lock,
174*6467f958SSadaf Ebrahimi DWORD ignored)
175*6467f958SSadaf Ebrahimi {
176*6467f958SSadaf Ebrahimi EnterCriticalSection(cond_var->mLock);
177*6467f958SSadaf Ebrahimi cl_int generation = cond_var->mGeneration;
178*6467f958SSadaf Ebrahimi ++cond_var->mWaiters;
179*6467f958SSadaf Ebrahimi LeaveCriticalSection(cond_var->mLock);
180*6467f958SSadaf Ebrahimi LeaveCriticalSection(cond_lock);
181*6467f958SSadaf Ebrahimi
182*6467f958SSadaf Ebrahimi while (TRUE)
183*6467f958SSadaf Ebrahimi {
184*6467f958SSadaf Ebrahimi WaitForSingleObject(cond_var->mEvent, INFINITE);
185*6467f958SSadaf Ebrahimi EnterCriticalSection(cond_var->mLock);
186*6467f958SSadaf Ebrahimi BOOL done =
187*6467f958SSadaf Ebrahimi cond_var->mReleaseCount > 0 && cond_var->mGeneration != generation;
188*6467f958SSadaf Ebrahimi LeaveCriticalSection(cond_var->mLock);
189*6467f958SSadaf Ebrahimi if (done)
190*6467f958SSadaf Ebrahimi {
191*6467f958SSadaf Ebrahimi break;
192*6467f958SSadaf Ebrahimi }
193*6467f958SSadaf Ebrahimi }
194*6467f958SSadaf Ebrahimi
195*6467f958SSadaf Ebrahimi EnterCriticalSection(cond_lock);
196*6467f958SSadaf Ebrahimi EnterCriticalSection(cond_var->mLock);
197*6467f958SSadaf Ebrahimi if (--cond_var->mReleaseCount == 0)
198*6467f958SSadaf Ebrahimi {
199*6467f958SSadaf Ebrahimi ResetEvent(cond_var->mEvent);
200*6467f958SSadaf Ebrahimi }
201*6467f958SSadaf Ebrahimi --cond_var->mWaiters;
202*6467f958SSadaf Ebrahimi LeaveCriticalSection(cond_var->mLock);
203*6467f958SSadaf Ebrahimi }
204*6467f958SSadaf Ebrahimi
_WakeAllConditionVariable(_PCONDITION_VARIABLE cond_var)205*6467f958SSadaf Ebrahimi static void _WakeAllConditionVariable(_PCONDITION_VARIABLE cond_var)
206*6467f958SSadaf Ebrahimi {
207*6467f958SSadaf Ebrahimi EnterCriticalSection(cond_var->mLock);
208*6467f958SSadaf Ebrahimi if (cond_var->mWaiters > 0)
209*6467f958SSadaf Ebrahimi {
210*6467f958SSadaf Ebrahimi ++cond_var->mGeneration;
211*6467f958SSadaf Ebrahimi cond_var->mReleaseCount = cond_var->mWaiters;
212*6467f958SSadaf Ebrahimi SetEvent(cond_var->mEvent);
213*6467f958SSadaf Ebrahimi }
214*6467f958SSadaf Ebrahimi LeaveCriticalSection(cond_var->mLock);
215*6467f958SSadaf Ebrahimi }
216*6467f958SSadaf Ebrahimi #endif // !HAS_CONDITION_VARIABLE
217*6467f958SSadaf Ebrahimi #endif // _WIN32
218*6467f958SSadaf Ebrahimi
219*6467f958SSadaf Ebrahimi #define MAX_COUNT (1 << 29)
220*6467f958SSadaf Ebrahimi
221*6467f958SSadaf Ebrahimi // Global state to coordinate whether the threads have been launched
222*6467f958SSadaf Ebrahimi // successfully or not
223*6467f958SSadaf Ebrahimi #if defined(_MSC_VER) && (_WIN32_WINNT >= 0x600)
224*6467f958SSadaf Ebrahimi static _INIT_ONCE threadpool_init_control;
225*6467f958SSadaf Ebrahimi #elif defined(_WIN32) // MingW of XP
226*6467f958SSadaf Ebrahimi static int threadpool_init_control;
227*6467f958SSadaf Ebrahimi #else // Posix platforms
228*6467f958SSadaf Ebrahimi pthread_once_t threadpool_init_control = PTHREAD_ONCE_INIT;
229*6467f958SSadaf Ebrahimi #endif
230*6467f958SSadaf Ebrahimi cl_int threadPoolInitErr = -1; // set to CL_SUCCESS on successful thread launch
231*6467f958SSadaf Ebrahimi
232*6467f958SSadaf Ebrahimi // critical region lock around ThreadPool_Do. We can only run one ThreadPool_Do
233*6467f958SSadaf Ebrahimi // at a time, because we are too lazy to set up a queue here, and don't expect
234*6467f958SSadaf Ebrahimi // to need one.
235*6467f958SSadaf Ebrahimi #if defined(_WIN32)
236*6467f958SSadaf Ebrahimi CRITICAL_SECTION gThreadPoolLock[1];
237*6467f958SSadaf Ebrahimi #else // !_WIN32
238*6467f958SSadaf Ebrahimi pthread_mutex_t gThreadPoolLock;
239*6467f958SSadaf Ebrahimi #endif // !_WIN32
240*6467f958SSadaf Ebrahimi
241*6467f958SSadaf Ebrahimi // Condition variable to park ThreadPool threads when not working
242*6467f958SSadaf Ebrahimi #if defined(_WIN32)
243*6467f958SSadaf Ebrahimi CRITICAL_SECTION cond_lock[1];
244*6467f958SSadaf Ebrahimi _CONDITION_VARIABLE cond_var[1];
245*6467f958SSadaf Ebrahimi #else // !_WIN32
246*6467f958SSadaf Ebrahimi pthread_mutex_t cond_lock;
247*6467f958SSadaf Ebrahimi pthread_cond_t cond_var;
248*6467f958SSadaf Ebrahimi #endif // !_WIN32
249*6467f958SSadaf Ebrahimi
250*6467f958SSadaf Ebrahimi // Condition variable state. How many iterations on the function left to run,
251*6467f958SSadaf Ebrahimi // set to CL_INT_MAX to cause worker threads to exit. Note: this value might
252*6467f958SSadaf Ebrahimi // go negative.
253*6467f958SSadaf Ebrahimi std::atomic<cl_int> gRunCount{ 0 };
254*6467f958SSadaf Ebrahimi
255*6467f958SSadaf Ebrahimi // State that only changes when the threadpool is not working.
256*6467f958SSadaf Ebrahimi volatile TPFuncPtr gFunc_ptr = NULL;
257*6467f958SSadaf Ebrahimi volatile void *gUserInfo = NULL;
258*6467f958SSadaf Ebrahimi volatile cl_int gJobCount = 0;
259*6467f958SSadaf Ebrahimi
260*6467f958SSadaf Ebrahimi // State that may change while the thread pool is working
261*6467f958SSadaf Ebrahimi volatile cl_int jobError = CL_SUCCESS; // err code return for the job as a whole
262*6467f958SSadaf Ebrahimi
263*6467f958SSadaf Ebrahimi // Condition variable to park caller while waiting
264*6467f958SSadaf Ebrahimi #if defined(_WIN32)
265*6467f958SSadaf Ebrahimi HANDLE caller_event;
266*6467f958SSadaf Ebrahimi #else // !_WIN32
267*6467f958SSadaf Ebrahimi pthread_mutex_t caller_cond_lock;
268*6467f958SSadaf Ebrahimi pthread_cond_t caller_cond_var;
269*6467f958SSadaf Ebrahimi #endif // !_WIN32
270*6467f958SSadaf Ebrahimi
271*6467f958SSadaf Ebrahimi // # of threads intended to be running. Running threads will decrement this
272*6467f958SSadaf Ebrahimi // as they discover they've run out of work to do.
273*6467f958SSadaf Ebrahimi std::atomic<cl_int> gRunning{ 0 };
274*6467f958SSadaf Ebrahimi
275*6467f958SSadaf Ebrahimi // The total number of threads launched.
276*6467f958SSadaf Ebrahimi std::atomic<cl_int> gThreadCount{ 0 };
277*6467f958SSadaf Ebrahimi
278*6467f958SSadaf Ebrahimi #ifdef _WIN32
ThreadPool_WorkerFunc(void * p)279*6467f958SSadaf Ebrahimi void ThreadPool_WorkerFunc(void *p)
280*6467f958SSadaf Ebrahimi #else
281*6467f958SSadaf Ebrahimi void *ThreadPool_WorkerFunc(void *p)
282*6467f958SSadaf Ebrahimi #endif
283*6467f958SSadaf Ebrahimi {
284*6467f958SSadaf Ebrahimi auto &tid = *static_cast<std::atomic<cl_uint> *>(p);
285*6467f958SSadaf Ebrahimi cl_uint threadID = tid++;
286*6467f958SSadaf Ebrahimi cl_int item = gRunCount--;
287*6467f958SSadaf Ebrahimi
288*6467f958SSadaf Ebrahimi while (MAX_COUNT > item)
289*6467f958SSadaf Ebrahimi {
290*6467f958SSadaf Ebrahimi cl_int err;
291*6467f958SSadaf Ebrahimi
292*6467f958SSadaf Ebrahimi // check for more work to do
293*6467f958SSadaf Ebrahimi if (0 >= item)
294*6467f958SSadaf Ebrahimi {
295*6467f958SSadaf Ebrahimi // No work to do. Attempt to block waiting for work
296*6467f958SSadaf Ebrahimi #if defined(_WIN32)
297*6467f958SSadaf Ebrahimi EnterCriticalSection(cond_lock);
298*6467f958SSadaf Ebrahimi #else // !_WIN32
299*6467f958SSadaf Ebrahimi if ((err = pthread_mutex_lock(&cond_lock)))
300*6467f958SSadaf Ebrahimi {
301*6467f958SSadaf Ebrahimi log_error(
302*6467f958SSadaf Ebrahimi "Error %d from pthread_mutex_lock. Worker %d unable to "
303*6467f958SSadaf Ebrahimi "block waiting for work. ThreadPool_WorkerFunc failed.\n",
304*6467f958SSadaf Ebrahimi err, threadID);
305*6467f958SSadaf Ebrahimi goto exit;
306*6467f958SSadaf Ebrahimi }
307*6467f958SSadaf Ebrahimi #endif // !_WIN32
308*6467f958SSadaf Ebrahimi
309*6467f958SSadaf Ebrahimi cl_int remaining = gRunning--;
310*6467f958SSadaf Ebrahimi if (1 == remaining)
311*6467f958SSadaf Ebrahimi { // last thread out signal the main thread to wake up
312*6467f958SSadaf Ebrahimi #if defined(_WIN32)
313*6467f958SSadaf Ebrahimi SetEvent(caller_event);
314*6467f958SSadaf Ebrahimi #else // !_WIN32
315*6467f958SSadaf Ebrahimi if ((err = pthread_mutex_lock(&caller_cond_lock)))
316*6467f958SSadaf Ebrahimi {
317*6467f958SSadaf Ebrahimi log_error("Error %d from pthread_mutex_lock. Unable to "
318*6467f958SSadaf Ebrahimi "wake caller.\n",
319*6467f958SSadaf Ebrahimi err);
320*6467f958SSadaf Ebrahimi goto exit;
321*6467f958SSadaf Ebrahimi }
322*6467f958SSadaf Ebrahimi if ((err = pthread_cond_broadcast(&caller_cond_var)))
323*6467f958SSadaf Ebrahimi {
324*6467f958SSadaf Ebrahimi log_error(
325*6467f958SSadaf Ebrahimi "Error %d from pthread_cond_broadcast. Unable to wake "
326*6467f958SSadaf Ebrahimi "up main thread. ThreadPool_WorkerFunc failed.\n",
327*6467f958SSadaf Ebrahimi err);
328*6467f958SSadaf Ebrahimi goto exit;
329*6467f958SSadaf Ebrahimi }
330*6467f958SSadaf Ebrahimi if ((err = pthread_mutex_unlock(&caller_cond_lock)))
331*6467f958SSadaf Ebrahimi {
332*6467f958SSadaf Ebrahimi log_error("Error %d from pthread_mutex_lock. Unable to "
333*6467f958SSadaf Ebrahimi "wake caller.\n",
334*6467f958SSadaf Ebrahimi err);
335*6467f958SSadaf Ebrahimi goto exit;
336*6467f958SSadaf Ebrahimi }
337*6467f958SSadaf Ebrahimi #endif // !_WIN32
338*6467f958SSadaf Ebrahimi }
339*6467f958SSadaf Ebrahimi
340*6467f958SSadaf Ebrahimi // loop in case we are woken only to discover that some other thread
341*6467f958SSadaf Ebrahimi // already did all the work
342*6467f958SSadaf Ebrahimi while (0 >= item)
343*6467f958SSadaf Ebrahimi {
344*6467f958SSadaf Ebrahimi #if defined(_WIN32)
345*6467f958SSadaf Ebrahimi _SleepConditionVariableCS(cond_var, cond_lock, INFINITE);
346*6467f958SSadaf Ebrahimi #else // !_WIN32
347*6467f958SSadaf Ebrahimi if ((err = pthread_cond_wait(&cond_var, &cond_lock)))
348*6467f958SSadaf Ebrahimi {
349*6467f958SSadaf Ebrahimi log_error(
350*6467f958SSadaf Ebrahimi "Error %d from pthread_cond_wait. Unable to block for "
351*6467f958SSadaf Ebrahimi "waiting for work. ThreadPool_WorkerFunc failed.\n",
352*6467f958SSadaf Ebrahimi err);
353*6467f958SSadaf Ebrahimi pthread_mutex_unlock(&cond_lock);
354*6467f958SSadaf Ebrahimi goto exit;
355*6467f958SSadaf Ebrahimi }
356*6467f958SSadaf Ebrahimi #endif // !_WIN32
357*6467f958SSadaf Ebrahimi
358*6467f958SSadaf Ebrahimi // try again to get a valid item id
359*6467f958SSadaf Ebrahimi item = gRunCount--;
360*6467f958SSadaf Ebrahimi if (MAX_COUNT <= item) // exit if we are done
361*6467f958SSadaf Ebrahimi {
362*6467f958SSadaf Ebrahimi #if defined(_WIN32)
363*6467f958SSadaf Ebrahimi LeaveCriticalSection(cond_lock);
364*6467f958SSadaf Ebrahimi #else // !_WIN32
365*6467f958SSadaf Ebrahimi pthread_mutex_unlock(&cond_lock);
366*6467f958SSadaf Ebrahimi #endif // !_WIN32
367*6467f958SSadaf Ebrahimi goto exit;
368*6467f958SSadaf Ebrahimi }
369*6467f958SSadaf Ebrahimi }
370*6467f958SSadaf Ebrahimi
371*6467f958SSadaf Ebrahimi gRunning++;
372*6467f958SSadaf Ebrahimi
373*6467f958SSadaf Ebrahimi #if defined(_WIN32)
374*6467f958SSadaf Ebrahimi LeaveCriticalSection(cond_lock);
375*6467f958SSadaf Ebrahimi #else // !_WIN32
376*6467f958SSadaf Ebrahimi if ((err = pthread_mutex_unlock(&cond_lock)))
377*6467f958SSadaf Ebrahimi {
378*6467f958SSadaf Ebrahimi log_error(
379*6467f958SSadaf Ebrahimi "Error %d from pthread_mutex_unlock. Unable to block for "
380*6467f958SSadaf Ebrahimi "waiting for work. ThreadPool_WorkerFunc failed.\n",
381*6467f958SSadaf Ebrahimi err);
382*6467f958SSadaf Ebrahimi goto exit;
383*6467f958SSadaf Ebrahimi }
384*6467f958SSadaf Ebrahimi #endif // !_WIN32
385*6467f958SSadaf Ebrahimi }
386*6467f958SSadaf Ebrahimi
387*6467f958SSadaf Ebrahimi // we have a valid item, so do the work
388*6467f958SSadaf Ebrahimi // but only if we haven't already encountered an error
389*6467f958SSadaf Ebrahimi if (CL_SUCCESS == jobError)
390*6467f958SSadaf Ebrahimi {
391*6467f958SSadaf Ebrahimi // log_info("Thread %d doing job %d\n", threadID, item - 1);
392*6467f958SSadaf Ebrahimi
393*6467f958SSadaf Ebrahimi #if defined(__APPLE__) && defined(__arm__)
394*6467f958SSadaf Ebrahimi // On most platforms which support denorm, default is FTZ off.
395*6467f958SSadaf Ebrahimi // However, on some hardware where the reference is computed,
396*6467f958SSadaf Ebrahimi // default might be flush denorms to zero e.g. arm. This creates
397*6467f958SSadaf Ebrahimi // issues in result verification. Since spec allows the
398*6467f958SSadaf Ebrahimi // implementation to either flush or not flush denorms to zero, an
399*6467f958SSadaf Ebrahimi // implementation may choose not be flush i.e. return denorm result
400*6467f958SSadaf Ebrahimi // whereas reference result may be zero (flushed denorm). Hence we
401*6467f958SSadaf Ebrahimi // need to disable denorm flushing on host side where reference is
402*6467f958SSadaf Ebrahimi // being computed to make sure we get non-flushed reference result.
403*6467f958SSadaf Ebrahimi // If implementation returns flushed result, we correctly take care
404*6467f958SSadaf Ebrahimi // of that in verification code.
405*6467f958SSadaf Ebrahimi FPU_mode_type oldMode;
406*6467f958SSadaf Ebrahimi DisableFTZ(&oldMode);
407*6467f958SSadaf Ebrahimi #endif
408*6467f958SSadaf Ebrahimi
409*6467f958SSadaf Ebrahimi // Call the user's function with this item ID
410*6467f958SSadaf Ebrahimi err = gFunc_ptr(item - 1, threadID, (void *)gUserInfo);
411*6467f958SSadaf Ebrahimi #if defined(__APPLE__) && defined(__arm__)
412*6467f958SSadaf Ebrahimi // Restore FP state
413*6467f958SSadaf Ebrahimi RestoreFPState(&oldMode);
414*6467f958SSadaf Ebrahimi #endif
415*6467f958SSadaf Ebrahimi
416*6467f958SSadaf Ebrahimi if (err)
417*6467f958SSadaf Ebrahimi {
418*6467f958SSadaf Ebrahimi #if (__MINGW32__)
419*6467f958SSadaf Ebrahimi EnterCriticalSection(&gAtomicLock);
420*6467f958SSadaf Ebrahimi if (jobError == CL_SUCCESS) jobError = err;
421*6467f958SSadaf Ebrahimi gRunCount = 0;
422*6467f958SSadaf Ebrahimi LeaveCriticalSection(&gAtomicLock);
423*6467f958SSadaf Ebrahimi #elif defined(__GNUC__)
424*6467f958SSadaf Ebrahimi // GCC extension:
425*6467f958SSadaf Ebrahimi // http://gcc.gnu.org/onlinedocs/gcc/Atomic-Builtins.html#Atomic-Builtins
426*6467f958SSadaf Ebrahimi // set the new error if we are the first one there.
427*6467f958SSadaf Ebrahimi __sync_val_compare_and_swap(&jobError, CL_SUCCESS, err);
428*6467f958SSadaf Ebrahimi
429*6467f958SSadaf Ebrahimi // drop run count to 0
430*6467f958SSadaf Ebrahimi gRunCount = 0;
431*6467f958SSadaf Ebrahimi __sync_synchronize();
432*6467f958SSadaf Ebrahimi #elif defined(_MSC_VER)
433*6467f958SSadaf Ebrahimi // set the new error if we are the first one there.
434*6467f958SSadaf Ebrahimi _InterlockedCompareExchange((volatile LONG *)&jobError, err,
435*6467f958SSadaf Ebrahimi CL_SUCCESS);
436*6467f958SSadaf Ebrahimi
437*6467f958SSadaf Ebrahimi // drop run count to 0
438*6467f958SSadaf Ebrahimi gRunCount = 0;
439*6467f958SSadaf Ebrahimi _mm_mfence();
440*6467f958SSadaf Ebrahimi #else
441*6467f958SSadaf Ebrahimi if (pthread_mutex_lock(&gAtomicLock))
442*6467f958SSadaf Ebrahimi log_error(
443*6467f958SSadaf Ebrahimi "Atomic operation failed. "
444*6467f958SSadaf Ebrahimi "pthread_mutex_lock(&gAtomicLock) returned an error\n");
445*6467f958SSadaf Ebrahimi if (jobError == CL_SUCCESS) jobError = err;
446*6467f958SSadaf Ebrahimi gRunCount = 0;
447*6467f958SSadaf Ebrahimi if (pthread_mutex_unlock(&gAtomicLock))
448*6467f958SSadaf Ebrahimi log_error("Failed to release gAtomicLock. Further atomic "
449*6467f958SSadaf Ebrahimi "operations may deadlock\n");
450*6467f958SSadaf Ebrahimi #endif
451*6467f958SSadaf Ebrahimi }
452*6467f958SSadaf Ebrahimi }
453*6467f958SSadaf Ebrahimi
454*6467f958SSadaf Ebrahimi // get the next item
455*6467f958SSadaf Ebrahimi item = gRunCount--;
456*6467f958SSadaf Ebrahimi }
457*6467f958SSadaf Ebrahimi
458*6467f958SSadaf Ebrahimi exit:
459*6467f958SSadaf Ebrahimi log_info("ThreadPool: thread %d exiting.\n", threadID);
460*6467f958SSadaf Ebrahimi gThreadCount--;
461*6467f958SSadaf Ebrahimi #if !defined(_WIN32)
462*6467f958SSadaf Ebrahimi return NULL;
463*6467f958SSadaf Ebrahimi #endif
464*6467f958SSadaf Ebrahimi }
465*6467f958SSadaf Ebrahimi
466*6467f958SSadaf Ebrahimi // SetThreadCount() may be used to artifically set the number of worker threads
467*6467f958SSadaf Ebrahimi // If the value is 0 (the default) the number of threads will be determined
468*6467f958SSadaf Ebrahimi // based on the number of CPU cores. If it is a unicore machine, then 2 will be
469*6467f958SSadaf Ebrahimi // used, so that we still get some testing for thread safety.
470*6467f958SSadaf Ebrahimi //
471*6467f958SSadaf Ebrahimi // If count < 2 or the CL_TEST_SINGLE_THREADED environment variable is set then
472*6467f958SSadaf Ebrahimi // the code will run single threaded, but will report an error to indicate that
473*6467f958SSadaf Ebrahimi // the test is invalid. This option is intended for debugging purposes only. It
474*6467f958SSadaf Ebrahimi // is suggested as a convention that test apps set the thread count to 1 in
475*6467f958SSadaf Ebrahimi // response to the -m flag.
476*6467f958SSadaf Ebrahimi //
477*6467f958SSadaf Ebrahimi // SetThreadCount() must be called before the first call to GetThreadCount() or
478*6467f958SSadaf Ebrahimi // ThreadPool_Do(), otherwise the behavior is indefined.
SetThreadCount(int count)479*6467f958SSadaf Ebrahimi void SetThreadCount(int count)
480*6467f958SSadaf Ebrahimi {
481*6467f958SSadaf Ebrahimi if (threadPoolInitErr == CL_SUCCESS)
482*6467f958SSadaf Ebrahimi {
483*6467f958SSadaf Ebrahimi log_error("Error: It is illegal to set the thread count after the "
484*6467f958SSadaf Ebrahimi "first call to ThreadPool_Do or GetThreadCount\n");
485*6467f958SSadaf Ebrahimi abort();
486*6467f958SSadaf Ebrahimi }
487*6467f958SSadaf Ebrahimi
488*6467f958SSadaf Ebrahimi gThreadCount = count;
489*6467f958SSadaf Ebrahimi }
490*6467f958SSadaf Ebrahimi
ThreadPool_Init(void)491*6467f958SSadaf Ebrahimi void ThreadPool_Init(void)
492*6467f958SSadaf Ebrahimi {
493*6467f958SSadaf Ebrahimi cl_int i;
494*6467f958SSadaf Ebrahimi int err;
495*6467f958SSadaf Ebrahimi std::atomic<cl_uint> threadID{ 0 };
496*6467f958SSadaf Ebrahimi
497*6467f958SSadaf Ebrahimi // Check for manual override of multithreading code. We add this for better
498*6467f958SSadaf Ebrahimi // debuggability.
499*6467f958SSadaf Ebrahimi if (getenv("CL_TEST_SINGLE_THREADED"))
500*6467f958SSadaf Ebrahimi {
501*6467f958SSadaf Ebrahimi log_error("ERROR: CL_TEST_SINGLE_THREADED is set in the environment. "
502*6467f958SSadaf Ebrahimi "Running single threaded.\n*** TEST IS INVALID! ***\n");
503*6467f958SSadaf Ebrahimi gThreadCount = 1;
504*6467f958SSadaf Ebrahimi return;
505*6467f958SSadaf Ebrahimi }
506*6467f958SSadaf Ebrahimi
507*6467f958SSadaf Ebrahimi // Figure out how many threads to run -- check first for non-zero to give
508*6467f958SSadaf Ebrahimi // the implementation the chance
509*6467f958SSadaf Ebrahimi if (0 == gThreadCount)
510*6467f958SSadaf Ebrahimi {
511*6467f958SSadaf Ebrahimi #if defined(_MSC_VER) || defined(__MINGW64__)
512*6467f958SSadaf Ebrahimi PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer = NULL;
513*6467f958SSadaf Ebrahimi DWORD length = 0;
514*6467f958SSadaf Ebrahimi
515*6467f958SSadaf Ebrahimi GetLogicalProcessorInformation(NULL, &length);
516*6467f958SSadaf Ebrahimi buffer = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION)malloc(length);
517*6467f958SSadaf Ebrahimi if (buffer != NULL)
518*6467f958SSadaf Ebrahimi {
519*6467f958SSadaf Ebrahimi if (GetLogicalProcessorInformation(buffer, &length) == TRUE)
520*6467f958SSadaf Ebrahimi {
521*6467f958SSadaf Ebrahimi PSYSTEM_LOGICAL_PROCESSOR_INFORMATION ptr = buffer;
522*6467f958SSadaf Ebrahimi while (
523*6467f958SSadaf Ebrahimi ptr
524*6467f958SSadaf Ebrahimi < &buffer[length
525*6467f958SSadaf Ebrahimi / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION)])
526*6467f958SSadaf Ebrahimi {
527*6467f958SSadaf Ebrahimi if (ptr->Relationship == RelationProcessorCore)
528*6467f958SSadaf Ebrahimi {
529*6467f958SSadaf Ebrahimi // Count the number of bits in ProcessorMask (number of
530*6467f958SSadaf Ebrahimi // logical cores)
531*6467f958SSadaf Ebrahimi ULONG_PTR mask = ptr->ProcessorMask;
532*6467f958SSadaf Ebrahimi while (mask)
533*6467f958SSadaf Ebrahimi {
534*6467f958SSadaf Ebrahimi ++gThreadCount;
535*6467f958SSadaf Ebrahimi mask &= mask - 1; // Remove 1 bit at a time
536*6467f958SSadaf Ebrahimi }
537*6467f958SSadaf Ebrahimi }
538*6467f958SSadaf Ebrahimi ++ptr;
539*6467f958SSadaf Ebrahimi }
540*6467f958SSadaf Ebrahimi }
541*6467f958SSadaf Ebrahimi free(buffer);
542*6467f958SSadaf Ebrahimi }
543*6467f958SSadaf Ebrahimi #elif defined(__MINGW32__)
544*6467f958SSadaf Ebrahimi {
545*6467f958SSadaf Ebrahimi #warning How about this, instead of hard coding it to 2?
546*6467f958SSadaf Ebrahimi SYSTEM_INFO sysinfo;
547*6467f958SSadaf Ebrahimi GetSystemInfo(&sysinfo);
548*6467f958SSadaf Ebrahimi gThreadCount = sysinfo.dwNumberOfProcessors;
549*6467f958SSadaf Ebrahimi }
550*6467f958SSadaf Ebrahimi #elif defined(__linux__) && !defined(__ANDROID__)
551*6467f958SSadaf Ebrahimi cpu_set_t affinity;
552*6467f958SSadaf Ebrahimi if (0 == sched_getaffinity(0, sizeof(cpu_set_t), &affinity))
553*6467f958SSadaf Ebrahimi {
554*6467f958SSadaf Ebrahimi #if !(defined(CPU_COUNT))
555*6467f958SSadaf Ebrahimi gThreadCount = 1;
556*6467f958SSadaf Ebrahimi #else
557*6467f958SSadaf Ebrahimi gThreadCount = CPU_COUNT(&affinity);
558*6467f958SSadaf Ebrahimi #endif
559*6467f958SSadaf Ebrahimi }
560*6467f958SSadaf Ebrahimi else
561*6467f958SSadaf Ebrahimi {
562*6467f958SSadaf Ebrahimi // Hopefully your system returns logical cpus here, as does MacOS X
563*6467f958SSadaf Ebrahimi gThreadCount = (cl_int)sysconf(_SC_NPROCESSORS_CONF);
564*6467f958SSadaf Ebrahimi }
565*6467f958SSadaf Ebrahimi #else /* !_WIN32 */
566*6467f958SSadaf Ebrahimi // Hopefully your system returns logical cpus here, as does MacOS X
567*6467f958SSadaf Ebrahimi gThreadCount = (cl_int)sysconf(_SC_NPROCESSORS_CONF);
568*6467f958SSadaf Ebrahimi #endif // !_WIN32
569*6467f958SSadaf Ebrahimi
570*6467f958SSadaf Ebrahimi // Multithreaded tests are required to run multithreaded even on unicore
571*6467f958SSadaf Ebrahimi // systems so as to test thread safety
572*6467f958SSadaf Ebrahimi if (1 == gThreadCount) gThreadCount = 2;
573*6467f958SSadaf Ebrahimi }
574*6467f958SSadaf Ebrahimi
575*6467f958SSadaf Ebrahimi // When working in 32 bit limit the thread number to 12
576*6467f958SSadaf Ebrahimi // This fix was made due to memory issues in integer_ops test
577*6467f958SSadaf Ebrahimi // When running integer_ops, the test opens as many threads as the
578*6467f958SSadaf Ebrahimi // machine has and each thread allocates a fixed amount of memory
579*6467f958SSadaf Ebrahimi // When running this test on dual socket machine in 32-bit, the
580*6467f958SSadaf Ebrahimi // process memory is not sufficient and the test fails
581*6467f958SSadaf Ebrahimi #if defined(_WIN32) && !defined(_M_X64)
582*6467f958SSadaf Ebrahimi if (gThreadCount > 12)
583*6467f958SSadaf Ebrahimi {
584*6467f958SSadaf Ebrahimi gThreadCount = 12;
585*6467f958SSadaf Ebrahimi }
586*6467f958SSadaf Ebrahimi #endif
587*6467f958SSadaf Ebrahimi
588*6467f958SSadaf Ebrahimi // Allow the app to set thread count to <0 for debugging purposes.
589*6467f958SSadaf Ebrahimi // This will cause the test to run single threaded.
590*6467f958SSadaf Ebrahimi if (gThreadCount < 2)
591*6467f958SSadaf Ebrahimi {
592*6467f958SSadaf Ebrahimi log_error("ERROR: Running single threaded because thread count < 2. "
593*6467f958SSadaf Ebrahimi "\n*** TEST IS INVALID! ***\n");
594*6467f958SSadaf Ebrahimi gThreadCount = 1;
595*6467f958SSadaf Ebrahimi return;
596*6467f958SSadaf Ebrahimi }
597*6467f958SSadaf Ebrahimi
598*6467f958SSadaf Ebrahimi #if defined(_WIN32)
599*6467f958SSadaf Ebrahimi InitializeCriticalSection(gThreadPoolLock);
600*6467f958SSadaf Ebrahimi InitializeCriticalSection(cond_lock);
601*6467f958SSadaf Ebrahimi _InitializeConditionVariable(cond_var);
602*6467f958SSadaf Ebrahimi caller_event = CreateEvent(NULL, FALSE, FALSE, NULL);
603*6467f958SSadaf Ebrahimi #elif defined(__GNUC__)
604*6467f958SSadaf Ebrahimi // Dont rely on PTHREAD_MUTEX_INITIALIZER for intialization of a mutex since
605*6467f958SSadaf Ebrahimi // it might cause problem with some flavors of gcc compilers.
606*6467f958SSadaf Ebrahimi pthread_cond_init(&cond_var, NULL);
607*6467f958SSadaf Ebrahimi pthread_mutex_init(&cond_lock, NULL);
608*6467f958SSadaf Ebrahimi pthread_cond_init(&caller_cond_var, NULL);
609*6467f958SSadaf Ebrahimi pthread_mutex_init(&caller_cond_lock, NULL);
610*6467f958SSadaf Ebrahimi pthread_mutex_init(&gThreadPoolLock, NULL);
611*6467f958SSadaf Ebrahimi #endif
612*6467f958SSadaf Ebrahimi
613*6467f958SSadaf Ebrahimi #if !(defined(__GNUC__) || defined(_MSC_VER) || defined(__MINGW32__))
614*6467f958SSadaf Ebrahimi pthread_mutex_initialize(gAtomicLock);
615*6467f958SSadaf Ebrahimi #elif defined(__MINGW32__)
616*6467f958SSadaf Ebrahimi InitializeCriticalSection(&gAtomicLock);
617*6467f958SSadaf Ebrahimi #endif
618*6467f958SSadaf Ebrahimi // Make sure the last thread done in the work pool doesn't signal us to wake
619*6467f958SSadaf Ebrahimi // before we get to the point where we are supposed to wait
620*6467f958SSadaf Ebrahimi // That would cause a deadlock.
621*6467f958SSadaf Ebrahimi #if !defined(_WIN32)
622*6467f958SSadaf Ebrahimi if ((err = pthread_mutex_lock(&caller_cond_lock)))
623*6467f958SSadaf Ebrahimi {
624*6467f958SSadaf Ebrahimi log_error("Error %d from pthread_mutex_lock. Unable to block for work "
625*6467f958SSadaf Ebrahimi "to finish. ThreadPool_Init failed.\n",
626*6467f958SSadaf Ebrahimi err);
627*6467f958SSadaf Ebrahimi gThreadCount = 1;
628*6467f958SSadaf Ebrahimi return;
629*6467f958SSadaf Ebrahimi }
630*6467f958SSadaf Ebrahimi #endif // !_WIN32
631*6467f958SSadaf Ebrahimi
632*6467f958SSadaf Ebrahimi gRunning = gThreadCount.load();
633*6467f958SSadaf Ebrahimi // init threads
634*6467f958SSadaf Ebrahimi for (i = 0; i < gThreadCount; i++)
635*6467f958SSadaf Ebrahimi {
636*6467f958SSadaf Ebrahimi #if defined(_WIN32)
637*6467f958SSadaf Ebrahimi uintptr_t handle =
638*6467f958SSadaf Ebrahimi _beginthread(ThreadPool_WorkerFunc, 0, (void *)&threadID);
639*6467f958SSadaf Ebrahimi err = (handle == 0);
640*6467f958SSadaf Ebrahimi #else // !_WIN32
641*6467f958SSadaf Ebrahimi pthread_t tid = 0;
642*6467f958SSadaf Ebrahimi err = pthread_create(&tid, NULL, ThreadPool_WorkerFunc,
643*6467f958SSadaf Ebrahimi (void *)&threadID);
644*6467f958SSadaf Ebrahimi #endif // !_WIN32
645*6467f958SSadaf Ebrahimi if (err)
646*6467f958SSadaf Ebrahimi {
647*6467f958SSadaf Ebrahimi log_error("Error %d launching thread %d\n", err, i);
648*6467f958SSadaf Ebrahimi threadPoolInitErr = err;
649*6467f958SSadaf Ebrahimi gThreadCount = i;
650*6467f958SSadaf Ebrahimi break;
651*6467f958SSadaf Ebrahimi }
652*6467f958SSadaf Ebrahimi #if !defined(_WIN32)
653*6467f958SSadaf Ebrahimi pthreads.push_back(tid);
654*6467f958SSadaf Ebrahimi #endif // !_WIN32
655*6467f958SSadaf Ebrahimi }
656*6467f958SSadaf Ebrahimi
657*6467f958SSadaf Ebrahimi atexit(ThreadPool_Exit);
658*6467f958SSadaf Ebrahimi
659*6467f958SSadaf Ebrahimi // block until they are done launching.
660*6467f958SSadaf Ebrahimi do
661*6467f958SSadaf Ebrahimi {
662*6467f958SSadaf Ebrahimi #if defined(_WIN32)
663*6467f958SSadaf Ebrahimi WaitForSingleObject(caller_event, INFINITE);
664*6467f958SSadaf Ebrahimi #else // !_WIN32
665*6467f958SSadaf Ebrahimi if ((err = pthread_cond_wait(&caller_cond_var, &caller_cond_lock)))
666*6467f958SSadaf Ebrahimi {
667*6467f958SSadaf Ebrahimi log_error("Error %d from pthread_cond_wait. Unable to block for "
668*6467f958SSadaf Ebrahimi "work to finish. ThreadPool_Init failed.\n",
669*6467f958SSadaf Ebrahimi err);
670*6467f958SSadaf Ebrahimi pthread_mutex_unlock(&caller_cond_lock);
671*6467f958SSadaf Ebrahimi return;
672*6467f958SSadaf Ebrahimi }
673*6467f958SSadaf Ebrahimi #endif // !_WIN32
674*6467f958SSadaf Ebrahimi } while (gRunCount != -gThreadCount);
675*6467f958SSadaf Ebrahimi #if !defined(_WIN32)
676*6467f958SSadaf Ebrahimi if ((err = pthread_mutex_unlock(&caller_cond_lock)))
677*6467f958SSadaf Ebrahimi {
678*6467f958SSadaf Ebrahimi log_error("Error %d from pthread_mutex_unlock. Unable to block for "
679*6467f958SSadaf Ebrahimi "work to finish. ThreadPool_Init failed.\n",
680*6467f958SSadaf Ebrahimi err);
681*6467f958SSadaf Ebrahimi return;
682*6467f958SSadaf Ebrahimi }
683*6467f958SSadaf Ebrahimi #endif // !_WIN32
684*6467f958SSadaf Ebrahimi
685*6467f958SSadaf Ebrahimi threadPoolInitErr = CL_SUCCESS;
686*6467f958SSadaf Ebrahimi }
687*6467f958SSadaf Ebrahimi
688*6467f958SSadaf Ebrahimi #if defined(_MSC_VER)
_ThreadPool_Init(_PINIT_ONCE InitOnce,PVOID Parameter,PVOID * lpContex)689*6467f958SSadaf Ebrahimi static BOOL CALLBACK _ThreadPool_Init(_PINIT_ONCE InitOnce, PVOID Parameter,
690*6467f958SSadaf Ebrahimi PVOID *lpContex)
691*6467f958SSadaf Ebrahimi {
692*6467f958SSadaf Ebrahimi ThreadPool_Init();
693*6467f958SSadaf Ebrahimi return TRUE;
694*6467f958SSadaf Ebrahimi }
695*6467f958SSadaf Ebrahimi #endif
696*6467f958SSadaf Ebrahimi
ThreadPool_Exit(void)697*6467f958SSadaf Ebrahimi void ThreadPool_Exit(void)
698*6467f958SSadaf Ebrahimi {
699*6467f958SSadaf Ebrahimi gRunCount = CL_INT_MAX;
700*6467f958SSadaf Ebrahimi
701*6467f958SSadaf Ebrahimi #if defined(__GNUC__)
702*6467f958SSadaf Ebrahimi // GCC extension:
703*6467f958SSadaf Ebrahimi // http://gcc.gnu.org/onlinedocs/gcc/Atomic-Builtins.html#Atomic-Builtins
704*6467f958SSadaf Ebrahimi __sync_synchronize();
705*6467f958SSadaf Ebrahimi #elif defined(_MSC_VER)
706*6467f958SSadaf Ebrahimi _mm_mfence();
707*6467f958SSadaf Ebrahimi #else
708*6467f958SSadaf Ebrahimi #warning If this is a weakly ordered memory system, please add a memory barrier here to force this and everything else to memory before we proceed
709*6467f958SSadaf Ebrahimi #endif
710*6467f958SSadaf Ebrahimi
711*6467f958SSadaf Ebrahimi // spin waiting for threads to die
712*6467f958SSadaf Ebrahimi for (int count = 0; 0 != gThreadCount && count < 1000; count++)
713*6467f958SSadaf Ebrahimi {
714*6467f958SSadaf Ebrahimi #if defined(_WIN32)
715*6467f958SSadaf Ebrahimi _WakeAllConditionVariable(cond_var);
716*6467f958SSadaf Ebrahimi Sleep(1);
717*6467f958SSadaf Ebrahimi #else // !_WIN32
718*6467f958SSadaf Ebrahimi if (int err = pthread_cond_broadcast(&cond_var))
719*6467f958SSadaf Ebrahimi {
720*6467f958SSadaf Ebrahimi log_error("Error %d from pthread_cond_broadcast. Unable to wake up "
721*6467f958SSadaf Ebrahimi "work threads. ThreadPool_Exit failed.\n",
722*6467f958SSadaf Ebrahimi err);
723*6467f958SSadaf Ebrahimi break;
724*6467f958SSadaf Ebrahimi }
725*6467f958SSadaf Ebrahimi usleep(1000);
726*6467f958SSadaf Ebrahimi #endif // !_WIN32
727*6467f958SSadaf Ebrahimi }
728*6467f958SSadaf Ebrahimi
729*6467f958SSadaf Ebrahimi if (gThreadCount)
730*6467f958SSadaf Ebrahimi log_error("Error: Thread pool timed out after 1 second with %d threads "
731*6467f958SSadaf Ebrahimi "still active.\n",
732*6467f958SSadaf Ebrahimi gThreadCount.load());
733*6467f958SSadaf Ebrahimi else
734*6467f958SSadaf Ebrahimi {
735*6467f958SSadaf Ebrahimi #if !defined(_WIN32)
736*6467f958SSadaf Ebrahimi for (pthread_t pthread : pthreads)
737*6467f958SSadaf Ebrahimi {
738*6467f958SSadaf Ebrahimi if (int err = pthread_join(pthread, nullptr))
739*6467f958SSadaf Ebrahimi {
740*6467f958SSadaf Ebrahimi log_error("Error from %d from pthread_join. Unable to join "
741*6467f958SSadaf Ebrahimi "work threads. ThreadPool_Exit failed.\n",
742*6467f958SSadaf Ebrahimi err);
743*6467f958SSadaf Ebrahimi }
744*6467f958SSadaf Ebrahimi }
745*6467f958SSadaf Ebrahimi #endif
746*6467f958SSadaf Ebrahimi log_info("Thread pool exited in a orderly fashion.\n");
747*6467f958SSadaf Ebrahimi }
748*6467f958SSadaf Ebrahimi }
749*6467f958SSadaf Ebrahimi
750*6467f958SSadaf Ebrahimi
751*6467f958SSadaf Ebrahimi // Blocking API that farms out count jobs to a thread pool.
752*6467f958SSadaf Ebrahimi // It may return with some work undone if func_ptr() returns a non-zero
753*6467f958SSadaf Ebrahimi // result.
754*6467f958SSadaf Ebrahimi //
755*6467f958SSadaf Ebrahimi // This function obviously has its shortcommings. Only one call to ThreadPool_Do
756*6467f958SSadaf Ebrahimi // can be running at a time. It is not intended for general purpose use.
757*6467f958SSadaf Ebrahimi // If clEnqueueNativeKernelFn, out of order queues and a CL_DEVICE_TYPE_CPU were
758*6467f958SSadaf Ebrahimi // all available then it would make more sense to use those features.
ThreadPool_Do(TPFuncPtr func_ptr,cl_uint count,void * userInfo)759*6467f958SSadaf Ebrahimi cl_int ThreadPool_Do(TPFuncPtr func_ptr, cl_uint count, void *userInfo)
760*6467f958SSadaf Ebrahimi {
761*6467f958SSadaf Ebrahimi #ifndef _WIN32
762*6467f958SSadaf Ebrahimi cl_int newErr;
763*6467f958SSadaf Ebrahimi #endif
764*6467f958SSadaf Ebrahimi cl_int err = 0;
765*6467f958SSadaf Ebrahimi // Lazily set up our threads
766*6467f958SSadaf Ebrahimi #if defined(_MSC_VER) && (_WIN32_WINNT >= 0x600)
767*6467f958SSadaf Ebrahimi err = !_InitOnceExecuteOnce(&threadpool_init_control, _ThreadPool_Init,
768*6467f958SSadaf Ebrahimi NULL, NULL);
769*6467f958SSadaf Ebrahimi #elif defined(_WIN32)
770*6467f958SSadaf Ebrahimi if (threadpool_init_control == 0)
771*6467f958SSadaf Ebrahimi {
772*6467f958SSadaf Ebrahimi #warning This is buggy and race prone. Find a better way.
773*6467f958SSadaf Ebrahimi ThreadPool_Init();
774*6467f958SSadaf Ebrahimi threadpool_init_control = 1;
775*6467f958SSadaf Ebrahimi }
776*6467f958SSadaf Ebrahimi #else // posix platform
777*6467f958SSadaf Ebrahimi err = pthread_once(&threadpool_init_control, ThreadPool_Init);
778*6467f958SSadaf Ebrahimi if (err)
779*6467f958SSadaf Ebrahimi {
780*6467f958SSadaf Ebrahimi log_error("Error %d from pthread_once. Unable to init threads. "
781*6467f958SSadaf Ebrahimi "ThreadPool_Do failed.\n",
782*6467f958SSadaf Ebrahimi err);
783*6467f958SSadaf Ebrahimi return err;
784*6467f958SSadaf Ebrahimi }
785*6467f958SSadaf Ebrahimi #endif
786*6467f958SSadaf Ebrahimi // Single threaded code to handle case where threadpool wasn't allocated or
787*6467f958SSadaf Ebrahimi // was disabled by environment variable
788*6467f958SSadaf Ebrahimi if (threadPoolInitErr)
789*6467f958SSadaf Ebrahimi {
790*6467f958SSadaf Ebrahimi cl_uint currentJob = 0;
791*6467f958SSadaf Ebrahimi cl_int result = CL_SUCCESS;
792*6467f958SSadaf Ebrahimi
793*6467f958SSadaf Ebrahimi #if defined(__APPLE__) && defined(__arm__)
794*6467f958SSadaf Ebrahimi // On most platforms which support denorm, default is FTZ off. However,
795*6467f958SSadaf Ebrahimi // on some hardware where the reference is computed, default might be
796*6467f958SSadaf Ebrahimi // flush denorms to zero e.g. arm. This creates issues in result
797*6467f958SSadaf Ebrahimi // verification. Since spec allows the implementation to either flush or
798*6467f958SSadaf Ebrahimi // not flush denorms to zero, an implementation may choose not be flush
799*6467f958SSadaf Ebrahimi // i.e. return denorm result whereas reference result may be zero
800*6467f958SSadaf Ebrahimi // (flushed denorm). Hence we need to disable denorm flushing on host
801*6467f958SSadaf Ebrahimi // side where reference is being computed to make sure we get
802*6467f958SSadaf Ebrahimi // non-flushed reference result. If implementation returns flushed
803*6467f958SSadaf Ebrahimi // result, we correctly take care of that in verification code.
804*6467f958SSadaf Ebrahimi FPU_mode_type oldMode;
805*6467f958SSadaf Ebrahimi DisableFTZ(&oldMode);
806*6467f958SSadaf Ebrahimi #endif
807*6467f958SSadaf Ebrahimi for (currentJob = 0; currentJob < count; currentJob++)
808*6467f958SSadaf Ebrahimi if ((result = func_ptr(currentJob, 0, userInfo)))
809*6467f958SSadaf Ebrahimi {
810*6467f958SSadaf Ebrahimi #if defined(__APPLE__) && defined(__arm__)
811*6467f958SSadaf Ebrahimi // Restore FP state before leaving
812*6467f958SSadaf Ebrahimi RestoreFPState(&oldMode);
813*6467f958SSadaf Ebrahimi #endif
814*6467f958SSadaf Ebrahimi return result;
815*6467f958SSadaf Ebrahimi }
816*6467f958SSadaf Ebrahimi
817*6467f958SSadaf Ebrahimi #if defined(__APPLE__) && defined(__arm__)
818*6467f958SSadaf Ebrahimi // Restore FP state before leaving
819*6467f958SSadaf Ebrahimi RestoreFPState(&oldMode);
820*6467f958SSadaf Ebrahimi #endif
821*6467f958SSadaf Ebrahimi
822*6467f958SSadaf Ebrahimi return CL_SUCCESS;
823*6467f958SSadaf Ebrahimi }
824*6467f958SSadaf Ebrahimi
825*6467f958SSadaf Ebrahimi if (count >= MAX_COUNT)
826*6467f958SSadaf Ebrahimi {
827*6467f958SSadaf Ebrahimi log_error(
828*6467f958SSadaf Ebrahimi "Error: ThreadPool_Do count %d >= max threadpool count of %d\n",
829*6467f958SSadaf Ebrahimi count, MAX_COUNT);
830*6467f958SSadaf Ebrahimi return -1;
831*6467f958SSadaf Ebrahimi }
832*6467f958SSadaf Ebrahimi
833*6467f958SSadaf Ebrahimi // Enter critical region
834*6467f958SSadaf Ebrahimi #if defined(_WIN32)
835*6467f958SSadaf Ebrahimi EnterCriticalSection(gThreadPoolLock);
836*6467f958SSadaf Ebrahimi #else // !_WIN32
837*6467f958SSadaf Ebrahimi if ((err = pthread_mutex_lock(&gThreadPoolLock)))
838*6467f958SSadaf Ebrahimi {
839*6467f958SSadaf Ebrahimi switch (err)
840*6467f958SSadaf Ebrahimi {
841*6467f958SSadaf Ebrahimi case EDEADLK:
842*6467f958SSadaf Ebrahimi log_error(
843*6467f958SSadaf Ebrahimi "Error EDEADLK returned in ThreadPool_Do(). ThreadPool_Do "
844*6467f958SSadaf Ebrahimi "is not designed to work recursively!\n");
845*6467f958SSadaf Ebrahimi break;
846*6467f958SSadaf Ebrahimi case EINVAL:
847*6467f958SSadaf Ebrahimi log_error("Error EINVAL returned in ThreadPool_Do(). How did "
848*6467f958SSadaf Ebrahimi "we end up with an invalid gThreadPoolLock?\n");
849*6467f958SSadaf Ebrahimi break;
850*6467f958SSadaf Ebrahimi default: break;
851*6467f958SSadaf Ebrahimi }
852*6467f958SSadaf Ebrahimi return err;
853*6467f958SSadaf Ebrahimi }
854*6467f958SSadaf Ebrahimi #endif // !_WIN32
855*6467f958SSadaf Ebrahimi
856*6467f958SSadaf Ebrahimi // Start modifying the job state observable by worker threads
857*6467f958SSadaf Ebrahimi #if defined(_WIN32)
858*6467f958SSadaf Ebrahimi EnterCriticalSection(cond_lock);
859*6467f958SSadaf Ebrahimi #else // !_WIN32
860*6467f958SSadaf Ebrahimi if ((err = pthread_mutex_lock(&cond_lock)))
861*6467f958SSadaf Ebrahimi {
862*6467f958SSadaf Ebrahimi log_error("Error %d from pthread_mutex_lock. Unable to wake up work "
863*6467f958SSadaf Ebrahimi "threads. ThreadPool_Do failed.\n",
864*6467f958SSadaf Ebrahimi err);
865*6467f958SSadaf Ebrahimi goto exit;
866*6467f958SSadaf Ebrahimi }
867*6467f958SSadaf Ebrahimi #endif // !_WIN32
868*6467f958SSadaf Ebrahimi
869*6467f958SSadaf Ebrahimi // Make sure the last thread done in the work pool doesn't signal us to wake
870*6467f958SSadaf Ebrahimi // before we get to the point where we are supposed to wait
871*6467f958SSadaf Ebrahimi // That would cause a deadlock.
872*6467f958SSadaf Ebrahimi #if !defined(_WIN32)
873*6467f958SSadaf Ebrahimi if ((err = pthread_mutex_lock(&caller_cond_lock)))
874*6467f958SSadaf Ebrahimi {
875*6467f958SSadaf Ebrahimi log_error("Error %d from pthread_mutex_lock. Unable to block for work "
876*6467f958SSadaf Ebrahimi "to finish. ThreadPool_Do failed.\n",
877*6467f958SSadaf Ebrahimi err);
878*6467f958SSadaf Ebrahimi goto exit;
879*6467f958SSadaf Ebrahimi }
880*6467f958SSadaf Ebrahimi #endif // !_WIN32
881*6467f958SSadaf Ebrahimi
882*6467f958SSadaf Ebrahimi // Prime the worker threads to get going
883*6467f958SSadaf Ebrahimi jobError = CL_SUCCESS;
884*6467f958SSadaf Ebrahimi gRunCount = gJobCount = count;
885*6467f958SSadaf Ebrahimi gFunc_ptr = func_ptr;
886*6467f958SSadaf Ebrahimi gUserInfo = userInfo;
887*6467f958SSadaf Ebrahimi
888*6467f958SSadaf Ebrahimi #if defined(_WIN32)
889*6467f958SSadaf Ebrahimi ResetEvent(caller_event);
890*6467f958SSadaf Ebrahimi _WakeAllConditionVariable(cond_var);
891*6467f958SSadaf Ebrahimi LeaveCriticalSection(cond_lock);
892*6467f958SSadaf Ebrahimi #else // !_WIN32
893*6467f958SSadaf Ebrahimi if ((err = pthread_cond_broadcast(&cond_var)))
894*6467f958SSadaf Ebrahimi {
895*6467f958SSadaf Ebrahimi log_error("Error %d from pthread_cond_broadcast. Unable to wake up "
896*6467f958SSadaf Ebrahimi "work threads. ThreadPool_Do failed.\n",
897*6467f958SSadaf Ebrahimi err);
898*6467f958SSadaf Ebrahimi goto exit;
899*6467f958SSadaf Ebrahimi }
900*6467f958SSadaf Ebrahimi if ((err = pthread_mutex_unlock(&cond_lock)))
901*6467f958SSadaf Ebrahimi {
902*6467f958SSadaf Ebrahimi log_error("Error %d from pthread_mutex_unlock. Unable to wake up work "
903*6467f958SSadaf Ebrahimi "threads. ThreadPool_Do failed.\n",
904*6467f958SSadaf Ebrahimi err);
905*6467f958SSadaf Ebrahimi goto exit;
906*6467f958SSadaf Ebrahimi }
907*6467f958SSadaf Ebrahimi #endif // !_WIN32
908*6467f958SSadaf Ebrahimi
909*6467f958SSadaf Ebrahimi // block until they are done. It would be slightly more efficient to do
910*6467f958SSadaf Ebrahimi // some of the work here though.
911*6467f958SSadaf Ebrahimi do
912*6467f958SSadaf Ebrahimi {
913*6467f958SSadaf Ebrahimi #if defined(_WIN32)
914*6467f958SSadaf Ebrahimi WaitForSingleObject(caller_event, INFINITE);
915*6467f958SSadaf Ebrahimi #else // !_WIN32
916*6467f958SSadaf Ebrahimi if ((err = pthread_cond_wait(&caller_cond_var, &caller_cond_lock)))
917*6467f958SSadaf Ebrahimi {
918*6467f958SSadaf Ebrahimi log_error("Error %d from pthread_cond_wait. Unable to block for "
919*6467f958SSadaf Ebrahimi "work to finish. ThreadPool_Do failed.\n",
920*6467f958SSadaf Ebrahimi err);
921*6467f958SSadaf Ebrahimi pthread_mutex_unlock(&caller_cond_lock);
922*6467f958SSadaf Ebrahimi goto exit;
923*6467f958SSadaf Ebrahimi }
924*6467f958SSadaf Ebrahimi #endif // !_WIN32
925*6467f958SSadaf Ebrahimi } while (gRunning);
926*6467f958SSadaf Ebrahimi #if !defined(_WIN32)
927*6467f958SSadaf Ebrahimi if ((err = pthread_mutex_unlock(&caller_cond_lock)))
928*6467f958SSadaf Ebrahimi {
929*6467f958SSadaf Ebrahimi log_error("Error %d from pthread_mutex_unlock. Unable to block for "
930*6467f958SSadaf Ebrahimi "work to finish. ThreadPool_Do failed.\n",
931*6467f958SSadaf Ebrahimi err);
932*6467f958SSadaf Ebrahimi goto exit;
933*6467f958SSadaf Ebrahimi }
934*6467f958SSadaf Ebrahimi #endif // !_WIN32
935*6467f958SSadaf Ebrahimi
936*6467f958SSadaf Ebrahimi err = jobError;
937*6467f958SSadaf Ebrahimi
938*6467f958SSadaf Ebrahimi #ifndef _WIN32
939*6467f958SSadaf Ebrahimi exit:
940*6467f958SSadaf Ebrahimi #endif
941*6467f958SSadaf Ebrahimi // exit critical region
942*6467f958SSadaf Ebrahimi #if defined(_WIN32)
943*6467f958SSadaf Ebrahimi LeaveCriticalSection(gThreadPoolLock);
944*6467f958SSadaf Ebrahimi #else // !_WIN32
945*6467f958SSadaf Ebrahimi newErr = pthread_mutex_unlock(&gThreadPoolLock);
946*6467f958SSadaf Ebrahimi if (newErr)
947*6467f958SSadaf Ebrahimi {
948*6467f958SSadaf Ebrahimi log_error("Error %d from pthread_mutex_unlock. Unable to exit critical "
949*6467f958SSadaf Ebrahimi "region. ThreadPool_Do failed.\n",
950*6467f958SSadaf Ebrahimi newErr);
951*6467f958SSadaf Ebrahimi return err;
952*6467f958SSadaf Ebrahimi }
953*6467f958SSadaf Ebrahimi #endif // !_WIN32
954*6467f958SSadaf Ebrahimi
955*6467f958SSadaf Ebrahimi return err;
956*6467f958SSadaf Ebrahimi }
957*6467f958SSadaf Ebrahimi
GetThreadCount(void)958*6467f958SSadaf Ebrahimi cl_uint GetThreadCount(void)
959*6467f958SSadaf Ebrahimi {
960*6467f958SSadaf Ebrahimi // Lazily set up our threads
961*6467f958SSadaf Ebrahimi #if defined(_MSC_VER) && (_WIN32_WINNT >= 0x600)
962*6467f958SSadaf Ebrahimi cl_int err = !_InitOnceExecuteOnce(&threadpool_init_control,
963*6467f958SSadaf Ebrahimi _ThreadPool_Init, NULL, NULL);
964*6467f958SSadaf Ebrahimi #elif defined(_WIN32)
965*6467f958SSadaf Ebrahimi if (threadpool_init_control == 0)
966*6467f958SSadaf Ebrahimi {
967*6467f958SSadaf Ebrahimi #warning This is buggy and race prone. Find a better way.
968*6467f958SSadaf Ebrahimi ThreadPool_Init();
969*6467f958SSadaf Ebrahimi threadpool_init_control = 1;
970*6467f958SSadaf Ebrahimi }
971*6467f958SSadaf Ebrahimi #else
972*6467f958SSadaf Ebrahimi cl_int err = pthread_once(&threadpool_init_control, ThreadPool_Init);
973*6467f958SSadaf Ebrahimi if (err)
974*6467f958SSadaf Ebrahimi {
975*6467f958SSadaf Ebrahimi log_error("Error %d from pthread_once. Unable to init threads. "
976*6467f958SSadaf Ebrahimi "ThreadPool_Do failed.\n",
977*6467f958SSadaf Ebrahimi err);
978*6467f958SSadaf Ebrahimi return err;
979*6467f958SSadaf Ebrahimi }
980*6467f958SSadaf Ebrahimi #endif // !_WIN32
981*6467f958SSadaf Ebrahimi
982*6467f958SSadaf Ebrahimi if (gThreadCount < 1) return 1;
983*6467f958SSadaf Ebrahimi
984*6467f958SSadaf Ebrahimi return gThreadCount;
985*6467f958SSadaf Ebrahimi }
986*6467f958SSadaf Ebrahimi
987*6467f958SSadaf Ebrahimi #else
988*6467f958SSadaf Ebrahimi
989*6467f958SSadaf Ebrahimi #ifndef MY_OS_REALLY_REALLY_DOESNT_SUPPORT_THREADS
990*6467f958SSadaf Ebrahimi #error ThreadPool implementation has not been multithreaded for this operating system. You must multithread this section.
991*6467f958SSadaf Ebrahimi #endif
992*6467f958SSadaf Ebrahimi //
// We require multithreading in parts of the test as a means of testing the
// reentrancy requirements of the OpenCL API.
995*6467f958SSadaf Ebrahimi //
996*6467f958SSadaf Ebrahimi // A sample single threaded implementation follows, for documentation /
997*6467f958SSadaf Ebrahimi // bootstrapping purposes. It is not okay to use this for conformance testing!!!
998*6467f958SSadaf Ebrahimi //
999*6467f958SSadaf Ebrahimi // Exception: If your operating system does not support multithreaded execution
1000*6467f958SSadaf Ebrahimi // of any kind, then you may use this code.
1001*6467f958SSadaf Ebrahimi //
1002*6467f958SSadaf Ebrahimi
// Single threaded stand-in for an atomic fetch-and-add: stores *a + b into *a
// and returns the value *a held before the addition.
cl_int ThreadPool_AtomicAdd(volatile cl_int *a, cl_int b)
{
    // Value before the update; this is what the caller gets back.
    const cl_uint previous = *a;

    // This fallback code path is not multithreaded, so a plain
    // read-modify-write is sufficient. If your operating system supports
    // memory-barrier-atomics, use those here.
    *a = previous + b;

    return previous;
}
1014*6467f958SSadaf Ebrahimi
1015*6467f958SSadaf Ebrahimi // Blocking API that farms out count jobs to a thread pool.
1016*6467f958SSadaf Ebrahimi // It may return with some work undone if func_ptr() returns a non-zero
1017*6467f958SSadaf Ebrahimi // result.
ThreadPool_Do(TPFuncPtr func_ptr,cl_uint count,void * userInfo)1018*6467f958SSadaf Ebrahimi cl_int ThreadPool_Do(TPFuncPtr func_ptr, cl_uint count, void *userInfo)
1019*6467f958SSadaf Ebrahimi {
1020*6467f958SSadaf Ebrahimi cl_uint currentJob = 0;
1021*6467f958SSadaf Ebrahimi cl_int result = CL_SUCCESS;
1022*6467f958SSadaf Ebrahimi
1023*6467f958SSadaf Ebrahimi #ifndef MY_OS_REALLY_REALLY_DOESNT_SUPPORT_THREADS
1024*6467f958SSadaf Ebrahimi // THIS FUNCTION IS NOT INTENDED FOR USE!!
1025*6467f958SSadaf Ebrahimi log_error("ERROR: Test must be multithreaded!\n");
1026*6467f958SSadaf Ebrahimi exit(-1);
1027*6467f958SSadaf Ebrahimi #else
1028*6467f958SSadaf Ebrahimi static int spewCount = 0;
1029*6467f958SSadaf Ebrahimi
1030*6467f958SSadaf Ebrahimi if (0 == spewCount)
1031*6467f958SSadaf Ebrahimi {
1032*6467f958SSadaf Ebrahimi log_info("\nWARNING: The operating system is claimed not to support "
1033*6467f958SSadaf Ebrahimi "threads of any sort. Running single threaded.\n");
1034*6467f958SSadaf Ebrahimi spewCount = 1;
1035*6467f958SSadaf Ebrahimi }
1036*6467f958SSadaf Ebrahimi #endif
1037*6467f958SSadaf Ebrahimi
1038*6467f958SSadaf Ebrahimi // The multithreaded code should mimic this behavior:
1039*6467f958SSadaf Ebrahimi for (currentJob = 0; currentJob < count; currentJob++)
1040*6467f958SSadaf Ebrahimi if ((result = func_ptr(currentJob, 0, userInfo))) return result;
1041*6467f958SSadaf Ebrahimi
1042*6467f958SSadaf Ebrahimi return CL_SUCCESS;
1043*6467f958SSadaf Ebrahimi }
1044*6467f958SSadaf Ebrahimi
GetThreadCount(void)1045*6467f958SSadaf Ebrahimi cl_uint GetThreadCount(void) { return 1; }
1046*6467f958SSadaf Ebrahimi
SetThreadCount(int count)1047*6467f958SSadaf Ebrahimi void SetThreadCount(int count)
1048*6467f958SSadaf Ebrahimi {
1049*6467f958SSadaf Ebrahimi if (count > 1) log_info("WARNING: SetThreadCount(%d) ignored\n", count);
1050*6467f958SSadaf Ebrahimi }
1051*6467f958SSadaf Ebrahimi
1052*6467f958SSadaf Ebrahimi #endif
1053