1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "testBase.h"
17 #include "gl/setup.h"
18 #include "harness/genericThread.h"
19
20 #if defined(__APPLE__)
21 #include <OpenGL/glu.h>
22 #else
23 #include <GL/glu.h>
24 #include <CL/cl_gl.h>
25 #if !defined(_WIN32) && !defined(__ANDROID__)
26 #include <GL/glx.h>
27 #endif
28 #endif
29
30 #ifndef GLsync
31 // For OpenGL before 3.2, we look for the ARB_sync extension and try to use that
32 #if !defined(_WIN32)
33 #include <inttypes.h>
34 #endif // !_WIN32
35 typedef int64_t GLint64;
36 typedef uint64_t GLuint64;
37 typedef struct __GLsync *GLsync;
38
39 #ifndef APIENTRY
40 #define APIENTRY
41 #endif
42
43 typedef GLsync(APIENTRY *glFenceSyncPtr)(GLenum condition, GLbitfield flags);
44 glFenceSyncPtr glFenceSyncFunc;
45
46 typedef bool(APIENTRY *glIsSyncPtr)(GLsync sync);
47 glIsSyncPtr glIsSyncFunc;
48
49 typedef void(APIENTRY *glDeleteSyncPtr)(GLsync sync);
50 glDeleteSyncPtr glDeleteSyncFunc;
51
52 typedef GLenum(APIENTRY *glClientWaitSyncPtr)(GLsync sync, GLbitfield flags,
53 GLuint64 timeout);
54 glClientWaitSyncPtr glClientWaitSyncFunc;
55
56 typedef void(APIENTRY *glWaitSyncPtr)(GLsync sync, GLbitfield flags,
57 GLuint64 timeout);
58 glWaitSyncPtr glWaitSyncFunc;
59
60 typedef void(APIENTRY *glGetInteger64vPtr)(GLenum pname, GLint64 *params);
61 glGetInteger64vPtr glGetInteger64vFunc;
62
63 typedef void(APIENTRY *glGetSyncivPtr)(GLsync sync, GLenum pname,
64 GLsizei bufSize, GLsizei *length,
65 GLint *values);
66 glGetSyncivPtr glGetSyncivFunc;
67
68 #define CHK_GL_ERR() printf("%s\n", gluErrorString(glGetError()))
69
InitSyncFns(void)70 static void InitSyncFns(void)
71 {
72 glFenceSyncFunc = (glFenceSyncPtr)glutGetProcAddress("glFenceSync");
73 glIsSyncFunc = (glIsSyncPtr)glutGetProcAddress("glIsSync");
74 glDeleteSyncFunc = (glDeleteSyncPtr)glutGetProcAddress("glDeleteSync");
75 glClientWaitSyncFunc =
76 (glClientWaitSyncPtr)glutGetProcAddress("glClientWaitSync");
77 glWaitSyncFunc = (glWaitSyncPtr)glutGetProcAddress("glWaitSync");
78 glGetInteger64vFunc =
79 (glGetInteger64vPtr)glutGetProcAddress("glGetInteger64v");
80 glGetSyncivFunc = (glGetSyncivPtr)glutGetProcAddress("glGetSynciv");
81 }
82 #ifndef GL_ARB_sync
83 #define GL_MAX_SERVER_WAIT_TIMEOUT 0x9111
84
85 #define GL_OBJECT_TYPE 0x9112
86 #define GL_SYNC_CONDITION 0x9113
87 #define GL_SYNC_STATUS 0x9114
88 #define GL_SYNC_FLAGS 0x9115
89
90 #define GL_SYNC_FENCE 0x9116
91
92 #define GL_SYNC_GPU_COMMANDS_COMPLETE 0x9117
93
94 #define GL_UNSIGNALED 0x9118
95 #define GL_SIGNALED 0x9119
96
97 #define GL_SYNC_FLUSH_COMMANDS_BIT 0x00000001
98
99 #define GL_TIMEOUT_IGNORED 0xFFFFFFFFFFFFFFFFull
100
101 #define GL_ALREADY_SIGNALED 0x911A
102 #define GL_TIMEOUT_EXPIRED 0x911B
103 #define GL_CONDITION_SATISFIED 0x911C
104 #define GL_WAIT_FAILED 0x911D
105 #endif
106
107 #define USING_ARB_sync 1
108 #endif
109
110 typedef cl_event(CL_API_CALL *clCreateEventFromGLsyncKHR_fn)(
111 cl_context context, GLsync sync, cl_int *errCode_ret);
112
113 clCreateEventFromGLsyncKHR_fn clCreateEventFromGLsyncKHR_ptr;
114
115
116 // clang-format off
117 static const char *updateBuffersKernel[] = {
118 "__kernel void update( __global float4 * vertices, __global float4 "
119 "*colors, int horizWrap, int rowIdx )\n"
120 "{\n"
121 " size_t tid = get_global_id(0);\n"
122 "\n"
123 " size_t xVal = ( tid & ( horizWrap - 1 ) );\n"
124 " vertices[ tid * 2 + 0 ] = (float4)( xVal, rowIdx*16.f, 0.0f, 1.f );\n"
125 " vertices[ tid * 2 + 1 ] = (float4)( xVal, rowIdx*16.f + 4.0f, 0.0f, "
126 "1.f );\n"
127 "\n"
128 " int rowV = rowIdx + 1;\n"
129 " colors[ tid * 2 + 0 ] = (float4)( ( rowV & 1 ) / 255.f, ( ( rowV & 2 "
130 ") >> 1 ) / 255.f, ( ( rowV & 4 ) >> 2 ) / 255.f, 1.f );\n"
131 " //colors[ tid * 2 + 0 ] = (float4)( (float)xVal/(float)horizWrap, "
132 "1.0f, 1.0f, 1.0f );\n"
133 " colors[ tid * 2 + 1 ] = colors[ tid * 2 + 0 ];\n"
134 "}\n"
135 };
136 // clang-format on
137
138 // Passthrough VertexShader
139 static const char *vertexshader = "#version 150\n"
140 "uniform mat4 projMatrix;\n"
141 "in vec4 inPosition;\n"
142 "in vec4 inColor;\n"
143 "out vec4 vertColor;\n"
144 "void main (void) {\n"
145 " gl_Position = projMatrix*inPosition;\n"
146 " vertColor = inColor;\n"
147 "}\n";
148
149 // Passthrough FragmentShader
150 static const char *fragmentshader = "#version 150\n"
151 "in vec4 vertColor;\n"
152 "out vec4 outColor;\n"
153 "void main (void) {\n"
154 " outColor = vertColor;\n"
155 "}\n";
156
createShaderProgram(GLint * posLoc,GLint * colLoc)157 GLuint createShaderProgram(GLint *posLoc, GLint *colLoc)
158 {
159 GLint logLength, status;
160 GLuint program = glCreateProgram();
161 GLuint vpShader;
162
163 vpShader = glCreateShader(GL_VERTEX_SHADER);
164 glShaderSource(vpShader, 1, (const GLchar **)&vertexshader, NULL);
165 glCompileShader(vpShader);
166 glGetShaderiv(vpShader, GL_INFO_LOG_LENGTH, &logLength);
167 if (logLength > 0)
168 {
169 GLchar *log = (GLchar *)malloc(logLength);
170 glGetShaderInfoLog(vpShader, logLength, &logLength, log);
171 log_info("Vtx Shader compile log:\n%s", log);
172 free(log);
173 }
174
175 glGetShaderiv(vpShader, GL_COMPILE_STATUS, &status);
176 if (status == 0)
177 {
178 log_error("Failed to compile vtx shader:\n");
179 return 0;
180 }
181
182 glAttachShader(program, vpShader);
183
184 GLuint fpShader;
185 fpShader = glCreateShader(GL_FRAGMENT_SHADER);
186 glShaderSource(fpShader, 1, (const GLchar **)&fragmentshader, NULL);
187 glCompileShader(fpShader);
188
189 glGetShaderiv(fpShader, GL_INFO_LOG_LENGTH, &logLength);
190 if (logLength > 0)
191 {
192 GLchar *log = (GLchar *)malloc(logLength);
193 glGetShaderInfoLog(fpShader, logLength, &logLength, log);
194 log_info("Frag Shader compile log:\n%s", log);
195 free(log);
196 }
197
198 glAttachShader(program, fpShader);
199 glGetShaderiv(fpShader, GL_COMPILE_STATUS, &status);
200 if (status == 0)
201 {
202 log_error("Failed to compile frag shader:\n\n");
203 return 0;
204 }
205
206 glLinkProgram(program);
207 glGetProgramiv(program, GL_INFO_LOG_LENGTH, &logLength);
208 if (logLength > 0)
209 {
210 GLchar *log = (GLchar *)malloc(logLength);
211 glGetProgramInfoLog(program, logLength, &logLength, log);
212 log_info("Program link log:\n%s", log);
213 free(log);
214 }
215
216 glGetProgramiv(program, GL_LINK_STATUS, &status);
217 if (status == 0)
218 {
219 log_error("Failed to link program\n");
220 return 0;
221 }
222
223 *posLoc = glGetAttribLocation(program, "inPosition");
224 *colLoc = glGetAttribLocation(program, "inColor");
225
226 return program;
227 }
228
destroyShaderProgram(GLuint program)229 void destroyShaderProgram(GLuint program)
230 {
231 GLuint shaders[2];
232 GLsizei count;
233 glUseProgram(0);
234 glGetAttachedShaders(program, 2, &count, shaders);
235 int i;
236 for (i = 0; i < count; i++)
237 {
238 glDetachShader(program, shaders[i]);
239 glDeleteShader(shaders[i]);
240 }
241 glDeleteProgram(program);
242 }
243
244 // This function queues up and runs the above CL kernel that writes the vertex
245 // data
run_cl_kernel(cl_kernel kernel,cl_command_queue queue,cl_mem stream0,cl_mem stream1,cl_int rowIdx,cl_event fenceEvent,size_t numThreads)246 cl_int run_cl_kernel(cl_kernel kernel, cl_command_queue queue, cl_mem stream0,
247 cl_mem stream1, cl_int rowIdx, cl_event fenceEvent,
248 size_t numThreads)
249 {
250 cl_int error = clSetKernelArg(kernel, 3, sizeof(rowIdx), &rowIdx);
251 test_error(error, "Unable to set kernel arguments");
252
253 clEventWrapper acqEvent1, acqEvent2, kernEvent, relEvent1, relEvent2;
254 int numEvents = (fenceEvent != NULL) ? 1 : 0;
255 cl_event *fence_evt = (fenceEvent != NULL) ? &fenceEvent : NULL;
256
257 error = (*clEnqueueAcquireGLObjects_ptr)(queue, 1, &stream0, numEvents,
258 fence_evt, &acqEvent1);
259 test_error(error, "Unable to acquire GL obejcts");
260 error = (*clEnqueueAcquireGLObjects_ptr)(queue, 1, &stream1, numEvents,
261 fence_evt, &acqEvent2);
262 test_error(error, "Unable to acquire GL obejcts");
263
264 cl_event evts[2] = { acqEvent1, acqEvent2 };
265
266 error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &numThreads, NULL, 2,
267 evts, &kernEvent);
268 test_error(error, "Unable to execute test kernel");
269
270 error = (*clEnqueueReleaseGLObjects_ptr)(queue, 1, &stream0, 1, &kernEvent,
271 &relEvent1);
272 test_error(error, "clEnqueueReleaseGLObjects failed");
273 error = (*clEnqueueReleaseGLObjects_ptr)(queue, 1, &stream1, 1, &kernEvent,
274 &relEvent2);
275 test_error(error, "clEnqueueReleaseGLObjects failed");
276
277 evts[0] = relEvent1;
278 evts[1] = relEvent2;
279 error = clWaitForEvents(2, evts);
280 test_error(error, "Unable to wait for release events");
281
282 return 0;
283 }
284
285 class RunThread : public genericThread {
286 public:
287 cl_kernel mKernel;
288 cl_command_queue mQueue;
289 cl_mem mStream0, mStream1;
290 cl_int mRowIdx;
291 cl_event mFenceEvent;
292 size_t mNumThreads;
293
RunThread(cl_kernel kernel,cl_command_queue queue,cl_mem stream0,cl_mem stream1,size_t numThreads)294 RunThread(cl_kernel kernel, cl_command_queue queue, cl_mem stream0,
295 cl_mem stream1, size_t numThreads)
296 : mKernel(kernel), mQueue(queue), mStream0(stream0), mStream1(stream1),
297 mNumThreads(numThreads)
298 {}
299
SetRunData(cl_int rowIdx,cl_event fenceEvent)300 void SetRunData(cl_int rowIdx, cl_event fenceEvent)
301 {
302 mRowIdx = rowIdx;
303 mFenceEvent = fenceEvent;
304 }
305
IRun(void)306 virtual void *IRun(void)
307 {
308 cl_int error = run_cl_kernel(mKernel, mQueue, mStream0, mStream1,
309 mRowIdx, mFenceEvent, mNumThreads);
310 return (void *)(uintptr_t)error;
311 }
312 };
313
314
test_fence_sync_single(cl_device_id device,cl_context context,cl_command_queue queue,bool separateThreads,GLint rend_vs,GLint read_vs,cl_device_id rend_device)315 int test_fence_sync_single(cl_device_id device, cl_context context,
316 cl_command_queue queue, bool separateThreads,
317 GLint rend_vs, GLint read_vs,
318 cl_device_id rend_device)
319 {
320 int error;
321 const int framebufferSize = 512;
322
323
324 if (!is_extension_available(device, "cl_khr_gl_event"))
325 {
326 log_info("NOTE: cl_khr_gl_event extension not present on this device; "
327 "skipping fence sync test\n");
328 return 0;
329 }
330
331 // Ask OpenCL for the platforms. Warn if more than one platform found,
332 // since this might not be the platform we want. By default, we simply
333 // use the first returned platform.
334
335 cl_uint nplatforms;
336 cl_platform_id platform;
337 clGetPlatformIDs(0, NULL, &nplatforms);
338 clGetPlatformIDs(1, &platform, NULL);
339
340 if (nplatforms > 1)
341 {
342 log_info("clGetPlatformIDs returned multiple values. This is not "
343 "an error, but might result in obtaining incorrect function "
344 "pointers if you do not want the first returned platform.\n");
345
346 // Show them the platform name, in case it is a problem.
347
348 size_t size;
349 char *name;
350
351 clGetPlatformInfo(platform, CL_PLATFORM_NAME, 0, NULL, &size);
352 name = (char *)malloc(size);
353 clGetPlatformInfo(platform, CL_PLATFORM_NAME, size, name, NULL);
354
355 log_info("Using platform with name: %s \n", name);
356 free(name);
357 }
358
359 clCreateEventFromGLsyncKHR_ptr =
360 (clCreateEventFromGLsyncKHR_fn)clGetExtensionFunctionAddressForPlatform(
361 platform, "clCreateEventFromGLsyncKHR");
362 if (clCreateEventFromGLsyncKHR_ptr == NULL)
363 {
364 log_error("ERROR: Unable to run fence_sync test "
365 "(clCreateEventFromGLsyncKHR function not discovered!)\n");
366 clCreateEventFromGLsyncKHR_ptr = (clCreateEventFromGLsyncKHR_fn)
367 clGetExtensionFunctionAddressForPlatform(
368 platform, "clCreateEventFromGLsyncAPPLE");
369 return -1;
370 }
371
372 #ifdef USING_ARB_sync
373 char *gl_version_str = (char *)glGetString(GL_VERSION);
374 float glCoreVersion;
375 sscanf(gl_version_str, "%f", &glCoreVersion);
376 if (glCoreVersion < 3.0f)
377 {
378 log_info(
379 "OpenGL version %f does not support fence/sync! Skipping test.\n",
380 glCoreVersion);
381 return 0;
382 }
383
384 #ifdef __APPLE__
385 CGLContextObj currCtx = CGLGetCurrentContext();
386 CGLPixelFormatObj pixFmt = CGLGetPixelFormat(currCtx);
387 GLint val, screen;
388 CGLGetVirtualScreen(currCtx, &screen);
389 CGLDescribePixelFormat(pixFmt, screen, kCGLPFAOpenGLProfile, &val);
390 if (val != kCGLOGLPVersion_3_2_Core)
391 {
392 log_error(
393 "OpenGL context was not created with OpenGL version >= 3.0 profile "
394 "even though platform supports it"
395 "OpenGL profile %f does not support fence/sync! Skipping test.\n",
396 glCoreVersion);
397 return -1;
398 }
399 #else
400 #ifdef _WIN32
401 HDC hdc = wglGetCurrentDC();
402 HGLRC hglrc = wglGetCurrentContext();
403 #else
404 Display *dpy = glXGetCurrentDisplay();
405 GLXDrawable drawable = glXGetCurrentDrawable();
406 GLXContext ctx = glXGetCurrentContext();
407 #endif
408 #endif
409
410 InitSyncFns();
411 #endif
412
413 #ifdef __APPLE__
414 CGLSetVirtualScreen(CGLGetCurrentContext(), rend_vs);
415 #else
416 #ifdef _WIN32
417 wglMakeCurrent(hdc, hglrc);
418 #else
419 glXMakeCurrent(dpy, drawable, ctx);
420 #endif
421 #endif
422
423 GLint posLoc, colLoc;
424 GLuint shaderprogram = createShaderProgram(&posLoc, &colLoc);
425 if (!shaderprogram)
426 {
427 log_error("Failed to create shader program\n");
428 return -1;
429 }
430
431 float l = 0.0f;
432 float r = framebufferSize;
433 float b = 0.0f;
434 float t = framebufferSize;
435
436 float projMatrix[16] = { 2.0f / (r - l),
437 0.0f,
438 0.0f,
439 0.0f,
440 0.0f,
441 2.0f / (t - b),
442 0.0f,
443 0.0f,
444 0.0f,
445 0.0f,
446 -1.0f,
447 0.0f,
448 -(r + l) / (r - l),
449 -(t + b) / (t - b),
450 0.0f,
451 1.0f };
452
453 glUseProgram(shaderprogram);
454 GLuint projMatLoc = glGetUniformLocation(shaderprogram, "projMatrix");
455 glUniformMatrix4fv(projMatLoc, 1, 0, projMatrix);
456 glUseProgram(0);
457
458 // Note: the framebuffer is just the target to verify our results against,
459 // so we don't really care to go through all the possible formats in this
460 // case
461 glFramebufferWrapper glFramebuffer;
462 glRenderbufferWrapper glRenderbuffer;
463 error = CreateGLRenderbufferRaw(
464 framebufferSize, 128, GL_COLOR_ATTACHMENT0_EXT, GL_RGBA, GL_RGBA,
465 GL_UNSIGNED_INT_8_8_8_8_REV, &glFramebuffer, &glRenderbuffer);
466 if (error != 0) return error;
467
468 GLuint vao;
469 glGenVertexArrays(1, &vao);
470 glBindVertexArray(vao);
471
472 glBufferWrapper vtxBuffer, colorBuffer;
473 glGenBuffers(1, &vtxBuffer);
474 glGenBuffers(1, &colorBuffer);
475
476 const int numHorizVertices = (framebufferSize * 64) + 1;
477
478 glBindBuffer(GL_ARRAY_BUFFER, vtxBuffer);
479 glBufferData(GL_ARRAY_BUFFER, sizeof(GLfloat) * numHorizVertices * 2 * 4,
480 NULL, GL_STATIC_DRAW);
481
482 glBindBuffer(GL_ARRAY_BUFFER, colorBuffer);
483 glBufferData(GL_ARRAY_BUFFER, sizeof(GLfloat) * numHorizVertices * 2 * 4,
484 NULL, GL_STATIC_DRAW);
485
486 // Now that the requisite objects are bound, we can attempt program
487 // validation:
488
489 glValidateProgram(shaderprogram);
490
491 GLint logLength, status;
492 glGetProgramiv(shaderprogram, GL_INFO_LOG_LENGTH, &logLength);
493 if (logLength > 0)
494 {
495 GLchar *log = (GLchar *)malloc(logLength);
496 glGetProgramInfoLog(shaderprogram, logLength, &logLength, log);
497 log_info("Program validate log:\n%s", log);
498 free(log);
499 }
500
501 glGetProgramiv(shaderprogram, GL_VALIDATE_STATUS, &status);
502 if (status == 0)
503 {
504 log_error("Failed to validate program\n");
505 return 0;
506 }
507
508 clProgramWrapper program;
509 clKernelWrapper kernel;
510 clMemWrapper streams[2];
511
512 if (create_single_kernel_helper(context, &program, &kernel, 1,
513 updateBuffersKernel, "update"))
514 return -1;
515
516 streams[0] = (*clCreateFromGLBuffer_ptr)(context, CL_MEM_READ_WRITE,
517 vtxBuffer, &error);
518 test_error(error, "Unable to create CL buffer from GL vertex buffer");
519
520 streams[1] = (*clCreateFromGLBuffer_ptr)(context, CL_MEM_READ_WRITE,
521 colorBuffer, &error);
522 test_error(error, "Unable to create CL buffer from GL color buffer");
523
524 error = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]);
525 test_error(error, "Unable to set kernel arguments");
526
527 error = clSetKernelArg(kernel, 1, sizeof(streams[1]), &streams[1]);
528 test_error(error, "Unable to set kernel arguments");
529
530 cl_int horizWrap = (cl_int)framebufferSize;
531 error = clSetKernelArg(kernel, 2, sizeof(horizWrap), &horizWrap);
532 test_error(error, "Unable to set kernel arguments");
533
534 glViewport(0, 0, framebufferSize, framebufferSize);
535 glClearColor(0, 0, 0, 0);
536 glClear(GL_COLOR_BUFFER_BIT);
537 glClear(GL_DEPTH_BUFFER_BIT);
538 glDisable(GL_DEPTH_TEST);
539 glEnable(GL_BLEND);
540 glBlendFunc(GL_ONE, GL_ONE);
541
542 clEventWrapper fenceEvent;
543 GLsync glFence = 0;
544
545 // Do a loop through 8 different horizontal stripes against the framebuffer
546 RunThread thread(kernel, queue, streams[0], streams[1],
547 (size_t)numHorizVertices);
548
549 for (int i = 0; i < 8; i++)
550 {
551 // if current rendering device is not the compute device and
552 // separateThreads == false which means compute is going on same
553 // thread and we are using implicit synchronization (no GLSync obj used)
554 // then glFlush by clEnqueueAcquireGLObject is not sufficient ... we
555 // need to wait for rendering to finish on other device before CL can
556 // start writing to CL/GL shared mem objects. When separateThreads is
557 // true i.e. we are using GLSync obj to synchronize then we dont need to
558 // call glFinish here since CL should wait for rendering on other device
559 // before this GLSync object to finish before it starts writing to
560 // shared mem object. Also rend_device == compute_device no need to call
561 // glFinish
562 if (rend_device != device && !separateThreads) glFinish();
563
564 if (separateThreads)
565 {
566 glDeleteSyncFunc(glFence);
567
568 glFence = glFenceSyncFunc(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
569 fenceEvent =
570 clCreateEventFromGLsyncKHR_ptr(context, glFence, &error);
571 test_error(error, "Unable to create CL event from GL fence");
572
573 // in case of explicit synchronization, we just wait for the sync
574 // object to complete in clEnqueueAcquireGLObject but we dont flush.
575 // Its application's responsibility to flush on the context on which
576 // glSync is created
577 glFlush();
578
579 thread.SetRunData((cl_int)i, fenceEvent);
580 thread.Start();
581
582 error = (cl_int)(size_t)thread.Join();
583 }
584 else
585 {
586 error =
587 run_cl_kernel(kernel, queue, streams[0], streams[1], (cl_int)i,
588 fenceEvent, (size_t)numHorizVertices);
589 }
590 test_error(error, "Unable to run CL kernel");
591
592 glUseProgram(shaderprogram);
593 glEnableVertexAttribArray(posLoc);
594 glEnableVertexAttribArray(colLoc);
595 glBindBuffer(GL_ARRAY_BUFFER, vtxBuffer);
596 glVertexAttribPointer(posLoc, 4, GL_FLOAT, GL_FALSE,
597 4 * sizeof(GLfloat), 0);
598 glBindBuffer(GL_ARRAY_BUFFER, colorBuffer);
599 glVertexAttribPointer(colLoc, 4, GL_FLOAT, GL_FALSE,
600 4 * sizeof(GLfloat), 0);
601 glBindBuffer(GL_ARRAY_BUFFER, 0);
602
603 glDrawArrays(GL_TRIANGLE_STRIP, 0, numHorizVertices * 2);
604
605 glDisableVertexAttribArray(posLoc);
606 glDisableVertexAttribArray(colLoc);
607 glUseProgram(0);
608
609 if (separateThreads)
610 {
611 // If we're on the same thread, then we're testing implicit syncing,
612 // so we don't need the actual fence code
613 glDeleteSyncFunc(glFence);
614
615
616 glFence = glFenceSyncFunc(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
617 fenceEvent =
618 clCreateEventFromGLsyncKHR_ptr(context, glFence, &error);
619 test_error(error, "Unable to create CL event from GL fence");
620
621 // in case of explicit synchronization, we just wait for the sync
622 // object to complete in clEnqueueAcquireGLObject but we dont flush.
623 // Its application's responsibility to flush on the context on which
624 // glSync is created
625 glFlush();
626 }
627 else
628 glFinish();
629 }
630
631 if (glFence != 0)
632 // Don't need the final release for fenceEvent, because the wrapper will
633 // take care of that
634 glDeleteSyncFunc(glFence);
635
636 #ifdef __APPLE__
637 CGLSetVirtualScreen(CGLGetCurrentContext(), read_vs);
638 #else
639 #ifdef _WIN32
640 wglMakeCurrent(hdc, hglrc);
641 #else
642 glXMakeCurrent(dpy, drawable, ctx);
643 #endif
644 #endif
645 // Grab the contents of the final framebuffer
646 BufferOwningPtr<char> resultData(ReadGLRenderbuffer(
647 glFramebuffer, glRenderbuffer, GL_COLOR_ATTACHMENT0_EXT, GL_RGBA,
648 GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, kUChar, framebufferSize, 128));
649
650 // Check the contents now. We should end up with solid color bands 32 pixels
651 // high and the full width of the framebuffer, at values (128,128,128) due
652 // to the additive blending
653 for (int i = 0; i < 8; i++)
654 {
655 for (int y = 0; y < 4; y++)
656 {
657 // Note: coverage will be double because the 63-0 triangle
658 // overwrites again at the end of the pass
659 cl_uchar valA =
660 (((i + 1) & 1)) * numHorizVertices * 2 / framebufferSize;
661 cl_uchar valB =
662 (((i + 1) & 2) >> 1) * numHorizVertices * 2 / framebufferSize;
663 cl_uchar valC =
664 (((i + 1) & 4) >> 2) * numHorizVertices * 2 / framebufferSize;
665
666 cl_uchar *row =
667 (cl_uchar *)&resultData[(i * 16 + y) * framebufferSize * 4];
668 for (int x = 0; x < (framebufferSize - 1) - 1; x++)
669 {
670 if ((row[x * 4] != valA) || (row[x * 4 + 1] != valB)
671 || (row[x * 4 + 2] != valC))
672 {
673 log_error("ERROR: Output framebuffer did not validate!\n");
674 DumpGLBuffer(GL_UNSIGNED_BYTE, framebufferSize, 128,
675 resultData);
676 log_error("RUNS:\n");
677 uint32_t *p = (uint32_t *)(char *)resultData;
678 size_t a = 0;
679 for (size_t t = 1; t < framebufferSize * framebufferSize;
680 t++)
681 {
682 if (p[a] != 0)
683 {
684 if (p[t] == 0)
685 {
686 log_error(
687 "RUN: %ld to %ld (%d,%d to %d,%d) 0x%08x\n",
688 a, t - 1, (int)(a % framebufferSize),
689 (int)(a / framebufferSize),
690 (int)((t - 1) % framebufferSize),
691 (int)((t - 1) / framebufferSize), p[a]);
692 a = t;
693 }
694 }
695 else
696 {
697 if (p[t] != 0)
698 {
699 a = t;
700 }
701 }
702 }
703 return -1;
704 }
705 }
706 }
707 }
708
709 destroyShaderProgram(shaderprogram);
710 glDeleteVertexArrays(1, &vao);
711 return 0;
712 }
713
test_fence_sync(cl_device_id device,cl_context context,cl_command_queue queue,int numElements)714 int test_fence_sync(cl_device_id device, cl_context context,
715 cl_command_queue queue, int numElements)
716 {
717 GLint vs_count = 0;
718 cl_device_id *device_list = NULL;
719
720 if (!is_extension_available(device, "cl_khr_gl_event"))
721 {
722 log_info("NOTE: cl_khr_gl_event extension not present on this device; "
723 "skipping fence sync test\n");
724 return 0;
725 }
726 #ifdef __APPLE__
727 CGLContextObj ctx = CGLGetCurrentContext();
728 CGLPixelFormatObj pix = CGLGetPixelFormat(ctx);
729 CGLError err =
730 CGLDescribePixelFormat(pix, 0, kCGLPFAVirtualScreenCount, &vs_count);
731
732 device_list = (cl_device_id *)malloc(sizeof(cl_device_id) * vs_count);
733 clGetGLContextInfoAPPLE(context, ctx,
734 CL_CGL_DEVICES_FOR_SUPPORTED_VIRTUAL_SCREENS_APPLE,
735 sizeof(cl_device_id) * vs_count, device_list, NULL);
736 #else
737 // Need platform specific way of getting devices from CL context to which
738 // OpenGL can render If not available it can be replaced with
739 // clGetContextInfo with CL_CONTEXT_DEVICES
740 size_t device_cb;
741 cl_int err =
742 clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &device_cb);
743 if (err != CL_SUCCESS)
744 {
745 print_error(err, "Unable to get device count from context");
746 return -1;
747 }
748 vs_count = (GLint)device_cb / sizeof(cl_device_id);
749
750 if (vs_count < 1)
751 {
752 log_error("No devices found.\n");
753 return -1;
754 }
755
756 device_list = (cl_device_id *)malloc(device_cb);
757 err = clGetContextInfo(context, CL_CONTEXT_DEVICES, device_cb, device_list,
758 NULL);
759 if (err != CL_SUCCESS)
760 {
761 free(device_list);
762 print_error(err, "Unable to get device list from context");
763 return -1;
764 }
765
766 #endif
767
768 GLint rend_vs, read_vs;
769 int error = 0;
770 int any_failed = 0;
771
772 // Loop through all the devices capable to OpenGL rendering
773 // and set them as current rendering target
774 for (rend_vs = 0; rend_vs < vs_count; rend_vs++)
775 {
776 // Loop through all the devices and set them as current
777 // compute target
778 for (read_vs = 0; read_vs < vs_count; read_vs++)
779 {
780 cl_device_id rend_device = device_list[rend_vs],
781 read_device = device_list[read_vs];
782 char rend_name[200], read_name[200];
783
784 clGetDeviceInfo(rend_device, CL_DEVICE_NAME, sizeof(rend_name),
785 rend_name, NULL);
786 clGetDeviceInfo(read_device, CL_DEVICE_NAME, sizeof(read_name),
787 read_name, NULL);
788
789 log_info("Rendering on: %s, read back on: %s\n", rend_name,
790 read_name);
791 error = test_fence_sync_single(device, context, queue, false,
792 rend_vs, read_vs, rend_device);
793 any_failed |= error;
794 if (error != 0)
795 log_error(
796 "ERROR: Implicit syncing with GL sync events failed!\n\n");
797 else
798 log_info("Implicit syncing Passed\n");
799
800 error = test_fence_sync_single(device, context, queue, true,
801 rend_vs, read_vs, rend_device);
802 any_failed |= error;
803 if (error != 0)
804 log_error(
805 "ERROR: Explicit syncing with GL sync events failed!\n\n");
806 else
807 log_info("Explicit syncing Passed\n");
808 }
809 }
810
811 free(device_list);
812
813 return any_failed;
814 }
815