xref: /aosp_15_r20/external/OpenCL-CTS/test_conformance/gl/test_fence_sync.cpp (revision 6467f958c7de8070b317fc65bcb0f6472e388d82)
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "testBase.h"
17 #include "gl/setup.h"
18 #include "harness/genericThread.h"
19 
20 #if defined(__APPLE__)
21 #include <OpenGL/glu.h>
22 #else
23 #include <GL/glu.h>
24 #include <CL/cl_gl.h>
25 #if !defined(_WIN32) && !defined(__ANDROID__)
26 #include <GL/glx.h>
27 #endif
28 #endif
29 
30 #ifndef GLsync
31 // For OpenGL before 3.2, we look for the ARB_sync extension and try to use that
32 #if !defined(_WIN32)
33 #include <inttypes.h>
34 #endif // !_WIN32
35 typedef int64_t GLint64;
36 typedef uint64_t GLuint64;
37 typedef struct __GLsync *GLsync;
38 
39 #ifndef APIENTRY
40 #define APIENTRY
41 #endif
42 
43 typedef GLsync(APIENTRY *glFenceSyncPtr)(GLenum condition, GLbitfield flags);
44 glFenceSyncPtr glFenceSyncFunc;
45 
46 typedef bool(APIENTRY *glIsSyncPtr)(GLsync sync);
47 glIsSyncPtr glIsSyncFunc;
48 
49 typedef void(APIENTRY *glDeleteSyncPtr)(GLsync sync);
50 glDeleteSyncPtr glDeleteSyncFunc;
51 
52 typedef GLenum(APIENTRY *glClientWaitSyncPtr)(GLsync sync, GLbitfield flags,
53                                               GLuint64 timeout);
54 glClientWaitSyncPtr glClientWaitSyncFunc;
55 
56 typedef void(APIENTRY *glWaitSyncPtr)(GLsync sync, GLbitfield flags,
57                                       GLuint64 timeout);
58 glWaitSyncPtr glWaitSyncFunc;
59 
60 typedef void(APIENTRY *glGetInteger64vPtr)(GLenum pname, GLint64 *params);
61 glGetInteger64vPtr glGetInteger64vFunc;
62 
63 typedef void(APIENTRY *glGetSyncivPtr)(GLsync sync, GLenum pname,
64                                        GLsizei bufSize, GLsizei *length,
65                                        GLint *values);
66 glGetSyncivPtr glGetSyncivFunc;
67 
68 #define CHK_GL_ERR() printf("%s\n", gluErrorString(glGetError()))
69 
InitSyncFns(void)70 static void InitSyncFns(void)
71 {
72     glFenceSyncFunc = (glFenceSyncPtr)glutGetProcAddress("glFenceSync");
73     glIsSyncFunc = (glIsSyncPtr)glutGetProcAddress("glIsSync");
74     glDeleteSyncFunc = (glDeleteSyncPtr)glutGetProcAddress("glDeleteSync");
75     glClientWaitSyncFunc =
76         (glClientWaitSyncPtr)glutGetProcAddress("glClientWaitSync");
77     glWaitSyncFunc = (glWaitSyncPtr)glutGetProcAddress("glWaitSync");
78     glGetInteger64vFunc =
79         (glGetInteger64vPtr)glutGetProcAddress("glGetInteger64v");
80     glGetSyncivFunc = (glGetSyncivPtr)glutGetProcAddress("glGetSynciv");
81 }
82 #ifndef GL_ARB_sync
83 #define GL_MAX_SERVER_WAIT_TIMEOUT 0x9111
84 
85 #define GL_OBJECT_TYPE 0x9112
86 #define GL_SYNC_CONDITION 0x9113
87 #define GL_SYNC_STATUS 0x9114
88 #define GL_SYNC_FLAGS 0x9115
89 
90 #define GL_SYNC_FENCE 0x9116
91 
92 #define GL_SYNC_GPU_COMMANDS_COMPLETE 0x9117
93 
94 #define GL_UNSIGNALED 0x9118
95 #define GL_SIGNALED 0x9119
96 
97 #define GL_SYNC_FLUSH_COMMANDS_BIT 0x00000001
98 
99 #define GL_TIMEOUT_IGNORED 0xFFFFFFFFFFFFFFFFull
100 
101 #define GL_ALREADY_SIGNALED 0x911A
102 #define GL_TIMEOUT_EXPIRED 0x911B
103 #define GL_CONDITION_SATISFIED 0x911C
104 #define GL_WAIT_FAILED 0x911D
105 #endif
106 
107 #define USING_ARB_sync 1
108 #endif
109 
110 typedef cl_event(CL_API_CALL *clCreateEventFromGLsyncKHR_fn)(
111     cl_context context, GLsync sync, cl_int *errCode_ret);
112 
113 clCreateEventFromGLsyncKHR_fn clCreateEventFromGLsyncKHR_ptr;
114 
115 
116 // clang-format off
117 static const char *updateBuffersKernel[] = {
118     "__kernel void update( __global float4 * vertices, __global float4 "
119     "*colors, int horizWrap, int rowIdx )\n"
120     "{\n"
121     "    size_t tid = get_global_id(0);\n"
122     "\n"
123     "    size_t xVal = ( tid & ( horizWrap - 1 ) );\n"
124     "    vertices[ tid * 2 + 0 ] = (float4)( xVal, rowIdx*16.f, 0.0f, 1.f );\n"
125     "    vertices[ tid * 2 + 1 ] = (float4)( xVal, rowIdx*16.f + 4.0f, 0.0f, "
126     "1.f );\n"
127     "\n"
128     "    int rowV = rowIdx + 1;\n"
129     "    colors[ tid * 2 + 0 ] = (float4)( ( rowV & 1 ) / 255.f, ( ( rowV & 2 "
130     ") >> 1 ) / 255.f, ( ( rowV & 4 ) >> 2 ) / 255.f, 1.f );\n"
131     "    //colors[ tid * 2 + 0 ] = (float4)( (float)xVal/(float)horizWrap, "
132     "1.0f, 1.0f, 1.0f );\n"
133     "    colors[ tid * 2 + 1 ] = colors[ tid * 2 + 0 ];\n"
134     "}\n"
135 };
136 // clang-format on
137 
138 // Passthrough VertexShader
139 static const char *vertexshader = "#version 150\n"
140                                   "uniform mat4 projMatrix;\n"
141                                   "in vec4 inPosition;\n"
142                                   "in vec4 inColor;\n"
143                                   "out vec4 vertColor;\n"
144                                   "void main (void) {\n"
145                                   "    gl_Position = projMatrix*inPosition;\n"
146                                   "   vertColor = inColor;\n"
147                                   "}\n";
148 
149 // Passthrough FragmentShader
150 static const char *fragmentshader = "#version 150\n"
151                                     "in vec4 vertColor;\n"
152                                     "out vec4 outColor;\n"
153                                     "void main (void) {\n"
154                                     "    outColor = vertColor;\n"
155                                     "}\n";
156 
createShaderProgram(GLint * posLoc,GLint * colLoc)157 GLuint createShaderProgram(GLint *posLoc, GLint *colLoc)
158 {
159     GLint logLength, status;
160     GLuint program = glCreateProgram();
161     GLuint vpShader;
162 
163     vpShader = glCreateShader(GL_VERTEX_SHADER);
164     glShaderSource(vpShader, 1, (const GLchar **)&vertexshader, NULL);
165     glCompileShader(vpShader);
166     glGetShaderiv(vpShader, GL_INFO_LOG_LENGTH, &logLength);
167     if (logLength > 0)
168     {
169         GLchar *log = (GLchar *)malloc(logLength);
170         glGetShaderInfoLog(vpShader, logLength, &logLength, log);
171         log_info("Vtx Shader compile log:\n%s", log);
172         free(log);
173     }
174 
175     glGetShaderiv(vpShader, GL_COMPILE_STATUS, &status);
176     if (status == 0)
177     {
178         log_error("Failed to compile vtx shader:\n");
179         return 0;
180     }
181 
182     glAttachShader(program, vpShader);
183 
184     GLuint fpShader;
185     fpShader = glCreateShader(GL_FRAGMENT_SHADER);
186     glShaderSource(fpShader, 1, (const GLchar **)&fragmentshader, NULL);
187     glCompileShader(fpShader);
188 
189     glGetShaderiv(fpShader, GL_INFO_LOG_LENGTH, &logLength);
190     if (logLength > 0)
191     {
192         GLchar *log = (GLchar *)malloc(logLength);
193         glGetShaderInfoLog(fpShader, logLength, &logLength, log);
194         log_info("Frag Shader compile log:\n%s", log);
195         free(log);
196     }
197 
198     glAttachShader(program, fpShader);
199     glGetShaderiv(fpShader, GL_COMPILE_STATUS, &status);
200     if (status == 0)
201     {
202         log_error("Failed to compile frag shader:\n\n");
203         return 0;
204     }
205 
206     glLinkProgram(program);
207     glGetProgramiv(program, GL_INFO_LOG_LENGTH, &logLength);
208     if (logLength > 0)
209     {
210         GLchar *log = (GLchar *)malloc(logLength);
211         glGetProgramInfoLog(program, logLength, &logLength, log);
212         log_info("Program link log:\n%s", log);
213         free(log);
214     }
215 
216     glGetProgramiv(program, GL_LINK_STATUS, &status);
217     if (status == 0)
218     {
219         log_error("Failed to link program\n");
220         return 0;
221     }
222 
223     *posLoc = glGetAttribLocation(program, "inPosition");
224     *colLoc = glGetAttribLocation(program, "inColor");
225 
226     return program;
227 }
228 
destroyShaderProgram(GLuint program)229 void destroyShaderProgram(GLuint program)
230 {
231     GLuint shaders[2];
232     GLsizei count;
233     glUseProgram(0);
234     glGetAttachedShaders(program, 2, &count, shaders);
235     int i;
236     for (i = 0; i < count; i++)
237     {
238         glDetachShader(program, shaders[i]);
239         glDeleteShader(shaders[i]);
240     }
241     glDeleteProgram(program);
242 }
243 
244 // This function queues up and runs the above CL kernel that writes the vertex
245 // data
run_cl_kernel(cl_kernel kernel,cl_command_queue queue,cl_mem stream0,cl_mem stream1,cl_int rowIdx,cl_event fenceEvent,size_t numThreads)246 cl_int run_cl_kernel(cl_kernel kernel, cl_command_queue queue, cl_mem stream0,
247                      cl_mem stream1, cl_int rowIdx, cl_event fenceEvent,
248                      size_t numThreads)
249 {
250     cl_int error = clSetKernelArg(kernel, 3, sizeof(rowIdx), &rowIdx);
251     test_error(error, "Unable to set kernel arguments");
252 
253     clEventWrapper acqEvent1, acqEvent2, kernEvent, relEvent1, relEvent2;
254     int numEvents = (fenceEvent != NULL) ? 1 : 0;
255     cl_event *fence_evt = (fenceEvent != NULL) ? &fenceEvent : NULL;
256 
257     error = (*clEnqueueAcquireGLObjects_ptr)(queue, 1, &stream0, numEvents,
258                                              fence_evt, &acqEvent1);
259     test_error(error, "Unable to acquire GL obejcts");
260     error = (*clEnqueueAcquireGLObjects_ptr)(queue, 1, &stream1, numEvents,
261                                              fence_evt, &acqEvent2);
262     test_error(error, "Unable to acquire GL obejcts");
263 
264     cl_event evts[2] = { acqEvent1, acqEvent2 };
265 
266     error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &numThreads, NULL, 2,
267                                    evts, &kernEvent);
268     test_error(error, "Unable to execute test kernel");
269 
270     error = (*clEnqueueReleaseGLObjects_ptr)(queue, 1, &stream0, 1, &kernEvent,
271                                              &relEvent1);
272     test_error(error, "clEnqueueReleaseGLObjects failed");
273     error = (*clEnqueueReleaseGLObjects_ptr)(queue, 1, &stream1, 1, &kernEvent,
274                                              &relEvent2);
275     test_error(error, "clEnqueueReleaseGLObjects failed");
276 
277     evts[0] = relEvent1;
278     evts[1] = relEvent2;
279     error = clWaitForEvents(2, evts);
280     test_error(error, "Unable to wait for release events");
281 
282     return 0;
283 }
284 
285 class RunThread : public genericThread {
286 public:
287     cl_kernel mKernel;
288     cl_command_queue mQueue;
289     cl_mem mStream0, mStream1;
290     cl_int mRowIdx;
291     cl_event mFenceEvent;
292     size_t mNumThreads;
293 
RunThread(cl_kernel kernel,cl_command_queue queue,cl_mem stream0,cl_mem stream1,size_t numThreads)294     RunThread(cl_kernel kernel, cl_command_queue queue, cl_mem stream0,
295               cl_mem stream1, size_t numThreads)
296         : mKernel(kernel), mQueue(queue), mStream0(stream0), mStream1(stream1),
297           mNumThreads(numThreads)
298     {}
299 
SetRunData(cl_int rowIdx,cl_event fenceEvent)300     void SetRunData(cl_int rowIdx, cl_event fenceEvent)
301     {
302         mRowIdx = rowIdx;
303         mFenceEvent = fenceEvent;
304     }
305 
IRun(void)306     virtual void *IRun(void)
307     {
308         cl_int error = run_cl_kernel(mKernel, mQueue, mStream0, mStream1,
309                                      mRowIdx, mFenceEvent, mNumThreads);
310         return (void *)(uintptr_t)error;
311     }
312 };
313 
314 
test_fence_sync_single(cl_device_id device,cl_context context,cl_command_queue queue,bool separateThreads,GLint rend_vs,GLint read_vs,cl_device_id rend_device)315 int test_fence_sync_single(cl_device_id device, cl_context context,
316                            cl_command_queue queue, bool separateThreads,
317                            GLint rend_vs, GLint read_vs,
318                            cl_device_id rend_device)
319 {
320     int error;
321     const int framebufferSize = 512;
322 
323 
324     if (!is_extension_available(device, "cl_khr_gl_event"))
325     {
326         log_info("NOTE: cl_khr_gl_event extension not present on this device; "
327                  "skipping fence sync test\n");
328         return 0;
329     }
330 
331     // Ask OpenCL for the platforms.  Warn if more than one platform found,
332     // since this might not be the platform we want.  By default, we simply
333     // use the first returned platform.
334 
335     cl_uint nplatforms;
336     cl_platform_id platform;
337     clGetPlatformIDs(0, NULL, &nplatforms);
338     clGetPlatformIDs(1, &platform, NULL);
339 
340     if (nplatforms > 1)
341     {
342         log_info("clGetPlatformIDs returned multiple values.  This is not "
343                  "an error, but might result in obtaining incorrect function "
344                  "pointers if you do not want the first returned platform.\n");
345 
346         // Show them the platform name, in case it is a problem.
347 
348         size_t size;
349         char *name;
350 
351         clGetPlatformInfo(platform, CL_PLATFORM_NAME, 0, NULL, &size);
352         name = (char *)malloc(size);
353         clGetPlatformInfo(platform, CL_PLATFORM_NAME, size, name, NULL);
354 
355         log_info("Using platform with name: %s \n", name);
356         free(name);
357     }
358 
359     clCreateEventFromGLsyncKHR_ptr =
360         (clCreateEventFromGLsyncKHR_fn)clGetExtensionFunctionAddressForPlatform(
361             platform, "clCreateEventFromGLsyncKHR");
362     if (clCreateEventFromGLsyncKHR_ptr == NULL)
363     {
364         log_error("ERROR: Unable to run fence_sync test "
365                   "(clCreateEventFromGLsyncKHR function not discovered!)\n");
366         clCreateEventFromGLsyncKHR_ptr = (clCreateEventFromGLsyncKHR_fn)
367             clGetExtensionFunctionAddressForPlatform(
368                 platform, "clCreateEventFromGLsyncAPPLE");
369         return -1;
370     }
371 
372 #ifdef USING_ARB_sync
373     char *gl_version_str = (char *)glGetString(GL_VERSION);
374     float glCoreVersion;
375     sscanf(gl_version_str, "%f", &glCoreVersion);
376     if (glCoreVersion < 3.0f)
377     {
378         log_info(
379             "OpenGL version %f does not support fence/sync! Skipping test.\n",
380             glCoreVersion);
381         return 0;
382     }
383 
384 #ifdef __APPLE__
385     CGLContextObj currCtx = CGLGetCurrentContext();
386     CGLPixelFormatObj pixFmt = CGLGetPixelFormat(currCtx);
387     GLint val, screen;
388     CGLGetVirtualScreen(currCtx, &screen);
389     CGLDescribePixelFormat(pixFmt, screen, kCGLPFAOpenGLProfile, &val);
390     if (val != kCGLOGLPVersion_3_2_Core)
391     {
392         log_error(
393             "OpenGL context was not created with OpenGL version >= 3.0 profile "
394             "even though platform supports it"
395             "OpenGL profile %f does not support fence/sync! Skipping test.\n",
396             glCoreVersion);
397         return -1;
398     }
399 #else
400 #ifdef _WIN32
401     HDC hdc = wglGetCurrentDC();
402     HGLRC hglrc = wglGetCurrentContext();
403 #else
404     Display *dpy = glXGetCurrentDisplay();
405     GLXDrawable drawable = glXGetCurrentDrawable();
406     GLXContext ctx = glXGetCurrentContext();
407 #endif
408 #endif
409 
410     InitSyncFns();
411 #endif
412 
413 #ifdef __APPLE__
414     CGLSetVirtualScreen(CGLGetCurrentContext(), rend_vs);
415 #else
416 #ifdef _WIN32
417     wglMakeCurrent(hdc, hglrc);
418 #else
419     glXMakeCurrent(dpy, drawable, ctx);
420 #endif
421 #endif
422 
423     GLint posLoc, colLoc;
424     GLuint shaderprogram = createShaderProgram(&posLoc, &colLoc);
425     if (!shaderprogram)
426     {
427         log_error("Failed to create shader program\n");
428         return -1;
429     }
430 
431     float l = 0.0f;
432     float r = framebufferSize;
433     float b = 0.0f;
434     float t = framebufferSize;
435 
436     float projMatrix[16] = { 2.0f / (r - l),
437                              0.0f,
438                              0.0f,
439                              0.0f,
440                              0.0f,
441                              2.0f / (t - b),
442                              0.0f,
443                              0.0f,
444                              0.0f,
445                              0.0f,
446                              -1.0f,
447                              0.0f,
448                              -(r + l) / (r - l),
449                              -(t + b) / (t - b),
450                              0.0f,
451                              1.0f };
452 
453     glUseProgram(shaderprogram);
454     GLuint projMatLoc = glGetUniformLocation(shaderprogram, "projMatrix");
455     glUniformMatrix4fv(projMatLoc, 1, 0, projMatrix);
456     glUseProgram(0);
457 
458     // Note: the framebuffer is just the target to verify our results against,
459     // so we don't really care to go through all the possible formats in this
460     // case
461     glFramebufferWrapper glFramebuffer;
462     glRenderbufferWrapper glRenderbuffer;
463     error = CreateGLRenderbufferRaw(
464         framebufferSize, 128, GL_COLOR_ATTACHMENT0_EXT, GL_RGBA, GL_RGBA,
465         GL_UNSIGNED_INT_8_8_8_8_REV, &glFramebuffer, &glRenderbuffer);
466     if (error != 0) return error;
467 
468     GLuint vao;
469     glGenVertexArrays(1, &vao);
470     glBindVertexArray(vao);
471 
472     glBufferWrapper vtxBuffer, colorBuffer;
473     glGenBuffers(1, &vtxBuffer);
474     glGenBuffers(1, &colorBuffer);
475 
476     const int numHorizVertices = (framebufferSize * 64) + 1;
477 
478     glBindBuffer(GL_ARRAY_BUFFER, vtxBuffer);
479     glBufferData(GL_ARRAY_BUFFER, sizeof(GLfloat) * numHorizVertices * 2 * 4,
480                  NULL, GL_STATIC_DRAW);
481 
482     glBindBuffer(GL_ARRAY_BUFFER, colorBuffer);
483     glBufferData(GL_ARRAY_BUFFER, sizeof(GLfloat) * numHorizVertices * 2 * 4,
484                  NULL, GL_STATIC_DRAW);
485 
486     // Now that the requisite objects are bound, we can attempt program
487     // validation:
488 
489     glValidateProgram(shaderprogram);
490 
491     GLint logLength, status;
492     glGetProgramiv(shaderprogram, GL_INFO_LOG_LENGTH, &logLength);
493     if (logLength > 0)
494     {
495         GLchar *log = (GLchar *)malloc(logLength);
496         glGetProgramInfoLog(shaderprogram, logLength, &logLength, log);
497         log_info("Program validate log:\n%s", log);
498         free(log);
499     }
500 
501     glGetProgramiv(shaderprogram, GL_VALIDATE_STATUS, &status);
502     if (status == 0)
503     {
504         log_error("Failed to validate program\n");
505         return 0;
506     }
507 
508     clProgramWrapper program;
509     clKernelWrapper kernel;
510     clMemWrapper streams[2];
511 
512     if (create_single_kernel_helper(context, &program, &kernel, 1,
513                                     updateBuffersKernel, "update"))
514         return -1;
515 
516     streams[0] = (*clCreateFromGLBuffer_ptr)(context, CL_MEM_READ_WRITE,
517                                              vtxBuffer, &error);
518     test_error(error, "Unable to create CL buffer from GL vertex buffer");
519 
520     streams[1] = (*clCreateFromGLBuffer_ptr)(context, CL_MEM_READ_WRITE,
521                                              colorBuffer, &error);
522     test_error(error, "Unable to create CL buffer from GL color buffer");
523 
524     error = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]);
525     test_error(error, "Unable to set kernel arguments");
526 
527     error = clSetKernelArg(kernel, 1, sizeof(streams[1]), &streams[1]);
528     test_error(error, "Unable to set kernel arguments");
529 
530     cl_int horizWrap = (cl_int)framebufferSize;
531     error = clSetKernelArg(kernel, 2, sizeof(horizWrap), &horizWrap);
532     test_error(error, "Unable to set kernel arguments");
533 
534     glViewport(0, 0, framebufferSize, framebufferSize);
535     glClearColor(0, 0, 0, 0);
536     glClear(GL_COLOR_BUFFER_BIT);
537     glClear(GL_DEPTH_BUFFER_BIT);
538     glDisable(GL_DEPTH_TEST);
539     glEnable(GL_BLEND);
540     glBlendFunc(GL_ONE, GL_ONE);
541 
542     clEventWrapper fenceEvent;
543     GLsync glFence = 0;
544 
545     // Do a loop through 8 different horizontal stripes against the framebuffer
546     RunThread thread(kernel, queue, streams[0], streams[1],
547                      (size_t)numHorizVertices);
548 
549     for (int i = 0; i < 8; i++)
550     {
551         // if current rendering device is not the compute device and
552         // separateThreads == false which means compute is going on same
553         // thread and we are using implicit synchronization (no GLSync obj used)
554         // then glFlush by clEnqueueAcquireGLObject is not sufficient ... we
555         // need to wait for rendering to finish on other device before CL can
556         // start writing to CL/GL shared mem objects. When separateThreads is
557         // true i.e. we are using GLSync obj to synchronize then we dont need to
558         // call glFinish here since CL should wait for rendering on other device
559         // before this GLSync object to finish before it starts writing to
560         // shared mem object. Also rend_device == compute_device no need to call
561         // glFinish
562         if (rend_device != device && !separateThreads) glFinish();
563 
564         if (separateThreads)
565         {
566             glDeleteSyncFunc(glFence);
567 
568             glFence = glFenceSyncFunc(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
569             fenceEvent =
570                 clCreateEventFromGLsyncKHR_ptr(context, glFence, &error);
571             test_error(error, "Unable to create CL event from GL fence");
572 
573             // in case of explicit synchronization, we just wait for the sync
574             // object to complete in clEnqueueAcquireGLObject but we dont flush.
575             // Its application's responsibility to flush on the context on which
576             // glSync is created
577             glFlush();
578 
579             thread.SetRunData((cl_int)i, fenceEvent);
580             thread.Start();
581 
582             error = (cl_int)(size_t)thread.Join();
583         }
584         else
585         {
586             error =
587                 run_cl_kernel(kernel, queue, streams[0], streams[1], (cl_int)i,
588                               fenceEvent, (size_t)numHorizVertices);
589         }
590         test_error(error, "Unable to run CL kernel");
591 
592         glUseProgram(shaderprogram);
593         glEnableVertexAttribArray(posLoc);
594         glEnableVertexAttribArray(colLoc);
595         glBindBuffer(GL_ARRAY_BUFFER, vtxBuffer);
596         glVertexAttribPointer(posLoc, 4, GL_FLOAT, GL_FALSE,
597                               4 * sizeof(GLfloat), 0);
598         glBindBuffer(GL_ARRAY_BUFFER, colorBuffer);
599         glVertexAttribPointer(colLoc, 4, GL_FLOAT, GL_FALSE,
600                               4 * sizeof(GLfloat), 0);
601         glBindBuffer(GL_ARRAY_BUFFER, 0);
602 
603         glDrawArrays(GL_TRIANGLE_STRIP, 0, numHorizVertices * 2);
604 
605         glDisableVertexAttribArray(posLoc);
606         glDisableVertexAttribArray(colLoc);
607         glUseProgram(0);
608 
609         if (separateThreads)
610         {
611             // If we're on the same thread, then we're testing implicit syncing,
612             // so we don't need the actual fence code
613             glDeleteSyncFunc(glFence);
614 
615 
616             glFence = glFenceSyncFunc(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
617             fenceEvent =
618                 clCreateEventFromGLsyncKHR_ptr(context, glFence, &error);
619             test_error(error, "Unable to create CL event from GL fence");
620 
621             // in case of explicit synchronization, we just wait for the sync
622             // object to complete in clEnqueueAcquireGLObject but we dont flush.
623             // Its application's responsibility to flush on the context on which
624             // glSync is created
625             glFlush();
626         }
627         else
628             glFinish();
629     }
630 
631     if (glFence != 0)
632         // Don't need the final release for fenceEvent, because the wrapper will
633         // take care of that
634         glDeleteSyncFunc(glFence);
635 
636 #ifdef __APPLE__
637     CGLSetVirtualScreen(CGLGetCurrentContext(), read_vs);
638 #else
639 #ifdef _WIN32
640     wglMakeCurrent(hdc, hglrc);
641 #else
642     glXMakeCurrent(dpy, drawable, ctx);
643 #endif
644 #endif
645     // Grab the contents of the final framebuffer
646     BufferOwningPtr<char> resultData(ReadGLRenderbuffer(
647         glFramebuffer, glRenderbuffer, GL_COLOR_ATTACHMENT0_EXT, GL_RGBA,
648         GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, kUChar, framebufferSize, 128));
649 
650     // Check the contents now. We should end up with solid color bands 32 pixels
651     // high and the full width of the framebuffer, at values (128,128,128) due
652     // to the additive blending
653     for (int i = 0; i < 8; i++)
654     {
655         for (int y = 0; y < 4; y++)
656         {
657             // Note: coverage will be double because the 63-0 triangle
658             // overwrites again at the end of the pass
659             cl_uchar valA =
660                 (((i + 1) & 1)) * numHorizVertices * 2 / framebufferSize;
661             cl_uchar valB =
662                 (((i + 1) & 2) >> 1) * numHorizVertices * 2 / framebufferSize;
663             cl_uchar valC =
664                 (((i + 1) & 4) >> 2) * numHorizVertices * 2 / framebufferSize;
665 
666             cl_uchar *row =
667                 (cl_uchar *)&resultData[(i * 16 + y) * framebufferSize * 4];
668             for (int x = 0; x < (framebufferSize - 1) - 1; x++)
669             {
670                 if ((row[x * 4] != valA) || (row[x * 4 + 1] != valB)
671                     || (row[x * 4 + 2] != valC))
672                 {
673                     log_error("ERROR: Output framebuffer did not validate!\n");
674                     DumpGLBuffer(GL_UNSIGNED_BYTE, framebufferSize, 128,
675                                  resultData);
676                     log_error("RUNS:\n");
677                     uint32_t *p = (uint32_t *)(char *)resultData;
678                     size_t a = 0;
679                     for (size_t t = 1; t < framebufferSize * framebufferSize;
680                          t++)
681                     {
682                         if (p[a] != 0)
683                         {
684                             if (p[t] == 0)
685                             {
686                                 log_error(
687                                     "RUN: %ld to %ld (%d,%d to %d,%d) 0x%08x\n",
688                                     a, t - 1, (int)(a % framebufferSize),
689                                     (int)(a / framebufferSize),
690                                     (int)((t - 1) % framebufferSize),
691                                     (int)((t - 1) / framebufferSize), p[a]);
692                                 a = t;
693                             }
694                         }
695                         else
696                         {
697                             if (p[t] != 0)
698                             {
699                                 a = t;
700                             }
701                         }
702                     }
703                     return -1;
704                 }
705             }
706         }
707     }
708 
709     destroyShaderProgram(shaderprogram);
710     glDeleteVertexArrays(1, &vao);
711     return 0;
712 }
713 
test_fence_sync(cl_device_id device,cl_context context,cl_command_queue queue,int numElements)714 int test_fence_sync(cl_device_id device, cl_context context,
715                     cl_command_queue queue, int numElements)
716 {
717     GLint vs_count = 0;
718     cl_device_id *device_list = NULL;
719 
720     if (!is_extension_available(device, "cl_khr_gl_event"))
721     {
722         log_info("NOTE: cl_khr_gl_event extension not present on this device; "
723                  "skipping fence sync test\n");
724         return 0;
725     }
726 #ifdef __APPLE__
727     CGLContextObj ctx = CGLGetCurrentContext();
728     CGLPixelFormatObj pix = CGLGetPixelFormat(ctx);
729     CGLError err =
730         CGLDescribePixelFormat(pix, 0, kCGLPFAVirtualScreenCount, &vs_count);
731 
732     device_list = (cl_device_id *)malloc(sizeof(cl_device_id) * vs_count);
733     clGetGLContextInfoAPPLE(context, ctx,
734                             CL_CGL_DEVICES_FOR_SUPPORTED_VIRTUAL_SCREENS_APPLE,
735                             sizeof(cl_device_id) * vs_count, device_list, NULL);
736 #else
737     // Need platform specific way of getting devices from CL context to which
738     // OpenGL can render If not available it can be replaced with
739     // clGetContextInfo with CL_CONTEXT_DEVICES
740     size_t device_cb;
741     cl_int err =
742         clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &device_cb);
743     if (err != CL_SUCCESS)
744     {
745         print_error(err, "Unable to get device count from context");
746         return -1;
747     }
748     vs_count = (GLint)device_cb / sizeof(cl_device_id);
749 
750     if (vs_count < 1)
751     {
752         log_error("No devices found.\n");
753         return -1;
754     }
755 
756     device_list = (cl_device_id *)malloc(device_cb);
757     err = clGetContextInfo(context, CL_CONTEXT_DEVICES, device_cb, device_list,
758                            NULL);
759     if (err != CL_SUCCESS)
760     {
761         free(device_list);
762         print_error(err, "Unable to get device list from context");
763         return -1;
764     }
765 
766 #endif
767 
768     GLint rend_vs, read_vs;
769     int error = 0;
770     int any_failed = 0;
771 
772     // Loop through all the devices capable to OpenGL rendering
773     // and set them as current rendering target
774     for (rend_vs = 0; rend_vs < vs_count; rend_vs++)
775     {
776         // Loop through all the devices and set them as current
777         // compute target
778         for (read_vs = 0; read_vs < vs_count; read_vs++)
779         {
780             cl_device_id rend_device = device_list[rend_vs],
781                          read_device = device_list[read_vs];
782             char rend_name[200], read_name[200];
783 
784             clGetDeviceInfo(rend_device, CL_DEVICE_NAME, sizeof(rend_name),
785                             rend_name, NULL);
786             clGetDeviceInfo(read_device, CL_DEVICE_NAME, sizeof(read_name),
787                             read_name, NULL);
788 
789             log_info("Rendering on: %s, read back on: %s\n", rend_name,
790                      read_name);
791             error = test_fence_sync_single(device, context, queue, false,
792                                            rend_vs, read_vs, rend_device);
793             any_failed |= error;
794             if (error != 0)
795                 log_error(
796                     "ERROR: Implicit syncing with GL sync events failed!\n\n");
797             else
798                 log_info("Implicit syncing Passed\n");
799 
800             error = test_fence_sync_single(device, context, queue, true,
801                                            rend_vs, read_vs, rend_device);
802             any_failed |= error;
803             if (error != 0)
804                 log_error(
805                     "ERROR: Explicit syncing with GL sync events failed!\n\n");
806             else
807                 log_info("Explicit syncing Passed\n");
808         }
809     }
810 
811     free(device_list);
812 
813     return any_failed;
814 }
815