xref: /aosp_15_r20/external/OpenCL-CTS/test_conformance/gles/test_fence_sync.cpp (revision 6467f958c7de8070b317fc65bcb0f6472e388d82)
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "gl_headers.h"
17 
18 #include "testBase.h"
19 #include "setup.h"
20 #include "harness/genericThread.h"
21 
22 #ifndef GLsync
23 // For OpenGL before 3.2, we look for the ARB_sync extension and try to use that
24 #if !defined(_WIN32)
25 #include <inttypes.h>
26 #endif // !_WIN32
27 typedef int64_t GLint64;
28 typedef uint64_t GLuint64;
29 typedef struct __GLsync *GLsync;
30 
31 typedef GLsync (*glFenceSyncPtr)(GLenum condition,GLbitfield flags);
32 glFenceSyncPtr glFenceSyncFunc;
33 
34 typedef bool (*glIsSyncPtr)(GLsync sync);
35 glIsSyncPtr glIsSyncFunc;
36 
37 typedef void (*glDeleteSyncPtr)(GLsync sync);
38 glDeleteSyncPtr glDeleteSyncFunc;
39 
40 typedef GLenum (*glClientWaitSyncPtr)(GLsync sync,GLbitfield flags,GLuint64 timeout);
41 glClientWaitSyncPtr glClientWaitSyncFunc;
42 
43 typedef void (*glWaitSyncPtr)(GLsync sync,GLbitfield flags,GLuint64 timeout);
44 glWaitSyncPtr glWaitSyncFunc;
45 
46 typedef void (*glGetInteger64vPtr)(GLenum pname, GLint64 *params);
47 glGetInteger64vPtr glGetInteger64vFunc;
48 
49 typedef void (*glGetSyncivPtr)(GLsync sync,GLenum pname,GLsizei bufSize,GLsizei *length,
50                                GLint *values);
51 glGetSyncivPtr glGetSyncivFunc;
52 
53 #define CHK_GL_ERR() printf("%s\n", gluErrorString(glGetError()))
54 
InitSyncFns(void)55 static void InitSyncFns( void )
56 {
57     glFenceSyncFunc = (glFenceSyncPtr)glutGetProcAddress( "glFenceSync" );
58     glIsSyncFunc = (glIsSyncPtr)glutGetProcAddress( "glIsSync" );
59     glDeleteSyncFunc = (glDeleteSyncPtr)glutGetProcAddress( "glDeleteSync" );
60     glClientWaitSyncFunc = (glClientWaitSyncPtr)glutGetProcAddress( "glClientWaitSync" );
61     glWaitSyncFunc = (glWaitSyncPtr)glutGetProcAddress( "glWaitSync" );
62     glGetInteger64vFunc = (glGetInteger64vPtr)glutGetProcAddress( "glGetInteger64v" );
63     glGetSyncivFunc = (glGetSyncivPtr)glutGetProcAddress( "glGetSynciv" );
64 }
65 
66 #define GL_MAX_SERVER_WAIT_TIMEOUT        0x9111
67 
68 #define GL_OBJECT_TYPE            0x9112
69 #define GL_SYNC_CONDITION            0x9113
70 #define GL_SYNC_STATUS            0x9114
71 #define GL_SYNC_FLAGS            0x9115
72 
73 #define GL_SYNC_FENCE            0x9116
74 
75 #define GL_SYNC_GPU_COMMANDS_COMPLETE    0x9117
76 
77 #define GL_UNSIGNALED            0x9118
78 #define GL_SIGNALED            0x9119
79 
80 #define GL_SYNC_FLUSH_COMMANDS_BIT        0x00000001
81 
82 #define GL_TIMEOUT_IGNORED            0xFFFFFFFFFFFFFFFFull
83 
84 #define GL_ALREADY_SIGNALED        0x911A
85 #define GL_TIMEOUT_EXPIRED            0x911B
86 #define GL_CONDITION_SATISFIED        0x911C
87 #define GL_WAIT_FAILED            0x911D
88 
89 #define USING_ARB_sync 1
90 #endif
91 
92 typedef cl_event (CL_API_CALL *clCreateEventFromGLsyncKHR_fn)( cl_context context, GLsync sync, cl_int *errCode_ret) ;
93 
94 clCreateEventFromGLsyncKHR_fn clCreateEventFromGLsyncKHR_ptr;
95 
96 
97 static const char *updateBuffersKernel[] = {
98     "__kernel void update( __global float4 * vertices, __global float4 *colors, int horizWrap, int rowIdx )\n"
99     "{\n"
100     "    size_t tid = get_global_id(0);\n"
101     "\n"
102     "    size_t xVal = ( tid & ( horizWrap - 1 ) );\n"
103     "    vertices[ tid * 2 + 0 ] = (float4)( xVal, rowIdx*16.f, 0.0f, 1.f );\n"
104     "    vertices[ tid * 2 + 1 ] = (float4)( xVal, rowIdx*16.f + 4.0f, 0.0f, 1.f );\n"
105     "\n"
106     "    int rowV = rowIdx + 1;\n"
107     "    colors[ tid * 2 + 0 ] = (float4)( ( rowV & 1 ) / 255.f, ( ( rowV & 2 ) >> 1 ) / 255.f, ( ( rowV & 4 ) >> 2 ) / 255.f, 1.f );\n"
108     "    //colors[ tid * 2 + 0 ] = (float4)( (float)xVal/(float)horizWrap, 1.0f, 1.0f, 1.0f );\n"
109     "    colors[ tid * 2 + 1 ] = colors[ tid * 2 + 0 ];\n"
110     "}\n" };
111 
112 //Passthrough VertexShader
113 static const char vertexshader[] =
114 "uniform mat4 projMatrix;\n"
115 "attribute vec4 inPosition;\n"
116 "attribute vec4 inColor;\n"
117 "varying   vec4 outColor;\n"
118 "void main (void) {\n"
119 "    gl_Position = projMatrix*inPosition;\n"
120 "   outColor = inColor;\n"
121 "}\n";
122 
123 //Passthrough FragmentShader
124 static const char fragmentshader[] =
125 "varying   vec4 outColor;\n"
126 "void main (void) {\n"
127 "    gl_FragColor = outColor;\n"
128 "}\n";
129 
createShaderProgram(GLint * posLoc,GLint * colLoc)130 GLuint createShaderProgram(GLint *posLoc, GLint *colLoc)
131 {
132     GLint  logLength, status;
133     GLuint program = glCreateProgram();
134     GLuint vpShader;
135 
136     char* vpstr = (char*)malloc(sizeof(vertexshader));
137     strcpy(vpstr, vertexshader);
138 
139     vpShader = glCreateShader(GL_VERTEX_SHADER);
140     glShaderSource(vpShader, 1, (const GLchar **)&vpstr, NULL);
141     glCompileShader(vpShader);
142     glGetShaderiv(vpShader, GL_INFO_LOG_LENGTH, &logLength);
143     if (logLength > 0) {
144         GLchar *log = (GLchar*) malloc(logLength);
145         glGetShaderInfoLog(vpShader, logLength, &logLength, log);
146         log_info("Vtx Shader compile log:\n%s", log);
147         free(log);
148     }
149 
150     free(vpstr);
151     vpstr = NULL;
152 
153     glGetShaderiv(vpShader, GL_COMPILE_STATUS, &status);
154     if (status == 0)
155     {
156         log_error("Failed to compile vtx shader:\n");
157         return 0;
158     }
159 
160     glAttachShader(program, vpShader);
161 
162     GLuint fpShader;
163     char *fpstr = (char *)malloc(sizeof(fragmentshader));
164     strcpy(fpstr, fragmentshader);
165     fpShader = glCreateShader(GL_FRAGMENT_SHADER);
166     glShaderSource(fpShader, 1, (const GLchar **)&fpstr, NULL);
167     glCompileShader(fpShader);
168 
169     free(fpstr);
170     fpstr = NULL;
171 
172     glGetShaderiv(fpShader, GL_INFO_LOG_LENGTH, &logLength);
173     if (logLength > 0) {
174         GLchar *log = (GLchar*)malloc(logLength);
175         glGetShaderInfoLog(fpShader, logLength, &logLength, log);
176         log_info("Frag Shader compile log:\n%s", log);
177         free(log);
178     }
179 
180     glAttachShader(program, fpShader);
181     glGetShaderiv(fpShader, GL_COMPILE_STATUS, &status);
182     if (status == 0)
183     {
184         log_error("Failed to compile frag shader:\n\n");
185         return 0;
186     }
187 
188     glLinkProgram(program);
189     glGetProgramiv(program, GL_INFO_LOG_LENGTH, &logLength);
190     if (logLength > 0) {
191         GLchar *log = (GLchar*)malloc(logLength);
192         glGetProgramInfoLog(program, logLength, &logLength, log);
193         log_info("Program link log:\n%s", log);
194         free(log);
195     }
196 
197     glGetProgramiv(program, GL_LINK_STATUS, &status);
198     if (status == 0)
199     {
200         log_error("Failed to link program\n");
201         return 0;
202     }
203 
204     glValidateProgram(program);
205     glGetProgramiv(program, GL_INFO_LOG_LENGTH, &logLength);
206     if (logLength > 0) {
207         GLchar *log = (GLchar*)malloc(logLength);
208         glGetProgramInfoLog(program, logLength, &logLength, log);
209         log_info("Program validate log:\n%s", log);
210         free(log);
211     }
212 
213     glGetProgramiv(program, GL_VALIDATE_STATUS, &status);
214     if (status == 0)
215     {
216         log_error("Failed to validate program\n");
217         return 0;
218     }
219 
220     *posLoc = glGetAttribLocation(program, "inPosition");
221     *colLoc = glGetAttribLocation(program, "inColor");
222 
223     return program;
224 }
225 
destroyShaderProgram(GLuint program)226 void destroyShaderProgram(GLuint program)
227 {
228     GLuint shaders[2];
229     GLsizei count;
230     glUseProgram(0);
231     glGetAttachedShaders(program, 2, &count, shaders);
232     int i;
233     for(i = 0; i < count; i++)
234     {
235         glDetachShader(program, shaders[i]);
236         glDeleteShader(shaders[i]);
237     }
238     glDeleteProgram(program);
239 }
240 
241 // This function queues up and runs the above CL kernel that writes the vertex data
run_cl_kernel(cl_kernel kernel,cl_command_queue queue,cl_mem stream0,cl_mem stream1,cl_int rowIdx,cl_event fenceEvent,size_t numThreads)242 cl_int run_cl_kernel( cl_kernel kernel, cl_command_queue queue, cl_mem stream0, cl_mem stream1,
243                      cl_int rowIdx, cl_event fenceEvent, size_t numThreads )
244 {
245     cl_int error = clSetKernelArg( kernel, 3, sizeof( rowIdx ), &rowIdx );
246     test_error( error, "Unable to set kernel arguments" );
247 
248     clEventWrapper acqEvent1, acqEvent2, kernEvent, relEvent1, relEvent2;
249     int numEvents = ( fenceEvent != NULL ) ? 1 : 0;
250     cl_event *fence_evt = ( fenceEvent != NULL ) ? &fenceEvent : NULL;
251 
252     error = (*clEnqueueAcquireGLObjects_ptr)( queue, 1, &stream0, numEvents, fence_evt, &acqEvent1 );
253     test_error( error, "Unable to acquire GL obejcts");
254     error = (*clEnqueueAcquireGLObjects_ptr)( queue, 1, &stream1, numEvents, fence_evt, &acqEvent2 );
255     test_error( error, "Unable to acquire GL obejcts");
256 
257     cl_event evts[ 2 ] = { acqEvent1, acqEvent2 };
258 
259     error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, &numThreads, NULL, 2, evts, &kernEvent );
260     test_error( error, "Unable to execute test kernel" );
261 
262     error = (*clEnqueueReleaseGLObjects_ptr)( queue, 1, &stream0, 1, &kernEvent, &relEvent1 );
263     test_error(error, "clEnqueueReleaseGLObjects failed");
264     error = (*clEnqueueReleaseGLObjects_ptr)( queue, 1, &stream1, 1, &kernEvent, &relEvent2 );
265     test_error(error, "clEnqueueReleaseGLObjects failed");
266 
267     evts[ 0 ] = relEvent1;
268     evts[ 1 ] = relEvent2;
269     error = clWaitForEvents( 2, evts );
270     test_error( error, "Unable to wait for release events" );
271 
272     return 0;
273 }
274 
275 class RunThread : public genericThread
276 {
277 public:
278 
279     cl_kernel mKernel;
280     cl_command_queue mQueue;
281     cl_mem mStream0, mStream1;
282     cl_int mRowIdx;
283     cl_event mFenceEvent;
284     size_t mNumThreads;
285 
RunThread(cl_kernel kernel,cl_command_queue queue,cl_mem stream0,cl_mem stream1,size_t numThreads)286     RunThread( cl_kernel kernel, cl_command_queue queue, cl_mem stream0, cl_mem stream1, size_t numThreads )
287     : mKernel( kernel ), mQueue( queue ), mStream0( stream0 ), mStream1( stream1 ), mNumThreads( numThreads )
288     {
289     }
290 
SetRunData(cl_int rowIdx,cl_event fenceEvent)291     void SetRunData( cl_int rowIdx, cl_event fenceEvent )
292     {
293         mRowIdx = rowIdx;
294         mFenceEvent = fenceEvent;
295     }
296 
IRun(void)297     virtual void * IRun( void )
298     {
299         cl_int error = run_cl_kernel( mKernel, mQueue, mStream0, mStream1, mRowIdx, mFenceEvent, mNumThreads );
300         return (void *)(intptr_t)error;
301     }
302 };
303 
304 
test_fence_sync_single(cl_device_id device,cl_context context,cl_command_queue queue,bool separateThreads,GLint rend_vs,GLint read_vs,cl_device_id rend_device)305 int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_queue queue, bool separateThreads, GLint rend_vs, GLint read_vs, cl_device_id rend_device )
306 {
307     int error;
308     const int framebufferSize = 512;
309     cl_platform_id platform_id = NULL;
310 
311     if( !is_extension_available( device, "cl_khr_gl_event" ) )
312     {
313         log_info( "NOTE: cl_khr_gl_event extension not present on this device; skipping fence sync test\n" );
314         return 0;
315     }
316 
317     error = clGetDeviceInfo(device,
318                             CL_DEVICE_PLATFORM,
319                             sizeof(platform_id),
320                             &platform_id,
321                             NULL);
322     if(error)
323     {
324         return error;
325     }
326 
327     clCreateEventFromGLsyncKHR_ptr = \
328         (clCreateEventFromGLsyncKHR_fn)clGetExtensionFunctionAddressForPlatform(platform_id,"clCreateEventFromGLsyncKHR");
329     if( clCreateEventFromGLsyncKHR_ptr == NULL )
330     {
331         log_error( "ERROR: Unable to run fence_sync test (clCreateEventFromGLsyncKHR function not discovered!)\n" );
332         clCreateEventFromGLsyncKHR_ptr = \
333             (clCreateEventFromGLsyncKHR_fn)clGetExtensionFunctionAddressForPlatform(platform_id, "clCreateEventFromGLsyncAPPLE");
334         return -1;
335     }
336 
337 #ifdef USING_ARB_sync
338     char *gl_version_str = (char*)glGetString( GL_VERSION );
339     float glCoreVersion;
340     sscanf(gl_version_str, "%f", &glCoreVersion);
341     if( glCoreVersion < 3.0f )
342     {
343         log_info( "OpenGL version %f does not support fence/sync! Skipping test.\n", glCoreVersion );
344         return 0;
345     }
346 
347 #ifdef __APPLE__
348     CGLContextObj currCtx = CGLGetCurrentContext();
349     CGLPixelFormatObj pixFmt = CGLGetPixelFormat(currCtx);
350     GLint val, screen;
351     CGLGetVirtualScreen(currCtx, &screen);
352     CGLDescribePixelFormat(pixFmt, screen, kCGLPFAOpenGLProfile, &val);
353     if(val != kCGLOGLPVersion_3_2_Core)
354     {
355         log_error( "OpenGL context was not created with OpenGL version >= 3.0 profile even though platform supports it"
356                   "OpenGL profile %f does not support fence/sync! Skipping test.\n", glCoreVersion );
357         return -1;
358     }
359 #else
360     // Need platform specific way to query if current GL context was created with 3.x profile
361     log_error( "ERROR: not implemented\n\n" );
362     return -1;
363 #endif
364 
365     InitSyncFns();
366 #endif
367 
368 #ifdef __APPLE__
369     CGLSetVirtualScreen(CGLGetCurrentContext(), rend_vs);
370 #else
371     // Need platform specific way to set device with id rend_vs the current
372     // rendering target
373     log_error( "ERROR: not implemented\n\n" );
374     return -1;
375 #endif
376 
377     GLint posLoc, colLoc;
378     GLuint shaderprogram = createShaderProgram(&posLoc, &colLoc);
379     if(!shaderprogram)
380     {
381         log_error("Failed to create shader program\n");
382         return -1;
383     }
384 
385     float l = 0.0f; float r = framebufferSize;
386     float b = 0.0f; float t = framebufferSize;
387 
388     float projMatrix[16] = { 2.0f/(r-l), 0.0f, 0.0f, 0.0f,
389         0.0f, 2.0f/(t-b), 0.0f, 0.0f,
390         0.0f, 0.0f, -1.0f, 0.0f,
391         -(r+l)/(r-l), -(t+b)/(t-b), 0.0f, 1.0f
392     };
393 
394     glUseProgram(shaderprogram);
395     GLuint projMatLoc = glGetUniformLocation(shaderprogram, "projMatrix");
396     glUniformMatrix4fv(projMatLoc, 1, 0, projMatrix);
397     glUseProgram(0);
398 
399     // Note: the framebuffer is just the target to verify our results against, so we don't
400     // really care to go through all the possible formats in this case
401     glFramebufferWrapper glFramebuffer;
402     glRenderbufferWrapper glRenderbuffer;
403     error = CreateGLRenderbufferRaw( framebufferSize, 128, GL_COLOR_ATTACHMENT0_EXT,
404                                     GL_RGBA, GL_UNSIGNED_BYTE,
405                                     &glFramebuffer, &glRenderbuffer );
406     if( error != 0 )
407         return error;
408 
409 //    GLuint vao;
410 //    glGenVertexArrays(1, &vao);
411 //    glBindVertexArray(vao);
412 
413     glBufferWrapper vtxBuffer, colorBuffer;
414     glGenBuffers( 1, &vtxBuffer );
415     glGenBuffers( 1, &colorBuffer );
416 
417     const int numHorizVertices = ( framebufferSize * 64 ) + 1;
418 
419     glBindBuffer( GL_ARRAY_BUFFER, vtxBuffer );
420     glBufferData( GL_ARRAY_BUFFER, sizeof( GLfloat ) * numHorizVertices * 2 * 4, NULL, GL_STATIC_DRAW );
421 
422     glBindBuffer( GL_ARRAY_BUFFER, colorBuffer );
423     glBufferData( GL_ARRAY_BUFFER, sizeof( GLfloat ) * numHorizVertices * 2 * 4, NULL, GL_STATIC_DRAW );
424 
425     clProgramWrapper program;
426     clKernelWrapper kernel;
427     clMemWrapper streams[ 2 ];
428 
429     if( create_single_kernel_helper( context, &program, &kernel, 1, updateBuffersKernel, "update" ) )
430         return -1;
431 
432 
433     streams[ 0 ] = (*clCreateFromGLBuffer_ptr)( context, CL_MEM_READ_WRITE, vtxBuffer, &error );
434     test_error( error, "Unable to create CL buffer from GL vertex buffer" );
435 
436     streams[ 1 ] = (*clCreateFromGLBuffer_ptr)( context, CL_MEM_READ_WRITE, colorBuffer, &error );
437     test_error( error, "Unable to create CL buffer from GL color buffer" );
438 
439     error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] );
440     test_error( error, "Unable to set kernel arguments" );
441 
442     error = clSetKernelArg( kernel, 1, sizeof( streams[ 1 ] ), &streams[ 1 ] );
443     test_error( error, "Unable to set kernel arguments" );
444 
445     cl_int horizWrap = (cl_int)framebufferSize;
446     error = clSetKernelArg( kernel, 2, sizeof( horizWrap ), &horizWrap );
447     test_error( error, "Unable to set kernel arguments" );
448 
449     glViewport( 0, 0, framebufferSize, framebufferSize );
450     glClearColor( 0, 0, 0, 0 );
451     glClear( GL_COLOR_BUFFER_BIT );
452     glClear( GL_DEPTH_BUFFER_BIT );
453     glDisable( GL_DEPTH_TEST );
454     glEnable( GL_BLEND );
455     glBlendFunc( GL_ONE, GL_ONE );
456 
457     clEventWrapper fenceEvent;
458     GLsync glFence = 0;
459 
460     // Do a loop through 8 different horizontal stripes against the framebuffer
461     RunThread thread( kernel, queue, streams[ 0 ], streams[ 1 ], (size_t)numHorizVertices );
462 
463     for( int i = 0; i < 8; i++ )
464     {
465         // if current rendering device is not the compute device and
466         // separateThreads == false which means compute is going on same
467         // thread and we are using implicit synchronization (no GLSync obj used)
468         // then glFlush by clEnqueueAcquireGLObject is not sufficient ... we need
469         // to wait for rendering to finish on other device before CL can start
470         // writing to CL/GL shared mem objects. When separateThreads is true i.e.
471         // we are using GLSync obj to synchronize then we dont need to call glFinish
472         // here since CL should wait for rendering on other device before this
473         // GLSync object to finish before it starts writing to shared mem object.
474         // Also rend_device == compute_device no need to call glFinish
475         if(rend_device != device && !separateThreads)
476             glFinish();
477 
478         if( separateThreads )
479         {
480             thread.SetRunData( (cl_int)i, fenceEvent );
481             thread.Start();
482 
483             error = (cl_int)(size_t)thread.Join();
484         }
485         else
486         {
487             error = run_cl_kernel( kernel, queue, streams[ 0 ], streams[ 1 ], (cl_int)i, fenceEvent, (size_t)numHorizVertices );
488         }
489         test_error( error, "Unable to run CL kernel" );
490 
491         glUseProgram(shaderprogram);
492         glEnableVertexAttribArray(posLoc);
493         glEnableVertexAttribArray(colLoc);
494         glBindBuffer( GL_ARRAY_BUFFER, vtxBuffer );
495         glVertexAttribPointer(posLoc, 4, GL_FLOAT, GL_FALSE, 4*sizeof(GLfloat), 0);
496         glBindBuffer( GL_ARRAY_BUFFER, colorBuffer );
497         glVertexAttribPointer(colLoc, 4, GL_FLOAT, GL_FALSE, 4*sizeof(GLfloat), 0);
498         glBindBuffer( GL_ARRAY_BUFFER, 0 );
499 
500         glDrawArrays( GL_TRIANGLE_STRIP, 0, numHorizVertices * 2 );
501 
502         glDisableVertexAttribArray(posLoc);
503         glDisableVertexAttribArray(colLoc);
504         glUseProgram(0);
505 
506         if( separateThreads )
507         {
508             // If we're on the same thread, then we're testing implicit syncing, so we
509             // don't need the actual fence code
510             if( fenceEvent != NULL )
511             {
512                 clReleaseEvent( fenceEvent );
513                 glDeleteSyncFunc( glFence );
514             }
515 
516             glFence = glFenceSyncFunc( GL_SYNC_GPU_COMMANDS_COMPLETE, 0 );
517             fenceEvent = clCreateEventFromGLsyncKHR_ptr( context, glFence, &error );
518             test_error( error, "Unable to create CL event from GL fence" );
519 
520             // in case of explicit synchronization, we just wait for the sync object to complete
521             // in clEnqueueAcquireGLObject but we dont flush. Its application's responsibility
522             // to flush on the context on which glSync is created
523             glFlush();
524         }
525     }
526 
527     if( glFence != 0 )
528         // Don't need the final release for fenceEvent, because the wrapper will take care of that
529         glDeleteSyncFunc( glFence );
530 
531 #ifdef __APPLE__
532     CGLSetVirtualScreen(CGLGetCurrentContext(), read_vs);
533 #else
534     // Need platform specific code to set the current rendering device (OpenGL target)
535     // to device with id read_vs so that next glReadPixels get submitted to that device
536     log_error( "ERROR: not implemented\n\n" );
537     return -1;
538 #endif
539     // Grab the contents of the final framebuffer
540     BufferOwningPtr<char> resultData( ReadGLRenderbuffer( glFramebuffer, glRenderbuffer,
541                                                          GL_COLOR_ATTACHMENT0_EXT,
542                                                          GL_RGBA8_OES, GL_UNSIGNED_BYTE, GL_RGBA, GL_UNSIGNED_BYTE, kUChar,
543                                                          framebufferSize, 128 ) );
544 
545     // Check the contents now. We should end up with solid color bands 32 pixels high and the
546     // full width of the framebuffer, at values (128,128,128) due to the additive blending
547     for( int i = 0; i < 8; i++ )
548     {
549         for( int y = 0; y < 4; y++ )
550         {
551             // Note: coverage will be double because the 63-0 triangle overwrites again at the end of the pass
552             cl_uchar valA = ( ( ( i + 1 ) & 1 )      ) * numHorizVertices * 2 / framebufferSize;
553             cl_uchar valB = ( ( ( i + 1 ) & 2 ) >> 1 ) * numHorizVertices * 2 / framebufferSize;
554             cl_uchar valC = ( ( ( i + 1 ) & 4 ) >> 2 ) * numHorizVertices * 2 / framebufferSize;
555 
556             cl_uchar *row = (cl_uchar *)&resultData[ ( i * 16 + y ) * framebufferSize * 4 ];
557             for( int x = 0; x < ( framebufferSize - 1 ) - 1; x++ )
558             {
559                 if( ( row[ x * 4 ] != valA ) || ( row[ x * 4 + 1 ] != valB ) ||
560                    ( row[ x * 4 + 2 ] != valC ) )
561                 {
562                     log_error( "ERROR: Output framebuffer did not validate!\n" );
563                     DumpGLBuffer( GL_UNSIGNED_BYTE, framebufferSize, 128, resultData );
564                     log_error( "RUNS:\n" );
565                     uint32_t *p = (uint32_t *)(char *)resultData;
566                     size_t a = 0;
567                     for( size_t t = 1; t < framebufferSize * framebufferSize; t++ )
568                     {
569                         if( p[ a ] != 0 )
570                         {
571                             if( p[ t ] == 0 )
572                             {
573                                 log_error(
574                                     "RUN: %zu to %zu (%d,%d to %d,%d) 0x%08x\n",
575                                     a, t - 1, (int)(a % framebufferSize),
576                                     (int)(a / framebufferSize),
577                                     (int)((t - 1) % framebufferSize),
578                                     (int)((t - 1) / framebufferSize), p[a]);
579                                 a = t;
580                             }
581                         }
582                         else
583                         {
584                             if( p[ t ] != 0 )
585                             {
586                                 a = t;
587                             }
588                         }
589 
590                     }
591                     return -1;
592                 }
593             }
594         }
595     }
596 
597     glDeleteBuffers( 1, &vtxBuffer );
598     glDeleteBuffers( 1, &colorBuffer );
599     destroyShaderProgram(shaderprogram);
600 //    glDeleteVertexArrays(1, &vao);
601     return 0;
602 }
603 
test_fence_sync(cl_device_id device,cl_context context,cl_command_queue queue,int numElements)604 int test_fence_sync( cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
605 {
606     GLint vs_count = 0;
607     cl_device_id *device_list = NULL;
608 
609     if( !is_extension_available( device, "cl_khr_gl_event" ) )
610     {
611         log_info( "NOTE: cl_khr_gl_event extension not present on this device; skipping fence sync test\n" );
612         return 0;
613     }
614 #ifdef __APPLE__
615     CGLContextObj ctx = CGLGetCurrentContext();
616     CGLPixelFormatObj pix = CGLGetPixelFormat(ctx);
617     CGLError err = CGLDescribePixelFormat(pix, 0, kCGLPFAVirtualScreenCount, &vs_count);
618 
619     device_list = (cl_device_id *) malloc(sizeof(cl_device_id)*vs_count);
620     clGetGLContextInfoAPPLE(context, ctx, CL_CGL_DEVICES_FOR_SUPPORTED_VIRTUAL_SCREENS_APPLE, sizeof(cl_device_id)*vs_count, device_list, NULL);
621 #else
622     // Need platform specific way of getting devices from CL context to which OpenGL can render
623     // If not available it can be replaced with clGetContextInfo with CL_CONTEXT_DEVICES
624     log_error( "ERROR: not implemented\n\n" );
625     return -1;
626 #endif
627 
628     GLint rend_vs, read_vs;
629     int error = 0;
630     int any_failed = 0;
631 
632     // Loop through all the devices capable to OpenGL rendering
633     // and set them as current rendering target
634     for(rend_vs = 0; rend_vs < vs_count; rend_vs++)
635     {
636         // Loop through all the devices and set them as current
637         // compute target
638         for(read_vs = 0; read_vs < vs_count; read_vs++)
639         {
640             cl_device_id rend_device = device_list[rend_vs], read_device = device_list[read_vs];
641             char rend_name[200], read_name[200];
642 
643             clGetDeviceInfo(rend_device, CL_DEVICE_NAME, sizeof(rend_name), rend_name, NULL);
644             clGetDeviceInfo(read_device, CL_DEVICE_NAME, sizeof(read_name), read_name, NULL);
645 
646             log_info("Rendering on: %s, read back on: %s\n", rend_name, read_name);
647             error = test_fence_sync_single( device, context, queue, false, rend_vs, read_vs, rend_device );
648             any_failed |= error;
649             if( error != 0 )
650                 log_error( "ERROR: Implicit syncing with GL sync events failed!\n\n" );
651             else
652                 log_info("Implicit syncing Passed\n");
653 
654             error = test_fence_sync_single( device, context, queue, true, rend_vs, read_vs, rend_device );
655             any_failed |= error;
656             if( error != 0 )
657                 log_error( "ERROR: Explicit syncing with GL sync events failed!\n\n" );
658             else
659                 log_info("Explicit syncing Passed\n");
660         }
661     }
662 
663     free(device_list);
664 
665     return any_failed;
666 }
667