1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17 #include "harness/rounding_mode.h"
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <string.h>
22 #include <sys/types.h>
23 #include <sys/stat.h>
24
25 #include <algorithm>
26
27 #include "procs.h"
28
29 static const char *enqueued_local_size_2d_code = R"(
30 __kernel void test_enqueued_local_size_2d(global int *dst)
31 {
32 if ((get_global_id(0) == 0) && (get_global_id(1) == 0))
33 {
34 dst[0] = (int)get_enqueued_local_size(0);
35 dst[1] = (int)get_enqueued_local_size(1);
36 }
37 }
38 )";
39
40 static const char *enqueued_local_size_1d_code = R"(
41 __kernel void test_enqueued_local_size_1d(global int *dst)
42 {
43 int tid_x = get_global_id(0);
44 if (get_global_id(0) == 0)
45 {
46 dst[tid_x] = (int)get_enqueued_local_size(0);
47 }
48 }
49 )";
50
51
verify_enqueued_local_size(int * result,size_t * expected,int n)52 static int verify_enqueued_local_size(int *result, size_t *expected, int n)
53 {
54 int i;
55 for (i = 0; i < n; i++)
56 {
57 if (result[i] != (int)expected[i])
58 {
59 log_error("get_enqueued_local_size failed\n");
60 return -1;
61 }
62 }
63 log_info("get_enqueued_local_size passed\n");
64 return 0;
65 }
66
67
test_enqueued_local_size(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)68 int test_enqueued_local_size(cl_device_id device, cl_context context,
69 cl_command_queue queue, int num_elements)
70 {
71 clMemWrapper stream;
72 clProgramWrapper program[2];
73 clKernelWrapper kernel[2];
74
75 cl_int output_ptr[2];
76 size_t globalsize[2];
77 size_t localsize[2];
78 int err;
79
80 // For an OpenCL-3.0 device that does not support non-uniform work-groups
81 // we cannot enqueue local sizes which do not divide the global dimensions
82 // but we can still run the test checking that get_enqueued_local_size ==
83 // get_local_size.
84 bool use_uniform_work_groups{ false };
85 if (get_device_cl_version(device) >= Version(3, 0))
86 {
87 cl_bool areNonUniformWorkGroupsSupported = false;
88 err = clGetDeviceInfo(device, CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT,
89 sizeof(areNonUniformWorkGroupsSupported),
90 &areNonUniformWorkGroupsSupported, nullptr);
91 test_error_ret(err, "clGetDeviceInfo failed.", TEST_FAIL);
92
93 if (CL_FALSE == areNonUniformWorkGroupsSupported)
94 {
95 log_info("Non-uniform work group sizes are not supported, "
96 "enqueuing with uniform workgroups\n");
97 use_uniform_work_groups = true;
98 }
99 }
100
101 stream = clCreateBuffer(context, CL_MEM_READ_WRITE, 2 * sizeof(cl_int),
102 nullptr, &err);
103 test_error(err, "clCreateBuffer failed.");
104
105 std::string cl_std = "-cl-std=CL";
106 cl_std += (get_device_cl_version(device) == Version(3, 0)) ? "3.0" : "2.0";
107 err = create_single_kernel_helper_with_build_options(
108 context, &program[0], &kernel[0], 1, &enqueued_local_size_1d_code,
109 "test_enqueued_local_size_1d", cl_std.c_str());
110 test_error(err, "create_single_kernel_helper failed");
111 err = create_single_kernel_helper_with_build_options(
112 context, &program[1], &kernel[1], 1, &enqueued_local_size_2d_code,
113 "test_enqueued_local_size_2d", cl_std.c_str());
114 test_error(err, "create_single_kernel_helper failed");
115
116 err = clSetKernelArg(kernel[0], 0, sizeof stream, &stream);
117 test_error(err, "clSetKernelArgs failed.");
118 err = clSetKernelArg(kernel[1], 0, sizeof stream, &stream);
119 test_error(err, "clSetKernelArgs failed.");
120
121 globalsize[0] = static_cast<size_t>(num_elements);
122 globalsize[1] = static_cast<size_t>(num_elements);
123
124 size_t max_wgs;
125 err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE,
126 sizeof(max_wgs), &max_wgs, nullptr);
127 test_error(err, "clGetDeviceInfo failed.");
128
129 localsize[0] = std::min<size_t>(16, max_wgs);
130 localsize[1] = std::min<size_t>(11, max_wgs / localsize[0]);
131 // If we need to use uniform workgroups because non-uniform workgroups are
132 // not supported, round up to the next global size that is divisible by the
133 // local size.
134 if (use_uniform_work_groups)
135 {
136 if (globalsize[0] % localsize[0])
137 {
138 globalsize[0] += (localsize[0] - (globalsize[0] % localsize[0]));
139 }
140 if (globalsize[1] % localsize[1])
141 {
142 globalsize[1] += (localsize[1] - (globalsize[1] % localsize[1]));
143 }
144 }
145
146 err = clEnqueueNDRangeKernel(queue, kernel[1], 2, nullptr, globalsize,
147 localsize, 0, nullptr, nullptr);
148 test_error(err, "clEnqueueNDRangeKernel failed.");
149
150 err = clEnqueueReadBuffer(queue, stream, CL_BLOCKING, 0, 2 * sizeof(int),
151 output_ptr, 0, nullptr, nullptr);
152 test_error(err, "clEnqueueReadBuffer failed.");
153
154 err = verify_enqueued_local_size(output_ptr, localsize, 2);
155
156 globalsize[0] = static_cast<size_t>(num_elements);
157 localsize[0] = 9;
158 if (use_uniform_work_groups && (globalsize[0] % localsize[0]))
159 {
160 globalsize[0] += (localsize[0] - (globalsize[0] % localsize[0]));
161 }
162 err = clEnqueueNDRangeKernel(queue, kernel[1], 1, nullptr, globalsize,
163 localsize, 0, nullptr, nullptr);
164 test_error(err, "clEnqueueNDRangeKernel failed.");
165
166 err = clEnqueueReadBuffer(queue, stream, CL_BLOCKING, 0, 2 * sizeof(int),
167 output_ptr, 0, nullptr, nullptr);
168 test_error(err, "clEnqueueReadBuffer failed.");
169
170 err = verify_enqueued_local_size(output_ptr, localsize, 1);
171
172 return err;
173 }
174