1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "procs.h"
17 #include "subhelpers.h"
18 #include "harness/conversions.h"
19 #include "harness/typeWrappers.h"
20
21 static const char *lbar_source =
22 "__kernel void test_lbar(const __global Type *in, __global int2 *xy, "
23 "__global Type *out)\n"
24 "{\n"
25 " __local int tmp[200];\n"
26 " int gid = get_global_id(0);\n"
27 " int nid = get_sub_group_size();\n"
28 " int lid = get_sub_group_local_id();\n"
29 " xy[gid].x = lid;\n"
30 " xy[gid].y = get_sub_group_id();\n"
31 " if (get_sub_group_id() == 0) {\n"
32 " tmp[lid] = in[gid];\n"
33 " sub_group_barrier(CLK_LOCAL_MEM_FENCE);\n"
34 " out[gid] = tmp[nid-1-lid];\n"
35 " } else {\n"
36 " out[gid] = -in[gid];\n"
37 " }\n"
38 "}\n";
39
40 static const char *gbar_source =
41 "__kernel void test_gbar(const __global Type *in, __global int2 *xy, "
42 "__global Type *out, __global Type *tmp)\n"
43 "{\n"
44 " int gid = get_global_id(0);\n"
45 " int nid = get_sub_group_size();\n"
46 " int lid = get_sub_group_local_id();\n"
47 " int tof = get_group_id(0)*get_max_sub_group_size();\n"
48 " xy[gid].x = lid;\n"
49 " xy[gid].y = get_sub_group_id();\n"
50 " if (get_sub_group_id() == 0) {\n"
51 " tmp[tof+lid] = in[gid];\n"
52 " sub_group_barrier(CLK_GLOBAL_MEM_FENCE);\n"
53 " out[gid] = tmp[tof+nid-1-lid];\n"
54 " } else {\n"
55 " out[gid] = -in[gid];\n"
56 " }\n"
57 "}\n";
58
59 // barrier test functions
60 template <int Which> struct BAR
61 {
log_testBAR62 static void log_test(const WorkGroupParams &test_params,
63 const char *extra_text)
64 {
65 if (Which == 0)
66 log_info(" sub_group_barrier(CLK_LOCAL_MEM_FENCE)...%s\n",
67 extra_text);
68 else
69 log_info(" sub_group_barrier(CLK_GLOBAL_MEM_FENCE)...%s\n",
70 extra_text);
71 }
72
genBAR73 static void gen(cl_int *x, cl_int *t, cl_int *m,
74 const WorkGroupParams &test_params)
75 {
76 int i, ii, j, k, n;
77 int nw = test_params.local_workgroup_size;
78 int ns = test_params.subgroup_size;
79 int ng = test_params.global_workgroup_size;
80 int nj = (nw + ns - 1) / ns;
81 ng = ng / nw;
82
83 ii = 0;
84 for (k = 0; k < ng; ++k)
85 {
86 for (j = 0; j < nj; ++j)
87 {
88 ii = j * ns;
89 n = ii + ns > nw ? nw - ii : ns;
90
91 for (i = 0; i < n; ++i) t[ii + i] = genrand_int32(gMTdata);
92 }
93
94 // Now map into work group using map from device
95 for (j = 0; j < nw; ++j)
96 {
97 x[j] = t[j];
98 }
99
100 x += nw;
101 m += 2 * nw;
102 }
103 }
104
chkBAR105 static test_status chk(cl_int *x, cl_int *y, cl_int *mx, cl_int *my,
106 cl_int *m, const WorkGroupParams &test_params)
107 {
108 int ii, i, j, k, n;
109 int nw = test_params.local_workgroup_size;
110 int ns = test_params.subgroup_size;
111 int ng = test_params.global_workgroup_size;
112 int nj = (nw + ns - 1) / ns;
113 ng = ng / nw;
114 cl_int tr, rr;
115
116 for (k = 0; k < ng; ++k)
117 {
118 // Map to array indexed to array indexed by local ID and sub group
119 for (j = 0; j < nw; ++j)
120 {
121 mx[j] = x[j];
122 my[j] = y[j];
123 }
124
125 for (j = 0; j < nj; ++j)
126 {
127 ii = j * ns;
128 n = ii + ns > nw ? nw - ii : ns;
129
130 for (i = 0; i < n; ++i)
131 {
132 tr = j == 0 ? mx[ii + n - 1 - i] : -mx[ii + i];
133 rr = my[ii + i];
134
135 if (tr != rr)
136 {
137 log_error("ERROR: sub_group_barrier mismatch for local "
138 "id %d in sub group %d in group %d expected "
139 "%d got %d\n",
140 i, j, k, tr, rr);
141 return TEST_FAIL;
142 }
143 }
144 }
145
146 x += nw;
147 y += nw;
148 m += 2 * nw;
149 }
150
151 return TEST_PASS;
152 }
153 };
154
155 // Entry point from main
test_barrier_functions(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements,bool useCoreSubgroups)156 int test_barrier_functions(cl_device_id device, cl_context context,
157 cl_command_queue queue, int num_elements,
158 bool useCoreSubgroups)
159 {
160 int error = TEST_PASS;
161
162 // Adjust these individually below if desired/needed
163 constexpr size_t global_work_size = 2000;
164 constexpr size_t local_work_size = 200;
165 WorkGroupParams test_params(global_work_size, local_work_size);
166 test_params.use_core_subgroups = useCoreSubgroups;
167 error = test<cl_int, BAR<0>>::run(device, context, queue, num_elements,
168 "test_lbar", lbar_source, test_params);
169 error |= test<cl_int, BAR<1>, global_work_size>::run(
170 device, context, queue, num_elements, "test_gbar", gbar_source,
171 test_params);
172
173 return error;
174 }
175
test_barrier_functions_core(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)176 int test_barrier_functions_core(cl_device_id device, cl_context context,
177 cl_command_queue queue, int num_elements)
178 {
179 return test_barrier_functions(device, context, queue, num_elements, true);
180 }
181
test_barrier_functions_ext(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)182 int test_barrier_functions_ext(cl_device_id device, cl_context context,
183 cl_command_queue queue, int num_elements)
184 {
185 bool hasExtension = is_extension_available(device, "cl_khr_subgroups");
186
187 if (!hasExtension)
188 {
189 log_info(
190 "Device does not support 'cl_khr_subgroups'. Skipping the test.\n");
191 return TEST_SKIPPED_ITSELF;
192 }
193
194 return test_barrier_functions(device, context, queue, num_elements, false);
195 }
196