1#include <metal_stdlib> 2#include <simd/simd.h> 3#ifdef __clang__ 4#pragma clang diagnostic ignored "-Wall" 5#endif 6using namespace metal; 7struct GlobalCounts { 8 atomic_uint firstHalfCount; 9 atomic_uint secondHalfCount; 10}; 11struct Inputs { 12 uint3 sk_LocalInvocationID; 13}; 14struct ssbo { 15 GlobalCounts globalCounts; 16}; 17struct Globals { 18 device ssbo* _anonInterface0; 19}; 20struct Threadgroups { 21 array<atomic_uint, 2> localCounts; 22}; 23kernel void computeMain(uint3 sk_LocalInvocationID [[thread_position_in_threadgroup]], device ssbo& _anonInterface0 [[buffer(0)]]) { 24 Globals _globals{&_anonInterface0}; 25 (void)_globals; 26 threadgroup Threadgroups _threadgroups{{}}; 27 (void)_threadgroups; 28 Inputs _in = { sk_LocalInvocationID }; 29 if (_in.sk_LocalInvocationID.x == 0u) { 30 atomic_store_explicit(&_threadgroups.localCounts[0], 0u, memory_order_relaxed); 31 atomic_store_explicit(&_threadgroups.localCounts[1], 0u, memory_order_relaxed); 32 } 33 threadgroup_barrier(mem_flags::mem_threadgroup); 34 uint idx = uint(_in.sk_LocalInvocationID.x < 128u ? 0 : 1); 35 atomic_fetch_add_explicit(&_threadgroups.localCounts[idx], 1u, memory_order_relaxed); 36 threadgroup_barrier(mem_flags::mem_threadgroup); 37 if (_in.sk_LocalInvocationID.x == 0u) { 38 atomic_fetch_add_explicit(&_globals._anonInterface0->globalCounts.firstHalfCount, atomic_load_explicit(&_threadgroups.localCounts[0], memory_order_relaxed), memory_order_relaxed); 39 atomic_fetch_add_explicit(&_globals._anonInterface0->globalCounts.secondHalfCount, atomic_load_explicit(&_threadgroups.localCounts[1], memory_order_relaxed), memory_order_relaxed); 40 } 41 return; 42} 43