xref: /aosp_15_r20/external/skia/resources/sksl/compute/Workgroup.compute (revision c8dee2aa9b3f27cf6c858bd81872bdeb2c07ed17)
// Implementation of the parallel prefix sum algorithm (an inclusive scan of SIZE * 2 floats).
2
// Each workgroup is 256 invocations wide along x.
layout(local_size_x = 256) in;

// Same value as local_size_x above. Every invocation handles two elements, so the scan below
// covers SIZE * 2 values.
const int SIZE = 256;

// Read-only buffer holding the values to be scanned.
layout(set = 0, binding = 0) readonly buffer inputs {
    float[] in_data;
};

// Write-only buffer that receives the scanned values.
layout(set = 0, binding = 1) writeonly buffer outputs {
    float[] out_data;
};

// Workgroup-shared scratch array: two floats per invocation.
workgroup float[SIZE * 2] shared_data;
14
// Writes `value` into slot `i` of the workgroup-shared array `shared_data`.
//
// This is deliberately a separate `noinline` function: it exists to test that workgroup-shared
// variables are passed to user-defined functions correctly.
noinline void store(uint i, float value) {
    shared_data[i] = value;
}
20
// Computes an inclusive prefix sum of SIZE * 2 floats read from in_data and writes the result to
// out_data, staging the values in shared_data.
// NOTE(review): shared_data is indexed with sk_GlobalInvocationID, so the indices stay within its
// SIZE * 2 slots only for ids 0..SIZE-1 (i.e. a single workgroup) -- confirm at the dispatch site.
void main() {
    uint id = sk_GlobalInvocationID.x;

    // Each thread is responsible for two adjacent elements: an even slot and the odd slot after it.
    uint even_idx = id * 2;
    uint odd_idx = even_idx + 1;

    // Stage both elements in the workgroup-shared array.
    shared_data[even_idx] = in_data[even_idx];
    shared_data[odd_idx] = in_data[odd_idx];

    workgroupBarrier();

    // One pass per doubling of the block width: log2(SIZE) + 1 passes cover SIZE * 2 elements.
    const uint num_steps = uint(log2(float(SIZE))) + 1;
    for (uint step = 0; step < num_steps; ++step) {
        // Low `step` bits of the id: this thread's offset inside the half it updates.
        uint mask = (1 << step) - 1;
        // Last element of the lower half of this thread's 2^(step + 1)-wide block.
        uint rd_id = ((id >> step) << (step + 1)) + mask;
        // This thread's element in the upper half of that block.
        uint wr_id = rd_id + 1 + (id & mask);

        // Accumulate the lower half's running total into our element.
        float sum = shared_data[wr_id] + shared_data[rd_id];
        store(wr_id, sum);

        // All writes of this pass must land before the next pass reads them.
        workgroupBarrier();
    }

    // Write the final result out.
    out_data[even_idx] = shared_data[even_idx];
    out_data[odd_idx] = shared_data[odd_idx];
}
50