#version 450 core
#extension GL_KHR_memory_scope_semantics : enable
#extension GL_KHR_cooperative_matrix : enable
#extension GL_EXT_shader_explicit_arithmetic_types : enable
#extension GL_EXT_buffer_reference : enable

// Compute shader that exercises cooperative-matrix (coopmat) declarations,
// constructors, arithmetic, element access, load/store and multiply-add.
// Every coopMatLoad/coopMatStore in this file uses
// gl_CooperativeMatrixLayoutRowBlockedInterleavedARM.
// NOTE(review): that layout constant is presumably provided by
// GL_ARM_cooperative_matrix_layouts, for which no #extension directive
// appears here -- confirm this is intentional.
//
// Many variables below are declared and never read; they are kept as-is
// because the declarations themselves appear to be what is being exercised.

layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;

// Matrix dimensions: Z depends on the specialization constant Y
// (Z == 16 with the default Y == 2).
const int X = 8;
layout(constant_id = 0) const int Y = 2;
const int Z = X*Y;

// Global cooperative matrices (single and arrayed) whose row count is Z.
coopmat<float16_t, gl_ScopeSubgroup, Z, 8, gl_MatrixUseAccumulator> mC;
coopmat<float16_t, gl_ScopeSubgroup, Z, 8, gl_MatrixUseAccumulator> mC2[3];

// Float specialization constant; main() later declares a local named F
// that shadows it.
layout(constant_id = 1) const float F = 3.0;

// Constant cooperative matrices built with the single-scalar constructor.
const coopmat<float, gl_ScopeSubgroup, Z, 8, gl_MatrixUseAccumulator> mD = coopmat<float, gl_ScopeSubgroup, Z, 8, gl_MatrixUseAccumulator>(0.0);
const coopmat<float16_t, gl_ScopeSubgroup, 8, 8, gl_MatrixUseAccumulator> mD2 = coopmat<float16_t, gl_ScopeSubgroup, 8, 8, gl_MatrixUseAccumulator>(1);

struct S { int a; int b; int c; };

const S s = S(12, 23, 34);

// 32-bit float storage: a fixed-size array followed by a runtime-sized one.
// Declared as a buffer_reference type, so Block16 can hold a reference to it.
layout(set = 0, binding = 0, buffer_reference) coherent buffer Block {
    float y[1024*1024];
    float x[];
} block;

// 16-bit float storage with the same shape, plus a Block reference member.
layout(set = 0, binding = 0) coherent buffer Block16 {
    float16_t y[1024*1024];
    float16_t x[];

    Block b;
} block16;

// Return the negation of an 8x8 float16 accumulator matrix passed by value.
coopmat<float16_t, gl_ScopeSubgroup, 8, 8, gl_MatrixUseAccumulator> f16(coopmat<float16_t, gl_ScopeSubgroup, 8, 8, gl_MatrixUseAccumulator> m) { return -m; }
// Return the negation of an 8x8 float accumulator matrix passed by value.
coopmat<float, gl_ScopeSubgroup, 8, 8, gl_MatrixUseAccumulator> f32(coopmat<float, gl_ScopeSubgroup, 8, 8, gl_MatrixUseAccumulator> m) { return -m; }

// A specialization constant used directly as both the matrix dimensions and
// the array sizes.
layout(constant_id = 2) const int SC = 1;
coopmat<float16_t, gl_ScopeSubgroup, SC, SC, gl_MatrixUseAccumulator> scm[SC][SC];

// sized for coopmat<float16_t, gl_ScopeSubgroup, 16, 16, gl_MatrixUseAccumulator>
// (16*16 elements * 2 bytes each / 16 bytes per uvec4 = 32 uvec4 entries)
shared uvec4 shmatrix[16*16*2/16];

void main()
{
    // The column count is written as a constant expression, (2>1?8:4) == 8.
    coopmat<float, gl_ScopeSubgroup, 16, (2>1?8:4), gl_MatrixUseAccumulator> m = coopmat<float, gl_ScopeSubgroup, 16, (2>1?8:4), gl_MatrixUseAccumulator>(0.0);

    // Matrix/matrix and matrix/scalar arithmetic, scalar on either side.
    m = m + m;
    m = m - m;
    m = -m;
    m = 2.0*m;
    m = m*2.0;

    // Construct a float16 matrix from the float matrix m.
    coopmat<float16_t, gl_ScopeSubgroup, 16, 8, gl_MatrixUseAccumulator> m2 = coopmat<float16_t, gl_ScopeSubgroup, 16, 8, gl_MatrixUseAccumulator>(m);

    // Read and write individual components through indexing.
    float x = m[1];
    m[0] = x;

    // Load/store through the runtime-sized arrays: float data, float16 data,
    // and float data reached through the buffer reference block16.b.
    coopMatLoad(m, block.x, 16, 128, gl_CooperativeMatrixLayoutRowBlockedInterleavedARM);
    coopMatStore(m, block.x, 16, 128, gl_CooperativeMatrixLayoutRowBlockedInterleavedARM);
    coopMatLoad(m2, block16.x, 16, 128, gl_CooperativeMatrixLayoutRowBlockedInterleavedARM);
    coopMatStore(m2, block16.x, 16, 128, gl_CooperativeMatrixLayoutRowBlockedInterleavedARM);
    coopMatLoad(m, block16.b.x, 16, 128, gl_CooperativeMatrixLayoutRowBlockedInterleavedARM);
    coopMatStore(m, block16.b.x, 16, 128, gl_CooperativeMatrixLayoutRowBlockedInterleavedARM);

    // Multiply-add with float16 A (16x8) and B (8x8) operands and float
    // C/D accumulators (16x8). A, B and C are not initialized first.
    coopmat<float16_t, gl_ScopeSubgroup, 16, 8, gl_MatrixUseA> A;
    coopmat<float16_t, gl_ScopeSubgroup, 8, 8, gl_MatrixUseB> B;
    coopmat<float, gl_ScopeSubgroup, 16, 8, gl_MatrixUseAccumulator> C;
    coopmat<float, gl_ScopeSubgroup, 16, 8, gl_MatrixUseAccumulator> D;
    D = coopMatMulAdd(A, B, C);

    int l = D.length();

    coopmat<float16_t, gl_ScopeSubgroup, 8, 8, gl_MatrixUseAccumulator> E;

    // Local F shadows the global specialization constant F; both of its
    // dimensions come from the spec-constant-dependent Z.
    coopmat<float16_t, gl_ScopeSubgroup, Z, Z, gl_MatrixUseAccumulator> F = coopmat<float16_t, gl_ScopeSubgroup, Z, Z, gl_MatrixUseAccumulator>(0.0);

    // Array of cooperative matrices: array index first, then component index.
    coopmat<float, gl_ScopeSubgroup, 16, (2>1?8:4), gl_MatrixUseAccumulator> a[5];
    a[3][0] = 1.0;

    // Index into the global constant matrix.
    float md1 = mD[1];

    // Index the result of a compound assignment with a large constant.
    // NOTE(review): 1234 looks deliberate for this test -- left unchanged.
    md1 += (m += m)[1234];

    // Whole-matrix assignment between elements of the global array.
    mC2[1] = mC2[2];

    // Same load/store pairs as above, targeting the fixed-size arrays.
    coopMatLoad(m, block.y, 16, 128, gl_CooperativeMatrixLayoutRowBlockedInterleavedARM);
    coopMatStore(m, block.y, 16, 128, gl_CooperativeMatrixLayoutRowBlockedInterleavedARM);
    coopMatLoad(m2, block16.y, 16, 128, gl_CooperativeMatrixLayoutRowBlockedInterleavedARM);
    coopMatStore(m2, block16.y, 16, 128, gl_CooperativeMatrixLayoutRowBlockedInterleavedARM);

    // Pass matrices to, and receive them from, user-defined functions.
    coopmat<float16_t, gl_ScopeSubgroup, 8, 8, gl_MatrixUseAccumulator> p1;
    coopmat<float, gl_ScopeSubgroup, 8, 8, gl_MatrixUseAccumulator> p2;

    p1 = f16(p1);
    p2 = f32(p2);

    p1 = coopmat<float16_t, gl_ScopeSubgroup, 8, 8, gl_MatrixUseAccumulator>(0.0);
    p2 = coopmat<float, gl_ScopeSubgroup, 8, 8, gl_MatrixUseAccumulator>(0.0);

    // Compound assignment: matrix /= matrix, then matrix *= scalar.
    p1 /= p1;

    p1 *= float16_t(2.0);
    p2 *= 4.0;

    // Load/store a float16 matrix through the shared uvec4 array.
    coopmat<float16_t, gl_ScopeSubgroup, 16, 8, gl_MatrixUseAccumulator> ms;
    coopMatLoad(ms, shmatrix, 1, 2, gl_CooperativeMatrixLayoutRowBlockedInterleavedARM);
    coopMatStore(ms, shmatrix, 1, 2, gl_CooperativeMatrixLayoutRowBlockedInterleavedARM);

    // int8 multiply-add in three forms: no operands argument, an explicit 0,
    // and gl_MatrixOperandsSaturatingAccumulation. The results are discarded.
    coopmat<int8_t, gl_ScopeSubgroup, 8, 8, gl_MatrixUseA> ms8A;
    coopmat<int8_t, gl_ScopeSubgroup, 8, 8, gl_MatrixUseB> ms8B;
    coopmat<int8_t, gl_ScopeSubgroup, 8, 8, gl_MatrixUseAccumulator> ms8C;
    coopMatMulAdd(ms8A, ms8B, ms8C);
    coopMatMulAdd(ms8A, ms8B, ms8C, 0);
    coopMatMulAdd(ms8A, ms8B, ms8C, gl_MatrixOperandsSaturatingAccumulation);

    // Store an (uninitialized) int16 A-use matrix through the shared array.
    coopmat<int16_t, gl_ScopeSubgroup, 8, 8, gl_MatrixUseA> m16;
    coopMatStore(m16, shmatrix, 1, 2, gl_CooperativeMatrixLayoutRowBlockedInterleavedARM);
}