/*
 * Copyright (c) 2019-2024, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
#version 450 core
#pragma use_vulkan_memory_model
#extension GL_EXT_scalar_block_layout : enable
#extension GL_KHR_memory_scope_semantics : enable
#extension GL_KHR_cooperative_matrix : enable
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
#extension GL_EXT_shader_explicit_arithmetic_types_int8 : enable
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : enable
#extension GL_EXT_buffer_reference : enable
#extension GL_EXT_control_flow_attributes : enable

// Specialization constants; every default is 1.
// lM / lN / lK are the cooperative-matrix dimensions used below.
// TILE_K is the step of the outer K loop and K is its upper bound.
// TILE_M and TILE_N are declared but not referenced in the visible source.
layout(constant_id = 0) const uint lM = 1;
layout(constant_id = 1) const uint lN = 1;
layout(constant_id = 2) const uint lK = 1;
layout(constant_id = 3) const uint TILE_M = 1;
layout(constant_id = 4) const uint TILE_N = 1;
layout(constant_id = 5) const uint TILE_K = 1;
layout(constant_id = 6) const uint K = 1;

// Element types of the A operand and of the accumulator/output.
#define A_BITS 16
#define A_TYPE float16_t
#define C_BITS 16
#define C_TYPE float16_t

// Shorthand for the accumulator matrix type (lM x lN, subgroup scope).
#define ACC_MAT coopmat<C_TYPE, gl_ScopeSubgroup, lM, lN, gl_MatrixUseAccumulator>

// Destination buffer for the accumulated tiles.
buffer Output { C_TYPE x[]; } outputO;

// Shared arrays the A and B operands are loaded from.
shared uvec4 Ash[128];
shared uvec4 Bsh[128];

// Accumulators are kept as a C_ROWS x C_COLS grid of cooperative matrices.
const uint C_ROWS = 2;
const uint C_COLS = 2;
ACC_MAT result[C_ROWS][C_COLS];

void main()
{
    // Start every accumulator tile at zero.
    [[unroll]] for (uint row = 0; row < C_ROWS; ++row) {
        [[unroll]] for (uint col = 0; col < C_COLS; ++col) {
            result[row][col] = ACC_MAT(0.0);
        }
    }

    // Advance along K in steps of TILE_K; each step runs TILE_K / lK multiply-add rounds.
    for (uint kBase = 0; kBase < K; kBase += TILE_K) {
        [[unroll]] for (uint kStep = 0; kStep < TILE_K / lK; ++kStep)
        {
            // One A operand per accumulator row; each load uses element offset 0 and stride 0.
            coopmat<A_TYPE, gl_ScopeSubgroup, lM, lK, gl_MatrixUseA> matA[C_ROWS];
            [[unroll]] for (uint row = 0; row < C_ROWS; ++row) {
                coopMatLoad(matA[row], Ash, 0, 0, gl_CooperativeMatrixLayoutRowMajor);
            }

            // A single B operand is reloaded per column and applied to every row.
            coopmat<A_TYPE, gl_ScopeSubgroup, lK, lN, gl_MatrixUseB> matB;
            [[unroll]] for (uint col = 0; col < C_COLS; ++col) {
                coopMatLoad(matB, Bsh, 0, 0, gl_CooperativeMatrixLayoutRowMajor);

                [[unroll]] for (uint row = 0; row < C_ROWS; ++row) {
                    result[row][col] = coopMatMulAdd(matA[row], matB, result[row][col]);
                }
            }
        }
    }

    // Write the tiles out row by row. Every store targets offset 0 of outputO.x,
    // so the iteration order here is kept exactly as in the original.
    [[unroll]] for (uint row = 0; row < C_ROWS; ++row) {
        [[unroll]] for (uint col = 0; col < C_COLS; ++col) {
            coopMatStore(result[row][col], outputO.x, 0, 0, gl_CooperativeMatrixLayoutRowMajor);
        }
    }
}