import math
from pathlib import Path

from scipy import sparse

import torch


def to_coo_scipy(x):
    """Convert a 2-D sparse COO torch tensor into a ``scipy.sparse.coo_matrix``.

    Args:
        x: sparse tensor exposing ``_indices()`` / ``_values()``; both must be
            convertible with ``.numpy()``.

    Returns:
        A ``scipy.sparse.coo_matrix`` with the same shape as ``x``.
    """
    indices_1 = x._indices().numpy()
    values_1 = x._values().numpy()
    return sparse.coo_matrix((values_1, (indices_1[0], indices_1[1])), shape=x.shape)


def sparse_grad_output(a, b):
    """Build a random gradient tensor matching the result of ``torch.sparse.mm(a, b)``.

    Args:
        a: left operand passed to ``torch.sparse.mm``.
        b: right operand passed to ``torch.sparse.mm``.

    Returns:
        If the product is sparse, a random tensor masked to the product's
        (coalesced) sparsity pattern; otherwise a random dense tensor shaped
        like the product.
    """
    c = torch.sparse.mm(a, b)
    if c.is_sparse:
        c2 = torch.rand_like(c.to_dense())
        return c2.sparse_mask(c.coalesce())
    return torch.rand_like(c)


def _parse_header(line):
    """Parse a ``nrows, ncols, nnz`` header line into three ints.

    Splitting on a bare comma (``int`` ignores surrounding whitespace) accepts
    both ``"3, 4, 5"`` and ``"3,4,5"``.
    """
    nrows, ncols, nnz = (int(el) for el in line.split(","))
    return nrows, ncols, nnz


def read_matrix_params(path):
    """Read the header (first line) of a matrix file.

    Args:
        path: path of a file whose first line is ``nrows, ncols, nnz``.

    Returns:
        ``((nrows, ncols), nnz)``.
    """
    with open(path, encoding="utf-8") as file:
        nrows, ncols, nnz = _parse_header(file.readline())
    return (nrows, ncols), nnz


def csr_to_coo(indices, indptr, shape):
    """Expand CSR row pointers into an explicit COO index tensor.

    Args:
        indices: column index of every stored element.
        indptr: row pointers; row ``i`` owns positions
            ``indptr[i]`` .. ``indptr[i + 1] - 1`` of ``indices``.
        shape: ``(n_rows, n_cols)`` pair; only the row count is used.

    Returns:
        A ``torch.long`` tensor of shape ``(2, len(indices))`` holding the row
        indices in its first row and the column indices in its second.
    """
    n_rows, _ = shape
    cols = indices
    # Positions not covered by any row pointer range keep row index 0.
    rows = [0] * len(cols)
    for i in range(n_rows):
        for j in range(indptr[i], indptr[i + 1]):
            rows[j] = i
    return torch.tensor([rows, cols], dtype=torch.long)


def load_sparse_matrix(path, device):
    """Load the sparsity pattern of a matrix file as a sparse COO tensor.

    The file is read as three lines: the ``nrows, ncols, nnz`` header, the
    whitespace-separated row pointers, and the whitespace-separated column
    indices. The values are not read from the file; they are drawn from
    ``torch.randn`` (double precision).

    Args:
        path: path of the matrix file.
        device: device passed to ``torch.sparse_coo_tensor``.

    Returns:
        A sparse COO tensor of shape ``(nrows, ncols)``.
    """
    with open(path, encoding="utf-8") as file:
        nrows, ncols, nnz = _parse_header(file.readline())
        index_pointers = [int(el) for el in file.readline().split()]
        indices = [int(el) for el in file.readline().split()]

    data = torch.randn(nnz, dtype=torch.double)
    shape = (nrows, ncols)
    return torch.sparse_coo_tensor(
        csr_to_coo(indices, index_pointers, shape), data, shape, device=device
    )


def gen_vector(path, device):
    """Generate a random dense vector with one entry per row of the matrix file.

    Only the header of the file is used.

    Args:
        path: path of the matrix file.
        device: device on which the vector is created.

    Returns:
        A double-precision tensor of shape ``(nrows,)``.
    """
    (nrows, _), _ = read_matrix_params(path)
    return torch.randn(nrows, dtype=torch.double, device=device)


def gen_matrix(path, device):
    """Generate a random dense matrix with the shape stored in the matrix file.

    Only the header of the file is used.

    Args:
        path: path of the matrix file.
        device: device on which the matrix is created.

    Returns:
        A double-precision tensor of shape ``(nrows, ncols)``.
    """
    (nrows, ncols), _ = read_matrix_params(path)
    return torch.randn(nrows, ncols, dtype=torch.double, device=device)


def _collect_operand_files(dataset_path, hidden_size, sparsity, n_limit):
    """Scan ``{dataset_path}/{sparsity}`` for ``.smtx`` files usable as operands.

    At most ``n_limit`` files are inspected (matching or not). A file whose
    column count equals ``hidden_size`` is a left-operand candidate; one whose
    row count equals ``hidden_size`` is a right-operand candidate. Progress is
    printed to stdout.

    Returns:
        ``(x_files, y_files)``: two lists of POSIX path strings.
    """
    files = Path(f"{dataset_path}/{sparsity}").glob("**/*.smtx")
    print(dataset_path, hidden_size, sparsity)
    x_files, y_files = [], []
    for index, f in enumerate(files):
        if index >= n_limit:
            break
        print(".", end="")
        posix_path = f.as_posix()
        size, _ = read_matrix_params(posix_path)
        if size[1] == hidden_size:
            x_files.append(posix_path)
        if size[0] == hidden_size:
            y_files.append(posix_path)
    print()
    return x_files, y_files


def load_spmv_dataset(dataset_path, hidden_size, sparsity, device, n_limit=math.inf):
    """load_spmv_dataset loads a DLMC dataset for a sparse matrix-vector multiplication (SPMV) performance test.
    Args:
        dataset_path:
            path of the dataset from DLMC collection.
        hidden_size
            This value allows tensors of varying sizes.
        sparsity:
            This value allows tensors of varying sparsities.
        device:
            Whether to place the Tensor on a GPU or CPU.
        n_limit:
            This value allows a dataset with some limit size.
    Yields:
        ``(x, y)`` pairs where ``x`` is a sparse matrix and ``y`` a random
        dense vector.
    """
    x_files, y_files = _collect_operand_files(
        dataset_path, hidden_size, sparsity, n_limit
    )
    for fx, fy in zip(x_files, y_files):
        x = load_sparse_matrix(fx, device)
        y = gen_vector(fy, device)
        yield (x, y)


def load_spmm_dataset(
    dataset_path, hidden_size, sparsity, spmm_type, device, n_limit=math.inf
):
    """load_spmm_dataset loads a DLMC dataset for a sparse matrix-matrix multiplication (SPMM) performance test.
    Args:
        dataset_path:
            path of the dataset from DLMC collection.
        hidden_size
            This value allows tensors of varying sizes.
        sparsity:
            This value allows tensors of varying sparsities.
        spmm_type:
            This value allows tensors for `sparse@sparse` or `sparse@dense` operations.
        device:
            Whether to place the Tensor on a GPU or CPU.
        n_limit:
            This value allows a dataset with some limit size.
    Yields:
        ``(x, y)`` pairs where ``x`` is a sparse matrix and ``y`` is a random
        dense matrix for `sparse@dense`, otherwise a sparse matrix.
    """
    x_files, y_files = _collect_operand_files(
        dataset_path, hidden_size, sparsity, n_limit
    )
    for fx, fy in zip(x_files, y_files):
        x = load_sparse_matrix(fx, device)
        y = (
            gen_matrix(fy, device)
            if spmm_type == "sparse@dense"
            else load_sparse_matrix(fy, device)
        )
        yield (x, y)


def load_dlmc_dataset(
    dataset_path,
    operation,
    hidden_size,
    sparsity,
    device,
    requires_grad,
    n_limit=math.inf,
):
    """load_dlmc_dataset loads a DLMC dataset for a matmul performance test.
    Args:
        dataset_path:
            path of the dataset from DLMC collection.
        operation:
            This value allows tensors for `sparse@sparse`|`sparse@dense`|`sparse@vector` operations.
        hidden_size
            This value allows tensors of varying sizes.
        sparsity:
            This value allows tensors of varying sparsities.
        device:
            Whether to place the Tensor on a GPU or CPU.
        requires_grad:
            Loads the dataset for backward test.
        n_limit:
            This value allows a dataset with some limit size.
    Yields:
        A dict with keys ``x``, ``y`` (operands) and ``dx``, ``dy`` (dense
        counterparts); plus ``sx``, ``sy`` (scipy/numpy counterparts) when
        ``device == "cpu"``; plus ``sparse_grad_output`` and ``grad_output``
        when ``requires_grad`` is true.
    Raises:
        ValueError: if ``operation`` is not one of the supported values. As
            this is a generator, the error surfaces on first iteration.
    """
    if operation in ("sparse@sparse", "sparse@dense"):
        collection = load_spmm_dataset(
            dataset_path, hidden_size, sparsity, operation, device, n_limit
        )
    elif operation == "sparse@vector":
        collection = load_spmv_dataset(
            dataset_path, hidden_size, sparsity, device, n_limit
        )
    else:
        raise ValueError(
            f"unsupported operation {operation!r}; expected 'sparse@sparse', "
            "'sparse@dense' or 'sparse@vector'"
        )
    scipy_vars = {}
    backward_vars = {}
    for x, y in collection:
        if device == "cpu":
            # Converted before requires_grad_ is set below, so .numpy() is allowed.
            scipy_vars = {
                "sx": to_coo_scipy(x) if x.is_sparse else x.numpy(),
                "sy": to_coo_scipy(y) if y.is_sparse else y.numpy(),
            }
        if not requires_grad:
            dx = x.to_dense() if x.is_sparse else x
            dy = y.to_dense() if y.is_sparse else y
        else:
            c = sparse_grad_output(x, y)
            backward_vars = {
                "sparse_grad_output": c,
                "grad_output": c.to_dense() if c.is_sparse else c,
            }
            x.requires_grad_(True)
            y.requires_grad_(True)
            # Dense copies are detached so they are independent autograd leaves.
            dx = x.to_dense().detach() if x.is_sparse else x.clone().detach()
            dy = y.to_dense().detach() if y.is_sparse else y.clone().detach()
            dx.requires_grad_(True)
            dy.requires_grad_(True)
        yield {"x": x, "y": y, "dx": dx, "dy": dy, **scipy_vars, **backward_vars}