# pytorch/benchmarks/sparse/dlmc/utils.py
import math
from pathlib import Path

from scipy import sparse

import torch


def to_coo_scipy(x):
    """Convert a coalesced sparse COO torch.Tensor to a scipy.sparse.coo_matrix."""
    indices_1 = x._indices().numpy()
    values_1 = x._values().numpy()
    return sparse.coo_matrix((values_1, (indices_1[0], indices_1[1])), shape=x.shape)


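# Example for to_coo_scipy, a minimal sketch with hypothetical values:
#
#   i = torch.tensor([[0, 1], [2, 0]])
#   v = torch.tensor([1.0, 2.0])
#   s = torch.sparse_coo_tensor(i, v, (2, 3)).coalesce()
#   to_coo_scipy(s)  # -> 2x3 scipy.sparse.coo_matrix with 2 stored elements

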
def sparse_grad_output(a, b):
    """Return a random gradient for the output of torch.sparse.mm(a, b).

    If the product is sparse, the gradient is masked to its sparsity pattern;
    otherwise a dense gradient of the same shape is returned.
    """
    c = torch.sparse.mm(a, b)
    if c.is_sparse:
        c2 = torch.rand_like(c.to_dense())
        return c2.sparse_mask(c.coalesce())
    else:
        return torch.rand_like(c)


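# Example for sparse_grad_output, a minimal sketch with hypothetical operands
# (sparse @ dense yields a dense product, so the gradient here is dense):
#
#   a = torch.sparse_coo_tensor([[0, 1], [1, 0]], [1.0, 2.0], (2, 2))
#   b = torch.randn(2, 2)
#   g = sparse_grad_output(a, b)  # dense tensor with the shape of a @ b

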
def read_matrix_params(path):
    """Read the header of a DLMC .smtx file.

    The first line has the form "nrows, ncols, nnz"; returns ((nrows, ncols), nnz).
    """
    with open(path) as file:
        line = file.readline()
        nrows, ncols, nnz = (int(el) for el in line.split(", "))
        return (nrows, ncols), nnz


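# Example for read_matrix_params: for a file (hypothetical path) whose first
# line is "2, 3, 3":
#
#   read_matrix_params("tiny.smtx")  # -> ((2, 3), 3)

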
def csr_to_coo(indices, indptr, shape):
    """Expand CSR row pointers into explicit row indices, returning a 2 x nnz
    COO index tensor of (row, column) coordinates."""
    n_rows, n_cols = shape
    cols = indices
    rows = [0] * len(cols)
    for i in range(n_rows):
        # Row i owns the stored elements indptr[i] .. indptr[i + 1] - 1.
        for j in range(indptr[i], indptr[i + 1]):
            rows[j] = i
    return torch.tensor([rows, cols], dtype=torch.long)


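# Worked example for csr_to_coo with a hypothetical 2x3 matrix holding three
# stored elements at (0, 0), (0, 2), and (1, 1):
#
#   csr_to_coo(indices=[0, 2, 1], indptr=[0, 2, 3], shape=(2, 3))
#   # tensor([[0, 0, 1],
#   #         [0, 2, 1]])

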
def load_sparse_matrix(path, device):
    """Load the CSR pattern stored in a DLMC .smtx file as a sparse COO tensor
    with random values."""
    with open(path) as file:
        nrows, ncols, nnz = (int(el) for el in file.readline().split(", "))
        index_pointers = [int(el) for el in file.readline().split()]
        indices = [int(el) for el in file.readline().split()]

    data = torch.randn(nnz, dtype=torch.double)
    shape = (nrows, ncols)
    return torch.sparse_coo_tensor(
        csr_to_coo(indices, index_pointers, shape), data, shape, device=device
    )


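# A .smtx file stores a CSR sparsity pattern as three lines, e.g. (hypothetical
# content and path):
#
#   2, 3, 3      <- nrows, ncols, nnz
#   0 2 3        <- row index pointers (indptr)
#   0 2 1        <- column indices
#
#   x = load_sparse_matrix("tiny.smtx", "cpu")  # 2x3 sparse tensor, 3 random values

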
def gen_vector(path, device):
    """Generate a random dense vector whose length is the row count declared in
    the .smtx header at `path`; the pattern lines are not needed here."""
    with open(path) as file:
        nrows, ncols, nnz = (int(el) for el in file.readline().split(", "))
        return torch.randn(nrows, dtype=torch.double, device=device)


def gen_matrix(path, device):
    """Generate a random dense matrix whose shape is declared in the .smtx
    header at `path`; the pattern lines are not needed here."""
    with open(path) as file:
        nrows, ncols, nnz = (int(el) for el in file.readline().split(", "))
        return torch.randn(nrows, ncols, dtype=torch.double, device=device)


def load_spmv_dataset(dataset_path, hidden_size, sparsity, device, n_limit=math.inf):
    """load_spmv_dataset loads a DLMC dataset for a sparse matrix-vector multiplication (SPMV) performance test.
    Args:
        dataset_path:
            path of the dataset from the DLMC collection.
        hidden_size:
            dimension the operands must share: matrices whose column count
            matches it become the sparse operand; matrices whose row count
            matches it set the length of the dense vector.
        sparsity:
            sparsity subdirectory of the dataset to read, e.g. "0.9".
        device:
            device on which to create the tensors ("cpu" or "cuda").
        n_limit:
            maximum number of .smtx files to scan.
    """
    current_folder_path = f"{dataset_path}/{sparsity}"
    path = Path(current_folder_path)
    files = path.glob("**/*.smtx")
    print(dataset_path, hidden_size, sparsity)
    index = 0
    x_files, y_files = [], []
    for f in files:
        if index >= n_limit:
            break
        print(".", end="")
        size, nnz = read_matrix_params(f.as_posix())
        if size[1] == hidden_size:
            x_files.append(f.as_posix())
        if size[0] == hidden_size:
            y_files.append(f.as_posix())
        index += 1
    print()

    for fx, fy in zip(x_files, y_files):
        x = load_sparse_matrix(fx, device)
        y = gen_vector(fy, device)
        yield (x, y)


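# Usage sketch for load_spmv_dataset (hypothetical dataset location, following
# the DLMC directory layout):
#
#   for x, y in load_spmv_dataset("dlmc/rn50/magnitude_pruning", 2048, "0.9", "cpu"):
#       ...  # x: sparse (m x 2048) matrix, y: dense vector of length 2048

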
def load_spmm_dataset(
    dataset_path, hidden_size, sparsity, spmm_type, device, n_limit=math.inf
):
    """load_spmm_dataset loads a DLMC dataset for a sparse matrix-matrix multiplication (SPMM) performance test.
    Args:
        dataset_path:
            path of the dataset from the DLMC collection.
        hidden_size:
            dimension the operands must share: matrices whose column count
            matches it become the first operand; matrices whose row count
            matches it set the shape of the second operand.
        sparsity:
            sparsity subdirectory of the dataset to read, e.g. "0.9".
        spmm_type:
            either "sparse@sparse" or "sparse@dense"; selects whether the
            second operand is sparse or dense.
        device:
            device on which to create the tensors ("cpu" or "cuda").
        n_limit:
            maximum number of .smtx files to scan.
    """
    current_folder_path = f"{dataset_path}/{sparsity}"
    path = Path(current_folder_path)
    files = path.glob("**/*.smtx")
    print(dataset_path, hidden_size, sparsity)
    index = 0
    x_files, y_files = [], []
    for f in files:
        if index >= n_limit:
            break
        print(".", end="")
        size, nnz = read_matrix_params(f.as_posix())
        if size[1] == hidden_size:
            x_files.append(f.as_posix())
        if size[0] == hidden_size:
            y_files.append(f.as_posix())
        index += 1
    print()

    for fx, fy in zip(x_files, y_files):
        x = load_sparse_matrix(fx, device)
        y = (
            gen_matrix(fy, device)
            if spmm_type == "sparse@dense"
            else load_sparse_matrix(fy, device)
        )
        yield (x, y)


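# Usage sketch for load_spmm_dataset (hypothetical dataset location):
#
#   pairs = load_spmm_dataset(
#       "dlmc/rn50/magnitude_pruning", 2048, "0.9", "sparse@dense", "cpu"
#   )
#   for x, y in pairs:
#       ...  # x: sparse (m x 2048), y: dense (2048 x n)

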
def load_dlmc_dataset(
    dataset_path,
    operation,
    hidden_size,
    sparsity,
    device,
    requires_grad,
    n_limit=math.inf,
):
    """load_dlmc_dataset loads a DLMC dataset for a matmul performance test.
    Args:
        dataset_path:
            path of the dataset from the DLMC collection.
        operation:
            one of "sparse@sparse", "sparse@dense", or "sparse@vector"; selects
            the kind of matmul the operands are prepared for.
        hidden_size:
            dimension the operands must share (see load_spmm_dataset and
            load_spmv_dataset).
        sparsity:
            sparsity subdirectory of the dataset to read, e.g. "0.9".
        device:
            device on which to create the tensors ("cpu" or "cuda").
        requires_grad:
            if True, also prepare random gradients for a backward test.
        n_limit:
            maximum number of .smtx files to scan.
    """
    if operation == "sparse@sparse" or operation == "sparse@dense":
        collection = load_spmm_dataset(
            dataset_path, hidden_size, sparsity, operation, device, n_limit
        )
    elif operation == "sparse@vector":
        collection = load_spmv_dataset(
            dataset_path, hidden_size, sparsity, device, n_limit
        )
    else:
        raise ValueError(f"unknown operation: {operation!r}")
    scipy_vars = {}
    backward_vars = {}
    for x, y in collection:
        if device == "cpu":
            # SciPy reference operands are only built for the CPU runs.
            scipy_vars = {
                "sx": to_coo_scipy(x) if x.is_sparse else x.numpy(),
                "sy": to_coo_scipy(y) if y.is_sparse else y.numpy(),
            }
        if not requires_grad:
            dx = x.to_dense() if x.is_sparse else x
            dy = y.to_dense() if y.is_sparse else y
        else:
            c = sparse_grad_output(x, y)
            backward_vars = {
                "sparse_grad_output": c,
                "grad_output": c.to_dense() if c.is_sparse else c,
            }
            x.requires_grad_(True)
            y.requires_grad_(True)
            # Detached dense copies so the dense and sparse benchmarks
            # accumulate gradients independently.
            dx = x.to_dense().detach() if x.is_sparse else x.clone().detach()
            dy = y.to_dense().detach() if y.is_sparse else y.clone().detach()
            dx.requires_grad_(True)
            dy.requires_grad_(True)
        yield {"x": x, "y": y, "dx": dx, "dy": dy, **scipy_vars, **backward_vars}
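

# Usage sketch for load_dlmc_dataset (hypothetical dataset location):
#
#   for t in load_dlmc_dataset(
#       "dlmc/rn50/magnitude_pruning",
#       "sparse@dense",
#       2048,
#       "0.9",
#       "cpu",
#       requires_grad=False,
#   ):
#       ...  # t holds "x", "y", "dx", "dy", plus "sx"/"sy" SciPy operands on CPU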