xref: /aosp_15_r20/external/pigweed/pw_bloat/py/pw_bloat/label.py (revision 61c4878ac05f98d0ceed94b57d316916de578985)
1# Copyright 2022 The Pigweed Authors
2#
3# Licensed under the Apache License, Version 2.0 (the "License"); you may not
4# use this file except in compliance with the License. You may obtain a copy of
5# the License at
6#
7#     https://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12# License for the specific language governing permissions and limitations under
13# the License.
14"""
15The label module defines a class to store and manipulate size reports.
16"""
17
18from __future__ import annotations
19
20from collections import defaultdict
21from dataclasses import dataclass
22from functools import lru_cache
23from typing import Iterable, Sequence
24import csv
25
26
27@dataclass
28class Label:
29    """Return type of DataSourceMap generator."""
30
31    name: str
32    size: int
33    capacity: int | None = None
34    exists_both: bool | None = None
35    parents: tuple[str, ...] = ()
36
37    def is_new(self) -> bool:
38        return (not self.exists_both) and self.size > 0
39
40    def is_del(self) -> bool:
41        return (not self.exists_both) and self.size < 0
42
43
44@dataclass
45class LabelInfo:
46    size: int = 0
47    capacity: int | None = None
48    exists_both: bool | None = None
49
50
51class _LabelMap:
52    """Private module to hold parent and child labels with their size."""
53
54    _label_map: dict[str, dict[str, LabelInfo]]
55
56    def __init__(self):
57        self._label_map = defaultdict(lambda: defaultdict(LabelInfo))
58
59    def remove(self, parent_label: str, child_label: str | None = None) -> None:
60        """Delete entire parent label or the child label."""
61        if child_label:
62            del self._label_map[parent_label][child_label]
63        else:
64            del self._label_map[parent_label]
65
66    def __getitem__(self, parent_label: str) -> dict[str, LabelInfo]:
67        """Indexing LabelMap using '[]' operators by specifying a label."""
68        return self._label_map[parent_label]
69
70    def __contains__(self, parent_label: str) -> bool:
71        return parent_label in self._label_map
72
73    def map_items(self) -> Iterable[tuple[str, dict[str, LabelInfo]]]:
74        return self._label_map.items()
75
76
77class _DataSource:
78    """Private module to store a data source name with a _LabelMap."""
79
80    def __init__(self, name: str):
81        self._name = name
82        self._ds_label_map = _LabelMap()
83
84    def get_name(self) -> str:
85        return self._name
86
87    def add_label(
88        self,
89        parent_label: str,
90        child_label: str,
91        size: int,
92        diff_exist: bool | None = None,
93    ) -> None:
94        curr_label_info = self._ds_label_map[parent_label][child_label]
95        curr_label_info.size += size
96        if curr_label_info.exists_both is None:
97            curr_label_info.exists_both = diff_exist
98
99    def __getitem__(self, parent_label: str) -> dict[str, LabelInfo]:
100        return self._ds_label_map[parent_label]
101
102    def __contains__(self, parent_label: str) -> bool:
103        return parent_label in self._ds_label_map
104
105    def label_map_items(self) -> Iterable[tuple[str, dict[str, LabelInfo]]]:
106        return self._ds_label_map.map_items()
107
108
109class DataSourceMap:
110    """Module to store an array of DataSources and capacities.
111
112    An organized way to store a hierarchy of labels and their sizes.
113    Includes a capacity array to hold regex patterns for applying
114    capacities to matching label names.
115
116    """
117
118    _BASE_TOTAL_LABEL = 'total'
119
120    @classmethod
121    def from_bloaty_tsv(cls, raw_tsv: Iterable[str]) -> DataSourceMap:
122        """Read in Bloaty TSV output and store in DataSourceMap."""
123        reader = csv.reader(raw_tsv, delimiter='\t')
124        top_row = next(reader)
125        vmsize_index = top_row.index('vmsize')
126        ds_map_tsv = cls(top_row[:vmsize_index])
127        for row in reader:
128            ds_map_tsv.insert_label_hierarchy(
129                row[:vmsize_index], int(row[vmsize_index])
130            )
131        return ds_map_tsv
132
133    def __init__(self, data_sources_names: Iterable[str]):
134        self._data_sources = list(
135            _DataSource(name) for name in ['base', *data_sources_names]
136        )
137        self._capacity_array: list[tuple[str, int]] = []
138
139    def label_exists(
140        self, ds_index: int, parent_label: str, child_label: str
141    ) -> bool:
142        return (parent_label in self._data_sources[ds_index]) and (
143            child_label in self._data_sources[ds_index][parent_label]
144        )
145
146    def insert_label_hierarchy(
147        self,
148        label_hierarchy: Iterable[str],
149        size: int,
150        diff_exist: bool | None = None,
151    ) -> None:
152        """Insert a hierarchy of labels with its size."""
153
154        # Insert initial '__base__' data source that holds the
155        # running total size.
156        self._data_sources[0].add_label(
157            '__base__', self._BASE_TOTAL_LABEL, size
158        )
159        complete_label_hierarchy = [self._BASE_TOTAL_LABEL, *label_hierarchy]
160        for index in range(len(complete_label_hierarchy) - 1):
161            if complete_label_hierarchy[index]:
162                self._data_sources[index + 1].add_label(
163                    complete_label_hierarchy[index],
164                    complete_label_hierarchy[index + 1],
165                    size,
166                    diff_exist,
167                )
168
169    def add_capacity(self, regex_pattern: str, capacity: int) -> None:
170        """Insert regex pattern and capacity into dictionary."""
171        self._capacity_array.append((regex_pattern, capacity))
172
173    def diff(self, base: DataSourceMap) -> DiffDataSourceMap:
174        """Calculate the difference between 2 DataSourceMaps."""
175        diff_dsm = DiffDataSourceMap(self.get_ds_names())
176        curr_parent = self._BASE_TOTAL_LABEL
177
178        # Iterate through base labels at each datasource index.
179        last_data_source = len(base.get_ds_names()) - 1
180        parent_data_source_index = last_data_source + 1
181        for b_label in base.labels(last_data_source):
182            if last_data_source > 0:
183                curr_parent = b_label.parents[-1]
184            lb_hierarchy_names = [*b_label.parents, b_label.name]
185
186            # Check if label exists in target binary DataSourceMap.
187            # Subtract base from target size and insert diff size
188            # into DiffDataSourceMap.
189            if self.label_exists(
190                parent_data_source_index, curr_parent, b_label.name
191            ):
192                diff_size = (
193                    self._data_sources[parent_data_source_index][curr_parent][
194                        b_label.name
195                    ].size
196                ) - b_label.size
197
198                if diff_size:
199                    diff_dsm.insert_label_hierarchy(
200                        lb_hierarchy_names, diff_size, True
201                    )
202                else:
203                    diff_dsm.insert_label_hierarchy(lb_hierarchy_names, 0, True)
204
205            # label is not present in target - insert with negative size
206            else:
207                diff_dsm.insert_label_hierarchy(
208                    lb_hierarchy_names, -1 * b_label.size, False
209                )
210
211        # Iterate through all of target labels
212        # to find labels new to target from base.
213        for t_label in self.labels(last_data_source):
214            if last_data_source > 0:
215                curr_parent = t_label.parents[-1]
216
217            # New addition to target
218            if not base.label_exists(
219                parent_data_source_index, curr_parent, t_label.name
220            ):
221                diff_dsm.insert_label_hierarchy(
222                    [*t_label.parents, f"{t_label.name}"], t_label.size, False
223                )
224
225        return diff_dsm
226
227    def get_total_size(self) -> int:
228        return self._data_sources[0]['__base__'][self._BASE_TOTAL_LABEL].size
229
230    def get_ds_names(self) -> tuple[str, ...]:
231        """List of DataSource names for easy indexing and reference."""
232        return tuple(
233            data_source.get_name() for data_source in self._data_sources[1:]
234        )
235
236    @lru_cache
237    def labels(self, ds_index: int | None = None) -> Iterable[Label]:
238        """Generator that yields a Label depending on specified data source.
239
240        Args:
241            ds_index: Integer index of target data source.
242
243        Returns:
244            Iterable Label objects.
245        """
246        ds_index = len(self._data_sources) if ds_index is None else ds_index + 2
247        return self._per_data_source_labels(
248            tuple(), self._data_sources[1:ds_index]
249        )
250
251    def _per_data_source_labels(
252        self,
253        parent_labels: tuple[str, ...],
254        data_sources: Sequence[_DataSource],
255    ) -> Iterable[Label]:
256        """Recursive generator to return Label based off parent labels."""
257        if parent_labels:
258            current_parent = parent_labels[-1]
259        else:
260            current_parent = self._BASE_TOTAL_LABEL
261        labels = []
262        for ds_index, curr_ds in enumerate(data_sources):
263            if current_parent not in curr_ds:
264                continue
265            label_map = curr_ds[current_parent]
266            for child_label, label_info in label_map.items():
267                if len(data_sources) == 1:
268                    labels.append(
269                        Label(
270                            child_label,
271                            label_info.size,
272                            parents=parent_labels,
273                            exists_both=label_info.exists_both,
274                        )
275                    )
276                else:
277                    labels.extend(
278                        self._per_data_source_labels(
279                            (*parent_labels, child_label),
280                            data_sources[ds_index + 1 :],
281                        )
282                    )
283        return labels
284
285
286class DiffDataSourceMap(DataSourceMap):
287    """DataSourceMap that holds diff information."""
288
289    def has_diff_sublabels(self, top_ds_label: str) -> bool:
290        """Checks if first datasource is identical."""
291        for label in self.labels():
292            if label.size != 0:
293                if (label.parents and (label.parents[0] == top_ds_label)) or (
294                    label.name == top_ds_label
295                ):
296                    return True
297        return False
298