1# Copyright 2022 The Pigweed Authors 2# 3# Licensed under the Apache License, Version 2.0 (the "License"); you may not 4# use this file except in compliance with the License. You may obtain a copy of 5# the License at 6# 7# https://www.apache.org/licenses/LICENSE-2.0 8# 9# Unless required by applicable law or agreed to in writing, software 10# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 11# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 12# License for the specific language governing permissions and limitations under 13# the License. 14""" 15The label module defines a class to store and manipulate size reports. 16""" 17 18from __future__ import annotations 19 20from collections import defaultdict 21from dataclasses import dataclass 22from functools import lru_cache 23from typing import Iterable, Sequence 24import csv 25 26 27@dataclass 28class Label: 29 """Return type of DataSourceMap generator.""" 30 31 name: str 32 size: int 33 capacity: int | None = None 34 exists_both: bool | None = None 35 parents: tuple[str, ...] = () 36 37 def is_new(self) -> bool: 38 return (not self.exists_both) and self.size > 0 39 40 def is_del(self) -> bool: 41 return (not self.exists_both) and self.size < 0 42 43 44@dataclass 45class LabelInfo: 46 size: int = 0 47 capacity: int | None = None 48 exists_both: bool | None = None 49 50 51class _LabelMap: 52 """Private module to hold parent and child labels with their size.""" 53 54 _label_map: dict[str, dict[str, LabelInfo]] 55 56 def __init__(self): 57 self._label_map = defaultdict(lambda: defaultdict(LabelInfo)) 58 59 def remove(self, parent_label: str, child_label: str | None = None) -> None: 60 """Delete entire parent label or the child label.""" 61 if child_label: 62 del self._label_map[parent_label][child_label] 63 else: 64 del self._label_map[parent_label] 65 66 def __getitem__(self, parent_label: str) -> dict[str, LabelInfo]: 67 """Indexing LabelMap using '[]' operators by specifying a label.""" 68 return self._label_map[parent_label] 69 70 def __contains__(self, parent_label: str) -> bool: 71 return parent_label in self._label_map 72 73 def map_items(self) -> Iterable[tuple[str, dict[str, LabelInfo]]]: 74 return self._label_map.items() 75 76 77class _DataSource: 78 """Private module to store a data source name with a _LabelMap.""" 79 80 def __init__(self, name: str): 81 self._name = name 82 self._ds_label_map = _LabelMap() 83 84 def get_name(self) -> str: 85 return self._name 86 87 def add_label( 88 self, 89 parent_label: str, 90 child_label: str, 91 size: int, 92 diff_exist: bool | None = None, 93 ) -> None: 94 curr_label_info = self._ds_label_map[parent_label][child_label] 95 curr_label_info.size += size 96 if curr_label_info.exists_both is None: 97 curr_label_info.exists_both = diff_exist 98 99 def __getitem__(self, parent_label: str) -> dict[str, LabelInfo]: 100 return self._ds_label_map[parent_label] 101 102 def __contains__(self, parent_label: str) -> bool: 103 return parent_label in self._ds_label_map 104 105 def label_map_items(self) -> Iterable[tuple[str, dict[str, LabelInfo]]]: 106 return self._ds_label_map.map_items() 107 108 109class DataSourceMap: 110 """Module to store an array of DataSources and capacities. 111 112 An organized way to store a hierarchy of labels and their sizes. 113 Includes a capacity array to hold regex patterns for applying 114 capacities to matching label names. 115 116 """ 117 118 _BASE_TOTAL_LABEL = 'total' 119 120 @classmethod 121 def from_bloaty_tsv(cls, raw_tsv: Iterable[str]) -> DataSourceMap: 122 """Read in Bloaty TSV output and store in DataSourceMap.""" 123 reader = csv.reader(raw_tsv, delimiter='\t') 124 top_row = next(reader) 125 vmsize_index = top_row.index('vmsize') 126 ds_map_tsv = cls(top_row[:vmsize_index]) 127 for row in reader: 128 ds_map_tsv.insert_label_hierarchy( 129 row[:vmsize_index], int(row[vmsize_index]) 130 ) 131 return ds_map_tsv 132 133 def __init__(self, data_sources_names: Iterable[str]): 134 self._data_sources = list( 135 _DataSource(name) for name in ['base', *data_sources_names] 136 ) 137 self._capacity_array: list[tuple[str, int]] = [] 138 139 def label_exists( 140 self, ds_index: int, parent_label: str, child_label: str 141 ) -> bool: 142 return (parent_label in self._data_sources[ds_index]) and ( 143 child_label in self._data_sources[ds_index][parent_label] 144 ) 145 146 def insert_label_hierarchy( 147 self, 148 label_hierarchy: Iterable[str], 149 size: int, 150 diff_exist: bool | None = None, 151 ) -> None: 152 """Insert a hierarchy of labels with its size.""" 153 154 # Insert initial '__base__' data source that holds the 155 # running total size. 156 self._data_sources[0].add_label( 157 '__base__', self._BASE_TOTAL_LABEL, size 158 ) 159 complete_label_hierarchy = [self._BASE_TOTAL_LABEL, *label_hierarchy] 160 for index in range(len(complete_label_hierarchy) - 1): 161 if complete_label_hierarchy[index]: 162 self._data_sources[index + 1].add_label( 163 complete_label_hierarchy[index], 164 complete_label_hierarchy[index + 1], 165 size, 166 diff_exist, 167 ) 168 169 def add_capacity(self, regex_pattern: str, capacity: int) -> None: 170 """Insert regex pattern and capacity into dictionary.""" 171 self._capacity_array.append((regex_pattern, capacity)) 172 173 def diff(self, base: DataSourceMap) -> DiffDataSourceMap: 174 """Calculate the difference between 2 DataSourceMaps.""" 175 diff_dsm = DiffDataSourceMap(self.get_ds_names()) 176 curr_parent = self._BASE_TOTAL_LABEL 177 178 # Iterate through base labels at each datasource index. 179 last_data_source = len(base.get_ds_names()) - 1 180 parent_data_source_index = last_data_source + 1 181 for b_label in base.labels(last_data_source): 182 if last_data_source > 0: 183 curr_parent = b_label.parents[-1] 184 lb_hierarchy_names = [*b_label.parents, b_label.name] 185 186 # Check if label exists in target binary DataSourceMap. 187 # Subtract base from target size and insert diff size 188 # into DiffDataSourceMap. 189 if self.label_exists( 190 parent_data_source_index, curr_parent, b_label.name 191 ): 192 diff_size = ( 193 self._data_sources[parent_data_source_index][curr_parent][ 194 b_label.name 195 ].size 196 ) - b_label.size 197 198 if diff_size: 199 diff_dsm.insert_label_hierarchy( 200 lb_hierarchy_names, diff_size, True 201 ) 202 else: 203 diff_dsm.insert_label_hierarchy(lb_hierarchy_names, 0, True) 204 205 # label is not present in target - insert with negative size 206 else: 207 diff_dsm.insert_label_hierarchy( 208 lb_hierarchy_names, -1 * b_label.size, False 209 ) 210 211 # Iterate through all of target labels 212 # to find labels new to target from base. 213 for t_label in self.labels(last_data_source): 214 if last_data_source > 0: 215 curr_parent = t_label.parents[-1] 216 217 # New addition to target 218 if not base.label_exists( 219 parent_data_source_index, curr_parent, t_label.name 220 ): 221 diff_dsm.insert_label_hierarchy( 222 [*t_label.parents, f"{t_label.name}"], t_label.size, False 223 ) 224 225 return diff_dsm 226 227 def get_total_size(self) -> int: 228 return self._data_sources[0]['__base__'][self._BASE_TOTAL_LABEL].size 229 230 def get_ds_names(self) -> tuple[str, ...]: 231 """List of DataSource names for easy indexing and reference.""" 232 return tuple( 233 data_source.get_name() for data_source in self._data_sources[1:] 234 ) 235 236 @lru_cache 237 def labels(self, ds_index: int | None = None) -> Iterable[Label]: 238 """Generator that yields a Label depending on specified data source. 239 240 Args: 241 ds_index: Integer index of target data source. 242 243 Returns: 244 Iterable Label objects. 245 """ 246 ds_index = len(self._data_sources) if ds_index is None else ds_index + 2 247 return self._per_data_source_labels( 248 tuple(), self._data_sources[1:ds_index] 249 ) 250 251 def _per_data_source_labels( 252 self, 253 parent_labels: tuple[str, ...], 254 data_sources: Sequence[_DataSource], 255 ) -> Iterable[Label]: 256 """Recursive generator to return Label based off parent labels.""" 257 if parent_labels: 258 current_parent = parent_labels[-1] 259 else: 260 current_parent = self._BASE_TOTAL_LABEL 261 labels = [] 262 for ds_index, curr_ds in enumerate(data_sources): 263 if current_parent not in curr_ds: 264 continue 265 label_map = curr_ds[current_parent] 266 for child_label, label_info in label_map.items(): 267 if len(data_sources) == 1: 268 labels.append( 269 Label( 270 child_label, 271 label_info.size, 272 parents=parent_labels, 273 exists_both=label_info.exists_both, 274 ) 275 ) 276 else: 277 labels.extend( 278 self._per_data_source_labels( 279 (*parent_labels, child_label), 280 data_sources[ds_index + 1 :], 281 ) 282 ) 283 return labels 284 285 286class DiffDataSourceMap(DataSourceMap): 287 """DataSourceMap that holds diff information.""" 288 289 def has_diff_sublabels(self, top_ds_label: str) -> bool: 290 """Checks if first datasource is identical.""" 291 for label in self.labels(): 292 if label.size != 0: 293 if (label.parents and (label.parents[0] == top_ds_label)) or ( 294 label.name == top_ds_label 295 ): 296 return True 297 return False 298