# Copyright 2018 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import fnmatch
import locale
import os
import pathlib
import shutil
import subprocess
import sys
import tempfile
import threading
import time
from typing import Dict, Iterable, List

import google.protobuf.json_format
import watchdog.events
import watchdog.observers

from synthtool.log import logger
from synthtool.protos import metadata_pb2

_metadata = metadata_pb2.Metadata()


def get_environment_bool(var_name: str) -> bool:
    """Interprets an environment variable as a boolean flag.

    Returns False when the variable is unset, empty, or equal to "false"
    (case-insensitive).  Any other value yields True.
    """
    val = os.environ.get(var_name)
    return bool(val) and val.lower() != "false"


_track_obsolete_files = get_environment_bool("SYNTHTOOL_TRACK_OBSOLETE_FILES")

# The list of file patterns excluded during a copy() or move() operation.
_excluded_patterns: List[str] = []


def reset() -> None:
    """Clear all metadata so far."""
    global _metadata
    _metadata = metadata_pb2.Metadata()
    global _excluded_patterns
    _excluded_patterns = []


def get():
    """Returns the module-level metadata message being accumulated."""
    return _metadata


def add_git_source(**kwargs) -> None:
    """Adds a git source to the current metadata."""
    _metadata.sources.add(git=metadata_pb2.GitSource(**kwargs))


def add_pattern_excluded_during_copy(glob_pattern: str) -> None:
    """Adds a file excluded during copy.

    Used to avoid deleting an obsolete file that is excluded."""
    _excluded_patterns.append(glob_pattern)


def add_generator_source(**kwargs) -> None:
    """Adds a generator source to the current metadata."""
    _metadata.sources.add(generator=metadata_pb2.GeneratorSource(**kwargs))


def add_template_source(**kwargs) -> None:
    """Adds a template source to the current metadata."""
    _metadata.sources.add(template=metadata_pb2.TemplateSource(**kwargs))


def add_client_destination(**kwargs) -> None:
    """Adds a client library destination to the current metadata."""
    _metadata.destinations.add(client=metadata_pb2.ClientDestination(**kwargs))


def _git_slashes(path: str) -> str:
    """Converts backslashes to forward slashes on Windows; no-op elsewhere."""
    # git speaks only forward slashes
    return path.replace("\\", "/") if sys.platform == "win32" else path


def _read_or_empty(path: str = "synth.metadata"):
    """Reads a metadata json file.  Returns empty if that file is not found."""
    try:
        with open(path, "rt") as file:
            text = file.read()
        return google.protobuf.json_format.Parse(text, metadata_pb2.Metadata())
    except FileNotFoundError:
        return metadata_pb2.Metadata()


def write(outfile: str = "synth.metadata") -> None:
    """Writes out the metadata to a file."""
    jsonified = google.protobuf.json_format.MessageToJson(_metadata)

    with open(outfile, "w") as fh:
        fh.write(jsonified)

    logger.debug(f"Wrote metadata to {outfile}.")


def _remove_obsolete_files(old_metadata):
    """Remove obsolete files from the file system.

    A file is obsolete when it is listed in old_metadata.generated_files but
    not in the current metadata's generated_files.  Populate the current
    metadata's generated_files before calling this function or it will remove
    all previously generated files.

    Files ignored by git, and files matching a pattern registered through
    add_pattern_excluded_during_copy(), are left in place.

    Parameters:
        old_metadata: old metadata loaded from a call to _read_or_empty().
    """
    old_files = set(old_metadata.generated_files)
    new_files = set(_metadata.generated_files)
    obsolete_files = old_files - new_files
    for file_path in git_ignore(obsolete_files):
        matched_pattern = next(
            (
                pattern
                for pattern in _excluded_patterns
                if fnmatch.fnmatch(file_path, pattern)
            ),
            None,
        )
        if matched_pattern is not None:
            logger.info(
                f"Leaving obsolete file {file_path} because it matched excluded pattern {matched_pattern} during copy."
            )
            continue
        logger.info(f"Removing obsolete file {file_path}...")
        try:
            os.unlink(file_path)
        except FileNotFoundError:
            pass  # Already deleted.  That's OK.


def git_ignore(file_paths: Iterable[str]) -> List[str]:
    """Returns a new list of the same files, with ignored files removed.

    Paths with a ".git" component are always dropped.  The remaining paths
    are passed to `git check-ignore --stdin` and whatever git reports as
    ignored is filtered out.

    Raises:
        FileNotFoundError: if the git executable cannot be found in PATH.
    """
    # Surprisingly, git check-ignore doesn't ignore .git directories, take those
    # files out manually.
    nongit_file_paths = [
        file_path
        for file_path in file_paths
        if ".git" not in pathlib.Path(file_path).parts
    ]

    encoding = locale.getpreferredencoding(False)
    # Write the files to a temporary text file.
    with tempfile.TemporaryFile("w+b") as f:
        for file_path in nongit_file_paths:
            f.write(_git_slashes(file_path).encode(encoding))
            f.write("\n".encode(encoding))
        # Invoke git.
        f.seek(0)
        git = shutil.which("git")
        if not git:
            raise FileNotFoundError("Could not find git in PATH.")
        completed_process = subprocess.run(
            [git, "check-ignore", "--stdin"], stdin=f, stdout=subprocess.PIPE
        )
    # Digest git output.  Blank lines (such as the one after the trailing
    # newline) are skipped: os.path.normpath("") is ".", which would otherwise
    # be recorded as an ignored path.
    output_text = completed_process.stdout.decode(encoding)
    ignored_file_paths = {
        os.path.normpath(line.strip())
        for line in output_text.split("\n")
        if line.strip()
    }
    # Filter the ignored paths from the file_paths.
    return [
        path
        for path in nongit_file_paths
        if os.path.normpath(path) not in ignored_file_paths
    ]


def set_track_obsolete_files(track_obsolete_files=True):
    """Instructs synthtool to track and remove obsolete files."""
    global _track_obsolete_files
    _track_obsolete_files = track_obsolete_files


def should_track_obsolete_files():
    """Returns whether obsolete file tracking is currently enabled."""
    return _track_obsolete_files


class FileSystemEventHandler(watchdog.events.FileSystemEventHandler):
    """Records all the files that were touched."""

    def __init__(self, watch_dir: pathlib.Path):
        super().__init__()
        self._touched_file_paths: List[str] = list()
        # Guards _touched_file_paths, which is appended to in on_any_event()
        # and read in get_touched_file_paths().
        self._touched_lock = threading.Lock()
        self._watch_dir = watch_dir

    def on_any_event(self, event):
        """Records the path of a created, modified, or moved file.

        Directory events and all other event types are ignored.  For a move,
        the destination path is recorded.  Paths are stored relative to the
        watch directory.
        """
        if event.is_directory:
            return
        if event.event_type in (
            watchdog.events.EVENT_TYPE_MODIFIED,
            watchdog.events.EVENT_TYPE_CREATED,
        ):
            touched_path = event.src_path
        elif event.event_type == watchdog.events.EVENT_TYPE_MOVED:
            touched_path = event.dest_path
        else:
            return
        try:
            relative_path = pathlib.Path(touched_path).relative_to(self._watch_dir)
        except ValueError:
            # The path is not under the watch dir, so skip it rather than
            # letting the error propagate out of the handler.
            logger.debug(
                f"Ignoring event for {touched_path} outside of {self._watch_dir}."
            )
            return
        with self._touched_lock:
            self._touched_file_paths.append(str(relative_path))

    def get_touched_file_paths(self) -> List[str]:
        """Returns the recorded paths, deduplicated and sorted."""
        with self._touched_lock:
            paths = set(self._touched_file_paths)
        return sorted(paths)


class MetadataTrackerAndWriter:
    """Writes metadata file upon exiting scope."""

    def __init__(self, metadata_file_path: str):
        self.metadata_file_path = metadata_file_path
        self.old_metadata = None
        # Set in __enter__ only when obsolete file tracking is enabled.
        self.handler = None
        self.observer = None

    def __enter__(self):
        self.old_metadata = _read_or_empty(self.metadata_file_path)
        _add_self_git_source()
        watch_dir = pathlib.Path(self.metadata_file_path).parent
        os.makedirs(watch_dir, exist_ok=True)
        # Create an observer only if obsolete file tracking is enabled.
        # This prevents inotify errors in synth jobs that may delete the watch
        # dir.  Such synth jobs should leave obsolete file tracking disabled.
        if should_track_obsolete_files():
            self.handler = FileSystemEventHandler(watch_dir)
            self.observer = watchdog.observers.Observer()
            self.observer.schedule(self.handler, str(watch_dir), recursive=True)
            self.observer.start()

    def __exit__(self, type, value, traceback):
        """Stops file observation and, on success, finalizes the metadata.

        The observer (if one was started) is always stopped.  When an
        exception is propagating, nothing else is done: no files are removed
        and no metadata is written.
        """
        observer, handler = self.observer, self.handler
        self.observer = self.handler = None
        failed = type is not None
        track = should_track_obsolete_files()

        if observer is not None:
            if track and not failed:
                time.sleep(2)  # Finish collecting observations about modified files.
            observer.stop()
            observer.join()

        if failed:
            return  # An exception was raised.  Don't write metadata or clean up.

        if track:
            if handler is None:
                # Tracking was switched on inside the scope, so nothing was
                # observed.  Removing "obsolete" files now would delete every
                # previously generated file.
                logger.warning(
                    "Obsolete file tracking was enabled after the metadata "
                    "tracker started; skipping obsolete file removal."
                )
            else:
                _metadata.generated_files.extend(
                    git_ignore(handler.get_touched_file_paths())
                )
                _remove_obsolete_files(self.old_metadata)
        _clear_local_paths(get())
        _metadata.sources.sort(key=_source_key)
        if _enable_write_metadata:
            write(self.metadata_file_path)


def _get_git_source_map(metadata) -> Dict[str, object]:
    """Gets the git sources from the metadata.

    Parameters:
        metadata: an instance of metadata_pb2.Metadata.

    Returns:
        A dict mapping git source name to metadata_pb2.GitSource instance.
    """
    source_map = {}
    for source in metadata.sources:
        if source.HasField("git"):
            git_source = source.git
            source_map[git_source.name] = git_source
    return source_map


def _clear_local_paths(metadata):
    """Clear the local_path from the git sources.

    There's no reason to preserve it, and it may leak some info we don't
    want to leak in the path.
    """
    for source in metadata.sources:
        if source.HasField("git"):
            git_source = source.git
            git_source.ClearField("local_path")


def _add_self_git_source() -> int:
    """Adds current working directory as a git source.

    Returns:
        The number of git sources added to metadata.
    """
    # The source is registered under the name ".".
    return _add_git_source_from_directory(".", os.getcwd())


def _add_git_source_from_directory(name: str, dir_path: str) -> int:
    """Adds the git repo containing the directory as a git source.

    Records the URL of the "origin" remote and the sha of the latest commit.

    Returns:
        The number of git sources added to metadata.
    """
    completed_process = subprocess.run(
        ["git", "-C", dir_path, "status"], universal_newlines=True
    )
    if completed_process.returncode:
        logger.warning("%s is not directory in a git repo.", dir_path)
        return 0
    completed_process = subprocess.run(
        ["git", "-C", dir_path, "remote", "get-url", "origin"],
        stdout=subprocess.PIPE,
        universal_newlines=True,
    )
    url = completed_process.stdout.strip()
    completed_process = subprocess.run(
        ["git", "-C", dir_path, "log", "--no-decorate", "-1", "--pretty=format:%H"],
        stdout=subprocess.PIPE,
        universal_newlines=True,
    )
    latest_sha = completed_process.stdout.strip()
    add_git_source(name=name, remote=url, sha=latest_sha)
    return 1


def _source_key(source):
    """Creates a key to use to sort a list of sources.

    Arguments:
        source {metadata_pb2.Source} -- the Source for which to formulate a sort key

    Returns:
        tuple -- A key to use to sort a list of sources.  A source with none
        of the git, generator, or template fields set gets the key ("",).
    """
    if source.HasField("git"):
        return ("git", source.git.name, source.git.remote, source.git.sha)
    if source.HasField("generator"):
        return (
            "generator",
            source.generator.name,
            source.generator.version,
            source.generator.docker_image,
        )
    if source.HasField("template"):
        return (
            "template",
            source.template.name,
            source.template.origin,
            source.template.version,
        )
    # Always return a tuple: a None key cannot be compared with the tuple keys
    # above and would make list.sort() raise TypeError.
    return ("",)


_enable_write_metadata = True


def enable_write_metadata(enable: bool = True) -> None:
    """Control whether synthtool writes synth.metadata file."""
    global _enable_write_metadata
    _enable_write_metadata = enable