"""Copy test artifacts of a GitHub Actions workflow run into an S3 bucket."""

import argparse
import os
import re
from tempfile import TemporaryDirectory

from tools.stats.upload_stats_lib import download_gha_artifacts, upload_file_to_s3


# Artifact name prefixes that are fetched from the workflow run.
ARTIFACTS = [
    "sccache-stats",
    "test-jsons",
    "test-reports",
    "usage-log",
]
# Destination S3 bucket for every uploaded artifact.
BUCKET_NAME = "gha-artifacts"
# Matches the "-runattempt<N>" fragment embedded in GHA artifact file names.
FILENAME_REGEX = r"-runattempt\d+"


def _s3_key(
    repo: str, workflow_run_id: int, workflow_run_attempt: int, artifact_name: str
) -> str:
    """Return the S3 object key for a downloaded GHA artifact file name.

    GHA artifacts are named NAME-runattempt${{ github.run_attempt }}-SUFFIX.zip.
    The run-attempt fragment is removed so the key conforms with the naming
    convention on S3, i.e. REPO/WORKFLOW_ID/RUN_ATTEMPT/artifact/NAME-SUFFIX.zip
    """
    s3_filename = re.sub(FILENAME_REGEX, "", artifact_name)
    return f"{repo}/{workflow_run_id}/{workflow_run_attempt}/artifact/{s3_filename}"


def get_artifacts(repo: str, workflow_run_id: int, workflow_run_attempt: int) -> None:
    """Download the known artifacts of a workflow run and upload each to S3.

    Args:
        repo: GitHub repository the workflow run belongs to; used as the
            leading component of the S3 key.
        workflow_run_id: id of the workflow run to get artifacts from.
        workflow_run_attempt: which attempt (retry) of the workflow run to use.

    The work happens inside a temporary directory that becomes the current
    working directory for the duration of the call. The previous working
    directory is restored afterwards, even if a download or upload fails.
    """
    previous_cwd = os.getcwd()
    with TemporaryDirectory() as temp_dir:
        print("Using temporary directory:", temp_dir)
        # NOTE(review): presumably download_gha_artifacts writes into the
        # current working directory, hence the chdir -- confirm against the
        # helper's implementation.
        os.chdir(temp_dir)
        try:
            for artifact in ARTIFACTS:
                artifact_paths = download_gha_artifacts(
                    artifact, workflow_run_id, workflow_run_attempt
                )

                for artifact_path in artifact_paths:
                    upload_file_to_s3(
                        file_name=str(artifact_path.resolve()),
                        bucket=BUCKET_NAME,
                        key=_s3_key(
                            repo,
                            workflow_run_id,
                            workflow_run_attempt,
                            artifact_path.name,
                        ),
                    )
        finally:
            # Leave the temporary directory before it is deleted; otherwise
            # the process is stranded in a removed directory, and cleanup can
            # fail on platforms that refuse to delete the current directory.
            os.chdir(previous_cwd)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Upload test artifacts from GHA to S3")
    parser.add_argument(
        "--workflow-run-id",
        type=int,
        required=True,
        help="id of the workflow to get artifacts from",
    )
    parser.add_argument(
        "--workflow-run-attempt",
        type=int,
        required=True,
        help="which retry of the workflow this is",
    )
    parser.add_argument(
        "--repo",
        type=str,
        required=True,
        help="which GitHub repo this workflow run belongs to",
    )
    args = parser.parse_args()
    get_artifacts(args.repo, args.workflow_run_id, args.workflow_run_attempt)