xref: /aosp_15_r20/external/pytorch/tools/stats/upload_artifacts.py (revision da0073e96a02ea20f0ac840b70461e3646d07c45)
1import argparse
2import os
3import re
4from tempfile import TemporaryDirectory
5
6from tools.stats.upload_stats_lib import download_gha_artifacts, upload_file_to_s3
7
8
# Artifact name prefixes handled by get_artifacts(): each entry is passed, one
# at a time, as the first argument to download_gha_artifacts().
ARTIFACTS = [
    "sccache-stats",
    "test-jsons",
    "test-reports",
    "usage-log",
]
# Destination S3 bucket handed to upload_file_to_s3().
BUCKET_NAME = "gha-artifacts"
# Matches the "-runattempt<N>" fragment of a GHA artifact file name; it is
# removed (via re.sub) to build the S3 file name.
FILENAME_REGEX = r"-runattempt\d+"
17
18
def get_artifacts(repo: str, workflow_run_id: int, workflow_run_attempt: int) -> None:
    """Download a workflow run's GHA artifacts and re-upload them to S3.

    For every prefix in ``ARTIFACTS`` the matching artifacts are fetched with
    ``download_gha_artifacts`` while the process is inside a fresh temporary
    directory, then each returned path is uploaded to ``BUCKET_NAME`` under
    ``{repo}/{workflow_run_id}/{workflow_run_attempt}/artifact/``.

    Args:
        repo: GitHub repository the run belongs to; used as the leading
            component of the S3 key.
        workflow_run_id: Id of the workflow run to pull artifacts from.
        workflow_run_attempt: Attempt number of that run.

    The working directory is changed for the duration of the call and is
    restored before returning, including when an exception propagates.
    """
    # Remember where we started so we can go back before the temporary
    # directory is deleted. Without this the process would be left sitting in
    # a removed directory, and removal can fail outright on platforms that
    # refuse to delete a directory that is in use.
    original_cwd = os.getcwd()

    with TemporaryDirectory() as temp_dir:
        print("Using temporary directory:", temp_dir)
        # NOTE(review): presumably download_gha_artifacts writes into the
        # current directory, hence the chdir -- confirm against the helper.
        os.chdir(temp_dir)

        try:
            for artifact in ARTIFACTS:
                artifact_paths = download_gha_artifacts(
                    artifact, workflow_run_id, workflow_run_attempt
                )

                for artifact_path in artifact_paths:
                    # GHA artifact is named as follows:
                    #   NAME-runattempt${{ github.run_attempt }}-SUFFIX.zip
                    # and we want to remove the run_attempt to conform with the
                    # naming convention on S3, i.e.
                    #   pytorch/pytorch/WORKFLOW_ID/RUN_ATTEMPT/artifact/NAME-SUFFIX.zip
                    s3_filename = re.sub(FILENAME_REGEX, "", artifact_path.name)
                    upload_file_to_s3(
                        file_name=str(artifact_path.resolve()),
                        bucket=BUCKET_NAME,
                        key=f"{repo}/{workflow_run_id}/{workflow_run_attempt}/artifact/{s3_filename}",
                    )
        finally:
            # Runs before TemporaryDirectory.__exit__, so cleanup never has to
            # delete the process's own working directory.
            os.chdir(original_cwd)
39
40
if __name__ == "__main__":
    # Command-line entry point: every option is mandatory, so they are
    # declared as (flag, value type, help text) rows and registered in a loop.
    cli_options = (
        ("--workflow-run-id", int, "id of the workflow to get artifacts from"),
        ("--workflow-run-attempt", int, "which retry of the workflow this is"),
        ("--repo", str, "which GitHub repo this workflow run belongs to"),
    )

    arg_parser = argparse.ArgumentParser(
        description="Upload test artifacts from GHA to S3"
    )
    for flag, value_type, help_text in cli_options:
        arg_parser.add_argument(
            flag,
            type=value_type,
            required=True,
            help=help_text,
        )

    cli_args = arg_parser.parse_args()
    get_artifacts(
        cli_args.repo,
        cli_args.workflow_run_id,
        cli_args.workflow_run_attempt,
    )
63