xref: /aosp_15_r20/external/skia/infra/bots/task_drivers/codesize/codesize.go (revision c8dee2aa9b3f27cf6c858bd81872bdeb2c07ed17)
1// Copyright 2022 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// This task driver takes a binary (e.g. "dm") built by a Build-* task (e.g.
6// "Build-Debian10-Clang-x86_64-Release"), runs Bloaty against the binary, and uploads the resulting
7// code size statistics to the GCS bucket belonging to the https://codesize.skia.org service.
8//
9// When running as a tryjob, this task driver performs a size diff of said binary built at the
10// tryjob's changelist/patchset vs. built at tip-of-tree. The binary built at tip-of-tree is
11// produced by a *-NoPatch task (e.g. "Build-Debian10-Clang-x86_64-Release-NoPatch"), whereas the
12// binary built at the tryjob's changelist/patchset is produced by a task of the same name except
13// without the "-NoPatch" suffix (e.g. "Build-Debian10-Clang-x86_64-Release"). The size diff is
14// calculated using Bloaty, see
15// https://github.com/google/bloaty/blob/f01ea59bdda11708d74a3826c23d6e2db6c996f0/doc/using.md#size-diffs.
16// The resulting diff is uploaded to the GCS bucket belonging to the https://codesize.skia.org
17// service.
18package main
19
20import (
21	"context"
22	"encoding/json"
23	"flag"
24	"fmt"
25	"os"
26	"path/filepath"
27	"strconv"
28	"strings"
29	"time"
30
31	"cloud.google.com/go/storage"
32	"google.golang.org/api/option"
33
34	"go.skia.org/infra/go/auth"
35	"go.skia.org/infra/go/exec"
36	"go.skia.org/infra/go/gcs"
37	"go.skia.org/infra/go/gcs/gcsclient"
38	"go.skia.org/infra/go/gerrit"
39	"go.skia.org/infra/go/gitiles"
40	"go.skia.org/infra/go/now"
41	"go.skia.org/infra/go/skerr"
42	"go.skia.org/infra/perf/go/ingest/format"
43	"go.skia.org/infra/task_driver/go/lib/auth_steps"
44	"go.skia.org/infra/task_driver/go/lib/checkout"
45	"go.skia.org/infra/task_driver/go/lib/os_steps"
46	"go.skia.org/infra/task_driver/go/td"
47	"go.skia.org/infra/task_scheduler/go/types"
48)
49
// GCS buckets this task driver uploads to.
const (
	// codesizeGCSBucketName is the bucket that receives the Bloaty output, the Bloaty diff output
	// and the JSON metadata file.
	codesizeGCSBucketName = "skia-codesize"
	// perfGCSBucketName is the bucket that receives the Perf-formatted JSON for non-tryjob runs.
	perfGCSBucketName = "skia-perf"
)

// taskdriverURL is the URL prefix to which a task ID is appended to build the "full_data" link
// included in the uploaded Perf data.
const taskdriverURL = "https://task-driver.skia.org/td/"
55
// BloatyOutputMetadata contains the Bloaty version and command-line arguments used, and metadata
// about the task where Bloaty was invoked. This struct is serialized into a JSON file that is
// uploaded to GCS alongside the Bloaty output file.
//
// TODO(lovisolo): Move this struct to the buildbot repository.
type BloatyOutputMetadata struct {
	// Version is the schema version of this file, starting at 1.
	Version int `json:"version"`
	// Timestamp is the time at which the metadata was built, in UTC, formatted as RFC 3339.
	Timestamp string `json:"timestamp"`

	// SwarmingTaskID is copied from the SWARMING_TASK_ID environment variable.
	SwarmingTaskID string `json:"swarming_task_id"`
	// SwarmingServer is copied from the SWARMING_SERVER environment variable.
	SwarmingServer string `json:"swarming_server"`

	// TaskID is the value of the --task_id flag.
	TaskID string `json:"task_id"`
	// TaskName is the value of the --task_name flag.
	TaskName string `json:"task_name"`
	// CompileTaskName is the name of the compile task that produced the analyzed binary.
	CompileTaskName string `json:"compile_task_name"`
	// CompileTaskNameNoPatch should only be set for tryjobs.
	//
	// NOTE(review): runSteps currently copies the --compile_task_name_no_patch flag into this
	// field on every run, not only for tryjobs; it is omitted from the JSON only when empty.
	CompileTaskNameNoPatch string `json:"compile_task_name_no_patch,omitempty"`
	// BinaryName is the name of the analyzed binary (e.g. "dm").
	BinaryName string `json:"binary_name"`

	// BloatyCipdVersion is the version of the "bloaty" CIPD package used.
	BloatyCipdVersion string `json:"bloaty_cipd_version"`
	// BloatyArgs holds the command-line arguments passed to Bloaty to produce the TSV output.
	BloatyArgs []string `json:"bloaty_args"`
	// BloatyDiffArgs should only be set for tryjobs.
	//
	// NOTE(review): runSteps currently runs the Bloaty diff, and therefore sets this field, on
	// every run, not only for tryjobs.
	BloatyDiffArgs []string `json:"bloaty_diff_args,omitempty"`

	// PatchIssue, PatchServer, PatchSet, Repo and Revision are copied verbatim from the
	// repository state of the task.
	PatchIssue  string `json:"patch_issue"`
	PatchServer string `json:"patch_server"`
	PatchSet    string `json:"patch_set"`
	Repo        string `json:"repo"`
	Revision    string `json:"revision"`

	// CommitTimestamp is the creation time string of the matching Gerrit revision for tryjobs
	// (empty if no revision matches the patchset), or the RFC 3339 timestamp of the Gitiles
	// commit otherwise.
	CommitTimestamp string `json:"commit_timestamp"`
	// Author is formatted as "name (email)" for tryjobs, or is the author reported by Gitiles
	// otherwise.
	Author string `json:"author"`
	// Subject is the subject of the Gerrit change for tryjobs, or of the Gitiles commit otherwise.
	Subject string `json:"subject"`
}
90
91func main() {
92	var (
93		projectID              = flag.String("project_id", "", "ID of the Google Cloud project.")
94		taskID                 = flag.String("task_id", "", "ID of this task.")
95		taskName               = flag.String("task_name", "", "Name of the task.")
96		compileTaskName        = flag.String("compile_task_name", "", "Name of the compile task that produced the binary to analyze.")
97		compileTaskNameNoPatch = flag.String("compile_task_name_no_patch", "", "Name of the *-NoPatch compile task that produced the binary to diff against (ignored when the task is not a tryjob).")
98		binaryName             = flag.String("binary_name", "", "Name of the binary to analyze (e.g. \"dm\").")
99		bloatyCIPDVersion      = flag.String("bloaty_cipd_version", "", "Version of the \"bloaty\" CIPD package used.")
100		bloatyBinary           = flag.String("bloaty_binary", "", "Path to the bloaty binary.")
101		stripBinary            = flag.String("strip_binary", "", "Path to the strip binary (part of binutils).")
102		output                 = flag.String("o", "", "If provided, dump a JSON blob of step data to the given file. Prints to stdout if '-' is given.")
103		local                  = flag.Bool("local", true, "True if running locally (as opposed to on the bots).")
104
105		checkoutFlags = checkout.SetupFlags(nil)
106	)
107	ctx := td.StartRun(projectID, taskID, taskName, output, local)
108	defer td.EndRun(ctx)
109
110	if *bloatyBinary == "" || *stripBinary == "" {
111		td.Fatal(ctx, skerr.Fmt("Must specify --bloaty_binary and --strip_binary"))
112	}
113
114	// The repository state contains the commit hash and patch/patchset if available.
115	repoState, err := checkout.GetRepoState(checkoutFlags)
116	if err != nil {
117		td.Fatal(ctx, skerr.Wrap(err))
118	}
119
120	// Make an HTTP client with the required permissions to hit GCS, Gerrit and Gitiles.
121	httpClient, _, err := auth_steps.InitHttpClient(ctx, *local, auth.ScopeReadWrite, gerrit.AuthScope, auth.ScopeUserinfoEmail)
122	if err != nil {
123		td.Fatal(ctx, skerr.Wrap(err))
124	}
125
126	// Make a GCS client with the required permissions to upload to the codesize.skia.org GCS bucket.
127	store, err := storage.NewClient(ctx, option.WithHTTPClient(httpClient))
128	if err != nil {
129		td.Fatal(ctx, skerr.Wrap(err))
130	}
131	codesizeGCS := gcsclient.New(store, codesizeGCSBucketName)
132	perfGCS := gcsclient.New(store, perfGCSBucketName)
133
134	// Make a Gerrit client.
135	gerritClient, err := gerrit.NewGerrit(repoState.Server, httpClient)
136	if err != nil {
137		td.Fatal(ctx, skerr.Wrap(err))
138	}
139
140	// Make a Gitiles client.
141	gitilesRepo := gitiles.NewRepo(repoState.Repo, httpClient)
142
143	args := runStepsArgs{
144		repoState:              repoState,
145		gerrit:                 gerritClient,
146		gitilesRepo:            gitilesRepo,
147		codesizeGCS:            codesizeGCS,
148		perfGCS:                perfGCS,
149		swarmingTaskID:         os.Getenv("SWARMING_TASK_ID"),
150		swarmingServer:         os.Getenv("SWARMING_SERVER"),
151		taskID:                 *taskID,
152		taskName:               *taskName,
153		compileTaskName:        *compileTaskName,
154		compileTaskNameNoPatch: *compileTaskNameNoPatch,
155		binaryName:             *binaryName,
156		bloatyPath:             *bloatyBinary,
157		bloatyCIPDVersion:      *bloatyCIPDVersion,
158		stripPath:              *stripBinary,
159	}
160
161	if err := runSteps(ctx, args); err != nil {
162		td.Fatal(ctx, skerr.Wrap(err))
163	}
164}
165
// runStepsArgs contains the input arguments to the runSteps function.
type runStepsArgs struct {
	// repoState holds the commit hash and, if available, the patch issue/patchset/server.
	repoState types.RepoState
	// gerrit is queried for the change's owner, subject and revisions when running as a tryjob.
	gerrit *gerrit.Gerrit
	// gitilesRepo is queried for the commit's author, subject and timestamp when not a tryjob.
	gitilesRepo gitiles.GitilesRepo
	// codesizeGCS receives the .json metadata, .diff.txt and .tsv files.
	codesizeGCS gcs.GCSClient
	// perfGCS receives the Perf-formatted JSON file when not running as a tryjob.
	perfGCS gcs.GCSClient
	// swarmingTaskID is recorded in the uploaded metadata.
	swarmingTaskID string
	// swarmingServer is recorded in the uploaded metadata.
	swarmingServer string
	// taskID is recorded in the metadata and used to build the tryjob GCS directory, the Perf
	// file name and the "full_data" link.
	taskID string
	// taskName is recorded in the uploaded metadata.
	taskName string
	// compileTaskName is recorded in the metadata and is the last component of the GCS directory.
	compileTaskName string
	// compileTaskNameNoPatch is recorded in the uploaded metadata.
	compileTaskNameNoPatch string
	// binaryName is the name of the binary to analyze, looked up under the "build" and
	// "build_nopatch" directories.
	binaryName string
	// bloatyCIPDVersion is recorded in the uploaded metadata.
	bloatyCIPDVersion string
	// bloatyPath is the path to the bloaty binary.
	bloatyPath string
	// stripPath is the path to the strip binary.
	stripPath string
}
184
185// runSteps runs the main steps of this task driver.
186func runSteps(ctx context.Context, args runStepsArgs) error {
187	var (
188		author          string
189		subject         string
190		commitTimestamp string
191	)
192
193	// Read the CL subject, author and timestamp. We talk to Gerrit when running as a tryjob, or to
194	// Gitiles when running as a post-submit task.
195	if args.repoState.IsTryJob() {
196		issue, err := strconv.ParseInt(args.repoState.Issue, 10, 64)
197		if err != nil {
198			return skerr.Wrap(err)
199		}
200		patchset, err := strconv.ParseInt(args.repoState.Patchset, 10, 64)
201		if err != nil {
202			return skerr.Wrap(err)
203		}
204		changeInfo, err := args.gerrit.GetIssueProperties(ctx, issue)
205		if err != nil {
206			return skerr.Wrap(err)
207		}
208		// This matches the format of the author field returned by Gitiles.
209		author = fmt.Sprintf("%s (%s)", changeInfo.Owner.Name, changeInfo.Owner.Email)
210		subject = changeInfo.Subject
211		for _, revision := range changeInfo.Revisions {
212			if revision.Number == patchset {
213				commitTimestamp = revision.CreatedString
214				break
215			}
216		}
217	} else {
218		longCommit, err := args.gitilesRepo.Details(ctx, args.repoState.Revision)
219		if err != nil {
220			return skerr.Wrap(err)
221		}
222		author = longCommit.Author
223		subject = longCommit.Subject
224		commitTimestamp = longCommit.Timestamp.Format(time.RFC3339)
225	}
226
227	// Run Bloaty and capture its output.
228	bloatyOutput, bloatyArgs, err := runBloaty(ctx, args.stripPath, args.bloatyPath, args.binaryName)
229	if err != nil {
230		return skerr.Wrap(err)
231	}
232
233	// Build metadata structure.
234	metadata := &BloatyOutputMetadata{
235		Version:           1,
236		Timestamp:         now.Now(ctx).UTC().Format(time.RFC3339),
237		SwarmingTaskID:    args.swarmingTaskID,
238		SwarmingServer:    args.swarmingServer,
239		TaskID:            args.taskID,
240		TaskName:          args.taskName,
241		CompileTaskName:   args.compileTaskName,
242		BinaryName:        args.binaryName,
243		BloatyCipdVersion: args.bloatyCIPDVersion,
244		BloatyArgs:        bloatyArgs,
245		PatchIssue:        args.repoState.Issue,
246		PatchServer:       args.repoState.Server,
247		PatchSet:          args.repoState.Patchset,
248		Repo:              args.repoState.Repo,
249		Revision:          args.repoState.Revision,
250		CommitTimestamp:   commitTimestamp,
251		Author:            author,
252		Subject:           subject,
253	}
254
255	var bloatyDiffOutput string
256	// Diff the binary built at the current changelist/patchset vs. at tip-of-tree.
257	bloatyDiffOutput, metadata.BloatyDiffArgs, err = runBloatyDiff(ctx, args.stripPath, args.bloatyPath, args.binaryName)
258	if err != nil {
259		return skerr.Wrap(err)
260	}
261	metadata.CompileTaskNameNoPatch = args.compileTaskNameNoPatch
262
263	gcsDir := computeTargetGCSDirectory(ctx, args.repoState, args.taskID, args.compileTaskName)
264
265	// Upload pretty-printed JSON metadata file to GCS.
266	jsonMetadata, err := json.MarshalIndent(metadata, "", "  ")
267	if err != nil {
268		return skerr.Wrap(err)
269	}
270	if err = uploadFileToGCS(ctx, args.codesizeGCS, fmt.Sprintf("%s/%s.json", gcsDir, args.binaryName), jsonMetadata); err != nil {
271		return skerr.Wrap(err)
272	}
273
274	// Upload Bloaty diff output plain-text file to GCS.
275	if err = uploadFileToGCS(ctx, args.codesizeGCS, fmt.Sprintf("%s/%s.diff.txt", gcsDir, args.binaryName), []byte(bloatyDiffOutput)); err != nil {
276		return skerr.Wrap(err)
277	}
278
279	// Upload Bloaty output .tsv file to GCS.
280	//
281	// It is important that we upload the .tsv file last because the codesizeserver binary will
282	// only start processing the .json and .diff.txt files once it receives the Pub/Sub
283	// notification that a .tsv file has been uploaded. Pub/Sub notifications are pretty quick, so
284	// by uploading files in this order we avoid a race condition.
285	if err = uploadFileToGCS(ctx, args.codesizeGCS, fmt.Sprintf("%s/%s.tsv", gcsDir, args.binaryName), []byte(bloatyOutput)); err != nil {
286		return skerr.Wrap(err)
287	}
288	if args.repoState.IsTryJob() {
289		// Add VM and file diff results to the step data. This is consumed by the codesize plugin
290		// to display results on the Gerrit CL for tryjob runs.
291		vmDiff, fileDiff := parseBloatyDiffOutput(bloatyDiffOutput)
292		if vmDiff != "" && fileDiff != "" {
293			td.StepText(ctx, "VM Diff", vmDiff)
294			td.StepText(ctx, "File Diff", fileDiff)
295		}
296
297		// TODO(rmistry): Remove the below "Diff Bytes" section after the above
298		// works and is integrated with the codesize plugin.
299		s, err := os_steps.Stat(ctx, filepath.Join("build", args.binaryName+"_stripped"))
300		if err != nil {
301			return err
302		}
303		totalBytes := s.Size()
304
305		s, err = os_steps.Stat(ctx, filepath.Join("build_nopatch", args.binaryName+"_stripped"))
306		if err != nil {
307			return err
308		}
309		beforeBytes := s.Size()
310
311		diffBytes := totalBytes - beforeBytes
312		td.StepText(ctx, "Diff Bytes", strconv.FormatInt(diffBytes, 10))
313	} else {
314		// Upload perf data for non-tryjob runs on status.skia.org.
315		perfData := format.Format{
316			Version: 1,
317			GitHash: args.repoState.Revision,
318			Key: map[string]string{
319				"binary":            args.binaryName,
320				"compile_task_name": args.compileTaskName,
321			},
322			Links: map[string]string{
323				"full_data": taskdriverURL + args.taskID,
324			},
325		}
326		if err = uploadPerfData(ctx, args.perfGCS, gcsDir, args.binaryName, args.taskID, perfData); err != nil {
327			return skerr.Wrap(err)
328		}
329	}
330
331	return nil
332}
333
// parseBloatyDiffOutput parses the plain-text output of a Bloaty size diff and returns the VM
// diff and the file diff strings found on its final (TOTAL) line.
//
// Example: for "...\n...\n+0.0% +832 TOTAL +848Ki +0.0%\n\n" we return ("+832", "+848Ki").
//
// Bloaty prints "[ = ]" in place of a percentage when a size did not change, and that marker
// splits into three whitespace-separated words. Such lines (e.g. "[ = ] 0 TOTAL 0 [ = ]") are
// handled by locating the TOTAL label and returning the words on either side of it.
//
// If the output is not in the expected format then we return empty strings.
func parseBloatyDiffOutput(bloatyDiffOutput string) (string, string) {
	// TrimSpace (rather than trimming only "\n") so that trailing whitespace-only or
	// "\r\n"-terminated lines do not hide the results line. Split always yields at least one
	// element, so indexing the last line is safe even for empty input.
	lines := strings.Split(strings.TrimSpace(bloatyDiffOutput), "\n")
	// Final line in bloaty output is the line with the results.
	words := strings.Fields(lines[len(lines)-1])

	// Format is expected to look like this:
	// +0.0% +832 TOTAL +848 +0.0%
	if len(words) == 5 {
		return words[1], words[3]
	}

	// Otherwise one or both percentages were rendered as "[ = ]"; the sizes are still the words
	// immediately before and after the TOTAL label.
	for i, word := range words {
		if word == "TOTAL" && i > 0 && i+1 < len(words) {
			return words[i-1], words[i+1]
		}
	}
	return "", ""
}
353
354// runBloaty runs Bloaty against the given binary and returns the Bloaty output in TSV format and
355// the Bloaty command-line arguments used. It uses the strip command to strip out debug symbols,
356// so they do not inflate the file size numbers.
357func runBloaty(ctx context.Context, stripPath, bloatyPath, binaryName string) (string, []string, error) {
358	binaryWithSymbols := filepath.Join("build", binaryName)
359	binaryNoSymbols := filepath.Join("build", binaryName+"_stripped")
360	err := td.Do(ctx, td.Props("Create stripped version of binary"), func(ctx context.Context) error {
361		runCmd := &exec.Command{
362			Name:       "cp",
363			Args:       []string{binaryWithSymbols, binaryNoSymbols},
364			InheritEnv: true,
365			LogStdout:  true,
366			LogStderr:  true,
367		}
368		_, err := exec.RunCommand(ctx, runCmd)
369		if err != nil {
370			return skerr.Wrap(err)
371		}
372		runCmd = &exec.Command{
373			Name:       stripPath,
374			Args:       []string{binaryNoSymbols},
375			InheritEnv: true,
376			LogStdout:  true,
377			LogStderr:  true,
378		}
379		_, err = exec.RunCommand(ctx, runCmd)
380		if err != nil {
381			return skerr.Wrap(err)
382		}
383		runCmd = &exec.Command{
384			Name:       "ls",
385			Args:       []string{"-al", "build"},
386			InheritEnv: true,
387			LogStdout:  true,
388			LogStderr:  true,
389		}
390		_, err = exec.RunCommand(ctx, runCmd)
391		if err != nil {
392			return skerr.Wrap(err)
393		}
394
395		return nil
396	})
397	if err != nil {
398		return "", nil, skerr.Wrap(err)
399	}
400
401	runCmd := &exec.Command{
402		Name: bloatyPath,
403		Args: []string{
404			binaryNoSymbols,
405			"-d",
406			"compileunits,symbols",
407			"-n",
408			"0",
409			"--tsv",
410			"--debug-file=" + binaryWithSymbols,
411		},
412		InheritEnv: true,
413		LogStdout:  true,
414		LogStderr:  true,
415	}
416
417	var bloatyOutput string
418
419	if err := td.Do(ctx, td.Props(fmt.Sprintf("Run Bloaty against binary %q", binaryName)), func(ctx context.Context) error {
420		bloatyOutput, err = exec.RunCommand(ctx, runCmd)
421		return err
422	}); err != nil {
423		return "", nil, skerr.Wrap(err)
424	}
425
426	return bloatyOutput, runCmd.Args, nil
427}
428
429// runBloatyDiff invokes Bloaty to diff the given binary built at the current changelist/patchset
430// vs. at tip of tree, and returns the plain-text Bloaty output and the command-line arguments
431// used. Like before, it strips the debug symbols out before computing that diff.
432func runBloatyDiff(ctx context.Context, stripPath, bloatyPath, binaryName string) (string, []string, error) {
433	// These were created from the runBloaty step
434	binaryWithPatchWithSymbols := filepath.Join("build", binaryName)
435	binaryWithPatchWithNoSymbols := filepath.Join("build", binaryName+"_stripped")
436	// These will be created next
437	binaryWithNoPatchWithSymbols := filepath.Join("build_nopatch", binaryName)
438	binaryWithNoPatchWithNoSymbols := filepath.Join("build_nopatch", binaryName+"_stripped")
439	err := td.Do(ctx, td.Props("Create stripped version of no_patch binary"), func(ctx context.Context) error {
440		runCmd := &exec.Command{
441			Name:       "cp",
442			Args:       []string{binaryWithNoPatchWithSymbols, binaryWithNoPatchWithNoSymbols},
443			InheritEnv: true,
444			LogStdout:  true,
445			LogStderr:  true,
446		}
447		_, err := exec.RunCommand(ctx, runCmd)
448		if err != nil {
449			return skerr.Wrap(err)
450		}
451		runCmd = &exec.Command{
452			Name:       stripPath,
453			Args:       []string{binaryWithNoPatchWithNoSymbols},
454			InheritEnv: true,
455			LogStdout:  true,
456			LogStderr:  true,
457		}
458		_, err = exec.RunCommand(ctx, runCmd)
459		if err != nil {
460			return skerr.Wrap(err)
461		}
462		runCmd = &exec.Command{
463			Name:       "ls",
464			Args:       []string{"-al", "build_nopatch"},
465			InheritEnv: true,
466			LogStdout:  true,
467			LogStderr:  true,
468		}
469		_, err = exec.RunCommand(ctx, runCmd)
470		return err
471	})
472	if err != nil {
473		return "", nil, skerr.Wrap(err)
474	}
475
476	runCmd := &exec.Command{
477		Name: bloatyPath,
478		Args: []string{
479			binaryWithPatchWithNoSymbols,
480			"--debug-file=" + binaryWithPatchWithSymbols,
481			"-d", "symbols", "-n", "0", "-s", "file",
482			"--",
483			binaryWithNoPatchWithNoSymbols,
484			"--debug-file=" + binaryWithNoPatchWithSymbols,
485		},
486		InheritEnv: true,
487		LogStdout:  true,
488		LogStderr:  true,
489	}
490
491	var bloatyOutput string
492	if err := td.Do(ctx, td.Props(fmt.Sprintf("Run Bloaty diff against binary %q", binaryName)), func(ctx context.Context) error {
493		bloatyOutput, err = exec.RunCommand(ctx, runCmd)
494		return err
495	}); err != nil {
496		return "", nil, skerr.Wrap(err)
497	}
498
499	return bloatyOutput, runCmd.Args, nil
500}
501
502// computeTargetGCSDirectory computes the target GCS directory where to upload the Bloaty output file
503// and JSON metadata file.
504func computeTargetGCSDirectory(ctx context.Context, repoState types.RepoState, taskID, compileTaskName string) string {
505	timePrefix := now.Now(ctx).UTC().Format("2006/01/02/15") // YYYY/MM/DD/HH.
506	if repoState.IsTryJob() {
507		// Example: 2022/01/31/01/tryjob/12345/3/CkPp9ElAaEXyYWNHpXHU/Build-Debian10-Clang-x86_64-Release
508		return fmt.Sprintf("%s/tryjob/%s/%s/%s/%s", timePrefix, repoState.Patch.Issue, repoState.Patch.Patchset, taskID, compileTaskName)
509	} else {
510		// Example: 2022/01/31/01/033ccea12c0949d0f712471bfcb4ed6daf69aaff/Build-Debian10-Clang-x86_64-Release
511		return fmt.Sprintf("%s/%s/%s", timePrefix, repoState.Revision, compileTaskName)
512	}
513}
514
515// uploadPerfData gets the file size of the stripped binary (i.e. without debug symbols), formats
516// the JSON how Perf expects it, and uploads it to Perf's GCS bucket.
517func uploadPerfData(ctx context.Context, perfGCS gcs.GCSClient, gcsPathPrefix, binaryName, taskID string, perfData format.Format) error {
518	// Use the taskID to guarantee unique file ids
519	gcsPath := "nano-json-v1/" + gcsPathPrefix + "/codesize_" + taskID + ".json"
520
521	err := td.Do(ctx, td.Props("Upload total stripped binary size to Perf"), func(ctx context.Context) error {
522		s, err := os_steps.Stat(ctx, filepath.Join("build", binaryName+"_stripped"))
523		if err != nil {
524			return err
525		}
526		totalBytes := s.Size()
527
528		s, err = os_steps.Stat(ctx, filepath.Join("build_nopatch", binaryName+"_stripped"))
529		if err != nil {
530			return err
531		}
532		beforeBytes := s.Size()
533
534		perfData.Results = []format.Result{{
535			Key:         map[string]string{"measurement": "stripped_binary_bytes"},
536			Measurement: float32(totalBytes),
537		}, {
538			Key:         map[string]string{"measurement": "stripped_diff_bytes"},
539			Measurement: float32(totalBytes - beforeBytes),
540		}}
541
542		perfJSON, err := json.MarshalIndent(perfData, "", "  ")
543		if err != nil {
544			return err
545		}
546		return uploadFileToGCS(ctx, perfGCS, gcsPath, perfJSON)
547	})
548	if err != nil {
549		return skerr.Wrap(err)
550	}
551	return nil
552}
553
554// uploadFileToGCS uploads a file to the given GCS bucket.
555func uploadFileToGCS(ctx context.Context, gcsClient gcs.GCSClient, path string, contents []byte) error {
556	gcsURL := fmt.Sprintf("gs://%s/%s", gcsClient.Bucket(), path)
557	return td.Do(ctx, td.Props(fmt.Sprintf("Upload %s", gcsURL)), func(ctx context.Context) error {
558		if err := gcsClient.SetFileContents(ctx, path, gcs.FILE_WRITE_OPTS_TEXT, contents); err != nil {
559			return skerr.Wrapf(err, "Could not write task to %s", gcsURL)
560		}
561		return nil
562	})
563}
564