// Copyright 2022 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// This task driver takes a binary (e.g. "dm") built by a Build-* task (e.g.
// "Build-Debian10-Clang-x86_64-Release"), runs Bloaty against the binary, and uploads the resulting
// code size statistics to the GCS bucket belonging to the https://codesize.skia.org service.
//
// When running as a tryjob, this task driver performs a size diff of said binary built at the
// tryjob's changelist/patchset vs. built at tip-of-tree. The binary built at tip-of-tree is
// produced by a *-NoPatch task (e.g. "Build-Debian10-Clang-x86_64-Release-NoPatch"), whereas the
// binary built at the tryjob's changelist/patchset is produced by a task of the same name except
// without the "-NoPatch" suffix (e.g. "Build-Debian10-Clang-x86_64-Release"). The size diff is
// calculated using Bloaty, see
// https://github.com/google/bloaty/blob/f01ea59bdda11708d74a3826c23d6e2db6c996f0/doc/using.md#size-diffs.
// The resulting diff is uploaded to the GCS bucket belonging to the https://codesize.skia.org
// service.
18package main 19 20import ( 21 "context" 22 "encoding/json" 23 "flag" 24 "fmt" 25 "os" 26 "path/filepath" 27 "strconv" 28 "strings" 29 "time" 30 31 "cloud.google.com/go/storage" 32 "google.golang.org/api/option" 33 34 "go.skia.org/infra/go/auth" 35 "go.skia.org/infra/go/exec" 36 "go.skia.org/infra/go/gcs" 37 "go.skia.org/infra/go/gcs/gcsclient" 38 "go.skia.org/infra/go/gerrit" 39 "go.skia.org/infra/go/gitiles" 40 "go.skia.org/infra/go/now" 41 "go.skia.org/infra/go/skerr" 42 "go.skia.org/infra/perf/go/ingest/format" 43 "go.skia.org/infra/task_driver/go/lib/auth_steps" 44 "go.skia.org/infra/task_driver/go/lib/checkout" 45 "go.skia.org/infra/task_driver/go/lib/os_steps" 46 "go.skia.org/infra/task_driver/go/td" 47 "go.skia.org/infra/task_scheduler/go/types" 48) 49 50const ( 51 codesizeGCSBucketName = "skia-codesize" 52 perfGCSBucketName = "skia-perf" 53 taskdriverURL = "https://task-driver.skia.org/td/" 54) 55 56// BloatyOutputMetadata contains the Bloaty version and command-line arguments used, and metadata 57// about the task where Bloaty was invoked. This struct is serialized into a JSON file that is 58// uploaded to GCS alongside the Bloaty output file. 59// 60// TODO(lovisolo): Move this struct to the buildbot repository. 61type BloatyOutputMetadata struct { 62 Version int `json:"version"` // Schema version of this file, starting at 1. 63 Timestamp string `json:"timestamp"` 64 65 SwarmingTaskID string `json:"swarming_task_id"` 66 SwarmingServer string `json:"swarming_server"` 67 68 TaskID string `json:"task_id"` 69 TaskName string `json:"task_name"` 70 CompileTaskName string `json:"compile_task_name"` 71 // CompileTaskNameNoPatch should only be set for tryjobs. 72 CompileTaskNameNoPatch string `json:"compile_task_name_no_patch,omitempty"` 73 BinaryName string `json:"binary_name"` 74 75 BloatyCipdVersion string `json:"bloaty_cipd_version"` 76 BloatyArgs []string `json:"bloaty_args"` 77 // BloatyDiffArgs should only be set for tryjobs. 
78 BloatyDiffArgs []string `json:"bloaty_diff_args,omitempty"` 79 80 PatchIssue string `json:"patch_issue"` 81 PatchServer string `json:"patch_server"` 82 PatchSet string `json:"patch_set"` 83 Repo string `json:"repo"` 84 Revision string `json:"revision"` 85 86 CommitTimestamp string `json:"commit_timestamp"` 87 Author string `json:"author"` 88 Subject string `json:"subject"` 89} 90 91func main() { 92 var ( 93 projectID = flag.String("project_id", "", "ID of the Google Cloud project.") 94 taskID = flag.String("task_id", "", "ID of this task.") 95 taskName = flag.String("task_name", "", "Name of the task.") 96 compileTaskName = flag.String("compile_task_name", "", "Name of the compile task that produced the binary to analyze.") 97 compileTaskNameNoPatch = flag.String("compile_task_name_no_patch", "", "Name of the *-NoPatch compile task that produced the binary to diff against (ignored when the task is not a tryjob).") 98 binaryName = flag.String("binary_name", "", "Name of the binary to analyze (e.g. \"dm\").") 99 bloatyCIPDVersion = flag.String("bloaty_cipd_version", "", "Version of the \"bloaty\" CIPD package used.") 100 bloatyBinary = flag.String("bloaty_binary", "", "Path to the bloaty binary.") 101 stripBinary = flag.String("strip_binary", "", "Path to the strip binary (part of binutils).") 102 output = flag.String("o", "", "If provided, dump a JSON blob of step data to the given file. Prints to stdout if '-' is given.") 103 local = flag.Bool("local", true, "True if running locally (as opposed to on the bots).") 104 105 checkoutFlags = checkout.SetupFlags(nil) 106 ) 107 ctx := td.StartRun(projectID, taskID, taskName, output, local) 108 defer td.EndRun(ctx) 109 110 if *bloatyBinary == "" || *stripBinary == "" { 111 td.Fatal(ctx, skerr.Fmt("Must specify --bloaty_binary and --strip_binary")) 112 } 113 114 // The repository state contains the commit hash and patch/patchset if available. 
115 repoState, err := checkout.GetRepoState(checkoutFlags) 116 if err != nil { 117 td.Fatal(ctx, skerr.Wrap(err)) 118 } 119 120 // Make an HTTP client with the required permissions to hit GCS, Gerrit and Gitiles. 121 httpClient, _, err := auth_steps.InitHttpClient(ctx, *local, auth.ScopeReadWrite, gerrit.AuthScope, auth.ScopeUserinfoEmail) 122 if err != nil { 123 td.Fatal(ctx, skerr.Wrap(err)) 124 } 125 126 // Make a GCS client with the required permissions to upload to the codesize.skia.org GCS bucket. 127 store, err := storage.NewClient(ctx, option.WithHTTPClient(httpClient)) 128 if err != nil { 129 td.Fatal(ctx, skerr.Wrap(err)) 130 } 131 codesizeGCS := gcsclient.New(store, codesizeGCSBucketName) 132 perfGCS := gcsclient.New(store, perfGCSBucketName) 133 134 // Make a Gerrit client. 135 gerritClient, err := gerrit.NewGerrit(repoState.Server, httpClient) 136 if err != nil { 137 td.Fatal(ctx, skerr.Wrap(err)) 138 } 139 140 // Make a Gitiles client. 141 gitilesRepo := gitiles.NewRepo(repoState.Repo, httpClient) 142 143 args := runStepsArgs{ 144 repoState: repoState, 145 gerrit: gerritClient, 146 gitilesRepo: gitilesRepo, 147 codesizeGCS: codesizeGCS, 148 perfGCS: perfGCS, 149 swarmingTaskID: os.Getenv("SWARMING_TASK_ID"), 150 swarmingServer: os.Getenv("SWARMING_SERVER"), 151 taskID: *taskID, 152 taskName: *taskName, 153 compileTaskName: *compileTaskName, 154 compileTaskNameNoPatch: *compileTaskNameNoPatch, 155 binaryName: *binaryName, 156 bloatyPath: *bloatyBinary, 157 bloatyCIPDVersion: *bloatyCIPDVersion, 158 stripPath: *stripBinary, 159 } 160 161 if err := runSteps(ctx, args); err != nil { 162 td.Fatal(ctx, skerr.Wrap(err)) 163 } 164} 165 166// runStepsArgs contains the input arguments to the runSteps function. 
167type runStepsArgs struct { 168 repoState types.RepoState 169 gerrit *gerrit.Gerrit 170 gitilesRepo gitiles.GitilesRepo 171 codesizeGCS gcs.GCSClient 172 perfGCS gcs.GCSClient 173 swarmingTaskID string 174 swarmingServer string 175 taskID string 176 taskName string 177 compileTaskName string 178 compileTaskNameNoPatch string 179 binaryName string 180 bloatyCIPDVersion string 181 bloatyPath string 182 stripPath string 183} 184 185// runSteps runs the main steps of this task driver. 186func runSteps(ctx context.Context, args runStepsArgs) error { 187 var ( 188 author string 189 subject string 190 commitTimestamp string 191 ) 192 193 // Read the CL subject, author and timestamp. We talk to Gerrit when running as a tryjob, or to 194 // Gitiles when running as a post-submit task. 195 if args.repoState.IsTryJob() { 196 issue, err := strconv.ParseInt(args.repoState.Issue, 10, 64) 197 if err != nil { 198 return skerr.Wrap(err) 199 } 200 patchset, err := strconv.ParseInt(args.repoState.Patchset, 10, 64) 201 if err != nil { 202 return skerr.Wrap(err) 203 } 204 changeInfo, err := args.gerrit.GetIssueProperties(ctx, issue) 205 if err != nil { 206 return skerr.Wrap(err) 207 } 208 // This matches the format of the author field returned by Gitiles. 209 author = fmt.Sprintf("%s (%s)", changeInfo.Owner.Name, changeInfo.Owner.Email) 210 subject = changeInfo.Subject 211 for _, revision := range changeInfo.Revisions { 212 if revision.Number == patchset { 213 commitTimestamp = revision.CreatedString 214 break 215 } 216 } 217 } else { 218 longCommit, err := args.gitilesRepo.Details(ctx, args.repoState.Revision) 219 if err != nil { 220 return skerr.Wrap(err) 221 } 222 author = longCommit.Author 223 subject = longCommit.Subject 224 commitTimestamp = longCommit.Timestamp.Format(time.RFC3339) 225 } 226 227 // Run Bloaty and capture its output. 
228 bloatyOutput, bloatyArgs, err := runBloaty(ctx, args.stripPath, args.bloatyPath, args.binaryName) 229 if err != nil { 230 return skerr.Wrap(err) 231 } 232 233 // Build metadata structure. 234 metadata := &BloatyOutputMetadata{ 235 Version: 1, 236 Timestamp: now.Now(ctx).UTC().Format(time.RFC3339), 237 SwarmingTaskID: args.swarmingTaskID, 238 SwarmingServer: args.swarmingServer, 239 TaskID: args.taskID, 240 TaskName: args.taskName, 241 CompileTaskName: args.compileTaskName, 242 BinaryName: args.binaryName, 243 BloatyCipdVersion: args.bloatyCIPDVersion, 244 BloatyArgs: bloatyArgs, 245 PatchIssue: args.repoState.Issue, 246 PatchServer: args.repoState.Server, 247 PatchSet: args.repoState.Patchset, 248 Repo: args.repoState.Repo, 249 Revision: args.repoState.Revision, 250 CommitTimestamp: commitTimestamp, 251 Author: author, 252 Subject: subject, 253 } 254 255 var bloatyDiffOutput string 256 // Diff the binary built at the current changelist/patchset vs. at tip-of-tree. 257 bloatyDiffOutput, metadata.BloatyDiffArgs, err = runBloatyDiff(ctx, args.stripPath, args.bloatyPath, args.binaryName) 258 if err != nil { 259 return skerr.Wrap(err) 260 } 261 metadata.CompileTaskNameNoPatch = args.compileTaskNameNoPatch 262 263 gcsDir := computeTargetGCSDirectory(ctx, args.repoState, args.taskID, args.compileTaskName) 264 265 // Upload pretty-printed JSON metadata file to GCS. 266 jsonMetadata, err := json.MarshalIndent(metadata, "", " ") 267 if err != nil { 268 return skerr.Wrap(err) 269 } 270 if err = uploadFileToGCS(ctx, args.codesizeGCS, fmt.Sprintf("%s/%s.json", gcsDir, args.binaryName), jsonMetadata); err != nil { 271 return skerr.Wrap(err) 272 } 273 274 // Upload Bloaty diff output plain-text file to GCS. 275 if err = uploadFileToGCS(ctx, args.codesizeGCS, fmt.Sprintf("%s/%s.diff.txt", gcsDir, args.binaryName), []byte(bloatyDiffOutput)); err != nil { 276 return skerr.Wrap(err) 277 } 278 279 // Upload Bloaty output .tsv file to GCS. 
280 // 281 // It is important that we upload the .tsv file last because the codesizeserver binary will 282 // only start processing the .json and .diff.txt files once it receives the Pub/Sub 283 // notification that a .tsv file has been uploaded. Pub/Sub notifications are pretty quick, so 284 // by uploading files in this order we avoid a race condition. 285 if err = uploadFileToGCS(ctx, args.codesizeGCS, fmt.Sprintf("%s/%s.tsv", gcsDir, args.binaryName), []byte(bloatyOutput)); err != nil { 286 return skerr.Wrap(err) 287 } 288 if args.repoState.IsTryJob() { 289 // Add VM and file diff results to the step data. This is consumed by the codesize plugin 290 // to display results on the Gerrit CL for tryjob runs. 291 vmDiff, fileDiff := parseBloatyDiffOutput(bloatyDiffOutput) 292 if vmDiff != "" && fileDiff != "" { 293 td.StepText(ctx, "VM Diff", vmDiff) 294 td.StepText(ctx, "File Diff", fileDiff) 295 } 296 297 // TODO(rmistry): Remove the below "Diff Bytes" section after the above 298 // works and is integrated with the codesize plugin. 299 s, err := os_steps.Stat(ctx, filepath.Join("build", args.binaryName+"_stripped")) 300 if err != nil { 301 return err 302 } 303 totalBytes := s.Size() 304 305 s, err = os_steps.Stat(ctx, filepath.Join("build_nopatch", args.binaryName+"_stripped")) 306 if err != nil { 307 return err 308 } 309 beforeBytes := s.Size() 310 311 diffBytes := totalBytes - beforeBytes 312 td.StepText(ctx, "Diff Bytes", strconv.FormatInt(diffBytes, 10)) 313 } else { 314 // Upload perf data for non-tryjob runs on status.skia.org. 
315 perfData := format.Format{ 316 Version: 1, 317 GitHash: args.repoState.Revision, 318 Key: map[string]string{ 319 "binary": args.binaryName, 320 "compile_task_name": args.compileTaskName, 321 }, 322 Links: map[string]string{ 323 "full_data": taskdriverURL + args.taskID, 324 }, 325 } 326 if err = uploadPerfData(ctx, args.perfGCS, gcsDir, args.binaryName, args.taskID, perfData); err != nil { 327 return skerr.Wrap(err) 328 } 329 } 330 331 return nil 332} 333 334// parseBloatyDiffOutput parses bloaty output and returns the VM diff 335// and the file diff strings. 336// Example: for "...\n...\n+0.0% +832 TOTAL +848Ki +0.0%\n\n" we return 337// (+832, +848Ki). 338// If the output is not in expected format then we return empty strings. 339func parseBloatyDiffOutput(bloatyDiffOutput string) (string, string) { 340 tokens := strings.Split(strings.Trim(bloatyDiffOutput, "\n"), "\n") 341 if len(tokens) > 0 { 342 // Final line in bloaty output is the line with the results. 343 outputLine := tokens[len(tokens)-1] 344 words := strings.Fields(outputLine) 345 // Format is expected to look like this: 346 // +0.0% +832 TOTAL +848 +0.0% 347 if len(words) == 5 { 348 return words[1], words[3] 349 } 350 } 351 return "", "" 352} 353 354// runBloaty runs Bloaty against the given binary and returns the Bloaty output in TSV format and 355// the Bloaty command-line arguments used. It uses the strip command to strip out debug symbols, 356// so they do not inflate the file size numbers. 
357func runBloaty(ctx context.Context, stripPath, bloatyPath, binaryName string) (string, []string, error) { 358 binaryWithSymbols := filepath.Join("build", binaryName) 359 binaryNoSymbols := filepath.Join("build", binaryName+"_stripped") 360 err := td.Do(ctx, td.Props("Create stripped version of binary"), func(ctx context.Context) error { 361 runCmd := &exec.Command{ 362 Name: "cp", 363 Args: []string{binaryWithSymbols, binaryNoSymbols}, 364 InheritEnv: true, 365 LogStdout: true, 366 LogStderr: true, 367 } 368 _, err := exec.RunCommand(ctx, runCmd) 369 if err != nil { 370 return skerr.Wrap(err) 371 } 372 runCmd = &exec.Command{ 373 Name: stripPath, 374 Args: []string{binaryNoSymbols}, 375 InheritEnv: true, 376 LogStdout: true, 377 LogStderr: true, 378 } 379 _, err = exec.RunCommand(ctx, runCmd) 380 if err != nil { 381 return skerr.Wrap(err) 382 } 383 runCmd = &exec.Command{ 384 Name: "ls", 385 Args: []string{"-al", "build"}, 386 InheritEnv: true, 387 LogStdout: true, 388 LogStderr: true, 389 } 390 _, err = exec.RunCommand(ctx, runCmd) 391 if err != nil { 392 return skerr.Wrap(err) 393 } 394 395 return nil 396 }) 397 if err != nil { 398 return "", nil, skerr.Wrap(err) 399 } 400 401 runCmd := &exec.Command{ 402 Name: bloatyPath, 403 Args: []string{ 404 binaryNoSymbols, 405 "-d", 406 "compileunits,symbols", 407 "-n", 408 "0", 409 "--tsv", 410 "--debug-file=" + binaryWithSymbols, 411 }, 412 InheritEnv: true, 413 LogStdout: true, 414 LogStderr: true, 415 } 416 417 var bloatyOutput string 418 419 if err := td.Do(ctx, td.Props(fmt.Sprintf("Run Bloaty against binary %q", binaryName)), func(ctx context.Context) error { 420 bloatyOutput, err = exec.RunCommand(ctx, runCmd) 421 return err 422 }); err != nil { 423 return "", nil, skerr.Wrap(err) 424 } 425 426 return bloatyOutput, runCmd.Args, nil 427} 428 429// runBloatyDiff invokes Bloaty to diff the given binary built at the current changelist/patchset 430// vs. 
at tip of tree, and returns the plain-text Bloaty output and the command-line arguments 431// used. Like before, it strips the debug symbols out before computing that diff. 432func runBloatyDiff(ctx context.Context, stripPath, bloatyPath, binaryName string) (string, []string, error) { 433 // These were created from the runBloaty step 434 binaryWithPatchWithSymbols := filepath.Join("build", binaryName) 435 binaryWithPatchWithNoSymbols := filepath.Join("build", binaryName+"_stripped") 436 // These will be created next 437 binaryWithNoPatchWithSymbols := filepath.Join("build_nopatch", binaryName) 438 binaryWithNoPatchWithNoSymbols := filepath.Join("build_nopatch", binaryName+"_stripped") 439 err := td.Do(ctx, td.Props("Create stripped version of no_patch binary"), func(ctx context.Context) error { 440 runCmd := &exec.Command{ 441 Name: "cp", 442 Args: []string{binaryWithNoPatchWithSymbols, binaryWithNoPatchWithNoSymbols}, 443 InheritEnv: true, 444 LogStdout: true, 445 LogStderr: true, 446 } 447 _, err := exec.RunCommand(ctx, runCmd) 448 if err != nil { 449 return skerr.Wrap(err) 450 } 451 runCmd = &exec.Command{ 452 Name: stripPath, 453 Args: []string{binaryWithNoPatchWithNoSymbols}, 454 InheritEnv: true, 455 LogStdout: true, 456 LogStderr: true, 457 } 458 _, err = exec.RunCommand(ctx, runCmd) 459 if err != nil { 460 return skerr.Wrap(err) 461 } 462 runCmd = &exec.Command{ 463 Name: "ls", 464 Args: []string{"-al", "build_nopatch"}, 465 InheritEnv: true, 466 LogStdout: true, 467 LogStderr: true, 468 } 469 _, err = exec.RunCommand(ctx, runCmd) 470 return err 471 }) 472 if err != nil { 473 return "", nil, skerr.Wrap(err) 474 } 475 476 runCmd := &exec.Command{ 477 Name: bloatyPath, 478 Args: []string{ 479 binaryWithPatchWithNoSymbols, 480 "--debug-file=" + binaryWithPatchWithSymbols, 481 "-d", "symbols", "-n", "0", "-s", "file", 482 "--", 483 binaryWithNoPatchWithNoSymbols, 484 "--debug-file=" + binaryWithNoPatchWithSymbols, 485 }, 486 InheritEnv: true, 487 LogStdout: 
true, 488 LogStderr: true, 489 } 490 491 var bloatyOutput string 492 if err := td.Do(ctx, td.Props(fmt.Sprintf("Run Bloaty diff against binary %q", binaryName)), func(ctx context.Context) error { 493 bloatyOutput, err = exec.RunCommand(ctx, runCmd) 494 return err 495 }); err != nil { 496 return "", nil, skerr.Wrap(err) 497 } 498 499 return bloatyOutput, runCmd.Args, nil 500} 501 502// computeTargetGCSDirectory computes the target GCS directory where to upload the Bloaty output file 503// and JSON metadata file. 504func computeTargetGCSDirectory(ctx context.Context, repoState types.RepoState, taskID, compileTaskName string) string { 505 timePrefix := now.Now(ctx).UTC().Format("2006/01/02/15") // YYYY/MM/DD/HH. 506 if repoState.IsTryJob() { 507 // Example: 2022/01/31/01/tryjob/12345/3/CkPp9ElAaEXyYWNHpXHU/Build-Debian10-Clang-x86_64-Release 508 return fmt.Sprintf("%s/tryjob/%s/%s/%s/%s", timePrefix, repoState.Patch.Issue, repoState.Patch.Patchset, taskID, compileTaskName) 509 } else { 510 // Example: 2022/01/31/01/033ccea12c0949d0f712471bfcb4ed6daf69aaff/Build-Debian10-Clang-x86_64-Release 511 return fmt.Sprintf("%s/%s/%s", timePrefix, repoState.Revision, compileTaskName) 512 } 513} 514 515// uploadPerfData gets the file size of the stripped binary (i.e. without debug symbols), formats 516// the JSON how Perf expects it, and uploads it to Perf's GCS bucket. 
517func uploadPerfData(ctx context.Context, perfGCS gcs.GCSClient, gcsPathPrefix, binaryName, taskID string, perfData format.Format) error { 518 // Use the taskID to guarantee unique file ids 519 gcsPath := "nano-json-v1/" + gcsPathPrefix + "/codesize_" + taskID + ".json" 520 521 err := td.Do(ctx, td.Props("Upload total stripped binary size to Perf"), func(ctx context.Context) error { 522 s, err := os_steps.Stat(ctx, filepath.Join("build", binaryName+"_stripped")) 523 if err != nil { 524 return err 525 } 526 totalBytes := s.Size() 527 528 s, err = os_steps.Stat(ctx, filepath.Join("build_nopatch", binaryName+"_stripped")) 529 if err != nil { 530 return err 531 } 532 beforeBytes := s.Size() 533 534 perfData.Results = []format.Result{{ 535 Key: map[string]string{"measurement": "stripped_binary_bytes"}, 536 Measurement: float32(totalBytes), 537 }, { 538 Key: map[string]string{"measurement": "stripped_diff_bytes"}, 539 Measurement: float32(totalBytes - beforeBytes), 540 }} 541 542 perfJSON, err := json.MarshalIndent(perfData, "", " ") 543 if err != nil { 544 return err 545 } 546 return uploadFileToGCS(ctx, perfGCS, gcsPath, perfJSON) 547 }) 548 if err != nil { 549 return skerr.Wrap(err) 550 } 551 return nil 552} 553 554// uploadFileToGCS uploads a file to the given GCS bucket. 555func uploadFileToGCS(ctx context.Context, gcsClient gcs.GCSClient, path string, contents []byte) error { 556 gcsURL := fmt.Sprintf("gs://%s/%s", gcsClient.Bucket(), path) 557 return td.Do(ctx, td.Props(fmt.Sprintf("Upload %s", gcsURL)), func(ctx context.Context) error { 558 if err := gcsClient.SetFileContents(ctx, path, gcs.FILE_WRITE_OPTS_TEXT, contents); err != nil { 559 return skerr.Wrapf(err, "Could not write task to %s", gcsURL) 560 } 561 return nil 562 }) 563} 564