xref: /aosp_15_r20/external/skia/tools/testrunners/common/android/adb_test_runner/adb_test_runner.go (revision c8dee2aa9b3f27cf6c858bd81872bdeb2c07ed17)
1// Copyright 2023 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// This program is the brains behind the adb_test Bazel rule, which allows running a test on an
6// Android device via adb.
7
8package main
9
10import (
11	"bytes"
12	"context"
13	"flag"
14	"fmt"
15	"os"
16	"path/filepath"
17	"regexp"
18	"sort"
19	"strconv"
20	"strings"
21	"time"
22
23	"go.skia.org/infra/go/exec"
24
25	"go.skia.org/skia/bazel/device_specific_configs"
26	"golang.org/x/exp/slices"
27)
28
// timeout bounds the total running time of this program.
const timeout = time.Hour

// adbTestOutputDirEnvVar is the name of the environment variable through which the test running
// on the device is told where to write its output files, if any.
const adbTestOutputDirEnvVar = "ADB_TEST_OUTPUT_DIR"
37
38func main() {
39	deviceSpecificBazelConfigFlag := flag.String("device-specific-bazel-config", "", "The Bazel config corresponding to this Android device (see //bazel/devicesrc).")
40	benchmarkFlag := flag.Bool("benchmark", false, "Whether this is a benchmark test or not. The Android device will be tuned to reduce variations in performance for single-threaded tests.")
41	archiveFlag := flag.String("archive", "", "Tarball with the payload to upload to the device under test.")
42	testRunnerFlag := flag.String("test-runner", "", "Path to the test runner inside the tarball.")
43	// Some context regarding the parsing step mentioned in this flag's help text:
44	//
45	//  - The adb_test Bazel rule produces a Bash script that invokes this Go program with various
46	//    flags. These flags can be divided into two groups: those that are determined when the
47	//    adb_test target is built, which are hardcoded in the script; and those that are determined
48	//    at runtime, which the script should set based on its own command-line arguments.
49	//
50	//  - The only two flags determined at runtime are --device-specific-bazel-config and
51	//    --test-runner-extra-args. The first should be set with the value of the
52	//    --device-specific-bazel-config flag passed to the script, while the second should be set to
53	//    a sepace-separated string with any other command-line arguments passed to the script.
54	//
55	//  - Ideally, we would want the script to parse its own command-line arguments and set
56	//    --device-specific-bazel-config and --test-runner-extra-args as described in the previous
57	//    paragraph. However, parsing flags in Bash is awkward, and the resulting logic is hard to
58	//    test.
59	//
60	//  - Instead, the script simply sets flag --test-runner-extra-args to a space-separated string
61	//    with all command-line arguments it receives, and this Go program parses out flag
62	//    --device-specific-bazel-config from said space-separated string.
63	testRunnerExtraArgsFlag := flag.String("test-runner-extra-args", "", "Any extra command-line arguments to pass to the test runner inside the tarball. Note that if this string contains a --device-specific-bazel-config=<config name> flag, it will be omitted from the test runner's arguments, and <config name> will override this program's --device-specific-bazel-config flag.")
64	outputDirFlag := flag.String("output-dir", "", "Path on the host machine where to write any outputs produced by the test.")
65	flag.Parse()
66
67	var quotedArgs []string
68	for _, arg := range os.Args[1:] {
69		quotedArgs = append(quotedArgs, fmt.Sprintf("%q", arg))
70	}
71	log("adb_test_runner invoked with arguments: %s", strings.Join(quotedArgs, " "))
72
73	testRunnerExtraArgs, deviceSpecificBazelConfigName := parseTestRunnerExtraArgsFlag(*testRunnerExtraArgsFlag)
74	if deviceSpecificBazelConfigName != "" {
75		deviceSpecificBazelConfigFlag = &deviceSpecificBazelConfigName
76	}
77
78	die := func(msg string, a ...interface{}) {
79		printToStdErr(msg, a...)
80		os.Exit(1)
81	}
82
83	if *deviceSpecificBazelConfigFlag == "" {
84		die("Flag --device-specific-bazel-config is required.\n")
85	}
86	if *archiveFlag == "" {
87		die("Flag --archive is required.\n")
88	}
89	if *testRunnerFlag == "" {
90		die("Flag --test-runner is required.\n")
91	}
92
93	// Fail early if the output directory on the host machine is not empty or if it's non-writable.
94	if *outputDirFlag != "" {
95		// Check whether the directory exists.
96		fileInfo, err := os.Stat(*outputDirFlag)
97		if err != nil {
98			die("while stating output dir %q: %s\n", *outputDirFlag, err)
99		}
100		if !fileInfo.IsDir() {
101			die("output dir %q is not a directory.\n", *outputDirFlag)
102		}
103
104		// Check whether the directory is empty.
105		entries, err := os.ReadDir(*outputDirFlag)
106		if err != nil {
107			die("while listing the contents of output dir %q: %s\n", *outputDirFlag, err)
108		}
109		if len(entries) != 0 {
110			die("output dir %q is not empty.\n", *outputDirFlag)
111		}
112
113		// Check whether the directory is writable by creating and then removing an empty file.
114		testFile := filepath.Join(*outputDirFlag, "test")
115		if err := os.WriteFile(testFile, []byte{}, 0644); err != nil {
116			die("while writing test file %q in output dir: %s\n", testFile, err)
117		}
118		if err := os.Remove(testFile); err != nil {
119			die("while deleting test file %q in output dir: %s\n", testFile, err)
120		}
121	}
122
123	deviceSpecificBazelConfig, ok := device_specific_configs.Configs[*deviceSpecificBazelConfigFlag]
124	if !ok {
125		die("Unknown device-specific Bazel config: %q\n", *deviceSpecificBazelConfigFlag)
126	}
127
128	ctx, cancelFn := context.WithTimeout(context.Background(), timeout)
129	defer cancelFn()
130	if err := runTest(ctx, deviceSpecificBazelConfig.Model(), *benchmarkFlag, *archiveFlag, *testRunnerFlag, testRunnerExtraArgs, *outputDirFlag); err != nil {
131		die("%s\n", err)
132	}
133}
134
// deviceSpecificBazelConfigFlagRegexp matches a --device-specific-bazel-config flag, written
// either as "--device-specific-bazel-config=<config name>" or as
// "--device-specific-bazel-config <config name>", together with any surrounding whitespace. The
// config name is captured in the "configName" group.
var deviceSpecificBazelConfigFlagRegexp = regexp.MustCompile(`\s*--device-specific-bazel-config(?:=|\s+)(?P<configName>[a-zA-Z0-9_-]+)\s*`)

// parseTestRunnerExtraArgsFlag takes the raw --test-runner-extra-args flag, which might contain a
// --device-specific-bazel-config=<config name> argument, and returns the former without the latter
// and the <config name>.
//
// Every occurrence of the flag (and its surrounding whitespace) is replaced with a single space,
// so that no --device-specific-bazel-config flag is ever forwarded to the test runner. If the flag
// appears more than once, the config name of the first occurrence is returned. If the flag is
// absent, the raw string is returned unchanged along with an empty config name.
func parseTestRunnerExtraArgsFlag(rawTestRunnerExtraArgsFlag string) (testRunnerExtraArgs string, deviceSpecificBazelConfig string) {
	match := deviceSpecificBazelConfigFlagRegexp.FindStringSubmatch(rawTestRunnerExtraArgsFlag)
	if match == nil {
		return rawTestRunnerExtraArgsFlag, ""
	}

	deviceSpecificBazelConfig = match[deviceSpecificBazelConfigFlagRegexp.SubexpIndex("configName")]
	// Replace by pattern rather than by the literal text of the first match: a later occurrence
	// with a different value or different surrounding whitespace would otherwise be left behind.
	testRunnerExtraArgs = deviceSpecificBazelConfigFlagRegexp.ReplaceAllString(rawTestRunnerExtraArgsFlag, " ")
	return testRunnerExtraArgs, deviceSpecificBazelConfig
}
150
151// runTest runs the test on device via adb.
152func runTest(ctx context.Context, model string, isBenchmarkTest bool, archive, testRunner, testRunnerExtraArgs, outputDir string) error {
153	// TODO(lovisolo): Should we check that the machine is attached to the expected device type?
154	//                 E.g. run "adb devices -l" and check that the output contains
155	//                 "model:Pixel_5". What happens if there are more than one device?
156
157	// Clean up the device before running the test. Previous tests might have left the device in a
158	// dirty state.
159	cleanUpDevice := func(model string) error {
160		_, err := adb(ctx, "shell", "su", "root", "rm", "-rf", getArchivePathOnDevice(model), getArchiveExtractionDirOnDevice(model), getOutputDirOnDevice(model))
161		return err
162	}
163	if err := cleanUpDevice(model); err != nil {
164		return fmt.Errorf("while cleaning up the device before running the test: %s", err)
165	}
166
167	// Also clean up device after running the test.
168	defer func() {
169		if err := cleanUpDevice(model); err != nil {
170			printToStdErr("while cleaning up the device after running the test: %s\n", err)
171		}
172	}()
173
174	// Reset the device after running the test.
175	//
176	// Based on
177	// https://skia.googlesource.com/skia/+/0e8023dc0a1a5655703b39454c090b5a004415d6/infra/bots/recipe_modules/flavor/android.py#512.
178	//
179	// Note that android.py (see link above) used to set to quarantine the Raspberry Pi in case of
180	// "infra failures", but we do not do that here because that is incompatible with the use case
181	// where a developer runs an Android test on their local workstation.
182	defer func() {
183		if _, err := adb(ctx, "reboot"); err != nil {
184			printToStdErr("while executing \"adb reboot\": %s", err)
185			return
186		}
187
188		if _, err := adb(ctx, "wait-for-device"); err != nil {
189			printToStdErr("while executing \"adb wait-for-device\": %s", err)
190		}
191	}()
192
193	// Turn CPU cores on/off, set CPU core frequencies, etc.
194	if err := scaleDevice(ctx, model, isBenchmarkTest); err != nil {
195		return fmt.Errorf("while scaling device: %s", err)
196	}
197
198	// Upload archive to device.
199	if _, err := adb(ctx, "push", archive, getArchivePathOnDevice(model)); err != nil {
200		return fmt.Errorf("while pushing archive to device: %s", err)
201	}
202
203	// Extract archive.
204	if _, err := adb(ctx, "shell", "su", "root", "mkdir", "-p", getArchiveExtractionDirOnDevice(model)); err != nil {
205		return fmt.Errorf("while creating archive extraction directory on device: %s", err)
206	}
207	if _, err := adb(ctx, "shell", "su", "root", "tar", "xzvf", getArchivePathOnDevice(model), "-C", getArchiveExtractionDirOnDevice(model)); err != nil {
208		return fmt.Errorf("while extracting archive on device: %s", err)
209	}
210
211	// Create on-device output dir if necessary.
212	if outputDir != "" {
213		if _, err := adb(ctx, "shell", "su", "root", "mkdir", "-p", getOutputDirOnDevice(model)); err != nil {
214			return fmt.Errorf("while creating output dir on device: %s", err)
215		}
216	}
217
218	// If necessary, we will tell the test runner where to store output files via an environment
219	// variable.
220	outputDirEnvVar := ""
221	if outputDir != "" {
222		outputDirEnvVar = fmt.Sprintf("%s=%s", adbTestOutputDirEnvVar, getOutputDirOnDevice(model))
223	}
224
225	// Run test.
226	stdin := fmt.Sprintf("cd %s && %s %s %s", getArchiveExtractionDirOnDevice(model), outputDirEnvVar, testRunner, testRunnerExtraArgs)
227	if _, err := adbWithStdin(ctx, stdin, "shell", "su", "root"); err != nil {
228		return fmt.Errorf("while running the test: %s", err)
229	}
230
231	// Pull output files from the device if necessary.
232	if outputDir != "" {
233		// This will save the output files to <output dir>/<output dir on device>.
234		if _, err := adb(ctx, "pull", getOutputDirOnDevice(model), outputDir); err != nil {
235			return fmt.Errorf("while pulling on-device output dir %q into host output dir %q: %s", getOutputDirOnDevice(model), outputDir, err)
236		}
237
238		// But we want the output files to be placed in <output dir>, so we'll move them one by one.
239		srcDir := filepath.Join(outputDir, filepath.Base(getOutputDirOnDevice(model)))
240		dstDir := outputDir
241		entries, err := os.ReadDir(srcDir)
242		if err != nil {
243			return fmt.Errorf("while reading the contents of output dir %q: %s", outputDir, err)
244		}
245		for _, entry := range entries {
246			oldPath := filepath.Join(srcDir, entry.Name())
247			newPath := filepath.Join(dstDir, entry.Name())
248			if err := os.Rename(oldPath, newPath); err != nil {
249				return fmt.Errorf("while renaming %q to %q: %s", oldPath, newPath, err)
250			}
251		}
252
253		// Finally, delete the spurious <output dir>/<output dir on device> directory created by
254		// "adb pull".
255		if err := os.Remove(srcDir); err != nil {
256			return fmt.Errorf("while removing directory %q: %s", srcDir, err)
257		}
258	}
259
260	return nil
261}
262
263// scaleDevice scales the CPU of the device as required based on the type of test.
264func scaleDevice(ctx context.Context, model string, isBenchmarkTest bool) error {
265	// Based on
266	// https://skia.googlesource.com/skia/+/5a635f2211ceb7639ceca4200e0094a6ca17111b/infra/bots/recipe_modules/flavor/android.py#151
267	// and
268	// https://skia.googlesource.com/skia/+/5a635f2211ceb7639ceca4200e0094a6ca17111b/infra/bots/recipe_modules/flavor/android.py#179.
269	if doesNotAllowADBRoot(model) {
270		return nil
271	}
272
273	if err := adbRoot(ctx); err != nil {
274		return fmt.Errorf("while executing \"adb root\": %s", err)
275	}
276
277	if isBenchmarkTest {
278		return scaleDeviceForBenchmark(ctx, model)
279	}
280	return scaleDeviceForPerformance(ctx, model)
281}
282
283// scaleDeviceForPerformance tunes the device's CPUs for performance in order to make tests run as
284// fast as possible. Do not use this function for benchmark tests, as it does not guarantee stable
285// performance over successive runs.
286//
287// Based on
288// https://skia.googlesource.com/skia/+/0e8023dc0a1a5655703b39454c090b5a004415d6/infra/bots/recipe_modules/flavor/android.py#149.
289func scaleDeviceForPerformance(ctx context.Context, model string) error {
290	// This is paranoia... any CPUs we disabled while running benchmark tests ought to be back online
291	// now that we've restarted the device.
292	for _, cpu := range getCPUsToDisableForBenchmarkTests(model) {
293		if err := enableOrDisableCPU(ctx, model, cpu, true /* =enable */); err != nil {
294			return err
295		}
296	}
297
298	// CPU cores are grouped together by kind. Scaling a single core of the biggest kind has the
299	// effect of scaling all cores of that kind.
300	cpusToScale := []int{getBiggestKindCPU(model)}
301	// For big.LITTLE devices, make sure we also scale the little cores up; there is a chance they
302	// are still in powersave mode from when Swarming slows things down for cooling down and
303	// charging.
304	if !slices.Contains(cpusToScale, 0) {
305		cpusToScale = append(cpusToScale, 0)
306	}
307
308	for _, cpu := range cpusToScale {
309		if err := setCPUGovernor(ctx, model, cpu, getCPUGovernorForPerformanceTests(model)); err != nil {
310			return err
311		}
312	}
313
314	return nil
315}
316
317// scaleDeviceForBenchmark tunes the device's CPUs for single-threaded tests, such as benchmark
318// tests. It tries to minimize variations in the performance over successive test runs.
319//
320// Based on
321// https://skia.googlesource.com/skia/+/0e8023dc0a1a5655703b39454c090b5a004415d6/infra/bots/recipe_modules/flavor/android.py#177.
322func scaleDeviceForBenchmark(ctx context.Context, model string) error {
323	if err := setCPUGovernor(ctx, model, getBiggestKindCPU(model), getCPUGovernorForBenchmarkTests(model)); err != nil {
324		return err
325	}
326
327	if model != "Pixel6" && model != "Pixel7" {
328		// CPU cores are grouped together by kind. Scaling a single core of the biggest kind has the
329		// effect of scaling all cores of that kind.
330		if err := scaleCPU(ctx, model, getBiggestKindCPU(model), 0.6); err != nil {
331			return err
332		}
333	}
334
335	for _, cpu := range getCPUsToDisableForBenchmarkTests(model) {
336		if err := enableOrDisableCPU(ctx, model, cpu, false /* =enable */); err != nil {
337			return err
338		}
339	}
340
341	return nil
342}
343
// doesNotAllowADBRoot reports whether the given device model is one on which the ADB daemon
// (adbd) cannot be restarted as the root user (e.g. via "adb root").
//
// Based on
// https://skia.googlesource.com/skia/+/0e8023dc0a1a5655703b39454c090b5a004415d6/infra/bots/recipe_modules/flavor/android.py#45.
func doesNotAllowADBRoot(model string) bool {
	switch model {
	case "GalaxyS7_G930FD",
		"GalaxyS9",
		"GalaxyS20",
		"P30",
		"Pixel4",
		"Pixel4XL",
		"JioNext":
		return true
	}

	// "Pixel5" is deliberately absent. It is listed in the "cant_root" list in
	// https://skia.googlesource.com/skia/+/0e8023dc0a1a5655703b39454c090b5a004415d6/infra/bots/recipe_modules/flavor/android.py#45.
	// However, lovisolo@ did not encounter any issues running "adb root" on it, so it is excluded
	// to see if anything breaks.
	return false
}
367
// getBiggestKindCPU returns the ID of one CPU core of the biggest kind for the given model.
//
// A single ID is enough because CPUs are grouped together: scaling one CPU in a group scales them
// all. For example, the Nexus 5x has a big.LITTLE CPU with the little cores grouped as cpu0-3 and
// the big cores as cpu4-5. For single-threaded tests such as benchmark tests it therefore makes
// sense to disable cpu0-3 (the little cores) and scale up just one big core, e.g. cpu4, which
// scales up all big cores (cpu4-5) at the same frequency.
//
// Based on
// https://skia.googlesource.com/skia/+/5a635f2211ceb7639ceca4200e0094a6ca17111b/infra/bots/recipe_modules/flavor/android.py#58.
func getBiggestKindCPU(model string) int {
	switch model {
	case "Nexus5x", "Pixel2XL":
		return 4
	case "Pixel":
		return 2
	}
	// For any other model we assume that 0 is the ID of a core of the biggest kind.
	return 0
}
388
// getCPUsToDisableForBenchmarkTests returns the IDs of the CPUs of the given device model that
// should be disabled when running single-threaded tests such as benchmark tests.
//
// On devices with multiple kinds of cores, such as big.LITTLE cores, we noticed a lot of noise
// that seems to be caused by alternating between the slow and fast cores. We reduce this noise by
// only enabling cores of a given kind.
//
// Based on
// https://skia.googlesource.com/skia/+/5a635f2211ceb7639ceca4200e0094a6ca17111b/infra/bots/recipe_modules/flavor/android.py#70.
func getCPUsToDisableForBenchmarkTests(model string) []int {
	switch model {
	case "Nexus5x", "Pixel2XL":
		return []int{0, 1, 2, 3}
	case "Pixel":
		return []int{0, 1}
	case "Pixel6", "Pixel7":
		// Only use the 4 small cores.
		return []int{4, 5, 6, 7}
	}
	// Unknown models have no CPUs to disable.
	return nil
}
408
// getCPUGovernorForPerformanceTests returns the name of the CPU governor to use on the given
// device model when running performance tests.
func getCPUGovernorForPerformanceTests(model string) string {
	switch model {
	case "AndroidOne":
		// AndroidOne doesn't support the "ondemand" governor, but "hotplug" is similar.
		return "hotplug"

	case "Pixel3a", "Pixel4", "Pixel4a", "Pixel5", "Wembley", "Pixel6", "Pixel7":
		// Pixel3a/4/4a support the "userspace", "powersave", "performance" and "schedutil"
		// governors. The "performance" governor seems like a reasonable choice.
		return "performance"
	}
	return "ondemand"
}
438
// getCPUGovernorForBenchmarkTests returns the name of the CPU governor to use on the given device
// model when running benchmark tests.
func getCPUGovernorForBenchmarkTests(model string) string {
	// Pixel 6 and 7 use the "powersave" CPU governor. This decision was originally made for Pixel 6
	// in
	// https://skia-review.googlesource.com/c/skia/+/500439/14/infra/bots/recipe_modules/flavor/android.py#157,
	// and jcgregorio@ seems to remember that the "userspace" governor did not work for Pixel 6 for
	// some unknown reason.
	if model == "Pixel6" || model == "Pixel7" {
		return "powersave"
	}
	return "userspace"
}
457
458// maxAttempts is the number of maximum attempts performed by withRetry.
459const maxAttempts = 3
460
461// withRetry runs the given function up to maxAttempts times until it succeeds. It performs device
462// recovery steps between failures. If the function fails maxAttempts times, it returns the error
463// returned by the last function invocation.
464//
465// Based on
466// https://skia.googlesource.com/skia/+/0e8023dc0a1a5655703b39454c090b5a004415d6/infra/bots/recipe_modules/run/api.py#81.
467func withRetry(ctx context.Context, model, description string, fn func() error) error {
468	var err error
469
470	for attempt := 1; attempt <= maxAttempts; attempt++ {
471		log("Attempt %d/%d: %s", attempt, maxAttempts, description)
472
473		err = fn()
474		if err != nil {
475			log("Attempt %d/%d failed with: %s", attempt, maxAttempts, err)
476
477			if attempt == maxAttempts {
478				break
479			}
480
481			log("Rebooting device and waiting for it to be ready...")
482			if err := rebootAndWaitForDevice(ctx, model); err != nil {
483				return fmt.Errorf("while rebooting and waiting for device: %s", err)
484			}
485		} else {
486			log("Attempt %d/%d was successful.", attempt, maxAttempts)
487			return nil
488		}
489	}
490
491	return fmt.Errorf("last attempt at %q failed with: %s", description, err)
492}
493
494// rebootAndWaitForDevice reboots the device after a failed attempt at performing an action, and
495// waits for it to become ready.
496//
497// Based on https://skia-review.googlesource.com/c/skia/+/631997.
498func rebootAndWaitForDevice(ctx context.Context, model string) error {
499	// We used to run "adb kill-server" in android.py:
500	// https://skia.googlesource.com/skia/+/0e8023dc0a1a5655703b39454c090b5a004415d6/infra/bots/recipe_modules/flavor/android.py#87.
501	// However, this breaks the use case when we are talking to an ADB server port-forwarded from
502	// another machine (e.g. a Skolo Raspberry Pi). Specifically, any subsequent "adb" commands fail
503	// with "Connection reset by peer" because there is no ADB server listening on the other side of
504	// the port-forward.
505	//
506	// Thus, we use "adb reconnect" instead, which does not kill the ADB server and causes it to
507	// reconnect with the device without killing the server. It is unclear to lovisolo@ why the
508	// "adb kill-server" step was necessary in the first place, so it might be a good idea to remove
509	// this step in the future and see if things still work without it.
510	if _, err := adb(ctx, "reconnect"); err != nil {
511		return fmt.Errorf("while executing \"adb reconnect\": %s", err)
512	}
513
514	if _, err := adb(ctx, "wait-for-device"); err != nil {
515		return fmt.Errorf("while executing \"adb wait-for-device\": %s", err)
516	}
517
518	if _, err := adb(ctx, "reboot"); err != nil {
519		return fmt.Errorf("while executing \"adb reboot\": %s", err)
520	}
521
522	// Wait until the boot is actually complete. See https://android.stackexchange.com/a/164050.
523	waitForBootComplete := "while [[ -z $(getprop sys.boot_completed) ]]; do sleep 1; done"
524	if _, err := adb(ctx, "wait-for-device", "shell", waitForBootComplete); err != nil {
525		return fmt.Errorf("while executing \"adb wait-for-device shell %q\": %s", waitForBootComplete, err)
526	}
527
528	if doesNotAllowADBRoot(model) {
529		return nil
530	}
531
532	if err := adbRoot(ctx); err != nil {
533		return fmt.Errorf("while executing \"adb root\": %s", err)
534	}
535
536	return nil
537}
538
539// adbRoot runs the "adb root" command and checks for errors.
540//
541// Based on
542// https://skia.googlesource.com/skia/+/0e8023dc0a1a5655703b39454c090b5a004415d6/infra/bots/recipe_modules/flavor/android.py#212.
543func adbRoot(ctx context.Context) error {
544	if output, err := adb(ctx, "root"); err != nil {
545		return err
546	} else if strings.Contains(output, "cannot") {
547		// Check for message like "adbd cannot run as root in production builds".
548		return fmt.Errorf("output of \"adb root\" contains the word \"cannot\"; full output: %q", output)
549	}
550	return nil
551}
552
553// setCPUGovernor sets the CPU governor of the given CPU.
554//
555// Based on
556// https://skia.googlesource.com/skia/+/5a635f2211ceb7639ceca4200e0094a6ca17111b/infra/bots/recipe_modules/flavor/android.py#251.
557func setCPUGovernor(ctx context.Context, model string, cpu int, governor string) error {
558	return withRetry(ctx, model, fmt.Sprintf("Setting CPU %d governor to %q", cpu, governor), func() error {
559		return writeFileOnDeviceAndAssertContents(ctx, fmt.Sprintf("/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor", cpu), governor)
560	})
561}
562
563// scaleCPU sets the frequency of the given CPU.
564//
565// Based on
566// https://skia.googlesource.com/skia/+/5a635f2211ceb7639ceca4200e0094a6ca17111b/infra/bots/recipe_modules/flavor/android.py#337.
567func scaleCPU(ctx context.Context, model string, cpu int, maxFreqFactor float64) error {
568	availableFreqs, err := getAvailableCPUFrequencies(ctx, cpu)
569	if err != nil {
570		return fmt.Errorf("while querying available CPU frequencies: %s", err)
571	}
572
573	// Find an available frequency that is close enough to the target frequency.
574	//
575	// Based on
576	// https://skia.googlesource.com/skia/+/0e8023dc0a1a5655703b39454c090b5a004415d6/infra/bots/recipe_modules/flavor/android.py#374.
577	maxFreq := availableFreqs[len(availableFreqs)-1]
578	targetFreq := int64(float64(maxFreq) * maxFreqFactor)
579	chosenFreq := maxFreq
580	for i := len(availableFreqs) - 1; i >= 0; i-- {
581		candidateFreq := availableFreqs[i]
582		if candidateFreq <= targetFreq {
583			chosenFreq = candidateFreq
584			break
585		}
586	}
587
588	// We will try scaling the CPU multiple times. Some devices, especially Nexus 7s, seem to
589	// occassionally fail when setting the CPU frequency.
590	//
591	// See https://skia-review.googlesource.com/c/skia/+/78140.
592	return withRetry(ctx, model, fmt.Sprintf("Setting CPU ID %d frequency to %d", cpu, chosenFreq), func() error {
593		chosenFreqAsStr := fmt.Sprintf("%d", chosenFreq)
594
595		// Scale the CPU to the chosen frequency.
596		//
597		// If scaling_max_freq is lower than our chosen frequency, it won't take. We must set
598		// scaling_min_freq first, because if we try to set scaling_max_freq to be less than
599		// scaling_min_freq (which sometimes happens after certain devices reboot) it returns a
600		// perplexing permissions error.
601		if err := writeFileOnDevice(ctx, fmt.Sprintf("/sys/devices/system/cpu/cpu%d/cpufreq/scaling_min_freq", cpu), "0"); err != nil {
602			return err
603		}
604		if err := writeFileOnDevice(ctx, fmt.Sprintf("/sys/devices/system/cpu/cpu%d/cpufreq/scaling_max_freq", cpu), chosenFreqAsStr); err != nil {
605			return err
606		}
607		if err := writeFileOnDevice(ctx, fmt.Sprintf("/sys/devices/system/cpu/cpu%d/cpufreq/scaling_setspeed", cpu), chosenFreqAsStr); err != nil {
608			return err
609		}
610
611		// Wait for settings to take effect. See https://skia-review.googlesource.com/c/skia/+/78140.
612		log("Sleeping for 5 seconds before checking whether CPU frequency change took effect...")
613		time.Sleep(5 * time.Second)
614
615		// Check that the frequency change took effect.
616		return assertContentsOfFileOnDevice(ctx, fmt.Sprintf("/sys/devices/system/cpu/cpu%d/cpufreq/scaling_cur_freq", cpu), chosenFreqAsStr)
617	})
618}
619
620// getAvailableCPUFrequencies returns the list of available frequencies for a given CPU ID.
621//
622// Based on
623// https://skia.googlesource.com/skia/+/0e8023dc0a1a5655703b39454c090b5a004415d6/infra/bots/recipe_modules/flavor/android.py#362
624// to
625// https://skia.googlesource.com/skia/+/0e8023dc0a1a5655703b39454c090b5a004415d6/infra/bots/recipe_modules/flavor/android.py#372.
626func getAvailableCPUFrequencies(ctx context.Context, cpu int) ([]int64, error) {
627	// All devices we test on give a list of their available frequencies.
628	scalingAvailableFrequenciesFile := fmt.Sprintf("/sys/devices/system/cpu/cpu%d/cpufreq/scaling_available_frequencies", cpu)
629
630	output, err := readFileOnDevice(ctx, scalingAvailableFrequenciesFile)
631	if err != nil {
632		return nil, err
633	}
634
635	if len(output) == 0 {
636		return nil, fmt.Errorf("file %s is empty", scalingAvailableFrequenciesFile)
637	}
638
639	// Check for errors like "/system/bin/sh: file not found".
640	if strings.Contains(output, "/system/bin/sh") {
641		return nil, fmt.Errorf("unrecognized file %s contents: %q", scalingAvailableFrequenciesFile, output)
642	}
643
644	// Parse available frequencies.
645	var availableFreqs []int64
646	for _, freqAsString := range strings.Split(strings.TrimSpace(output), " ") {
647		freq, err := strconv.ParseInt(freqAsString, 10, 64)
648		if err != nil {
649			return nil, fmt.Errorf("while parsing frequency %q: %s", freqAsString, err)
650		}
651		availableFreqs = append(availableFreqs, freq)
652	}
653	sort.Slice(availableFreqs, func(i, j int) bool { return availableFreqs[i] < availableFreqs[j] })
654
655	return availableFreqs, nil
656}
657
658// enableOrDisableCPU enables or disables the given CPU ID.
659//
660// Based on
661// https://skia.googlesource.com/skia/+/0e8023dc0a1a5655703b39454c090b5a004415d6/infra/bots/recipe_modules/flavor/android.py#287.
662func enableOrDisableCPU(ctx context.Context, model string, cpu int, enable bool) error {
663	targetState := "0"
664	msg := fmt.Sprintf("Disabling CPU %d", cpu)
665	if enable {
666		targetState = "1"
667		msg = fmt.Sprintf("Enabling CPU %d", cpu)
668	}
669
670	return withRetry(ctx, model, msg, func() error {
671		// Is the CPU already at the target state?
672		//
673		// ADB returns exit code 1 if we try to echo "1" to a CPU that's already online.
674		if output, err := readFileOnDevice(ctx, fmt.Sprintf("/sys/devices/system/cpu/cpu%d/online", cpu)); err != nil {
675			return fmt.Errorf("while checking whether CPU %d is online: %s", cpu, err)
676		} else if output == targetState {
677			log("CPU %d is already at the desired state.", cpu)
678			return nil
679		}
680
681		// Change the CPU state.
682		return writeFileOnDeviceAndAssertContents(ctx, fmt.Sprintf("/sys/devices/system/cpu/cpu%d/online", cpu), targetState)
683	})
684}
685
686// writeFileOnDevice writes a file on the device using an "echo contents > path" command executed
687// via "adb shell".
688func writeFileOnDevice(ctx context.Context, path, contents string) error {
689	if _, err := adb(ctx, "shell", fmt.Sprintf("echo %q > %s", contents, path)); err != nil {
690		return fmt.Errorf("while writing %s: %s", path, err)
691	}
692	return nil
693}
694
695// readFileOnDevice reads the contents of a file on the device.
696func readFileOnDevice(ctx context.Context, path string) (string, error) {
697	contents, err := adb(ctx, "shell", "cat "+path)
698	if err != nil {
699		return "", fmt.Errorf("while reading file %s: %s", path, err)
700	}
701	return contents, nil
702}
703
704// assertContentsOfFileOnDevice asserts that a file on the device has the expected contents. Both
705// the expected and actual contents are trimmed of leading and trailing spaces prior to comparing
706// them.
707func assertContentsOfFileOnDevice(ctx context.Context, path, expectedContents string) error {
708	actualContents, err := readFileOnDevice(ctx, path)
709	if err != nil {
710		return err
711	}
712	actualContents = strings.TrimSpace(actualContents)
713	expectedContents = strings.TrimSpace(expectedContents)
714	if actualContents != expectedContents {
715		return fmt.Errorf("file %s does not have the expected contents; expected %q, got %q (leading/trailing spaces trimmed)", path, expectedContents, actualContents)
716	}
717	return nil
718}
719
720// writeFileOnDeviceAndAssertContents combines writeFileOnDevice and assertContentsOfFileOnDevice
721// in a single function.
722func writeFileOnDeviceAndAssertContents(ctx context.Context, path, contents string) error {
723	if err := writeFileOnDevice(ctx, path, contents); err != nil {
724		return err
725	}
726	return assertContentsOfFileOnDevice(ctx, path, contents)
727}
728
// getArchivePathOnDevice returns the path in the device's file system to which the archive should
// be uploaded.
func getArchivePathOnDevice(model string) string {
	// The archive is uploaded under /sdcard, which is writable by non-root users. Files in that
	// directory cannot be executed, though, which is why the archive is extracted in a directory
	// under /data: that one allows executing files but requires root privileges.
	//
	// This might change in the future based on the device type, whether or not it's rooted, etc.
	const archivePath = "/sdcard/bazel-adb-test.tar.gz"
	return archivePath
}
739
// getArchiveExtractionDirOnDevice returns the directory in the device's file system into which
// the archive should be extracted.
func getArchiveExtractionDirOnDevice(model string) string {
	// This might change in the future based on the device type, whether or not it's rooted, etc.
	const extractionDir = "/data/bazel-adb-test"
	return extractionDir
}
746
// getOutputDirOnDevice returns the directory in the device's file system where the test should
// write its output files, if any. Those files are later copied from the device to the machine
// where adb is running.
func getOutputDirOnDevice(model string) string {
	// Output files go to a directory under /sdcard, rather than /data, because the /data directory
	// permissions make it impossible to "adb pull" from it.
	//
	// This might change in the future based on the device type, whether or not it's rooted, etc.
	const outputDir = "/sdcard/bazel-adb-test-output-dir"
	return outputDir
}
757
758// adb runs adb with the given arguments. It returns the combined standard output and standard
759// error.
760func adb(ctx context.Context, args ...string) (string, error) {
761	return adbWithStdin(ctx, "", args...)
762}
763
764// adbWithStdin runs adb with the given arguments, and pipes the given input via standard input. It
765// returns the combined standard output and standard error.
766func adbWithStdin(ctx context.Context, stdin string, args ...string) (string, error) {
767	commandAndArgs := strings.Join(append([]string{"adb"}, args...), " ")
768	withStdin := ""
769	if stdin != "" {
770		withStdin = fmt.Sprintf(" with standard input %q", stdin)
771	}
772	log("Executing: %q%s...", commandAndArgs, withStdin)
773
774	cmd := &exec.Command{
775		Name:   "adb",
776		Args:   args,
777		Stdout: os.Stdout,
778		Stderr: os.Stderr,
779	}
780	if stdin != "" {
781		cmd.Stdin = bytes.NewBufferString(stdin)
782	}
783	return exec.RunCommand(ctx, cmd)
784}
785
// log prints a formatted message to standard output, prefixed with the current time in RFC 3339
// format between square brackets and followed by a newline. It panics if the message cannot be
// written.
func log(msg string, a ...interface{}) {
	stamped := make([]interface{}, 0, len(a)+1)
	stamped = append(stamped, time.Now().Format(time.RFC3339))
	stamped = append(stamped, a...)

	_, err := fmt.Printf("[%s] "+msg+"\n", stamped...)
	if err != nil {
		panic(err)
	}
}
794
// printToStdErr prints a formatted message to standard error, exactly as given (no prefix and no
// trailing newline are added). It panics if the message cannot be written.
func printToStdErr(msg string, a ...interface{}) {
	_, err := fmt.Fprintf(os.Stderr, msg, a...)
	if err != nil {
		panic(err)
	}
}
800