xref: /aosp_15_r20/external/skia/infra/bots/task_drivers/common/bazel_clean_step.go (revision c8dee2aa9b3f27cf6c858bd81872bdeb2c07ed17)
1// Copyright 2023 Google LLC
2//
3// Use of this source code is governed by a BSD-style license that can be
4// found in the LICENSE file.
5
6package common
7
8import (
9	"context"
10	"fmt"
11	"strings"
12
13	sk_exec "go.skia.org/infra/go/exec"
14
15	"github.com/shirou/gopsutil/disk"
16	"go.skia.org/infra/go/skerr"
17	"go.skia.org/infra/task_driver/go/td"
18)
19
// bazelCachePartitionMinRequiredFreeSpaceBytes is the minimum amount of free space, in bytes, that
// the partition holding the Bazel cache must have for "bazel clean" to be skipped.
//
// The DiskSpaceLow alert triggers at 10GB, so this threshold is set slightly higher (15GB). For
// reference, Swarming seems to quarantine machines when they go below 3GB.
const bazelCachePartitionMinRequiredFreeSpaceBytes uint64 = 15_000_000_000
23
// bazelCleanIfLowDiskSpaceContextKeyType is the dedicated type of this package's context key. It
// is a defined type rather than an alias of string so that the key can never collide with plain
// string keys stored on the same context by other packages.
type bazelCleanIfLowDiskSpaceContextKeyType string

// BazelCleanIfLowDiskSpaceContextKey is a context key that can be used from tests to override the
// functions used by BazelCleanIfLowDiskSpace to compute the free space on the partition where the
// Bazel cache lives. Values associated to this context key should be of type
// BazelCleanIfLowDiskSpaceContextValue.
const BazelCleanIfLowDiskSpaceContextKey = bazelCleanIfLowDiskSpaceContextKeyType("overwriteBazelCleanIfLowDiskSpaceDiskFns")
31
// BazelCleanIfLowDiskSpaceContextValue is the type of the value associated with the
// BazelCleanIfLowDiskSpaceContextKey context key.
//
// BazelCleanIfLowDiskSpace uses each non-nil field in place of the corresponding real disk
// function; a nil field leaves the real function in use:
//
//   - GetPartitionMountpoints overrides the function that lists the mountpoints of all mounted
//     partitions.
//   - FreeBytesOnPartition overrides the function that reports the free space, in bytes, on the
//     partition mounted at the given mountpoint.
type BazelCleanIfLowDiskSpaceContextValue = struct {
	GetPartitionMountpoints func() ([]string, error)
	FreeBytesOnPartition    func(string) (uint64, error)
}
38
39// WithEnoughSpaceOnBazelCachePartitionTestOnlyContext returns a context that makes
40// common.BazelCleanIfLowDiskSpace() think there is enough space on the partition where the Bazel
41// cache is found. It also returns a path within said partition where the Bazel cache is assumed to
42// live, which should be passed to the code under test that invokes
43// common.BazelCleanIfLowDiskSpace().
44//
45// This function is placed here rather than in the testutils Go package to avoid an import cycle.
46func WithEnoughSpaceOnBazelCachePartitionTestOnlyContext(ctx context.Context) (context.Context, string) {
47	const (
48		bazelCacheDir                 = "/mnt/pd0/bazel_cache"
49		bazelCachePartitionMountpoint = "/mnt/pd0"
50	)
51
52	ctx = context.WithValue(ctx, BazelCleanIfLowDiskSpaceContextKey, BazelCleanIfLowDiskSpaceContextValue{
53		GetPartitionMountpoints: func() ([]string, error) {
54			// For the purposes of satisfying common.BazelCleanIfLowDiskSpace(), it suffices to only return
55			// the mountpoint for the partition where the Bazel cache directory lives.
56			return []string{bazelCachePartitionMountpoint}, nil
57		},
58		FreeBytesOnPartition: func(mountpoint string) (uint64, error) {
59			if mountpoint != bazelCachePartitionMountpoint {
60				panic(fmt.Sprintf("mountpoint %q does not equal %q; this is a bug", mountpoint, bazelCachePartitionMountpoint))
61			}
62			return uint64(20_000_000_000), nil
63		},
64	})
65
66	return ctx, bazelCacheDir
67}
68
69// BazelCleanIfLowDiskSpace runs "bazel clean" as a task driver step if disk space is too low. This
70// step should be added at the end of any task driver that shells out to Bazel in order to prevent
71// DiskSpaceLow alerts due to the Bazel cache (usually at /mnt/pd0/bazel_cache) growing too large.
72//
73// Ideally, we would like to tell Bazel to prevent the cache from growing above a certain size, but
74// there is currently no way to do this. See discussion in the below links:
75//
76// - https://github.com/bazelbuild/bazel/issues/1035
77// - https://github.com/bazelbuild/bazel/issues/5139
78//
79// Testing: Set the BazelCleanIfLowDiskSpaceContextKey context key to override the functions that
80// compute the free space (measured in bytes) on the partition where the Bazel cache lives.
81func BazelCleanIfLowDiskSpace(ctx context.Context, bazelCacheDir, bazelWorkspaceDir, pathToBazel string) error {
82	return skerr.Wrap(td.Do(ctx, td.Props("Clean Bazel cache if disk space is too low"), func(ctx context.Context) error {
83		// Are any of the disk-related functions mocked?
84		getPartitionMountpointsFn := getPartitionMountpoints
85		freeBytesOnPartitionFn := freeBytesOnPartition
86		if ctxValue := ctx.Value(BazelCleanIfLowDiskSpaceContextKey); ctxValue != nil {
87			typedCtxValue, ok := ctxValue.(BazelCleanIfLowDiskSpaceContextValue)
88			if !ok {
89				panic("context value associated with BazelCleanIfLowDiskSpaceContextKey is not a BazelCleanIfLowDiskSpaceContextValue")
90			}
91			if typedCtxValue.FreeBytesOnPartition != nil {
92				freeBytesOnPartitionFn = typedCtxValue.FreeBytesOnPartition
93			}
94			if typedCtxValue.GetPartitionMountpoints != nil {
95				getPartitionMountpointsFn = typedCtxValue.GetPartitionMountpoints
96			}
97		}
98
99		// Find the partition where the Bazel cache lives.
100		mountpoints, err := getPartitionMountpointsFn()
101		if err != nil {
102			return skerr.Wrap(err)
103		}
104		var mountpointCandidates []string // Any mountpoints that are prefixes of bazelCacheDir.
105		for _, mountpoint := range mountpoints {
106			if strings.HasPrefix(bazelCacheDir, mountpoint) {
107				mountpointCandidates = append(mountpointCandidates, mountpoint)
108			}
109		}
110		bazelCachePartitionMountpoint := ""
111		for _, candidate := range mountpointCandidates {
112			// The longest candidate wins. For example, if the Bazel cache directory is
113			// "/mnt/pd0/bazel_cache" and the candidates are "/mnt", "/mnt/pd0" and "/", then "/mnt/pd0"
114			// is selected.
115			if len(candidate) > len(bazelCachePartitionMountpoint) {
116				bazelCachePartitionMountpoint = candidate
117			}
118		}
119		if bazelCachePartitionMountpoint == "" {
120			return skerr.Fmt("could not find partition for Bazel cache directory at %q", bazelCacheDir)
121		}
122
123		// Find out how much free space is left on that partition.
124		freeSpace, err := freeBytesOnPartitionFn(bazelCachePartitionMountpoint)
125		if err != nil {
126			return skerr.Wrap(err)
127		}
128
129		// Run "bazel clean" if free space on that partition is too low.
130		if freeSpace < bazelCachePartitionMinRequiredFreeSpaceBytes {
131			msg := fmt.Sprintf("Free space on partition %s is %d bytes, which is below the threshold of %d bytes", bazelCachePartitionMountpoint, freeSpace, bazelCachePartitionMinRequiredFreeSpaceBytes)
132			if err := td.Do(ctx, td.Props(msg), func(ctx context.Context) error { return nil }); err != nil {
133				return skerr.Wrap(err)
134			}
135
136			cmd := &sk_exec.Command{
137				Name:       pathToBazel,
138				Dir:        bazelWorkspaceDir,
139				Args:       []string{"clean"},
140				InheritEnv: true, // Make sure "bazelisk" is on PATH.
141				LogStdout:  true,
142				LogStderr:  true,
143			}
144			_, err := sk_exec.RunCommand(ctx, cmd)
145			return skerr.Wrap(err)
146		}
147
148		msg := fmt.Sprintf("No need to clear the Bazel cache: free space on partition %s is %d bytes, which is above the threshold of %d bytes", bazelCachePartitionMountpoint, freeSpace, bazelCachePartitionMinRequiredFreeSpaceBytes)
149		return skerr.Wrap(td.Do(ctx, td.Props(msg), func(ctx context.Context) error { return nil }))
150	}))
151}
152
153// getPartitionMountpoints returns the mountpoints for all mounted partitions.
154func getPartitionMountpoints() ([]string, error) {
155	partitionStats, err := disk.Partitions(true /* =all */)
156	if err != nil {
157		return nil, skerr.Wrap(err)
158	}
159	var mountpoints []string
160	for _, stat := range partitionStats {
161		mountpoints = append(mountpoints, stat.Mountpoint)
162	}
163	return mountpoints, nil
164}
165
166// freeBytesOnPartition returns the free space measured in bytes for the partition mounted at the
167// given mountpoint
168func freeBytesOnPartition(mountpoint string) (uint64, error) {
169	usage, err := disk.Usage(mountpoint)
170	if err != nil {
171		return 0, skerr.Wrap(err)
172	}
173	return usage.Free, nil
174}
175