1// Copyright 2023 Google LLC 2// 3// Use of this source code is governed by a BSD-style license that can be 4// found in the LICENSE file. 5 6package common 7 8import ( 9 "context" 10 "fmt" 11 "strings" 12 13 sk_exec "go.skia.org/infra/go/exec" 14 15 "github.com/shirou/gopsutil/disk" 16 "go.skia.org/infra/go/skerr" 17 "go.skia.org/infra/task_driver/go/td" 18) 19 20// The DiskSpaceLow alert triggers at 10GB, so we set this threshold to a slightly higher value. 21// For reference, Swarming seems to quarantine machines when they go below 3GB. 22const bazelCachePartitionMinRequiredFreeSpaceBytes = uint64(15_000_000_000) 23 24type bazelCleanIfLowDiskSpaceContextKeyType = string 25 26// BazelCleanIfLowDiskSpaceContextKey is a context key that can be used from tests to override the 27// functions used by BazelCleanIfLowDiskSpace to compute the free space on the partition where the 28// Bazel cache lives. Values associated to this context key should be of type 29// BazelCleanIfLowDiskSpaceContextValue. 30const BazelCleanIfLowDiskSpaceContextKey = bazelCleanIfLowDiskSpaceContextKeyType("overwriteBazelCleanIfLowDiskSpaceDiskFns") 31 32// BazelCleanIfLowDiskSpaceContextValue is the type of the value associated with the 33// BazelCleanIfLowDiskSpaceContextKey context key. 34type BazelCleanIfLowDiskSpaceContextValue = struct { 35 GetPartitionMountpoints func() ([]string, error) 36 FreeBytesOnPartition func(string) (uint64, error) 37} 38 39// WithEnoughSpaceOnBazelCachePartitionTestOnlyContext returns a context that makes 40// common.BazelCleanIfLowDiskSpace() think there is enough space on the partition where the Bazel 41// cache is found. It also returns a path within said partition where the Bazel cache is assumed to 42// live, which should be passed to the code under test that invokes 43// common.BazelCleanIfLowDiskSpace(). 44// 45// This function is placed here rather than in the testutils Go package to avoid an import cycle. 46func WithEnoughSpaceOnBazelCachePartitionTestOnlyContext(ctx context.Context) (context.Context, string) { 47 const ( 48 bazelCacheDir = "/mnt/pd0/bazel_cache" 49 bazelCachePartitionMountpoint = "/mnt/pd0" 50 ) 51 52 ctx = context.WithValue(ctx, BazelCleanIfLowDiskSpaceContextKey, BazelCleanIfLowDiskSpaceContextValue{ 53 GetPartitionMountpoints: func() ([]string, error) { 54 // For the purposes of satisfying common.BazelCleanIfLowDiskSpace(), it suffices to only return 55 // the mountpoint for the partition where the Bazel cache directory lives. 56 return []string{bazelCachePartitionMountpoint}, nil 57 }, 58 FreeBytesOnPartition: func(mountpoint string) (uint64, error) { 59 if mountpoint != bazelCachePartitionMountpoint { 60 panic(fmt.Sprintf("mountpoint %q does not equal %q; this is a bug", mountpoint, bazelCachePartitionMountpoint)) 61 } 62 return uint64(20_000_000_000), nil 63 }, 64 }) 65 66 return ctx, bazelCacheDir 67} 68 69// BazelCleanIfLowDiskSpace runs "bazel clean" as a task driver step if disk space is too low. This 70// step should be added at the end of any task driver that shells out to Bazel in order to prevent 71// DiskSpaceLow alerts due to the Bazel cache (usually at /mnt/pd0/bazel_cache) growing too large. 72// 73// Ideally, we would like to tell Bazel to prevent the cache from growing above a certain size, but 74// there is currently no way to do this. See discussion in the below links: 75// 76// - https://github.com/bazelbuild/bazel/issues/1035 77// - https://github.com/bazelbuild/bazel/issues/5139 78// 79// Testing: Set the BazelCleanIfLowDiskSpaceContextKey context key to override the functions that 80// compute the free space (measured in bytes) on the partition where the Bazel cache lives. 81func BazelCleanIfLowDiskSpace(ctx context.Context, bazelCacheDir, bazelWorkspaceDir, pathToBazel string) error { 82 return skerr.Wrap(td.Do(ctx, td.Props("Clean Bazel cache if disk space is too low"), func(ctx context.Context) error { 83 // Are any of the disk-related functions mocked? 84 getPartitionMountpointsFn := getPartitionMountpoints 85 freeBytesOnPartitionFn := freeBytesOnPartition 86 if ctxValue := ctx.Value(BazelCleanIfLowDiskSpaceContextKey); ctxValue != nil { 87 typedCtxValue, ok := ctxValue.(BazelCleanIfLowDiskSpaceContextValue) 88 if !ok { 89 panic("context value associated with BazelCleanIfLowDiskSpaceContextKey is not a BazelCleanIfLowDiskSpaceContextValue") 90 } 91 if typedCtxValue.FreeBytesOnPartition != nil { 92 freeBytesOnPartitionFn = typedCtxValue.FreeBytesOnPartition 93 } 94 if typedCtxValue.GetPartitionMountpoints != nil { 95 getPartitionMountpointsFn = typedCtxValue.GetPartitionMountpoints 96 } 97 } 98 99 // Find the partition where the Bazel cache lives. 100 mountpoints, err := getPartitionMountpointsFn() 101 if err != nil { 102 return skerr.Wrap(err) 103 } 104 var mountpointCandidates []string // Any mountpoints that are prefixes of bazelCacheDir. 105 for _, mountpoint := range mountpoints { 106 if strings.HasPrefix(bazelCacheDir, mountpoint) { 107 mountpointCandidates = append(mountpointCandidates, mountpoint) 108 } 109 } 110 bazelCachePartitionMountpoint := "" 111 for _, candidate := range mountpointCandidates { 112 // The longest candidate wins. For example, if the Bazel cache directory is 113 // "/mnt/pd0/bazel_cache" and the candidates are "/mnt", "/mnt/pd0" and "/", then "/mnt/pd0" 114 // is selected. 115 if len(candidate) > len(bazelCachePartitionMountpoint) { 116 bazelCachePartitionMountpoint = candidate 117 } 118 } 119 if bazelCachePartitionMountpoint == "" { 120 return skerr.Fmt("could not find partition for Bazel cache directory at %q", bazelCacheDir) 121 } 122 123 // Find out how much free space is left on that partition. 124 freeSpace, err := freeBytesOnPartitionFn(bazelCachePartitionMountpoint) 125 if err != nil { 126 return skerr.Wrap(err) 127 } 128 129 // Run "bazel clean" if free space on that partition is too low. 130 if freeSpace < bazelCachePartitionMinRequiredFreeSpaceBytes { 131 msg := fmt.Sprintf("Free space on partition %s is %d bytes, which is below the threshold of %d bytes", bazelCachePartitionMountpoint, freeSpace, bazelCachePartitionMinRequiredFreeSpaceBytes) 132 if err := td.Do(ctx, td.Props(msg), func(ctx context.Context) error { return nil }); err != nil { 133 return skerr.Wrap(err) 134 } 135 136 cmd := &sk_exec.Command{ 137 Name: pathToBazel, 138 Dir: bazelWorkspaceDir, 139 Args: []string{"clean"}, 140 InheritEnv: true, // Make sure "bazelisk" is on PATH. 141 LogStdout: true, 142 LogStderr: true, 143 } 144 _, err := sk_exec.RunCommand(ctx, cmd) 145 return skerr.Wrap(err) 146 } 147 148 msg := fmt.Sprintf("No need to clear the Bazel cache: free space on partition %s is %d bytes, which is above the threshold of %d bytes", bazelCachePartitionMountpoint, freeSpace, bazelCachePartitionMinRequiredFreeSpaceBytes) 149 return skerr.Wrap(td.Do(ctx, td.Props(msg), func(ctx context.Context) error { return nil })) 150 })) 151} 152 153// getPartitionMountpoints returns the mountpoints for all mounted partitions. 154func getPartitionMountpoints() ([]string, error) { 155 partitionStats, err := disk.Partitions(true /* =all */) 156 if err != nil { 157 return nil, skerr.Wrap(err) 158 } 159 var mountpoints []string 160 for _, stat := range partitionStats { 161 mountpoints = append(mountpoints, stat.Mountpoint) 162 } 163 return mountpoints, nil 164} 165 166// freeBytesOnPartition returns the free space measured in bytes for the partition mounted at the 167// given mountpoint 168func freeBytesOnPartition(mountpoint string) (uint64, error) { 169 usage, err := disk.Usage(mountpoint) 170 if err != nil { 171 return 0, skerr.Wrap(err) 172 } 173 return usage.Free, nil 174} 175