// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package cache

import (
	"bytes"
	"crypto/sha256"
	"fmt"
	"hash"
	"io"
	"os"
	"runtime"
	"strings"
	"sync"
)

// debugHash, when true, makes the hashing functions in this file print
// their inputs and results to standard error.
// It is set when GODEBUG=gocachehash=1.
var debugHash = false

// HashSize is the number of bytes in a hash.
const HashSize = 32

// A Hash provides access to the canonical hash function used to index the cache.
// The current implementation uses salted SHA256, but clients must not assume this.
type Hash struct {
	// h is the underlying running hash; NewHash creates it with sha256.New.
	h hash.Hash
	// name labels this hash in debugHash output (for debugging).
	name string
	// buf is non-nil only in verify mode. Write mirrors its input into
	// buf so that Sum can record the hashed text in hashDebug.
	// NewHash allocates buf after writing hashSalt, so the salt itself
	// is not captured here.
	buf *bytes.Buffer
}

// hashSalt is a salt string added to the beginning of every hash
// created by NewHash. Using the Go version makes sure that different
// versions of the go command (or even different Git commits during
// work on the development branch) do not address the same cache
// entries, so that a bug in one version does not affect the execution
// of other versions. This salt will result in additional ActionID files
// in the cache, but not additional copies of the large output files,
// which are still addressed by unsalted SHA256.
//
// We strip any GOEXPERIMENTs the go tool was built with from this
// version string on the assumption that they shouldn't affect go tool
// execution. This allows bootstrapping to converge faster: dist builds
// go_bootstrap without any experiments, so by stripping experiments
// go_bootstrap and the final go binary will use the same salt.
var hashSalt = []byte(stripExperiment(runtime.Version()))

// stripExperiment strips any GOEXPERIMENT configuration from the Go
// version string.
50func stripExperiment(version string) string { 51 if i := strings.Index(version, " X:"); i >= 0 { 52 return version[:i] 53 } 54 return version 55} 56 57// Subkey returns an action ID corresponding to mixing a parent 58// action ID with a string description of the subkey. 59func Subkey(parent ActionID, desc string) ActionID { 60 h := sha256.New() 61 h.Write([]byte("subkey:")) 62 h.Write(parent[:]) 63 h.Write([]byte(desc)) 64 var out ActionID 65 h.Sum(out[:0]) 66 if debugHash { 67 fmt.Fprintf(os.Stderr, "HASH subkey %x %q = %x\n", parent, desc, out) 68 } 69 if verify { 70 hashDebug.Lock() 71 hashDebug.m[out] = fmt.Sprintf("subkey %x %q", parent, desc) 72 hashDebug.Unlock() 73 } 74 return out 75} 76 77// NewHash returns a new Hash. 78// The caller is expected to Write data to it and then call Sum. 79func NewHash(name string) *Hash { 80 h := &Hash{h: sha256.New(), name: name} 81 if debugHash { 82 fmt.Fprintf(os.Stderr, "HASH[%s]\n", h.name) 83 } 84 h.Write(hashSalt) 85 if verify { 86 h.buf = new(bytes.Buffer) 87 } 88 return h 89} 90 91// Write writes data to the running hash. 92func (h *Hash) Write(b []byte) (int, error) { 93 if debugHash { 94 fmt.Fprintf(os.Stderr, "HASH[%s]: %q\n", h.name, b) 95 } 96 if h.buf != nil { 97 h.buf.Write(b) 98 } 99 return h.h.Write(b) 100} 101 102// Sum returns the hash of the data written previously. 103func (h *Hash) Sum() [HashSize]byte { 104 var out [HashSize]byte 105 h.h.Sum(out[:0]) 106 if debugHash { 107 fmt.Fprintf(os.Stderr, "HASH[%s]: %x\n", h.name, out) 108 } 109 if h.buf != nil { 110 hashDebug.Lock() 111 if hashDebug.m == nil { 112 hashDebug.m = make(map[[HashSize]byte]string) 113 } 114 hashDebug.m[out] = h.buf.String() 115 hashDebug.Unlock() 116 } 117 return out 118} 119 120// In GODEBUG=gocacheverify=1 mode, 121// hashDebug holds the input to every computed hash ID, 122// so that we can work backward from the ID involved in a 123// cache entry mismatch to a description of what should be there. 
124var hashDebug struct { 125 sync.Mutex 126 m map[[HashSize]byte]string 127} 128 129// reverseHash returns the input used to compute the hash id. 130func reverseHash(id [HashSize]byte) string { 131 hashDebug.Lock() 132 s := hashDebug.m[id] 133 hashDebug.Unlock() 134 return s 135} 136 137var hashFileCache struct { 138 sync.Mutex 139 m map[string][HashSize]byte 140} 141 142// FileHash returns the hash of the named file. 143// It caches repeated lookups for a given file, 144// and the cache entry for a file can be initialized 145// using SetFileHash. 146// The hash used by FileHash is not the same as 147// the hash used by NewHash. 148func FileHash(file string) ([HashSize]byte, error) { 149 hashFileCache.Lock() 150 out, ok := hashFileCache.m[file] 151 hashFileCache.Unlock() 152 153 if ok { 154 return out, nil 155 } 156 157 h := sha256.New() 158 f, err := os.Open(file) 159 if err != nil { 160 if debugHash { 161 fmt.Fprintf(os.Stderr, "HASH %s: %v\n", file, err) 162 } 163 return [HashSize]byte{}, err 164 } 165 _, err = io.Copy(h, f) 166 f.Close() 167 if err != nil { 168 if debugHash { 169 fmt.Fprintf(os.Stderr, "HASH %s: %v\n", file, err) 170 } 171 return [HashSize]byte{}, err 172 } 173 h.Sum(out[:0]) 174 if debugHash { 175 fmt.Fprintf(os.Stderr, "HASH %s: %x\n", file, out) 176 } 177 178 SetFileHash(file, out) 179 return out, nil 180} 181 182// SetFileHash sets the hash returned by FileHash for file. 183func SetFileHash(file string, sum [HashSize]byte) { 184 hashFileCache.Lock() 185 if hashFileCache.m == nil { 186 hashFileCache.m = make(map[string][HashSize]byte) 187 } 188 hashFileCache.m[file] = sum 189 hashFileCache.Unlock() 190} 191