1// Copyright 2017 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package cache
6
7import (
8	"bytes"
9	"crypto/sha256"
10	"fmt"
11	"hash"
12	"io"
13	"os"
14	"runtime"
15	"strings"
16	"sync"
17)
18
// debugHash reports whether hash inputs and results are traced
// to standard error. It is set when GODEBUG=gocachehash=1.
var debugHash bool
20
// HashSize is the number of bytes in a hash.
// It is the length of the arrays returned by Hash.Sum and FileHash.
// Those results are filled in with Sum(out[:0]), so HashSize must
// equal the digest length of the hash in use (currently SHA256).
const HashSize = 32
23
// A Hash provides access to the canonical hash function used to index the cache.
// The current implementation uses salted SHA256, but clients must not assume this.
// A Hash is created by NewHash, fed with Write, and read out with Sum.
type Hash struct {
	// h is the running hash; NewHash sets it to sha256.New().
	h    hash.Hash
	name string        // label printed in debugHash trace lines
	buf  *bytes.Buffer // copy of the written data, kept only in verify mode (nil otherwise)
}
31
// hashSalt is a salt string added to the beginning of every hash
// created by NewHash. Using the Go version makes sure that different
// versions of the go command (or even different Git commits during
// work on the development branch) do not address the same cache
// entries, so that a bug in one version does not affect the execution
// of other versions. This salt will result in additional ActionID files
// in the cache, but not additional copies of the large output files,
// which are still addressed by unsalted SHA256.
//
// Only NewHash writes the salt; Subkey and FileHash in this file
// feed plain SHA256 without writing it.
//
// We strip any GOEXPERIMENTs the go tool was built with from this
// version string on the assumption that they shouldn't affect go tool
// execution. This allows bootstrapping to converge faster: dist builds
// go_bootstrap without any experiments, so by stripping experiments
// go_bootstrap and the final go binary will use the same salt.
var hashSalt = []byte(stripExperiment(runtime.Version()))
47
// stripExperiment returns version with any GOEXPERIMENT suffix removed.
// The suffix is taken to start at the first occurrence of " X:";
// a version without that marker is returned unchanged.
func stripExperiment(version string) string {
	const marker = " X:"
	cut := strings.Index(version, marker)
	if cut < 0 {
		return version
	}
	return version[:cut]
}
56
57// Subkey returns an action ID corresponding to mixing a parent
58// action ID with a string description of the subkey.
59func Subkey(parent ActionID, desc string) ActionID {
60	h := sha256.New()
61	h.Write([]byte("subkey:"))
62	h.Write(parent[:])
63	h.Write([]byte(desc))
64	var out ActionID
65	h.Sum(out[:0])
66	if debugHash {
67		fmt.Fprintf(os.Stderr, "HASH subkey %x %q = %x\n", parent, desc, out)
68	}
69	if verify {
70		hashDebug.Lock()
71		hashDebug.m[out] = fmt.Sprintf("subkey %x %q", parent, desc)
72		hashDebug.Unlock()
73	}
74	return out
75}
76
77// NewHash returns a new Hash.
78// The caller is expected to Write data to it and then call Sum.
79func NewHash(name string) *Hash {
80	h := &Hash{h: sha256.New(), name: name}
81	if debugHash {
82		fmt.Fprintf(os.Stderr, "HASH[%s]\n", h.name)
83	}
84	h.Write(hashSalt)
85	if verify {
86		h.buf = new(bytes.Buffer)
87	}
88	return h
89}
90
91// Write writes data to the running hash.
92func (h *Hash) Write(b []byte) (int, error) {
93	if debugHash {
94		fmt.Fprintf(os.Stderr, "HASH[%s]: %q\n", h.name, b)
95	}
96	if h.buf != nil {
97		h.buf.Write(b)
98	}
99	return h.h.Write(b)
100}
101
102// Sum returns the hash of the data written previously.
103func (h *Hash) Sum() [HashSize]byte {
104	var out [HashSize]byte
105	h.h.Sum(out[:0])
106	if debugHash {
107		fmt.Fprintf(os.Stderr, "HASH[%s]: %x\n", h.name, out)
108	}
109	if h.buf != nil {
110		hashDebug.Lock()
111		if hashDebug.m == nil {
112			hashDebug.m = make(map[[HashSize]byte]string)
113		}
114		hashDebug.m[out] = h.buf.String()
115		hashDebug.Unlock()
116	}
117	return out
118}
119
// In GODEBUG=gocacheverify=1 mode,
// hashDebug holds the input to every computed hash ID,
// so that we can work backward from the ID involved in a
// cache entry mismatch to a description of what should be there.
// The embedded mutex guards m, which starts out nil and is
// allocated lazily (see Hash.Sum).
var hashDebug struct {
	sync.Mutex
	m map[[HashSize]byte]string // hash ID -> description of its input
}
128
129// reverseHash returns the input used to compute the hash id.
130func reverseHash(id [HashSize]byte) string {
131	hashDebug.Lock()
132	s := hashDebug.m[id]
133	hashDebug.Unlock()
134	return s
135}
136
// hashFileCache remembers the hashes computed by FileHash or
// registered with SetFileHash, keyed by the file name as passed in.
// The embedded mutex guards m, which starts out nil and is
// allocated lazily by SetFileHash.
var hashFileCache struct {
	sync.Mutex
	m map[string][HashSize]byte // file name -> hash of its content
}
141
142// FileHash returns the hash of the named file.
143// It caches repeated lookups for a given file,
144// and the cache entry for a file can be initialized
145// using SetFileHash.
146// The hash used by FileHash is not the same as
147// the hash used by NewHash.
148func FileHash(file string) ([HashSize]byte, error) {
149	hashFileCache.Lock()
150	out, ok := hashFileCache.m[file]
151	hashFileCache.Unlock()
152
153	if ok {
154		return out, nil
155	}
156
157	h := sha256.New()
158	f, err := os.Open(file)
159	if err != nil {
160		if debugHash {
161			fmt.Fprintf(os.Stderr, "HASH %s: %v\n", file, err)
162		}
163		return [HashSize]byte{}, err
164	}
165	_, err = io.Copy(h, f)
166	f.Close()
167	if err != nil {
168		if debugHash {
169			fmt.Fprintf(os.Stderr, "HASH %s: %v\n", file, err)
170		}
171		return [HashSize]byte{}, err
172	}
173	h.Sum(out[:0])
174	if debugHash {
175		fmt.Fprintf(os.Stderr, "HASH %s: %x\n", file, out)
176	}
177
178	SetFileHash(file, out)
179	return out, nil
180}
181
182// SetFileHash sets the hash returned by FileHash for file.
183func SetFileHash(file string, sum [HashSize]byte) {
184	hashFileCache.Lock()
185	if hashFileCache.m == nil {
186		hashFileCache.m = make(map[string][HashSize]byte)
187	}
188	hashFileCache.m[file] = sum
189	hashFileCache.Unlock()
190}
191