1// Copyright 2019 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package tlog
6
7import (
8	"fmt"
9	"strconv"
10	"strings"
11)
12
13// A Tile is a description of a transparency log tile.
14// A tile of height H at level L offset N lists W consecutive hashes
15// at level H*L of the tree starting at offset N*(2**H).
16// A complete tile lists 2**H hashes; a partial tile lists fewer.
17// Note that a tile represents the entire subtree of height H
18// with those hashes as the leaves. The levels above H*L
19// can be reconstructed by hashing the leaves.
20//
21// Each Tile can be encoded as a “tile coordinate path”
22// of the form tile/H/L/NNN[.p/W].
23// The .p/W suffix is present only for partial tiles, meaning W < 2**H.
24// The NNN element is an encoding of N into 3-digit path elements.
25// All but the last path element begins with an "x".
26// For example,
27// Tile{H: 3, L: 4, N: 1234067, W: 1}'s path
28// is tile/3/4/x001/x234/067.p/1, and
29// Tile{H: 3, L: 4, N: 1234067, W: 8}'s path
30// is tile/3/4/x001/x234/067.
31// See the [Tile.Path] method and the [ParseTilePath] function.
32//
33// The special level L=-1 holds raw record data instead of hashes.
34// In this case, the level encodes into a tile path as the path element
35// "data" instead of "-1".
36//
37// See also https://golang.org/design/25530-sumdb#checksum-database
38// and https://research.swtch.com/tlog#tiling_a_log.
39type Tile struct {
40	H int   // height of tile (1 ≤ H ≤ 30)
41	L int   // level in tiling (-1 ≤ L ≤ 63)
42	N int64 // number within level (0 ≤ N, unbounded)
43	W int   // width of tile (1 ≤ W ≤ 2**H; 2**H is complete tile)
44}
45
46// TileForIndex returns the tile of fixed height h ≥ 1
47// and least width storing the given hash storage index.
48//
49// If h ≤ 0, [TileForIndex] panics.
50func TileForIndex(h int, index int64) Tile {
51	if h <= 0 {
52		panic(fmt.Sprintf("TileForIndex: invalid height %d", h))
53	}
54	t, _, _ := tileForIndex(h, index)
55	return t
56}
57
58// tileForIndex returns the tile of height h ≥ 1
59// storing the given hash index, which can be
60// reconstructed using tileHash(data[start:end]).
61func tileForIndex(h int, index int64) (t Tile, start, end int) {
62	level, n := SplitStoredHashIndex(index)
63	t.H = h
64	t.L = level / h
65	level -= t.L * h // now level within tile
66	t.N = n << uint(level) >> uint(t.H)
67	n -= t.N << uint(t.H) >> uint(level) // now n within tile at level
68	t.W = int((n + 1) << uint(level))
69	return t, int(n<<uint(level)) * HashSize, int((n+1)<<uint(level)) * HashSize
70}
71
72// HashFromTile returns the hash at the given storage index,
73// provided that t == TileForIndex(t.H, index) or a wider version,
74// and data is t's tile data (of length at least t.W*HashSize).
75func HashFromTile(t Tile, data []byte, index int64) (Hash, error) {
76	if t.H < 1 || t.H > 30 || t.L < 0 || t.L >= 64 || t.W < 1 || t.W > 1<<uint(t.H) {
77		return Hash{}, fmt.Errorf("invalid tile %v", t.Path())
78	}
79	if len(data) < t.W*HashSize {
80		return Hash{}, fmt.Errorf("data len %d too short for tile %v", len(data), t.Path())
81	}
82	t1, start, end := tileForIndex(t.H, index)
83	if t.L != t1.L || t.N != t1.N || t.W < t1.W {
84		return Hash{}, fmt.Errorf("index %v is in %v not %v", index, t1.Path(), t.Path())
85	}
86	return tileHash(data[start:end]), nil
87}
88
89// tileHash computes the subtree hash corresponding to the (2^K)-1 hashes in data.
90func tileHash(data []byte) Hash {
91	if len(data) == 0 {
92		panic("bad math in tileHash")
93	}
94	if len(data) == HashSize {
95		var h Hash
96		copy(h[:], data)
97		return h
98	}
99	n := len(data) / 2
100	return NodeHash(tileHash(data[:n]), tileHash(data[n:]))
101}
102
103// NewTiles returns the coordinates of the tiles of height h ≥ 1
104// that must be published when publishing from a tree of
105// size newTreeSize to replace a tree of size oldTreeSize.
106// (No tiles need to be published for a tree of size zero.)
107//
108// If h ≤ 0, NewTiles panics.
109func NewTiles(h int, oldTreeSize, newTreeSize int64) []Tile {
110	if h <= 0 {
111		panic(fmt.Sprintf("NewTiles: invalid height %d", h))
112	}
113	H := uint(h)
114	var tiles []Tile
115	for level := uint(0); newTreeSize>>(H*level) > 0; level++ {
116		oldN := oldTreeSize >> (H * level)
117		newN := newTreeSize >> (H * level)
118		if oldN == newN {
119			continue
120		}
121		for n := oldN >> H; n < newN>>H; n++ {
122			tiles = append(tiles, Tile{H: h, L: int(level), N: n, W: 1 << H})
123		}
124		n := newN >> H
125		if w := int(newN - n<<H); w > 0 {
126			tiles = append(tiles, Tile{H: h, L: int(level), N: n, W: w})
127		}
128	}
129	return tiles
130}
131
132// ReadTileData reads the hashes for tile t from r
133// and returns the corresponding tile data.
134func ReadTileData(t Tile, r HashReader) ([]byte, error) {
135	size := t.W
136	if size == 0 {
137		size = 1 << uint(t.H)
138	}
139	start := t.N << uint(t.H)
140	indexes := make([]int64, size)
141	for i := 0; i < size; i++ {
142		indexes[i] = StoredHashIndex(t.H*t.L, start+int64(i))
143	}
144
145	hashes, err := r.ReadHashes(indexes)
146	if err != nil {
147		return nil, err
148	}
149	if len(hashes) != len(indexes) {
150		return nil, fmt.Errorf("tlog: ReadHashes(%d indexes) = %d hashes", len(indexes), len(hashes))
151	}
152
153	tile := make([]byte, size*HashSize)
154	for i := 0; i < size; i++ {
155		copy(tile[i*HashSize:], hashes[i][:])
156	}
157	return tile, nil
158}
159
160// To limit the size of any particular directory listing,
161// we encode the (possibly very large) number N
162// by encoding three digits at a time.
163// For example, 123456789 encodes as x123/x456/789.
164// Each directory has at most 1000 each xNNN, NNN, and NNN.p children,
165// so there are at most 3000 entries in any one directory.
166const pathBase = 1000
167
168// Path returns a tile coordinate path describing t.
169func (t Tile) Path() string {
170	n := t.N
171	nStr := fmt.Sprintf("%03d", n%pathBase)
172	for n >= pathBase {
173		n /= pathBase
174		nStr = fmt.Sprintf("x%03d/%s", n%pathBase, nStr)
175	}
176	pStr := ""
177	if t.W != 1<<uint(t.H) {
178		pStr = fmt.Sprintf(".p/%d", t.W)
179	}
180	var L string
181	if t.L == -1 {
182		L = "data"
183	} else {
184		L = fmt.Sprintf("%d", t.L)
185	}
186	return fmt.Sprintf("tile/%d/%s/%s%s", t.H, L, nStr, pStr)
187}
188
189// ParseTilePath parses a tile coordinate path.
190func ParseTilePath(path string) (Tile, error) {
191	f := strings.Split(path, "/")
192	if len(f) < 4 || f[0] != "tile" {
193		return Tile{}, &badPathError{path}
194	}
195	h, err1 := strconv.Atoi(f[1])
196	isData := false
197	if f[2] == "data" {
198		isData = true
199		f[2] = "0"
200	}
201	l, err2 := strconv.Atoi(f[2])
202	if err1 != nil || err2 != nil || h < 1 || l < 0 || h > 30 {
203		return Tile{}, &badPathError{path}
204	}
205	w := 1 << uint(h)
206	if dotP := f[len(f)-2]; strings.HasSuffix(dotP, ".p") {
207		ww, err := strconv.Atoi(f[len(f)-1])
208		if err != nil || ww <= 0 || ww >= w {
209			return Tile{}, &badPathError{path}
210		}
211		w = ww
212		f[len(f)-2] = dotP[:len(dotP)-len(".p")]
213		f = f[:len(f)-1]
214	}
215	f = f[3:]
216	n := int64(0)
217	for _, s := range f {
218		nn, err := strconv.Atoi(strings.TrimPrefix(s, "x"))
219		if err != nil || nn < 0 || nn >= pathBase {
220			return Tile{}, &badPathError{path}
221		}
222		n = n*pathBase + int64(nn)
223	}
224	if isData {
225		l = -1
226	}
227	t := Tile{H: h, L: l, N: n, W: w}
228	if path != t.Path() {
229		return Tile{}, &badPathError{path}
230	}
231	return t, nil
232}
233
234type badPathError struct {
235	path string
236}
237
238func (e *badPathError) Error() string {
239	return fmt.Sprintf("malformed tile path %q", e.path)
240}
241
242// A TileReader reads tiles from a go.sum database log.
243type TileReader interface {
244	// Height returns the height of the available tiles.
245	Height() int
246
247	// ReadTiles returns the data for each requested tile.
248	// If ReadTiles returns err == nil, it must also return
249	// a data record for each tile (len(data) == len(tiles))
250	// and each data record must be the correct length
251	// (len(data[i]) == tiles[i].W*HashSize).
252	//
253	// An implementation of ReadTiles typically reads
254	// them from an on-disk cache or else from a remote
255	// tile server. Tile data downloaded from a server should
256	// be considered suspect and not saved into a persistent
257	// on-disk cache before returning from ReadTiles.
258	// When the client confirms the validity of the tile data,
259	// it will call SaveTiles to signal that they can be safely
260	// written to persistent storage.
261	// See also https://research.swtch.com/tlog#authenticating_tiles.
262	ReadTiles(tiles []Tile) (data [][]byte, err error)
263
264	// SaveTiles informs the TileReader that the tile data
265	// returned by ReadTiles has been confirmed as valid
266	// and can be saved in persistent storage (on disk).
267	SaveTiles(tiles []Tile, data [][]byte)
268}
269
270// TileHashReader returns a HashReader that satisfies requests
271// by loading tiles of the given tree.
272//
273// The returned [HashReader] checks that loaded tiles are
274// valid for the given tree. Therefore, any hashes returned
275// by the HashReader are already proven to be in the tree.
276func TileHashReader(tree Tree, tr TileReader) HashReader {
277	return &tileHashReader{tree: tree, tr: tr}
278}
279
280type tileHashReader struct {
281	tree Tree
282	tr   TileReader
283}
284
285// tileParent returns t's k'th tile parent in the tiles for a tree of size n.
286// If there is no such parent, tileParent returns Tile{}.
287func tileParent(t Tile, k int, n int64) Tile {
288	t.L += k
289	t.N >>= uint(k * t.H)
290	t.W = 1 << uint(t.H)
291	if max := n >> uint(t.L*t.H); t.N<<uint(t.H)+int64(t.W) >= max {
292		if t.N<<uint(t.H) >= max {
293			return Tile{}
294		}
295		t.W = int(max - t.N<<uint(t.H))
296	}
297	return t
298}
299
300func (r *tileHashReader) ReadHashes(indexes []int64) ([]Hash, error) {
301	h := r.tr.Height()
302
303	tileOrder := make(map[Tile]int) // tileOrder[tileKey(tiles[i])] = i
304	var tiles []Tile
305
306	// Plan to fetch tiles necessary to recompute tree hash.
307	// If it matches, those tiles are authenticated.
308	stx := subTreeIndex(0, r.tree.N, nil)
309	stxTileOrder := make([]int, len(stx))
310	for i, x := range stx {
311		tile, _, _ := tileForIndex(h, x)
312		tile = tileParent(tile, 0, r.tree.N)
313		if j, ok := tileOrder[tile]; ok {
314			stxTileOrder[i] = j
315			continue
316		}
317		stxTileOrder[i] = len(tiles)
318		tileOrder[tile] = len(tiles)
319		tiles = append(tiles, tile)
320	}
321
322	// Plan to fetch tiles containing the indexes,
323	// along with any parent tiles needed
324	// for authentication. For most calls,
325	// the parents are being fetched anyway.
326	indexTileOrder := make([]int, len(indexes))
327	for i, x := range indexes {
328		if x >= StoredHashIndex(0, r.tree.N) {
329			return nil, fmt.Errorf("indexes not in tree")
330		}
331
332		tile, _, _ := tileForIndex(h, x)
333
334		// Walk up parent tiles until we find one we've requested.
335		// That one will be authenticated.
336		k := 0
337		for ; ; k++ {
338			p := tileParent(tile, k, r.tree.N)
339			if j, ok := tileOrder[p]; ok {
340				if k == 0 {
341					indexTileOrder[i] = j
342				}
343				break
344			}
345		}
346
347		// Walk back down recording child tiles after parents.
348		// This loop ends by revisiting the tile for this index
349		// (tileParent(tile, 0, r.tree.N)) unless k == 0, in which
350		// case the previous loop did it.
351		for k--; k >= 0; k-- {
352			p := tileParent(tile, k, r.tree.N)
353			if p.W != 1<<uint(p.H) {
354				// Only full tiles have parents.
355				// This tile has a parent, so it must be full.
356				return nil, fmt.Errorf("bad math in tileHashReader: %d %d %v", r.tree.N, x, p)
357			}
358			tileOrder[p] = len(tiles)
359			if k == 0 {
360				indexTileOrder[i] = len(tiles)
361			}
362			tiles = append(tiles, p)
363		}
364	}
365
366	// Fetch all the tile data.
367	data, err := r.tr.ReadTiles(tiles)
368	if err != nil {
369		return nil, err
370	}
371	if len(data) != len(tiles) {
372		return nil, fmt.Errorf("TileReader returned bad result slice (len=%d, want %d)", len(data), len(tiles))
373	}
374	for i, tile := range tiles {
375		if len(data[i]) != tile.W*HashSize {
376			return nil, fmt.Errorf("TileReader returned bad result slice (%v len=%d, want %d)", tile.Path(), len(data[i]), tile.W*HashSize)
377		}
378	}
379
380	// Authenticate the initial tiles against the tree hash.
381	// They are arranged so that parents are authenticated before children.
382	// First the tiles needed for the tree hash.
383	th, err := HashFromTile(tiles[stxTileOrder[len(stx)-1]], data[stxTileOrder[len(stx)-1]], stx[len(stx)-1])
384	if err != nil {
385		return nil, err
386	}
387	for i := len(stx) - 2; i >= 0; i-- {
388		h, err := HashFromTile(tiles[stxTileOrder[i]], data[stxTileOrder[i]], stx[i])
389		if err != nil {
390			return nil, err
391		}
392		th = NodeHash(h, th)
393	}
394	if th != r.tree.Hash {
395		// The tiles do not support the tree hash.
396		// We know at least one is wrong, but not which one.
397		return nil, fmt.Errorf("downloaded inconsistent tile")
398	}
399
400	// Authenticate full tiles against their parents.
401	for i := len(stx); i < len(tiles); i++ {
402		tile := tiles[i]
403		p := tileParent(tile, 1, r.tree.N)
404		j, ok := tileOrder[p]
405		if !ok {
406			return nil, fmt.Errorf("bad math in tileHashReader %d %v: lost parent of %v", r.tree.N, indexes, tile)
407		}
408		h, err := HashFromTile(p, data[j], StoredHashIndex(p.L*p.H, tile.N))
409		if err != nil {
410			return nil, fmt.Errorf("bad math in tileHashReader %d %v: lost hash of %v: %v", r.tree.N, indexes, tile, err)
411		}
412		if h != tileHash(data[i]) {
413			return nil, fmt.Errorf("downloaded inconsistent tile")
414		}
415	}
416
417	// Now we have all the tiles needed for the requested hashes,
418	// and we've authenticated the full tile set against the trusted tree hash.
419	r.tr.SaveTiles(tiles, data)
420
421	// Pull out the requested hashes.
422	hashes := make([]Hash, len(indexes))
423	for i, x := range indexes {
424		j := indexTileOrder[i]
425		h, err := HashFromTile(tiles[j], data[j], x)
426		if err != nil {
427			return nil, fmt.Errorf("bad math in tileHashReader %d %v: lost hash %v: %v", r.tree.N, indexes, x, err)
428		}
429		hashes[i] = h
430	}
431
432	return hashes, nil
433}
434