// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package tlog

import (
	"fmt"
	"strconv"
	"strings"
)

// A Tile is a description of a transparency log tile.
// A tile of height H at level L offset N lists W consecutive hashes
// at level H*L of the tree starting at offset N*(2**H).
// A complete tile lists 2**H hashes; a partial tile lists fewer.
// Note that a tile represents the entire subtree of height H
// with those hashes as the leaves. The levels above H*L
// can be reconstructed by hashing the leaves.
//
// Each Tile can be encoded as a “tile coordinate path”
// of the form tile/H/L/NNN[.p/W].
// The .p/W suffix is present only for partial tiles, meaning W < 2**H.
// The NNN element is an encoding of N into 3-digit path elements.
// All but the last path element begin with an "x".
// For example,
// Tile{H: 3, L: 4, N: 1234067, W: 1}'s path
// is tile/3/4/x001/x234/067.p/1, and
// Tile{H: 3, L: 4, N: 1234067, W: 8}'s path
// is tile/3/4/x001/x234/067.
// See the [Tile.Path] method and the [ParseTilePath] function.
//
// The special level L=-1 holds raw record data instead of hashes.
// In this case, the level encodes into a tile path as the path element
// "data" instead of "-1".
//
// See also https://golang.org/design/25530-sumdb#checksum-database
// and https://research.swtch.com/tlog#tiling_a_log.
type Tile struct {
	H int   // height of tile (1 ≤ H ≤ 30)
	L int   // level in tiling (-1 ≤ L ≤ 63)
	N int64 // number within level (0 ≤ N, unbounded)
	W int   // width of tile (1 ≤ W ≤ 2**H; 2**H is complete tile)
}

// TileForIndex returns the tile of fixed height h ≥ 1
// and least width storing the given hash storage index.
//
// If h ≤ 0, [TileForIndex] panics.
50func TileForIndex(h int, index int64) Tile { 51 if h <= 0 { 52 panic(fmt.Sprintf("TileForIndex: invalid height %d", h)) 53 } 54 t, _, _ := tileForIndex(h, index) 55 return t 56} 57 58// tileForIndex returns the tile of height h ≥ 1 59// storing the given hash index, which can be 60// reconstructed using tileHash(data[start:end]). 61func tileForIndex(h int, index int64) (t Tile, start, end int) { 62 level, n := SplitStoredHashIndex(index) 63 t.H = h 64 t.L = level / h 65 level -= t.L * h // now level within tile 66 t.N = n << uint(level) >> uint(t.H) 67 n -= t.N << uint(t.H) >> uint(level) // now n within tile at level 68 t.W = int((n + 1) << uint(level)) 69 return t, int(n<<uint(level)) * HashSize, int((n+1)<<uint(level)) * HashSize 70} 71 72// HashFromTile returns the hash at the given storage index, 73// provided that t == TileForIndex(t.H, index) or a wider version, 74// and data is t's tile data (of length at least t.W*HashSize). 75func HashFromTile(t Tile, data []byte, index int64) (Hash, error) { 76 if t.H < 1 || t.H > 30 || t.L < 0 || t.L >= 64 || t.W < 1 || t.W > 1<<uint(t.H) { 77 return Hash{}, fmt.Errorf("invalid tile %v", t.Path()) 78 } 79 if len(data) < t.W*HashSize { 80 return Hash{}, fmt.Errorf("data len %d too short for tile %v", len(data), t.Path()) 81 } 82 t1, start, end := tileForIndex(t.H, index) 83 if t.L != t1.L || t.N != t1.N || t.W < t1.W { 84 return Hash{}, fmt.Errorf("index %v is in %v not %v", index, t1.Path(), t.Path()) 85 } 86 return tileHash(data[start:end]), nil 87} 88 89// tileHash computes the subtree hash corresponding to the (2^K)-1 hashes in data. 
90func tileHash(data []byte) Hash { 91 if len(data) == 0 { 92 panic("bad math in tileHash") 93 } 94 if len(data) == HashSize { 95 var h Hash 96 copy(h[:], data) 97 return h 98 } 99 n := len(data) / 2 100 return NodeHash(tileHash(data[:n]), tileHash(data[n:])) 101} 102 103// NewTiles returns the coordinates of the tiles of height h ≥ 1 104// that must be published when publishing from a tree of 105// size newTreeSize to replace a tree of size oldTreeSize. 106// (No tiles need to be published for a tree of size zero.) 107// 108// If h ≤ 0, NewTiles panics. 109func NewTiles(h int, oldTreeSize, newTreeSize int64) []Tile { 110 if h <= 0 { 111 panic(fmt.Sprintf("NewTiles: invalid height %d", h)) 112 } 113 H := uint(h) 114 var tiles []Tile 115 for level := uint(0); newTreeSize>>(H*level) > 0; level++ { 116 oldN := oldTreeSize >> (H * level) 117 newN := newTreeSize >> (H * level) 118 if oldN == newN { 119 continue 120 } 121 for n := oldN >> H; n < newN>>H; n++ { 122 tiles = append(tiles, Tile{H: h, L: int(level), N: n, W: 1 << H}) 123 } 124 n := newN >> H 125 if w := int(newN - n<<H); w > 0 { 126 tiles = append(tiles, Tile{H: h, L: int(level), N: n, W: w}) 127 } 128 } 129 return tiles 130} 131 132// ReadTileData reads the hashes for tile t from r 133// and returns the corresponding tile data. 
134func ReadTileData(t Tile, r HashReader) ([]byte, error) { 135 size := t.W 136 if size == 0 { 137 size = 1 << uint(t.H) 138 } 139 start := t.N << uint(t.H) 140 indexes := make([]int64, size) 141 for i := 0; i < size; i++ { 142 indexes[i] = StoredHashIndex(t.H*t.L, start+int64(i)) 143 } 144 145 hashes, err := r.ReadHashes(indexes) 146 if err != nil { 147 return nil, err 148 } 149 if len(hashes) != len(indexes) { 150 return nil, fmt.Errorf("tlog: ReadHashes(%d indexes) = %d hashes", len(indexes), len(hashes)) 151 } 152 153 tile := make([]byte, size*HashSize) 154 for i := 0; i < size; i++ { 155 copy(tile[i*HashSize:], hashes[i][:]) 156 } 157 return tile, nil 158} 159 160// To limit the size of any particular directory listing, 161// we encode the (possibly very large) number N 162// by encoding three digits at a time. 163// For example, 123456789 encodes as x123/x456/789. 164// Each directory has at most 1000 each xNNN, NNN, and NNN.p children, 165// so there are at most 3000 entries in any one directory. 166const pathBase = 1000 167 168// Path returns a tile coordinate path describing t. 169func (t Tile) Path() string { 170 n := t.N 171 nStr := fmt.Sprintf("%03d", n%pathBase) 172 for n >= pathBase { 173 n /= pathBase 174 nStr = fmt.Sprintf("x%03d/%s", n%pathBase, nStr) 175 } 176 pStr := "" 177 if t.W != 1<<uint(t.H) { 178 pStr = fmt.Sprintf(".p/%d", t.W) 179 } 180 var L string 181 if t.L == -1 { 182 L = "data" 183 } else { 184 L = fmt.Sprintf("%d", t.L) 185 } 186 return fmt.Sprintf("tile/%d/%s/%s%s", t.H, L, nStr, pStr) 187} 188 189// ParseTilePath parses a tile coordinate path. 
190func ParseTilePath(path string) (Tile, error) { 191 f := strings.Split(path, "/") 192 if len(f) < 4 || f[0] != "tile" { 193 return Tile{}, &badPathError{path} 194 } 195 h, err1 := strconv.Atoi(f[1]) 196 isData := false 197 if f[2] == "data" { 198 isData = true 199 f[2] = "0" 200 } 201 l, err2 := strconv.Atoi(f[2]) 202 if err1 != nil || err2 != nil || h < 1 || l < 0 || h > 30 { 203 return Tile{}, &badPathError{path} 204 } 205 w := 1 << uint(h) 206 if dotP := f[len(f)-2]; strings.HasSuffix(dotP, ".p") { 207 ww, err := strconv.Atoi(f[len(f)-1]) 208 if err != nil || ww <= 0 || ww >= w { 209 return Tile{}, &badPathError{path} 210 } 211 w = ww 212 f[len(f)-2] = dotP[:len(dotP)-len(".p")] 213 f = f[:len(f)-1] 214 } 215 f = f[3:] 216 n := int64(0) 217 for _, s := range f { 218 nn, err := strconv.Atoi(strings.TrimPrefix(s, "x")) 219 if err != nil || nn < 0 || nn >= pathBase { 220 return Tile{}, &badPathError{path} 221 } 222 n = n*pathBase + int64(nn) 223 } 224 if isData { 225 l = -1 226 } 227 t := Tile{H: h, L: l, N: n, W: w} 228 if path != t.Path() { 229 return Tile{}, &badPathError{path} 230 } 231 return t, nil 232} 233 234type badPathError struct { 235 path string 236} 237 238func (e *badPathError) Error() string { 239 return fmt.Sprintf("malformed tile path %q", e.path) 240} 241 242// A TileReader reads tiles from a go.sum database log. 243type TileReader interface { 244 // Height returns the height of the available tiles. 245 Height() int 246 247 // ReadTiles returns the data for each requested tile. 248 // If ReadTiles returns err == nil, it must also return 249 // a data record for each tile (len(data) == len(tiles)) 250 // and each data record must be the correct length 251 // (len(data[i]) == tiles[i].W*HashSize). 252 // 253 // An implementation of ReadTiles typically reads 254 // them from an on-disk cache or else from a remote 255 // tile server. 
Tile data downloaded from a server should 256 // be considered suspect and not saved into a persistent 257 // on-disk cache before returning from ReadTiles. 258 // When the client confirms the validity of the tile data, 259 // it will call SaveTiles to signal that they can be safely 260 // written to persistent storage. 261 // See also https://research.swtch.com/tlog#authenticating_tiles. 262 ReadTiles(tiles []Tile) (data [][]byte, err error) 263 264 // SaveTiles informs the TileReader that the tile data 265 // returned by ReadTiles has been confirmed as valid 266 // and can be saved in persistent storage (on disk). 267 SaveTiles(tiles []Tile, data [][]byte) 268} 269 270// TileHashReader returns a HashReader that satisfies requests 271// by loading tiles of the given tree. 272// 273// The returned [HashReader] checks that loaded tiles are 274// valid for the given tree. Therefore, any hashes returned 275// by the HashReader are already proven to be in the tree. 276func TileHashReader(tree Tree, tr TileReader) HashReader { 277 return &tileHashReader{tree: tree, tr: tr} 278} 279 280type tileHashReader struct { 281 tree Tree 282 tr TileReader 283} 284 285// tileParent returns t's k'th tile parent in the tiles for a tree of size n. 286// If there is no such parent, tileParent returns Tile{}. 287func tileParent(t Tile, k int, n int64) Tile { 288 t.L += k 289 t.N >>= uint(k * t.H) 290 t.W = 1 << uint(t.H) 291 if max := n >> uint(t.L*t.H); t.N<<uint(t.H)+int64(t.W) >= max { 292 if t.N<<uint(t.H) >= max { 293 return Tile{} 294 } 295 t.W = int(max - t.N<<uint(t.H)) 296 } 297 return t 298} 299 300func (r *tileHashReader) ReadHashes(indexes []int64) ([]Hash, error) { 301 h := r.tr.Height() 302 303 tileOrder := make(map[Tile]int) // tileOrder[tileKey(tiles[i])] = i 304 var tiles []Tile 305 306 // Plan to fetch tiles necessary to recompute tree hash. 307 // If it matches, those tiles are authenticated. 
308 stx := subTreeIndex(0, r.tree.N, nil) 309 stxTileOrder := make([]int, len(stx)) 310 for i, x := range stx { 311 tile, _, _ := tileForIndex(h, x) 312 tile = tileParent(tile, 0, r.tree.N) 313 if j, ok := tileOrder[tile]; ok { 314 stxTileOrder[i] = j 315 continue 316 } 317 stxTileOrder[i] = len(tiles) 318 tileOrder[tile] = len(tiles) 319 tiles = append(tiles, tile) 320 } 321 322 // Plan to fetch tiles containing the indexes, 323 // along with any parent tiles needed 324 // for authentication. For most calls, 325 // the parents are being fetched anyway. 326 indexTileOrder := make([]int, len(indexes)) 327 for i, x := range indexes { 328 if x >= StoredHashIndex(0, r.tree.N) { 329 return nil, fmt.Errorf("indexes not in tree") 330 } 331 332 tile, _, _ := tileForIndex(h, x) 333 334 // Walk up parent tiles until we find one we've requested. 335 // That one will be authenticated. 336 k := 0 337 for ; ; k++ { 338 p := tileParent(tile, k, r.tree.N) 339 if j, ok := tileOrder[p]; ok { 340 if k == 0 { 341 indexTileOrder[i] = j 342 } 343 break 344 } 345 } 346 347 // Walk back down recording child tiles after parents. 348 // This loop ends by revisiting the tile for this index 349 // (tileParent(tile, 0, r.tree.N)) unless k == 0, in which 350 // case the previous loop did it. 351 for k--; k >= 0; k-- { 352 p := tileParent(tile, k, r.tree.N) 353 if p.W != 1<<uint(p.H) { 354 // Only full tiles have parents. 355 // This tile has a parent, so it must be full. 356 return nil, fmt.Errorf("bad math in tileHashReader: %d %d %v", r.tree.N, x, p) 357 } 358 tileOrder[p] = len(tiles) 359 if k == 0 { 360 indexTileOrder[i] = len(tiles) 361 } 362 tiles = append(tiles, p) 363 } 364 } 365 366 // Fetch all the tile data. 
367 data, err := r.tr.ReadTiles(tiles) 368 if err != nil { 369 return nil, err 370 } 371 if len(data) != len(tiles) { 372 return nil, fmt.Errorf("TileReader returned bad result slice (len=%d, want %d)", len(data), len(tiles)) 373 } 374 for i, tile := range tiles { 375 if len(data[i]) != tile.W*HashSize { 376 return nil, fmt.Errorf("TileReader returned bad result slice (%v len=%d, want %d)", tile.Path(), len(data[i]), tile.W*HashSize) 377 } 378 } 379 380 // Authenticate the initial tiles against the tree hash. 381 // They are arranged so that parents are authenticated before children. 382 // First the tiles needed for the tree hash. 383 th, err := HashFromTile(tiles[stxTileOrder[len(stx)-1]], data[stxTileOrder[len(stx)-1]], stx[len(stx)-1]) 384 if err != nil { 385 return nil, err 386 } 387 for i := len(stx) - 2; i >= 0; i-- { 388 h, err := HashFromTile(tiles[stxTileOrder[i]], data[stxTileOrder[i]], stx[i]) 389 if err != nil { 390 return nil, err 391 } 392 th = NodeHash(h, th) 393 } 394 if th != r.tree.Hash { 395 // The tiles do not support the tree hash. 396 // We know at least one is wrong, but not which one. 397 return nil, fmt.Errorf("downloaded inconsistent tile") 398 } 399 400 // Authenticate full tiles against their parents. 401 for i := len(stx); i < len(tiles); i++ { 402 tile := tiles[i] 403 p := tileParent(tile, 1, r.tree.N) 404 j, ok := tileOrder[p] 405 if !ok { 406 return nil, fmt.Errorf("bad math in tileHashReader %d %v: lost parent of %v", r.tree.N, indexes, tile) 407 } 408 h, err := HashFromTile(p, data[j], StoredHashIndex(p.L*p.H, tile.N)) 409 if err != nil { 410 return nil, fmt.Errorf("bad math in tileHashReader %d %v: lost hash of %v: %v", r.tree.N, indexes, tile, err) 411 } 412 if h != tileHash(data[i]) { 413 return nil, fmt.Errorf("downloaded inconsistent tile") 414 } 415 } 416 417 // Now we have all the tiles needed for the requested hashes, 418 // and we've authenticated the full tile set against the trusted tree hash. 
419 r.tr.SaveTiles(tiles, data) 420 421 // Pull out the requested hashes. 422 hashes := make([]Hash, len(indexes)) 423 for i, x := range indexes { 424 j := indexTileOrder[i] 425 h, err := HashFromTile(tiles[j], data[j], x) 426 if err != nil { 427 return nil, fmt.Errorf("bad math in tileHashReader %d %v: lost hash %v: %v", r.tree.N, indexes, x, err) 428 } 429 hashes[i] = h 430 } 431 432 return hashes, nil 433} 434