1// Copyright 2019 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5// Package tlog implements a tamper-evident log 6// used in the Go module go.sum database server. 7// 8// This package follows the design of Certificate Transparency (RFC 6962) 9// and its proofs are compatible with that system. 10// See TestCertificateTransparency. 11package tlog 12 13import ( 14 "crypto/sha256" 15 "encoding/base64" 16 "errors" 17 "fmt" 18 "math/bits" 19) 20 21// A Hash is a hash identifying a log record or tree root. 22type Hash [HashSize]byte 23 24// HashSize is the size of a Hash in bytes. 25const HashSize = 32 26 27// String returns a base64 representation of the hash for printing. 28func (h Hash) String() string { 29 return base64.StdEncoding.EncodeToString(h[:]) 30} 31 32// MarshalJSON marshals the hash as a JSON string containing the base64-encoded hash. 33func (h Hash) MarshalJSON() ([]byte, error) { 34 return []byte(`"` + h.String() + `"`), nil 35} 36 37// UnmarshalJSON unmarshals a hash from JSON string containing the a base64-encoded hash. 38func (h *Hash) UnmarshalJSON(data []byte) error { 39 if len(data) != 1+44+1 || data[0] != '"' || data[len(data)-2] != '=' || data[len(data)-1] != '"' { 40 return errors.New("cannot decode hash") 41 } 42 43 // As of Go 1.12, base64.StdEncoding.Decode insists on 44 // slicing into target[33:] even when it only writes 32 bytes. 45 // Since we already checked that the hash ends in = above, 46 // we can use base64.RawStdEncoding with the = removed; 47 // RawStdEncoding does not exhibit the same bug. 48 // We decode into a temporary to avoid writing anything to *h 49 // unless the entire input is well-formed. 50 var tmp Hash 51 n, err := base64.RawStdEncoding.Decode(tmp[:], data[1:len(data)-2]) 52 if err != nil || n != HashSize { 53 return errors.New("cannot decode hash") 54 } 55 *h = tmp 56 return nil 57} 58 59// ParseHash parses the base64-encoded string form of a hash. 60func ParseHash(s string) (Hash, error) { 61 data, err := base64.StdEncoding.DecodeString(s) 62 if err != nil || len(data) != HashSize { 63 return Hash{}, fmt.Errorf("malformed hash") 64 } 65 var h Hash 66 copy(h[:], data) 67 return h, nil 68} 69 70// maxpow2 returns k, the maximum power of 2 smaller than n, 71// as well as l = log₂ k (so k = 1<<l). 72func maxpow2(n int64) (k int64, l int) { 73 l = 0 74 for 1<<uint(l+1) < n { 75 l++ 76 } 77 return 1 << uint(l), l 78} 79 80var zeroPrefix = []byte{0x00} 81 82// RecordHash returns the content hash for the given record data. 83func RecordHash(data []byte) Hash { 84 // SHA256(0x00 || data) 85 // https://tools.ietf.org/html/rfc6962#section-2.1 86 h := sha256.New() 87 h.Write(zeroPrefix) 88 h.Write(data) 89 var h1 Hash 90 h.Sum(h1[:0]) 91 return h1 92} 93 94// NodeHash returns the hash for an interior tree node with the given left and right hashes. 95func NodeHash(left, right Hash) Hash { 96 // SHA256(0x01 || left || right) 97 // https://tools.ietf.org/html/rfc6962#section-2.1 98 // We use a stack buffer to assemble the hash input 99 // to avoid allocating a hash struct with sha256.New. 100 var buf [1 + HashSize + HashSize]byte 101 buf[0] = 0x01 102 copy(buf[1:], left[:]) 103 copy(buf[1+HashSize:], right[:]) 104 return sha256.Sum256(buf[:]) 105} 106 107// For information about the stored hash index ordering, 108// see section 3.3 of Crosby and Wallach's paper 109// "Efficient Data Structures for Tamper-Evident Logging". 110// https://www.usenix.org/legacy/event/sec09/tech/full_papers/crosby.pdf 111 112// StoredHashIndex maps the tree coordinates (level, n) 113// to a dense linear ordering that can be used for hash storage. 114// Hash storage implementations that store hashes in sequential 115// storage can use this function to compute where to read or write 116// a given hash. 117func StoredHashIndex(level int, n int64) int64 { 118 // Level L's n'th hash is written right after level L+1's 2n+1'th hash. 119 // Work our way down to the level 0 ordering. 120 // We'll add back the original level count at the end. 121 for l := level; l > 0; l-- { 122 n = 2*n + 1 123 } 124 125 // Level 0's n'th hash is written at n+n/2+n/4+... (eventually n/2ⁱ hits zero). 126 i := int64(0) 127 for ; n > 0; n >>= 1 { 128 i += n 129 } 130 131 return i + int64(level) 132} 133 134// SplitStoredHashIndex is the inverse of [StoredHashIndex]. 135// That is, SplitStoredHashIndex(StoredHashIndex(level, n)) == level, n. 136func SplitStoredHashIndex(index int64) (level int, n int64) { 137 // Determine level 0 record before index. 138 // StoredHashIndex(0, n) < 2*n, 139 // so the n we want is in [index/2, index/2+log₂(index)]. 140 n = index / 2 141 indexN := StoredHashIndex(0, n) 142 if indexN > index { 143 panic("bad math") 144 } 145 for { 146 // Each new record n adds 1 + trailingZeros(n) hashes. 147 x := indexN + 1 + int64(bits.TrailingZeros64(uint64(n+1))) 148 if x > index { 149 break 150 } 151 n++ 152 indexN = x 153 } 154 // The hash we want was committed with record n, 155 // meaning it is one of (0, n), (1, n/2), (2, n/4), ... 156 level = int(index - indexN) 157 return level, n >> uint(level) 158} 159 160// StoredHashCount returns the number of stored hashes 161// that are expected for a tree with n records. 162func StoredHashCount(n int64) int64 { 163 if n == 0 { 164 return 0 165 } 166 // The tree will have the hashes up to the last leaf hash. 167 numHash := StoredHashIndex(0, n-1) + 1 168 // And it will have any hashes for subtrees completed by that leaf. 169 for i := uint64(n - 1); i&1 != 0; i >>= 1 { 170 numHash++ 171 } 172 return numHash 173} 174 175// StoredHashes returns the hashes that must be stored when writing 176// record n with the given data. The hashes should be stored starting 177// at StoredHashIndex(0, n). The result will have at most 1 + log₂ n hashes, 178// but it will average just under two per call for a sequence of calls for n=1..k. 179// 180// StoredHashes may read up to log n earlier hashes from r 181// in order to compute hashes for completed subtrees. 182func StoredHashes(n int64, data []byte, r HashReader) ([]Hash, error) { 183 return StoredHashesForRecordHash(n, RecordHash(data), r) 184} 185 186// StoredHashesForRecordHash is like [StoredHashes] but takes 187// as its second argument RecordHash(data) instead of data itself. 188func StoredHashesForRecordHash(n int64, h Hash, r HashReader) ([]Hash, error) { 189 // Start with the record hash. 190 hashes := []Hash{h} 191 192 // Build list of indexes needed for hashes for completed subtrees. 193 // Each trailing 1 bit in the binary representation of n completes a subtree 194 // and consumes a hash from an adjacent subtree. 195 m := int(bits.TrailingZeros64(uint64(n + 1))) 196 indexes := make([]int64, m) 197 for i := 0; i < m; i++ { 198 // We arrange indexes in sorted order. 199 // Note that n>>i is always odd. 200 indexes[m-1-i] = StoredHashIndex(i, n>>uint(i)-1) 201 } 202 203 // Fetch hashes. 204 old, err := r.ReadHashes(indexes) 205 if err != nil { 206 return nil, err 207 } 208 if len(old) != len(indexes) { 209 return nil, fmt.Errorf("tlog: ReadHashes(%d indexes) = %d hashes", len(indexes), len(old)) 210 } 211 212 // Build new hashes. 213 for i := 0; i < m; i++ { 214 h = NodeHash(old[m-1-i], h) 215 hashes = append(hashes, h) 216 } 217 return hashes, nil 218} 219 220// A HashReader can read hashes for nodes in the log's tree structure. 221type HashReader interface { 222 // ReadHashes returns the hashes with the given stored hash indexes 223 // (see StoredHashIndex and SplitStoredHashIndex). 224 // ReadHashes must return a slice of hashes the same length as indexes, 225 // or else it must return a non-nil error. 226 // ReadHashes may run faster if indexes is sorted in increasing order. 227 ReadHashes(indexes []int64) ([]Hash, error) 228} 229 230// A HashReaderFunc is a function implementing [HashReader]. 231type HashReaderFunc func([]int64) ([]Hash, error) 232 233func (f HashReaderFunc) ReadHashes(indexes []int64) ([]Hash, error) { 234 return f(indexes) 235} 236 237// emptyHash is the hash of the empty tree, per RFC 6962, Section 2.1. 238// It is the hash of the empty string. 239var emptyHash = Hash{ 240 0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14, 241 0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24, 242 0x27, 0xae, 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c, 243 0xa4, 0x95, 0x99, 0x1b, 0x78, 0x52, 0xb8, 0x55, 244} 245 246// TreeHash computes the hash for the root of the tree with n records, 247// using the HashReader to obtain previously stored hashes 248// (those returned by StoredHashes during the writes of those n records). 249// TreeHash makes a single call to ReadHash requesting at most 1 + log₂ n hashes. 250func TreeHash(n int64, r HashReader) (Hash, error) { 251 if n == 0 { 252 return emptyHash, nil 253 } 254 indexes := subTreeIndex(0, n, nil) 255 hashes, err := r.ReadHashes(indexes) 256 if err != nil { 257 return Hash{}, err 258 } 259 if len(hashes) != len(indexes) { 260 return Hash{}, fmt.Errorf("tlog: ReadHashes(%d indexes) = %d hashes", len(indexes), len(hashes)) 261 } 262 hash, hashes := subTreeHash(0, n, hashes) 263 if len(hashes) != 0 { 264 panic("tlog: bad index math in TreeHash") 265 } 266 return hash, nil 267} 268 269// subTreeIndex returns the storage indexes needed to compute 270// the hash for the subtree containing records [lo, hi), 271// appending them to need and returning the result. 272// See https://tools.ietf.org/html/rfc6962#section-2.1 273func subTreeIndex(lo, hi int64, need []int64) []int64 { 274 // See subTreeHash below for commentary. 275 for lo < hi { 276 k, level := maxpow2(hi - lo + 1) 277 if lo&(k-1) != 0 { 278 panic("tlog: bad math in subTreeIndex") 279 } 280 need = append(need, StoredHashIndex(level, lo>>uint(level))) 281 lo += k 282 } 283 return need 284} 285 286// subTreeHash computes the hash for the subtree containing records [lo, hi), 287// assuming that hashes are the hashes corresponding to the indexes 288// returned by subTreeIndex(lo, hi). 289// It returns any leftover hashes. 290func subTreeHash(lo, hi int64, hashes []Hash) (Hash, []Hash) { 291 // Repeatedly partition the tree into a left side with 2^level nodes, 292 // for as large a level as possible, and a right side with the fringe. 293 // The left hash is stored directly and can be read from storage. 294 // The right side needs further computation. 295 numTree := 0 296 for lo < hi { 297 k, _ := maxpow2(hi - lo + 1) 298 if lo&(k-1) != 0 || lo >= hi { 299 panic("tlog: bad math in subTreeHash") 300 } 301 numTree++ 302 lo += k 303 } 304 305 if len(hashes) < numTree { 306 panic("tlog: bad index math in subTreeHash") 307 } 308 309 // Reconstruct hash. 310 h := hashes[numTree-1] 311 for i := numTree - 2; i >= 0; i-- { 312 h = NodeHash(hashes[i], h) 313 } 314 return h, hashes[numTree:] 315} 316 317// A RecordProof is a verifiable proof that a particular log root contains a particular record. 318// RFC 6962 calls this a “Merkle audit path.” 319type RecordProof []Hash 320 321// ProveRecord returns the proof that the tree of size t contains the record with index n. 322func ProveRecord(t, n int64, r HashReader) (RecordProof, error) { 323 if t < 0 || n < 0 || n >= t { 324 return nil, fmt.Errorf("tlog: invalid inputs in ProveRecord") 325 } 326 indexes := leafProofIndex(0, t, n, nil) 327 if len(indexes) == 0 { 328 return RecordProof{}, nil 329 } 330 hashes, err := r.ReadHashes(indexes) 331 if err != nil { 332 return nil, err 333 } 334 if len(hashes) != len(indexes) { 335 return nil, fmt.Errorf("tlog: ReadHashes(%d indexes) = %d hashes", len(indexes), len(hashes)) 336 } 337 338 p, hashes := leafProof(0, t, n, hashes) 339 if len(hashes) != 0 { 340 panic("tlog: bad index math in ProveRecord") 341 } 342 return p, nil 343} 344 345// leafProofIndex builds the list of indexes needed to construct the proof 346// that leaf n is contained in the subtree with leaves [lo, hi). 347// It appends those indexes to need and returns the result. 348// See https://tools.ietf.org/html/rfc6962#section-2.1.1 349func leafProofIndex(lo, hi, n int64, need []int64) []int64 { 350 // See leafProof below for commentary. 351 if !(lo <= n && n < hi) { 352 panic("tlog: bad math in leafProofIndex") 353 } 354 if lo+1 == hi { 355 return need 356 } 357 if k, _ := maxpow2(hi - lo); n < lo+k { 358 need = leafProofIndex(lo, lo+k, n, need) 359 need = subTreeIndex(lo+k, hi, need) 360 } else { 361 need = subTreeIndex(lo, lo+k, need) 362 need = leafProofIndex(lo+k, hi, n, need) 363 } 364 return need 365} 366 367// leafProof constructs the proof that leaf n is contained in the subtree with leaves [lo, hi). 368// It returns any leftover hashes as well. 369// See https://tools.ietf.org/html/rfc6962#section-2.1.1 370func leafProof(lo, hi, n int64, hashes []Hash) (RecordProof, []Hash) { 371 // We must have lo <= n < hi or else the code here has a bug. 372 if !(lo <= n && n < hi) { 373 panic("tlog: bad math in leafProof") 374 } 375 376 if lo+1 == hi { // n == lo 377 // Reached the leaf node. 378 // The verifier knows what the leaf hash is, so we don't need to send it. 379 return RecordProof{}, hashes 380 } 381 382 // Walk down the tree toward n. 383 // Record the hash of the path not taken (needed for verifying the proof). 384 var p RecordProof 385 var th Hash 386 if k, _ := maxpow2(hi - lo); n < lo+k { 387 // n is on left side 388 p, hashes = leafProof(lo, lo+k, n, hashes) 389 th, hashes = subTreeHash(lo+k, hi, hashes) 390 } else { 391 // n is on right side 392 th, hashes = subTreeHash(lo, lo+k, hashes) 393 p, hashes = leafProof(lo+k, hi, n, hashes) 394 } 395 return append(p, th), hashes 396} 397 398var errProofFailed = errors.New("invalid transparency proof") 399 400// CheckRecord verifies that p is a valid proof that the tree of size t 401// with hash th has an n'th record with hash h. 402func CheckRecord(p RecordProof, t int64, th Hash, n int64, h Hash) error { 403 if t < 0 || n < 0 || n >= t { 404 return fmt.Errorf("tlog: invalid inputs in CheckRecord") 405 } 406 th2, err := runRecordProof(p, 0, t, n, h) 407 if err != nil { 408 return err 409 } 410 if th2 == th { 411 return nil 412 } 413 return errProofFailed 414} 415 416// runRecordProof runs the proof p that leaf n is contained in the subtree with leaves [lo, hi). 417// Running the proof means constructing and returning the implied hash of that 418// subtree. 419func runRecordProof(p RecordProof, lo, hi, n int64, leafHash Hash) (Hash, error) { 420 // We must have lo <= n < hi or else the code here has a bug. 421 if !(lo <= n && n < hi) { 422 panic("tlog: bad math in runRecordProof") 423 } 424 425 if lo+1 == hi { // m == lo 426 // Reached the leaf node. 427 // The proof must not have any unnecessary hashes. 428 if len(p) != 0 { 429 return Hash{}, errProofFailed 430 } 431 return leafHash, nil 432 } 433 434 if len(p) == 0 { 435 return Hash{}, errProofFailed 436 } 437 438 k, _ := maxpow2(hi - lo) 439 if n < lo+k { 440 th, err := runRecordProof(p[:len(p)-1], lo, lo+k, n, leafHash) 441 if err != nil { 442 return Hash{}, err 443 } 444 return NodeHash(th, p[len(p)-1]), nil 445 } else { 446 th, err := runRecordProof(p[:len(p)-1], lo+k, hi, n, leafHash) 447 if err != nil { 448 return Hash{}, err 449 } 450 return NodeHash(p[len(p)-1], th), nil 451 } 452} 453 454// A TreeProof is a verifiable proof that a particular log tree contains 455// as a prefix all records present in an earlier tree. 456// RFC 6962 calls this a “Merkle consistency proof.” 457type TreeProof []Hash 458 459// ProveTree returns the proof that the tree of size t contains 460// as a prefix all the records from the tree of smaller size n. 461func ProveTree(t, n int64, h HashReader) (TreeProof, error) { 462 if t < 1 || n < 1 || n > t { 463 return nil, fmt.Errorf("tlog: invalid inputs in ProveTree") 464 } 465 indexes := treeProofIndex(0, t, n, nil) 466 if len(indexes) == 0 { 467 return TreeProof{}, nil 468 } 469 hashes, err := h.ReadHashes(indexes) 470 if err != nil { 471 return nil, err 472 } 473 if len(hashes) != len(indexes) { 474 return nil, fmt.Errorf("tlog: ReadHashes(%d indexes) = %d hashes", len(indexes), len(hashes)) 475 } 476 477 p, hashes := treeProof(0, t, n, hashes) 478 if len(hashes) != 0 { 479 panic("tlog: bad index math in ProveTree") 480 } 481 return p, nil 482} 483 484// treeProofIndex builds the list of indexes needed to construct 485// the sub-proof related to the subtree containing records [lo, hi). 486// See https://tools.ietf.org/html/rfc6962#section-2.1.2. 487func treeProofIndex(lo, hi, n int64, need []int64) []int64 { 488 // See treeProof below for commentary. 489 if !(lo < n && n <= hi) { 490 panic("tlog: bad math in treeProofIndex") 491 } 492 493 if n == hi { 494 if lo == 0 { 495 return need 496 } 497 return subTreeIndex(lo, hi, need) 498 } 499 500 if k, _ := maxpow2(hi - lo); n <= lo+k { 501 need = treeProofIndex(lo, lo+k, n, need) 502 need = subTreeIndex(lo+k, hi, need) 503 } else { 504 need = subTreeIndex(lo, lo+k, need) 505 need = treeProofIndex(lo+k, hi, n, need) 506 } 507 return need 508} 509 510// treeProof constructs the sub-proof related to the subtree containing records [lo, hi). 511// It returns any leftover hashes as well. 512// See https://tools.ietf.org/html/rfc6962#section-2.1.2. 513func treeProof(lo, hi, n int64, hashes []Hash) (TreeProof, []Hash) { 514 // We must have lo < n <= hi or else the code here has a bug. 515 if !(lo < n && n <= hi) { 516 panic("tlog: bad math in treeProof") 517 } 518 519 // Reached common ground. 520 if n == hi { 521 if lo == 0 { 522 // This subtree corresponds exactly to the old tree. 523 // The verifier knows that hash, so we don't need to send it. 524 return TreeProof{}, hashes 525 } 526 th, hashes := subTreeHash(lo, hi, hashes) 527 return TreeProof{th}, hashes 528 } 529 530 // Interior node for the proof. 531 // Decide whether to walk down the left or right side. 532 var p TreeProof 533 var th Hash 534 if k, _ := maxpow2(hi - lo); n <= lo+k { 535 // m is on left side 536 p, hashes = treeProof(lo, lo+k, n, hashes) 537 th, hashes = subTreeHash(lo+k, hi, hashes) 538 } else { 539 // m is on right side 540 th, hashes = subTreeHash(lo, lo+k, hashes) 541 p, hashes = treeProof(lo+k, hi, n, hashes) 542 } 543 return append(p, th), hashes 544} 545 546// CheckTree verifies that p is a valid proof that the tree of size t with hash th 547// contains as a prefix the tree of size n with hash h. 548func CheckTree(p TreeProof, t int64, th Hash, n int64, h Hash) error { 549 if t < 1 || n < 1 || n > t { 550 return fmt.Errorf("tlog: invalid inputs in CheckTree") 551 } 552 h2, th2, err := runTreeProof(p, 0, t, n, h) 553 if err != nil { 554 return err 555 } 556 if th2 == th && h2 == h { 557 return nil 558 } 559 return errProofFailed 560} 561 562// runTreeProof runs the sub-proof p related to the subtree containing records [lo, hi), 563// where old is the hash of the old tree with n records. 564// Running the proof means constructing and returning the implied hashes of that 565// subtree in both the old and new tree. 566func runTreeProof(p TreeProof, lo, hi, n int64, old Hash) (Hash, Hash, error) { 567 // We must have lo < n <= hi or else the code here has a bug. 568 if !(lo < n && n <= hi) { 569 panic("tlog: bad math in runTreeProof") 570 } 571 572 // Reached common ground. 573 if n == hi { 574 if lo == 0 { 575 if len(p) != 0 { 576 return Hash{}, Hash{}, errProofFailed 577 } 578 return old, old, nil 579 } 580 if len(p) != 1 { 581 return Hash{}, Hash{}, errProofFailed 582 } 583 return p[0], p[0], nil 584 } 585 586 if len(p) == 0 { 587 return Hash{}, Hash{}, errProofFailed 588 } 589 590 // Interior node for the proof. 591 k, _ := maxpow2(hi - lo) 592 if n <= lo+k { 593 oh, th, err := runTreeProof(p[:len(p)-1], lo, lo+k, n, old) 594 if err != nil { 595 return Hash{}, Hash{}, err 596 } 597 return oh, NodeHash(th, p[len(p)-1]), nil 598 } else { 599 oh, th, err := runTreeProof(p[:len(p)-1], lo+k, hi, n, old) 600 if err != nil { 601 return Hash{}, Hash{}, err 602 } 603 return NodeHash(p[len(p)-1], oh), NodeHash(p[len(p)-1], th), nil 604 } 605} 606