1// Copyright 2019 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Package tlog implements a tamper-evident log
6// used in the Go module go.sum database server.
7//
8// This package follows the design of Certificate Transparency (RFC 6962)
9// and its proofs are compatible with that system.
10// See TestCertificateTransparency.
11package tlog
12
13import (
14	"crypto/sha256"
15	"encoding/base64"
16	"errors"
17	"fmt"
18	"math/bits"
19)
20
21// A Hash is a hash identifying a log record or tree root.
22type Hash [HashSize]byte
23
24// HashSize is the size of a Hash in bytes.
25const HashSize = 32
26
27// String returns a base64 representation of the hash for printing.
28func (h Hash) String() string {
29	return base64.StdEncoding.EncodeToString(h[:])
30}
31
32// MarshalJSON marshals the hash as a JSON string containing the base64-encoded hash.
33func (h Hash) MarshalJSON() ([]byte, error) {
34	return []byte(`"` + h.String() + `"`), nil
35}
36
37// UnmarshalJSON unmarshals a hash from JSON string containing the a base64-encoded hash.
38func (h *Hash) UnmarshalJSON(data []byte) error {
39	if len(data) != 1+44+1 || data[0] != '"' || data[len(data)-2] != '=' || data[len(data)-1] != '"' {
40		return errors.New("cannot decode hash")
41	}
42
43	// As of Go 1.12, base64.StdEncoding.Decode insists on
44	// slicing into target[33:] even when it only writes 32 bytes.
45	// Since we already checked that the hash ends in = above,
46	// we can use base64.RawStdEncoding with the = removed;
47	// RawStdEncoding does not exhibit the same bug.
48	// We decode into a temporary to avoid writing anything to *h
49	// unless the entire input is well-formed.
50	var tmp Hash
51	n, err := base64.RawStdEncoding.Decode(tmp[:], data[1:len(data)-2])
52	if err != nil || n != HashSize {
53		return errors.New("cannot decode hash")
54	}
55	*h = tmp
56	return nil
57}
58
59// ParseHash parses the base64-encoded string form of a hash.
60func ParseHash(s string) (Hash, error) {
61	data, err := base64.StdEncoding.DecodeString(s)
62	if err != nil || len(data) != HashSize {
63		return Hash{}, fmt.Errorf("malformed hash")
64	}
65	var h Hash
66	copy(h[:], data)
67	return h, nil
68}
69
70// maxpow2 returns k, the maximum power of 2 smaller than n,
71// as well as l = log₂ k (so k = 1<<l).
72func maxpow2(n int64) (k int64, l int) {
73	l = 0
74	for 1<<uint(l+1) < n {
75		l++
76	}
77	return 1 << uint(l), l
78}
79
80var zeroPrefix = []byte{0x00}
81
82// RecordHash returns the content hash for the given record data.
83func RecordHash(data []byte) Hash {
84	// SHA256(0x00 || data)
85	// https://tools.ietf.org/html/rfc6962#section-2.1
86	h := sha256.New()
87	h.Write(zeroPrefix)
88	h.Write(data)
89	var h1 Hash
90	h.Sum(h1[:0])
91	return h1
92}
93
94// NodeHash returns the hash for an interior tree node with the given left and right hashes.
95func NodeHash(left, right Hash) Hash {
96	// SHA256(0x01 || left || right)
97	// https://tools.ietf.org/html/rfc6962#section-2.1
98	// We use a stack buffer to assemble the hash input
99	// to avoid allocating a hash struct with sha256.New.
100	var buf [1 + HashSize + HashSize]byte
101	buf[0] = 0x01
102	copy(buf[1:], left[:])
103	copy(buf[1+HashSize:], right[:])
104	return sha256.Sum256(buf[:])
105}
106
107// For information about the stored hash index ordering,
108// see section 3.3 of Crosby and Wallach's paper
109// "Efficient Data Structures for Tamper-Evident Logging".
110// https://www.usenix.org/legacy/event/sec09/tech/full_papers/crosby.pdf
111
112// StoredHashIndex maps the tree coordinates (level, n)
113// to a dense linear ordering that can be used for hash storage.
114// Hash storage implementations that store hashes in sequential
115// storage can use this function to compute where to read or write
116// a given hash.
117func StoredHashIndex(level int, n int64) int64 {
118	// Level L's n'th hash is written right after level L+1's 2n+1'th hash.
119	// Work our way down to the level 0 ordering.
120	// We'll add back the original level count at the end.
121	for l := level; l > 0; l-- {
122		n = 2*n + 1
123	}
124
125	// Level 0's n'th hash is written at n+n/2+n/4+... (eventually n/2ⁱ hits zero).
126	i := int64(0)
127	for ; n > 0; n >>= 1 {
128		i += n
129	}
130
131	return i + int64(level)
132}
133
134// SplitStoredHashIndex is the inverse of [StoredHashIndex].
135// That is, SplitStoredHashIndex(StoredHashIndex(level, n)) == level, n.
136func SplitStoredHashIndex(index int64) (level int, n int64) {
137	// Determine level 0 record before index.
138	// StoredHashIndex(0, n) < 2*n,
139	// so the n we want is in [index/2, index/2+log₂(index)].
140	n = index / 2
141	indexN := StoredHashIndex(0, n)
142	if indexN > index {
143		panic("bad math")
144	}
145	for {
146		// Each new record n adds 1 + trailingZeros(n) hashes.
147		x := indexN + 1 + int64(bits.TrailingZeros64(uint64(n+1)))
148		if x > index {
149			break
150		}
151		n++
152		indexN = x
153	}
154	// The hash we want was committed with record n,
155	// meaning it is one of (0, n), (1, n/2), (2, n/4), ...
156	level = int(index - indexN)
157	return level, n >> uint(level)
158}
159
160// StoredHashCount returns the number of stored hashes
161// that are expected for a tree with n records.
162func StoredHashCount(n int64) int64 {
163	if n == 0 {
164		return 0
165	}
166	// The tree will have the hashes up to the last leaf hash.
167	numHash := StoredHashIndex(0, n-1) + 1
168	// And it will have any hashes for subtrees completed by that leaf.
169	for i := uint64(n - 1); i&1 != 0; i >>= 1 {
170		numHash++
171	}
172	return numHash
173}
174
175// StoredHashes returns the hashes that must be stored when writing
176// record n with the given data. The hashes should be stored starting
177// at StoredHashIndex(0, n). The result will have at most 1 + log₂ n hashes,
178// but it will average just under two per call for a sequence of calls for n=1..k.
179//
180// StoredHashes may read up to log n earlier hashes from r
181// in order to compute hashes for completed subtrees.
182func StoredHashes(n int64, data []byte, r HashReader) ([]Hash, error) {
183	return StoredHashesForRecordHash(n, RecordHash(data), r)
184}
185
186// StoredHashesForRecordHash is like [StoredHashes] but takes
187// as its second argument RecordHash(data) instead of data itself.
188func StoredHashesForRecordHash(n int64, h Hash, r HashReader) ([]Hash, error) {
189	// Start with the record hash.
190	hashes := []Hash{h}
191
192	// Build list of indexes needed for hashes for completed subtrees.
193	// Each trailing 1 bit in the binary representation of n completes a subtree
194	// and consumes a hash from an adjacent subtree.
195	m := int(bits.TrailingZeros64(uint64(n + 1)))
196	indexes := make([]int64, m)
197	for i := 0; i < m; i++ {
198		// We arrange indexes in sorted order.
199		// Note that n>>i is always odd.
200		indexes[m-1-i] = StoredHashIndex(i, n>>uint(i)-1)
201	}
202
203	// Fetch hashes.
204	old, err := r.ReadHashes(indexes)
205	if err != nil {
206		return nil, err
207	}
208	if len(old) != len(indexes) {
209		return nil, fmt.Errorf("tlog: ReadHashes(%d indexes) = %d hashes", len(indexes), len(old))
210	}
211
212	// Build new hashes.
213	for i := 0; i < m; i++ {
214		h = NodeHash(old[m-1-i], h)
215		hashes = append(hashes, h)
216	}
217	return hashes, nil
218}
219
220// A HashReader can read hashes for nodes in the log's tree structure.
221type HashReader interface {
222	// ReadHashes returns the hashes with the given stored hash indexes
223	// (see StoredHashIndex and SplitStoredHashIndex).
224	// ReadHashes must return a slice of hashes the same length as indexes,
225	// or else it must return a non-nil error.
226	// ReadHashes may run faster if indexes is sorted in increasing order.
227	ReadHashes(indexes []int64) ([]Hash, error)
228}
229
230// A HashReaderFunc is a function implementing [HashReader].
231type HashReaderFunc func([]int64) ([]Hash, error)
232
233func (f HashReaderFunc) ReadHashes(indexes []int64) ([]Hash, error) {
234	return f(indexes)
235}
236
237// emptyHash is the hash of the empty tree, per RFC 6962, Section 2.1.
238// It is the hash of the empty string.
239var emptyHash = Hash{
240	0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14,
241	0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24,
242	0x27, 0xae, 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c,
243	0xa4, 0x95, 0x99, 0x1b, 0x78, 0x52, 0xb8, 0x55,
244}
245
246// TreeHash computes the hash for the root of the tree with n records,
247// using the HashReader to obtain previously stored hashes
248// (those returned by StoredHashes during the writes of those n records).
249// TreeHash makes a single call to ReadHash requesting at most 1 + log₂ n hashes.
250func TreeHash(n int64, r HashReader) (Hash, error) {
251	if n == 0 {
252		return emptyHash, nil
253	}
254	indexes := subTreeIndex(0, n, nil)
255	hashes, err := r.ReadHashes(indexes)
256	if err != nil {
257		return Hash{}, err
258	}
259	if len(hashes) != len(indexes) {
260		return Hash{}, fmt.Errorf("tlog: ReadHashes(%d indexes) = %d hashes", len(indexes), len(hashes))
261	}
262	hash, hashes := subTreeHash(0, n, hashes)
263	if len(hashes) != 0 {
264		panic("tlog: bad index math in TreeHash")
265	}
266	return hash, nil
267}
268
269// subTreeIndex returns the storage indexes needed to compute
270// the hash for the subtree containing records [lo, hi),
271// appending them to need and returning the result.
272// See https://tools.ietf.org/html/rfc6962#section-2.1
273func subTreeIndex(lo, hi int64, need []int64) []int64 {
274	// See subTreeHash below for commentary.
275	for lo < hi {
276		k, level := maxpow2(hi - lo + 1)
277		if lo&(k-1) != 0 {
278			panic("tlog: bad math in subTreeIndex")
279		}
280		need = append(need, StoredHashIndex(level, lo>>uint(level)))
281		lo += k
282	}
283	return need
284}
285
286// subTreeHash computes the hash for the subtree containing records [lo, hi),
287// assuming that hashes are the hashes corresponding to the indexes
288// returned by subTreeIndex(lo, hi).
289// It returns any leftover hashes.
290func subTreeHash(lo, hi int64, hashes []Hash) (Hash, []Hash) {
291	// Repeatedly partition the tree into a left side with 2^level nodes,
292	// for as large a level as possible, and a right side with the fringe.
293	// The left hash is stored directly and can be read from storage.
294	// The right side needs further computation.
295	numTree := 0
296	for lo < hi {
297		k, _ := maxpow2(hi - lo + 1)
298		if lo&(k-1) != 0 || lo >= hi {
299			panic("tlog: bad math in subTreeHash")
300		}
301		numTree++
302		lo += k
303	}
304
305	if len(hashes) < numTree {
306		panic("tlog: bad index math in subTreeHash")
307	}
308
309	// Reconstruct hash.
310	h := hashes[numTree-1]
311	for i := numTree - 2; i >= 0; i-- {
312		h = NodeHash(hashes[i], h)
313	}
314	return h, hashes[numTree:]
315}
316
317// A RecordProof is a verifiable proof that a particular log root contains a particular record.
318// RFC 6962 calls this a “Merkle audit path.”
319type RecordProof []Hash
320
321// ProveRecord returns the proof that the tree of size t contains the record with index n.
322func ProveRecord(t, n int64, r HashReader) (RecordProof, error) {
323	if t < 0 || n < 0 || n >= t {
324		return nil, fmt.Errorf("tlog: invalid inputs in ProveRecord")
325	}
326	indexes := leafProofIndex(0, t, n, nil)
327	if len(indexes) == 0 {
328		return RecordProof{}, nil
329	}
330	hashes, err := r.ReadHashes(indexes)
331	if err != nil {
332		return nil, err
333	}
334	if len(hashes) != len(indexes) {
335		return nil, fmt.Errorf("tlog: ReadHashes(%d indexes) = %d hashes", len(indexes), len(hashes))
336	}
337
338	p, hashes := leafProof(0, t, n, hashes)
339	if len(hashes) != 0 {
340		panic("tlog: bad index math in ProveRecord")
341	}
342	return p, nil
343}
344
345// leafProofIndex builds the list of indexes needed to construct the proof
346// that leaf n is contained in the subtree with leaves [lo, hi).
347// It appends those indexes to need and returns the result.
348// See https://tools.ietf.org/html/rfc6962#section-2.1.1
349func leafProofIndex(lo, hi, n int64, need []int64) []int64 {
350	// See leafProof below for commentary.
351	if !(lo <= n && n < hi) {
352		panic("tlog: bad math in leafProofIndex")
353	}
354	if lo+1 == hi {
355		return need
356	}
357	if k, _ := maxpow2(hi - lo); n < lo+k {
358		need = leafProofIndex(lo, lo+k, n, need)
359		need = subTreeIndex(lo+k, hi, need)
360	} else {
361		need = subTreeIndex(lo, lo+k, need)
362		need = leafProofIndex(lo+k, hi, n, need)
363	}
364	return need
365}
366
367// leafProof constructs the proof that leaf n is contained in the subtree with leaves [lo, hi).
368// It returns any leftover hashes as well.
369// See https://tools.ietf.org/html/rfc6962#section-2.1.1
370func leafProof(lo, hi, n int64, hashes []Hash) (RecordProof, []Hash) {
371	// We must have lo <= n < hi or else the code here has a bug.
372	if !(lo <= n && n < hi) {
373		panic("tlog: bad math in leafProof")
374	}
375
376	if lo+1 == hi { // n == lo
377		// Reached the leaf node.
378		// The verifier knows what the leaf hash is, so we don't need to send it.
379		return RecordProof{}, hashes
380	}
381
382	// Walk down the tree toward n.
383	// Record the hash of the path not taken (needed for verifying the proof).
384	var p RecordProof
385	var th Hash
386	if k, _ := maxpow2(hi - lo); n < lo+k {
387		// n is on left side
388		p, hashes = leafProof(lo, lo+k, n, hashes)
389		th, hashes = subTreeHash(lo+k, hi, hashes)
390	} else {
391		// n is on right side
392		th, hashes = subTreeHash(lo, lo+k, hashes)
393		p, hashes = leafProof(lo+k, hi, n, hashes)
394	}
395	return append(p, th), hashes
396}
397
398var errProofFailed = errors.New("invalid transparency proof")
399
400// CheckRecord verifies that p is a valid proof that the tree of size t
401// with hash th has an n'th record with hash h.
402func CheckRecord(p RecordProof, t int64, th Hash, n int64, h Hash) error {
403	if t < 0 || n < 0 || n >= t {
404		return fmt.Errorf("tlog: invalid inputs in CheckRecord")
405	}
406	th2, err := runRecordProof(p, 0, t, n, h)
407	if err != nil {
408		return err
409	}
410	if th2 == th {
411		return nil
412	}
413	return errProofFailed
414}
415
416// runRecordProof runs the proof p that leaf n is contained in the subtree with leaves [lo, hi).
417// Running the proof means constructing and returning the implied hash of that
418// subtree.
419func runRecordProof(p RecordProof, lo, hi, n int64, leafHash Hash) (Hash, error) {
420	// We must have lo <= n < hi or else the code here has a bug.
421	if !(lo <= n && n < hi) {
422		panic("tlog: bad math in runRecordProof")
423	}
424
425	if lo+1 == hi { // m == lo
426		// Reached the leaf node.
427		// The proof must not have any unnecessary hashes.
428		if len(p) != 0 {
429			return Hash{}, errProofFailed
430		}
431		return leafHash, nil
432	}
433
434	if len(p) == 0 {
435		return Hash{}, errProofFailed
436	}
437
438	k, _ := maxpow2(hi - lo)
439	if n < lo+k {
440		th, err := runRecordProof(p[:len(p)-1], lo, lo+k, n, leafHash)
441		if err != nil {
442			return Hash{}, err
443		}
444		return NodeHash(th, p[len(p)-1]), nil
445	} else {
446		th, err := runRecordProof(p[:len(p)-1], lo+k, hi, n, leafHash)
447		if err != nil {
448			return Hash{}, err
449		}
450		return NodeHash(p[len(p)-1], th), nil
451	}
452}
453
454// A TreeProof is a verifiable proof that a particular log tree contains
455// as a prefix all records present in an earlier tree.
456// RFC 6962 calls this a “Merkle consistency proof.”
457type TreeProof []Hash
458
459// ProveTree returns the proof that the tree of size t contains
460// as a prefix all the records from the tree of smaller size n.
461func ProveTree(t, n int64, h HashReader) (TreeProof, error) {
462	if t < 1 || n < 1 || n > t {
463		return nil, fmt.Errorf("tlog: invalid inputs in ProveTree")
464	}
465	indexes := treeProofIndex(0, t, n, nil)
466	if len(indexes) == 0 {
467		return TreeProof{}, nil
468	}
469	hashes, err := h.ReadHashes(indexes)
470	if err != nil {
471		return nil, err
472	}
473	if len(hashes) != len(indexes) {
474		return nil, fmt.Errorf("tlog: ReadHashes(%d indexes) = %d hashes", len(indexes), len(hashes))
475	}
476
477	p, hashes := treeProof(0, t, n, hashes)
478	if len(hashes) != 0 {
479		panic("tlog: bad index math in ProveTree")
480	}
481	return p, nil
482}
483
484// treeProofIndex builds the list of indexes needed to construct
485// the sub-proof related to the subtree containing records [lo, hi).
486// See https://tools.ietf.org/html/rfc6962#section-2.1.2.
487func treeProofIndex(lo, hi, n int64, need []int64) []int64 {
488	// See treeProof below for commentary.
489	if !(lo < n && n <= hi) {
490		panic("tlog: bad math in treeProofIndex")
491	}
492
493	if n == hi {
494		if lo == 0 {
495			return need
496		}
497		return subTreeIndex(lo, hi, need)
498	}
499
500	if k, _ := maxpow2(hi - lo); n <= lo+k {
501		need = treeProofIndex(lo, lo+k, n, need)
502		need = subTreeIndex(lo+k, hi, need)
503	} else {
504		need = subTreeIndex(lo, lo+k, need)
505		need = treeProofIndex(lo+k, hi, n, need)
506	}
507	return need
508}
509
510// treeProof constructs the sub-proof related to the subtree containing records [lo, hi).
511// It returns any leftover hashes as well.
512// See https://tools.ietf.org/html/rfc6962#section-2.1.2.
513func treeProof(lo, hi, n int64, hashes []Hash) (TreeProof, []Hash) {
514	// We must have lo < n <= hi or else the code here has a bug.
515	if !(lo < n && n <= hi) {
516		panic("tlog: bad math in treeProof")
517	}
518
519	// Reached common ground.
520	if n == hi {
521		if lo == 0 {
522			// This subtree corresponds exactly to the old tree.
523			// The verifier knows that hash, so we don't need to send it.
524			return TreeProof{}, hashes
525		}
526		th, hashes := subTreeHash(lo, hi, hashes)
527		return TreeProof{th}, hashes
528	}
529
530	// Interior node for the proof.
531	// Decide whether to walk down the left or right side.
532	var p TreeProof
533	var th Hash
534	if k, _ := maxpow2(hi - lo); n <= lo+k {
535		// m is on left side
536		p, hashes = treeProof(lo, lo+k, n, hashes)
537		th, hashes = subTreeHash(lo+k, hi, hashes)
538	} else {
539		// m is on right side
540		th, hashes = subTreeHash(lo, lo+k, hashes)
541		p, hashes = treeProof(lo+k, hi, n, hashes)
542	}
543	return append(p, th), hashes
544}
545
546// CheckTree verifies that p is a valid proof that the tree of size t with hash th
547// contains as a prefix the tree of size n with hash h.
548func CheckTree(p TreeProof, t int64, th Hash, n int64, h Hash) error {
549	if t < 1 || n < 1 || n > t {
550		return fmt.Errorf("tlog: invalid inputs in CheckTree")
551	}
552	h2, th2, err := runTreeProof(p, 0, t, n, h)
553	if err != nil {
554		return err
555	}
556	if th2 == th && h2 == h {
557		return nil
558	}
559	return errProofFailed
560}
561
562// runTreeProof runs the sub-proof p related to the subtree containing records [lo, hi),
563// where old is the hash of the old tree with n records.
564// Running the proof means constructing and returning the implied hashes of that
565// subtree in both the old and new tree.
566func runTreeProof(p TreeProof, lo, hi, n int64, old Hash) (Hash, Hash, error) {
567	// We must have lo < n <= hi or else the code here has a bug.
568	if !(lo < n && n <= hi) {
569		panic("tlog: bad math in runTreeProof")
570	}
571
572	// Reached common ground.
573	if n == hi {
574		if lo == 0 {
575			if len(p) != 0 {
576				return Hash{}, Hash{}, errProofFailed
577			}
578			return old, old, nil
579		}
580		if len(p) != 1 {
581			return Hash{}, Hash{}, errProofFailed
582		}
583		return p[0], p[0], nil
584	}
585
586	if len(p) == 0 {
587		return Hash{}, Hash{}, errProofFailed
588	}
589
590	// Interior node for the proof.
591	k, _ := maxpow2(hi - lo)
592	if n <= lo+k {
593		oh, th, err := runTreeProof(p[:len(p)-1], lo, lo+k, n, old)
594		if err != nil {
595			return Hash{}, Hash{}, err
596		}
597		return oh, NodeHash(th, p[len(p)-1]), nil
598	} else {
599		oh, th, err := runTreeProof(p[:len(p)-1], lo+k, hi, n, old)
600		if err != nil {
601			return Hash{}, Hash{}, err
602		}
603		return NodeHash(p[len(p)-1], oh), NodeHash(p[len(p)-1], th), nil
604	}
605}
606