From 979e87fcce5482d6da1dd40b3e5ba790dc7af7ea Mon Sep 17 00:00:00 2001 From: Rasmus Dahlberg Date: Fri, 17 Mar 2023 10:42:57 +0100 Subject: Add heap chunk and helpers --- internal/chunk/chunk.go | 86 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 internal/chunk/chunk.go (limited to 'internal/chunk/chunk.go') diff --git a/internal/chunk/chunk.go b/internal/chunk/chunk.go new file mode 100644 index 0000000..db048c0 --- /dev/null +++ b/internal/chunk/chunk.go @@ -0,0 +1,86 @@ +// Package chunk provides a heap of chunks so that larger in-order chunks can be +// processed more easily, e.g., for verifying ranged inclusion proofs and +// batching file writes. For how to implement the heap interface, see: +// https://pkg.go.dev/container/heap#Interface +package chunk + +import ( + "container/heap" + "crypto/sha256" +) + +// Chunk holds the SANs of a consecutive range of downloaded leaves. The start +// index and associated leaf hashes are included so that they can be verified. +type Chunk struct { + Start int64 // index of first leaf in this chunk + LeafHashes [][sha256.Size]byte // in-order leaf hashes in this chunk + SANs []string // sans of all leaves in this chunk +} + +// ChunkHeap is a min-heap of chunks wrt. to start indices. Use TPush() and +// TPop() to to push/pop chunks without having to worry about type casts. +type ChunkHeap []*Chunk + +// TPop pops a chunk from the heap +func (h *ChunkHeap) TPop() *Chunk { + x := heap.Pop(h) + return x.(*Chunk) +} + +// TPush pushes a chunk to the heap +func (h *ChunkHeap) TPush(c *Chunk) { + heap.Push(h, c) +} + +// Sequence checks if the next pop would provide a chunk starting at index +// start. If so, the heap is also re-organized so that the popped chunk will be +// as large as possible (i.e., in-order chunks are merged to a single chunk). +func (h *ChunkHeap) Sequence(start int64) bool { + if len(*h) == 0 { + return false + } + + // See if we have the next in-order chunk ready + s := h.TPop() + if start != s.Start { + h.TPush(s) + return false + } + + // See if we can make a larger in-order chunk + for len(*h) > 0 { + c := h.TPop() + if c.Start != s.Start+int64(len(s.LeafHashes)) { + h.TPush(c) + break + } + + s.LeafHashes = append(s.LeafHashes, c.LeafHashes...) + s.SANs = append(s.SANs, c.SANs...) + } + + // Put back the largest in-order chunk we have + h.TPush(s) + return true +} + +// +// Implement the heap interface +// + +func (h ChunkHeap) Len() int { return len(h) } +func (h ChunkHeap) Less(i, j int) bool { return h[i].Start < h[j].Start } +func (h ChunkHeap) Swap(i, j int) { h[i], h[j] = h[j], h[i] } + +func (h *ChunkHeap) Push(x any) { + *h = append(*h, x.(*Chunk)) +} + +func (h *ChunkHeap) Pop() any { + old := *h + n := len(old) + x := old[n-1] + old[n-1] = nil // avoid memory leak + *h = old[:n-1] + return x +} -- cgit v1.2.3