From 82e93342948eef01c1b8ed0c03e03456c73922ab Mon Sep 17 00:00:00 2001 From: dholms <dtholmgren@gmail.com> Date: Wed, 27 Jul 2022 22:26:13 -0500 Subject: [PATCH] redoing a bunch of stuff, all in memory --- packages/common/src/repo/mst.ts | 434 ----------------------- packages/common/src/repo/mst/mst.ts | 518 ++++++++++++++++++++++++++++ packages/common/tests/mst.test.ts | 176 +++++----- 3 files changed, 597 insertions(+), 531 deletions(-) delete mode 100644 packages/common/src/repo/mst.ts create mode 100644 packages/common/src/repo/mst/mst.ts diff --git a/packages/common/src/repo/mst.ts b/packages/common/src/repo/mst.ts deleted file mode 100644 index aa964944..00000000 --- a/packages/common/src/repo/mst.ts +++ /dev/null @@ -1,434 +0,0 @@ -import { CID } from 'multiformats' -import * as uint8arrays from 'uint8arrays' -import IpldStore from '../blockstore/ipld-store' -import { sha256 } from '@adxp/crypto' - -import z from 'zod' -import { schema } from '../common/types' -import * as check from '../common/check' - -const leafPointer = z.tuple([z.string(), schema.cid]) -const treePointer = schema.cid -const treeEntry = z.union([leafPointer, treePointer]) -const nodeSchema = z.array(treeEntry) - -type LeafPointer = z.infer<typeof leafPointer> -type TreePointer = z.infer<typeof treePointer> -type TreeEntry = z.infer<typeof treeEntry> -type Node = z.infer<typeof nodeSchema> - -export const leadingZerosOnHash = async (key: string): Promise<number> => { - const hash = await sha256(key) - const b32 = uint8arrays.toString(hash, 'base32') - let count = 0 - for (const char of b32) { - if (char === 'a') { - // 'a' is 0 in b32 - count++ - } else { - break - } - } - return count -} - -const spliceIn = <T>(array: T[], item: T, index: number): T[] => { - return [...array.slice(0, index), item, ...array.slice(index)] -} - -export class MST { - blockstore: IpldStore - cid: CID - node: Node - zeros: number - - constructor(blockstore: IpldStore, cid: CID, node: Node, zeros: number) { - 
this.blockstore = blockstore - this.cid = cid - this.node = node - this.zeros = zeros - } - - static async create(blockstore: IpldStore, zeros = 0): Promise<MST> { - return MST.fromData(blockstore, [], zeros) - } - - static async fromData( - blockstore: IpldStore, - node: Node, - zeros: number, - ): Promise<MST> { - const cid = await blockstore.put(node as any) - return new MST(blockstore, cid, node, zeros) - } - - static async load( - blockstore: IpldStore, - cid: CID, - zeros?: number, - ): Promise<MST> { - const node = await blockstore.get(cid, nodeSchema) - if (zeros === undefined) { - const firstLeaf = node.find((entry) => check.is(entry, leafPointer)) - if (!firstLeaf) { - throw new Error('not a valid mst node: no leaves') - } - zeros = await leadingZerosOnHash(firstLeaf[0]) - } - return new MST(blockstore, cid, node, zeros) - } - - async put(): Promise<CID> { - this.cid = await this.blockstore.put(this.node as any) // @TODO no any - return this.cid - } - - async add(key: string, value: CID): Promise<CID> { - const keyZeros = await leadingZerosOnHash(key) - if (keyZeros === this.zeros) { - // it belongs in this layer - const index = this.findGtOrEqualLeafIndex(key) - const found = this.node[index] - if (found && found[0] === key) { - throw new Error(`There is already a value at key: ${key}`) - } - const prevNode = this.node[index - 1] - if (!prevNode || check.is(prevNode, leafPointer)) { - // if entry before is a leaf, (or we're on far left) we can just splice in - this.node = spliceIn(this.node, [key, value], index) - return this.put() - } else { - // else we need to investigate the subtree - const subTree = await MST.load( - this.blockstore, - prevNode, - this.zeros - 1, - ) - // we try to split the subtree around the key - const splitSubTree = await subTree.splitAround(key) - const newNode = this.node.slice(0, index - 1) - if (splitSubTree[0]) newNode.push(splitSubTree[0]) - newNode.push([key, value]) - if (splitSubTree[1]) newNode.push(splitSubTree[1]) - 
newNode.push(...this.node.slice(index)) - this.node = newNode - return this.put() - } - } else if (keyZeros < this.zeros) { - // it belongs on a lower layer - const index = this.findGtOrEqualLeafIndex(key) - const prevNode = this.node[index - 1] - if (check.is(prevNode, treePointer)) { - // if entry before is a tree, we add it to that tree - const subTree = await MST.load( - this.blockstore, - prevNode, - this.zeros - 1, - ) - const newSubTreeCid = await subTree.add(key, value) - this.node[index - 1] = newSubTreeCid - return this.put() - } else { - // else we need to create the subtree for it to go in - const subTree = await MST.create(this.blockstore, this.zeros - 1) - const newSubTreeCid = await subTree.add(key, value) - this.node = spliceIn(this.node, newSubTreeCid, index) - return this.put() - } - } else { - // it belongs on a higher layer & we must push the rest of the tree down - let split = await this.splitAround(key) - // if the newly added key has >=2 more leading zeros than the current highest layer - // then we need to add in structural nodes in between as well - let left: CID | null = split[0] - let right: CID | null = split[1] - const extraLayersToAdd = keyZeros - this.zeros - // intentionally starting at 1, since first layer is taken care of by split - for (let i = 1; i < extraLayersToAdd; i++) { - if (left !== null) { - const leftNode = await MST.fromData( - this.blockstore, - [left], - this.zeros + i, - ) - left = leftNode.cid - } - if (right !== null) { - const rightNode = await MST.fromData( - this.blockstore, - [right], - this.zeros + i, - ) - right = rightNode.cid - } - } - let newNode: Node = [] - if (left) newNode.push(left) - newNode.push([key, value]) - if (right) newNode.push(right) - this.node = newNode - this.zeros = keyZeros - return this.put() - } - } - - // finds first leaf node that is greater than or equal to the value - findGtOrEqualLeafIndex(key: string): number { - const maybeIndex = this.node.findIndex( - (entry) => 
check.is(entry, leafPointer) && entry[0] >= key, - ) - // if we can't find, we're on the end - return maybeIndex >= 0 ? maybeIndex : this.node.length - } - - async splitAround(key: string): Promise<[CID | null, CID | null]> { - const index = this.findGtOrEqualLeafIndex(key) - const leftData = this.node.slice(0, index) - const rightData = this.node.slice(index) - - if (leftData.length === 0) { - return [null, this.cid] - } - if (rightData.length === 0) { - return [this.cid, null] - } - const left = await MST.fromData(this.blockstore, leftData, this.zeros) - const right = await MST.fromData(this.blockstore, rightData, this.zeros) - const prev = leftData[leftData.length - 1] - if (check.is(prev, treePointer)) { - const prevSubtree = await MST.load(this.blockstore, prev, this.zeros - 1) - const prevSplit = await prevSubtree.splitAround(key) - if (prevSplit[0]) { - await left.append(prev) - } - if (prevSplit[1]) { - await right.prepend(prev) - } - } - - return [left.cid, right.cid] - } - - async append(entry: TreeEntry): Promise<CID> { - this.node = [...this.node, entry] - return this.put() - } - - async prepend(entry: TreeEntry): Promise<CID> { - this.node = [entry, ...this.node] - return this.put() - } - - async get(key: string): Promise<CID | null> { - const index = this.findGtOrEqualLeafIndex(key) - const found = this.node[index] - if (found && check.is(found, leafPointer) && found[0] === key) { - return found[1] - } - const prev = this.node[index - 1] - if (check.is(prev, treePointer)) { - const subTree = await MST.load(this.blockstore, prev, this.zeros - 1) - return subTree.get(key) - } - return null - } - - async edit(key: string, value: CID): Promise<CID> { - const index = this.findGtOrEqualLeafIndex(key) - const found = this.node[index] - if (found && check.is(found, leafPointer) && found[0] === key) { - this.node[index][1] = value - return await this.put() - } - const prev = this.node[index - 1] - if (check.is(prev, treePointer)) { - const subTree = await 
MST.load(this.blockstore, prev, this.zeros - 1) - const subTreeCid = await subTree.edit(key, value) - this.node[index - 1] = subTreeCid - return await this.put() - } - throw new Error(`Could not find a record with key: ${key}`) - } - - // async delete(key: string): Promise<void> {} - - layerHasEntry(entry: TreeEntry): boolean { - let found: TreeEntry | undefined - if (check.is(entry, leafPointer)) { - found = this.node.find((e) => { - return ( - check.is(e, leafPointer) && entry[0] === e[0] && entry[1].equals(e[1]) - ) - }) - } else { - found = this.node.find((e) => { - return check.is(e, treePointer) && entry.equals(e) - }) - } - return found !== undefined - } - - async loadChild(cid: CID): Promise<MST> { - return MST.load(this.blockstore, cid, this.zeros - 1) - } - - async mergeIn(toMerge: MST): Promise<CID> { - let newNode: Node = [] - let thisI = 0, - toMergeI = 0 - while (thisI < this.node.length && toMergeI < toMerge.node.length) { - const thisHead = this.node[thisI] - const toMergeHead = toMerge.node[toMergeI] - if (!thisHead) { - newNode.push(toMergeHead) - toMergeI++ - } else if (!toMergeHead) { - newNode.push(thisHead) - thisI++ - } else if ( - check.is(thisHead, leafPointer) && - check.is(toMergeHead, leafPointer) - ) { - if (thisHead[0] === toMergeHead[0]) { - // on same, toMerge wins - newNode.push(toMergeHead) - thisI++ - toMergeI++ - } else if (thisHead[0] < toMergeHead[0]) { - newNode.push(thisHead) - thisI++ - } else { - newNode.push(toMergeHead) - toMergeI++ - } - } else if ( - check.is(thisHead, treePointer) && - check.is(toMergeHead, leafPointer) - ) { - const toSplit = await this.loadChild(thisHead) - const split = await toSplit.splitAround(toMergeHead[0]) - if (split[0] !== null) { - const prev = newNode[newNode.length - 1] - if (check.is(prev, treePointer)) { - const toMerge = await this.loadChild(split[0]) - const toMergeIn = await this.loadChild(prev) - await toMerge.mergeIn(toMergeIn) - newNode.push(toMerge.cid) - } else { - 
newNode.push(split[0]) - } - } - newNode.push(toMergeHead) - if (split[1] !== null) newNode.push(split[1]) - thisI++ - toMergeI++ - } else if ( - check.is(thisHead, leafPointer) && - check.is(toMergeHead, treePointer) - ) { - const toSplit = await this.loadChild(toMergeHead) - const split = await toSplit.splitAround(thisHead[0]) - if (split[0] !== null) { - const prev = newNode[newNode.length - 1] - if (check.is(prev, treePointer)) { - const toMerge = await this.loadChild(prev) - const toMergeIn = await this.loadChild(split[0]) - await toMerge.mergeIn(toMergeIn) - newNode.push(toMerge.cid) - } else { - newNode.push(split[0]) - } - } - newNode.push(toMergeHead) - if (split[1] !== null) newNode.push(split[1]) - thisI++ - toMergeI++ - } else if ( - check.is(thisHead, treePointer) && - check.is(toMergeHead, treePointer) - ) { - const toMerge = await this.loadChild(thisHead) - const toMergeIn = await this.loadChild(toMergeHead) - await toMerge.mergeIn(toMergeIn) - newNode.push(toMerge.cid) - thisI++ - toMergeI++ - } else { - throw new Error('SHOULDNT ever reach this') - } - } - return this.put() - } - - // toMerge wins on merge conflicts - async mergeInOld(toMerge: MST): Promise<CID> { - let lastIndex = 0 - for (const entry of toMerge.node) { - if (check.is(entry, leafPointer)) { - lastIndex = this.findGtOrEqualLeafIndex(entry[0]) - const found = this.node[lastIndex] - if (found && found[0] === entry[0]) { - // does nothing if same, overwrites if different - this.node[lastIndex] = entry - lastIndex++ - } else { - this.node = spliceIn(this.node, entry, lastIndex) - lastIndex++ - } - } else { - const nextEntryInNode = this.node[lastIndex] - if (!check.is(nextEntryInNode, treePointer)) { - // if the next is a leaf, we splice in before - this.node = spliceIn(this.node, entry, lastIndex) - lastIndex++ - } else if (!nextEntryInNode.equals(entry)) { - // if it's a new subtree, then we have to merge the two children - const nodeChild = await MST.load( - this.blockstore, - 
nextEntryInNode, - this.zeros - 1, - ) - const toMergeChild = await MST.load( - this.blockstore, - entry, - this.zeros - 1, - ) - const mergedCid = await nodeChild.mergeIn(toMergeChild) - this.node[lastIndex] = mergedCid - lastIndex++ - } else { - // if it's the same subtree, do nothing & increment index - lastIndex++ - } - } - } - return this.put() - } - - async walk(fn: (level: number, key: string | null) => void) { - for (const entry of this.node) { - if (check.is(entry, treePointer)) { - const subTree = await MST.load(this.blockstore, entry, this.zeros - 1) - fn(this.zeros, null) - await subTree.walk(fn) - } else { - fn(this.zeros, entry[0]) - } - } - } - - async structure() { - const tree: any = [] - for (const entry of this.node) { - if (check.is(entry, treePointer)) { - const subTree = await MST.load(this.blockstore, entry, this.zeros - 1) - tree.push(['LINK', await subTree.structure()]) - } else { - tree.push([entry[0], entry[1].toString()]) - } - } - return tree - } -} - -export default MST diff --git a/packages/common/src/repo/mst/mst.ts b/packages/common/src/repo/mst/mst.ts new file mode 100644 index 00000000..06a0671a --- /dev/null +++ b/packages/common/src/repo/mst/mst.ts @@ -0,0 +1,518 @@ +import * as Block from 'multiformats/block' +import { sha256 as blockHasher } from 'multiformats/hashes/sha2' +import * as blockCodec from '@ipld/dag-cbor' +import { CID } from 'multiformats' +import * as uint8arrays from 'uint8arrays' +import IpldStore from '../../blockstore/ipld-store' +import { sha256 } from '@adxp/crypto' + +import z from 'zod' +import { schema } from '../../common/types' +import * as check from '../../common/check' + +const leafPointer = z.tuple([z.string(), schema.cid]) +const treePointer = schema.cid +const treeEntry = z.union([leafPointer, treePointer]) +const nodeDataSchema = z.array(treeEntry) + +// type LeafPointer = z.infer<typeof leafPointer> +// type TreePointer = z.infer<typeof treePointer> +// type TreeEntry = z.infer<typeof 
treeEntry> +type NodeData = z.infer<typeof nodeDataSchema> + +export const leadingZerosOnHash = async (key: string): Promise<number> => { + const hash = await sha256(key) + const b32 = uint8arrays.toString(hash, 'base32') + let count = 0 + for (const char of b32) { + if (char === 'a') { + // 'a' is 0 in b32 + count++ + } else { + break + } + } + return count +} + +class MST { + blockstore: IpldStore + entries: NodeEntry[] | null + layer: number | null + pointer: CID + + constructor( + blockstore: IpldStore, + pointer: CID, + entries: NodeEntry[] | null, + layer: number | null, + ) { + this.blockstore = blockstore + this.entries = entries + this.layer = layer + this.pointer = pointer + } + + static async getCid(entries: NodeEntry[]): Promise<CID> { + const data = entries.map((entry) => { + if (entry.isLeaf()) { + return [entry.key, entry.value] + } else { + return entry.pointer + } + }) + const block = await Block.encode({ + value: data as any, + codec: blockCodec, + hasher: blockHasher, + }) + return block.cid + } + + static async create( + blockstore: IpldStore, + entries: NodeEntry[] = [], + layer = 0, + ): Promise<MST> { + const pointer = await MST.getCid(entries) + return new MST(blockstore, pointer, entries, layer) + } + + static async fromData( + blockstore: IpldStore, + data: NodeData, + layer?: number, + ): Promise<MST> { + const entries = data.map((entry) => { + if (check.is(entry, treePointer)) { + return MST.fromCid(blockstore, entry, layer ? layer - 1 : undefined) + } else { + return new Leaf(entry[0], entry[1]) + } + }) + const pointer = await MST.getCid(entries) + return new MST(blockstore, pointer, entries, layer ?? null) + } + + static fromCid(blockstore: IpldStore, cid: CID, layer?: number): MST { + return new MST(blockstore, cid, null, layer ?? 
null) + } + + async getEntries(): Promise<NodeEntry[]> { + if (this.entries) return this.entries + if (this.pointer) { + const data = await this.blockstore.get(this.pointer, nodeDataSchema) + this.entries = data.map((entry) => { + if (check.is(entry, treePointer)) { + // @TODO using this.layer instead of getLayer here?? + return MST.fromCid( + this.blockstore, + entry, + this.layer ? this.layer - 1 : undefined, + ) + } else { + return new Leaf(entry[0], entry[1]) + } + }) + + return this.entries + } + throw new Error('No entries or CID provided') + } + + async getLayer(): Promise<number> { + if (this.layer !== null) return this.layer + const entries = await this.getEntries() + const firstLeaf = entries.find((entry) => entry.isLeaf()) + if (!firstLeaf || !firstLeaf.isLeaf()) { + throw new Error('not a valid mst node: no leaves') + } + this.layer = await leadingZerosOnHash(firstLeaf.key) // entries are Leaf objects here, not [key, cid] tuples + return this.layer + } + + async add(key: string, value: CID): Promise<MST> { + const keyZeros = await leadingZerosOnHash(key) + const layer = await this.getLayer() + const newLeaf = new Leaf(key, value) + if (keyZeros === layer) { + // it belongs in this layer + const index = await this.findGtOrEqualLeafIndex(key) + const found = await this.atIndex(index) + if (found && found.isLeaf() && found.key === key) { // reject an existing key whatever value it holds + throw new Error(`There is already a value at key: ${key}`) + } + const prevNode = await this.atIndex(index - 1) + if (!prevNode || prevNode.isLeaf()) { + // if entry before is a leaf, (or we're on far left) we can just splice in + return this.spliceIn(newLeaf, index) + } else { + // else we try to split the subtree around the key + const splitSubTree = await prevNode.splitAround(key) + return this.replaceWithSplit( + index - 1, + splitSubTree[0], + newLeaf, + splitSubTree[1], + ) + } + } else if (keyZeros < layer) { + // it belongs on a lower layer + const index = await this.findGtOrEqualLeafIndex(key) + const prevNode = await this.atIndex(index - 1) + if (prevNode && prevNode.isTree()) { + // if entry before is 
a tree, we add it to that tree + const newSubtree = await prevNode.add(key, value) + return this.updateEntry(index - 1, newSubtree) + } else { + const subTree = await this.createChild() + const newSubTree = await subTree.add(key, value) + return this.spliceIn(newSubTree, index) + } + } else { + // it belongs on a higher layer & we must push the rest of the tree down + let split = await this.splitAround(key) + // if the newly added key has >=2 more leading zeros than the current highest layer + // then we need to add in structural nodes in between as well + let left: MST | null = split[0] + let right: MST | null = split[1] + const layer = await this.getLayer() + const extraLayersToAdd = keyZeros - layer + // intentionally starting at 1, since first layer is taken care of by split + for (let i = 1; i < extraLayersToAdd; i++) { + if (left !== null) { + left = await left.createParent() + } + if (right !== null) { + right = await right.createParent() + } + } + const updated: NodeEntry[] = [] + if (left) updated.push(left) + updated.push(new Leaf(key, value)) + if (right) updated.push(right) + return MST.create(this.blockstore, updated, keyZeros) + } + } + + async get(key: string): Promise<CID | null> { + const index = await this.findGtOrEqualLeafIndex(key) + const found = await this.atIndex(index) + if (found && found.isLeaf() && found.key === key) { + return found.value + } + const prev = await this.atIndex(index - 1) + if (prev && prev.isTree()) { + return prev.get(key) + } + return null + } + + async edit(key: string, value: CID): Promise<MST> { + const index = await this.findGtOrEqualLeafIndex(key) + const found = await this.atIndex(index) + if (found && found.isLeaf() && found.key === key) { + return this.updateEntry(index, new Leaf(key, value)) + } + const prev = await this.atIndex(index - 1) + if (prev && prev.isTree()) { + const updatedTree = await prev.edit(key, value) + return this.updateEntry(index - 1, updatedTree) + } + throw new Error(`Could not find a 
record with key: ${key}`) + } + + async delete(key: string): Promise<MST> { + const index = await this.findGtOrEqualLeafIndex(key) + const found = await this.atIndex(index) + if (found?.isLeaf() && found.key === key) { + const prev = await this.atIndex(index - 1) + const next = await this.atIndex(index + 1) // entry directly to the right of the removed leaf + if (prev?.isTree() && next?.isTree()) { + const merged = await prev.appendMerge(next) + return this.newTree([ + ...(await this.slice(0, index - 1)), + merged, + ...(await this.slice(index + 2)), // everything after `next`, which is now part of `merged` + ]) + } else { + return this.removeEntry(index) + } + } + const prev = await this.atIndex(index - 1) + if (prev?.isTree()) { + const subtree = await prev.delete(key) + return this.updateEntry(index - 1, subtree) + } else { + throw new Error(`Could not find a record with key: ${key}`) + } + } + + // the simple merge case where every key in the right tree is greater than every key in the left tree (ie deletes) + async appendMerge(toMerge: MST): Promise<MST> { + if (!(await this.isSameLayer(toMerge))) { + throw new Error( + 'Trying to merge two nodes from different layers of the MST', + ) + } + const thisEntries = await this.getEntries() + const toMergeEntries = await toMerge.getEntries() + const lastInLeft = thisEntries[thisEntries.length - 1] // rightmost entry of the left tree + const firstInRight = toMergeEntries[0] + if (lastInLeft?.isTree() && firstInRight?.isTree()) { + const merged = await lastInLeft.appendMerge(firstInRight) + return this.newTree([ + ...thisEntries.slice(0, thisEntries.length - 1), + merged, + ...toMergeEntries.slice(1), + ]) + } else { + return this.newTree([...thisEntries, ...toMergeEntries]) + } + } + + async isSameLayer(other: MST): Promise<boolean> { + const thisLayer = await this.getLayer() + const otherLayer = await other.getLayer() + return thisLayer === otherLayer + } + + async createChild(): Promise<MST> { + const layer = await this.getLayer() + return MST.create(this.blockstore, [], layer - 1) + } + + async createParent(): Promise<MST> { + const layer = await 
this.getLayer() + return MST.create(this.blockstore, [this], layer + 1) + } + + async updateEntry(index: number, entry: NodeEntry): Promise<MST> { + const entries = [...(await this.getEntries())] // copy so the tree we derive from is not mutated + entries[index] = entry + return this.newTree(entries) + } + + async removeEntry(index: number): Promise<MST> { + const entries = await this.getEntries() + const updated = entries.filter((_, i) => i !== index) // splice() would return only the removed entry + return this.newTree(updated) + } + + newTree(entries: NodeEntry[]): MST { + return new MST(this.blockstore, this.pointer, entries, this.layer) + } + + async splitAround(key: string): Promise<[MST | null, MST | null]> { + const index = await this.findGtOrEqualLeafIndex(key) + const leftData = await this.slice(0, index) + const rightData = await this.slice(index) + + if (leftData.length === 0) { + return [null, this] + } + const prev = leftData[leftData.length - 1] + if (rightData.length === 0 && prev.isLeaf()) { // a trailing subtree may still hold keys >= key, so only shortcut on a leaf + return [this, null] + } + let left = this.newTree(prev.isTree() ? leftData.slice(0, -1) : leftData) // a trailing subtree is re-added below, after being split + let right = this.newTree(rightData) + if (prev.isTree()) { + const prevSplit = await prev.splitAround(key) + if (prevSplit[0]) { + left = await left.append(prevSplit[0]) // append/prepend return new trees + } + if (prevSplit[1]) { + right = await right.prepend(prevSplit[1]) + } + } + + return [left.entries?.length ? left : null, right.entries?.length ? right : null] // empty halves are reported as null + } + + async append(entry: NodeEntry): Promise<MST> { + const entries = await this.getEntries() + return this.newTree([...entries, entry]) + } + + async prepend(entry: NodeEntry): Promise<MST> { + const entries = await this.getEntries() + return this.newTree([entry, ...entries]) + } + + async atIndex(index: number): Promise<NodeEntry | null> { + const entries = await this.getEntries() + return entries[index] ?? 
null + } + + async slice( + start?: number | undefined, + end?: number | undefined, + ): Promise<NodeEntry[]> { + const entries = await this.getEntries() + return entries.slice(start, end) + } + + async spliceIn(entry: NodeEntry, index: number): Promise<MST> { + const update = [ + ...(await this.slice(0, index)), + entry, + ...(await this.slice(index)), + ] + return this.newTree(update) + } + + async replaceWithSplit( + index: number, + left: MST | null, + leaf: Leaf, + right: MST | null, + ): Promise<MST> { + const update = await this.slice(0, index) + if (left) update.push(left) + update.push(leaf) + if (right) update.push(right) + update.push(...(await this.slice(index + 1))) + return this.newTree(update) + } + + async findLeafOrPriorSubTree(key: string): Promise<NodeEntry | null> { + const index = await this.findGtOrEqualLeafIndex(key) + const found = await this.atIndex(index) + if (found && found.isLeaf() && found.key === key) { + return found + } + const prev = await this.atIndex(index - 1) + if (prev && prev.isTree()) { + return prev + } + return null + } + + // finds first leaf node that is greater than or equal to the value + async findGtOrEqualLeafIndex(key: string): Promise<number> { + const entries = await this.getEntries() + const maybeIndex = entries.findIndex( + (entry) => entry.isLeaf() && entry.key >= key, + ) + // if we can't find, we're on the end + return maybeIndex >= 0 ? 
maybeIndex : entries.length + } + + isTree(): this is MST { + return true + } + + isLeaf(): this is Leaf { + return false + } + + equals(entry: NodeEntry): boolean { + if (entry.isTree()) { + return entry.pointer.equals(this.pointer) + } else { + return false + } + } +} + +type NodeEntry = MST | Leaf + +// class Subtree { + +// constructor(public pointer: CID) {} + +// isSubtree(): this is Subtree { +// return true +// } + +// isLeaf(): this is Leaf { +// return false +// } + +// equals(entry: NodeEntry): boolean { +// if(entry.isSubtree()) { +// return entry.pointer.equals(this.pointer) +// } else { +// return false +// } +// } + +// } + +class Leaf { + constructor(public key: string, public value: CID) {} + + isTree(): this is MST { + return false + } + + isLeaf(): this is Leaf { + return true + } + + equals(entry: NodeEntry): boolean { + if (entry.isLeaf()) { + return this.key === entry.key && this.value.equals(entry.value) + } else { + return false + } + } +} + +// class DiffTracker { +// adds: Record<string, Add> = {} +// updates: Record<string, Update> = {} +// deletes: Record<string, Delete> = {} + +// recordDelete(key: string): void { +// if (this.adds[key]) { +// delete this.adds[key] +// } else { +// this.deletes[key] = { key } +// } +// } + +// recordAdd(key: string, cid: CID): void { +// if (this.deletes[key]) { +// delete this.deletes[key] +// } else { +// this.adds[key] = { key, cid } +// } +// } + +// recordUpdate(key: string, old: CID, cid: CID): void { +// this.updates[key] = { key, old, cid } +// } + +// getDiff(): Diff { +// return { +// adds: Object.values(adds), +// updates: Object.values(updates), +// deletes: Object.values(deletes), +// } +// } +// } + +// type Delete = { +// key: string +// } + +// type Add = { +// key: string +// cid: CID +// } + +// type Update = { +// key: string +// old: CID +// cid: CID +// } + +// type Diff = { +// adds: Add[] +// updates: Update[] +// deletes: Delete[] +// } + +export default MST diff --git 
a/packages/common/tests/mst.test.ts b/packages/common/tests/mst.test.ts index 842f91e3..732cfe8a 100644 --- a/packages/common/tests/mst.test.ts +++ b/packages/common/tests/mst.test.ts @@ -1,4 +1,4 @@ -import MST from '../src/repo/mst' +import MST from '../src/repo/mst/mst' import * as util from './_util' import { IpldStore } from '../src' @@ -6,108 +6,94 @@ import { CID } from 'multiformats' import fs from 'fs' describe('Merkle Search Tree', () => { - it('height of all stupidity', async () => { - const blockstore = IpldStore.createInMemory() - const mst = await MST.create(blockstore) - const toMerge = await MST.create(blockstore) - const mapping = await util.generateBulkTidMapping(500) - const shuffled = shuffle(Object.entries(mapping)) - - for (const entry of shuffled.slice(0, 350)) { - await mst.add(entry[0], entry[1]) - await toMerge.add(entry[0], entry[1]) - } - for (const entry of shuffled.slice(350, 400)) { - await mst.add(entry[0], entry[1]) - } - for (const entry of shuffled.slice(400)) { - await toMerge.add(entry[0], entry[1]) - } - console.log('zeros 1: ', mst.zeros) - console.log('zeros 2: ', toMerge.zeros) - - await mst.mergeIn(toMerge) - for (const entry of shuffled) { - const got = await mst.get(entry[0]) - expect(entry[1].equals(got)).toBeTruthy() - } - }) - - it('merges', async () => { - const blockstore = IpldStore.createInMemory() - const mst = await MST.create(blockstore) - const toMerge = await MST.create(blockstore) - // const mapping = await util.generateBulkTidMapping(500) - // const shuffled = shuffle(Object.entries(mapping)) - const values: Record<string, CID> = {} - const layer1 = ['3j6hnk65jju2t'] - const layer0 = ['3j6hnk65jis2t', '3j6hnk65jit2t'] - - const newKeys = ['3j6hnk65jnm2t'] - - const all = [...layer0, ...layer1] - - for (const tid of all) { - const cid = await util.randomCid() - values[tid] = cid - await mst.add(tid, cid) - await toMerge.add(tid, cid) - } - - console.log('ADDING NEW KEYS') - for (const tid of newKeys) { - 
const cid = await util.randomCid() - values[tid] = cid - await toMerge.add(tid, cid) - } - console.log('MERGING') - await mst.mergeIn(toMerge) - - const structure = await mst.structure() - - let output = '' - await mst.walk((lvl, key) => { - if (key) { - output += `${lvl}: ${key}\n` - } - output += `${lvl}\n` - }) - - fs.writeFileSync('structure', output) - - // const tree = { - // 0: [], - // 1: [], - // 2: [], - // } - // await mst.walk((lvl, key) => { - // tree[lvl].push(key) - // }) - // console.log(tree) - - const got = await mst.get(newKeys[0]) - console.log('GOT: ', got) - - // for (const entry of Object.entries(values)) { - // const got = await mst.get(entry[0]) - // expect(entry[1].equals(got)).toBeTruthy() - // } - }) - - // it('works', async () => { + // it('height of all stupidity', async () => { // const blockstore = IpldStore.createInMemory() // const mst = await MST.create(blockstore) - // const mapping = await util.generateBulkTidMapping(1000) + // const toMerge = await MST.create(blockstore) + // const mapping = await util.generateBulkTidMapping(500) // const shuffled = shuffle(Object.entries(mapping)) - // for (const entry of shuffled) { + // for (const entry of shuffled.slice(0, 350)) { + // await mst.add(entry[0], entry[1]) + // await toMerge.add(entry[0], entry[1]) + // } + // for (const entry of shuffled.slice(350, 400)) { // await mst.add(entry[0], entry[1]) // } - + // for (const entry of shuffled.slice(400)) { + // await toMerge.add(entry[0], entry[1]) + // } + // console.log('zeros 1: ', mst.zeros) + // console.log('zeros 2: ', toMerge.zeros) + // await mst.mergeIn(toMerge) // for (const entry of shuffled) { // const got = await mst.get(entry[0]) // expect(entry[1].equals(got)).toBeTruthy() // } // }) + // it('merges', async () => { + // const blockstore = IpldStore.createInMemory() + // const mst = await MST.create(blockstore) + // const toMerge = await MST.create(blockstore) + // // const mapping = await util.generateBulkTidMapping(500) 
+ // // const shuffled = shuffle(Object.entries(mapping)) + // const values: Record<string, CID> = {} + // const layer1 = ['3j6hnk65jju2t'] + // const layer0 = ['3j6hnk65jis2t', '3j6hnk65jit2t'] + // const newKeys = ['3j6hnk65jnm2t'] + // const all = [...layer0, ...layer1] + // for (const tid of all) { + // const cid = await util.randomCid() + // values[tid] = cid + // await mst.add(tid, cid) + // await toMerge.add(tid, cid) + // } + // console.log('ADDING NEW KEYS') + // for (const tid of newKeys) { + // const cid = await util.randomCid() + // values[tid] = cid + // await toMerge.add(tid, cid) + // } + // console.log('MERGING') + // await mst.mergeIn(toMerge) + // const structure = await mst.structure() + // let output = '' + // await mst.walk((lvl, key) => { + // if (key) { + // output += `${lvl}: ${key}\n` + // } + // output += `${lvl}\n` + // }) + // fs.writeFileSync('structure', output) + // // const tree = { + // // 0: [], + // // 1: [], + // // 2: [], + // // } + // // await mst.walk((lvl, key) => { + // // tree[lvl].push(key) + // // }) + // // console.log(tree) + // const got = await mst.get(newKeys[0]) + // console.log('GOT: ', got) + // // for (const entry of Object.entries(values)) { + // // const got = await mst.get(entry[0]) + // // expect(entry[1].equals(got)).toBeTruthy() + // // } + // }) + + it('works', async () => { + const blockstore = IpldStore.createInMemory() + let mst = await MST.create(blockstore) + const mapping = await util.generateBulkTidMapping(1000) + const shuffled = shuffle(Object.entries(mapping)) + for (const entry of shuffled) { + mst = await mst.add(entry[0], entry[1]) + } + for (const entry of shuffled) { + const got = await mst.get(entry[0]) + expect(entry[1].equals(got)).toBeTruthy() + } + }) // /** // * `f` gets added & it does two node splits (e is no longer grouped with g/h) @@ -141,7 +127,6 @@ describe('Merkle Search Tree', () => { // const blockstore = IpldStore.createInMemory() // const mst = await MST.create(blockstore) 
// const cid = await util.randomCid() - // for (const tid of layer0) { // await mst.add(tid, cid) // } @@ -150,14 +135,12 @@ describe('Merkle Search Tree', () => { // } // await mst.add(layer2, cid) // expect(mst.zeros).toBe(2) - // const allTids = [...layer0, ...layer1, layer2] // for (const tid of allTids) { // const got = await mst.get(tid) // expect(cid.equals(got)).toBeTruthy() // } // }) - // /** // * `b` gets added & it hashes to 2 levels above any existing laves // * @@ -185,7 +168,6 @@ describe('Merkle Search Tree', () => { // for (const tid of layer1) { // await mst.add(tid, cid) // } - // expect(mst.zeros).toBe(2) // const allTids = [...layer0, ...layer1, layer2] // for (const tid of allTids) {