Fix some subtle MST bugs & get diff working

This commit is contained in:
dholms 2022-08-11 16:04:24 -05:00
parent 84d56dc742
commit 49cdb5074c
3 changed files with 62 additions and 141 deletions
packages/common
src/repo/mst
tests

@ -322,13 +322,19 @@ export class MST implements DataStore {
const prev = await this.atIndex(index - 1)
if (prev?.isTree()) {
const subtree = await prev.delete(key)
return this.updateEntry(index - 1, subtree)
const subTreeEntries = await subtree.getEntries()
if (subTreeEntries.length === 0) {
return this.removeEntry(index - 1)
} else {
return this.updateEntry(index - 1, subtree)
}
} else {
throw new Error(`Could not find a record with key: ${key}`)
}
}
async diffNew(other: MST): Promise<DataDiff> {
// Walk two MSTs to find the semantic changes
async diff(other: MST): Promise<DataDiff> {
await this.getPointer()
await other.getPointer()
const diff = new DataDiff()
@ -350,21 +356,43 @@ export class MST implements DataStore {
const right = rightWalker.status.curr
if (left === null || right === null) break
if (leftWalker.layer() > rightWalker.layer()) {
if (left.isLeaf()) {
if (left.isLeaf() && right.isLeaf()) {
if (left.key === right.key) {
if (!left.value.equals(right.value)) {
diff.recordUpdate(left.key, left.value, right.value)
}
await leftWalker.stepOver()
await rightWalker.stepOver()
} else if (left.key < right.key) {
diff.recordDelete(left)
await leftWalker.stepOver()
} else {
diff.recordAdd(right)
await rightWalker.stepOver()
}
continue
}
if (leftWalker.layer() > rightWalker.layer()) {
if (left.isLeaf()) {
if (right.isLeaf()) {
diff.recordAdd(right)
} else {
diff.recordAddedCid(right.pointer)
}
await rightWalker.advance()
} else {
await leftWalker.stepInto()
}
continue
} else if (leftWalker.layer() < rightWalker.layer()) {
if (right.isLeaf()) {
diff.recordAdd(right)
await rightWalker.stepOver()
if (left.isLeaf()) {
diff.recordDelete(left)
}
await leftWalker.advance()
} else {
diff.recordAddedCid(right.pointer)
console.log('RIGHT: ', right.pointer.toString())
await rightWalker.stepInto()
}
continue
@ -375,115 +403,25 @@ export class MST implements DataStore {
await leftWalker.stepOver()
await rightWalker.stepOver()
} else {
console.log('RECORIND: ', right.pointer.toString())
diff.recordAddedCid(right.pointer)
await leftWalker.stepInto()
await rightWalker.stepInto()
}
} else if (left.isLeaf() && right.isLeaf()) {
if (left.key === right.key) {
if (!left.value.equals(right.value)) {
diff.recordUpdate(left.key, left.value, right.value)
}
await leftWalker.stepOver()
await rightWalker.stepOver()
} else if (left.key < right.key) {
diff.recordDelete(left)
await leftWalker.stepOver()
} else {
diff.recordAdd(right)
await rightWalker.stepOver()
}
} else if (left.isLeaf() && right.isTree()) {
diff.recordDelete(left)
await leftWalker.stepOver()
} else if (left.isTree() && right.isLeaf()) {
diff.recordAdd(right)
await rightWalker.stepOver()
} else {
throw new Error("Shouldn't reach this")
continue
}
}
return diff
}
// Finds the semantic changes between two MSTs
// This uses a stateful diff tracker that will sometimes record encountered leaves
// before removing them later when they're encountered in the other tree
async diff(other: MST): Promise<DataDiff> {
// we need to make sure both of our pointers are up to date for diffing
await this.getPointer()
await other.getPointer()
const diff = new DataDiff()
diff.recordAddedCid(other.pointer)
diff.recordDeletedCid(this.pointer)
let leftI = 0
let rightI = 0
const leftEntries = await this.getEntries()
const rightEntries = await other.getEntries()
while (leftI < leftEntries.length || rightI < rightEntries.length) {
const left = leftEntries[leftI]
const right = rightEntries[rightI]
if (!left && !right) {
// shouldn't ever reach this, but if both are null, we break
break
} else if (!left) {
// if no left, record a right leaf as an add, or add all leaves in the right subtree
if (right.isLeaf()) {
diff.recordAdd(right)
} else {
const allChildren = await right.allNodes()
for (const entry of allChildren) {
diff.recordAdd(entry)
}
}
rightI++
} else if (!right) {
// if no right, record a left leaf as a delete, or delete all leaves in the left subtree
if (left.isLeaf()) {
diff.recordDelete(left)
} else {
const allChildren = await left.leaves()
for (const entry of allChildren) {
diff.recordDelete(entry)
}
}
leftI++
} else if (left.isLeaf() && right.isLeaf()) {
// if both are leaves, check if they're the same key
// if they're equal, move on. if the value is changed, record update
// if they're different, record the smaller one & increment that side
if (left.key === right.key) {
if (!left.value.equals(right.value)) {
diff.recordUpdate(left.key, left.value, right.value)
}
leftI++
rightI++
} else if (left.key < right.key) {
diff.recordDelete(left)
leftI++
} else {
diff.recordAdd(right)
rightI++
}
} else if (left.isTree() && right.isTree()) {
// if both are trees, find the diff of those trees
if (!(await left.equals(right))) {
const subDiff = await left.diff(right)
diff.addDiff(subDiff)
}
leftI++
rightI++
} else if (left.isLeaf() && right.isTree()) {
// if one is a leaf & one is a tree, record the leaf and increment that side
diff.recordDelete(left)
leftI++
} else if (left.isTree() && right.isLeaf()) {
diff.recordAdd(right)
rightI++
if (left.isLeaf() && right.isTree()) {
await diff.recordAddedCid(right.pointer)
await rightWalker.stepInto()
continue
}
if (left.isTree() && right.isLeaf()) {
await leftWalker.stepInto()
continue
}
throw new Error('Unidentifiable case in diff walk')
}
return diff
}
@ -1027,12 +965,11 @@ class Walker {
}
return
}
const node = await this.status.walking.atIndex(this.status.index)
if (!node?.isTree()) {
if (!this.status.curr.isTree()) {
throw new Error('No tree at pointer, cannot step into')
}
const next = await node.atIndex(0)
const next = await this.status.curr.atIndex(0)
if (!next) {
throw new Error(
'Tried to step into a node with 0 entries which is invalid',
@ -1040,7 +977,7 @@ class Walker {
}
this.stack.push({ ...this.status })
this.status.walking = node
this.status.walking = this.status.curr
this.status.curr = next
this.status.index = 0
}

@ -168,3 +168,14 @@ export const writeMstLog = async (filename: string, tree: MST) => {
}
fs.writeFileSync(filename, log)
}
/**
 * Writes a list of MST entries to disk as JSON.
 *
 * Each `[key, cid]` pair is stored as `[key, cid.toString()]`, so the file
 * holds a JSON array of two-element string arrays.
 *
 * @param filename - Path of the file to write (passed straight to `fs.writeFileSync`).
 * @param entries - Key/CID pairs to serialise, written in the order given.
 */
export const saveMstEntries = (filename: string, entries: [string, CID][]) => {
  const serialised: string[][] = []
  for (const [entryKey, entryCid] of entries) {
    // CIDs are not JSON-friendly, so keep only their string form
    serialised.push([entryKey, entryCid.toString()])
  }
  const payload = JSON.stringify(serialised)
  fs.writeFileSync(filename, payload)
}
/**
 * Reads a JSON file of MST entries and rebuilds the `[key, CID]` pairs.
 *
 * The file is expected to contain a JSON array whose items are arrays with a
 * string key at index 0 and a string-encoded CID at index 1; each CID string
 * is turned back into a `CID` with `CID.parse`.
 *
 * @param filename - Path of the JSON file to read.
 * @returns The decoded entries, in the order they appear in the file.
 * @throws Error if the file does not hold an array, or if any item is not an
 *   array starting with two strings. Errors from `fs.readFileSync`,
 *   `JSON.parse` and `CID.parse` propagate unchanged.
 */
export const loadMstEntries = (filename: string): [string, CID][] => {
  // Read as text directly instead of stringifying a Buffer afterwards
  const contents = fs.readFileSync(filename, 'utf8')
  // Keep the parsed value as `unknown` so its shape has to be checked before use
  const parsed: unknown = JSON.parse(contents)
  if (!Array.isArray(parsed)) {
    throw new Error(`Expected an array of MST entries in ${filename}`)
  }
  return parsed.map((entry: unknown, index: number): [string, CID] => {
    if (
      !Array.isArray(entry) ||
      typeof entry[0] !== 'string' ||
      typeof entry[1] !== 'string'
    ) {
      // Fail with the offending position rather than an opaque destructuring error
      throw new Error(`Malformed MST entry at index ${index} in ${filename}`)
    }
    return [entry[0], CID.parse(entry[1])]
  })
}

@ -105,33 +105,6 @@ describe('Merkle Search Tree', () => {
}
})
// Scratch test: builds a small fanout-32 tree, adds one extra key to a copy,
// and prints the diff between the two. It makes no assertions.
it('temp diff test', async () => {
  // Keys are grouped under the names the fixture uses for them
  // (presumably the MST layer each key lands on -- TODO confirm).
  const layer0 = [
    '3j6hnk65jis2t',
    '3j6hnk65jit2t',
    '3j6hnk65jiu2t',
    '3j6hnk65jne2t',
    '3j6hnk65jnm2t',
    '3j6hnk65jnn2t',
    '3j6hnk65kvx2t',
    '3j6hnk65kvy2t',
    '3j6hnk65kvz2t',
  ]
  const layer1 = ['3j6hnk65jju2t', '3j6hnk65kve2t']
  const layer2 = '3j6hnk65jng2t'

  mst = await MST.create(blockstore, [], { fanout: 32 })
  const cid = await util.randomCid()

  // Insert every layer0 key followed by every layer1 key, all pointing at the same CID
  for (const key of [...layer0, ...layer1]) {
    mst = await mst.add(key, cid)
  }

  // The tree under comparison differs from `mst` only by the single layer2 key
  const withLayer2 = await mst.add(layer2, cid)
  console.log(await mst.diffNew(withLayer2))
})
it('diffs', async () => {
let toDiff = mst
@ -163,7 +136,7 @@ describe('Merkle Search Tree', () => {
expectedDels[entry[0]] = { key: entry[0], cid: entry[1] }
}
const diff = await mst.diffNew(toDiff)
const diff = await mst.diff(toDiff)
expect(diff.addList().length).toBe(100)
expect(diff.updateList().length).toBe(100)