put functions in separate file and some cleanup
This commit is contained in:
parent
174b33a099
commit
e3325eac3f
364
read_tree.py
Normal file
364
read_tree.py
Normal file
@ -0,0 +1,364 @@
|
|||||||
|
import plyvel
|
||||||
|
import struct
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
# functions for reading IAVL tree
|
||||||
|
def read_varint(x: bytes, offset: int = 0) -> tuple[int, int]:
    """Decode a zigzag-encoded signed varint from ``x`` starting at ``offset``.

    Each byte contributes its low 7 bits, least-significant group first; a
    byte below 128 terminates the number.  The accumulated unsigned value is
    then zigzag-decoded (even -> n / 2, odd -> -(n + 1) / 2).

    Returns:
        ``(value, next_offset)`` where ``next_offset`` is the index of the
        first byte after the varint.

    Raises:
        ValueError: if the buffer ends before a terminating byte is found.
    """
    raw = 0
    shift = 0

    # Index directly instead of slicing so no copy of the tail is made.
    for pos in range(offset, len(x)):
        b = x[pos]
        raw |= (b & 0x7F) << shift
        if b < 0x80:
            # Zigzag decode; a plain `raw // 2` is only right for even raw
            # values, i.e. for non-negative numbers.
            return (raw >> 1) ^ -(raw & 1), pos + 1
        shift += 7

    raise ValueError(f'truncated varint at offset {offset}')
|
||||||
|
|
||||||
|
def read_uvarint(x: bytes, offset: int = 0) -> tuple[int, int]:
    """Decode an unsigned varint from ``x`` starting at ``offset``.

    Each byte contributes its low 7 bits, least-significant group first; a
    byte below 128 terminates the number.

    Returns:
        ``(value, next_offset)`` where ``next_offset`` is the index of the
        first byte after the varint.

    Raises:
        ValueError: if the buffer ends before a terminating byte is found.
    """
    value = 0
    shift = 0

    # Index directly instead of slicing so no copy of the tail is made.
    for pos in range(offset, len(x)):
        b = x[pos]
        value |= (b & 0x7F) << shift
        if b < 0x80:
            return value, pos + 1
        shift += 7

    # Previously fell off the loop and returned None, which only surfaced as
    # a confusing unpacking TypeError in the caller.
    raise ValueError(f'truncated uvarint at offset {offset}')
|
||||||
|
|
||||||
|
def read_key(key: bytes) -> tuple[int, int] | None:
|
||||||
|
if not key.startswith(b's'):
|
||||||
|
return None
|
||||||
|
|
||||||
|
version = struct.unpack_from('>Q', key[1:9])[0]
|
||||||
|
nonce = struct.unpack_from('>I', key[9:13])[0]
|
||||||
|
|
||||||
|
return (version, nonce)
|
||||||
|
|
||||||
|
def write_key(key: tuple[int, int]) -> bytes:
    """Build a node key: ``b's'``, then the version as a big-endian unsigned
    64-bit integer, then the nonce as a big-endian unsigned 32-bit integer.
    """
    version, nonce = key[0], key[1]
    return b's' + struct.pack('>QI', version, nonce)
|
||||||
|
|
||||||
|
def read_node(node: bytes) -> tuple[int, int, bytes, tuple[int, int], tuple[int, int]] | tuple[int, int, bytes, bytes] | tuple[int, int]:
    """Decode one stored tree record.

    Three shapes can come back:

    * ``(version, nonce)`` when the record itself starts with ``b's'``, i.e.
      it is a node key pointing at another node (callers treat this as a
      "root copy").
    * ``(height, length, key, value)`` for a leaf (``height == 0``).
    * ``(height, length, key, left, right)`` for an inner node, where
      ``left`` and ``right`` are ``(version, nonce)`` keys of the children.

    ``length`` is the second header field; other functions in this file use
    it as the number of items below the node.
    """
    if node.startswith(b's'):
        return read_key(node)

    # Header shared by leaves and inner nodes.
    height, n = read_varint(node, 0)
    length, n = read_varint(node, n)
    size, n = read_uvarint(node, n)
    key = node[n:n + size]
    n += size

    if height == 0:
        valuesize, n = read_uvarint(node, n)
        return (height, length, key, node[n:n + valuesize])

    # Inner node: skip the length-prefixed blob (named "hash" by the original
    # author) and one uvarint field ("mode") that is read but not used.
    hashsize, n = read_uvarint(node, n)
    n += hashsize
    _mode, n = read_uvarint(node, n)

    left_version, n = read_varint(node, n)
    left_nonce, n = read_varint(node, n)
    right_version, n = read_varint(node, n)
    right_nonce, n = read_varint(node, n)

    return (height, length, key, (left_version, left_nonce), (right_version, right_nonce))
|
||||||
|
|
||||||
|
def walk(tree, version, searchkey):
    """Look up ``searchkey`` in an in-memory tree.

    ``tree`` maps ``(version, nonce)`` node keys to decoded nodes: leaves are
    ``(0, length, key, value)``, inner nodes are
    ``(height, length, key, left, right)``, and a 2-tuple is a reference to
    another root.

    Returns the value stored under ``searchkey``, or ``None`` when the
    version has no root or the key is not present.
    """
    root_key = (version, 1)
    if root_key not in tree:
        return None

    node = tree[root_key]
    if len(node) == 2:  # root copy: follow the reference
        node = tree[node]

    while node[0] > 0:
        # Keys below the node key live in the left subtree.
        child = node[3] if searchkey < node[2] else node[4]
        node = tree[child]

    # The descent always ends on some leaf; only report its value when it is
    # the key that was asked for (same contract as walk_disk_raw).
    return node[3] if node[2] == searchkey else None
|
||||||
|
|
||||||
|
def walk_disk_raw(db, prefix: bytes, version: int, searchkey: bytes) -> None | bytes:
    """Look up the encoded ``searchkey`` in the tree stored in ``db``.

    Nodes are fetched with ``db.get(prefix + write_key(node_key))``, starting
    from the root at ``(version, 1)``.  Returns the leaf value when the leaf
    reached has exactly ``searchkey`` as its key, otherwise ``None`` (also
    when the root record does not exist).
    """
    raw_root = db.get(prefix + write_key((version, 1)))
    if raw_root is None:
        return None

    current = read_node(raw_root)
    if len(current) == 2:  # root copy?
        current = read_node(db.get(prefix + write_key(current)))

    # Descend until a leaf (height 0) is reached.
    while current[0] > 0:
        child = current[3] if searchkey < current[2] else current[4]
        current = read_node(db.get(prefix + write_key(child)))

    if current[2] != searchkey:
        return None
    return current[3]
|
||||||
|
|
||||||
|
def walk_disk_next_key_raw(db, prefix: bytes, version: int, searchkey: bytes) -> None | bytes:
    """Return the smallest key ``>= searchkey`` seen while descending the tree.

    The descent starts at the root ``(version, 1)`` and follows the same
    left/right rule as ``walk_disk_raw``.  The keys of every node visited
    (inner nodes and the final leaf) are candidates.  Returns ``None`` when
    the root record is missing or no visited key is ``>= searchkey``.
    """
    root = db.get(prefix + write_key((version, 1)))
    if root is None:
        return None

    node = read_node(root)
    if len(node) == 2:  # root copy?
        node = read_node(db.get(prefix + write_key(node)))

    # Must come after the root copy is resolved: a reference is only a
    # 2-tuple, so reading node[2] on it raised IndexError.
    lowest_geq_key = node[2] if node[2] >= searchkey else None

    while node[0] > 0:
        child = node[3] if searchkey < node[2] else node[4]
        node = read_node(db.get(prefix + write_key(child)))

        if node[2] >= searchkey and (lowest_geq_key is None or node[2] < lowest_geq_key):
            lowest_geq_key = node[2]

    return lowest_geq_key
|
||||||
|
|
||||||
|
def walk_disk(db, prefix: str, version: int, format: str, searchkey: list) -> None | bytes:
    """Convenience wrapper around ``walk_disk_raw``.

    Encodes the string ``prefix`` as UTF-8 and the structured ``searchkey``
    with ``encode_key(format, searchkey)`` before doing the lookup.
    """
    raw_prefix = prefix.encode('utf-8')
    raw_key = encode_key(format, searchkey)
    return walk_disk_raw(db, raw_prefix, version, raw_key)
|
||||||
|
|
||||||
|
def parse_struct(data):
    """Split ``data`` into a list of ``(field_number, value)`` pairs.

    Every field starts with a uvarint tag: the low 3 bits select the type and
    the remaining bits are the field number.  Type 0 is a uvarint value, type
    2 is a uvarint length followed by that many raw bytes.  Any other type
    raises ``Exception``.
    """
    fields = []
    pos = 0
    total = len(data)

    while pos < total:
        tag, pos = read_uvarint(data, pos)
        wire_type, field_no = tag & 7, tag >> 3

        if wire_type == 0:
            value, pos = read_uvarint(data, pos)
        elif wire_type == 2:
            size, pos = read_uvarint(data, pos)
            value = data[pos:pos + size]
            pos += size
        else:
            raise Exception(f'unknown type {wire_type}, {data[pos:]}')

        fields.append((field_no, value))

    return fields
|
||||||
|
|
||||||
|
# find max height
|
||||||
|
def next_key(db, k: bytes) -> bytes | None:
|
||||||
|
it = db.iterator(start = k)
|
||||||
|
try:
|
||||||
|
nk, _ = next(it)
|
||||||
|
return nk
|
||||||
|
except StopIteration:
|
||||||
|
return None
|
||||||
|
finally:
|
||||||
|
it.close()
|
||||||
|
|
||||||
|
def max_height(db, prefix: bytes = b's/k:emissions/s') -> int:
    """Find the largest 64-bit number that follows ``prefix`` in a key of ``db``.

    Keys are expected to look like ``prefix + struct.pack('>Q', number) + ...``.
    The number is found by bisecting over all 64-bit values, using
    ``next_key`` to ask whether any key with a number at or above the probe
    exists.

    Args:
        db: database handle accepted by ``next_key``.
        prefix: raw key prefix to search under; defaults to the store prefix
            that was previously hard-coded.

    Returns:
        The highest number found, or 0 when no key carries the prefix.
    """

    def exists_at_or_above(number: int) -> bool:
        # The first key >= prefix+number still has the prefix exactly when
        # some stored number is >= `number`.
        found = next_key(db, prefix + struct.pack('>Q', number))
        return found is not None and found.startswith(prefix)

    testnr = 1 << 63

    # Halve the step each round: move up while keys still exist at or above
    # the probe, down otherwise.
    for bit in range(62, -1, -1):
        if exists_at_or_above(testnr):
            testnr += 1 << bit
        else:
            testnr -= 1 << bit

    # The bisection ends on the answer or one above it.
    return testnr if exists_at_or_above(testnr) else testnr - 1
|
||||||
|
|
||||||
|
# encode and decode keys
|
||||||
|
def encode_key(format: str, key: list) -> bytes:
    """Serialize a structured key into raw bytes.

    ``key[0]`` is emitted as a single byte.  Each following element is
    encoded according to the format character at the same position:

    * ``'s'``: UTF-8 text, followed by a zero byte unless it is the last
      format character.
    * ``'Q'``: big-endian unsigned 64-bit integer.
    * ``'q'``: value plus ``1 << 63``, written as big-endian unsigned 64-bit.

    ``key`` may be shorter than ``format``; encoding stops at the last
    element supplied.  Other format characters emit nothing.
    """
    encoded = bytearray()
    encoded.append(key[0])

    last = len(format) - 1

    # zip() stops at the shorter of the format and the remaining key parts.
    for i, (code, part) in enumerate(zip(format, key[1:])):
        if code == 's':
            encoded += part.encode('utf-8')
            if i < last:
                encoded.append(0)
        elif code == 'Q':
            encoded += struct.pack('>Q', part)
        elif code == 'q':
            encoded += struct.pack('>Q', part + (1<<63))

    return bytes(encoded)
|
||||||
|
|
||||||
|
def decode_key(format: str, key: bytes) -> list:
    """Parse raw key bytes back into a list of parts.

    The first byte becomes the first element (as an int).  The remaining
    bytes are consumed according to ``format``:

    * ``'s'``: UTF-8 text up to the next zero byte, or to the end of the key
      when there is none (which also ends parsing).
    * ``'Q'``: big-endian unsigned 64-bit integer.
    * ``'q'``: big-endian unsigned 64-bit integer minus ``1 << 63``.

    Bytes left over after the format is exhausted are appended as one raw
    ``bytes`` element.
    """
    parts = [key[0]]
    pos = 1

    for code in format:
        if code == 's':
            stop = key.find(b'\x00', pos)
            if stop < 0:
                # Unterminated string: it runs to the end of the key.
                parts.append(key[pos:].decode('utf-8'))
                pos = len(key)
                break
            parts.append(key[pos:stop].decode('utf-8'))
            pos = stop + 1
        elif code in ('Q', 'q'):
            (number,) = struct.unpack('>Q', key[pos:pos+8])
            parts.append(number if code == 'Q' else number - (1<<63))
            pos += 8

    if pos < len(key):
        parts.append(key[pos:])

    return parts
|
||||||
|
|
||||||
|
# iteration
|
||||||
|
class IAVLTreeIteratorRaw:
    """Iterate ``(key, value)`` leaf pairs of one tree version in key order.

    Nodes are fetched with ``db.get(prefix + write_key(node_key))`` and
    decoded with ``read_node``.  Iteration begins at the first leaf whose key
    is ``>= start`` (or the leftmost leaf when ``start`` is ``None``) and
    stops before the first leaf whose key is ``>= end`` (when ``end`` is
    given).

    ``stack`` holds the path from the root to the current leaf as
    ``(node_key, decoded_node)`` pairs; ``indexof_raw`` inspects it.
    """

    def __init__(self, db, prefix: bytes, version: int, start: bytes | None = None, end: bytes | None = None):
        self.db = db
        self.prefix = prefix
        self.version = version
        self.start = start
        self.end = end
        self.stack = []

    def __iter__(self):
        return self

    def _load(self, node_key):
        # Fetch and decode the node stored under `node_key`.
        return read_node(self.db.get(self.prefix + write_key(node_key)))

    def _emit(self, leaf):
        # Apply the exclusive upper bound, then hand out the leaf.
        if self.end is not None and leaf[2] >= self.end:
            raise StopIteration
        return (leaf[2], leaf[3])

    def __next__(self):
        if not self.stack:
            # First call: get the root node.  Use the iterator's own handle;
            # the original read a global `db` here.
            root = self.db.get(self.prefix + write_key((self.version, 1)))
            if root is None:
                raise StopIteration
            node = read_node(root)
            if len(node) == 2:  # link to other root node
                node = self._load(node)
            self.stack.append(((self.version, 1), node))

            # Walk down to either the last leaf before start or the first
            # leaf at/after start.
            while node[0] > 0:
                if self.start is None or self.start < node[2]:
                    child = node[3]
                else:
                    child = node[4]
                node = self._load(child)
                self.stack.append((child, node))

            # Return early if we ended up at the first item after start.
            # The end bound applies to this first item too.
            if self.start is None or node[2] >= self.start:
                return self._emit(node)

        # Go up to the first parent which we are a left child of.
        key = None
        for i in range(len(self.stack) - 1, 0, -1):
            current_key = self.stack[i][0]
            parent_node = self.stack[i - 1][1]
            self.stack.pop()
            if current_key == parent_node[3]:
                key = parent_node[4]
                break

        # Are we at the right end of the tree?
        if key is None:
            raise StopIteration

        # Go right ...
        node = self._load(key)
        self.stack.append((key, node))

        # ... then left until at a leaf.
        while node[0] > 0:
            key = node[3]
            node = self._load(key)
            self.stack.append((key, node))

        return self._emit(node)
|
||||||
|
|
||||||
|
class IAVLTreeIterator:
    """Iterator over a tree version that works with structured keys.

    ``start`` and ``end`` are encoded with ``encode_key(format, ...)`` and
    passed to an ``IAVLTreeIteratorRaw``; every key it yields is decoded
    again with ``decode_key(format, ...)``.  Values are passed through
    unchanged.
    """

    def __init__(self, db, prefix: str, version: int, format: str, start: list | None = None, end: list | None = None):
        self.format = format

        # A bound of None means "unbounded" and is forwarded as is.
        lower, upper = (
            None if bound is None else encode_key(format, bound)
            for bound in (start, end)
        )
        self.inner = IAVLTreeIteratorRaw(db, prefix.encode('utf-8'), version, start = lower, end = upper)

    def __iter__(self):
        return self

    def __next__(self):
        raw_key, value = next(self.inner)
        return (decode_key(self.format, raw_key), value)
|
||||||
|
|
||||||
|
def iterate(db, prefix, version, format = '', field = None, start = None, end = None):
    """Create an ``IAVLTreeIterator`` for one tree version.

    With ``field`` given, ``start`` and ``end`` are ignored and the range is
    ``[field]`` up to ``[field + 1]``, with no upper bound when ``field`` is
    255 or more.  Otherwise ``start`` and ``end`` are passed through.
    """
    if field is None:
        return IAVLTreeIterator(db, prefix, version, format, start = start, end = end)

    upper = [field+1] if field < 255 else None
    return IAVLTreeIterator(db, prefix, version, format, start = [field], end = upper)
|
||||||
|
|
||||||
|
def indexof_raw(db, prefix: bytes, version: int, key: bytes) -> int:
    """
    Find how many items come before `key` in the tree. If `key` doesn't exist, how many
    items come before the slot it would get inserted at.

    Returns 0 when the tree has no root at `version`.
    """
    it = IAVLTreeIteratorRaw(db, prefix, version, start=key)
    try:
        next(it)
    except StopIteration:
        # Nothing at or after `key`: every item in the tree comes before it.
        if not it.stack:
            # Root record missing; previously raised IndexError here.
            return 0
        # Take the count from the decoded root kept on the stack. Re-reading
        # the record at stack[0][0] is wrong for a root copy, whose record is
        # only a (version, nonce) reference.
        return it.stack[0][1][1]

    # The stack is the path from the root to the leaf. Wherever the path goes
    # to a right child, the whole left sibling subtree comes before the leaf.
    count = 0
    for (_, parent), (child_key, _) in zip(it.stack, it.stack[1:]):
        if child_key == parent[4]:
            count += read_node(db.get(prefix + write_key(parent[3])))[1]

    return count
|
||||||
|
|
||||||
|
def indexof(db, prefix: str, version: int, format: str, key: list) -> int:
    """Convenience wrapper around ``indexof_raw``.

    Encodes the string ``prefix`` as UTF-8 and the structured ``key`` with
    ``encode_key(format, key)`` before counting.
    """
    raw_prefix = prefix.encode('utf-8')
    raw_key = encode_key(format, key)
    return indexof_raw(db, raw_prefix, version, raw_key)
|
476
store.ipynb
476
store.ipynb
@ -2,281 +2,13 @@
|
|||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 1,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"import plyvel\n",
|
"import plyvel\n",
|
||||||
"import struct\n",
|
|
||||||
"import numpy as np"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 2,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# functions for reading IAVL tree\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
"def read_varint(x: bytes, offset: int = 0) -> int:\n",
|
"%run -i read_tree.py"
|
||||||
" result = 0\n",
|
|
||||||
" factor = 1\n",
|
|
||||||
"\n",
|
|
||||||
" for i, b in enumerate(x[offset:]):\n",
|
|
||||||
" if b >= 128:\n",
|
|
||||||
" result = result + (b - 128) * factor\n",
|
|
||||||
" else:\n",
|
|
||||||
" result = result + b * factor\n",
|
|
||||||
" return result // 2, offset+i+1\n",
|
|
||||||
" factor *= 128\n",
|
|
||||||
"\n",
|
|
||||||
"def read_uvarint(x: bytes, offset: int = 0) -> int:\n",
|
|
||||||
" result = 0\n",
|
|
||||||
" factor = 1\n",
|
|
||||||
"\n",
|
|
||||||
" for i, b in enumerate(x[offset:]):\n",
|
|
||||||
" if b >= 128:\n",
|
|
||||||
" result = result + (b - 128) * factor\n",
|
|
||||||
" else:\n",
|
|
||||||
" result = result + b * factor\n",
|
|
||||||
" return result, offset+i+1\n",
|
|
||||||
" factor *= 128\n",
|
|
||||||
"\n",
|
|
||||||
"def read_key(key: bytes) -> tuple[int, int] | None:\n",
|
|
||||||
" if not key.startswith(b's'):\n",
|
|
||||||
" return None\n",
|
|
||||||
"\n",
|
|
||||||
" version = struct.unpack_from('>Q', key[1:9])[0]\n",
|
|
||||||
" nonce = struct.unpack_from('>I', key[9:13])[0]\n",
|
|
||||||
"\n",
|
|
||||||
" return (version, nonce)\n",
|
|
||||||
"\n",
|
|
||||||
"def write_key(key: tuple[int, int]) -> bytes:\n",
|
|
||||||
" version = struct.pack('>Q', key[0])\n",
|
|
||||||
" nonce = struct.pack('>I', key[1])\n",
|
|
||||||
"\n",
|
|
||||||
" return b's' + version + nonce\n",
|
|
||||||
"\n",
|
|
||||||
"def read_node(node: bytes) -> tuple[int, int, bytes, tuple[int, int], tuple[int, int]] | tuple[int, int, list[int], bytes] | tuple[int, int]:\n",
|
|
||||||
"\n",
|
|
||||||
" if node.startswith(b's'):\n",
|
|
||||||
" return read_key(node)\n",
|
|
||||||
"\n",
|
|
||||||
" n = 0\n",
|
|
||||||
" height, n = read_varint(node, n)\n",
|
|
||||||
"\n",
|
|
||||||
" if height == 0:\n",
|
|
||||||
" length, n = read_varint(node, n)\n",
|
|
||||||
" size, n = read_uvarint(node, n)\n",
|
|
||||||
" key = node[n:n+size]\n",
|
|
||||||
" n += size\n",
|
|
||||||
" valuesize, n = read_uvarint(node, n)\n",
|
|
||||||
" value = node[n:n+valuesize]\n",
|
|
||||||
"\n",
|
|
||||||
" return (height, length, key, value)\n",
|
|
||||||
" else:\n",
|
|
||||||
" length, n = read_varint(node, n)\n",
|
|
||||||
" size, n = read_uvarint(node, n)\n",
|
|
||||||
" key = node[n:n+size]\n",
|
|
||||||
" n += size\n",
|
|
||||||
" hashsize, n = read_uvarint(node, n)\n",
|
|
||||||
" n += hashsize\n",
|
|
||||||
" mode, n = read_uvarint(node, n)\n",
|
|
||||||
" left_version, n = read_varint(node, n)\n",
|
|
||||||
" left_nonce, n = read_varint(node, n)\n",
|
|
||||||
" right_version, n = read_varint(node, n)\n",
|
|
||||||
" right_nonce, n = read_varint(node, n)\n",
|
|
||||||
"\n",
|
|
||||||
" return (height, length, key, (left_version, left_nonce), (right_version, right_nonce))\n",
|
|
||||||
"\n",
|
|
||||||
"def walk(tree, version, searchkey):\n",
|
|
||||||
" if (version, 1) not in tree:\n",
|
|
||||||
" return None\n",
|
|
||||||
"\n",
|
|
||||||
" node = tree[(version, 1)]\n",
|
|
||||||
" if len(node) == 2: # root copy?\n",
|
|
||||||
" node = tree[node]\n",
|
|
||||||
"\n",
|
|
||||||
" while node[0] > 0:\n",
|
|
||||||
" nodekey = node[2]\n",
|
|
||||||
" if searchkey < nodekey:\n",
|
|
||||||
" next = node[3]\n",
|
|
||||||
" else:\n",
|
|
||||||
" next = node[4]\n",
|
|
||||||
"\n",
|
|
||||||
" node = tree[next]\n",
|
|
||||||
"\n",
|
|
||||||
" return node[3]\n",
|
|
||||||
"\n",
|
|
||||||
"def walk_disk_raw(db, prefix: bytes, version: int, searchkey: bytes) -> None | bytes:\n",
|
|
||||||
"\n",
|
|
||||||
" root = db.get(prefix + write_key((version, 1)))\n",
|
|
||||||
" if root is None:\n",
|
|
||||||
" return None\n",
|
|
||||||
"\n",
|
|
||||||
" node = read_node(root)\n",
|
|
||||||
"\n",
|
|
||||||
" if len(node) == 2: # root copy?\n",
|
|
||||||
" node = read_node(db.get(prefix + write_key(node)))\n",
|
|
||||||
"\n",
|
|
||||||
" while node[0] > 0:\n",
|
|
||||||
" # print(node)\n",
|
|
||||||
"\n",
|
|
||||||
" nodekey = node[2]\n",
|
|
||||||
" if searchkey < nodekey:\n",
|
|
||||||
" next = node[3]\n",
|
|
||||||
" else:\n",
|
|
||||||
" next = node[4]\n",
|
|
||||||
"\n",
|
|
||||||
" node = read_node(db.get(prefix + write_key(next)))\n",
|
|
||||||
"\n",
|
|
||||||
" if node[2] == searchkey:\n",
|
|
||||||
" return node[3]\n",
|
|
||||||
" else:\n",
|
|
||||||
" return None\n",
|
|
||||||
"\n",
|
|
||||||
"def walk_disk_next_key_raw(db, prefix: bytes, version: int, searchkey: bytes) -> None | bytes:\n",
|
|
||||||
" root = db.get(prefix + write_key((version, 1)))\n",
|
|
||||||
" if root is None:\n",
|
|
||||||
" return None\n",
|
|
||||||
"\n",
|
|
||||||
" node = read_node(root)\n",
|
|
||||||
" lowest_geq_key = node[2] if node[2] >= searchkey else None\n",
|
|
||||||
"\n",
|
|
||||||
" if len(node) == 2: # root copy?\n",
|
|
||||||
" node = read_node(db.get(prefix + write_key(node)))\n",
|
|
||||||
"\n",
|
|
||||||
" while node[0] > 0:\n",
|
|
||||||
" # print(node)\n",
|
|
||||||
"\n",
|
|
||||||
" nodekey = node[2]\n",
|
|
||||||
" if searchkey < nodekey:\n",
|
|
||||||
" next = node[3]\n",
|
|
||||||
" else:\n",
|
|
||||||
" next = node[4]\n",
|
|
||||||
"\n",
|
|
||||||
" node = read_node(db.get(prefix + write_key(next)))\n",
|
|
||||||
" if node[2] >= searchkey and (lowest_geq_key is None or node[2] < lowest_geq_key):\n",
|
|
||||||
" lowest_geq_key = node[2]\n",
|
|
||||||
"\n",
|
|
||||||
" return lowest_geq_key\n",
|
|
||||||
"\n",
|
|
||||||
"def walk_disk(db, prefix: str, version: int, format: str, searchkey: list) -> None | bytes:\n",
|
|
||||||
" return walk_disk_raw(db, prefix.encode('utf-8'), version, encode_key(format, searchkey))\n",
|
|
||||||
"\n",
|
|
||||||
"def parse_struct(data):\n",
|
|
||||||
" n = 0\n",
|
|
||||||
" results = []\n",
|
|
||||||
"\n",
|
|
||||||
" while n < len(data):\n",
|
|
||||||
" key, n = read_uvarint(data, n)\n",
|
|
||||||
" ty = key & 7\n",
|
|
||||||
" key >>= 3\n",
|
|
||||||
" if ty == 2:\n",
|
|
||||||
" l, n = read_uvarint(data, n)\n",
|
|
||||||
" val = data[n:n+l]\n",
|
|
||||||
" n += l\n",
|
|
||||||
" elif ty == 0:\n",
|
|
||||||
" val, n = read_uvarint(data, n)\n",
|
|
||||||
" else:\n",
|
|
||||||
" raise Exception(f'unknown type {ty}, {data[n:]}')\n",
|
|
||||||
" results.append((key, val))\n",
|
|
||||||
"\n",
|
|
||||||
" return results"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 3,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# find max height\n",
|
|
||||||
"\n",
|
|
||||||
"def next_key(db, k: bytes) -> bytes | None:\n",
|
|
||||||
" it = db.iterator(start = k)\n",
|
|
||||||
" try:\n",
|
|
||||||
" nk, _ = next(it)\n",
|
|
||||||
" return nk\n",
|
|
||||||
" except StopIteration:\n",
|
|
||||||
" return None\n",
|
|
||||||
" finally:\n",
|
|
||||||
" it.close()\n",
|
|
||||||
"\n",
|
|
||||||
"def max_height(db) -> int:\n",
|
|
||||||
" testnr = 1<<63\n",
|
|
||||||
"\n",
|
|
||||||
" for i in range(62, -1, -1):\n",
|
|
||||||
" prefix = b's/k:emissions/s'\n",
|
|
||||||
" n = next_key(db, prefix + struct.pack('>Q', testnr))\n",
|
|
||||||
"\n",
|
|
||||||
" if n is not None and n.startswith(prefix):\n",
|
|
||||||
" # print(f'{testnr:16x} is low')\n",
|
|
||||||
" testnr += 1 << i\n",
|
|
||||||
" else:\n",
|
|
||||||
" # print(f'{testnr:16x} is high')\n",
|
|
||||||
" testnr -= 1 << i\n",
|
|
||||||
"\n",
|
|
||||||
" n = next_key(db, prefix + struct.pack('>Q', testnr))\n",
|
|
||||||
" if n is not None and n.startswith(prefix):\n",
|
|
||||||
" return testnr\n",
|
|
||||||
" else:\n",
|
|
||||||
" return testnr - 1"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 4,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# encode and decode keys\n",
|
|
||||||
"\n",
|
|
||||||
"def encode_key(format: str, key: list) -> bytes:\n",
|
|
||||||
" result_bytes = []\n",
|
|
||||||
"\n",
|
|
||||||
" result_bytes.append(key[0])\n",
|
|
||||||
"\n",
|
|
||||||
" for i, f in enumerate(format):\n",
|
|
||||||
" if i >= len(key) - 1:\n",
|
|
||||||
" break\n",
|
|
||||||
" if f == 's':\n",
|
|
||||||
" result_bytes += list(key[i+1].encode('utf-8'))\n",
|
|
||||||
" if i < len(format) - 1:\n",
|
|
||||||
" result_bytes += [0]\n",
|
|
||||||
" elif f == 'Q':\n",
|
|
||||||
" result_bytes += list(struct.pack('>Q', key[i+1]))\n",
|
|
||||||
" elif f == 'q':\n",
|
|
||||||
" result_bytes += list(struct.pack('>Q', key[i+1] + (1<<63)))\n",
|
|
||||||
"\n",
|
|
||||||
" return bytes(result_bytes)\n",
|
|
||||||
"\n",
|
|
||||||
"def decode_key(format: str, key: bytes) -> list:\n",
|
|
||||||
" result = []\n",
|
|
||||||
"\n",
|
|
||||||
" result.append(key[0])\n",
|
|
||||||
" idx = 1\n",
|
|
||||||
"\n",
|
|
||||||
" for f in format:\n",
|
|
||||||
" if f == 's':\n",
|
|
||||||
" end = key[idx:].find(b'\\x00')\n",
|
|
||||||
" if end < 0:\n",
|
|
||||||
" result.append(key[idx:].decode('utf-8'))\n",
|
|
||||||
" break\n",
|
|
||||||
" else:\n",
|
|
||||||
" result.append(key[idx:idx+end].decode('utf-8'))\n",
|
|
||||||
" idx += end + 1\n",
|
|
||||||
" elif f == 'Q':\n",
|
|
||||||
" v = struct.unpack('>Q', key[idx:idx+8])[0]\n",
|
|
||||||
" result.append(v)\n",
|
|
||||||
" idx += 8\n",
|
|
||||||
" elif f == 'q':\n",
|
|
||||||
" v = struct.unpack('>Q', key[idx:idx+8])[0]\n",
|
|
||||||
" result.append(v - (1<<63))\n",
|
|
||||||
" idx += 8\n",
|
|
||||||
"\n",
|
|
||||||
" return result"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -285,8 +17,8 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"db = plyvel.DB('../node/nodedir/data/application.db')\n",
|
"# db = plyvel.DB('../testnode/nodedir/data/application.db')\n",
|
||||||
"height = max_height(db)"
|
"max_height(db)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -295,106 +27,7 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# get\n",
|
"[k for k, v in iterate(db, 's/k:mint/', 5224815)]"
|
||||||
"v = walk_disk(db, 's/k:emissions/', height, 'Qs', [11, 37, 'allo1mn4d32hwyn6grp89akek52arsw2vcdqezr0dc7'])\n",
|
|
||||||
"parse_struct(v)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 183,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"class IAVLTreeIteratorRaw:\n",
|
|
||||||
" def __init__(self, db, prefix: bytes, version: int, start: bytes | None = None, end: bytes | None = None):\n",
|
|
||||||
" self.db = db\n",
|
|
||||||
" self.prefix = prefix\n",
|
|
||||||
" self.version = version\n",
|
|
||||||
" self.start = start\n",
|
|
||||||
" self.end = end\n",
|
|
||||||
" self.stack = []\n",
|
|
||||||
"\n",
|
|
||||||
" def __iter__(self):\n",
|
|
||||||
" return self\n",
|
|
||||||
"\n",
|
|
||||||
" def __next__(self):\n",
|
|
||||||
" if len(self.stack) == 0:\n",
|
|
||||||
" # get root node\n",
|
|
||||||
" root = db.get(self.prefix + write_key((self.version, 1)))\n",
|
|
||||||
" if root is None:\n",
|
|
||||||
" raise StopIteration\n",
|
|
||||||
" node = read_node(root)\n",
|
|
||||||
" if len(node) == 2: # link to other root node\n",
|
|
||||||
" node = read_node(db.get(self.prefix + write_key(node)))\n",
|
|
||||||
" self.stack.append(((self.version, 1), node))\n",
|
|
||||||
"\n",
|
|
||||||
" # walk tree to either last before start or first after start\n",
|
|
||||||
" while node[0] > 0:\n",
|
|
||||||
" # print(node)\n",
|
|
||||||
" nodekey = node[2]\n",
|
|
||||||
" if self.start is None or self.start < nodekey:\n",
|
|
||||||
" next = node[3]\n",
|
|
||||||
" else:\n",
|
|
||||||
" next = node[4]\n",
|
|
||||||
" node = read_node(db.get(self.prefix + write_key(next)))\n",
|
|
||||||
" self.stack.append((next, node))\n",
|
|
||||||
"\n",
|
|
||||||
" # return early if we ended up at first item after start\n",
|
|
||||||
" if self.start is None or node[2] >= self.start:\n",
|
|
||||||
" return (node[2], node[3])\n",
|
|
||||||
"\n",
|
|
||||||
" # print('Stack:', [x[0] for x in self.stack])\n",
|
|
||||||
"\n",
|
|
||||||
" # go up to first parent which we're a left child of\n",
|
|
||||||
" key = None\n",
|
|
||||||
" for i in range(len(self.stack)-1, 0, -1):\n",
|
|
||||||
" current_key = self.stack[i][0]\n",
|
|
||||||
" parent_node = self.stack[i-1][1]\n",
|
|
||||||
" self.stack.pop()\n",
|
|
||||||
" left = parent_node[3]\n",
|
|
||||||
" right = parent_node[4]\n",
|
|
||||||
" if current_key == left:\n",
|
|
||||||
" key = right\n",
|
|
||||||
" break\n",
|
|
||||||
"\n",
|
|
||||||
" # are we at the right end of the tree?\n",
|
|
||||||
" if key is None:\n",
|
|
||||||
" raise StopIteration\n",
|
|
||||||
"\n",
|
|
||||||
" # go right\n",
|
|
||||||
" node = read_node(db.get(self.prefix + write_key(key)))\n",
|
|
||||||
" self.stack.append((key, node))\n",
|
|
||||||
"\n",
|
|
||||||
" # go left until at a leaf\n",
|
|
||||||
" while node[0] > 0:\n",
|
|
||||||
" key = node[3]\n",
|
|
||||||
" node = read_node(db.get(self.prefix + write_key(key)))\n",
|
|
||||||
" self.stack.append((key, node))\n",
|
|
||||||
"\n",
|
|
||||||
" if self.end is not None and node[2] >= self.end:\n",
|
|
||||||
" raise StopIteration\n",
|
|
||||||
"\n",
|
|
||||||
" return (node[2], node[3])\n",
|
|
||||||
"\n",
|
|
||||||
"class IAVLTreeIterator:\n",
|
|
||||||
" def __init__(self, db, prefix: str, version: int, format: str, start: list | None = None, end: list | None = None):\n",
|
|
||||||
" self.format = format\n",
|
|
||||||
" start_enc = encode_key(format, start) if start is not None else None\n",
|
|
||||||
" end_enc = encode_key(format, end) if end is not None else None\n",
|
|
||||||
" self.inner = IAVLTreeIteratorRaw(db, prefix.encode('utf-8'), version, start = start_enc, end = end_enc)\n",
|
|
||||||
"\n",
|
|
||||||
" def __iter__(self):\n",
|
|
||||||
" return self\n",
|
|
||||||
"\n",
|
|
||||||
" def __next__(self):\n",
|
|
||||||
" (k, v) = next(self.inner)\n",
|
|
||||||
" return (decode_key(self.format, k), v)\n",
|
|
||||||
"\n",
|
|
||||||
"def iterate(db, prefix, version, format, field):\n",
|
|
||||||
" return IAVLTreeIterator(db, prefix, version, format, start = [field], end = [field+1] if field < 255 else None)\n",
|
|
||||||
"\n",
|
|
||||||
"# [k for k, v in IAVLTreeIterator(db, 's/k:mint/', height, '')]"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -403,7 +36,7 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"[(k, parse_struct(v)) for k, v in iterate(db, 's/k:emissions/', height, 'Q', 5)]"
|
"parse_struct(next(iterate(db, 's/k:mint/', 5224815, field = 138))[1])"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -412,7 +45,7 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"[k for k, v in IAVLTreeIterator(db, 's/k:emissions/', height, 'Q', start = [5], end = [6])]"
|
"[k for k,v in iterate(db, 's/k:emissions/', 5224815, start = [62, 60], end = [62, 61], format = 'Qss')]"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -421,83 +54,16 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"encode_key('Q', [5])"
|
"lens = np.zeros(256, dtype = int)\n",
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"[decode_key('Q', k) for k,v in IAVLTreeIteratorRaw(db, b's/k:emissions/', height, start = bytes([5]), end = bytes([6]))]"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"encode_key('', [1]), decode_key('', b'\\x8a')"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"decode_key()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"list(range(5, 0, -1))"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"read_node(db.get(b's/k:emissions/' + write_key((height, 1))))"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 49,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"heights = np.arange(5776000, height+1)\n",
|
|
||||||
"sizes = np.empty(len(heights), dtype = int)\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
"for i in range(len(heights)):\n",
|
"with plyvel.DB('../testnode/nodedir/data/application.db') as db:\n",
|
||||||
" h = heights[i]\n",
|
" height = max_height(db)\n",
|
||||||
" n = db.get(b's/k:emissions/s' + struct.pack('>Q', h) + struct.pack('>I', 1))\n",
|
" for field in range(255):\n",
|
||||||
" nd = read_node(n)\n",
|
" count1 = indexof(db, 's/k:emissions/', height, '', [field])\n",
|
||||||
" sizes[i] = nd[1]"
|
" count2 = indexof(db, 's/k:emissions/', height, '', [field+1])\n",
|
||||||
]
|
" lens[field] = count2 - count1\n",
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import matplotlib.pyplot as plt\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
"plt.plot(sizes)\n",
|
"np.argsort(lens)"
|
||||||
"# plt.xlim(0, 1000)\n",
|
|
||||||
"# plt.ylim(19906000, 19908000)\n",
|
|
||||||
"plt.grid()\n",
|
|
||||||
"plt.show()"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -505,18 +71,6 @@
|
|||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
|
||||||
"n = db.get(b's/k:emissions/s' + struct.pack('>Q', 5776000) + struct.pack('>I', 1))\n",
|
|
||||||
"nd = read_node(n)\n",
|
|
||||||
"# decode_key('Qss', nd[2])\n",
|
|
||||||
"nd"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 3,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
"source": [
|
||||||
"# def has_prefix(db, prefix: bytes) -> bool:\n",
|
"# def has_prefix(db, prefix: bytes) -> bool:\n",
|
||||||
"# it = db.iterator(start = prefix)\n",
|
"# it = db.iterator(start = prefix)\n",
|
||||||
|
Loading…
x
Reference in New Issue
Block a user