{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import plyvel\n", "import struct\n", "import numpy as np" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# functions for reading IAVL tree\n", "\n", "def read_varint(x: bytes, offset: int = 0) -> int:\n", " result = 0\n", " factor = 1\n", "\n", " for i, b in enumerate(x[offset:]):\n", " if b >= 128:\n", " result = result + (b - 128) * factor\n", " else:\n", " result = result + b * factor\n", " return result // 2, offset+i+1\n", " factor *= 128\n", "\n", "def read_uvarint(x: bytes, offset: int = 0) -> int:\n", " result = 0\n", " factor = 1\n", "\n", " for i, b in enumerate(x[offset:]):\n", " if b >= 128:\n", " result = result + (b - 128) * factor\n", " else:\n", " result = result + b * factor\n", " return result, offset+i+1\n", " factor *= 128\n", "\n", "def read_key(key: bytes) -> tuple[int, int] | None:\n", " if not key.startswith(b's'):\n", " return None\n", "\n", " version = struct.unpack_from('>Q', key[1:9])[0]\n", " nonce = struct.unpack_from('>I', key[9:13])[0]\n", "\n", " return (version, nonce)\n", "\n", "def write_key(key: tuple[int, int]) -> bytes:\n", " version = struct.pack('>Q', key[0])\n", " nonce = struct.pack('>I', key[1])\n", "\n", " return b's' + version + nonce\n", "\n", "def read_node(node: bytes) -> tuple[int, int, bytes, tuple[int, int], tuple[int, int]] | tuple[int, int, list[int], bytes] | tuple[int, int]:\n", "\n", " if node.startswith(b's'):\n", " return read_key(node)\n", "\n", " n = 0\n", " height, n = read_varint(node, n)\n", "\n", " if height == 0:\n", " length, n = read_varint(node, n)\n", " size, n = read_uvarint(node, n)\n", " key = node[n:n+size]\n", " n += size\n", " valuesize, n = read_uvarint(node, n)\n", " value = node[n:n+valuesize]\n", "\n", " return (height, length, key, value)\n", " else:\n", " length, n = read_varint(node, n)\n", " size, n = read_uvarint(node, n)\n", " key = node[n:n+size]\n", " n += size\n", " hashsize, n = read_uvarint(node, n)\n", " n += hashsize\n", " mode, n = read_uvarint(node, n)\n", " left_version, n = read_varint(node, n)\n", " left_nonce, n = read_varint(node, n)\n", " right_version, n = read_varint(node, n)\n", " right_nonce, n = read_varint(node, n)\n", "\n", " return (height, length, key, (left_version, left_nonce), (right_version, right_nonce))\n", "\n", "def walk(tree, version, searchkey):\n", " if (version, 1) not in tree:\n", " return None\n", "\n", " node = tree[(version, 1)]\n", " if len(node) == 2: # root copy?\n", " node = tree[node]\n", "\n", " while node[0] > 0:\n", " nodekey = node[2]\n", " if searchkey < nodekey:\n", " next = node[3]\n", " else:\n", " next = node[4]\n", "\n", " node = tree[next]\n", "\n", " return node[3]\n", "\n", "def walk_disk_raw(db, prefix: bytes, version: int, searchkey: bytes) -> None | bytes:\n", "\n", " root = db.get(prefix + write_key((version, 1)))\n", " if root is None:\n", " return None\n", "\n", " node = read_node(root)\n", "\n", " if len(node) == 2: # root copy?\n", " node = read_node(db.get(prefix + write_key(node)))\n", "\n", " while node[0] > 0:\n", " # print(node)\n", "\n", " nodekey = node[2]\n", " if searchkey < nodekey:\n", " next = node[3]\n", " else:\n", " next = node[4]\n", "\n", " node = read_node(db.get(prefix + write_key(next)))\n", "\n", " if node[2] == searchkey:\n", " return node[3]\n", " else:\n", " return None\n", "\n", "def walk_disk_next_key_raw(db, prefix: bytes, version: int, searchkey: bytes) -> None | bytes:\n", " root = db.get(prefix + write_key((version, 1)))\n", " if root is None:\n", " return None\n", "\n", " node = read_node(root)\n", " lowest_geq_key = node[2] if node[2] >= searchkey else None\n", "\n", " if len(node) == 2: # root copy?\n", " node = read_node(db.get(prefix + write_key(node)))\n", "\n", " while node[0] > 0:\n", " # print(node)\n", "\n", " nodekey = node[2]\n", " if searchkey < nodekey:\n", " next = node[3]\n", " else:\n", " next = node[4]\n", "\n", " node = read_node(db.get(prefix + write_key(next)))\n", " if node[2] >= searchkey and (lowest_geq_key is None or node[2] < lowest_geq_key):\n", " lowest_geq_key = node[2]\n", "\n", " return lowest_geq_key\n", "\n", "def walk_disk(db, prefix: str, version: int, format: str, searchkey: list) -> None | bytes:\n", " return walk_disk_raw(db, prefix.encode('utf-8'), version, encode_key(format, searchkey))\n", "\n", "def parse_struct(data):\n", " n = 0\n", " results = []\n", "\n", " while n < len(data):\n", " key, n = read_uvarint(data, n)\n", " ty = key & 7\n", " key >>= 3\n", " if ty == 2:\n", " l, n = read_uvarint(data, n)\n", " val = data[n:n+l]\n", " n += l\n", " elif ty == 0:\n", " val, n = read_uvarint(data, n)\n", " else:\n", " raise Exception(f'unknown type {ty}, {data[n:]}')\n", " results.append((key, val))\n", "\n", " return results" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# find max height\n", "\n", "def next_key(db, k: bytes) -> bytes | None:\n", " it = db.iterator(start = k)\n", " try:\n", " nk, _ = next(it)\n", " return nk\n", " except StopIteration:\n", " return None\n", " finally:\n", " it.close()\n", "\n", "def max_height(db) -> int:\n", " testnr = 1<<63\n", "\n", " for i in range(62, -1, -1):\n", " prefix = b's/k:emissions/s'\n", " n = next_key(db, prefix + struct.pack('>Q', testnr))\n", "\n", " if n is not None and n.startswith(prefix):\n", " # print(f'{testnr:16x} is low')\n", " testnr += 1 << i\n", " else:\n", " # print(f'{testnr:16x} is high')\n", " testnr -= 1 << i\n", "\n", " n = next_key(db, prefix + struct.pack('>Q', testnr))\n", " if n is not None and n.startswith(prefix):\n", " return testnr\n", " else:\n", " return testnr - 1" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# encode and decode keys\n", "\n", "def encode_key(format: str, key: list) -> bytes:\n", " result_bytes = []\n", "\n", " result_bytes.append(key[0])\n", "\n", " for i, f in enumerate(format):\n", " if i >= len(key) - 1:\n", " break\n", " if f == 's':\n", " result_bytes += list(key[i+1].encode('utf-8'))\n", " if i < len(format) - 1:\n", " result_bytes += [0]\n", " elif f == 'Q':\n", " result_bytes += list(struct.pack('>Q', key[i+1]))\n", " elif f == 'q':\n", " result_bytes += list(struct.pack('>Q', key[i+1] + (1<<63)))\n", "\n", " return bytes(result_bytes)\n", "\n", "def decode_key(format: str, key: bytes) -> list:\n", " result = []\n", "\n", " result.append(key[0])\n", " idx = 1\n", "\n", " for f in format:\n", " if f == 's':\n", " end = key[idx:].find(b'\\x00')\n", " if end < 0:\n", " result.append(key[idx:].decode('utf-8'))\n", " break\n", " else:\n", " result.append(key[idx:idx+end].decode('utf-8'))\n", " idx += end + 1\n", " elif f == 'Q':\n", " v = struct.unpack('>Q', key[idx:idx+8])[0]\n", " result.append(v)\n", " idx += 8\n", " elif f == 'q':\n", " v = struct.unpack('>Q', key[idx:idx+8])[0]\n", " result.append(v - (1<<63))\n", " idx += 8\n", "\n", " return result" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "db = plyvel.DB('../node/nodedir/data/application.db')\n", "height = max_height(db)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# get\n", "v = walk_disk(db, 's/k:emissions/', height, 'Qs', [11, 37, 'allo1mn4d32hwyn6grp89akek52arsw2vcdqezr0dc7'])\n", "parse_struct(v)" ] }, { "cell_type": "code", "execution_count": 183, "metadata": {}, "outputs": [], "source": [ "class IAVLTreeIteratorRaw:\n", " def __init__(self, db, prefix: bytes, version: int, start: bytes | None = None, end: bytes | None = None):\n", " self.db = db\n", " self.prefix = prefix\n", " self.version = version\n", " self.start = start\n", " self.end = end\n", " self.stack = []\n", "\n", " def __iter__(self):\n", " return self\n", "\n", " def __next__(self):\n", " if len(self.stack) == 0:\n", " # get root node\n", " root = db.get(self.prefix + write_key((self.version, 1)))\n", " if root is None:\n", " raise StopIteration\n", " node = read_node(root)\n", " if len(node) == 2: # link to other root node\n", " node = read_node(db.get(self.prefix + write_key(node)))\n", " self.stack.append(((self.version, 1), node))\n", "\n", " # walk tree to either last before start or first after start\n", " while node[0] > 0:\n", " # print(node)\n", " nodekey = node[2]\n", " if self.start is None or self.start < nodekey:\n", " next = node[3]\n", " else:\n", " next = node[4]\n", " node = read_node(db.get(self.prefix + write_key(next)))\n", " self.stack.append((next, node))\n", "\n", " # return early if we ended up at first item after start\n", " if self.start is None or node[2] >= self.start:\n", " return (node[2], node[3])\n", "\n", " # print('Stack:', [x[0] for x in self.stack])\n", "\n", " # go up to first parent which we're a left child of\n", " key = None\n", " for i in range(len(self.stack)-1, 0, -1):\n", " current_key = self.stack[i][0]\n", " parent_node = self.stack[i-1][1]\n", " self.stack.pop()\n", " left = parent_node[3]\n", " right = parent_node[4]\n", " if current_key == left:\n", " key = right\n", " break\n", "\n", " # are we at the right end of the tree?\n", " if key is None:\n", " raise StopIteration\n", "\n", " # go right\n", " node = read_node(db.get(self.prefix + write_key(key)))\n", " self.stack.append((key, node))\n", "\n", " # go left until at a leaf\n", " while node[0] > 0:\n", " key = node[3]\n", " node = read_node(db.get(self.prefix + write_key(key)))\n", " self.stack.append((key, node))\n", "\n", " if self.end is not None and node[2] >= self.end:\n", " raise StopIteration\n", "\n", " return (node[2], node[3])\n", "\n", "class IAVLTreeIterator:\n", " def __init__(self, db, prefix: str, version: int, format: str, start: list | None = None, end: list | None = None):\n", " self.format = format\n", " start_enc = encode_key(format, start) if start is not None else None\n", " end_enc = encode_key(format, end) if end is not None else None\n", " self.inner = IAVLTreeIteratorRaw(db, prefix.encode('utf-8'), version, start = start_enc, end = end_enc)\n", "\n", " def __iter__(self):\n", " return self\n", "\n", " def __next__(self):\n", " (k, v) = next(self.inner)\n", " return (decode_key(self.format, k), v)\n", "\n", "def iterate(db, prefix, version, format, field):\n", " return IAVLTreeIterator(db, prefix, version, format, start = [field], end = [field+1] if field < 255 else None)\n", "\n", "# [k for k, v in IAVLTreeIterator(db, 's/k:mint/', height, '')]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "[(k, parse_struct(v)) for k, v in iterate(db, 's/k:emissions/', height, 'Q', 5)]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "[k for k, v in IAVLTreeIterator(db, 's/k:emissions/', height, 'Q', start = [5], end = [6])]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "encode_key('Q', [5])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "[decode_key('Q', k) for k,v in IAVLTreeIteratorRaw(db, b's/k:emissions/', height, start = bytes([5]), end = bytes([6]))]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "encode_key('', [1]), decode_key('', b'\\x8a')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "decode_key()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "list(range(5, 0, -1))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "read_node(db.get(b's/k:emissions/' + write_key((height, 1))))" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [], "source": [ "heights = np.arange(5776000, height+1)\n", "sizes = np.empty(len(heights), dtype = int)\n", "\n", "for i in range(len(heights)):\n", " h = heights[i]\n", " n = db.get(b's/k:emissions/s' + struct.pack('>Q', h) + struct.pack('>I', 1))\n", " nd = read_node(n)\n", " sizes[i] = nd[1]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", "\n", "plt.plot(sizes)\n", "# plt.xlim(0, 1000)\n", "# plt.ylim(19906000, 19908000)\n", "plt.grid()\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "n = db.get(b's/k:emissions/s' + struct.pack('>Q', 5776000) + struct.pack('>I', 1))\n", "nd = read_node(n)\n", "# decode_key('Qss', nd[2])\n", "nd" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# def has_prefix(db, prefix: bytes) -> bool:\n", "# it = db.iterator(start = prefix)\n", "# try:\n", "# first_key, _ = next(it)\n", "# return first_key.startswith(prefix)\n", "# except StopIteration:\n", "# return False\n", "# finally:\n", "# it.close()\n", "\n", "# found = []\n", "# prefixes = [b's/']\n", "\n", "# for i in range(20):\n", "# new_prefixes = []\n", "\n", "# for p in prefixes:\n", "# for i in range(256):\n", "# b = p + bytes([i])\n", "# if has_prefix(db, b):\n", "# if b.endswith(b'/'):\n", "# found.append((b, db.approximate_size(b, b + b'\\xff')))\n", "# else:\n", "# new_prefixes.append(b)\n", "# # new_prefixes.append(b)\n", "\n", "# prefixes = new_prefixes\n", "\n", "# found" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.13.7" } }, "nbformat": 4, "nbformat_minor": 2 }