commit bc7cb5650fb4e3f0808c5de71ff9debdffe2fcbb Author: Florian Stecker Date: Sun Oct 5 23:12:31 2025 -0400 read IAVL tree diff --git a/store.ipynb b/store.ipynb new file mode 100644 index 0000000..7c0305e --- /dev/null +++ b/store.ipynb @@ -0,0 +1,376 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import plyvel\n", + "import struct\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 163, + "metadata": {}, + "outputs": [], + "source": [ + "# functions for reading IAVL tree\n", + "\n", + "def read_varint(x: bytes, offset: int = 0) -> int:\n", + " result = 0\n", + " factor = 1\n", + "\n", + " for i, b in enumerate(x[offset:]):\n", + " if b >= 128:\n", + " result = result + (b - 128) * factor\n", + " else:\n", + " result = result + b * factor\n", + " return result // 2, offset+i+1\n", + " factor *= 128\n", + "\n", + "def read_uvarint(x: bytes, offset: int = 0) -> int:\n", + " result = 0\n", + " factor = 1\n", + "\n", + " for i, b in enumerate(x[offset:]):\n", + " if b >= 128:\n", + " result = result + (b - 128) * factor\n", + " else:\n", + " result = result + b * factor\n", + " return result, offset+i+1\n", + " factor *= 128\n", + "\n", + "def read_key(key: bytes) -> tuple[int, int] | None:\n", + " if not key.startswith(b's'):\n", + " return None\n", + "\n", + " version = struct.unpack_from('>Q', key[1:9])[0]\n", + " nonce = struct.unpack_from('>I', key[9:13])[0]\n", + "\n", + " return (version, nonce)\n", + "\n", + "def write_key(key: tuple[int, int]) -> bytes:\n", + " version = struct.pack('>Q', key[0])\n", + " nonce = struct.pack('>I', key[1])\n", + "\n", + " return b's' + version + nonce\n", + "\n", + "def read_node(node: bytes) -> tuple[int, int, bytes, tuple[int, int], tuple[int, int]] | tuple[int, int, list[int], bytes] | tuple[int, int]:\n", + "\n", + " if node.startswith(b's'):\n", + " return read_key(node)\n", + "\n", + " n = 0\n", + " height, n = read_varint(node, n)\n", + "\n", + " if height == 0:\n", + " length, n = read_varint(node, n)\n", + " size, n = read_uvarint(node, n)\n", + " key = node[n:n+size]\n", + " n += size\n", + " valuesize, n = read_uvarint(node, n)\n", + " value = node[n:n+valuesize]\n", + "\n", + " return (height, length, key, value)\n", + " else:\n", + " length, n = read_varint(node, n)\n", + " size, n = read_uvarint(node, n)\n", + " key = node[n:n+size]\n", + " n += size\n", + " hashsize, n = read_uvarint(node, n)\n", + " n += hashsize\n", + " mode, n = read_uvarint(node, n)\n", + " left_version, n = read_varint(node, n)\n", + " left_nonce, n = read_varint(node, n)\n", + " right_version, n = read_varint(node, n)\n", + " right_nonce, n = read_varint(node, n)\n", + "\n", + " return (height, length, key, (left_version, left_nonce), (right_version, right_nonce))\n", + "\n", + "def walk(tree, version, searchkey):\n", + " if (version, 1) not in tree:\n", + " return None\n", + "\n", + " node = tree[(version, 1)]\n", + " if len(node) == 2: # root copy?\n", + " node = tree[node]\n", + "\n", + " while node[0] > 0:\n", + " nodekey = node[2]\n", + " if searchkey < nodekey:\n", + " next = node[3]\n", + " else:\n", + " next = node[4]\n", + "\n", + " node = tree[next]\n", + "\n", + " return node[3]\n", + "\n", + "def walk_disk_raw(db, prefix: bytes, version: int, searchkey: bytes) -> None | bytes:\n", + "\n", + " root = db.get(prefix + write_key((version, 1)))\n", + " if root is None:\n", + " return None\n", + "\n", + " node = read_node(root)\n", + "\n", + " if len(node) == 2: # root copy?\n", + " node = read_node(db.get(prefix + write_key(node)))\n", + "\n", + " while node[0] > 0:\n", + " # print(node)\n", + "\n", + " nodekey = node[2]\n", + " if searchkey < nodekey:\n", + " next = node[3]\n", + " else:\n", + " next = node[4]\n", + "\n", + " node = read_node(db.get(prefix + write_key(next)))\n", + "\n", + " if node[2] == searchkey:\n", + " return node[3]\n", + " else:\n", + " return None\n", + "\n", + "def walk_disk_next_key_raw(db, prefix: bytes, version: int, searchkey: bytes) -> None | bytes:\n", + " root = db.get(prefix + write_key((version, 1)))\n", + " if root is None:\n", + " return None\n", + "\n", + " node = read_node(root)\n", + " lowest_geq_key = node[2] if node[2] >= searchkey else None\n", + "\n", + " if len(node) == 2: # root copy?\n", + " node = read_node(db.get(prefix + write_key(node)))\n", + "\n", + " while node[0] > 0:\n", + " # print(node)\n", + "\n", + " nodekey = node[2]\n", + " if searchkey < nodekey:\n", + " next = node[3]\n", + " else:\n", + " next = node[4]\n", + "\n", + " node = read_node(db.get(prefix + write_key(next)))\n", + " if node[2] >= searchkey and (lowest_geq_key is None or node[2] < lowest_geq_key):\n", + " lowest_geq_key = node[2]\n", + "\n", + " return lowest_geq_key\n", + "\n", + "def walk_disk(db, prefix: str, version: int, format: str, searchkey: list) -> None | bytes:\n", + " return walk_disk_raw(db, prefix.encode('utf-8'), version, encode_key(format, searchkey))\n", + "\n", + "def parse_struct(data):\n", + " n = 0\n", + " results = []\n", + "\n", + " while n < len(data):\n", + " key, n = read_uvarint(data, n)\n", + " ty = key & 7\n", + " key >>= 3\n", + " if ty == 2:\n", + " l, n = read_uvarint(data, n)\n", + " val = data[n:n+l]\n", + " n += l\n", + " elif ty == 0:\n", + " val, n = read_uvarint(data, n)\n", + " else:\n", + " raise Exception(f'unknown type {ty}, {data[n:]}')\n", + " results.append((key, val))\n", + "\n", + " return results" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [], + "source": [ + "# find max height\n", + "\n", + "def next_key(db, k: bytes) -> bytes | None:\n", + " it = db.iterator(start = k)\n", + " try:\n", + " nk, _ = next(it)\n", + " return nk\n", + " except StopIteration:\n", + " return None\n", + " finally:\n", + " it.close()\n", + "\n", + "def max_height(db) -> int:\n", + " testnr = 1<<63\n", + "\n", + " for i in range(62, -1, -1):\n", + " prefix = b's/k:emissions/s'\n", + " n = next_key(db, prefix + struct.pack('>Q', testnr))\n", + "\n", + " if n is not None and n.startswith(prefix):\n", + " # print(f'{testnr:16x} is low')\n", + " testnr += 1 << i\n", + " else:\n", + " # print(f'{testnr:16x} is high')\n", + " testnr -= 1 << i\n", + "\n", + " n = next_key(db, prefix + struct.pack('>Q', testnr))\n", + " if n is not None and n.startswith(prefix):\n", + " return testnr\n", + " else:\n", + " return testnr - 1" + ] + }, + { + "cell_type": "code", + "execution_count": 176, + "metadata": {}, + "outputs": [], + "source": [ + "# encode and decode keys\n", + "\n", + "def encode_key(format: str, key: list) -> bytes:\n", + " result_bytes = []\n", + "\n", + " result_bytes.append(key[0])\n", + "\n", + " for i, f in enumerate(format):\n", + " if i >= len(key) - 1:\n", + " break\n", + " if f == 's':\n", + " result_bytes += list(key[i+1].encode('utf-8'))\n", + " if i < len(format) - 1:\n", + " result_bytes += [0]\n", + " elif f == 'Q':\n", + " result_bytes += list(struct.pack('>Q', key[i+1]))\n", + " elif f == 'q':\n", + " result_bytes += list(struct.pack('>Q', key[i+1] + (1<<63)))\n", + "\n", + " return bytes(result_bytes)\n", + "\n", + "def decode_key(format: str, key: bytes) -> list:\n", + " result = []\n", + "\n", + " result.append(key[0])\n", + " idx = 1\n", + "\n", + " for f in format:\n", + " if f == 's':\n", + " end = key[idx:].find(b'\\x00')\n", + " if end < 0:\n", + " result.append(key[idx:].decode('utf-8'))\n", + " break\n", + " else:\n", + " result.append(key[idx:idx+end].decode('utf-8'))\n", + " idx += end + 1\n", + " elif f == 'Q':\n", + " v = struct.unpack('>Q', key[idx:idx+8])[0]\n", + " result.append(v)\n", + " idx += 8\n", + " elif f == 'q':\n", + " v = struct.unpack('>Q', key[idx:idx+8])[0]\n", + " result.append(v - (1<<63))\n", + " idx += 8\n", + "\n", + " return result" + ] + }, + { + "cell_type": "code", + "execution_count": 175, + "metadata": {}, + "outputs": [], + "source": [ + "db = plyvel.DB('nodedir/data/application.db')\n", + "height = max_height(db)" + ] + }, + { + "cell_type": "code", + "execution_count": 188, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[(1, 37),\n", + " (2, 5224812),\n", + " (3, b'allo1mn4d32hwyn6grp89akek52arsw2vcdqezr0dc7'),\n", + " (4, b'183.2449346998165')]" + ] + }, + "execution_count": 188, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# get\n", + "v = walk_disk(db, 's/k:emissions/', height, 'Qs', [11, 37, 'allo1mn4d32hwyn6grp89akek52arsw2vcdqezr0dc7'])\n", + "parse_struct(v)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# def has_prefix(db, prefix: bytes) -> bool:\n", + "# it = db.iterator(start = prefix)\n", + "# try:\n", + "# first_key, _ = next(it)\n", + "# return first_key.startswith(prefix)\n", + "# except StopIteration:\n", + "# return False\n", + "# finally:\n", + "# it.close()\n", + "\n", + "# found = []\n", + "# prefixes = [b's/']\n", + "\n", + "# for i in range(20):\n", + "# new_prefixes = []\n", + "\n", + "# for p in prefixes:\n", + "# for i in range(256):\n", + "# b = p + bytes([i])\n", + "# if has_prefix(db, b):\n", + "# if b.endswith(b'/'):\n", + "# found.append((b, db.approximate_size(b, b + b'\\xff')))\n", + "# else:\n", + "# new_prefixes.append(b)\n", + "# # new_prefixes.append(b)\n", + "\n", + "# prefixes = new_prefixes\n", + "\n", + "# found" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}