# Reconstructed from the whitespace-collapsed patch 174b33a0 ("iterate through
# tree", Florian Stecker, 2025-10-07) against store.ipynb.  Only the cells whose
# source is visible in the diff are reproduced, in their post-patch form, one
# `# %%` marker per notebook cell.  Cell outputs and execution counts are omitted.
#
# NOTE(review): plyvel, np, struct and the helpers read_node, write_key,
# encode_key, decode_key, walk_disk, parse_struct and max_height are not defined
# anywhere in this patch; presumably they come from earlier notebook cells whose
# source the diff does not show -- confirm before running this standalone.

# %%
db = plyvel.DB('../node/nodedir/data/application.db')
height = max_height(db)

# %%
# get
v = walk_disk(db, 's/k:emissions/', height, 'Qs', [11, 37, 'allo1mn4d32hwyn6grp89akek52arsw2vcdqezr0dc7'])
parse_struct(v)

# %%
class IAVLTreeIteratorRaw:
    """Iterate over the leaves of one tree version as ``(key, value)`` pairs.

    Nodes are fetched from ``db`` under ``prefix + write_key(node_key)`` and
    decoded with ``read_node``.  As used here, a decoded node is indexable:

    * a node of length 2 stored at the root position is treated as a link to
      another root node and is followed once;
    * ``node[0] > 0`` marks an inner node, whose ``node[2]`` is the split key
      and ``node[3]`` / ``node[4]`` are the left / right child node keys;
    * any other node is a leaf with key ``node[2]`` and value ``node[3]``.

    Args:
        db: object with a ``get(bytes)`` method returning the stored bytes or
            ``None`` (a ``plyvel.DB`` in this notebook).
        prefix: byte prefix put in front of every encoded node key.
        version: tree version; the root is looked up as ``(version, 1)``.
        start: if given, leaves with a key below ``start`` are skipped.
        end: if given, iteration stops at the first leaf whose key is
            ``>= end`` (exclusive upper bound).
    """

    def __init__(self, db, prefix: bytes, version: int,
                 start: bytes | None = None, end: bytes | None = None):
        self.db = db
        self.prefix = prefix
        self.version = version
        self.start = start
        self.end = end
        # Path from the root down to the current leaf, as (node_key, node) pairs.
        self.stack = []

    def __iter__(self):
        return self

    def _load(self, node_key):
        """Fetch and decode the node stored under ``node_key``."""
        # Always go through self.db: the iterator must use the database it was
        # constructed with, not whatever global happens to be named `db`.
        return read_node(self.db.get(self.prefix + write_key(node_key)))

    def _push(self, node_key):
        """Load ``node_key``, append it to the path and return the node."""
        node = self._load(node_key)
        self.stack.append((node_key, node))
        return node

    def _emit(self, leaf):
        """Return the leaf's ``(key, value)``, or stop if it is at/after ``end``."""
        if self.end is not None and leaf[2] >= self.end:
            raise StopIteration
        return (leaf[2], leaf[3])

    def __next__(self):
        if not self.stack:
            # First call: locate the root of the requested version.
            root_key = (self.version, 1)
            raw_root = self.db.get(self.prefix + write_key(root_key))
            if raw_root is None:
                raise StopIteration
            node = read_node(raw_root)
            if len(node) == 2:  # link to other root node
                node = self._load(node)
            self.stack.append((root_key, node))

            # Descend to either the last leaf before `start` or the first leaf
            # at/after it (the leftmost leaf when no start is given).
            while node[0] > 0:
                if self.start is None or self.start < node[2]:
                    child_key = node[3]
                else:
                    child_key = node[4]
                node = self._push(child_key)

            # Landed on the first leaf at/after `start`: that is the first
            # item.  The `end` bound applies to it as well, so an empty
            # [start, end) range yields nothing.
            if self.start is None or node[2] >= self.start:
                return self._emit(node)
            # Otherwise the leaf lies before `start`; fall through and advance.

        # Climb until we leave a left child; its right sibling roots the
        # subtree that holds the next leaf in key order.
        sibling_key = None
        while len(self.stack) > 1:
            child_key, _ = self.stack.pop()
            parent = self.stack[-1][1]
            if child_key == parent[3]:
                sibling_key = parent[4]
                break

        # Never came up from a left child: we were at the right end of the tree.
        if sibling_key is None:
            raise StopIteration

        # Step right once, then keep left down to a leaf.
        node = self._push(sibling_key)
        while node[0] > 0:
            node = self._push(node[3])

        return self._emit(node)


class IAVLTreeIterator:
    """Typed wrapper around ``IAVLTreeIteratorRaw``.

    ``start`` / ``end`` are lists of key components that are encoded with
    ``encode_key(format, ...)`` before being handed to the raw iterator, and
    every key that comes back is decoded with ``decode_key(format, key)``.
    Values are passed through untouched.

    Args:
        db: database handle forwarded to ``IAVLTreeIteratorRaw``.
        prefix: store prefix as text; encoded to UTF-8 bytes.
        version: tree version to read.
        format: key format string understood by ``encode_key``/``decode_key``.
        start: optional inclusive lower bound (list of key components).
        end: optional exclusive upper bound (list of key components).
    """

    def __init__(self, db, prefix: str, version: int, format: str,
                 start: list | None = None, end: list | None = None):
        self.format = format
        start_enc = encode_key(format, start) if start is not None else None
        end_enc = encode_key(format, end) if end is not None else None
        self.inner = IAVLTreeIteratorRaw(
            db, prefix.encode('utf-8'), version, start=start_enc, end=end_enc
        )

    def __iter__(self):
        return self

    def __next__(self):
        raw_key, value = next(self.inner)
        return (decode_key(self.format, raw_key), value)


def iterate(db, prefix, version, format, field):
    """Return an iterator over all entries whose first key component is ``field``.

    The range is ``[field]`` (inclusive) up to ``[field + 1]`` (exclusive); for
    ``field >= 255`` no upper bound is set and iteration runs to the end of
    the tree.
    """
    end = [field + 1] if field < 255 else None
    return IAVLTreeIterator(db, prefix, version, format, start=[field], end=end)

# [k for k, v in IAVLTreeIterator(db, 's/k:mint/', height, '')]

# %%
[(k, parse_struct(v)) for k, v in iterate(db, 's/k:emissions/', height, 'Q', 5)]

# %%
[k for k, v in IAVLTreeIterator(db, 's/k:emissions/', height, 'Q', start = [5], end = [6])]

# %%
encode_key('Q', [5])

# %%
[decode_key('Q', k) for k,v in IAVLTreeIteratorRaw(db, b's/k:emissions/', height, start = bytes([5]), end = bytes([6]))]

# %%
encode_key('', [1]), decode_key('', b'\x8a')

# %%
# NOTE(review): every other call site passes (format, key); this bare call looks
# like a leftover scratch cell and presumably raises TypeError -- confirm/remove.
decode_key()

# %%
list(range(5, 0, -1))

# %%
read_node(db.get(b's/k:emissions/' + write_key((height, 1))))

# %%
# Record node[1] of the emissions root node for every height in the range.
heights = np.arange(5776000, height+1)
sizes = np.empty(len(heights), dtype = int)

for i, h in enumerate(heights):
    # NOTE(review): hand-built key; presumably the same bytes as
    # b's/k:emissions/' + write_key((h, 1)) -- confirm against write_key.
    n = db.get(b's/k:emissions/s' + struct.pack('>Q', h) + struct.pack('>I', 1))
    nd = read_node(n)
    sizes[i] = nd[1]

# %%
import matplotlib.pyplot as plt

plt.plot(sizes)
# plt.xlim(0, 1000)
# plt.ylim(19906000, 19908000)
plt.grid()
plt.show()

# %%
n = db.get(b's/k:emissions/s' + struct.pack('>Q', 5776000) + struct.pack('>I', 1))
nd = read_node(n)
# decode_key('Qss', nd[2])
nd