From 279fea727666f4849209c347a2edefe56e2392fb Mon Sep 17 00:00:00 2001
From: Florian Stecker
Date: Sat, 11 Oct 2025 16:37:38 -0400
Subject: [PATCH] clean up the code a little

---
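Notes for reviewers (kept out of the commit message): this is mostly a
reorganization, moving the argparse setup into get_args() and the main flow
into run(), plus some renames (walk_disk -> get, parse_struct -> parse_pb),
a new 'b' key format, and two small behavior tweaks ('get' no longer prints
a None result, and 'max_height' no longer touches args.key). The flags and
subcommands are unchanged, so invocations like the following should keep
working. The field numbers and the hex key part are made-up illustrations,
not values from a real snapshot:

    ./iavlread -d data/application.db max_height
    ./iavlread -d data/application.db count s/k:emissions/ 5
    ./iavlread -d data/application.db -k Qss iterate s/k:emissions/ 62 64
    ./iavlread -d data/application.db -k b get s/k:emissions/ 13 0A1B2C
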
"s/k:emissions/")') + p_count.add_argument('key', nargs='*', help = 'Key parts') + p_iterate = subparsers.add_parser('iterate', help = 'Iterate over items with some prefix') + p_iterate.add_argument('prefix', help = 'Prefix (e.g. "s/k:emissions/")') + p_iterate.add_argument('key', nargs='*', help = 'Key parts') + p_iterate = subparsers.add_parser('iterate_keys', help = 'Iterate over items with some prefix, output keys only') + p_iterate.add_argument('prefix', help = 'Prefix (e.g. "s/k:emissions/")') + p_iterate.add_argument('key', nargs='*', help = 'Key parts') + p_iterate = subparsers.add_parser('iterate_values', help = 'Iterate over items with some prefix, output values only') + p_iterate.add_argument('prefix', help = 'Prefix (e.g. "s/k:emissions/")') + p_iterate.add_argument('key', nargs='*', help = 'Key parts') -args = parser.parse_args() + return parser.parse_args() -dbpath = args.database if args.database is not None else 'data/application.db' -keyformat = args.keyformat if args.keyformat is not None else '' -valueformat = args.valueformat if args.valueformat is not None else 'b' +def run(args): + dbpath = args.database if args.database is not None else 'data/application.db' + keyformat = args.keyformat if args.keyformat is not None else '' + valueformat = args.valueformat if args.valueformat is not None else 'b' -if args.key is None or len(args.key) == 0: - key = None -else: - if len(args.key) > len(keyformat) + 1: - raise Exception('Too many key elements for keyformat') - key = [int(args.key[0])] - for f, k in zip(keyformat, args.key[1:]): - if f in ['i', 'I', 'q', 'Q']: - key.append(int(k)) - else: - key.append(k) - -with plyvel.DB(dbpath) as db: - if args.height is None or args.cmd == 'max_height': - height = iavltree.max_height(db) + if args.cmd == 'max_height' or args.key is None or len(args.key) == 0: + key = None else: - height = args.height + if len(args.key) > len(keyformat) + 1: + raise Exception('Too many key elements for keyformat') + key = [int(args.key[0])] + for f, k in zip(keyformat, args.key[1:]): + if f in ['i', 'I', 'q', 'Q']: + key.append(int(k)) + else: + key.append(k) - if args.cmd == 'max_height': - print(height) - elif args.cmd == 'get': - result = iavltree.walk_disk(db, args.prefix, height, keyformat, key) + with plyvel.DB(dbpath) as db: + if args.height is None or args.cmd == 'max_height': + height = iavltree.max_height(db) + else: + height = args.height - print(decode_output(valueformat, result)) - elif args.cmd == 'count': - result = iavltree.count(db, args.prefix, height, keyformat, key = key) + if args.cmd == 'max_height': + print(height) + elif args.cmd == 'get': + result = iavltree.get(db, args.prefix, height, keyformat, key) - print(result) - elif args.cmd == 'iterate' or args.cmd == 'iterate_keys' or args.cmd == 'iterate_values': - it = iavltree.iterate(db, args.prefix, height, keyformat, key = key) + if result is not None: + print(decode_output(valueformat, result)) + elif args.cmd == 'count': + result = iavltree.count(db, args.prefix, height, keyformat, key = key) - try: - for k, v in it: - if args.cmd == 'iterate_keys': - print(k) - elif args.cmd == 'iterate_values': - print(decode_output(valueformat,v)) - else: - print((k, decode_output(valueformat, v))) - except BrokenPipeError: - pass + print(result) + elif args.cmd == 'iterate' or args.cmd == 'iterate_keys' or args.cmd == 'iterate_values': + it = iavltree.iterate(db, args.prefix, height, keyformat, key = key) + + try: + for k, v in it: + if args.cmd == 'iterate_keys': + print(k) + elif 
diff --git a/iavlread b/iavlread
index d2e2139..f623a17 100755
--- a/iavlread
+++ b/iavlread
@@ -6,7 +6,7 @@ import json
 
 def decode_protobuf(subformats: dict, format_prefix: str, data: bytes):
     result = []
-    for (k,v) in iavltree.parse_struct(data):
+    for (k,v) in iavltree.parse_pb(data):
         idx = f'{format_prefix}.{k}'
         if idx in subformats:
             f = subformats[idx]
@@ -43,75 +43,82 @@ def decode_output(format: str, data: bytes) -> str:
     else:
         return data
 
-parser = argparse.ArgumentParser(description="Read the IAVL tree in a cosmos snapshot")
+def get_args():
+    parser = argparse.ArgumentParser(description="Read the IAVL tree in a cosmos snapshot")
 
-parser.add_argument('-d', '--database', help='Path to database (application.db folder)')
-parser.add_argument('-H', '--height', type=int, help='Block height')
-parser.add_argument('-k', '--keyformat', help='Key format for maps (e.g. Qss)')
-parser.add_argument('-v', '--valueformat', help='Value format')
+    parser.add_argument('-d', '--database', help='Path to database (application.db folder)')
+    parser.add_argument('-H', '--height', type=int, help='Block height')
+    parser.add_argument('-k', '--keyformat', help='Key format for maps (e.g. Qss)')
+    parser.add_argument('-v', '--valueformat', help='Value format')
 
-subparsers = parser.add_subparsers(required=True, dest='cmd')
-p_max_height = subparsers.add_parser('max_height', help = 'Get the max block height in the snapshot')
-p_get = subparsers.add_parser('get', help = 'Retrieve a single item')
-p_get.add_argument('prefix', help = 'Prefix (e.g. "s/k:emissions/")')
-p_get.add_argument('key', nargs='+', help = 'Key parts')
-p_count = subparsers.add_parser('count', help = 'Count number of items with a prefix')
-p_count.add_argument('prefix', help = 'Prefix (e.g. "s/k:emissions/")')
-p_count.add_argument('key', nargs='*', help = 'Key parts')
-p_iterate = subparsers.add_parser('iterate', help = 'Iterate over items with some prefix')
-p_iterate.add_argument('prefix', help = 'Prefix (e.g. "s/k:emissions/")')
-p_iterate.add_argument('key', nargs='*', help = 'Key parts')
-p_iterate = subparsers.add_parser('iterate_keys', help = 'Iterate over items with some prefix, output keys only')
-p_iterate.add_argument('prefix', help = 'Prefix (e.g. "s/k:emissions/")')
-p_iterate.add_argument('key', nargs='*', help = 'Key parts')
-p_iterate = subparsers.add_parser('iterate_values', help = 'Iterate over items with some prefix, output values only')
-p_iterate.add_argument('prefix', help = 'Prefix (e.g. "s/k:emissions/")')
-p_iterate.add_argument('key', nargs='*', help = 'Key parts')
+    subparsers = parser.add_subparsers(required=True, dest='cmd')
+    p_max_height = subparsers.add_parser('max_height', help = 'Get the max block height in the snapshot')
+    p_get = subparsers.add_parser('get', help = 'Retrieve a single item')
+    p_get.add_argument('prefix', help = 'Prefix (e.g. "s/k:emissions/")')
+    p_get.add_argument('key', nargs='+', help = 'Key parts')
+    p_count = subparsers.add_parser('count', help = 'Count number of items with a prefix')
+    p_count.add_argument('prefix', help = 'Prefix (e.g. "s/k:emissions/")')
+    p_count.add_argument('key', nargs='*', help = 'Key parts')
+    p_iterate = subparsers.add_parser('iterate', help = 'Iterate over items with some prefix')
+    p_iterate.add_argument('prefix', help = 'Prefix (e.g. "s/k:emissions/")')
+    p_iterate.add_argument('key', nargs='*', help = 'Key parts')
+    p_iterate = subparsers.add_parser('iterate_keys', help = 'Iterate over items with some prefix, output keys only')
+    p_iterate.add_argument('prefix', help = 'Prefix (e.g. "s/k:emissions/")')
+    p_iterate.add_argument('key', nargs='*', help = 'Key parts')
+    p_iterate = subparsers.add_parser('iterate_values', help = 'Iterate over items with some prefix, output values only')
+    p_iterate.add_argument('prefix', help = 'Prefix (e.g. "s/k:emissions/")')
+    p_iterate.add_argument('key', nargs='*', help = 'Key parts')
 
-args = parser.parse_args()
+    return parser.parse_args()
 
-dbpath = args.database if args.database is not None else 'data/application.db'
-keyformat = args.keyformat if args.keyformat is not None else ''
-valueformat = args.valueformat if args.valueformat is not None else 'b'
+def run(args):
+    dbpath = args.database if args.database is not None else 'data/application.db'
+    keyformat = args.keyformat if args.keyformat is not None else ''
+    valueformat = args.valueformat if args.valueformat is not None else 'b'
 
-if args.key is None or len(args.key) == 0:
-    key = None
-else:
-    if len(args.key) > len(keyformat) + 1:
-        raise Exception('Too many key elements for keyformat')
-    key = [int(args.key[0])]
-    for f, k in zip(keyformat, args.key[1:]):
-        if f in ['i', 'I', 'q', 'Q']:
-            key.append(int(k))
-        else:
-            key.append(k)
-
-with plyvel.DB(dbpath) as db:
-    if args.height is None or args.cmd == 'max_height':
-        height = iavltree.max_height(db)
+    if args.cmd == 'max_height' or args.key is None or len(args.key) == 0:
+        key = None
     else:
-        height = args.height
+        if len(args.key) > len(keyformat) + 1:
+            raise Exception('Too many key elements for keyformat')
+        key = [int(args.key[0])]
+        for f, k in zip(keyformat, args.key[1:]):
+            if f in ['i', 'I', 'q', 'Q']:
+                key.append(int(k))
+            else:
+                key.append(k)
 
-    if args.cmd == 'max_height':
-        print(height)
-    elif args.cmd == 'get':
-        result = iavltree.walk_disk(db, args.prefix, height, keyformat, key)
+    with plyvel.DB(dbpath) as db:
+        if args.height is None or args.cmd == 'max_height':
+            height = iavltree.max_height(db)
+        else:
+            height = args.height
 
-        print(decode_output(valueformat, result))
-    elif args.cmd == 'count':
-        result = iavltree.count(db, args.prefix, height, keyformat, key = key)
+        if args.cmd == 'max_height':
+            print(height)
+        elif args.cmd == 'get':
+            result = iavltree.get(db, args.prefix, height, keyformat, key)
 
-        print(result)
-    elif args.cmd == 'iterate' or args.cmd == 'iterate_keys' or args.cmd == 'iterate_values':
-        it = iavltree.iterate(db, args.prefix, height, keyformat, key = key)
+            if result is not None:
+                print(decode_output(valueformat, result))
+        elif args.cmd == 'count':
+            result = iavltree.count(db, args.prefix, height, keyformat, key = key)
 
-        try:
-            for k, v in it:
-                if args.cmd == 'iterate_keys':
-                    print(k)
-                elif args.cmd == 'iterate_values':
-                    print(decode_output(valueformat,v))
-                else:
-                    print((k, decode_output(valueformat, v)))
-        except BrokenPipeError:
-            pass
+            print(result)
+        elif args.cmd == 'iterate' or args.cmd == 'iterate_keys' or args.cmd == 'iterate_values':
+            it = iavltree.iterate(db, args.prefix, height, keyformat, key = key)
+
+            try:
+                for k, v in it:
+                    if args.cmd == 'iterate_keys':
+                        print(k)
+                    elif args.cmd == 'iterate_values':
+                        print(decode_output(valueformat,v))
+                    else:
+                        print((k, decode_output(valueformat, v)))
+            except BrokenPipeError:
+                pass
+
+if __name__ == '__main__':
+    args = get_args()
+    run(args)
diff --git a/iavltree.py b/iavltree.py
index 967df21..f0760f1 100644
--- a/iavltree.py
+++ b/iavltree.py
@@ -3,7 +3,7 @@ import struct
 import numpy as np
 
 # functions for reading IAVL tree
-def read_varint(x: bytes, offset: int = 0) -> int:
+def read_varint(x: bytes, offset: int = 0) -> tuple[int, int]:
     result = 0
     factor = 1
 
@@ -15,7 +15,7 @@ def read_varint(x: bytes, offset: int = 0) -> int:
             return result // 2, offset+i+1
         factor *= 128
 
-def read_uvarint(x: bytes, offset: int = 0) -> int:
+def read_uvarint(x: bytes, offset: int = 0) -> tuple[int, int]:
     result = 0
     factor = 1
 
@@ -27,6 +27,20 @@ def read_uvarint(x: bytes, offset: int = 0) -> int:
             return result, offset+i+1
         factor *= 128
 
+def write_uvarint(x: int) -> list[int]:
+    if x < 0:
+        raise Exception('write_uvarint only supports non-negative integers')
+    elif x == 0:
+        return [0]
+
+    result = []
+    while x > 0:
+        result.append(128 + x % 128)
+        x //= 128
+    result[-1] -= 128
+    return result
+
+
 def read_key(key: bytes) -> tuple[int, int] | None:
     if not key.startswith(b's'):
         return None
@@ -73,26 +87,7 @@ def read_node(node: bytes) -> tuple[int, int, bytes, tuple[int, int], tuple[int,
 
     return (height, length, key, (left_version, left_nonce), (right_version, right_nonce))
 
-def walk(tree, version, searchkey):
-    if (version, 1) not in tree:
-        return None
-
-    node = tree[(version, 1)]
-    if len(node) == 2: # root copy?
-        node = tree[node]
-
-    while node[0] > 0:
-        nodekey = node[2]
-        if searchkey < nodekey:
-            next = node[3]
-        else:
-            next = node[4]
-
-        node = tree[next]
-
-    return node[3]
-
-def walk_disk_raw(db, prefix: bytes, version: int, searchkey: bytes) -> None | bytes:
+def get_raw(db, prefix: bytes, version: int, searchkey: bytes) -> None | bytes:
     root = db.get(prefix + write_key((version, 1)))
     if root is None:
         return None
@@ -118,7 +113,7 @@ def walk_disk_raw(db, prefix: bytes, version: int, searchkey: bytes) -> None | b
     else:
         return None
 
-def walk_disk_next_key_raw(db, prefix: bytes, version: int, searchkey: bytes) -> None | bytes:
+def get_next_key_raw(db, prefix: bytes, version: int, searchkey: bytes) -> None | bytes:
     root = db.get(prefix + write_key((version, 1)))
     if root is None:
         return None
@@ -144,10 +139,10 @@ def walk_disk_next_key_raw(db, prefix: bytes, version: int, searchkey: bytes) ->
 
     return lowest_geq_key
 
-def walk_disk(db, prefix: str, version: int, format: str, searchkey: list) -> None | bytes:
-    return walk_disk_raw(db, prefix.encode('utf-8'), version, encode_key(format, searchkey))
+def get(db, prefix: str, version: int, format: str, searchkey: list) -> None | bytes:
+    return get_raw(db, prefix.encode('utf-8'), version, encode_key(format, searchkey))
 
-def parse_struct(data):
+def parse_pb(data):
     n = 0
     results = []
 
@@ -215,6 +210,10 @@ def encode_key(format: str, key: list) -> bytes:
             result_bytes += list(struct.pack('>Q', key[i+1]))
         elif f == 'q':
             result_bytes += list(struct.pack('>Q', key[i+1] + (1<<63)))
+        elif f == 'b':
+            data = list(bytes.fromhex(key[i+1]))
+            result_bytes += write_uvarint(len(data))
+            result_bytes += data
 
     return bytes(result_bytes)
 
@@ -242,6 +241,11 @@ def decode_key(format: str, key: bytes) -> list:
             v = struct.unpack('>Q', key[idx:idx+8])[0]
             result.append(v - (1<<63))
             idx += 8
+        elif f == 'b':
+            length, offset = read_uvarint(key[idx:])
+            data = key[idx+offset:idx+offset+length]
+            result.append(data.hex().upper())
+            idx += offset + length
 
     if idx < len(key):
         result.append(key[idx:])
@@ -397,7 +401,7 @@ def indexof_raw(db, prefix: bytes, version: int, key: bytes) -> int:
             next(it)
     except StopIteration:
         # get root count
-        return read_node(db.get(prefix + write_key(it.stack[0][0])))[1]
+        return it.stack[0][1][1]
 
     keys = [p[1][3] for p, c in zip(it.stack, it.stack[1:]) if c[0] == p[1][4]]
     keys_encoded = [prefix + write_key(k) for k in keys]
\"DelegateRewardPerShare\", 51: \"PreviousPercentageRewardToStakedReputers\", 52: \"StakeRemovalsByBlock\", 53: \"DelegateStakeRemovalsByBlock\", 54: \"StakeRemovalsByActor\", 55: \"DelegateStakeRemovalsByActor\", 56: \"TopicLastWorkerCommit\", 57: \"TopicLastReputerCommit\", 58: \"TopicLastWorkerPayload\", 59: \"TopicLastReputerPayload\", 60: \"OpenWorkerWindows\", 61: \"LatestNaiveInfererNetworkRegrets\", 62: \"LatestOneOutInfererInfererNetworkRegrets\", 63: \"LatestOneOutInfererForecasterNetworkRegrets\", 64: \"LatestOneOutForecasterInfererNetworkRegrets\", 65: \"LatestOneOutForecasterForecasterNetworkRegrets\", 66: \"PreviousForecasterScoreRatio\", 67: \"LastDripBlock\", 68: \"TopicToNextPossibleChurningBlock\", 69: \"BlockToActiveTopics\", 70: \"BlockToLowestActiveTopicWeight\", 71: \"PreviousTopicQuantileInfererScoreEma\", 72: \"PreviousTopicQuantileForecasterScoreEma\", 73: \"PreviousTopicQuantileReputerScoreEma\", 74: \"CountInfererInclusionsInTopic\", 75: \"CountForecasterInclusionsInTopic\", 76: \"ActiveInferers\", 77: \"ActiveForecasters\", 78: \"ActiveReputers\", 79: \"LowestInfererScoreEma\", 80: \"LowestForecasterScoreEma\", 81: \"LowestReputerScoreEma\", 82: \"LossBundles\", 83: \"TotalSumPreviousTopicWeights\", 84: \"RewardCurrentBlockEmission\", 85: \"GlobalWhitelist\", 86: \"TopicCreatorWhitelist\", 87: \"TopicWorkerWhitelist\", 88: \"TopicReputerWhitelist\", 89: \"TopicWorkerWhitelistEnabled\", 90: \"TopicReputerWhitelistEnabled\", 91: \"LastMedianInferences\", 92: \"MadInferences\", 93: \"InitialInfererEmaScore\", 94: \"InitialForecasterEmaScore\", 95: \"InitialReputerEmaScore\", 96: \"GlobalWorkerWhitelist\", 97: \"GlobalReputerWhitelist\", 98: \"GlobalAdminWhitelist\", 99: \"LatestRegretStdNorm\", 100: \"LatestInfererWeights\", 101: \"LatestForecasterWeights\", 102: \"NetworkInferences\", 103: \"OutlierResistantNetworkInferences\", 104: \"MonthlyReputerRewards\", 105: \"MonthlyTopicRewards\",}" + "len(ooiiregrets), len(it.inner.lookups)" ] }, { @@ -70,15 +58,15 @@ "metadata": {}, "outputs": [], "source": [ + "import numpy as np\n", + "keynames = {0: \"Params\", 1: \"TotalStake\", 2: \"TopicStake\", 3: \"Rewards\", 4: \"NextTopicId\", 5: \"Topics\", 6: \"TopicWorkers\", 7: \"TopicReputers\", 8: \"DelegatorStake\", 9: \"DelegateStakePlacement\", 10: \"TargetStake\", 11: \"Inferences\", 12: \"Forecasts\", 13: \"WorkerNodes\", 14: \"ReputerNodes\", 15: \"LatestInferencesTs\", 16: \"ActiveTopics\", 17: \"AllInferences\", 18: \"AllForecasts\", 19: \"AllLossBundles\", 20: \"StakeRemoval\", 21: \"StakeByReputerAndTopicId\", 22: \"DelegateStakeRemoval\", 23: \"AllTopicStakeSum\", 24: \"AddressTopics\", 24: \"WhitelistAdmins\", 25: \"ChurnableTopics\", 26: \"RewardableTopics\", 27: \"NetworkLossBundles\", 28: \"NetworkRegrets\", 29: \"StakeByReputerAndTopicId\", 30: \"ReputerScores\", 31: \"InferenceScores\", 32: \"ForecastScores\", 33: \"ReputerListeningCoefficient\", 34: \"InfererNetworkRegrets\", 35: \"ForecasterNetworkRegrets\", 36: \"OneInForecasterNetworkRegrets\", 37: \"OneInForecasterSelfNetworkRegrets\", 38: \"UnfulfilledWorkerNonces\", 39: \"UnfulfilledReputerNonces\", 40: \"FeeRevenueEpoch\", 41: \"TopicFeeRevenue\", 42: \"PreviousTopicWeight\", 43: \"PreviousReputerRewardFraction\", 44: \"PreviousInferenceRewardFraction\", 45: \"PreviousForecastRewardFraction\", 46: \"InfererScoreEmas\", 47: \"ForecasterScoreEmas\", 48: \"ReputerScoreEmas\", 49: \"TopicRewardNonce\", 50: \"DelegateRewardPerShare\", 51: \"PreviousPercentageRewardToStakedReputers\", 52: 
\"StakeRemovalsByBlock\", 53: \"DelegateStakeRemovalsByBlock\", 54: \"StakeRemovalsByActor\", 55: \"DelegateStakeRemovalsByActor\", 56: \"TopicLastWorkerCommit\", 57: \"TopicLastReputerCommit\", 58: \"TopicLastWorkerPayload\", 59: \"TopicLastReputerPayload\", 60: \"OpenWorkerWindows\", 61: \"LatestNaiveInfererNetworkRegrets\", 62: \"LatestOneOutInfererInfererNetworkRegrets\", 63: \"LatestOneOutInfererForecasterNetworkRegrets\", 64: \"LatestOneOutForecasterInfererNetworkRegrets\", 65: \"LatestOneOutForecasterForecasterNetworkRegrets\", 66: \"PreviousForecasterScoreRatio\", 67: \"LastDripBlock\", 68: \"TopicToNextPossibleChurningBlock\", 69: \"BlockToActiveTopics\", 70: \"BlockToLowestActiveTopicWeight\", 71: \"PreviousTopicQuantileInfererScoreEma\", 72: \"PreviousTopicQuantileForecasterScoreEma\", 73: \"PreviousTopicQuantileReputerScoreEma\", 74: \"CountInfererInclusionsInTopic\", 75: \"CountForecasterInclusionsInTopic\", 76: \"ActiveInferers\", 77: \"ActiveForecasters\", 78: \"ActiveReputers\", 79: \"LowestInfererScoreEma\", 80: \"LowestForecasterScoreEma\", 81: \"LowestReputerScoreEma\", 82: \"LossBundles\", 83: \"TotalSumPreviousTopicWeights\", 84: \"RewardCurrentBlockEmission\", 85: \"GlobalWhitelist\", 86: \"TopicCreatorWhitelist\", 87: \"TopicWorkerWhitelist\", 88: \"TopicReputerWhitelist\", 89: \"TopicWorkerWhitelistEnabled\", 90: \"TopicReputerWhitelistEnabled\", 91: \"LastMedianInferences\", 92: \"MadInferences\", 93: \"InitialInfererEmaScore\", 94: \"InitialForecasterEmaScore\", 95: \"InitialReputerEmaScore\", 96: \"GlobalWorkerWhitelist\", 97: \"GlobalReputerWhitelist\", 98: \"GlobalAdminWhitelist\", 99: \"LatestRegretStdNorm\", 100: \"LatestInfererWeights\", 101: \"LatestForecasterWeights\", 102: \"NetworkInferences\", 103: \"OutlierResistantNetworkInferences\", 104: \"MonthlyReputerRewards\", 105: \"MonthlyTopicRewards\",}\n", "lens = np.zeros(256, dtype = int)\n", "\n", "for field in range(255):\n", - " lens[field] = count(db, 's/k:emissions/', height, key = [field])\n", + " lens[field] = iavltree.count(db, 's/k:emissions/', height, key = [field])\n", "\n", "order = np.lexsort((np.arange(256)[::-1], lens))[::-1]\n", "\n", - "print('Map lengths:')\n", - "\n", "for i in range(len(order)):\n", " if lens[order[i]] == 0 and order[i] not in keynames:\n", " break\n", @@ -122,6 +110,19 @@ "\n", "# found" ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "# allora testnet module addresses\n", + "# mod allorapendingrewards 54C6D62FF29ECFEE9A5F0366DEC0F9CB44C10BB4\n", + "# mod allorarewards F3CA54C42E5B7DC7CB2A347B21E77AC248D914D2\n", + "# mod allorastaking 3C19B4642DA1C2DBB7E44679FA48F72FD9A97E5E\n", + "# mod ecosystem 570DD38DC5BAF3112A7C83A420ED399A8E59C5FC" + ] } ], "metadata": {