diff --git a/perftest.py b/perftest.py new file mode 100755 index 0000000..2d2de9a --- /dev/null +++ b/perftest.py @@ -0,0 +1,29 @@ +#!/usr/bin/env python +import plyvel +import read_tree +from tqdm import tqdm + +with plyvel.DB('../node/nodedir/data/application.db') as db: + height = read_tree.max_height(db) + + total = read_tree.count(db, 's/k:emissions/', height, 'Qss', key = [62]) + progress = tqdm(total = total) + + it = read_tree.iterate(db, 's/k:emissions/', height, 'Qss', key = [62]) + + for k, v in it: + progress.update(1) + + progress.close() + + keys = it.inner.lookups + +print(f'Number of items: {total}') +print(f'Lookups needed: {len(keys)}') + +with plyvel.DB('../node/nodedir/data/application.db') as db: + progress = tqdm(total = len(keys)) + for k in keys: + db.get(k) + progress.update(1) + progress.close() diff --git a/read_tree.py b/read_tree.py index cc66eab..967df21 100644 --- a/read_tree.py +++ b/read_tree.py @@ -43,7 +43,6 @@ def write_key(key: tuple[int, int]) -> bytes: return b's' + version + nonce def read_node(node: bytes) -> tuple[int, int, bytes, tuple[int, int], tuple[int, int]] | tuple[int, int, list[int], bytes] | tuple[int, int]: - if node.startswith(b's'): return read_key(node) @@ -94,7 +93,6 @@ def walk(tree, version, searchkey): return node[3] def walk_disk_raw(db, prefix: bytes, version: int, searchkey: bytes) -> None | bytes: - root = db.get(prefix + write_key((version, 1))) if root is None: return None @@ -259,19 +257,25 @@ class IAVLTreeIteratorRaw: self.start = start self.end = end self.stack = [] + self.lookups = [] def __iter__(self): return self + def get_node(self, key): + key_enc = self.prefix + write_key(key) + self.lookups.append(key_enc) + return self.db.get(key_enc) + def __next__(self): if len(self.stack) == 0: # get root node - root = db.get(self.prefix + write_key((self.version, 1))) + root = self.get_node((self.version, 1)) if root is None: raise StopIteration node = read_node(root) if len(node) == 2: # link to 
other root node - node = read_node(db.get(self.prefix + write_key(node))) + node = read_node(self.get_node(node)) self.stack.append(((self.version, 1), node)) # walk tree to either last before start or first after start @@ -282,7 +286,7 @@ class IAVLTreeIteratorRaw: next = node[3] else: next = node[4] - node = read_node(db.get(self.prefix + write_key(next))) + node = read_node(self.get_node(next)) self.stack.append((next, node)) # return early if we ended up at first item after start @@ -308,13 +312,13 @@ class IAVLTreeIteratorRaw: raise StopIteration # go right - node = read_node(db.get(self.prefix + write_key(key))) + node = read_node(self.get_node(key)) self.stack.append((key, node)) # go left until at a leaf while node[0] > 0: key = node[3] - node = read_node(db.get(self.prefix + write_key(key))) + node = read_node(self.get_node(key)) self.stack.append((key, node)) if self.end is not None and node[2] >= self.end: @@ -323,11 +327,9 @@ class IAVLTreeIteratorRaw: return (node[2], node[3]) class IAVLTreeIterator: - def __init__(self, db, prefix: str, version: int, format: str, start: list | None = None, end: list | None = None): + def __init__(self, db, prefix: bytes, version: int, format: str, start: bytes | None = None, end: bytes | None = None): self.format = format - start_enc = encode_key(format, start) if start is not None else None - end_enc = encode_key(format, end) if end is not None else None - self.inner = IAVLTreeIteratorRaw(db, prefix.encode('utf-8'), version, start = start_enc, end = end_enc) + self.inner = IAVLTreeIteratorRaw(db, prefix, version, start, end) def __iter__(self): return self @@ -336,15 +338,58 @@ class IAVLTreeIterator: (k, v) = next(self.inner) return (decode_key(self.format, k), v) -def iterate(db, prefix, version, format = '', field = None, start = None, end = None): - if field is not None: - return IAVLTreeIterator(db, prefix, version, format, start = [field], end = [field+1] if field < 255 else None) +def next_bs(x: bytes) -> 
bytes | None: + if len(x) == 0: + return None + + x_enc = None + for i in range(len(x),0,-1): + if x[i-1] != 255: + x_enc = x[:i-1] + bytes([x[i-1] + 1]) + bytes([0 for _ in range(len(x)-i)]) + break + + return x_enc + +def iterate(db, prefix, version, format = '', key = None, start = None, end = None): + prefix_enc = prefix.encode('utf-8') + + if key is not None: + start_enc = encode_key(format, key) + end_enc = next_bs(start_enc) else: - return IAVLTreeIterator(db, prefix, version, format, start = start, end = end) + start_enc = encode_key(format, start) if start is not None else None + end_enc = encode_key(format, end) if end is not None else None + + return IAVLTreeIterator(db, prefix_enc, version, format, start = start_enc, end = end_enc) + +def count(db, prefix, version, format = '', key = None, start = None, end = None): + prefix_enc = prefix.encode('utf-8') + + if key is not None: + start_enc = encode_key(format, key) + end_enc = next_bs(start_enc) + else: + start_enc = encode_key(format, start) if start is not None else None + end_enc = encode_key(format, end) if end is not None else None + + startidx = indexof_raw(db, prefix_enc, version, start_enc) if start_enc is not None else 0 + + if end_enc is not None: + endidx = indexof_raw(db, prefix_enc, version, end_enc) + else: + # get full count + it = IAVLTreeIteratorRaw(db, prefix_enc, version) + try: + next(it) + endidx = it.stack[0][1][1] # just read the length field of the root element + except StopIteration: + endidx = 0 + + return endidx - startidx def indexof_raw(db, prefix: bytes, version: int, key: bytes) -> int: """ - Find how many items come before `key` in the tree. If `key` doesn't exist, how many + Find how many items come before `key` in the tree. 
If `key` doesn't exist, how many items come before the slot it would get inserted at """ it = IAVLTreeIteratorRaw(db, prefix, version, start=key) @@ -353,7 +398,7 @@ def indexof_raw(db, prefix: bytes, version: int, key: bytes) -> int: except StopIteration: # get root count return read_node(db.get(prefix + write_key(it.stack[0][0])))[1] - + keys = [p[1][3] for p, c in zip(it.stack, it.stack[1:]) if c[0] == p[1][4]] keys_encoded = [prefix + write_key(k) for k in keys] count = sum([read_node(db.get(k))[1] for k in keys_encoded]) diff --git a/store.ipynb b/store.ipynb index 16ea945..43210f2 100644 --- a/store.ipynb +++ b/store.ipynb @@ -2,11 +2,12 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 168, "metadata": {}, "outputs": [], "source": [ "import plyvel\n", + "from itertools import islice\n", "\n", "%run -i read_tree.py" ] @@ -17,8 +18,9 @@ "metadata": {}, "outputs": [], "source": [ - "# db = plyvel.DB('../testnode/nodedir/data/application.db')\n", - "max_height(db)" + "db = plyvel.DB('../node/nodedir/data/application.db')\n", + "height = max_height(db)\n", + "height" ] }, { @@ -27,7 +29,9 @@ "metadata": {}, "outputs": [], "source": [ - "[k for k, v in iterate(db, 's/k:mint/', 5224815)]" + "it = iterate(db, 's/k:mint/', height)\n", + "[k for k, v in it]\n", + "it.inner.lookups" ] }, { @@ -36,7 +40,7 @@ "metadata": {}, "outputs": [], "source": [ - "parse_struct(next(iterate(db, 's/k:mint/', 5224815, field = 138))[1])" + "dict(parse_struct(next(iterate(db, 's/k:mint/', height, key = [138]))[1]))" ] }, { @@ -45,7 +49,19 @@ "metadata": {}, "outputs": [], "source": [ - "[k for k,v in iterate(db, 's/k:emissions/', 5224815, start = [62, 60], end = [62, 61], format = 'Qss')]" + "it = iterate(db, 's/k:emissions/', height, key = [62, 64], format = 'Qss')\n", + "ooiiregrets = [(k[2],k[3],value[1],float(value[2])) for k,v in it for value in (dict(parse_struct(v)),)]\n", + "\n", + "len(ooiiregrets), it.inner.lookups" + ] + }, + { + 
"cell_type": "code", "execution_count": 181, "metadata": {}, "outputs": [], "source": [ "keynames = {0: \"Params\", 1: \"TotalStake\", 2: \"TopicStake\", 3: \"Rewards\", 4: \"NextTopicId\", 5: \"Topics\", 6: \"TopicWorkers\", 7: \"TopicReputers\", 8: \"DelegatorStake\", 9: \"DelegateStakePlacement\", 10: \"TargetStake\", 11: \"Inferences\", 12: \"Forecasts\", 13: \"WorkerNodes\", 14: \"ReputerNodes\", 15: \"LatestInferencesTs\", 16: \"ActiveTopics\", 17: \"AllInferences\", 18: \"AllForecasts\", 19: \"AllLossBundles\", 20: \"StakeRemoval\", 21: \"StakeByReputerAndTopicId\", 22: \"DelegateStakeRemoval\", 23: \"AllTopicStakeSum\", 24: \"WhitelistAdmins\", 25: \"ChurnableTopics\", 26: \"RewardableTopics\", 27: \"NetworkLossBundles\", 28: \"NetworkRegrets\", 29: \"StakeByReputerAndTopicId\", 30: \"ReputerScores\", 31: \"InferenceScores\", 32: \"ForecastScores\", 33: \"ReputerListeningCoefficient\", 34: \"InfererNetworkRegrets\", 35: \"ForecasterNetworkRegrets\", 36: \"OneInForecasterNetworkRegrets\", 37: \"OneInForecasterSelfNetworkRegrets\", 38: \"UnfulfilledWorkerNonces\", 39: \"UnfulfilledReputerNonces\", 40: \"FeeRevenueEpoch\", 41: \"TopicFeeRevenue\", 42: \"PreviousTopicWeight\", 43: \"PreviousReputerRewardFraction\", 44: \"PreviousInferenceRewardFraction\", 45: \"PreviousForecastRewardFraction\", 46: \"InfererScoreEmas\", 47: \"ForecasterScoreEmas\", 48: \"ReputerScoreEmas\", 49: \"TopicRewardNonce\", 50: \"DelegateRewardPerShare\", 51: \"PreviousPercentageRewardToStakedReputers\", 52: \"StakeRemovalsByBlock\", 53: \"DelegateStakeRemovalsByBlock\", 54: \"StakeRemovalsByActor\", 55: \"DelegateStakeRemovalsByActor\", 56: \"TopicLastWorkerCommit\", 57: \"TopicLastReputerCommit\", 58: \"TopicLastWorkerPayload\", 59: \"TopicLastReputerPayload\", 60: \"OpenWorkerWindows\", 61: \"LatestNaiveInfererNetworkRegrets\", 62: \"LatestOneOutInfererInfererNetworkRegrets\", 63: \"LatestOneOutInfererForecasterNetworkRegrets\", 64:
\"LatestOneOutForecasterInfererNetworkRegrets\", 65: \"LatestOneOutForecasterForecasterNetworkRegrets\", 66: \"PreviousForecasterScoreRatio\", 67: \"LastDripBlock\", 68: \"TopicToNextPossibleChurningBlock\", 69: \"BlockToActiveTopics\", 70: \"BlockToLowestActiveTopicWeight\", 71: \"PreviousTopicQuantileInfererScoreEma\", 72: \"PreviousTopicQuantileForecasterScoreEma\", 73: \"PreviousTopicQuantileReputerScoreEma\", 74: \"CountInfererInclusionsInTopic\", 75: \"CountForecasterInclusionsInTopic\", 76: \"ActiveInferers\", 77: \"ActiveForecasters\", 78: \"ActiveReputers\", 79: \"LowestInfererScoreEma\", 80: \"LowestForecasterScoreEma\", 81: \"LowestReputerScoreEma\", 82: \"LossBundles\", 83: \"TotalSumPreviousTopicWeights\", 84: \"RewardCurrentBlockEmission\", 85: \"GlobalWhitelist\", 86: \"TopicCreatorWhitelist\", 87: \"TopicWorkerWhitelist\", 88: \"TopicReputerWhitelist\", 89: \"TopicWorkerWhitelistEnabled\", 90: \"TopicReputerWhitelistEnabled\", 91: \"LastMedianInferences\", 92: \"MadInferences\", 93: \"InitialInfererEmaScore\", 94: \"InitialForecasterEmaScore\", 95: \"InitialReputerEmaScore\", 96: \"GlobalWorkerWhitelist\", 97: \"GlobalReputerWhitelist\", 98: \"GlobalAdminWhitelist\", 99: \"LatestRegretStdNorm\", 100: \"LatestInfererWeights\", 101: \"LatestForecasterWeights\", 102: \"NetworkInferences\", 103: \"OutlierResistantNetworkInferences\", 104: \"MonthlyReputerRewards\", 105: \"MonthlyTopicRewards\",}" ] }, { @@ -56,14 +72,18 @@ "source": [ "lens = np.zeros(256, dtype = int)\n", "\n", - "with plyvel.DB('../testnode/nodedir/data/application.db') as db:\n", - " height = max_height(db)\n", - " for field in range(255):\n", - " count1 = indexof(db, 's/k:emissions/', height, '', [field])\n", - " count2 = indexof(db, 's/k:emissions/', height, '', [field+1])\n", - " lens[field] = count2 - count1\n", + "for field in range(255):\n", + " lens[field] = count(db, 's/k:emissions/', height, key = [field])\n", "\n", - "np.argsort(lens)" + "order = 
np.lexsort((np.arange(256)[::-1], lens))[::-1]\n", + "\n", + "print('Map lengths:')\n", + "\n", + "for i in range(len(order)):\n", + " if lens[order[i]] == 0 and order[i] not in keynames:\n", + " break\n", + "\n", + " print(f'{keynames[order[i]]:50} {lens[order[i]]:9d}')" ] }, {