diff --git a/src/btrfs_lookup.rs b/src/btrfs_lookup.rs index 80bef60..32fc9f1 100644 --- a/src/btrfs_lookup.rs +++ b/src/btrfs_lookup.rs @@ -1,7 +1,8 @@ +use std::convert::identity; use std::rc::Rc; -use std::ops::{Deref, RangeBounds}; +use std::ops::{Deref, RangeBounds, Bound}; -use crate::btrfs_structs::{Leaf, Key, Item, InteriorNode, Node, ParseError, ParseBin, Value, Superblock, ItemType}; +use crate::btrfs_structs::{Leaf, Key, Item, InteriorNode, Node, ParseError, ParseBin, Value, Superblock, ItemType, ZERO_KEY, DirItem}; use crate::addrmap::{node_at_log, LogToPhys, AddressMap}; /// represents a B-Tree inside a filesystem image. Can be used to look up keys, @@ -50,32 +51,40 @@ impl Leaf { .map(|x|x.clone()) } - pub fn find_key_or_previous(&self, key: Key) -> Option { + pub fn find_key_or_previous(&self, key: Key) -> Option { self.items .iter() .take_while(|x|x.key <= key) + .enumerate() .last() - .map(|x|x.clone()) + .map(|x|x.0) } } impl InteriorNode { - pub fn find_key_or_previous(&self, key: Key) -> Option { + /// Return the index of the last child which has key at most `key`. This is the + /// branch which contains `key` if it exists. Returns `None` if all children are greater than + /// `key`, which guarantees that `key` is not among the descendants of `self`. + pub fn find_key_or_previous(&self, key: Key) -> Option { self.children .iter() .take_while(|x|x.key <= key) + .enumerate() .last() - .map(|x|x.ptr) + .map(|x|x.0) } } +/// Recursively traverse a tree to find a key, given the key and logical address +/// of the tree root. Internal function, `Tree::find_key` is the public interface. 
fn find_key_in_node(image: &[u8], addr: &T, root_addr_log: u64, key: Key) -> Result { let node = Node::parse(node_at_log(image, addr, root_addr_log)?)?; match node { Node::Interior(interior_node) => { - let next_node_log = interior_node.find_key_or_previous(key).unwrap(); + let next_node_index = interior_node.find_key_or_previous(key).unwrap(); + let next_node_log = interior_node.children[next_node_index].ptr; find_key_in_node(image, addr, next_node_log, key) }, Node::Leaf(leaf) => { @@ -96,202 +105,223 @@ impl Tree<'_> { /***** iterator *****/ -pub struct RangeIter<'a, R: RangeBounds, F: Fn(Key) -> Key = fn(Key) -> Key> { - tree: &'a Tree<'a>, +pub struct RangeIter<'a, 'b> { + tree: &'b Tree<'a>, - // path to the last returned item - nodes: Vec, - leaf: Option>, - indices: Vec, - - bounds: R, - skip_fn: F, + start: Bound, + end: Bound, + forward_skip_fn: Box Key>, + backward_skip_fn: Box Key>, } -impl Tree<'_> { - pub fn iter<'a>(&'a self) -> RangeIter<'a> { - self.range(None, None) - } - - pub fn range<'a>(&'a self, lower: Option, upper: Option) -> RangeIter<'a> { - RangeIter { - tree: self, - nodes: Vec::new(), - leaf: None, - indices: Vec::new(), // in nodes and leaf - lower_limit: lower, - upper_limit: upper, - skip_fn: |x|x - } - } - - pub fn range_id<'a>(&'a self, id: u64) -> RangeIter<'a> { - if id == u64::MAX { - self.range( - Some(Key::new(id, ItemType::Invalid, 0)), - None - ) - } else { - self.range( - Some(Key::new(id, ItemType::Invalid, 0)), - Some(Key::new(id+1, ItemType::Invalid, 0)) - ) - } - } - - /// given a tree, a range of indices, and two "skip functions", produces a double +impl<'a> Tree<'a> { + /// Given a tree, a range of indices, and two "skip functions", produces a double /// ended iterator which iterates through the keys contained in the range, in ascending /// or descending order. 
- - /// the skip functions are ignored for now, but are intended as an optimization: + /// + /// The skip functions are ignored for now, but are intended as an optimization: /// after a key `k` was returned by the iterator (or the reverse iterator), all keys /// strictly lower than `forward_skip_fn(k)` are skipped (resp. all keys strictly above - /// `backward_skip_fn` are skipped. - pub fn range_with_skip<'a, R, F>(&'a self, range: R, forward_skip_fn: F, backward_skip_fn: F) -> RangeIter<'a, F> + /// `backward_skip_fn(k)` are skipped). + /// + /// If `forward_skip_fn` and `backward_skip_fn` are the identity, nothing is skipped. + pub fn range_with_skip<'b, R, F1, F2>(&'b self, range: R, forward_skip_fn: F1, backward_skip_fn: F2) -> RangeIter<'a, 'b> where R: RangeBounds, - F: Fn(Key) -> Key { + F1: Fn(Key) -> Key + 'static, + F2: Fn(Key) -> Key + 'static { RangeIter { tree: self, - nodes: Vec::new(), - leaf: None, - indices: Vec::new(), + start: range.start_bound().cloned(), + end: range.end_bound().cloned(), + forward_skip_fn: Box::new(forward_skip_fn), + backward_skip_fn: Box::new(backward_skip_fn), + } + } + + pub fn range<'b, R: RangeBounds>(&'b self, range: R) -> RangeIter<'a, 'b> { + RangeIter { + tree: self, + start: range.start_bound().cloned(), + end: range.end_bound().cloned(), + forward_skip_fn: Box::new(identity), + backward_skip_fn: Box::new(identity), + } + } + + + pub fn iter<'b>(&'b self) -> RangeIter<'a, 'b> { + RangeIter { + tree: self, + start: Bound::Unbounded, + end: Bound::Unbounded, + forward_skip_fn: Box::new(identity), + backward_skip_fn: Box::new(identity), } } } -impl Key> RangeIter<'_, F> { - fn move_down_and_get_first_item(&mut self, mut node_addr: u64) -> Option { - loop { - let node = Node::parse(node_at_log(self.tree.image, self.tree.addr_map.deref(), node_addr).ok()?).ok()?; - match node { - Node::Interior(int_node) => { - node_addr = int_node.children.first()?.ptr; - self.nodes.push(int_node); - self.indices.push(0); - }, - 
Node::Leaf(leaf_node) => { - let result = leaf_node.items.first()?.clone(); - self.leaf = Some(Box::new(leaf_node)); - self.indices.push(0); - return Some(result); - }, - } - } +#[derive(Debug,PartialEq,Eq,Clone,Copy)] +enum FindKeyMode {LT, GT, GE, LE} + +fn get_first_item(tree: &Tree, addr: u64) -> Result { + let node_data = node_at_log(tree.image, tree.addr_map.deref(), addr)?; + match Node::parse(node_data)? { + Node::Interior(intnode) => { + get_first_item(tree, intnode.children[0].ptr) + }, + Node::Leaf(leafnode) => { + Ok(leafnode.items[0].clone()) + }, } +} - fn move_down_and_get_item_or_previous(&mut self, mut node_addr: u64, key: Key) -> Option { - loop { - let node = Node::parse(node_at_log(self.tree.image, self.tree.addr_map.deref(), node_addr).ok()?).ok()?; +fn get_last_item(tree: &Tree, addr: u64) -> Result { + let node_data = node_at_log(tree.image, tree.addr_map.deref(), addr)?; + match Node::parse(node_data)? { + Node::Interior(intnode) => { + get_last_item(tree, intnode.children.last().unwrap().ptr) + }, + Node::Leaf(leafnode) => { + Ok(leafnode.items.last().unwrap().clone()) + }, + } +} - match node { - Node::Interior(int_node) => { - let (i, new_node_ptr) = int_node - .children - .iter() - .enumerate() - .take_while(|(_,bp)|bp.key <= key) - .last()?; +/// Try to find the item with key `key` if it exists in the tree, and return +/// the "closest" match. The exact meaning of "closest" is given by the `mode` argument: +/// If `mode` is `LT`/`GT`/`GE`/`LE`, return the item with the greatest / least / least / greatest +/// key less than / greater than / greater or equal to / less or equal to `key`. 
+fn find_closest_key(tree: &Tree, key: Key, mode: FindKeyMode) -> Result, ParseError> { - node_addr = new_node_ptr.ptr; - self.nodes.push(int_node); - self.indices.push(i); - }, - Node::Leaf(leaf_node) => { - let (i, result) = leaf_node - .items - .iter() - .enumerate() - .take_while(|(_,item)|item.key <= key) - .last()?; + // in some cases, this task can't be accomplished by a single traversal + // but we might have to go back up the tree; this state allows us to quickly go back to the right node + let mut prev: Option = None; + let mut next: Option = None; - let result_cloned = result.clone(); - self.leaf = Some(Box::new(leaf_node)); - self.indices.push(i); - return Some(result_cloned); - }, - } + let mut node_data = node_at_log(tree.image, tree.addr_map.deref(), tree.root_addr_log)?; + + loop { + match Node::parse(node_data)? { + Node::Interior(intnode) => { + match intnode.find_key_or_previous(key) { + Some(idx) => { + if let Some(kp) = (idx > 0).then(|| intnode.children.get(idx-1)).flatten() { + prev = Some(kp.ptr); + } + if let Some(kp) = intnode.children.get(idx+1) { + next = Some(kp.ptr); + } + + node_data = node_at_log(tree.image, tree.addr_map.deref(), intnode.children[idx].ptr)?; + }, + None => { + // this can only happen if every key in the current node is `> key` + // which really should only happen if we're in the root node, as otherwise + // we wouldn't have descended into this branch; so assume every key in the + // tree is `> key`. 
+ if mode == FindKeyMode::LT || mode == FindKeyMode::LE { + return Ok(None); + } else { + // return the first item in tree; we are an interior node so we really should have + // at least one child + let addr = intnode.children[0].ptr; + return Ok(Some(get_first_item(tree, addr)?)); + } + } + } + }, + Node::Leaf(leafnode) => { + match leafnode.find_key_or_previous(key) { + Some(idx) => { + // the standard case, we found a key `k` with the guarantee that `k <= key` + let Item {key: k, value: v} = leafnode.items[idx].clone(); + + if mode == FindKeyMode::LE || mode == FindKeyMode::LT && k < key || mode == FindKeyMode::GE && k == key { + return Ok(Some(Item {key: k, value: v})) + } else if mode == FindKeyMode::LT && k == key { + // prev + if idx > 0 { + return Ok(Some(leafnode.items[idx-1].clone())); + } else { + // use prev + if let Some(addr) = prev { + return Ok(Some(get_last_item(tree, addr)?)); + } else { + return Ok(None); + } + } + } else { + // next + if let Some(item) = leafnode.items.get(idx+1) { + return Ok(Some(item.clone())); + } else { + // use next + if let Some(addr) = next { + return Ok(Some(get_first_item(tree, addr)?)); + } else { + return Ok(None); + } + } + } + }, + None => { + // same as above, but this can only happen if the root node is a leaf + if mode == FindKeyMode::LT || mode == FindKeyMode::LE { + return Ok(None); + } else { + // return the first item in tree if it exists + return Ok(leafnode.items.get(0).map(|x|x.clone())); + } + }, + } + }, } } } -impl Key> Iterator for RangeIter<'_, F> { +fn range_valid(start: Bound, end: Bound) -> bool { + match (start, end) { + (Bound::Included(x), Bound::Included(y)) => x <= y, + (Bound::Excluded(x), Bound::Included(y)) => x < y, + (Bound::Included(x), Bound::Excluded(y)) => x < y, + (Bound::Excluded(x), Bound::Excluded(y)) => x < y, // could technically be empty if "y = x+1", but we can't check + (_, _) => true, // one of them is unbounded + } +} + +impl<'a, 'b> Iterator for RangeIter<'a, 'b> { type 
Item = Item; - // for now we just silently stop when we encounter an error, maybe that isn't the best solution fn next(&mut self) -> Option { - if self.leaf.is_none() && self.nodes.len() == 0 { - // first item - // finding the first item is a bit tricky - // if there is a lower limit, the B+ tree only allows us to either find the item - // or the previous one if there is no exact match; in the latter case, go one further - - let result = if let Some(lim) = self.lower_limit { - let first_res = self.move_down_and_get_item_or_previous(self.tree.root_addr_log, lim); - if let Some(item) = first_res { - if item.key == lim { - // found exactly the limit, that's the easy case - Some(item) - } else { - // found a previous item; so we want the next one - self.next() - } - } else { - // did not find an item, so everything must come after lower limit - // just get the first - self.move_down_and_get_first_item(self.tree.root_addr_log) - } - } else { - // there is no lower limit, so also just get the first - self.move_down_and_get_first_item(self.tree.root_addr_log) - }; - - result.filter(|item|self.upper_limit.is_none() || item.key < self.upper_limit.unwrap()) - } else if self.leaf.is_none() { - // already through the iterator + if !range_valid(self.start.as_ref(), self.end.as_ref()) { return None; - } else { - let height = self.indices.len(); // must be at least 1 - let leaf = self.leaf.as_ref().unwrap(); - - self.indices[height-1] += 1; - if let Some(item) = leaf.items.get(self.indices[height-1]) { - // there's a next item in the same leaf - if self.upper_limit.is_none() || item.key < self.upper_limit.unwrap() { - return Some(item.clone()); - } else { - return None; - } - } else if height == 1 { - // the tree has height 1 and we're through the (only) leaf, there's nothing left - return None; - } else { - // try to advance in one of the higher nodes - self.leaf = None; - self.indices.pop(); - let mut level = height - 2; - - // go up until we can move forward in a node - let 
node_addr = loop { - let node = &self.nodes[level]; - - self.indices[level] += 1; - if let Some(blockptr) = node.children.get(self.indices[level]) { - break blockptr.ptr; - } else { - if level == 0 { - return None; - } - self.indices.pop(); - self.nodes.pop(); - level -= 1; - } - }; - - // first first item under this node - return self.move_down_and_get_first_item(node_addr) - .filter(|item|self.upper_limit.is_none() || item.key < self.upper_limit.unwrap()) - } } + + let (start_key, mode) : (Key, FindKeyMode) = match &self.start { + &Bound::Included(x) => (x, FindKeyMode::GE), + &Bound::Excluded(x) => (x, FindKeyMode::GT), + &Bound::Unbounded => (ZERO_KEY, FindKeyMode::GE), + }; + + // FIX: proper error handling + let result = find_closest_key(self.tree, start_key, mode) + .expect("file system should be consistent (or this is a bug)"); + + if let Some(item) = &result { + self.start = Bound::Excluded(item.key); + } + + let end_filter = |item : &Item| { + match &self.end { + &Bound::Included(x) => item.key <= x, + &Bound::Excluded(x) => item.key < x, + &Bound::Unbounded => true, + } + }; + + result + .filter(end_filter) + .map(|item|item.clone()) } } diff --git a/src/btrfs_structs.rs b/src/btrfs_structs.rs index 9bcc3d6..d2f4c17 100644 --- a/src/btrfs_structs.rs +++ b/src/btrfs_structs.rs @@ -73,6 +73,8 @@ impl Key { } } +pub const ZERO_KEY: Key = Key {key_id: 0, key_type: ItemType::Invalid, key_offset: 0}; + #[allow(unused)] #[derive(Debug,Clone)] pub enum Value {