From d5f7ff4c14872f2bf06ceeea1c3d4d38fe267516 Mon Sep 17 00:00:00 2001 From: Florian Stecker Date: Fri, 15 Sep 2023 09:38:20 -0400 Subject: [PATCH] lots of changes everywhere --- .gitignore | 1 + Cargo.toml | 3 - circle.png | Bin 0 -> 578 bytes src/addrmap.rs | 43 ++++-- src/btrfs_lookup.rs | 275 +++++++++++++++++++++++++++++++++ src/btrfs_structs.rs | 260 ++++++++++++++++++++----------- src/lib.rs | 8 + src/main.rs | 357 +++++++++++++++++++++++++++++++++++-------- src/test.rs | 10 ++ src/util.rs | 7 + 10 files changed, 799 insertions(+), 165 deletions(-) create mode 100644 circle.png create mode 100644 src/btrfs_lookup.rs create mode 100644 src/lib.rs create mode 100644 src/test.rs create mode 100644 src/util.rs diff --git a/.gitignore b/.gitignore index ea8c4bf..96ef6c0 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ /target +Cargo.lock diff --git a/Cargo.toml b/Cargo.toml index c6119aa..c0425a6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,10 +3,7 @@ name = "parsebtrfs" version = "0.1.0" edition = "2021" -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - [dependencies] -anyhow = "1.0.72" binparse_derive = { path = "../binparse_derive" } memmap2 = "0.7.1" rouille = "3.6.2" diff --git a/circle.png b/circle.png new file mode 100644 index 0000000000000000000000000000000000000000..47fe5102d690ad4601101a7fd0479e835bde6716 GIT binary patch literal 578 zcmV-I0=@l-P)EX>4Tx04R}tkv&MmKpe$iTct%RB6cX^kfA!+#V;Id6^c+H)C#RSm|Xe=O&XFE z7e~Rh;NZt%)xpJCR|i)?5c~jfb#YR3krMxx6k5c1aNLh~_a1le0DrT}RI?`msG4PD zQb{3~UloF{2w@NbL@_NfQ=b#XG(5-GJ$!t6^ClDu?Zdk+{#50?g z&Uv3W!pf3Dd`>)W&;^Mfxh}i>#<}FMpJzslY-XM~LM)WJSngt0HdNv%;+Udpl<&{E ztZ?4qtXAu+eNX0S*p< zu@Yskd%Qc;-P^xs+Wq|ip-XbPg?b;-00006VoOIv0RI600RN!9r;`8x010qNS#tmY zE+YT{E+YYWr9XB6000McNliru=Liu494OAjv0nfH06$4YK~xyiV_={e`2YXE{Qv*| zC;k8b{|yvR`v3pGJT`@s{$qkk*c86OqVNq&o{3neAuC*lMSdCio}n550Gf+haUbDE Q=l}o!07*qoM6N<$f~~RpBme*a literal 0 HcmV?d00001 diff --git a/src/addrmap.rs 
b/src/addrmap.rs index ad0e3d4..5f9b827 100644 --- a/src/addrmap.rs +++ b/src/addrmap.rs @@ -1,7 +1,10 @@ -use crate::btrfs_structs::{ParseBin, Key, ChunkItem, Leaf, Value, Superblock, LogToPhys, ParseError}; +use std::rc::Rc; -#[derive(Debug)] -pub struct AddressMap(Vec<(u64,u64,Vec<(u64,u64)>)>); +use crate::btrfs_structs::{ParseBin, Key, ChunkItem, Value, Superblock, ParseError, NODE_SIZE}; +use crate::btrfs_lookup::Tree; + +#[derive(Debug, Clone)] +pub struct AddressMap(pub Vec<(u64,u64,Vec<(u64,u64)>)>); // TODO: support for internal nodes, multiple devices? impl AddressMap { @@ -9,16 +12,15 @@ impl AddressMap { let superblock = Superblock::parse(&image[0x10000..])?; let bootstrap_addr = AddressMap::from_superblock(&superblock)?; - let chunk_root_log = superblock.chunk_root; - println!("Chunk Tree Root Logical Address: {:016x}", chunk_root_log); - - let chunk_root_phys = bootstrap_addr.to_phys(chunk_root_log).unwrap(); - println!("Chunk Tree Root Physical Address: {:016x}", chunk_root_phys); - - let chunk_root = Leaf::parse(&image[chunk_root_phys as usize..])?; + let chunk_tree = Tree { + image: image, + addr_map: Rc::new(bootstrap_addr), + root_addr_log: superblock.chunk_root, + }; let mut addr_map = Vec::new(); - for item in chunk_root.items { + + for item in chunk_tree.iter() { let chunk_key = item.key; if let Value::Chunk(chunk_value) = item.value { addr_map.push(( @@ -31,8 +33,11 @@ impl AddressMap { } } + // almost certainly unnecessary? 
addr_map.sort_by_key(|x|x.0); - println!("Address Table: {:?}", addr_map); + +// addr_map.iter().for_each(|x| println!("{:x?}", x)); + Ok(AddressMap(addr_map)) } @@ -57,7 +62,6 @@ impl AddressMap { } addr_map.sort_by_key(|x|x.0); - println!("Bootstrap Address Table: {:?}", addr_map); Ok(AddressMap(addr_map)) } } @@ -84,3 +88,16 @@ impl LogToPhys for AddressMap { } } } + + +pub fn node_at_log<'a, T: LogToPhys>(image: &'a [u8], addr: &T, log: u64) -> Result<&'a [u8], ParseError> { + if let Some(phys_addr) = addr.to_phys(log) { + Ok(&image[phys_addr as usize .. phys_addr as usize + NODE_SIZE]) + } else { + err!("Logical address {:x} could not be translated to physical address", log) + } +} + +pub trait LogToPhys { + fn to_phys(&self, log: u64) -> Option; +} diff --git a/src/btrfs_lookup.rs b/src/btrfs_lookup.rs new file mode 100644 index 0000000..0c409bb --- /dev/null +++ b/src/btrfs_lookup.rs @@ -0,0 +1,275 @@ +use std::rc::Rc; +use std::ops::Deref; + +use crate::btrfs_structs::{Leaf, Key, Item, InteriorNode, Node, ParseError, ParseBin, Value, Superblock, ItemType}; +use crate::addrmap::{node_at_log, LogToPhys, AddressMap}; + +pub struct Tree<'a> { + pub image: &'a [u8], + pub addr_map: Rc, + pub root_addr_log: u64, +} + +impl<'a> Tree<'a> { + pub fn new>(image: &'a [u8], tree_id: T) -> Result, ParseError> { + let addr_map = Rc::new(AddressMap::new(image)?); + let superblock = Superblock::parse(&image[0x10000..])?; + + let root_tree = Tree { + image: image, + addr_map: Rc::clone(&addr_map), + root_addr_log: superblock.root + }; + let tree_root_item = root_tree.find_key(Key::new(tree_id.into(), ItemType::Root, 0))?; + + let root_addr_log = match tree_root_item.value { + Value::Root(root) => root.bytenr, + _ => return Err("root item invalid".into()) + }; + + Ok(Tree { image, addr_map, root_addr_log }) + } + + pub fn root(image: &'a [u8]) -> Result, ParseError> { + let addr_map = Rc::new(AddressMap::new(image)?); + let superblock = Superblock::parse(&image[0x10000..])?; 
+ + Ok(Tree { image, addr_map, root_addr_log: superblock.root }) + } +} + +/***** looking up keys *****/ + +impl Leaf { + pub fn find_key(&self, key: Key) -> Option { + self.items + .iter() + .find(|x|x.key == key) + .map(|x|x.clone()) + } + + pub fn find_key_or_previous(&self, key: Key) -> Option { + self.items + .iter() + .take_while(|x|x.key <= key) + .last() + .map(|x|x.clone()) + } + +} + +impl InteriorNode { + pub fn find_key_or_previous(&self, key: Key) -> Option { + self.children + .iter() + .take_while(|x|x.key <= key) + .last() + .map(|x|x.ptr) + } +} + +fn find_key_in_node(image: &[u8], addr: &T, root_addr_log: u64, key: Key) -> Result { + let node = Node::parse(node_at_log(image, addr, root_addr_log)?)?; + + match node { + Node::Interior(interior_node) => { + let next_node_log = interior_node.find_key_or_previous(key).unwrap(); + find_key_in_node(image, addr, next_node_log, key) + }, + Node::Leaf(leaf) => { + leaf.find_key(key).ok_or( + error!( + "Item with key ({},{:?},{}) was not found in the leaf at logical address 0x{:x}", + key.key_id, key.key_type, key.key_offset, root_addr_log) + ) + } + } +} + +impl Tree<'_> { + pub fn find_key(&self, key: Key) -> Result { + find_key_in_node(self.image, self.addr_map.deref(), self.root_addr_log, key) + } +} + +/***** iterator *****/ + +pub struct Iter<'a> { + tree: &'a Tree<'a>, + + // path to the last returned item + nodes: Vec, + leaf: Option>, + indices: Vec, + + lower_limit: Option, + upper_limit: Option, +} + +impl Tree<'_> { + pub fn iter<'a>(&'a self) -> Iter<'a> { + self.range(None, None) + } + + pub fn range<'a>(&'a self, lower: Option, upper: Option) -> Iter<'a> { + Iter { + tree: self, + nodes: Vec::new(), + leaf: None, + indices: Vec::new(), // in nodes and leaf + lower_limit: lower, + upper_limit: upper, + } + } + + pub fn range_id<'a>(&'a self, id: u64) -> Iter<'a> { + if id == u64::MAX { + self.range( + Some(Key::new(id, ItemType::Invalid, 0)), + None + ) + } else { + self.range( + 
Some(Key::new(id, ItemType::Invalid, 0)), + Some(Key::new(id+1, ItemType::Invalid, 0)) + ) + } + } +} + +impl Iter<'_> { + fn move_down_and_get_first_item(&mut self, mut node_addr: u64) -> Option { + loop { + let node = Node::parse(node_at_log(self.tree.image, self.tree.addr_map.deref(), node_addr).ok()?).ok()?; + match node { + Node::Interior(int_node) => { + node_addr = int_node.children.first()?.ptr; + self.nodes.push(int_node); + self.indices.push(0); + }, + Node::Leaf(leaf_node) => { + let result = leaf_node.items.first()?.clone(); + self.leaf = Some(Box::new(leaf_node)); + self.indices.push(0); + return Some(result); + }, + } + } + } + + fn move_down_and_get_item_or_previous(&mut self, mut node_addr: u64, key: Key) -> Option { + loop { + let node = Node::parse(node_at_log(self.tree.image, self.tree.addr_map.deref(), node_addr).ok()?).ok()?; + + match node { + Node::Interior(int_node) => { + let (i, new_node_ptr) = int_node + .children + .iter() + .enumerate() + .take_while(|(_,bp)|bp.key <= key) + .last()?; + + node_addr = new_node_ptr.ptr; + self.nodes.push(int_node); + self.indices.push(i); + }, + Node::Leaf(leaf_node) => { + let (i, result) = leaf_node + .items + .iter() + .enumerate() + .take_while(|(_,item)|item.key <= key) + .last()?; + + let result_cloned = result.clone(); + self.leaf = Some(Box::new(leaf_node)); + self.indices.push(i); + return Some(result_cloned); + }, + } + } + } + +} + +impl Iterator for Iter<'_> { + type Item = Item; + + // for now we just silently stop when we encounter an error, maybe that isn't the best solution + fn next(&mut self) -> Option { + if self.leaf.is_none() && self.nodes.len() == 0 { + // first item + // finding the first item is a bit tricky + // if there is a lower limit, the B+ tree only allows us to either find the item + // or the previous one if there is no exact match; in the latter case, go one further + + let result = if let Some(lim) = self.lower_limit { + let first_res = 
self.move_down_and_get_item_or_previous(self.tree.root_addr_log, lim); + if let Some(item) = first_res { + if item.key == lim { + // found exactly the limit, that's the easy case + Some(item) + } else { + // found a previous item; so we want the next one + self.next() + } + } else { + // did not find an item, so everything must come after lower limit + // just get the first + self.move_down_and_get_first_item(self.tree.root_addr_log) + } + } else { + // there is no lower limit, so also just get the first + self.move_down_and_get_first_item(self.tree.root_addr_log) + }; + + result.filter(|item|self.upper_limit.is_none() || item.key < self.upper_limit.unwrap()) + } else if self.leaf.is_none() { + // already through the iterator + return None; + } else { + let height = self.indices.len(); // must be at least 1 + let leaf = self.leaf.as_ref().unwrap(); + + self.indices[height-1] += 1; + if let Some(item) = leaf.items.get(self.indices[height-1]) { + // there's a next item in the same leaf + if self.upper_limit.is_none() || item.key < self.upper_limit.unwrap() { + return Some(item.clone()); + } else { + return None; + } + } else if height == 1 { + // the tree has height 1 and we're through the (only) leaf, there's nothing left + return None; + } else { + // try to advance in one of the higher nodes + self.leaf = None; + self.indices.pop(); + let mut level = height - 2; + + // go up until we can move forward in a node + let node_addr = loop { + let node = &self.nodes[level]; + + self.indices[level] += 1; + if let Some(blockptr) = node.children.get(self.indices[level]) { + break blockptr.ptr; + } else { + if level == 0 { + return None; + } + self.indices.pop(); + self.nodes.pop(); + level -= 1; + } + }; + + // first first item under this node + self.move_down_and_get_first_item(node_addr) + .filter(|item|self.upper_limit.is_none() || item.key < self.upper_limit.unwrap()) + } + } + } +} diff --git a/src/btrfs_structs.rs b/src/btrfs_structs.rs index 755dd92..ecd17d5 100644 
--- a/src/btrfs_structs.rs +++ b/src/btrfs_structs.rs @@ -2,7 +2,6 @@ use binparse_derive::AllVariants; use binparse_derive::ParseBin; use std::fmt; use std::error; -use std::io; use std::ffi::CString; /***** BTRFS structures *****/ @@ -13,9 +12,9 @@ pub const NODE_SIZE: usize = 0x4000; #[derive(Debug,Clone,Copy,AllVariants,PartialEq,Eq,PartialOrd,Ord)] #[repr(u8)] pub enum ItemType { - Invalid = 0x00, + Invalid = 0x00, // invalid Inode = 0x01, // implemented - Ref = 0x0c, // TODO + Ref = 0x0c, // implemented ExtRef = 0x0d, XAttr = 0x18, VerityDesc = 0x24, @@ -23,15 +22,15 @@ pub enum ItemType { Orphan = 0x30, DirLog = 0x3c, DirLogIndex = 0x48, - Dir = 0x54, // implemented (better with len feature) - DirIndex = 0x60, // TODO + Dir = 0x54, // implemented (better with len feature; allow multiple?) + DirIndex = 0x60, // implemented ExtentData = 0x6c, // implemented ExtentCsum = 0x80, Root = 0x84, // implemented - RootBackref = 0x90, + RootBackRef = 0x90, RootRef = 0x9c, - Extent = 0xa8, // implemented (except extra data) - Metadata = 0xa9, // implemented (except extra data) + Extent = 0xa8, // implemented (with only one version of extra data) + Metadata = 0xa9, // implemented (with only one version of extra data) TreeBlockRef = 0xb0, ExtentDataRef = 0xb2, ExtentRefV0 = 0xb4, @@ -73,12 +72,14 @@ pub enum Value { Chunk(ChunkItem), Root(RootItem), Dir(DirItem), + DirIndex(DirItem), FreeSpaceInfo(FreeSpaceInfoItem), FreeSpaceExtent, UUIDSubvol(UUIDSubvolItem), Dev(DevItem), DevExtent(DevExtentItem), ExtentData(ExtentDataItem), + Ref(RefItem), Unknown(Vec), } @@ -167,6 +168,28 @@ pub struct Leaf { pub items: Vec, } +#[allow(unused)] +#[derive(Debug,Clone,ParseBin)] +pub struct KeyPointer { + pub key: Key, + pub ptr: u64, + pub generation: u64, +} + +#[allow(unused)] +#[derive(Debug,Clone)] +pub struct InteriorNode { + pub header: NodeHeader, + pub children: Vec, +} + +#[allow(unused)] +#[derive(Debug,Clone)] +pub enum Node { + Interior(InteriorNode), + Leaf(Leaf), +} + 
#[allow(unused)] #[derive(Debug,Clone,ParseBin)] pub struct BlockGroupItem { @@ -176,12 +199,17 @@ pub struct BlockGroupItem { } #[allow(unused)] -#[derive(Debug,Clone,ParseBin)] +#[derive(Debug,Clone)] pub struct ExtentItem { pub refs: u64, pub generation: u64, pub flags: u64, - pub data: Vec, + // pub data: Vec, + + // this is only correct if flags == 2, fix later! + pub block_refs: Vec<(ItemType, u64)>, +// pub tree_block_key_type: ItemType, +// pub tree_block_key_id: u64, } #[allow(unused)] @@ -357,6 +385,39 @@ pub struct ExtentDataHeader { extent_type: u8, } +#[allow(unused)] +#[derive(Debug,Clone,ParseBin)] +pub struct RefItem { + index: u64, + name_len: u16, + + // #[len = "name_len"] + name: Vec, +} + +#[allow(unused)] +#[repr(u64)] +#[derive(Clone,Copy,Debug)] +pub enum TreeID { + Root = 1, + Extent = 2, + Chunk = 3, + Dev = 4, + FS = 5, + RootDir = 6, + CSum = 7, + Quota = 8, + UUID = 9, + FreeSpace = 10, + BlockGroup = 11, +} + +impl From for u64 { + fn from(value: TreeID) -> u64 { + value as u64 + } +} + /***** trait for parsing, and implementations for basic types *****/ // most of the more complex types will be parsed using derive macros @@ -383,12 +444,6 @@ impl From<&str> for ParseError { } } -impl From for io::Error { - fn from(value: ParseError) -> io::Error { - io::Error::other(value) - } -} - pub trait ParseBin where Self: Sized { fn parse_len(bytes: &[u8]) -> Result<(Self, usize), ParseError>; @@ -400,7 +455,7 @@ pub trait ParseBin where Self: Sized { impl ParseBin for u8 { fn parse_len(bytes: &[u8]) -> Result<(Self, usize), ParseError> { if bytes.len() < 1 { - Err("not enough data".into()) + err!("not enough data") } else { Ok((bytes[0], 1)) } @@ -410,7 +465,7 @@ impl ParseBin for u8 { impl ParseBin for u16 { fn parse_len(bytes: &[u8]) -> Result<(Self, usize), ParseError> { if bytes.len() < 2 { - Err("not enough data".into()) + err!("not enough data") } else { let result = u16::from_le_bytes(bytes[0..2].try_into().unwrap()); Ok((result, 2)) @@ 
-421,7 +476,7 @@ impl ParseBin for u16 { impl ParseBin for u32 { fn parse_len(bytes: &[u8]) -> Result<(Self, usize), ParseError> { if bytes.len() < 4 { - Err("not enough data".into()) + err!("not enough data") } else { let result = u32::from_le_bytes(bytes[0..4].try_into().unwrap()); Ok((result, 4)) @@ -432,7 +487,7 @@ impl ParseBin for u32 { impl ParseBin for u64 { fn parse_len(bytes: &[u8]) -> Result<(Self, usize), ParseError> { if bytes.len() < 8 { - Err("not enough data".into()) + err!("not enough data") } else { let result = u64::from_le_bytes(bytes[0..8].try_into().unwrap()); Ok((result, 8)) @@ -443,7 +498,7 @@ impl ParseBin for u64 { impl ParseBin for [u8; N] { fn parse_len(bytes: &[u8]) -> Result<([u8; N], usize), ParseError> { if bytes.len() < N { - Err("not enough data".into()) + err!("not enough data") } else { Ok((bytes[0..N].try_into().unwrap(), N)) } @@ -489,63 +544,99 @@ impl ParseBin for ItemType { } } -impl ParseBin for Leaf { - fn parse_len(bytes: &[u8]) -> Result<(Leaf, usize), ParseError> { +impl ParseBin for Node { + fn parse_len(bytes: &[u8]) -> Result<(Node, usize), ParseError> { if bytes.len() < 0x65 { - return Err("not enough data".into()); + return err!("Not enough data to parse node header"); } let header = NodeHeader::parse(&bytes[0..0x65])?; - let mut items = Vec::new(); - for i in 0..header.nritems as usize { - let key = Key::parse(&bytes[i*0x19 + 0x65 .. i*0x19 + 0x65 + 0x11])?; - let offset = u32::parse(&bytes[i*0x19 + 0x65 + 0x11 .. i*0x19 + 0x65 + 0x15])?; - let size = u32::parse(&bytes[i*0x19 + 0x65 + 0x15 .. i*0x19 + 0x65 + 0x19])?; + if header.level > 0 { + // interior node + let mut children = Vec::new(); + let num = header.nritems as usize; - let data_slice = &bytes[0x65 + offset as usize .. - 0x65 + offset as usize + size as usize]; + for i in 0 .. num { + children.push(KeyPointer::parse(&bytes[0x65 + i*0x21 .. 
0x86 + i*0x21])?); + } - let value = match key.key_type { - ItemType::BlockGroup => - Value::BlockGroup(BlockGroupItem::parse(data_slice)?), - ItemType::Metadata => - Value::Extent(ExtentItem::parse(data_slice)?), - ItemType::Extent => - Value::Extent(ExtentItem::parse(data_slice)?), - ItemType::Inode => - Value::Inode(InodeItem::parse(data_slice)?), - ItemType::Root => - Value::Root(RootItem::parse(data_slice)?), - ItemType::Dir => - Value::Dir(DirItem::parse(data_slice)?), - ItemType::Chunk => - Value::Chunk(ChunkItem::parse(data_slice)?), - ItemType::FreeSpaceInfo => - Value::FreeSpaceInfo(FreeSpaceInfoItem::parse(data_slice)?), - ItemType::FreeSpaceExtent => - Value::FreeSpaceExtent, - ItemType::UUIDSubvol => - Value::UUIDSubvol(UUIDSubvolItem::parse(data_slice)?), - ItemType::Dev => - Value::Dev(DevItem::parse(data_slice)?), - ItemType::DevExtent => - Value::DevExtent(DevExtentItem::parse(data_slice)?), - ItemType::ExtentData => - Value::ExtentData(ExtentDataItem::parse(data_slice)?), - _ => - Value::Unknown(Vec::from(data_slice)), - }; + Ok((Node::Interior(InteriorNode { header, children }), NODE_SIZE)) + } else { + // leaf node + let mut items = Vec::new(); - items.push(Item { key, value }); + for i in 0..header.nritems as usize { + let key = Key::parse(&bytes[i*0x19 + 0x65 .. i*0x19 + 0x65 + 0x11])?; + let offset = u32::parse(&bytes[i*0x19 + 0x65 + 0x11 .. i*0x19 + 0x65 + 0x15])?; + let size = u32::parse(&bytes[i*0x19 + 0x65 + 0x15 .. i*0x19 + 0x65 + 0x19])?; + + let data_slice = &bytes[0x65 + offset as usize .. 
+ 0x65 + offset as usize + size as usize]; + + let value = match key.key_type { + ItemType::BlockGroup => + Value::BlockGroup(BlockGroupItem::parse(data_slice)?), + ItemType::Metadata => { + let item = ExtentItem::parse(data_slice)?; + if item.flags != 2 || item.refs > 1 { + println!("Metadata item with refs = {}, flags = {}, data = {:x?}", item.refs, item.flags, &data_slice[0x18..]); + } + Value::Extent(item) + }, + ItemType::Extent => + Value::Extent(ExtentItem::parse(data_slice)?), + ItemType::Inode => + Value::Inode(InodeItem::parse(data_slice)?), + ItemType::Root => + Value::Root(RootItem::parse(data_slice)?), + ItemType::Dir => + Value::Dir(DirItem::parse(data_slice)?), + ItemType::DirIndex => + Value::DirIndex(DirItem::parse(data_slice)?), + ItemType::Chunk => + Value::Chunk(ChunkItem::parse(data_slice)?), + ItemType::FreeSpaceInfo => + Value::FreeSpaceInfo(FreeSpaceInfoItem::parse(data_slice)?), + ItemType::FreeSpaceExtent => + Value::FreeSpaceExtent, + ItemType::UUIDSubvol => + Value::UUIDSubvol(UUIDSubvolItem::parse(data_slice)?), + ItemType::Dev => + Value::Dev(DevItem::parse(data_slice)?), + ItemType::DevExtent => + Value::DevExtent(DevExtentItem::parse(data_slice)?), + ItemType::ExtentData => + Value::ExtentData(ExtentDataItem::parse(data_slice)?), + ItemType::Ref => + Value::Ref(RefItem::parse(data_slice)?), + _ => + Value::Unknown(Vec::from(data_slice)), + }; + + items.push(Item { key, value }); + } + + Ok((Node::Leaf(Leaf { header, items }), NODE_SIZE)) } + } +} - let result = Leaf { - header, - items, - }; +impl ParseBin for InteriorNode { + fn parse_len(bytes: &[u8]) -> Result<(Self, usize), ParseError> { + match Node::parse_len(bytes)? { + (Node::Interior(int_node), len) => Ok((int_node, len)), + _ => err!("Expected interior node, found leaf"), + } + } +} - Ok((result, NODE_SIZE)) +impl ParseBin for Leaf { + fn parse_len(bytes: &[u8]) -> Result<(Self, usize), ParseError> { + match Node::parse_len(bytes)? 
{ + (Node::Leaf(leaf_node), len) => Ok((leaf_node, len)), + _ => err!("Expected leaf, found interior node"), + } } } @@ -567,29 +658,26 @@ impl ParseBin for ExtentDataItem { } } -/***** looking up keys *****/ +impl ParseBin for ExtentItem { + fn parse_len(bytes: &[u8]) -> Result<(Self, usize), ParseError> { + let refs = u64::parse(bytes)?; + let generation = u64::parse(&bytes[0x08..])?; + let flags = u64::parse(&bytes[0x10..])?; -impl Leaf { - pub fn find_key(&self, key: Key) -> Option { - self.items.iter().find(|x|x.key == key).map(|x|x.clone()) + let mut block_refs = Vec::new(); + + if flags & 0x03 == 0x02 { + for i in 0 .. refs as usize { + let key_type = ItemType::parse(&bytes[0x18 + i*0x09 .. ])?; + let key_id = u64::parse(&bytes[0x19 + i*0x09 .. ])?; + block_refs.push((key_type, key_id)); + } + } + + Ok((ExtentItem { refs, generation, flags, block_refs }, 0x18 + refs as usize * 0x09)) } } -pub fn node_at_log<'a, T: LogToPhys>(image: &'a [u8], addr: &T, log: u64) -> Option<&'a [u8]> { - let phys_addr = addr.to_phys(log)?; - Some(&image[phys_addr as usize .. 
phys_addr as usize + NODE_SIZE]) -} - -pub trait LogToPhys { - fn to_phys(&self, log: u64) -> Option; -} - -pub fn find_key_in_tree(image: &[u8], addr: &T, root_addr_log: u64, key: Key) -> Option { - // assuming level is 0 - let leaf = Leaf::parse(node_at_log(image, addr, root_addr_log)?).ok()?; - leaf.find_key(key) -} - /***** prettier debug output for UUIDs and checksums *****/ impl fmt::Debug for UUID { diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..129c3bc --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,8 @@ +#[macro_use] +pub mod util; +pub mod btrfs_structs; +pub mod btrfs_lookup; +pub mod addrmap; + +#[cfg(test)] +mod test; diff --git a/src/main.rs b/src/main.rs index 9f54a5d..c55be77 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,53 +1,313 @@ -#![feature(io_error_other)] - -mod btrfs_structs; -mod addrmap; - -use addrmap::AddressMap; -use memmap2::Mmap; +use memmap2::{Mmap, MmapOptions}; use std::fs::File; -use std::io::Error as IOError; -use anyhow::Error as AError; use rouille::Request; use rouille::Response; -use btrfs_structs::{ItemType, ParseBin, Key, Item, Leaf, NodeHeader, NODE_SIZE, Superblock, Value, LogToPhys}; +use rouille::router; +use std::iter; +use std::env; +use std::{fs::OpenOptions, os::unix::fs::OpenOptionsExt}; +use std::collections::HashMap; -//const ACTIVE_NODES: &'static[usize] = &[0x14000, 0x18000, 0x1c000, 0x20000, 0x28000, 0x2c000, 0x3c000, 0x40000]; +use parsebtrfs::btrfs_structs::{TreeID, Value::Extent, Value::BlockGroup, ParseError, NODE_SIZE, ItemType, Node, ParseBin}; +use parsebtrfs::btrfs_lookup::Tree; -const EXTENT_TREE: u64 = 2; -const FS_TREE: u64 = 5; +use parsebtrfs::addrmap::{AddressMap, LogToPhys}; -fn main() -> Result<(), IOError> { - let file = File::open("../image")?; +const COLORS: &[&str] = &["#e6194b", "#3cb44b", "#ffe119", "#4363d8", "#f58231", "#911eb4", "#46f0f0", "#f032e6", "#bcf60c", "#fabebe", "#008080", "#e6beff", "#9a6324", "#fffac8", "#800000", "#aaffc3", "#808000", "#ffd8b1", 
"#000075", "#808080", "#000000"]; + +fn main() -> Result<(), MainError> { + let filename = env::args().skip(1).next().ok_or("Argument required")?; + + let file = OpenOptions::new().read(true).open(filename)?; let image = unsafe { Mmap::map(&file)? }; - let addr = AddressMap::new(&image).unwrap(); - let superblock = Superblock::parse(&image[0x10000..]).unwrap(); + const O_DIRECT: i32 = 0x4000; +// let file = OpenOptions::new().read(true).custom_flags(O_DIRECT).open(filename)?; +// let image = unsafe { MmapOptions::new().len(493921239040usize).map(&file)? }; - let chunk_root_phys = addr.to_phys(superblock.chunk_root) - .ok_or(IOError::other("Chunk root address stored in superblock is invalid"))? as usize; - let root_phys = addr.to_phys(superblock.root) - .ok_or(IOError::other("Root address stored in superblock is invalid"))? as usize; +// return Ok(()); - let root_tree_leaf = Leaf::parse(&image[root_phys .. root_phys + NODE_SIZE])?; + /* + let mystery_addr = 0x2f_2251_c000; + let addr_map = AddressMap::new(&image)?; + let mystery_addr_phys = addr_map.to_phys(mystery_addr).unwrap() as usize; + let mystery_node = Node::parse(&image[mystery_addr_phys .. ])?; - let root_key = Key::new(EXTENT_TREE, ItemType::Root, 0); - let root = root_tree_leaf.find_key(root_key) - .ok_or(IOError::other("Could not find extent tree in tree of roots"))?; + println!("{:#x?}", &mystery_node); +*/ - let root_addr = if let Value::Root(root_item) = root.value { - addr.to_phys(root_item.bytenr) - } else { - None - }.unwrap() as usize; + rouille::start_server("127.0.0.1:8080", move |request| { + router!( + request, + (GET) ["/"] => http_main_boxes(&image, request), + (GET) ["/favicon.ico"] => Response::empty_404(), + _ => Response::empty_404(), + ) + }); +} - let tree_leaf = Leaf::parse(&image[root_addr .. 
root_addr + NODE_SIZE])?; +static CIRCLE_IMAGE: &str = + "data:image/png;base64,\ + iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAABhGlDQ1BJQ0MgcHJvZmlsZQAAKJF9\ + kT1Iw0AcxV9bpUUqInYo4pChOtnFLxxrFYpQIdQKrTqYXPoFTRqSFBdHwbXg4Mdi1cHFWVcHV0EQ\ + /ABxdXFSdJES/5cUWsR4cNyPd/ced+8Af7PKVLMnAaiaZWRSSSGXXxWCrwhhEAFEMS0xU58TxTQ8\ + x9c9fHy9i/Ms73N/jn6lYDLAJxAnmG5YxBvEM5uWznmfOMLKkkJ8Tjxu0AWJH7kuu/zGueSwn2dG\ + jGxmnjhCLJS6WO5iVjZU4inimKJqlO/Puaxw3uKsVuusfU/+wnBBW1nmOs0RpLCIJYgQIKOOCqqw\ + EKdVI8VEhvaTHv5hxy+SSyZXBYwcC6hBheT4wf/gd7dmcXLCTQongd4X2/4YBYK7QKth29/Htt06\ + AQLPwJXW8deawOwn6Y2OFjsCBraBi+uOJu8BlztA9EmXDMmRAjT9xSLwfkbflAeGboG+Nbe39j5O\ + H4AsdZW+AQ4OgbESZa97vDvU3du/Z9r9/QChS3K5hXof0gAAAAZiS0dEAP8A/wD/oL2nkwAAAAlw\ + SFlzAAAuIwAALiMBeKU/dgAAAAd0SU1FB+cIEQMcKM7EsV8AAAA/SURBVBjTY2CgGfj//7/8////\ + J/3///8NFE/6//+/PDaFk/5jgknYFL7BovANTJ6JWKchK1yGRX4Z+Z6hGgAAmotacR/hRugAAAAA\ + SUVORK5CYII="; - println!("{:#?}", &tree_leaf); +static EXPLANATION_TEXT: &str = "\ +

Chunks

+

On the highest level, btrfs splits the disk into chunks (also called block groups). They can have different sizes, with 1GiB being typical in a large file system. Each chunk can either contain data or metadata.

-// println!("{:#?}", Leaf::parse(&image[0x253c000..0x2540000])); +

Here we look at the metadata chunks. They contain the B-trees, which btrfs gets its name from. They are key-value stores for different kinds of information. For example, the filesystem tree stores which files and directories are in the filesystem, and the extent tree stores which areas of the disk are in use. Each B-tree consists of a number of 16KiB nodes, here symbolized by colorful boxes, with the color indicating which tree the node belongs to. Most of the nodes are leaves, which contain the actual key-value pairs. The others are interior nodes, and we indicate them with a little white circle. They are important to find the leaf a key is stored in.

"; - Ok(()) +fn http_main_boxes(image: &[u8], _req: &Request) -> Response { + let mut treecolors: HashMap = HashMap::new(); + + let mut result = String::new(); + + let explanation_tablerowformat = |c: &str, t: &str| format!( + "\ +
\ +
\ + {}\ + \n", + c, c, CIRCLE_IMAGE, t); + let explanation_tablerowformat_leafonly = |c,t| format!( + "\ +
\ + \ + {}\ + \n", + c, t); + + let cellformat = |c| format!( + "\n", + c); + let cellformat_higher = |c,_| format!( + "\n", + c, CIRCLE_IMAGE); + + result.push_str(&"
\nWhat am I seeing here?"); + result.push_str(EXPLANATION_TEXT); + + // tree explanations + result.push_str(&"\n"); + result.push_str(&explanation_tablerowformat_leafonly("lightgrey", "unused or outdated node")); + treecolors.insert(1, COLORS[treecolors.len() % COLORS.len()]); + result.push_str(&explanation_tablerowformat(treecolors[&1], "root tree")); + + treecolors.insert(3, COLORS[treecolors.len() % COLORS.len()]); + result.push_str(&explanation_tablerowformat(treecolors[&3], "chunk tree")); + + let roots = Tree::root(image).unwrap(); + for item in roots.iter() { + if item.key.key_type == ItemType::Root { + let treedesc: String = match &item.key.key_id { + 1 => format!("root tree"), + 2 => format!("extent tree"), + 3 => format!("chunk tree"), + 4 => format!("device tree"), + 5 => format!("filesystem tree"), + 6 => format!("root directory"), + 7 => format!("checksum tree"), + 8 => format!("quota tree"), + 9 => format!("UUID tree"), + 10 => format!("free space tree"), + 11 => format!("block group tree"), + 0xffff_ffff_ffff_fff7 => format!("data reloc tree"), + x @ 0x100 ..= 0xffff_ffff_ffff_feff => format!("file tree, id = {}", x), + x => format!("other tree, id = {}", x), + }; + + treecolors.insert(item.key.key_id, COLORS[treecolors.len() % COLORS.len()]); + result.push_str(&explanation_tablerowformat( + treecolors[&item.key.key_id], + &treedesc + )); + } + } + result.push_str(&"
\n"); + result.push_str(&"
\n"); + + let extent_tree = Tree::new(&image, TreeID::Extent).unwrap(); + let mut extent_tree_iterator = extent_tree.iter(); + + // current_blockgroup == None: haven't encountered a blockgroup yet + // metadata_items == None: current blockgroup is not metadata or system + let mut current_blockgroup = None; + let mut metadata_items: Option>> = None; + + let metadata_blockgroups = iter::from_fn(|| { + while let Some(item) = extent_tree_iterator.next() { +// println!("Got key: {:x?}", &item.key); + match &item.value { + BlockGroup(bg) => { + println!("{:x?}", item.key); + let result = (current_blockgroup.take(), metadata_items.take()); + + let nodes_in_blockgroup = item.key.key_offset as usize / NODE_SIZE; + if bg.flags & 0x01 == 0 { + metadata_items = Some(vec![None; nodes_in_blockgroup]); + } else { + metadata_items = None; + } + current_blockgroup = Some(item); + + if let (Some(bg), met) = result { + return Some((bg, met)); + } + }, + Extent(e) => { + if let Some(bg_item) = ¤t_blockgroup { + if let Some(met) = &mut metadata_items { + let bg_start = bg_item.key.key_id; + let node_addr = item.key.key_id; + let tree_id = e.block_refs.iter().filter(|&(t,_)|t == &ItemType::TreeBlockRef).count() as u64; + let index = (node_addr - bg_start) as usize / NODE_SIZE; + if index < met.len() { + met[index] = Some((tree_id, item.key.key_offset)); + } else { + println!("Warning: extent out of block group range: {:x?}", &item.key); + } + } + } else { + println!("Warning: extent without matching block group: {:x?}", &item.key); + } + }, + _ => {},//panic!("Unexpected item in extent tree: {:x?}", item.key) + } + } + + let result = (current_blockgroup.take(), metadata_items.take()); + if let (Some(bg), met) = result { + return Some((bg, met)); + } else { + return None; + } + }); + + let mut last_key = 0; + + // colorful table + for (bg, nodes) in metadata_blockgroups { + if bg.key.key_id < last_key { + println!("Error: going backwards!"); + break; + } else { + last_key = 
bg.key.key_id; + } + + let bg_value = match &bg.value { + BlockGroup(bgv) => bgv, + _ => panic!("Expected BlockGroup value"), + }; + + // header + result.push_str( + &format!( + "

{:x} - {:x} ({}, {})

Physical: {}

\n", + bg.key.key_id, + bg.key.key_id + bg.key.key_offset, + match bg.key.key_offset { + x if x <= (1<<11) => format!("{} B", x), + x if x <= (1<<21) => format!("{} KiB", x as f64 / (1u64<<10) as f64), + x if x <= (1<<31) => format!("{} MiB", x as f64 / (1u64<<20) as f64), + x if x <= (1<<41) => format!("{} GiB", x as f64 / (1u64<<30) as f64), + x if x <= (1<<51) => format!("{} TiB", x as f64 / (1u64<<40) as f64), + x @ _ => format!("{} PiB", x as f64 / (1u64<<50) as f64), + }, + match bg_value.flags & 0x07 { + 0x01 => "Data", + 0x02 => "System", + 0x04 => "Metadata", + _ => "???", + }, + match extent_tree.addr_map.as_ref().0.binary_search_by_key(&bg.key.key_id, |x|x.0) { + Ok(i) => format!("{:x?}", &extent_tree.addr_map.as_ref().0[i].2), + _ => String::from(""), + } + ) + ); + + if let Some(nodes) = nodes { + result.push_str("\n\n"); + + for (i, &n) in nodes.iter().enumerate() { + if i % 64 == 0 && i != 0 { + result.push_str("\n\n"); + } + + if let Some((tid, level)) = n { + let color: Option<&str> = treecolors.get(&tid).map(|x|*x); + let color = color.unwrap_or_else(|| { + println!("Unknown color for id: {}", &tid); + let color: &str = COLORS[treecolors.len() % COLORS.len()]; + treecolors.insert(tid, color); + color + }); + if level == 0 { + result.push_str(&cellformat(color)); + } else { + result.push_str(&cellformat_higher(color, level)); + } + } else { + result.push_str(&cellformat("lightgrey")); + } + } + + result.push_str("\n
\n"); + } + } + + Response::html(result) +} + +// ----- Error handling ----- + +pub struct MainError(String); + +impl std::error::Error for MainError {} + +impl std::fmt::Debug for MainError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", &self.0) + } +} + +impl std::fmt::Display for MainError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", &self.0) + } +} + +impl From for MainError { + fn from(value: String) -> MainError { + MainError(value) + } +} + +impl From<&str> for MainError { + fn from(value: &str) -> MainError { + MainError::from(String::from(value)) + } +} + +impl From for MainError { + fn from(value: ParseError) -> MainError { + MainError::from(format!("BTRFS format error: {value}")) + } +} + +impl From for MainError { + fn from(value: std::io::Error) -> MainError { + MainError::from(format!("IO error: {value}")) + } } /* @@ -62,35 +322,6 @@ fn main() -> Result<(), std::io::Error> { }); } -fn http_main_boxes(image: &[u8], addr: &AddressTranslation, req: &Request) -> Response { - let chunk_offset = 0x02500000; - let nodes_in_chunk = 2048; - let mut result = String::new(); - - result.push_str("\n\n"); - - for i in 0..nodes_in_chunk { - if i % 64 == 0 { - result.push_str("\n\n"); - } - - let node = read_node(&image, chunk_offset + i*0x4000); - - let active = node.generation > 0 && ACTIVE_NODES.contains(&(i*0x4000)); - - let newbox = format!("\n", - if active { - "height:10px;width:10px;padding:0;background:black;" - } else { - "height:10px;width:10px;padding:0;background:lightgray;" - }); - result.push_str(&newbox); - } - - result.push_str("\n
"); - - Response::html(result) -} fn http_main_list(image: &[u8], addr: &AddressTranslation, req: &Request) -> Response { let chunk_offset = 0x02500000; diff --git a/src/test.rs b/src/test.rs new file mode 100644 index 0000000..65451cb --- /dev/null +++ b/src/test.rs @@ -0,0 +1,10 @@ +use super::*; +use btrfs_structs::{Key, ItemType}; + +#[test] +fn test_key_new() { + assert_eq!( + Key::new(1, ItemType::Root, 2), + Key { key_id: 1, key_type: ItemType::Root, key_offset: 2 } + ); +} diff --git a/src/util.rs b/src/util.rs new file mode 100644 index 0000000..6b91f0f --- /dev/null +++ b/src/util.rs @@ -0,0 +1,7 @@ +macro_rules! error { + ($($i:expr),*) => { format!($($i),*).into() }; +} + +macro_rules! err { + ($($i:expr),*) => { Err(error!($($i),*)) }; +}