From 6a6cbdef2ca0e84f4312415a235a5a59fe35ffec Mon Sep 17 00:00:00 2001 From: Florian Stecker Date: Sun, 30 Nov 2025 19:10:45 -0500 Subject: [PATCH] parse entire commands --- src/main.rs | 398 +++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 332 insertions(+), 66 deletions(-) diff --git a/src/main.rs b/src/main.rs index 4724b48..d885291 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,14 +1,14 @@ use std::fs::File; use std::error::Error; use std::io::{Read, Write, ErrorKind}; -use std::collections::BTreeMap; +use std::collections::{HashMap, BTreeMap}; use num_enum::TryFromPrimitive; use std::fmt::Display; use chrono::DateTime; -#[derive(TryFromPrimitive,Debug,PartialEq,Eq,Clone,Copy)] +#[derive(TryFromPrimitive,Debug,PartialEq,Eq,Clone,Copy,PartialOrd,Ord)] #[repr(u16)] -enum CommandType { +pub enum CommandType { Unspec = 0, Subvol = 1, Snapshot = 2, @@ -37,9 +37,39 @@ enum CommandType { EncodedWrite = 25, } -#[derive(TryFromPrimitive,Debug,Clone,Copy,PartialEq,Eq)] +#[derive(Debug,Clone)] +pub enum Command { + Unspec, + Subvol, + Snapshot { clone_transid: u64, clone_uuid: UUID, uuid: UUID, path: String, transid: u64 }, + MkFile { path: String, inode: u64 }, + MkDir { path: String, inode: u64 }, + MkNod, + MkFifo, + MkSock, + Symlink { path: String, inode: u64, link: String }, + Rename { from: String, to: String }, + Link { path: String, link: String }, + Unlink { path: String }, + RmDir { path: String }, + SetXAttr, + RemoveXAttr, + Write { path: String, offset: u64, data: Vec }, + Clone { path: String, offset: u64, clone_path: String, clone_uuid: UUID, clone_len: u64, clone_transid: u64, clone_offset: u64 }, + Truncate { path: String, size: u64 }, + Chmod { path: String, mode: u64 }, + Chown { path: String, uid: u64, gid: u64 }, + UTimes { path: String, atime: Time, mtime: Time, ctime: Time }, + End, + UpdateExtent, + FAllocate, + FileAttr, + EncodedWrite, +} + +#[derive(TryFromPrimitive,Debug,Clone,Copy,PartialEq,Eq,PartialOrd,Ord,Hash)] #[repr(u16)] -enum TLVType { +pub enum TLVType { Unspec = 0, UUID = 1, CTransID = 2, @@ -74,7 +104,11 @@ enum TLVType { Encryption = 31, } -struct UUID(u128); +/// `u128` wrapper for UUIDs +/// +/// This mostly just overrides the `Display` trait to use the typical UUID format `XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX` +#[derive(Debug,Clone)] +pub struct UUID(u128); impl Display for UUID { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> { @@ -86,7 +120,11 @@ impl Display for UUID { } } -struct Time { +/// time format for BTRFS stream +/// +/// Consists of a 64 bit UNIX timestamp and a 32 bit nanoseconds field. +#[derive(Debug,Clone)] +pub struct Time { secs: i64, nanos: u32, } @@ -94,33 +132,10 @@ struct Time { impl Display for Time { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> { write!(f, "{}", DateTime::from_timestamp(self.secs, self.nanos).ok_or(std::fmt::Error)?) - -// write!(f, "{}.{:09}", self.secs, self.nanos) - } -} - -fn parse_tlv(ty: TLVType, data: &[u8]) -> Option> { - match ty { - TLVType::CTransID | TLVType::Ino | TLVType::Size | TLVType::Mode | TLVType::UID | - TLVType::GID | TLVType::RDev | TLVType::FileOffset | TLVType::CloneCTransID | - TLVType::CloneOffset | TLVType::CloneLen | TLVType::FileAttr | TLVType::UnencodedFileLen | - TLVType::UnencodedLen | TLVType::UnencodedOffset => - Some(Box::new(u64::from_le_bytes(data.try_into().ok()?))), - TLVType::FAllocateMode | TLVType::Compression | TLVType::Encryption => - Some(Box::new(u32::from_le_bytes(data.try_into().ok()?))), - TLVType::XAttrName | TLVType::Path | TLVType::PathTo | TLVType::PathLink | TLVType::ClonePath => - Some(Box::new(String::from_utf8(data.to_vec()).ok()?)), - TLVType::UUID | TLVType::CloneUUID => - Some(Box::new(UUID(u128::from_le_bytes(data.try_into().ok()?)))), - TLVType::CTime | TLVType::MTime | TLVType::ATime | TLVType::OTime => { - let secs = i64::from_le_bytes(data[0..8].try_into().ok()?); - let nanos = u32::from_le_bytes(data[8..12].try_into().ok()?); - Some(Box::new(Time {secs, nanos})) - }, - _ => None } } +/// `writeln`, but behaving more gracefully on shutdown macro_rules! out { ($($arg:tt)*) => { let result = writeln!(std::io::stdout(), $($arg)*); @@ -130,54 +145,305 @@ macro_rules! out { }; } +#[derive(Clone,Copy,PartialEq,Eq,PartialOrd,Ord,Debug)] +enum FileChange { + Unchanged, + Created, + Deleted, + Modified, + ModifiedOnlyAttributes, +} + +#[derive(Clone,Copy,PartialEq,Eq,Debug)] +enum FileOp { + Create, + Delete, + Modify, + ModifyAttributes, +} + +fn state_change(old: FileChange, op: FileOp) -> Option { + let new = match (old, op) { + (FileChange::Unchanged, FileOp::Create) => FileChange::Created, + (FileChange::Unchanged, FileOp::Delete) => FileChange::Deleted, + (FileChange::Unchanged, FileOp::Modify) => FileChange::Modified, + (FileChange::Unchanged, FileOp::ModifyAttributes) => FileChange::ModifiedOnlyAttributes, + (FileChange::Created, FileOp::Delete) => FileChange::Unchanged, + (FileChange::Created, FileOp::Modify) => FileChange::Created, + (FileChange::Created, FileOp::ModifyAttributes) => FileChange::Created, + (FileChange::Deleted, FileOp::Create) => FileChange::Modified, + (FileChange::Modified, FileOp::Delete) => FileChange::Deleted, + (FileChange::Modified, FileOp::Modify) => FileChange::Modified, + (FileChange::Modified, FileOp::ModifyAttributes) => FileChange::Modified, + (FileChange::ModifiedOnlyAttributes, FileOp::Delete) => FileChange::Deleted, + (FileChange::ModifiedOnlyAttributes, FileOp::Modify) => FileChange::Modified, + (FileChange::ModifiedOnlyAttributes, FileOp::ModifyAttributes) => FileChange::ModifiedOnlyAttributes, + _ => return None, + }; + Some(new) +} + fn main() -> Result<(), Box> { let mut contents: Vec = Vec::new(); - File::open("../btrfs_send_test")?.read_to_end(&mut contents)?; + File::open("../sync_diff_20250525_20251130")?.read_to_end(&mut contents)?; out!("Magic: {}", &String::from_utf8(contents[..12].to_vec())?); out!("Version: {}", u32::from_le_bytes(contents[13..17].try_into()?)); out!(""); + let mut files: BTreeMap = BTreeMap::new(); + let mut offset: usize = 17; - let mut paths: BTreeMap> = BTreeMap::new(); - - while offset < contents.len() { - let len = u32::from_le_bytes(contents[offset .. offset + 4].try_into()?); - let cmd = CommandType::try_from_primitive( - u16::from_le_bytes( - contents[offset + 4 .. offset + 6].try_into()? - ) - )?; - let _checksum = u32::from_le_bytes(contents[offset + 6.. offset + 10].try_into()?); - -// out!("{cmd:?}"); - - let mut inner_offset: usize = 0; - while inner_offset < len as usize { - let tlvtype = TLVType::try_from_primitive( - u16::from_le_bytes( - contents[offset + inner_offset + 10 .. offset + inner_offset + 12].try_into()? - ) - )?; - let tlvlen = u16::from_le_bytes(contents[offset + inner_offset + 12 .. offset + inner_offset + 14].try_into()?); - - if tlvtype == TLVType::Path { - let data = &contents[offset + inner_offset + 14 .. offset + inner_offset + 14 + tlvlen as usize]; - let path = String::from_utf8(data.to_vec())?; - paths.entry(path) - .or_insert(Vec::new()) - .push(cmd); - } - - inner_offset += tlvlen as usize + 4; + fn do_op(files: &mut BTreeMap, path: &String, op: FileOp) -> Result<(), Box> { + let old = *files.get(path).unwrap_or(&FileChange::Unchanged); + let new = state_change(old, op) + .ok_or(format!("Invalid operation: {:?} on {:?}, path = {}", op, old, path))?; + if new == FileChange::Unchanged { + files.remove(path); + } else { + files.insert(path.clone(), new); } - - offset += len as usize + 10; // 10 byte header + Ok(()) } - out!("{:#?}", paths); + while offset < contents.len() { + let (cmd, new_offset) = Command::parse(&contents[offset..])?; + offset += new_offset; + + // out!("{:?}", cmd.ty()); + match &cmd { + Command::MkFile { path, inode: _ } | + Command::MkDir { path, inode: _ } | + Command::Symlink { path, inode: _, link: _ } | + Command::Link { path, link: _ } => { + do_op(&mut files, path, FileOp::Create)?; + }, + Command::Rename { from, to } if from != to => { + do_op(&mut files, from, FileOp::Delete)?; + do_op(&mut files, to, FileOp::Create)?; + }, + Command::Unlink { path } | + Command::RmDir { path } => { + do_op(&mut files, path, FileOp::Delete)?; + }, + Command::Write { path, offset: _, data: _ } | + Command::Truncate { path, size: _ } => { + do_op(&mut files, path, FileOp::Modify)?; + }, + Command::Chmod { path, mode: _ } | + Command::Chown { path, uid: _, gid: _ } => { + do_op(&mut files, path, FileOp::ModifyAttributes)?; + }, + _ => (), + } + } + + let mut last: Option = None; + for (file, &change) in &files { + if let Some(last_) = &last && file.starts_with(last_) { + // ignore + } else if change == FileChange::Created { + last = Some(file.clone()); + out!("{file}"); + } + } Ok(()) } + +pub trait TlvData: Sized { + fn parse_tlv_data(data: &[u8]) -> Result>; +} + +impl TlvData for u64 { + fn parse_tlv_data(data: &[u8]) -> Result> { + Ok(u64::from_le_bytes(data.try_into()?)) + } +} + +impl TlvData for u32 { + fn parse_tlv_data(data: &[u8]) -> Result> { + Ok(u32::from_le_bytes(data.try_into()?)) + } +} + +impl TlvData for String { + fn parse_tlv_data(data: &[u8]) -> Result> { + String::from_utf8(data.to_vec()).map_err(Into::into) + } +} + +impl TlvData for UUID { + fn parse_tlv_data(data: &[u8]) -> Result> { + Ok(UUID(u128::from_le_bytes(data.try_into()?))) + } +} + +impl TlvData for Time { + fn parse_tlv_data(data: &[u8]) -> Result> { + let secs = i64::from_le_bytes(data[0..8].try_into()?); + let nanos = u32::from_le_bytes(data[8..12].try_into()?); + Ok(Time {secs, nanos}) + } +} + +impl Command { + pub fn ty(&self) -> CommandType { + match self { + Command::Subvol => CommandType::Subvol, + Command::Snapshot { clone_transid: _, clone_uuid: _, uuid: _, path: _, transid: _ } => CommandType::Snapshot, + Command::MkFile { path: _, inode: _ } => CommandType::MkFile, + Command::MkDir { path: _, inode: _ } => CommandType::MkDir, + Command::MkNod => CommandType::MkNod, + Command::MkFifo => CommandType::MkFifo, + Command::MkSock => CommandType::MkSock, + Command::Symlink { path: _, inode: _, link: _ } => CommandType::Symlink, + Command::Rename { from: _, to: _ } => CommandType::Rename, + Command::Link { path: _, link: _ } => CommandType::Link, + Command::Unlink { path: _ } => CommandType::Unlink, + Command::RmDir { path: _ } => CommandType::RmDir, + Command::SetXAttr => CommandType::SetXAttr, + Command::RemoveXAttr => CommandType::RemoveXAttr, + Command::Write { path: _, offset: _, data: _ } => CommandType::Write, + Command::Clone { path: _, offset: _, clone_path: _, clone_uuid: _, clone_len: _, clone_transid: _, clone_offset: _ } => CommandType::Clone, + Command::Truncate { path: _, size: _ } => CommandType::Truncate, + Command::Chmod { path: _, mode: _ } => CommandType::Chmod, + Command::Chown { path: _, uid: _, gid: _ } => CommandType::Chown, + Command::UTimes { path: _, atime: _, mtime: _, ctime: _ } => CommandType::UTimes, + Command::End => CommandType::End, + Command::UpdateExtent => CommandType::UpdateExtent, + Command::FAllocate => CommandType::FAllocate, + Command::FileAttr => CommandType::FileAttr, + Command::EncodedWrite => CommandType::EncodedWrite, + _ => CommandType::Unspec, + } + } + + pub fn path(&self) -> Option<&str> { + match self { + Command::Symlink { path, inode: _, link: _ } | + Command::Rename { from: path, to: _ } | + Command::Link { path , link: _ } | + Command::Unlink { path } | + Command::RmDir { path } | + Command::MkFile { path, inode: _ } | + Command::MkDir { path, inode: _ } => + Some(path), + _ => None, + } + } + + pub fn parse(data: &[u8]) -> Result<(Self, usize), Box> { + let len = u32::from_le_bytes(data[0..4].try_into()?) as usize; + let cmd = CommandType::try_from_primitive( + u16::from_le_bytes(data[4..6].try_into()?) + )?; + let _checksum = u32::from_le_bytes(data[6..10].try_into()?); + + let mut tlvs: HashMap = HashMap::new(); + + let mut inner_offset = 0; + while inner_offset < len { + let tlvtype = TLVType::try_from_primitive( + u16::from_le_bytes(data[inner_offset+10 .. inner_offset+12].try_into()?) + )?; + let tlvlen = u16::from_le_bytes(data[inner_offset+12 .. inner_offset+14].try_into()?) as usize; + + tlvs.insert(tlvtype, (inner_offset+14, inner_offset+14+tlvlen)); + + inner_offset += tlvlen + 4; + } + + let tlv = |ty: TLVType| -> Result<&[u8], Box> { + let (s, e) = *tlvs.get(&ty).ok_or(format!("Command of type {cmd:?} needs a TLV of type {ty:?}"))?; + Ok(&data[s..e]) + }; + + let result = match cmd { +// CommandType::Subvol => Command::Subvol, + CommandType::Snapshot => Command::Snapshot { + clone_transid: u64::parse_tlv_data(tlv(TLVType::CloneCTransID)?)?, + clone_uuid: UUID::parse_tlv_data(tlv(TLVType::CloneUUID)?)?, + uuid: UUID::parse_tlv_data(tlv(TLVType::UUID)?)?, + path: String::parse_tlv_data(tlv(TLVType::Path)?)?, + transid: u64::parse_tlv_data(tlv(TLVType::CTransID)?)?, + }, + CommandType::MkFile => Command::MkFile { + path: String::parse_tlv_data(tlv(TLVType::Path)?)?, + inode: u64::parse_tlv_data(tlv(TLVType::Ino)?)?, + }, + CommandType::MkDir => Command::MkDir { + path: String::parse_tlv_data(tlv(TLVType::Path)?)?, + inode: u64::parse_tlv_data(tlv(TLVType::Ino)?)?, + }, +// CommandType::MkNod => Command::MkNod, +// CommandType::MkFifo => Command::MkFifo, +// CommandType::MkSock => Command::MkSock, + CommandType::Symlink => Command::Symlink { + path: String::parse_tlv_data(tlv(TLVType::Path)?)?, + inode: u64::parse_tlv_data(tlv(TLVType::Ino)?)?, + link: String::parse_tlv_data(tlv(TLVType::PathLink)?)?, + }, + CommandType::Rename => Command::Rename { + from: String::parse_tlv_data(tlv(TLVType::Path)?)?, + to: String::parse_tlv_data(tlv(TLVType::PathTo)?)?, + }, + CommandType::Link => Command::Link { + path: String::parse_tlv_data(tlv(TLVType::Path)?)?, + link: String::parse_tlv_data(tlv(TLVType::PathLink)?)?, + }, + CommandType::Unlink => Command::Unlink { + path: String::parse_tlv_data(tlv(TLVType::Path)?)?, + }, + CommandType::RmDir => Command::RmDir { + path: String::parse_tlv_data(tlv(TLVType::Path)?)?, + }, +// CommandType::SetXAttr => Command::SetXAttr, +// CommandType::RemoveXAttr => Command::RemoveXAttr, + CommandType::Write => Command::Write { + path: String::parse_tlv_data(tlv(TLVType::Path)?)?, + offset: u64::parse_tlv_data(tlv(TLVType::FileOffset)?)?, + data: tlv(TLVType::Data)?.to_vec(), + }, + CommandType::Clone => Command::Clone { + path: String::parse_tlv_data(tlv(TLVType::Path)?)?, + offset: u64::parse_tlv_data(tlv(TLVType::FileOffset)?)?, + clone_path: String::parse_tlv_data(tlv(TLVType::ClonePath)?)?, + clone_uuid: UUID::parse_tlv_data(tlv(TLVType::CloneUUID)?)?, + clone_len: u64::parse_tlv_data(tlv(TLVType::CloneLen)?)?, + clone_transid: u64::parse_tlv_data(tlv(TLVType::CloneCTransID)?)?, + clone_offset: u64::parse_tlv_data(tlv(TLVType::CloneOffset)?)?, + }, + CommandType::Truncate => Command::Truncate { + path: String::parse_tlv_data(tlv(TLVType::Path)?)?, + size: u64::parse_tlv_data(tlv(TLVType::Size)?)?, + }, + CommandType::Chmod => Command::Chmod { + path: String::parse_tlv_data(tlv(TLVType::Path)?)?, + mode: u64::parse_tlv_data(tlv(TLVType::Mode)?)?, + }, + CommandType::Chown => Command::Chown { + path: String::parse_tlv_data(tlv(TLVType::Path)?)?, + uid: u64::parse_tlv_data(tlv(TLVType::UID)?)?, + gid: u64::parse_tlv_data(tlv(TLVType::GID)?)?, + }, + CommandType::UTimes => Command::UTimes { + path: String::parse_tlv_data(tlv(TLVType::Path)?)?, + atime: Time::parse_tlv_data(tlv(TLVType::ATime)?)?, + mtime: Time::parse_tlv_data(tlv(TLVType::MTime)?)?, + ctime: Time::parse_tlv_data(tlv(TLVType::CTime)?)?, + }, + CommandType::End => Command::End, +// CommandType::UpdateExtent => Command::UpdateExtent, +// CommandType::FAllocate => Command::FAllocate, +// CommandType::FileAttr => Command::FileAttr, +// CommandType::EncodedWrite => Command::EncodedWrite, + _ => return Err(format!("Command type {cmd:?} not implemented! tlvdata = {tlvs:?}").into()), + }; + + Ok((result, len+10)) + } + +}