diff --git a/libimagref/src/error.rs b/libimagref/src/error.rs index fdd6314a..b0aaacf3 100644 --- a/libimagref/src/error.rs +++ b/libimagref/src/error.rs @@ -3,6 +3,7 @@ generate_error_module!( StoreReadError => "Store read error", StoreWriteError => "Store write error", IOError => "IO Error", + UTF8Error => "UTF8 Error", HeaderTypeError => "Header type error", HeaderFieldMissingError => "Header field missing error", HeaderFieldWriteError => "Header field cannot be written", diff --git a/libimagref/src/hasher.rs b/libimagref/src/hasher.rs new file mode 100644 index 00000000..fcdda5bd --- /dev/null +++ b/libimagref/src/hasher.rs @@ -0,0 +1,48 @@ +use std::path::PathBuf; +use std::io::Read; + +use error::RefErrorKind as REK; +use error::MapErrInto; + +use crypto::sha1::Sha1; +use crypto::digest::Digest; + +use result::Result; + +/// The Hasher trait is used to implement custom hashing functions for the ref library. +/// This means that one can define how the hash of a reference is constructed from the content of +/// the file being referenced. 
+pub trait Hasher {
+    /// Name identifying this hasher implementation.
+    fn hash_name(&self) -> &'static str;
+    /// Hash `contents` (the opened file at `pb`) and return the digest string.
+    fn create_hash<R: Read>(&mut self, pb: &PathBuf, contents: &mut R) -> Result<String>;
+}
+
+/// Hasher computing the SHA-1 digest of the complete contents, read as UTF-8 text.
+pub struct DefaultHasher {
+    hasher: Sha1,
+}
+
+impl DefaultHasher {
+    /// Create a hasher with a fresh SHA-1 state.
+    pub fn new() -> DefaultHasher {
+        DefaultHasher { hasher: Sha1::new() }
+    }
+}
+
+impl Hasher for DefaultHasher {
+    fn hash_name(&self) -> &'static str {
+        "default"
+    }
+
+    /// Reads `c` to the end as UTF-8 (the path is ignored) and returns its SHA-1 digest string.
+    fn create_hash<R: Read>(&mut self, _: &PathBuf, c: &mut R) -> Result<String> {
+        let mut s = String::new();
+        try!(c.read_to_string(&mut s).map_err_into(REK::UTF8Error).map_err_into(REK::IOError));
+        self.hasher.reset(); // a finalized digest rejects input until it is reset
+        self.hasher.input_str(&s[..]);
+        Ok(self.hasher.result_str())
+    }
+}
+
diff --git a/libimagref/src/hashers/mod.rs b/libimagref/src/hashers/mod.rs
new file mode 100644
index 00000000..c5c49799
--- /dev/null
+++ b/libimagref/src/hashers/mod.rs
@@ -0,0 +1 @@
+pub mod nbytes;
diff --git a/libimagref/src/hashers/nbytes.rs b/libimagref/src/hashers/nbytes.rs
new file mode 100644
index 00000000..66af0297
--- /dev/null
+++ b/libimagref/src/hashers/nbytes.rs
@@ -0,0 +1,60 @@
+use std::io::Read;
+use std::path::PathBuf;
+use std::result::Result as RResult;
+
+use crypto::sha1::Sha1;
+use crypto::digest::Digest;
+
+use libimagerror::into::IntoError;
+
+use hasher::Hasher;
+use result::Result;
+use error::RefErrorKind as REK;
+use error::MapErrInto;
+
+/// Hasher computing the SHA-1 digest of at most the first `n` bytes of the contents.
+pub struct NBytesHasher {
+    hasher: Sha1,
+    n: usize,
+}
+
+impl NBytesHasher {
+
+    /// Create a hasher that reads at most `n` bytes.
+    pub fn new(n: usize) -> NBytesHasher {
+        NBytesHasher {
+            hasher: Sha1::new(),
+            n: n,
+        }
+    }
+
+}
+
+impl Hasher for NBytesHasher {
+
+    fn hash_name(&self) -> &'static str {
+        "n-bytes-hasher"
+    }
+
+    /// Hash at most the first `n` bytes of `contents`; the path is ignored.
+    ///
+    /// NOTE(review): the bytes taken must be valid UTF-8, so a cut inside a multi-byte
+    /// sequence yields an error; hashing the raw bytes instead would avoid that.
+    fn create_hash<R: Read>(&mut self, _: &PathBuf, contents: &mut R) -> Result<String> {
+        let s = contents
+            .bytes()
+            .take(self.n)
+            .collect::<RResult<Vec<u8>, _>>()
+            .map_err_into(REK::IOError)
+            .and_then(|v| String::from_utf8(v).map_err_into(REK::IOError))
+            .map_err(Box::new)
+            .map_err(|e| REK::UTF8Error.into_error_with_cause(e))
+            .map_err_into(REK::IOError);
+        // A finalized digest rejects input until it is reset.
+        self.hasher.reset();
+        self.hasher.input_str(&try!(s)[..]);
+
Ok(self.hasher.result_str()) + } + +} + diff --git a/libimagref/src/lib.rs b/libimagref/src/lib.rs index eac8842c..bcdaf36d 100644 --- a/libimagref/src/lib.rs +++ b/libimagref/src/lib.rs @@ -29,6 +29,8 @@ module_entry_path_mod!("ref", "0.2.0"); pub mod error; pub mod flags; +pub mod hasher; +pub mod hashers; pub mod lister; pub mod reference; pub mod result; diff --git a/libimagref/src/reference.rs b/libimagref/src/reference.rs index 6e38aa50..577311a8 100644 --- a/libimagref/src/reference.rs +++ b/libimagref/src/reference.rs @@ -6,7 +6,6 @@ use std::ops::Deref; use std::ops::DerefMut; use std::collections::BTreeMap; use std::fs::File; -use std::io::Read; use std::fmt::{Display, Error as FmtError, Formatter}; use std::fs::Permissions; use std::result::Result as RResult; @@ -18,12 +17,11 @@ use libimagstore::store::Store; use libimagerror::into::IntoError; use toml::Value; -use crypto::sha1::Sha1; -use crypto::digest::Digest; use error::RefErrorKind as REK; use flags::RefFlags; use result::Result; +use hasher::*; use module_path::ModuleEntryPath; #[derive(Debug)] @@ -75,8 +73,9 @@ impl<'a> Ref<'a> { } } - /// Create a Ref object which refers to `pb` - pub fn create(store: &'a Store, pb: PathBuf, flags: RefFlags) -> Result> { + pub fn create_with_hasher(store: &'a Store, pb: PathBuf, flags: RefFlags, mut h: H) + -> Result> + { if !pb.exists() { return Err(REK::RefTargetDoesNotExist.into_error()); } @@ -93,7 +92,7 @@ impl<'a> Ref<'a> { // we hash the contents of the file and return (file, hash) .and_then(|mut file| { let opt_contenthash = if flags.get_content_hashing() { - Some(hash_file_contents(&mut file)) + Some(try!(h.create_hash(&pb, &mut file))) } else { None }; @@ -201,6 +200,11 @@ impl<'a> Ref<'a> { Ok(Ref(fle)) } + /// Create a Ref object which refers to `pb` + pub fn create(store: &'a Store, pb: PathBuf, flags: RefFlags) -> Result> { + Ref::create_with_hasher(store, pb, flags, DefaultHasher::new()) + } + /// Creates a Hash from a PathBuf by making the 
PathBuf absolute and then running a hash /// algorithm on it fn hash_path(pb: &PathBuf) -> Result { @@ -247,13 +251,20 @@ impl<'a> Ref<'a> { /// Get the hash of the link target by reading the link target and hashing the contents pub fn get_current_hash(&self) -> Result { + self.get_current_hash_with_hasher(DefaultHasher::new()) + } + + /// Get the hash of the link target by reading the link target and hashing the contents with the + /// custom hasher + pub fn get_current_hash_with_hasher(&self, mut h: H) -> Result { self.fs_file() .and_then(|pb| { - File::open(pb) + File::open(pb.clone()) + .map(|f| (pb, f)) .map_err(Box::new) .map_err(|e| REK::IOError.into_error_with_cause(e)) }) - .map(|mut file| hash_file_contents(&mut file)) + .and_then(|(path, mut file)| h.create_hash(&path, &mut file)) } /// Get the permissions of the file which are present @@ -433,6 +444,12 @@ impl<'a> Ref<'a> { /// /// This option causes heavy I/O as it recursively searches the Filesystem. pub fn refind(&self, search_roots: Option>) -> Result { + self.refind_with_hasher(search_roots, DefaultHasher::new()) + } + + pub fn refind_with_hasher(&self, search_roots: Option>, mut h: H) + -> Result + { use itertools::Itertools; use walkdir::WalkDir; @@ -456,7 +473,7 @@ impl<'a> Ref<'a> { .map_err(|e| REK::IOError.into_error_with_cause(e)) .map(|f| (pb, f)) }) - .map(|(path, mut file)| (path, hash_file_contents(&mut file))) + .and_then(|(p, mut f)| h.create_hash(&p, &mut f).map(|h| (p, h))) .map(|(path, hash)| { if hash == stored_hash { Some(path) @@ -518,11 +535,3 @@ impl<'a> Into> for Ref<'a> { } -fn hash_file_contents(f: &mut File) -> String { - let mut hasher = Sha1::new(); - let mut s = String::new(); - f.read_to_string(&mut s); - hasher.input_str(&s[..]); - hasher.result_str() -} -