Merge pull request #645 from matthiasbeyer/libimagref/custom-hash-functions

Libimagref/custom hash functions
This commit is contained in:
Matthias Beyer 2016-08-23 15:17:54 +02:00 committed by GitHub
commit 2d94b6c018
6 changed files with 130 additions and 17 deletions

View file

@ -3,6 +3,7 @@ generate_error_module!(
StoreReadError => "Store read error",
StoreWriteError => "Store write error",
IOError => "IO Error",
UTF8Error => "UTF8 Error",
HeaderTypeError => "Header type error",
HeaderFieldMissingError => "Header field missing error",
HeaderFieldWriteError => "Header field cannot be written",

48
libimagref/src/hasher.rs Normal file
View file

@ -0,0 +1,48 @@
use std::path::PathBuf;
use std::io::Read;
use error::RefErrorKind as REK;
use error::MapErrInto;
use crypto::sha1::Sha1;
use crypto::digest::Digest;
use result::Result;
/// The Hasher trait is used to implement custom hashing functions for the ref library.
/// This means that one can define how the hash of a reference is constructed from the content of
/// the file the reference points to.
pub trait Hasher {
/// Returns a static name for this hasher implementation (the implementations visible in this
/// crate return `"default"` and `"n-bytes-hasher"`).
fn hash_name(&self) -> &'static str;
/// Computes the hash for the file at path `pb`, whose content is read from `contents`, and
/// returns it as a `String`.
///
/// The method takes `&mut self`, so an implementation may keep (and mutate) internal hashing
/// state. An `Err` is returned when the hash cannot be produced, e.g. because reading from
/// `contents` fails.
fn create_hash<R: Read>(&mut self, pb: &PathBuf, contents: &mut R) -> Result<String>;
}
/// The hasher which is used whenever no custom hasher is passed explicitly. It wraps a SHA-1
/// digest.
pub struct DefaultHasher {
    hasher: Sha1,
}

impl DefaultHasher {

    /// Builds a `DefaultHasher` around a freshly created SHA-1 digest.
    pub fn new() -> DefaultHasher {
        let sha1 = Sha1::new();
        DefaultHasher { hasher: sha1 }
    }

}
impl Hasher for DefaultHasher {

    /// Name of this hasher: `"default"`.
    fn hash_name(&self) -> &'static str {
        "default"
    }

    /// Reads the complete content of `c` and returns its SHA-1 digest as a string.
    ///
    /// The path argument is not used by this hasher. The content is read via `read_to_string`,
    /// so it has to be valid UTF-8.
    ///
    /// # Errors
    ///
    /// If reading fails, the error is mapped to `REK::UTF8Error` and then to `REK::IOError`.
    fn create_hash<R: Read>(&mut self, _: &PathBuf, c: &mut R) -> Result<String> {
        let mut s = String::new();
        try!(c.read_to_string(&mut s)
             .map_err_into(REK::UTF8Error)
             .map_err_into(REK::IOError));

        // One hasher instance can be asked for several hashes (for example once per candidate
        // file while re-finding a reference). The digest must be reset between runs, because it
        // must not be fed new input after a result was already retrieved from it.
        self.hasher.reset();
        self.hasher.input_str(&s[..]);
        Ok(self.hasher.result_str())
    }

}

View file

@ -0,0 +1 @@
pub mod nbytes;

View file

@ -0,0 +1,52 @@
use std::io::Read;
use std::path::PathBuf;
use std::result::Result as RResult;
use crypto::sha1::Sha1;
use crypto::digest::Digest;
use libimagerror::into::IntoError;
use hasher::Hasher;
use result::Result;
use error::RefErrorKind as REK;
use error::MapErrInto;
/// A hasher which takes only the first `n` bytes of the content into account. It wraps a SHA-1
/// digest.
pub struct NBytesHasher {
    hasher: Sha1,
    n: usize,
}

impl NBytesHasher {

    /// Builds a hasher which considers at most the first `n` bytes of the content it is given.
    pub fn new(n: usize) -> NBytesHasher {
        let sha1 = Sha1::new();
        NBytesHasher { hasher: sha1, n: n }
    }

}
impl Hasher for NBytesHasher {

    /// Name of this hasher: `"n-bytes-hasher"`.
    fn hash_name(&self) -> &'static str {
        "n-bytes-hasher"
    }

    /// Reads at most the first `n` bytes from `contents` and returns their SHA-1 digest as a
    /// string.
    ///
    /// The path argument is not used by this hasher.
    ///
    /// # Errors
    ///
    /// Fails if reading from `contents` fails or if the bytes read are not valid UTF-8. Both
    /// failures are wrapped into `REK::UTF8Error` and finally reported as `REK::IOError`.
    ///
    /// NOTE: because the input is cut after `n` bytes and then converted with
    /// `String::from_utf8`, a cut in the middle of a multi-byte UTF-8 sequence makes the
    /// conversion fail even if the complete content is valid UTF-8.
    fn create_hash<R: Read>(&mut self, _: &PathBuf, contents: &mut R) -> Result<String> {
        let s = contents
            .bytes()
            .take(self.n)
            .collect::<RResult<Vec<u8>, _>>()
            .map_err_into(REK::IOError)
            .and_then(|v| String::from_utf8(v).map_err_into(REK::IOError))
            .map_err(Box::new)
            .map_err(|e| REK::UTF8Error.into_error_with_cause(e))
            .map_err_into(REK::IOError);
        let s = try!(s);

        // One hasher instance can be asked for several hashes (for example once per candidate
        // file while re-finding a reference). The digest must be reset between runs, because it
        // must not be fed new input after a result was already retrieved from it.
        self.hasher.reset();
        self.hasher.input_str(&s[..]);
        Ok(self.hasher.result_str())
    }

}

View file

@ -29,6 +29,8 @@ module_entry_path_mod!("ref", "0.2.0");
pub mod error;
pub mod flags;
pub mod hasher;
pub mod hashers;
pub mod lister;
pub mod reference;
pub mod result;

View file

@ -6,7 +6,6 @@ use std::ops::Deref;
use std::ops::DerefMut;
use std::collections::BTreeMap;
use std::fs::File;
use std::io::Read;
use std::fmt::{Display, Error as FmtError, Formatter};
use std::fs::Permissions;
use std::result::Result as RResult;
@ -18,12 +17,11 @@ use libimagstore::store::Store;
use libimagerror::into::IntoError;
use toml::Value;
use crypto::sha1::Sha1;
use crypto::digest::Digest;
use error::RefErrorKind as REK;
use flags::RefFlags;
use result::Result;
use hasher::*;
use module_path::ModuleEntryPath;
#[derive(Debug)]
@ -75,8 +73,9 @@ impl<'a> Ref<'a> {
}
}
/// Create a Ref object which refers to `pb`
pub fn create(store: &'a Store, pb: PathBuf, flags: RefFlags) -> Result<Ref<'a>> {
pub fn create_with_hasher<H: Hasher>(store: &'a Store, pb: PathBuf, flags: RefFlags, mut h: H)
-> Result<Ref<'a>>
{
if !pb.exists() {
return Err(REK::RefTargetDoesNotExist.into_error());
}
@ -93,7 +92,7 @@ impl<'a> Ref<'a> {
// we hash the contents of the file and return (file, hash)
.and_then(|mut file| {
let opt_contenthash = if flags.get_content_hashing() {
Some(hash_file_contents(&mut file))
Some(try!(h.create_hash(&pb, &mut file)))
} else {
None
};
@ -201,6 +200,11 @@ impl<'a> Ref<'a> {
Ok(Ref(fle))
}
/// Create a Ref object which refers to `pb`
///
/// This is `Ref::create_with_hasher()` called with a new `DefaultHasher`.
pub fn create(store: &'a Store, pb: PathBuf, flags: RefFlags) -> Result<Ref<'a>> {
    let default_hasher = DefaultHasher::new();
    Ref::create_with_hasher(store, pb, flags, default_hasher)
}
/// Creates a Hash from a PathBuf by making the PathBuf absolute and then running a hash
/// algorithm on it
fn hash_path(pb: &PathBuf) -> Result<String> {
@ -247,13 +251,20 @@ impl<'a> Ref<'a> {
/// Get the hash of the link target by reading the link target and hashing the contents
///
/// This is `get_current_hash_with_hasher()` called with a new `DefaultHasher`.
pub fn get_current_hash(&self) -> Result<String> {
    let default_hasher = DefaultHasher::new();
    self.get_current_hash_with_hasher(default_hasher)
}
/// Get the hash of the link target by reading the link target and hashing the contents with the
/// custom hasher
pub fn get_current_hash_with_hasher<H: Hasher>(&self, mut h: H) -> Result<String> {
self.fs_file()
.and_then(|pb| {
File::open(pb)
File::open(pb.clone())
.map(|f| (pb, f))
.map_err(Box::new)
.map_err(|e| REK::IOError.into_error_with_cause(e))
})
.map(|mut file| hash_file_contents(&mut file))
.and_then(|(path, mut file)| h.create_hash(&path, &mut file))
}
/// Get the permissions of the file which are present
@ -433,6 +444,12 @@ impl<'a> Ref<'a> {
///
/// This option causes heavy I/O as it recursively searches the Filesystem.
pub fn refind(&self, search_roots: Option<Vec<PathBuf>>) -> Result<PathBuf> {
    // Same as `refind_with_hasher()`, but with a new `DefaultHasher`.
    let default_hasher = DefaultHasher::new();
    self.refind_with_hasher(search_roots, default_hasher)
}
pub fn refind_with_hasher<H: Hasher>(&self, search_roots: Option<Vec<PathBuf>>, mut h: H)
-> Result<PathBuf>
{
use itertools::Itertools;
use walkdir::WalkDir;
@ -456,7 +473,7 @@ impl<'a> Ref<'a> {
.map_err(|e| REK::IOError.into_error_with_cause(e))
.map(|f| (pb, f))
})
.map(|(path, mut file)| (path, hash_file_contents(&mut file)))
.and_then(|(p, mut f)| h.create_hash(&p, &mut f).map(|h| (p, h)))
.map(|(path, hash)| {
if hash == stored_hash {
Some(path)
@ -518,11 +535,3 @@ impl<'a> Into<FileLockEntry<'a>> for Ref<'a> {
}
/// Reads the content of `f` into a string and returns the SHA-1 digest of that string.
fn hash_file_contents(f: &mut File) -> String {
    let mut contents = String::new();
    // NOTE(review): the result of the read is not checked here, so after a failed read the
    // digest is computed over whatever ended up in `contents`.
    f.read_to_string(&mut contents);

    let mut sha1 = Sha1::new();
    sha1.input_str(&contents[..]);
    sha1.result_str()
}