Optimize the Store::entries() interface
The previous iterator was implemented to simply fetch _all_ paths from the filesystem, no matter what. With this implementation, this changes. The iterator now has functionality to optimize the iteration if only a subdirectory of the store is required, for example `$STORE/foo`. This is done via functionality where the underlying iterator gets altered. First of all, the interface was changed to return an `Entries` object, which itself only wraps the libimagstore-internal `PathIterator` type. This type was changed so that the backend implementation provides a `PathIterBuilder`, which builds the actual iterator object for the `PathIterator` type. The intermediate `StoreIdConstructingIterator` was merged into `PathIterator` for simplicity. The `Entries` type got functionality similar to the `StoreIdIteratorWithStore` type for easier transition to the new API. This should probably be removed at a later point, though. As the `walkdir::WalkDir` type is not as nice as it could be, iterating over two subdirectories with one iterator is not possible. Thus, iterators for two collections in the store could be built like this (untested): store .entries()? .in_collection("foo") .chain(store.entries()?.in_collection("bar")) Functionality to exclude subdirectories is not possible with the current `walkdir::WalkDir` implementation and has to be done during iteration, with filtering (as usual). Signed-off-by: Matthias Beyer <mail@beyermatthias.de>
This commit is contained in:
parent
d59dca1a23
commit
d4872f6da3
9 changed files with 177 additions and 50 deletions
|
@ -20,6 +20,7 @@
|
|||
use std::fs::{File, OpenOptions, create_dir_all, remove_file, copy, rename};
|
||||
use std::io::{Seek, SeekFrom, Read};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::Arc;
|
||||
|
||||
use error::{StoreError as SE, StoreErrorKind as SEK};
|
||||
use error::ResultExt;
|
||||
|
@ -30,6 +31,9 @@ use super::Drain;
|
|||
use store::Entry;
|
||||
use storeid::StoreId;
|
||||
use file_abstraction::iter::PathIterator;
|
||||
use file_abstraction::iter::PathIterBuilder;
|
||||
|
||||
use walkdir::WalkDir;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct FSFileAbstractionInstance(PathBuf);
|
||||
|
@ -133,18 +137,34 @@ impl FileAbstraction for FSFileAbstraction {
|
|||
})
|
||||
}
|
||||
|
||||
fn pathes_recursively(&self, basepath: PathBuf) -> Result<PathIterator, SE> {
|
||||
use walkdir::WalkDir;
|
||||
fn pathes_recursively(&self,
|
||||
basepath: PathBuf,
|
||||
storepath: PathBuf,
|
||||
backend: Arc<FileAbstraction>)
|
||||
-> Result<PathIterator, SE>
|
||||
{
|
||||
trace!("Building PathIterator object");
|
||||
Ok(PathIterator::new(Box::new(WalkDirPathIterBuilder { basepath }), storepath, backend))
|
||||
}
|
||||
}
|
||||
|
||||
let i = WalkDir::new(basepath)
|
||||
pub(crate) struct WalkDirPathIterBuilder {
|
||||
basepath: PathBuf
|
||||
}
|
||||
|
||||
impl PathIterBuilder for WalkDirPathIterBuilder {
|
||||
fn build_iter(&self) -> Box<Iterator<Item = Result<PathBuf, SE>>> {
|
||||
Box::new(WalkDir::new(self.basepath.clone())
|
||||
.min_depth(1)
|
||||
.max_open(100)
|
||||
.into_iter()
|
||||
.map(|r| {
|
||||
r.map(|e| PathBuf::from(e.path())).chain_err(|| SE::from_kind(SEK::FileError))
|
||||
});
|
||||
}))
|
||||
}
|
||||
|
||||
Ok(PathIterator::new(Box::new(i)))
|
||||
fn in_collection(&mut self, c: &str) {
|
||||
self.basepath.push(c);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -33,6 +33,7 @@ use super::Drain;
|
|||
use store::Entry;
|
||||
use storeid::StoreId;
|
||||
use file_abstraction::iter::PathIterator;
|
||||
use file_abstraction::iter::PathIterBuilder;
|
||||
|
||||
type Backend = Arc<Mutex<RefCell<HashMap<PathBuf, Entry>>>>;
|
||||
|
||||
|
@ -181,9 +182,9 @@ impl FileAbstraction for InMemoryFileAbstraction {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
fn pathes_recursively(&self, _basepath: PathBuf) -> Result<PathIterator, SE> {
|
||||
debug!("Getting all pathes");
|
||||
let keys : Vec<Result<PathBuf, SE>> = self
|
||||
fn pathes_recursively(&self, _basepath: PathBuf, storepath: PathBuf, backend: Arc<FileAbstraction>) -> Result<PathIterator, SE> {
|
||||
trace!("Building PathIterator object (inmemory implementation)");
|
||||
let keys : Vec<PathBuf> = self
|
||||
.backend()
|
||||
.lock()
|
||||
.map_err(|_| SE::from_kind(SEK::FileError))?
|
||||
|
@ -191,9 +192,21 @@ impl FileAbstraction for InMemoryFileAbstraction {
|
|||
.keys()
|
||||
.map(PathBuf::from)
|
||||
.map(Ok)
|
||||
.collect(); // we have to collect() because of the lock() above.
|
||||
.collect::<Result<_, SE>>()?; // we have to collect() because of the lock() above.
|
||||
|
||||
Ok(PathIterator::new(Box::new(keys.into_iter())))
|
||||
Ok(PathIterator::new(Box::new(InMemPathIterBuilder(keys)), storepath, backend))
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) struct InMemPathIterBuilder(Vec<PathBuf>);
|
||||
|
||||
impl PathIterBuilder for InMemPathIterBuilder {
|
||||
fn build_iter(&self) -> Box<Iterator<Item = Result<PathBuf, SE>>> {
|
||||
Box::new(self.0.clone().into_iter().map(Ok))
|
||||
}
|
||||
|
||||
fn in_collection(&mut self, c: &str) {
|
||||
self.0.retain(|p| p.starts_with(c));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -24,48 +24,63 @@ use error::Result;
|
|||
use storeid::StoreId;
|
||||
use file_abstraction::FileAbstraction;
|
||||
|
||||
/// See documentation for PathIterator
|
||||
pub trait PathIterBuilder {
|
||||
fn build_iter(&self) -> Box<Iterator<Item = Result<PathBuf>>>;
|
||||
fn in_collection(&mut self, c: &str);
|
||||
}
|
||||
|
||||
/// A wrapper for an iterator over `PathBuf`s
|
||||
pub struct PathIterator(Box<Iterator<Item = Result<PathBuf>>>);
|
||||
///
|
||||
/// As the backend defines how "iterating over all entries" is implemented, this type holds a
|
||||
/// `PathIterBuilder` internally. This type is used to create new iterator instances every time the
|
||||
/// "settings" for how the iterator behaves are changed. This basically means: If the PathIterator
|
||||
/// is requested to not iterate over a directory "a" but rather its subdirectory "a/b", the
|
||||
/// implementation asks the `PathIterBuilder` to create a new iterator for that.
|
||||
///
|
||||
/// The `PathIterBuilder` can then yield a new iterator instance which is optimized for the new
|
||||
/// requirements (which basically means: Construct a new WalkDir object which does traverse the
|
||||
/// subdirectory instead of the parent).
|
||||
///
|
||||
/// This means quite a few allocations down the road, as the PathIterator itself is not generic, but
|
||||
/// this seems to be the best way to implement this.
|
||||
pub struct PathIterator {
|
||||
iter_builder: Box<PathIterBuilder>,
|
||||
iter: Box<Iterator<Item = Result<PathBuf>>>,
|
||||
storepath: PathBuf,
|
||||
backend: Arc<FileAbstraction>,
|
||||
}
|
||||
|
||||
impl PathIterator {
|
||||
|
||||
pub fn new(iter: Box<Iterator<Item = Result<PathBuf>>>) -> PathIterator {
|
||||
PathIterator(iter)
|
||||
pub fn new(iter_builder: Box<PathIterBuilder>,
|
||||
storepath: PathBuf,
|
||||
backend: Arc<FileAbstraction>)
|
||||
-> PathIterator
|
||||
{
|
||||
trace!("Generating iterator object with PathIterBuilder");
|
||||
let iter = iter_builder.build_iter();
|
||||
PathIterator { iter_builder, iter, storepath, backend }
|
||||
}
|
||||
|
||||
pub fn store_id_constructing(self, storepath: PathBuf, backend: Arc<FileAbstraction>)
|
||||
-> StoreIdConstructingIterator
|
||||
{
|
||||
StoreIdConstructingIterator(self, storepath, backend)
|
||||
pub fn in_collection(mut self, c: &str) -> Self {
|
||||
trace!("Generating iterator object for collection: {}", c);
|
||||
self.iter_builder.in_collection(c);
|
||||
self
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
impl Iterator for PathIterator {
|
||||
type Item = Result<PathBuf>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
self.0.next()
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
/// Helper type for constructing StoreIds from a PathIterator.
|
||||
///
|
||||
/// Automatically ignores non-files.
|
||||
pub struct StoreIdConstructingIterator(PathIterator, PathBuf, Arc<FileAbstraction>);
|
||||
|
||||
impl Iterator for StoreIdConstructingIterator {
|
||||
type Item = Result<StoreId>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
while let Some(next) = self.0.next() {
|
||||
while let Some(next) = self.iter.next() {
|
||||
match next {
|
||||
Err(e) => return Some(Err(e)),
|
||||
Ok(next) => match self.2.is_file(&next) {
|
||||
Ok(next) => match self.backend.is_file(&next) {
|
||||
Err(e) => return Some(Err(e)),
|
||||
Ok(true) => return Some(StoreId::from_full_path(&self.1, next)),
|
||||
Ok(true) => return Some(StoreId::from_full_path(&self.storepath, next)),
|
||||
Ok(false) => { continue },
|
||||
}
|
||||
}
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
use std::path::PathBuf;
|
||||
use std::fmt::Debug;
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use error::StoreError as SE;
|
||||
use store::Entry;
|
||||
|
@ -50,7 +51,7 @@ pub trait FileAbstraction : Debug {
|
|||
fn drain(&self) -> Result<Drain, SE>;
|
||||
fn fill<'a>(&'a mut self, d: Drain) -> Result<(), SE>;
|
||||
|
||||
fn pathes_recursively(&self, basepath: PathBuf) -> Result<PathIterator, SE>;
|
||||
fn pathes_recursively(&self, basepath: PathBuf, storepath: PathBuf, backend: Arc<FileAbstraction>) -> Result<PathIterator, SE>;
|
||||
}
|
||||
|
||||
/// An abstraction trait over actions on files
|
||||
|
|
|
@ -144,3 +144,81 @@ mod compile_test {
|
|||
}
|
||||
}
|
||||
|
||||
use storeid::StoreId;
|
||||
use storeid::StoreIdIterator;
|
||||
use self::delete::StoreDeleteIterator;
|
||||
use self::get::StoreGetIterator;
|
||||
use self::retrieve::StoreRetrieveIterator;
|
||||
use file_abstraction::iter::PathIterator;
|
||||
use store::Store;
|
||||
use error::StoreError;
|
||||
use error::Result;
|
||||
|
||||
/// Iterator for iterating over all (or a subset of all) entries
|
||||
///
|
||||
/// The iterator now has functionality to optimize the iteration, if only a subdirectory of the
|
||||
/// store is required, for example `$STORE/foo`.
|
||||
///
|
||||
/// This is done via functionality where the underlying iterator gets
|
||||
/// altered.
|
||||
///
|
||||
/// As the (for the filesystem backend underlying) `walkdir::WalkDir` type is not as nice as it
|
||||
/// could be, iterating over two subdirectories with one iterator is not possible. Thus, iterators
|
||||
/// for two collections in the store should be built like this (untested):
|
||||
///
|
||||
/// ```ignore
|
||||
/// store
|
||||
/// .entries()?
|
||||
/// .in_collection("foo")
|
||||
/// .chain(store.entries()?.in_collection("bar"))
|
||||
/// ```
|
||||
///
|
||||
/// Functionality to exclude subdirectories is not possible with the current implementation and has
|
||||
/// to be done during iteration, with filtering (as usual).
|
||||
pub struct Entries<'a>(PathIterator, &'a Store);
|
||||
|
||||
impl<'a> Entries<'a> {
|
||||
|
||||
pub(crate) fn new(pi: PathIterator, store: &'a Store) -> Self {
|
||||
Entries(pi, store)
|
||||
}
|
||||
|
||||
pub fn in_collection(self, c: &str) -> Self {
|
||||
Entries(self.0.in_collection(c), self.1)
|
||||
}
|
||||
|
||||
pub fn without_store(self) -> StoreIdIterator {
|
||||
StoreIdIterator::new(Box::new(self.0))
|
||||
}
|
||||
|
||||
/// Transform the iterator into a StoreDeleteIterator
|
||||
///
|
||||
/// This imitates the API from `libimagstore::iter`.
|
||||
pub fn into_delete_iter(self) -> StoreDeleteIterator<'a, StoreError> {
|
||||
StoreDeleteIterator::new(Box::new(self.0), self.1)
|
||||
}
|
||||
|
||||
/// Transform the iterator into a StoreGetIterator
|
||||
///
|
||||
/// This imitates the API from `libimagstore::iter`.
|
||||
pub fn into_get_iter(self) -> StoreGetIterator<'a, StoreError> {
|
||||
StoreGetIterator::new(Box::new(self.0), self.1)
|
||||
}
|
||||
|
||||
/// Transform the iterator into a StoreRetrieveIterator
|
||||
///
|
||||
/// This imitates the API from `libimagstore::iter`.
|
||||
pub fn into_retrieve_iter(self) -> StoreRetrieveIterator<'a, StoreError> {
|
||||
StoreRetrieveIterator::new(Box::new(self.0), self.1)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
impl<'a> Iterator for Entries<'a> {
|
||||
type Item = Result<StoreId>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
self.0.next()
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -37,7 +37,8 @@ use toml_query::read::TomlValueReadTypeExt;
|
|||
|
||||
use error::{StoreError as SE, StoreErrorKind as SEK};
|
||||
use error::ResultExt;
|
||||
use storeid::{IntoStoreId, StoreId, StoreIdIteratorWithStore};
|
||||
use storeid::{IntoStoreId, StoreId};
|
||||
use iter::Entries;
|
||||
use file_abstraction::FileAbstractionInstance;
|
||||
|
||||
// We re-export the following things so tests can use them
|
||||
|
@ -642,12 +643,11 @@ impl Store {
|
|||
}
|
||||
|
||||
/// Get _all_ entries in the store (by id as iterator)
|
||||
pub fn entries(&self) -> Result<StoreIdIteratorWithStore> {
|
||||
pub fn entries<'a>(&'a self) -> Result<Entries<'a>> {
|
||||
trace!("Building 'Entries' iterator");
|
||||
self.backend
|
||||
.pathes_recursively(self.path().clone())
|
||||
.map(|i| i.store_id_constructing(self.path().clone(), self.backend.clone()))
|
||||
.map(Box::new)
|
||||
.map(|it| StoreIdIteratorWithStore::new(it, self))
|
||||
.pathes_recursively(self.path().clone(), self.path().clone(), self.backend.clone())
|
||||
.map(|i| Entries::new(i, self))
|
||||
}
|
||||
|
||||
/// Gets the path where this store is on the disk
|
||||
|
|
|
@ -48,11 +48,11 @@ pub trait HabitStore {
|
|||
impl HabitStore for Store {
|
||||
/// Get an iterator over all habits
|
||||
fn all_habit_templates(&self) -> Result<HabitTemplateStoreIdIterator> {
|
||||
self.entries().map(HabitTemplateStoreIdIterator::from).map_err(From::from)
|
||||
Ok(HabitTemplateStoreIdIterator::from(self.entries()?.without_store()))
|
||||
}
|
||||
|
||||
fn all_habit_instances(&self) -> Result<HabitInstanceStoreIdIterator> {
|
||||
self.entries().map(HabitInstanceStoreIdIterator::from).map_err(From::from)
|
||||
Ok(HabitInstanceStoreIdIterator::from(self.entries()?.without_store()))
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -17,18 +17,18 @@
|
|||
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
//
|
||||
|
||||
use libimagstore::storeid::StoreIdIteratorWithStore;
|
||||
use libimagstore::iter::Entries;
|
||||
use libimagstore::store::Store;
|
||||
use libimagstore::store::Result as StoreResult;
|
||||
use libimagstore::store::FileLockEntry;
|
||||
|
||||
use constants::*;
|
||||
|
||||
pub struct TimeTrackingsGetIterator<'a>(StoreIdIteratorWithStore<'a>, &'a Store);
|
||||
pub struct TimeTrackingsGetIterator<'a>(Entries<'a>, &'a Store);
|
||||
|
||||
impl<'a> TimeTrackingsGetIterator<'a> {
|
||||
pub fn new(sit: StoreIdIteratorWithStore<'a>, store: &'a Store) -> Self {
|
||||
TimeTrackingsGetIterator(sit, store)
|
||||
pub fn new(entries: Entries<'a>, store: &'a Store) -> Self {
|
||||
TimeTrackingsGetIterator(entries, store)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -95,7 +95,7 @@ impl<'a, 'b> Wiki<'a, 'b> {
|
|||
|
||||
pub fn all_ids(&self) -> Result<WikiIdIterator> {
|
||||
let filter = IdIsInWikiFilter(self.1);
|
||||
Ok(WikiIdIterator(self.0.entries()?, filter))
|
||||
Ok(WikiIdIterator(self.0.entries()?.without_store().with_store(self.0), filter))
|
||||
}
|
||||
|
||||
pub fn delete_entry<EN: AsRef<str>>(&self, entry_name: EN) -> Result<()> {
|
||||
|
|
Loading…
Reference in a new issue