Optimize the Store::entries() interface

The previous iterator was implemented to simply fetch _all_ pathes from
the filesystem, no matter what.

With this implementation, this changes. The iterator now has
functionality to optimize the iteration, if only a subdirectory of the
store is required, for example `$STORE/foo`.

This is done via functionality where the underlying iterator gets
altered.

First of all, the interface was changed to return a `Entries` object,
which itself only covers the libimagstore-internal `PathIterator` type.
This type was changed so that the backend implementation provides an
"PathIterBuilder`, which builds the actual iterator object for the
`PathIterator` type.

The intermediate `StoreIdConstructingIterator` was merged into
`PathIterator` for simplicity.

The `Entries` type got functionality similar to the
`StoreIdIteratorWithStore` type for easier transition to the new API.
This should probably be removed at a later point, though.

As the `walkdir::WalkDir` type is not as nice as it could be, iterators
for two collections in the store could be built like this (untested):

    store
        .entries()?
        .in_collection("foo")
        .chain(store.entries()?.in_collection("bar"))

Functionality to exclude subdirectories is not possible with the current
`walkdir::WalkDir` implementation and has to be done during iteration,
with filtering (as usual).

Signed-off-by: Matthias Beyer <mail@beyermatthias.de>
This commit is contained in:
Matthias Beyer 2018-06-07 00:29:52 +02:00
parent d59dca1a23
commit d4872f6da3
9 changed files with 177 additions and 50 deletions

View file

@ -20,6 +20,7 @@
use std::fs::{File, OpenOptions, create_dir_all, remove_file, copy, rename};
use std::io::{Seek, SeekFrom, Read};
use std::path::{Path, PathBuf};
use std::sync::Arc;
use error::{StoreError as SE, StoreErrorKind as SEK};
use error::ResultExt;
@ -30,6 +31,9 @@ use super::Drain;
use store::Entry;
use storeid::StoreId;
use file_abstraction::iter::PathIterator;
use file_abstraction::iter::PathIterBuilder;
use walkdir::WalkDir;
#[derive(Debug)]
pub struct FSFileAbstractionInstance(PathBuf);
@ -133,18 +137,34 @@ impl FileAbstraction for FSFileAbstraction {
})
}
fn pathes_recursively(&self, basepath: PathBuf) -> Result<PathIterator, SE> {
use walkdir::WalkDir;
fn pathes_recursively(&self,
basepath: PathBuf,
storepath: PathBuf,
backend: Arc<FileAbstraction>)
-> Result<PathIterator, SE>
{
trace!("Building PathIterator object");
Ok(PathIterator::new(Box::new(WalkDirPathIterBuilder { basepath }), storepath, backend))
}
}
let i = WalkDir::new(basepath)
pub(crate) struct WalkDirPathIterBuilder {
basepath: PathBuf
}
impl PathIterBuilder for WalkDirPathIterBuilder {
fn build_iter(&self) -> Box<Iterator<Item = Result<PathBuf, SE>>> {
Box::new(WalkDir::new(self.basepath.clone())
.min_depth(1)
.max_open(100)
.into_iter()
.map(|r| {
r.map(|e| PathBuf::from(e.path())).chain_err(|| SE::from_kind(SEK::FileError))
});
}))
}
Ok(PathIterator::new(Box::new(i)))
fn in_collection(&mut self, c: &str) {
self.basepath.push(c);
}
}

View file

@ -33,6 +33,7 @@ use super::Drain;
use store::Entry;
use storeid::StoreId;
use file_abstraction::iter::PathIterator;
use file_abstraction::iter::PathIterBuilder;
type Backend = Arc<Mutex<RefCell<HashMap<PathBuf, Entry>>>>;
@ -181,9 +182,9 @@ impl FileAbstraction for InMemoryFileAbstraction {
Ok(())
}
fn pathes_recursively(&self, _basepath: PathBuf) -> Result<PathIterator, SE> {
debug!("Getting all pathes");
let keys : Vec<Result<PathBuf, SE>> = self
fn pathes_recursively(&self, _basepath: PathBuf, storepath: PathBuf, backend: Arc<FileAbstraction>) -> Result<PathIterator, SE> {
trace!("Building PathIterator object (inmemory implementation)");
let keys : Vec<PathBuf> = self
.backend()
.lock()
.map_err(|_| SE::from_kind(SEK::FileError))?
@ -191,9 +192,21 @@ impl FileAbstraction for InMemoryFileAbstraction {
.keys()
.map(PathBuf::from)
.map(Ok)
.collect(); // we have to collect() because of the lock() above.
.collect::<Result<_, SE>>()?; // we have to collect() because of the lock() above.
Ok(PathIterator::new(Box::new(keys.into_iter())))
Ok(PathIterator::new(Box::new(InMemPathIterBuilder(keys)), storepath, backend))
}
}
pub(crate) struct InMemPathIterBuilder(Vec<PathBuf>);
impl PathIterBuilder for InMemPathIterBuilder {
fn build_iter(&self) -> Box<Iterator<Item = Result<PathBuf, SE>>> {
Box::new(self.0.clone().into_iter().map(Ok))
}
fn in_collection(&mut self, c: &str) {
self.0.retain(|p| p.starts_with(c));
}
}

View file

@ -24,48 +24,63 @@ use error::Result;
use storeid::StoreId;
use file_abstraction::FileAbstraction;
/// See documentation for PathIterator
pub trait PathIterBuilder {
fn build_iter(&self) -> Box<Iterator<Item = Result<PathBuf>>>;
fn in_collection(&mut self, c: &str);
}
/// A wrapper for an iterator over `PathBuf`s
pub struct PathIterator(Box<Iterator<Item = Result<PathBuf>>>);
///
/// As the backend defines how "iterating over all entries" is implemented, this type holds a
/// `PathIterBuilder` internally. This type is used to create new iterator instances every time the
/// "settings" for how the iterator behaves are changed. This basically means: If the PathIterator
/// is requested to not iterate over a directory "a" but rather its subdirectory "a/b", the
/// implementation asks the `PathIterBuilder` to create a new iterator for that.
///
/// The `PathIterBuilder` can then yield a new iterator instance which is optimized for the new
/// requirements (which basically means: Construct a new WalkDir object which does traverse the
/// subdirectory instead of the parent).
///
/// This means quite a few allocations down the road, as the PathIterator itself is not generic, but
/// this seems to be the best way to implement this.
pub struct PathIterator {
iter_builder: Box<PathIterBuilder>,
iter: Box<Iterator<Item = Result<PathBuf>>>,
storepath: PathBuf,
backend: Arc<FileAbstraction>,
}
impl PathIterator {
pub fn new(iter: Box<Iterator<Item = Result<PathBuf>>>) -> PathIterator {
PathIterator(iter)
pub fn new(iter_builder: Box<PathIterBuilder>,
storepath: PathBuf,
backend: Arc<FileAbstraction>)
-> PathIterator
{
trace!("Generating iterator object with PathIterBuilder");
let iter = iter_builder.build_iter();
PathIterator { iter_builder, iter, storepath, backend }
}
pub fn store_id_constructing(self, storepath: PathBuf, backend: Arc<FileAbstraction>)
-> StoreIdConstructingIterator
{
StoreIdConstructingIterator(self, storepath, backend)
pub fn in_collection(mut self, c: &str) -> Self {
trace!("Generating iterator object for collection: {}", c);
self.iter_builder.in_collection(c);
self
}
}
impl Iterator for PathIterator {
type Item = Result<PathBuf>;
fn next(&mut self) -> Option<Self::Item> {
self.0.next()
}
}
/// Helper type for constructing StoreIds from a PathIterator.
///
/// Automatically ignores non-files.
pub struct StoreIdConstructingIterator(PathIterator, PathBuf, Arc<FileAbstraction>);
impl Iterator for StoreIdConstructingIterator {
type Item = Result<StoreId>;
fn next(&mut self) -> Option<Self::Item> {
while let Some(next) = self.0.next() {
while let Some(next) = self.iter.next() {
match next {
Err(e) => return Some(Err(e)),
Ok(next) => match self.2.is_file(&next) {
Ok(next) => match self.backend.is_file(&next) {
Err(e) => return Some(Err(e)),
Ok(true) => return Some(StoreId::from_full_path(&self.1, next)),
Ok(true) => return Some(StoreId::from_full_path(&self.storepath, next)),
Ok(false) => { continue },
}
}

View file

@ -20,6 +20,7 @@
use std::path::PathBuf;
use std::fmt::Debug;
use std::collections::HashMap;
use std::sync::Arc;
use error::StoreError as SE;
use store::Entry;
@ -50,7 +51,7 @@ pub trait FileAbstraction : Debug {
fn drain(&self) -> Result<Drain, SE>;
fn fill<'a>(&'a mut self, d: Drain) -> Result<(), SE>;
fn pathes_recursively(&self, basepath: PathBuf) -> Result<PathIterator, SE>;
fn pathes_recursively(&self, basepath: PathBuf, storepath: PathBuf, backend: Arc<FileAbstraction>) -> Result<PathIterator, SE>;
}
/// An abstraction trait over actions on files

View file

@ -144,3 +144,81 @@ mod compile_test {
}
}
use storeid::StoreId;
use storeid::StoreIdIterator;
use self::delete::StoreDeleteIterator;
use self::get::StoreGetIterator;
use self::retrieve::StoreRetrieveIterator;
use file_abstraction::iter::PathIterator;
use store::Store;
use error::StoreError;
use error::Result;
/// Iterator for iterating over all (or a subset of all) entries
///
/// The iterator now has functionality to optimize the iteration, if only a subdirectory of the
/// store is required, for example `$STORE/foo`.
///
/// This is done via functionality where the underlying iterator gets
/// altered.
///
/// As the (for the filesystem backend underlying) `walkdir::WalkDir` type is not as nice as it
/// could be, iterating over two subdirectories with one iterator is not possible. Thus, iterators
/// for two collections in the store should be build like this (untested):
///
/// ```ignore
/// store
/// .entries()?
/// .in_collection("foo")
/// .chain(store.entries()?.in_collection("bar"))
/// ```
///
/// Functionality to exclude subdirectories is not possible with the current implementation and has
/// to be done during iteration, with filtering (as usual).
pub struct Entries<'a>(PathIterator, &'a Store);
impl<'a> Entries<'a> {
pub(crate) fn new(pi: PathIterator, store: &'a Store) -> Self {
Entries(pi, store)
}
pub fn in_collection(self, c: &str) -> Self {
Entries(self.0.in_collection(c), self.1)
}
pub fn without_store(self) -> StoreIdIterator {
StoreIdIterator::new(Box::new(self.0))
}
/// Transform the iterator into a StoreDeleteIterator
///
/// This immitates the API from `libimagstore::iter`.
pub fn into_delete_iter(self) -> StoreDeleteIterator<'a, StoreError> {
StoreDeleteIterator::new(Box::new(self.0), self.1)
}
/// Transform the iterator into a StoreGetIterator
///
/// This immitates the API from `libimagstore::iter`.
pub fn into_get_iter(self) -> StoreGetIterator<'a, StoreError> {
StoreGetIterator::new(Box::new(self.0), self.1)
}
/// Transform the iterator into a StoreRetrieveIterator
///
/// This immitates the API from `libimagstore::iter`.
pub fn into_retrieve_iter(self) -> StoreRetrieveIterator<'a, StoreError> {
StoreRetrieveIterator::new(Box::new(self.0), self.1)
}
}
impl<'a> Iterator for Entries<'a> {
type Item = Result<StoreId>;
fn next(&mut self) -> Option<Self::Item> {
self.0.next()
}
}

View file

@ -37,7 +37,8 @@ use toml_query::read::TomlValueReadTypeExt;
use error::{StoreError as SE, StoreErrorKind as SEK};
use error::ResultExt;
use storeid::{IntoStoreId, StoreId, StoreIdIteratorWithStore};
use storeid::{IntoStoreId, StoreId};
use iter::Entries;
use file_abstraction::FileAbstractionInstance;
// We re-export the following things so tests can use them
@ -642,12 +643,11 @@ impl Store {
}
/// Get _all_ entries in the store (by id as iterator)
pub fn entries(&self) -> Result<StoreIdIteratorWithStore> {
pub fn entries<'a>(&'a self) -> Result<Entries<'a>> {
trace!("Building 'Entries' iterator");
self.backend
.pathes_recursively(self.path().clone())
.map(|i| i.store_id_constructing(self.path().clone(), self.backend.clone()))
.map(Box::new)
.map(|it| StoreIdIteratorWithStore::new(it, self))
.pathes_recursively(self.path().clone(), self.path().clone(), self.backend.clone())
.map(|i| Entries::new(i, self))
}
/// Gets the path where this store is on the disk

View file

@ -48,11 +48,11 @@ pub trait HabitStore {
impl HabitStore for Store {
/// Get an iterator over all habits
fn all_habit_templates(&self) -> Result<HabitTemplateStoreIdIterator> {
self.entries().map(HabitTemplateStoreIdIterator::from).map_err(From::from)
Ok(HabitTemplateStoreIdIterator::from(self.entries()?.without_store()))
}
fn all_habit_instances(&self) -> Result<HabitInstanceStoreIdIterator> {
self.entries().map(HabitInstanceStoreIdIterator::from).map_err(From::from)
Ok(HabitInstanceStoreIdIterator::from(self.entries()?.without_store()))
}
}

View file

@ -17,18 +17,18 @@
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
//
use libimagstore::storeid::StoreIdIteratorWithStore;
use libimagstore::iter::Entries;
use libimagstore::store::Store;
use libimagstore::store::Result as StoreResult;
use libimagstore::store::FileLockEntry;
use constants::*;
pub struct TimeTrackingsGetIterator<'a>(StoreIdIteratorWithStore<'a>, &'a Store);
pub struct TimeTrackingsGetIterator<'a>(Entries<'a>, &'a Store);
impl<'a> TimeTrackingsGetIterator<'a> {
pub fn new(sit: StoreIdIteratorWithStore<'a>, store: &'a Store) -> Self {
TimeTrackingsGetIterator(sit, store)
pub fn new(entries: Entries<'a>, store: &'a Store) -> Self {
TimeTrackingsGetIterator(entries, store)
}
}

View file

@ -95,7 +95,7 @@ impl<'a, 'b> Wiki<'a, 'b> {
pub fn all_ids(&self) -> Result<WikiIdIterator> {
let filter = IdIsInWikiFilter(self.1);
Ok(WikiIdIterator(self.0.entries()?, filter))
Ok(WikiIdIterator(self.0.entries()?.without_store().with_store(self.0), filter))
}
pub fn delete_entry<EN: AsRef<str>>(&self, entry_name: EN) -> Result<()> {