diff --git a/lib/entry/libimagentrymarkdown/Cargo.toml b/lib/entry/libimagentrymarkdown/Cargo.toml index 3b962eac..92d923d8 100644 --- a/lib/entry/libimagentrymarkdown/Cargo.toml +++ b/lib/entry/libimagentrymarkdown/Cargo.toml @@ -18,7 +18,11 @@ log = "0.3" hoedown = "5.0.0" url = "1.2" error-chain = "0.10" +env_logger = "0.3" -libimagstore = { version = "0.4.0", path = "../../../lib/core/libimagstore" } -libimagerror = { version = "0.4.0", path = "../../../lib/core/libimagerror" } +libimagstore = { version = "0.4.0", path = "../../../lib/core/libimagstore" } +libimagerror = { version = "0.4.0", path = "../../../lib/core/libimagerror" } +libimagentrylink = { version = "0.4.0", path = "../../../lib/entry/libimagentrylink/" } +libimagentryref = { version = "0.4.0", path = "../../../lib/entry/libimagentryref/" } +libimagutil = { version = "0.4.0", path = "../../../lib/etc/libimagutil/" } diff --git a/lib/entry/libimagentrymarkdown/src/error.rs b/lib/entry/libimagentrymarkdown/src/error.rs index 4df464ec..d3efc6ec 100644 --- a/lib/entry/libimagentrymarkdown/src/error.rs +++ b/lib/entry/libimagentrymarkdown/src/error.rs @@ -17,11 +17,25 @@ // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA // +use url::Url; + +use libimagstore::storeid::StoreId; + error_chain! { types { MarkdownError, MarkdownErrorKind, ResultExt, Result; } + links { + StoreError(::libimagstore::error::StoreError, ::libimagstore::error::StoreErrorKind); + LinkError(::libimagentrylink::error::LinkError, ::libimagentrylink::error::LinkErrorKind); + RefError(::libimagentryref::error::RefError, ::libimagentryref::error::RefErrorKind); + } + + foreign_links { + UrlParserError(::url::ParseError); + } + errors { MarkdownRenderError { description("Markdown render error") @@ -33,6 +47,20 @@ error_chain! 
{ display("Link parsing error") } + StoreGetError(id: StoreId) { + description("Failed to get entry from store") + display("Failed to get entry '{}' from store", id) + } + + UndecidableLinkType(s: String) { + description("Failed to qualify link type") + display("The Type of the link '{}' cannot be recognized", s) + } + + UrlProcessingError(u: Url) { + description("Failed to properly processing URL") + display("The URL '{:?}' could not be processed properly", u) + } } } diff --git a/lib/entry/libimagentrymarkdown/src/lib.rs b/lib/entry/libimagentrymarkdown/src/lib.rs index e17751dc..9e4df569 100644 --- a/lib/entry/libimagentrymarkdown/src/lib.rs +++ b/lib/entry/libimagentrymarkdown/src/lib.rs @@ -39,9 +39,18 @@ extern crate hoedown; extern crate url; extern crate libimagstore; extern crate libimagerror; +extern crate libimagentrylink; +extern crate libimagentryref; +extern crate libimagutil; #[macro_use] extern crate error_chain; +#[macro_use] extern crate log; + +#[cfg(test)] +extern crate env_logger; + pub mod error; pub mod html; pub mod link; +pub mod processor; diff --git a/lib/entry/libimagentrymarkdown/src/link.rs b/lib/entry/libimagentrymarkdown/src/link.rs index 7de3edc7..bd26bea1 100644 --- a/lib/entry/libimagentrymarkdown/src/link.rs +++ b/lib/entry/libimagentrymarkdown/src/link.rs @@ -48,6 +48,7 @@ pub struct UrlLink { pub link: Url, } +#[derive(Debug)] struct LinkExtractor { links: Vec, } @@ -76,6 +77,10 @@ impl Render for LinkExtractor { let link = link.and_then(|l| l.to_str().ok()).map(String::from); let content = content.and_then(|l| l.to_str().ok()).map(String::from); + trace!("Processing..."); + trace!("link = {:?}", link); + trace!("content = {:?}", content); + match (link, content) { (Some(link), Some(content)) => { self.links.push(Link { link: link, title: content }); @@ -94,6 +99,7 @@ impl Render for LinkExtractor { pub fn extract_links(buf: &str) -> Vec { let mut le = LinkExtractor::new(); le.render(&Markdown::new(buf)); + trace!("Extracted: 
{:?}", le); le.links() } diff --git a/lib/entry/libimagentrymarkdown/src/processor.rs b/lib/entry/libimagentrymarkdown/src/processor.rs new file mode 100644 index 00000000..4ac570c8 --- /dev/null +++ b/lib/entry/libimagentrymarkdown/src/processor.rs @@ -0,0 +1,551 @@ +// +// imag - the personal information management suite for the commandline +// Copyright (C) 2015, 2016 Matthias Beyer and contributors +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; version +// 2.1 of the License. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +// + +use error::MarkdownError as ME; +use error::MarkdownErrorKind as MEK; +use error::*; +use link::extract_links; + +use libimagentrylink::external::ExternalLinker; +use libimagentrylink::internal::InternalLinker; +use libimagentryref::refstore::RefStore; +use libimagentryref::flags::RefFlags; +use libimagstore::store::Entry; +use libimagstore::store::Store; +use libimagstore::storeid::StoreId; +use libimagutil::iter::FoldResult; + +use std::path::PathBuf; + +use url::Url; + +/// A link Processor which collects the links from a Markdown and passes them on to +/// `libimagentrylink` functionality +/// +/// The processor can be configured to +/// +/// * Process internal links (from store entry to store entry) +/// * Process internal links with automatically creating targets +/// If an internal link is encountered, the corresponding target must be present in the store. 
+/// If it is not, it will either be created or the processing fails +/// * Process external links (from store entry to URL) +/// * Process refs (from store entry to files on the filesystem and outside of the store) +/// (default: false) +/// +/// # Note +/// +/// There's no LinkProcessor::new() function, please use `LinkProcessor::default()`. +/// +pub struct LinkProcessor { + process_internal_links: bool, + create_internal_targets: bool, + process_external_links: bool, + process_refs: bool +} + +impl LinkProcessor { + + /// Switch internal link processing on/off + /// + /// Internal links are links which are simply `directory/file`, but not `/directory/file`, as + /// beginning an id with `/` is discouraged in imag. + pub fn process_internal_links(mut self, b: bool) -> Self { + self.process_internal_links = b; + self + } + + /// Switch internal link target creation on/off + /// + /// If a link points to a non-existing imag entry, a `false` here will cause the processor to + /// return an error from `process()`. A `true` setting will create the entry and then fetch it + /// to link it to the processed entry. + pub fn create_internal_targets(mut self, b: bool) -> Self { + self.create_internal_targets = b; + self + } + + /// Switch external link processing on/off + /// + /// An external link must start with `https://` or `http://`. + pub fn process_external_links(mut self, b: bool) -> Self { + self.process_external_links = b; + self + } + + /// Switch ref processing on/off + /// + /// A Ref is expected to be a link with `file:///` at the beginning. + pub fn process_refs(mut self, b: bool) -> Self { + self.process_refs = b; + self + } + + /// Process an Entry for its links + /// + /// # Warning + /// + /// When `LinkProcessor::create_internal_targets()` was called to set the setting to true, this + /// function returns all errors returned by the Store. 
+ /// Links whose kind is switched off are skipped without error. + pub fn process<'a>(&self, entry: &mut Entry, store: &'a Store) -> Result<()> { + let text = entry.to_str(); + trace!("Processing: {:?}", entry.get_location()); + extract_links(&text) + .into_iter() + .fold_result::<_, MarkdownError, _>(|link| { + trace!("Processing {:?}", link); + match LinkQualification::qualify(&link.link) { + LinkQualification::InternalLink => { + if !self.process_internal_links { + return Ok(()); + } + + let spath = Some(store.path().clone()); + let id = StoreId::new(spath, PathBuf::from(&link.link))?; + let mut target = if self.create_internal_targets { + store.retrieve(id)? + } else { + store.get(id.clone())? + .ok_or_else(|| ME::from_kind(MEK::StoreGetError(id)))? + }; + + entry.add_internal_link(&mut target).map_err(From::from) + }, + LinkQualification::ExternalLink(url) => { + if !self.process_external_links { + return Ok(()); + } + + entry.add_external_link(store, url).map_err(From::from) + }, + LinkQualification::RefLink(url) => { + if !self.process_refs { + return Ok(()); + } + + let flags = RefFlags::default() + .with_content_hashing(false) + .with_permission_tracking(false); + trace!("URL = {:?}", url); + trace!("URL.path() = {:?}", url.path()); + trace!("URL.host_str() = {:?}", url.host_str()); + let path = url.host_str().unwrap_or_else(|| url.path()); + let path = PathBuf::from(path); + let mut target = RefStore::create(store, path, flags)?; + + entry.add_internal_link(&mut target).map_err(From::from) + }, + LinkQualification::Undecidable(e) => { + // URL parsing failed for a reason other than a relative URL; report the offending link + Err(e).chain_err(|| MEK::UndecidableLinkType(link.link.clone())) + }, + } + }) + } + +} + +/// Enum to tell what kind of link a string of text is +enum LinkQualification { + InternalLink, + ExternalLink(Url), + RefLink(Url), + Undecidable(ME), +} + +impl LinkQualification { + fn qualify(text: &str) -> LinkQualification { + match Url::parse(text) { + Ok(url) => { + if url.scheme() == "file" { + return LinkQualification::RefLink(url) + } + + // else we assume the 
following, as other stuff is assumed to get thrown out by + // url::Url::parse() as Err(_). NOTE(review): the scheme check below is commented out, so any non-`file` URL that parses is treated as external - confirm. + // + // if url.scheme() == "https" || url.scheme() == "http" { + return LinkQualification::ExternalLink(url); + // } + }, + + Err(e) => { + match e { + ::url::ParseError::RelativeUrlWithoutBase => { + LinkQualification::InternalLink + }, + + _ => LinkQualification::Undecidable(ME::from(e)), + } + } + } + } +} + +impl Default for LinkProcessor { + fn default() -> Self { + LinkProcessor { + process_internal_links: true, + create_internal_targets: false, + process_external_links: true, + process_refs: false + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + use std::path::PathBuf; + + use libimagstore::store::Store; + use libimagentrylink::internal::InternalLinker; + + fn setup_logging() { + use env_logger; + let _ = env_logger::init().unwrap_or(()); + } + + pub fn get_store() -> Store { + use libimagstore::file_abstraction::InMemoryFileAbstraction; + let fs = InMemoryFileAbstraction::new(); + Store::new_with_backend(PathBuf::from("/"), None, Box::new(fs)).unwrap() + } + + #[test] + fn test_process_no_links() { + setup_logging(); + let store = get_store(); + + let mut base = store.create(PathBuf::from("test-1")).unwrap(); + *base.get_content_mut() = format!("This is an example entry with no links"); + + let update = store.update(&mut base); + assert!(update.is_ok()); + + let processor = LinkProcessor::default(); + + let result = processor.process(&mut base, &store); + assert!(result.is_ok()); + } + + #[test] + fn test_process_one_existing_file_linked() { + setup_logging(); + let store = get_store(); + + let mut base = store.create(PathBuf::from("test-2.1")).unwrap(); + *base.get_content_mut() = format!("This is an example entry with one [link](test-2.2)"); + + let update = store.update(&mut base); + assert!(update.is_ok()); + + // immediately drop as we don't need this entry right now + let _ = store.create(PathBuf::from("test-2.2")).unwrap(); + + let processor = 
LinkProcessor::default() + .process_internal_links(true) + .create_internal_targets(false) + .process_external_links(false) + .process_refs(false); + + let result = processor.process(&mut base, &store); + assert!(result.is_ok(), "Should be Ok(()): {:?}", result); + + { + let base_links = base.get_internal_links(); + assert!(base_links.is_ok()); + let base_links : Vec<_> = base_links.unwrap().collect(); + + assert_eq!(1, base_links.len()); + assert_eq!("test-2.2", base_links[0].to_str().unwrap()); + } + + { + let link = store.get(PathBuf::from("test-2.2")).unwrap().unwrap(); + let link_links = link.get_internal_links(); + assert!(link_links.is_ok()); + let link_links : Vec<_> = link_links.unwrap().collect(); + + assert_eq!(1, link_links.len()); + assert_eq!("test-2.1", link_links[0].to_str().unwrap()); + } + } + + #[test] + fn test_process_one_existing_file_linked_faulty() { + setup_logging(); + let store = get_store(); + + let mut base = store.create(PathBuf::from("test-2.1")).unwrap(); + *base.get_content_mut() = format!("This is an example entry with one [link](/test-2.2)"); + + let update = store.update(&mut base); + assert!(update.is_ok()); + + let processor = LinkProcessor::default() + .process_internal_links(true) + .create_internal_targets(false) + .process_external_links(false) + .process_refs(false); + + let result = processor.process(&mut base, &store); + assert!(result.is_err(), "Should be Err(_), but is Ok(())"); + } + + #[test] + fn test_process_one_nonexisting_file_linked() { + setup_logging(); + let store = get_store(); + + let mut base = store.create(PathBuf::from("test-2.1")).unwrap(); + *base.get_content_mut() = format!("This is an example entry with one [link](test-2.2)"); + + let update = store.update(&mut base); + assert!(update.is_ok()); + + let processor = LinkProcessor::default() + .process_internal_links(true) + .create_internal_targets(true) + .process_external_links(false) + .process_refs(false); + + let result = processor.process(&mut 
base, &store); + assert!(result.is_ok(), "Should be Ok(()): {:?}", result); + + { + let base_links = base.get_internal_links(); + assert!(base_links.is_ok()); + let base_links : Vec<_> = base_links.unwrap().collect(); + + assert_eq!(1, base_links.len()); + assert_eq!("test-2.2", base_links[0].to_str().unwrap()); + } + + { + let link = store.get(PathBuf::from("test-2.2")).unwrap().unwrap(); + let link_links = link.get_internal_links(); + assert!(link_links.is_ok()); + let link_links : Vec<_> = link_links.unwrap().collect(); + + assert_eq!(1, link_links.len()); + assert_eq!("test-2.1", link_links[0].to_str().unwrap()); + } + } + + #[test] + fn test_process_one_external_link() { + setup_logging(); + let store = get_store(); + + let mut base = store.create(PathBuf::from("test-5.1")).unwrap(); + *base.get_content_mut() = format!("An [example](http://example.com) is here."); + + let update = store.update(&mut base); + assert!(update.is_ok()); + + let processor = LinkProcessor::default() + .process_internal_links(true) + .create_internal_targets(true) + .process_external_links(true) + .process_refs(false); + + let result = processor.process(&mut base, &store); + assert!(result.is_ok(), "Should be Ok(()): {:?}", result); + + // The hash of "http://example.com" processed in the `libimagentrylink` way. 
+ let expected_link = "links/external/9c17e047f58f9220a7008d4f18152fee4d111d14"; + { + let base_links = base.get_internal_links(); + assert!(base_links.is_ok()); + let base_links : Vec<_> = base_links.unwrap().collect(); + + assert_eq!(1, base_links.len()); + assert_eq!(expected_link, base_links[0].to_str().unwrap()); + } + + let entries = store.entries(); + assert!(entries.is_ok()); + let entries : Vec<_> = entries.unwrap().collect(); + + assert_eq!(2, entries.len(), "Expected 2 links, got: {:?}", entries); + + { + let link = store.get(PathBuf::from(expected_link)).unwrap().unwrap(); + let link_links = link.get_internal_links(); + assert!(link_links.is_ok()); + let link_links : Vec<_> = link_links.unwrap().collect(); + + assert_eq!(1, link_links.len()); + assert_eq!("test-5.1", link_links[0].to_str().unwrap()); + } + } + + #[test] + fn test_process_one_ref() { + setup_logging(); + let store = get_store(); + + let mut base = store.create(PathBuf::from("test-5.1")).unwrap(); + + // As the ref target must exist, we're using /etc/hosts here + *base.get_content_mut() = format!("An [example ref](file:///etc/hosts) is here."); + + let update = store.update(&mut base); + assert!(update.is_ok()); + + let processor = LinkProcessor::default() + .process_internal_links(false) + .create_internal_targets(false) + .process_external_links(false) + .process_refs(true); + + let result = processor.process(&mut base, &store); + assert!(result.is_ok(), "Should be Ok(()): {:?}", result); + + let entries = store.entries(); + assert!(entries.is_ok()); + let entries : Vec<_> = entries.unwrap().collect(); + + assert_eq!(2, entries.len(), "Expected 2 links, got: {:?}", entries); + println!("{:?}", entries); + } + + #[test] + fn test_process_two_refs() { + setup_logging(); + let store = get_store(); + + let mut base = store.create(PathBuf::from("test-5.1")).unwrap(); + + // As the ref target must exist, we're using /etc/hosts here + *base.get_content_mut() = format!( + r#"An [example 
ref](file:///etc/hosts) + is [here](file:///etc/group)."# + ); + + let update = store.update(&mut base); + assert!(update.is_ok()); + + let processor = LinkProcessor::default() + .process_internal_links(false) + .create_internal_targets(false) + .process_external_links(false) + .process_refs(true); + + let result = processor.process(&mut base, &store); + assert!(result.is_ok(), "Should be Ok(()): {:?}", result); + + let entries = store.entries(); + assert!(entries.is_ok()); + let entries : Vec<_> = entries.unwrap().collect(); + + assert_eq!(3, entries.len(), "Expected 3 links, got: {:?}", entries); + println!("{:?}", entries); + } + + #[test] + fn test_process_refs_with_ref_processing_switched_off() { + setup_logging(); + let store = get_store(); + + let mut base = store.create(PathBuf::from("test-5.1")).unwrap(); + + // As the ref target must exist, we're using /etc/hosts here + *base.get_content_mut() = format!( + r#"An [example ref](file:///etc/hosts) + is [here](file:///etc/group)."# + ); + + let update = store.update(&mut base); + assert!(update.is_ok()); + + let processor = LinkProcessor::default() + .process_internal_links(false) + .create_internal_targets(false) + .process_external_links(false) + .process_refs(false); + + let result = processor.process(&mut base, &store); + assert!(result.is_ok(), "Should be Ok(()): {:?}", result); + + let entries = store.entries(); + assert!(entries.is_ok()); + let entries : Vec<_> = entries.unwrap().collect(); + + assert_eq!(1, entries.len(), "Expected 1 entries, got: {:?}", entries); + println!("{:?}", entries); + } + + #[test] + fn test_process_external_link_with_external_link_processing_switched_off() { + setup_logging(); + let store = get_store(); + + let mut base = store.create(PathBuf::from("test-5.1")).unwrap(); + *base.get_content_mut() = format!("An [example](http://example.com) is here."); + + let update = store.update(&mut base); + assert!(update.is_ok()); + + let processor = LinkProcessor::default() + 
.process_internal_links(true) + .create_internal_targets(true) + .process_external_links(false) + .process_refs(false); + + let result = processor.process(&mut base, &store); + assert!(result.is_ok(), "Should be Ok(()): {:?}", result); + + let entries = store.entries(); + assert!(entries.is_ok()); + let entries : Vec<_> = entries.unwrap().collect(); + + assert_eq!(1, entries.len(), "Expected 1 entries, got: {:?}", entries); + } + + #[test] + fn test_process_one_existing_file_linked_with_internal_processing_switched_off() { + setup_logging(); + let store = get_store(); + + let mut base = store.create(PathBuf::from("test-2.1")).unwrap(); + *base.get_content_mut() = format!("This is an example entry with one [link](test-2.2)"); + + let update = store.update(&mut base); + assert!(update.is_ok()); + + // immediately drop as we don't need this entry right now + let _ = store.create(PathBuf::from("test-2.2")).unwrap(); + + let processor = LinkProcessor::default() + .process_internal_links(false) + .create_internal_targets(false) + .process_external_links(false) + .process_refs(false); + + let result = processor.process(&mut base, &store); + assert!(result.is_ok(), "Should be Ok(()): {:?}", result); + + assert_eq!(2, store.entries().unwrap().collect::>().len()); + } + +} +