From 9c68514dfbccf4dd9ea6c590d5109e8c6a3bdacc Mon Sep 17 00:00:00 2001 From: Matthias Beyer Date: Tue, 5 Sep 2017 18:21:44 +0200 Subject: [PATCH] Add processor for collecting links from content and putting them into the header via the functionality offered by `libimagentrylink`. --- lib/entry/libimagentrymarkdown/Cargo.toml | 6 +- lib/entry/libimagentrymarkdown/src/error.rs | 28 +++ lib/entry/libimagentrymarkdown/src/lib.rs | 10 + lib/entry/libimagentrymarkdown/src/link.rs | 6 + .../libimagentrymarkdown/src/processor.rs | 211 ++++++++++++++++++ 5 files changed, 259 insertions(+), 2 deletions(-) create mode 100644 lib/entry/libimagentrymarkdown/src/processor.rs diff --git a/lib/entry/libimagentrymarkdown/Cargo.toml b/lib/entry/libimagentrymarkdown/Cargo.toml index 3b962eac..ad27d0d2 100644 --- a/lib/entry/libimagentrymarkdown/Cargo.toml +++ b/lib/entry/libimagentrymarkdown/Cargo.toml @@ -19,6 +19,8 @@ hoedown = "5.0.0" url = "1.2" error-chain = "0.10" -libimagstore = { version = "0.4.0", path = "../../../lib/core/libimagstore" } -libimagerror = { version = "0.4.0", path = "../../../lib/core/libimagerror" } +libimagstore = { version = "0.4.0", path = "../../../lib/core/libimagstore" } +libimagerror = { version = "0.4.0", path = "../../../lib/core/libimagerror" } +libimagentrylink = { version = "0.4.0", path = "../../../lib/entry/libimagentrylink/" } +libimagutil = { version = "0.4.0", path = "../../../lib/etc/libimagutil/" } diff --git a/lib/entry/libimagentrymarkdown/src/error.rs b/lib/entry/libimagentrymarkdown/src/error.rs index 4df464ec..d3efc6ec 100644 --- a/lib/entry/libimagentrymarkdown/src/error.rs +++ b/lib/entry/libimagentrymarkdown/src/error.rs @@ -17,11 +17,25 @@ // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA // +use url::Url; + +use libimagstore::storeid::StoreId; + error_chain! 
{ types { MarkdownError, MarkdownErrorKind, ResultExt, Result; } + links { + StoreError(::libimagstore::error::StoreError, ::libimagstore::error::StoreErrorKind); + LinkError(::libimagentrylink::error::LinkError, ::libimagentrylink::error::LinkErrorKind); + RefError(::libimagentryref::error::RefError, ::libimagentryref::error::RefErrorKind); + } + + foreign_links { + UrlParserError(::url::ParseError); + } + errors { MarkdownRenderError { description("Markdown render error") @@ -33,6 +47,20 @@ error_chain! { display("Link parsing error") } + StoreGetError(id: StoreId) { + description("Failed to get entry from store") + display("Failed to get entry '{}' from store", id) + } + + UndecidableLinkType(s: String) { + description("Failed to qualify link type") + display("The Type of the link '{}' cannot be recognized", s) + } + + UrlProcessingError(u: Url) { + description("Failed to properly processing URL") + display("The URL '{:?}' could not be processed properly", u) + } } } diff --git a/lib/entry/libimagentrymarkdown/src/lib.rs b/lib/entry/libimagentrymarkdown/src/lib.rs index e17751dc..5626154d 100644 --- a/lib/entry/libimagentrymarkdown/src/lib.rs +++ b/lib/entry/libimagentrymarkdown/src/lib.rs @@ -39,9 +39,19 @@ extern crate hoedown; extern crate url; extern crate libimagstore; extern crate libimagerror; +extern crate libimagentrylink; +extern crate libimagutil; #[macro_use] extern crate error_chain; +#[cfg(test)] +extern crate env_logger; + +#[allow(unused)] +#[macro_use] +extern crate log; + pub mod error; pub mod html; pub mod link; +pub mod processor; diff --git a/lib/entry/libimagentrymarkdown/src/link.rs b/lib/entry/libimagentrymarkdown/src/link.rs index 7de3edc7..bd26bea1 100644 --- a/lib/entry/libimagentrymarkdown/src/link.rs +++ b/lib/entry/libimagentrymarkdown/src/link.rs @@ -48,6 +48,7 @@ pub struct UrlLink { pub link: Url, } +#[derive(Debug)] struct LinkExtractor { links: Vec, } @@ -76,6 +77,10 @@ impl Render for LinkExtractor { let link = 
link.and_then(|l| l.to_str().ok()).map(String::from); let content = content.and_then(|l| l.to_str().ok()).map(String::from); + trace!("Processing..."); + trace!("link = {:?}", link); + trace!("content = {:?}", content); + match (link, content) { (Some(link), Some(content)) => { self.links.push(Link { link: link, title: content }); @@ -94,6 +99,7 @@ impl Render for LinkExtractor { pub fn extract_links(buf: &str) -> Vec { let mut le = LinkExtractor::new(); le.render(&Markdown::new(buf)); + trace!("Extracted: {:?}", le); le.links() } diff --git a/lib/entry/libimagentrymarkdown/src/processor.rs b/lib/entry/libimagentrymarkdown/src/processor.rs new file mode 100644 index 00000000..4fbf3633 --- /dev/null +++ b/lib/entry/libimagentrymarkdown/src/processor.rs @@ -0,0 +1,211 @@ +// +// imag - the personal information management suite for the commandline +// Copyright (C) 2015, 2016 Matthias Beyer and contributors +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; version +// 2.1 of the License. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+//
+
+use error::MarkdownError as ME;
+use error::MarkdownErrorKind as MEK;
+use error::*;
+use link::extract_links;
+
+use libimagentrylink::external::ExternalLinker;
+use libimagentrylink::internal::InternalLinker;
+// NOTE(review): `libimagentryref` is used here and in `error.rs`, but this patch adds neither a
+// Cargo.toml dependency nor an `extern crate` declaration for it -- confirm it is declared.
+use libimagentryref::refstore::RefStore;
+use libimagentryref::flags::RefFlags;
+use libimagstore::store::Entry;
+use libimagstore::store::Store;
+use libimagstore::storeid::StoreId;
+use libimagutil::iter::FoldResult;
+
+use std::path::PathBuf;
+
+use url::Url;
+
+/// A link Processor which collects the links from a Markdown and passes them on to
+/// `libimagentrylink` functionality
+///
+/// The processor can be configured to
+///
+/// * Process internal links (from store entry to store entry)
+/// * Process internal links with automatically creating targets
+///   If an internal link is encountered, the corresponding target must be present in the store.
+///   If it is not, it will either be created or the processing fails
+/// * Process external links (from store entry to URL)
+/// * Process refs (from store entry to files on the filesystem and outside of the store)
+///   (default: false)
+///
+/// # Note
+///
+/// There's no LinkProcessor::new() function, please use `LinkProcessor::default()`.
+///
+#[derive(Debug, Clone)]
+pub struct LinkProcessor {
+    process_internal_links: bool,
+    create_internal_targets: bool,
+    process_external_links: bool,
+    process_refs: bool
+}
+
+impl LinkProcessor {
+
+    /// Switch internal link processing on/off
+    ///
+    /// Internal links are links which are simply `directory/file`, but not `/directory/file`, as
+    /// beginning an id with `/` is discouraged in imag.
+    pub fn process_internal_links(mut self, b: bool) -> Self {
+        self.process_internal_links = b;
+        self
+    }
+
+    /// Switch internal link target creation on/off
+    ///
+    /// If a link points to a non-existing imag entry, a `false` here will cause the processor to
+    /// return an error from `process()`. A `true` setting will create the entry and then fetch it
+    /// to link it to the processed entry.
+    pub fn create_internal_targets(mut self, b: bool) -> Self {
+        self.create_internal_targets = b;
+        self
+    }
+
+    /// Switch external link processing on/off
+    ///
+    /// Every link which parses as an absolute URL and does not use the `file` scheme is treated
+    /// as an external link (typically `https://` or `http://`).
+    pub fn process_external_links(mut self, b: bool) -> Self {
+        self.process_external_links = b;
+        self
+    }
+
+    /// Switch ref processing on/off
+    ///
+    /// A Ref is expected to be a link using the `file` URL scheme, e.g. `file:///path/to/file`.
+    pub fn process_refs(mut self, b: bool) -> Self {
+        self.process_refs = b;
+        self
+    }
+
+    /// Process an Entry for its links
+    ///
+    /// Extracts all links from the text of `entry`, qualifies each one and, if processing of the
+    /// respective link kind is switched on, links `entry` to the target. Links of a kind that is
+    /// switched off are skipped silently.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if a link cannot be qualified, if an internal link target does not exist
+    /// and target creation is switched off, or if the store or one of the linking functions
+    /// fails.
+    ///
+    /// # Warning
+    ///
+    /// When `LinkProcessor::create_internal_targets()` was called to set the setting to true, this
+    /// function returns all errors returned by the Store.
+    ///
+    pub fn process<'a>(&self, entry: &mut Entry, store: &'a Store) -> Result<()> {
+        let text = entry.to_str();
+        trace!("Processing: {:?}", entry.get_location());
+        extract_links(&text)
+            .into_iter()
+            .fold_result::<_, MarkdownError, _>(|link| {
+                trace!("Processing {:?}", link);
+                match LinkQualification::qualify(&link.link) {
+                    LinkQualification::InternalLink => {
+                        if !self.process_internal_links {
+                            return Ok(());
+                        }
+
+                        let spath = Some(store.path().clone());
+                        let id = StoreId::new(spath, PathBuf::from(&link.link))?;
+                        let mut target = if self.create_internal_targets {
+                            store.retrieve(id)?
+                        } else {
+                            // Build the error lazily, it is only needed if the entry is missing.
+                            store.get(id.clone())?
+                                .ok_or_else(|| ME::from_kind(MEK::StoreGetError(id)))?
+                        };
+
+                        entry.add_internal_link(&mut target).map_err(From::from)
+                    },
+                    LinkQualification::ExternalLink(url) => {
+                        if !self.process_external_links {
+                            return Ok(());
+                        }
+
+                        entry.add_external_link(store, url).map_err(From::from)
+                    },
+                    LinkQualification::RefLink(url) => {
+                        if !self.process_refs {
+                            return Ok(());
+                        }
+
+                        let flags = RefFlags::default()
+                            .with_content_hashing(false)
+                            .with_permission_tracking(false);
+                        trace!("URL = {:?}", url);
+                        trace!("URL.path() = {:?}", url.path());
+                        trace!("URL.host_str() = {:?}", url.host_str());
+                        // NOTE(review): the host part wins over the path part here, so for a URL
+                        // like `file://host/path` only `host` is used -- confirm this is intended.
+                        let path = url.host_str().unwrap_or_else(|| url.path());
+                        let path = PathBuf::from(path);
+                        let mut target = RefStore::create(store, path, flags)?;
+
+                        entry.add_internal_link(&mut target).map_err(From::from)
+                    },
+                    LinkQualification::Undecidable(e) => {
+                        // error
+                        Err(e).chain_err(|| MEK::UndecidableLinkType(link.link.clone()))
+                    },
+                }
+            })
+    }
+
+}
+
+/// Enum to tell what kind of link a string of text is
+enum LinkQualification {
+    InternalLink,
+    ExternalLink(Url),
+    RefLink(Url),
+    Undecidable(ME),
+}
+
+impl LinkQualification {
+    /// Qualify a link text
+    ///
+    /// * Text which parses as URL with the `file` scheme is a `RefLink`
+    /// * Text which parses as URL with any other scheme is an `ExternalLink`
+    /// * Text which fails to parse because it is a relative URL without a base is an
+    ///   `InternalLink`
+    /// * Every other parser error makes the link `Undecidable`
+    fn qualify(text: &str) -> LinkQualification {
+        match Url::parse(text) {
+            Ok(url) => {
+                if url.scheme() == "file" {
+                    LinkQualification::RefLink(url)
+                } else {
+                    // The scheme is not checked any further: every successfully parsed URL
+                    // which is not a `file` URL is handled as external link.
+                    LinkQualification::ExternalLink(url)
+                }
+            },
+
+            Err(::url::ParseError::RelativeUrlWithoutBase) => LinkQualification::InternalLink,
+
+            Err(e) => LinkQualification::Undecidable(ME::from(e)),
+        }
+    }
+}
+
+impl Default for LinkProcessor {
+    /// Internal and external links are processed by default, internal targets are not created
+    /// and refs are not processed.
+    fn default() -> Self {
+        LinkProcessor {
+            process_internal_links: true,
+            create_internal_targets: false,
+            process_external_links: true,
+            process_refs: false
+        }
+    }
+}
+