From 194c0fe2862736ac524afe63bbee3ce3f182c24f Mon Sep 17 00:00:00 2001 From: Laszlo Nagy Date: Sat, 26 Aug 2023 20:56:11 +1000 Subject: [PATCH] citnames: entry filter predicate implementation --- source/citnames_rs/src/filter.rs | 143 ++++++++++++++++++++++++------- source/citnames_rs/src/main.rs | 47 +++++----- 2 files changed, 140 insertions(+), 50 deletions(-) diff --git a/source/citnames_rs/src/filter.rs b/source/citnames_rs/src/filter.rs index 32c6ebcb..fd3205cf 100644 --- a/source/citnames_rs/src/filter.rs +++ b/source/citnames_rs/src/filter.rs @@ -20,11 +20,124 @@ use std::collections::hash_map::DefaultHasher; use std::collections::HashSet; use std::hash::{Hash, Hasher}; +use std::path::PathBuf; use json_compilation_db::Entry; use crate::configuration::{Content, DuplicateFilterFields}; +pub(crate) type EntryPredicate = Box bool>; + +impl Into for Content { + fn into(self) -> EntryPredicate { + let source_check = if self.include_only_existing_source { + EntryPredicateBuilder::source_check() + } else { + EntryPredicateBuilder::empty() + }; + let paths_to_include = EntryPredicateBuilder::contains(self.paths_to_include); + let paths_to_exclude = EntryPredicateBuilder::contains(self.paths_to_exclude); + let duplicates = self.duplicate_filter_fields.into(); + + source_check.and( + paths_to_include.and( + EntryPredicateBuilder::not(paths_to_exclude) + ) + ).and(duplicates) + .build() + } +} + + +struct EntryPredicateBuilder { + filter: Option, +} + +impl EntryPredicateBuilder { + fn empty() -> Self { + EntryPredicateBuilder { filter: None } + } + + fn build(self) -> EntryPredicate { + match self.filter { + Some(value) => value, + None => Box::new(|_: &Entry| true), + } + } + + fn not(self) -> Self { + let filter = match self.filter { + Some(mut original) => { + let new: EntryPredicate = Box::new(move |entry| { + let result = original(entry); + !result + }); + Some(new) + } + None => + None, + }; + EntryPredicateBuilder { filter } + } + + fn and(self, rhs: Self) -> Self { + let filter = match (self.filter, rhs.filter) { + (None, None) => + None, + (Some(mut lhs), Some(mut rhs)) => { + let new: EntryPredicate = Box::new(move |entry| { + let result = lhs(entry); + if result { + rhs(entry) + } else { + result + } + }); + Some(new) + } + (None, value) => + value, + (value, None) => + value, + }; + EntryPredicateBuilder { filter } + } + + fn source_check() -> EntryPredicateBuilder { + let filter: EntryPredicate = Box::new(|entry| { entry.file.is_file() }); + EntryPredicateBuilder { filter: Some(filter) } + } + + fn contains(paths: Vec) -> EntryPredicateBuilder { + if paths.is_empty() { + EntryPredicateBuilder::empty() + } else { + let filter: EntryPredicate = Box::new(move |entry| { + paths.iter().any(|path| { entry.file.starts_with(path) }) + }); + EntryPredicateBuilder { filter: Some(filter) } + } + } +} + +impl Into for DuplicateFilterFields { + fn into(self) -> EntryPredicateBuilder { + let mut have_seen = HashSet::new(); + let hash_calculation = DuplicateFilterFields::hash(&self); + + let filter: EntryPredicate = Box::new(move |entry| { + let hash = hash_calculation(&entry); + if !have_seen.contains(&hash) { + have_seen.insert(hash); + true + } else { + false + } + }); + EntryPredicateBuilder { filter: Some(filter) } + } +} + impl DuplicateFilterFields { fn hash_source(entry: &Entry) -> u64 { let mut s = DefaultHasher::default(); @@ -58,33 +171,3 @@ impl DuplicateFilterFields { } } } - -type EntryFilterPredicate = Box bool>; - -impl Into for DuplicateFilterFields { - fn into(self) -> EntryFilterPredicate { - let mut have_seen = HashSet::new(); - let hash_calculation = DuplicateFilterFields::hash(&self); - - Box::new(move |entry| { - let hash = hash_calculation(&entry); - if !have_seen.contains(&hash) { - have_seen.insert(hash); - true - } else { - false - } - }) - } -} - -impl Into for Content { - fn into(self) -> EntryFilterPredicate { - let duplicates: EntryFilterPredicate = self.duplicate_filter_fields.into(); - - Box::new(move |entry| { - let source_check: EntryFilterPredicate = todo!(); - todo!() - }) - } -} diff --git a/source/citnames_rs/src/main.rs b/source/citnames_rs/src/main.rs index e26eb6d0..a33599ef 100644 --- a/source/citnames_rs/src/main.rs +++ b/source/citnames_rs/src/main.rs @@ -23,7 +23,7 @@ use std::fs::OpenOptions; use std::io::stdin; use std::thread; -use anyhow::{Context, Result}; +use anyhow::{anyhow, Context, Result}; use clap::{arg, ArgAction, ArgMatches, command}; use crossbeam_channel::{bounded, Sender, unbounded}; use json_compilation_db::{Entry, read, write}; @@ -33,6 +33,7 @@ use simple_logger::SimpleLogger; use crate::configuration::Configuration; use crate::configuration::io::from_reader; use crate::execution::Execution; +use crate::filter::EntryPredicate; mod configuration; mod events; @@ -60,7 +61,8 @@ fn main() -> Result<()> { .get_matches(); // configure logging - configure_logging(&matches); + configure_logging(&matches) + .context("Configure logging from command line arguments.")?; // check semantic of the arguments let input = matches.get_one::("input") @@ -76,9 +78,11 @@ fn main() -> Result<()> { if input == "-" && config.unwrap_or("+") == "-" { error!("Both input and config reading the standard input."); + return Err(anyhow!("Both input and config reading the standard input.")); } if *append && output == "-" { error!("Append can't applied to the standard output."); + return Err(anyhow!("Append can't applied to the standard output.")); } // read configuration @@ -99,21 +103,6 @@ fn main() -> Result<()> { run(config, input.into(), output.into(), *append) } -fn configure_logging(matches: &ArgMatches) { - let level = match matches.get_count("verbose") { - 0 => LevelFilter::Error, - 1 => LevelFilter::Warn, - 2 => LevelFilter::Info, - 3 => LevelFilter::Debug, - _ => LevelFilter::Trace, - }; - // fixme: enable timestamp for debug only - SimpleLogger::new() - .with_level(level) - .init() - .unwrap(); -} - fn run(config: Configuration, input: String, output: String, append: bool) -> Result<()> { let (snd, rcv) = bounded::(100); @@ -129,9 +118,9 @@ fn run(config: Configuration, input: String, output: String, append: bool) -> Re // consume the entry streams here let temp = format!("{}.tmp", &output); { + let filter: EntryPredicate = config.output.content.into(); let file = OpenOptions::new().write(true).open(&temp)?; - // todo: filter duplicates - write(file, rcv.iter())?; + write(file, rcv.iter().filter(filter))?; } std::fs::remove_file(&output)?; std::fs::rename(&output, &temp)?; @@ -166,4 +155,22 @@ fn new_entries_from_events(sink: &Sender, input: &str) -> Result { // log::debug!("Found {new_entries} entries"); Ok(0) -} \ No newline at end of file +} + +fn configure_logging(matches: &ArgMatches) -> Result<()> { + let level = match matches.get_count("verbose") { + 0 => LevelFilter::Error, + 1 => LevelFilter::Warn, + 2 => LevelFilter::Info, + 3 => LevelFilter::Debug, + _ => LevelFilter::Trace, + }; + let mut logger = SimpleLogger::new() + .with_level(level); + if level <= LevelFilter::Debug { + logger = logger.with_local_timestamps() + } + logger.init()?; + + Ok(()) +}