From 557122f81956114f2d0f2584931b5f6e84e9553d Mon Sep 17 00:00:00 2001 From: Joseph LaFreniere Date: Sat, 28 Dec 2024 19:47:10 -0500 Subject: [PATCH] TODO --- src/adapters.rs | 209 +++++++++++++++++++++++------------------ src/adapters/custom.rs | 45 +++------ src/config.rs | 11 +-- src/preproc.rs | 2 +- 4 files changed, 133 insertions(+), 134 deletions(-) diff --git a/src/adapters.rs b/src/adapters.rs index 1736e02..a483e77 100644 --- a/src/adapters.rs +++ b/src/adapters.rs @@ -12,8 +12,8 @@ use crate::{adapted_iter::AdaptedFilesIterBox, config::RgaConfig, matching::*}; use anyhow::{format_err, Context, Result}; use async_trait::async_trait; use custom::strs; +use custom::Builtin; use custom::CustomAdapterConfig; -use custom::CustomIdentifiers; use custom::BUILTIN_SPAWNING_ADAPTERS; use log::*; use tokio::io::AsyncRead; @@ -135,108 +135,129 @@ pub struct AdaptInfo { /// (enabledAdapters, disabledAdapters) type AdaptersTuple = (Vec>, Vec>); +static EMPTY_SLICE: &'static [&'static str] = &[]; + pub fn get_all_adapters( - custom_identifiers: Option, + additional_extensions: &HashMap<&[&str], Builtin>, custom_adapters: Option>, ) -> AdaptersTuple { - let bz2_extensions = custom_identifiers - .as_ref() - .and_then(|ids| ids.bz2.as_ref()) - .and_then(|bz2| bz2.extensions.clone()) - .unwrap_or_else(|| strs(decompress::EXTENSIONS_BZ2)); - let bz2_mimetypes = custom_identifiers - .as_ref() - .and_then(|ids| ids.bz2.as_ref()) - .and_then(|bz2| bz2.mimetypes.clone()) - .unwrap_or_else(|| strs(decompress::MIMETYPES_BZ2)); + let mut extensions = additional_extensions.clone(); - let gz_extensions = custom_identifiers - .as_ref() - .and_then(|ids| ids.gz.as_ref()) - .and_then(|gz| gz.extensions.clone()) - .unwrap_or_else(|| strs(decompress::EXTENSIONS_GZ)); - let gz_mimetypes = custom_identifiers - .as_ref() - .and_then(|ids| ids.gz.as_ref()) - .and_then(|gz| gz.mimetypes.clone()) - .unwrap_or_else(|| strs(decompress::MIMETYPES_GZ)); + for extension in decompress::EXTENSIONS_BZ2 { + extensions.try_insert(extension, Builtin::Bz2) + } - let xz_extensions = custom_identifiers - .as_ref() - .and_then(|ids| ids.xz.as_ref()) - .and_then(|xz| xz.extensions.clone()) - .unwrap_or_else(|| strs(decompress::EXTENSIONS_XZ)); - let xz_mimetypes = custom_identifiers - .as_ref() - .and_then(|ids| ids.xz.as_ref()) - .and_then(|xz| xz.mimetypes.clone()) - .unwrap_or_else(|| strs(decompress::MIMETYPES_XZ)); + let mut bz2_extensions: [&str] = [ + decompress::EXTENSIONS_BZ2[..], + (additional_extensions + .get(&Builtin::Bz2) + .unwrap_or(&EMPTY_SLICE))[..], + ]; + // .as_ref() + // .and_then(|ids| ids.bz2.as_ref()) + // .and_then(|bz2| bz2.extensions.clone()) + // .unwrap_or_else(|| strs(decompress::EXTENSIONS_BZ2)); + // let bz2_mimetypes = additional_extensions + // .as_ref() + // .and_then(|ids| ids.bz2.as_ref()) + // .and_then(|bz2| bz2.mimetypes.clone()) + // .unwrap_or_else(|| strs(decompress::MIMETYPES_BZ2)); - let zst_extensions = custom_identifiers - .as_ref() - .and_then(|ids| ids.zst.as_ref()) - .and_then(|zst| zst.extensions.clone()) - .unwrap_or_else(|| strs(decompress::EXTENSIONS_ZST)); - let zst_mimetypes = custom_identifiers - .as_ref() - .and_then(|ids| ids.zst.as_ref()) - .and_then(|zst| zst.mimetypes.clone()) - .unwrap_or_else(|| strs(decompress::MIMETYPES_ZST)); + let gz_extensions = [ + decompress::EXTENSIONS_GZ[..], + (additional_extensions + .get(&Builtin::Gz) + .unwrap_or(&EMPTY_SLICE))[..], + ]; + // let gz_mimetypes = additional_extensions + // .as_ref() + // .and_then(|ids| ids.gz.as_ref()) + // .and_then(|gz| gz.mimetypes.clone()) + // .unwrap_or_else(|| strs(decompress::MIMETYPES_GZ)); - let ffmpeg_extensions = custom_identifiers - .as_ref() - .and_then(|ids| ids.ffmpeg.as_ref()) - .and_then(|ffmpeg| ffmpeg.extensions.clone()) - .unwrap_or_else(|| strs(ffmpeg::EXTENSIONS)); - let ffmpeg_mimetypes = custom_identifiers - .as_ref() - .and_then(|ids| ids.ffmpeg.as_ref()) - .and_then(|ffmpeg| ffmpeg.mimetypes.clone()) - .unwrap_or_else(|| strs(ffmpeg::MIMETYPES)); + let xz_extensions = [ + decompress::EXTENSIONS_XZ[..], + (additional_extensions + .get(&Builtin::Xz) + .unwrap_or(&EMPTY_SLICE))[..], + ]; + // let xz_mimetypes = additional_extensions + // .as_ref() + // .and_then(|ids| ids.xz.as_ref()) + // .and_then(|xz| xz.mimetypes.clone()) + // .unwrap_or_else(|| strs(decompress::MIMETYPES_XZ)); - let mbox_extensions = custom_identifiers - .as_ref() - .and_then(|ids| ids.mbox.as_ref()) - .and_then(|mbox| mbox.extensions.clone()) - .unwrap_or_else(|| strs(mbox::EXTENSIONS)); - let mbox_mimetypes = custom_identifiers - .as_ref() - .and_then(|ids| ids.mbox.as_ref()) - .and_then(|mbox| mbox.mimetypes.clone()) - .unwrap_or_else(|| strs(mbox::MIMETYPES)); + let zst_extensions = [ + decompress::EXTENSIONS_ZST[..], + (additional_extensions + .get(&Builtin::Zst) + .unwrap_or(&EMPTY_SLICE))[..], + ]; + // let zst_mimetypes = additional_extensions + // .as_ref() + // .and_then(|ids| ids.zst.as_ref()) + // .and_then(|zst| zst.mimetypes.clone()) + // .unwrap_or_else(|| strs(decompress::MIMETYPES_ZST)); - let sqlite_extensions = custom_identifiers - .as_ref() - .and_then(|ids| ids.sqlite.as_ref()) - .and_then(|sqlite| sqlite.extensions.clone()) - .unwrap_or_else(|| strs(sqlite::EXTENSIONS)); - let sqlite_mimetypes = custom_identifiers - .as_ref() - .and_then(|ids| ids.sqlite.as_ref()) - .and_then(|sqlite| sqlite.mimetypes.clone()) - .unwrap_or_else(|| strs(sqlite::MIMETYPES)); + let ffmpeg_extensions = [ + ffmpeg::EXTENSIONS[..], + (additional_extensions + .get(&Builtin::Ffmpeg) + .unwrap_or(&EMPTY_SLICE))[..], + ]; + // let ffmpeg_mimetypes = additional_extensions + // .as_ref() + // .and_then(|ids| ids.ffmpeg.as_ref()) + // .and_then(|ffmpeg| ffmpeg.mimetypes.clone()) + // .unwrap_or_else(|| strs(ffmpeg::MIMETYPES)); - let tar_extensions = custom_identifiers - .as_ref() - .and_then(|ids| ids.tar.as_ref()) - .and_then(|tar| tar.extensions.clone()) - .unwrap_or_else(|| strs(tar::EXTENSIONS)); - let tar_mimetypes = custom_identifiers - .as_ref() - .and_then(|ids| ids.tar.as_ref()) - .and_then(|tar| tar.mimetypes.clone()) - .unwrap_or_else(|| strs(tar::MIMETYPES)); + let mbox_extensions = [ + mbox::EXTENSIONS[..], + (additional_extensions + .get(&Builtin::Mbox) + .unwrap_or(&EMPTY_SLICE))[..], + ]; + // let mbox_mimetypes = additional_extensions + // .as_ref() + // .and_then(|ids| ids.mbox.as_ref()) + // .and_then(|mbox| mbox.mimetypes.clone()) + // .unwrap_or_else(|| strs(mbox::MIMETYPES)); + + let sqlite_extensions = [ + sqlite::EXTENSIONS[..], + (additional_extensions + .get(&Builtin::Sqlite) + .unwrap_or(&EMPTY_SLICE))[..], + ]; + // let sqlite_mimetypes = additional_extensions + // .as_ref() + // .and_then(|ids| ids.sqlite.as_ref()) + // .and_then(|sqlite| sqlite.mimetypes.clone()) + // .unwrap_or_else(|| strs(sqlite::MIMETYPES)); - let zip_extensions = custom_identifiers - .as_ref() - .and_then(|ids| ids.zip.as_ref()) - .and_then(|zip| zip.extensions.clone()) - .unwrap_or_else(|| strs(zip::EXTENSIONS)); - let zip_mimetypes = custom_identifiers - .as_ref() - .and_then(|ids| ids.zip.as_ref()) - .and_then(|zip| zip.mimetypes.clone()) - .unwrap_or_else(|| strs(zip::MIMETYPES)); + let tar_extensions = [ + tar::EXTENSIONS[..], + (additional_extensions + .get(&Builtin::Tar) + .unwrap_or(&EMPTY_SLICE))[..], + ]; + // let tar_mimetypes = additional_extensions + // .as_ref() + // .and_then(|ids| ids.tar.as_ref()) + // .and_then(|tar| tar.mimetypes.clone()) + // .unwrap_or_else(|| strs(tar::MIMETYPES)); + + let zip_extensions = [ + zip::EXTENSIONS[..], + (additional_extensions + .get(&Builtin::Zip) + .unwrap_or(&EMPTY_SLICE))[..], + ]; + // let zip_mimetypes = additional_extensions + // .as_ref() + // .and_then(|ids| ids.zip.as_ref()) + // .and_then(|zip| zip.mimetypes.clone()) + // .unwrap_or_else(|| strs(zip::MIMETYPES)); // order in descending priority let mut adapters: Vec> = vec![]; @@ -300,12 +321,12 @@ pub fn get_all_adapters( * - "+a,b" means use default list but also a and b (a,b will be prepended to the list so given higher priority) */ pub fn get_adapters_filtered>( - custom_identifiers: Option, + additional_extensions: &HashMap<&Builtin, &[&str]>, custom_adapters: Option>, adapter_names: &[T], ) -> Result>> { let (def_enabled_adapters, def_disabled_adapters) = - get_all_adapters(custom_identifiers, custom_adapters); + get_all_adapters(additional_extensions, custom_adapters); let adapters = if !adapter_names.is_empty() { let adapters_map: HashMap<_, _> = def_enabled_adapters .iter() diff --git a/src/adapters/custom.rs b/src/adapters/custom.rs index 6b48df2..6659e86 100644 --- a/src/adapters/custom.rs +++ b/src/adapters/custom.rs @@ -11,6 +11,7 @@ use lazy_static::lazy_static; use log::debug; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; +use std::collections::HashMap; use std::path::Path; use std::process::Stdio; use tokio::io::AsyncReadExt; @@ -20,39 +21,17 @@ use tokio::process::Command; use tokio_util::io::StreamReader; // mostly the same as AdapterMeta + SpawningFileAdapter -#[derive(Debug, Deserialize, Serialize, JsonSchema, Default, PartialEq, Clone)] -pub struct CustomIdentifier { - /// The file extensions this adapter supports, for example `["gz", "tgz"]`. - pub extensions: Option>, - /// If not null and --rga-accurate is enabled, mimetype matching is used instead of file name matching. - pub mimetypes: Option>, -} - -#[derive(Debug, Deserialize, Serialize, JsonSchema, PartialEq, Clone)] -pub struct CustomIdentifiers { - /// The identifiers to process as bz2 archives. - pub bz2: Option, - /// The identifiers to process as gz archives. - pub gz: Option, - /// The identifiers to process as xz archives. - pub xz: Option, - /// The identifiers to process as zst archives. - pub zst: Option, - - /// The identifiers to process via ffmpeg. - pub ffmpeg: Option, - - /// The identifiers to process as mbox files. - pub mbox: Option, - - /// The identifiers to process as SQLite files. - pub sqlite: Option, - - /// The identifiers to process as tar files. - pub tar: Option, - - /// The identifiers to process as zip archives. - pub zip: Option, +#[derive(Clone, Debug, Deserialize, Eq, Hash, PartialEq, Serialize)] +pub enum Builtin { + Bz2, + Gz, + Xz, + Zst, + Ffmpeg, + Mbox, + Sqlite, + Tar, + Zip, } #[derive(Debug, Deserialize, Serialize, JsonSchema, Default, PartialEq, Clone)] diff --git a/src/config.rs b/src/config.rs index 41a6cb7..f57538d 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1,12 +1,11 @@ -use crate::{ - adapters::custom::{CustomAdapterConfig, CustomIdentifiers}, - project_dirs, -}; +use crate::adapters::custom; +use crate::project_dirs; use anyhow::{Context, Result}; use derive_more::FromStr; use log::*; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; +use std::collections::HashMap; use std::ffi::OsString; use std::io::Read; use std::{fs::File, io::Write, iter::IntoIterator, path::PathBuf, str::FromStr}; @@ -168,11 +167,11 @@ pub struct RgaConfig { #[serde(default, skip_serializing_if = "is_default")] #[structopt(skip)] // config file only - pub custom_adapters: Option>, + pub custom_adapters: Option>, #[serde(default, skip_serializing_if = "is_default")] #[structopt(skip)] // config file only - pub custom_identifiers: Option, + pub additional_extensions: Option>, #[serde(skip)] // CLI only #[structopt(long = "--rga-config-file", require_equals = true)] diff --git a/src/preproc.rs b/src/preproc.rs index 089ad03..be27507 100644 --- a/src/preproc.rs +++ b/src/preproc.rs @@ -33,7 +33,7 @@ async fn choose_adapter( inp: &mut (impl AsyncBufRead + Unpin), ) -> Result, FileMatcher, ActiveAdapters)>> { let active_adapters = get_adapters_filtered( - config.custom_identifiers.clone(), + config.additional_extensions.clone(), config.custom_adapters.clone(), &config.adapters, )?;