Add DelimFile new_reader and new_writer methods #15

Merged · 6 commits · Nov 7, 2023
Changes from 4 commits
142 changes: 107 additions & 35 deletions src/io/mod.rs
@@ -43,10 +43,11 @@
//! ```
use std::fs::File;
use std::io::{BufRead, BufReader, BufWriter, Write};
use std::marker::PhantomData;
use std::path::Path;

use crate::{FgError, Result};
use csv::{QuoteStyle, ReaderBuilder, WriterBuilder};
use csv::{DeserializeRecordsIntoIter, QuoteStyle, ReaderBuilder, Writer, WriterBuilder};
use flate2::bufread::MultiGzDecoder;
use flate2::write::GzEncoder;
use flate2::Compression;
@@ -79,7 +80,7 @@ impl Io {
}

/// Returns true if the path ends with a recognized GZIP file extension
fn is_gzip_path<P: AsRef<Path>>(p: &P) -> bool {
fn is_gzip_path<P: AsRef<Path>>(p: P) -> bool {
if let Some(ext) = p.as_ref().extension() {
match ext.to_str() {
Some(x) => GZIP_EXTENSIONS.contains(&x),
@@ -92,11 +93,11 @@

/// Opens a file for reading. Transparently handles reading gzipped files based on the
/// file extension.
pub fn new_reader<P>(&self, p: &P) -> Result<Box<dyn BufRead + Send>>
pub fn new_reader<P>(&self, p: P) -> Result<Box<dyn BufRead + Send>>
where
P: AsRef<Path>,
{
let file = File::open(p).map_err(FgError::IoError)?;
let file = File::open(p.as_ref()).map_err(FgError::IoError)?;
let buf = BufReader::with_capacity(self.buffer_size, file);

if Self::is_gzip_path(p) {
@@ -108,11 +109,11 @@

/// Opens a file for writing. Transparently handles writing GZIP'd data if the file
/// ends with a recognized GZIP extension.
pub fn new_writer<P>(&self, p: &P) -> Result<BufWriter<Box<dyn Write + Send>>>
pub fn new_writer<P>(&self, p: P) -> Result<BufWriter<Box<dyn Write + Send>>>
where
P: AsRef<Path>,
{
let file = File::create(p).map_err(FgError::IoError)?;
let file = File::create(p.as_ref()).map_err(FgError::IoError)?;
let write: Box<dyn Write + Send> = if Io::is_gzip_path(p) {
Box::new(GzEncoder::new(file, self.compression))
} else {
@@ -152,6 +153,80 @@ impl Io {
}
}
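
With `is_gzip_path`, `new_reader`, and `new_writer` now taking `P: AsRef<Path>` by value rather than `&P`, callers can pass a `&str`, `String`, or owned `PathBuf` directly. A minimal call-site sketch, assuming the crate's public `fgoxide::io::Io` (with its `Default` impl) and `fgoxide::FgError` exports and hypothetical file paths:

```rust
use std::path::PathBuf;

use fgoxide::io::Io;
use fgoxide::FgError;

// Sketch only: the paths are made up. Gzip handling is still keyed off the
// file extension, so the `.gz` input is decompressed transparently.
fn open_both() -> Result<(), FgError> {
    let io = Io::default();
    let _reader = io.new_reader("data/input.tsv.gz")?; // &str, no extra `&` needed
    let _writer = io.new_writer(PathBuf::from("out/result.tsv"))?; // owned PathBuf
    Ok(())
}
```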

/// A struct that wraps a csv `Reader` and provides methods for reading one record at a time.
/// It also implements `Iterator`.
pub struct DelimFileReader<D: DeserializeOwned> {
record_iter: DeserializeRecordsIntoIter<Box<dyn BufRead + Send>, D>,
}

impl<D: DeserializeOwned> DelimFileReader<D> {
/// Returns a new `DelimFileReader` that will read records from the given reader with the given
/// delimiter and quoting. Assumes the input file has a header row.
pub fn new(reader: Box<dyn BufRead + Send>, delimiter: u8, quote: bool) -> Self {
let csv_reader = ReaderBuilder::new()
.delimiter(delimiter)
.has_headers(true)
.quoting(quote)
.from_reader(reader);
let record_iter = csv_reader.into_deserialize();
Self { record_iter }
}

/// Returns the next record from the underlying reader.
pub fn read(&mut self) -> Option<Result<D>> {
self.record_iter.next().map(|result| result.map_err(FgError::ConversionError))
}
}

impl<D: DeserializeOwned> Iterator for DelimFileReader<D> {
type Item = Result<D>;

fn next(&mut self) -> Option<Self::Item> {
self.read()
}
}
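
A minimal sketch of using the new reader on its own, wrapping a handle opened by `Io`; the `Sample` record struct and the path are hypothetical, and the module paths assume the crate's `fgoxide::io` layout:

```rust
use fgoxide::io::{DelimFileReader, Io};
use fgoxide::FgError;
use serde::Deserialize;

// Hypothetical record type; any `DeserializeOwned` struct works.
#[derive(Deserialize)]
struct Sample {
    name: String,
    count: usize,
}

fn first_sample(path: &str) -> Result<Option<Sample>, FgError> {
    // Wrap a `Box<dyn BufRead + Send>` (here opened by `Io`) in a typed reader.
    let handle = Io::default().new_reader(path)?;
    let mut records: DelimFileReader<Sample> = DelimFileReader::new(handle, b'\t', true);
    // `read()` yields one record at a time; since the reader implements
    // `Iterator`, `records.collect::<Result<Vec<_>, _>>()` is an alternative.
    records.read().transpose()
}
```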

/// A struct that wraps a csv `Writer` and provides methods for writing single records as well as
/// multiple records from an iterator.
pub struct DelimFileWriter<S: Serialize> {
csv_writer: Writer<BufWriter<Box<dyn Write + Send>>>,
_data: PhantomData<S>,
}

impl<S: Serialize> DelimFileWriter<S> {
/// Returns a new `DelimFileWriter` that writes to the given `writer` with the given delimiter
/// and quoting. The output file will have a header row.
pub fn new(writer: BufWriter<Box<dyn Write + Send>>, delimiter: u8, quote: bool) -> Self {
let csv_writer = WriterBuilder::new()
.delimiter(delimiter)
.has_headers(true)
.quote_style(if quote { QuoteStyle::Necessary } else { QuoteStyle::Never })
.from_writer(writer);
Self { csv_writer, _data: PhantomData }
}

/// Writes a single record to the underlying writer.
pub fn write(&mut self, rec: &S) -> Result<()> {
self.csv_writer.serialize(rec).map_err(FgError::ConversionError)
}

/// Writes all records from `iter` to the underlying writer, in order.
pub fn write_all(&mut self, iter: impl IntoIterator<Item = S>) -> Result<()> {
for rec in iter {
self.write(&rec)?;
}
self.flush()?;
Ok(())
}

/// Flushes the underlying writer.
/// Note: this is not strictly necessary as the underlying writer is flushed automatically
/// on `Drop`.
pub fn flush(&mut self) -> Result<()> {
self.csv_writer.flush().map_err(FgError::IoError)
}
}
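
A matching sketch for the writer side, with the same hypothetical `Sample` record (any `Serialize` type works) and an assumed output path:

```rust
use fgoxide::io::{DelimFileWriter, Io};
use fgoxide::FgError;
use serde::Serialize;

// The same hypothetical record type as above, now on the write path.
#[derive(Serialize)]
struct Sample {
    name: String,
    count: usize,
}

fn save(path: &str, samples: Vec<Sample>) -> Result<(), FgError> {
    let handle = Io::default().new_writer(path)?;
    let mut writer: DelimFileWriter<Sample> = DelimFileWriter::new(handle, b',', true);
    // Single records go through `write`; `write_all` drains an iterator and flushes.
    writer.write(&Sample { name: "first".to_string(), count: 1 })?;
    writer.write_all(samples)?;
    Ok(())
}
```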

/// Unit-struct that contains associated functions for reading and writing Structs to/from
/// delimited files. Structs should use serde's Serialize/Deserialize derive macros in
/// order to be used with these functions.
@@ -167,6 +242,30 @@ impl Default for DelimFile {
}

impl DelimFile {
/// Returns a new `DelimFileReader` instance that reads from the given path, opened with this
/// `DelimFile`'s `Io` instance.
pub fn new_reader<D: DeserializeOwned, P: AsRef<Path>>(
&self,
path: P,
delimiter: u8,
quote: bool,
) -> Result<DelimFileReader<D>> {
let file = self.io.new_reader(path)?;
Ok(DelimFileReader::new(file, delimiter, quote))
}

/// Returns a new `DelimFileWriter` instance that writes to the given path, opened with this
/// `DelimFile`'s `Io` instance.
pub fn new_writer<S: Serialize, P: AsRef<Path>>(
&self,
path: P,
delimiter: u8,
quote: bool,
) -> Result<DelimFileWriter<S>> {
let file = self.io.new_writer(path)?;
Ok(DelimFileWriter::new(file, delimiter, quote))
}
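
The two convenience methods also compose into a write-then-read round trip, mirroring how the bulk `write` and `read` methods below now delegate to them; the record type and path are again hypothetical:

```rust
use fgoxide::io::DelimFile;
use fgoxide::FgError;
use serde::{Deserialize, Serialize};

// Hypothetical record type and path, for illustration only.
#[derive(Serialize, Deserialize)]
struct Sample {
    name: String,
    count: usize,
}

fn roundtrip(path: &str, samples: Vec<Sample>) -> Result<Vec<Sample>, FgError> {
    let delim = DelimFile::default();
    // Write and read back with the same delimiter and quoting settings.
    delim.new_writer(path, b'\t', true)?.write_all(samples)?;
    delim.new_reader(path, b'\t', true)?.collect()
}
```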

/// Writes a series of one or more structs to a delimited file. If `quote` is true then fields
/// will be quoted as necessary, otherwise they will never be quoted.
pub fn write<S, P>(
@@ -180,19 +279,7 @@ impl DelimFile {
S: Serialize,
P: AsRef<Path>,
{
let write = self.io.new_writer(path)?;

let mut writer = WriterBuilder::new()
.delimiter(delimiter)
.has_headers(true)
.quote_style(if quote { QuoteStyle::Necessary } else { QuoteStyle::Never })
.from_writer(write);

for rec in recs {
writer.serialize(rec).map_err(FgError::ConversionError)?;
}

writer.flush().map_err(FgError::IoError)
self.new_writer(path, delimiter, quote)?.write_all(recs)
}

/// Writes structs implementing `[Serialize]` to a file with tab separators between fields.
Expand Down Expand Up @@ -221,22 +308,7 @@ impl DelimFile {
D: DeserializeOwned,
P: AsRef<Path>,
{
let read = self.io.new_reader(path)?;

let mut reader = ReaderBuilder::new()
.delimiter(delimiter)
.has_headers(true)
.quoting(quote)
.from_reader(read);

let mut results = vec![];

for result in reader.deserialize::<D>() {
let rec = result.map_err(FgError::ConversionError)?;
results.push(rec);
}

Ok(results)
self.new_reader(path, delimiter, quote)?.collect()
}

/// Reads structs implementing `[Deserialize]` from a file with tab separators between fields.