Skip to content

Commit

Permalink
Enhance orc-metadata bin to show basic stripe metadata
Browse files Browse the repository at this point in the history
  • Loading branch information
Jefffrey committed Mar 26, 2024
1 parent 8537adb commit 24bd57e
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 5 deletions.
37 changes: 32 additions & 5 deletions src/bin/orc-metadata.rs
Original file line number Diff line number Diff line change
@@ -1,21 +1,24 @@
use std::{error::Error, fs::File, path::PathBuf};
use std::{error::Error, fs::File, path::PathBuf, sync::Arc};

use clap::Parser;
use datafusion_orc::ArrowReaderBuilder;
use datafusion_orc::{reader::metadata::read_metadata, stripe::Stripe};

// Command-line arguments for this binary, parsed by clap's derive `Parser`.
// NOTE: the `///` comments on the fields below are not just documentation;
// clap's derive uses them as the `--help` text, so edit them with that in mind.
#[derive(Parser)]
#[command(version, about, long_about = None)]
struct Cli {
/// ORC file path
file: PathBuf,

/// Display data for all stripes
// `short, long` are given without values, so clap derives the flag names
// from the field name (presumably `-s` / `--stripes`; confirm with `--help`).
#[arg(short, long)]
stripes: bool,
}

fn main() -> Result<(), Box<dyn Error>> {
let cli = Cli::parse();

let f = File::open(cli.file)?;
let builder = ArrowReaderBuilder::try_new(f)?;
let metadata = builder.file_metadata();
let mut f = File::open(cli.file)?;
let metadata = Arc::new(read_metadata(&mut f)?);

// TODO: better way to handle this printing?
// TODO: move this display to actual library
Expand All @@ -31,5 +34,29 @@ fn main() -> Result<(), Box<dyn Error>> {
// TODO: nesting types indentation is messed up
println!("schema:\n{}", metadata.root_data_type());

// Optional per-stripe section: only printed when the `stripes` CLI flag is set.
if cli.stripes {
    println!("\n=== Stripes ===");
    for (i, stripe_metadata) in metadata.stripe_metadatas().iter().enumerate() {
        // Construct the stripe from the open file handle plus the file-level
        // metadata; `?` propagates any failure out of `main`.
        let stripe = Stripe::new(
            &mut f,
            &metadata,
            metadata.root_data_type(),
            i,
            stripe_metadata,
        )?;
        println!("stripe index: {i}");
        println!("number of rows: {}", stripe.number_of_rows());
        // Borrow the timezone rather than cloning it: `as_deref` yields an
        // `Option<&str>`, and the "None" fallback is a static literal, so no
        // `String` is allocated whether or not a timezone is present.
        println!(
            "writer timezone: {}",
            stripe.footer().writer_timezone.as_deref().unwrap_or("None")
        );
        // Blank line separates consecutive stripes in the output.
        println!();
    }
}

Ok(())
}
4 changes: 4 additions & 0 deletions src/stripe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,10 @@ impl Stripe {
/// Returns the offset stored for this stripe.
///
/// NOTE(review): presumably a byte offset from the start of the ORC file — confirm.
pub fn stripe_offset(&self) -> usize {
self.stripe_offset
}

/// Returns the number of rows stored for this stripe.
pub fn number_of_rows(&self) -> usize {
self.number_of_rows
}
}

#[derive(Debug)]
Expand Down

0 comments on commit 24bd57e

Please sign in to comment.