Skip to content

Commit

Permalink
Add basic support for embedding files (no PDF/A3 checks yet)
Browse files Browse the repository at this point in the history
  • Loading branch information
LaurenzV committed Mar 1, 2025
1 parent 8251625 commit 9003740
Show file tree
Hide file tree
Showing 8 changed files with 352 additions and 31 deletions.
32 changes: 24 additions & 8 deletions crates/krilla/src/chunk_container.rs
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ impl ChunkContainer {
sc.serialize_settings().pdf_version.write_xmp(&mut xmp);

let named_destinations = sc.global_objects.named_destinations.take();
let embedded_files = sc.global_objects.embedded_files.take();

// We only write a catalog if a page tree exists. Every valid PDF must have one
// and krilla ensures that there always is one, but for snapshot tests, it can be
Expand Down Expand Up @@ -211,19 +212,34 @@ impl ChunkContainer {
catalog.outlines(remapper[&ol.0]);
}

if !named_destinations.is_empty() {
if !named_destinations.is_empty() || !embedded_files.is_empty() {
// Cannot use pdf-writer API here because it requires Ref's, while
// we write our destinations directly into the array.
let mut names = catalog.names();
let mut name_tree = names.destinations();
let mut name_entries = name_tree.names();

// Sort to prevent inconsistent order.
let mut sorted = named_destinations.into_iter().collect::<Vec<_>>();
sorted.sort_by(|a, b| a.1.cmp(&b.1));
if !named_destinations.is_empty() {
let mut dest_name_tree = names.destinations();
let mut dest_name_entries = dest_name_tree.names();

for (name, dest_ref) in sorted {
name_entries.insert(Str(name.name.as_bytes()), remapper[&dest_ref]);
// Sort to prevent inconsistent order.
let mut sorted = named_destinations.into_iter().collect::<Vec<_>>();
sorted.sort_by(|a, b| a.1.cmp(&b.1));

for (name, dest_ref) in sorted {
dest_name_entries.insert(Str(name.name.as_bytes()), remapper[&dest_ref]);
}

dest_name_entries.finish();
dest_name_tree.finish();
}

if !embedded_files.is_empty() {
let mut embedded_files_name_tree = names.embedded_files();
let mut embedded_name_entries = embedded_files_name_tree.names();

for (_ref, name) in embedded_files {
embedded_name_entries.insert(Str(name.as_bytes()), remapper[&_ref]);
}
}
}

Expand Down
9 changes: 7 additions & 2 deletions crates/krilla/src/document.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,15 @@
//!
//! [`Page`]: crate::page::Page
use tiny_skia_path::{Rect, Size};

use crate::embed::EmbeddedFile;
use crate::error::KrillaResult;
use crate::metadata::Metadata;
use crate::object::outline::Outline;
use crate::object::page::Page;
use crate::object::page::PageLabel;
use crate::serialize::{SerializeContext, SerializeSettings};
use crate::tagging::TagTree;
use tiny_skia_path::{Rect, Size};

/// A PDF document.
pub struct Document {
Expand Down Expand Up @@ -81,6 +81,11 @@ impl Document {
self.serializer_context.set_tag_tree(tag_tree);
}

/// Embed a new file in the PDF document.
///
/// Ownership of `file` is passed on to the document's serializer context,
/// which takes care of registering it for serialization.
pub fn embed_file(&mut self, file: EmbeddedFile) {
self.serializer_context.embed_file(file);
}

/// Attempt to write the document to a PDF.
pub fn finish(mut self) -> KrillaResult<Vec<u8>> {
// Write empty page if none has been created yet.
Expand Down
142 changes: 124 additions & 18 deletions crates/krilla/src/object/embed.rs
Original file line number Diff line number Diff line change
@@ -1,38 +1,40 @@
//! Embedding attachments to a PDF file.
use std::sync::Arc;
use std::ops::DerefMut;
use pdf_writer::{Chunk, Finish, Ref};
use pdf_writer::types::AssociationKind;
use crate::metadata::pdf_date;
use crate::object::{Cacheable, ChunkContainerFn};
use crate::serialize::SerializeContext;
use crate::stream::FilterStreamBuilder;
use crate::util::NameExt;
use crate::validation::Validator;
use crate::version::PdfVersion;
use pdf_writer::types::AssociationKind;
use pdf_writer::{Chunk, Finish, Name, Ref, Str, TextStr};
use std::ops::DerefMut;
use std::sync::Arc;

/// An error while embedding the file.
///
/// The common comparison/debug traits are derived so that callers can print
/// the error and match on it in assertions.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum EmbedError {
    /// The document doesn't contain a date, which is required for embedded files
    /// in some export modes.
    MissingDate,
}

/// An embedded file.
#[derive(Debug, Clone, Hash)]
pub struct EmbeddedFile {
/// The name of the embedded file.
pub name: String,
pub path: String,
/// The mime type of the embedded file.
pub mime_type: String,
/// A description of the embedded file.
pub description: String,
pub description: Option<String>,
/// The association kind of the embedded file.
pub association_kind: AssociationKind,
/// The raw data of the embedded file.
pub data: Arc<Vec<u8>>,
/// Whether the embedded file should be compressed (recommended to turn off if the
/// original file already has compression).
pub compress: bool
pub compress: bool,
}

impl Cacheable for EmbeddedFile {
Expand All @@ -46,30 +48,134 @@ impl Cacheable for EmbeddedFile {

let file_stream = if self.compress {
FilterStreamBuilder::new_from_binary_data(&self.data)
} else {
} else {
FilterStreamBuilder::new_from_uncompressed(&self.data)
}.finish(&sc.serialize_settings());

}
.finish(&sc.serialize_settings());

let mut embedded_file_stream = chunk.embedded_file(stream_ref, &file_stream.encoded_data());
file_stream.write_filters(embedded_file_stream.deref_mut().deref_mut());

embedded_file_stream.subtype(self.mime_type.to_pdf_name());
let mut params = embedded_file_stream.params();
params.size(self.data.len() as i32);

if let Some(date_time) = sc.metadata()
if let Some(date_time) = sc
.metadata()
.and_then(|m| m.modification_date.or_else(|| m.creation_date))
{
let date = pdf_date(date_time);
params.modification_date(date);
} else {
todo!();
} else {
}

params.finish();
embedded_file_stream.finish();

chunk

let mut file_spec = chunk.file_spec(root_ref);
file_spec.path(Str(self.path.as_bytes()));

if sc.serialize_settings().pdf_version >= PdfVersion::Pdf17 {
file_spec.unic_file(TextStr(&self.path));
}

let mut ef = file_spec.insert(Name(b"EF")).dict();
ef.pair(Name(b"F"), stream_ref);

if sc.serialize_settings().pdf_version >= PdfVersion::Pdf17 {
ef.pair(Name(b"UF"), stream_ref);
}

ef.finish();

if matches!(
sc.serialize_settings().validator,
Validator::A3_A | Validator::A3_B | Validator::A3_U
) {
// PDF 2.0, but ISO 19005-3 (PDF/A-3) Annex E allows it for PDF/A-3.
file_spec.association_kind(self.association_kind);
}

if let Some(description) = self.description {
file_spec.description(TextStr(&description));
}

file_spec.finish();

chunk
}
}

#[cfg(test)]
mod tests {
    use crate::embed::EmbeddedFile;
    use crate::tests::ASSETS_PATH;
    use crate::Document;
    use krilla_macros::snapshot;
    use pdf_writer::types::AssociationKind;
    use std::sync::Arc;

    /// Read `asset` (relative to the assets directory) and wrap it in an
    /// uncompressed [`EmbeddedFile`] carrying the given attributes.
    fn load_asset(
        asset: &str,
        path: &str,
        mime_type: &str,
        description: &str,
        association_kind: AssociationKind,
    ) -> EmbeddedFile {
        let data = std::fs::read(ASSETS_PATH.join(asset)).unwrap();

        EmbeddedFile {
            path: path.to_string(),
            mime_type: mime_type.to_string(),
            description: Some(description.to_string()),
            association_kind,
            data: Arc::new(data),
            compress: false,
        }
    }

    /// A plain text file.
    fn file_1() -> EmbeddedFile {
        // NOTE(review): "text/txt" is not a registered media type ("text/plain"
        // would be); it is kept as is because the snapshots depend on it.
        load_asset(
            "emojis.txt",
            "emojis.txt",
            "text/txt",
            "The description of the file.",
            AssociationKind::Supplement,
        )
    }

    /// An SVG image.
    fn file_2() -> EmbeddedFile {
        load_asset(
            "svgs/resvg_structure_svg_nested_svg_with_rect.svg",
            "image.svg",
            "image/svg+xml",
            "A nice SVG image!",
            AssociationKind::Supplement,
        )
    }

    /// A PNG image with an unspecified association kind.
    fn file_3() -> EmbeddedFile {
        load_asset(
            "images/rgb8.png",
            "rgb8.png",
            "image/png",
            "A nice picture.",
            AssociationKind::Unspecified,
        )
    }

    #[snapshot(document)]
    fn embedded_file(d: &mut Document) {
        d.embed_file(file_1());
    }

    #[snapshot(document)]
    fn embedded_file_with_compression(d: &mut Document) {
        // Same file as in `embedded_file`, but with compression turned on.
        let compressed = EmbeddedFile {
            compress: true,
            ..file_1()
        };

        d.embed_file(compressed);
    }

    #[snapshot(document)]
    fn multiple_embedded_files(d: &mut Document) {
        // All files are loaded first and then embedded in order.
        for file in [file_1(), file_2(), file_3()] {
            d.embed_file(file);
        }
    }
}
2 changes: 1 addition & 1 deletion crates/krilla/src/object/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ pub mod action;
pub mod annotation;
pub mod color;
pub mod destination;
pub mod embed;
pub(crate) mod ext_g_state;
pub(crate) mod font;
#[cfg(feature = "raster-images")]
Expand All @@ -22,7 +23,6 @@ pub(crate) mod shading_function;
pub(crate) mod shading_pattern;
pub(crate) mod tiling_pattern;
pub(crate) mod xobject;
pub mod embed;

pub(crate) type ChunkContainerFn = Box<dyn FnMut(&mut ChunkContainer) -> &mut Vec<Chunk>>;

Expand Down
15 changes: 13 additions & 2 deletions crates/krilla/src/serialize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ use tiny_skia_path::Size;
use crate::chunk_container::ChunkContainer;
use crate::color::{ColorSpace, ICCBasedColorSpace, ICCProfile};
use crate::destination::{NamedDestination, XyzDestination};
use crate::embed::EmbeddedFile;
use crate::error::{KrillaError, KrillaResult};
use crate::font::{Font, FontInfo};
#[cfg(feature = "raster-images")]
Expand Down Expand Up @@ -262,7 +263,13 @@ impl SerializeContext {
/// Store `metadata` on the chunk container, replacing any metadata that
/// was stored there before.
pub(crate) fn set_metadata(&mut self, metadata: Metadata) {
self.chunk_container.metadata = Some(metadata);
}


/// Register `file` for serialization and remember the name under which
/// its reference has to be listed among the document's embedded files.
pub(crate) fn embed_file(&mut self, file: EmbeddedFile) {
    // `file` is moved into `register_cacheable`, so copy the path out first.
    let path = file.path.clone();
    let file_ref = self.register_cacheable(file);

    // NOTE(review): the map is keyed by the reference, so iterating it yields
    // the entries in reference order rather than name order. PDF name trees
    // expect their keys in lexical order -- confirm that the writer of the
    // embedded-files name tree accounts for this.
    self.global_objects.embedded_files.insert(file_ref, path);
}

/// Borrow the metadata stored on the chunk container, or `None` if there
/// is none.
pub(crate) fn metadata(&self) -> Option<&Metadata> {
self.chunk_container.metadata.as_ref()
}
Expand Down Expand Up @@ -840,7 +847,7 @@ impl<T> DerefMut for MaybeTaken<T> {
#[derive(Default)]
pub(crate) struct GlobalObjects {
/// All named destinations that have been registered, including a Ref to their destination.
// Needs to be pub(crate)lic because writing of named destinations happens in `ChunkContainer`.
// Needs to be pub(crate) because writing of named destinations happens in `ChunkContainer`.
pub(crate) named_destinations: MaybeTaken<HashMap<NamedDestination, Ref>>,
/// A map from fonts to font container.
font_map: MaybeTaken<HashMap<Font, Rc<RefCell<FontContainer>>>>,
Expand All @@ -859,6 +866,9 @@ pub(crate) struct GlobalObjects {
outline: MaybeTaken<Option<Outline>>,
/// Stores the tag tree.
tag_tree: MaybeTaken<Option<TagTree>>,
/// Stores the association of the names of embedded files to their refs,
/// for the catalog dictionary.
pub(crate) embedded_files: MaybeTaken<BTreeMap<Ref, String>>,
}

impl GlobalObjects {
Expand All @@ -870,5 +880,6 @@ impl GlobalObjects {
assert!(self.struct_parents.is_taken());
assert!(self.outline.is_taken());
assert!(self.tag_tree.is_taken());
assert!(self.embedded_files.is_taken());
}
}
Loading

0 comments on commit 9003740

Please sign in to comment.