-
Notifications
You must be signed in to change notification settings - Fork 690
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
This adds an option to disable font subsetting. Original fonts can be embedded in their full original form. This feature can make documents substantially bigger. In addition to the embedded fonts being bigger, PDF requires additional information in order to properly render text. Specifically, it requires glyph widths. Some fonts contain thousands of glyphs. A thousand glyph widths would on average add about 4 KB to the size of the document. Additionally, PDF requires another mapping to make the text intelligible when copying. This additional size is much harder to estimate as it greatly depends on the font coverage, but it is usually on the order of ~1-10 KB per font. The intended use case is a workaround for when TTFunk breaks fonts in subsetting. But this might also be useful for documents that are going to be edited. For example, documents that are templates to which more text will be added later, or the AcroForm feature that allows end users to fill in forms.
- Loading branch information
1 parent
8ceaa10
commit b1a6232
Showing
4 changed files
with
459 additions
and
68 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,138 @@ | ||
# frozen_string_literal: true | ||
|
||
module Prawn
  module Fonts
    # Serializes a character-code => Unicode-code-point mapping as the text of
    # a ToUnicode CMap (the `bfchar`/`bfrange` program that lets PDF readers
    # turn shown character codes back into text).
    #
    # @private
    class ToUnicodeCMap
      # Fixed preamble emitted before the codespace range.
      CMAP_HEADER = <<~HEADER.chomp.freeze
        /CIDInit /ProcSet findresource begin
        12 dict begin
        begincmap
        /CIDSystemInfo 3 dict dup begin
        /Registry (Adobe) def
        /Ordering (UCS) def
        /Supplement 0 def
        end def
        /CMapName /Adobe-Identity-UCS def
        /CMapType 2 def
      HEADER

      # Fixed trailer emitted after the last mapping section.
      CMAP_FOOTER = <<~FOOTER.chomp.freeze
        endcmap
        CMapName currentdict /CMap defineresource pop
        end
        end
      FOOTER

      # Largest number of entries written into one bfchar/bfrange section.
      # NOTE(review): presumably the per-section entry limit of the CMap
      # format -- confirm against the CMap specification.
      MAX_SECTION_ENTRIES = 100

      # UTF-16 surrogate code points; entries mapping to them are skipped.
      SURROGATES = (0xd800..0xdfff).freeze

      private_constant :CMAP_HEADER, :CMAP_FOOTER, :MAX_SECTION_ENTRIES, :SURROGATES

      # @param mapping [Hash{Integer => Integer}] keys are character codes (in
      #   the broad sense, as used in the showing operation strings) and
      #   values are Unicode code points
      # @param code_space_size [Integer, nil] width of a character code in
      #   bytes; when nil it is derived from the largest key in +mapping+
      def initialize(mapping, code_space_size = nil)
        @mapping = mapping
        @code_space_size = code_space_size
      end

      # Builds the CMap text.
      #
      # An empty mapping, or one whose largest key is 0, yields a CMap with a
      # one-byte codespace and no bfchar/bfrange sections.
      #
      # @return [String] CMap source, lines joined with "\n", no trailing
      #   newline
      def generate
        code_size = @code_space_size || derived_code_space_size
        # Character codes are written as zero-padded upper-case hex, two
        # digits per byte of the code space.
        code_format = "<%0#{code_size * 2}X>"

        short_spans, long_spans =
          mapping_spans(mappable_entries).partition { |type, _span| type == :short }

        [
          CMAP_HEADER,
          *codespace_section(code_size, code_format),
          *bfrange_sections(long_spans, code_format),
          *bfchar_sections(short_spans, code_format),
          CMAP_FOOTER
        ].join("\n")
      end

      # Original, misspelled name of {#generate}; kept so that existing
      # callers keep working.
      #
      # @deprecated Use {#generate}.
      alias generatate generate

      private

      attr_reader :mapping

      # Largest character code in the mapping, or 0 for an empty mapping.
      def max_code
        mapping.keys.max || 0
      end

      # Number of bytes needed to represent the largest character code; never
      # less than one so that a code of 0 still gets a one-byte code space.
      def derived_code_space_size
        [(max_code.bit_length / 8.0).ceil, 1].max
      end

      # Entries that may appear in the CMap: code 0 and mappings to surrogate
      # code points are dropped.
      def mappable_entries
        mapping.reject { |code, code_point| code.zero? || SURROGATES.cover?(code_point) }
      end

      # Lines of the single codespace range section.
      #
      # In a CMap, codespaces are not sequential; they're ranges in
      # a multi-dimensional space. Each byte is considered separately. So we
      # have to maximally extend the lower bytes in order to allow for
      # continuous mapping.
      # We only keep the highest byte because usually it's lower than
      # maximally allowed and we don't want to cover that unused space.
      def codespace_section(code_size, code_format)
        code_space_max = max_code | ('ff' * (code_size - 1)).to_i(16)

        [
          '1 begincodespacerange',
          format(code_format * 2, 0, code_space_max),
          'endcodespacerange'
        ]
      end

      # Lines of all bfrange sections, one section per group of at most
      # MAX_SECTION_ENTRIES spans.
      #
      # NOTE(review): spans are not split where a range crosses a 256-code
      # boundary -- confirm whether the targeted readers require that.
      def bfrange_sections(long_spans, code_format)
        long_spans.each_slice(MAX_SECTION_ENTRIES).flat_map do |spans|
          [
            "#{spans.length} beginbfrange",
            *spans.map { |type, span| bfrange_entry(type, span, code_format) },
            'endbfrange'
          ]
        end
      end

      # One bfrange line. A :fully_sorted span is written as first code, last
      # code and the first destination; an :index_sorted span lists every
      # destination explicitly.
      def bfrange_entry(type, span, code_format)
        code_range = format(code_format * 2, span.first[0], span.last[0])

        case type
        when :fully_sorted
          "#{code_range}<#{utf16_hex(span.first[1])}>"
        when :index_sorted
          destinations = span.map { |_code, code_point| "<#{utf16_hex(code_point)}>" }.join
          "#{code_range}[#{destinations}]"
        end
      end

      # Lines of all bfchar sections, one section per group of at most
      # MAX_SECTION_ENTRIES single-code entries.
      def bfchar_sections(short_spans, code_format)
        # A :short span always holds exactly one [code, code_point] pair.
        entries = short_spans.map { |_type, span| span.first }

        entries.each_slice(MAX_SECTION_ENTRIES).flat_map do |group|
          [
            "#{group.length} beginbfchar",
            *group.map { |code, code_point| "#{format(code_format, code)}<#{utf16_hex(code_point)}>" },
            'endbfchar'
          ]
        end
      end

      # Hex digits of the UTF-16BE encoding of +code_point+.
      def utf16_hex(code_point)
        code_point.chr(::Encoding::UTF_16BE).unpack1('H*')
      end

      # Groups entries into typed spans, ordered by their first code.
      #
      # @param mapping [Hash{Integer => Integer}, Array<Array(Integer, Integer)>]
      # @return [Array<Array(Symbol, Array)>] pairs of span type and the
      #   [code, code_point] entries it covers. Types are:
      #   :short        -- a single entry,
      #   :fully_sorted -- consecutive codes mapping to consecutive code points,
      #   :index_sorted -- consecutive codes mapping to arbitrary code points.
      def mapping_spans(mapping)
        mapping
          .sort
          .slice_when { |a, b| (b[0] - a[0]) != 1 } # Slice at key discontinuity
          .flat_map { |run| classify_run(run) }
          .sort_by { |_type, span| span.first.first } # Sort by span start key
      end

      # Splits one run of consecutive codes into typed spans.
      def classify_run(run)
        return [[:short, run]] if run.length == 1

        continuous_slices, discontinuous_slices =
          run
            .slice_when { |a, b| (b[1] - a[1]) != 1 } # Slice at value discontinuity
            .partition { |subslice| subslice.length > 1 }

        leftover_spans =
          discontinuous_slices
            .flatten(1) # Join together
            .slice_when { |a, b| (b[0] - a[0]) != 1 } # Slice at key discontinuity, again
            .map { |span| span.length > 1 ? [:index_sorted, span] : [:short, span] }

        leftover_spans + continuous_slices.map { |span| [:fully_sorted, span] }
      end
    end
  end
end
Oops, something went wrong.