Skip to content

Commit

Permalink
better support for html lists in html tables
Browse files Browse the repository at this point in the history
  • Loading branch information
as-op committed Jan 23, 2025
1 parent f09eefe commit 48d5e47
Show file tree
Hide file tree
Showing 5 changed files with 86 additions and 60 deletions.
100 changes: 57 additions & 43 deletions lib/md_to_pdf/elements/html.rb
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,22 @@ def html_tag_to_font_style(tag, opts)
end
end

# Renders a block-level HTML node.
#
# The node's raw markup has every newline removed and its surrounding
# whitespace trimmed, is parsed as an HTML fragment, and the parsed tree is
# handed to draw_html_tag together with the original node and options.
def draw_html(node, opts)
  markup = node.string_content.delete("\n").strip
  draw_html_tag(Nokogiri::HTML.fragment(markup), node, opts)
end

# Converts an inline HTML node into a list of text fragments.
#
# A node whose content is exactly a closing </a> or </font> tag (compared
# case-insensitively) produces no fragments. Any other content is parsed as
# an HTML fragment and delegated to data_inlinehtml_tag.
def data_inlinehtml(node, opts)
  markup = node.string_content
  return [] if %w[</a> </font>].include?(markup.downcase)

  fragment = Nokogiri::HTML.fragment(markup)
  data_inlinehtml_tag(fragment, node, opts)
end

private

def remove_font_stack_opts(opts)
result = opts
list = opts[:font_stack_opts]
Expand All @@ -70,22 +86,6 @@ def remove_link_stack_opts(opts)
result
end

# Renders a block-level HTML node.
#
# The node's raw markup has every newline removed and its surrounding
# whitespace trimmed, is parsed as an HTML fragment, and the parsed tree is
# handed to draw_html_tag together with the original node and options.
def draw_html(node, opts)
  markup = node.string_content.delete("\n").strip
  draw_html_tag(Nokogiri::HTML.fragment(markup), node, opts)
end

# Converts an inline HTML node into a list of text fragments.
#
# A node whose content is exactly a closing </a> or </font> tag (compared
# case-insensitively) produces no fragments. Any other content is parsed as
# an HTML fragment and delegated to data_inlinehtml_tag.
def data_inlinehtml(node, opts)
  markup = node.string_content
  return [] if %w[</a> </font>].include?(markup.downcase)

  fragment = Nokogiri::HTML.fragment(markup)
  data_inlinehtml_tag(fragment, node, opts)
end

private

def data_inlinehtml_tag(tag, node, opts)
result = []
current_opts = opts
Expand All @@ -111,7 +111,7 @@ def data_inlinehtml_tag(tag, node, opts)
when 'label', 'li'
result.concat(data_inlinehtml_tag(sub, node, opts))
when 'p'
result.concat(data_inlinehtml_tag(sub, node, opts)).push(text_hash_raw("\n", current_opts))
result.concat(data_inlinehtml_paragraph_tag(sub, node, opts))
when 'br'
result.push(text_hash_raw("\n", current_opts))
when 'input'
Expand All @@ -128,6 +128,18 @@ def data_inlinehtml_tag(tag, node, opts)
result
end

# Builds the text fragments for a <p> tag and, in most cases, terminates the
# paragraph with a trailing "\n" fragment.
#
# The trailing newline is omitted in two situations:
# * the paragraph sits inside a list inside a table (both :is_in_table and
#   :is_in_list are set in opts) - lists in tables are brittle and have to
#   manage their own newlines;
# * the paragraph already consists of a single "\n" fragment, e.g.
#   <p><br></p>, which should stay one "\n" instead of becoming "\n\n".
#
# Returns the fragment array produced by data_inlinehtml_tag (extended in place).
def data_inlinehtml_paragraph_tag(sub, node, opts)
  fragments = data_inlinehtml_tag(sub, node, opts)
  return fragments if opts[:is_in_table] && opts[:is_in_list]
  return fragments if fragments.length == 1 && fragments[0][:text] == "\n"

  fragments.push(text_hash_raw("\n", opts))
end


def data_image_style_opts(tag, _node, _opts)
result = {}
if tag.attr("style")
Expand All @@ -152,12 +164,11 @@ def data_inline_image_tag(tag, node, opts)
def data_inlinehtml_list_tag(tag, node, opts)
result = []
points, level, _list_style, content_opts = data_html_list(tag, node, opts)
result.push(text_hash_raw("\n", content_opts).merge({ list_level: level, list_indent: 0 })) if level > 1
content_opts[:is_in_list] = true
points.each do |point|
data = data_inlinehtml_tag(point[:tag], node, content_opts)
data.push(text_hash_raw("\n", content_opts).merge({ list_entry_type: 'end' }))
data[0][:list_entry_type] = 'first' unless data.empty?
data.unshift(text_hash(point[:bullet], point[:opts]).merge({ list_entry_type: 'bullet' }))
data = data_inlinehtml_tag(point[:tag], node, opts.merge(content_opts))
data[0][:list_entry_type] = :first unless data.empty?
data.unshift(text_hash(point[:bullet], point[:opts]).merge({ list_entry_type: :bullet }))
data.each do |item|
item[:list_level] = level if item[:list_level].nil?
item[:list_indent] = point[:width] if item[:list_indent].nil?
Expand All @@ -182,17 +193,18 @@ def collect_html_table_tag_rows(tag, table_font_opts, opts)
end

# Draws an HTML <table> tag: collects the table's rows, sizes the column list
# to the longest row, counts the header rows and passes everything to
# draw_table_data.
#
# NOTE(review): this listing appears to interleave the pre-commit statements
# (working on `opts`) with their post-commit replacements (working on
# `current_opts`). Read literally, the rows are collected twice and the table
# is drawn twice - confirm against the repository which set is live.
def draw_html_table_tag(tag, opts)
table_font_opts = build_table_font_opts(opts)
rows = collect_html_table_tag_rows(tag, table_font_opts, opts)
# merge returns a new hash, so the is_in_table flag is set on a copy and not on the caller's opts
current_opts = opts.merge({ is_in_table: true })
table_font_opts = build_table_font_opts(current_opts)
rows = collect_html_table_tag_rows(tag, table_font_opts, current_opts)
# the column count is the length of the longest row
column_count = 0
rows.each do |row|
column_count = [column_count, row.length].max
end
# every column is left-aligned
column_alignments = Array.new(column_count, :left)
header_row_count = count_html_header_rows(tag)
table = build_table_settings(header_row_count, opts)
opts[:opts_cell] = table[:opts_cell]
draw_table_data(table, rows, column_alignments, opts)
table = build_table_settings(header_row_count, current_opts)
# store the cell options from the table settings in the options passed to draw_table_data
current_opts[:opts_cell] = table[:opts_cell]
draw_table_data(table, rows, column_alignments, current_opts)
end

def count_html_header_rows(tag, header_count = 0)
Expand Down Expand Up @@ -390,28 +402,30 @@ def space_stuffing(width, space_width)
end

def indent_html_table_list_items(cell_data)
cell_data.each do |item|
level_stack = {}
cell_data.each_with_index do |item, index|
next if item[:list_level].nil?

level_stack[item[:list_level]] = space_stuffing(item[:list_indent], item[:list_indent_space])

# Note: Prawn::Table has no setting for padding individual text fragments,
# so as a workaround the lists are stuffed with spaces, which is of course not pixel-perfect

# first indenting with spaces of multiline list items
# * item
# multiline item
# multiline item
if item[:list_entry_type].nil? && item[:text] != "\n"
item[:text] = "#{space_stuffing(item[:list_indent], item[:list_indent_space])}#{item[:text]}"
prev = index > 0 ? cell_data[index - 1] : nil
if item[:text] != "\n" && !prev.nil? && prev[:list_entry_type] != :bullet
current_stuffing = ''
(1..item[:list_level] - 1).each do |i|
current_stuffing += level_stack[i] || ''
end
if item[:list_entry_type] == :bullet
item[:text] = "#{current_stuffing}#{item[:text]}"
else
indent = space_stuffing(item[:list_indent], item[:list_indent_space])
item[:text] = "#{current_stuffing}#{indent}#{item[:text]}"
end
end

# second indenting of nested lists
# * item
# multiline item
# * sub list item
# * sub list item
# sub list multiline item
if item[:list_level] > 1 && (item[:list_entry_type].nil? || item[:list_entry_type] == 'bullet') && item[:text] != "\n"
item[:text] = "#{space_stuffing(item[:list_indent], item[:list_indent_space])}#{item[:text]}"
if prev && item[:list_entry_type] == :bullet && prev[:text] != "\n"
item[:text] = "\n#{item[:text]}"
end
end
cell_data
Expand Down
File renamed without changes.
1 change: 1 addition & 0 deletions spec/fixtures/table/lists_in_cell.html
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<figure><table><tbody><tr><td><p>He is making a list</p><ol><li><p>checking it</p></li><li><p>twice</p><ol><li><p>gonna find out</p></li><li><p>who's been naughty or</p><ol><li><p>nice</p></li></ol></li></ol></li></ol></td><td><p><br></p></td><td><p><br></p></td></tr><tr><td><p><br></p></td><td><p><br></p></td><td><p><br></p></td></tr></tbody></table></figure><br>
10 changes: 5 additions & 5 deletions spec/markdown_to_pdf/page_break_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,14 @@
{ x: 421.71429, y: 730.884, text: "Col 5" },
{ x: 498.85714, y: 730.884, text: "Col 6" },
{ x: 36.0, y: 689.268, text: "Entry 1" },
{ x: 344.57143, y: 689.268, text: "[x] " },
{ x: 498.85714, y: 689.268, text: "[x] " },
{ x: 344.57143, y: 689.268, text: "[x]" },
{ x: 498.85714, y: 689.268, text: "[x]" },
{ x: 36.0, y: 675.396, text: "Entry 2" },
{ x: 344.57143, y: 675.396, text: "[x] " },
{ x: 421.71429, y: 675.396, text: "[x] " },
{ x: 344.57143, y: 675.396, text: "[x]" },
{ x: 421.71429, y: 675.396, text: "[x]" },
{ x: 36.0, y: 661.524, text: "Entry 3" },
{ x: 36.0, y: 647.652, text: "Entry 4" },
{ x: 113.14286, y: 647.652, text: "[x] " },
{ x: 113.14286, y: 647.652, text: "[x]" },
{ x: 36.0, y: 137.016, text: "Content up until here" },
{ x: 36.0, y: 123.144, text: "Header 1" },
{ x: 36.0, y: 747.384, text: "But does not not create a page break if the next item is already a page break (Header 1 must be at" },
Expand Down
35 changes: 23 additions & 12 deletions spec/markdown_to_pdf/table_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -88,14 +88,14 @@
{ x: 421.71429, y: 744.756, text: "Header 5" },
{ x: 498.85714, y: 744.756, text: "Header 6" },
{ x: 36.0, y: 730.884, text: "Entry 1" },
{ x: 344.57143, y: 730.884, text: "[x] " },
{ x: 498.85714, y: 730.884, text: "[x] " },
{ x: 344.57143, y: 730.884, text: "[x]" },
{ x: 498.85714, y: 730.884, text: "[x]" },
{ x: 36.0, y: 717.012, text: "Entry 2" },
{ x: 344.57143, y: 717.012, text: "[x] " },
{ x: 421.71429, y: 717.012, text: "[x] " },
{ x: 344.57143, y: 717.012, text: "[x]" },
{ x: 421.71429, y: 717.012, text: "[x]" },
{ x: 36.0, y: 703.14, text: "Entry 3" },
{ x: 36.0, y: 689.268, text: "Entry 4" },
{ x: 113.14286, y: 689.268, text: "[x] " }])
{ x: 113.14286, y: 689.268, text: "[x]" }])
end

it 'creates a table without bad wrapping with doc font style' do
Expand Down Expand Up @@ -145,7 +145,7 @@
end

it 'creates a html table with lists inside' do
generator.parse_file('table/list_in_cell.md')
generator.parse_file('table/list_in_cell.html')
expect_pdf([
{ x: 36.0, y: 744.756, text: "• test1" },
{ x: 36.0, y: 730.884, text: "• test2" },
Expand All @@ -163,6 +163,17 @@
{ x: 396.0, y: 717.012, text: "3. wiiiii" }])
end

it 'creates a html table with lists with paragraphs inside' do
generator.parse_file('table/lists_in_cell.html')
expect_pdf([
{ x: 36.0, y: 744.756, text: "He is making a list" },
{ x: 36.0, y: 730.884, text: "1. checking it" },
{ x: 36.0, y: 717.012, text: "2. twice" },
{ x: 36.0, y: 703.14, text: "    1. gonna find out" },
{ x: 36.0, y: 689.268, text: "    2. who's been naughty or" },
{ x: 36.0, y: 675.396, text: "        1. nice" }])
end

it 'creates a html table with subtable in a header row' do
generator.parse_file('table/subtable_in_header_row.md')
expect_pdf([
Expand All @@ -185,14 +196,14 @@
expect_pdf([
{ x: 36.0, y: 747.384, text: "With paragraphs:" },
{ x: 36.0, y: 730.884, text: "First" },
{ x: 36.0, y: 661.524, text: "Fourth" },
{ x: 36.0, y: 689.268, text: "Fourth" },
{ x: 306.0, y: 730.884, text: "First" },
{ x: 306.0, y: 717.012, text: "Second" },
{ x: 36.0, y: 636.408, text: "With breaks:" },
{ x: 36.0, y: 619.908, text: "First" },
{ x: 36.0, y: 578.292, text: "Fourth" },
{ x: 306.0, y: 619.908, text: "First" },
{ x: 306.0, y: 606.036, text: "Second" }])
{ x: 36.0, y: 664.152, text: "With breaks:" },
{ x: 36.0, y: 647.652, text: "First" },
{ x: 36.0, y: 606.036, text: "Fourth" },
{ x: 306.0, y: 647.652, text: "First" },
{ x: 306.0, y: 633.78, text: "Second" }])
end

it 'creates a html table with cell colors' do
Expand Down

0 comments on commit 48d5e47

Please sign in to comment.