From 48d5e471c4c98b8a6a66eb179624f92cb82bd98b Mon Sep 17 00:00:00 2001 From: as-op Date: Thu, 23 Jan 2025 16:26:23 +0100 Subject: [PATCH] better support for html lists in html tables --- lib/md_to_pdf/elements/html.rb | 100 ++++++++++-------- .../{list_in_cell.md => list_in_cell.html} | 0 spec/fixtures/table/lists_in_cell.html | 1 + spec/markdown_to_pdf/page_break_spec.rb | 10 +- spec/markdown_to_pdf/table_spec.rb | 35 +++--- 5 files changed, 86 insertions(+), 60 deletions(-) rename spec/fixtures/table/{list_in_cell.md => list_in_cell.html} (100%) create mode 100644 spec/fixtures/table/lists_in_cell.html diff --git a/lib/md_to_pdf/elements/html.rb b/lib/md_to_pdf/elements/html.rb index 3f110d0..72c9cc3 100644 --- a/lib/md_to_pdf/elements/html.rb +++ b/lib/md_to_pdf/elements/html.rb @@ -52,6 +52,22 @@ def html_tag_to_font_style(tag, opts) end end + def draw_html(node, opts) + html = node.string_content.gsub("\n", '').strip + parsed_data = Nokogiri::HTML.fragment(html) + draw_html_tag(parsed_data, node, opts) + end + + def data_inlinehtml(node, opts) + html = node.string_content + return [] if html.downcase == '' || html.downcase == '' + + parsed_data = Nokogiri::HTML.fragment(html) + data_inlinehtml_tag(parsed_data, node, opts) + end + + private + def remove_font_stack_opts(opts) result = opts list = opts[:font_stack_opts] @@ -70,22 +86,6 @@ def remove_link_stack_opts(opts) result end - def draw_html(node, opts) - html = node.string_content.gsub("\n", '').strip - parsed_data = Nokogiri::HTML.fragment(html) - draw_html_tag(parsed_data, node, opts) - end - - def data_inlinehtml(node, opts) - html = node.string_content - return [] if html.downcase == '' || html.downcase == '' - - parsed_data = Nokogiri::HTML.fragment(html) - data_inlinehtml_tag(parsed_data, node, opts) - end - - private - def data_inlinehtml_tag(tag, node, opts) result = [] current_opts = opts @@ -111,7 +111,7 @@ def data_inlinehtml_tag(tag, node, opts) when 'label', 'li' result.concat(data_inlinehtml_tag(sub, node, opts)) when 'p' - result.concat(data_inlinehtml_tag(sub, node, opts)).push(text_hash_raw("\n", current_opts)) + result.concat(data_inlinehtml_paragraph_tag(sub, node, opts)) when 'br' result.push(text_hash_raw("\n", current_opts)) when 'input' @@ -128,6 +128,18 @@ def data_inlinehtml_tag(tag, node, opts) result end + def data_inlinehtml_paragraph_tag(sub, node, opts) + result = data_inlinehtml_tag(sub, node, opts) + # lists in tables are brittle and must handle their own newlines + #


should resolve to \n not to be duplicated into \n\n + unless (opts[:is_in_table] && opts[:is_in_list]) || + (result.length == 1 && result[0][:text] == "\n") + result.push(text_hash_raw("\n", opts)) + end + result + end + + def data_image_style_opts(tag, _node, _opts) result = {} if tag.attr("style") @@ -152,12 +164,11 @@ def data_inline_image_tag(tag, node, opts) def data_inlinehtml_list_tag(tag, node, opts) result = [] points, level, _list_style, content_opts = data_html_list(tag, node, opts) - result.push(text_hash_raw("\n", content_opts).merge({ list_level: level, list_indent: 0 })) if level > 1 + content_opts[:is_in_list] = true points.each do |point| - data = data_inlinehtml_tag(point[:tag], node, content_opts) - data.push(text_hash_raw("\n", content_opts).merge({ list_entry_type: 'end' })) - data[0][:list_entry_type] = 'first' unless data.empty? - data.unshift(text_hash(point[:bullet], point[:opts]).merge({ list_entry_type: 'bullet' })) + data = data_inlinehtml_tag(point[:tag], node, opts.merge(content_opts)) + data[0][:list_entry_type] = :first unless data.empty? + data.unshift(text_hash(point[:bullet], point[:opts]).merge({ list_entry_type: :bullet })) data.each do |item| item[:list_level] = level if item[:list_level].nil? item[:list_indent] = point[:width] if item[:list_indent].nil? @@ -182,17 +193,18 @@ def collect_html_table_tag_rows(tag, table_font_opts, opts) end def draw_html_table_tag(tag, opts) - table_font_opts = build_table_font_opts(opts) - rows = collect_html_table_tag_rows(tag, table_font_opts, opts) + current_opts = opts.merge({ is_in_table: true }) + table_font_opts = build_table_font_opts(current_opts) + rows = collect_html_table_tag_rows(tag, table_font_opts, current_opts) column_count = 0 rows.each do |row| column_count = [column_count, row.length].max end column_alignments = Array.new(column_count, :left) header_row_count = count_html_header_rows(tag) - table = build_table_settings(header_row_count, opts) - opts[:opts_cell] = table[:opts_cell] - draw_table_data(table, rows, column_alignments, opts) + table = build_table_settings(header_row_count, current_opts) + current_opts[:opts_cell] = table[:opts_cell] + draw_table_data(table, rows, column_alignments, current_opts) end def count_html_header_rows(tag, header_count = 0) @@ -390,28 +402,30 @@ def space_stuffing(width, space_width) end def indent_html_table_list_items(cell_data) - cell_data.each do |item| + level_stack = {} + cell_data.each_with_index do |item, index| next if item[:list_level].nil? + level_stack[item[:list_level]] = space_stuffing(item[:list_indent], item[:list_indent_space]) + # Note: There is no settings for paddings of text fragments in Prawn::Table # so as a workaround the lists are stuffed with spaces, which is of course not pixel perfect - # first indenting with spaces of multiline list items - # * item - # multiline item - # multiline item - if item[:list_entry_type].nil? && item[:text] != "\n" - item[:text] = "#{space_stuffing(item[:list_indent], item[:list_indent_space])}#{item[:text]}" + prev = index > 0 ? cell_data[index - 1] : nil + if item[:text] != "\n" && !prev.nil? && prev[:list_entry_type] != :bullet + current_stuffing = '' + (1..item[:list_level] - 1).each do |i| + current_stuffing += level_stack[i] || '' + end + if item[:list_entry_type] == :bullet + item[:text] = "#{current_stuffing}#{item[:text]}" + else + indent = space_stuffing(item[:list_indent], item[:list_indent_space]) + item[:text] = "#{current_stuffing}#{indent}#{item[:text]}" + end end - - # second indenting of nested lists - # * item - # multiline item - # * sub list item - # * sub list item - # sub list multiline item - if item[:list_level] > 1 && (item[:list_entry_type].nil? || item[:list_entry_type] == 'bullet') && item[:text] != "\n" - item[:text] = "#{space_stuffing(item[:list_indent], item[:list_indent_space])}#{item[:text]}" + if prev && item[:list_entry_type] == :bullet && prev[:text] != "\n" + item[:text] = "\n#{item[:text]}" end end cell_data diff --git a/spec/fixtures/table/list_in_cell.md b/spec/fixtures/table/list_in_cell.html similarity index 100% rename from spec/fixtures/table/list_in_cell.md rename to spec/fixtures/table/list_in_cell.html diff --git a/spec/fixtures/table/lists_in_cell.html b/spec/fixtures/table/lists_in_cell.html new file mode 100644 index 0000000..59ec656 --- /dev/null +++ b/spec/fixtures/table/lists_in_cell.html @@ -0,0 +1 @@ +

He is making a list

  1. checking it

  2. twice

    1. gonna find out

    2. who's been naughty or

      1. nice







diff --git a/spec/markdown_to_pdf/page_break_spec.rb b/spec/markdown_to_pdf/page_break_spec.rb index 5eeb2e2..52a5ea5 100644 --- a/spec/markdown_to_pdf/page_break_spec.rb +++ b/spec/markdown_to_pdf/page_break_spec.rb @@ -44,14 +44,14 @@ { x: 421.71429, y: 730.884, text: "Col 5" }, { x: 498.85714, y: 730.884, text: "Col 6" }, { x: 36.0, y: 689.268, text: "Entry 1" }, - { x: 344.57143, y: 689.268, text: "[x] " }, - { x: 498.85714, y: 689.268, text: "[x] " }, + { x: 344.57143, y: 689.268, text: "[x]" }, + { x: 498.85714, y: 689.268, text: "[x]" }, { x: 36.0, y: 675.396, text: "Entry 2" }, - { x: 344.57143, y: 675.396, text: "[x] " }, - { x: 421.71429, y: 675.396, text: "[x] " }, + { x: 344.57143, y: 675.396, text: "[x]" }, + { x: 421.71429, y: 675.396, text: "[x]" }, { x: 36.0, y: 661.524, text: "Entry 3" }, { x: 36.0, y: 647.652, text: "Entry 4" }, - { x: 113.14286, y: 647.652, text: "[x] " }, + { x: 113.14286, y: 647.652, text: "[x]" }, { x: 36.0, y: 137.016, text: "Content up until here" }, { x: 36.0, y: 123.144, text: "Header 1" }, { x: 36.0, y: 747.384, text: "But does not not create a page break if the next item is already a page break (Header 1 must be at" }, diff --git a/spec/markdown_to_pdf/table_spec.rb b/spec/markdown_to_pdf/table_spec.rb index a38ed46..c0d39b9 100644 --- a/spec/markdown_to_pdf/table_spec.rb +++ b/spec/markdown_to_pdf/table_spec.rb @@ -88,14 +88,14 @@ { x: 421.71429, y: 744.756, text: "Header 5" }, { x: 498.85714, y: 744.756, text: "Header 6" }, { x: 36.0, y: 730.884, text: "Entry 1" }, - { x: 344.57143, y: 730.884, text: "[x] " }, - { x: 498.85714, y: 730.884, text: "[x] " }, + { x: 344.57143, y: 730.884, text: "[x]" }, + { x: 498.85714, y: 730.884, text: "[x]" }, { x: 36.0, y: 717.012, text: "Entry 2" }, - { x: 344.57143, y: 717.012, text: "[x] " }, - { x: 421.71429, y: 717.012, text: "[x] " }, + { x: 344.57143, y: 717.012, text: "[x]" }, + { x: 421.71429, y: 717.012, text: "[x]" }, { x: 36.0, y: 703.14, text: "Entry 3" }, { x: 36.0, y: 689.268, text: "Entry 4" }, - { x: 113.14286, y: 689.268, text: "[x] " }]) + { x: 113.14286, y: 689.268, text: "[x]" }]) end it 'creates a table without bad wrapping with doc font style' do @@ -145,7 +145,7 @@ end it 'creates a html table with lists inside' do - generator.parse_file('table/list_in_cell.md') + generator.parse_file('table/list_in_cell.html') expect_pdf([ { x: 36.0, y: 744.756, text: "• test1" }, { x: 36.0, y: 730.884, text: "• test2" }, @@ -163,6 +163,17 @@ { x: 396.0, y: 717.012, text: "3. wiiiii" }]) end + it 'creates a html table with lists with paragraphs inside' do + generator.parse_file('table/lists_in_cell.html') + expect_pdf([ + { x: 36.0, y: 744.756, text: "He is making a list" }, + { x: 36.0, y: 730.884, text: "1. checking it" }, + { x: 36.0, y: 717.012, text: "2. twice" }, + { x: 36.0, y: 703.14, text: "    1. gonna find out" }, + { x: 36.0, y: 689.268, text: "    2. who's been naughty or" }, + { x: 36.0, y: 675.396, text: "        1. nice" }]) + end + it 'creates a html table with subtable in a header row' do generator.parse_file('table/subtable_in_header_row.md') expect_pdf([ @@ -185,14 +196,14 @@ expect_pdf([ { x: 36.0, y: 747.384, text: "With paragraphs:" }, { x: 36.0, y: 730.884, text: "First" }, - { x: 36.0, y: 661.524, text: "Fourth" }, + { x: 36.0, y: 689.268, text: "Fourth" }, { x: 306.0, y: 730.884, text: "First" }, { x: 306.0, y: 717.012, text: "Second" }, - { x: 36.0, y: 636.408, text: "With breaks:" }, - { x: 36.0, y: 619.908, text: "First" }, - { x: 36.0, y: 578.292, text: "Fourth" }, - { x: 306.0, y: 619.908, text: "First" }, - { x: 306.0, y: 606.036, text: "Second" }]) + { x: 36.0, y: 664.152, text: "With breaks:" }, + { x: 36.0, y: 647.652, text: "First" }, + { x: 36.0, y: 606.036, text: "Fourth" }, + { x: 306.0, y: 647.652, text: "First" }, + { x: 306.0, y: 633.78, text: "Second" }]) end it 'creates a html table with cell colors' do