From 6c4fa6ec0467576520bfbd5f59105350524af883 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Wed, 22 Jan 2025 12:31:44 -0500 Subject: [PATCH] Introduce an acts_as_loofah! method to make Nokogiri instances act like Loofah instances. These classes have a new instance method, `#acts_as_loofah!`: - Nokogiri::XML::Document - Nokogiri::XML::DocumentFragment - Nokogiri::HTML4::Document - Nokogiri::HTML4::DocumentFragment - Nokogiri::HTML5::Document - Nokogiri::HTML5::DocumentFragment This method extends the Nokogiri object so that it quacks like the corresponding Loofah object. Note that this method will also extend any existing child element objects, just as if those objects were created as children of a Loofah document. --- lib/loofah/concerns.rb | 11 ++++- lib/loofah/html4/document.rb | 10 +++++ lib/loofah/html4/document_fragment.rb | 10 +++++ lib/loofah/xml/document.rb | 18 ++++++++ lib/loofah/xml/document_fragment.rb | 8 ++++ test/unit/test_acts_as_loofah.rb | 63 +++++++++++++++++++++++++++ 6 files changed, 118 insertions(+), 2 deletions(-) create mode 100644 test/unit/test_acts_as_loofah.rb diff --git a/lib/loofah/concerns.rb b/lib/loofah/concerns.rb index 9e0f5d7..172c3d2 100644 --- a/lib/loofah/concerns.rb +++ b/lib/loofah/concerns.rb @@ -123,10 +123,17 @@ def to_text(options = {}) end module DocumentDecorator # :nodoc: + class << self + def decorate(instance) + instance.decorators(Nokogiri::XML::Node) << ScrubBehavior::Node + instance.decorators(Nokogiri::XML::NodeSet) << ScrubBehavior::NodeSet + instance.decorate_existing + end + end + def initialize(*args, &block) super - decorators(Nokogiri::XML::Node) << ScrubBehavior::Node - decorators(Nokogiri::XML::NodeSet) << ScrubBehavior::NodeSet + DocumentDecorator.decorate(self) end end diff --git a/lib/loofah/html4/document.rb b/lib/loofah/html4/document.rb index 593380b..543f837 100644 --- a/lib/loofah/html4/document.rb +++ b/lib/loofah/html4/document.rb @@ -12,6 +12,16 @@ class Document < 
Nokogiri::HTML4::Document include Loofah::DocumentDecorator include Loofah::TextBehavior include Loofah::HtmlDocumentBehavior + + module NokogiriExtender + def acts_as_loofah! + super + singleton_class.include(Loofah::TextBehavior) + singleton_class.include(Loofah::HtmlDocumentBehavior) + end + end end end end + +Nokogiri::HTML4::Document.include(Loofah::HTML4::Document::NokogiriExtender) diff --git a/lib/loofah/html4/document_fragment.rb b/lib/loofah/html4/document_fragment.rb index 988f4b9..9fa0ee6 100644 --- a/lib/loofah/html4/document_fragment.rb +++ b/lib/loofah/html4/document_fragment.rb @@ -10,6 +10,16 @@ module HTML4 # :nodoc: class DocumentFragment < Nokogiri::HTML4::DocumentFragment include Loofah::TextBehavior include Loofah::HtmlFragmentBehavior + + module NokogiriExtender + def acts_as_loofah! + super + singleton_class.include(Loofah::TextBehavior) + singleton_class.include(Loofah::HtmlFragmentBehavior) + end + end end end end + +Nokogiri::HTML4::DocumentFragment.include(Loofah::HTML4::DocumentFragment::NokogiriExtender) diff --git a/lib/loofah/xml/document.rb b/lib/loofah/xml/document.rb index cbdc804..fbcd8ba 100644 --- a/lib/loofah/xml/document.rb +++ b/lib/loofah/xml/document.rb @@ -10,6 +10,24 @@ module XML # :nodoc: class Document < Nokogiri::XML::Document include Loofah::ScrubBehavior::Node include Loofah::DocumentDecorator + + module NokogiriExtender + def acts_as_loofah! 
+ singleton_class.include(Loofah::ScrubBehavior::Node) + Loofah::DocumentDecorator.decorate(self) + end + + # TODO: this should be upstreamed into Nokogiri + def decorate_existing + return unless @decorators + + @node_cache.each do |node| + decorate(node) + end + end + end end end end + +Nokogiri::XML::Document.include(Loofah::XML::Document::NokogiriExtender) diff --git a/lib/loofah/xml/document_fragment.rb b/lib/loofah/xml/document_fragment.rb index aecf65a..bb86a4c 100644 --- a/lib/loofah/xml/document_fragment.rb +++ b/lib/loofah/xml/document_fragment.rb @@ -15,6 +15,14 @@ def parse(tags) new(doc, tags) end end + + module NokogiriExtender + def acts_as_loofah! + document.acts_as_loofah! + end + end end end end + +Nokogiri::XML::DocumentFragment.include(Loofah::XML::DocumentFragment::NokogiriExtender) diff --git a/test/unit/test_acts_as_loofah.rb b/test/unit/test_acts_as_loofah.rb new file mode 100644 index 0000000..af9773e --- /dev/null +++ b/test/unit/test_acts_as_loofah.rb @@ -0,0 +1,63 @@ +# frozen_string_literal: true + +require "helper" + +class UnitTestActsAsLoofah < Loofah::TestCase + SUBJECTS = [Nokogiri::XML, Nokogiri::HTML4, defined?(Nokogiri::HTML5) && Nokogiri::HTML5].compact + + SUBJECTS.each do |subject_class| + describe subject_class do + it "Document act like Loofah" do + ndoc = subject_class::Document.parse("
hello
hello") + node = ndoc.at_css("div") + + # method presence + refute_respond_to(ndoc, :scrub!) + refute_respond_to(node, :scrub!) + + ndoc.acts_as_loofah! + + assert_respond_to(ndoc, :scrub!, "Nokogiri::HTML5::Document should be extended") + assert_respond_to(node, :scrub!, "Existing child elements should be extended") + assert_respond_to(ndoc.at_css("span"), :scrub!, "New child elements should be extended") + + # scrub behavior + ndoc.scrub!(:prune) + + refute_includes(ndoc.to_html, "script") + + # other concerns + if subject_class.name.include?("HTML") + assert_includes(ndoc.singleton_class.ancestors, Loofah::TextBehavior) + assert_includes(ndoc.singleton_class.ancestors, Loofah::HtmlDocumentBehavior) + end + end + + it "DocumentFragment act like Loofah" do + nfrag = subject_class::DocumentFragment.parse("
hello
hello") + node = nfrag.at_css("div") + + # method presence + refute_respond_to(nfrag, :scrub!) + refute_respond_to(node, :scrub!) + + nfrag.acts_as_loofah! + + assert_respond_to(nfrag, :scrub!, "Nokogiri::HTML5::Document should be extended") + assert_respond_to(node, :scrub!, "Existing child elements should be extended") + assert_respond_to(nfrag.at_css("span"), :scrub!, "New child elements should be extended") + + # scrub behavior + nfrag.scrub!(:prune) + + refute_includes(nfrag.to_html, "script") + + # other concerns + if subject_class.name.include?("HTML") + assert_includes(nfrag.singleton_class.ancestors, Loofah::TextBehavior) + assert_includes(nfrag.singleton_class.ancestors, Loofah::HtmlFragmentBehavior) + end + end + end + end +end