Examples (#203)

Examples, fix builder, add to builder
ohler55 · Mar 13, 2018 · 0aeb656 · 0aeb656
1 parent 228f920
commit 0aeb656
Show file tree

Hide file tree

Showing 10 changed files with 382 additions and 19 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,4 +1,10 @@
 
+## 2.9.0 - March 13, 2018
+
+  - New builder methods for building HTML.
+
+  - Examples added.
+
 ## 2.8.4 - March 4, 2018
 
   - Commented out debug statement.

diff --git a/examples/gen.rb b/examples/gen.rb
@@ -0,0 +1,50 @@
+#!/usr/bin/env ruby
+
+# This example demonstrates loading XML, modifying it, and then dumping it.
+
+# Use the current repo if run from the examples directory.
+ox_dir = File.dirname(File.dirname(File.expand_path(__FILE__)))
+$LOAD_PATH << File.join(ox_dir, 'ext')
+$LOAD_PATH << File.join(ox_dir, 'lib')
+
+require 'ox'
+
+xml = %{
+<?xml version="1.0"?>
+<Park.Animal>
+  <type>mutant</type>
+  <friends type="Hash">
+    <i>5</i>
+    <Park.Animal>
+      <type>dog</type>
+    </Park.Animal>
+  </friends>
+</Park.Animal>
+}
+
+# Load the XML into a set of Ox::Nodes.
+doc = Ox.load(xml, mode: :generic)
+
+# Once an Ox::Document is loaded it can be inspected and modified. A Document
+# has a root. Calling doc.root gives the node that is the root of the XML,
+# which here is the Park.Animal element.
+root = doc.root
+puts "root element name: #{root.name}"
+
+# The Ox::Element.locate method can be used similarly to XPath. It does not
+# have all the features of XPath but it does help dig into an XML document.
+# Look for any descendant of the root that has a type attribute and return
+# those attribute values.
+puts "descendent type attribute value: #{root.locate('*/@type')}"
+
+# Delete the 'i' element by iterating over the root's nodes and looking for the
+# one named friends. The locate method could also be used.
+root.nodes.each { |n|
+  if n.name == 'friends'
+    n.nodes.delete_if { |child| child.name == 'i' }
+  end
+}
+
+# Let's take a look at the changes by dumping the doc.
+xml2 = Ox.dump(doc)
+puts "modified XML: #{xml2}"
diff --git a/examples/hashi.rb b/examples/hashi.rb
@@ -0,0 +1,38 @@
+#!/usr/bin/env ruby
+
+# This example demonstrates the use of Ox.load using the :hash and
+# :hash_no_attrs modes.
+
+# Use the current repo if run from the examples directory.
+ox_dir = File.dirname(File.dirname(File.expand_path(__FILE__)))
+$LOAD_PATH << File.join(ox_dir, 'ext')
+$LOAD_PATH << File.join(ox_dir, 'lib')
+
+require 'ox'
+
+# Load your own XML or use this sample string.
+xml = %{
+<?xml version="1.0"?>
+<Park.Animal>
+  <type>mutant</type>
+  <friends type="Hash">
+    <i>5</i>
+    <Park.Animal>
+      <type>dog</type>
+    </Park.Animal>
+  </friends>
+</Park.Animal>
+}
+
+doc = Ox.load(xml, mode: :hash)
+puts "as hash with Symbol element names: #{doc}"
+
+# Load the XML and convert it to a Hash. By default element names are
+# symbolized. By setting the :symbolize_keys option to false the element
+# names will be strings instead.
+doc = Ox.load(xml, mode: :hash, symbolize_keys: false)
+puts "as hash with String element names: #{doc}"
+
+# The :hash_no_attrs mode leaves attributes out of the resulting Hash.
+doc = Ox.load(xml, mode: :hash_no_attrs)
+puts "as hash_no_attrs: #{doc}"
diff --git a/examples/obj.rb b/examples/obj.rb
@@ -0,0 +1,36 @@
+#!/usr/bin/env ruby
+
+# This example demonstrates encoding and decoding a Ruby object.
+
+# Use the current repo if run from the examples directory.
+ox_dir = File.dirname(File.dirname(File.expand_path(__FILE__)))
+$LOAD_PATH << File.join(ox_dir, 'ext')
+$LOAD_PATH << File.join(ox_dir, 'lib')
+
+require 'ox'
+
+# Classy is a plain Ruby class; an instance of it is encoded and then decoded.
+class Classy
+  def initialize(a, b)
+    @a = a
+    @b = b
+  end
+
+  def to_s
+    "Classy a: #{@a}, b: #{@b}"
+  end
+end
+
+obj = Classy.new(23, ['abc', {x: true}])
+
+doc = Ox.dump(obj, mode: :object)
+
+# The encoded format is not important and should not be generated by
+# hand. It is of interest only for the curious.
+puts "encoded object:\n#{doc}"
+
+# Now convert back to a Ruby object.
+obj2 = Ox.load(doc, mode: :object)
+
+# It should look the same; print it out to check.
+puts "decoded object: #{obj2}"
diff --git a/examples/saxy.html b/examples/saxy.html
@@ -0,0 +1,22 @@
+<!DOCTYPE HTML>
+<html>
+  <head>
+    <title>Saxy</title>
+    <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1">
+    <link rel="stylesheet" type="text/css" media="screen,print" href="saxy.css">
+  </head>
+  <!-- body started after this comment -->
+  <body>
+    <p>First Line</p>
+    <div class="first">
+      <p>One Level Deep</p>
+    </div>
+    <hr>
+    <div class="first">
+      <div>
+        <p>Deeper</p>
+      </div>
+    </div>
+  </body>
+</html>
diff --git a/examples/saxy.rb b/examples/saxy.rb
@@ -0,0 +1,52 @@
+#!/usr/bin/env ruby
+
+# This example demonstrates the use of the Ox.sax_parse method. An XML string
+# is parsed and a list of element names is built.
+
+# Use the current repo if run from the examples directory.
+ox_dir = File.dirname(File.dirname(File.expand_path(__FILE__)))
+$LOAD_PATH << File.join(ox_dir, 'ext')
+$LOAD_PATH << File.join(ox_dir, 'lib')
+
+require 'ox'
+
+# First create a handler for the SAX callbacks. A Hash is used to collect the
+# element names. This is a quick way to make sure the collected names are
+# unique. Only start_element is implemented as that is all that is needed
+# to collect names. There is no need to inherit from Ox::Sax but that class
+# includes the private version of all the methods that can be made public.
+class Saxy
+  def initialize
+    # No super call is needed; Saxy does not inherit from Ox::Sax.
+    @names = {}
+  end
+
+  def names
+    @names.keys
+  end
+
+  def start_element(name)
+    @names[name] = nil
+  end
+
+end
+
+# The XML can be a string or an IO instance.
+xml = %{
+<?xml version="1.0"?>
+<Park.Animal>
+  <type>mutant</type>
+  <friends type="Hash">
+    <i>5</i>
+    <Park.Animal>
+      <type>dog</type>
+    </Park.Animal>
+  </friends>
+</Park.Animal>
+}
+# Create an instance of the handler.
+handler = Saxy.new
+# Parse the sample as XML; Ox.sax_html is intended for HTML input.
+Ox.sax_parse(handler, xml)
+
+puts "element names: #{handler.names}"
diff --git a/examples/saxy_html.rb b/examples/saxy_html.rb
@@ -0,0 +1,113 @@
+#!/usr/bin/env ruby
+
+# This example demonstrates the use of the Ox.sax_html parser and the
+# Ox::Builder. The parser is used to parse an HTML file and add a
+# `class="ppp"` to each '<p>' element start.
+#
+# The approach taken is to build while parsing. An HTML parse is started and a
+# builder call is made on each parser callback. If the element is a 'p' then
+# the class attribute is added. All others remain the same.
+
+# Use the current repo if run from the examples directory.
+ox_dir = File.dirname(File.dirname(File.expand_path(__FILE__)))
+$LOAD_PATH << File.join(ox_dir, 'ext')
+$LOAD_PATH << File.join(ox_dir, 'lib')
+
+require 'ox'
+
+# First create a handler for the SAX callbacks. The class instances include a
+# builder that builds as parsing takes place.
+class Saxy < Ox::Sax
+  VOID_ELEMENTS = [ :area, :base, :br, :col, :embed, :hr, :img, :input, :link, :meta, :param, :source, :track, :wbr ]
+
+  def initialize
+    super
+    # The build is created with an indentation of 2 but that can be changed to
+    # the desired indentation.
+    @builder = Ox::Builder.new(:indent => 2)
+    # element_name and attributes are used for deferred writing of the element
+    # start.
+    @element_name = nil
+    @attrs = {}
+  end
+
+  def to_s
+    @builder.to_s
+  end
+
+  # The builder creates element starts with attributes but the parser uses a
+  # seprate call for attributes and element starts. To deal with the
+  # difference keep track of the start name and attributes as they are
+  # added. When another callback other than attributes is called write any
+  # pending element start.
+  def push_element
+    unless @element_name.nil?
+      # Add the class attribute if the element is a <p> element.
+      @attrs[:class] = 'ppp' if :p == @element_name
+
+      # Check @void_elements to determine how the element start would be
+      # written. HTML includes void elements that are self closing so those
+      # should be handled correctly.
+      if VOID_ELEMENTS.include?(@element_name)
+	@builder.void_element(@element_name, @attrs)
+      else
+	@builder.element(@element_name, @attrs)
+      end
+      # Reset the element name.
+      @element_name = nil
+      @attrs = {}
+    end
+  end
+
+  def start_element(name)
+    push_element
+    @element_name = name
+  end
+
+  def attr(name, value)
+    @attrs[name] = value
+  end
+
+  def doctype(value)
+    push_element
+    @builder.doctype(value)
+  end
+
+  def comment(value)
+    push_element
+    @builder.comment(value)
+  end
+
+  def text(value)
+    push_element
+    @builder.text(value)
+  end
+
+  def end_element(name)
+    push_element
+    @builder.pop() unless VOID_ELEMENTS.include?(name)
+  end
+
+  # Just in case there is a parse error this will display the error along with
+  # where the error occurred in the XML file.
+  def error(message, line, column)
+    puts "*-*-* error at #{line}:#{column}: #{message}"
+  end
+end
+
+# Load the XML file. The Ox.sax_html also handles IO objects.
+xml = File.read('saxy.html')
+# Create an instance of the handler. 
+handler = Saxy.new()
+
+Ox.sax_html(handler, xml)
+
+# For debugging uncomment these lines.
+#puts "******************** original *************************\n#{xml}"
+#puts "******************** modifified ***********************\n#{handler.to_s}"
+
+# For benchmarks these lines should be repeated to parse and to generate a
+# modified XML string.
+#handler = Saxy.new()
+#Ox.sax_html(handler, xml)
+#handler.to_s