Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/validate xml #30

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions lib/logstash/filters/xml.rb
Original file line number Diff line number Diff line change
Expand Up @@ -87,11 +87,40 @@ class LogStash::Filters::Xml < LogStash::Filters::Base
# Of course, if the document had nodes with the same names but different namespaces, they will now be ambiguous.
config :remove_namespaces, :validate => :boolean, :default => false

# Whether to validate the source XML against the schema described by the
# `validation` setting. Validation is disabled by default.
config :validate_xml, :validate => :boolean, :default => false

# Schema to validate against when `validate_xml` is true.
# `type` selects the schema language: "rng" loads a RelaxNG schema, any other
# value (including none) is treated as XSD. `file` is the path to the schema.
# Example:
#
# [source,ruby]
#     filter {
#       xml {
#         validate_xml => true
#         validation => {
#           "type" => "xsd"
#           "file" => "path/to/file"
#         }
#       }
#     }
#
config :validation, :validate => :hash, :default => {}

# Tag added to an event when its XML cannot be parsed.
XMLPARSEFAILURE_TAG = "_xmlparsefailure"
# Tag added to an event when schema validation reports at least one error.
XMLVALIDATIONFAILURE_TAG = "_xmlvalidationfailure"

# Loads the libraries the filter depends on and, when `validate_xml` is
# enabled, compiles the schema described by the `validation` setting into
# @schema.
#
# A `validation['type']` of "rng" selects a RelaxNG schema; any other value
# (including a missing type) is loaded as an XSD. Errors raised while opening
# or compiling the schema file propagate to the caller.
def register
  require "nokogiri"
  require "xmlsimple"
  return unless @validate_xml

  # Block form of File.open: Nokogiri still receives the File object, but the
  # handle is closed as soon as the schema has been built instead of leaking.
  @schema = File.open(@validation['file']) do |schema_file|
    case @validation['type']
    when "rng"
      Nokogiri::XML::RelaxNG(schema_file)
    else
      Nokogiri::XML::Schema(schema_file)
    end
  end
end

def filter(event)
Expand Down Expand Up @@ -172,6 +201,27 @@ def filter(event)
end
end

if @validate_xml
begin
doc = Nokogiri::XML(value, nil, value.encoding.to_s)
if @schema
errors = @schema.validate(doc)
if errors.size>0
event.tag(XMLVALIDATIONFAILURE_TAG)
event["validated"] = false
errors = errors.map {|a| a.to_s}
event["errors"] = errors * "\n"
else
event["validated"] = true
end
end
matched = true
rescue => e
event.tag(XMLPARSEFAILURE_TAG)
@logger.warn("Error parsing xml with Nokogiri::XML", :source => @source, :value => value, :exception => e, :backtrace => e.backtrace)
return
end
end
filter_matched(event) if matched
@logger.debug? && @logger.debug("Event after xml filter", :event => event)
end
Expand Down
7 changes: 7 additions & 0 deletions spec/book.rng
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<!-- RelaxNG test fixture: a <book> element containing one or more <page> elements of text. -->
<element name="book" xmlns="http://relaxng.org/ns/structure/1.0">
<oneOrMore>
<element name="page">
<text/>
</element>
</oneOrMore>
</element>
11 changes: 11 additions & 0 deletions spec/book.xsd
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- XSD test fixture: a <book> element holding a sequence of one or more string-valued <page> elements. -->
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" elementFormDefault="qualified">
<xs:element name="book">
<xs:complexType>
<xs:sequence>
<xs:element maxOccurs="unbounded" ref="page"/>
</xs:sequence>
</xs:complexType>
</xs:element>
<xs:element name="page" type="xs:string"/>
</xs:schema>
93 changes: 93 additions & 0 deletions spec/filters/xml_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -306,4 +306,97 @@
end
end

# Positive XSD case: a document that satisfies spec/book.xsd should leave the
# event with "validated" set to true.
describe "validate XML using XSD" do
config <<-CONFIG
filter {
xml {
source => "xmldata"
target => "parseddata"
validate_xml => true
validation => {
type => "xsd"
file => "spec/book.xsd"
}
}
}
CONFIG

# A book with two pages, which the schema accepts.
sample('xmldata' => '<book>
<page>This is page one.</page>
<page>This is page two.</page>
</book>'
) do
insist { subject['validated'] } == true
end
end

# Negative XSD case: a well-formed document that violates spec/book.xsd should
# leave the event with "validated" set to false.
describe "validate wrong XML using XSD" do
config <<-CONFIG
filter {
xml {
source => "xmldata"
target => "parseddata"
validate_xml => true
validation => {
type => "xsd"
file => "spec/book.xsd"
}
}
}
CONFIG

# A book with no pages; the schema requires at least one <page>.
sample('xmldata' => '<book></book>'
) do
insist { subject['validated'] } == false
end
end

# Positive RelaxNG case: a document that satisfies spec/book.rng should leave
# the event with "validated" set to true.
describe "validate XML using RelaxNG" do
config <<-CONFIG
filter {
xml {
source => "xmldata"
target => "parseddata"
validate_xml => true
validation => {
type => "rng"
file => "spec/book.rng"
}
}
}
CONFIG

# A book with two pages, which the schema accepts.
sample('xmldata' => '<book>
<page>This is page one.</page>
<page>This is page two.</page>
</book>'
) do
insist { subject['validated'] } == true
end
end

# Negative RelaxNG case: a well-formed document that violates spec/book.rng
# should leave the event with "validated" set to false.
describe "validate wrong XML using RelaxNG" do
config <<-CONFIG
filter {
xml {
source => "xmldata"
target => "parseddata"
validate_xml => true
validation => {
type => "rng"
file => "spec/book.rng"
}
}
}
CONFIG

# A book with no pages; the schema requires one or more <page> elements.
sample('xmldata' => '<book></book>'
) do
insist { subject['validated'] } == false
end
end
end