-
Notifications
You must be signed in to change notification settings - Fork 23
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Randomized specs (and fixed resulting problems)
- Split MRSS parser classes into two files

Close #3
- Loading branch information
Showing
17 changed files
with
265 additions
and
98 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,3 @@ | ||
--color | ||
--require spec_helper | ||
--order rand |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
module Feedjira
  module Parser
    module Oasis
      # Feed-level SAXMachine parser. It claims any XML whose leading text
      # mentions the Media RSS namespace or the RSS content-module namespace
      # and maps every <item> to an Oasis::MrssEntry.
      class Mrss
        include SAXMachine
        include FeedUtilities

        # Channel-level fields copied straight from the feed.
        element :title
        element :link
        element :description

        # Each <item> becomes an MrssEntry, exposed through #entries.
        elements :item, :as => :entries, :class => Oasis::MrssEntry

        attr_accessor :feed_url

        # Matches either namespace URI literally. Regexp.union escapes the
        # strings, so the dots in "purl.org", "1.0" and "search.yahoo.com"
        # only match a real "." instead of any character.
        REGEX_MATCH = Regexp.union(
          'http://purl.org/rss/1.0/modules/content/',
          'http://search.yahoo.com/mrss/'
        )

        # Reports whether this parser recognises the given XML.
        #
        # @param first_2k_xml [String] XML text to inspect (presumably the
        #   leading portion of the feed, as the name suggests; confirm
        #   against the caller)
        # @return [Integer, nil] index of the first namespace match, or nil
        #   when neither namespace URI is present
        def self.able_to_parse?(first_2k_xml)
          first_2k_xml =~ REGEX_MATCH
        end
      end
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
module Feedjira
  module Parser
    module Oasis
      # Entry-level SAXMachine parser for a single feed <item>.
      # Several source elements map onto the same attribute; the accompanying
      # spec expects the one appearing last in the XML to supply the value.
      class MrssEntry
        include SAXMachine
        include FeedEntryUtilities

        # Identifier
        element :guid, as: :entry_id
        element :'dc:identifier', as: :entry_id

        element :title

        element :link, as: :url

        # Publication timestamp, under the various tag spellings feeds use
        element :pubDate, as: :published
        element :pubdate, as: :published
        element :'dc:date', as: :published
        element :'dc:Date', as: :published
        element :'dcterms:created', as: :published
        element :issued, as: :published

        # Thumbnail location is read from the tag's url attribute
        element 'media:thumbnail', value: :url, as: :thumbnail_url

        # Summary text candidates
        element :description, as: :summary
        element 'media:description', as: :summary
        element 'content:encoded', as: :summary

        # @return [String] the parsed title as plain, whitespace-normalised text
        def title
          sanitize(@title)
        end

        # @return [String] the parsed summary as plain, whitespace-normalised text
        def summary
          sanitize(@summary)
        end

        private

        # Parses the markup as a Loofah fragment, keeps only its text content,
        # then strips and squishes the whitespace.
        def sanitize(unsafe_html)
          Loofah.fragment(unsafe_html).text.strip.squish
        end
      end
    end
  end
end
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,3 @@ | ||
# Loads the MRSS parser files from app/parsers and registers
# Feedjira::Parser::Oasis::Mrss as an additional Feedjira feed class.
# NOTE(review): the hunk header above reads "-1,2 +1,3", so one of the three
# require lines is presumably the removed side of the diff (likely
# mrss_parser.rb, given the commit splits the parser into two files) - confirm.
require Rails.root.join('app','parsers','mrss_parser.rb') | ||
require Rails.root.join('app','parsers','mrss_entry.rb') | ||
require Rails.root.join('app','parsers','mrss.rb') | ||
Feedjira::Feed.add_feed_class Feedjira::Parser::Oasis::Mrss |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
require 'rails_helper' | ||
|
||
# Exercises MrssEntry through Feedjira::Feed.parse against the sample feeds
# stored under spec/sample_feeds.
describe Feedjira::Parser::Oasis::MrssEntry do
  context 'when entry has media:thumbnail and media:description' do
    let(:dma_mrss_xml) { File.read(Rails.root.to_s + '/spec/sample_feeds/dma.xml') }
    let(:entry) { Feedjira::Feed.parse(dma_mrss_xml).entries.first }

    describe 'a parsed entry' do
      it 'should have the correct title stripped and squished' do
        expect(entry.title).to eq("")
      end

      it 'should have the correct summary stripped and squished' do
        expect(entry.summary).to eq("Official Photo- of something important (U.S. Air Force Photo)")
      end

      it 'should have the correct url' do
        expect(entry.url).to eq("http://www.af.mil/News/Photos.aspx?igphoto=2000949217")
      end

      it 'should have the correct thumbnail url' do
        expect(entry.thumbnail_url).to eq("http://media.dma.mil/2014/Oct/22/2000949217/145/100/0/141022-F-PB123-223.JPG")
      end

      it 'should have the correct entry_id' do
        expect(entry.entry_id).to eq("http://www.af.mil/News/Photos.aspx?igphoto=2000949217")
      end

      it 'should have the correct published time' do
        expect(entry.published).to eq(Time.parse("2014-10-22 14:24:00Z"))
      end
    end
  end

  context 'when entry has description and media:description' do
    let(:mrss_xml) { File.read(Rails.root.to_s + '/spec/sample_feeds/desc_plus_mediadesc.xml') }
    let(:entries) { Feedjira::Feed.parse(mrss_xml).entries }

    describe 'a parsed entry' do
      # The first item lists media:description before description; the last
      # item lists them in the opposite order.
      it 'should use whatever comes last in the XML' do
        expect(entries.first.summary).to eq("This came from description")
        expect(entries.last.summary).to eq("But this came from media:description")
      end
    end
  end

  context 'when the feed uses RSS content module' do
    let(:mrss_xml) { File.read(Rails.root.to_s + '/spec/sample_feeds/rss_with_content_module.xml') }
    let(:entry) { Feedjira::Feed.parse(mrss_xml).entries.first }

    describe 'a parsed entry' do
      it 'should use the content:encoded field for the summary' do
        expect(entry.summary).to eq("Sentence one. Sentence two. more...")
      end
    end
  end
end
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
require 'rails_helper' | ||
|
||
# Checks that the Mrss feed class recognises both sample feeds and that
# Feedjira::Feed.parse yields MrssEntry objects for them.
describe Feedjira::Parser::Oasis::Mrss do
  context 'for DMA feed' do
    let(:dma_mrss_xml) { File.read(Rails.root.to_s + '/spec/sample_feeds/dma.xml') }

    describe '#able_to_parse?' do
      context 'when first 2000 chars of XML contains MRSS text string' do
        it 'should return true' do
          recognised = described_class.able_to_parse?(dma_mrss_xml)
          expect(recognised).to be_truthy
        end
      end
    end

    describe 'the parser' do
      it 'should pull out the entries properly' do
        first_entry = Feedjira::Feed.parse(dma_mrss_xml).entries.first
        expect(first_entry).to be_an_instance_of(Feedjira::Parser::Oasis::MrssEntry)
      end
    end
  end

  context 'for RSS with content module' do
    let(:mrss_xml) { File.read(Rails.root.to_s + '/spec/sample_feeds/rss_with_content_module.xml') }

    describe '#able_to_parse?' do
      context 'when first 2000 chars of XML contains the content namespace text string' do
        it 'should return true' do
          recognised = described_class.able_to_parse?(mrss_xml)
          expect(recognised).to be_truthy
        end
      end
    end

    describe 'the parser' do
      it 'should pull out the entries properly' do
        first_entry = Feedjira::Feed.parse(mrss_xml).entries.first
        expect(first_entry).to be_an_instance_of(Feedjira::Parser::Oasis::MrssEntry)
      end
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
<?xml version="1.0" encoding="utf-8"?> | ||
<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/" xmlns:atom="http://www.w3.org/2005/Atom"> | ||
<channel> | ||
<atom:link href="http://media.dma.mil/mrss/portal/144/detailpage/www.af.mil/News/Photos.aspx" rel="self" | ||
type="application/rss+xml"/> | ||
<title>Air Force Link Images</title> | ||
<link>http://www.af.mil</link> | ||
<description>The latest images from Air Force Link.</description> | ||
<item> | ||
<title type="html"> | ||
<![CDATA[ ]]> | ||
</title> | ||
<link>http://www.af.mil/News/Photos.aspx?igphoto=2000949217</link> | ||
<guid>http://www.af.mil/News/Photos.aspx?igphoto=2000949217</guid> | ||
<pubDate>Wed, 22 Oct 2014 14:24:00 GMT</pubDate> | ||
<media:thumbnail url="http://media.dma.mil/2014/Oct/22/2000949217/145/100/0/141022-F-PB123-223.JPG" width="72" | ||
height="100"/> | ||
<media:content url="http://media.dma.mil/2014/Oct/22/2000949217/-1/-1/0/141022-F-PB123-223.JPG" width="1500" | ||
height="2100"/> | ||
<media:description>This came from media:description</media:description> | ||
<description>This came from description</description> | ||
</item> | ||
<item> | ||
<title type="html"> | ||
<![CDATA[ ]]> | ||
</title> | ||
<link>http://www.af.mil/News/Photos.aspx?igphoto=2000949218</link> | ||
<guid>http://www.af.mil/News/Photos.aspx?igphoto=2000949218</guid> | ||
<pubDate>Wed, 22 Oct 2014 14:24:00 GMT</pubDate> | ||
<media:thumbnail url="http://media.dma.mil/2014/Oct/22/2000949218/145/100/0/141022-F-PB123-223.JPG" width="72" | ||
height="100"/> | ||
<media:content url="http://media.dma.mil/2014/Oct/22/2000949218/-1/-1/0/141022-F-PB123-223.JPG" width="1500" | ||
height="2100"/> | ||
<description>But this came from description</description> | ||
<media:description>But this came from <!--Here is a comment --> media:description</media:description> | ||
</item> | ||
</channel> | ||
</rss> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
<?xml version="1.0" encoding="UTF-8" ?> | ||
<rss version="2.0" xmlns:content="http://purl.org/rss/1.0/modules/content/"> | ||
<channel> | ||
<title><![CDATA[Joint Base San Antonio - Commentaries]]></title> | ||
<link>http://www.jbsa.af.mil</link> | ||
<description><![CDATA[Joint Base San Antonio - Commentaries]]></description> | ||
<language>en-US</language> | ||
<copyright><![CDATA[2014 Joint Base San Antonio]]></copyright> | ||
<pubDate>Tue, 30 Sep 2014 17:25:16 GMT</pubDate> | ||
<lastBuildDate>Tue, 30 Sep 2014 17:25:16 GMT</lastBuildDate> | ||
<generator>Air Force Link RSS Generator</generator> | ||
<item> | ||
<title><![CDATA[Celebrating National Hispanic Heritage Month]]></title> | ||
<link>http://www.jbsa.af.mil/news/story.asp?id=123426145</link> | ||
<content:encoded><![CDATA[Sentence one.<br /> | ||
<br /> | ||
Sentence two.<br /> | ||
<br /> | ||
<a href="http://www.jbsa.af.mil/news/story.asp?id=123426145">more...</a>]]></content:encoded> | ||
<author>[email protected] (Maj. Gen. Jimmie O. Keenan)</author> | ||
<guid>http://www.jbsa.af.mil/news/story.asp?id=123426145</guid> | ||
<pubDate>Thu, 25 Sep 2014 15:58:24 EST</pubDate> | ||
</item> | ||
<item> | ||
<title><![CDATA[Beyond 360 feed back is 360 accountability]]></title> | ||
<link>http://www.jbsa.af.mil/news/story.asp?id=123422070</link> | ||
<content:encoded><![CDATA[<div style="float:left;"><a href="http://www.jbsa.af.mil/news/story.asp?id=123422070"><img border="0" style="margin-right:15px" src="http://www.jbsa.af.mil/shared/media/photodb/thumbnails/2014/06/140617-F-XX000-002.jpg"</img></a></div><font size="3"><font face="Times New Roman">In highly accomplished teams and organizations, every member is accountable for their performance - whether hitting a baseball or flying an airplane.<o:p></o:p></font></font><font face="Times New Roman" size="3"> </font> | ||
<p class="MsoNormal" style="margin: 0in 0in 0pt;">  | ||
<p class="MsoNormal" style="margin: 0in 0in 0pt;"><font size="3"><font face="Times New Roman">That is why in Air Force Operations, whether flying or defending, controlling or building, we debrief the mission, compare our performance to standards, and develop learning points to improve the next mission. In that debrief, everyone is held to equal account according to the standards of their job, whether they are O-5 or E-3, commander or wingman. In the mission debrief, we have 360-degree accountability.<o:p></o:p></font></font> <font face="Times New Roman" size="3"> </font> | ||
<p class="MsoNormal" style="margin: 0in 0in 0pt;">  | ||
<p class="MsoNormal" style="margin: 0<br/><a href="http://www.jbsa.af.mil/news/story.asp?id=123422070">more...</a>]]></content:encoded> | ||
<author>[email protected] (Col. Matt Isler )</author> | ||
<guid>http://www.jbsa.af.mil/news/story.asp?id=123422070</guid> | ||
<pubDate>Thu, 21 Aug 2014 12:15:00 EST</pubDate> | ||
</item> | ||
</channel> | ||
</rss> |
Oops, something went wrong.