Skip to content

Commit

Permalink
Use PublicSuffix 2
Browse files: browse the repository at this point in the history
Related to #89 and #71.

Close #85.
  • Loading branch information
dentarg committed Sep 9, 2016
1 parent 67be628 commit cd38e55
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 8 deletions.
19 changes: 13 additions & 6 deletions lib/twingly/url.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,12 @@
require_relative "url/error"
require_relative "version"

PublicSuffix::List.private_domains = false
PublicSuffix::List.default = PublicSuffix::List.parse(
File.read(PublicSuffix::List::DEFAULT_LIST_PATH), private_domains: false)
PublicSuffix::List.default.indexes.keys.
map { |name| Addressable::IDNA.to_ascii(name) }.
select { |name| name =~ /xn\-\-/ }.
each { |name| PublicSuffix::List.default << PublicSuffix::Rule.factory(name) }

module Twingly
class URL
Expand Down Expand Up @@ -36,9 +41,11 @@ def internal_parse(potential_url)
scheme = addressable_uri.scheme
raise Twingly::URL::Error::ParseError unless scheme =~ ACCEPTED_SCHEMES

display_uri = addressable_display_uri(addressable_uri)
# URLs that can't be normalized should not be valid
try_addressable_normalize(addressable_uri)

public_suffix_domain = PublicSuffix.parse(display_uri.host)
host = addressable_uri.host
public_suffix_domain = PublicSuffix.parse(host, default_rule: nil)
raise Twingly::URL::Error::ParseError if public_suffix_domain.nil?

raise Twingly::URL::Error::ParseError if public_suffix_domain.sld.nil?
Expand All @@ -63,8 +70,8 @@ def to_addressable_uri(potential_url)

# Workaround for the following bug in addressable:
# https://github.com/sporkmonger/addressable/issues/224
def addressable_display_uri(addressable_uri)
addressable_uri.display_uri
def try_addressable_normalize(addressable_uri)
addressable_uri.normalize
rescue ArgumentError => error
if error.message.include?("invalid byte sequence in UTF-8")
raise Twingly::URL::Error::ParseError
Expand All @@ -76,7 +83,7 @@ def addressable_display_uri(addressable_uri)
private :new
private :internal_parse
private :to_addressable_uri
private :addressable_display_uri
private :try_addressable_normalize
end

def initialize(addressable_uri, public_suffix_domain)
Expand Down
23 changes: 22 additions & 1 deletion spec/lib/twingly/url_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def invalid_urls
"http://.com.",
"http://.gl/xxx",
"http://some_site.net%C2",
"http://+%D5d.some_site.net",
"http://+%D5d.some_site.net", # https://github.com/sporkmonger/addressable/issues/224
"http://www.twingly.",
]
end
Expand All @@ -54,6 +54,8 @@ def valid_urls
"http://räksmörgås.josefßon.org",
"http://user:[email protected]/",
"http://:@blog.twingly.com/",
"https://www.foo.ایران.ir/bar",
"https://www.foo.xn--mgba3a4f16a.ir/bar",
]
end

Expand Down Expand Up @@ -238,6 +240,25 @@ def valid_urls
end

describe "#normalized" do
context "when given valid urls" do
valid_urls.each do |valid_url|
it "does not raise an error for \"#{valid_url}\"" do
expect {
described_class.parse(valid_url).normalized.to_s
}.not_to raise_error
end
end
end

context "when given bad input" do
invalid_urls.each do |invalid_url|
it "returns NullURL for \"#{invalid_url}\"" do
actual = described_class.parse(invalid_url).normalized
expect(actual).to be_a(Twingly::URL::NullURL)
end
end
end

subject { described_class.parse(url).normalized.to_s }

context "adds www if host is missing a subdomain" do
Expand Down
2 changes: 1 addition & 1 deletion twingly-url.gemspec
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ Gem::Specification.new do |s|
s.required_ruby_version = "~> 2.2"

s.add_dependency "addressable", "~> 2"
s.add_dependency "public_suffix", "~> 1.4"
s.add_dependency "public_suffix", "~> 2"
s.add_dependency "idn-ruby", "~> 0.1"

s.add_development_dependency "rake", "~> 10"
Expand Down

0 comments on commit cd38e55

Please sign in to comment.