From 3d14141b49d200233b4d66918e4bd5f38b0d3e00 Mon Sep 17 00:00:00 2001 From: Goulven Champenois Date: Sat, 19 Mar 2022 13:36:02 +0100 Subject: [PATCH] Validate URLs using Addressable gem to allow non-ascii characters Warning: this allows a lot more URLs. See https://github.com/twingly/twingly-url/issues/74#issuecomment-226334749 --- README.md | 2 +- activevalidators.gemspec | 1 + .../active_model/validations/url_validator.rb | 18 +++++------------- test/validations/url_test.rb | 19 +++++++++++++++++-- 4 files changed, 24 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 7fbbd65..4406e7f 100644 --- a/README.md +++ b/README.md @@ -128,7 +128,7 @@ Exhaustive list of supported validators and their implementation: * `ssn` : Social Security Number (only for USA). * `tracking_number`: based on a set of predefined masks * `twitter` : based on a regular expression -* `url` : based on a regular expression +* `url` : based on [`Addressable`](https://github.com/sporkmonger/addressable) gem ### Handling error messages diff --git a/activevalidators.gemspec b/activevalidators.gemspec index 9fb694e..e16d55a 100644 --- a/activevalidators.gemspec +++ b/activevalidators.gemspec @@ -17,6 +17,7 @@ Gem::Specification.new do |s| s.add_dependency 'rake' s.add_dependency 'mail' s.add_dependency 'date_validator' + s.add_dependency 'addressable' , '~> 2.7' s.add_dependency 'activemodel' , '>= 3.0' s.add_dependency 'phony' , '~> 2.0' s.add_dependency 'countries' , '>= 1.2', '< 4.0' diff --git a/lib/active_validators/active_model/validations/url_validator.rb b/lib/active_validators/active_model/validations/url_validator.rb index 325d973..7940f5a 100644 --- a/lib/active_validators/active_model/validations/url_validator.rb +++ b/lib/active_validators/active_model/validations/url_validator.rb @@ -1,10 +1,10 @@ require 'active_support/core_ext/array/wrap' -require 'uri' +require 'addressable' module ActiveModel module Validations - # Public: Uses `URI.regexp` to validate URLs, by default only allows + # Public: Uses `Addressable::URI.parse` to validate URLs, by default only allows # the http and https protocols. # # Examples @@ -46,7 +46,7 @@ def initialize(options) def validate_each(record, attribute, value) uri = as_uri(value) tld_requirement_fullfilled = check_tld_requirement(value) - record.errors.add(attribute) unless uri && value.to_s =~ uri_regexp && tld_requirement_fullfilled + record.errors.add(attribute) unless uri && uri.scheme.in?(protocols) && tld_requirement_fullfilled end private @@ -68,14 +68,6 @@ def protocols Array.wrap(options[:protocols] || %w{http https}) end - # Internal: Constructs the regular expression to check - # the URI for the configured protocols. - # - # Returns the Regexp. - def uri_regexp - @uri_regexp ||= /\A#{URI::Parser.new.make_regexp(protocols)}\z/ - end - # Internal: Checks if the tld requirements are fullfilled # # When :require_tld option is set to true, the url will be searched for @@ -83,7 +75,7 @@ def uri_regexp # # Returns a boolean value. def check_tld_requirement(value) - host = URI.parse(value.to_s).host rescue value + host = Addressable::URI.parse(value.to_s).host rescue value options[:require_tld] === true ? host =~ /.(\.)\w+/ : true end @@ -92,7 +84,7 @@ def check_tld_requirement(value) # # Returns the URI or nil. def as_uri(value) - URI.parse(value.to_s) rescue nil if value.present? + Addressable::URI.parse(value.to_s) rescue nil if value.present? end end end diff --git a/test/validations/url_test.rb b/test/validations/url_test.rb index f6b3121..ebd5658 100644 --- a/test/validations/url_test.rb +++ b/test/validations/url_test.rb @@ -1,4 +1,5 @@ require 'test_helper' + ActiveValidators.activate(:url) describe "Url Validation" do @@ -49,6 +50,20 @@ def build_ftp_record _(subject.errors.size).must_equal(0) end + it "accepts valid urls with non-ascii domain name" do + subject = build_url_record + subject.url = 'https://www.詹姆斯.com' + _(subject.valid?).must_equal(true) + _(subject.errors.size).must_equal(0) + end + + it "accepts valid urls with non-ascii path" do + subject = build_url_record + subject.url = 'https://www.example.com/ουτοπία' + _(subject.valid?).must_equal(true) + _(subject.errors.size).must_equal(0) + end + it "accepts ftp if defined" do subject = build_ftp_record subject.url = 'ftp://ftp.verrot.fr' @@ -67,7 +82,7 @@ def build_ftp_record describe "for invalid urls" do it "rejects invalid urls" do subject = build_url_record - subject.url = 'http://^^^^.fr' + subject.url = 'http://in va lid.fr' _(subject.valid?).must_equal(false) _(subject.errors.size).must_equal(1) end @@ -81,7 +96,7 @@ def build_ftp_record it "generates an error message of type invalid" do subject = build_url_record - subject.url = 'http://^^^^.fr' + subject.url = 'http://in va lid.fr' _(subject.valid?).must_equal(false) _(subject.errors[:url].include?(subject.errors.generate_message(:url, :invalid))).must_equal(true) end