-
-
Notifications
You must be signed in to change notification settings - Fork 189
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* towards v1.10.0 * change legacy behavior * Change in header de-duplication; Refactoring (#264) * Change in header de-duplication * refactor enforce utf8 encoding * more code refactoring * restructure tests (#265) * improve tests * small refactor & performance improvement * improve chunk handling * speed-up count_quote_chars * small performance improvements * accelerate hash_transformations * more performance improvements * coverage * adding Ruby 3.3 to CI tests
- Loading branch information
Showing
61 changed files
with
610 additions
and
459 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
# frozen_string_literal: true | ||
|
||
module SmarterCSV
  class << self
    # Builds the final hash for one CSV row by filtering and converting its key/value pairs.
    #
    # The input hash is not modified; a new hash is returned. For every pair, in this order:
    # 1. pairs whose key is nil, '' or :"" are dropped;
    # 2. pairs are dropped according to :remove_empty_values, :remove_zero_values and
    #    :remove_values_matching;
    # 3. the value is converted to Float / Integer if :convert_values_to_numeric applies to the key;
    # 4. the value is passed through the matching entry of :value_converters, if any.
    #
    # @param hash [Hash] header => value pairs of one row
    # @param options [Hash] processing options. Keys read here:
    #   :remove_empty_values (only an explicit `true` enables it), :remove_zero_values,
    #   :remove_values_matching (matched with `=~`), :convert_values_to_numeric (truthy, or a Hash
    #   with :only / :except), :value_converters (Hash of key => object responding to `convert`)
    # @return [Hash] the transformed row
    def hash_transformations(hash, options)
      remove_empty_values = options[:remove_empty_values] == true
      remove_zero_values = options[:remove_zero_values]
      remove_values_matching = options[:remove_values_matching]
      convert_to_numeric = options[:convert_values_to_numeric]
      value_converters = options[:value_converters]

      hash.each_with_object({}) do |(k, v), new_hash|
        # there may be unmapped keys, or keys purposely mapped to nil or an empty key:
        # those are the key/value pairs the user wanted to delete
        next if k.nil? || k == '' || k == :""
        # `has_rails` and `blank?` are not defined in this file
        next if remove_empty_values && (has_rails ? v.blank? : blank?(v))
        # \A / \z anchor the WHOLE value; ^ / $ would also match a single line inside a multi-line value
        next if remove_zero_values && v.is_a?(String) && v =~ /\A(0+|0+\.0+)\z/ # values are Strings
        next if remove_values_matching && v =~ remove_values_matching

        # deal with the :only / :except options to :convert_values_to_numeric
        if convert_to_numeric && !limit_execution_for_only_or_except(options, :convert_values_to_numeric, k)
          # whole-value anchors: "note\n12" must stay a String instead of becoming "note\n12".to_i == 0
          if v =~ /\A[+-]?\d+\.\d+\z/
            v = v.to_f
          elsif v =~ /\A[+-]?\d+\z/
            v = v.to_i
          end
        end

        converter = value_converters[k] if value_converters
        v = converter.convert(v) if converter

        new_hash[k] = v
      end
    end

    protected

    # Acts as a road-block to limit processing when iterating over all k/v pairs of a CSV-hash.
    #
    # Only applies when options[option_name] is a Hash. If that Hash has an :except key, the keys
    # listed there are blocked; otherwise, if it has an :only key, every key NOT listed is blocked.
    # When both are present, only :except is consulted.
    #
    # @param options [Hash] processing options
    # @param option_name [Symbol] name of the option that may carry :only / :except
    # @param key [Object] the row key currently being processed
    # @return [Boolean] true if processing must be skipped for `key`
    def limit_execution_for_only_or_except(options, option_name, key)
      setting = options[option_name]
      return false unless setting.is_a?(Hash)

      if setting.key?(:except)
        Array(setting[:except]).include?(key)
      elsif setting.key?(:only)
        !Array(setting[:only]).include?(key)
      else
        false
      end
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
# frozen_string_literal: true | ||
|
||
module SmarterCSV
  class << self
    # Transforms the headers that were in the file.
    #
    # String clean-up is done in place on `header_array` (quote characters removed; whitespace
    # stripped if :strip_whitespace; unless :keep_original_headers, runs of whitespace or dashes
    # replaced by '_' and, if :downcase_header, downcased). Afterwards, depending on the options,
    # duplicates are disambiguated, headers are symbolized and :key_mapping is applied; those steps
    # may return a different Array object, so callers should use the return value.
    #
    # @param header_array [Array<String>] raw headers
    # @param options [Hash] keys read here: :quote_char, :strip_whitespace, :keep_original_headers,
    #   :downcase_header, :duplicate_header_suffix, :strings_as_keys, :key_mapping
    # @return [Array] the transformed headers
    def header_transformations(header_array, options)
      # the quote character is removed as a literal string, so characters that are special in a
      # regular expression (e.g. '|' or '.') are handled correctly; nil becomes '' which is a no-op
      quote_char = options[:quote_char].to_s
      strip_whitespace = options[:strip_whitespace]
      keep_original_headers = options[:keep_original_headers]
      downcase_header = options[:downcase_header]

      header_array.map! do |header|
        header = header.gsub(quote_char, '')
        header = header.strip if strip_whitespace
        unless keep_original_headers
          header = header.gsub(/\s+|-+/, '_')
          header = header.downcase if downcase_header
        end
        header
      end

      # detect duplicate headers and disambiguate
      header_array = disambiguate_headers(header_array, options) if options[:duplicate_header_suffix]
      # symbolize headers
      header_array = header_array.map(&:to_sym) unless options[:strings_as_keys] || keep_original_headers
      # doesn't make sense to re-map when we have user_provided_headers
      header_array = remap_headers(header_array, options) if options[:key_mapping]

      header_array
    end

    # Makes repeated headers unique: the first occurrence is kept unchanged, every later occurrence
    # gets options[:duplicate_header_suffix] followed by its occurrence count (2, 3, ...) appended.
    #
    # @param headers [Array] headers, possibly containing duplicates
    # @param options [Hash] reads :duplicate_header_suffix
    # @return [Array] a new array with the disambiguated headers
    def disambiguate_headers(headers, options)
      suffix = options[:duplicate_header_suffix]
      counts = Hash.new(0)
      headers.map do |header|
        occurrence = (counts[header] += 1)
        occurrence > 1 ? "#{header}#{suffix}#{occurrence}" : header
      end
    end

    # Does some key mapping on the keys in the file header.
    # If you want to completely delete a key, then map it to nil or to ''.
    #
    # `headers` is modified in place and returned.
    #
    # @param headers [Array] headers as they are at this point of processing
    # @param options [Hash] reads :key_mapping (non-empty Hash of from => to),
    #   :silence_missing_keys (true, or an Array of mapped keys that are optional) and
    #   :remove_unmapped_keys (unmapped headers become nil)
    # @return [Array] the re-mapped headers
    # @raise [SmarterCSV::IncorrectOption] if :key_mapping is not a non-empty Hash
    # @raise [SmarterCSV::KeyMappingError] if mapped keys are missing from the headers and not silenced
    def remap_headers(headers, options)
      key_mapping = options[:key_mapping]
      # check the type first: calling empty? on an arbitrary object would raise NoMethodError
      unless key_mapping.is_a?(Hash) && !key_mapping.empty?
        raise(SmarterCSV::IncorrectOption, "ERROR: incorrect format for key_mapping! Expecting hash with from -> to mappings")
      end

      silence_missing_keys = options[:silence_missing_keys]
      # if silence_missing_keys are not set, raise error if missing header
      missing_keys = key_mapping.keys - headers
      # if the user passes a list of specific mapped keys that are optional
      missing_keys -= silence_missing_keys if silence_missing_keys.is_a?(Array)

      unless missing_keys.empty? || silence_missing_keys == true
        raise SmarterCSV::KeyMappingError, "ERROR: can not map headers: #{missing_keys.join(', ')}"
      end

      headers.map! do |header|
        if key_mapping.key?(header)
          key_mapping[header]
        elsif options[:remove_unmapped_keys]
          nil
        else
          header
        end
      end
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
# frozen_string_literal: true | ||
|
||
require 'set'

module SmarterCSV
  class << self
    # Runs all header checks: duplicate detection first, then the required-keys check.
    #
    # @param headers [Array] the headers to validate
    # @param options [Hash] processing options, passed on to the individual checks
    # @raise [SmarterCSV::DuplicateHeaders, SmarterCSV::MissingKeys] see the individual checks
    def header_validations(headers, options)
      check_duplicate_headers(headers, options)
      check_required_headers(headers, options)
    end

    # Raises if any header occurs more than once.
    #
    # Headers that are nil, '' or :"" are not counted: these are the values a header is mapped to
    # when its column is to be deleted, so several of them are not a conflict.
    #
    # @param headers [Array] the headers to validate
    # @param _options [Hash] unused
    # @raise [SmarterCSV::DuplicateHeaders] listing each duplicated header with its count
    def check_duplicate_headers(headers, _options)
      header_counts = Hash.new(0)
      headers.each do |header|
        next if header.nil? || header == '' || header == :""

        header_counts[header] += 1
      end

      duplicates = header_counts.select { |_, count| count > 1 }
      return if duplicates.empty?

      raise(SmarterCSV::DuplicateHeaders, "Duplicate Headers in CSV: #{duplicates.inspect}")
    end

    # Raises if any of options[:required_keys] is absent from the headers.
    # The check is skipped unless options[:required_keys] is an Array.
    #
    # @param headers [Array] the headers to validate
    # @param options [Hash] reads :required_keys
    # @raise [SmarterCSV::MissingKeys] listing the missing keys
    def check_required_headers(headers, options)
      required_keys = options[:required_keys]
      return unless required_keys.is_a?(Array)

      # a Set keeps each membership test cheap when there are many headers
      headers_set = headers.to_set
      missing_keys = required_keys.reject { |key| headers_set.include?(key) }
      return if missing_keys.empty?

      raise SmarterCSV::MissingKeys, "ERROR: missing attributes: #{missing_keys.join(',')}"
    end
  end
end
Oops, something went wrong.