From ecd495ad482fec7e29dc6b4a5922baa6efdaf404 Mon Sep 17 00:00:00 2001
From: Tilo Sloboda
Date: Sun, 17 Dec 2023 16:33:32 -0800
Subject: [PATCH] refactor

---
Notes (reviewer commentary; text between the three-dash line above and the
first "diff --git" line is not part of the commit message):

* Pure code move. header_transformations, disambiguate_headers and
  remap_headers go from lib/smarter_csv/headers.rb to the new file
  lib/smarter_csv/header_transformations.rb; header_validations goes to the
  new file lib/smarter_csv/header_validations.rb. The added method bodies
  repeat the removed ones unchanged.
* Both new files define the methods inside `module SmarterCSV` /
  `class << self`.
* lib/smarter_csv.rb requires the two new files immediately before
  "smarter_csv/headers".
* NOTE(review): line breaks and indentation in this copy were restored from
  the hunk line counts and Ruby nesting; run `git apply --check` against the
  target tree before relying on it.

 lib/smarter_csv.rb                        |  2 +
 lib/smarter_csv/header_transformations.rb | 63 +++++++++++++++++++
 lib/smarter_csv/header_validations.rb     | 24 +++++++
 lib/smarter_csv/headers.rb                | 77 -----------------------
 4 files changed, 89 insertions(+), 77 deletions(-)
 create mode 100644 lib/smarter_csv/header_transformations.rb
 create mode 100644 lib/smarter_csv/header_validations.rb

diff --git a/lib/smarter_csv.rb b/lib/smarter_csv.rb
index d4d94a2c..d346e4ae 100644
--- a/lib/smarter_csv.rb
+++ b/lib/smarter_csv.rb
@@ -5,6 +5,8 @@
 require "smarter_csv/options_processing"
 require "smarter_csv/auto_detection"
 require "smarter_csv/variables"
+require 'smarter_csv/header_transformations'
+require 'smarter_csv/header_validations'
 require "smarter_csv/headers"
 require "smarter_csv/parse"
 
diff --git a/lib/smarter_csv/header_transformations.rb b/lib/smarter_csv/header_transformations.rb
new file mode 100644
index 00000000..fac326e1
--- /dev/null
+++ b/lib/smarter_csv/header_transformations.rb
@@ -0,0 +1,63 @@
+# frozen_string_literal: true
+
+module SmarterCSV
+  class << self
+    # transform the headers that were in the file:
+    def header_transformations(header_array, options)
+      header_array.map!{|x| x.gsub(%r/#{options[:quote_char]}/, '')}
+      header_array.map!{|x| x.strip} if options[:strip_whitespace]
+
+      unless options[:keep_original_headers]
+        header_array.map!{|x| x.gsub(/\s+|-+/, '_')}
+        header_array.map!{|x| x.downcase} if options[:downcase_header]
+      end
+
+      # detect duplicate headers and disambiguate
+      header_array = disambiguate_headers(header_array, options) if options[:duplicate_header_suffix]
+      # symbolize headers
+      header_array = header_array.map{|x| x.to_sym } unless options[:strings_as_keys] || options[:keep_original_headers]
+      # doesn't make sense to re-map when we have user_provided_headers
+      header_array = remap_headers(header_array, options) if options[:key_mapping]
+
+      header_array
+    end
+
+    def disambiguate_headers(headers, options)
+      counts = Hash.new(0)
+      headers.map do |header|
+        counts[header] += 1
+        counts[header] > 1 ? "#{header}#{options[:duplicate_header_suffix]}#{counts[header]}" : header
+      end
+    end
+
+    # do some key mapping on the keys in the file header
+    # if you want to completely delete a key, then map it to nil or to ''
+    def remap_headers(headers, options)
+      key_mapping = options[:key_mapping]
+      if key_mapping.empty? || !key_mapping.is_a?(Hash) || key_mapping.keys.empty?
+        raise(SmarterCSV::IncorrectOption, "ERROR: incorrect format for key_mapping! Expecting hash with from -> to mappings")
+      end
+
+      key_mapping = options[:key_mapping]
+      # if silence_missing_keys are not set, raise error if missing header
+      missing_keys = key_mapping.keys - headers
+      # if the user passes a list of speciffic mapped keys that are optional
+      missing_keys -= options[:silence_missing_keys] if options[:silence_missing_keys].is_a?(Array)
+
+      unless missing_keys.empty? || options[:silence_missing_keys] == true
+        raise SmarterCSV::KeyMappingError, "ERROR: can not map headers: #{missing_keys.join(', ')}"
+      end
+
+      headers.map! do |header|
+        if key_mapping.has_key?(header)
+          key_mapping[header].nil? ? nil : key_mapping[header]
+        elsif options[:remove_unmapped_keys]
+          nil
+        else
+          header
+        end
+      end
+      headers
+    end
+  end
+end
diff --git a/lib/smarter_csv/header_validations.rb b/lib/smarter_csv/header_validations.rb
new file mode 100644
index 00000000..c95d116a
--- /dev/null
+++ b/lib/smarter_csv/header_validations.rb
@@ -0,0 +1,24 @@
+# frozen_string_literal: true
+
+module SmarterCSV
+  class << self
+    def header_validations(headers, options)
+      duplicate_headers = []
+      headers.compact.each do |k|
+        duplicate_headers << k if headers.select{|x| x == k}.size > 1
+      end
+
+      unless duplicate_headers.empty?
+        raise SmarterCSV::DuplicateHeaders, "ERROR: duplicate headers: #{duplicate_headers.join(',')}"
+      end
+
+      if options[:required_keys] && options[:required_keys].is_a?(Array)
+        missing_keys = []
+        options[:required_keys].each do |k|
+          missing_keys << k unless headers.include?(k)
+        end
+        raise SmarterCSV::MissingKeys, "ERROR: missing attributes: #{missing_keys.join(',')}" unless missing_keys.empty?
+      end
+    end
+  end
+end
diff --git a/lib/smarter_csv/headers.rb b/lib/smarter_csv/headers.rb
index 2a0c2b64..500d82b5 100644
--- a/lib/smarter_csv/headers.rb
+++ b/lib/smarter_csv/headers.rb
@@ -72,82 +72,5 @@ def remove_comments_from_header(header, options)
 
       header.sub(options[:comment_regexp], '')
     end
-
-    # transform the headers that were in the file:
-    def header_transformations(header_array, options)
-      header_array.map!{|x| x.gsub(%r/#{options[:quote_char]}/, '')}
-      header_array.map!{|x| x.strip} if options[:strip_whitespace]
-
-      unless options[:keep_original_headers]
-        header_array.map!{|x| x.gsub(/\s+|-+/, '_')}
-        header_array.map!{|x| x.downcase} if options[:downcase_header]
-      end
-
-      # detect duplicate headers and disambiguate
-      header_array = disambiguate_headers(header_array, options) if options[:duplicate_header_suffix]
-      # symbolize headers
-      header_array = header_array.map{|x| x.to_sym } unless options[:strings_as_keys] || options[:keep_original_headers]
-      # doesn't make sense to re-map when we have user_provided_headers
-      header_array = remap_headers(header_array, options) if options[:key_mapping]
-
-      header_array
-    end
-
-    def disambiguate_headers(headers, options)
-      counts = Hash.new(0)
-      headers.map do |header|
-        counts[header] += 1
-        counts[header] > 1 ? "#{header}#{options[:duplicate_header_suffix]}#{counts[header]}" : header
-      end
-    end
-
-    # do some key mapping on the keys in the file header
-    # if you want to completely delete a key, then map it to nil or to ''
-    def remap_headers(headers, options)
-      key_mapping = options[:key_mapping]
-      if key_mapping.empty? || !key_mapping.is_a?(Hash) || key_mapping.keys.empty?
-        raise(SmarterCSV::IncorrectOption, "ERROR: incorrect format for key_mapping! Expecting hash with from -> to mappings")
-      end
-
-      key_mapping = options[:key_mapping]
-      # if silence_missing_keys are not set, raise error if missing header
-      missing_keys = key_mapping.keys - headers
-      # if the user passes a list of speciffic mapped keys that are optional
-      missing_keys -= options[:silence_missing_keys] if options[:silence_missing_keys].is_a?(Array)
-
-      unless missing_keys.empty? || options[:silence_missing_keys] == true
-        raise SmarterCSV::KeyMappingError, "ERROR: can not map headers: #{missing_keys.join(', ')}"
-      end
-
-      headers.map! do |header|
-        if key_mapping.has_key?(header)
-          key_mapping[header].nil? ? nil : key_mapping[header]
-        elsif options[:remove_unmapped_keys]
-          nil
-        else
-          header
-        end
-      end
-      headers
-    end
-
-    def header_validations(headers, options)
-      duplicate_headers = []
-      headers.compact.each do |k|
-        duplicate_headers << k if headers.select{|x| x == k}.size > 1
-      end
-
-      unless duplicate_headers.empty?
-        raise SmarterCSV::DuplicateHeaders, "ERROR: duplicate headers: #{duplicate_headers.join(',')}"
-      end
-
-      if options[:required_keys] && options[:required_keys].is_a?(Array)
-        missing_keys = []
-        options[:required_keys].each do |k|
-          missing_keys << k unless headers.include?(k)
-        end
-        raise SmarterCSV::MissingKeys, "ERROR: missing attributes: #{missing_keys.join(',')}" unless missing_keys.empty?
-      end
-    end
   end
 end