diff --git a/.github/workflows/ruby.yml b/.github/workflows/ruby.yml index fd70f479..115ea992 100644 --- a/.github/workflows/ruby.yml +++ b/.github/workflows/ruby.yml @@ -26,6 +26,7 @@ jobs: - "3.0" - 3.1 - 3.2 + - 3.3 - head - truffleruby - truffleruby-head diff --git a/CHANGELOG.md b/CHANGELOG.md index 79ad3fc6..85981362 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,24 @@ # SmarterCSV 1.x Change Log +## 1.10.0 (2023-12-31) ⚡ BREAKING ⚡ + + * BREAKING CHANGES: + + Changed behavior: + + when `user_provided_headers` are provided: + * if they are not unique, an exception will now be raised + * they are taken "as is", no header transformations can be applied + * when they are given as strings or as symbols, it is assumed that this is the desired format + * the value of the `strings_as_keys` option will be ignored + + + option `duplicate_header_suffix` now defaults to `''` instead of `nil`. + * this allows automatic disambiguation when processing CSV files with duplicate headers, by appending a number + * explicitly set this option to `nil` to get the behavior from previous versions. 
+ + * performance and memory improvements + * code refactor + ## 1.9.3 (2023-12-16) * raise SmarterCSV::IncorrectOption when `user_provided_headers` are empty * code refactor / no functional changes diff --git a/README.md b/README.md index 5018ab4e..dfdf9b4f 100644 --- a/README.md +++ b/README.md @@ -2,15 +2,33 @@ # SmarterCSV [![codecov](https://codecov.io/gh/tilo/smarter_csv/branch/main/graph/badge.svg?token=1L7OD80182)](https://codecov.io/gh/tilo/smarter_csv) [![Gem Version](https://badge.fury.io/rb/smarter_csv.svg)](http://badge.fury.io/rb/smarter_csv) - + + +#### LATEST CHANGES + +* Version 1.10.0 has BREAKING CHANGES: + + Changed behavior: + + when `user_provided_headers` are provided: + * if they are not unique, an exception will now be raised + * they are taken "as is", no header transformations can be applied + * when they are given as strings or as symbols, it is assumed that this is the desired format + * the value of the `strings_as_keys` option will be ignored + + + option `duplicate_header_suffix` now defaults to `''` instead of `nil`. + * this allows automatic disambiguation when processing CSV files with duplicate headers, by appending a number + * explicitly set this option to `nil` to get the behavior from previous versions. + #### Development Branches * default branch is `main` for 1.x development -* 2.x development is on `2.0-development` (check this branch for 2.0 documentation) + +* 2.x development is on `2.0-development` (check this branch for 2.0 documentation) + - This is an EXPERIMENTAL branch - DO NOT USE in production -#### Work towards Future Version 2.0 +#### Work towards Future Version 2.x -* Work towards SmarterCSV 2.0 is still ongoing, with improved features, and more streamlined options, but consider it as experimental at this time. +* Work towards SmarterCSV 2.x is still ongoing, with improved features, and more streamlined options, but consider it as experimental at this time. 
Please check the [2.0-develop branch](https://github.com/tilo/smarter_csv/tree/2.0-develop), open any issues and pull requests with mention of tag v2.0. --------------- @@ -83,10 +101,11 @@ $ hexdump -C spec/fixtures/bom_test_feff.csv 00000030 0a 33 38 37 35 39 31 35 30 2c 71 75 69 7a 7a 65 |.38759150,quizze| 00000040 73 2c 35 36 37 38 0d 0a |s,5678..| ``` + ### Articles * [Processing 1.4 Million CSV Records in Ruby, fast ](https://lcx.wien/blog/processing-14-million-csv-records-in-ruby/) * [Speeding up CSV parsing with parallel processing](http://xjlin0.github.io/tech/2015/05/25/faster-parsing-csv-with-parallel-processing) - + ### Examples Here are some examples to demonstrate the versatility of SmarterCSV. @@ -281,7 +300,8 @@ The options and the block are optional. | :headers_in_file | true | Whether or not the file contains headers as the first line. | | | | Important if the file does not contain headers, | | | | otherwise you would lose the first line of data. | - | :duplicate_header_suffix | nil | If set, adds numbers to duplicated headers and separates them by the given suffix | + | :duplicate_header_suffix | '' | Adds numbers to duplicated headers and separates them by the given suffix. | + | | | Set this to nil to raise `DuplicateHeaders` error instead (previous behavior) | | :user_provided_headers | nil | *careful with that axe!* | | | | user provided Array of header strings or symbols, to define | | | | what headers should be used, overriding any in-file headers. 
| diff --git a/lib/smarter_csv.rb b/lib/smarter_csv.rb index d4d94a2c..26b8914d 100644 --- a/lib/smarter_csv.rb +++ b/lib/smarter_csv.rb @@ -5,13 +5,21 @@ require "smarter_csv/options_processing" require "smarter_csv/auto_detection" require "smarter_csv/variables" +require 'smarter_csv/header_transformations' +require 'smarter_csv/header_validations' require "smarter_csv/headers" +require "smarter_csv/hash_transformations" require "smarter_csv/parse" +# load the C-extension: case RUBY_ENGINE when 'ruby' begin if `uname -s`.chomp == 'Darwin' + # + # Please report if you see cases where the rake-compiler is building x86_64 code on arm64 cpus: + # https://github.com/rake-compiler/rake-compiler/issues/231 + # require 'smarter_csv/smarter_csv.bundle' else # :nocov: diff --git a/lib/smarter_csv/hash_transformations.rb b/lib/smarter_csv/hash_transformations.rb new file mode 100644 index 00000000..f6b4d538 --- /dev/null +++ b/lib/smarter_csv/hash_transformations.rb @@ -0,0 +1,91 @@ +# frozen_string_literal: true + +module SmarterCSV + class << self + def hash_transformations(hash, options) + # there may be unmapped keys, or keys purposedly mapped to nil or an empty key.. + # make sure we delete any key/value pairs from the hash, which the user wanted to delete: + remove_empty_values = options[:remove_empty_values] == true + remove_zero_values = options[:remove_zero_values] + remove_values_matching = options[:remove_values_matching] + convert_to_numeric = options[:convert_values_to_numeric] + value_converters = options[:value_converters] + + hash.each_with_object({}) do |(k, v), new_hash| + next if k.nil? || k == '' || k == :"" + next if remove_empty_values && (has_rails ? v.blank? 
: blank?(v)) + next if remove_zero_values && v.is_a?(String) && v =~ /^(0+|0+\.0+)$/ # values are Strings + next if remove_values_matching && v =~ remove_values_matching + + # deal with the :only / :except options to :convert_values_to_numeric + if convert_to_numeric && !limit_execution_for_only_or_except(options, :convert_values_to_numeric, k) + if v =~ /^[+-]?\d+\.\d+$/ + v = v.to_f + elsif v =~ /^[+-]?\d+$/ + v = v.to_i + end + end + + converter = value_converters[k] if value_converters + v = converter.convert(v) if converter + + new_hash[k] = v + end + end + + # def hash_transformations(hash, options) + # # there may be unmapped keys, or keys purposedly mapped to nil or an empty key.. + # # make sure we delete any key/value pairs from the hash, which the user wanted to delete: + # hash.delete(nil) + # hash.delete('') + # hash.delete(:"") + + # if options[:remove_empty_values] == true + # hash.delete_if{|_k, v| has_rails ? v.blank? : blank?(v)} + # end + + # hash.delete_if{|_k, v| !v.nil? 
&& v =~ /^(0+|0+\.0+)$/} if options[:remove_zero_values] # values are Strings + # hash.delete_if{|_k, v| v =~ options[:remove_values_matching]} if options[:remove_values_matching] + + # if options[:convert_values_to_numeric] + # hash.each do |k, v| + # # deal with the :only / :except options to :convert_values_to_numeric + # next if limit_execution_for_only_or_except(options, :convert_values_to_numeric, k) + + # # convert if it's a numeric value: + # case v + # when /^[+-]?\d+\.\d+$/ + # hash[k] = v.to_f + # when /^[+-]?\d+$/ + # hash[k] = v.to_i + # end + # end + # end + + # if options[:value_converters] + # hash.each do |k, v| + # converter = options[:value_converters][k] + # next unless converter + + # hash[k] = converter.convert(v) + # end + # end + + # hash + # end + + protected + + # acts as a road-block to limit processing when iterating over all k/v pairs of a CSV-hash: + def limit_execution_for_only_or_except(options, option_name, key) + if options[option_name].is_a?(Hash) + if options[option_name].has_key?(:except) + return true if Array(options[option_name][:except]).include?(key) + elsif options[option_name].has_key?(:only) + return true unless Array(options[option_name][:only]).include?(key) + end + end + false + end + end +end diff --git a/lib/smarter_csv/header_transformations.rb b/lib/smarter_csv/header_transformations.rb new file mode 100644 index 00000000..fac326e1 --- /dev/null +++ b/lib/smarter_csv/header_transformations.rb @@ -0,0 +1,63 @@ +# frozen_string_literal: true + +module SmarterCSV + class << self + # transform the headers that were in the file: + def header_transformations(header_array, options) + header_array.map!{|x| x.gsub(%r/#{options[:quote_char]}/, '')} + header_array.map!{|x| x.strip} if options[:strip_whitespace] + + unless options[:keep_original_headers] + header_array.map!{|x| x.gsub(/\s+|-+/, '_')} + header_array.map!{|x| x.downcase} if options[:downcase_header] + end + + # detect duplicate headers and disambiguate + 
header_array = disambiguate_headers(header_array, options) if options[:duplicate_header_suffix] + # symbolize headers + header_array = header_array.map{|x| x.to_sym } unless options[:strings_as_keys] || options[:keep_original_headers] + # doesn't make sense to re-map when we have user_provided_headers + header_array = remap_headers(header_array, options) if options[:key_mapping] + + header_array + end + + def disambiguate_headers(headers, options) + counts = Hash.new(0) + headers.map do |header| + counts[header] += 1 + counts[header] > 1 ? "#{header}#{options[:duplicate_header_suffix]}#{counts[header]}" : header + end + end + + # do some key mapping on the keys in the file header + # if you want to completely delete a key, then map it to nil or to '' + def remap_headers(headers, options) + key_mapping = options[:key_mapping] + if key_mapping.empty? || !key_mapping.is_a?(Hash) || key_mapping.keys.empty? + raise(SmarterCSV::IncorrectOption, "ERROR: incorrect format for key_mapping! Expecting hash with from -> to mappings") + end + + key_mapping = options[:key_mapping] + # if silence_missing_keys are not set, raise error if missing header + missing_keys = key_mapping.keys - headers + # if the user passes a list of speciffic mapped keys that are optional + missing_keys -= options[:silence_missing_keys] if options[:silence_missing_keys].is_a?(Array) + + unless missing_keys.empty? || options[:silence_missing_keys] == true + raise SmarterCSV::KeyMappingError, "ERROR: can not map headers: #{missing_keys.join(', ')}" + end + + headers.map! do |header| + if key_mapping.has_key?(header) + key_mapping[header].nil? ? 
nil : key_mapping[header] + elsif options[:remove_unmapped_keys] + nil + else + header + end + end + headers + end + end +end diff --git a/lib/smarter_csv/header_validations.rb b/lib/smarter_csv/header_validations.rb new file mode 100644 index 00000000..bd906da4 --- /dev/null +++ b/lib/smarter_csv/header_validations.rb @@ -0,0 +1,34 @@ +# frozen_string_literal: true + +module SmarterCSV + class << self + def header_validations(headers, options) + check_duplicate_headers(headers, options) + check_required_headers(headers, options) + end + + def check_duplicate_headers(headers, _options) + header_counts = Hash.new(0) + headers.each { |header| header_counts[header] += 1 unless header.nil? } + + duplicates = header_counts.select { |_, count| count > 1 } + + unless duplicates.empty? + raise(SmarterCSV::DuplicateHeaders, "Duplicate Headers in CSV: #{duplicates.inspect}") + end + end + + require 'set' + + def check_required_headers(headers, options) + if options[:required_keys] && options[:required_keys].is_a?(Array) + headers_set = headers.to_set + missing_keys = options[:required_keys].select { |k| !headers_set.include?(k) } + + unless missing_keys.empty? 
+ raise SmarterCSV::MissingKeys, "ERROR: missing attributes: #{missing_keys.join(',')}" + end + end + end + end +end diff --git a/lib/smarter_csv/headers.rb b/lib/smarter_csv/headers.rb index 98795f6e..cc557c69 100644 --- a/lib/smarter_csv/headers.rb +++ b/lib/smarter_csv/headers.rb @@ -17,11 +17,8 @@ def process_headers(filehandle, options) file_header_array, file_header_size = parse(header_line, options) - # header transformations: - file_header_array = transform_headers(file_header_array, options) + file_header_array = header_transformations(file_header_array, options) - # currently this is, but should not be called on user_provided headers - file_header_array = legacy_header_transformations(file_header_array, options) else unless options[:user_provided_headers] raise SmarterCSV::IncorrectOption, "ERROR: If :headers_in_file is set to false, you have to provide :user_provided_headers" @@ -45,18 +42,10 @@ def process_headers(filehandle, options) end header_array = user_header_array - - # these 3 steps should only be part of the header transformation when headers_in_file: - # -> breaking change when we move this to transform_headers() - # see details in legacy_header_transformations() - # - header_array = legacy_header_transformations(header_array, options) else header_array = file_header_array end - validate_headers(header_array, options) - [header_array, header_array.size] end @@ -70,95 +59,6 @@ def preprocess_header_line(header_line, options) header_line end - # transform the headers that were in the file: - def transform_headers(header_array, options) - header_array.map!{|x| x.gsub(%r/#{options[:quote_char]}/, '')} - header_array.map!{|x| x.strip} if options[:strip_whitespace] - - unless options[:keep_original_headers] - header_array.map!{|x| x.gsub(/\s+|-+/, '_')} - header_array.map!{|x| x.downcase} if options[:downcase_header] - end - - header_array - end - - def legacy_header_transformations(header_array, options) - # detect duplicate headers and 
disambiguate - # -> user_provided_headers should not have duplicates! - header_array = disambiguate_headers(header_array, options) if options[:duplicate_header_suffix] - # symbolize headers - # -> user_provided_headers should already be symbols or strings as needed - header_array = header_array.map{|x| x.to_sym } unless options[:strings_as_keys] || options[:keep_original_headers] - # doesn't make sense to re-map when we have user_provided_headers - header_array = remap_headers(header_array, options) if options[:key_mapping] && !options[:user_provided_headers] - header_array - end - - def disambiguate_headers(headers, options) - counts = Hash.new(0) - headers.map do |header| - counts[header] += 1 - counts[header] > 1 ? "#{header}#{options[:duplicate_header_suffix]}#{counts[header]}" : header - end - end - - # do some key mapping on the keys in the file header - # if you want to completely delete a key, then map it to nil or to '' - def remap_headers(headers, options) - key_mapping = options[:key_mapping] - if key_mapping.empty? || !key_mapping.is_a?(Hash) || key_mapping.keys.empty? - raise(SmarterCSV::IncorrectOption, "ERROR: incorrect format for key_mapping! Expecting hash with from -> to mappings") - end - - key_mapping = options[:key_mapping] - # if silence_missing_keys are not set, raise error if missing header - missing_keys = key_mapping.keys - headers - # if the user passes a list of speciffic mapped keys that are optional - missing_keys -= options[:silence_missing_keys] if options[:silence_missing_keys].is_a?(Array) - - unless missing_keys.empty? || options[:silence_missing_keys] == true - raise SmarterCSV::KeyMappingError, "ERROR: can not map headers: #{missing_keys.join(', ')}" - end - - headers.map! do |header| - if key_mapping.has_key?(header) - key_mapping[header].nil? ? 
nil : key_mapping[header] - elsif options[:remove_unmapped_keys] - nil - else - header - end - end - headers - end - - # header_validations - def validate_headers(headers, options) - duplicate_headers = [] - headers.compact.each do |k| - duplicate_headers << k if headers.select{|x| x == k}.size > 1 - end - - unless options[:user_provided_headers] || duplicate_headers.empty? - raise SmarterCSV::DuplicateHeaders, "ERROR: duplicate headers: #{duplicate_headers.join(',')}" - end - - if options[:required_keys] && options[:required_keys].is_a?(Array) - missing_keys = [] - options[:required_keys].each do |k| - missing_keys << k unless headers.include?(k) - end - raise SmarterCSV::MissingKeys, "ERROR: missing attributes: #{missing_keys.join(',')}" unless missing_keys.empty? - end - end - - def enforce_utf8_encoding(header, options) - return header unless options[:force_utf8] || options[:file_encoding] !~ /utf-8/i - - header.force_encoding('utf-8').encode('utf-8', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) - end - def remove_comments_from_header(header, options) return header unless options[:comment_regexp] diff --git a/lib/smarter_csv/options_processing.rb b/lib/smarter_csv/options_processing.rb index 606c4963..6d14302f 100644 --- a/lib/smarter_csv/options_processing.rb +++ b/lib/smarter_csv/options_processing.rb @@ -9,7 +9,7 @@ module SmarterCSV comment_regexp: nil, # was: /\A#/, convert_values_to_numeric: true, downcase_header: true, - duplicate_header_suffix: nil, + duplicate_header_suffix: '', # was: nil, file_encoding: 'utf-8', force_simple_split: false, force_utf8: false, diff --git a/lib/smarter_csv/smarter_csv.rb b/lib/smarter_csv/smarter_csv.rb index d361704a..c0fcb459 100644 --- a/lib/smarter_csv/smarter_csv.rb +++ b/lib/smarter_csv/smarter_csv.rb @@ -16,12 +16,13 @@ def SmarterCSV.process(input, given_options = {}, &block) # rubocop:disable Lint options = process_options(given_options) - has_rails = !!defined?(Rails) + 
@enforce_utf8 = options[:force_utf8] || options[:file_encoding] !~ /utf-8/i + @verbose = options[:verbose] begin fh = input.respond_to?(:readline) ? input : File.open(input, "r:#{options[:file_encoding]}") - if (options[:force_utf8] || options[:file_encoding] =~ /utf-8/i) && (fh.respond_to?(:external_encoding) && fh.external_encoding != Encoding.find('UTF-8') || fh.respond_to?(:encoding) && fh.encoding != Encoding.find('UTF-8')) + if @enforce_utf8 && (fh.respond_to?(:external_encoding) && fh.external_encoding != Encoding.find('UTF-8') || fh.respond_to?(:encoding) && fh.encoding != Encoding.find('UTF-8')) puts 'WARNING: you are trying to process UTF-8 input, but did not open the input with "b:utf-8" option. See README file "NOTES about File Encodings".' end @@ -35,6 +36,10 @@ def SmarterCSV.process(input, given_options = {}, &block) # rubocop:disable Lint @headers, header_size = process_headers(fh, options) @headerA = @headers # @headerA is deprecated, use @headers + puts "Effective headers:\n#{pp(@headers)}\n" if @verbose + + header_validations(@headers, options) + # in case we use chunking.. we'll need to set it up.. if options[:chunk_size].to_i > 0 use_chunks = true @@ -46,31 +51,42 @@ def SmarterCSV.process(input, given_options = {}, &block) # rubocop:disable Lint end # now on to processing all the rest of the lines in the CSV file: + # fh.each_line |line| until fh.eof? 
# we can't use fh.readlines() here, because this would read the whole file into memory at once, and eof => true line = readline_with_counts(fh, options) # replace invalid byte sequence in UTF-8 with question mark to avoid errors - line = line.force_encoding('utf-8').encode('utf-8', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] !~ /utf-8/i + line = enforce_utf8_encoding(line, options) if @enforce_utf8 - print "processing file line %10d, csv line %10d\r" % [@file_line_count, @csv_line_count] if options[:verbose] + print "processing file line %10d, csv line %10d\r" % [@file_line_count, @csv_line_count] if @verbose next if options[:comment_regexp] && line =~ options[:comment_regexp] # ignore all comment lines if there are any # cater for the quoted csv data containing the row separator carriage return character # in which case the row data will be split across multiple lines (see the sample content in spec/fixtures/carriage_returns_rn.csv) # by detecting the existence of an uneven number of quote characters + multiline = count_quote_chars(line, options[:quote_char]).odd? - multiline = count_quote_chars(line, options[:quote_char]).odd? # should handle quote_char nil - while count_quote_chars(line, options[:quote_char]).odd? # should handle quote_char nil + while multiline next_line = fh.readline(options[:row_sep]) - next_line = next_line.force_encoding('utf-8').encode('utf-8', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] !~ /utf-8/i + next_line = enforce_utf8_encoding(next_line, options) if @enforce_utf8 line += next_line @file_line_count += 1 + + break if fh.eof? # Exit loop if end of file is reached + + multiline = count_quote_chars(line, options[:quote_char]).odd? 
+ end + + # :nocov: + if multiline && @verbose + print "\nline contains uneven number of quote chars so including content through file line %d\n" % @file_line_count end - print "\nline contains uneven number of quote chars so including content through file line %d\n" % @file_line_count if options[:verbose] && multiline + # :nocov: line.chomp!(options[:row_sep]) + # --- SPLIT LINE & DATA TRANSFORMATIONS ------------------------------------------------------------ dataA, _data_size = parse(line, options, header_size) dataA.map!{|x| x.strip} if options[:strip_whitespace] @@ -78,48 +94,25 @@ def SmarterCSV.process(input, given_options = {}, &block) # rubocop:disable Lint # if all values are blank, then ignore this line next if options[:remove_empty_hashes] && (dataA.empty? || blank?(dataA)) + # --- HASH TRANSFORMATIONS ------------------------------------------------------------ hash = @headers.zip(dataA).to_h - # make sure we delete any key/value pairs from the hash, which the user wanted to delete: - hash.delete(nil) - hash.delete('') - hash.delete(:"") + hash = hash_transformations(hash, options) - if options[:remove_empty_values] == true - hash.delete_if{|_k, v| has_rails ? v.blank? : blank?(v)} - end - - hash.delete_if{|_k, v| !v.nil? 
&& v =~ /^(0+|0+\.0+)$/} if options[:remove_zero_values] # values are Strings - hash.delete_if{|_k, v| v =~ options[:remove_values_matching]} if options[:remove_values_matching] - - if options[:convert_values_to_numeric] - hash.each do |k, v| - # deal with the :only / :except options to :convert_values_to_numeric - next if limit_execution_for_only_or_except(options, :convert_values_to_numeric, k) - - # convert if it's a numeric value: - case v - when /^[+-]?\d+\.\d+$/ - hash[k] = v.to_f - when /^[+-]?\d+$/ - hash[k] = v.to_i - end - end - end - - if options[:value_converters] - hash.each do |k, v| - converter = options[:value_converters][k] - next unless converter - - hash[k] = converter.convert(v) - end - end + # --- HASH VALIDATIONS ---------------------------------------------------------------- + # will go here, and be able to: + # - validate correct format of the values for fields + # - required fields to be non-empty + # - ... + # ------------------------------------------------------------------------------------- next if options[:remove_empty_hashes] && hash.empty? + puts "CSV Line #{@file_line_count}: #{pp(hash)}" if @verbose == '2' # very verbose setting + # optional adding of csv_line_number to the hash to help debugging hash[:csv_line_number] = @csv_line_count if options[:with_line_numbers] + # process the chunks or the resulting hash if use_chunks chunk << hash # append temp result to chunk @@ -128,16 +121,13 @@ def SmarterCSV.process(input, given_options = {}, &block) # rubocop:disable Lint if block_given? 
yield chunk # do something with the hashes in the chunk in the block else - @result << chunk # not sure yet, why anybody would want to do this without a block + @result << chunk.dup # Append chunk to result (use .dup to keep a copy after we do chunk.clear) end @chunk_count += 1 - chunk = [] # initialize for next chunk of data + chunk.clear # re-initialize for next chunk of data else - - # the last chunk may contain partial data, which also needs to be returned (BUG / ISSUE-18) - + # the last chunk may contain partial data, which is handled below end - # while a chunk is being filled up we don't need to do anything else here else # no chunk handling @@ -150,15 +140,15 @@ def SmarterCSV.process(input, given_options = {}, &block) # rubocop:disable Lint end # print new line to retain last processing line message - print "\n" if options[:verbose] + print "\n" if @verbose - # last chunk: + # handling of last chunk: if !chunk.nil? && chunk.size > 0 # do something with the chunk if block_given? yield chunk # do something with the hashes in the chunk in the block else - @result << chunk # not sure yet, why anybody would want to do this without a block + @result << chunk.dup # Append chunk to result (use .dup to keep a copy after we do chunk.clear) end @chunk_count += 1 # chunk = [] # initialize for next chunk of data @@ -175,16 +165,22 @@ def SmarterCSV.process(input, given_options = {}, &block) # rubocop:disable Lint end class << self - # * the `scan` method iterates through the string and finds all occurrences of the pattern - # * The reqular expression: - # - (? 
0 + end + end + + # same as previous test, but reading the file with strings as keys -describe 'be_able_to' do it 'loads_binary_file_with_strings_as_keys' do options = {col_sep: "\cA", row_sep: "\cB", comment_regexp: /^#/, strings_as_keys: true} - data = SmarterCSV.process("#{fixture_path}/binary.csv", options) + data = SmarterCSV.process(binary_file, options) expect(data.size).to eq 8 data.each do |item| diff --git a/spec/smarter_csv/chunked_reading_spec.rb b/spec/features/chunked/chunked_reading_spec.rb similarity index 97% rename from spec/smarter_csv/chunked_reading_spec.rb rename to spec/features/chunked/chunked_reading_spec.rb index ad73824c..95dd661b 100644 --- a/spec/smarter_csv/chunked_reading_spec.rb +++ b/spec/features/chunked/chunked_reading_spec.rb @@ -4,7 +4,7 @@ fixture_path = 'spec/fixtures' -describe 'be_able_to' do +describe 'chunked reading' do it 'loads_chunk_cornercase_csv_files' do 6.times do |chunk_size| # test for all chunk-sizes options = {chunk_size: chunk_size, remove_empty_hashes: true} diff --git a/spec/smarter_csv/chunked_spec.rb b/spec/features/chunked/chunked_spec.rb similarity index 100% rename from spec/smarter_csv/chunked_spec.rb rename to spec/features/chunked/chunked_spec.rb diff --git a/spec/smarter_csv/convert_values_to_numeric_spec.rb b/spec/features/converters/convert_values_to_numeric_spec.rb similarity index 100% rename from spec/smarter_csv/convert_values_to_numeric_spec.rb rename to spec/features/converters/convert_values_to_numeric_spec.rb diff --git a/spec/smarter_csv/value_converters_spec.rb b/spec/features/converters/value_converters_spec.rb similarity index 92% rename from spec/smarter_csv/value_converters_spec.rb rename to spec/features/converters/value_converters_spec.rb index 905b88e2..abcd99fe 100644 --- a/spec/smarter_csv/value_converters_spec.rb +++ b/spec/features/converters/value_converters_spec.rb @@ -13,11 +13,11 @@ def self.convert(value) class CurrencyConverter def self.convert(value) - value.sub(/[$]/, 
'').to_f # would be nice to add a computed column :currency => '€' + value.sub(/[$]/, '').to_f # would be nice to add a computed column :currency => '€' end end -describe 'be_able_to' do +describe ':value_converters option' do it 'convert date values into Date instances' do options = {value_converters: {date: DateConverter}} data = SmarterCSV.process("#{fixture_path}/with_dates.csv", options) diff --git a/spec/smarter_csv/bom_issues_spec.rb b/spec/features/formating/bom_issues_spec.rb similarity index 100% rename from spec/smarter_csv/bom_issues_spec.rb rename to spec/features/formating/bom_issues_spec.rb diff --git a/spec/smarter_csv/carriage_return_spec.rb b/spec/features/formating/carriage_return_spec.rb similarity index 100% rename from spec/smarter_csv/carriage_return_spec.rb rename to spec/features/formating/carriage_return_spec.rb diff --git a/spec/smarter_csv/column_separator_spec.rb b/spec/features/formating/column_separator_spec.rb similarity index 95% rename from spec/smarter_csv/column_separator_spec.rb rename to spec/features/formating/column_separator_spec.rb index ae908127..6785ff15 100644 --- a/spec/smarter_csv/column_separator_spec.rb +++ b/spec/features/formating/column_separator_spec.rb @@ -171,14 +171,14 @@ { col_sep: :auto, headers_in_file: false, - user_provided_headers: %w[Date1 Date2], + user_provided_headers: %w[Date1 Date2], # user provides strings } end it 'will fail to guess the separator' do data = SmarterCSV.process("#{fixture_path}/separator_comma_no_headers_will_fail.csv", options) - expect(data.first[:Date1]).to eq '2022-10-04 16' # Instead of 2022-10-04 16:00:47 UTC - expect(data.first[:Date2]).to eq 0 # Instead of 2022-10-04 16:00:47 UTC + expect(data.first['Date1']).to eq '2022-10-04 16' # Instead of 2022-10-04 16:00:47 UTC + expect(data.first['Date2']).to eq 0 # Instead of 2022-10-04 16:00:47 UTC end end end diff --git a/spec/smarter_csv/emoji_spec.rb b/spec/features/formating/emoji_spec.rb similarity index 100% rename from 
spec/smarter_csv/emoji_spec.rb rename to spec/features/formating/emoji_spec.rb diff --git a/spec/smarter_csv/line_ending_spec.rb b/spec/features/formating/line_ending_spec.rb similarity index 100% rename from spec/smarter_csv/line_ending_spec.rb rename to spec/features/formating/line_ending_spec.rb diff --git a/spec/smarter_csv/valid_unicode_spec.rb b/spec/features/formating/valid_unicode_spec.rb similarity index 99% rename from spec/smarter_csv/valid_unicode_spec.rb rename to spec/features/formating/valid_unicode_spec.rb index fcd38d81..00493a29 100644 --- a/spec/smarter_csv/valid_unicode_spec.rb +++ b/spec/features/formating/valid_unicode_spec.rb @@ -4,7 +4,7 @@ fixture_path = 'spec/fixtures' -describe 'be_able_to' do +describe 'valid unicode' do it 'loads file with unicode strings' do options = {} data = SmarterCSV.process("#{fixture_path}/valid_unicode.csv", options) diff --git a/spec/smarter_csv/additional_separator_spec.rb b/spec/features/general/additional_separator_spec.rb similarity index 100% rename from spec/smarter_csv/additional_separator_spec.rb rename to spec/features/general/additional_separator_spec.rb diff --git a/spec/features/general/basic_spec.rb b/spec/features/general/basic_spec.rb new file mode 100644 index 00000000..37b1eb5e --- /dev/null +++ b/spec/features/general/basic_spec.rb @@ -0,0 +1,97 @@ +# frozen_string_literal: true + +require 'spec_helper' + +fixture_path = 'spec/fixtures' + +[true, false].each do |bool| + describe "fulfills basic tests with#{bool ? 
' C-' : 'out '}acceleration" do + let(:options) { { acceleration: bool } } + + describe 'basic CSV processing' do + # works only when testing locally + unless ENV['CI'] + it 'compiles the acceleration' do + expect(SmarterCSV.has_acceleration?).to eq true + end + end + + context 'with basic CSV file' do + let(:basic_file) { "#{fixture_path}/basic.csv" } + + it 'loads_basic_csv_file' do + data = SmarterCSV.process(basic_file, options) + expect(data.size).to eq 5 + + data.each do |h| + h.each_key do |key| + # all the keys should be symbols + expect(key.class).to eq Symbol + + expect(%i[first_name last_name dogs cats birds fish]).to include(key) + end + expect(h.size).to be <= 6 + end + end + + it 'loads_basic_csv_file from Rails' do + stub_const('Rails', true) + data = SmarterCSV.process(basic_file, options) + expect(data.size).to eq 5 + + data.each do |h| + h.each_key do |key| + # all the keys should be symbols + expect(key.class).to eq Symbol + + expect(%i[first_name last_name dogs cats birds fish]).to include(key) + end + expect(h.size).to be <= 6 + end + end + + context 'with full user_provided_headers' do + let(:options) { super().merge({user_provided_headers: %i[a b c d e f]}) } + + it 'replaces headers with user_provided_headers' do + data = SmarterCSV.process(basic_file, options) + expect(data.size).to eq 5 + + expect(SmarterCSV.raw_header).to eq "First Name,Last Name,Dogs,Cats,Birds,Fish\n" + expect(SmarterCSV.headers).to eq %i[a b c d e f] + end + end + + context 'with partial user_provided_headers' do + let(:options) { super().merge({user_provided_headers: %i[a b c d e]}) } + + it 'raises an exception if the number of user_provided_headers is incorrect' do + expect do + SmarterCSV.process(basic_file, options) + end.to raise_exception(SmarterCSV::HeaderSizeMismatch) + end + end + + context 'with empty user_provided_headers' do + let(:options) { super().merge({user_provided_headers: []}) } + + it 'raises an exception if the user_provided_headers is empty' do 
+ expect do + SmarterCSV.process(basic_file, options) + end.to raise_exception(SmarterCSV::IncorrectOption, /ERROR: incorrect format for user_provided_headers! Expecting array with headers/) + end + end + + context 'with incorrect user_provided_headers' do + let(:options) { super().merge({user_provided_headers: {}}) } + + it 'raises an exception if the user_provided_headers is of incorrect type' do + expect do + SmarterCSV.process(basic_file, options) + end.to raise_exception(SmarterCSV::IncorrectOption, /ERROR: incorrect format for user_provided_headers! Expecting array with headers/) + end + end + end + end + end +end diff --git a/spec/smarter_csv/empty_columns_spec.rb b/spec/features/general/empty_columns_spec.rb similarity index 100% rename from spec/smarter_csv/empty_columns_spec.rb rename to spec/features/general/empty_columns_spec.rb diff --git a/spec/smarter_csv/simple_spec.rb b/spec/features/general/simple_spec.rb similarity index 100% rename from spec/smarter_csv/simple_spec.rb rename to spec/features/general/simple_spec.rb diff --git a/spec/smarter_csv/remove_empty_values_spec.rb b/spec/features/hash_transformations/remove_empty_values_spec.rb similarity index 81% rename from spec/smarter_csv/remove_empty_values_spec.rb rename to spec/features/hash_transformations/remove_empty_values_spec.rb index 02c2e30d..788eb826 100644 --- a/spec/smarter_csv/remove_empty_values_spec.rb +++ b/spec/features/hash_transformations/remove_empty_values_spec.rb @@ -4,8 +4,8 @@ fixture_path = 'spec/fixtures' -describe 'be_able_to' do - it 'remove_empty_values' do +describe ':remove_empty_values option' do + it 'removes empty values' do options = {row_sep: :auto, remove_empty_values: true} data = SmarterCSV.process("#{fixture_path}/empty.csv", options) expect(data.size).to eq 1 diff --git a/spec/smarter_csv/remove_keys_from_hashes_spec.rb b/spec/features/hash_transformations/remove_keys_from_hashes_spec.rb similarity index 77% rename from 
spec/smarter_csv/remove_keys_from_hashes_spec.rb rename to spec/features/hash_transformations/remove_keys_from_hashes_spec.rb index 1e1b2d96..065920b2 100644 --- a/spec/smarter_csv/remove_keys_from_hashes_spec.rb +++ b/spec/features/hash_transformations/remove_keys_from_hashes_spec.rb @@ -4,8 +4,8 @@ fixture_path = 'spec/fixtures' -describe 'be_able_to' do - it 'remove_values_matching' do +describe ':remove_zero_values option' do + it 'removes zero values' do options = {remove_zero_values: true, key_mapping: {first_name: :vorname, last_name: :nachname, fish: nil} } data = SmarterCSV.process("#{fixture_path}/basic.csv", options) expect(data.size).to eq 5 @@ -16,7 +16,8 @@ expect(%i[vorname nachname dogs cats birds]).to include(key) end - expect(hash.values).to_not include(0) + expect(hash.keys).not_to include(:fish) + expect(hash.values).not_to include(0) expect(hash.size).to be <= 6 end diff --git a/spec/smarter_csv/remove_not_mapped_keys_spec.rb b/spec/features/hash_transformations/remove_not_mapped_keys_spec.rb similarity index 100% rename from spec/smarter_csv/remove_not_mapped_keys_spec.rb rename to spec/features/hash_transformations/remove_not_mapped_keys_spec.rb diff --git a/spec/smarter_csv/remove_values_matching_spec.rb b/spec/features/hash_transformations/remove_values_matching_spec.rb similarity index 85% rename from spec/smarter_csv/remove_values_matching_spec.rb rename to spec/features/hash_transformations/remove_values_matching_spec.rb index 112aa032..7484dd3a 100644 --- a/spec/smarter_csv/remove_values_matching_spec.rb +++ b/spec/features/hash_transformations/remove_values_matching_spec.rb @@ -4,8 +4,8 @@ fixture_path = 'spec/fixtures' -describe 'be_able_to' do - it 'remove_values_matching' do +describe ':remove_values_matching option' do + it 'removes values' do options = {remove_zero_values: true, remove_empty_values: true, remove_values_matching: /^\d+$/} data = SmarterCSV.process("#{fixture_path}/basic.csv", options) expect(data.size).to eq 5 @@ 
-21,7 +21,7 @@ expect(val.class).to eq String # all the values should be strings end - expect(hash.values).to_not include(0) + expect(hash.values).not_to include(0) expect(hash.size).to be <= 6 end diff --git a/spec/smarter_csv/remove_zero_values_spec.rb b/spec/features/hash_transformations/remove_zero_values_spec.rb similarity index 81% rename from spec/smarter_csv/remove_zero_values_spec.rb rename to spec/features/hash_transformations/remove_zero_values_spec.rb index 50307213..7aa07c0d 100644 --- a/spec/smarter_csv/remove_zero_values_spec.rb +++ b/spec/features/hash_transformations/remove_zero_values_spec.rb @@ -4,8 +4,8 @@ fixture_path = 'spec/fixtures' -describe 'be_able_to' do - it 'remove_zero_values' do +describe ':remove_zero_values option' do + it 'removes zero values' do options = {remove_zero_values: true, remove_empty_values: true} data = SmarterCSV.process("#{fixture_path}/basic.csv", options) expect(data.size).to eq 5 @@ -17,7 +17,7 @@ expect(%i[first_name last_name dogs cats birds fish]).to include(key) end - expect(hash.values).to_not include(0) + expect(hash.values).not_to include(0) expect(hash.size).to be <= 6 end diff --git a/spec/smarter_csv/duplicate_headers_spec.rb b/spec/features/header_handling/duplicate_headers_spec.rb similarity index 61% rename from spec/smarter_csv/duplicate_headers_spec.rb rename to spec/features/header_handling/duplicate_headers_spec.rb index 22d0cf00..f11c7dde 100644 --- a/spec/smarter_csv/duplicate_headers_spec.rb +++ b/spec/features/header_handling/duplicate_headers_spec.rb @@ -6,35 +6,37 @@ describe 'duplicate headers' do describe 'without special handling / default behavior' do - it 'raises error on duplicate headers' do - expect do - SmarterCSV.process("#{fixture_path}/duplicate_headers.csv", {}) - end.to raise_exception(SmarterCSV::DuplicateHeaders) - end - it 'does not raise error when duplicate_header_suffix is given' do expect do SmarterCSV.process("#{fixture_path}/duplicate_headers.csv", 
{duplicate_header_suffix: ''}) end.not_to raise_exception end - it 'does not raise error when user_provided_headers are given' do + it 'raises error when user_provided_headers with duplicates are given' do expect do options = {user_provided_headers: %i[a b c d a]} SmarterCSV.process("#{fixture_path}/duplicate_headers.csv", options) - end.not_to raise_exception + end.to raise_exception(SmarterCSV::DuplicateHeaders) end - it 'raises error on duplicate headers, when attempting to do key_mapping' do - # the mapping is right, but the underlying csv file is bad - options = {key_mapping: {email: :a, firstname: :b, lastname: :c, age: :e} } - expect do - SmarterCSV.process("#{fixture_path}/duplicate_headers.csv", options) - end.to raise_exception(SmarterCSV::DuplicateHeaders) + it 'can remap duplicated headers' do + options ={key_mapping: {email: :a, firstname: :b, lastname: :c, email2: :d, age: :e}} + data = SmarterCSV.process("#{fixture_path}/duplicate_headers.csv", options) + expect(data.first).to eq({a: 'tom@bla.com', b: 'Tom', c: 'Sawyer', d: 'mike@bla.com', e: 34}) end end describe 'with special handling' do + context 'when suffix is set to nil' do + let(:options) { {duplicate_header_suffix: nil} } + + it 'raises error on duplicate headers in the input file' do + expect do + SmarterCSV.process("#{fixture_path}/duplicate_headers.csv", options) + end.to raise_exception(SmarterCSV::DuplicateHeaders) + end + end + context 'with given suffix' do let(:options) { {duplicate_header_suffix: '_'} } @@ -48,10 +50,11 @@ expect(data.first.keys).to eq %i[email firstname lastname email_2 age] end - it 'enumerates when duplicate headers are given' do + it 'raises when duplicate headers are given' do options.merge!({user_provided_headers: %i[a b c a a]}) - data = SmarterCSV.process("#{fixture_path}/duplicate_headers.csv", options) - expect(data.first.keys).to eq %i[a b c a_2 a_3] + expect do + SmarterCSV.process("#{fixture_path}/duplicate_headers.csv", options) + end.to 
raise_exception(SmarterCSV::DuplicateHeaders) end it 'can remap duplicated headers' do @@ -61,8 +64,8 @@ end end - context 'with empty suffix' do - let(:options) { {duplicate_header_suffix: ''} } + context 'with different suffix' do + let(:options) { {duplicate_header_suffix: ':'} } it 'reads whole file' do data = SmarterCSV.process("#{fixture_path}/duplicate_headers.csv", options) @@ -71,13 +74,14 @@ it 'generates the correct keys' do data = SmarterCSV.process("#{fixture_path}/duplicate_headers.csv", options) - expect(data.first.keys).to eq %i[email firstname lastname email2 age] + expect(data.first.keys).to eq %i[email firstname lastname email:2 age] end - it 'enumerates when duplicate headers are given' do + it 'raises when duplicate headers are given' do options.merge!({user_provided_headers: %i[a b c a a]}) - data = SmarterCSV.process("#{fixture_path}/duplicate_headers.csv", options) - expect(data.first.keys).to eq %i[a b c a2 a3] + expect do + SmarterCSV.process("#{fixture_path}/duplicate_headers.csv", options) + end.to raise_exception(SmarterCSV::DuplicateHeaders) end end end diff --git a/spec/smarter_csv/header_transformation_spec.rb b/spec/features/header_handling/header_transformation_spec.rb similarity index 96% rename from spec/smarter_csv/header_transformation_spec.rb rename to spec/features/header_handling/header_transformation_spec.rb index 94c25074..4a704639 100644 --- a/spec/smarter_csv/header_transformation_spec.rb +++ b/spec/features/header_handling/header_transformation_spec.rb @@ -4,7 +4,7 @@ fixture_path = 'spec/fixtures' -describe 'be_able_to' do +describe 'header transformations option' do it 'loads_file_with_dashes_in_header_fields as strings' do options = {strings_as_keys: true} data = SmarterCSV.process("#{fixture_path}/with_dashes.csv", options) diff --git a/spec/smarter_csv/invalid_headers_spec.rb b/spec/features/header_handling/invalid_headers_spec.rb similarity index 65% rename from spec/smarter_csv/invalid_headers_spec.rb rename to 
spec/features/header_handling/invalid_headers_spec.rb index 9ce05414..420b7877 100644 --- a/spec/smarter_csv/invalid_headers_spec.rb +++ b/spec/features/header_handling/invalid_headers_spec.rb @@ -60,35 +60,30 @@ context 'mapping_keys: exception for missing keys / header names' do subject(:process_file) { SmarterCSV.process("#{fixture_path}/user_import.csv", options) } - let(:options) do - { - required_keys: [:middle_name], - key_mapping: { missing_key: :middle_name}, - } - end - - # we do not expect version 1.8 behavior: - it 'does not raise about the mapped header name when source of key_mapping is missing' do - expect(SmarterCSV).not_to receive(:puts).with a_string_matching(/WARNING.*missing_key/) - expect{ process_file }.not_to raise_exception( - SmarterCSV::MissingKeys, "ERROR: missing attributes: middle_name" - ) - end + context 'when one key_mapping key is missing' do + let(:options) do + { + required_keys: [:middle_name], + key_mapping: { missing_key: :middle_name}, + } + end - # we expect version 1.9 behavior: - it 'raises exception that the header for the key mapping is missing in the file' do - expect(SmarterCSV).not_to receive(:puts).with a_string_matching(/WARNING.*missing_key/) - expect{ process_file }.to raise_exception( - SmarterCSV::KeyMappingError, "ERROR: can not map headers: missing_key" - ) + it 'raises exception that header for the key mapping is missing in file' do + expect(SmarterCSV).not_to receive(:puts).with a_string_matching(/WARNING.*missing_key/) + # we do not expect version 1.8 behavior: + expect{ process_file }.not_to raise_exception( + SmarterCSV::MissingKeys, "ERROR: missing attributes: middle_name" + ) + # we expect version 1.9 behavior: + expect{ process_file }.to raise_exception( + SmarterCSV::KeyMappingError, "ERROR: can not map headers: missing_key" + ) + end end context "when multiple keys are missing" do let(:options) do - { - required_keys: [:middle_name], - key_mapping: { missing_key: :middle_name, other_missing_key: 
:other }, - } + { key_mapping: { missing_key: :middle_name, other_missing_key: :other } } end it 'raises exception that headers for the key mapping are missing in the file' do @@ -97,30 +92,52 @@ SmarterCSV::KeyMappingError, "ERROR: can not map headers: missing_key, other_missing_key" ) end - end - context "when slience_missing_keys is used" do - it "does not raise an exception when :silence_missing_keys is true" do + it "does not raise any exception when :silence_missing_keys is true" do options[:silence_missing_keys] = true expect(SmarterCSV).not_to receive(:puts).with a_string_matching(/WARNING.*missing_key/) - expect{ process_file }.not_to raise_exception( - SmarterCSV::KeyMappingError, "ERROR: can not map headers: missing_key" - ) + expect{ process_file }.not_to raise_exception + end + end + + context "when slience_missing_keys is used" do + let(:options) do + { + required_keys: [:middle_name], + key_mapping: { missing_key: :middle_name, other_optional_key: :other }, + } + end + + context "when invalid key_mapping is given" do + it "does not raise a KeyMappingError exception when :silence_missing_keys is true" do + options[:silence_missing_keys] = true + expect(SmarterCSV).not_to receive(:puts).with a_string_matching(/WARNING.*missing_key/) + expect{ process_file }.not_to raise_exception SmarterCSV::KeyMappingError + # still raises an error because :middle_name is required + expect{ process_file }.to raise_exception( + SmarterCSV::MissingKeys, "ERROR: missing attributes: middle_name" + ) + end end it "does not raise an exception when :silence_missing_keys is an array containing the missing key" do options[:silence_missing_keys] = [:missing_key, :other_optional_key] expect(SmarterCSV).not_to receive(:puts).with a_string_matching(/WARNING.*missing_key/) expect{ process_file }.not_to raise_exception( - SmarterCSV::KeyMappingError, "ERROR: can not map headers: missing_key" + SmarterCSV::KeyMappingError, "ERROR: can not map headers: missing_key" + ) + # still 
raises an error because :middle_name is required + expect{ process_file }.to raise_exception( + SmarterCSV::MissingKeys, "ERROR: missing attributes: middle_name" ) end it "raises an exception when :silence_missing_keys is an array but does not contain the missing key" do options[:silence_missing_keys] = [:other_optional_key] expect(SmarterCSV).not_to receive(:puts).with a_string_matching(/WARNING.*missing_key/) + # raises KeyMappingError because :missing_key is required: expect{ process_file }.to raise_exception( - SmarterCSV::KeyMappingError, "ERROR: can not map headers: missing_key" + SmarterCSV::KeyMappingError, "ERROR: can not map headers: missing_key" ) end end diff --git a/spec/smarter_csv/keep_headers_spec.rb b/spec/features/header_handling/keep_headers_spec.rb similarity index 92% rename from spec/smarter_csv/keep_headers_spec.rb rename to spec/features/header_handling/keep_headers_spec.rb index c26c31d9..f2d6a5d9 100644 --- a/spec/smarter_csv/keep_headers_spec.rb +++ b/spec/features/header_handling/keep_headers_spec.rb @@ -4,7 +4,7 @@ fixture_path = 'spec/fixtures' -describe 'be_able_to' do +describe ':keep_original_headers option' do it 'not_downcase_headers' do options = {keep_original_headers: true} data = SmarterCSV.process("#{fixture_path}/basic.csv", options) diff --git a/spec/smarter_csv/key_mapping_spec.rb b/spec/features/header_handling/key_mapping_spec.rb similarity index 100% rename from spec/smarter_csv/key_mapping_spec.rb rename to spec/features/header_handling/key_mapping_spec.rb diff --git a/spec/smarter_csv/no_header_spec.rb b/spec/features/header_handling/no_header_spec.rb similarity index 100% rename from spec/smarter_csv/no_header_spec.rb rename to spec/features/header_handling/no_header_spec.rb diff --git a/spec/smarter_csv/not_downcase_header_spec.rb b/spec/features/header_handling/not_downcase_header_spec.rb similarity index 93% rename from spec/smarter_csv/not_downcase_header_spec.rb rename to 
spec/features/header_handling/not_downcase_header_spec.rb index 7787206a..2124a718 100644 --- a/spec/smarter_csv/not_downcase_header_spec.rb +++ b/spec/features/header_handling/not_downcase_header_spec.rb @@ -4,7 +4,7 @@ fixture_path = 'spec/fixtures' -describe 'be_able_to' do +describe ':downcase_header option' do it 'not_downcase_headers' do options = {downcase_header: false} data = SmarterCSV.process("#{fixture_path}/basic.csv", options) diff --git a/spec/smarter_csv/required_headers_spec.rb b/spec/features/header_handling/required_headers_spec.rb similarity index 100% rename from spec/smarter_csv/required_headers_spec.rb rename to spec/features/header_handling/required_headers_spec.rb diff --git a/spec/smarter_csv/silence_missing_keys_spec.rb b/spec/features/header_handling/silence_missing_keys_spec.rb similarity index 100% rename from spec/smarter_csv/silence_missing_keys_spec.rb rename to spec/features/header_handling/silence_missing_keys_spec.rb diff --git a/spec/smarter_csv/strings_as_keys_spec.rb b/spec/features/header_handling/strings_as_keys_spec.rb similarity index 86% rename from spec/smarter_csv/strings_as_keys_spec.rb rename to spec/features/header_handling/strings_as_keys_spec.rb index dc2b2ff2..5bf1266d 100644 --- a/spec/smarter_csv/strings_as_keys_spec.rb +++ b/spec/features/header_handling/strings_as_keys_spec.rb @@ -4,8 +4,8 @@ fixture_path = 'spec/fixtures' -describe 'be_able_to' do - it 'use_strings_as_keys' do +describe ':strings_as_keys option' do + it 'uses strings as hash keys' do options = {strings_as_keys: true} data = SmarterCSV.process("#{fixture_path}/basic.csv", options) expect(data.size).to eq 5 diff --git a/spec/smarter_csv/strip_chars_from_headers_spec.rb b/spec/features/header_handling/strip_chars_from_headers_spec.rb similarity index 84% rename from spec/smarter_csv/strip_chars_from_headers_spec.rb rename to spec/features/header_handling/strip_chars_from_headers_spec.rb index fecafc78..db904d73 100644 --- 
a/spec/smarter_csv/strip_chars_from_headers_spec.rb +++ b/spec/features/header_handling/strip_chars_from_headers_spec.rb @@ -4,8 +4,8 @@ fixture_path = 'spec/fixtures' -describe 'be_able_to' do - it 'strip_whitespace_from_headers' do +describe ':strip_chars_from_headers option' do + it 'strips whitespece from headers' do options = {strip_chars_from_headers: ' '} data = SmarterCSV.process("#{fixture_path}/basic.csv", options) expect(data.size).to eq 5 diff --git a/spec/smarter_csv/ignore_comments_spec.rb b/spec/features/ignore_comments_spec.rb similarity index 97% rename from spec/smarter_csv/ignore_comments_spec.rb rename to spec/features/ignore_comments_spec.rb index 3e48158d..956774a5 100644 --- a/spec/smarter_csv/ignore_comments_spec.rb +++ b/spec/features/ignore_comments_spec.rb @@ -4,7 +4,7 @@ fixture_path = 'spec/fixtures' -describe 'be_able_to' do +describe ':comment_regexp option' do it 'by default does not ignore comments in CSV files' do options = {} data = SmarterCSV.process("#{fixture_path}/ignore_comments.csv", options) diff --git a/spec/smarter_csv/escaped_quote_chars_spec.rb b/spec/features/quotes/escaped_quote_chars_spec.rb similarity index 100% rename from spec/smarter_csv/escaped_quote_chars_spec.rb rename to spec/features/quotes/escaped_quote_chars_spec.rb diff --git a/spec/smarter_csv/quoted_spec.rb b/spec/features/quotes/quoted_spec.rb similarity index 96% rename from spec/smarter_csv/quoted_spec.rb rename to spec/features/quotes/quoted_spec.rb index 41acc05a..4ee627a3 100644 --- a/spec/smarter_csv/quoted_spec.rb +++ b/spec/features/quotes/quoted_spec.rb @@ -20,8 +20,8 @@ expect(data[3][:description]).to eq 'MUST SELL! 
air, moon roof, loaded' data.each do |h| expect(h[:year].class).to eq Integer - expect(h[:make]).to_not be_nil - expect(h[:model]).to_not be_nil + expect(h[:make]).not_to be_nil + expect(h[:model]).not_to be_nil expect(h[:price].class).to eq Float end end diff --git a/spec/smarter_csv/skip_lines_spec.rb b/spec/features/skip_lines_spec.rb similarity index 95% rename from spec/smarter_csv/skip_lines_spec.rb rename to spec/features/skip_lines_spec.rb index d37a4ab1..6b19e380 100644 --- a/spec/smarter_csv/skip_lines_spec.rb +++ b/spec/features/skip_lines_spec.rb @@ -4,7 +4,7 @@ fixture_path = 'spec/fixtures' -describe 'be_able_to' do +describe ':skip_lines option' do it 'loads_csv_file_skipping_lines' do options = {skip_lines: 3} data = SmarterCSV.process("#{fixture_path}/skip_lines.csv", options) diff --git a/spec/smarter_csv/hard_sample_spec.rb b/spec/features/special_cases/hard_sample_spec.rb similarity index 100% rename from spec/smarter_csv/hard_sample_spec.rb rename to spec/features/special_cases/hard_sample_spec.rb diff --git a/spec/smarter_csv/malformed_spec.rb b/spec/features/special_cases/malformed_spec.rb similarity index 100% rename from spec/smarter_csv/malformed_spec.rb rename to spec/features/special_cases/malformed_spec.rb diff --git a/spec/smarter_csv/problematic_spec.rb b/spec/features/special_cases/problematic_spec.rb similarity index 100% rename from spec/smarter_csv/problematic_spec.rb rename to spec/features/special_cases/problematic_spec.rb diff --git a/spec/smarter_csv/trading_spec.rb b/spec/features/special_cases/trading_spec.rb similarity index 100% rename from spec/smarter_csv/trading_spec.rb rename to spec/features/special_cases/trading_spec.rb diff --git a/spec/smarter_csv/basic_spec.rb b/spec/smarter_csv/basic_spec.rb deleted file mode 100644 index d10f65b7..00000000 --- a/spec/smarter_csv/basic_spec.rb +++ /dev/null @@ -1,93 +0,0 @@ -# frozen_string_literal: true - -require 'spec_helper' - -fixture_path = 'spec/fixtures' - -[true, 
false].each do |bool| - describe "fulfills basic tests with#{bool ? ' C-' : 'out '}acceleration" do - let(:options) { { acceleration: bool } } - - describe 'basic CSV processing' do - # works only when testing locally - unless ENV['CI'] - it 'compiles the acceleration' do - expect(SmarterCSV.has_acceleration?).to eq true - end - end - - it 'loads_basic_csv_file' do - data = SmarterCSV.process("#{fixture_path}/basic.csv", options) - expect(data.size).to eq 5 - - data.each do |h| - h.each_key do |key| - # all the keys should be symbols - expect(key.class).to eq Symbol - - expect(%i[first_name last_name dogs cats birds fish]).to include(key) - end - expect(h.size).to be <= 6 - end - end - - it 'loads_basic_csv_file from Rails' do - stub_const('Rails', true) - data = SmarterCSV.process("#{fixture_path}/basic.csv", options) - expect(data.size).to eq 5 - - data.each do |h| - h.each_key do |key| - # all the keys should be symbols - expect(key.class).to eq Symbol - - expect(%i[first_name last_name dogs cats birds fish]).to include(key) - end - expect(h.size).to be <= 6 - end - end - - context 'with full user_provided_headers' do - let(:options) { super().merge({user_provided_headers: %i[a b c d e f]}) } - - it 'replaces headers with user_provided_headers' do - data = SmarterCSV.process("#{fixture_path}/basic.csv", options) - expect(data.size).to eq 5 - - expect(SmarterCSV.raw_header).to eq "First Name,Last Name,Dogs,Cats,Birds,Fish\n" - expect(SmarterCSV.headers).to eq %i[a b c d e f] - end - end - - context 'with partial user_provided_headers' do - let(:options) { super().merge({user_provided_headers: %i[a b c d e]}) } - - it 'raises an exception if the number of user_provided_headers is incorrect' do - expect do - SmarterCSV.process("#{fixture_path}/basic.csv", options) - end.to raise_exception(SmarterCSV::HeaderSizeMismatch) - end - end - - context 'with empty user_provided_headers' do - let(:options) { super().merge({user_provided_headers: []}) } - - it 'raises an 
exception if the user_provided_headers is empty' do - expect do - SmarterCSV.process("#{fixture_path}/basic.csv", options) - end.to raise_exception(SmarterCSV::IncorrectOption, /ERROR: incorrect format for user_provided_headers! Expecting array with headers/) - end - end - - context 'with incorrect user_provided_headers' do - let(:options) { super().merge({user_provided_headers: {}}) } - - it 'raises an exception if the user_provided_headers is of incorrect type' do - expect do - SmarterCSV.process("#{fixture_path}/basic.csv", options) - end.to raise_exception(SmarterCSV::IncorrectOption, /ERROR: incorrect format for user_provided_headers! Expecting array with headers/) - end - end - end - end -end diff --git a/spec/smarter_csv/binary_file_spec.rb b/spec/smarter_csv/binary_file_spec.rb deleted file mode 100644 index a9513fba..00000000 --- a/spec/smarter_csv/binary_file_spec.rb +++ /dev/null @@ -1,27 +0,0 @@ -# frozen_string_literal: true - -require 'spec_helper' - -fixture_path = 'spec/fixtures' - -# this reads a binary database dump file, which is in structure like a CSV file -# but contains control characters delimiting the rows and columns, and also -# contains a comment section which is commented our by a leading # character - -describe 'be_able_to' do - it 'loads_binary_file_with_comments' do - options = {col_sep: "\cA", row_sep: "\cB", comment_regexp: /^#/} - data = SmarterCSV.process("#{fixture_path}/binary.csv", options) - expect(data.flatten.size).to eq 8 - - data.each do |item| - # all keys should be symbols - item.each_key do |key| - expect(key.class).to eq Symbol - end - expect(item[:timestamp]).to eq 1_381_388_409 - expect(item[:item_id].class).to eq Integer - expect(item[:name].size).to be > 0 - end - end -end diff --git a/spec/smarter_csv/close_file_spec.rb b/spec/smarter_csv/close_file_spec.rb index 97c4e7f2..cf6b6eb8 100644 --- a/spec/smarter_csv/close_file_spec.rb +++ b/spec/smarter_csv/close_file_spec.rb @@ -4,7 +4,7 @@ fixture_path = 
'spec/fixtures' -describe 'be_able_to' do +describe 'file operations' do it 'close file after using it' do options = {col_sep: "\cA", row_sep: "\cB", comment_regexp: /^#/, strings_as_keys: true} diff --git a/spec/smarter_csv/misc_spec.rb b/spec/smarter_csv/misc_spec.rb deleted file mode 100644 index 353f4597..00000000 --- a/spec/smarter_csv/misc_spec.rb +++ /dev/null @@ -1,27 +0,0 @@ -# frozen_string_literal: true - -require 'spec_helper' - -describe 'misc functionality' do - describe 'elem_blank?' do - it 'returns true for nil' do - expect(SmarterCSV.send(:elem_blank?, nil)).to eq true - end - - it 'returns true for ""' do - expect(SmarterCSV.send(:elem_blank?, "")).to eq true - end - - it 'returns true for "\t \r\n\t"' do - expect(SmarterCSV.send(:elem_blank?, "\t \r\n\t")).to eq true - end - - it 'returns false for "a"' do - expect(SmarterCSV.send(:elem_blank?, "a")).to eq false - end - - it 'returns false for 1234' do - expect(SmarterCSV.send(:elem_blank?, 1234)).to eq false - end - end -end diff --git a/spec/smarter_csv/validations_spec.rb b/spec/smarter_csv/option_validations_spec.rb similarity index 95% rename from spec/smarter_csv/validations_spec.rb rename to spec/smarter_csv/option_validations_spec.rb index 6d2c3a04..9f070944 100644 --- a/spec/smarter_csv/validations_spec.rb +++ b/spec/smarter_csv/option_validations_spec.rb @@ -4,7 +4,7 @@ fixture_path = 'spec/fixtures' -describe 'validations' do +describe 'option validations' do let(:options) { {} } it 'loads basic csv file without issues' do diff --git a/spec/smarter_csv/options_processing_spec.rb b/spec/smarter_csv/options_processing_spec.rb index 816746e2..61b65732 100644 --- a/spec/smarter_csv/options_processing_spec.rb +++ b/spec/smarter_csv/options_processing_spec.rb @@ -31,31 +31,29 @@ end describe '#validate_options!' 
do - it 'raises an exception for row_sep' do - expect do - invalid_options = { - row_sep: nil, - } - SmarterCSV.process_options(invalid_options) - end.to raise_exception(SmarterCSV::ValidationError, '["invalid row_sep"]') - end - - it 'raises an exception for col_sep' do - expect do - invalid_options = { - col_sep: nil, - } - SmarterCSV.process_options(invalid_options) - end.to raise_exception(SmarterCSV::ValidationError, '["invalid col_sep"]') - end + [:row_sep, :col_sep, :quote_char].each do |opt| + # empty values + [nil, ''].each do |val| + context "with invalid value #{val}" do + it "raises an exception for #{opt} set #{val}" do + expect do + invalid_options = { + opt => val, + } + SmarterCSV.process_options(invalid_options) + end.to raise_exception(SmarterCSV::ValidationError, "[\"invalid #{opt}\"]") + end + end + end - it 'raises an exception for quote_char' do - expect do - invalid_options = { - quote_char: nil, - } - SmarterCSV.process_options(invalid_options) - end.to raise_exception(SmarterCSV::ValidationError, '["invalid quote_char"]') + it "does not raise an exception for #{opt} set non-empty" do + expect do + invalid_options = { + opt => ' ', + } + SmarterCSV.process_options(invalid_options) + end.not_to raise_exception + end end end diff --git a/spec/smarter_csv/parse/README.md b/spec/smarter_csv/parse/README.md index e3c7dccc..6ef50f30 100644 --- a/spec/smarter_csv/parse/README.md +++ b/spec/smarter_csv/parse/README.md @@ -5,4 +5,6 @@ when testing `parse` methods: * make sure to always pass all options to the 'parse' methods, incl. acceleration -* always wrap tests, so that both accelerated and un-accelerated code-paths are run +* always wrap tests, so that both accelerated and un-accelerated code-paths are run, + because the purpose of these tests is to ensure that both accelerated and unaccelerated + code paths are behaving identically. 
diff --git a/spec/smarter_csv/parse/column_separator_spec.rb b/spec/smarter_csv/parse/column_separator_spec.rb index c813862c..eb8ecaf4 100644 --- a/spec/smarter_csv/parse/column_separator_spec.rb +++ b/spec/smarter_csv/parse/column_separator_spec.rb @@ -1,5 +1,17 @@ # frozen_string_literal: true +# ------------------------------------------------------------------------------------------ +# when testing `parse` methods: +# +# * SmarterCSV.default_options are not loaded when testing `parse` methods by themselves +# +# * make sure to always pass all options to the 'parse' methods, incl. acceleration +# +# * always wrap tests, so that both accelerated and un-accelerated code-paths are run, +# because the purpose of these tests is to ensure that both accelerated and unaccelerated +# code paths are behaving identically. +# ------------------------------------------------------------------------------------------ + require 'spec_helper' [true, false].each do |bool| diff --git a/spec/smarter_csv/parse/max_size_spec.rb b/spec/smarter_csv/parse/max_size_spec.rb index c4f8ce33..fa2f7e44 100644 --- a/spec/smarter_csv/parse/max_size_spec.rb +++ b/spec/smarter_csv/parse/max_size_spec.rb @@ -1,5 +1,17 @@ # frozen_string_literal: true +# ------------------------------------------------------------------------------------------ +# when testing `parse` methods: +# +# * SmarterCSV.default_options are not loaded when testing `parse` methods by themselves +# +# * make sure to always pass all options to the 'parse' methods, incl. acceleration +# +# * always wrap tests, so that both accelerated and un-accelerated code-paths are run, +# because the purpose of these tests is to ensure that both accelerated and unaccelerated +# code paths are behaving identically. 
+# ------------------------------------------------------------------------------------------ + require 'spec_helper' # the purpose of the max_size parameter is to handle a corner case where diff --git a/spec/smarter_csv/parse/old_csv_library_spec.rb b/spec/smarter_csv/parse/old_csv_library_spec.rb index 16a45cfe..9231f7d4 100644 --- a/spec/smarter_csv/parse/old_csv_library_spec.rb +++ b/spec/smarter_csv/parse/old_csv_library_spec.rb @@ -1,5 +1,17 @@ # frozen_string_literal: true +# ------------------------------------------------------------------------------------------ +# when testing `parse` methods: +# +# * SmarterCSV.default_options are not loaded when testing `parse` methods by themselves +# +# * make sure to always pass all options to the 'parse' methods, incl. acceleration +# +# * always wrap tests, so that both accelerated and un-accelerated code-paths are run, +# because the purpose of these tests is to ensure that both accelerated and unaccelerated +# code paths are behaving identically. +# ------------------------------------------------------------------------------------------ + require 'spec_helper' [true, false].each do |bool| diff --git a/spec/smarter_csv/parse/rfc4180_and_more_spec.rb b/spec/smarter_csv/parse/rfc4180_and_more_spec.rb index b6a4fe48..e315cf2a 100644 --- a/spec/smarter_csv/parse/rfc4180_and_more_spec.rb +++ b/spec/smarter_csv/parse/rfc4180_and_more_spec.rb @@ -1,5 +1,17 @@ # frozen_string_literal: true +# ------------------------------------------------------------------------------------------ +# when testing `parse` methods: +# +# * SmarterCSV.default_options are not loaded when testing `parse` methods by themselves +# +# * make sure to always pass all options to the 'parse' methods, incl. acceleration +# +# * always wrap tests, so that both accelerated and un-accelerated code-paths are run, +# because the purpose of these tests is to ensure that both accelerated and unaccelerated +# code paths are behaving identically. 
+# ------------------------------------------------------------------------------------------ + require 'spec_helper' [true, false].each do |bool|