Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make DataFrame#to_wide fill nil reshaping #288

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 17 additions & 1 deletion lib/red_amber/data_frame_reshaping.rb
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,7 @@ def to_long(*keep_keys, name: :NAME, value: :VALUE)
#
# @since 0.2.0
#
def to_wide(name: :NAME, value: :VALUE)
def to_wide(name: :NAME, value: :VALUE, fill_missing: false, fill_with: nil)
name = name.to_sym
unless keys.include?(name)
raise DataFrameArgumentError,
Expand All @@ -274,6 +274,22 @@ def to_wide(name: :NAME, value: :VALUE)
h = converts.to_h
hash[keeps.to_h][h[name].to_s.to_sym] = h[value]
end
if fill_missing
  # Canonical column order: every name seen in any row, in order of first
  # appearance (same result as the union of all rows' keys).
  names = hash.each_value.flat_map(&:keys).uniq
  # Rebuild EVERY row in that canonical order, not only the short ones.
  # The code that follows takes the header from the first row's keys and
  # reads each row's values positionally, so a row whose keys are complete
  # but arrived in a different order would otherwise be written into the
  # wrong columns. Names a row lacks are filled with `fill_with`; a value
  # that is present (even an explicit nil) is kept as is.
  hash.transform_values! do |row|
    names.to_h { |n| [n, row.fetch(n, fill_with)] }
  end
end
ks = hash.first[0].keys + hash.first[1].keys
vs = hash.map { |k, v| k.values + v.values }.transpose
DataFrame.new(ks.zip(vs))
Expand Down
15 changes: 15 additions & 0 deletions test/test_data_frame_reshaping.rb
Original file line number Diff line number Diff line change
Expand Up @@ -128,5 +128,20 @@ class DataFrameReshapingTest < Test::Unit::TestCase
df = @df.rename(NAME: :key1, VALUE: :key2)
assert_equal @str, df.to_wide(name: :key1, value: :key2).to_s
end

# The long-format input has no (name2, Two) observation; with
# fill_missing: true the wide result is expected to hold nil in that cell.
test '#to_wide with missing values' do
  long = DataFrame.new(
    names: %w[name1 name1 name1 name2 name2 name3 name3 name3],
    NAME: %w[One Two Three One Three One Two Three],
    VALUE: [1.1, 2.1, 3.1, 1.2, 3.2, 1.3, 2.3, 3.3]
  )
  expected = DataFrame.new(
    names: %w[name1 name2 name3],
    One: [1.1, 1.2, 1.3],
    Two: [2.1, nil, 2.3],
    Three: [3.1, 3.2, 3.3]
  )
  actual = long.to_wide(fill_missing: true)
  assert_equal expected, actual
end
end
end