Skip to content

Commit

Permalink
manip_bin_numerics() imporved and ungroup() added
Browse files Browse the repository at this point in the history
  • Loading branch information
erblast committed Jan 13, 2019
1 parent 7bc69a5 commit cf72b93
Show file tree
Hide file tree
Showing 11 changed files with 171 additions and 30 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: easyalluvial
Title: Generate Alluvial Plots with a Single Line of Code
Version: 0.1.7
Version: 0.1.8
Authors@R: person( "Bjoern", "Koneswarakantha", role = c("aut","cre"), email = "[email protected]", comment = c(ORCID = " 0000-0003-4585-7799") )
URL: https://github.com/erblast/easyalluvial
Description: Alluvial plots are similar to sankey diagrams and visualise categorical data
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,10 @@ importFrom(purrr,is_bare_numeric)
importFrom(purrr,is_null)
importFrom(rlang,UQ)
importFrom(rlang,quo_is_null)
importFrom(stats,median)
importFrom(stats,var)
importFrom(stringr,str_detect)
importFrom(stringr,str_replace)
importFrom(tibble,is_tibble)
importFrom(tidyr,complete)
importFrom(tidyr,gather)
Expand Down
9 changes: 9 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@

# Version: 0.1.8, released 20190113
- `dplyr 7.8.0` compatibility
- `vdiffr` checks for plots
- `manip_bin_numerics()` accepts c('median', 'mean', 'cuts', 'min_max') as bin_labels
argument which will be converted to bin label.
- `alluvial_wide()` and `alluvial_long()` do not crash anymore when dataframes are grouped

# 20181118
CRAN submission


4 changes: 4 additions & 0 deletions R/alluvial_long.R
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,10 @@ alluvial_long = function( data
value = as.name( value_str )
id = as.name( id_str )

# ungroup

data = ungroup(data)

# fill

if( rlang::quo_is_null(fill) ){
Expand Down
6 changes: 5 additions & 1 deletion R/alluvial_wide.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@


if(getRversion() >= "2.15.1"){
utils::globalVariables( c('x', '.', ':=', 'alluvial_id', 'fill_flow', 'fill_value', 'value', 'fill' ) )
utils::globalVariables( c('x', '.', ':=', 'alluvial_id', 'fill_flow', 'fill_value', 'value', 'fill', 'easyalluvialid' ) )
}


Expand Down Expand Up @@ -115,6 +115,10 @@ alluvial_wide = function( data
id_str = quo_name(id)
}

# ungroup

data = ungroup(data)

# remove id from variables

variables = names(data)
Expand Down
120 changes: 101 additions & 19 deletions R/manip.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,12 @@
#' @seealso \code{\link[stringr]{str_detect}}
#' @rdname manip_factor_2_numeric
#' @export
#' @importFrom stringr str_detect
#' @importFrom stringr str_detect str_replace
#' @import dplyr
#' @importFrom grDevices boxplot.stats col2rgb rgb
#' @importFrom stats var
#' @importFrom utils head
#' @importFrom stats median
manip_factor_2_numeric = function(vec){

bool = as.character(vec) %>%
Expand All @@ -29,7 +30,7 @@ manip_factor_2_numeric = function(vec){
if( bool ){

vec = vec %>%
as.character %>%
as.character() %>%
as.numeric()

} else{
Expand All @@ -47,27 +48,36 @@ manip_factor_2_numeric = function(vec){
#' @param x dataframe with numeric variables, or numeric vector
#' @param bins number of bins for numerical variables, Default: 5
#' @param bin_labels labels for the bins from low to high, Default: c("LL",
#' "ML", "M", "MH", "HH")#' @param center boolean, Default: T
#' @param scale boolean, Default: T
#' @param center boolean, Default: T
#' @param transform boolean, Default: T
#' "ML", "M", "MH", "HH"). Can also be one of c('mean', 'median', 'min_max',
#' 'cuts'), the corresppnding summary function will supply the labels.
#' @param scale logical, Default: T
#' @param center logical, Default: T
#' @param transform logical, apply Yeo Johnson Transformation, Default: T
#' @param round_numeric, logical, rounds numeric results if bin_labels is
#' supplied with a supported summary function name.
#' @param digits, integer, number of digits to round to
#' @examples
#' summary( mtcars )
#' summary( manip_bin_numerics(mtcars) )
#' summary( manip_bin_numerics(mtcars, bin_labels = 'mean'))
#' summary( manip_bin_numerics(mtcars, bin_labels = 'cuts'
#' , scale = FALSE, center = FALSE, transform = FALSE))
#' @return dataframe
#' @rdname manip_bin_numerics
#' @import recipes
#' @importFrom purrr is_bare_numeric
#' @importFrom tibble is_tibble
#' @export
manip_bin_numerics = function(x
, bins = 5
, bin_labels = c('LL', 'ML', 'M', 'MH', 'HH')
, center = T
, scale = T
, transform = T){


, bins = 5
, bin_labels = c('LL', 'ML', 'M', 'MH', 'HH')
, center = T
, scale = T
, transform = T
, round_numeric = T
, digits = 2 ){

# check if input is vector or dataframe, conv vec to df
if(purrr::is_bare_numeric(x)){
df = tibble( x = x )
input_vector = T
Expand All @@ -80,8 +90,8 @@ manip_bin_numerics = function(x

requireNamespace('recipes')

if( length(bin_labels) != bins ){
stop( 'bin_labes must be equal to bins')
if( length(bin_labels) != bins & ! bin_labels[1] %in% c('median', 'cuts', 'mean', 'min_max') ){
stop( "bin_labels length must be equal to bins or one of c('median', 'cuts', 'mean', 'min_max')")
}

numerics = df %>%
Expand All @@ -93,7 +103,10 @@ manip_bin_numerics = function(x
return( df )
}

rec = recipe(df)
df = mutate(df, easyalluvialid = row_number() )

rec = recipe(df) %>%
add_role( easyalluvialid, new_role = 'id variable')

if( center ) rec = rec %>%
step_center( one_of(numerics) )
Expand Down Expand Up @@ -121,9 +134,78 @@ manip_bin_numerics = function(x
, min(boxplot.stats(x)$stats)
, x)
) %>%
mutate_at( vars(numerics), function(x) cut(x, breaks = bins) ) %>%
mutate_at( vars(numerics), rename_levels)

mutate_at( vars(numerics), function(x) cut(x, breaks = bins) )

summary_as_label = function(df, df_old, fun){
# joins df with original dataframe. Groups by segments and calculates
# summary stat using the given function. Summary stat replaces segment
# labels

df = df %>%
left_join( select(df_old, one_of( c(numerics, 'easyalluvialid') ) ), by = 'easyalluvialid')

for(num in numerics){
df = df %>%
group_by( !! as.name( paste0(num, '.x') ) ) %>%
mutate( !! as.name( paste0(num, '.y') ) := fun( !! as.name( paste0(num, '.y') ) ) )
}

df = df %>%
ungroup() %>%
select( - ends_with('.x') )

if(round_numeric){

df = df %>%
mutate_if( is.numeric, round, digits = digits )
}

df = df %>%
mutate_if(is.numeric, as.factor ) %>%
rename_at( vars( ends_with('.y') ) , .funs = function(x) str_replace(x, '\\.y$', '') )

return(df)

}

if( length(bin_labels) == bins ){

data_new = data_new %>%
mutate_at( vars(numerics), rename_levels)

}else if( bin_labels == 'median'){
data_new = summary_as_label(data_new, df_old = df, fun = median)
}else if( bin_labels == 'mean'){
data_new = summary_as_label(data_new, df_old = df, fun = mean)
}else if( bin_labels == 'min_max'){
df_min = summary_as_label(data_new, df_old = df, fun = min)
df_max = summary_as_label(data_new, df_old = df, fun = max)

join_by = names(df_min)[! names(df_min) %in% numerics ]
join_by = c(join_by, 'easyalluvialid')

data_new = df_min %>%
left_join(df_max, by = join_by )

for(num in numerics){

sym_min = as.name( paste0(num, '.x') )
sym_max = as.name( paste0(num, '.y') )

data_new = data_new %>%
arrange( !! sym_min ) %>%
mutate( !! as.name( num ) := map2_chr( !! sym_max, !! sym_min, function(x,y) paste(x,'-\n',y) ) ) %>%
mutate( !! as.name( num ) := as_factor(!! as.name( num ) ) )
}

data_new = data_new %>%
select( -ends_with('.x'), -ends_with('.y') )
}


#remove easyalluvialid
data_new = select(data_new, - easyalluvialid)

if( input_vector ){
return( data_new$x )
}else{
Expand Down
1 change: 1 addition & 0 deletions easyalluvial.Rproj
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,4 @@ LaTeX: pdfLaTeX
BuildType: Package
PackageUseDevtools: Yes
PackageInstallArgs: --no-multiarch --with-keep.source
PackageRoxygenize: rd,collate,namespace
20 changes: 15 additions & 5 deletions man/manip_bin_numerics.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions tests/testthat/test_alluvial_long.R
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,10 @@ test_that('alluvial_long'
})

expect_warning( alluvial_long( data_highflow, key, value, id ) )

#gouped df
p = alluvial_long( group_by(data, carrier), key = qu, value = mean_arr_delay, id = tailnum)


})

Expand Down
9 changes: 6 additions & 3 deletions tests/testthat/test_alluvial_wide.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ test_that('alluvial_wide'
, max_variables = max_variables
, fill_by = 'first_variable' )


vdiffr::expect_doppelganger('wide_first', p)

p = alluvial_wide( data = data
Expand Down Expand Up @@ -110,8 +109,12 @@ test_that('alluvial_wide'

expect_warning( alluvial_wide( data = ggplot2::diamonds) )

alluvial_wide(data, max_variables = 3, col_vector_flow = c('red', 'green', 'orange', 'yellow', 'blue')
, col_vector_value = c('red', 'green', 'orange', 'yellow', 'blue'), fill_by = 'last_variable' )
# alluvial_wide(data, max_variables = 3, col_vector_flow = c('red', 'green', 'orange', 'yellow', 'blue')
# , col_vector_value = c('red', 'green', 'orange', 'yellow', 'blue'), fill_by = 'last_variable' )

#gouped df

p = alluvial_wide( group_by(data, cylinders) )

})

Expand Down
24 changes: 23 additions & 1 deletion tests/testthat/test_manip.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,29 @@ test_that('manip_bin_numerics'
expect_true( is_empty(numerics) )
expect_true( ! is_empty(data_new) )
expect_identical( names(data_new) , names(data) )

expect_true( ! 'easyalluvialid' %in% names(data_new) )

bins_from_vec = manip_bin_numerics(data$disp)
expect_equal( levels(bins_from_vec), c("LL", "ML", "M", "MH", "HH") )

data_new_cuts = manip_bin_numerics(data, bin_labels = 'cuts')

data_new_median = manip_bin_numerics(data, bin_labels = 'median')

data_new_mean = manip_bin_numerics(data, bin_labels = 'mean')

data_new_min_max = manip_bin_numerics(data, bin_labels = 'min_max')

# p1 = alluvial_wide(data)
#
# p2 = alluvial_wide(data, bin_labels = 'min_max')
#
# gridExtra::grid.arrange(p1,p2)

expect_false( identical(data_new_cuts, data_new_median) )

expect_false( identical(data_new_mean, data_new_median) )

})


Expand Down

0 comments on commit cf72b93

Please sign in to comment.