From 4ecec9275d15c4fce253fc92a3f1d9bb407c9979 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Tue, 11 Jun 2024 11:11:53 -0700 Subject: [PATCH 1/3] don't use names in internal data1 data structure --- R/sparse_character.R | 8 ++++---- R/sparse_double.R | 8 ++++---- R/sparse_integer.R | 8 ++++---- R/sparse_logical.R | 8 ++++---- src/altrep-sparse-double.c | 7 ------- src/altrep-sparse-integer.c | 7 ------- src/altrep-sparse-logical.c | 7 ------- src/altrep-sparse-string.c | 7 ------- 8 files changed, 16 insertions(+), 44 deletions(-) diff --git a/R/sparse_character.R b/R/sparse_character.R index c0d1d59..6e87272 100644 --- a/R/sparse_character.R +++ b/R/sparse_character.R @@ -71,10 +71,10 @@ sparse_character <- function(values, positions, length, default = "") { new_sparse_character <- function(values, positions, length, default) { x <- list( - val = values, - pos = positions, - len = length, - default = default + values, + positions, + length, + default ) .Call(ffi_altrep_new_sparse_string, x) diff --git a/R/sparse_double.R b/R/sparse_double.R index bb962c7..e73b4db 100644 --- a/R/sparse_double.R +++ b/R/sparse_double.R @@ -82,10 +82,10 @@ sparse_double <- function(values, positions, length, default = 0) { new_sparse_double <- function(values, positions, length, default) { x <- list( - val = values, - pos = positions, - len = length, - default = default + values, + positions, + length, + default ) .Call(ffi_altrep_new_sparse_double, x) diff --git a/R/sparse_integer.R b/R/sparse_integer.R index dfbfb35..7e6ffa4 100644 --- a/R/sparse_integer.R +++ b/R/sparse_integer.R @@ -82,10 +82,10 @@ sparse_integer <- function(values, positions, length, default = 0L) { new_sparse_integer <- function(values, positions, length, default) { x <- list( - val = values, - pos = positions, - len = length, - default = default + values, + positions, + length, + default ) .Call(ffi_altrep_new_sparse_integer, x) diff --git a/R/sparse_logical.R b/R/sparse_logical.R index 7ed4052..b1082da 
100644 --- a/R/sparse_logical.R +++ b/R/sparse_logical.R @@ -69,10 +69,10 @@ sparse_logical <- function(values, positions, length, default = FALSE) { new_sparse_logical <- function(values, positions, length, default) { x <- list( - val = values, - pos = positions, - len = length, - default = default + values, + positions, + length, + default ) .Call(ffi_altrep_new_sparse_logical, x) diff --git a/src/altrep-sparse-double.c b/src/altrep-sparse-double.c index 0ed4cf8..9c858c9 100644 --- a/src/altrep-sparse-double.c +++ b/src/altrep-sparse-double.c @@ -140,13 +140,6 @@ static SEXP altrep_sparse_double_Extract_subset(SEXP x, SEXP indx, SEXP call) { SEXP out_default = extract_default(x); SET_VECTOR_ELT(out, 3, out_default); - SEXP names = Rf_allocVector(STRSXP, 4); - Rf_setAttrib(out, R_NamesSymbol, names); - SET_STRING_ELT(names, 0, Rf_mkChar("val")); - SET_STRING_ELT(names, 1, Rf_mkChar("pos")); - SET_STRING_ELT(names, 2, Rf_mkChar("len")); - SET_STRING_ELT(names, 3, Rf_mkChar("default")); - R_xlen_t i_out = 0; for (R_xlen_t i = 0; i < size; ++i) { diff --git a/src/altrep-sparse-integer.c b/src/altrep-sparse-integer.c index 3d2bd7f..d092301 100644 --- a/src/altrep-sparse-integer.c +++ b/src/altrep-sparse-integer.c @@ -140,13 +140,6 @@ static SEXP altrep_sparse_integer_Extract_subset(SEXP x, SEXP indx, SEXP call) { SEXP out_default = extract_default(x); SET_VECTOR_ELT(out, 3, out_default); - SEXP names = Rf_allocVector(STRSXP, 4); - Rf_setAttrib(out, R_NamesSymbol, names); - SET_STRING_ELT(names, 0, Rf_mkChar("val")); - SET_STRING_ELT(names, 1, Rf_mkChar("pos")); - SET_STRING_ELT(names, 2, Rf_mkChar("len")); - SET_STRING_ELT(names, 3, Rf_mkChar("default")); - R_xlen_t i_out = 0; for (R_xlen_t i = 0; i < size; ++i) { diff --git a/src/altrep-sparse-logical.c b/src/altrep-sparse-logical.c index 879d3dc..5f7b89f 100644 --- a/src/altrep-sparse-logical.c +++ b/src/altrep-sparse-logical.c @@ -140,13 +140,6 @@ static SEXP altrep_sparse_logical_Extract_subset(SEXP x, SEXP indx, 
SEXP call) { SEXP out_default = extract_default(x); SET_VECTOR_ELT(out, 3, out_default); - SEXP names = Rf_allocVector(STRSXP, 4); - Rf_setAttrib(out, R_NamesSymbol, names); - SET_STRING_ELT(names, 0, Rf_mkChar("val")); - SET_STRING_ELT(names, 1, Rf_mkChar("pos")); - SET_STRING_ELT(names, 2, Rf_mkChar("len")); - SET_STRING_ELT(names, 3, Rf_mkChar("default")); - R_xlen_t i_out = 0; for (R_xlen_t i = 0; i < size; ++i) { diff --git a/src/altrep-sparse-string.c b/src/altrep-sparse-string.c index 7319ca4..7479c07 100644 --- a/src/altrep-sparse-string.c +++ b/src/altrep-sparse-string.c @@ -137,13 +137,6 @@ static SEXP altrep_sparse_string_Extract_subset(SEXP x, SEXP indx, SEXP call) { SEXP out_default = extract_default(x); SET_VECTOR_ELT(out, 3, out_default); - SEXP names = Rf_allocVector(STRSXP, 4); - Rf_setAttrib(out, R_NamesSymbol, names); - SET_STRING_ELT(names, 0, Rf_mkChar("val")); - SET_STRING_ELT(names, 1, Rf_mkChar("pos")); - SET_STRING_ELT(names, 2, Rf_mkChar("len")); - SET_STRING_ELT(names, 3, Rf_mkChar("default")); - R_xlen_t i_out = 0; for (R_xlen_t i = 0; i < size; ++i) { From 09d0a5313df98a374fabea2cfa0aeb75fd935a29 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Tue, 11 Jun 2024 11:38:44 -0700 Subject: [PATCH 2/3] update vignette about smaller minimum object size --- vignettes/articles/when-to-use.Rmd | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vignettes/articles/when-to-use.Rmd b/vignettes/articles/when-to-use.Rmd index b2617d0..847de0d 100644 --- a/vignettes/articles/when-to-use.Rmd +++ b/vignettes/articles/when-to-use.Rmd @@ -40,7 +40,7 @@ obj_size(integer(2000)) obj_size(integer(3000)) ``` -The vectors above only contained the value `0`. We can replicate that sparsely with `sparse_integer(integer(), integer(), length = 0)`. We see that the size of a 0-length sparse integer vector has a size of `1304 B = 1.3 kB`. +The vectors above only contained the value `0`. 
We can replicate that sparsely with `sparse_integer(integer(), integer(), length = 0)`. We see that a 0-length sparse integer vector has a size of `888 B`. ```{r} obj_size(sparse_integer(integer(), integer(), length = 0)) @@ -80,9 +80,9 @@ sparse_x <- sparse_integer(1:200, 1:200, 1000) obj_size(sparse_x) ``` -So it all comes down to a trade-off. Dense integer vectors with a size of 313 or less will be smaller than their sparse counterparts no matter what. Dense integer vector vectors with 314 elements will take up the same amount of memory as their sparse counterpart with no values. +So it all comes down to a trade-off. Dense integer vectors with 209 or fewer elements will never be larger than their sparse counterparts, no matter what. A dense integer vector with 210 elements will take up the same amount of memory as its sparse counterpart with no values. -From these values we can calculate memory equivalent vectors to determine which would be more efficient, noting that sparse vectors increase in size by twice for each non-default value that their dense counterpart. For a vector of length 1000, the sparse vector will be equivalent in size if it has `343` non-default values. And these values continue to go up. +From these values we can calculate memory-equivalent vectors to determine which would be more efficient, noting that each non-default value increases the size of a sparse vector by twice as much as one element increases the size of its dense counterpart. For a vector of length 1000, the sparse vector will be equivalent in size if it has `395` non-default values. And these values continue to go up.
## Conversion speed From 8c0a166c3c24d9532fc1cfca49c71a3e86ad77ea Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Tue, 11 Jun 2024 11:51:12 -0700 Subject: [PATCH 3/3] add news --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index d29b4bb..4ef3e4a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,7 @@ # sparsevctrs (development version) +* All sparse vector types now have a significantly smaller base object size. (#67) + # sparsevctrs 0.1.0 * Initial CRAN submission.