diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml new file mode 100644 index 0000000..4106b06 --- /dev/null +++ b/.github/workflows/R-CMD-check.yaml @@ -0,0 +1,50 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master] + pull_request: + branches: [main, master] + +name: R-CMD-check + +jobs: + R-CMD-check: + runs-on: ${{ matrix.config.os }} + + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + + strategy: + fail-fast: false + matrix: + config: + - {os: macos-latest, r: 'release'} + - {os: windows-latest, r: 'release'} + - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} + - {os: ubuntu-latest, r: 'release'} + - {os: ubuntu-latest, r: 'oldrel-1'} + + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + R_KEEP_PKG_SOURCE: yes + + steps: + - uses: actions/checkout@v3 + + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + r-version: ${{ matrix.config.r }} + http-user-agent: ${{ matrix.config.http-user-agent }} + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::rcmdcheck + needs: check + + - uses: r-lib/actions/check-r-package@v2 + with: + upload-snapshots: true + error-on: error diff --git a/.github/workflows/rworkflows.yml b/.github/workflows/rworkflows.yml index 94d43eb..dd18193 100644 --- a/.github/workflows/rworkflows.yml +++ b/.github/workflows/rworkflows.yml @@ -54,4 +54,4 @@ jobs: DOCKER_TOKEN: ${{ secrets.DOCKER_TOKEN }} runner_os: ${{ runner.os }} cache_version: cache-v1 - docker_registry: ghcr.io + docker_registry: ghcr.io \ No newline at end of file diff --git a/DESCRIPTION b/DESCRIPTION index 26dfcfb..b15712f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -68,6 +68,6 @@ biocViews: AssayDomain, Infrastructure, RNASeq, DifferentialExpression, SingleCell, GeneExpression, Normalization, Clustering, QualityControl, Sequencing Encoding: UTF-8 -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.1 URL: https://github.com/stemangiola/tidySingleCellExperiment BugReports: https://github.com/stemangiola/tidySingleCellExperiment/issues diff --git a/NAMESPACE b/NAMESPACE index 45635a7..9c9853c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -49,11 +49,13 @@ export(tidy) export(unnest_single_cell_experiment) exportMethods(aggregate_cells) exportMethods(join_features) +importFrom(MASS,rnegbin) importFrom(Matrix,rowSums) importFrom(S4Vectors,"metadata<-") importFrom(S4Vectors,DataFrame) importFrom(S4Vectors,metadata) importFrom(S4Vectors,split) +importFrom(SingleCellExperiment,altExpNames) importFrom(SingleCellExperiment,cbind) importFrom(SingleCellExperiment,reducedDims) importFrom(SummarizedExperiment,"assays<-") @@ -74,11 +76,13 @@ importFrom(dplyr,filter) importFrom(dplyr,full_join) importFrom(dplyr,group_by) importFrom(dplyr,group_by_drop_default) +importFrom(dplyr,group_keys) importFrom(dplyr,group_rows) importFrom(dplyr,group_split) importFrom(dplyr,inner_join) importFrom(dplyr,left_join) importFrom(dplyr,mutate) +importFrom(dplyr,pick) importFrom(dplyr,pull) importFrom(dplyr,rename) importFrom(dplyr,right_join) @@ -113,10 +117,12 @@ importFrom(pillar,tbl_format_header) importFrom(pkgconfig,get_config) importFrom(purrr,as_mapper) importFrom(purrr,imap) +importFrom(purrr,list_transpose) importFrom(purrr,map) importFrom(purrr,map2) importFrom(purrr,map_chr) importFrom(purrr,reduce) +importFrom(purrr,set_names) importFrom(purrr,when) importFrom(rlang,":=") importFrom(rlang,dots_values) diff --git a/R/methods.R b/R/methods.R index c4ffc04..757e41c 100755 --- a/R/methods.R +++ b/R/methods.R @@ -41,23 +41,26 @@ setMethod("join_features", "SingleCellExperiment", function(.data, # CRAN Note .cell <- NULL .feature <- NULL - + # Get 'assays' from function arguments list + arg_list <- c(mget(ls(environment(), sorted=F)), match.call(expand.dots=F)$...) + all_assays <- get_all_assays(.data)$assay_id + if(is.null(arg_list$assays)) assays_from_join_call <- all_assays # Shape is long if (shape == "long") { - # Suppress generic data frame creation message produced by left_join suppressMessages({ - .data <- - .data %>% - left_join( - by=c_(.data)$name, - get_abundance_sc_long( - .data=.data, - features=features, - all=all, - exclude_zeros=exclude_zeros)) %>% - select(!!c_(.data)$symbol, .feature, - contains(".abundance"), everything()) + .data <- + .data %>% + left_join( + by=c_(.data)$name, + get_abundance_sc_long( + .data=.data, + features=features, + all=all, + exclude_zeros=exclude_zeros, + ...)) %>% + select(!!c_(.data)$symbol, .feature, + contains(".abundance"), everything()) }) # Provide data frame creation and abundance column message @@ -79,14 +82,16 @@ setMethod("join_features", "SingleCellExperiment", function(.data, .data # Shape if wide - } else { + } else if (shape == "wide"){ + if(is.null(arg_list$assays)) stop("Please provide assays") .data %>% left_join( by=c_(.data)$name, get_abundance_sc_wide( .data=.data, features=features, - all=all, ...)) + all=all, + ...)) } }) @@ -138,86 +143,173 @@ tidy.SingleCellExperiment <- function(object) { #' @importFrom SummarizedExperiment assays assays<- assayNames #' @importFrom S4Vectors split #' @importFrom stringr str_remove +#' @importFrom dplyr full_join +#' @importFrom dplyr left_join +#' @importFrom dplyr group_by +#' @importFrom dplyr pick +#' @importFrom dplyr group_keys +#' @importFrom dplyr group_rows #' @importFrom dplyr group_split +#' @importFrom dplyr pull +#' @importFrom tidyr unite +#' @importFrom tidyr separate +#' @importFrom purrr reduce +#' @importFrom purrr map +#' @importFrom purrr set_names +#' @importFrom purrr list_transpose #' #' #' @export -setMethod("aggregate_cells", "SingleCellExperiment", function(.data, - .sample=NULL, slot="data", assays=NULL, - aggregation_function=Matrix::rowSums, - ...) { - - # Fix NOTEs - feature <- NULL - .sample <- enquo(.sample) - - # Subset only wanted assays - if (!is.null(assays)) { - assays(.data) <- assays(.data)[assays] - } - - - grouping_factor = - .data |> - colData() |> - as_tibble() |> - select(!!.sample) |> - suppressMessages() |> - unite("my_id_to_split_by___", !!.sample, sep = "___") |> - pull(my_id_to_split_by___) |> - as.factor() - - list_count_cells = table(grouping_factor) |> as.list() +setMethod("aggregate_cells", "SingleCellExperiment", function(.data, + .sample = NULL, assays = NULL, + aggregation_function = Matrix::rowSums, + ...) { + # Fix NOTEs + feature <- NULL + .sample <- enquo(.sample) - # New method - list_assays = - .data |> - assays() |> - as.list() |> - map(~ .x |> splitColData(grouping_factor)) |> - unlist(recursive=FALSE) + arg_list <- c(mget(ls(environment(), sorted = F)), match.call(expand.dots = F)$...) + assays_to_use <- eval(arg_list$assays) + if (is.null(assays_to_use)) assays_to_use <- tail(names(assays(.data)), n = 1) + + # Get information on sample groups + sample_groups <- .data |> + as_tibble() |> + group_by(pick({{ .sample }})) - list_assays = - list_assays |> - map2(names(list_assays), ~ { - # Get counts - .x %>% - aggregation_function(na.rm=TRUE) %>% - enframe( - name =".feature", - value="x") %>% # sprintf("%s", .y)) %>% + sample_group_idx <- sample_groups |> + group_rows() - # In case we don't have rownames - mutate(.feature=as.character(.feature)) - }) |> - enframe(name = ".sample") |> + sample_group_keys <- sample_groups |> + group_keys() - # Clean groups - mutate(assay_name = assayNames(!!.data) |> rep(each=length(levels(grouping_factor)))) |> - mutate(.sample = .sample |> str_remove(assay_name) |> str_remove("\\.")) |> - group_split(.sample) |> - map(~ .x |> unnest(value) |> pivot_wider(names_from = assay_name, values_from = x) ) |> + .sample_names <- colnames(sample_group_keys) - # Add cell count - map2( - list_count_cells, - ~ .x |> mutate(.aggregated_cells = .y) - ) + grouping_factor_names <- sample_group_keys |> + unite(col = "grouping_factor", !!.sample, sep = "___") |> + pull(grouping_factor) + + # Split sce object by groups + sce_split <- map(.x = seq_along(sample_group_idx), .f = \(.num) .data[, sample_group_idx[[.num]]]) |> + purrr::set_names(grouping_factor_names) + grouping_factor <- + .data |> + colData() |> + as_tibble() |> + select(!!.sample) |> + suppressMessages() |> + unite("my_id_to_split_by___", !!.sample, sep = "___") |> + pull(my_id_to_split_by___) |> + as.factor() + + # Add count of aggregated cells + list_count_cells <- table(grouping_factor) |> + enframe(name = "grouping_factor", value = ".aggregated_cells") |> + mutate(.aggregated_cells = as.integer(.aggregated_cells)) - do.call(rbind, list_assays) |> + # Subset features based on selected assays + feature_df <- get_all_features(.data) + selected_features <- feature_df[feature_df$assay_id %in% assays_to_use, ] + selected_experiments_list <- split(x = selected_features, f = as.character(selected_features$exp_id)) + if ("RNA" %in% names(selected_experiments_list)) selected_experiments_list <- selected_experiments_list[c("RNA", setdiff(names(selected_experiments_list), "RNA"))] - left_join( - .data |> - colData() |> - as_tibble() |> - subset(!!.sample) |> - unite("my_id_to_split_by___", !!.sample, remove=FALSE, sep = "___"), - by= join_by(".sample" == "my_id_to_split_by___") - ) |> - - as_SummarizedExperiment( - .sample=.sample, - .transcript=.feature, - .abundance=!!as.symbol(names(.data@assays))) + # Aggregate cells based on selected features from any assay / experiment type. Output is a tibble. + aggregate_assays_fun <- function(exp) { + # Check where the assay data needs to be taken from (main RNA experiment or altExp) + selected_exp <- unique(exp$exp_id) + selected_assays <- exp |> distinct(assay_name, .keep_all = TRUE) + if (selected_exp == "RNA") { + aggregate_sce_fun <- function(sce) { + aggregated_vals <- assays(sce)[selected_assays$assay_name] |> + as.list() |> + map(.f = \(.list) aggregation_function(.list)) + map(.x = seq_along(aggregated_vals), \(.num) enframe(x = aggregated_vals[[.num]], name = ".feature", value = selected_assays$assay_id[[.num]])) |> + suppressMessages(reduce(full_join)) + } + aggregated_list <- lapply(sce_split, aggregate_sce_fun) |> + purrr::list_transpose() |> + map(.f = \(.list) .list |> bind_rows(.id = "grouping_factor")) + interim_res <- map(.x = seq_along(aggregated_list), .f = \(.num) aggregated_list[[.num]] |> + separate(col = grouping_factor, into = .sample_names, sep = "___")) |> + purrr::set_names(nm = selected_exp) + map(.x = seq_along(interim_res), .f = \(.num) interim_res[[.num]] |> mutate(assay_type = names(interim_res)[[.num]])) |> + purrr::reduce(full_join) |> + select(assay_type, everything()) + } else { + # aggregate from altExp + aggregate_sce_fun <- function(sce) { + aggregated_vals <- assays(altExps(sce)[[selected_exp]])[selected_assays$assay_name] |> + as.list() |> + purrr::set_names(selected_assays$assay_id) |> + map(.f = \(.list) aggregation_function(.list)) + map(.x = seq_along(aggregated_vals), \(.num) enframe(x = aggregated_vals[[.num]], name = ".feature", value = selected_assays$assay_id[[.num]])) |> + suppressMessages(reduce(full_join)) + } + aggregated_list <- lapply(sce_split, aggregate_sce_fun) |> + purrr::list_transpose() |> + map(.f = \(.list) .list |> bind_rows(.id = "grouping_factor")) + interim_res <- map(.x = seq_along(aggregated_list), .f = \(.num) aggregated_list[[.num]] |> + separate(col = grouping_factor, into = .sample_names, sep = "___")) |> + purrr::set_names(nm = selected_exp) + map(.x = seq_along(interim_res), .f = \(.num) interim_res[[.num]] |> + mutate(assay_type = names(interim_res)[[.num]])) |> + purrr::reduce(full_join) |> + select(assay_type, everything()) + } + } + + # Join tibbles from each assay / experiment type into a single tibble. + se <- lapply(selected_experiments_list, aggregate_assays_fun) |> + purrr::reduce(full_join) |> + suppressMessages() + + # Sometimes feature names can be duplicated in multiple assays, e.g. CD4 in RNA and ADT. Check for duplication. + any_feat_duplicated <- se |> + distinct(assay_type, .feature) |> + pull(.feature) |> + duplicated() |> + any() + + if(any_feat_duplicated) { + warning("tidySingleCellExperiment says: The selected assays have overlapping feature names. The feature names have been combined with the selected assay_type, to keep the rownames of the SingleCellExperiment unique. You can find the original feature names in the feature_original column of the rowData slot of your object.") + # Extract original feature names for storing. + orig_features <- se |> + distinct(assay_type, .feature) + # Extract which features have duplicated names. + dup_features <- orig_features |> + filter(duplicated(.feature)) |> + pull(.feature) + # Make duplicated feature names unique by combining with assay name and separating with ".." + se <- se |> + mutate(.feature = case_when(.feature %in% dup_features ~ str_c(assay_type, .feature, sep = ".."), .default = .feature)) + } + # Turn tibble into SummarizedExperiment object + se <- se |> + as_SummarizedExperiment( + .sample = .sample_names, + .transcript = .feature, + .abundance = setdiff(colnames(se), c("assay_type", .sample_names, ".feature"))) + # Manually force the assay_type data to live in the rowData slot if it does not already + if(exists("assay_type", where = as.data.frame(colData(se)))) { + rowData(se) <- rownames(se) |> + enframe(name = NULL, value = "rowname") |> + mutate(assay_type = unique(colData(se)$assay_type)) |> + tibble::column_to_rownames() |> + as.data.frame() |> + as(Class = "DataFrame") + colData(se)$assay_type <- NULL + } + # Add original feature name information to the rowData slot + if(any_feat_duplicated) { + rowData(se) <- rowData(se) |> + as.data.frame() |> + rownames_to_column() |> + mutate(feature_original = rowname, + feature_original = str_remove_all(string = feature_original, pattern = ".+(?=\\.\\.)"), + feature_original = str_remove_all(string = feature_original, pattern = "^\\..")) |> + column_to_rownames() |> + as(Class = "DataFrame") + } + return(se) }) diff --git a/R/plotly_methods.R b/R/plotly_methods.R index 2d7a61a..61c76a8 100755 --- a/R/plotly_methods.R +++ b/R/plotly_methods.R @@ -11,26 +11,26 @@ #' @importFrom ttservice plot_ly #' @export plot_ly.SingleCellExperiment <- function(data=data.frame(), - ..., type=NULL, name=NULL, - color=NULL, colors=NULL, alpha=NULL, - stroke=NULL, strokes=NULL, alpha_stroke=1, - size=NULL, sizes=c(10, 100), - span=NULL, spans=c(1, 20), - symbol=NULL, symbols=NULL, - linetype=NULL, linetypes=NULL, - split=NULL, frame=NULL, - width=NULL, height=NULL, source="A") { - data %>% - # This is a trick to not loop the call - as_tibble() %>% - ttservice::plot_ly(..., - type=type, name=name, - color=color, colors=colors, alpha=alpha, - stroke=stroke, strokes=strokes, alpha_stroke=alpha_stroke, - size=size, sizes=sizes, - span=span, spans=spans, - symbol=symbol, symbols=symbols, - linetype=linetype, linetypes=linetypes, - split=split, frame=frame, - width=width, height=height, source=source) -} + ..., type=NULL, name=NULL, + color=NULL, colors=NULL, alpha=NULL, + stroke=NULL, strokes=NULL, alpha_stroke=1, + size=NULL, sizes=c(10, 100), + span=NULL, spans=c(1, 20), + symbol=NULL, symbols=NULL, + linetype=NULL, linetypes=NULL, + split=NULL, frame=NULL, + width=NULL, height=NULL, source="A") { + data %>% + # This is a trick to not loop the call + as_tibble() %>% + ttservice::plot_ly(..., + type=type, name=name, + color=color, colors=colors, alpha=alpha, + stroke=stroke, strokes=strokes, alpha_stroke=alpha_stroke, + size=size, sizes=sizes, + span=span, spans=spans, + symbol=symbol, symbols=symbols, + linetype=linetype, linetypes=linetypes, + split=split, frame=frame, + width=width, height=height, source=source) +} \ No newline at end of file diff --git a/R/print_method.R b/R/print_method.R index bdef4ae..a28c4a5 100755 --- a/R/print_method.R +++ b/R/print_method.R @@ -4,25 +4,27 @@ #' @name tbl_format_header #' @rdname tbl_format_header #' @inherit pillar::tbl_format_header -#' +#' #' @examples #' # TODO -#' +#' #' @importFrom rlang names2 #' @importFrom pillar align #' @importFrom pillar get_extent #' @importFrom pillar style_subtle #' @importFrom pillar tbl_format_header #' @export + tbl_format_header.tidySingleCellExperiment <- function(x, setup, ...) { - + number_of_features <- x |> attr("number_of_features") assay_names <- x |> attr("assay_names") - + + # Change name named_header <- setup$tbl_sum names(named_header) <- "A SingleCellExperiment-tibble abstraction" - + if (all(names2(named_header) == "")) { header <- named_header } else { @@ -30,11 +32,12 @@ tbl_format_header.tidySingleCellExperiment <- function(x, setup, ...) { align(paste0(names2(named_header), ":"), space=NBSP), " ", named_header) %>% # Add further info single-cell - append(sprintf( - "\033[90m Features=%s | Cells=%s | Assays=%s\033[39m", - number_of_features, nrow(x), - paste(assay_names, collapse=", ") - ), after=1) + + append(sprintf( + "\033[90m Features=%s | Cells=%s | Assays=%s\033[39m", + number_of_features, nrow(x), + paste(assay_names, collapse=", ")), after=1) + } style_subtle(pillar___format_comment(header, width=setup$width)) } @@ -49,17 +52,28 @@ tbl_format_header.tidySingleCellExperiment <- function(x, setup, ...) { #' @examples #' data(pbmc_small) #' print(pbmc_small) -#' +#' #' @importFrom vctrs new_data_frame #' @importFrom SummarizedExperiment assayNames +#' @importFrom SingleCellExperiment altExpNames #' @export -print.SingleCellExperiment <- function(x, ..., n=NULL, width=NULL) { - x |> - as_tibble(n_dimensions_to_return=5) |> - new_data_frame(class=c("tidySingleCellExperiment", "tbl")) %>% - add_attr(nrow(x), "number_of_features") %>% - add_attr(assayNames(x), "assay_names") %>% - print() - - invisible(x) + +print.SingleCellExperiment <- function(x, ..., n = NULL, width = NULL) { + if (length(names(altExps(x))) > 0) { + alt_exp_assays <- list() + assay_names_list <- lapply(altExps(x), assayNames) + assay_names_df <- stack(assay_names_list) + assay_names_string <- c(assayNames(x), paste(assay_names_df$ind, assay_names_df$values, sep = "-")) |> + paste(collapse = ", ") + } else { + assay_names_string <- paste(assayNames(x), collapse = ", ") + } + x |> + as_tibble(n_dimensions_to_return = 5) |> + new_data_frame(class = c("tidySingleCellExperiment", "tbl")) %>% + add_attr(nrow(x), "number_of_features") %>% + add_attr(assay_names_string, "assay_names") %>% + print() + + invisible(x) } diff --git a/R/tibble_methods.R b/R/tibble_methods.R index 9679660..a007c17 100755 --- a/R/tibble_methods.R +++ b/R/tibble_methods.R @@ -15,7 +15,7 @@ as_tibble.SingleCellExperiment <- function(x, ..., .name_repair=c("check_unique", "unique", "universal", "minimal"), rownames=pkgconfig::get_config("tibble::rownames", NULL)) { df <- colData(x) %>% - as.data.frame() %>% + as(Class = "data.frame", strict = FALSE) %>% tibble::as_tibble(rownames=c_(x)$name) # Attach reduced dimensions only if # there are any and for special datasets diff --git a/R/utilities.R b/R/utilities.R index bdeef59..4cc7c73 100755 --- a/R/utilities.R +++ b/R/utilities.R @@ -1,5 +1,6 @@ #' @importFrom tibble as_tibble #' @importFrom SummarizedExperiment colData +#' @importFrom MASS rnegbin #' #' @keywords internal #' @@ -7,44 +8,45 @@ #' #' @noRd to_tib <- function(.data) { - colData(.data) %>% - as.data.frame() %>% - as_tibble(rownames=c_(.data)$name) + colData(.data) %>% + as.data.frame() %>% + as_tibble(rownames=c_(.data)$name) } # Greater than gt <- function(a, b) { - a > b + a > b } # Smaller than st <- function(a, b) { - a < b + a < b } # Negation not <- function(is) { - !is + !is } # Raise to the power pow <- function(a, b) { - a^b + a^b } # Equals eq <- function(a, b) { - a == b + a == b } prepend <- function(x, values, before=1) { - n <- length(x) - stopifnot(before > 0 && before <= n) - if (before == 1) { - c(values, x) - } else { - c(x[seq_len(before-1)], values, x[seq(before, n)]) - } + n <- length(x) + stopifnot(before > 0 && before <= n) + if (before == 1) { + c(values, x) + } + else { + c(x[seq_len(before - 1)], values, x[before:n]) + } } #' Add class to abject #' @@ -55,7 +57,7 @@ prepend <- function(x, values, before=1) { #' #' @return A tibble with an additional attribute add_class <- function(var, name) { - if (!name %in% class(var)) + if (!name %in% class(var)) class(var) <- prepend(class(var), name) return(var) } @@ -70,71 +72,148 @@ add_class <- function(var, name) { #' @return A tibble with an additional attribute #' @keywords internal drop_class <- function(var, name) { - class(var) <- class(var)[!class(var) %in% name] - return(var) + class(var) <- class(var)[!class(var) %in% name] + var } -#' get abundance long +# Get assays +get_all_assays <- function(x) { + assay_names <- names(assays(x)) + alt_exp_assays <- list() + alt_exp_assay_names_list <- lapply(altExps(x), assayNames) + names(assay_names) <- rep("RNA", length(assay_names)) + # Include altExp assays if they exist + if(length(altExps(x)) > 0) { + alt_exp_assay_names_df <- stack(alt_exp_assay_names_list) + alt_exp_assay_names <- paste(alt_exp_assay_names_df$ind, alt_exp_assay_names_df$values, sep = "-") + names(alt_exp_assay_names) <- alt_exp_assay_names_df$ind + } else { + alt_exp_assay_names_df <- NULL + alt_exp_assay_names <- NULL + } + + all_assay_names_df <- rbind(stack(assay_names), alt_exp_assay_names_df) + all_assay_names <- c(assay_names, alt_exp_assay_names) + all_assay_names_ext_df <- stack(all_assay_names) + all_assay_names_ext_df <- cbind(all_assay_names_ext_df, all_assay_names_df$values) + colnames(all_assay_names_ext_df) <- c("assay_id", "exp_id", "assay_name") + return(all_assay_names_ext_df) +} + +# Get list of features +get_all_features <- function(x) { + all_assay_names_ext_df <- get_all_assays(x) + features_lookup <- vector("list", length = length(all_assay_names_ext_df$assay_id)) + RNA_features <- vector("list", length = 1) + names(RNA_features) <- "RNA" + RNA_features[["RNA"]] <- rownames(rowData(x)) + temp_funct <- function(x) rownames(rowData(x)) + alt_exp_features <- lapply(altExps(x), temp_funct) + feature_df <- stack(c(RNA_features, alt_exp_features)) + colnames(feature_df) <- c("feature", "exp_id") + feature_df <- merge(feature_df, all_assay_names_ext_df, by = "exp_id") + return(feature_df) +} + +#' get abundance wide #' #' @keywords internal #' #' @importFrom magrittr "%$%" #' @importFrom utils tail #' @importFrom stats setNames +#' @importFrom purrr reduce +#' @importFrom dplyr full_join #' @importFrom SummarizedExperiment assay assayNames #' #' @param .data A `tidySingleCellExperiment` #' @param features A character #' @param all A boolean -#' @param ... Parameters to pass to join wide, i.e., +#' @param ... Parameters to pass to join wide, i.e., #' `assay` to extract feature abundances from #' #' @return A tidySingleCellExperiment object #' #' @noRd -get_abundance_sc_wide <- function(.data, - features=NULL, all=FALSE, assay=rev(assayNames(.data))[1], prefix="") { - - # Solve CRAN warnings - . <- NULL - - # For SCE there is not filed for variable features - variable_feature <- c() - - # Check if output would be too big without forcing - if (isFALSE(all) && is.null(features)) { - if (!length(variable_feature)) { - stop("Your object does not contain variable feature labels,\n", - " feature argument is empty and all arguments are set to FALSE.\n", - " Either:\n", - " 1. use detect_variable_features() to select variable feature\n", - " 2. pass an array of feature names\n", - " 3. set all=TRUE (this will output a very large object;", - " does your computer have enough RAM?)") - } else { - # Get variable features if existing - variable_genes <- variable_feature - } +get_abundance_sc_wide <- function(.data, features=NULL, all=FALSE, prefix="", variable_features = NA, ...) { + + arg_list <- c(mget(ls(environment(), sorted=F)), match.call(expand.dots=F)$...) + assays_to_use <- eval(arg_list$assays) + if(is.null(assays_to_use)) stop("Please provide one assay name when joining in wide format") + if(length(assays_to_use) > 1) stop("Please provide one assay name when joining in wide format") + + # Solve CRAN warnings + . <- NULL + + # For SCE there is no a priori field for variable features + # If variable_features are selected set all to FALSE. They can only be on or the other. + if(!all(is.na(variable_features))) all <- FALSE + + # Give options if no arguments are selected + if (isFALSE(all) && is.null(features)) { + if (all(is.na(variable_features))) { + stop("Your object does not contain variable feature labels,\n", + "The features argument is empty and all arguments are set to FALSE.\n", + " Either:\n", + " 1. use scran::getTopHVGs() to select variable features\n", + " 2. pass an array of feature names to `variable_features`\n", + " 3. set all=TRUE (this will output a very large object;", + " does your computer have enough RAM?)") } else { - variable_genes <- NULL + # Get variable features if existing + variable_genes <- variable_features } - - if (!is.null(variable_genes)) { - gs <- variable_genes - } else if (!is.null(features)) { - gs <- features + } else { + variable_genes <- NULL + } + + if (!is.null(variable_genes)) { + gs <- variable_genes + } else if (!is.null(features)) { + gs <- features + } + # Get selected features and assays + feature_df <- get_all_features(.data) + # If all = TRUE then gs are all features in the selected assays, otherwise just the selected features. + if(isTRUE(all)) gs <- feature_df[feature_df$assay_name %in% assays_to_use, "feature"] + selected_features <- feature_df[(feature_df$feature %in% gs), ] + # Subset by selected assay + selected_features <- selected_features[selected_features$assay_name %in% assays_to_use,] + # If the name of the selected assay is wrong the function will throw an error. Stop before this happens. + if(!is.null(features) & nrow(selected_features) == 0) stop("Please provide matched feature and assay names") + # Split by experiment + selected_experiments_list <- split(x = selected_features, f = as.character(selected_features$exp_id)) + if("RNA" %in% names(selected_experiments_list)) selected_experiments_list <- selected_experiments_list[c("RNA", setdiff(names(selected_experiments_list), "RNA"))] + extract_feature_values <- function(exp) { + selected_features_exp <- as.character(unique(exp$exp_id)) + selected_features_assay <- as.character(unique(exp$assay_name)) + selected_features_assay_names <- as.character(unique(exp$assay_id)) + # Assay slots for the main "RNA" experiment and the altExps are treated differently and need to be run separately + if(selected_features_exp == "RNA") { + selected_features_from_exp <- rownames(assay(.data, selected_features_assay_names))[(rownames(assay(.data, selected_features_assay_names)) %in% gs)] + mtx <- assay(.data, selected_features_assay_names)[selected_features_from_exp,] + if(is.null(dim(mtx))) mtx <- matrix(mtx, byrow = TRUE, nrow = 1, ncol = length(mtx)) + mtx %>% + `colnames<-`(colnames(.data)) %>% + as.matrix() %>% t() %>% + as_tibble(rownames=c_(.data)$name, .name_repair = "minimal") %>% + setNames(c(c_(.data)$name, sprintf("%s%s", prefix, selected_features_from_exp))) } else { - stop("It is not convenient to extract all genes.", - " You should have either variable features,", - " or a feature list to extract.") - } - mtx <- assay(.data, assay) - mtx <- mtx[gs, , drop=FALSE] - - mtx %>% + selected_features_from_exp <- rownames(altExps(.data)[[selected_features_exp]])[(rownames(altExps(.data)[[selected_features_exp]]) %in% gs)] + mtx <- assay(altExps(.data)[[selected_features_exp]], selected_features_assay)[selected_features_from_exp,] + if(is.null(dim(mtx))) mtx <- matrix(mtx, byrow = TRUE, nrow = 1, ncol = length(mtx)) + mtx %>% + `colnames<-`(colnames(.data)) %>% as.matrix() %>% t() %>% - as_tibble(rownames=c_(.data)$name) %>% - setNames(c(c_(.data)$name, sprintf("%s%s", prefix, gs))) + as_tibble(rownames=c_(.data)$name, .name_repair = "minimal") %>% + setNames(c(c_(.data)$name, sprintf("%s%s", prefix, selected_features_from_exp))) + } + } + # Apply function that extracts feature values and join for all selected assays + suppressMessages({ + feature_values_list <- lapply(selected_experiments_list, extract_feature_values) + purrr::reduce(feature_values_list, dplyr::full_join, by = dplyr::join_by(.cell), suffix = paste0(".", names(feature_values_list))) + }) } #' get abundance long @@ -144,8 +223,9 @@ get_abundance_sc_wide <- function(.data, #' @importFrom magrittr "%$%" #' @importFrom tidyr pivot_longer #' @importFrom tibble as_tibble -#' @importFrom purrr when #' @importFrom purrr map2 +#' @importFrom purrr reduce +#' @importFrom dplyr full_join #' @importFrom SummarizedExperiment assays assayNames #' #' @param .data A tidySingleCellExperiment @@ -156,71 +236,115 @@ get_abundance_sc_wide <- function(.data, #' @return A tidySingleCellExperiment object #' #' @noRd -get_abundance_sc_long <- function(.data, - features=NULL, all=FALSE, exclude_zeros=FALSE) { - - # Solve CRAN warnings - . <- NULL - - # For SCE there is not filed for variable features - variable_feature <- c() - - # Check if output would be too big without forcing - if (isFALSE(all) && is.null(features)) { - if (!length(variable_feature)) { - stop("Your object does not contain variable feature labels,\n", - " feature argument is empty and all arguments are set to FALSE.\n", - " Either:\n", - " 1. use detect_variable_features() to select variable feature\n", - " 2. pass an array of feature names\n", - " 3. set all=TRUE (this will output a very large object;", - " does your computer have enough RAM?)") - } else { - # Get variable features if existing - variable_genes <- variable_feature - } +get_abundance_sc_long <- function(.data, features = NULL, all = FALSE, exclude_zeros = FALSE, variable_features = NA, ...) { + + arg_list <- c(mget(ls(environment(), sorted=F)), match.call(expand.dots=F)$...) + assays_to_use <- eval(arg_list$assays) + + # Solve CRAN warnings + . <- NULL + + # Check if output would be too big without forcing + if (isFALSE(all) && is.null(features)) { + if (all(is.na(variable_features))) { + stop("Your object does not contain variable feature labels,\n", + "The features argument is empty and all arguments are set to FALSE.\n", + " Either:\n", + " 1. use scran::getTopHVGs() to select variable features\n", + " 2. pass an array of feature names to `variable_features`\n", + " 3. set all=TRUE (this will output a very large object;", + " does your computer have enough RAM?)") } else { - variable_genes <- NULL + # Get variable features if existing + variable_genes <- variable_features + features <- variable_features } - - # Check that I have assay names - if (!length(assayNames(.data))) - stop("tidySingleCellExperiment says:", - " there are no assay names in the", - " source SingleCellExperiment.") - - if (!is.null(variable_genes)) { - gs <- variable_genes - } else if (!is.null(features)){ - gs <- features - } else if (isTRUE(all)) { - gs <- TRUE + } else { + variable_genes <- NULL + } + + # Check that I have assay names + if (!length(assayNames(.data))) { + stop("tidySingleCellExperiment says:", + " there are no assay names in the", + " source SingleCellExperiment.") + } + + if (!is.null(variable_genes)) { + gs <- variable_genes + } else if (!is.null(features)){ + gs <- features + } else if(is.null(gs) && isTRUE(gs)) { + gs <- unique(feature_df$feature) + } else { + stop("It is not convenient to extract all genes.", + " You should have either variable features,", + " or a feature list to extract.") + } + + # Get assays + all_assay_names_ext_df <- get_all_assays(.data) + + # Get list of features + feature_df <- get_all_features(.data) + + # Get selected features - if all = TRUE then all features in the objects are selected + selected_features <- feature_df[(feature_df$feature %in% gs), ] + if(!is.null(assays_to_use)) selected_features <- selected_features[selected_features$assay_name %in% assays_to_use,] + selected_features_exp <- unique(selected_features$exp_id) + selected_experiments_list <- split(x = selected_features, f = as.character(selected_features$exp_id)) + if("RNA" %in% selected_features_exp) selected_experiments_list <- selected_experiments_list[c("RNA", setdiff(names(selected_experiments_list), "RNA"))] + + extract_feature_values <- function(exp) { + selected_exp <- unique(exp$exp_id) + # Assay slots for the main "RNA" experiment and the altExps are treated differently and need to be run separately + if (selected_exp == "RNA") { + assays(.data) %>% + as.list() %>% + .[unique(exp$assay_name)] %>% + purrr::map2(unique(exp$assay_id), ~ { + # Subset specified features + .x <- .x[unique(exp$feature), , drop=FALSE] + # Replace 0 with NA + if (isTRUE(exclude_zeros)) + .x[.x == 0] <- NA + .x %>% + as.matrix() %>% + data.frame(check.names=FALSE) %>% + as_tibble(rownames=".feature") %>% + tidyr::pivot_longer( + cols=-.feature, + names_to=c_(.data)$name, + values_to=".abundance" %>% paste(.y, sep="_"), + values_drop_na=TRUE) + }) %>% Reduce(function(...) full_join(..., + by=c(".feature", c_(.data)$name)), .) } else { - stop("It is not convenient to extract all genes.", - " You should have either variable features,", - " or a feature list to extract.") - } - - assays(.data) %>% + assays(altExps(.data)[[unique(exp$exp_id)]]) %>% as.list() %>% - # Take active assay - map2(assayNames(.data), ~ { - # Subset specified features - .x <- .x[gs, , drop=FALSE] - # Replace 0 with NA - if (isTRUE(exclude_zeros)) - .x[.x == 0] <- NA - .x %>% - as.matrix() %>% - data.frame(check.names=FALSE) %>% - as_tibble(rownames=".feature") %>% - tidyr::pivot_longer( - cols=-.feature, - names_to=c_(.data)$name, - values_to=".abundance" %>% paste(.y, sep="_"), - values_drop_na=TRUE) - }) %>% Reduce(function(...) full_join(..., - by=c(".feature", c_(.data)$name)), .) + .[unique(exp$assay_name)] %>% + purrr::map2(unique(exp$assay_id), ~ { + # Subset specified features + .x <- .x[unique(exp$feature), , drop=FALSE] + # Replace 0 with NA + if (isTRUE(exclude_zeros)) + .x[.x == 0] <- NA + .x %>% + as.matrix() %>% + data.frame(check.names=FALSE) %>% + as_tibble(rownames=".feature") %>% + tidyr::pivot_longer( + cols=-.feature, + names_to=c_(.data)$name, + values_to=".abundance" %>% paste(.y, sep="_"), + values_drop_na=TRUE) + }) %>% Reduce(function(...) full_join(..., + by=c(".feature", c_(.data)$name)), .) + } + } + # Apply function that extracts feature values and bind_rows for all selected assays + lapply(selected_experiments_list, extract_feature_values) |> + bind_rows() } #' @importFrom dplyr select any_of @@ -236,21 +360,21 @@ as_meta_data <- function(.data, SingleCellExperiment_object) { col_to_exclude <- get_special_columns(SingleCellExperiment_object) |> - # Need this in case we have multiple reduced dimensions + # Need this in case we have multiple reduced dimensions # with overlapping column names, e.g., multiple PCAs vctrs::vec_as_names(repair="unique") |> # To avoid name change by the 'bind_cols()' of 'as_tibble()' trick_to_avoid_renaming_of_already_unique_columns_by_dplyr() - + .data_df <- .data %>% select(-any_of(col_to_exclude)) %>% data.frame() - - # Set row names in a robust way; the 'row.names' argument + + # Set row names in a robust way; the 'row.names' argument # of 'data.frame()' does not work for 1-row 'data.frame's sym <- c_(SingleCellExperiment_object)$symbol rownames(.data_df) <- pull(.data_df, !!sym) - + .data_df <- select(.data_df, -!!sym) return(DataFrame(.data_df)) } @@ -263,23 +387,23 @@ as_meta_data <- function(.data, SingleCellExperiment_object) { #' #' @noRd get_special_columns <- function(SingleCellExperiment_object) { - get_special_datasets(SingleCellExperiment_object) %>% - map(~ .x %>% colnames()) %>% - unlist() %>% - as.character() + get_special_datasets(SingleCellExperiment_object) %>% + map(~ .x %>% colnames()) %>% + unlist() %>% + as.character() } #' @importFrom SingleCellExperiment reducedDims get_special_datasets <- function(SingleCellExperiment_object, n_dimensions_to_return=Inf) { - + rd <- reducedDims(SingleCellExperiment_object) map2(as.list(rd), names(rd), ~ { n_dims <- min(n_dimensions_to_return, ncol(.x)) mat <- .x[, seq_len(n_dims), drop=FALSE] - # Set names as SCE is much less constrained + # Set names as SCE is much less constrained # and there could be missing names - if (is.null(colnames(mat))) colnames(mat) <- + if (is.null(colnames(mat))) colnames(mat) <- sprintf("%s%s", .y, seq_len(ncol(mat))) return(mat) }) @@ -300,10 +424,10 @@ get_needed_columns <- function(.data) { #' #' @return A character vector quo_names <- function(v) { - v <- quo_name(quo_squash(v)) - gsub("^c\\(|`|\\)$", "", v) %>% - strsplit(", ") %>% - unlist() + v <- quo_name(quo_squash(v)) + gsub("^c\\(|`|\\)$", "", v) %>% + strsplit(", ") %>% + unlist() } #' returns variables from an expression @@ -318,7 +442,6 @@ return_arguments_of <- function(expression){ variables } -#' @importFrom purrr when #' @importFrom dplyr select #' @importFrom rlang expr #' @importFrom tidyselect eval_select @@ -339,16 +462,17 @@ duplicated_cell_names <- paste( # Check if "sample" is included in the query and is not part of any other existing annotation #' @importFrom stringr str_detect #' @importFrom stringr regex -is_sample_feature_deprecated_used <- function(.data, + +is_sample_feature_deprecated_used <- function(.data, user_columns, use_old_special_names=FALSE) { - + cell <- user_columns |> as.character() |> str_detect(regex("\\bcell\\b")) |> any() .cell <- user_columns |> as.character() |> str_detect(regex("\\W*(\\.cell)\\W*")) |> any() - - old_standard_is_used <- + + old_standard_is_used <- !"cell" %in% colnames(colData(.data)) && ("cell" %in% as.character(user_columns) || (cell && !.cell)) - + if (old_standard_is_used) { warning("tidySingleCellExperiment says:", " from version 1.3.1, the special columns including", @@ -368,6 +492,7 @@ get_special_column_name_symbol <- function(name) { # Key column names #' @importFrom S4Vectors metadata #' @importFrom S4Vectors metadata<- + ping_old_special_column_into_metadata <- function(.data) { metadata(.data)$cell__ <- get_special_column_name_symbol("cell") return(.data) @@ -415,9 +540,9 @@ special_datasets_to_tibble <- function(.singleCellExperiment, ...) { tibble::enframe() %>% tidyr::spread(name, value) }) %>% purrr::reduce(bind_cols) - + # To avoid name change by the 'bind_cols()' of 'as_tibble()' - colnames(x) <- colnames(x) |> + colnames(x) <- colnames(x) |> trick_to_avoid_renaming_of_already_unique_columns_by_dplyr() return(x) } @@ -431,37 +556,37 @@ trick_to_avoid_renaming_of_already_unique_columns_by_dplyr <- function(x) { #' #' @keywords internal #' @noRd -#' +#' #' @importFrom rlang enquo #' @importFrom purrr map #' @importFrom dplyr distinct_at #' @importFrom magrittr equals #' @importFrom dplyr vars -#' +#' #' @param .data A tibble #' @param .col A vector of column names -#' +#' #' @return A character get_specific_annotation_columns <- function(.data, .col) { - + # Comply with CRAN NOTES . <- NULL - + # Make col names .col <- enquo(.col) - + # x-annotation df n_x <- .data |> distinct_at(vars(!!.col)) |> nrow() - + # Exclude columns that have more values than my .col columns_unique_length = .data |> select(-!!.col) |> lapply(function(x) unique(x) |> length()) columns_unique_length = columns_unique_length[columns_unique_length<=n_x] - + .sample = .data |> select(!!.col) |> unite(".sample", !!.col) |> pull(.sample) - + # element wise columns columns_unique_length |> - names() |> + names() |> map(~ { n_.x <- .data |> pull(all_of(.x)) |> paste(.sample) |> unique() |> length() if (n_.x == n_x) .x else NULL @@ -475,23 +600,24 @@ get_specific_annotation_columns <- function(.data, .col) { #' #' @keywords internal #' @noRd -#' +#' #' @importFrom rlang enquo -#' +#' #' @param .data A tibble #' @param .column A vector of column names #' #' @return A tibble + subset <- function(.data, .column) { - + # Make col names .column <- enquo(.column) - + # Check if column present if (!all(quo_names(.column) %in% colnames(.data))) stop("nanny says: some of the .column specified", " do not exist in the input data frame.") - + .data |> # Selecting the right columns select(!!.column, get_specific_annotation_columns(.data, !!.column)) %>% @@ -500,21 +626,21 @@ subset <- function(.data, .column) { splitColData <- function(x, f) { - # This is by @jma1991 + # This is by @jma1991 # at https://github.com/drisso/SingleCellExperiment/issues/55 - + i <- split(seq_along(f), f) - + v <- vector(mode = "list", length = length(i)) - + names(v) <- names(i) - + for (n in names(i)) { v[[n]] <- x[, i[[n]]] } - + return(v) - + } cell__ <- get_special_column_name_symbol(".cell") feature__ <- get_special_column_name_symbol(".feature") -sample__ <- get_special_column_name_symbol(".sample") \ No newline at end of file +sample__ <- get_special_column_name_symbol(".sample") diff --git a/plotly_methods.R b/plotly_methods.R new file mode 100644 index 0000000..2d7a61a --- /dev/null +++ b/plotly_methods.R @@ -0,0 +1,36 @@ +#' @name plot_ly +#' @rdname plot_ly +#' @inherit ttservice::plot_ly +#' @return `plotly` +#' +#' @examples +#' data(pbmc_small) +#' pbmc_small |> +#' plot_ly(x = ~ nCount_RNA, y = ~ nFeature_RNA) +#' +#' @importFrom ttservice plot_ly +#' @export +plot_ly.SingleCellExperiment <- function(data=data.frame(), + ..., type=NULL, name=NULL, + color=NULL, colors=NULL, alpha=NULL, + stroke=NULL, strokes=NULL, alpha_stroke=1, + size=NULL, sizes=c(10, 100), + span=NULL, spans=c(1, 20), + symbol=NULL, symbols=NULL, + linetype=NULL, linetypes=NULL, + split=NULL, frame=NULL, + width=NULL, height=NULL, source="A") { + data %>% + # This is a trick to not loop the call + as_tibble() %>% + ttservice::plot_ly(..., + type=type, name=name, + color=color, colors=colors, alpha=alpha, + stroke=stroke, strokes=strokes, alpha_stroke=alpha_stroke, + size=size, sizes=sizes, + span=span, spans=spans, + symbol=symbol, symbols=symbols, + linetype=linetype, linetypes=linetypes, + split=split, frame=frame, + width=width, height=height, source=source) +} diff --git a/tests/testthat/test-dplyr_methods.R b/tests/testthat/test-dplyr_methods.R index 93f0541..ecf4f58 100755 --- a/tests/testthat/test-dplyr_methods.R +++ b/tests/testthat/test-dplyr_methods.R @@ -1,5 +1,38 @@ library(S4Vectors) -data(pbmc_small) +library(MASS, include.only = "rnegbin") +data("pbmc_small") +# Mock up ADT and cell hashing experiments +set.seed(2023-08-29) +# Antibody tags +pos_myus <- sample(x = 7500:20000, size = 5) +int_myus <- sample(x = 100:1500, size = 5) +neg_myus <- sample(x = 1:30, size = 15) +all_myus <- c(pos_myus, int_myus, neg_myus) +all_myus <- sample(x = all_myus, size = length(all_myus)) + +mat <- list() +for(i in seq_along(all_myus)) { + mat[[i]] <- rnegbin(n = dim(pbmc_small)[[2]], mu = all_myus[[i]], theta = all_myus[[i]]/500) +} +mat <- Reduce(f = cbind, x = mat) +colnames(mat) <- paste("Ab", seq_along(mat[1,]), sep = "-") +rownames(mat) <- colnames(pbmc_small) + +altExps(pbmc_small)[["ADT"]] <- SingleCellExperiment(assays = list(counts = t(mat), logcounts = log10(t(mat) + 1))) + +# Cell hashing +HTO_myus <- sample(x = c(100, 100000), size = 6, replace = TRUE) +mat <- list() +for(i in seq_along(HTO_myus)) { + mat[[i]] <- rnegbin(n = dim(pbmc_small)[[2]], mu = HTO_myus[[i]], theta = HTO_myus[[i]]/500) +} + +mat <- Reduce(f = cbind, x = mat) +colnames(mat) <- paste("HTO", seq_along(mat[1,]), sep = "-") +rownames(mat) <- colnames(pbmc_small) + +altExps(pbmc_small)[["Hashtag demultiplex"]] <- SingleCellExperiment(assays = list(counts = t(mat), logcounts = log10(t(mat) + 1))) + df <- pbmc_small df$number <- sample(seq(ncol(df))) df$factor <- sample( @@ -8,15 +41,15 @@ df$factor <- sample( # test_that("arrange()", { # expect_identical( -# arrange(df, number), +# arrange(df, number), # df[, order(df$number)]) # suppressWarnings({ # fd <- df %>% -# scater::logNormCounts() %>% +# scater::logNormCounts() %>% # scater::runPCA() # }) # expect_identical( -# arrange(fd, PC1), +# arrange(fd, PC1), # fd[, order(reducedDim(fd)[, 1])]) # fd <- df %>% # mutate(foo=seq(ncol(df))) %>% @@ -38,7 +71,7 @@ test_that("bind_cols()", { expect_identical(fd[[i[1]]], df$factor) expect_identical(fd[[i[2]]], df$factor) expect_identical( - select(fd, -starts_with("factor")), + select(fd, -starts_with("factor")), select(df, -factor)) }) @@ -80,12 +113,12 @@ test_that("mutate()", { expect_true(all(fd$peter == "pan")) fd <- mutate(df, number=paste(number)) expect_identical(fd$number, paste(df$number)) - + # special columns are blocked df |> mutate(.cell=1) |> expect_error("you are trying to mutate a column that is view only") - + df |> mutate(PC_10=1) |> expect_error("you are trying to mutate a column that is view only") @@ -95,35 +128,35 @@ test_that("rename()", { fd <- rename(df, num=number, fac=factor) expect_identical(fd$num, df$number) expect_identical(fd$fac, df$factor) - - df |> - rename(ne=mo) |> + + df |> + rename(ne=mo) |> expect_error("Column `mo` doesn't exist") - + # special columns are blocked # ...'to' cannot be special - + df |> rename(a=PC_1) |> - expect_error("you are trying to rename a column that is view only") - - df |> - rename(a=.cell) |> + expect_error("you are trying to rename a column that is view only") + + df |> + rename(a=.cell) |> expect_error("you are trying to rename a column that is view only") # ...'from' cannot be special - - df |> - rename(PC_1=number) |> + + df |> + rename(PC_1=number) |> expect_error("These names are duplicated") - - df |> - rename(.cell=number) |> + + df |> + rename(.cell=number) |> expect_error("These names are duplicated") }) test_that("left_join()", { - y <- df |> - distinct(factor) |> + y <- df |> + distinct(factor) |> mutate(string=letters[seq(nlevels(df$factor))]) fd <- left_join(df, y, by="factor") expect_s4_class(fd, "SingleCellExperiment") @@ -132,9 +165,9 @@ test_that("left_join()", { }) test_that("left_join(), with DataFrame y", { - y <- df |> - distinct(factor) |> - mutate(string=letters[seq(nlevels(df$factor))]) |> + y <- df |> + distinct(factor) |> + mutate(string=letters[seq(nlevels(df$factor))]) |> DataFrame() fd <- left_join(df, y, by="factor") expect_s4_class(fd, "SingleCellExperiment") @@ -143,9 +176,9 @@ test_that("left_join(), with DataFrame y", { }) test_that("inner_join()", { - y <- df |> - distinct(factor) |> - mutate(string=letters[seq(nlevels(df$factor))]) |> + y <- df |> + distinct(factor) |> + mutate(string=letters[seq(nlevels(df$factor))]) |> slice(1) fd <- inner_join(df, y, by="factor") expect_s4_class(fd, "SingleCellExperiment") @@ -154,9 +187,9 @@ test_that("inner_join()", { }) test_that("inner_join(), with DataFrame y", { - y <- df |> - distinct(factor) |> - mutate(string=letters[seq(nlevels(df$factor))]) |> + y <- df |> + distinct(factor) |> + mutate(string=letters[seq(nlevels(df$factor))]) |> slice(1) |> DataFrame() fd <- inner_join(df, y, by="factor") expect_s4_class(fd, "SingleCellExperiment") @@ -196,12 +229,12 @@ test_that("full_join()", { expect_equal(nrow(fd), ncol(df)+2*sum(df$factor == "g2")) # w/o duplicates y <- tibble(factor="g2", other=1) - + # I DON'T KNOW WHY THESE TESTS GIVES WARNING IN THE GITHUB ACTION - # fd <- expect_silent(full_join(df, y, by=join_by(factor))) + # fd <- expect_silent(full_join(df, y, by=join_by(factor))) # expect_s4_class(fd, "SingleCellExperiment") # expect_identical( - # select(fd, -other), + # select(fd, -other), # mutate(df, factor=paste(factor))) }) @@ -215,21 +248,21 @@ test_that("full_join(), with DataFrame y", { expect_equal(nrow(fd), ncol(df)+2*sum(df$factor == "g2")) # w/o duplicates y <- tibble(factor="g2", other=1) |> DataFrame() - + # I DON'T KNOW WHY THESE TESTS GIVES WARNING IN THE GITHUB ACTION - # fd <- expect_silent(full_join(df, y, by=join_by(factor))) + # fd <- expect_silent(full_join(df, y, by=join_by(factor))) # expect_s4_class(fd, "SingleCellExperiment") # expect_identical( - # select(fd, -other), + # select(fd, -other), # mutate(df, factor=paste(factor))) }) test_that("slice()", { - # I DON'T KNOW WHY THESE TESTS GIVES WARNING + # I DON'T KNOW WHY THESE TESTS GIVES WARNING # Please use `all_of()` or `any_of()` instead. #expect_identical(slice(df), df[, 0]) #expect_identical(slice(df, ncol(df)+1), df[, 0]) - + expect_identical(slice(df, 1), df[, 1]) expect_identical(slice(df, -1), df[, -1]) i <- sample(ncol(df), 5) @@ -366,10 +399,10 @@ test_that("add_count()", { }) test_that("rowwise()", { - df |> + df |> summarise(sum(lys)) |> expect_error("object 'lys' not found") - + df$lys <- replicate(ncol(df), sample(10, 3), FALSE) fd <- df |> rowwise() |> summarise(sum(lys)) expect_s3_class(fd, "tbl_df") @@ -378,22 +411,22 @@ test_that("rowwise()", { }) test_that("group_split() works for one variable", { - fd <- df |> + fd <- df |> group_split(groups) expect_equal(length(fd), length(unique(df$groups))) }) test_that("group_split() works for combination of variables", { - fd <- df |> + fd <- df |> group_split(groups, ident) expect_equal(length(fd), length(unique(df$groups)) * length(unique(df$ident))) }) test_that("group_split() works for one logical statement", { - fd_log <- df |> + fd_log <- df |> group_split(groups=="g1") - fd_var <- df |> + fd_var <- df |> group_split(groups=="g1") expect_equal(lapply(fd_var, count), lapply(fd_log, count)) }) @@ -402,7 +435,7 @@ test_that("group_split() works for two logical statements", { fd <- df |> group_split(PC_1>0 & groups=="g1") fd_counts <- lapply(fd, count) - expect_equal(c(fd_counts[[1]], fd_counts[[2]], use.names = FALSE), + expect_equal(c(fd_counts[[1]], fd_counts[[2]], use.names = FALSE), list(75, 5)) }) diff --git a/tests/testthat/test-ggplotly_methods.R b/tests/testthat/test-ggplotly_methods.R index a02e84e..9f2d263 100644 --- a/tests/testthat/test-ggplotly_methods.R +++ b/tests/testthat/test-ggplotly_methods.R @@ -1,16 +1,49 @@ -data(pbmc_small) +library(MASS, include.only = "rnegbin") +data("pbmc_small") +# Mock up ADT and cell hashing experiments +set.seed(2023-08-29) +# Antibody tags +pos_myus <- sample(x = 7500:20000, size = 5) +int_myus <- sample(x = 100:1500, size = 5) +neg_myus <- sample(x = 1:30, size = 15) +all_myus <- c(pos_myus, int_myus, neg_myus) +all_myus <- sample(x = all_myus, size = length(all_myus)) + +mat <- list() +for(i in seq_along(all_myus)) { + mat[[i]] <- rnegbin(n = dim(pbmc_small)[[2]], mu = all_myus[[i]], theta = all_myus[[i]]/500) +} +mat <- Reduce(f = cbind, x = mat) +colnames(mat) <- paste("Ab", seq_along(mat[1,]), sep = "-") +rownames(mat) <- colnames(pbmc_small) + +altExps(pbmc_small)[["ADT"]] <- SingleCellExperiment(assays = list(counts = t(mat), logcounts = log10(t(mat) + 1))) + +# Cell hashing +HTO_myus <- sample(x = c(100, 100000), size = 6, replace = TRUE) +mat <- list() +for(i in seq_along(HTO_myus)) { + mat[[i]] <- rnegbin(n = dim(pbmc_small)[[2]], mu = HTO_myus[[i]], theta = HTO_myus[[i]]/500) +} + +mat <- Reduce(f = cbind, x = mat) +colnames(mat) <- paste("HTO", seq_along(mat[1,]), sep = "-") +rownames(mat) <- colnames(pbmc_small) + +altExps(pbmc_small)[["Hashtag demultiplex"]] <- SingleCellExperiment(assays = list(counts = t(mat), logcounts = log10(t(mat) + 1))) + df <- pbmc_small df$number <- rnorm(ncol(df)) df$factor <- sample(gl(3, 1, ncol(df))) test_that("ggplot()", { # cell metadata - p <- ggplot(df, aes(factor, number)) + p <- ggplot(df, aes(factor, number)) expect_silent(show(p)) expect_s3_class(p, "ggplot") # assay data g <- sample(rownames(df), 1) - fd <- join_features(df, g, shape="wide") + fd <- join_features(df, g, shape="wide", assays = "counts") p <- ggplot(fd, aes(factor, .data[[make.names(g)]])) expect_silent(show(p)) expect_s3_class(p, "ggplot") @@ -22,17 +55,17 @@ test_that("ggplot()", { test_that("plotly()", { # cell metadata - p <- plot_ly(df, x=~factor, y=~number, type="violin") + p <- plot_ly(df, x=~factor, y=~number, type="violin") expect_silent(show(p)) expect_s3_class(p, "plotly") # assay data g <- sample(rownames(df), 1) - fd <- join_features(df, g, shape="wide") - p <- plot_ly(fd, x=~factor, y=g, type="violin") + fd <- join_features(df, g, shape="wide", assays = "counts") + p <- plot_ly(fd, x=~factor, y=g, type="violin") expect_silent(show(p)) expect_s3_class(p, "plotly") # reduced dimensions - p <- plot_ly(fd, x=~PC_1, y=~PC_2, type="scatter", mode="markers") + p <- plot_ly(fd, x=~PC_1, y=~PC_2, type="scatter", mode="markers") expect_silent(show(p)) expect_s3_class(p, "plotly") }) diff --git a/tests/testthat/test-methods.R b/tests/testthat/test-methods.R index b42540e..e597e91 100644 --- a/tests/testthat/test-methods.R +++ b/tests/testthat/test-methods.R @@ -1,71 +1,162 @@ -data(pbmc_small) + +library(MASS, include.only = "rnegbin") +data("pbmc_small") +# Mock up ADT and cell hashing experiments +set.seed(2023-08-29) +# Antibody tags +pos_myus <- sample(x = 7500:20000, size = 5) +int_myus <- sample(x = 100:1500, size = 5) +neg_myus <- sample(x = 1:30, size = 15) +all_myus <- c(pos_myus, int_myus, neg_myus) +all_myus <- sample(x = all_myus, size = length(all_myus)) + +mat <- list() +for(i in seq_along(all_myus)) { + mat[[i]] <- rnegbin(n = dim(pbmc_small)[[2]], mu = all_myus[[i]], theta = all_myus[[i]]/500) +} +mat <- Reduce(f = cbind, x = mat) +colnames(mat) <- paste("Ab", seq_along(mat[1,]), sep = "-") +rownames(mat) <- colnames(pbmc_small) + +altExps(pbmc_small)[["ADT"]] <- SingleCellExperiment(assays = list(counts = t(mat), logcounts = log10(t(mat) + 1))) + +# Cell hashing +HTO_myus <- sample(x = c(100, 100000), size = 6, replace = TRUE) +mat <- list() +for(i in seq_along(HTO_myus)) { + mat[[i]] <- rnegbin(n = dim(pbmc_small)[[2]], mu = HTO_myus[[i]], theta = HTO_myus[[i]]/500) +} + +mat <- Reduce(f = cbind, x = mat) +colnames(mat) <- paste("HTO", seq_along(mat[1,]), sep = "-") +rownames(mat) <- colnames(pbmc_small) + +altExps(pbmc_small)[["Hashtag demultiplex"]] <- SingleCellExperiment(assays = list(counts = t(mat), logcounts = log10(t(mat) + 1))) + df <- pbmc_small +df$number <- rnorm(ncol(df)) +df$factor <- sample(gl(3, 1, ncol(df))) test_that("show()", { - txt <- capture.output(show(df)) - expect_equal(grep("SingleCellExperiment", txt), 1) - i <- grep(str <- ".*Features=([0-9]+).*", txt) - expect_equal(gsub(str, "\\1", txt[i]), paste(nrow(df))) - i <- grep(str <- ".*Cells=([0-9]+).*", txt) - expect_equal(gsub(str, "\\1", txt[i]), paste(ncol(df))) -}) + txt <- capture.output(show(df)) + expect_lt(length(txt), 20) + expect_equal(grep("SingleCellExperiment", txt), 1) + i <- grep(str <- ".*Features=([0-9]+).*", txt) + expect_equal(gsub(str, "\\1", txt[i]), paste(nrow(df))) + i <- grep(str <- ".*Cells=([0-9]+).*", txt) + expect_equal(gsub(str, "\\1", txt[i]), paste(ncol(df))) + i <- grep(".*s=.*", txt) + j <- grep(".cell*", txt) -1 + header_text <- paste(txt[i:j], collapse = "") |> + stringr::str_remove_all(pattern = "# ") |> + stringr::str_remove_all(pattern = "\033") |> + stringr::str_remove_all(pattern = "\\[90m ") |> + stringr::str_remove_all(pattern = "\\[90m") |> + stringr::str_remove_all(pattern = "\\[0m") + x <- header_text |> + stringr::str_remove(pattern = ".+s=") |> + strsplit(split = ", ") |> + unlist() + y <- assayNames(df) + for (k in seq_along(altExps(pbmc_small))) { + y <- append(x = y, paste(altExpNames(pbmc_small)[[k]], assayNames(altExps(pbmc_small)[[k]]), sep = "-")) + } + for(j in seq_along(y)) { + expect_contains(x, y[j]) + } +} +) test_that("join_features()", { - gs <- sample(rownames(df), 3) - # long - fd <- join_features(df, gs, shape="long") - expect_s3_class(fd, "tbl_df") - expect_setequal(unique(fd$.feature), gs) - expect_true(all(table(fd$.feature) == ncol(df))) - expect_identical( - matrix(fd$.abundance_counts, nrow=length(gs)), - as.matrix(unname(counts(df)[fd$.feature[seq_along(gs)], ]))) - # wide - fd <- join_features(df, gs, shape="wide", assay="counts") - expect_s4_class(fd, "SingleCellExperiment") - expect_null(fd$.feature) - expect_identical( - unname(t(as.matrix(as_tibble(fd)[, make.names(gs)]))), - as.matrix(unname(counts(df)[gs, ]))) + gs <- c(sample(rownames(df), 3), sample(rownames(altExps(df)[[1]]), 3)) + sce_counts_combined <- do.call("rbind", append(lapply(altExps(df), counts), values = list(as.matrix(counts(df))), after = 0)) + # long + fd <- join_features(df, gs, shape="long") + expect_s3_class(fd, "tbl_df") + expect_setequal(unique(fd$.feature), gs) + expect_true(all(table(fd$.feature) == ncol(df))) + expect_identical( + { + fd <- select(fd,.cell, .feature, starts_with(".abundance")) + fd <- select(fd,.cell, .feature, ends_with("-counts")|ends_with("_counts")) + fd <- mutate(fd, counts = case_when(is.na(unlist(pick(3))) ~ unlist(pick(4)), .default = unlist(pick(3)))) + fd <- pivot_wider( + data=fd, + id_cols = ".feature", + names_from = ".cell", + values_from = "counts") + fd <- as.data.frame(fd[order(fd$.feature),]) + rownames(fd) <- fd$.feature + fd <- fd[, -1] + fd <- select(fd, sort(colnames(df))) + fd <- as.matrix(fd) + fd + }, + as.matrix(sce_counts_combined[sort(gs), sort(colnames(df))])) + + # wide + fd <- join_features(df, gs, shape="wide", assays="counts") + expect_s4_class(fd, "SingleCellExperiment") + expect_null(fd$.feature) + expect_identical( + { + fd <- as.data.frame(select(fd, cell = .cell, make.names(sort(gs)))) + fd <- fd[order(fd$cell), ] + rownames(fd) <- fd$cell + fd <- fd[, -1] + t(as.matrix(fd)) + }, { + sce_wide_mat <- as.matrix(sce_counts_combined[sort(gs), sort(colnames(df))]) + rownames(sce_wide_mat) <- make.names(rownames(sce_wide_mat)) + sce_wide_mat + } + ) }) test_that("as_tibble()", { - fd <- as_tibble(df) - expect_s3_class(fd, "tbl_df") - expect_equal(nrow(fd), ncol(df)) - ncd <- ncol(colData(df)) - ndr <- vapply(reducedDims(df), ncol, integer(1)) - expect_equal(ncol(fd), sum(1, ncd, ndr)) - # duplicated PCs - reducedDim(df, "PCB") <- reducedDim(df, "PCA") - fd <- as_tibble(mutate(df, abc=1)) - expect_equal(ncol(fd), ncol(as_tibble(df))+1) + fd <- as_tibble(df) + expect_s3_class(fd, "tbl_df") + expect_equal(nrow(fd), ncol(df)) + ncd <- ncol(colData(df)) + ndr <- vapply(reducedDims(df), ncol, integer(1)) + expect_equal(ncol(fd), sum(1, ncd, ndr)) + # duplicated PCs + reducedDim(df, "PCB") <- reducedDim(df, "PCA") + fd <- as_tibble(mutate(df, abc=1)) + expect_equal(ncol(fd), ncol(as_tibble(df))+1) }) test_that("aggregate_cells()", { - df$factor <- sample(gl(3, 1, ncol(df))) - df$string <- sample(c("a", "b"), ncol(df), TRUE) - tbl <- distinct(select(df, factor, string)) - fd <- aggregate_cells(df, c(factor, string)) - expect_identical(assayNames(fd), assayNames(df)) - # [HLC: aggregate_cells() currently - # reorders features alphabetically] - fd <- fd[rownames(df), ] - expect_s4_class(fd, "SummarizedExperiment") - expect_equal(dim(fd), c(nrow(df), nrow(tbl))) - foo <- mapply( - f=tbl$factor, - s=tbl$string, - \(f, s) { - expect_identical( - df |> - filter(factor == f, string == s) |> - assay() |> rowSums() |> as.vector(), - fd[, fd$factor == f & fd$string == s] |> - assay() |> as.vector()) - }) - # specified 'assays' are subsetted - expect_error(aggregate_cells(df, c(factor, string), assays="x")) - fd <- aggregate_cells(df, c(factor, string), assays="counts") - expect_identical(assayNames(fd), "counts") + df$factor <- sample(gl(3, 1, ncol(df))) + df$string <- sample(c("a", "b"), ncol(df), TRUE) + tbl <- distinct(select(df, factor, string)) + fd <- aggregate_cells(df, .sample = c(factor, string), assays = assayNames(df)) + expect_identical(assayNames(fd), assayNames(df)) + # [HLC: aggregate_cells() currently + # reorders features alphabetically] + fd <- fd[rownames(df), ] + expect_s4_class(fd, "SummarizedExperiment") + expect_equal(dim(fd), c(nrow(df), nrow(tbl))) + foo <- mapply( + f=tbl$factor, + s=tbl$string, + \(f, s) { + expect_identical( + df |> + filter(factor == f, string == s) |> + assay() |> rowSums() |> as.vector(), + fd[, fd$factor == f & fd$string == s] |> + assay() |> as.vector()) + }) + # specified 'assays' are subsetted + expect_error(aggregate_cells(df, c(factor, string), assays="x")) + fd <- aggregate_cells(df, c(factor, string), assays="counts") + expect_identical(assayNames(fd), "counts") + # Aggregate when using multiple assays + assays_to_use <- c("logcounts", "ADT-logcounts") + fd <- aggregate_cells(df, .sample = c(factor, string), assays = assays_to_use) + expect_identical(assayNames(fd), assays_to_use) + fd_all_features <- tidySingleCellExperiment:::get_all_features(df) |> + filter(assay_id %in% assays_to_use) |> pull(feature) |> sort() + expect_identical(fd_all_features, sort(rownames(fd))) }) diff --git a/vignettes/introduction.Rmd b/vignettes/introduction.Rmd index 10ceb49..a4b1a1b 100755 --- a/vignettes/introduction.Rmd +++ b/vignettes/introduction.Rmd @@ -362,7 +362,7 @@ pbmc_small_cell_type %>% # Add some mitochondrial abundance values mutate(mitochondrial=rnorm(dplyr::n())) %>% # Plot correlation - join_features(features=c("CST3", "LYZ"), shape="wide") %>% + join_features(features=c("CST3", "LYZ"), shape="wide", assays="counts") %>% ggplot(aes(CST3+1, LYZ+1, color=groups, size=mitochondrial)) + facet_wrap(~first.labels, scales="free") + geom_point() + @@ -490,4 +490,4 @@ pbmc_small_tidy %>% sessionInfo() ``` -# References \ No newline at end of file +# References