diff --git a/articles/figures_article.html b/articles/figures_article.html index cda9310..8c99626 100644 --- a/articles/figures_article.html +++ b/articles/figures_article.html @@ -137,8 +137,8 @@

2023-08-25

names_to="property", values_to="Value" ) %>% - mutate(property = factor(property, levels = c("mito.fraction", "G2M.Score", "S.Score"))) %>% - ggplot(aes(sample, Value)) + + mutate(property = factor(property, levels = c("mito.fraction", "G2M.Score", "S.Score"))) %>% + ggplot(aes(sample, Value)) + geom_boxplot(outlier.size = 0.5 ) + facet_wrap(~property, scales = "free_y" ) + custom_theme + @@ -146,15 +146,15 @@

2023-08-25

 p2 = 
   PBMC_clean_scaled_UMAP_cluster_cell_type %>%
-  sample_n(20000) %>%
-  ggplot(aes(UMAP_1, UMAP_2, color=seurat_clusters)) +
+  sample_n(20000) %>%
+  ggplot(aes(UMAP_1, UMAP_2, color=seurat_clusters)) +
   geom_point(size=0.05, alpha=0.2) +
   custom_theme +
   theme(aspect.ratio=1)
 
 PBMC_clean_scaled_UMAP_cluster_cell_type %>%
-  sample_n(20000) %>%
-  plot_ly(
+  sample_n(20000) %>%
+  plot_ly(
     x = ~`UMAP_1`,
     y = ~`UMAP_2`,
     z = ~`UMAP_3`,
@@ -166,10 +166,10 @@ 

2023-08-25

 p3 = 
   PBMC_clean_scaled_UMAP_cluster_cell_type %>%
-  arrange(first.labels) %>%
-  mutate(seurat_clusters = fct_inorder(seurat_clusters)) %>%
+  arrange(first.labels) %>%
+  mutate(seurat_clusters = fct_inorder(seurat_clusters)) %>%
   join_features(features=c("CD3D", "HLA-DRB1")) %>%
-  ggplot(aes(y=seurat_clusters , x=.abundance_SCT, fill=first.labels)) +
+  ggplot(aes(y=seurat_clusters , x=.abundance_SCT, fill=first.labels)) +
   geom_density_ridges(bandwidth = 0.2) +
   facet_wrap(~ .feature, nrow = 2) +
   coord_flip() +
@@ -178,7 +178,7 @@ 

2023-08-25

# Plot heatmap p4 = PBMC_clean_scaled_UMAP_cluster_cell_type %>% - sample_n(2000) %>% + sample_n(2000) %>% DoHeatmap( features = markers$gene, group.colors = friendly_cols @@ -186,11 +186,11 @@

2023-08-25

 p5 = 
   PBMC_clean_scaled_UMAP_cluster_cell_type %>%
-  sample_n(1000) %>%
+  sample_n(1000) %>%
   join_features(features=markers$gene) %>%
-  mutate(seurat_clusters = as.integer(seurat_clusters)) %>%
-  filter(seurat_clusters<10) %>%
-  group_by(seurat_clusters) %>%
+  mutate(seurat_clusters = as.integer(seurat_clusters)) %>%
+  filter(seurat_clusters<10) %>%
+  group_by(seurat_clusters) %>%
   
   # Plot heatmap
   heatmap(
@@ -210,13 +210,13 @@ 

2023-08-25

 p6 = 
   PBMC_clean_scaled_UMAP_cluster_cell_type %>%
-  tidyseurat::unite("cluster_cell_type", c(first.labels, seurat_clusters), remove=FALSE) %>%
+  tidyseurat::unite("cluster_cell_type", c(first.labels, seurat_clusters), remove=FALSE) %>%
   pivot_longer(
     c(seurat_clusters, first.labels_single),
     names_to = "classification", values_to = "value"
   ) %>%
   
-  ggplot(aes(x = classification, stratum = value, alluvium = cell,
+  ggplot(aes(x = classification, stratum = value, alluvium = cell,
            fill = first.labels, label = value)) +
   scale_x_discrete(expand = c(1, 1)) +
   geom_flow() +
diff --git a/articles/introduction.html b/articles/introduction.html
index b5cbecd..ee22819 100644
--- a/articles/introduction.html
+++ b/articles/introduction.html
@@ -270,14 +270,14 @@ 

Preliminary plots
 pbmc_small %>%
-  tidyseurat::ggplot(aes(nFeature_RNA, fill = groups)) +
+  ggplot(aes(nFeature_RNA, fill = groups)) +
   geom_histogram() +
   my_theme

Here we plot total features per cell.

 pbmc_small %>%
-  tidyseurat::ggplot(aes(groups, nCount_RNA, fill = groups)) +
+  ggplot(aes(groups, nCount_RNA, fill = groups)) +
   geom_boxplot(outlier.shape = NA) +
   geom_jitter(width = 0.1) +
   my_theme
@@ -286,7 +286,7 @@

Preliminary plots
 pbmc_small %>%
   join_features(features = c("HLA-DRA", "LYZ")) %>%
-  ggplot(aes(groups, .abundance_RNA + 1, fill = groups)) +
+  ggplot(aes(groups, .abundance_RNA + 1, fill = groups)) +
   geom_boxplot(outlier.shape = NA) +
   geom_jitter(aes(size = nCount_RNA), alpha = 0.5, width = 0.2) +
   scale_y_log10() +
@@ -326,8 +326,8 @@ 

Preprocess the datasetIf a tool is not included in the tidyseurat collection, we can use as_tibble to permanently convert tidyseurat into tibble.

 pbmc_small_pca %>%
-  as_tibble() %>%
-  select(contains("PC"), everything()) %>%
+  as_tibble() %>%
+  select(contains("PC"), everything()) %>%
   GGally::ggpairs(columns = 1:5, ggplot2::aes(colour = groups)) +
   my_theme

@@ -365,7 +365,7 @@

Identify clusters
 pbmc_small_cluster %>%
-  tidyseurat::count(groups, seurat_clusters)

+ count(groups, seurat_clusters)
## # A tibble: 8 × 3
 ##   groups seurat_clusters     n
 ##   <chr>  <fct>           <int>
@@ -406,7 +406,7 @@ 

Identify clustersmarkers <- pbmc_small_cluster %>% FindAllMarkers(only.pos = TRUE, min.pct = 0.25, thresh.use = 0.25) %>% - group_by(cluster) %>% + group_by(cluster) %>% top_n(10, avg_log2FC) # Plot heatmap @@ -427,7 +427,7 @@

Reduce dimensions
 pbmc_small_UMAP %>%
-  plot_ly(
+  plot_ly(
     x = ~`UMAP_1`,
     y = ~`UMAP_2`,
     z = ~`UMAP_3`,
@@ -456,21 +456,21 @@ 

Cell type prediction= "single" ) %>% as.data.frame() %>% - as_tibble(rownames = "cell") %>% - select(cell, first.labels)

+ as_tibble(rownames = "cell") %>% + select(cell, first.labels)
 # Join UMAP and cell type info
 pbmc_small_cell_type <-
   pbmc_small_UMAP %>%
-  left_join(cell_type_df, by = "cell")
+  left_join(cell_type_df, by = "cell")
 
 # Reorder columns
 pbmc_small_cell_type %>%
-  tidyseurat::select(cell, first.labels, everything())
+ tidyseurat::select(cell, first.labels, everything())

We can easily summarise the results. For example, we can see how cell type classification overlaps with cluster classification.

 pbmc_small_cell_type %>%
-  count(seurat_clusters, first.labels)
+ count(seurat_clusters, first.labels)

We can easily reshape the data for building information-rich faceted plots.

 pbmc_small_cell_type %>%
@@ -482,7 +482,7 @@ 

Cell type prediction) %>% # UMAP plots for cell type and cluster - ggplot(aes(UMAP_1, UMAP_2, color = label)) + + ggplot(aes(UMAP_1, UMAP_2, color = label)) + geom_point() + facet_wrap(~classifier) + my_theme

@@ -491,11 +491,11 @@

Cell type predictionpbmc_small_cell_type %>% # Add some mitochondrial abundance values - mutate(mitochondrial = rnorm(n())) %>% + mutate(mitochondrial = rnorm(n())) %>% # Plot correlation join_features(features = c("CST3", "LYZ"), shape = "wide") %>% - ggplot(aes(CST3 + 1, LYZ + 1, color = groups, size = mitochondrial)) + + ggplot(aes(CST3 + 1, LYZ + 1, color = groups, size = mitochondrial)) + geom_point() + facet_wrap(~first.labels, scales = "free") + scale_x_log10() + @@ -510,16 +510,16 @@

Nested analyses
 pbmc_small_nested <-
   pbmc_small_cell_type %>%
-  filter(first.labels != "Erythrocytes") %>%
-  mutate(cell_class = if_else(`first.labels` %in% c("Macrophages", "Monocytes"), "myeloid", "lymphoid")) %>%
-  nest(data = -cell_class)
+  filter(first.labels != "Erythrocytes") %>%
+  mutate(cell_class = if_else(`first.labels` %in% c("Macrophages", "Monocytes"), "myeloid", "lymphoid")) %>%
+  nest(data = -cell_class)
 
 pbmc_small_nested

Now we can independently for the lymphoid and myeloid subsets (i) find variable features, (ii) reduce dimensions, and (iii) cluster using both tidyverse and Seurat seamlessly.

 pbmc_small_nested_reanalysed <-
   pbmc_small_nested %>%
-  mutate(data = map(
+  mutate(data = map(
     data, ~ .x %>%
       FindVariableFeatures(verbose = FALSE) %>%
       RunPCA(npcs = 10, verbose = FALSE) %>%
@@ -534,14 +534,14 @@ 

Nested analysespbmc_small_nested_reanalysed %>% # Convert to tibble otherwise Seurat drops reduced dimensions when unifying data sets. - mutate(data = map(data, ~ .x %>% as_tibble())) %>% - unnest(data) %>% + mutate(data = map(data, ~ .x %>% as_tibble())) %>% + unnest(data) %>% # Define unique clusters - unite("cluster", c(cell_class, seurat_clusters), remove = FALSE) %>% + unite("cluster", c(cell_class, seurat_clusters), remove = FALSE) %>% # Plotting - ggplot(aes(UMAP_1, UMAP_2, color = cluster)) + + ggplot(aes(UMAP_1, UMAP_2, color = cluster)) + geom_point() + facet_wrap(~cell_class) + my_theme

diff --git a/articles/introduction_files/figure-html/plot2-1.png b/articles/introduction_files/figure-html/plot2-1.png index 168f2e0..5ffbcfc 100644 Binary files a/articles/introduction_files/figure-html/plot2-1.png and b/articles/introduction_files/figure-html/plot2-1.png differ diff --git a/articles/introduction_files/figure-html/unnamed-chunk-12-1.png b/articles/introduction_files/figure-html/unnamed-chunk-12-1.png index f77796f..29b2fa4 100644 Binary files a/articles/introduction_files/figure-html/unnamed-chunk-12-1.png and b/articles/introduction_files/figure-html/unnamed-chunk-12-1.png differ diff --git a/index.html b/index.html index 75671fc..29a725d 100644 --- a/index.html +++ b/index.html @@ -263,14 +263,14 @@

Preliminary plots
 pbmc_small %>%
-  tidyseurat::ggplot(aes(nFeature_RNA, fill = groups)) +
+  tidyseurat::ggplot(aes(nFeature_RNA, fill = groups)) +
   geom_histogram() +
   my_theme

Here we plot total features per cell.

 pbmc_small %>%
-  tidyseurat::ggplot(aes(groups, nCount_RNA, fill = groups)) +
+  tidyseurat::ggplot(aes(groups, nCount_RNA, fill = groups)) +
   geom_boxplot(outlier.shape = NA) +
   geom_jitter(width = 0.1) +
   my_theme
@@ -279,7 +279,7 @@

Preliminary plots
 pbmc_small %>%
   join_features(features = c("HLA-DRA", "LYZ")) %>%
-  ggplot(aes(groups, .abundance_RNA + 1, fill = groups)) +
+  ggplot(aes(groups, .abundance_RNA + 1, fill = groups)) +
   geom_boxplot(outlier.shape = NA) +
   geom_jitter(aes(size = nCount_RNA), alpha = 0.5, width = 0.2) +
   scale_y_log10() +
@@ -319,8 +319,8 @@ 

Preprocess the datasetIf a tool is not included in the tidyseurat collection, we can use as_tibble to permanently convert tidyseurat into tibble.

 pbmc_small_pca %>%
-  as_tibble() %>%
-  select(contains("PC"), everything()) %>%
+  as_tibble() %>%
+  select(contains("PC"), everything()) %>%
   GGally::ggpairs(columns = 1:5, ggplot2::aes(colour = groups)) +
   my_theme

@@ -358,7 +358,7 @@

Identify clusters
 pbmc_small_cluster %>%
-  tidyseurat::count(groups, seurat_clusters)
+ tidyseurat::count(groups, seurat_clusters)

## # A tibble: 8 × 3
 ##   groups seurat_clusters     n
 ##   <chr>  <fct>           <int>
@@ -396,7 +396,7 @@ 

Identify clustersmarkers <- pbmc_small_cluster %>% FindAllMarkers(only.pos = TRUE, min.pct = 0.25, thresh.use = 0.25) %>% - group_by(cluster) %>% + group_by(cluster) %>% top_n(10, avg_log2FC) # Plot heatmap @@ -417,7 +417,7 @@

Reduce dimensions
 pbmc_small_UMAP %>%
-  plot_ly(
+  plot_ly(
     x = ~`UMAP_1`,
     y = ~`UMAP_2`,
     z = ~`UMAP_3`,
@@ -446,21 +446,21 @@ 

Cell type prediction= "single" ) %>% as.data.frame() %>% - as_tibble(rownames = "cell") %>% - select(cell, first.labels)

+ as_tibble(rownames = "cell") %>% + select(cell, first.labels)
 # Join UMAP and cell type info
 pbmc_small_cell_type <-
   pbmc_small_UMAP %>%
-  left_join(cell_type_df, by = "cell")
+  left_join(cell_type_df, by = "cell")
 
 # Reorder columns
 pbmc_small_cell_type %>%
-  tidyseurat::select(cell, first.labels, everything())
+ tidyseurat::select(cell, first.labels, everything())

We can easily summarise the results. For example, we can see how cell type classification overlaps with cluster classification.

 pbmc_small_cell_type %>%
-  count(seurat_clusters, first.labels)
+ count(seurat_clusters, first.labels)

We can easily reshape the data for building information-rich faceted plots.

 pbmc_small_cell_type %>%
@@ -472,7 +472,7 @@ 

Cell type prediction) %>% # UMAP plots for cell type and cluster - ggplot(aes(UMAP_1, UMAP_2, color = label)) + + ggplot(aes(UMAP_1, UMAP_2, color = label)) + geom_point() + facet_wrap(~classifier) + my_theme

@@ -481,11 +481,11 @@

Cell type predictionpbmc_small_cell_type %>% # Add some mitochondrial abundance values - mutate(mitochondrial = rnorm(n())) %>% + mutate(mitochondrial = rnorm(n())) %>% # Plot correlation join_features(features = c("CST3", "LYZ"), shape = "wide") %>% - ggplot(aes(CST3 + 1, LYZ + 1, color = groups, size = mitochondrial)) + + ggplot(aes(CST3 + 1, LYZ + 1, color = groups, size = mitochondrial)) + geom_point() + facet_wrap(~first.labels, scales = "free") + scale_x_log10() + @@ -500,16 +500,16 @@

Nested analyses
 pbmc_small_nested <-
   pbmc_small_cell_type %>%
-  filter(first.labels != "Erythrocytes") %>%
-  mutate(cell_class = if_else(`first.labels` %in% c("Macrophages", "Monocytes"), "myeloid", "lymphoid")) %>%
-  nest(data = -cell_class)
+  filter(first.labels != "Erythrocytes") %>%
+  mutate(cell_class = if_else(`first.labels` %in% c("Macrophages", "Monocytes"), "myeloid", "lymphoid")) %>%
+  nest(data = -cell_class)
 
 pbmc_small_nested

Now we can independently for the lymphoid and myeloid subsets (i) find variable features, (ii) reduce dimensions, and (iii) cluster using both tidyverse and Seurat seamlessly.

 pbmc_small_nested_reanalysed <-
   pbmc_small_nested %>%
-  mutate(data = map(
+  mutate(data = map(
     data, ~ .x %>%
       FindVariableFeatures(verbose = FALSE) %>%
       RunPCA(npcs = 10, verbose = FALSE) %>%
@@ -524,14 +524,14 @@ 

Nested analysespbmc_small_nested_reanalysed %>% # Convert to tibble otherwise Seurat drops reduced dimensions when unifying data sets. - mutate(data = map(data, ~ .x %>% as_tibble())) %>% - unnest(data) %>% + mutate(data = map(data, ~ .x %>% as_tibble())) %>% + unnest(data) %>% # Define unique clusters - unite("cluster", c(cell_class, seurat_clusters), remove = FALSE) %>% + unite("cluster", c(cell_class, seurat_clusters), remove = FALSE) %>% # Plotting - ggplot(aes(UMAP_1, UMAP_2, color = cluster)) + + ggplot(aes(UMAP_1, UMAP_2, color = cluster)) + geom_point() + facet_wrap(~cell_class) + my_theme

diff --git a/pkgdown.yml b/pkgdown.yml index 6403822..809ba99 100644 --- a/pkgdown.yml +++ b/pkgdown.yml @@ -4,5 +4,5 @@ pkgdown_sha: ~ articles: figures_article: figures_article.html introduction: introduction.html -last_built: 2023-08-25T08:38Z +last_built: 2023-08-25T10:51Z diff --git a/reference/aggregate_cells.html b/reference/aggregate_cells.html index 870596a..7bab498 100644 --- a/reference/aggregate_cells.html +++ b/reference/aggregate_cells.html @@ -77,7 +77,7 @@

Aggregate cells

Arguments

.data
-

A tidySingleCellExperiment object

+

A tidyseurat object

.sample
@@ -110,24 +110,9 @@

Value

Examples

data(pbmc_small)
-pbmc_small |>
-  aggregate_cells(c(groups, letter.idents), assays = "RNA")
+pbmc_small_pseudo_bulk <- pbmc_small |>
+  aggregate_cells(c(groups, letter.idents), assays="RNA")
 #> Joining with `by = join_by(letter.idents, groups)`
-#> # A tibble: 920 × 8
-#>    .feature  .sample   RNA letter.idents groups .aggregated_cells orig.ident   
-#>    <chr>     <chr>   <dbl> <fct>         <chr>              <int> <fct>        
-#>  1 MS4A1     g2___A  35.5  A             g2                    23 SeuratProject
-#>  2 CD79B     g2___A  39.3  A             g2                    23 SeuratProject
-#>  3 CD79A     g2___A  29.5  A             g2                    23 SeuratProject
-#>  4 HLA-DRA   g2___A  73.9  A             g2                    23 SeuratProject
-#>  5 TCL1A     g2___A  29.5  A             g2                    23 SeuratProject
-#>  6 HLA-DQB1  g2___A  37.6  A             g2                    23 SeuratProject
-#>  7 HVCN1     g2___A  22.4  A             g2                    23 SeuratProject
-#>  8 HLA-DMB   g2___A  24.7  A             g2                    23 SeuratProject
-#>  9 LTB       g2___A  91.9  A             g2                    23 SeuratProject
-#> 10 LINC00926 g2___A   9.91 A             g2                    23 SeuratProject
-#> # ℹ 910 more rows
-#> # ℹ 1 more variable: RNA_snn_res.0.8 <fct>
 
 
diff --git a/reference/arrange.html b/reference/arrange.html new file mode 100644 index 0000000..f84257e --- /dev/null +++ b/reference/arrange.html @@ -0,0 +1,190 @@ + +Order rows using column values — arrange • tidyseurat + + +
+
+ + + +
+
+ + +
+

arrange() orders the rows of a data frame by the values of selected +columns.

+

Unlike other dplyr verbs, arrange() largely ignores grouping; you +need to explicitly mention grouping variables (or use .by_group = TRUE) +in order to group by them, and functions of variables are evaluated +once per data frame, not once per group.

+
+ +
+
# S3 method for Seurat
+arrange(.data, ..., .by_group = FALSE)
+
+ +
+

Arguments

+
.data
+

A data frame, data frame extension (e.g. a tibble), or a +lazy data frame (e.g. from dbplyr or dtplyr). See Methods, below, for +more details.

+ + +
...
+

<data-masking> Variables, or +functions of variables. Use desc() to sort a variable in descending +order.

+ + +
.by_group
+

If TRUE, will sort first by grouping variable. Applies to +grouped data frames only.

+ +
+
+

Value

+ + +

An object of the same type as .data. The output has the following +properties:

  • All rows appear in the output, but (usually) in a different place.

  • +
  • Columns are not modified.

  • +
  • Groups are not modified.

  • +
  • Data frame attributes are preserved.

  • +
+
+

Details

+ +
+

Missing values

+ + +

Unlike base sorting with sort(), NA are:

  • always sorted to the end for local data, even when wrapped with desc().

  • +
  • treated differently for remote data, depending on the backend.

  • +
+ +
+
+

Methods

+ + + +

This function is a generic, which means that packages can provide +implementations (methods) for other classes. See the documentation of +individual methods for extra arguments and differences in behaviour.

+

The following methods are currently available in loaded packages: +dplyr (data.frame), plotly (plotly), tidyseurat (Seurat) +.

+
+
+

See also

+

Other single table verbs: +mutate(), +rename(), +slice(), +summarise()

+
+ +
+

Examples

+
data(pbmc_small)
+pbmc_small |>
+    arrange(nFeature_RNA)
+#> # A Seurat-tibble abstraction: 80 × 15
+#> # Features=230 | Cells=80 | Active assay=RNA | Assays=RNA
+#>    .cell orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
+#>    <chr> <fct>           <dbl>        <int> <fct>           <fct>         <chr> 
+#>  1 CATG… SeuratPro…         51           26 0               A             g2    
+#>  2 GGCA… SeuratPro…        172           29 0               A             g1    
+#>  3 AGTC… SeuratPro…        157           29 0               A             g1    
+#>  4 GACG… SeuratPro…        202           30 0               A             g2    
+#>  5 GGAA… SeuratPro…        150           30 0               A             g2    
+#>  6 AGGT… SeuratPro…         62           31 0               A             g2    
+#>  7 CTTC… SeuratPro…         41           32 0               A             g2    
+#>  8 GTAA… SeuratPro…         67           33 0               A             g2    
+#>  9 GTCA… SeuratPro…        210           33 0               A             g2    
+#> 10 TGGT… SeuratPro…         64           36 0               A             g1    
+#> # ℹ 70 more rows
+#> # ℹ 8 more variables: RNA_snn_res.1 <fct>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>,
+#> #   PC_4 <dbl>, PC_5 <dbl>, tSNE_1 <dbl>, tSNE_2 <dbl>
+
+
+
+
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.7.

+
+ +
+ + + + + + + + diff --git a/reference/as_tibble.html b/reference/as_tibble.html new file mode 100644 index 0000000..1d0f17b --- /dev/null +++ b/reference/as_tibble.html @@ -0,0 +1,221 @@ + +Coerce lists, matrices, and more to data frames — as_tibble • tidyseurat + + +
+
+ + + +
+
+ + +
+

as_tibble() turns an existing object, such as a data frame or +matrix, into a so-called tibble, a data frame with class tbl_df. This is +in contrast with tibble(), which builds a tibble from individual columns. +as_tibble() is to tibble() as base::as.data.frame() is to +base::data.frame().

+

as_tibble() is an S3 generic, with methods for:

as_tibble_row() converts a vector to a tibble with one row. +If the input is a list, all elements must have size one.

+

as_tibble_col() converts a vector to a tibble with one column.

+
+ +
+
# S3 method for Seurat
+as_tibble(
+  x,
+  ...,
+  .name_repair = c("check_unique", "unique", "universal", "minimal"),
+  rownames = NULL
+)
+
+ +
+

Arguments

+
x
+

A data frame, list, matrix, or other object that could reasonably be +coerced to a tibble.

+ + +
...
+

Unused, for extensibility.

+ + +
.name_repair
+

Treatment of problematic column names:

  • "minimal": No name repair or checks, beyond basic existence,

  • +
  • "unique": Make sure names are unique and not empty,

  • +
  • "check_unique": (default value), no name repair, but check they are +unique,

  • +
  • "universal": Make the names unique and syntactic

  • +
  • a function: apply custom name repair (e.g., .name_repair = make.names +for names in the style of base R).

  • +
  • A purrr-style anonymous function, see rlang::as_function()

  • +

This argument is passed on as repair to vctrs::vec_as_names(). +See there for more details on these terms and the strategies used +to enforce them.

+ + +
rownames
+

How to treat existing row names of a data frame or matrix:

  • NULL: remove row names. This is the default.

  • +
  • NA: keep row names.

  • +
  • A string: the name of a new column. Existing rownames are transferred +into this column and the row.names attribute is deleted. +No name repair is applied to the new column name, even if x already contains +a column of that name. +Use as_tibble(rownames_to_column(...)) to safeguard against this case.

  • +

Read more in rownames.

+ +
+
+

Value

+ + +

`tibble`

+
+
+

Row names

+ + + +

The default behavior is to silently remove row names.

+

New code should explicitly convert row names to a new column using the +rownames argument.

+

For existing code that relies on the retention of row names, call +pkgconfig::set_config("tibble::rownames" = NA) in your script or in your +package's .onLoad() function.

+
+
+

Life cycle

+ + + +

Using as_tibble() for vectors is superseded as of version 3.0.0, +prefer the more expressive as_tibble_row() and +as_tibble_col() variants for new code.

+
+
+

See also

+

tibble() constructs a tibble from individual columns. enframe() +converts a named vector to a tibble with a column of names and column of +values. Name repair is implemented using vctrs::vec_as_names().

+
+ +
+

Examples

+
data(pbmc_small)
+pbmc_small |> as_tibble()
+#> # A tibble: 80 × 29
+#>    .cell orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
+#>    <chr> <fct>           <dbl>        <int> <fct>           <fct>         <chr> 
+#>  1 ATGC… SeuratPro…         70           47 0               A             g2    
+#>  2 CATG… SeuratPro…         85           52 0               A             g1    
+#>  3 GAAC… SeuratPro…         87           50 1               B             g2    
+#>  4 TGAC… SeuratPro…        127           56 0               A             g2    
+#>  5 AGTC… SeuratPro…        173           53 0               A             g2    
+#>  6 TCTG… SeuratPro…         70           48 0               A             g1    
+#>  7 TGGT… SeuratPro…         64           36 0               A             g1    
+#>  8 GCAG… SeuratPro…         72           45 0               A             g1    
+#>  9 GATA… SeuratPro…         52           36 0               A             g1    
+#> 10 AATG… SeuratPro…        100           41 0               A             g1    
+#> # ℹ 70 more rows
+#> # ℹ 22 more variables: RNA_snn_res.1 <fct>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>,
+#> #   PC_4 <dbl>, PC_5 <dbl>, PC_6 <dbl>, PC_7 <dbl>, PC_8 <dbl>, PC_9 <dbl>,
+#> #   PC_10 <dbl>, PC_11 <dbl>, PC_12 <dbl>, PC_13 <dbl>, PC_14 <dbl>,
+#> #   PC_15 <dbl>, PC_16 <dbl>, PC_17 <dbl>, PC_18 <dbl>, PC_19 <dbl>,
+#> #   tSNE_1 <dbl>, tSNE_2 <dbl>
+
+
+
+
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.7.

+
+ +
+ + + + + + + + diff --git a/reference/bind_rows.html b/reference/bind_rows.html index bd116e8..582b308 100644 --- a/reference/bind_rows.html +++ b/reference/bind_rows.html @@ -1,5 +1,8 @@ -Efficiently bind multiple data frames by row and column — bind_rows • tidyseurat#' Efficiently bind multiple data frames by row and column — bind_rows • tidyseuratExample data set 2 — cell_type_df • tidyseuratCell types of 80 PBMC single cells — cell_type_df • tidyseurat @@ -53,22 +53,40 @@
-

Example data set 2

+

A dataset containing the barcodes and cell types of 80 PBMC single cells.

-
cell_type_df
+
data(cell_type_df)

Format

-

An object of class tbl_df (inherits from tbl, data.frame) with 80 rows and 2 columns.

+

A tibble containing 80 rows and 2 columns. + Cells are a subsample of the Peripheral Blood Mononuclear Cells (PBMC) + dataset of 2,700 single cells. Cell types were identified with SingleR.

cell
+

cell identifier, barcode

+ +
first.labels
+

cell type

+ + +
+ +
+

Value

+ + +

`tibble`

diff --git a/reference/count.html b/reference/count.html new file mode 100644 index 0000000..39ddb14 --- /dev/null +++ b/reference/count.html @@ -0,0 +1,188 @@ + +Count the observations in each group — count • tidyseurat + + +
+
+ + + +
+
+ + +
+

count() lets you quickly count the unique values of one or more variables: +df %>% count(a, b) is roughly equivalent to +df %>% group_by(a, b) %>% summarise(n = n()). +count() is paired with tally(), a lower-level helper that is equivalent +to df %>% summarise(n = n()). Supply wt to perform weighted counts, +switching the summary from n = n() to n = sum(wt).

+

add_count() and add_tally() are equivalents to count() and tally() +but use mutate() instead of summarise() so that they add a new column +with group-wise counts.

+
+ +
+
# S3 method for Seurat
+count(
+  x,
+  ...,
+  wt = NULL,
+  sort = FALSE,
+  name = NULL,
+  .drop = group_by_drop_default(x)
+)
+
+# S3 method for Seurat
+add_count(
+  x,
+  ...,
+  wt = NULL,
+  sort = FALSE,
+  name = NULL,
+  .drop = group_by_drop_default(x)
+)
+
+ +
+

Arguments

+
x
+

A data frame, data frame extension (e.g. a tibble), or a +lazy data frame (e.g. from dbplyr or dtplyr).

+ + +
...
+

<data-masking> Variables to group +by.

+ + +
wt
+

<data-masking> Frequency weights. +Can be NULL or a variable:

  • If NULL (the default), counts the number of rows in each group.

  • +
  • If a variable, computes sum(wt) for each group.

  • +
+ + +
sort
+

If TRUE, will show the largest groups at the top.

+ + +
name
+

The name of the new column in the output.

+

If omitted, it will default to n. If there's already a column called n, +it will use nn. If there's a column called n and nn, it'll use +nnn, and so on, adding ns until it gets a new name.

+ + +
.drop
+

Handling of factor levels that don't appear in the data, passed +on to group_by().

+

For count(): if FALSE will include counts for empty groups (i.e. for +levels of factors that don't exist in the data).

+

[Deprecated] For add_count(): deprecated since it +can't actually affect the output.

+ +
+
+

Value

+ + +

An object of the same type as .data. count() and add_count()

+ + +

group transiently, so the output has the same groups as the input.

+
+ +
+

Examples

+
data(pbmc_small)
+pbmc_small |> count(groups)
+#> tidyseurat says: A data frame is returned for independent data analysis.
+#> # A tibble: 2 × 2
+#>   groups     n
+#>   <chr>  <int>
+#> 1 g1        44
+#> 2 g2        36
+    
+
+
+
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.7.

+
+ +
+ + + + + + + + diff --git a/reference/distinct.html b/reference/distinct.html new file mode 100644 index 0000000..13e6c5a --- /dev/null +++ b/reference/distinct.html @@ -0,0 +1,153 @@ + +Keep distinct/unique rows — distinct • tidyseurat + + +
+
+ + + +
+
+ + +
+

Keep only unique/distinct rows from a data frame. This is similar +to unique.data.frame() but considerably faster.

+
+ +
+
# S3 method for Seurat
+distinct(.data, ..., .keep_all = FALSE)
+
+ +
+

Arguments

+
.data
+

A data frame, data frame extension (e.g. a tibble), or a +lazy data frame (e.g. from dbplyr or dtplyr). See Methods, below, for +more details.

+ + +
...
+

<data-masking> Optional variables to +use when determining uniqueness. If there are multiple rows for a given +combination of inputs, only the first row will be preserved. If omitted, +will use all variables in the data frame.

+ + +
.keep_all
+

If TRUE, keep all variables in .data. +If a combination of ... is not distinct, this keeps the +first row of values.

+ +
+
+

Value

+ + +

An object of the same type as .data. The output has the following +properties:

  • Rows are a subset of the input but appear in the same order.

  • +
  • Columns are not modified if ... is empty or .keep_all is TRUE. +Otherwise, distinct() first calls mutate() to create new columns.

  • +
  • Groups are not modified.

  • +
  • Data frame attributes are preserved.

  • +
+
+

Methods

+ + + +

This function is a generic, which means that packages can provide +implementations (methods) for other classes. See the documentation of +individual methods for extra arguments and differences in behaviour.

+

The following methods are currently available in loaded packages: +dplyr (data.frame), plotly (plotly), tidyseurat (Seurat) +.

+
+ +
+

Examples

+
data("pbmc_small")
+pbmc_small |> distinct(groups)
+#> tidyseurat says: A data frame is returned for independent data analysis.
+#> # A tibble: 2 × 1
+#>   groups
+#>   <chr> 
+#> 1 g2    
+#> 2 g1    
+
+
+
+
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.7.

+
+ +
+ + + + + + + + diff --git a/reference/dplyr-methods.html b/reference/dplyr-methods.html deleted file mode 100644 index 3a50463..0000000 --- a/reference/dplyr-methods.html +++ /dev/null @@ -1,1000 +0,0 @@ - -Arrange rows by column values — arrange • tidyseurat - - -
-
- - - -
-
- - -
-

`arrange()` order the rows of a data frame rows by the values of selected -columns.

-

Unlike other dplyr verbs, `arrange()` largely ignores grouping; you -need to explicit mention grouping variables (or use `by_group = TRUE`) -in order to group by them, and functions of variables are evaluated -once per data frame, not once per group.

-

`filter()` retains the rows where the conditions you provide a `TRUE`. Note -that, unlike base subsetting with `[`, rows where the condition evaluates -to `NA` are dropped.

-

Most data operations are done on groups defined by variables. -`group_by()` takes an existing tbl and converts it into a grouped tbl -where operations are performed "by group". `ungroup()` removes grouping.

-

`summarise()` creates a new data frame. It will have one (or more) rows for -each combination of grouping variables; if there are no grouping variables, -the output will have a single row summarising all observations in the input. -It will contain one column for each grouping variable and one column -for each of the summary statistics that you have specified.

-

`summarise()` and `summarize()` are synonyms.

-

`mutate()` adds new variables and preserves existing ones; -`transmute()` adds new variables and drops existing ones. -New variables overwrite existing variables of the same name. -Variables can be removed by setting their value to `NULL`.

-

Rename individual variables using `new_name = old_name` syntax.

-

`rowwise()` is used for the results of [do()] when you -create list-variables. It is also useful to support arbitrary -complex operations that need to be applied to each row.

-

`slice()` lets you index rows by their (integer) locations. It allows you -to select, remove, and duplicate rows. It is accompanied by a number of -helpers for common use cases:

-

Select (and optionally rename) variables in a data frame, using a concise -mini-language that makes it easy to refer to variables based on their name -(e.g. `a:f` selects all columns from `a` on the left to `f` on the -right). You can also use predicate functions like is.numeric to select -variables based on their properties.

-

Sample n rows from a table

-

`count()` lets you quickly count the unique values of one or more variables: -`df -`df -`count()` is paired with `tally()`, a lower-level helper that is equivalent -to `df -switching the summary from `n=n()` to `n=sum(wt)`.

-

`add_count()` and `add_tally()` are equivalents to `count()` and `tally()` -but use `mutate()` instead of `summarise()` so that they add a new column -with group-wise counts.

-

`pull()` is similar to `$`. It's mostly useful because it looks a little -nicer in pipes, it also works with remote data frames, and it can optionally -name the output.

-
- - -
-

Arguments

-
.by_group
-

If TRUE, will sort first by grouping variable. Applies to grouped data frames only.

- - -
.keep_all
-

If TRUE, keep all variables in .data. If a combination of ... is not distinct, this keeps the first row of values. (See dplyr)

- - -
.preserve
-

when `FALSE` (the default), the grouping structure -is recalculated based on the resulting data, otherwise it is kept as is.

- - -
.add
-

When `FALSE`, the default, `group_by()` will - override existing groups. To add to the existing groups, use - `.add = TRUE`.

-

This argument was previously called `add`, but that prevented - creating a new grouping variable called `add`, and conflicts with - our naming conventions.

- - -
.data
-

Input data frame.

- - -
y
-

tbls to join. (See dplyr)

- - -
by
-

A character vector of variables to join by. (See dplyr)

- - -
copy
-

If x and y are not from the same data source, and copy is TRUE, then y will be copied into the same src as x. (See dplyr)

- - -
suffix
-

If there are non-joined duplicate variables in x and y, these suffixes will be added to the output to disambiguate them. Should be a character vector of length 2. (See dplyr)

- - -
weight_by
-

<[`data-masking`][dplyr_data_masking]> Sampling weights. -This must evaluate to a vector of non-negative numbers the same length as -the input. Weights are automatically standardised to sum to 1.

- - -
tbl
-

A data.frame.

- - -
size
-

<[`tidy-select`][dplyr_select]> -For `sample_n()`, the number of rows to select. -For `sample_frac()`, the fraction of rows to select. -If `tbl` is grouped, `size` applies to each group.

- - -
replace
-

Sample with or without replacement?

- - -
weight
-

<[`tidy-select`][dplyr_select]> Sampling weights. -This must evaluate to a vector of non-negative numbers the same length as -the input. Weights are automatically standardised to sum to 1.

- - -
.env
-

DEPRECATED.

- - -
x
-

A data frame, data frame extension (e.g. a tibble), or a -lazy data frame (e.g. from dbplyr or dtplyr).

- - -
wt
-

<[`data-masking`][dplyr_data_masking]> Frequency weights. - Can be `NULL` or a variable:

-

* If `NULL` (the default), counts the number of rows in each group. - * If a variable, computes `sum(wt)` for each group.

- - -
sort
-

If `TRUE`, will show the largest groups at the top.

- - -
.drop
-

For `count()`: if `FALSE` will include counts for empty groups -(i.e. for levels of factors that don't exist in the data). Deprecated in -`add_count()` since it didn't actually affect the output.

- - -
name
-

An optional parameter that specifies the column to be used -as names for a named vector. Specified in a similar manner as var.

- - -
...
-

For use by methods.

- -
-
-

Value

- - -

An object of the same type as `.data`.

- - -

* All rows appear in the output, but (usually) in a different place. -* Columns are not modified. -* Groups are not modified. -* Data frame attributes are preserved.

- - -

A Seurat object

- - -

An object of the same type as `.data`.

- - -

* Rows are a subset of the input, but appear in the same order. -* Columns are not modified. -* The number of groups may be reduced (if `.preserve` is not `TRUE`). -* Data frame attributes are preserved.

- - -

A grouped data frame, unless the combination of `...` and `.add` - yields an empty set of grouping columns, in which case a regular (ungrouped) - data frame is returned.

- - -

A tibble

- - -

An object of the same type as `.data`.

- - -

For `mutate()`:

- - -

* Rows are not affected. -* Existing columns will be preserved unless explicitly modified. -* New columns will be added to the right of existing columns. -* Columns given value `NULL` will be removed -* Groups will be recomputed if a grouping variable is mutated. -* Data frame attributes are preserved.

- - -

For `transmute()`:

- - -

* Rows are not affected. -* Apart from grouping variables, existing columns will be removed unless - explicitly kept. -* Column order matches order of expressions. -* Groups will be recomputed if a grouping variable is mutated. -* Data frame attributes are preserved.

- - -

An object of the same type as `.data`. -* Rows are not affected. -* Column names are changed; column order is preserved -* Data frame attributes are preserved. -* Groups are updated to reflect new names.

- - -

A `tbl`

- - -

A `tbl`

- - -

A Seurat object

- - -

A Seurat object

- - -

A Seurat object

- - -

A Seurat object

- - -

An object of the same type as `.data`. The output has the following -properties:

- - -

* Each row may appear 0, 1, or many times in the output. -* Columns are not modified. -* Groups are not modified. -* Data frame attributes are preserved.

- - -

An object of the same type as `.data`. The output has the following -properties:

- - -

* Rows are not affected. -* Output columns are a subset of input columns, potentially with a different - order. Columns will be renamed if `new_name = old_name` form is used. -* Data frame attributes are preserved. -* Groups are maintained; you can't select off grouping variables.

- - -

A Seurat object

- - -

An object of the same type as `.data`. `count()` and `add_count()` -group transiently, so the output has the same groups as the input.

- - -

A vector the same size as `.data`.

-
-
-

Details

-

## Locales -The sort order for character vectors will depend on the collating sequence -of the locale in use: see locales().

-

## Missing values -Unlike base sorting with `sort()`, `NA` are: -* always sorted to the end for local data, even when wrapped with `desc()`. -* treated differently for remote data, depending on the backend.

-

dplyr is not yet smart enough to optimise filtering -on grouped datasets that don't need grouped calculations. For this reason, -filtering is often considerably faster on ungroup()ed data.

-

Slice does not work with relational databases because they have no -intrinsic notion of row order. If you want to perform the equivalent -operation, use [filter()] and [row_number()].

-
-
-

Methods

- - -

This function is a **generic**, which means that packages can provide -implementations (methods) for other classes. See the documentation of -individual methods for extra arguments and differences in behaviour.

- -

These functions are **generics**, which means that packages can provide -implementations (methods) for other classes. See the documentation of -individual methods for extra arguments and differences in behaviour.

-

Methods available in currently loaded packages:

- -

These functions are **generics**, which means that packages can provide -implementations (methods) for other classes. See the documentation of -individual methods for extra arguments and differences in behaviour.

-

Methods available in currently loaded packages:

- -

This function is a **generic**, which means that packages can provide -implementations (methods) for other classes. See the documentation of -individual methods for extra arguments and differences in behaviour.

-

The following methods are currently available in loaded packages:

-
-
-

Grouped tibbles

- - - -

Because mutating expressions are computed within groups, they may -yield different results on grouped tibbles. This will be the case -as soon as an aggregating, lagging, or ranking function is -involved. Compare this ungrouped mutate:

-

With the grouped equivalent:

-

The former normalises `mass` by the global average whereas the -latter normalises by the averages within gender levels.

-
- -
-

Examples

-
`%>%` = magrittr::`%>%`
-pbmc_small %>%  arrange(nFeature_RNA)
-#> # A Seurat-tibble abstraction: 80 × 15
-#> # Features=230 | Cells=80 | Active assay=RNA | Assays=RNA
-#>    .cell orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
-#>    <chr> <fct>           <dbl>        <int> <fct>           <fct>         <chr> 
-#>  1 CATG… SeuratPro…         51           26 0               A             g2    
-#>  2 GGCA… SeuratPro…        172           29 0               A             g1    
-#>  3 AGTC… SeuratPro…        157           29 0               A             g1    
-#>  4 GACG… SeuratPro…        202           30 0               A             g2    
-#>  5 GGAA… SeuratPro…        150           30 0               A             g2    
-#>  6 AGGT… SeuratPro…         62           31 0               A             g2    
-#>  7 CTTC… SeuratPro…         41           32 0               A             g2    
-#>  8 GTAA… SeuratPro…         67           33 0               A             g2    
-#>  9 GTCA… SeuratPro…        210           33 0               A             g2    
-#> 10 TGGT… SeuratPro…         64           36 0               A             g1    
-#> # ℹ 70 more rows
-#> # ℹ 8 more variables: RNA_snn_res.1 <fct>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>,
-#> #   PC_4 <dbl>, PC_5 <dbl>, tSNE_1 <dbl>, tSNE_2 <dbl>
-
-`%>%` = magrittr::`%>%`
-data("pbmc_small")
-pbmc_small %>%  distinct(groups)
-#> tidyseurat says: A data frame is returned for independent data analysis.
-#> # A tibble: 2 × 1
-#>   groups
-#>   <chr> 
-#> 1 g2    
-#> 2 g1    
-
-
-`%>%` = magrittr::`%>%`
-data("pbmc_small")
-pbmc_small %>%  filter(groups == "g1")
-#> # A Seurat-tibble abstraction: 44 × 15
-#> # Features=230 | Cells=44 | Active assay=RNA | Assays=RNA
-#>    .cell orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
-#>    <chr> <fct>           <dbl>        <int> <fct>           <fct>         <chr> 
-#>  1 CATG… SeuratPro…         85           52 0               A             g1    
-#>  2 TCTG… SeuratPro…         70           48 0               A             g1    
-#>  3 TGGT… SeuratPro…         64           36 0               A             g1    
-#>  4 GCAG… SeuratPro…         72           45 0               A             g1    
-#>  5 GATA… SeuratPro…         52           36 0               A             g1    
-#>  6 AATG… SeuratPro…        100           41 0               A             g1    
-#>  7 AGAG… SeuratPro…        191           61 0               A             g1    
-#>  8 CTAA… SeuratPro…        168           44 0               A             g1    
-#>  9 TTGG… SeuratPro…        135           45 0               A             g1    
-#> 10 CATC… SeuratPro…         79           43 0               A             g1    
-#> # ℹ 34 more rows
-#> # ℹ 8 more variables: RNA_snn_res.1 <fct>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>,
-#> #   PC_4 <dbl>, PC_5 <dbl>, tSNE_1 <dbl>, tSNE_2 <dbl>
-
-# Learn more in ?dplyr_eval
-`%>%` = magrittr::`%>%`
-data("pbmc_small")
-pbmc_small %>%  group_by(groups)
-#> tidyseurat says: A data frame is returned for independent data analysis.
-#> # A tibble: 80 × 29
-#> # Groups:   groups [2]
-#>    .cell orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
-#>    <chr> <fct>           <dbl>        <int> <fct>           <fct>         <chr> 
-#>  1 ATGC… SeuratPro…         70           47 0               A             g2    
-#>  2 CATG… SeuratPro…         85           52 0               A             g1    
-#>  3 GAAC… SeuratPro…         87           50 1               B             g2    
-#>  4 TGAC… SeuratPro…        127           56 0               A             g2    
-#>  5 AGTC… SeuratPro…        173           53 0               A             g2    
-#>  6 TCTG… SeuratPro…         70           48 0               A             g1    
-#>  7 TGGT… SeuratPro…         64           36 0               A             g1    
-#>  8 GCAG… SeuratPro…         72           45 0               A             g1    
-#>  9 GATA… SeuratPro…         52           36 0               A             g1    
-#> 10 AATG… SeuratPro…        100           41 0               A             g1    
-#> # ℹ 70 more rows
-#> # ℹ 22 more variables: RNA_snn_res.1 <fct>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>,
-#> #   PC_4 <dbl>, PC_5 <dbl>, PC_6 <dbl>, PC_7 <dbl>, PC_8 <dbl>, PC_9 <dbl>,
-#> #   PC_10 <dbl>, PC_11 <dbl>, PC_12 <dbl>, PC_13 <dbl>, PC_14 <dbl>,
-#> #   PC_15 <dbl>, PC_16 <dbl>, PC_17 <dbl>, PC_18 <dbl>, PC_19 <dbl>,
-#> #   tSNE_1 <dbl>, tSNE_2 <dbl>
-
-
-`%>%` = magrittr::`%>%`
-data("pbmc_small")
-pbmc_small %>%  summarise(mean(nCount_RNA))
-#> tidyseurat says: A data frame is returned for independent data analysis.
-#> # A tibble: 1 × 1
-#>   `mean(nCount_RNA)`
-#>                <dbl>
-#> 1               245.
-
-`%>%` = magrittr::`%>%`
-data("pbmc_small")
-pbmc_small %>%  mutate(nFeature_RNA = 1)
-#> # A Seurat-tibble abstraction: 80 × 15
-#> # Features=230 | Cells=80 | Active assay=RNA | Assays=RNA
-#>    .cell orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
-#>    <chr> <fct>           <dbl>        <dbl> <fct>           <fct>         <chr> 
-#>  1 ATGC… SeuratPro…         70            1 0               A             g2    
-#>  2 CATG… SeuratPro…         85            1 0               A             g1    
-#>  3 GAAC… SeuratPro…         87            1 1               B             g2    
-#>  4 TGAC… SeuratPro…        127            1 0               A             g2    
-#>  5 AGTC… SeuratPro…        173            1 0               A             g2    
-#>  6 TCTG… SeuratPro…         70            1 0               A             g1    
-#>  7 TGGT… SeuratPro…         64            1 0               A             g1    
-#>  8 GCAG… SeuratPro…         72            1 0               A             g1    
-#>  9 GATA… SeuratPro…         52            1 0               A             g1    
-#> 10 AATG… SeuratPro…        100            1 0               A             g1    
-#> # ℹ 70 more rows
-#> # ℹ 8 more variables: RNA_snn_res.1 <fct>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>,
-#> #   PC_4 <dbl>, PC_5 <dbl>, tSNE_1 <dbl>, tSNE_2 <dbl>
-
-`%>%` = magrittr::`%>%`
-data("pbmc_small")
-pbmc_small %>%  rename(s_score = nFeature_RNA)
-#> # A Seurat-tibble abstraction: 80 × 15
-#> # Features=230 | Cells=80 | Active assay=RNA | Assays=RNA
-#>    .cell      orig.ident nCount_RNA s_score RNA_snn_res.0.8 letter.idents groups
-#>    <chr>      <fct>           <dbl>   <int> <fct>           <fct>         <chr> 
-#>  1 ATGCCAGAA… SeuratPro…         70      47 0               A             g2    
-#>  2 CATGGCCTG… SeuratPro…         85      52 0               A             g1    
-#>  3 GAACCTGAT… SeuratPro…         87      50 1               B             g2    
-#>  4 TGACTGGAT… SeuratPro…        127      56 0               A             g2    
-#>  5 AGTCAGACT… SeuratPro…        173      53 0               A             g2    
-#>  6 TCTGATACA… SeuratPro…         70      48 0               A             g1    
-#>  7 TGGTATCTA… SeuratPro…         64      36 0               A             g1    
-#>  8 GCAGCTCTG… SeuratPro…         72      45 0               A             g1    
-#>  9 GATATAACA… SeuratPro…         52      36 0               A             g1    
-#> 10 AATGTTGAC… SeuratPro…        100      41 0               A             g1    
-#> # ℹ 70 more rows
-#> # ℹ 8 more variables: RNA_snn_res.1 <fct>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>,
-#> #   PC_4 <dbl>, PC_5 <dbl>, tSNE_1 <dbl>, tSNE_2 <dbl>
-
-`%>%` = magrittr::`%>%`
-
-`%>%` = magrittr::`%>%`
-data("pbmc_small")
-pbmc_small %>% left_join(pbmc_small %>% distinct(groups) %>% mutate(new_column = 1:2))
-#> tidyseurat says: A data frame is returned for independent data analysis.
-#> Joining with `by = join_by(groups)`
-#> # A Seurat-tibble abstraction: 80 × 16
-#> # Features=230 | Cells=80 | Active assay=RNA | Assays=RNA
-#>    .cell orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
-#>    <chr> <fct>           <dbl>        <int> <fct>           <fct>         <chr> 
-#>  1 ATGC… SeuratPro…         70           47 0               A             g2    
-#>  2 CATG… SeuratPro…         85           52 0               A             g1    
-#>  3 GAAC… SeuratPro…         87           50 1               B             g2    
-#>  4 TGAC… SeuratPro…        127           56 0               A             g2    
-#>  5 AGTC… SeuratPro…        173           53 0               A             g2    
-#>  6 TCTG… SeuratPro…         70           48 0               A             g1    
-#>  7 TGGT… SeuratPro…         64           36 0               A             g1    
-#>  8 GCAG… SeuratPro…         72           45 0               A             g1    
-#>  9 GATA… SeuratPro…         52           36 0               A             g1    
-#> 10 AATG… SeuratPro…        100           41 0               A             g1    
-#> # ℹ 70 more rows
-#> # ℹ 9 more variables: RNA_snn_res.1 <fct>, new_column <int>, PC_1 <dbl>,
-#> #   PC_2 <dbl>, PC_3 <dbl>, PC_4 <dbl>, PC_5 <dbl>, tSNE_1 <dbl>, tSNE_2 <dbl>
-
-`%>%` = magrittr::`%>%`
-
-data("pbmc_small")
-pbmc_small %>%
-  inner_join(
-    pbmc_small %>% distinct(groups) %>% mutate(new_column = 1:2) %>% slice(1))
-#> tidyseurat says: A data frame is returned for independent data analysis.
-#> Joining with `by = join_by(groups)`
-#> # A Seurat-tibble abstraction: 36 × 16
-#> # Features=230 | Cells=36 | Active assay=RNA | Assays=RNA
-#>    .cell orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
-#>    <chr> <fct>           <dbl>        <int> <fct>           <fct>         <chr> 
-#>  1 ATGC… SeuratPro…         70           47 0               A             g2    
-#>  2 GAAC… SeuratPro…         87           50 1               B             g2    
-#>  3 TGAC… SeuratPro…        127           56 0               A             g2    
-#>  4 AGTC… SeuratPro…        173           53 0               A             g2    
-#>  5 AGGT… SeuratPro…         62           31 0               A             g2    
-#>  6 GGGT… SeuratPro…        101           41 0               A             g2    
-#>  7 CATG… SeuratPro…         51           26 0               A             g2    
-#>  8 TACG… SeuratPro…         99           45 0               A             g2    
-#>  9 GTAA… SeuratPro…         67           33 0               A             g2    
-#> 10 TACA… SeuratPro…        109           41 0               A             g2    
-#> # ℹ 26 more rows
-#> # ℹ 9 more variables: RNA_snn_res.1 <fct>, new_column <int>, PC_1 <dbl>,
-#> #   PC_2 <dbl>, PC_3 <dbl>, PC_4 <dbl>, PC_5 <dbl>, tSNE_1 <dbl>, tSNE_2 <dbl>
-
-`%>%` = magrittr::`%>%`
-
-data("pbmc_small")
-pbmc_small %>% right_join(pbmc_small %>% distinct(groups) %>% mutate(new_column = 1:2) %>% slice(1))
-#> tidyseurat says: A data frame is returned for independent data analysis.
-#> Joining with `by = join_by(groups)`
-#> # A Seurat-tibble abstraction: 36 × 16
-#> # Features=230 | Cells=36 | Active assay=RNA | Assays=RNA
-#>    .cell orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
-#>    <chr> <fct>           <dbl>        <int> <fct>           <fct>         <chr> 
-#>  1 ATGC… SeuratPro…         70           47 0               A             g2    
-#>  2 GAAC… SeuratPro…         87           50 1               B             g2    
-#>  3 TGAC… SeuratPro…        127           56 0               A             g2    
-#>  4 AGTC… SeuratPro…        173           53 0               A             g2    
-#>  5 AGGT… SeuratPro…         62           31 0               A             g2    
-#>  6 GGGT… SeuratPro…        101           41 0               A             g2    
-#>  7 CATG… SeuratPro…         51           26 0               A             g2    
-#>  8 TACG… SeuratPro…         99           45 0               A             g2    
-#>  9 GTAA… SeuratPro…         67           33 0               A             g2    
-#> 10 TACA… SeuratPro…        109           41 0               A             g2    
-#> # ℹ 26 more rows
-#> # ℹ 9 more variables: RNA_snn_res.1 <fct>, new_column <int>, PC_1 <dbl>,
-#> #   PC_2 <dbl>, PC_3 <dbl>, PC_4 <dbl>, PC_5 <dbl>, tSNE_1 <dbl>, tSNE_2 <dbl>
-
-`%>%` = magrittr::`%>%`
-data("pbmc_small")
-pbmc_small %>% full_join(tibble::tibble(groups = "g1", other=1:4))
-#> Joining with `by = join_by(groups)`
-#> tidyseurat says: This operation lead to duplicated cell names. A data frame is returned for independent data analysis.
-#> # A tibble: 212 × 30
-#>    .cell orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
-#>    <chr> <fct>           <dbl>        <int> <fct>           <fct>         <chr> 
-#>  1 ATGC… SeuratPro…         70           47 0               A             g2    
-#>  2 CATG… SeuratPro…         85           52 0               A             g1    
-#>  3 CATG… SeuratPro…         85           52 0               A             g1    
-#>  4 CATG… SeuratPro…         85           52 0               A             g1    
-#>  5 CATG… SeuratPro…         85           52 0               A             g1    
-#>  6 GAAC… SeuratPro…         87           50 1               B             g2    
-#>  7 TGAC… SeuratPro…        127           56 0               A             g2    
-#>  8 AGTC… SeuratPro…        173           53 0               A             g2    
-#>  9 TCTG… SeuratPro…         70           48 0               A             g1    
-#> 10 TCTG… SeuratPro…         70           48 0               A             g1    
-#> # ℹ 202 more rows
-#> # ℹ 23 more variables: RNA_snn_res.1 <fct>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>,
-#> #   PC_4 <dbl>, PC_5 <dbl>, PC_6 <dbl>, PC_7 <dbl>, PC_8 <dbl>, PC_9 <dbl>,
-#> #   PC_10 <dbl>, PC_11 <dbl>, PC_12 <dbl>, PC_13 <dbl>, PC_14 <dbl>,
-#> #   PC_15 <dbl>, PC_16 <dbl>, PC_17 <dbl>, PC_18 <dbl>, PC_19 <dbl>,
-#> #   tSNE_1 <dbl>, tSNE_2 <dbl>, other <int>
-
-
-`%>%` = magrittr::`%>%`
-data("pbmc_small")
-pbmc_small %>%  slice(1)
-#> # A Seurat-tibble abstraction: 1 × 15
-#> # Features=230 | Cells=1 | Active assay=RNA | Assays=RNA
-#>   .cell  orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
-#>   <chr>  <fct>           <dbl>        <int> <fct>           <fct>         <chr> 
-#> 1 ATGCC… SeuratPro…         70           47 0               A             g2    
-#> # ℹ 8 more variables: RNA_snn_res.1 <fct>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>,
-#> #   PC_4 <dbl>, PC_5 <dbl>, tSNE_1 <dbl>, tSNE_2 <dbl>
-
-# Slice group-wise using .by
-pbmc_small |> slice(1:2, .by = groups)
-#> # A Seurat-tibble abstraction: 4 × 15
-#> # Features=230 | Cells=4 | Active assay=RNA | Assays=RNA
-#>   .cell  orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
-#>   <chr>  <fct>           <dbl>        <int> <fct>           <fct>         <chr> 
-#> 1 ATGCC… SeuratPro…         70           47 0               A             g2    
-#> 2 GAACC… SeuratPro…         87           50 1               B             g2    
-#> 3 CATGG… SeuratPro…         85           52 0               A             g1    
-#> 4 TCTGA… SeuratPro…         70           48 0               A             g1    
-#> # ℹ 8 more variables: RNA_snn_res.1 <fct>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>,
-#> #   PC_4 <dbl>, PC_5 <dbl>, tSNE_1 <dbl>, tSNE_2 <dbl>
-
-# slice_sample() allows you to randomly select with or without replacement
-pbmc_small |> slice_sample(n = 5)
-#> # A Seurat-tibble abstraction: 5 × 15
-#> # Features=230 | Cells=5 | Active assay=RNA | Assays=RNA
-#>   .cell  orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
-#>   <chr>  <fct>           <dbl>        <int> <fct>           <fct>         <chr> 
-#> 1 GATAG… SeuratPro…        328           72 1               B             g1    
-#> 2 GGCAT… SeuratPro…        126           53 0               A             g1    
-#> 3 ATGCC… SeuratPro…         70           47 0               A             g2    
-#> 4 AGATA… SeuratPro…        187           61 0               A             g2    
-#> 5 TACAA… SeuratPro…        108           44 0               A             g2    
-#> # ℹ 8 more variables: RNA_snn_res.1 <fct>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>,
-#> #   PC_4 <dbl>, PC_5 <dbl>, tSNE_1 <dbl>, tSNE_2 <dbl>
-
-# if using replacement, and duplicate cells are returned, a tibble will be
-# returned because duplicate cells cannot exist in Seurat objects
-pbmc_small |> slice_sample(n = 1, replace = TRUE) # returns Seurat
-#> # A Seurat-tibble abstraction: 1 × 15
-#> # Features=230 | Cells=1 | Active assay=RNA | Assays=RNA
-#>   .cell  orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
-#>   <chr>  <fct>           <dbl>        <int> <fct>           <fct>         <chr> 
-#> 1 GATAG… SeuratPro…        328           72 1               B             g1    
-#> # ℹ 8 more variables: RNA_snn_res.1 <fct>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>,
-#> #   PC_4 <dbl>, PC_5 <dbl>, tSNE_1 <dbl>, tSNE_2 <dbl>
-pbmc_small |> slice_sample(n = 100, replace = TRUE) # returns tibble
-#> tidyseurat says: When sampling with replacement a data frame is returned for independent data analysis.
-#> # A tibble: 100 × 29
-#>    .cell orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
-#>    <chr> <fct>           <dbl>        <int> <fct>           <fct>         <chr> 
-#>  1 ATGC… SeuratPro…         70           47 0               A             g2    
-#>  2 ATGC… SeuratPro…         70           47 0               A             g2    
-#>  3 ATGC… SeuratPro…         70           47 0               A             g2    
-#>  4 CATG… SeuratPro…         85           52 0               A             g1    
-#>  5 TCTG… SeuratPro…         70           48 0               A             g1    
-#>  6 TGGT… SeuratPro…         64           36 0               A             g1    
-#>  7 AATG… SeuratPro…        100           41 0               A             g1    
-#>  8 GGGT… SeuratPro…        101           41 0               A             g2    
-#>  9 GGGT… SeuratPro…        101           41 0               A             g2    
-#> 10 CATG… SeuratPro…         51           26 0               A             g2    
-#> # ℹ 90 more rows
-#> # ℹ 22 more variables: RNA_snn_res.1 <fct>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>,
-#> #   PC_4 <dbl>, PC_5 <dbl>, PC_6 <dbl>, PC_7 <dbl>, PC_8 <dbl>, PC_9 <dbl>,
-#> #   PC_10 <dbl>, PC_11 <dbl>, PC_12 <dbl>, PC_13 <dbl>, PC_14 <dbl>,
-#> #   PC_15 <dbl>, PC_16 <dbl>, PC_17 <dbl>, PC_18 <dbl>, PC_19 <dbl>,
-#> #   tSNE_1 <dbl>, tSNE_2 <dbl>
-
-# weight by a variable
-pbmc_small |> slice_sample(n = 5, weight_by = nCount_RNA)
-#> # A Seurat-tibble abstraction: 5 × 15
-#> # Features=230 | Cells=5 | Active assay=RNA | Assays=RNA
-#>   .cell  orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
-#>   <chr>  <fct>           <dbl>        <int> <fct>           <fct>         <chr> 
-#> 1 CTGCC… SeuratPro…        146           47 0               A             g1    
-#> 2 ACCAG… SeuratPro…        417           75 0               A             g1    
-#> 3 AAGCG… SeuratPro…        443           77 1               B             g1    
-#> 4 GGCAT… SeuratPro…        126           53 0               A             g1    
-#> 5 ACTCG… SeuratPro…        231           49 1               B             g2    
-#> # ℹ 8 more variables: RNA_snn_res.1 <fct>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>,
-#> #   PC_4 <dbl>, PC_5 <dbl>, tSNE_1 <dbl>, tSNE_2 <dbl>
-
-# sample by group
-pbmc_small |> slice_sample(n = 5, by = groups)
-#> # A Seurat-tibble abstraction: 10 × 15
-#> # Features=230 | Cells=10 | Active assay=RNA | Assays=RNA
-#>    .cell orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
-#>    <chr> <fct>           <dbl>        <int> <fct>           <fct>         <chr> 
-#>  1 AGTC… SeuratPro…        173           53 0               A             g2    
-#>  2 ATGC… SeuratPro…         70           47 0               A             g2    
-#>  3 GTCA… SeuratPro…        210           33 0               A             g2    
-#>  4 TTGC… SeuratPro…        104           40 0               A             g2    
-#>  5 GCGC… SeuratPro…        213           48 1               B             g2    
-#>  6 CATC… SeuratPro…        353           80 1               B             g1    
-#>  7 TACT… SeuratPro…        156           48 0               A             g1    
-#>  8 ATAC… SeuratPro…        612           69 1               B             g1    
-#>  9 GGCA… SeuratPro…        126           53 0               A             g1    
-#> 10 TTAC… SeuratPro…        228           39 0               A             g1    
-#> # ℹ 8 more variables: RNA_snn_res.1 <fct>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>,
-#> #   PC_4 <dbl>, PC_5 <dbl>, tSNE_1 <dbl>, tSNE_2 <dbl>
-
-# sample using proportions
-pbmc_small |> slice_sample(prop = 0.10)
-#> # A Seurat-tibble abstraction: 8 × 15
-#> # Features=230 | Cells=8 | Active assay=RNA | Assays=RNA
-#>   .cell  orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
-#>   <chr>  <fct>           <dbl>        <int> <fct>           <fct>         <chr> 
-#> 1 GATAG… SeuratPro…        328           72 1               B             g1    
-#> 2 GGCAT… SeuratPro…        126           53 0               A             g1    
-#> 3 ATGCC… SeuratPro…         70           47 0               A             g2    
-#> 4 AGATA… SeuratPro…        187           61 0               A             g2    
-#> 5 TACAA… SeuratPro…        108           44 0               A             g2    
-#> 6 CATGA… SeuratPro…         51           26 0               A             g2    
-#> 7 GCACT… SeuratPro…        292           71 1               B             g2    
-#> 8 CGTAG… SeuratPro…        371           75 1               B             g1    
-#> # ℹ 8 more variables: RNA_snn_res.1 <fct>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>,
-#> #   PC_4 <dbl>, PC_5 <dbl>, tSNE_1 <dbl>, tSNE_2 <dbl>
-
-
-# First rows based on existing order
-pbmc_small |> slice_head(n = 5)
-#> # A Seurat-tibble abstraction: 5 × 15
-#> # Features=230 | Cells=5 | Active assay=RNA | Assays=RNA
-#>   .cell  orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
-#>   <chr>  <fct>           <dbl>        <int> <fct>           <fct>         <chr> 
-#> 1 ATGCC… SeuratPro…         70           47 0               A             g2    
-#> 2 CATGG… SeuratPro…         85           52 0               A             g1    
-#> 3 GAACC… SeuratPro…         87           50 1               B             g2    
-#> 4 TGACT… SeuratPro…        127           56 0               A             g2    
-#> 5 AGTCA… SeuratPro…        173           53 0               A             g2    
-#> # ℹ 8 more variables: RNA_snn_res.1 <fct>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>,
-#> #   PC_4 <dbl>, PC_5 <dbl>, tSNE_1 <dbl>, tSNE_2 <dbl>
-
-# Last rows based on existing order
-pbmc_small |> slice_tail(n = 5)
-#> # A Seurat-tibble abstraction: 5 × 15
-#> # Features=230 | Cells=5 | Active assay=RNA | Assays=RNA
-#>   .cell  orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
-#>   <chr>  <fct>           <dbl>        <int> <fct>           <fct>         <chr> 
-#> 1 GAGTT… SeuratPro…        527           47 0               A             g1    
-#> 2 GACGC… SeuratPro…        202           30 0               A             g2    
-#> 3 AGTCT… SeuratPro…        157           29 0               A             g1    
-#> 4 GGAAC… SeuratPro…        150           30 0               A             g2    
-#> 5 CTTGA… SeuratPro…        233           76 1               B             g1    
-#> # ℹ 8 more variables: RNA_snn_res.1 <fct>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>,
-#> #   PC_4 <dbl>, PC_5 <dbl>, tSNE_1 <dbl>, tSNE_2 <dbl>
-
-# Rows with minimum and maximum values of a metadata variable
-pbmc_small |> slice_min(nFeature_RNA, n = 5)
-#> # A Seurat-tibble abstraction: 5 × 15
-#> # Features=230 | Cells=5 | Active assay=RNA | Assays=RNA
-#>   .cell  orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
-#>   <chr>  <fct>           <dbl>        <int> <fct>           <fct>         <chr> 
-#> 1 CATGA… SeuratPro…         51           26 0               A             g2    
-#> 2 GGCAT… SeuratPro…        172           29 0               A             g1    
-#> 3 AGTCT… SeuratPro…        157           29 0               A             g1    
-#> 4 GACGC… SeuratPro…        202           30 0               A             g2    
-#> 5 GGAAC… SeuratPro…        150           30 0               A             g2    
-#> # ℹ 8 more variables: RNA_snn_res.1 <fct>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>,
-#> #   PC_4 <dbl>, PC_5 <dbl>, tSNE_1 <dbl>, tSNE_2 <dbl>
-
-# slice_min() and slice_max() may return more rows than requested
-# in the presence of ties.
-pbmc_small |>  slice_min(nFeature_RNA, n = 2)
-#> # A Seurat-tibble abstraction: 3 × 15
-#> # Features=230 | Cells=3 | Active assay=RNA | Assays=RNA
-#>   .cell  orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
-#>   <chr>  <fct>           <dbl>        <int> <fct>           <fct>         <chr> 
-#> 1 CATGA… SeuratPro…         51           26 0               A             g2    
-#> 2 GGCAT… SeuratPro…        172           29 0               A             g1    
-#> 3 AGTCT… SeuratPro…        157           29 0               A             g1    
-#> # ℹ 8 more variables: RNA_snn_res.1 <fct>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>,
-#> #   PC_4 <dbl>, PC_5 <dbl>, tSNE_1 <dbl>, tSNE_2 <dbl>
-
-# Use with_ties = FALSE to return exactly n matches
-pbmc_small |> slice_min(nFeature_RNA, n = 2, with_ties = FALSE)
-#> # A Seurat-tibble abstraction: 2 × 15
-#> # Features=230 | Cells=2 | Active assay=RNA | Assays=RNA
-#>   .cell  orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
-#>   <chr>  <fct>           <dbl>        <int> <fct>           <fct>         <chr> 
-#> 1 CATGA… SeuratPro…         51           26 0               A             g2    
-#> 2 GGCAT… SeuratPro…        172           29 0               A             g1    
-#> # ℹ 8 more variables: RNA_snn_res.1 <fct>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>,
-#> #   PC_4 <dbl>, PC_5 <dbl>, tSNE_1 <dbl>, tSNE_2 <dbl>
-
-# Or use additional variables to break the tie:
-pbmc_small |> slice_min(tibble::tibble(nFeature_RNA, nCount_RNA), n = 2)
-#> # A Seurat-tibble abstraction: 2 × 15
-#> # Features=230 | Cells=2 | Active assay=RNA | Assays=RNA
-#>   .cell  orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
-#>   <chr>  <fct>           <dbl>        <int> <fct>           <fct>         <chr> 
-#> 1 CATGA… SeuratPro…         51           26 0               A             g2    
-#> 2 AGTCT… SeuratPro…        157           29 0               A             g1    
-#> # ℹ 8 more variables: RNA_snn_res.1 <fct>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>,
-#> #   PC_4 <dbl>, PC_5 <dbl>, tSNE_1 <dbl>, tSNE_2 <dbl>
-
-# Use by for group-wise operations
-pbmc_small |> slice_min(nFeature_RNA, n = 5, by = groups)
-#> # A Seurat-tibble abstraction: 10 × 15
-#> # Features=230 | Cells=10 | Active assay=RNA | Assays=RNA
-#>    .cell orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
-#>    <chr> <fct>           <dbl>        <int> <fct>           <fct>         <chr> 
-#>  1 CATG… SeuratPro…         51           26 0               A             g2    
-#>  2 GACG… SeuratPro…        202           30 0               A             g2    
-#>  3 GGAA… SeuratPro…        150           30 0               A             g2    
-#>  4 AGGT… SeuratPro…         62           31 0               A             g2    
-#>  5 CTTC… SeuratPro…         41           32 0               A             g2    
-#>  6 GGCA… SeuratPro…        172           29 0               A             g1    
-#>  7 AGTC… SeuratPro…        157           29 0               A             g1    
-#>  8 TGGT… SeuratPro…         64           36 0               A             g1    
-#>  9 GATA… SeuratPro…         52           36 0               A             g1    
-#> 10 TTAC… SeuratPro…        228           39 0               A             g1    
-#> # ℹ 8 more variables: RNA_snn_res.1 <fct>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>,
-#> #   PC_4 <dbl>, PC_5 <dbl>, tSNE_1 <dbl>, tSNE_2 <dbl>
-
-
-# Rows with minimum and maximum values of a metadata variable
-pbmc_small |> slice_max(nFeature_RNA, n = 5)
-#> # A Seurat-tibble abstraction: 5 × 15
-#> # Features=230 | Cells=5 | Active assay=RNA | Assays=RNA
-#>   .cell  orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
-#>   <chr>  <fct>           <dbl>        <int> <fct>           <fct>         <chr> 
-#> 1 GACAT… SeuratPro…        872           96 1               B             g1    
-#> 2 ACGTG… SeuratPro…        709           94 1               B             g2    
-#> 3 TTGAG… SeuratPro…        787           88 0               A             g1    
-#> 4 TTTAG… SeuratPro…        462           86 1               B             g1    
-#> 5 ATTGT… SeuratPro…        745           84 1               B             g2    
-#> # ℹ 8 more variables: RNA_snn_res.1 <fct>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>,
-#> #   PC_4 <dbl>, PC_5 <dbl>, tSNE_1 <dbl>, tSNE_2 <dbl>
-
-`%>%` = magrittr::`%>%`
-data("pbmc_small")
-pbmc_small %>%  select(.cell, orig.ident )
-#> # A Seurat-tibble abstraction: 80 × 9
-#> # Features=230 | Cells=80 | Active assay=RNA | Assays=RNA
-#>    .cell          orig.ident     PC_1   PC_2   PC_3  PC_4   PC_5  tSNE_1  tSNE_2
-#>    <chr>          <fct>         <dbl>  <dbl>  <dbl> <dbl>  <dbl>   <dbl>   <dbl>
-#>  1 ATGCCAGAACGACT SeuratProj… -0.774  -0.900 -0.249 0.559  0.465   0.868  -8.10 
-#>  2 CATGGCCTGTGCAT SeuratProj… -0.0260 -0.347  0.665 0.418  0.585  -7.39   -8.77 
-#>  3 GAACCTGATGAACC SeuratProj… -0.457   0.180  1.32  2.01  -0.482 -28.2     0.241
-#>  4 TGACTGGATTCTCA SeuratProj… -0.812  -1.38  -1.00  0.139 -1.60   16.3   -11.2  
-#>  5 AGTCAGACTGCACA SeuratProj… -0.774  -0.900 -0.249 0.559  0.465   1.91  -11.2  
-#>  6 TCTGATACACGTGT SeuratProj… -0.774  -0.900 -0.249 0.559  0.465   3.15   -9.94 
-#>  7 TGGTATCTAAACAG SeuratProj… -0.460  -1.19  -0.312 0.716 -1.65   17.9    -9.90 
-#>  8 GCAGCTCTGTTTCT SeuratProj… -0.900  -0.388  0.693 0.404  0.536  -6.49   -8.39 
-#>  9 GATATAACACGCAT SeuratProj… -0.774  -0.900 -0.249 0.559  0.465   1.33   -9.68 
-#> 10 AATGTTGACAGTCA SeuratProj… -0.488  -1.16  -0.306 0.702 -1.47   17.0    -9.43 
-#> # ℹ 70 more rows
-
-
-`%>%` = magrittr::`%>%`
-data("pbmc_small")
-pbmc_small %>%  sample_n(50)
-#> # A Seurat-tibble abstraction: 50 × 15
-#> # Features=230 | Cells=50 | Active assay=RNA | Assays=RNA
-#>    .cell orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
-#>    <chr> <fct>           <dbl>        <int> <fct>           <fct>         <chr> 
-#>  1 GATA… SeuratPro…        328           72 1               B             g1    
-#>  2 GGCA… SeuratPro…        126           53 0               A             g1    
-#>  3 ATGC… SeuratPro…         70           47 0               A             g2    
-#>  4 AGAT… SeuratPro…        187           61 0               A             g2    
-#>  5 TACA… SeuratPro…        108           44 0               A             g2    
-#>  6 CATG… SeuratPro…         51           26 0               A             g2    
-#>  7 GCAC… SeuratPro…        292           71 1               B             g2    
-#>  8 CGTA… SeuratPro…        371           75 1               B             g1    
-#>  9 TTAC… SeuratPro…        298           65 1               B             g1    
-#> 10 ATAA… SeuratPro…         99           42 1               B             g2    
-#> # ℹ 40 more rows
-#> # ℹ 8 more variables: RNA_snn_res.1 <fct>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>,
-#> #   PC_4 <dbl>, PC_5 <dbl>, tSNE_1 <dbl>, tSNE_2 <dbl>
-pbmc_small %>%  sample_frac(0.1)
-#> # A Seurat-tibble abstraction: 8 × 15
-#> # Features=230 | Cells=8 | Active assay=RNA | Assays=RNA
-#>   .cell  orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
-#>   <chr>  <fct>           <dbl>        <int> <fct>           <fct>         <chr> 
-#> 1 GATAG… SeuratPro…        328           72 1               B             g1    
-#> 2 GGCAT… SeuratPro…        126           53 0               A             g1    
-#> 3 ATGCC… SeuratPro…         70           47 0               A             g2    
-#> 4 AGATA… SeuratPro…        187           61 0               A             g2    
-#> 5 TACAA… SeuratPro…        108           44 0               A             g2    
-#> 6 CATGA… SeuratPro…         51           26 0               A             g2    
-#> 7 GCACT… SeuratPro…        292           71 1               B             g2    
-#> 8 CGTAG… SeuratPro…        371           75 1               B             g1    
-#> # ℹ 8 more variables: RNA_snn_res.1 <fct>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>,
-#> #   PC_4 <dbl>, PC_5 <dbl>, tSNE_1 <dbl>, tSNE_2 <dbl>
-
-
-
-`%>%` <- magrittr::`%>%`
-pbmc_small %>%
-
-    count(groups)
-#> tidyseurat says: A data frame is returned for independent data analysis.
-#> # A tibble: 2 × 2
-#>   groups     n
-#>   <chr>  <int>
-#> 1 g1        44
-#> 2 g2        36
-
-`%>%` = magrittr::`%>%`
-data("pbmc_small")
-pbmc_small %>%  pull(groups)
-#> tidyseurat says: A data frame is returned for independent data analysis.
-#>  [1] "g2" "g1" "g2" "g2" "g2" "g1" "g1" "g1" "g1" "g1" "g2" "g1" "g2" "g2" "g2"
-#> [16] "g1" "g2" "g1" "g1" "g2" "g1" "g1" "g2" "g2" "g1" "g2" "g2" "g2" "g2" "g1"
-#> [31] "g1" "g1" "g1" "g2" "g1" "g1" "g2" "g1" "g1" "g2" "g1" "g2" "g2" "g2" "g1"
-#> [46] "g2" "g1" "g2" "g1" "g2" "g1" "g2" "g2" "g2" "g1" "g1" "g1" "g1" "g2" "g1"
-#> [61] "g1" "g1" "g1" "g1" "g1" "g2" "g2" "g1" "g1" "g1" "g2" "g1" "g2" "g2" "g1"
-#> [76] "g1" "g2" "g1" "g2" "g1"
-
-
-
-
- -
- - -
- -
-

Site built with pkgdown 2.0.7.

-
- -
- - - - - - - - diff --git a/reference/extract.html b/reference/extract.html new file mode 100644 index 0000000..7c371dd --- /dev/null +++ b/reference/extract.html @@ -0,0 +1,193 @@ + +Extract a character column into multiple columns using regular +expression groups — extract • tidyseurat + + +
+
+ + + +
+
+ + +
+

[Superseded]

+

extract() has been superseded in favour of separate_wider_regex() +because it has a more polished API and better handling of problems. +Superseded functions will not go away, but will only receive critical bug +fixes.

+

Given a regular expression with capturing groups, extract() turns +each group into a new column. If the groups don't match, or the input +is NA, the output will be NA.

+
+ +
+
# S3 method for Seurat
+extract(
+  data,
+  col,
+  into,
+  regex = "([[:alnum:]]+)",
+  remove = TRUE,
+  convert = FALSE,
+  ...
+)
+
+ +
+

Arguments

+
data
+

A data frame.

+ + +
col
+

<tidy-select> Column to expand.

+ + +
into
+

Names of new variables to create as character vector. +Use NA to omit the variable in the output.

+ + +
regex
+

A string representing a regular expression used to extract the +desired values. There should be one group (defined by ()) for each +element of into.

+ + +
remove
+

If TRUE, remove input column from output data frame.

+ + +
convert
+

If TRUE, will run type.convert() with +as.is = TRUE on new columns. This is useful if the component +columns are integer, numeric or logical.

+

NB: this will cause string "NA"s to be converted to NAs.

+ + +
...
+

Additional arguments passed on to methods.

+ +
+
+

Value

+ + +

`tidyseurat`

+
+
+

See also

+

separate() to split up by a separator.

+
+ +
+

Examples

+
data(pbmc_small)
+pbmc_small |>
+  extract(groups, 
+    into="g", 
+    regex="g([0-9])", 
+    convert=TRUE)
+#> # A Seurat-tibble abstraction: 80 × 15
+#> # Features=230 | Cells=80 | Active assay=RNA | Assays=RNA
+#>    .cell  orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents     g
+#>    <chr>  <fct>           <dbl>        <int> <fct>           <fct>         <int>
+#>  1 ATGCC… SeuratPro…         70           47 0               A                 2
+#>  2 CATGG… SeuratPro…         85           52 0               A                 1
+#>  3 GAACC… SeuratPro…         87           50 1               B                 2
+#>  4 TGACT… SeuratPro…        127           56 0               A                 2
+#>  5 AGTCA… SeuratPro…        173           53 0               A                 2
+#>  6 TCTGA… SeuratPro…         70           48 0               A                 1
+#>  7 TGGTA… SeuratPro…         64           36 0               A                 1
+#>  8 GCAGC… SeuratPro…         72           45 0               A                 1
+#>  9 GATAT… SeuratPro…         52           36 0               A                 1
+#> 10 AATGT… SeuratPro…        100           41 0               A                 1
+#> # ℹ 70 more rows
+#> # ℹ 8 more variables: RNA_snn_res.1 <fct>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>,
+#> #   PC_4 <dbl>, PC_5 <dbl>, tSNE_1 <dbl>, tSNE_2 <dbl>
+
+
+
+
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.7.

+
+ +
+ + + + + + + + diff --git a/reference/filter.html b/reference/filter.html new file mode 100644 index 0000000..c17a068 --- /dev/null +++ b/reference/filter.html @@ -0,0 +1,222 @@ + +Keep rows that match a condition — filter • tidyseurat + + +
+
+ + + +
+
+ + +
+

The filter() function is used to subset a data frame, +retaining all rows that satisfy your conditions. +To be retained, the row must produce a value of TRUE for all conditions. +Note that when a condition evaluates to NA +the row will be dropped, unlike base subsetting with [.

+
+ +
+
# S3 method for Seurat
+filter(.data, ..., .preserve = FALSE)
+
+ +
+

Arguments

+
.data
+

A data frame, data frame extension (e.g. a tibble), or a +lazy data frame (e.g. from dbplyr or dtplyr). See Methods, below, for +more details.

+ + +
...
+

<data-masking> Expressions that +return a logical value, and are defined in terms of the variables in +.data. If multiple expressions are included, they are combined with the +& operator. Only rows for which all conditions evaluate to TRUE are +kept.

+ + +
.preserve
+

Relevant when the .data input is grouped. +If .preserve = FALSE (the default), the grouping structure +is recalculated based on the resulting data, otherwise the grouping is kept as is.

+ +
+
+

Value

+ + +

An object of the same type as .data. The output has the following properties:

  • Rows are a subset of the input, but appear in the same order.

  • +
  • Columns are not modified.

  • +
  • The number of groups may be reduced (if .preserve is not TRUE).

  • +
  • Data frame attributes are preserved.

  • +
+
+

Details

+

The filter() function is used to subset the rows of +.data, applying the expressions in ... to the column values to determine which +rows should be retained. It can be applied to both grouped and ungrouped data (see group_by() and +ungroup()). However, dplyr is not yet smart enough to optimise the filtering +operation on grouped datasets that do not need grouped calculations. For this +reason, filtering is often considerably faster on ungrouped data.

+
+
+

Useful filter functions

+ + + + +

There are many functions and operators that are useful when constructing the +expressions used to filter the data:

+
+

Grouped tibbles

+ + + + +

Because filtering expressions are computed within groups, they may +yield different results on grouped tibbles. This will be the case +as soon as an aggregating, lagging, or ranking function is +involved. Compare this ungrouped filtering:

+

starwars %>% filter(mass > mean(mass, na.rm = TRUE))

+

With the grouped equivalent:

+

starwars %>% group_by(gender) %>% filter(mass > mean(mass, na.rm = TRUE))

+

In the ungrouped version, filter() compares the value of mass in each row to +the global average (taken over the whole data set), keeping only the rows with +mass greater than this global average. In contrast, the grouped version calculates +the average mass separately for each gender group, and keeps rows with mass greater +than the relevant within-gender average.

+
+
+

Methods

+ + + +

This function is a generic, which means that packages can provide +implementations (methods) for other classes. See the documentation of +individual methods for extra arguments and differences in behaviour.

+

The following methods are currently available in loaded packages: +dplyr (data.frame, ts), plotly (plotly), tidyseurat (Seurat) +.

+
+
+

See also

+

Other single table verbs: +arrange(), +mutate(), +reframe(), +rename(), +select(), +slice(), +summarise()

+
+ +
+

Examples

+
data("pbmc_small")
+pbmc_small |>  filter(groups == "g1")
+#> # A Seurat-tibble abstraction: 44 × 15
+#> # Features=230 | Cells=44 | Active assay=RNA | Assays=RNA
+#>    .cell orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
+#>    <chr> <fct>           <dbl>        <int> <fct>           <fct>         <chr> 
+#>  1 CATG… SeuratPro…         85           52 0               A             g1    
+#>  2 TCTG… SeuratPro…         70           48 0               A             g1    
+#>  3 TGGT… SeuratPro…         64           36 0               A             g1    
+#>  4 GCAG… SeuratPro…         72           45 0               A             g1    
+#>  5 GATA… SeuratPro…         52           36 0               A             g1    
+#>  6 AATG… SeuratPro…        100           41 0               A             g1    
+#>  7 AGAG… SeuratPro…        191           61 0               A             g1    
+#>  8 CTAA… SeuratPro…        168           44 0               A             g1    
+#>  9 TTGG… SeuratPro…        135           45 0               A             g1    
+#> 10 CATC… SeuratPro…         79           43 0               A             g1    
+#> # ℹ 34 more rows
+#> # ℹ 8 more variables: RNA_snn_res.1 <fct>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>,
+#> #   PC_4 <dbl>, PC_5 <dbl>, tSNE_1 <dbl>, tSNE_2 <dbl>
+
+# Learn more in ?dplyr_eval
+
+
+
+
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.7.

+
+ +
+ + + + + + + + diff --git a/reference/print.html b/reference/formatting.html similarity index 53% rename from reference/print.html rename to reference/formatting.html index aefcbb1..8eb3b1d 100644 --- a/reference/print.html +++ b/reference/formatting.html @@ -1,14 +1,24 @@ -Printing tibbles — print • tidyseuratPrinting tibbles — formatting • tidyseurat @@ -64,20 +74,27 @@
-

`r lifecycle::badge("maturing")`

-

One of the main features of the `tbl_df` class is the printing:

-

* Tibbles only print as many rows and columns as fit on one screen, - supplemented by a summary of the remaining rows and columns. -* Tibble reveals the type of each column, which keeps the user informed about - whether a variable is, e.g., `<chr>` or `<fct>` (character versus factor).

-

Printing can be tweaked for a one-off call by calling `print()` explicitly -and setting arguments like `n` and `width`. More persistent control is -available by setting the options described below.

-

Only the first 5 reduced dimensions are displayed, while all of them are queriable (e.g. ggplot). All dimensions are returned/displayed if as_tibble is used.

+

One of the main features of the tbl_df class is the printing:

  • Tibbles only print as many rows and columns as fit on one screen, +supplemented by a summary of the remaining rows and columns.

  • +
  • Tibble reveals the type of each column, which keeps the user informed about +whether a variable is, e.g., <chr> or <fct> (character versus factor). +See vignette("types") for an overview of common +type abbreviations.

  • +

Printing can be tweaked for a one-off call by calling print() explicitly +and setting arguments like n and width. More persistent control is +available by setting the options described in pillar::pillar_options. +See also vignette("digits") for a comparison to base options, +and vignette("numbers") that showcases num() and char() +for creating columns with custom formatting options.

+

As of tibble 3.1.0, printing is handled entirely by the pillar package. +If you implement a package that extends tibble, +the printed output can be customized in various ways. +See vignette("extending", package = "pillar") for details, +and pillar::pillar_options for options that control the display in the console.

@@ -92,21 +109,19 @@

Arguments

...
-

Other arguments passed on to individual methods.

+

Passed on to tbl_format_setup().

n
-

Number of rows to show. If `NULL`, the default, will print all rows -if less than option `tibble.print_max`. Otherwise, will print -`tibble.print_min` rows.

+

Number of rows to show. If NULL, the default, will print all rows +if less than the print_max option. +Otherwise, will print as many rows as specified by the +print_min option.

width
-

Width of text output to generate. This defaults to `NULL`, which -means use `getOption("tibble.width")` or (if also `NULL`) -`getOption("width")`; the latter displays only the columns that fit on one -screen. You can also set `options(tibble.width = Inf)` to override this -default and always print all columns.

+

Width of text output to generate. This defaults to NULL, which +means use the width option.

n_extra
@@ -119,31 +134,14 @@

Arguments

Value

-

Nothing

-
-
-

Package options

- - - -

The following options are used by the tibble and pillar packages -to format and print `tbl_df` objects. -Used by the formatting workhorse `trunc_mat()` and, therefore, -indirectly, by `print.tbl()`.

-

* `tibble.print_max`: Row number threshold: Maximum number of rows printed. - Set to `Inf` to always print all rows. Default: 20. -* `tibble.print_min`: Number of rows printed if row number threshold is - exceeded. Default: 10. -* `tibble.width`: Output width. Default: `NULL` (use `width` option). -* `tibble.max_extra_cols`: Number of extra columns printed in reduced form. - Default: 100.

+

Prints a message to the console describing + the contents of the `tidyseurat` object.

Examples

-
library(dplyr)
-data("pbmc_small")
-pbmc_small  %>% print()
+    
data(pbmc_small)
+print(pbmc_small)
 #> # A Seurat-tibble abstraction: 80 × 15
 #> # Features=230 | Cells=80 | Active assay=RNA | Assays=RNA
 #>    .cell orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
@@ -161,6 +159,7 @@ 

Examples

#> # ℹ 70 more rows #> # ℹ 8 more variables: RNA_snn_res.1 <fct>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>, #> # PC_4 <dbl>, PC_5 <dbl>, tSNE_1 <dbl>, tSNE_2 <dbl> +
diff --git a/reference/full_join.html b/reference/full_join.html new file mode 100644 index 0000000..9e1afb2 --- /dev/null +++ b/reference/full_join.html @@ -0,0 +1,297 @@ + +Mutating joins — full_join • tidyseurat + + +
+
+ + + +
+
+ + +
+

Mutating joins add columns from y to x, matching observations based on +the keys. There are four mutating joins: the inner join, and the three outer +joins.

+

Inner join

+ + +

An inner_join() only keeps observations from x that have a matching key +in y.

+

The most important property of an inner join is that unmatched rows in either +input are not included in the result. This means that generally inner joins +are not appropriate in most analyses, because it is too easy to lose +observations.

+
+ +
+

Outer joins

+ + +

The three outer joins keep observations that appear in at least one of the +data frames:

  • A left_join() keeps all observations in x.

  • +
  • A right_join() keeps all observations in y.

  • +
  • A full_join() keeps all observations in x and y.

  • +
+ +
+ +
+
# S3 method for Seurat
+full_join(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ...)
+
+ +
+

Arguments

+
x, y
+

A pair of data frames, data frame extensions (e.g. a tibble), or +lazy data frames (e.g. from dbplyr or dtplyr). See Methods, below, for +more details.

+ + +
by
+

A join specification created with join_by(), or a character +vector of variables to join by.

+

If NULL, the default, *_join() will perform a natural join, using all +variables in common across x and y. A message lists the variables so +that you can check they're correct; suppress the message by supplying by +explicitly.

+

To join on different variables between x and y, use a join_by() +specification. For example, join_by(a == b) will match x$a to y$b.

+

To join by multiple variables, use a join_by() specification with +multiple expressions. For example, join_by(a == b, c == d) will match +x$a to y$b and x$c to y$d. If the column names are the same between +x and y, you can shorten this by listing only the variable names, like +join_by(a, c).

+

join_by() can also be used to perform inequality, rolling, and overlap +joins. See the documentation at ?join_by for details on +these types of joins.

+

For simple equality joins, you can alternatively specify a character vector +of variable names to join by. For example, by = c("a", "b") joins x$a +to y$a and x$b to y$b. If variable names differ between x and y, +use a named character vector like by = c("x_a" = "y_a", "x_b" = "y_b").

+

To perform a cross-join, generating all combinations of x and y, see +cross_join().

+ + +
copy
+

If x and y are not from the same data source, +and copy is TRUE, then y will be copied into the +same src as x. This allows you to join tables across srcs, but +it is a potentially expensive operation so you must opt into it.

+ + +
suffix
+

If there are non-joined duplicate variables in x and +y, these suffixes will be added to the output to disambiguate them. +Should be a character vector of length 2.

+ + +
...
+

Other parameters passed onto methods.

+ +
+
+

Value

+ + +

An object of the same type as x (including the same groups). The order of +the rows and columns of x is preserved as much as possible. The output has +the following properties:

  • The rows are affected by the join type.

    • inner_join() returns matched x rows.

    • +
    • left_join() returns all x rows.

    • +
    • right_join() returns matched x rows, followed by unmatched y rows.

    • +
    • full_join() returns all x rows, followed by unmatched y rows.

    • +
  • +
  • Output columns include all columns from x and all non-key columns from +y. If keep = TRUE, the key columns from y are included as well.

  • +
  • If non-key columns in x and y have the same name, suffixes are added +to disambiguate. If keep = TRUE and key columns in x and y have +the same name, suffixes are added to disambiguate these as well.

  • +
  • If keep = FALSE, output columns included in by are coerced to their +common type between x and y.

  • +
+
+

Many-to-many relationships

+ + + + +

By default, dplyr guards against many-to-many relationships in equality joins +by throwing a warning. These occur when both of the following are true:

  • A row in x matches multiple rows in y.

  • +
  • A row in y matches multiple rows in x.

  • +

This is typically surprising, as most joins involve a relationship of +one-to-one, one-to-many, or many-to-one, and is often the result of an +improperly specified join. Many-to-many relationships are particularly +problematic because they can result in a Cartesian explosion of the number of +rows returned from the join.

+

If a many-to-many relationship is expected, silence this warning by +explicitly setting relationship = "many-to-many".

+

In production code, it is best to preemptively set relationship to whatever +relationship you expect to exist between the keys of x and y, as this +forces an error to occur immediately if the data doesn't align with your +expectations.

+

Inequality joins typically result in many-to-many relationships by nature, so +they don't warn on them by default, but you should still take extra care when +specifying an inequality join, because they also have the capability to +return a large number of rows.

+

Rolling joins don't warn on many-to-many relationships either, but many +rolling joins follow a many-to-one relationship, so it is often useful to +set relationship = "many-to-one" to enforce this.

+

Note that in SQL, most database providers won't let you specify a +many-to-many relationship between two tables, instead requiring that you +create a third junction table that results in two one-to-many relationships +instead.

+
+
+

Methods

+ + + +

These functions are generics, which means that packages can provide +implementations (methods) for other classes. See the documentation of +individual methods for extra arguments and differences in behaviour.

+

Methods available in currently loaded packages:

+
+

See also

+

Other joins: +cross_join(), +filter-joins, +nest_join()

+
+ +
+

Examples

+
data(pbmc_small)
+tt <- pbmc_small
+tt |> full_join(tibble::tibble(groups="g1", other=1:4))
+#> Joining with `by = join_by(groups)`
+#> tidyseurat says: This operation lead to duplicated cell names. A data frame is returned for independent data analysis.
+#> # A tibble: 212 × 30
+#>    .cell orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
+#>    <chr> <fct>           <dbl>        <int> <fct>           <fct>         <chr> 
+#>  1 ATGC… SeuratPro…         70           47 0               A             g2    
+#>  2 CATG… SeuratPro…         85           52 0               A             g1    
+#>  3 CATG… SeuratPro…         85           52 0               A             g1    
+#>  4 CATG… SeuratPro…         85           52 0               A             g1    
+#>  5 CATG… SeuratPro…         85           52 0               A             g1    
+#>  6 GAAC… SeuratPro…         87           50 1               B             g2    
+#>  7 TGAC… SeuratPro…        127           56 0               A             g2    
+#>  8 AGTC… SeuratPro…        173           53 0               A             g2    
+#>  9 TCTG… SeuratPro…         70           48 0               A             g1    
+#> 10 TCTG… SeuratPro…         70           48 0               A             g1    
+#> # ℹ 202 more rows
+#> # ℹ 23 more variables: RNA_snn_res.1 <fct>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>,
+#> #   PC_4 <dbl>, PC_5 <dbl>, PC_6 <dbl>, PC_7 <dbl>, PC_8 <dbl>, PC_9 <dbl>,
+#> #   PC_10 <dbl>, PC_11 <dbl>, PC_12 <dbl>, PC_13 <dbl>, PC_14 <dbl>,
+#> #   PC_15 <dbl>, PC_16 <dbl>, PC_17 <dbl>, PC_18 <dbl>, PC_19 <dbl>,
+#> #   tSNE_1 <dbl>, tSNE_2 <dbl>, other <int>
+
+
+
+
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.7.

+
+ +
+ + + + + + + + diff --git a/reference/ggplot-1.png b/reference/ggplot-1.png new file mode 100644 index 0000000..80dc628 Binary files /dev/null and b/reference/ggplot-1.png differ diff --git a/reference/ggplot.html b/reference/ggplot.html new file mode 100644 index 0000000..8b73c7b --- /dev/null +++ b/reference/ggplot.html @@ -0,0 +1,164 @@ + +Create a new ggplot from a tidyseurat — ggplot • tidyseurat + + +
+
+ + + +
+
+ + +
+

ggplot() initializes a ggplot object. It can be used to +declare the input data frame for a graphic and to specify the +set of plot aesthetics intended to be common throughout all +subsequent layers unless specifically overridden.

+
+ +
+
# S3 method for Seurat
+ggplot(data = NULL, mapping = aes(), ..., environment = parent.frame())
+
+ +
+

Arguments

+
data
+

Default dataset to use for plot. If not already a data.frame, +will be converted to one by fortify(). If not specified, +must be supplied in each layer added to the plot.

+ + +
mapping
+

Default list of aesthetic mappings to use for plot. +If not specified, must be supplied in each layer added to the plot.

+ + +
...
+

Other arguments passed on to methods. Not currently used.

+ + +
environment
+

[Deprecated] Used prior to tidy +evaluation.

+ +
+
+

Value

+ + +

`ggplot`

+
+
+

Details

+

ggplot() is used to construct the initial plot object, +and is almost always followed by a plus sign (+) to add +components to the plot.

+

There are three common patterns used to invoke ggplot():

  • ggplot(data = df, mapping = aes(x, y, other aesthetics))

  • +
  • ggplot(data = df)

  • +
  • ggplot()

  • +

The first pattern is recommended if all layers use the same +data and the same set of aesthetics, although this method +can also be used when adding a layer using data from another +data frame.

+

The second pattern specifies the default data frame to use +for the plot, but no aesthetics are defined up front. This +is useful when one data frame is used predominantly for the +plot, but the aesthetics vary from one layer to another.

+

The third pattern initializes a skeleton ggplot object, which +is fleshed out as layers are added. This is useful when +multiple data frames are used to produce different layers, as +is often the case in complex graphics.

+

The data = and mapping = specifications in the arguments are optional +(and are often omitted in practice), so long as the data and the mapping +values are passed into the function in the right order. In the examples +below, however, they are left in place for clarity.

+
+ +
+

Examples

+
library(ggplot2)
+data(pbmc_small)
+pbmc_small |> 
+  ggplot(aes(groups, nCount_RNA)) +
+  geom_boxplot()
+
+
+
+
+
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.7.

+
+ +
+ + + + + + + + diff --git a/reference/ggplot2-methods.html b/reference/ggplot2-methods.html deleted file mode 100644 index 3e26c13..0000000 --- a/reference/ggplot2-methods.html +++ /dev/null @@ -1,149 +0,0 @@ - -Create a new ggplot from a tidyseurat object — ggplot • tidyseurat - - -
-
- - - -
-
- - -
-

`ggplot()` initializes a ggplot object. It can be used to -declare the input data frame for a graphic and to specify the -set of plot aesthetics intended to be common throughout all -subsequent layers unless specifically overridden.

-
- - -
-

Arguments

-
.data
-

Default dataset to use for plot. If not already a data.frame, -will be converted to one by [fortify()]. If not specified, -must be supplied in each layer added to the plot.

- - -
mapping
-

Default list of aesthetic mappings to use for plot. -If not specified, must be supplied in each layer added to the plot.

- - -
...
-

Other arguments passed on to methods. Not currently used.

- - -
environment
-

DEPRECATED. Used prior to tidy evaluation.

- -
-
-

Value

- - -

A ggplot

-
-
-

Details

-

`ggplot()` is used to construct the initial plot object, -and is almost always followed by `+` to add component to the -plot. There are three common ways to invoke `ggplot()`:

- -

The first method is recommended if all layers use the same -data and the same set of aesthetics, although this method -can also be used to add a layer using data from another -data frame. See the first example below. The second -method specifies the default data frame to use for the plot, -but no aesthetics are defined up front. This is useful when -one data frame is used predominantly as layers are added, -but the aesthetics may vary from one layer to another. The -third method initializes a skeleton `ggplot` object which -is fleshed out as layers are added. This method is useful when -multiple data frames are used to produce different layers, as -is often the case in complex graphics.

-
- -
-

Examples

-
# Generate some sample data, then compute mean and standard deviation
-# in each group
-
-
-
-
- -
- - -
- -
-

Site built with pkgdown 2.0.7.

-
- -
- - - - - - - - diff --git a/reference/glimpse.html b/reference/glimpse.html new file mode 100644 index 0000000..ccab4ee --- /dev/null +++ b/reference/glimpse.html @@ -0,0 +1,183 @@ + +Get a glimpse of your data — glimpse • tidyseurat + + +
+
+ + + +
+
+ + +
+

glimpse() is like a transposed version of print(): +columns run down the page, and data runs across. +This makes it possible to see every column in a data frame. +It's a little like str() applied to a data frame +but it tries to show you as much data as possible. +(And it always shows the underlying data, even when applied +to a remote data source.)

+

See format_glimpse() for details on the formatting.

+
+ +
+
# S3 method for tidyseurat
+glimpse(x, width = NULL, ...)
+
+ +
+

Arguments

+
x
+

An object to glimpse at.

+ + +
width
+

Width of output: defaults to the setting of the +width option (if finite) +or the width of the console.

+ + +
...
+

Unused, for extensibility.

+ +
+
+

Value

+ + +

The original x is (invisibly) returned, allowing glimpse() to be +used within a data pipeline.

+
+
+

S3 methods

+ + + +

glimpse is an S3 generic with a customised method for tbls and +data.frames, and a default method that calls str().

+
+ +
+

Examples

+
data(pbmc_small)
+pbmc_small |> glimpse()
+#> Formal class 'Seurat' [package "SeuratObject"] with 13 slots
+#>   ..@ assays      :List of 1
+#>   .. ..$ RNA:Formal class 'Assay' [package "SeuratObject"] with 8 slots
+#>   ..@ meta.data   :'data.frame':	80 obs. of  7 variables:
+#>   .. ..$ orig.ident     : Factor w/ 1 level "SeuratProject": 1 1 1 1 1 1 1 1 1 1 ...
+#>   .. ..$ nCount_RNA     : num [1:80] 70 85 87 127 173 70 64 72 52 100 ...
+#>   .. ..$ nFeature_RNA   : int [1:80] 47 52 50 56 53 48 36 45 36 41 ...
+#>   .. ..$ RNA_snn_res.0.8: Factor w/ 2 levels "0","1": 1 1 2 1 1 1 1 1 1 1 ...
+#>   .. ..$ letter.idents  : Factor w/ 2 levels "A","B": 1 1 2 1 1 1 1 1 1 1 ...
+#>   .. ..$ groups         : chr [1:80] "g2" "g1" "g2" "g2" ...
+#>   .. ..$ RNA_snn_res.1  : Factor w/ 3 levels "0","1","2": 1 1 1 1 1 1 1 1 1 1 ...
+#>   ..@ active.assay: chr "RNA"
+#>   ..@ active.ident: Factor w/ 3 levels "0","1","2": 1 1 1 1 1 1 1 1 1 1 ...
+#>   .. ..- attr(*, "names")= chr [1:80] "ATGCCAGAACGACT" "CATGGCCTGTGCAT" "GAACCTGATGAACC" "TGACTGGATTCTCA" ...
+#>   ..@ graphs      :List of 1
+#>   .. ..$ RNA_snn:Formal class 'Graph' [package "SeuratObject"] with 7 slots
+#>   ..@ neighbors   : list()
+#>   ..@ reductions  :List of 2
+#>   .. ..$ pca :Formal class 'DimReduc' [package "SeuratObject"] with 9 slots
+#>   .. ..$ tsne:Formal class 'DimReduc' [package "SeuratObject"] with 9 slots
+#>   ..@ images      : list()
+#>   ..@ project.name: chr "SeuratProject"
+#>   ..@ misc        : list()
+#>   ..@ version     :Classes 'package_version', 'numeric_version'  hidden list of 1
+#>   .. ..$ : int [1:3] 4 0 0
+#>   ..@ commands    :List of 10
+#>   .. ..$ NormalizeData.RNA       :Formal class 'SeuratCommand' [package "SeuratObject"] with 5 slots
+#>   .. ..$ ScaleData.RNA           :Formal class 'SeuratCommand' [package "SeuratObject"] with 5 slots
+#>   .. ..$ RunPCA.RNA              :Formal class 'SeuratCommand' [package "SeuratObject"] with 5 slots
+#>   .. ..$ BuildSNN.RNA.pca        :Formal class 'SeuratCommand' [package "SeuratObject"] with 5 slots
+#>   .. ..$ FindClusters            :Formal class 'SeuratCommand' [package "SeuratObject"] with 5 slots
+#>   .. ..$ RunTSNE.pca             :Formal class 'SeuratCommand' [package "SeuratObject"] with 5 slots
+#>   .. ..$ JackStraw.RNA.pca       :Formal class 'SeuratCommand' [package "SeuratObject"] with 5 slots
+#>   .. ..$ ScoreJackStraw.pca      :Formal class 'SeuratCommand' [package "SeuratObject"] with 5 slots
+#>   .. ..$ ProjectDim.RNA.pca      :Formal class 'SeuratCommand' [package "SeuratObject"] with 5 slots
+#>   .. ..$ FindVariableFeatures.RNA:Formal class 'SeuratCommand' [package "SeuratObject"] with 5 slots
+#>   ..@ tools       : list()
+
+
+
+
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.7.

+
+ +
+ + + + + + + + diff --git a/reference/group_by.html b/reference/group_by.html new file mode 100644 index 0000000..9a522bd --- /dev/null +++ b/reference/group_by.html @@ -0,0 +1,220 @@ + +Group by one or more variables — group_by • tidyseurat + + +
+
+ + + +
+
+ + +
+

Most data operations are done on groups defined by variables. +group_by() takes an existing tbl and converts it into a grouped tbl +where operations are performed "by group". ungroup() removes grouping.

+
+ +
+
# S3 method for Seurat
+group_by(.data, ..., .add = FALSE, .drop = group_by_drop_default(.data))
+
+ +
+

Arguments

+
.data
+

A data frame, data frame extension (e.g. a tibble), or a +lazy data frame (e.g. from dbplyr or dtplyr). See Methods, below, for +more details.

+ + +
...
+

In group_by(), variables or computations to group by. +Computations are always done on the ungrouped data frame. +To perform computations on the grouped data, you need to use +a separate mutate() step before the group_by(). +Computations are not allowed in nest_by(). +In ungroup(), variables to remove from the grouping.

+ + +
.add
+

When FALSE, the default, group_by() will +override existing groups. To add to the existing groups, use +.add = TRUE.

+

This argument was previously called add, but that prevented +creating a new grouping variable called add, and conflicts with +our naming conventions.

+ + +
.drop
+

Drop groups formed by factor levels that don't appear in the +data? The default is TRUE except when .data has been previously +grouped with .drop = FALSE. See group_by_drop_default() for details.

+ +
+
+

Value

+ + +

A grouped data frame with class grouped_df, +unless the combination of ... and .add yields an empty set of +grouping columns, in which case a tibble will be returned.

+
+
+

Methods

+ + + +

These functions are generics, which means that packages can provide +implementations (methods) for other classes. See the documentation of +individual methods for extra arguments and differences in behaviour.

+

Methods available in currently loaded packages:

  • group_by(): dplyr (data.frame), plotly (plotly), tidyseurat (Seurat) +.

  • +
  • ungroup(): dplyr (data.frame, grouped_df, rowwise_df), plotly (plotly) +.

  • +
+
+

Ordering

+ + + +

Currently, group_by() internally orders the groups in ascending order. This +results in ordered output from functions that aggregate groups, such as +summarise().

+

When used as grouping columns, character vectors are ordered in the C locale +for performance and reproducibility across R sessions. If the resulting +ordering of your grouped operation matters and is dependent on the locale, +you should follow up the grouped operation with an explicit call to +arrange() and set the .locale argument. For example:

+

data %>%
+  group_by(chr) %>%
+  summarise(avg = mean(x)) %>%
+  arrange(chr, .locale = "en")

+

This is often useful as a preliminary step before generating content intended +for humans, such as an HTML table.

+

Legacy behavior

+ + +

Prior to dplyr 1.1.0, character vector grouping columns were ordered in the +system locale. If you need to temporarily revert to this behavior, you can +set the global option dplyr.legacy_locale to TRUE, but this should be +used sparingly and you should expect this option to be removed in a future +version of dplyr. It is better to update existing code to explicitly call +arrange(.locale = ) instead. Note that setting dplyr.legacy_locale will +also force calls to arrange() to use the system locale.

+
+ + +
+
+

See also

+

Other grouping functions: +group_map(), +group_nest(), +group_split(), +group_trim()

+
+ +
+

Examples

+
data("pbmc_small")
+pbmc_small |>  group_by(groups)
+#> tidyseurat says: A data frame is returned for independent data analysis.
+#> # A tibble: 80 × 29
+#> # Groups:   groups [2]
+#>    .cell orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
+#>    <chr> <fct>           <dbl>        <int> <fct>           <fct>         <chr> 
+#>  1 ATGC… SeuratPro…         70           47 0               A             g2    
+#>  2 CATG… SeuratPro…         85           52 0               A             g1    
+#>  3 GAAC… SeuratPro…         87           50 1               B             g2    
+#>  4 TGAC… SeuratPro…        127           56 0               A             g2    
+#>  5 AGTC… SeuratPro…        173           53 0               A             g2    
+#>  6 TCTG… SeuratPro…         70           48 0               A             g1    
+#>  7 TGGT… SeuratPro…         64           36 0               A             g1    
+#>  8 GCAG… SeuratPro…         72           45 0               A             g1    
+#>  9 GATA… SeuratPro…         52           36 0               A             g1    
+#> 10 AATG… SeuratPro…        100           41 0               A             g1    
+#> # ℹ 70 more rows
+#> # ℹ 22 more variables: RNA_snn_res.1 <fct>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>,
+#> #   PC_4 <dbl>, PC_5 <dbl>, PC_6 <dbl>, PC_7 <dbl>, PC_8 <dbl>, PC_9 <dbl>,
+#> #   PC_10 <dbl>, PC_11 <dbl>, PC_12 <dbl>, PC_13 <dbl>, PC_14 <dbl>,
+#> #   PC_15 <dbl>, PC_16 <dbl>, PC_17 <dbl>, PC_18 <dbl>, PC_19 <dbl>,
+#> #   tSNE_1 <dbl>, tSNE_2 <dbl>
+
+
+
+
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.7.

+
+ +
+ + + + + + + + diff --git a/reference/index.html b/reference/index.html index 8d2c032..dbafbb1 100644 --- a/reference/index.html +++ b/reference/index.html @@ -65,65 +65,152 @@

All functions

Aggregate cells

-

bind_rows

+

arrange(<Seurat>)

-

Efficiently bind multiple data frames by row and column

+

Order rows using column values

+ +

as_tibble(<Seurat>)

+ +

Coerce lists, matrices, and more to data frames

+ +

bind_rows(<Seurat>) bind_cols(<Seurat>)

+ +

Efficiently bind multiple data frames by row and column

cell_type_df

-

Example data set 2

+

Cell types of 80 PBMC single cells

+ +

count(<Seurat>) add_count(<Seurat>)

+ +

Count the observations in each group

+ +

distinct(<Seurat>)

+ +

Keep distinct/unique rows

+ +

extract(<Seurat>)

+ +

Extract a character column into multiple columns using regular +expression groups

+ +

filter(<Seurat>)

+ +

Keep rows that match a condition

+ +

print(<Seurat>)

+ +

Printing tibbles

+ +

full_join(<Seurat>)

+ +

Mutating joins

+ +

ggplot(<Seurat>)

+ +

Create a new ggplot from a tidyseurat

+ +

glimpse(<tidyseurat>)

+ +

Get a glimpse of your data

+ +

group_by(<Seurat>)

+ +

Group by one or more variables

-

ggplot

+

inner_join(<Seurat>)

-

Create a new ggplot from a tidyseurat object

+

Mutating joins

join_features(<Seurat>)

-

Extract and join information for features.

+

Extract and join information for features.

join_transcripts()

(DEPRECATED) Extract and join information for transcripts.

+ +

left_join(<Seurat>)

+ +

Mutating joins

+ +

mutate(<Seurat>)

+ +

Create, modify, and delete columns

+ +

nest(<Seurat>)

+ +

Nest rows into a list-column of data frames

pbmc_small_nested_interactions

-

Example data set 2

+

Intercellular ligand-receptor interactions for +38 ligands from a single cell RNA-seq cluster.

-

pivot_longer

+

pivot_longer(<Seurat>)

Pivot data from wide to long

-

plot_ly()

+

plot_ly()

Initiate a plotly visualization

-

print(<Seurat>)

+

pull(<Seurat>)

-

Printing tibbles

+

Extract a single column

+ +

rename(<Seurat>)

+ +

Rename columns

return_arguments_of()

returns variables from an expression

-

tbl_format_header

+

right_join(<Seurat>)

-

Format the header of a tibble

+

Mutating joins

-

as_tibble glimpse

+

rowwise(<Seurat>)

-

Coerce lists, matrices, and more to data frames

+

Group input by rows

+ +

sample_n(<Seurat>) sample_frac(<Seurat>)

+ +

Sample n rows from a table

+ +

select(<Seurat>)

+ +

Keep or drop columns using their names and types

+ +

separate(<Seurat>)

+ +

Separate a character column into multiple columns with a regular +expression or numeric locations

+ +

slice(<Seurat>) slice_sample(<Seurat>) slice_head(<Seurat>) slice_tail(<Seurat>) slice_min(<Seurat>) slice_max(<Seurat>)

+ +

Subset rows using their positions

+ +

summarise(<Seurat>) summarize(<Seurat>)

+ +

Summarise each group down to one row

+ +

tbl_format_header(<tidySeurat>)

+ +

Format the header of a tibble

tidy()

-

tidy for seurat

+

tidy for `Seurat`

-

unnest nest extract unite separate

+

unite(<Seurat>)

-

unnest

+

Unite multiple columns into one by pasting strings together

-

unnest_seurat()

+

unnest(<tidyseurat_nested>) unnest_seurat()

-

unnest_seurat

+

Unnest a list-column of data frames into rows and columns