diff --git a/articles/figures_article.html b/articles/figures_article.html index cda9310..8c99626 100644 --- a/articles/figures_article.html +++ b/articles/figures_article.html @@ -137,8 +137,8 @@
p2 =
PBMC_clean_scaled_UMAP_cluster_cell_type %>%
- sample_n(20000) %>%
- ggplot(aes(UMAP_1, UMAP_2, color=seurat_clusters)) +
+ sample_n(20000) %>%
+ ggplot(aes(UMAP_1, UMAP_2, color=seurat_clusters)) +
geom_point(size=0.05, alpha=0.2) +
custom_theme +
theme(aspect.ratio=1)
PBMC_clean_scaled_UMAP_cluster_cell_type %>%
- sample_n(20000) %>%
- plot_ly(
+ sample_n(20000) %>%
+ plot_ly(
x = ~`UMAP_1`,
y = ~`UMAP_2`,
z = ~`UMAP_3`,
@@ -166,10 +166,10 @@ 2023-08-25
p3 =
PBMC_clean_scaled_UMAP_cluster_cell_type %>%
- arrange(first.labels) %>%
- mutate(seurat_clusters = fct_inorder(seurat_clusters)) %>%
+ arrange(first.labels) %>%
+ mutate(seurat_clusters = fct_inorder(seurat_clusters)) %>%
join_features(features=c("CD3D", "HLA-DRB1")) %>%
- ggplot(aes(y=seurat_clusters , x=.abundance_SCT, fill=first.labels)) +
+ ggplot(aes(y=seurat_clusters , x=.abundance_SCT, fill=first.labels)) +
geom_density_ridges(bandwidth = 0.2) +
facet_wrap(~ .feature, nrow = 2) +
coord_flip() +
@@ -178,7 +178,7 @@ 2023-08-25
# Plot heatmap
p4 =
PBMC_clean_scaled_UMAP_cluster_cell_type %>%
- sample_n(2000) %>%
+ sample_n(2000) %>%
DoHeatmap(
features = markers$gene,
group.colors = friendly_cols
@@ -186,11 +186,11 @@ 2023-08-25
p5 =
PBMC_clean_scaled_UMAP_cluster_cell_type %>%
- sample_n(1000) %>%
+ sample_n(1000) %>%
join_features(features=markers$gene) %>%
- mutate(seurat_clusters = as.integer(seurat_clusters)) %>%
- filter(seurat_clusters<10) %>%
- group_by(seurat_clusters) %>%
+ mutate(seurat_clusters = as.integer(seurat_clusters)) %>%
+ filter(seurat_clusters<10) %>%
+ group_by(seurat_clusters) %>%
# Plot heatmap
heatmap(
@@ -210,13 +210,13 @@ 2023-08-25
p6 =
PBMC_clean_scaled_UMAP_cluster_cell_type %>%
- tidyseurat::unite("cluster_cell_type", c(first.labels, seurat_clusters), remove=FALSE) %>%
+ tidyseurat::unite("cluster_cell_type", c(first.labels, seurat_clusters), remove=FALSE) %>%
pivot_longer(
c(seurat_clusters, first.labels_single),
names_to = "classification", values_to = "value"
) %>%
- ggplot(aes(x = classification, stratum = value, alluvium = cell,
+ ggplot(aes(x = classification, stratum = value, alluvium = cell,
fill = first.labels, label = value)) +
scale_x_discrete(expand = c(1, 1)) +
geom_flow() +
diff --git a/articles/introduction.html b/articles/introduction.html
index b5cbecd..ee22819 100644
--- a/articles/introduction.html
+++ b/articles/introduction.html
@@ -270,14 +270,14 @@ Preliminary plots
pbmc_small %>%
- tidyseurat::ggplot(aes(nFeature_RNA, fill = groups)) +
+ ggplot(aes(nFeature_RNA, fill = groups)) +
geom_histogram() +
my_theme
Here we plot total features per cell.
pbmc_small %>%
- tidyseurat::ggplot(aes(groups, nCount_RNA, fill = groups)) +
+ ggplot(aes(groups, nCount_RNA, fill = groups)) +
geom_boxplot(outlier.shape = NA) +
geom_jitter(width = 0.1) +
my_theme
@@ -286,7 +286,7 @@ Preliminary plots
pbmc_small %>%
join_features(features = c("HLA-DRA", "LYZ")) %>%
- ggplot(aes(groups, .abundance_RNA + 1, fill = groups)) +
+ ggplot(aes(groups, .abundance_RNA + 1, fill = groups)) +
geom_boxplot(outlier.shape = NA) +
geom_jitter(aes(size = nCount_RNA), alpha = 0.5, width = 0.2) +
scale_y_log10() +
@@ -326,8 +326,8 @@ Preprocess the datasetIf a tool is not included in the tidyseurat collection, we can use as_tibble
to permanently convert tidyseurat
into tibble.
pbmc_small_pca %>%
- as_tibble() %>%
- select(contains("PC"), everything()) %>%
+ as_tibble() %>%
+ select(contains("PC"), everything()) %>%
GGally::ggpairs(columns = 1:5, ggplot2::aes(colour = groups)) +
my_theme
@@ -365,7 +365,7 @@ Identify clusters
pbmc_small_cluster %>%
- tidyseurat::count(groups, seurat_clusters)
+ count(groups, seurat_clusters)
## # A tibble: 8 × 3
## groups seurat_clusters n
## <chr> <fct> <int>
@@ -406,7 +406,7 @@ Identify clustersmarkers <-
pbmc_small_cluster %>%
FindAllMarkers(only.pos = TRUE, min.pct = 0.25, thresh.use = 0.25) %>%
- group_by(cluster) %>%
+ group_by(cluster) %>%
top_n(10, avg_log2FC)
# Plot heatmap
@@ -427,7 +427,7 @@ Reduce dimensions
pbmc_small_UMAP %>%
- plot_ly(
+ plot_ly(
x = ~`UMAP_1`,
y = ~`UMAP_2`,
z = ~`UMAP_3`,
@@ -456,21 +456,21 @@ Cell type prediction= "single"
) %>%
as.data.frame() %>%
- as_tibble(rownames = "cell") %>%
- select(cell, first.labels)
# Join UMAP and cell type info
pbmc_small_cell_type <-
pbmc_small_UMAP %>%
- left_join(cell_type_df, by = "cell")
+ left_join(cell_type_df, by = "cell")
# Reorder columns
pbmc_small_cell_type %>%
- tidyseurat::select(cell, first.labels, everything())
We can easily summarise the results. For example, we can see how cell type classification overlaps with cluster classification.
+ count(seurat_clusters, first.labels)We can easily reshape the data for building information-rich faceted plots.
pbmc_small_cell_type %>%
@@ -482,7 +482,7 @@ Cell type prediction) %>%
# UMAP plots for cell type and cluster
- ggplot(aes(UMAP_1, UMAP_2, color = label)) +
+ ggplot(aes(UMAP_1, UMAP_2, color = label)) +
geom_point() +
facet_wrap(~classifier) +
my_theme
pbmc_small_nested <-
pbmc_small_cell_type %>%
- filter(first.labels != "Erythrocytes") %>%
- mutate(cell_class = if_else(`first.labels` %in% c("Macrophages", "Monocytes"), "myeloid", "lymphoid")) %>%
- nest(data = -cell_class)
+ filter(first.labels != "Erythrocytes") %>%
+ mutate(cell_class = if_else(`first.labels` %in% c("Macrophages", "Monocytes"), "myeloid", "lymphoid")) %>%
+ nest(data = -cell_class)
pbmc_small_nested
Now we can, independently for the lymphoid and myeloid subsets, (i) find variable features, (ii) reduce dimensions, and (iii) cluster, using both tidyverse and Seurat seamlessly.
pbmc_small_nested_reanalysed <-
pbmc_small_nested %>%
- mutate(data = map(
+ mutate(data = map(
data, ~ .x %>%
FindVariableFeatures(verbose = FALSE) %>%
RunPCA(npcs = 10, verbose = FALSE) %>%
@@ -534,14 +534,14 @@ Nested analysespbmc_small_nested_reanalysed %>%
# Convert to tibble otherwise Seurat drops reduced dimensions when unifying data sets.
- mutate(data = map(data, ~ .x %>% as_tibble())) %>%
- unnest(data) %>%
+ mutate(data = map(data, ~ .x %>% as_tibble())) %>%
+ unnest(data) %>%
# Define unique clusters
- unite("cluster", c(cell_class, seurat_clusters), remove = FALSE) %>%
+ unite("cluster", c(cell_class, seurat_clusters), remove = FALSE) %>%
# Plotting
- ggplot(aes(UMAP_1, UMAP_2, color = cluster)) +
+ ggplot(aes(UMAP_1, UMAP_2, color = cluster)) +
geom_point() +
facet_wrap(~cell_class) +
my_theme
pbmc_small %>%
- tidyseurat::ggplot(aes(nFeature_RNA, fill = groups)) +
+ tidyseurat::ggplot(aes(nFeature_RNA, fill = groups)) +
geom_histogram() +
my_theme
Here we plot total features per cell.
pbmc_small %>%
- tidyseurat::ggplot(aes(groups, nCount_RNA, fill = groups)) +
+ tidyseurat::ggplot(aes(groups, nCount_RNA, fill = groups)) +
geom_boxplot(outlier.shape = NA) +
geom_jitter(width = 0.1) +
my_theme
pbmc_small %>%
join_features(features = c("HLA-DRA", "LYZ")) %>%
- ggplot(aes(groups, .abundance_RNA + 1, fill = groups)) +
+ ggplot(aes(groups, .abundance_RNA + 1, fill = groups)) +
geom_boxplot(outlier.shape = NA) +
geom_jitter(aes(size = nCount_RNA), alpha = 0.5, width = 0.2) +
scale_y_log10() +
@@ -319,8 +319,8 @@ Preprocess the datasetIf a tool is not included in the tidyseurat collection, we can use as_tibble
to permanently convert tidyseurat
into tibble.
pbmc_small_pca %>%
- as_tibble() %>%
- select(contains("PC"), everything()) %>%
+ as_tibble() %>%
+ select(contains("PC"), everything()) %>%
GGally::ggpairs(columns = 1:5, ggplot2::aes(colour = groups)) +
my_theme
@@ -358,7 +358,7 @@ Identify clusters
pbmc_small_cluster %>%
- tidyseurat::count(groups, seurat_clusters)
+ tidyseurat::count(groups, seurat_clusters)
## # A tibble: 8 × 3
## groups seurat_clusters n
## <chr> <fct> <int>
@@ -396,7 +396,7 @@ Identify clustersmarkers <-
pbmc_small_cluster %>%
FindAllMarkers(only.pos = TRUE, min.pct = 0.25, thresh.use = 0.25) %>%
- group_by(cluster) %>%
+ group_by(cluster) %>%
top_n(10, avg_log2FC)
# Plot heatmap
@@ -417,7 +417,7 @@ Reduce dimensions
pbmc_small_UMAP %>%
- plot_ly(
+ plot_ly(
x = ~`UMAP_1`,
y = ~`UMAP_2`,
z = ~`UMAP_3`,
@@ -446,21 +446,21 @@ Cell type prediction= "single"
) %>%
as.data.frame() %>%
- as_tibble(rownames = "cell") %>%
- select(cell, first.labels)
# Join UMAP and cell type info
pbmc_small_cell_type <-
pbmc_small_UMAP %>%
- left_join(cell_type_df, by = "cell")
+ left_join(cell_type_df, by = "cell")
# Reorder columns
pbmc_small_cell_type %>%
- tidyseurat::select(cell, first.labels, everything())
We can easily summarise the results. For example, we can see how cell type classification overlaps with cluster classification.
+ count(seurat_clusters, first.labels)We can easily reshape the data for building information-rich faceted plots.
pbmc_small_cell_type %>%
@@ -472,7 +472,7 @@ Cell type prediction) %>%
# UMAP plots for cell type and cluster
- ggplot(aes(UMAP_1, UMAP_2, color = label)) +
+ ggplot(aes(UMAP_1, UMAP_2, color = label)) +
geom_point() +
facet_wrap(~classifier) +
my_theme
pbmc_small_nested <-
pbmc_small_cell_type %>%
- filter(first.labels != "Erythrocytes") %>%
- mutate(cell_class = if_else(`first.labels` %in% c("Macrophages", "Monocytes"), "myeloid", "lymphoid")) %>%
- nest(data = -cell_class)
+ filter(first.labels != "Erythrocytes") %>%
+ mutate(cell_class = if_else(`first.labels` %in% c("Macrophages", "Monocytes"), "myeloid", "lymphoid")) %>%
+ nest(data = -cell_class)
pbmc_small_nested
Now we can, independently for the lymphoid and myeloid subsets, (i) find variable features, (ii) reduce dimensions, and (iii) cluster, using both tidyverse and Seurat seamlessly.
pbmc_small_nested_reanalysed <-
pbmc_small_nested %>%
- mutate(data = map(
+ mutate(data = map(
data, ~ .x %>%
FindVariableFeatures(verbose = FALSE) %>%
RunPCA(npcs = 10, verbose = FALSE) %>%
@@ -524,14 +524,14 @@ Nested analysespbmc_small_nested_reanalysed %>%
# Convert to tibble otherwise Seurat drops reduced dimensions when unifying data sets.
- mutate(data = map(data, ~ .x %>% as_tibble())) %>%
- unnest(data) %>%
+ mutate(data = map(data, ~ .x %>% as_tibble())) %>%
+ unnest(data) %>%
# Define unique clusters
- unite("cluster", c(cell_class, seurat_clusters), remove = FALSE) %>%
+ unite("cluster", c(cell_class, seurat_clusters), remove = FALSE) %>%
# Plotting
- ggplot(aes(UMAP_1, UMAP_2, color = cluster)) +
+ ggplot(aes(UMAP_1, UMAP_2, color = cluster)) +
geom_point() +
facet_wrap(~cell_class) +
my_theme
A tidySingleCellExperiment object
A tidyseurat object
data(pbmc_small)
-pbmc_small |>
- aggregate_cells(c(groups, letter.idents), assays = "RNA")
+pbmc_small_pseudo_bulk <- pbmc_small |>
+ aggregate_cells(c(groups, letter.idents), assays="RNA")
#> Joining with `by = join_by(letter.idents, groups)`
-#> # A tibble: 920 × 8
-#> .feature .sample RNA letter.idents groups .aggregated_cells orig.ident
-#> <chr> <chr> <dbl> <fct> <chr> <int> <fct>
-#> 1 MS4A1 g2___A 35.5 A g2 23 SeuratProject
-#> 2 CD79B g2___A 39.3 A g2 23 SeuratProject
-#> 3 CD79A g2___A 29.5 A g2 23 SeuratProject
-#> 4 HLA-DRA g2___A 73.9 A g2 23 SeuratProject
-#> 5 TCL1A g2___A 29.5 A g2 23 SeuratProject
-#> 6 HLA-DQB1 g2___A 37.6 A g2 23 SeuratProject
-#> 7 HVCN1 g2___A 22.4 A g2 23 SeuratProject
-#> 8 HLA-DMB g2___A 24.7 A g2 23 SeuratProject
-#> 9 LTB g2___A 91.9 A g2 23 SeuratProject
-#> 10 LINC00926 g2___A 9.91 A g2 23 SeuratProject
-#> # ℹ 910 more rows
-#> # ℹ 1 more variable: RNA_snn_res.0.8 <fct>
arrange()
orders the rows of a data frame by the values of selected
+columns.
Unlike other dplyr verbs, arrange()
largely ignores grouping; you
+need to explicitly mention grouping variables (or use .by_group = TRUE
)
+in order to group by them, and functions of variables are evaluated
+once per data frame, not once per group.
# S3 method for Seurat
+arrange(.data, ..., .by_group = FALSE)
A data frame, data frame extension (e.g. a tibble), or a +lazy data frame (e.g. from dbplyr or dtplyr). See Methods, below, for +more details.
<data-masking
> Variables, or
+functions of variables. Use desc()
to sort a variable in descending
+order.
If TRUE
, will sort first by grouping variable. Applies to
+grouped data frames only.
An object of the same type as .data
. The output has the following
+properties:
All rows appear in the output, but (usually) in a different place.
Columns are not modified.
Groups are not modified.
Data frame attributes are preserved.
This function is a generic, which means that packages can provide +implementations (methods) for other classes. See the documentation of +individual methods for extra arguments and differences in behaviour.
+The following methods are currently available in loaded packages:
+dplyr (data.frame
), plotly (plotly
), tidyseurat (Seurat
)
+.
data(pbmc_small)
+pbmc_small |>
+ arrange(nFeature_RNA)
+#> # A Seurat-tibble abstraction: 80 × 15
+#> # Features=230 | Cells=80 | Active assay=RNA | Assays=RNA
+#> .cell orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
+#> <chr> <fct> <dbl> <int> <fct> <fct> <chr>
+#> 1 CATG… SeuratPro… 51 26 0 A g2
+#> 2 GGCA… SeuratPro… 172 29 0 A g1
+#> 3 AGTC… SeuratPro… 157 29 0 A g1
+#> 4 GACG… SeuratPro… 202 30 0 A g2
+#> 5 GGAA… SeuratPro… 150 30 0 A g2
+#> 6 AGGT… SeuratPro… 62 31 0 A g2
+#> 7 CTTC… SeuratPro… 41 32 0 A g2
+#> 8 GTAA… SeuratPro… 67 33 0 A g2
+#> 9 GTCA… SeuratPro… 210 33 0 A g2
+#> 10 TGGT… SeuratPro… 64 36 0 A g1
+#> # ℹ 70 more rows
+#> # ℹ 8 more variables: RNA_snn_res.1 <fct>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>,
+#> # PC_4 <dbl>, PC_5 <dbl>, tSNE_1 <dbl>, tSNE_2 <dbl>
+
+
as_tibble()
turns an existing object, such as a data frame or
+matrix, into a so-called tibble, a data frame with class tbl_df
. This is
+in contrast with tibble()
, which builds a tibble from individual columns.
+as_tibble()
is to tibble()
as base::as.data.frame()
is to
+base::data.frame()
.
as_tibble()
is an S3 generic, with methods for:
data.frame
: Thin wrapper around the list
method
+that implements tibble's treatment of rownames.
Default: Other inputs are first coerced with base::as.data.frame()
.
as_tibble_row()
converts a vector to a tibble with one row.
+If the input is a list, all elements must have size one.
as_tibble_col()
converts a vector to a tibble with one column.
# S3 method for Seurat
+as_tibble(
+ x,
+ ...,
+ .name_repair = c("check_unique", "unique", "universal", "minimal"),
+ rownames = NULL
+)
A data frame, list, matrix, or other object that could reasonably be +coerced to a tibble.
Unused, for extensibility.
Treatment of problematic column names:
"minimal"
: No name repair or checks, beyond basic existence,
"unique"
: Make sure names are unique and not empty,
"check_unique"
: (default value), no name repair, but check they are
+unique
,
"universal"
: Make the names unique
and syntactic
a function: apply custom name repair (e.g., .name_repair = make.names
+for names in the style of base R).
A purrr-style anonymous function, see rlang::as_function()
This argument is passed on as repair
to vctrs::vec_as_names()
.
+See there for more details on these terms and the strategies used
+to enforce them.
How to treat existing row names of a data frame or matrix:
NULL
: remove row names. This is the default.
NA
: keep row names.
A string: the name of a new column. Existing rownames are transferred
+into this column and the row.names
attribute is deleted.
+No name repair is applied to the new column name, even if x
already contains
+a column of that name.
+Use as_tibble(rownames_to_column(...))
to safeguard against this case.
Read more in rownames.
`tibble`
+The default behavior is to silently remove row names.
+New code should explicitly convert row names to a new column using the
+rownames
argument.
For existing code that relies on the retention of row names, call
+pkgconfig::set_config("tibble::rownames" = NA)
in your script or in your
+package's .onLoad()
function.
Using as_tibble()
for vectors is superseded as of version 3.0.0,
+prefer the more expressive as_tibble_row()
and
+as_tibble_col()
variants for new code.
tibble()
constructs a tibble from individual columns. enframe()
+converts a named vector to a tibble with a column of names and column of
+values. Name repair is implemented using vctrs::vec_as_names()
.
data(pbmc_small)
+pbmc_small |> as_tibble()
+#> # A tibble: 80 × 29
+#> .cell orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
+#> <chr> <fct> <dbl> <int> <fct> <fct> <chr>
+#> 1 ATGC… SeuratPro… 70 47 0 A g2
+#> 2 CATG… SeuratPro… 85 52 0 A g1
+#> 3 GAAC… SeuratPro… 87 50 1 B g2
+#> 4 TGAC… SeuratPro… 127 56 0 A g2
+#> 5 AGTC… SeuratPro… 173 53 0 A g2
+#> 6 TCTG… SeuratPro… 70 48 0 A g1
+#> 7 TGGT… SeuratPro… 64 36 0 A g1
+#> 8 GCAG… SeuratPro… 72 45 0 A g1
+#> 9 GATA… SeuratPro… 52 36 0 A g1
+#> 10 AATG… SeuratPro… 100 41 0 A g1
+#> # ℹ 70 more rows
+#> # ℹ 22 more variables: RNA_snn_res.1 <fct>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>,
+#> # PC_4 <dbl>, PC_5 <dbl>, PC_6 <dbl>, PC_7 <dbl>, PC_8 <dbl>, PC_9 <dbl>,
+#> # PC_10 <dbl>, PC_11 <dbl>, PC_12 <dbl>, PC_13 <dbl>, PC_14 <dbl>,
+#> # PC_15 <dbl>, PC_16 <dbl>, PC_17 <dbl>, PC_18 <dbl>, PC_19 <dbl>,
+#> # tSNE_1 <dbl>, tSNE_2 <dbl>
+
+
Example data set 2
+A dataset containing the barcodes and cell types of 80 PBMC single cells.
cell_type_df
data(cell_type_df)
An object of class tbl_df
(inherits from tbl
, data.frame
) with 80 rows and 2 columns.
A tibble containing 80 rows and 2 columns. + Cells are a subsample of the Peripheral Blood Mononuclear Cells (PBMC) + dataset of 2,700 single cells. Cell types were identified with SingleR.
cell identifier, barcode
cell type
`tibble`
count()
lets you quickly count the unique values of one or more variables:
+df %>% count(a, b)
is roughly equivalent to
+df %>% group_by(a, b) %>% summarise(n = n())
.
+count()
is paired with tally()
, a lower-level helper that is equivalent
+to df %>% summarise(n = n())
. Supply wt
to perform weighted counts,
+switching the summary from n = n()
to n = sum(wt)
.
add_count()
and add_tally()
are equivalents to count()
and tally()
+but use mutate()
instead of summarise()
so that they add a new column
+with group-wise counts.
# S3 method for Seurat
+count(
+ x,
+ ...,
+ wt = NULL,
+ sort = FALSE,
+ name = NULL,
+ .drop = group_by_drop_default(x)
+)
+
+# S3 method for Seurat
+add_count(
+ x,
+ ...,
+ wt = NULL,
+ sort = FALSE,
+ name = NULL,
+ .drop = group_by_drop_default(x)
+)
A data frame, data frame extension (e.g. a tibble), or a +lazy data frame (e.g. from dbplyr or dtplyr).
<data-masking
> Variables to group
+by.
<data-masking
> Frequency weights.
+Can be NULL
or a variable:
If NULL
(the default), counts the number of rows in each group.
If a variable, computes sum(wt)
for each group.
If TRUE
, will show the largest groups at the top.
The name of the new column in the output.
+If omitted, it will default to n
. If there's already a column called n
,
+it will use nn
. If there's a column called n
and nn
, it'll use
+nnn
, and so on, adding n
s until it gets a new name.
Handling of factor levels that don't appear in the data, passed
+on to group_by()
.
For count()
: if FALSE
will include counts for empty groups (i.e. for
+levels of factors that don't exist in the data).
For add_count()
: deprecated since it
+can't actually affect the output.
An object of the same type as .data
. count()
and add_count()
group transiently, so the output has the same groups as the input.
+data(pbmc_small)
+pbmc_small |> count(groups)
+#> tidyseurat says: A data frame is returned for independent data analysis.
+#> # A tibble: 2 × 2
+#> groups n
+#> <chr> <int>
+#> 1 g1 44
+#> 2 g2 36
+
+
Keep only unique/distinct rows from a data frame. This is similar
+to unique.data.frame()
but considerably faster.
# S3 method for Seurat
+distinct(.data, ..., .keep_all = FALSE)
A data frame, data frame extension (e.g. a tibble), or a +lazy data frame (e.g. from dbplyr or dtplyr). See Methods, below, for +more details.
<data-masking
> Optional variables to
+use when determining uniqueness. If there are multiple rows for a given
+combination of inputs, only the first row will be preserved. If omitted,
+will use all variables in the data frame.
If TRUE
, keep all variables in .data
.
+If a combination of ...
is not distinct, this keeps the
+first row of values.
An object of the same type as .data
. The output has the following
+properties:
Rows are a subset of the input but appear in the same order.
Columns are not modified if ...
is empty or .keep_all
is TRUE
.
+Otherwise, distinct()
first calls mutate()
to create new columns.
Groups are not modified.
Data frame attributes are preserved.
This function is a generic, which means that packages can provide +implementations (methods) for other classes. See the documentation of +individual methods for extra arguments and differences in behaviour.
+The following methods are currently available in loaded packages:
+dplyr (data.frame
), plotly (plotly
), tidyseurat (Seurat
)
+.
data("pbmc_small")
+pbmc_small |> distinct(groups)
+#> tidyseurat says: A data frame is returned for independent data analysis.
+#> # A tibble: 2 × 1
+#> groups
+#> <chr>
+#> 1 g2
+#> 2 g1
+
+
`arrange()` order the rows of a data frame rows by the values of selected -columns.
-Unlike other dplyr verbs, `arrange()` largely ignores grouping; you -need to explicit mention grouping variables (or use `by_group = TRUE`) -in order to group by them, and functions of variables are evaluated -once per data frame, not once per group.
-`filter()` retains the rows where the conditions you provide a `TRUE`. Note -that, unlike base subsetting with `[`, rows where the condition evaluates -to `NA` are dropped.
-Most data operations are done on groups defined by variables. -`group_by()` takes an existing tbl and converts it into a grouped tbl -where operations are performed "by group". `ungroup()` removes grouping.
-`summarise()` creates a new data frame. It will have one (or more) rows for -each combination of grouping variables; if there are no grouping variables, -the output will have a single row summarising all observations in the input. -It will contain one column for each grouping variable and one column -for each of the summary statistics that you have specified.
-`summarise()` and `summarize()` are synonyms.
-`mutate()` adds new variables and preserves existing ones; -`transmute()` adds new variables and drops existing ones. -New variables overwrite existing variables of the same name. -Variables can be removed by setting their value to `NULL`.
-Rename individual variables using `new_name = old_name` syntax.
-`rowwise()` is used for the results of [do()] when you -create list-variables. It is also useful to support arbitrary -complex operations that need to be applied to each row.
-`slice()` lets you index rows by their (integer) locations. It allows you -to select, remove, and duplicate rows. It is accompanied by a number of -helpers for common use cases:
-Select (and optionally rename) variables in a data frame, using a concise -mini-language that makes it easy to refer to variables based on their name -(e.g. `a:f` selects all columns from `a` on the left to `f` on the -right). You can also use predicate functions like is.numeric to select -variables based on their properties.
-Sample n rows from a table
-`count()` lets you quickly count the unique values of one or more variables: -`df -`df -`count()` is paired with `tally()`, a lower-level helper that is equivalent -to `df -switching the summary from `n=n()` to `n=sum(wt)`.
-`add_count()` are `add_tally()` are equivalents to `count()` and `tally()` -but use `mutate()` instead of `summarise()` so that they add a new column -with group-wise counts.
-`pull()` is similar to `$`. It's mostly useful because it looks a little -nicer in pipes, it also works with remote data frames, and it can optionally -name the output.
-If TRUE, will sort first by grouping variable. Applies to grouped data frames only.
If TRUE, keep all variables in .data. If a combination of ... is not distinct, this keeps the first row of values. (See dplyr)
when `FALSE` (the default), the grouping structure -is recalculated based on the resulting data, otherwise it is kept as is.
When `FALSE`, the default, `group_by()` will - override existing groups. To add to the existing groups, use - `.add = TRUE`.
-This argument was previously called `add`, but that prevented - creating a new grouping variable called `add`, and conflicts with - our naming conventions.
Input data frame.
tbls to join. (See dplyr)
A character vector of variables to join by. (See dplyr)
If x and y are not from the same data source, and copy is TRUE, then y will be copied into the same src as x. (See dplyr)
If there are non-joined duplicate variables in x and y, these suffixes will be added to the output to disambiguate them. Should be a character vector of length 2. (See dplyr)
<[`data-masking`][dplyr_data_masking]> Sampling weights. -This must evaluate to a vector of non-negative numbers the same length as -the input. Weights are automatically standardised to sum to 1.
A data.frame.
<[`tidy-select`][dplyr_select]> -For `sample_n()`, the number of rows to select. -For `sample_frac()`, the fraction of rows to select. -If `tbl` is grouped, `size` applies to each group.
Sample with or without replacement?
<[`tidy-select`][dplyr_select]> Sampling weights. -This must evaluate to a vector of non-negative numbers the same length as -the input. Weights are automatically standardised to sum to 1.
DEPRECATED.
A data frame, data frame extension (e.g. a tibble), or a -lazy data frame (e.g. from dbplyr or dtplyr).
<[`data-masking`][dplyr_data_masking]> Frequency weights. - Can be `NULL` or a variable:
-* If `NULL` (the default), counts the number of rows in each group. - * If a variable, computes `sum(wt)` for each group.
If `TRUE`, will show the largest groups at the top.
For `count()`: if `FALSE` will include counts for empty groups -(i.e. for levels of factors that don't exist in the data). Deprecated in -`add_count()` since it didn't actually affect the output.
An optional parameter that specifies the column to be used
-as names for a named vector. Specified in a similar manner as var
.
For use by methods.
An object of the same type as `.data`.
- - -* All rows appear in the output, but (usually) in a different place. -* Columns are not modified. -* Groups are not modified. -* Data frame attributes are preserved.
- - -A Seurat object
- - -An object of the same type as `.data`.
- - -* Rows are a subset of the input, but appear in the same order. -* Columns are not modified. -* The number of groups may be reduced (if `.preserve` is not `TRUE`). -* Data frame attributes are preserved.
- - -A grouped data frame, unless the combination of `...` and `add` - yields a non empty set of grouping columns, a regular (ungrouped) data frame - otherwise.
- - -A tibble
- - -An object of the same type as `.data`.
- - -For `mutate()`:
- - -* Rows are not affected. -* Existing columns will be preserved unless explicitly modified. -* New columns will be added to the right of existing columns. -* Columns given value `NULL` will be removed -* Groups will be recomputed if a grouping variable is mutated. -* Data frame attributes are preserved.
- - -For `transmute()`:
- - -* Rows are not affected. -* Apart from grouping variables, existing columns will be remove unless - explicitly kept. -* Column order matches order of expressions. -* Groups will be recomputed if a grouping variable is mutated. -* Data frame attributes are preserved.
- - -An object of the same type as `.data`. -* Rows are not affected. -* Column names are changed; column order is preserved -* Data frame attributes are preserved. -* Groups are updated to reflect new names.
- - -A `tbl`
- - -A `tbl`
- - -A Seurat object
- - -A Seurat object
- - -A Seurat object
- - -A Seurat object
- - -An object of the same type as `.data`. The output has the following -properties:
- - -* Each row may appear 0, 1, or many times in the output. -* Columns are not modified. -* Groups are not modified. -* Data frame attributes are preserved.
- - -An object of the same type as `.data`. The output has the following -properties:
- - -* Rows are not affected. -* Output columns are a subset of input columns, potentially with a different - order. Columns will be renamed if `new_name = old_name` form is used. -* Data frame attributes are preserved. -* Groups are maintained; you can't select off grouping variables.
- - -A Seurat object
- - -An object of the same type as `.data`. `count()` and `add_count()` -group transiently, so the output has the same groups as the input.
- - -A vector the same size as `.data`.
-## Locales -The sort order for character vectors will depend on the collating sequence -of the locale in use: see locales().
-## Missing values -Unlike base sorting with `sort()`, `NA` are: -* always sorted to the end for local data, even when wrapped with `desc()`. -* treated differently for remote data, depending on the backend.
-dplyr is not yet smart enough to optimise filtering optimisation -on grouped datasets that don't need grouped calculations. For this reason, -filtering is often considerably faster on ungroup()ed data.
-Slice does not work with relational databases because they have no -intrinsic notion of row order. If you want to perform the equivalent -operation, use [filter()] and [row_number()].
-This function is a **generic**, which means that packages can provide -implementations (methods) for other classes. See the documentation of -individual methods for extra arguments and differences in behaviour.
- -These function are **generic**s, which means that packages can provide -implementations (methods) for other classes. See the documentation of -individual methods for extra arguments and differences in behaviour.
-Methods available in currently loaded packages:
- -These function are **generic**s, which means that packages can provide -implementations (methods) for other classes. See the documentation of -individual methods for extra arguments and differences in behaviour.
-Methods available in currently loaded packages:
- -This function is a **generic**, which means that packages can provide -implementations (methods) for other classes. See the documentation of -individual methods for extra arguments and differences in behaviour.
-The following methods are currently available in loaded packages:
-Because mutating expressions are computed within groups, they may -yield different results on grouped tibbles. This will be the case -as soon as an aggregating, lagging, or ranking function is -involved. Compare this ungrouped mutate:
-With the grouped equivalent:
-The former normalises `mass` by the global average whereas the -latter normalises by the averages within gender levels.
-`%>%` = magrittr::`%>%`
-pbmc_small %>% arrange(nFeature_RNA)
-#> # A Seurat-tibble abstraction: 80 × 15
-#> # Features=230 | Cells=80 | Active assay=RNA | Assays=RNA
-#> .cell orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
-#> <chr> <fct> <dbl> <int> <fct> <fct> <chr>
-#> 1 CATG… SeuratPro… 51 26 0 A g2
-#> 2 GGCA… SeuratPro… 172 29 0 A g1
-#> 3 AGTC… SeuratPro… 157 29 0 A g1
-#> 4 GACG… SeuratPro… 202 30 0 A g2
-#> 5 GGAA… SeuratPro… 150 30 0 A g2
-#> 6 AGGT… SeuratPro… 62 31 0 A g2
-#> 7 CTTC… SeuratPro… 41 32 0 A g2
-#> 8 GTAA… SeuratPro… 67 33 0 A g2
-#> 9 GTCA… SeuratPro… 210 33 0 A g2
-#> 10 TGGT… SeuratPro… 64 36 0 A g1
-#> # ℹ 70 more rows
-#> # ℹ 8 more variables: RNA_snn_res.1 <fct>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>,
-#> # PC_4 <dbl>, PC_5 <dbl>, tSNE_1 <dbl>, tSNE_2 <dbl>
-
-`%>%` = magrittr::`%>%`
-data("pbmc_small")
-pbmc_small %>% distinct(groups)
-#> tidyseurat says: A data frame is returned for independent data analysis.
-#> # A tibble: 2 × 1
-#> groups
-#> <chr>
-#> 1 g2
-#> 2 g1
-
-
-`%>%` = magrittr::`%>%`
-data("pbmc_small")
-pbmc_small %>% filter(groups == "g1")
-#> # A Seurat-tibble abstraction: 44 × 15
-#> # Features=230 | Cells=44 | Active assay=RNA | Assays=RNA
-#> .cell orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
-#> <chr> <fct> <dbl> <int> <fct> <fct> <chr>
-#> 1 CATG… SeuratPro… 85 52 0 A g1
-#> 2 TCTG… SeuratPro… 70 48 0 A g1
-#> 3 TGGT… SeuratPro… 64 36 0 A g1
-#> 4 GCAG… SeuratPro… 72 45 0 A g1
-#> 5 GATA… SeuratPro… 52 36 0 A g1
-#> 6 AATG… SeuratPro… 100 41 0 A g1
-#> 7 AGAG… SeuratPro… 191 61 0 A g1
-#> 8 CTAA… SeuratPro… 168 44 0 A g1
-#> 9 TTGG… SeuratPro… 135 45 0 A g1
-#> 10 CATC… SeuratPro… 79 43 0 A g1
-#> # ℹ 34 more rows
-#> # ℹ 8 more variables: RNA_snn_res.1 <fct>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>,
-#> # PC_4 <dbl>, PC_5 <dbl>, tSNE_1 <dbl>, tSNE_2 <dbl>
-
-# Learn more in ?dplyr_eval
-`%>%` = magrittr::`%>%`
-data("pbmc_small")
-pbmc_small %>% group_by(groups)
-#> tidyseurat says: A data frame is returned for independent data analysis.
-#> # A tibble: 80 × 29
-#> # Groups: groups [2]
-#> .cell orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
-#> <chr> <fct> <dbl> <int> <fct> <fct> <chr>
-#> 1 ATGC… SeuratPro… 70 47 0 A g2
-#> 2 CATG… SeuratPro… 85 52 0 A g1
-#> 3 GAAC… SeuratPro… 87 50 1 B g2
-#> 4 TGAC… SeuratPro… 127 56 0 A g2
-#> 5 AGTC… SeuratPro… 173 53 0 A g2
-#> 6 TCTG… SeuratPro… 70 48 0 A g1
-#> 7 TGGT… SeuratPro… 64 36 0 A g1
-#> 8 GCAG… SeuratPro… 72 45 0 A g1
-#> 9 GATA… SeuratPro… 52 36 0 A g1
-#> 10 AATG… SeuratPro… 100 41 0 A g1
-#> # ℹ 70 more rows
-#> # ℹ 22 more variables: RNA_snn_res.1 <fct>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>,
-#> # PC_4 <dbl>, PC_5 <dbl>, PC_6 <dbl>, PC_7 <dbl>, PC_8 <dbl>, PC_9 <dbl>,
-#> # PC_10 <dbl>, PC_11 <dbl>, PC_12 <dbl>, PC_13 <dbl>, PC_14 <dbl>,
-#> # PC_15 <dbl>, PC_16 <dbl>, PC_17 <dbl>, PC_18 <dbl>, PC_19 <dbl>,
-#> # tSNE_1 <dbl>, tSNE_2 <dbl>
-
-
-`%>%` = magrittr::`%>%`
-data("pbmc_small")
-pbmc_small %>% summarise(mean(nCount_RNA))
-#> tidyseurat says: A data frame is returned for independent data analysis.
-#> # A tibble: 1 × 1
-#> `mean(nCount_RNA)`
-#> <dbl>
-#> 1 245.
-
-`%>%` = magrittr::`%>%`
-data("pbmc_small")
-pbmc_small %>% mutate(nFeature_RNA = 1)
-#> # A Seurat-tibble abstraction: 80 × 15
-#> # Features=230 | Cells=80 | Active assay=RNA | Assays=RNA
-#> .cell orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
-#> <chr> <fct> <dbl> <dbl> <fct> <fct> <chr>
-#> 1 ATGC… SeuratPro… 70 1 0 A g2
-#> 2 CATG… SeuratPro… 85 1 0 A g1
-#> 3 GAAC… SeuratPro… 87 1 1 B g2
-#> 4 TGAC… SeuratPro… 127 1 0 A g2
-#> 5 AGTC… SeuratPro… 173 1 0 A g2
-#> 6 TCTG… SeuratPro… 70 1 0 A g1
-#> 7 TGGT… SeuratPro… 64 1 0 A g1
-#> 8 GCAG… SeuratPro… 72 1 0 A g1
-#> 9 GATA… SeuratPro… 52 1 0 A g1
-#> 10 AATG… SeuratPro… 100 1 0 A g1
-#> # ℹ 70 more rows
-#> # ℹ 8 more variables: RNA_snn_res.1 <fct>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>,
-#> # PC_4 <dbl>, PC_5 <dbl>, tSNE_1 <dbl>, tSNE_2 <dbl>
-
-`%>%` = magrittr::`%>%`
-data("pbmc_small")
-pbmc_small %>% rename(s_score = nFeature_RNA)
-#> # A Seurat-tibble abstraction: 80 × 15
-#> # Features=230 | Cells=80 | Active assay=RNA | Assays=RNA
-#> .cell orig.ident nCount_RNA s_score RNA_snn_res.0.8 letter.idents groups
-#> <chr> <fct> <dbl> <int> <fct> <fct> <chr>
-#> 1 ATGCCAGAA… SeuratPro… 70 47 0 A g2
-#> 2 CATGGCCTG… SeuratPro… 85 52 0 A g1
-#> 3 GAACCTGAT… SeuratPro… 87 50 1 B g2
-#> 4 TGACTGGAT… SeuratPro… 127 56 0 A g2
-#> 5 AGTCAGACT… SeuratPro… 173 53 0 A g2
-#> 6 TCTGATACA… SeuratPro… 70 48 0 A g1
-#> 7 TGGTATCTA… SeuratPro… 64 36 0 A g1
-#> 8 GCAGCTCTG… SeuratPro… 72 45 0 A g1
-#> 9 GATATAACA… SeuratPro… 52 36 0 A g1
-#> 10 AATGTTGAC… SeuratPro… 100 41 0 A g1
-#> # ℹ 70 more rows
-#> # ℹ 8 more variables: RNA_snn_res.1 <fct>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>,
-#> # PC_4 <dbl>, PC_5 <dbl>, tSNE_1 <dbl>, tSNE_2 <dbl>
-
-`%>%` = magrittr::`%>%`
-
-`%>%` = magrittr::`%>%`
-data("pbmc_small")
-pbmc_small %>% left_join(pbmc_small %>% distinct(groups) %>% mutate(new_column = 1:2))
-#> tidyseurat says: A data frame is returned for independent data analysis.
-#> Joining with `by = join_by(groups)`
-#> # A Seurat-tibble abstraction: 80 × 16
-#> # Features=230 | Cells=80 | Active assay=RNA | Assays=RNA
-#> .cell orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
-#> <chr> <fct> <dbl> <int> <fct> <fct> <chr>
-#> 1 ATGC… SeuratPro… 70 47 0 A g2
-#> 2 CATG… SeuratPro… 85 52 0 A g1
-#> 3 GAAC… SeuratPro… 87 50 1 B g2
-#> 4 TGAC… SeuratPro… 127 56 0 A g2
-#> 5 AGTC… SeuratPro… 173 53 0 A g2
-#> 6 TCTG… SeuratPro… 70 48 0 A g1
-#> 7 TGGT… SeuratPro… 64 36 0 A g1
-#> 8 GCAG… SeuratPro… 72 45 0 A g1
-#> 9 GATA… SeuratPro… 52 36 0 A g1
-#> 10 AATG… SeuratPro… 100 41 0 A g1
-#> # ℹ 70 more rows
-#> # ℹ 9 more variables: RNA_snn_res.1 <fct>, new_column <int>, PC_1 <dbl>,
-#> # PC_2 <dbl>, PC_3 <dbl>, PC_4 <dbl>, PC_5 <dbl>, tSNE_1 <dbl>, tSNE_2 <dbl>
-
-`%>%` = magrittr::`%>%`
-
-data("pbmc_small")
-pbmc_small %>%
- inner_join(
- pbmc_small %>% distinct(groups) %>% mutate(new_column = 1:2) %>% slice(1))
-#> tidyseurat says: A data frame is returned for independent data analysis.
-#> Joining with `by = join_by(groups)`
-#> # A Seurat-tibble abstraction: 36 × 16
-#> # Features=230 | Cells=36 | Active assay=RNA | Assays=RNA
-#> .cell orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
-#> <chr> <fct> <dbl> <int> <fct> <fct> <chr>
-#> 1 ATGC… SeuratPro… 70 47 0 A g2
-#> 2 GAAC… SeuratPro… 87 50 1 B g2
-#> 3 TGAC… SeuratPro… 127 56 0 A g2
-#> 4 AGTC… SeuratPro… 173 53 0 A g2
-#> 5 AGGT… SeuratPro… 62 31 0 A g2
-#> 6 GGGT… SeuratPro… 101 41 0 A g2
-#> 7 CATG… SeuratPro… 51 26 0 A g2
-#> 8 TACG… SeuratPro… 99 45 0 A g2
-#> 9 GTAA… SeuratPro… 67 33 0 A g2
-#> 10 TACA… SeuratPro… 109 41 0 A g2
-#> # ℹ 26 more rows
-#> # ℹ 9 more variables: RNA_snn_res.1 <fct>, new_column <int>, PC_1 <dbl>,
-#> # PC_2 <dbl>, PC_3 <dbl>, PC_4 <dbl>, PC_5 <dbl>, tSNE_1 <dbl>, tSNE_2 <dbl>
-
-`%>%` = magrittr::`%>%`
-
-data("pbmc_small")
-pbmc_small %>% right_join(pbmc_small %>% distinct(groups) %>% mutate(new_column = 1:2) %>% slice(1))
-#> tidyseurat says: A data frame is returned for independent data analysis.
-#> Joining with `by = join_by(groups)`
-#> # A Seurat-tibble abstraction: 36 × 16
-#> # Features=230 | Cells=36 | Active assay=RNA | Assays=RNA
-#> .cell orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
-#> <chr> <fct> <dbl> <int> <fct> <fct> <chr>
-#> 1 ATGC… SeuratPro… 70 47 0 A g2
-#> 2 GAAC… SeuratPro… 87 50 1 B g2
-#> 3 TGAC… SeuratPro… 127 56 0 A g2
-#> 4 AGTC… SeuratPro… 173 53 0 A g2
-#> 5 AGGT… SeuratPro… 62 31 0 A g2
-#> 6 GGGT… SeuratPro… 101 41 0 A g2
-#> 7 CATG… SeuratPro… 51 26 0 A g2
-#> 8 TACG… SeuratPro… 99 45 0 A g2
-#> 9 GTAA… SeuratPro… 67 33 0 A g2
-#> 10 TACA… SeuratPro… 109 41 0 A g2
-#> # ℹ 26 more rows
-#> # ℹ 9 more variables: RNA_snn_res.1 <fct>, new_column <int>, PC_1 <dbl>,
-#> # PC_2 <dbl>, PC_3 <dbl>, PC_4 <dbl>, PC_5 <dbl>, tSNE_1 <dbl>, tSNE_2 <dbl>
-
-`%>%` = magrittr::`%>%`
-data("pbmc_small")
-pbmc_small %>% full_join(tibble::tibble(groups = "g1", other=1:4))
-#> Joining with `by = join_by(groups)`
-#> tidyseurat says: This operation lead to duplicated cell names. A data frame is returned for independent data analysis.
-#> # A tibble: 212 × 30
-#> .cell orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
-#> <chr> <fct> <dbl> <int> <fct> <fct> <chr>
-#> 1 ATGC… SeuratPro… 70 47 0 A g2
-#> 2 CATG… SeuratPro… 85 52 0 A g1
-#> 3 CATG… SeuratPro… 85 52 0 A g1
-#> 4 CATG… SeuratPro… 85 52 0 A g1
-#> 5 CATG… SeuratPro… 85 52 0 A g1
-#> 6 GAAC… SeuratPro… 87 50 1 B g2
-#> 7 TGAC… SeuratPro… 127 56 0 A g2
-#> 8 AGTC… SeuratPro… 173 53 0 A g2
-#> 9 TCTG… SeuratPro… 70 48 0 A g1
-#> 10 TCTG… SeuratPro… 70 48 0 A g1
-#> # ℹ 202 more rows
-#> # ℹ 23 more variables: RNA_snn_res.1 <fct>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>,
-#> # PC_4 <dbl>, PC_5 <dbl>, PC_6 <dbl>, PC_7 <dbl>, PC_8 <dbl>, PC_9 <dbl>,
-#> # PC_10 <dbl>, PC_11 <dbl>, PC_12 <dbl>, PC_13 <dbl>, PC_14 <dbl>,
-#> # PC_15 <dbl>, PC_16 <dbl>, PC_17 <dbl>, PC_18 <dbl>, PC_19 <dbl>,
-#> # tSNE_1 <dbl>, tSNE_2 <dbl>, other <int>
-
-
-`%>%` = magrittr::`%>%`
-data("pbmc_small")
-pbmc_small %>% slice(1)
-#> # A Seurat-tibble abstraction: 1 × 15
-#> # Features=230 | Cells=1 | Active assay=RNA | Assays=RNA
-#> .cell orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
-#> <chr> <fct> <dbl> <int> <fct> <fct> <chr>
-#> 1 ATGCC… SeuratPro… 70 47 0 A g2
-#> # ℹ 8 more variables: RNA_snn_res.1 <fct>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>,
-#> # PC_4 <dbl>, PC_5 <dbl>, tSNE_1 <dbl>, tSNE_2 <dbl>
-
-# Slice group-wise using .by
-pbmc_small |> slice(1:2, .by = groups)
-#> # A Seurat-tibble abstraction: 4 × 15
-#> # Features=230 | Cells=4 | Active assay=RNA | Assays=RNA
-#> .cell orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
-#> <chr> <fct> <dbl> <int> <fct> <fct> <chr>
-#> 1 ATGCC… SeuratPro… 70 47 0 A g2
-#> 2 GAACC… SeuratPro… 87 50 1 B g2
-#> 3 CATGG… SeuratPro… 85 52 0 A g1
-#> 4 TCTGA… SeuratPro… 70 48 0 A g1
-#> # ℹ 8 more variables: RNA_snn_res.1 <fct>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>,
-#> # PC_4 <dbl>, PC_5 <dbl>, tSNE_1 <dbl>, tSNE_2 <dbl>
-
-# slice_sample() allows you to randomly select with or without replacement
-pbmc_small |> slice_sample(n = 5)
-#> # A Seurat-tibble abstraction: 5 × 15
-#> # Features=230 | Cells=5 | Active assay=RNA | Assays=RNA
-#> .cell orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
-#> <chr> <fct> <dbl> <int> <fct> <fct> <chr>
-#> 1 GATAG… SeuratPro… 328 72 1 B g1
-#> 2 GGCAT… SeuratPro… 126 53 0 A g1
-#> 3 ATGCC… SeuratPro… 70 47 0 A g2
-#> 4 AGATA… SeuratPro… 187 61 0 A g2
-#> 5 TACAA… SeuratPro… 108 44 0 A g2
-#> # ℹ 8 more variables: RNA_snn_res.1 <fct>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>,
-#> # PC_4 <dbl>, PC_5 <dbl>, tSNE_1 <dbl>, tSNE_2 <dbl>
-
-# if using replacement, and duplicate cells are returned, a tibble will be
-# returned because duplicate cells cannot exist in Seurat objects
-pbmc_small |> slice_sample(n = 1, replace = TRUE) # returns Seurat
-#> # A Seurat-tibble abstraction: 1 × 15
-#> # Features=230 | Cells=1 | Active assay=RNA | Assays=RNA
-#> .cell orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
-#> <chr> <fct> <dbl> <int> <fct> <fct> <chr>
-#> 1 GATAG… SeuratPro… 328 72 1 B g1
-#> # ℹ 8 more variables: RNA_snn_res.1 <fct>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>,
-#> # PC_4 <dbl>, PC_5 <dbl>, tSNE_1 <dbl>, tSNE_2 <dbl>
-pbmc_small |> slice_sample(n = 100, replace = TRUE) # returns tibble
-#> tidyseurat says: When sampling with replacement a data frame is returned for independent data analysis.
-#> # A tibble: 100 × 29
-#> .cell orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
-#> <chr> <fct> <dbl> <int> <fct> <fct> <chr>
-#> 1 ATGC… SeuratPro… 70 47 0 A g2
-#> 2 ATGC… SeuratPro… 70 47 0 A g2
-#> 3 ATGC… SeuratPro… 70 47 0 A g2
-#> 4 CATG… SeuratPro… 85 52 0 A g1
-#> 5 TCTG… SeuratPro… 70 48 0 A g1
-#> 6 TGGT… SeuratPro… 64 36 0 A g1
-#> 7 AATG… SeuratPro… 100 41 0 A g1
-#> 8 GGGT… SeuratPro… 101 41 0 A g2
-#> 9 GGGT… SeuratPro… 101 41 0 A g2
-#> 10 CATG… SeuratPro… 51 26 0 A g2
-#> # ℹ 90 more rows
-#> # ℹ 22 more variables: RNA_snn_res.1 <fct>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>,
-#> # PC_4 <dbl>, PC_5 <dbl>, PC_6 <dbl>, PC_7 <dbl>, PC_8 <dbl>, PC_9 <dbl>,
-#> # PC_10 <dbl>, PC_11 <dbl>, PC_12 <dbl>, PC_13 <dbl>, PC_14 <dbl>,
-#> # PC_15 <dbl>, PC_16 <dbl>, PC_17 <dbl>, PC_18 <dbl>, PC_19 <dbl>,
-#> # tSNE_1 <dbl>, tSNE_2 <dbl>
-
-# weight by a variable
-pbmc_small |> slice_sample(n = 5, weight_by = nCount_RNA)
-#> # A Seurat-tibble abstraction: 5 × 15
-#> # Features=230 | Cells=5 | Active assay=RNA | Assays=RNA
-#> .cell orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
-#> <chr> <fct> <dbl> <int> <fct> <fct> <chr>
-#> 1 CTGCC… SeuratPro… 146 47 0 A g1
-#> 2 ACCAG… SeuratPro… 417 75 0 A g1
-#> 3 AAGCG… SeuratPro… 443 77 1 B g1
-#> 4 GGCAT… SeuratPro… 126 53 0 A g1
-#> 5 ACTCG… SeuratPro… 231 49 1 B g2
-#> # ℹ 8 more variables: RNA_snn_res.1 <fct>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>,
-#> # PC_4 <dbl>, PC_5 <dbl>, tSNE_1 <dbl>, tSNE_2 <dbl>
-
-# sample by group
-pbmc_small |> slice_sample(n = 5, by = groups)
-#> # A Seurat-tibble abstraction: 10 × 15
-#> # Features=230 | Cells=10 | Active assay=RNA | Assays=RNA
-#> .cell orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
-#> <chr> <fct> <dbl> <int> <fct> <fct> <chr>
-#> 1 AGTC… SeuratPro… 173 53 0 A g2
-#> 2 ATGC… SeuratPro… 70 47 0 A g2
-#> 3 GTCA… SeuratPro… 210 33 0 A g2
-#> 4 TTGC… SeuratPro… 104 40 0 A g2
-#> 5 GCGC… SeuratPro… 213 48 1 B g2
-#> 6 CATC… SeuratPro… 353 80 1 B g1
-#> 7 TACT… SeuratPro… 156 48 0 A g1
-#> 8 ATAC… SeuratPro… 612 69 1 B g1
-#> 9 GGCA… SeuratPro… 126 53 0 A g1
-#> 10 TTAC… SeuratPro… 228 39 0 A g1
-#> # ℹ 8 more variables: RNA_snn_res.1 <fct>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>,
-#> # PC_4 <dbl>, PC_5 <dbl>, tSNE_1 <dbl>, tSNE_2 <dbl>
-
-# sample using proportions
-pbmc_small |> slice_sample(prop = 0.10)
-#> # A Seurat-tibble abstraction: 8 × 15
-#> # Features=230 | Cells=8 | Active assay=RNA | Assays=RNA
-#> .cell orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
-#> <chr> <fct> <dbl> <int> <fct> <fct> <chr>
-#> 1 GATAG… SeuratPro… 328 72 1 B g1
-#> 2 GGCAT… SeuratPro… 126 53 0 A g1
-#> 3 ATGCC… SeuratPro… 70 47 0 A g2
-#> 4 AGATA… SeuratPro… 187 61 0 A g2
-#> 5 TACAA… SeuratPro… 108 44 0 A g2
-#> 6 CATGA… SeuratPro… 51 26 0 A g2
-#> 7 GCACT… SeuratPro… 292 71 1 B g2
-#> 8 CGTAG… SeuratPro… 371 75 1 B g1
-#> # ℹ 8 more variables: RNA_snn_res.1 <fct>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>,
-#> # PC_4 <dbl>, PC_5 <dbl>, tSNE_1 <dbl>, tSNE_2 <dbl>
-
-
-# First rows based on existing order
-pbmc_small |> slice_head(n = 5)
-#> # A Seurat-tibble abstraction: 5 × 15
-#> # Features=230 | Cells=5 | Active assay=RNA | Assays=RNA
-#> .cell orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
-#> <chr> <fct> <dbl> <int> <fct> <fct> <chr>
-#> 1 ATGCC… SeuratPro… 70 47 0 A g2
-#> 2 CATGG… SeuratPro… 85 52 0 A g1
-#> 3 GAACC… SeuratPro… 87 50 1 B g2
-#> 4 TGACT… SeuratPro… 127 56 0 A g2
-#> 5 AGTCA… SeuratPro… 173 53 0 A g2
-#> # ℹ 8 more variables: RNA_snn_res.1 <fct>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>,
-#> # PC_4 <dbl>, PC_5 <dbl>, tSNE_1 <dbl>, tSNE_2 <dbl>
-
-# Last rows based on existing order
-pbmc_small |> slice_tail(n = 5)
-#> # A Seurat-tibble abstraction: 5 × 15
-#> # Features=230 | Cells=5 | Active assay=RNA | Assays=RNA
-#> .cell orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
-#> <chr> <fct> <dbl> <int> <fct> <fct> <chr>
-#> 1 GAGTT… SeuratPro… 527 47 0 A g1
-#> 2 GACGC… SeuratPro… 202 30 0 A g2
-#> 3 AGTCT… SeuratPro… 157 29 0 A g1
-#> 4 GGAAC… SeuratPro… 150 30 0 A g2
-#> 5 CTTGA… SeuratPro… 233 76 1 B g1
-#> # ℹ 8 more variables: RNA_snn_res.1 <fct>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>,
-#> # PC_4 <dbl>, PC_5 <dbl>, tSNE_1 <dbl>, tSNE_2 <dbl>
-
-# Rows with minimum and maximum values of a metadata variable
-pbmc_small |> slice_min(nFeature_RNA, n = 5)
-#> # A Seurat-tibble abstraction: 5 × 15
-#> # Features=230 | Cells=5 | Active assay=RNA | Assays=RNA
-#> .cell orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
-#> <chr> <fct> <dbl> <int> <fct> <fct> <chr>
-#> 1 CATGA… SeuratPro… 51 26 0 A g2
-#> 2 GGCAT… SeuratPro… 172 29 0 A g1
-#> 3 AGTCT… SeuratPro… 157 29 0 A g1
-#> 4 GACGC… SeuratPro… 202 30 0 A g2
-#> 5 GGAAC… SeuratPro… 150 30 0 A g2
-#> # ℹ 8 more variables: RNA_snn_res.1 <fct>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>,
-#> # PC_4 <dbl>, PC_5 <dbl>, tSNE_1 <dbl>, tSNE_2 <dbl>
-
-# slice_min() and slice_max() may return more rows than requested
-# in the presence of ties.
-pbmc_small |> slice_min(nFeature_RNA, n = 2)
-#> # A Seurat-tibble abstraction: 3 × 15
-#> # Features=230 | Cells=3 | Active assay=RNA | Assays=RNA
-#> .cell orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
-#> <chr> <fct> <dbl> <int> <fct> <fct> <chr>
-#> 1 CATGA… SeuratPro… 51 26 0 A g2
-#> 2 GGCAT… SeuratPro… 172 29 0 A g1
-#> 3 AGTCT… SeuratPro… 157 29 0 A g1
-#> # ℹ 8 more variables: RNA_snn_res.1 <fct>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>,
-#> # PC_4 <dbl>, PC_5 <dbl>, tSNE_1 <dbl>, tSNE_2 <dbl>
-
-# Use with_ties = FALSE to return exactly n matches
-pbmc_small |> slice_min(nFeature_RNA, n = 2, with_ties = FALSE)
-#> # A Seurat-tibble abstraction: 2 × 15
-#> # Features=230 | Cells=2 | Active assay=RNA | Assays=RNA
-#> .cell orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
-#> <chr> <fct> <dbl> <int> <fct> <fct> <chr>
-#> 1 CATGA… SeuratPro… 51 26 0 A g2
-#> 2 GGCAT… SeuratPro… 172 29 0 A g1
-#> # ℹ 8 more variables: RNA_snn_res.1 <fct>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>,
-#> # PC_4 <dbl>, PC_5 <dbl>, tSNE_1 <dbl>, tSNE_2 <dbl>
-
-# Or use additional variables to break the tie:
-pbmc_small |> slice_min(tibble::tibble(nFeature_RNA, nCount_RNA), n = 2)
-#> # A Seurat-tibble abstraction: 2 × 15
-#> # Features=230 | Cells=2 | Active assay=RNA | Assays=RNA
-#> .cell orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
-#> <chr> <fct> <dbl> <int> <fct> <fct> <chr>
-#> 1 CATGA… SeuratPro… 51 26 0 A g2
-#> 2 AGTCT… SeuratPro… 157 29 0 A g1
-#> # ℹ 8 more variables: RNA_snn_res.1 <fct>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>,
-#> # PC_4 <dbl>, PC_5 <dbl>, tSNE_1 <dbl>, tSNE_2 <dbl>
-
-# Use by for group-wise operations
-pbmc_small |> slice_min(nFeature_RNA, n = 5, by = groups)
-#> # A Seurat-tibble abstraction: 10 × 15
-#> # Features=230 | Cells=10 | Active assay=RNA | Assays=RNA
-#> .cell orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
-#> <chr> <fct> <dbl> <int> <fct> <fct> <chr>
-#> 1 CATG… SeuratPro… 51 26 0 A g2
-#> 2 GACG… SeuratPro… 202 30 0 A g2
-#> 3 GGAA… SeuratPro… 150 30 0 A g2
-#> 4 AGGT… SeuratPro… 62 31 0 A g2
-#> 5 CTTC… SeuratPro… 41 32 0 A g2
-#> 6 GGCA… SeuratPro… 172 29 0 A g1
-#> 7 AGTC… SeuratPro… 157 29 0 A g1
-#> 8 TGGT… SeuratPro… 64 36 0 A g1
-#> 9 GATA… SeuratPro… 52 36 0 A g1
-#> 10 TTAC… SeuratPro… 228 39 0 A g1
-#> # ℹ 8 more variables: RNA_snn_res.1 <fct>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>,
-#> # PC_4 <dbl>, PC_5 <dbl>, tSNE_1 <dbl>, tSNE_2 <dbl>
-
-
-# Rows with minimum and maximum values of a metadata variable
-pbmc_small |> slice_max(nFeature_RNA, n = 5)
-#> # A Seurat-tibble abstraction: 5 × 15
-#> # Features=230 | Cells=5 | Active assay=RNA | Assays=RNA
-#> .cell orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
-#> <chr> <fct> <dbl> <int> <fct> <fct> <chr>
-#> 1 GACAT… SeuratPro… 872 96 1 B g1
-#> 2 ACGTG… SeuratPro… 709 94 1 B g2
-#> 3 TTGAG… SeuratPro… 787 88 0 A g1
-#> 4 TTTAG… SeuratPro… 462 86 1 B g1
-#> 5 ATTGT… SeuratPro… 745 84 1 B g2
-#> # ℹ 8 more variables: RNA_snn_res.1 <fct>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>,
-#> # PC_4 <dbl>, PC_5 <dbl>, tSNE_1 <dbl>, tSNE_2 <dbl>
-
-`%>%` = magrittr::`%>%`
-data("pbmc_small")
-pbmc_small %>% select(.cell, orig.ident )
-#> # A Seurat-tibble abstraction: 80 × 9
-#> # Features=230 | Cells=80 | Active assay=RNA | Assays=RNA
-#> .cell orig.ident PC_1 PC_2 PC_3 PC_4 PC_5 tSNE_1 tSNE_2
-#> <chr> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
-#> 1 ATGCCAGAACGACT SeuratProj… -0.774 -0.900 -0.249 0.559 0.465 0.868 -8.10
-#> 2 CATGGCCTGTGCAT SeuratProj… -0.0260 -0.347 0.665 0.418 0.585 -7.39 -8.77
-#> 3 GAACCTGATGAACC SeuratProj… -0.457 0.180 1.32 2.01 -0.482 -28.2 0.241
-#> 4 TGACTGGATTCTCA SeuratProj… -0.812 -1.38 -1.00 0.139 -1.60 16.3 -11.2
-#> 5 AGTCAGACTGCACA SeuratProj… -0.774 -0.900 -0.249 0.559 0.465 1.91 -11.2
-#> 6 TCTGATACACGTGT SeuratProj… -0.774 -0.900 -0.249 0.559 0.465 3.15 -9.94
-#> 7 TGGTATCTAAACAG SeuratProj… -0.460 -1.19 -0.312 0.716 -1.65 17.9 -9.90
-#> 8 GCAGCTCTGTTTCT SeuratProj… -0.900 -0.388 0.693 0.404 0.536 -6.49 -8.39
-#> 9 GATATAACACGCAT SeuratProj… -0.774 -0.900 -0.249 0.559 0.465 1.33 -9.68
-#> 10 AATGTTGACAGTCA SeuratProj… -0.488 -1.16 -0.306 0.702 -1.47 17.0 -9.43
-#> # ℹ 70 more rows
-
-
-`%>%` = magrittr::`%>%`
-data("pbmc_small")
-pbmc_small %>% sample_n(50)
-#> # A Seurat-tibble abstraction: 50 × 15
-#> # Features=230 | Cells=50 | Active assay=RNA | Assays=RNA
-#> .cell orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
-#> <chr> <fct> <dbl> <int> <fct> <fct> <chr>
-#> 1 GATA… SeuratPro… 328 72 1 B g1
-#> 2 GGCA… SeuratPro… 126 53 0 A g1
-#> 3 ATGC… SeuratPro… 70 47 0 A g2
-#> 4 AGAT… SeuratPro… 187 61 0 A g2
-#> 5 TACA… SeuratPro… 108 44 0 A g2
-#> 6 CATG… SeuratPro… 51 26 0 A g2
-#> 7 GCAC… SeuratPro… 292 71 1 B g2
-#> 8 CGTA… SeuratPro… 371 75 1 B g1
-#> 9 TTAC… SeuratPro… 298 65 1 B g1
-#> 10 ATAA… SeuratPro… 99 42 1 B g2
-#> # ℹ 40 more rows
-#> # ℹ 8 more variables: RNA_snn_res.1 <fct>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>,
-#> # PC_4 <dbl>, PC_5 <dbl>, tSNE_1 <dbl>, tSNE_2 <dbl>
-pbmc_small %>% sample_frac(0.1)
-#> # A Seurat-tibble abstraction: 8 × 15
-#> # Features=230 | Cells=8 | Active assay=RNA | Assays=RNA
-#> .cell orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
-#> <chr> <fct> <dbl> <int> <fct> <fct> <chr>
-#> 1 GATAG… SeuratPro… 328 72 1 B g1
-#> 2 GGCAT… SeuratPro… 126 53 0 A g1
-#> 3 ATGCC… SeuratPro… 70 47 0 A g2
-#> 4 AGATA… SeuratPro… 187 61 0 A g2
-#> 5 TACAA… SeuratPro… 108 44 0 A g2
-#> 6 CATGA… SeuratPro… 51 26 0 A g2
-#> 7 GCACT… SeuratPro… 292 71 1 B g2
-#> 8 CGTAG… SeuratPro… 371 75 1 B g1
-#> # ℹ 8 more variables: RNA_snn_res.1 <fct>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>,
-#> # PC_4 <dbl>, PC_5 <dbl>, tSNE_1 <dbl>, tSNE_2 <dbl>
-
-
-
-`%>%` <- magrittr::`%>%`
-pbmc_small %>%
-
- count(groups)
-#> tidyseurat says: A data frame is returned for independent data analysis.
-#> # A tibble: 2 × 2
-#> groups n
-#> <chr> <int>
-#> 1 g1 44
-#> 2 g2 36
-
-`%>%` = magrittr::`%>%`
-data("pbmc_small")
-pbmc_small %>% pull(groups)
-#> tidyseurat says: A data frame is returned for independent data analysis.
-#> [1] "g2" "g1" "g2" "g2" "g2" "g1" "g1" "g1" "g1" "g1" "g2" "g1" "g2" "g2" "g2"
-#> [16] "g1" "g2" "g1" "g1" "g2" "g1" "g1" "g2" "g2" "g1" "g2" "g2" "g2" "g2" "g1"
-#> [31] "g1" "g1" "g1" "g2" "g1" "g1" "g2" "g1" "g1" "g2" "g1" "g2" "g2" "g2" "g1"
-#> [46] "g2" "g1" "g2" "g1" "g2" "g1" "g2" "g2" "g2" "g1" "g1" "g1" "g1" "g2" "g1"
-#> [61] "g1" "g1" "g1" "g1" "g1" "g2" "g2" "g1" "g1" "g1" "g2" "g1" "g2" "g2" "g1"
-#> [76] "g1" "g2" "g1" "g2" "g1"
-
-
R/tidyr_methods.R
+ extract.Rd
extract()
has been superseded in favour of separate_wider_regex()
+because it has a more polished API and better handling of problems.
+Superseded functions will not go away, but will only receive critical bug
+fixes.
Given a regular expression with capturing groups, extract()
turns
+each group into a new column. If the groups don't match, or the input
+is NA, the output will be NA.
# S3 method for Seurat
+extract(
+ data,
+ col,
+ into,
+ regex = "([[:alnum:]]+)",
+ remove = TRUE,
+ convert = FALSE,
+ ...
+)
A data frame.
<tidy-select
> Column to expand.
Names of new variables to create as character vector.
+Use NA
to omit the variable in the output.
A string representing a regular expression used to extract the
+desired values. There should be one group (defined by ()
) for each
+element of into
.
If TRUE
, remove input column from output data frame.
If TRUE
, will run type.convert()
with
+as.is = TRUE
on new columns. This is useful if the component
+columns are integer, numeric or logical.
NB: this will cause string "NA"
s to be converted to NA
s.
Additional arguments passed on to methods.
`tidyseurat`
+separate()
to split up by a separator.
data(pbmc_small)
+pbmc_small |>
+ extract(groups,
+ into="g",
+ regex="g([0-9])",
+ convert=TRUE)
+#> # A Seurat-tibble abstraction: 80 × 15
+#> # Features=230 | Cells=80 | Active assay=RNA | Assays=RNA
+#> .cell orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents g
+#> <chr> <fct> <dbl> <int> <fct> <fct> <int>
+#> 1 ATGCC… SeuratPro… 70 47 0 A 2
+#> 2 CATGG… SeuratPro… 85 52 0 A 1
+#> 3 GAACC… SeuratPro… 87 50 1 B 2
+#> 4 TGACT… SeuratPro… 127 56 0 A 2
+#> 5 AGTCA… SeuratPro… 173 53 0 A 2
+#> 6 TCTGA… SeuratPro… 70 48 0 A 1
+#> 7 TGGTA… SeuratPro… 64 36 0 A 1
+#> 8 GCAGC… SeuratPro… 72 45 0 A 1
+#> 9 GATAT… SeuratPro… 52 36 0 A 1
+#> 10 AATGT… SeuratPro… 100 41 0 A 1
+#> # ℹ 70 more rows
+#> # ℹ 8 more variables: RNA_snn_res.1 <fct>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>,
+#> # PC_4 <dbl>, PC_5 <dbl>, tSNE_1 <dbl>, tSNE_2 <dbl>
+
+
The filter()
function is used to subset a data frame,
+retaining all rows that satisfy your conditions.
+To be retained, the row must produce a value of TRUE
for all conditions.
+Note that when a condition evaluates to NA
+the row will be dropped, unlike base subsetting with [
.
# S3 method for Seurat
+filter(.data, ..., .preserve = FALSE)
A data frame, data frame extension (e.g. a tibble), or a +lazy data frame (e.g. from dbplyr or dtplyr). See Methods, below, for +more details.
<data-masking
> Expressions that
+return a logical value, and are defined in terms of the variables in
+.data
. If multiple expressions are included, they are combined with the
+&
operator. Only rows for which all conditions evaluate to TRUE
are
+kept.
Relevant when the .data
input is grouped.
+If .preserve = FALSE
(the default), the grouping structure
+is recalculated based on the resulting data, otherwise the grouping is kept as is.
An object of the same type as .data
. The output has the following properties:
Rows are a subset of the input, but appear in the same order.
Columns are not modified.
The number of groups may be reduced (if .preserve
is not TRUE
).
Data frame attributes are preserved.
The filter()
function is used to subset the rows of
+.data
, applying the expressions in ...
to the column values to determine which
+rows should be retained. It can be applied to both grouped and ungrouped data (see group_by()
and
+ungroup()
). However, dplyr is not yet smart enough to optimise the filtering
+operation on grouped datasets that do not need grouped calculations. For this
+reason, filtering is often considerably faster on ungrouped data.
There are many functions and operators that are useful when constructing the +expressions used to filter the data:
Because filtering expressions are computed within groups, they may +yield different results on grouped tibbles. This will be the case +as soon as an aggregating, lagging, or ranking function is +involved. Compare this ungrouped filtering:
+ +With the grouped equivalent:
+ +In the ungrouped version, filter()
compares the value of mass
in each row to
+the global average (taken over the whole data set), keeping only the rows with
+mass
greater than this global average. In contrast, the grouped version calculates
+the average mass separately for each gender
group, and keeps rows with mass
greater
+than the relevant within-gender average.
This function is a generic, which means that packages can provide +implementations (methods) for other classes. See the documentation of +individual methods for extra arguments and differences in behaviour.
+The following methods are currently available in loaded packages:
+dplyr (data.frame
, ts
), plotly (plotly
), tidyseurat (Seurat
)
+.
data("pbmc_small")
+pbmc_small |> filter(groups == "g1")
+#> # A Seurat-tibble abstraction: 44 × 15
+#> # Features=230 | Cells=44 | Active assay=RNA | Assays=RNA
+#> .cell orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
+#> <chr> <fct> <dbl> <int> <fct> <fct> <chr>
+#> 1 CATG… SeuratPro… 85 52 0 A g1
+#> 2 TCTG… SeuratPro… 70 48 0 A g1
+#> 3 TGGT… SeuratPro… 64 36 0 A g1
+#> 4 GCAG… SeuratPro… 72 45 0 A g1
+#> 5 GATA… SeuratPro… 52 36 0 A g1
+#> 6 AATG… SeuratPro… 100 41 0 A g1
+#> 7 AGAG… SeuratPro… 191 61 0 A g1
+#> 8 CTAA… SeuratPro… 168 44 0 A g1
+#> 9 TTGG… SeuratPro… 135 45 0 A g1
+#> 10 CATC… SeuratPro… 79 43 0 A g1
+#> # ℹ 34 more rows
+#> # ℹ 8 more variables: RNA_snn_res.1 <fct>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>,
+#> # PC_4 <dbl>, PC_5 <dbl>, tSNE_1 <dbl>, tSNE_2 <dbl>
+
+# Learn more in ?dplyr_eval
+
+
`r lifecycle::badge("maturing")`
-One of the main features of the `tbl_df` class is the printing:
-* Tibbles only print as many rows and columns as fit on one screen, - supplemented by a summary of the remaining rows and columns. -* Tibble reveals the type of each column, which keeps the user informed about - whether a variable is, e.g., `<chr>` or `<fct>` (character versus factor).
-Printing can be tweaked for a one-off call by calling `print()` explicitly -and setting arguments like `n` and `width`. More persistent control is -available by setting the options described below.
-Only the first 5 reduced dimensions are displayed, while all of them are queryable (e.g. ggplot). All dimensions are returned/displayed if as_tibble is used.
+One of the main features of the tbl_df
class is the printing:
Tibbles only print as many rows and columns as fit on one screen, +supplemented by a summary of the remaining rows and columns.
Tibble reveals the type of each column, which keeps the user informed about
+whether a variable is, e.g., <chr>
or <fct>
(character versus factor).
+See vignette("types")
for an overview of common
+type abbreviations.
Printing can be tweaked for a one-off call by calling print()
explicitly
+and setting arguments like n
and width
. More persistent control is
+available by setting the options described in pillar::pillar_options.
+See also vignette("digits")
for a comparison to base options,
+and vignette("numbers")
that showcases num()
and char()
+for creating columns with custom formatting options.
As of tibble 3.1.0, printing is handled entirely by the pillar package.
+If you implement a package that extends tibble,
+the printed output can be customized in various ways.
+See vignette("extending", package = "pillar")
for details,
+and pillar::pillar_options for options that control the display in the console.
Other arguments passed on to individual methods.
Passed on to tbl_format_setup()
.
Number of rows to show. If `NULL`, the default, will print all rows -if less than option `tibble.print_max`. Otherwise, will print -`tibble.print_min` rows.
Number of rows to show. If NULL
, the default, will print all rows
+if less than the print_max
option.
+Otherwise, will print as many rows as specified by the
+print_min
option.
Width of text output to generate. This defaults to `NULL`, which -means use `getOption("tibble.width")` or (if also `NULL`) -`getOption("width")`; the latter displays only the columns that fit on one -screen. You can also set `options(tibble.width = Inf)` to override this -default and always print all columns.
Width of text output to generate. This defaults to NULL
, which
+means use the width
option.
Nothing
-The following options are used by the tibble and pillar packages -to format and print `tbl_df` objects. -Used by the formatting workhorse `trunc_mat()` and, therefore, -indirectly, by `print.tbl()`.
-* `tibble.print_max`: Row number threshold: Maximum number of rows printed. - Set to `Inf` to always print all rows. Default: 20. -* `tibble.print_min`: Number of rows printed if row number threshold is - exceeded. Default: 10. -* `tibble.width`: Output width. Default: `NULL` (use `width` option). -* `tibble.max_extra_cols`: Number of extra columns printed in reduced form. - Default: 100.
+Prints a message to the console describing + the contents of the `tidyseurat` object.
library(dplyr)
-data("pbmc_small")
-pbmc_small %>% print()
+ data(pbmc_small)
+print(pbmc_small)
#> # A Seurat-tibble abstraction: 80 × 15
#> # Features=230 | Cells=80 | Active assay=RNA | Assays=RNA
#> .cell orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
@@ -161,6 +159,7 @@ Examples
#> # ℹ 70 more rows
#> # ℹ 8 more variables: RNA_snn_res.1 <fct>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>,
#> # PC_4 <dbl>, PC_5 <dbl>, tSNE_1 <dbl>, tSNE_2 <dbl>
+
Mutating joins add columns from y
to x
, matching observations based on
+the keys. There are four mutating joins: the inner join, and the three outer
+joins.
An inner_join()
only keeps observations from x
that have a matching key
+in y
.
The most important property of an inner join is that unmatched rows in either +input are not included in the result. This means that generally inner joins +are not appropriate in most analyses, because it is too easy to lose +observations.
+The three outer joins keep observations that appear in at least one of the +data frames:
A left_join()
keeps all observations in x
.
A right_join()
keeps all observations in y
.
A full_join()
keeps all observations in x
and y
.
# S3 method for Seurat
+full_join(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ...)
A pair of data frames, data frame extensions (e.g. a tibble), or +lazy data frames (e.g. from dbplyr or dtplyr). See Methods, below, for +more details.
A join specification created with join_by()
, or a character
+vector of variables to join by.
If NULL
, the default, *_join()
will perform a natural join, using all
+variables in common across x
and y
. A message lists the variables so
+that you can check they're correct; suppress the message by supplying by
+explicitly.
To join on different variables between x
and y
, use a join_by()
+specification. For example, join_by(a == b)
will match x$a
to y$b
.
To join by multiple variables, use a join_by()
specification with
+multiple expressions. For example, join_by(a == b, c == d)
will match
+x$a
to y$b
and x$c
to y$d
. If the column names are the same between
+x
and y
, you can shorten this by listing only the variable names, like
+join_by(a, c)
.
join_by()
can also be used to perform inequality, rolling, and overlap
+joins. See the documentation at ?join_by for details on
+these types of joins.
For simple equality joins, you can alternatively specify a character vector
+of variable names to join by. For example, by = c("a", "b")
joins x$a
+to y$a
and x$b
to y$b
. If variable names differ between x
and y
,
+use a named character vector like by = c("x_a" = "y_a", "x_b" = "y_b")
.
To perform a cross-join, generating all combinations of x
and y
, see
+cross_join()
.
If x
and y
are not from the same data source,
+and copy
is TRUE
, then y
will be copied into the
+same src as x
. This allows you to join tables across srcs, but
+it is a potentially expensive operation so you must opt into it.
If there are non-joined duplicate variables in x
and
+y
, these suffixes will be added to the output to disambiguate them.
+Should be a character vector of length 2.
Other parameters passed onto methods.
An object of the same type as x
(including the same groups). The order of
+the rows and columns of x
is preserved as much as possible. The output has
+the following properties:
The rows are affected by the join type.
inner_join()
returns matched x
rows.
left_join()
returns all x
rows.
right_join()
returns matched x
rows, followed by unmatched y
rows.
full_join()
returns all x
rows, followed by unmatched y
rows.
Output columns include all columns from x
and all non-key columns from
+y
. If keep = TRUE
, the key columns from y
are included as well.
If non-key columns in x
and y
have the same name, suffix
es are added
+to disambiguate. If keep = TRUE
and key columns in x
and y
have
+the same name, suffix
es are added to disambiguate these as well.
If keep = FALSE
, output columns included in by
are coerced to their
+common type between x
and y
.
By default, dplyr guards against many-to-many relationships in equality joins +by throwing a warning. These occur when both of the following are true:
A row in x
matches multiple rows in y
.
A row in y
matches multiple rows in x
.
This is typically surprising, as most joins involve a relationship of +one-to-one, one-to-many, or many-to-one, and is often the result of an +improperly specified join. Many-to-many relationships are particularly +problematic because they can result in a Cartesian explosion of the number of +rows returned from the join.
+If a many-to-many relationship is expected, silence this warning by
+explicitly setting relationship = "many-to-many"
.
In production code, it is best to preemptively set relationship
to whatever
+relationship you expect to exist between the keys of x
and y
, as this
+forces an error to occur immediately if the data doesn't align with your
+expectations.
Inequality joins typically result in many-to-many relationships by nature, so +they don't warn on them by default, but you should still take extra care when +specifying an inequality join, because they also have the capability to +return a large number of rows.
+Rolling joins don't warn on many-to-many relationships either, but many
+rolling joins follow a many-to-one relationship, so it is often useful to
+set relationship = "many-to-one"
to enforce this.
Note that in SQL, most database providers won't let you specify a +many-to-many relationship between two tables, instead requiring that you +create a third junction table that results in two one-to-many relationships +instead.
+These functions are generics, which means that packages can provide +implementations (methods) for other classes. See the documentation of +individual methods for extra arguments and differences in behaviour.
+Methods available in currently loaded packages:
inner_join()
: dplyr (data.frame
), tidyseurat (Seurat
)
+.
left_join()
: dplyr (data.frame
), tidyseurat (Seurat
)
+.
right_join()
: dplyr (data.frame
), tidyseurat (Seurat
)
+.
full_join()
: dplyr (data.frame
), tidyseurat (Seurat
)
+.
Other joins:
+cross_join()
,
+filter-joins
,
+nest_join()
data(pbmc_small)
+tt <- pbmc_small
+tt |> full_join(tibble::tibble(groups="g1", other=1:4))
+#> Joining with `by = join_by(groups)`
+#> tidyseurat says: This operation lead to duplicated cell names. A data frame is returned for independent data analysis.
+#> # A tibble: 212 × 30
+#> .cell orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
+#> <chr> <fct> <dbl> <int> <fct> <fct> <chr>
+#> 1 ATGC… SeuratPro… 70 47 0 A g2
+#> 2 CATG… SeuratPro… 85 52 0 A g1
+#> 3 CATG… SeuratPro… 85 52 0 A g1
+#> 4 CATG… SeuratPro… 85 52 0 A g1
+#> 5 CATG… SeuratPro… 85 52 0 A g1
+#> 6 GAAC… SeuratPro… 87 50 1 B g2
+#> 7 TGAC… SeuratPro… 127 56 0 A g2
+#> 8 AGTC… SeuratPro… 173 53 0 A g2
+#> 9 TCTG… SeuratPro… 70 48 0 A g1
+#> 10 TCTG… SeuratPro… 70 48 0 A g1
+#> # ℹ 202 more rows
+#> # ℹ 23 more variables: RNA_snn_res.1 <fct>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>,
+#> # PC_4 <dbl>, PC_5 <dbl>, PC_6 <dbl>, PC_7 <dbl>, PC_8 <dbl>, PC_9 <dbl>,
+#> # PC_10 <dbl>, PC_11 <dbl>, PC_12 <dbl>, PC_13 <dbl>, PC_14 <dbl>,
+#> # PC_15 <dbl>, PC_16 <dbl>, PC_17 <dbl>, PC_18 <dbl>, PC_19 <dbl>,
+#> # tSNE_1 <dbl>, tSNE_2 <dbl>, other <int>
+
+
ggplot()
initializes a ggplot object. It can be used to
+declare the input data frame for a graphic and to specify the
+set of plot aesthetics intended to be common throughout all
+subsequent layers unless specifically overridden.
# S3 method for Seurat
+ggplot(data = NULL, mapping = aes(), ..., environment = parent.frame())
Default dataset to use for plot. If not already a data.frame,
+will be converted to one by fortify()
. If not specified,
+must be supplied in each layer added to the plot.
Default list of aesthetic mappings to use for plot. +If not specified, must be supplied in each layer added to the plot.
Other arguments passed on to methods. Not currently used.
`ggplot`
+ggplot()
is used to construct the initial plot object,
+and is almost always followed by a plus sign (+
) to add
+components to the plot.
There are three common patterns used to invoke ggplot()
:
ggplot(data = df, mapping = aes(x, y, other aesthetics))
ggplot(data = df)
ggplot()
The first pattern is recommended if all layers use the same +data and the same set of aesthetics, although this method +can also be used when adding a layer using data from another +data frame.
+The second pattern specifies the default data frame to use +for the plot, but no aesthetics are defined up front. This +is useful when one data frame is used predominantly for the +plot, but the aesthetics vary from one layer to another.
+The third pattern initializes a skeleton ggplot
object, which
+is fleshed out as layers are added. This is useful when
+multiple data frames are used to produce different layers, as
+is often the case in complex graphics.
The data =
and mapping =
specifications in the arguments are optional
+(and are often omitted in practice), so long as the data and the mapping
+values are passed into the function in the right order. In the examples
+below, however, they are left in place for clarity.
library(ggplot2)
+data(pbmc_small)
+pbmc_small |>
+ ggplot(aes(groups, nCount_RNA)) +
+ geom_boxplot()
+
+
+
`ggplot()` initializes a ggplot object. It can be used to -declare the input data frame for a graphic and to specify the -set of plot aesthetics intended to be common throughout all -subsequent layers unless specifically overridden.
-Default dataset to use for plot. If not already a data.frame, -will be converted to one by [fortify()]. If not specified, -must be supplied in each layer added to the plot.
Default list of aesthetic mappings to use for plot. -If not specified, must be supplied in each layer added to the plot.
Other arguments passed on to methods. Not currently used.
DEPRECATED. Used prior to tidy evaluation.
A ggplot
-`ggplot()` is used to construct the initial plot object, -and is almost always followed by `+` to add component to the -plot. There are three common ways to invoke `ggplot()`:
- -The first method is recommended if all layers use the same -data and the same set of aesthetics, although this method -can also be used to add a layer using data from another -data frame. See the first example below. The second -method specifies the default data frame to use for the plot, -but no aesthetics are defined up front. This is useful when -one data frame is used predominantly as layers are added, -but the aesthetics may vary from one layer to another. The -third method initializes a skeleton `ggplot` object which -is fleshed out as layers are added. This method is useful when -multiple data frames are used to produce different layers, as -is often the case in complex graphics.
-# Generate some sample data, then compute mean and standard deviation
-# in each group
-
-
glimpse()
is like a transposed version of print()
:
+columns run down the page, and data runs across.
+This makes it possible to see every column in a data frame.
+It's a little like str()
applied to a data frame
+but it tries to show you as much data as possible.
+(And it always shows the underlying data, even when applied
+to a remote data source.)
See format_glimpse()
for details on the formatting.
# S3 method for tidyseurat
+glimpse(x, width = NULL, ...)
An object to glimpse at.
Width of output: defaults to the setting of the
+width
option (if finite)
+or the width of the console.
Unused, for extensibility.
The original x is (invisibly) returned, allowing glimpse()
to be
+used within a data pipeline.
glimpse
is an S3 generic with a customised method for tbl
s and
+data.frames
, and a default method that calls str()
.
data(pbmc_small)
+pbmc_small |> glimpse()
+#> Formal class 'Seurat' [package "SeuratObject"] with 13 slots
+#> ..@ assays :List of 1
+#> .. ..$ RNA:Formal class 'Assay' [package "SeuratObject"] with 8 slots
+#> ..@ meta.data :'data.frame': 80 obs. of 7 variables:
+#> .. ..$ orig.ident : Factor w/ 1 level "SeuratProject": 1 1 1 1 1 1 1 1 1 1 ...
+#> .. ..$ nCount_RNA : num [1:80] 70 85 87 127 173 70 64 72 52 100 ...
+#> .. ..$ nFeature_RNA : int [1:80] 47 52 50 56 53 48 36 45 36 41 ...
+#> .. ..$ RNA_snn_res.0.8: Factor w/ 2 levels "0","1": 1 1 2 1 1 1 1 1 1 1 ...
+#> .. ..$ letter.idents : Factor w/ 2 levels "A","B": 1 1 2 1 1 1 1 1 1 1 ...
+#> .. ..$ groups : chr [1:80] "g2" "g1" "g2" "g2" ...
+#> .. ..$ RNA_snn_res.1 : Factor w/ 3 levels "0","1","2": 1 1 1 1 1 1 1 1 1 1 ...
+#> ..@ active.assay: chr "RNA"
+#> ..@ active.ident: Factor w/ 3 levels "0","1","2": 1 1 1 1 1 1 1 1 1 1 ...
+#> .. ..- attr(*, "names")= chr [1:80] "ATGCCAGAACGACT" "CATGGCCTGTGCAT" "GAACCTGATGAACC" "TGACTGGATTCTCA" ...
+#> ..@ graphs :List of 1
+#> .. ..$ RNA_snn:Formal class 'Graph' [package "SeuratObject"] with 7 slots
+#> ..@ neighbors : list()
+#> ..@ reductions :List of 2
+#> .. ..$ pca :Formal class 'DimReduc' [package "SeuratObject"] with 9 slots
+#> .. ..$ tsne:Formal class 'DimReduc' [package "SeuratObject"] with 9 slots
+#> ..@ images : list()
+#> ..@ project.name: chr "SeuratProject"
+#> ..@ misc : list()
+#> ..@ version :Classes 'package_version', 'numeric_version' hidden list of 1
+#> .. ..$ : int [1:3] 4 0 0
+#> ..@ commands :List of 10
+#> .. ..$ NormalizeData.RNA :Formal class 'SeuratCommand' [package "SeuratObject"] with 5 slots
+#> .. ..$ ScaleData.RNA :Formal class 'SeuratCommand' [package "SeuratObject"] with 5 slots
+#> .. ..$ RunPCA.RNA :Formal class 'SeuratCommand' [package "SeuratObject"] with 5 slots
+#> .. ..$ BuildSNN.RNA.pca :Formal class 'SeuratCommand' [package "SeuratObject"] with 5 slots
+#> .. ..$ FindClusters :Formal class 'SeuratCommand' [package "SeuratObject"] with 5 slots
+#> .. ..$ RunTSNE.pca :Formal class 'SeuratCommand' [package "SeuratObject"] with 5 slots
+#> .. ..$ JackStraw.RNA.pca :Formal class 'SeuratCommand' [package "SeuratObject"] with 5 slots
+#> .. ..$ ScoreJackStraw.pca :Formal class 'SeuratCommand' [package "SeuratObject"] with 5 slots
+#> .. ..$ ProjectDim.RNA.pca :Formal class 'SeuratCommand' [package "SeuratObject"] with 5 slots
+#> .. ..$ FindVariableFeatures.RNA:Formal class 'SeuratCommand' [package "SeuratObject"] with 5 slots
+#> ..@ tools : list()
+
+
Most data operations are done on groups defined by variables.
+group_by()
takes an existing tbl and converts it into a grouped tbl
+where operations are performed "by group". ungroup()
removes grouping.
# S3 method for Seurat
+group_by(.data, ..., .add = FALSE, .drop = group_by_drop_default(.data))
A data frame, data frame extension (e.g. a tibble), or a +lazy data frame (e.g. from dbplyr or dtplyr). See Methods, below, for +more details.
In group_by()
, variables or computations to group by.
+Computations are always done on the ungrouped data frame.
+To perform computations on the grouped data, you need to use
+a separate mutate()
step before the group_by()
.
+Computations are not allowed in nest_by()
.
+In ungroup()
, variables to remove from the grouping.
When FALSE
, the default, group_by()
will
+override existing groups. To add to the existing groups, use
+.add = TRUE
.
This argument was previously called add
, but that prevented
+creating a new grouping variable called add
, and conflicts with
+our naming conventions.
Drop groups formed by factor levels that don't appear in the
+data? The default is TRUE
except when .data
has been previously
+grouped with .drop = FALSE
. See group_by_drop_default()
for details.
A grouped data frame with class grouped_df
,
+unless the combination of ...
and add
yields an empty set of
+grouping columns, in which case a tibble will be returned.
These functions are generics, which means that packages can provide +implementations (methods) for other classes. See the documentation of +individual methods for extra arguments and differences in behaviour.
+Methods available in currently loaded packages:
Currently, group_by()
internally orders the groups in ascending order. This
+results in ordered output from functions that aggregate groups, such as
+summarise()
.
When used as grouping columns, character vectors are ordered in the C locale
+for performance and reproducibility across R sessions. If the resulting
+ordering of your grouped operation matters and is dependent on the locale,
+you should follow up the grouped operation with an explicit call to
+arrange()
and set the .locale
argument. For example:
This is often useful as a preliminary step before generating content intended +for humans, such as an HTML table.
Prior to dplyr 1.1.0, character vector grouping columns were ordered in the
+system locale. If you need to temporarily revert to this behavior, you can
+set the global option dplyr.legacy_locale
to TRUE
, but this should be
+used sparingly and you should expect this option to be removed in a future
+version of dplyr. It is better to update existing code to explicitly call
+arrange(.locale = )
instead. Note that setting dplyr.legacy_locale
will
+also force calls to arrange()
to use the system locale.
Other grouping functions:
+group_map()
,
+group_nest()
,
+group_split()
,
+group_trim()
data("pbmc_small")
+pbmc_small |> group_by(groups)
+#> tidyseurat says: A data frame is returned for independent data analysis.
+#> # A tibble: 80 × 29
+#> # Groups: groups [2]
+#> .cell orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
+#> <chr> <fct> <dbl> <int> <fct> <fct> <chr>
+#> 1 ATGC… SeuratPro… 70 47 0 A g2
+#> 2 CATG… SeuratPro… 85 52 0 A g1
+#> 3 GAAC… SeuratPro… 87 50 1 B g2
+#> 4 TGAC… SeuratPro… 127 56 0 A g2
+#> 5 AGTC… SeuratPro… 173 53 0 A g2
+#> 6 TCTG… SeuratPro… 70 48 0 A g1
+#> 7 TGGT… SeuratPro… 64 36 0 A g1
+#> 8 GCAG… SeuratPro… 72 45 0 A g1
+#> 9 GATA… SeuratPro… 52 36 0 A g1
+#> 10 AATG… SeuratPro… 100 41 0 A g1
+#> # ℹ 70 more rows
+#> # ℹ 22 more variables: RNA_snn_res.1 <fct>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>,
+#> # PC_4 <dbl>, PC_5 <dbl>, PC_6 <dbl>, PC_7 <dbl>, PC_8 <dbl>, PC_9 <dbl>,
+#> # PC_10 <dbl>, PC_11 <dbl>, PC_12 <dbl>, PC_13 <dbl>, PC_14 <dbl>,
+#> # PC_15 <dbl>, PC_16 <dbl>, PC_17 <dbl>, PC_18 <dbl>, PC_19 <dbl>,
+#> # tSNE_1 <dbl>, tSNE_2 <dbl>
+
+
Aggregate cells
Efficiently bind multiple data frames by row and column
Order rows using column values
Coerce lists, matrices, and more to data frames
Efficiently bind multiple data frames by row and column
Example data set 2
Cell types of 80 PBMC single cells
Count the observations in each group
Keep distinct/unique rows
Extract a character column into multiple columns using regular +expression groups
Keep rows that match a condition
Printing tibbles
Mutating joins
Create a new ggplot
from a tidyseurat
Get a glimpse of your data
Group by one or more variables
Create a new ggplot from a tidyseurat object
Mutating joins
Extract and join information for features.
join_features
(DEPRECATED) Extract and join information for transcripts.
Mutating joins
Create, modify, and delete columns
Nest rows into a list-column of data frames
Example data set 2
Intercellular ligand-receptor interactions for +38 ligands from a single cell RNA-seq cluster.
Pivot data from wide to long
Initiate a plotly visualization
Printing tibbles
Extract a single column
Rename columns
Returns variables from an expression
Format the header of a tibble
Mutating joins
Coerce lists, matrices, and more to data frames
Group input by rows
Sample n rows from a table
Keep or drop columns using their names and types
Separate a character column into multiple columns with a regular +expression or numeric locations
slice(<Seurat>)
slice_sample(<Seurat>)
slice_head(<Seurat>)
slice_tail(<Seurat>)
slice_min(<Seurat>)
slice_max(<Seurat>)
Subset rows using their positions
Summarise each group down to one row
Format the header of a tibble
tidy for seurat
tidy for `Seurat`
unnest
Unite multiple columns into one by pasting strings together
unnest_seurat
Unnest a list-column of data frames into rows and columns