Skip to content

Commit

Permalink
Use variable genes in PCA within clustering wrapper
Browse files (browse the repository at this point in the history)
  • Loading branch information
Matthew Bernstein authored and Matthew Bernstein committed May 20, 2021
1 parent 78859d0 commit af3303a
Showing 1 changed file with 12 additions and 4 deletions.
16 changes: 12 additions & 4 deletions cello/scanpy_cello.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,10 @@ def cello(
mod = ce._retrieve_pretrained_model(adata, algo, rsrc_loc)
if mod is None:
mod = ce.train_model(
adata, rsrc_loc, algo=algo, log_dir=log_dir
adata,
rsrc_loc,
algo=algo,
log_dir=log_dir
)
if out_prefix:
out_model_f = '{}.model.dill'.format(out_prefix)
Expand Down Expand Up @@ -212,7 +215,8 @@ def normalize_and_cluster(
adata: AnnData,
n_pca_components: int = 50,
n_neighbors: int = 15,
cluster_res: float = 1.0
n_top_genes: int = 10000,
cluster_res: float = 2.0
):
"""
Normalize and cluster an expression matrix in units of raw UMI counts.
Expand All @@ -228,7 +232,10 @@ def normalize_and_cluster(
Number of neighbors to use for computing the nearest-neighbors
graph. Clustering is performed using community detection on this
nearest-neighbors graph.
cluster_res (default 1.0)
n_top_genes (default 10000)
Number of highly variable genes to select. PCA is computed on these
genes only, ahead of the nearest-neighbors graph and the clustering.
cluster_res (default 2.0)
Cluster resolution for the Leiden community detection algorithm.
A higher resolution produces more fine-grained, smaller clusters.
"""
Expand All @@ -238,7 +245,8 @@ def normalize_and_cluster(
sys.exit("The function 'normalize_and_cluster' requires that scanpy package be installed. To install scanpy, run 'pip install scanpy'")
sc.pp.normalize_total(adata, target_sum=1e6)
sc.pp.log1p(adata)
sc.pp.pca(adata, n_comps=n_pca_components)
sc.pp.highly_variable_genes(adata, n_top_genes=n_top_genes)
sc.pp.pca(adata, n_comps=n_pca_components, use_highly_variable=True)
sc.pp.neighbors(adata, n_neighbors=n_neighbors)
sc.tl.leiden(adata, resolution=cluster_res)

Expand Down

0 comments on commit af3303a

Please sign in to comment.