Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New module: anndata/getsize module #6925

Merged
merged 8 commits into from
Nov 11, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions modules/nf-core/anndata/getsize/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Conda environment for the anndata/getsize module.
# The anndata pin below is the same version (0.10.9) that appears in the
# container tags in main.nf.
channels:
- conda-forge
- bioconda
dependencies:
- conda-forge::anndata=0.10.9
36 changes: 36 additions & 0 deletions modules/nf-core/anndata/getsize/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
// Writes one count taken from an AnnData .h5ad file to <prefix>.txt.
// The count is chosen by `size_type`; the work itself is done by the
// Python template templates/getsize.py.
process ANNDATA_GETSIZE {
tag "$meta.id"
label 'process_single'

// Software comes from the conda environment file or from one of two
// anndata 0.10.9 images: the oras:// one when the container engine is
// singularity and task.ext.singularity_pull_docker_container is not set,
// otherwise the plain image reference.
conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'oras://community.wave.seqera.io/library/anndata:0.10.9--d13580e4b297da7c':
'community.wave.seqera.io/library/anndata:0.10.9--1eab54e300e1e584' }"

// h5ad      : file handed to the template, which reads it with anndata
// size_type : string selecting which count is written (see the template
//             for the accepted values)
input:
tuple val(meta), path(h5ad)
val size_type

// size     : meta map plus the text file(s) matching *.txt
// versions : versions.yml listing the python and anndata versions
output:
tuple val(meta), path("*.txt"), emit: size
path "versions.yml" , emit: versions

when:
task.ext.when == null || task.ext.when

// NOTE(review): `prefix` is assigned without `def`, presumably so the
// template can reference it as a placeholder -- confirm before adding `def`.
script:
prefix = task.ext.prefix ?: "${meta.id}"
template 'getsize.py'

// Stub run: creates an empty <prefix>.txt and a versions.yml without
// invoking the template.
stub:
def prefix = task.ext.prefix ?: "${meta.id}"
"""
touch ${prefix}.txt

cat <<-END_VERSIONS > versions.yml
${task.process}:
python: \$(python3 -c 'import platform; print(platform.python_version())')
anndata: \$(python3 -c 'import anndata as ad; print(ad.__version__)')
END_VERSIONS
"""
}
54 changes: 54 additions & 0 deletions modules/nf-core/anndata/getsize/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
name: "anndata_getsize"
description: Get the size (number of cells/observations or genes/variables) of an AnnData object stored as an h5ad file
keywords:
- anndata
- single-cell
- scanpy
tools:
- "anndata":
description: "An annotated data matrix."
homepage: "http://anndata.rtfd.io"
documentation: "http://anndata.rtfd.io"
tool_dev_url: "https://github.com/theislab/anndata"
doi: "10.21105/joss.04371"
licence: ["BSD-3-clause"]
identifier: biotools:anndata

input:
- - meta:
type: map
description: |
Groovy Map containing sample information
e.g. `[ id:'sample1', single_end:false ]`

- h5ad:
type: file
description: H5AD file of anndata object
pattern: "*.h5ad"

- - size_type:
type: string
description: either 'cells', 'genes', 'obs', or 'var'

output:
- size:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. `[ id:'sample1', single_end:false ]`
- "*.txt":
type: file
description: text file containing the requested size
pattern: "*.txt"

- versions:
- "versions.yml":
type: file
description: File containing software versions
pattern: "versions.yml"

authors:
- "@LeonHafner"
maintainers:
- "@LeonHafner"
51 changes: 51 additions & 0 deletions modules/nf-core/anndata/getsize/templates/getsize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#!/usr/bin/env python3

import platform

import anndata as ad


def format_yaml_like(data: dict, indent: int = 0) -> str:
    """Render a (possibly nested) dictionary as YAML-like text.

    Each key becomes one "key: value" line. A value that is itself a
    dictionary is rendered recursively beneath its key, one indentation
    level deeper.

    Args:
        data (dict): Mapping to render.
        indent (int): Indentation level applied to every key of ``data``.

    Returns:
        str: The rendered text; an empty string for an empty dictionary.
    """
    # NOTE(review): the line terminators below are written with a doubled
    # backslash, presumably because this file is rendered as a Nextflow
    # template before Python runs it -- confirm before changing them.
    pad = indent * " "
    pieces = []
    for name, entry in data.items():
        if not isinstance(entry, dict):
            pieces.append(f"{pad}{name}: {entry}\\n")
            continue
        nested = format_yaml_like(entry, indent + 1)
        pieces.append(f"{pad}{name}:\\n{nested}")
    return "".join(pieces)


# Open the input with backed="r" (read-only, file-backed access); only the
# object's dimensions are used below.
adata = ad.read_h5ad("${h5ad}", backed="r")

# Accepted size types and how each one is looked up. "cells" and "obs" both
# report n_obs; "genes" and "var" both report n_vars.
size_functions = {
    "cells": lambda: adata.n_obs,
    "genes": lambda: adata.n_vars,
    "obs": lambda: adata.n_obs,
    "var": lambda: adata.n_vars,
}

# The requested type is matched case-insensitively.
size_type = "${size_type}".lower()
if size_type not in size_functions:
    # The f-string is delimited with double quotes because reusing the outer
    # quote character inside a replacement field is a SyntaxError on Python
    # versions older than 3.12. The message text itself is unchanged.
    raise ValueError(f"Size type must be one of {', '.join(size_functions.keys())}.")

size = size_functions[size_type]()

# The output file holds just the number, with no trailing newline.
with open("${prefix}.txt", "w") as f:
    f.write(str(size))


# Versions
versions = {"${task.process}": {"python": platform.python_version(), "anndata": ad.__version__}}

with open("versions.yml", "w") as f:
    f.write(format_yaml_like(versions))
108 changes: 108 additions & 0 deletions modules/nf-core/anndata/getsize/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
// nf-test suite for ANNDATA_GETSIZE.
// All four tests feed the same remote .h5ad file and differ only in the
// size type ('cells' or 'genes') and in whether the run uses -stub.
// Each test asserts that the process succeeded and that its outputs match
// the stored snapshot.
nextflow_process {

name "Test Process ANNDATA_GETSIZE"
script "../main.nf"
process "ANNDATA_GETSIZE"

tag "modules"
tag "modules_nfcore"
tag "anndata"
tag "anndata/getsize"

// Real run, size type 'cells'.
test("scdownstream - h5ad - cells") {

when {
process {
"""
input[0] = [
[ id:'test', single_end:false ], // meta map
file('https://raw.githubusercontent.com/nf-core/test-datasets/scdownstream/samples/SAMN14430799_custom_emptydrops_filter_matrix_5k.h5ad', checkIfExists: true),
]
input[1] = 'cells'
"""
}
}

then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() }
)
}

}

// Real run, size type 'genes'.
test("scdownstream - h5ad - genes") {

when {
process {
"""
input[0] = [
[ id:'test', single_end:false ], // meta map
file('https://raw.githubusercontent.com/nf-core/test-datasets/scdownstream/samples/SAMN14430799_custom_emptydrops_filter_matrix_5k.h5ad', checkIfExists: true),
]
input[1] = 'genes'
"""
}
}

then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() }
)
}

}

// Stub run (-stub), size type 'cells'.
test("scdownstream - h5ad - cells - stub") {

options "-stub"

when {
process {
"""
input[0] = [
[ id:'test', single_end:false ], // meta map
file('https://raw.githubusercontent.com/nf-core/test-datasets/scdownstream/samples/SAMN14430799_custom_emptydrops_filter_matrix_5k.h5ad', checkIfExists: true),
]
input[1] = 'cells'
"""
}
}

then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() }
)
}

}

// Stub run (-stub), size type 'genes'.
test("scdownstream - h5ad - genes - stub") {

options "-stub"

when {
process {
"""
input[0] = [
[ id:'test', single_end:false ], // meta map
file('https://raw.githubusercontent.com/nf-core/test-datasets/scdownstream/samples/SAMN14430799_custom_emptydrops_filter_matrix_5k.h5ad', checkIfExists: true),
]
input[1] = 'genes'
"""
}
}

then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() }
)
}

}

}
Loading
Loading