Skip to content

Commit

Permalink
Merge branch 'rel-0.3.4'
Browse files Browse the repository at this point in the history
  • Loading branch information
alpae committed Aug 19, 2024
2 parents 62ae7cb + 95b531a commit 790a086
Show file tree
Hide file tree
Showing 5 changed files with 36 additions and 24 deletions.
2 changes: 1 addition & 1 deletion FastOMA/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@

__packagename__ = "FastOMA"
__version__ = "0.3.3"
__version__ = "0.3.4"
2 changes: 1 addition & 1 deletion FastOMA/collect_subhogs.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@ def write_roothogs(orthoxml: Path, roothog_folder: Path, output_file_roothog_tsv
for gene in group_members:
tsv.write(f"{group_name}\t{gene}\t{omamer_roothog}\n")

_write_group_fasta(fasta_format, group_members, group_name, id_transformer, meta, output_fasta_groups,
_write_group_fasta(fasta_format, group_members, group_name.replace(":", ""), id_transformer, meta, output_fasta_groups,
roothog_folder)

logger.info("writing of %s done. created %d groups containing %d proteins in total",
Expand Down
42 changes: 26 additions & 16 deletions FastOMA/zoo/hog/extract_groups.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import itertools

from ..utils import auto_open
import collections
from time import time
Expand Down Expand Up @@ -72,8 +74,20 @@ def add_genome_genes(self, genome_node):
self.genes.update(generef_2_xref)
return True

def _count_genes(self, node):
count = 0
for gene in node.iter('{http://orthoXML.org/2011/}geneRef'):
count += 1
for og in node.iter('{http://orthoXML.org/2011/}orthologGroup'):
for n in og.text:
if isinstance(n, Gene):
count += 1
return count

def _collect_genes(self, node):
genes = set([]); to_rem = []
genes = set([])
if node.tag != "{http://orthoXML.org/2011/}orthologGroup":
raise RuntimeError("_collect_genes() only works for ortholog groups")
for child in node.iter():
if child == node:
continue
Expand All @@ -83,22 +97,15 @@ def _collect_genes(self, node):
except KeyError:
logger.info(f"ignoring gene(id={child.get('id')}), probably in skip set.")
pass
to_rem.append(child)
elif child.tag == "{http://orthoXML.org/2011/}orthologGroup":
genes.update((n for n in child.text if isinstance(n, Gene)))
to_rem.append(child)
for c in to_rem:
try:
node.remove(c)
except ValueError as e:
# this is not a direct child of node. we ignore this potential
# memory-leak as the entire group will be deleted at latest once
# we reach the root orthologGroup node.
pass
return genes

def merge_children(self, node):
    """Collapse ``node`` into the set of genes it contains.

    The genes are gathered with ``self._collect_genes(node)`` first. Then
    every direct child that is an ``orthologGroup``, ``geneRef`` or
    ``paralogGroup`` is removed from ``node``, and the gathered genes are
    stored in ``node.text``. Other children are left in place.

    :param node: element to collapse; modified in place.
    """
    collected = self._collect_genes(node)
    mergeable_tags = (
        "{http://orthoXML.org/2011/}orthologGroup",
        "{http://orthoXML.org/2011/}geneRef",
        "{http://orthoXML.org/2011/}paralogGroup",
    )
    # Snapshot the children to drop so that node is not mutated while
    # it is being iterated.
    obsolete = [child for child in node if child.tag in mergeable_tags]
    for child in obsolete:
        node.remove(child)
    node.text = collected

def get_group(self, node):
Expand Down Expand Up @@ -134,11 +141,11 @@ def handle_duplication_node(self, elem):

class MarkerGroupExtractor(GroupExtractor):
def handle_duplication_node(self, elem):
nr_children = [len(self._collect_genes(child)) for child in elem]
nr_children = [self._count_genes(child) for child in elem]
max_pos = nr_children.index(max(nr_children))
for i, child in enumerate(elem):
if i != max_pos:
elem.remove(child)
to_rem = [c for i, c in enumerate(elem) if i != max_pos]
for child in to_rem:
elem.remove(child)


def parse_orthoxml(fh, processor:GroupExtractor):
Expand Down Expand Up @@ -174,7 +181,10 @@ def fixtag(tag, ns=""):
processor.merge_children(elem)
if og_level == extract_at_depth:
logger.debug("dumping annotated group with {} genes".format(len(elem.text)))
yield processor.get_group(elem)
if len(elem.text) > 1:
yield processor.get_group(elem)
else:
logger.debug("won't return group of less than two proteins")
elem.clear()
extract_at_depth = -1 if processor.target_clade is not None else 0
if og_level == 0:
Expand Down
10 changes: 6 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ installed.
```bash
nextflow run dessimozlab/FastOMA -profile docker --input_folder /path/to/in_folder --output_folder /path/to/out_folder
```
You could also add specific version to be used by adding `-r v0.3.1` to the command line.
You can also select a specific version by adding `-r v0.3.4` to the command line.
Nextflow will automatically fetch the [dessimozlab/FastOMA](https://github.com/dessimozlab/FastOMA) repository and start
the `FastOMA.nf` workflow. The `-profile` argument must be used to specify the profile to use. We support `docker`,
`singularity` and `conda` which then automatically set up the necessary tools by downloading the required containers or creating
Expand Down Expand Up @@ -88,10 +88,10 @@ There are four ways to run/install FastOMA detailed below:
The FastOMA workflow can be run directly without any installation using nextflow's ability to fetch a workflow from github. A specific version of FastOMA can be selected with nextflow's `-r` option:

```bash
nextflow run desimozlab/FastOMA -r v0.3.1 -profile conda
nextflow run dessimozlab/FastOMA -r v0.3.4 -profile conda
```

This will fetch version v0.3.1 from github and run the FastOMA workflow using the conda profile. See section [How to run fastOMA](#how-to-run-fastoma).
This will fetch version v0.3.4 from github and run the FastOMA workflow using the conda profile. See section [How to run fastOMA](#how-to-run-fastoma).

### 2. Cloning the FastOMA repo and running from there

Expand Down Expand Up @@ -184,7 +184,7 @@ nextflow run FastOMA.nf -profile docker \
--output_folder myresult/
```
This will use the container that is tagged with the current commit id. Similarly, one could also use
`--container_version "0.3.1"` to use the container with version `dessimozlab/fastoma:0.3.1` from dockerhub. Check the latest version on the [DockerHub](https://hub.docker.com/r/dessimozlab/fastoma/tags).
`--container_version "0.3.4"` to use the container with version `dessimozlab/fastoma:0.3.4` from dockerhub. Check the latest version on the [DockerHub](https://hub.docker.com/r/dessimozlab/fastoma/tags).

### Singularity
Since Docker needs administrator privileges (root access), [Singularity](https://apptainer.org/index.html) (a.k.a. Apptainer) is a good alternative. It can be installed using [Conda](https://anaconda.org/conda-forge/singularity) with `conda install conda-forge::singularity`. However, on most academic HPC clusters, Singularity is already installed and can be loaded with `module load`.
Expand Down Expand Up @@ -449,6 +449,8 @@ Majidian, Sina, Yannis Nevers, Ali Yazdizadeh Kharrazi, Alex Warwick Vesztrocy,


## Change log
- Update v0.3.4: Fixing a bug in marker gene group extraction. Previously, a marker group could contain more than one gene per species
- Update v0.3.3: improvements for nextflow (selection of alternative versions) and updates on readme
- Update v0.3.1: splitting HOG and sampling
- Update v0.1.6: adding dynamic resources, additional and improved output
- Update v0.1.5: docker, add help, clean nextflow
Expand Down
4 changes: 2 additions & 2 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@ manifest {
nextflowVersion = ">=22.10.4"
defaultBranch = "main"
doi = "10.1101/2024.01.29.577392"
version = "0.3.3"
version = "0.3.4"
}

params {
container_name = "dessimozlab/fastoma"
container_version = "0.3.3"
container_version = "0.3.4"
omamer_db = "https://omabrowser.org/All/LUCA.h5"
debug_enabled = false
help = false
Expand Down

0 comments on commit 790a086

Please sign in to comment.