Skip to content

Commit

Permalink
Merge pull request #65 from jbloomlab/select_target_names
Browse files Browse the repository at this point in the history
added `select_target_names` option to `Targets`
  • Loading branch information
Kate D. Crawford authored Dec 21, 2019
2 parents eb4d2bd + f72d03b commit 17a08b6
Show file tree
Hide file tree
Showing 4 changed files with 95 additions and 35 deletions.
16 changes: 13 additions & 3 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,21 @@ All notable changes to this project will be documented in this file.

The format is based on `Keep a Changelog <https://keepachangelog.com>`_.

0.1.2
-----

Added
+++++
* Added ``select_target_names`` option to ``Targets``.

0.1.1
---------------------------
* Fixed DataFrame querying bug in `./alignparse/ccs.py`
-----

Fixed
+++++
* Fixed DataFrame querying bug in ``./alignparse/ccs.py``

0.1.0
---------------------------
-----
Initial release

2 changes: 1 addition & 1 deletion alignparse/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@

__author__ = '`the Bloom lab <https://research.fhcrc.org/bloom/en.html>`_'
__email__ = '[email protected]'
__version__ = '0.1.1'
__version__ = '0.1.2'
__url__ = 'https://github.com/jbloomlab/alignparse'
22 changes: 18 additions & 4 deletions alignparse/targets.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,9 @@ class Targets:
ignore_feature_parse_specs_keys : None or list
Ignore these target-level keys in `feature_parse_specs`. Useful for
YAML with default keys that don't represent actual targets.
select_target_names : None or list
If `None`, the created object is for all sequences in `seqsfile`.
Otherwise pass a list with names of just the sequences of interest.
Attributes
----------
Expand All @@ -361,7 +364,9 @@ def __repr__(self):
def __init__(self, *, seqsfile, feature_parse_specs,
allow_extra_features=False, seqsfileformat='genbank',
allow_clipped_muts_seqs=False,
ignore_feature_parse_specs_keys=None):
ignore_feature_parse_specs_keys=None,
select_target_names=None,
):
"""See main class docstring."""
# read feature_parse_specs
if isinstance(feature_parse_specs, str):
Expand Down Expand Up @@ -393,6 +398,11 @@ def __init__(self, *, seqsfile, feature_parse_specs,
'mutation_op_count']

# get targets from seqsfile
if select_target_names is not None:
if not (isinstance(select_target_names, list) and
len(select_target_names) >= 1):
raise ValueError('`select_target_names` must be none or '
'non-empty list')
if isinstance(seqsfile, str):
seqrecords = list(Bio.SeqIO.parse(seqsfile, format=seqsfileformat))
else:
Expand All @@ -403,6 +413,8 @@ def __init__(self, *, seqsfile, feature_parse_specs,
self._target_dict = {}
for seqrecord in seqrecords:
tname = Target.get_name(seqrecord)
if select_target_names and (tname not in select_target_names):
continue
target = Target(seqrecord=seqrecord,
req_features=self.features_to_parse(tname, 'name'),
allow_extra_features=allow_extra_features,
Expand All @@ -421,6 +433,8 @@ def __init__(self, *, seqsfile, feature_parse_specs,
str(self._return_suffixes))
self.target_names = [target.name for target in self.targets]
self.target_seqs = {target.name: target.seq for target in self.targets}
if not self.targets:
raise ValueError('no targets found')

# check needed for `to_csv` option of `parse_alignment`.
if len(self.target_names) != len({tname.replace(' ', '_') for
Expand All @@ -442,7 +456,7 @@ def __init__(self, *, seqsfile, feature_parse_specs,
# features unless flag to do this explicitly set
if not allow_clipped_muts_seqs:
for t in self.target_names:
for f in self.features_to_parse(tname, 'name'):
for f in self.features_to_parse(t, 'name'):
for return_name in ['sequence', 'mutations']:
if return_name in self._parse_returnvals(t, f):
filt = self._parse_filters[t][f]
Expand Down Expand Up @@ -757,7 +771,7 @@ def align_and_parse(self,
Returns
-------
(readstats, aligned, filtered) : tuple
Same meaning as for :meth:`Targets.parse_alignments` except
Same meaning as for :meth:`Targets.parse_alignment` except
the data frames / CSV files all have additional columns indicating
name of each query set (`name_cols`) as well as any `group_cols`.
Expand Down Expand Up @@ -1267,7 +1281,7 @@ def _parse_alignment_cs(self, samfile, *, multi_align='primary',
Note
----
This method returns the same information that can be better
obtained via :meth:`Targets.parse_alignments` by setting
obtained via :meth:`Targets.parse_alignment` by setting
to return 'cs', 'clip5', 'clip3' for every feature. It is
currently retained only for debugging / testing purposes,
and may eventually be removed.
Expand Down
90 changes: 63 additions & 27 deletions notebooks/lasv_pilot.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -461,6 +461,42 @@
"_ = targets.plot(ax_width=10)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Note that if needed, it is also possible to get a [Targets](https://jbloomlab.github.io/alignparse/alignparse.targets.html#alignparse.targets.Targets) object for just some of the sequences specified in `seqsfile` or `feature_parse_specs`.\n",
"This is most commonly useful when `seqsfile` contains additional sequences that are not of interest.\n",
"Below we illustrate how to do this by:\n",
" - Setting `select_target_names` to only keep the *LASV_Josiah_WT* sequence in `seqsfile`\n",
" - Setting `ignore_feature_parse_specs_keys` to ignore the other targets (in this case, *LASV_Josiah_OPT*) in `feature_parse_specs`"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Here are the names of the retained targets: ['LASV_Josiah_WT']\n"
]
}
],
"source": [
"targets_subset = alignparse.targets.Targets(\n",
" seqsfile=targetfiles,\n",
" feature_parse_specs=lasv_parse_specs_file,\n",
" allow_extra_features=True,\n",
" select_target_names=['LASV_Josiah_WT'],\n",
" ignore_feature_parse_specs_keys=['LASV_Josiah_OPT']\n",
" )\n",
"\n",
"print(f\"Here are the names of the retained targets: {targets_subset.target_names}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand All @@ -485,7 +521,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 11,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -533,7 +569,7 @@
"0 input_files/lasv_example_ccs.fastq.gz "
]
},
"execution_count": 10,
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
Expand Down Expand Up @@ -561,7 +597,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -577,7 +613,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 13,
"metadata": {},
"outputs": [
{
Expand All @@ -598,7 +634,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 14,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -684,7 +720,7 @@
"4 0.0000 "
]
},
"execution_count": 13,
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -702,7 +738,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 15,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -757,7 +793,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 16,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -786,7 +822,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 17,
"metadata": {
"scrolled": true
},
Expand Down Expand Up @@ -841,7 +877,7 @@
"0 ./output_files/lasv_pilot_alignments.sam "
]
},
"execution_count": 16,
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -861,7 +897,7 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 18,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -891,7 +927,7 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 19,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -925,7 +961,7 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": 20,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -976,7 +1012,7 @@
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 21,
"metadata": {
"scrolled": true
},
Expand Down Expand Up @@ -1051,7 +1087,7 @@
"4 unmapped 0 lasv_pilot"
]
},
"execution_count": 20,
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -1062,7 +1098,7 @@
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": 22,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -1098,7 +1134,7 @@
},
{
"cell_type": "code",
"execution_count": 22,
"execution_count": 23,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -1184,7 +1220,7 @@
"4 lasv_pilot "
]
},
"execution_count": 22,
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -1195,7 +1231,7 @@
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": 24,
"metadata": {
"scrolled": true
},
Expand Down Expand Up @@ -1255,7 +1291,7 @@
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": 25,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -1321,7 +1357,7 @@
},
{
"cell_type": "code",
"execution_count": 25,
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -1344,7 +1380,7 @@
},
{
"cell_type": "code",
"execution_count": 26,
"execution_count": 27,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -1526,7 +1562,7 @@
},
{
"cell_type": "code",
"execution_count": 27,
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -1552,7 +1588,7 @@
},
{
"cell_type": "code",
"execution_count": 28,
"execution_count": 29,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -1603,7 +1639,7 @@
},
{
"cell_type": "code",
"execution_count": 29,
"execution_count": 30,
"metadata": {
"scrolled": true
},
Expand Down Expand Up @@ -1954,7 +1990,7 @@
},
{
"cell_type": "code",
"execution_count": 30,
"execution_count": 31,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -1976,7 +2012,7 @@
},
{
"cell_type": "code",
"execution_count": 31,
"execution_count": 32,
"metadata": {},
"outputs": [
{
Expand Down

0 comments on commit 17a08b6

Please sign in to comment.