diff --git a/dnachisel/SequencePattern/MotifPssmPattern.py b/dnachisel/SequencePattern/MotifPssmPattern.py index 7aa785c..9f1ecea 100644 --- a/dnachisel/SequencePattern/MotifPssmPattern.py +++ b/dnachisel/SequencePattern/MotifPssmPattern.py @@ -150,7 +150,7 @@ def list_from_file( sequence(s) with the absolute highest possible score". """ if isinstance(motifs_file, str): - with open("./jaspar.txt", "r") as f: + with open(motifs_file, "r") as f: motifs_list = motifs.parse(f, file_format) else: motifs_list = motifs.parse(motifs_file, file_format) diff --git a/tests/data/multiple_motifs.meme.txt b/tests/data/multiple_motifs.meme.txt new file mode 100644 index 0000000..cec81f5 --- /dev/null +++ b/tests/data/multiple_motifs.meme.txt @@ -0,0 +1,33 @@ +MEME version 4 + +ALPHABET= ACGT + +strands: + - + +Background letter frequencies +A 0.25 C 0.25 G 0.25 T 0.25 + +MOTIF MA0016.1 MA0016.1.usp +letter-probability matrix: alength= 4 w= 10 nsites= 38 E= 0 + 0.000000 0.026316 0.973684 0.000000 + 0.026316 0.000000 0.947368 0.026316 + 0.000000 0.000000 1.000000 0.000000 + 0.000000 0.000000 1.000000 0.000000 + 0.000000 0.000000 0.000000 1.000000 + 0.000000 0.947368 0.026316 0.026316 + 0.921053 0.000000 0.078947 0.000000 + 0.131579 0.657895 0.078947 0.131579 + 0.131579 0.210526 0.578947 0.078947 + 0.157895 0.263158 0.421053 0.157895 +URL http://jaspar.genereg.net/matrix/MA0016.1 + +MOTIF MA0011.2 MA0011.2.br +letter-probability matrix: alength= 4 w= 6 nsites= 12 E= 0 + 0.000000 0.833333 0.000000 0.166667 + 0.000000 0.083333 0.000000 0.916667 + 1.000000 0.000000 0.000000 0.000000 + 0.083333 0.083333 0.166667 0.666667 + 0.166667 0.000000 0.083333 0.750000 + 0.083333 0.166667 0.083333 0.666667 +URL http://jaspar.genereg.net/matrix/MA0011.2 + diff --git a/tests/data/single_motif.meme.txt b/tests/data/single_motif.meme.txt new file mode 100644 index 0000000..928dd3d --- /dev/null +++ b/tests/data/single_motif.meme.txt @@ -0,0 +1,19 @@ +MEME version 4 + +ALPHABET= ACGT + +strands: + - + +Background letter frequencies +A 0.25 C 0.25 G 0.25 T 0.25 + +MOTIF MA0011.2 MA0011.2.br +letter-probability matrix: alength= 4 w= 6 nsites= 12 E= 0 + 0.000000 0.833333 0.000000 0.166667 + 0.000000 0.083333 0.000000 0.916667 + 1.000000 0.000000 0.000000 0.000000 + 0.083333 0.083333 0.166667 0.666667 + 0.166667 0.000000 0.083333 0.750000 + 0.083333 0.166667 0.083333 0.666667 +URL http://jaspar.genereg.net/matrix/MA0011.2 + diff --git a/tests/test_patterns.py b/tests/test_patterns.py index 8b9bc44..f480d39 100644 --- a/tests/test_patterns.py +++ b/tests/test_patterns.py @@ -1,4 +1,18 @@ -from dnachisel.SequencePattern import SequencePattern +import pytest +from pathlib import Path + +from dnachisel import SequencePattern, MotifPssmPattern + + +@pytest.fixture +def test_single_motif_filepath(): + return str(Path(__file__).parent / 'data' / 'single_motif.meme.txt') + + +@pytest.fixture +def test_multiple_motif_filepath(): + return str(Path(__file__).parent / 'data' / 'multiple_motifs.meme.txt') + def test_patterns_from_string(): pattern = SequencePattern.from_string("6xT") @@ -6,4 +20,20 @@ def test_patterns_from_string(): pattern = SequencePattern.from_string("BsmBI_site") assert pattern.expression == "CGTCTC" pattern = SequencePattern.from_string("5x2mer") - assert pattern.expression == '([ATGC]{2})\\1{4}' \ No newline at end of file + assert pattern.expression == '([ATGC]{2})\\1{4}' + + +def test_pssm_pattern_from_file( + test_single_motif_filepath, test_multiple_motif_filepath +): + single_pattern = MotifPssmPattern.list_from_file( + test_single_motif_filepath, "minimal", relative_threshold=0.9 + ) + assert len(single_pattern) == 1 + assert all([isinstance(p, MotifPssmPattern) for p in single_pattern]) + + multiple_patterns = MotifPssmPattern.list_from_file( + test_multiple_motif_filepath, "minimal", relative_threshold=0.9 + ) + assert len(multiple_patterns) == 2 + assert all([isinstance(p, MotifPssmPattern) for p in multiple_patterns])