Skip to content

Commit

Permalink
Merge pull request #506 from juminlee/bugfix/dssp_version_check
Browse files: browse the repository at this point in the history
[VIP] Bug fix and improvement in DSSP version check
  • Loading branch information
pckroon authored Mar 22, 2023
2 parents 6d60999 + c7ed734 commit 63aceb6
Show file tree
Hide file tree
Showing 10 changed files with 383 additions and 11 deletions.
23 changes: 16 additions & 7 deletions vermouth/dssp/dssp.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import os
import subprocess
import tempfile
import re

from ..file_writer import deferred_open
from ..pdb import pdb
Expand All @@ -31,6 +32,7 @@
from ..log_helpers import StyleAdapter, get_logger

LOGGER = StyleAdapter(get_logger(__name__))
SUPPORTED_DSSP_VERSIONS = ("2.2.1", "3.0.0")


class DSSPError(Exception):
Expand Down Expand Up @@ -143,7 +145,7 @@ def read_dssp2(lines):
return secstructs


def run_dssp(system, executable='dssp', savefile=None, defer_writing=True, version="3.0.0"):
def run_dssp(system, executable='dssp', savefile=None, defer_writing=True):
"""
Run DSSP on a system and return the assigned secondary structures.
Expand Down Expand Up @@ -172,8 +174,6 @@ def run_dssp(system, executable='dssp', savefile=None, defer_writing=True, versi
If set to a path, the output of DSSP is written in that file.
defer_writing: bool
Whether to use :meth:`~vermouth.file_writer.DeferredFileWriter.write` for writing data
version: str
Supported versions for running dssp
Returns
list[str]
Expand All @@ -194,10 +194,19 @@ def run_dssp(system, executable='dssp', savefile=None, defer_writing=True, versi
Parse a DSSP output.
"""
# check version
process = subprocess.run(["dssp", "--version"], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
version_found = process.stdout.decode('UTF8')
if version not in version_found:
raise DSSPError('Vermouth currently only supports DSSP version 3.0.0.')
process = subprocess.run([executable, "--version"], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
match = re.search('\d+\.\d+\.\d+', process.stdout.decode('UTF8'))
version = match[0] if match else None
if not version:
raise DSSPError('Failed to get DSSP version information.')
if not version in SUPPORTED_DSSP_VERSIONS:
LOGGER.warning("Vermouth is tested only with DSSP versions {}. "
"The provided DSSP (version {}) may result in inaccurate "
"secondary structure assignments. As alternative you can "
"provide a secondary structure assignment string using "
"the `-ss` option.",
SUPPORTED_DSSP_VERSIONS, version,
type='DSSP-version')

tmpfile_handle, tmpfile_name = tempfile.mkstemp(suffix='.pdb', text=True,
dir='.', prefix='dssp_in_')
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
==== Secondary Structure Definition by the program DSSP, CMBI version by M.L. Hekkelman/2010-10-21 ==== DATE=2023-03-22 .
REFERENCE W. KABSCH AND C.SANDER, BIOPOLYMERS 22 (1983) 2577-2637 .
.
COMPND .
SOURCE .
AUTHOR .
29 1 0 0 0 TOTAL NUMBER OF RESIDUES, NUMBER OF CHAINS, NUMBER OF SS-BRIDGES(TOTAL,INTRACHAIN,INTERCHAIN) .
2663.1 ACCESSIBLE SURFACE OF PROTEIN (ANGSTROM**2) .
19 65.5 TOTAL NUMBER OF HYDROGEN BONDS OF TYPE O(I)-->H-N(J) , SAME NUMBER PER 100 RESIDUES .
0 0.0 TOTAL NUMBER OF HYDROGEN BONDS IN PARALLEL BRIDGES, SAME NUMBER PER 100 RESIDUES .
12 41.4 TOTAL NUMBER OF HYDROGEN BONDS IN ANTIPARALLEL BRIDGES, SAME NUMBER PER 100 RESIDUES .
0 0.0 TOTAL NUMBER OF HYDROGEN BONDS OF TYPE O(I)-->H-N(I-5), SAME NUMBER PER 100 RESIDUES .
0 0.0 TOTAL NUMBER OF HYDROGEN BONDS OF TYPE O(I)-->H-N(I-4), SAME NUMBER PER 100 RESIDUES .
1 3.4 TOTAL NUMBER OF HYDROGEN BONDS OF TYPE O(I)-->H-N(I-3), SAME NUMBER PER 100 RESIDUES .
0 0.0 TOTAL NUMBER OF HYDROGEN BONDS OF TYPE O(I)-->H-N(I-2), SAME NUMBER PER 100 RESIDUES .
0 0.0 TOTAL NUMBER OF HYDROGEN BONDS OF TYPE O(I)-->H-N(I-1), SAME NUMBER PER 100 RESIDUES .
0 0.0 TOTAL NUMBER OF HYDROGEN BONDS OF TYPE O(I)-->H-N(I+0), SAME NUMBER PER 100 RESIDUES .
0 0.0 TOTAL NUMBER OF HYDROGEN BONDS OF TYPE O(I)-->H-N(I+1), SAME NUMBER PER 100 RESIDUES .
6 20.7 TOTAL NUMBER OF HYDROGEN BONDS OF TYPE O(I)-->H-N(I+2), SAME NUMBER PER 100 RESIDUES .
2 6.9 TOTAL NUMBER OF HYDROGEN BONDS OF TYPE O(I)-->H-N(I+3), SAME NUMBER PER 100 RESIDUES .
0 0.0 TOTAL NUMBER OF HYDROGEN BONDS OF TYPE O(I)-->H-N(I+4), SAME NUMBER PER 100 RESIDUES .
0 0.0 TOTAL NUMBER OF HYDROGEN BONDS OF TYPE O(I)-->H-N(I+5), SAME NUMBER PER 100 RESIDUES .
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 *** HISTOGRAMS OF *** .
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 RESIDUES PER ALPHA HELIX .
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 PARALLEL BRIDGES PER LADDER .
0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ANTIPARALLEL BRIDGES PER LADDER .
0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 LADDERS PER SHEET .
# RESIDUE AA STRUCTURE BP1 BP2 ACC N-H-->O O-->H-N N-H-->O O-->H-N TCO KAPPA ALPHA PHI PSI X-CA Y-CA Z-CA
1 1 A S 0 0 122 0, 0.0 15,-0.9 0, 0.0 2,-0.3 0.000 360.0 360.0 360.0 128.2 26.2 10.3 -17.3
2 2 A K E -A 15 0A 130 13,-0.2 27,-2.1 14,-0.1 2,-0.4 -0.991 360.0-135.7-139.8 127.5 28.9 12.9 -18.4
3 3 A Y E -AB 14 28A 64 11,-1.3 11,-2.4 13,-0.4 2,-0.6 -0.725 11.7-169.7 -85.7 130.1 30.1 16.0 -16.4
4 4 A E E +AB 13 27A 28 23,-2.5 2,-0.5 -2,-0.4 23,-0.5 -0.952 17.7 175.4-114.4 98.3 30.6 19.4 -18.3
5 5 A Y E -AB 12 26A 78 7,-2.4 7,-1.7 -2,-0.6 2,-0.4 -0.964 15.7-154.0-112.1 121.0 32.4 21.5 -15.6
6 6 A T E -AB 11 25A 48 19,-0.5 19,-0.5 -2,-0.5 5,-0.2 -0.832 12.1-173.9-106.9 135.4 33.4 24.9 -16.9
7 7 A I E > -A 10 0A 57 3,-2.3 3,-1.5 -2,-0.4 2,-0.8 -0.940 62.0 -47.0-133.2 98.7 36.4 26.8 -15.3
8 8 A P T 3 S- 0 0 121 0, 0.0 -2,-0.0 0, 0.0 3,-0.0 -0.619 123.9 -20.9 86.6-104.1 37.0 30.5 -16.6
9 9 A S T 3 S+ 0 0 126 -2,-0.8 2,-0.4 2,-0.0 -3,-0.0 0.260 119.7 92.7-118.7 7.5 36.8 30.4 -20.5
10 10 A Y E < -A 7 0A 160 -3,-1.5 -3,-2.3 2,-0.0 2,-0.6 -0.909 61.1-150.6-116.4 125.7 37.5 26.5 -20.9
11 11 A T E -A 6 0A 62 -2,-0.4 2,-0.6 -5,-0.2 -5,-0.2 -0.864 10.9-164.7 -97.1 116.0 34.7 23.9 -21.0
12 12 A F E +A 5 0A 87 -7,-1.7 -7,-2.4 -2,-0.6 2,-0.4 -0.930 13.9 177.9-105.2 112.9 36.0 20.5 -19.6
13 13 A R E +A 4 0A 187 -2,-0.6 -9,-0.2 -9,-0.2 -2,-0.0 -0.985 20.8 111.0-127.3 119.1 33.5 17.7 -20.7
14 14 A G E -A 3 0A 21 -11,-2.4 -11,-1.3 -2,-0.4 2,-0.1 -0.841 60.7 -48.3-164.4-163.1 34.0 14.0 -19.8
15 15 A P E S+A 2 0A 114 0, 0.0 -13,-0.2 0, 0.0 13,-0.0 -0.470 87.1 52.9 -87.5 161.5 32.6 10.9 -17.7
16 16 A G - 0 0 43 -15,-0.9 -13,-0.4 -2,-0.1 -2,-0.1 0.651 67.2-129.7 88.5 118.5 31.7 10.8 -13.9
17 17 A C + 0 0 57 -15,-0.1 -1,-0.1 11,-0.0 11,-0.1 -0.743 37.6 161.7-100.7 84.5 29.3 13.3 -12.2
18 18 A P - 0 0 79 0, 0.0 2,-0.3 0, 0.0 8,-0.1 -0.313 40.7-104.8 -81.9 176.7 30.8 15.0 -8.9
19 19 A T - 0 0 140 -2,-0.1 2,-0.2 6,-0.0 6,-0.1 -0.735 37.6-139.8 -99.0 157.5 29.6 18.3 -7.2
20 20 A V - 0 0 45 -2,-0.3 3,-0.1 4,-0.1 5,-0.0 -0.734 9.9-107.2-123.8 159.7 31.7 21.6 -7.6
21 21 A K > - 0 0 141 -2,-0.2 3,-0.7 1,-0.1 2,-0.1 -0.362 58.5 -69.9 -77.8 164.2 33.0 24.7 -5.5
22 22 A P T 3 S+ 0 0 132 0, 0.0 2,-0.2 0, 0.0 -1,-0.1 -0.396 124.7 32.7 -63.9 129.0 31.5 28.4 -6.0
23 23 A A T 3 S+ 0 0 84 1,-0.3 2,-0.3 -2,-0.1 -17,-0.1 -0.624 107.7 73.3 120.6 -66.9 32.7 29.7 -9.4
24 24 A V < - 0 0 11 -3,-0.7 2,-0.4 -2,-0.2 -1,-0.3 -0.622 63.3-171.3 -78.1 138.4 32.7 26.3 -11.3
25 25 A T E -B 6 0A 109 -19,-0.5 -19,-0.5 -2,-0.3 -6,-0.0 -0.917 11.4-149.7-142.6 108.3 29.1 25.0 -12.1
26 26 A I E -B 5 0A 48 -2,-0.4 2,-0.3 -21,-0.2 -21,-0.2 -0.168 6.9-155.8 -71.6 160.4 28.8 21.4 -13.4
27 27 A R E -B 4 0A 156 -23,-0.5 -23,-2.5 -25,-0.0 2,-0.6 -0.929 2.7-159.8-141.8 120.0 26.2 19.8 -15.8
28 28 A C E B 3 0A 63 -2,-0.3 -25,-0.2 -25,-0.2 -11,-0.0 -0.905 360.0 360.0 -97.3 115.1 25.6 16.1 -15.7
29 29 A E 0 0 148 -27,-2.1 -1,-0.2 -2,-0.6 -26,-0.1 0.943 360.0 360.0 -87.9 360.0 24.0 15.3 -19.1
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
==== Secondary Structure Definition by the program DSSP, CMBI version 2.0 ==== DATE=2023-03-22 .
REFERENCE W. KABSCH AND C.SANDER, BIOPOLYMERS 22 (1983) 2577-2637 .
.
COMPND .
SOURCE .
AUTHOR .
29 1 0 0 0 TOTAL NUMBER OF RESIDUES, NUMBER OF CHAINS, NUMBER OF SS-BRIDGES(TOTAL,INTRACHAIN,INTERCHAIN) .
2663.1 ACCESSIBLE SURFACE OF PROTEIN (ANGSTROM**2) .
19 65.5 TOTAL NUMBER OF HYDROGEN BONDS OF TYPE O(I)-->H-N(J) , SAME NUMBER PER 100 RESIDUES .
0 0.0 TOTAL NUMBER OF HYDROGEN BONDS IN PARALLEL BRIDGES, SAME NUMBER PER 100 RESIDUES .
12 41.4 TOTAL NUMBER OF HYDROGEN BONDS IN ANTIPARALLEL BRIDGES, SAME NUMBER PER 100 RESIDUES .
0 0.0 TOTAL NUMBER OF HYDROGEN BONDS OF TYPE O(I)-->H-N(I-5), SAME NUMBER PER 100 RESIDUES .
0 0.0 TOTAL NUMBER OF HYDROGEN BONDS OF TYPE O(I)-->H-N(I-4), SAME NUMBER PER 100 RESIDUES .
1 3.4 TOTAL NUMBER OF HYDROGEN BONDS OF TYPE O(I)-->H-N(I-3), SAME NUMBER PER 100 RESIDUES .
0 0.0 TOTAL NUMBER OF HYDROGEN BONDS OF TYPE O(I)-->H-N(I-2), SAME NUMBER PER 100 RESIDUES .
0 0.0 TOTAL NUMBER OF HYDROGEN BONDS OF TYPE O(I)-->H-N(I-1), SAME NUMBER PER 100 RESIDUES .
0 0.0 TOTAL NUMBER OF HYDROGEN BONDS OF TYPE O(I)-->H-N(I+0), SAME NUMBER PER 100 RESIDUES .
0 0.0 TOTAL NUMBER OF HYDROGEN BONDS OF TYPE O(I)-->H-N(I+1), SAME NUMBER PER 100 RESIDUES .
6 20.7 TOTAL NUMBER OF HYDROGEN BONDS OF TYPE O(I)-->H-N(I+2), SAME NUMBER PER 100 RESIDUES .
2 6.9 TOTAL NUMBER OF HYDROGEN BONDS OF TYPE O(I)-->H-N(I+3), SAME NUMBER PER 100 RESIDUES .
0 0.0 TOTAL NUMBER OF HYDROGEN BONDS OF TYPE O(I)-->H-N(I+4), SAME NUMBER PER 100 RESIDUES .
0 0.0 TOTAL NUMBER OF HYDROGEN BONDS OF TYPE O(I)-->H-N(I+5), SAME NUMBER PER 100 RESIDUES .
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 *** HISTOGRAMS OF *** .
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 RESIDUES PER ALPHA HELIX .
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 PARALLEL BRIDGES PER LADDER .
0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ANTIPARALLEL BRIDGES PER LADDER .
0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 LADDERS PER SHEET .
# RESIDUE AA STRUCTURE BP1 BP2 ACC N-H-->O O-->H-N N-H-->O O-->H-N TCO KAPPA ALPHA PHI PSI X-CA Y-CA Z-CA CHAIN
1 1 A S 0 0 122 0, 0.0 15,-0.9 0, 0.0 2,-0.3 0.000 360.0 360.0 360.0 128.2 26.2 10.3 -17.3
2 2 A K E -A 15 0A 130 13,-0.2 27,-2.1 14,-0.1 2,-0.4 -0.991 360.0-135.7-139.8 127.5 28.9 12.9 -18.4
3 3 A Y E -AB 14 28A 64 11,-1.3 11,-2.4 13,-0.4 2,-0.6 -0.725 11.7-169.7 -85.7 130.1 30.1 16.0 -16.4
4 4 A E E +AB 13 27A 28 23,-2.5 2,-0.5 -2,-0.4 23,-0.5 -0.952 17.7 175.4-114.4 98.3 30.6 19.4 -18.3
5 5 A Y E -AB 12 26A 78 7,-2.4 7,-1.7 -2,-0.6 2,-0.4 -0.964 15.7-154.0-112.1 121.0 32.4 21.5 -15.6
6 6 A T E -AB 11 25A 48 19,-0.5 19,-0.5 -2,-0.5 5,-0.2 -0.832 12.1-173.9-106.9 135.4 33.4 24.9 -16.9
7 7 A I E > -A 10 0A 57 3,-2.3 3,-1.5 -2,-0.4 2,-0.8 -0.940 62.0 -47.0-133.2 98.7 36.4 26.8 -15.3
8 8 A P T 3 S- 0 0 121 0, 0.0 -2,-0.0 0, 0.0 3,-0.0 -0.619 123.9 -20.9 86.6-104.1 37.0 30.5 -16.6
9 9 A S T 3 S+ 0 0 126 -2,-0.8 2,-0.4 2,-0.0 -3,-0.0 0.260 119.7 92.7-118.7 7.5 36.8 30.4 -20.5
10 10 A Y E < -A 7 0A 160 -3,-1.5 -3,-2.3 2,-0.0 2,-0.6 -0.909 61.1-150.6-116.4 125.7 37.5 26.5 -20.9
11 11 A T E -A 6 0A 62 -2,-0.4 2,-0.6 -5,-0.2 -5,-0.2 -0.864 10.9-164.7 -97.1 116.0 34.7 23.9 -21.0
12 12 A F E +A 5 0A 87 -7,-1.7 -7,-2.4 -2,-0.6 2,-0.4 -0.930 13.9 177.9-105.2 112.9 36.0 20.5 -19.6
13 13 A R E +A 4 0A 187 -2,-0.6 -9,-0.2 -9,-0.2 -2,-0.0 -0.985 20.8 111.0-127.3 119.1 33.5 17.7 -20.7
14 14 A G E -A 3 0A 21 -11,-2.4 -11,-1.3 -2,-0.4 2,-0.1 -0.841 60.7 -48.3-164.4-163.1 34.0 14.0 -19.8
15 15 A P E S+A 2 0A 114 0, 0.0 -13,-0.2 0, 0.0 13,-0.0 -0.470 87.1 52.9 -87.5 161.5 32.6 10.9 -17.7
16 16 A G - 0 0 43 -15,-0.9 -13,-0.4 -2,-0.1 -2,-0.1 0.651 67.2-129.7 88.5 118.5 31.7 10.8 -13.9
17 17 A C + 0 0 57 -15,-0.1 -1,-0.1 11,-0.0 11,-0.1 -0.743 37.6 161.7-100.7 84.5 29.3 13.3 -12.2
18 18 A P - 0 0 79 0, 0.0 2,-0.3 0, 0.0 8,-0.1 -0.313 40.7-104.8 -81.9 176.7 30.8 15.0 -8.9
19 19 A T - 0 0 140 -2,-0.1 2,-0.2 6,-0.0 6,-0.1 -0.735 37.6-139.8 -99.0 157.5 29.6 18.3 -7.2
20 20 A V - 0 0 45 -2,-0.3 3,-0.1 4,-0.1 5,-0.0 -0.734 9.9-107.2-123.8 159.7 31.7 21.6 -7.6
21 21 A K > - 0 0 141 -2,-0.2 3,-0.7 1,-0.1 2,-0.1 -0.362 58.5 -69.9 -77.8 164.2 33.0 24.7 -5.5
22 22 A P T 3 S+ 0 0 132 0, 0.0 2,-0.2 0, 0.0 -1,-0.1 -0.396 124.7 32.7 -63.9 129.0 31.5 28.4 -6.0
23 23 A A T 3 S+ 0 0 84 1,-0.3 2,-0.3 -2,-0.1 -17,-0.1 -0.624 107.7 73.3 120.6 -66.9 32.7 29.7 -9.4
24 24 A V < - 0 0 11 -3,-0.7 2,-0.4 -2,-0.2 -1,-0.3 -0.622 63.3-171.3 -78.1 138.4 32.7 26.3 -11.3
25 25 A T E -B 6 0A 109 -19,-0.5 -19,-0.5 -2,-0.3 -6,-0.0 -0.917 11.4-149.7-142.6 108.3 29.1 25.0 -12.1
26 26 A I E -B 5 0A 48 -2,-0.4 2,-0.3 -21,-0.2 -21,-0.2 -0.168 6.9-155.8 -71.6 160.4 28.8 21.4 -13.4
27 27 A R E -B 4 0A 156 -23,-0.5 -23,-2.5 -25,-0.0 2,-0.6 -0.929 2.7-159.8-141.8 120.0 26.2 19.8 -15.8
28 28 A C E B 3 0A 63 -2,-0.3 -25,-0.2 -25,-0.2 -11,-0.0 -0.905 360.0 360.0 -97.3 115.1 25.6 16.1 -15.7
29 29 A E 0 0 148 -27,-2.1 -1,-0.2 -2,-0.6 -26,-0.1 0.943 360.0 360.0 -87.9 360.0 24.0 15.3 -19.1
Loading

0 comments on commit 63aceb6

Please sign in to comment.