diff --git a/pvactools/lib/combine_inputs.py b/pvactools/lib/combine_inputs.py index 7cfa083e..aaa67e78 100644 --- a/pvactools/lib/combine_inputs.py +++ b/pvactools/lib/combine_inputs.py @@ -15,7 +15,7 @@ def add_junction_coordinates_to_variants(self): # remove version number in annotated to compare with filtered junctions file var_df[['transcript_id', 'transcript_version']] = var_df['transcript_name'].str.split('.', expand=True) - var_df = var_df.loc[var_df['transcript_id'].str.startswith('ENST') == True] + var_df = var_df.loc[var_df['transcript_id'].fillna('').str.contains(r'^ENS.*T', regex=True)] var_df['transcript_version'] = var_df['transcript_version'].astype('int64') # create new cols diff --git a/pvactools/lib/junction_to_fasta.py b/pvactools/lib/junction_to_fasta.py index 019b6a15..9e51b5b9 100644 --- a/pvactools/lib/junction_to_fasta.py +++ b/pvactools/lib/junction_to_fasta.py @@ -161,7 +161,7 @@ def get_aa_sequence(self, dataframe): final_seq = '' for x in coordinates: start = int(x.split(',')[0]); end = int(x.split(',')[1]) - seq = self.personal_fasta.get_seq(self.chrom, start, end) + seq = self.personal_fasta.get_seq(str(self.chrom), start, end) final_seq += str(seq) # using Seq from Bio.Seq to translate str_seq # positive strand diff --git a/pvactools/lib/splice_pipeline.py b/pvactools/lib/splice_pipeline.py index 6fd86644..3a9f3f7d 100644 --- a/pvactools/lib/splice_pipeline.py +++ b/pvactools/lib/splice_pipeline.py @@ -1,3 +1,4 @@ +import sys import shutil import os import pandas as pd