integrated pvaccompare into pvactools and added tests

griffithlab · Feb 17, 2025 · 042d6f1 · 042d6f1
1 parent 332e4c6
commit 042d6f1
Show file tree

Hide file tree

Showing 63 changed files with 21,671 additions and 5 deletions.
diff --git a/pvactools/tools/__init__.py b/pvactools/tools/__init__.py
@@ -1,6 +1,7 @@
 __all__ = [
     'download_cwls',
     'download_wdls',
+    'compare',
 ]
 
 from . import *
diff --git a/pvactools/tools/compare.py b/pvactools/tools/compare.py
@@ -0,0 +1,223 @@
+from .pvaccompare.compare_tools import *
+from .pvaccompare.server import main as startserver
+import argparse
+import logging
+from datetime import datetime
+import sys
+import os
+
+# Configure logging as soon as this module is imported: DEBUG level and
+# message-only output (no timestamp, level or logger name in the format).
+logging.basicConfig(level=logging.DEBUG, format="%(message)s")
+
+
+def define_parser():
+    """
+    Purpose:    Define arguments for the parser that the user can use
+    Modifies:   Nothing
+    Returns:    The parser (argparse.ArgumentParser)
+
+    Notes:
+        - Both results folders use nargs="?" so that --start-server can be
+          given on its own; main() reports an error when a comparison is
+          requested without them.
+        - The three *-columns options accept one comma-separated string and
+          produce a list of column names. Whitespace around each name is
+          stripped, so "Gene, Tier" and "Gene,Tier" are equivalent.
+        - The valid_* lists below are only used to build help text; this
+          function does not reject unknown column names itself.
+    """
+
+    def split_columns(value):
+        # Shared converter for every comma-separated column option.
+        return [column.strip() for column in value.split(",")]
+
+    # argparse %-formats help strings, so a literal percent sign has to be
+    # written as "%%" ("%%ile MT" is displayed as "%ile MT").
+    valid_aggregated_columns = [
+        "Gene",
+        "AA Change",
+        "Num Passing Transcripts",
+        "Best Peptide",
+        "Best Transcript",
+        "Num Passing Peptides",
+        "IC50 MT",
+        "IC50 WT",
+        "%%ile MT",
+        "%%ile WT",
+        "RNA Expr",
+        "RNA VAF",
+        "DNA VAF",
+        "Tier",
+    ]
+    valid_unaggregated_columns = [
+        "Biotype",
+        "Median MT IC50 Score",
+        "Median WT IC50 Score",
+        "Median MT Percentile",
+        "Median WT Percentile",
+        "WT Epitope Seq",
+        "Tumor DNA VAF",
+        "Tumor RNA Depth",
+        "Tumor RNA VAF",
+        "Gene Expression",
+        "BigMHC_EL WT Score",
+        "BigMHC_EL MT Score",
+        "BigMHC_IM WT Score",
+        "BigMHC_IM MT Score",
+        "MHCflurryEL Processing WT Score",
+        "MHCflurryEL Processing MT Score",
+        "MHCflurryEL Presentation WT Score",
+        "MHCflurryEL Presentation MT Score",
+        "MHCflurryEL Presentation WT Percentile",
+        "MHCflurryEL Presentation MT Percentile",
+        "MHCflurry WT IC50 Score",
+        "MHCflurry MT IC50 Score",
+        "MHCflurry WT Percentile",
+        "MHCflurry MT Percentile",
+        "MHCnuggetsI WT IC50 Score",
+        "MHCnuggetsI MT IC50 Score",
+        "MHCnuggetsI WT Percentile",
+        "MHCnuggetsI MT Percentile",
+        "NetMHC WT IC50 Score",
+        "NetMHC MT IC50 Score",
+        "NetMHC WT Percentile",
+        "NetMHC MT Percentile",
+        "NetMHCcons WT IC50 Score",
+        "NetMHCcons MT IC50 Score",
+        "NetMHCcons WT Percentile",
+        "NetMHCcons MT Percentile",
+        "NetMHCpan WT IC50 Score",
+        "NetMHCpan MT IC50 Score",
+        "NetMHCpan WT Percentile",
+        "NetMHCpan MT Percentile",
+        "NetMHCpanEL WT Score",
+        "NetMHCpanEL MT Score",
+        "NetMHCpanEL WT Percentile",
+        "NetMHCpanEL MT Percentile",
+        "PickPocket WT IC50 Score",
+        "PickPocket MT IC50 Score",
+        "PickPocket WT Percentile",
+        "PickPocket MT Percentile",
+        "SMM WT IC50 Score",
+        "SMM MT IC50 Score",
+        "SMM WT Percentile",
+        "SMM MT Percentile",
+        "SMMPMBEC WT IC50 Score",
+        "SMMPMBEC MT IC50 Score",
+        "SMMPMBEC WT Percentile",
+        "SMMPMBEC MT Percentile",
+        "DeepImmuno WT Score",
+        "DeepImmuno MT Score",
+        "Problematic Positions",
+    ]
+    valid_reference_match_columns = [
+        "Peptide",
+        "Hit Definition",
+        "Match Window",
+        "Match Sequence",
+    ]
+
+    # Columns compared when the user does not pass the matching option.
+    default_aggregated_columns = [
+        "Num Passing Transcripts",
+        "Best Peptide",
+        "Best Transcript",
+        "Num Passing Peptides",
+        "Tier",
+    ]
+    default_unaggregated_columns = [
+        "Biotype",
+        "Median MT IC50 Score",
+        "Median WT IC50 Score",
+        "Median MT Percentile",
+        "Median WT Percentile",
+        "WT Epitope Seq",
+        "Tumor DNA VAF",
+        "Tumor RNA Depth",
+        "Tumor RNA VAF",
+        "Gene Expression",
+    ]
+    default_reference_match_columns = ["Peptide", "Match Window"]
+
+    parser = argparse.ArgumentParser(
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter
+    )
+    parser.add_argument("results_folder1", nargs="?", help="Path to first results input folder")
+    parser.add_argument("results_folder2", nargs="?", help="Path to second results input folder")
+    parser.add_argument(
+        "--output-dir",
+        default="output",
+        help="Specify where the output directory should be generated",
+    )
+    parser.add_argument(
+        "--mhc-class", choices=["1", "2"], help="Specify MHC class 1 or class 2"
+    )
+    parser.add_argument(
+        "--no-server",
+        action="store_true",
+        help="If specified, will not start the report server after the comparisons finish",
+    )
+    parser.add_argument(
+        "--start-server",
+        action="store_true",
+        help="If specified, will only start the report server and will not run a comparison",
+    )
+    parser.add_argument(
+        "--aggregated-columns",
+        type=split_columns,
+        default=default_aggregated_columns,
+        help=f"Comma-separated columns to include in the aggregated TSV comparison, choices: {', '.join(valid_aggregated_columns)}",
+    )
+    parser.add_argument(
+        "--unaggregated-columns",
+        type=split_columns,
+        default=default_unaggregated_columns,
+        help=f"Comma-separated columns to include in the unaggregated TSV comparison, choices: {', '.join(valid_unaggregated_columns)}",
+    )
+    parser.add_argument(
+        "--reference-match-columns",
+        type=split_columns,
+        default=default_reference_match_columns,
+        help=f"Comma-separated columns to include in the reference match TSV comparison, choices: {', '.join(valid_reference_match_columns)}",
+    )
+
+    return parser
+
+
+def prepare_results_folder(classes, base_output_dir):
+    """
+    Purpose:    Create a timestamped results folder with one subfolder per requested MHC class
+    Modifies:   The filesystem (creates "<base_output_dir>/results_<YYYYmmdd_HHMMSS>" and,
+                depending on classes, "mhc_class_i" and/or "mhc_class_ii" inside it)
+    Returns:    Path of the newly created results folder
+    """
+    run_stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+    results_root = f"{base_output_dir}/results_{run_stamp}"
+
+    # No exist_ok here: os.makedirs raises FileExistsError if the folder is
+    # already present (e.g. a second run started within the same second).
+    os.makedirs(results_root)
+
+    # Class I is handled before class II, one subfolder per requested class.
+    for mhc_class, folder_name in (("1", "mhc_class_i"), ("2", "mhc_class_ii")):
+        if mhc_class in classes:
+            os.makedirs(f"{results_root}/{folder_name}")
+
+    return results_root
+
+
+def main(args_input = sys.argv[1:]):
+    """
+    Purpose:    Control function for the whole tool, calls run_comparison which calls all of the comparisons
+    Modifies:   Creates a timestamped results folder under --output-dir when a comparison is run
+    Returns:    None
+
+    Flow:
+        1. --start-server: start the report server and do nothing else.
+        2. Otherwise both results folders are required; if either is missing
+           an error is logged, the help text is printed and the function returns.
+        3. The requested column lists are handed to the validators together
+           with the parser.
+        4. A comparison is run for the selected MHC class, or for both
+           classes when --mhc-class is not given.
+        5. The report server is started afterwards unless --no-server was passed.
+    """
+    # NOTE: the default for args_input is evaluated once, when this module is
+    # imported, so it reflects sys.argv as it was at import time.
+    parser = define_parser()
+    args = parser.parse_args(args_input)
+
+    if args.start_server:
+        startserver()
+        return
+
+    # The positionals are optional at the parser level (nargs="?") so that
+    # --start-server works alone; enforce them here for a real comparison.
+    if not args.results_folder1 or not args.results_folder2:
+        logging.error("Error: results_folder1 and results_folder2 are required parameters\n")
+        parser.print_help()
+        return
+
+    validators.validate_aggregated_columns(args.aggregated_columns, parser)
+    validators.validate_unaggregated_columns(args.unaggregated_columns, parser)
+    validators.validate_reference_match_columns(args.reference_match_columns, parser)
+
+    # No --mhc-class means "compare both classes".
+    classes = [args.mhc_class] if args.mhc_class else ["1", "2"]
+    output_dir = prepare_results_folder(classes, args.output_dir)
+
+    for class_type in classes:
+        comparison_router.run_comparison(
+            class_type,
+            args.results_folder1,
+            args.results_folder2,
+            output_dir,
+            args.aggregated_columns,
+            args.unaggregated_columns,
+            args.reference_match_columns,
+        )
+
+    if not args.no_server:
+        startserver()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/pvactools/tools/main.py b/pvactools/tools/main.py
@@ -25,6 +25,13 @@ def define_parser():
         )
     download_wdls_parser.set_defaults(func=download_wdls)
 
+    compare_parser = subparsers.add_parser(
+        "compare",
+        help="Run a comparison between two output results folders",
+        add_help=False
+        )
+    compare_parser.set_defaults(func=compare)
+
     parser.add_argument(
         "-v", "--version",
         action="store_true",

diff --git a/pvactools/tools/pvaccompare/compare_tools/__init__.py b/pvactools/tools/pvaccompare/compare_tools/__init__.py
@@ -0,0 +1,2 @@
+from .comparison_router import run_comparison
+from .validators import *
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		from .comparison_router import run_comparison
		from .validators import *