From 6e876684da7af69d1d5c826d84746285f30dfb07 Mon Sep 17 00:00:00 2001 From: rwdavies Date: Fri, 26 Jan 2024 20:03:56 +0000 Subject: [PATCH] Add test to give error when ref sample file size does not match hap file #93 --- STITCH/R/reference.R | 21 ++++++++++++++- .../testthat/test-acceptance-reference.R | 27 +++++++++++++++++++ 2 files changed, 47 insertions(+), 1 deletion(-) diff --git a/STITCH/R/reference.R b/STITCH/R/reference.R index 24ebd83..aa59173 100644 --- a/STITCH/R/reference.R +++ b/STITCH/R/reference.R @@ -43,6 +43,13 @@ get_and_initialize_from_reference <- function( plotHapSumDuringIterations ) { + ## quick validations of haps against sample file + ## some redunancy + validate_sample_file_vs_haplotype( + reference_sample_file, + reference_haplotype_file + ) + print_message("Begin initializing paramters using reference haplotypes") ## get reference haplotypes matched to posfile ## NA's where there are no match @@ -859,7 +866,19 @@ new_subset_of_single_reference_iteration <- function( - +validate_sample_file_vs_haplotype <- function( + reference_sample_file, + reference_haplotype_file +) { + if (file.exists(reference_sample_file) & file.exists(reference_haplotype_file)) { + n1 <- nrow(read.table(reference_sample_file, header = TRUE)) + n2 <- ncol(read.table(reference_haplotype_file, nrows = 1)) + if (n1 != n2 / 2) { + stop(paste0("There are ", n1, " rows in the reference sample file and ", n2, " columns (i.e. ", n2 / 2, " samples) from the sample file. These files must match in a one to one fashion")) + } + } + NULL +} diff --git a/STITCH/tests/testthat/test-acceptance-reference.R b/STITCH/tests/testthat/test-acceptance-reference.R index fdcb3a6..e07da70 100644 --- a/STITCH/tests/testthat/test-acceptance-reference.R +++ b/STITCH/tests/testthat/test-acceptance-reference.R @@ -109,6 +109,33 @@ test_that("STITCH can initialize with reference data", { }) +test_that("STITCH throws an error if reference panel information not sized properly", { + + new_sample_file <- tempfile() + simple_write( + rbind(refpack$reference_samples, tail(refpack$reference_samples)), + new_sample_file + ) + + expect_error( + STITCH( + chr = data_package$chr, + bamlist = data_package$bamlist, + posfile = data_package$posfile, + genfile = data_package$genfile, + outputdir = tempdir(), + reference_haplotype_file = refpack$reference_haplotype_file, + reference_legend_file = refpack$reference_legend_file, + reference_sample_file = new_sample_file, + K = 2, + nGen = 100, + reference_populations = "GBR" + ) + ) + +}) + + test_that("STITCH can initialize with reference data for S > 1", { for(output_format in c("bgvcf", "bgen")) {