Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Provide map file input option for non-hybrid mode #153 #154

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docs/source/usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ Input Arguments
-start_snp START_SNP
The first marker to consider. The first marker is "1". Default: 1.
-stop_snp STOP_SNP The last marker to consider. Default: all markers considered.
-map_file MAP_FILE A map file for all loci.

|Software| requires a pedigree file (``-ped_file``) and one or more genomic data files to run the analysis.

Expand Down Expand Up @@ -137,8 +138,7 @@ Hybrid peeling arguments
Single locus arguments:
-seg_file SEG_FILE A segregation probabilities file for hybrid peeling.
-seg_map_file SEG_MAP_FILE
A map file for loci in the segregation probabilities file.
-map_file MAP_FILE A map file for all loci in hybrid peeling.
A map file for loci in the segregation probabilities file in hybrid peeling.

To run hybrid peeling, the user needs to supply a ``-map_file``, which gives the genetic positions of the SNPs in the supplied sequence allele read count data; a ``-seg_map_file``, which gives the genetic positions of the SNPs in the segregation file; and a ``-seg_file``, which gives the segregation values generated via multi-locus iterative peeling. These arguments are not required for running in multi-locus mode.

Expand Down
16 changes: 10 additions & 6 deletions src/tinypeel/Peeling/PeelingInfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from ..tinyhouse import ProbMath
from ..tinyhouse import HaplotypeOperations
from ..tinyhouse import InputOutput


#####################################################################
Expand All @@ -22,8 +23,11 @@ def createPeelingInfo(pedigree, args, createSeg=True, phaseFounder=False):
nInd=pedigree.maxIdn, nFam=pedigree.maxFam, nLoci=nLoci, createSeg=createSeg
)
peelingInfo.isSexChrom = args.sex_chrom
# Information about the peeling positions is handled elsewhere.
peelingInfo.positions = None
if args.map_file:
peelingInfo.positions = np.array(
InputOutput.readMapFile(args.map_file, args.startsnp, args.stopsnp)[2],
dtype=np.int64,
)

# Generate the segregation tensors.
peelingInfo.segregationTensor = ProbMath.generateSegregation(e=1e-06)
Expand Down Expand Up @@ -102,9 +106,9 @@ def setupTransmission(length, peelingInfo):
if peelingInfo.positions is None:
localMap = np.linspace(0, 1, num=peelingInfo.nLoci, dtype=np.float32)
else:
localMap = (
peelingInfo.positions / peelingInfo.positions[-1]
) # This should be sorted. Need to add in code to check.
localMap = (peelingInfo.positions - peelingInfo.positions[0]) / (
XingerTang marked this conversation as resolved.
Show resolved Hide resolved
peelingInfo.positions[-1] - peelingInfo.positions[0]
)
for i in range(peelingInfo.nLoci - 1):
distance = localMap[i + 1] - localMap[i]
distance = distance * length
Expand Down Expand Up @@ -213,7 +217,7 @@ def getHetMidpoint(geno):
spec["transmissionRate"] = optional(float32[:])
spec["maf"] = optional(float32[:])

spec["positions"] = optional(float32[:]) # Not sure we use this.
spec["positions"] = optional(int64[:])
spec["iteration"] = int64


Expand Down
20 changes: 9 additions & 11 deletions src/tinypeel/tinypeel.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,9 +189,7 @@ def getLociAndDistance(snpMap, segMap):
def generateSingleLocusSegregation(peelingInfo, pedigree, args):
if args.segfile is not None:
# This just gets the locations in the map files.
snpMap = np.array(
InputOutput.readMapFile(args.map_file, args.startsnp, args.stopsnp)[2]
)
snpMap = peelingInfo.positions
segMap = np.array(InputOutput.readMapFile(args.seg_map_file)[2])

loci, distance = getLociAndDistance(snpMap, segMap)
Expand Down Expand Up @@ -387,6 +385,13 @@ def getArgs():
"stopsnp",
],
)
input_parser.add_argument(
"-map_file",
default=None,
required=False,
type=str,
help="A map file for all loci.",
)

# Output options
output_parser = parser.add_argument_group("Output Options")
Expand Down Expand Up @@ -534,19 +539,12 @@ def getArgs():
)

singleLocus_parser = parser.add_argument_group("Hybrid peeling arguments")
singleLocus_parser.add_argument(
"-map_file",
default=None,
required=False,
type=str,
help="a map file for all loci in hybrid peeling.",
)
singleLocus_parser.add_argument(
"-seg_map_file",
default=None,
required=False,
type=str,
help="a map file for loci in the segregation probabilities file.",
help="A map file for loci in the segregation probabilities file in hybrid peeling.",
)
singleLocus_parser.add_argument(
"-seg_file",
Expand Down
44 changes: 43 additions & 1 deletion tests/functional_tests/run_func_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,6 @@ def delete_columns(two_d_list, col_del):

class TestClass:
path = os.path.join("tests", "functional_tests")
command = "AlphaPeel "
test_cases = None
input_file_depend_on_test_cases = None

Expand Down Expand Up @@ -96,6 +95,8 @@ def generate_command(self):
"""
generate the command for the test
"""
self.command = "AlphaPeel "

for file in self.input_files:
if (
(self.test_cases is not None)
Expand Down Expand Up @@ -539,6 +540,47 @@ def test_out_id_only(self):
for ind in self.output:
assert "MotherOf" not in ind[0] and "FatherOf" not in ind[0]

def test_map_input(self):
    """
    Check that supplying a map file in multi-locus mode does not change
    the dosage output.

    The program is run twice on the same genotype and pedigree input,
    first without and then with ``map_file``, and the two sorted dosage
    outputs are compared for equality.
    """
    self.test_name = "test_map_input"
    self.prepare_path()

    self.arguments = {"method": "multi"}
    self.output_file_to_check = "dosage"

    def run_and_collect(prefix):
        # Run the command for the current input files under the given
        # output prefix and return the sorted contents of the output file.
        self.output_file_prefix = prefix
        self.generate_command()
        os.system(self.command)

        file_name = f"{self.output_file_prefix}.{self.output_file_to_check}.txt"
        self.output_file_path = os.path.join(self.output_path, file_name)
        return read_and_sort_file(self.output_file_path)

    # Baseline run: no map file supplied.
    self.input_files = ["geno_file", "ped_file"]
    self.first_output = run_and_collect("map_input.no_map_file")

    # Second run: identical inputs plus the map file.
    self.input_files.append("map_file")
    self.second_output = run_and_collect("map_input.with_map_file")

    # The two outputs should match.
    assert self.first_output == self.second_output

# TODO test_plink for PLINK
# a. binary PLINK output
# b. binary output + input
Expand Down
6 changes: 6 additions & 0 deletions tests/functional_tests/test_map_input/geno_file.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
M0 1 2 1 0 9
F0 1 2 0 1 2
M1 0 2 0 1 2
F1 2 2 1 0 2
M2 1 2 1 1 2
F2 1 2 0 0 2
5 changes: 5 additions & 0 deletions tests/functional_tests/test_map_input/map_file.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
1 1-1 1
1 1-2 2
1 1-3 3
1 1-4 4
1 1-5 5
6 changes: 6 additions & 0 deletions tests/functional_tests/test_map_input/ped_file.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
M0 0 0
F0 0 0
M1 M0 F0
F1 M0 F0
M2 M1 F1
F2 M1 F1