-
Notifications
You must be signed in to change notification settings - Fork 0
/
index_fixer.py
executable file
·149 lines (129 loc) · 5.32 KB
/
index_fixer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
import sys
import re
import os
import click
from flowcell_parser.classes import SampleSheetParser
def generate_samplesheet(ss_reader):
    """Serialize a parsed samplesheet back into CSV text.

    The result consists of a ``[Header]`` section with one ``key,value`` line
    per entry of ``ss_reader.header`` (key and value stripped of trailing
    whitespace), followed by a ``[Data]`` section made of a column-name line
    built from ``ss_reader.datafields`` and one comma-joined line per row of
    ``ss_reader.data``. Every line is terminated with ``os.linesep``.

    Args:
        ss_reader: Object exposing ``header`` (mapping whose keys and values
            are strings), ``datafields`` (iterable of column names) and
            ``data`` (iterable of mappings keyed by those column names; the
            values must be strings because they are joined with ``","``).

    Returns:
        The whole samplesheet as one string.
    """
    datafields = list(ss_reader.datafields)

    # Collect the lines first and join once instead of growing a string
    # with repeated ``+=``.
    lines = ["[Header]"]
    lines.extend(
        "{},{}".format(field.rstrip(), ss_reader.header[field].rstrip())
        for field in ss_reader.header
    )
    lines.append("[Data]")
    lines.append(",".join(datafields))
    lines.extend(
        ",".join(row[field] for field in datafields)
        for row in ss_reader.data
    )
    return "".join(line + os.linesep for line in lines)
def nuc_compliment(nuc):
    """Return the complementary base of a single nucleotide.

    Only the upper-case letters ``A``, ``T``, ``C`` and ``G`` are recognised.
    Any other value terminates the program through ``sys.exit`` with an
    error message naming the offending value.
    """
    base_pairs = (('A', 'T'), ('T', 'A'), ('C', 'G'), ('G', 'C'))
    for base, partner in base_pairs:
        if nuc == base:
            return partner
    sys.exit("Critical error. Unknown nucleotide found: {}.".format(nuc))
def _reverse_complement(sequence):
    """Return the reverse complement of ``sequence``, base by base via ``nuc_compliment``."""
    return "".join(nuc_compliment(nuc) for nuc in sequence[::-1])


def _split_hiseq_dual_index(value):
    """Split a combined ``index1-index2`` / ``index1+index2`` value into its two parts."""
    parsed = re.match(r'([ATCG]{4,12})[-+]([ATCG]{4,12})', value)
    if parsed is None:
        # Abort with a readable message instead of an AttributeError on None.
        sys.exit("Could not parse dual index '{}'.".format(value))
    return parsed.group(1), parsed.group(2)


@click.command()
@click.option('--path', required=True,help='Path to the Samplesheet. E.g. ~/fc/161111_M01320_0095_000000000-AWE6P.csv')
@click.option('--swap', is_flag=True,help='Swaps index 1 with 2 and vice versa.')
@click.option('--rc1', is_flag=True,help='Exchanges index 1 for its reverse compliment.')
@click.option('--rc2', is_flag=True,help='Exchanges index 2 for its reverse compliment.')
@click.option('--platform', required=True, type=click.Choice(['hiseq', 'miseq', 'hiseqx']), help="Run platform ('hiseq', 'miseq', 'hiseqx')")
# The explicit 'ss' name pins the keyword handed to main(); without it the
# inferred name depends on the click version and may be 'sampleswap'.
# ``str`` replaces the Python-2-only ``unicode`` builtin.
@click.option('--sampleswap', '--ss', 'ss', multiple=True, type=(str, str), help='Swap index between sample pairs. Use one --ss per pair.')
def main(path, swap, rc1, rc2, platform, ss):
    """Rewrite the indexes of a samplesheet and save the result as a new CSV.

    Depending on the flags, index 1 and/or index 2 are replaced by their
    reverse complement and the two indexes may be swapped. The output is
    written to '<name>_redemux.csv' in the current working directory.
    """
    ss_reader = SampleSheetParser(path)
    ss_data = ss_reader.data

    # hiseq/miseq layout detection inspects the first data row, so an empty
    # sheet cannot be classified as single or dual index.
    if platform != "hiseqx" and not ss_data:
        sys.exit("No sample rows found in samplesheet: {}".format(path))

    # Work out the index column names and whether the run is dual indexed.
    single = True
    index2 = None
    if platform == "hiseq":
        # Both indexes share one column, separated by '-' or '+'.
        index1 = 'Index'
        if re.search('[-+]', ss_data[0][index1]):
            single = False
    elif platform == "miseq":
        index1 = 'index'
        index2 = 'index2'
        if index2 in ss_data[0]:
            single = False
    elif platform == "hiseqx":
        index1 = 'index1'
        index2 = 'index2'
        single = False

    if single:
        # Sanity check
        if rc2 or swap:
            sys.exit("Single index. Cannot change index 2, nor swap indexes")
        # Reverse complement; rows whose index does not look like a
        # nucleotide sequence are left untouched.
        if rc1:
            for row in ss_data:
                index_in = re.match('([ATCG]{4,12})', row[index1])
                if index_in:
                    row[index1] = _reverse_complement(index_in.group(1))
    else:
        # Reverse complement
        if rc1 or rc2:
            for row in ss_data:
                if platform == "hiseq":
                    first, second = _split_hiseq_dual_index(row[index1])
                    # Transform both halves before writing back so that
                    # --rc2 does not discard the result of --rc1.
                    if rc1:
                        first = _reverse_complement(first)
                    if rc2:
                        second = _reverse_complement(second)
                    row[index1] = '{}-{}'.format(first, second)
                else:
                    # Use the per-platform column names; the miseq index 1
                    # column is 'index', not 'index1'.
                    if rc1:
                        row[index1] = _reverse_complement(row[index1])
                    if rc2:
                        row[index2] = _reverse_complement(row[index2])
        # Swap indexes (applied after any reverse complementing)
        if swap:
            for row in ss_data:
                if platform == "hiseq":
                    first, second = _split_hiseq_dual_index(row[index1])
                    row[index1] = '{}-{}'.format(second, first)
                else:
                    row[index1], row[index2] = row[index2], row[index1]

    # Rearrange samples
    if ss:
        # Need to catch all samples in a list prior to writing, then dump
        # them in corrected order.
        sys.exit("Sample Swap isn't implemented yet.")

    redemux_ss = generate_samplesheet(ss_reader)

    if platform == "hiseq" or platform == "hiseqx":
        # Derive the name from the input file; this also works for relative
        # paths and for names containing '-' or '.'.
        filename = os.path.splitext(os.path.basename(path))[0]
    else:
        filename = "SampleSheet"

    with open('{}_redemux.csv'.format(filename), 'w') as fh_out:
        fh_out.write(redemux_ss)
# Run the click command only when the file is executed as a script.
if __name__ == "__main__":
    main()