-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathSequences.pm
131 lines (98 loc) · 2.31 KB
/
Sequences.pm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
package Sequences;
#################################
#
#module of methods for dealing with raw solexa reads ..
#################################
use strict;
use Exporter;
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
$VERSION = 0.1;
@ISA = qw(Exporter);
@EXPORT = ();
@EXPORT_OK = qw();
%EXPORT_TAGS = (DEFAULT => [qw()],
ALL =>[qw()]);
sub is_solexa_fastq{
## takes a quality score line and scans it to see if the line contains chars with ascii values of greater than 73.. which indicates solexa
my @quals = split(/|/,$_[0]);
foreach my $q (@quals){
if (ord($q) > 73){
return 1;
}
}
return 0;
}
sub sol2sanger{
die "error in sol2sanger " unless $_[1];
my $vers = shift; ## vers is the version of the solexa pipeline
if ($vers =~ m/1.4/){
my $quals = shift;
# Added to eliminate carriage return conversion
chomp $quals;
my @quals = split( '', $quals );
my $qual = '';
foreach my $q (@quals){
my $s = chr(ord($q) - 31);
$qual = $qual . $s;
}
return $qual;
}
else {
die "No known version\n\n";
}
}
sub get_read_lengths{
my %hash;
warn "@{$_[0]}";
foreach my $file (@{$_[0]}){
## returns reference to hash of lengths for every read
open FILE, "<$file" || die "can't open $file\n\n";
while (my $line = <FILE>){
chomp $line;
if ($line =~ m/^@/){
my $seq = <FILE>;
chomp $seq;
my $l = length($seq);
$hash{$l}{$file}{$line} =1;
}
}
close FILE;
}
return \%hash;
}
sub split_line_in_two{
my $line = shift;
if (length $line % 2 eq 0 ){
my $line1 = substr($line, 0, ($line / 2);
my $line2 = substr($line, ($line / 2), $line / 2) );
return ($line1, $line2);
}
else {
return 0;
}
}
sub remove_adapter_3{ ##removes an adapter by exact match, adapter may have extra nt in read ... so returns chopped seq and pos of adapter match
my $seq = shift;
my $adapter = shift;
my $match = 0;
# warn "in sub, \n\n";
while ($seq =~ m/$adapter/g){
$match = length($`);
}
# warn "match = $match\n\n";
if($match){
my $start = length($seq) - $match;
$seq = substr($seq, 0, (-1 * $start) );
# warn $seq, "\n";
return $seq, $start;
}
else{
return 0;
}
}
sub chop_n_3{ ## chops characters of the 3
my $seq = shift;
my $length = shift;
return substr($seq, 0, -$length)
}
1;