-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathBwaMeminputSE.pl
executable file
·119 lines (85 loc) · 4.11 KB
/
BwaMeminputSE.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
#!/usr/bin/perl
use warnings;
use strict;
use diagnostics;
use Cwd;
# This script will parse the files in an directory and call bwa on each fastq file in the directory. All
# *.fastq files are stored in @INfiles. A directory inside the input directory called bwa will store
# all the output, including the resulting *.bam file and a text file containing the std out from bwa.
#
# Modified March 2015 by Mike Place to use bwa mem
#
######################################################################################################################
# Input Arguments and Variables
unless (scalar @ARGV == 1) {
print "\nUSAGE: BwaMeminputSE.pl <Reference.basename>\n";
print "Assumes input directory is current directory which\n";
print "fastq files and runs bwa mem on each file. Also input the\n";
print "basename of the reference index files.\n";
exit;
}
#my $INPUT_DIR = shift;
my $INPUT_DIR = getcwd();
my $REFERENCE = shift;
die "Undefined input directory!\n" unless defined $INPUT_DIR;
print "input dir: $INPUT_DIR \n";
opendir my ($IN), $INPUT_DIR or die "Cannot open input directory '$INPUT_DIR'\n";
# Collect all fastq files from the directory and store in @INfiles
my @INfiles = grep { /\.fastq$/} readdir $IN;
die "No input .fastq files found!\n" unless @INfiles > 0;
# Create output direcotry to store all bwa output files
my $OUTPUT_DIR = "$INPUT_DIR/Bwa/";
die "Output directory '$OUTPUT_DIR' already exists. To continue, rename existing directory!\n" if (-d $OUTPUT_DIR);
unless (mkdir $OUTPUT_DIR) {
die "Could not make output directory '$OUTPUT_DIR'\n$!\n";
}
######################################################################################################################
# Body of Script
$|=1; #Autoflush standard out
&main(); #Script entry point
######################################################################################################################
# Main subroutine
sub main {
bwa();
}
######################################################################################################################
# bwa subroutine
# This subroutine loops through all fastq files in the directory and runs bwa.
# This script requires the reference index files, the fastq file, and the output base name. The output
# base name is created from the name of the fastq file. The output .bam file and std out from bwa is saved in
# an output directory: InputDirectory/Bwa/
sub bwa {
foreach my $file (@INfiles) {
my $name;
if ($file =~ /(.*)\.fastq$/) {
$name = $1;
} else {
die "$file is not a *.fastq file.\n";
}
my $out = "$OUTPUT_DIR"."$name";
open( my $fh,'>>', "$OUTPUT_DIR/Bwa_run.log" ) or die "Unable to write to file $out.run.log : $!";
print $fh "Running bwa on '$file'\n";
close($fh);
# Run bwa
my $btlog = `bwa mem -t 8 -M $REFERENCE $file 1> $out.sam 2>>$OUTPUT_DIR/Bwa_run.log`;
### Clean the SAM file
### This soft-clips an alignment that hangs off the end of its reference sequence.
### This will print out all the errors that it ignores (MAPQ errors
print"\n\n I=out.sam = $out.sam O= $out.cleaned.sam\n";
my $cleanSam = `java -Xmx8g -jar /opt/bifxapps/picard-tools/CleanSam.jar I=$out.sam O=$out.cleaned.sam`;
# REMOVE original sam file #rm $OUT.sam
unlink("$out.sam");
### Add the RG header and sort the SAM file
### This will print out all the errors that it ignores (MAPQ errors)
my $sortSam = `java -Xmx8g -jar /opt/bifxapps/picard-tools/AddOrReplaceReadGroups.jar I=$out.cleaned.sam O=$out.final.sam SO=coordinate LB=$REFERENCE.fasta PL=ILLUMINA PU=unknown SM=$out VALIDATION_STRINGENCY=LENIENT`;
# REMOVE cleaned sam file
unlink("$out.cleaned.sam");
### Make the BAM file, sort and index it
my $makeBam = `samtools view -uS -t $REFERENCE.fsa.fai $out.final.sam | samtools sort - $out.sorted`;
### Index bam file
my $indexBam = `samtools index $out.sorted.bam`;
unlink("$out.final.sam");
print "Finished running bwa2 on '$file'\n\n";
}
}
######################################################################################################################