Skip to content

Commit

Permalink
Address feature request: trim reads exceeding a given length #191
Browse files Browse the repository at this point in the history
  • Loading branch information
ch4rr0 committed Jul 13, 2018
1 parent 863d9c8 commit 6e1657f
Show file tree
Hide file tree
Showing 5 changed files with 73 additions and 1 deletion.
16 changes: 16 additions & 0 deletions bt2_search.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ static int ipause; // pause before maching?
static uint32_t qUpto; // max # of queries to read
static int gTrim5; // amount to trim from 5' end
static int gTrim3; // amount to trim from 3' end
static pair<int, int> trimReadsExceedingLen; // trim reads exceeding given length from either 3' or 5'-end
static int offRate; // keep default offRate
static bool solexaQuals; // quality strings are solexa quals, not phred, and subtract 64 (not 33)
static bool phred64Quals; // quality chars are phred, but must subtract 64 (not 33)
Expand Down Expand Up @@ -288,6 +289,7 @@ static void resetOptions() {
qUpto = 0xffffffff; // max # of queries to read
gTrim5 = 0; // amount to trim from 5' end
gTrim3 = 0; // amount to trim from 3' end
trimReadsExceedingLen = pair<int, int>(5, 0); // default: don't do any trimming
offRate = -1; // keep default offRate
solexaQuals = false; // quality strings are solexa quals, not phred, and subtract 64 (not 33)
phred64Quals = false; // quality chars are phred, but must subtract 64 (not 33)
Expand Down Expand Up @@ -643,6 +645,7 @@ static struct option long_options[] = {
{(char*)"xeq", no_argument, 0, ARG_XEQ},
{(char*)"thread-ceiling", required_argument, 0, ARG_THREAD_CEILING},
{(char*)"thread-piddir", required_argument, 0, ARG_THREAD_PIDDIR},
{(char*)"trim-reads-exceeding-len", required_argument, 0, ARG_TRIM_READS_EXCEEDING_LEN},
{(char*)0, 0, 0, 0} // terminator
};

Expand Down Expand Up @@ -736,6 +739,7 @@ static void printUsage(ostream& out) {
<< " -u/--upto <int> stop after first <int> reads/pairs (no limit)" << endl
<< " -5/--trim5 <int> trim <int> bases from 5'/left end of reads (0)" << endl
<< " -3/--trim3 <int> trim <int> bases from 3'/right end of reads (0)" << endl
<< " --trim-reads-exceeding-len <3|5:int> trim <int> bases from either 3'/right or 5'/left end of reads (no trimming)" << endl
<< " --phred33 qualities are Phred+33 (default)" << endl
<< " --phred64 qualities are Phred+64" << endl
<< " --int-quals qualities encoded as space-delimited integers" << endl
Expand Down Expand Up @@ -1101,6 +1105,17 @@ static void parseOption(int next_option, const char *arg) {
break;
case '3': gTrim3 = parseInt(0, "-3/--trim3 arg must be at least 0", arg); break;
case '5': gTrim5 = parseInt(0, "-5/--trim5 arg must be at least 0", arg); break;
case ARG_TRIM_READS_EXCEEDING_LEN:
trimReadsExceedingLen = parsePair<int>(arg, ':');
if (trimReadsExceedingLen.first != 3 && trimReadsExceedingLen.first != 5) {
cerr << "`--trim-reads-exceeding-len pos:n`: pos must be either 3 or 5" << endl;
throw 1;
}
if(trimReadsExceedingLen.second < 0) {
cerr << "`--trim-reads-exceeding-len pos:n`: n must be at least 0" << endl;
throw 1;
}
break;
case 'h': printUsage(cout); throw 0; break;
case ARG_USAGE: printUsage(cout); throw 0; break;
//
Expand Down Expand Up @@ -4717,6 +4732,7 @@ static void driver(
integerQuals, // true -> qualities are space-separated numbers
gTrim5, // amt to hard clip from 5' end
gTrim3, // amt to hard clip from 3' end
trimReadsExceedingLen, // trim reads exceeding given length from either 3' or 5'-end
fastaContLen, // length of sampled reads for FastaContinuous...
fastaContFreq, // frequency of sampled reads for FastaContinuous...
skipReads, // skip the first 'skip' patterns
Expand Down
3 changes: 2 additions & 1 deletion opts.h
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,8 @@ enum {
ARG_XEQ, // --xeq
ARG_THREAD_CEILING, // --thread-ceiling
ARG_THREAD_PIDDIR, // --thread-piddir
ARG_INTERLEAVED_FASTQ // --interleaved
ARG_INTERLEAVED_FASTQ, // --interleaved
ARG_TRIM_READS_EXCEEDING_LEN// --trim-reads-exceeding-len
};

#endif
Expand Down
3 changes: 3 additions & 0 deletions pat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -172,8 +172,11 @@ pair<bool, bool> PatternSourcePerThread::nextReadPair() {
}
// Finalize read/pair
if(!buf_.read_b().patFw.empty()) {
trim(buf_.read_a());
trim(buf_.read_b());
finalizePair(buf_.read_a(), buf_.read_b());
} else {
trim(buf_.read_a());
finalize(buf_.read_a());
}
bool this_is_last = buf_.cur_buf_ == static_cast<unsigned int>(last_batch_size_-1);
Expand Down
24 changes: 24 additions & 0 deletions pat.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ struct PatternParams {
bool intQuals_,
int trim5_,
int trim3_,
pair<int, int> trimReadsExceedingLen_,
int sampleLen_,
int sampleFreq_,
size_t skip_,
Expand All @@ -76,6 +77,7 @@ struct PatternParams {
intQuals(intQuals_),
trim5(trim5_),
trim3(trim3_),
trimReadsExceedingLen(trimReadsExceedingLen_),
sampleLen(sampleLen_),
sampleFreq(sampleFreq_),
skip(skip_),
Expand All @@ -91,6 +93,7 @@ struct PatternParams {
bool intQuals; // true -> qualities are space-separated numbers
int trim5; // amt to hard clip from 5' end
int trim3; // amt to hard clip from 3' end
pair<int, int> trimReadsExceedingLen;
int sampleLen; // length of sampled reads for FastaContinuous...
int sampleFreq; // frequency of sampled reads for FastaContinuous...
size_t skip; // skip the first 'skip' patterns
Expand Down Expand Up @@ -988,6 +991,27 @@ class PatternSourcePerThread {
return composer_.parse(ra, rb, buf_.rdid());
}

void trim(Read& r) {
if (pp_.trimReadsExceedingLen.second > 0) {
switch (pp_.trimReadsExceedingLen.first) {
case 3:
if (r.patFw.length() > pp_.trimReadsExceedingLen.second) {
r.trimmed5 = r.patFw.length() - pp_.trimReadsExceedingLen.second;
r.patFw.trimEnd(r.trimmed5);
r.qual.trimEnd(r.trimmed5);
}
break;
case 5:
if (r.patFw.length() > pp_.trimReadsExceedingLen.second) {
r.trimmed3 = r.patFw.length() - pp_.trimReadsExceedingLen.second;
r.patFw.trimBegin(r.trimmed3);
r.qual.trimBegin(r.trimmed3);
}
break;
}
}
}

PatternComposer& composer_; // pattern composer
PerThreadReadBuf buf_; // read data buffer
const PatternParams& pp_; // pattern-related parameters
Expand Down
28 changes: 28 additions & 0 deletions scripts/test/simple_tests.pl
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,34 @@
cline_reads => "CATCGATCAGTATCTG:ABCDEDGHIJKLMNOPQ", # qual too long
should_abort => 1},

{ name => "trim-reads-exceeding-len: trim from 5'-end",
ref => [ "AGCATCGATCAGTATCTGA" ],
cline_reads => "CATCGATCAGTATCTG:IIIIIIIIIIIIIIII\n",
args => "--trim-reads-exceeding-len 5:12",
norc => 1,
hits => [{ 6 => 1 }] },

{ name => "trim-reads-exceeding-len: trim from 3'-end",
ref => [ "AGCATCGATCAGTATCTGA" ],
cline_reads => "CATCGATCAGTATCTG:IIIIIIIIIIIIIIII\n",
args => "--trim-reads-exceeding-len 3:12",
norc => 1,
hits => [{ 2 => 1 }] },

{ name => "trim-reads-exceeding-len: invalid position",
ref => [ "AGCATCGATCAGTATCTGA" ],
cline_reads => "CATCGATCAGTATCTG:IIIIIIIIIIIIIIII\n",
args => "--trim-reads-exceeding-len 4:12",
norc => 1,
should_abort => 1 },

{ name => "trim-reads-exceeding-len: invalid count",
ref => [ "AGCATCGATCAGTATCTGA" ],
cline_reads => "CATCGATCAGTATCTG:IIIIIIIIIIIIIIII\n",
args => "--trim-reads-exceeding-len 5:-12",
norc => 1,
should_abort => 1 },

# Part of sequence is trimmed
{ name => "Cline 7",
ref => [ "AGCATCGATCAGTATCTGA" ],
Expand Down

0 comments on commit 6e1657f

Please sign in to comment.