manatee

#!/usr/bin/perl -w
use strict;
use warnings;
use 5.010; #Use perl 5.10 or newer.

use FindBin qw( $RealBin );


#Perl modules that have to be installed for the program to run.
my @modules = ('Getopt::Long', 'Set::IntervalTree');

#Load every required module at run time, so that a missing installation
#produces a readable hint instead of a compilation failure.
foreach my $module (@modules) {
    eval "use $module";
    next unless $@;
    print "\nPlease install $module perl module and rerun.\n\n";
    exit;
}

##Program variables##

#Version of the software.
my $version = '1.0';

#Location of the Bowtie genome index (option -index / config key "index").
my $path_to_bowtie_index;

#Directory, below the script directory, holding the Bowtie transcriptome index.
my $bowtie_trans_index_dir = 'trans-index';

#Path separator.
my $slash = '/';

#Location of the non coding annotation file.
my $annotation_file;

#Shared constants.
my $NOT_SPECIFIED = 'not_specified';
my $fasta_format = 'fasta';
my $fastq_format = 'fastq';

#Location of the coding annotation file.
my $general_annotation = $NOT_SPECIFIED;

#Location of the reference genome fasta file.
my $reference_genome;

#Number of cores handed to bowtie (-p).
my $aln_cores = 1;

#Mismatches allowed in the genome alignment (bowtie -v).
my $mismatches = 1;

#Strand specific mode of the algorithm.
my $strand_specific = 'yes';

#Upper limit passed to bowtie as -m.
my $m = 50;

#Largest nt distance between consecutive reads that still places them in the same cluster.
my $clust_dist = 50;

#Smallest number of reads a cluster must hold.
my $clust_dens = 5;

#Location of the Bowtie transcriptome index.
my $bowtie_trans_index = $NOT_SPECIFIED;

#Running total of aligned reads.
my $total_aligned_reads = 0;

#Field separator used inside composite keys and temporary records.
my $SEPARATOR = 'SePaRaToR';
#Separators between alternative transcripts (in memory / in output files).
my $ALTERNATIVE_TRANS_SEPARATOR = ' ';
my $ALTERNATIVE_TRANS_SEPARATOR_FILE = ',';

#Empty string constant.
my $EMPTY = '';

#Tag included in the generated SAM output file.
my $global_unaligned_tag;

#Number of positions searched on each side of a multimapped read.
my $multimap_range = 50;

#Position weights 1/exp(d/10) for d = $multimap_range .. 1, i.e. ordered
#from the farthest position to the nearest one; the upstream list is the
#same sequence reversed.
my @range_downstream = map { 1 / exp($_ / 10) } reverse(1 .. $multimap_range);
my @range_upstream = reverse @range_downstream;

#Running total of mapped reads.
my $mapped_reads = 0;

#Tag for reads without existing annotation.
my $no_annotation_flag = 'no_annotation';

#Tag for a uniquely aligned read.
my $unique_type = 'unique';

#Tag for a multimapped read.
my $non_unique_type = 'non_unique';

#True when identical input reads are collapsed before alignment.
my $collapsed = 1;

#Placeholder for values that do not apply.
my $NA = 'NA';

#How many best-scoring transcripts are kept per read after the transcriptome alignment.
my $n_best_transcripts = 3;

#Show the help text when the program is called without arguments or when the
#first argument is exactly -h, --h, -help or --help. The pattern is anchored:
#an unanchored match would also fire on any first argument that merely
#contains "-h" (for example a hyphenated file name).
if (!@ARGV || $ARGV[0] =~ /^--?h(?:elp)?$/) {
  get_help();
  exit;
}

#Read input parameters.
my $config_file;
my $input_file;
my $output_path;

#Every option takes a string value; values given on the command line replace
#the defaults declared above and are validated further below.
GetOptions (
  'config=s'      => \$config_file,
  'i=s'           => \$input_file,
  'o=s'           => \$output_path,
  'index=s'       => \$path_to_bowtie_index,
  'annotation=s'  => \$annotation_file,
  'genome=s'      => \$reference_genome,
  'cores=s'       => \$aln_cores,
  'mismatches=s'  => \$mismatches,
  'strand_spec=s' => \$strand_specific,
  'm=s'           => \$m,
  'cdi=s'         => \$clust_dist,
  'cd=s'          => \$clust_dens,
  't_index=s'     => \$bowtie_trans_index,
  'collapse=s'    => \$collapsed
);

#The input file is mandatory ...
if (not defined $input_file) {
    print "\nArgument -i/--i is mandatory. Use option -h for help message.\n\n";
    exit;
}

#... and has to exist.
if (!-e $input_file) {
    print "\nInput file must exists.\n";
    exit;
}

#The output path is mandatory as well.
if (not defined $output_path) {
    print "\nArgument -o/--o is mandatory. Use option -h for help message.\n\n";
    exit;
}

#Execution with configuration file.
#
#The file holds one "name=value" setting per line. Recognised names are
#index, genome, annotation, t_index, cores, m, cdi, cd (copied verbatim) and
#collapse / s (yes|no switches stored as 1/0). Unknown names and lines without
#a value are ignored, so a setting left empty in the file keeps the value
#given on the command line or the built-in default.
if (defined $config_file) {

  #Settings whose value is stored as it is written in the file.
  my %plain_setting = (
    index      => \$path_to_bowtie_index,
    genome     => \$reference_genome,
    annotation => \$annotation_file,
    t_index    => \$bowtie_trans_index,
    cores      => \$aln_cores,
    m          => \$m,
    cdi        => \$clust_dist,
    cd         => \$clust_dens,
  );

  open(my $config_fh, '<', $config_file) or die "$0: open $config_file: $!";

  while (my $line_config = <$config_fh>) {

    #Strip the line ending, including the CR of Windows-style files.
    $line_config =~ s/\r?\n\z//;

    #Split on the first "=" only, so that values (e.g. paths) may contain "=".
    my ($name, $value) = split(/=/, $line_config, 2);
    next unless defined $name and defined $value and $value ne '';

    if (exists $plain_setting{$name}) {
      ${ $plain_setting{$name} } = $value;
    }
    elsif ($name eq 'collapse') {
      if ($value eq 'yes') {
        $collapsed = 1;
      }
      elsif ($value eq 'no') {
        $collapsed = 0;
      }
      else {
        print "\nCollapse mode should be defined either as yes or no (default -collapse yes).\n";
        exit;
      }
    }
    elsif ($name eq 's') {
      if ($value eq 'yes') {
        $strand_specific = 1;
      }
      elsif ($value eq 'no') {
        $strand_specific = 0;
      }
      else {
        print "\nStrand specific mode should be defined either as yes or no (default -s yes).\n";
        exit;
      }
    }
  }

  close($config_fh);
}

###Check user inputs###

#Check for reference genome file (must be given, exist and be non-empty).
if (!defined $reference_genome || !-s $reference_genome) {
  print STDERR "\nFATAL: Defined genomic file does not exist. \n"
             . "Add proper genome file and rerun.\n\n";
  exit;
}

#Check for annotation file (must be given, exist and be non-empty).
if (!defined $annotation_file || !-s $annotation_file) {
  print STDERR "\nFATAL: Defined non coding annotation file does not exist. \n"
             . "Add non coding annotation and rerun.\n\n";
  exit;
}

#Check for bowtie transcriptome index.
if (check_index($bowtie_trans_index) eq 0) {
  print "\n\n"
      . "FATAL: Incorrect transcriptome index files. Check your transcriptome index files and rerun.\n"
      . "You could leave this parameter empty, and transcriptome index will be generated based on provided \n"
      . "annotation file and placed in the transcripts directory.\n\n";
  exit;
}

#Check for bowtie index.
if (not defined $path_to_bowtie_index) {
  print "\nFATAL: Argument -index is mandatory. You can define all the input parameters\n"
      . "in the configuration file and run the program with -config <file>.\n\n";
  exit;
}

#Check the genome index files.
if (check_index($path_to_bowtie_index) eq 0) {
  print "\nFATAL: Incorrect index files. Check your genome index files and rerun.\n\n";
  exit;
}

if ($aln_cores !~ /^[0-9]+$/ || $aln_cores <= 0) {
  print "\nNumber of alignment cores needs to be a positive integer (default -cores 1).\n\n";
  exit;
}

if ($clust_dens !~ /^[0-9]+$/ || $clust_dens <= 0) {
  print "\nMinimum number of unannotated read abundances per cluster should \n"
      . "be a positive integer (default: cd=5).\n\n";
  exit;
}

#The advertised default matches the value $clust_dist is initialised with.
if ($clust_dist !~ /^[0-9]+$/ || $clust_dist <= 0) {
  print "\nDistance between clusters of unannotated reads needs to be a positive \n"
      . "integer (default -cdi 50).\n"
      . "  \n"
      . "Clusters of unannotated reads will be merged if the distance between them is \n"
      . "equal or less than cdi.\n\n";
  exit;
}

if ($m !~ /^[0-9]+$/ || $m <= 0) {
  print "\n-m needs to be a positive integer number (default -m 50).\n\n";
  exit;
}

#Turn the yes/no switches into booleans. This is done whether or not a
#configuration file was used: the file stores 1/0, while the command line
#(and the built-in default) supplies the strings yes/no. Both "yes" and "no"
#are true strings in Perl, so without this conversion "no" would be ignored.
if ($strand_specific eq 'yes' || $strand_specific eq '1') {
  $strand_specific = 1;
}
elsif ($strand_specific eq 'no' || $strand_specific eq '0') {
  $strand_specific = 0;
}
else {
  print "\nStrand specific mode should be defined either as yes or no (default -s yes).\n";
  exit;
}

if ($collapsed eq 'yes' || $collapsed eq '1') {
  $collapsed = 1;
}
elsif ($collapsed eq 'no' || $collapsed eq '0') {
  $collapsed = 0;
}
else {
  print "\nCollapse mode should be defined either as yes or no (default -collapse yes).\n";
  exit;
}

#samtools has to be reachable through the PATH.
my $samtools_availability = `which samtools`;
unless (length $samtools_availability) {
  print "FATAL: Make sure you have samtools installed and added to your path.\n";
  exit;
}

#The output directory must not exist yet; it is created here.
if (-d $output_path) {
  print "\nOutput directory $output_path already exists.\n\n";
  exit;
}
mkdir $output_path or die "\nThe path $output_path needs to exist.\n";

#Make sure the output path ends with a slash.
$output_path .= '/' if substr($output_path, -1) ne '/';

#Print user input.
print_user_input();

#Decompress input (a .tar.gz input is unpacked into the output directory and
#$input_file is redirected to the extracted file).
decompress();

#Check input format (fasta or fastq) of the file as supplied by the user.
#Note that this is determined before the optional read collapsing below.
my $input_file_format = file_format($input_file);

#Struct with transcript name and related info.
#store_transcript() looks transcripts up in this hash by their name.
my %transcript_name;

#Define non-coding annotation hash.
#%gtf is read as chromosome -> strand -> start -> stop -> $SEPARATOR-joined
#record by align_against_transcriptome().
my %gtf;
my $annotation = Set::IntervalTree->new;

#Process and store annotation to hash.
process_annotation($annotation_file,\%transcript_name,\$annotation,\%gtf);

#Define condensed reads hash, utilized for time optimization.
#%read_tags is indexed by read id; its values are added to the counts when
#collapsing is active.
my %collapsed_reads = ();
my %read_tags = ();

my $input_base_name;
my $input_path;

#Extract the path and base name of the input file (results are returned
#through the two references).
get_input_filename_base_path($input_file, \$input_base_name, \$input_path);

#Input reads will be condensed, optimizing both execution time and available memory usage.
my $collapsed_input = $output_path.$input_base_name."_condensed.fasta"; 

#Process input fasta or fa file. Condensed reads creation, length of input file reduced. 
#From here on $input_file refers to the condensed file.
if($collapsed){ 
  process_input($input_file,$collapsed_input,\%collapsed_reads,\%read_tags);
  $input_file = $collapsed_input;
}

#Temp files: genome alignment (SAM), reads bowtie could not align (--un) and
#reads that exceeded the -m limit (--max).
my $bowtie_output = $output_path.$input_base_name.'_Manatee_bowtie.output.sam';
my $bowtie_unaligned = $output_path.$input_base_name.'_Manatee_bowtie.unaligned.fa'; 
my $reads_exceeding_m = $output_path.$input_base_name.'_Manatee_bowtie.over_m.fa';

#Path to bowtie aligner (shipped next to the script).
my $path_to_bowtie = $RealBin.'/bowtie-1.0.1/bowtie';

#Construction of bowtie call.
#The format is detected on the file that is actually handed to bowtie. When
#reads were collapsed, $input_file points to the condensed file, whose format
#may differ from the format of the file supplied by the user; deciding on -f
#from the original file could therefore pass the wrong flag.
my $aligner_input_format = file_format($input_file);
my $format_flag = $aligner_input_format eq $fasta_format ? '-f ' : '';

my $bowtie_call;
$bowtie_call = "$path_to_bowtie ${format_flag}--quiet -v $mismatches --best --strata --all -m $m -p $aln_cores -S $path_to_bowtie_index $input_file $bowtie_output --max $reads_exceeding_m --un $bowtie_unaligned";

#Call Bowtie for genome alignment. A failing aligner is reported, because
#everything downstream depends on its output.
system($bowtie_call) == 0
  or warn "WARNING: bowtie genome alignment did not finish successfully (status $?).\n";

#Sort sam file based on 1st column.
#NOTE(review): the sorted lines go to standard output, which the backticks
#capture and discard, so $bowtie_output itself stays in bowtie's order. Left
#unchanged because the code that reads this file is not part of this section;
#confirm whether an in-place sort (sort -o) is wanted.
`sort -T $output_path -k 1,1 $bowtie_output`;

#Structure for uniquely aligned reads.
my $uniquely_aligned_tree = Set::IntervalTree->new;
my $uniquely_aligned_count = 0;

#Temporary file read line by line in quantify(); presumably written by
#reads_process() - confirm against that subroutine.
my $tmp_sam = $output_path.$input_base_name.'Manatee_tmp.sam';
my %densities; 
#Counts per transcript key, split by read type ($unique_type / $non_unique_type).
my %transcripts;

my %tmp_hash;

#File with multimaps/UARs without existing annotation.
#NOTE(review): the result of this open is not checked.
my $no_indication_file = $output_path.$input_base_name.'_Manatee_no_indication.txt';
open(my $NO_INDICATION_FILE, ">", $no_indication_file);

#Hash for isomiR storage (transcript name -> sequence -> count, see write_isomirs).
my %isomirs;
my $isomir_counter = 0;

#Process mapped reads and create temp file with aligned reads and their counts.
reads_process($bowtie_output, \$uniquely_aligned_tree, \$uniquely_aligned_count, \%densities, \%transcripts);

#Quantify reads.
quantify(\%transcripts);

#Tmp file for alignment against transcriptome.
#NOTE(review): this name is spelled "_Manate_" while the other temporary
#files use "_Manatee_"; check whether the clean-up relies on the name.
my $trans_unaligned_0 = $output_path.$input_base_name.'_Manate_transcriptome_unaligned_0.fa';

#Filled by store_transcript(): read id -> sequence -> transcript key -> info -> score.
my %transcripts_transcriptome;
#Filled by create_no_indication_tag(): read id -> locus number / 'counter'.
my %no_indication_tag;
my $trans_aligned = $output_path.$input_base_name.'_Manatee_transcriptome_aligned_0.sam';

#Alignment against transcriptome of reads that exceeded the -m parameter in bowtie alignment.
align_against_transcriptome($reads_exceeding_m, $trans_unaligned_0, 1, $trans_aligned); #One mismatch

$trans_aligned = $output_path.$input_base_name.'_Manatee_transcriptome_aligned_1.sam';
my $trans_unaligned_1 = $output_path.$input_base_name.'_Manatee_transcriptome_unaligned_1.fa';

#Alignment against transcriptome of unaligned reads in bowtie alignment.
align_against_transcriptome($bowtie_unaligned, $trans_unaligned_1, 2, $trans_aligned); #Two mismatches

$trans_aligned = $output_path.$input_base_name.'_Manatee_transcriptome_aligned_2.sam';
my $trans_unaligned_prev = $trans_unaligned_1;
my $trans_unaligned_2 = $output_path.$input_base_name.'_Manatee_transcriptome_unaligned_2.fa';

#Augmented number of mismatches: reads still unaligned after the previous
#round are retried with three mismatches.
align_against_transcriptome($trans_unaligned_prev, $trans_unaligned_2, 3, $trans_aligned);

#Close the no-indication file and load it into %no_indication_tag.
create_no_indication_tag($no_indication_file);

#Store transcripts aligned to the transcriptome to the proper transcript structure. 
store_transcripts();

#Clusters of uniquely placed reads without annotation (written by unannotated_clusters).
my $clusters_output = $output_path.$input_base_name.'_Manatee_clusters.tsv';
unannotated_clusters();

#Write transcripts to file.
my $transcripts_counts = $output_path.$input_base_name.'_Manatee_counts.tsv';
write_expressions();

#isomiR table (written by write_isomirs only when isomiRs were found).
my $isomirs_file = $output_path.$input_base_name.'_Manatee_isomirs.tsv';
write_isomirs();

#Remove all the temp files from the working directory.
clean();

print "Output directory: $output_path\n\n";

print "END OF EXECUTION\n\n";

####### SUBROUTINES #######

#Extract .tar.gz files.
#
#Works on the globals $input_file and $output_path ($output_path already ends
#with a slash when this is called). If $input_file ends in ".tar.gz" the
#archive is unpacked into $output_path and $input_file is redirected to the
#extracted file; any other input is left untouched. Dies when the archive
#cannot be extracted or does not unpack to exactly one regular file.
sub decompress {

  #The dots are escaped so that only a real ".tar.gz" suffix matches.
  return unless $input_file =~ /\.tar\.gz$/;

  #The list form bypasses the shell, so paths containing spaces or shell
  #metacharacters are passed to tar unchanged.
  system('tar', 'xzf', $input_file, '-C', $output_path) == 0
    or die "\nFATAL: Could not extract $input_file into $output_path.\n\n";

  #Look at what was unpacked; hidden entries are ignored.
  opendir(my $dir, $output_path) or die "$0: opendir $output_path: $!";
  my @extracted = grep { !/^\./ } readdir($dir);
  closedir($dir);

  #Exactly one reads file is expected; anything else cannot be used as input.
  if (@extracted != 1 or !-f $output_path.$extracted[0]) {
    die "\nFATAL: The archive $input_file must contain exactly one reads file.\n\n";
  }

  $input_file = $output_path.$extracted[0];
}

#Write isomirs to output file.
#
#Reads the globals %isomirs (transcript name -> sequence -> count),
#$mapped_reads and $isomirs_file. Nothing is written when %isomirs is empty.
#Each row holds the transcript name, the count, the count normalised to reads
#per million mapped reads, and the sequence.
sub write_isomirs {

  return unless %isomirs;

  open(my $fp, '>', $isomirs_file) or die "$0: open $isomirs_file: $!";
  print $fp "Transcript Name\tCount\tRPM\tSequence\n";

  foreach my $trans_name (sort keys %isomirs) {
    #Sequences are sorted to make the output reproducible between runs.
    foreach my $sequence (sort keys %{ $isomirs{$trans_name} }) {

      my $count = $isomirs{$trans_name}{$sequence};

      #Reads per million: "**" is exponentiation ("^" would be bitwise XOR).
      #Without any mapped read the value is undefined and reported as $NA.
      my $rpm = $mapped_reads > 0 ? ($count * 10**6) / $mapped_reads : $NA;

      print $fp "$trans_name\t$count\t$rpm\t$sequence\t\n";
    }
  }

  close($fp) or die "$0: close $isomirs_file: $!";
}

#Load the file with multimaps/UARs without existing annotation.
#
#Parameter: path of the no-indication file. The write handle
#$NO_INDICATION_FILE is closed first, then the file is read back into the
#global %no_indication_tag:
#  $no_indication_tag{read id}{n}         = chr, strand, start, stop of the
#                                           n-th line of that read, joined
#                                           with $SEPARATOR
#  $no_indication_tag{read id}{'counter'} = number of lines seen for the read
#Dies when the file cannot be opened.
sub create_no_indication_tag {

  my $input = shift;

  #Close the writer so that everything written so far is on disk.
  close($NO_INDICATION_FILE);

  open(my $fp, '<', $input) or die "$0: open $input: $!";

  #Running number of the current line within its read.
  my $counter = 0;

  while (my $line = <$fp>) {

    chomp $line;

    my @arr = split($SEPARATOR, $line);
    my ($read_id, $chr, $strand, $start, $stop) = @arr[0 .. 4];

    $counter++;

    $no_indication_tag{$read_id}{$counter} = join($SEPARATOR, $chr, $strand, $start, $stop);
    $no_indication_tag{$read_id}{'counter'} = $counter;

    #The numbering restarts once it reaches the value in the last field of
    #the line (presumably the number of loci recorded for the read - confirm
    #against the code that writes the file).
    if ($counter == $arr[$#arr]) {
      $counter = 0;
    }
  }

  close($fp);

}

#Store clusters from UARs without existing annotation.
#
#Reads the globals %no_indication_tag, %read_tags, $collapsed, $clust_dist,
#$clust_dens and $clusters_output.
#
#1. Per-base coverage is built from the reads that have exactly one
#   no-indication locus (each read weighs $read_tags{read} when reads are
#   collapsed, otherwise 1).
#2. On every chromosome/strand the covered positions are walked in ascending
#   order. A gap larger than $clust_dist closes the running cluster; a smaller
#   gap (more than one base) only closes the current sub-cluster. The count of
#   a cluster is the sum of the highest coverage of each of its sub-clusters.
#   The cluster still open when a chromosome/strand ends is stored as well.
#3. Clusters whose count reaches $clust_dens are written to $clusters_output.
sub unannotated_clusters {

  #Create uar densities.
  my %dens;
  foreach my $read_id (keys %no_indication_tag) {

    next unless $no_indication_tag{$read_id}{'counter'} eq 1;

    my ($chr, $strand, $start, $stop) = split($SEPARATOR, $no_indication_tag{$read_id}{1});
    my $weight = $collapsed ? $read_tags{$read_id} : 1;

    for (my $i = $start; $i <= $stop; $i++) {
      $dens{$chr}{$strand}{$i} += $weight;
    }
  }

  #Build the clusters: chromosome -> strand -> start -> stop -> count.
  my %uar_clusters;
  foreach my $chr (sort keys %dens) {
    foreach my $strand (sort keys %{ $dens{$chr} }) {

      my $cluster_start;      #First position of the running cluster.
      my $prev_nc;            #Previously visited position (undef at the start).
      my $closed_peaks = 0;   #Summed peaks of the finished sub-clusters.
      my $current_peak = 0;   #Highest coverage within the running sub-cluster.

      foreach my $nc (sort { $a <=> $b } keys %{ $dens{$chr}{$strand} }) {

        if (defined $prev_nc) {
          #Positions are visited in ascending order, so the gap is positive.
          my $gap = $nc - $prev_nc;

          if ($gap > $clust_dist) {
            #Too far away: close the running cluster and start a new one.
            $uar_clusters{$chr}{$strand}{$cluster_start}{$prev_nc} = $closed_peaks + $current_peak;
            $cluster_start = $nc;
            $closed_peaks = 0;
            $current_peak = 0;
          }
          elsif ($gap > 1) {
            #Close enough to stay in the cluster, but a new sub-cluster begins.
            $closed_peaks += $current_peak;
            $current_peak = 0;
          }
        }
        else {
          $cluster_start = $nc;
        }

        $prev_nc = $nc;

        if ($dens{$chr}{$strand}{$nc} > $current_peak) {
          $current_peak = $dens{$chr}{$strand}{$nc};
        }
      }

      #Store the cluster that is still open at the end of this
      #chromosome/strand; without this it would never be reported.
      if (defined $prev_nc) {
        $uar_clusters{$chr}{$strand}{$cluster_start}{$prev_nc} = $closed_peaks + $current_peak;
      }
    }
  }

  open(my $cl, '>', $clusters_output) or die "$0: open $clusters_output: $!";

  print $cl "Chromosome\tStrand\tStart\tStop\tCluster Length\tCount\n";

  #Chromosomes are ordered by their leading letters (case-insensitive) and
  #then by the number that follows; a missing number counts as 0.
  foreach my $chr (sort {
    my @aa = $a =~ /^([A-Za-z]*)(\d*)/;
    my @bb = $b =~ /^([A-Za-z]*)(\d*)/;
    lc($aa[0]) cmp lc($bb[0]) or ($aa[1] || 0) <=> ($bb[1] || 0); } keys %uar_clusters) {
    foreach my $strand (sort keys %{ $uar_clusters{$chr} }) {
      foreach my $cluster_start (sort { $a <=> $b } keys %{ $uar_clusters{$chr}{$strand} }) {
        foreach my $cluster_stop (sort { $a <=> $b } keys %{ $uar_clusters{$chr}{$strand}{$cluster_start} }) {

          my $count = $uar_clusters{$chr}{$strand}{$cluster_start}{$cluster_stop};
          next unless $count >= $clust_dens;

          my $cluster_length = $cluster_stop - $cluster_start + 1;
          print $cl "$chr\t$strand\t$cluster_start\t$cluster_stop\t$cluster_length\t$count\n";
        }
      }
    }
  }

  close($cl) or die "$0: close $clusters_output: $!";
}

#Alignment against transcriptome for a) reads that exceed the MaxMultiLoci parameter b) unaligned to the genome reads.
#
#Parameters (positional):
#  $input           reads file to align; nothing happens if it does not exist
#  $trans_unaligned file that receives the reads bowtie cannot align (--un)
#  $mismatches      mismatches allowed in this round (bowtie -v)
#  $trans_aligned   SAM file written by bowtie
#
#When check_index() reports $NOT_SPECIFIED for the transcriptome index, the
#transcript sequences are cut out of the reference genome with samtools and an
#index is built below the script directory. The alignments are then grouped
#per read and handed to n_best_transcripts() / store_transcript().
#The global $bowtie_call is overwritten with the command that was run.
sub align_against_transcriptome {

  my $input = shift;
  my $trans_unaligned = shift;
  my $mismatches = shift;
  my $trans_aligned = shift;

  return unless -e $input;

  #NOTE(review): the global $bowtie_trans_index is not updated after an index
  #has been built here, so whether later calls reuse that index depends on
  #what check_index() returns - confirm.
  my $trans_index = check_index($bowtie_trans_index);

  if ($trans_index eq $NOT_SPECIFIED) { #Creating transcriptome index file inside the transcripts directory.

    my @path = split('/', $annotation_file);
    $trans_index = $RealBin.$slash.$bowtie_trans_index_dir.$slash.$path[$#path];
    my $transcripts_fasta = $trans_index.'.fa';

    #Fasta file that will contain transcript sequences. If it cannot be
    #created, no index can be built and the alignment is skipped.
    open(my $transcripts_fh, '>', $transcripts_fasta) or do {
      warn "WARNING: cannot write $transcripts_fasta ($!); transcriptome alignment skipped.\n";
      return;
    };

    foreach my $chromosome (keys %gtf) {
      foreach my $strand (keys %{ $gtf{$chromosome} }) {
        foreach my $start (keys %{ $gtf{$chromosome}{$strand} }) {
          foreach my $stop (keys %{ $gtf{$chromosome}{$strand}{$start} }) {

            #Field 1 of the record is the biotype, field 2 the name used as
            #fasta header.
            my @elements = split($SEPARATOR, $gtf{$chromosome}{$strand}{$start}{$stop});

            #miRNA loci are extended by 5 nt on both sides.
            my ($start_loci, $stop_loci) = ($start, $stop);
            if ($elements[1] eq 'miRNA') {
              $start_loci = $start - 5;
              $stop_loci  = $stop + 5;
            }

            my $output = `samtools faidx $reference_genome $chromosome:$start_loci-$stop_loci 2> /dev/null`;

            #The first line is the fasta header; the remaining lines are
            #joined into one sequence.
            my @array = split('\n', $output);
            shift @array;
            my $sequence = join('', @array);

            #Loci without sequence or with N bases are left out.
            if ($sequence ne "" && $sequence !~ /N/) {
              $sequence =~ s/^\s+|\s+$//g; # Removing leading and trailing white spaces.
              print $transcripts_fh ">$elements[2]\n";
              print $transcripts_fh "$sequence\n";
            }
          }
        }
      }
    }
    close($transcripts_fh);

    if (!-s $transcripts_fasta) { #Transcripts fasta is empty.
      unlink $transcripts_fasta;
      return;
    }

    print STDERR "\nBuilding transcriptome index in transcripts directory ...\n";

    `$RealBin/bowtie-1.0.1/bowtie-build --quiet $transcripts_fasta $trans_index`; #Build transcriptome index with bowtie.

    print "Created index: $trans_index\n";
  }

  #Align the reads against the transcriptome; -f is added for fasta input.
  my $file_format = file_format($input);
  my $format_flag = $file_format eq $fasta_format ? '-f ' : '';
  $bowtie_call = "$path_to_bowtie --quiet ${format_flag}-v $mismatches --best --strata --all -p $aln_cores -S $trans_index $input $trans_aligned --un $trans_unaligned";
  system($bowtie_call);

  -e $trans_aligned or die "$0: bowtie did not produce $trans_aligned\n";

  #Sort sam file based on 1st column. The result is written back to the file
  #(-o); without it the sorted lines would only go to standard output and the
  #file would stay unsorted. The sort is stable (-s) so that the lines of one
  #read keep their relative order, and byte-wise (LC_ALL=C) so that equal
  #keys are exactly equal read ids. Header lines may end up between the
  #alignments; the loop below skips them wherever they are.
  {
    local $ENV{LC_ALL} = 'C';
    system('sort', '-s', '-t', "\t", '-k', '1,1', '-T', $output_path, '-o', $trans_aligned, $trans_aligned) == 0
      or warn "WARNING: could not sort $trans_aligned (status $?).\n";
  }

  open(my $TRANS_ALIGNED_FILE, "<", $trans_aligned) or die "$0: open $trans_aligned: $!";

  #Alignments of the read currently being collected: score -> transcript -> record.
  my %trans_aligned_reads;
  my $prev_id;
  my $prev_sequence;

  while (my $line = <$TRANS_ALIGNED_FILE>) {

    next if $line =~ /^@/; #Avoid reading SAM header lines.

    #NOTE(review): the value used as score is SAM column 4, which in the SAM
    #format is the mapping position - confirm that this is the intended
    #ranking value.
    my @array = split('\t', $line);
    my ($read_id, $transcript_name, $score) = @array[0, 2, 3];

    #Unaligned records carry '*' as reference name.
    next if $transcript_name eq '*';

    my ($tag1, $sequence, $quality, $tag2, $tag3, $tag4) = @array[5, 9, 10, 11, 12, 13];

    if (defined $prev_id and $prev_id ne $read_id) { #New read.
      #Select n transcripts with the highest alignment scores as obtained from alignment against the transcriptome.
      my $tmp_hash = n_best_transcripts(\%trans_aligned_reads, $prev_id);
      store_transcript($tmp_hash, $prev_id, $prev_sequence);
      %trans_aligned_reads = ();
    }

    $prev_id = $read_id;
    $prev_sequence = $sequence;

    $trans_aligned_reads{$score}{$transcript_name} =
      join($SEPARATOR, $read_id, $sequence, $quality, $tag1, $tag2, $tag3, $tag4);
  }
  close($TRANS_ALIGNED_FILE);

  # Process the last lines of the file.
  if (defined $prev_id) {
    my $tmp_hash = n_best_transcripts(\%trans_aligned_reads, $prev_id);
    store_transcript($tmp_hash, $prev_id, $prev_sequence);
  }
}

#Select n best transcripts as obtained from the alignment to the transcriptome.
#
#Parameters:
#  $trans_aligned_reads  hash reference: score -> transcript name -> record
#  $read_id              id of the read (not used in the selection)
#Returns a hash reference with the same layout holding the selected entries.
#
#Scores are visited in ascending numeric order and transcripts by name.
#Entries are copied until the number of distinct scores seen reaches the
#global $n_best_transcripts; the entry that reaches the limit is still copied
#and the selection stops right after it.
#NOTE(review): because of that, further transcripts sharing the last accepted
#score are not returned - confirm this is intended.
sub n_best_transcripts {

  my $trans_aligned_reads = shift;
  my $read_id = shift;
  my %tmp_hash;
  my $count = 0;              #Number of distinct scores seen so far.
  my $prev_alignment_score;

  foreach my $alignment_score (sort { $a <=> $b } keys %{ $trans_aligned_reads }) {
    foreach my $transcript_name (sort keys %{ $trans_aligned_reads->{$alignment_score} }) {

      my $transcript_info = $trans_aligned_reads->{$alignment_score}{$transcript_name};
      $transcript_name =~ s/^\s+|\s+$//g;

      #Increase count only for a score that differs from the previous one
      #(the very first entry counts as the first score).
      if (!defined $prev_alignment_score or $alignment_score ne $prev_alignment_score) {
        $count++;
      }

      $tmp_hash{$alignment_score}{$transcript_name} = $transcript_info;

      if ($count == $n_best_transcripts) {
        return(\%tmp_hash);
      }
      $prev_alignment_score = $alignment_score;
    }
  }
  return(\%tmp_hash);
}

#Store n best transcripts for selected reads in a proper global structure.
#
#Parameters:
#  $tmp_hash  hash reference: score -> transcript name -> record
#  $read_id   id of the read the transcripts belong to
#  $sequence  sequence of that read
#
#For every transcript the key is looked up in the global %transcript_name and
#an entry is added to the global
#  %transcripts_transcriptome{read id}{sequence}{key}{record}{score}
#If the key already has counts in %transcripts, those counts (all types) are
#added to the entry; otherwise the entry is increased by one.
sub store_transcript {

  my $tmp_hash = shift;
  my $read_id = shift;
  my $sequence = shift;

  foreach my $score (sort { $a <=> $b } keys %{ $tmp_hash }) {
    foreach my $transcript_name (keys %{ $tmp_hash->{$score} }) {

      my $key = $transcript_name{$transcript_name};

      my $info = $tmp_hash->{$score}{$transcript_name};
      $info =~ s/^\s+|\s+$//g;

      #Check whether a specific transcript was already included in previous steps.
      if (exists $transcripts{$key}) {
        foreach my $type (keys %{ $transcripts{$key} }) {
          #Carry over the counts that are already assigned to the transcript.
          $transcripts_transcriptome{$read_id}{$sequence}{$key}{$info}{$score} += $transcripts{$key}{$type};
        }
      }
      else { #No counts assigned yet: register the transcript once.
        $transcripts_transcriptome{$read_id}{$sequence}{$key}{$info}{$score}++;
      }
    }
  }
}

#Store transcripts from transcriptome alignment to transcripts hash.
#
#Works on the globals %transcripts_transcriptome (input), %transcripts
#(updated), %no_indication_tag, %read_tags and $collapsed.
#
#For every read the candidate transcripts are ranked by the count stored for
#them (highest first) and one of three rules selects what the read is
#assigned to:
#  a) some candidate has a stored count above 1: the first candidates in
#     ranking order are taken, as many as there are candidates with such a
#     count;
#  b) otherwise, a candidate key contains "miRNA": the first candidate whose
#     score equals the lowest score of the read is taken;
#  c) otherwise all candidates are combined, ordered by transcript name, into
#     one alternative-transcripts key.
#The selected keys are then credited in %transcripts and passed to
#detect_isomir().
sub store_transcripts {

  foreach my $read_id (sort keys %transcripts_transcriptome) {

    my $mirna_indicator = 0;
    #Candidates indexed by stored count, then score.
    my %tmp_tmp;
    my $min_align_score = 1000000;
    my $existing_trans_count_counter = 0;
    my $trans_flag = 0;

    foreach my $sequence (sort keys %{ $transcripts_transcriptome{$read_id} }) {
      foreach my $key (sort keys %{ $transcripts_transcriptome{$read_id}{$sequence} }) {
        foreach my $info (keys %{ $transcripts_transcriptome{$read_id}{$sequence}{$key} }) {
          foreach my $score (keys %{ $transcripts_transcriptome{$read_id}{$sequence}{$key}{$info} }) {

            #Track the lowest score seen for the read (used by rule b).
            if ($min_align_score >= $score) {
              $min_align_score = $score;
            }

            #How many reads correspond to that transcript from previous calculations.
            my $existing_trans_count = $transcripts_transcriptome{$read_id}{$sequence}{$key}{$info}{$score};
            $tmp_tmp{$existing_trans_count}{$score}{$read_id}{$key}{$info}{$sequence} = $existing_trans_count;

            if ($key =~ /miRNA/) {
              $mirna_indicator = 1;
            }
            if ($existing_trans_count > 1) {
              $trans_flag = 1;
              $existing_trans_count_counter++;
            }
          }
        }
      }
    }

    my $counter = 0;
    my $alt_trans = $EMPTY;
    my $alt_flag = 0;
    #Selected assignments: read id -> key -> info -> sequence.
    my %assigned;
    my $glob_seq;
    my $glob_info;

    SELECTION: {
      foreach my $trans_count (sort { $b <=> $a } keys %tmp_tmp) {
        foreach my $score (keys %{ $tmp_tmp{$trans_count} }) {
          foreach my $candidate_read (keys %{ $tmp_tmp{$trans_count}{$score} }) {
            foreach my $key (sort keys %{ $tmp_tmp{$trans_count}{$score}{$candidate_read} }) {
              foreach my $info (keys %{ $tmp_tmp{$trans_count}{$score}{$candidate_read}{$key} }) {
                foreach my $sequence (keys %{ $tmp_tmp{$trans_count}{$score}{$candidate_read}{$key}{$info} }) {

                  $glob_seq = $sequence;
                  $glob_info = $info;

                  if ($trans_flag) { #Rule a: transcripts with counts from the previous analysis exist.
                    $counter++;
                    $assigned{$candidate_read}{$key}{$info}{$sequence} = $trans_count;
                  }
                  elsif ($mirna_indicator) { #Rule b: a miRNA is present, choose the best alignment score.
                    if ($min_align_score eq $score) {
                      $assigned{$candidate_read}{$key}{$info}{$sequence} = $trans_count;
                      last SELECTION;
                    }
                  }
                  else { #Rule c: collect all candidates as alternative transcripts.
                    if ($alt_trans eq $EMPTY) {
                      $alt_trans = $key;
                    }
                    else {
                      $alt_trans = $alt_trans.$ALTERNATIVE_TRANS_SEPARATOR.$key;
                    }
                    $alt_flag = 1;
                  }

                  #Stop once all candidates with a previous count have been taken.
                  if ($counter == $existing_trans_count_counter and $counter != 0) {
                    last SELECTION;
                  }
                }
              }
            }
          }
        }
      }
    }

    if ($alt_flag) {
      #Order alternative transcripts according to the transcript name
      #(field 4 of the $SEPARATOR-joined key).
      my @altNames = split($ALTERNATIVE_TRANS_SEPARATOR, $alt_trans);
      my %alphabeticOrderTrans;
      foreach my $alt_name (@altNames) {
        my @transNames = split($SEPARATOR, $alt_name);
        my $transName = $transNames[4];
        $alphabeticOrderTrans{$transName} = $alt_name;
      }
      my $ordered_alt_trans = $EMPTY;
      foreach my $ordered_key (sort keys %alphabeticOrderTrans) {
        if ($ordered_alt_trans eq $EMPTY) {
          $ordered_alt_trans = $alphabeticOrderTrans{$ordered_key};
        }
        else {
          $ordered_alt_trans = $ordered_alt_trans.$ALTERNATIVE_TRANS_SEPARATOR.$alphabeticOrderTrans{$ordered_key};
        }
      }
      $assigned{$read_id}{$ordered_alt_trans}{$glob_info}{$glob_seq} = $ordered_alt_trans;
    }

    foreach my $assigned_read (keys %assigned) {

      #A collapsed read stands for $read_tags{read} input reads.
      my $weight = $collapsed ? $read_tags{$assigned_read} : 1;

      #A read with exactly one no-indication locus counts as unique.
      my $type = (exists $no_indication_tag{$assigned_read}
                  && $no_indication_tag{$assigned_read}{'counter'} eq 1)
               ? $unique_type
               : $non_unique_type;

      foreach my $key (sort keys %{ $assigned{$assigned_read} }) {
        foreach my $info (keys %{ $assigned{$assigned_read}{$key} }) {
          foreach my $sequence (sort keys %{ $assigned{$assigned_read}{$key}{$info} }) {
            $transcripts{$key}{$type} += $weight;
            detect_isomir($key, create_info($key, $sequence), $weight);
          }
        }
      }
    }
  }
}

#Helper that builds the info string of a read alignment.
#
#Parameters: a $SEPARATOR-joined transcript key and the read sequence.
#Returns fields 0, 3, 5 and 6 of the key (named chromosome, strand, start and
#stop position here) followed by the sequence twice, joined with $SEPARATOR.
sub create_info {

  my ($key, $sequence) = @_;

  my ($chr, $strand, $start_pos, $stop_pos) = (split($SEPARATOR, $key))[0, 3, 5, 6];

  return join($SEPARATOR, $chr, $strand, $start_pos, $stop_pos, $sequence, $sequence);

}

#Split a $SEPARATOR-joined alignment record into its first twelve fields.
#
#Returns, in this order: read id, chromosome, strand, start position, stop
#position, read, quality, tag1, tag2, tag3, tag4 and the multimap loci value.
#Fields missing from the record come back as undef.
sub split_info {

  my ($info) = @_;

  my @fields = split($SEPARATOR, $info);

  return @fields[0 .. 11];

}

#Build the $SEPARATOR-joined alignment record for one SAM line.
#
#Parameters: the tab-separated SAM line and the read id to store.
#Returns: read id, chromosome, strand, start, stop, read, quality and four
#tag columns (SAM fields 5, 11, 12, 13) joined with $SEPARATOR.
#In strand specific mode the strand is '-' for flag 16 and '+' for flag 0 (it
#stays undefined for any other flag); otherwise it is $NA. The stop position
#is start + read length - 1.
sub generate_info {

  my ($line, $read_id) = @_;

  my @sam = split('\t', $line);
  my ($flag, $chromosome, $start_pos, $tag1, $read, $quality, $tag2, $tag3, $tag4)
    = @sam[1, 2, 3, 5, 9, 10, 11, 12, 13];

  my $strand;
  if (!$strand_specific) {
    $strand = $NA;
  }
  elsif ($flag eq '16') {
    $strand = '-';
  }
  elsif ($flag eq '0') {
    $strand = '+';
  }

  my $stop_pos = $start_pos + length($read) - 1;

  return join($SEPARATOR, $read_id, $chromosome, $strand, $start_pos, $stop_pos,
              $read, $quality, $tag1, $tag2, $tag3, $tag4);

}

#Check the input file format, fasta or fastq.
#
#  $input_file : path to the reads file.
#
#Returns $fasta_format when the first line starts with '>', otherwise
#$fastq_format (also when the file is empty or cannot be read).
sub file_format {

  my $input_file = shift;

  #Read the first line directly instead of shelling out to `head`, so paths
  #containing spaces or shell metacharacters are handled correctly.
  my $first_line = '';
  if(open(my $input_fh, "<", $input_file)){
    my $line = <$input_fh>;
    $first_line = $line if defined $line;
    close($input_fh);
  }else{
    warn "Could not open $input_file to detect its format: $!\n";
  }

  return ($first_line =~ /^>/) ? $fasta_format : $fastq_format;

}

#Quantify multimapped reads.
#
#Reads the temporary alignment file ($tmp_sam), where every line is one locus of
#a multimapped read followed by the total number of loci of that read. Once all
#loci of a read are collected, each locus gets:
#  - a score from the densities of uniquely aligned reads (%densities) over the
#    locus and within $multimap_range nt on either side; flanking positions are
#    weighted by @range_downstream / @range_upstream;
#  - the best fitted annotation overlapping the locus, or $no_annotation_flag.
#Reads without any indication (no density and no annotation) are written to
#$NO_INDICATION_FILE. Every read is then handed to assign().
#
#  $transcripts : accepted for interface compatibility; counts are stored
#                 through the file-level %transcripts.
sub quantify {

  my $transcripts = shift;

  open(my $TMP_SAM, "<", $tmp_sam) or die "Could not open temporary alignment file $tmp_sam: $!\n";

  my $counter = 0;   #Number of loci collected for the current read.
  my %tmp_hash = (); #chromosome -> strand -> start -> stop -> info string.

  while(my $line = <$TMP_SAM>){

    chomp $line;

    my ($read_id, $chromosome, $strand, $start_pos, $stop_pos, $read, $quality, $tag1, $tag2, $tag3, $tag4, $multimap) = split_info($line);
    if($counter < $multimap){
      $tmp_hash{$chromosome}{$strand}{$start_pos}{$stop_pos} =
        join($SEPARATOR, $chromosome, $strand, $start_pos, $stop_pos, $read, $quality, $tag1, $tag2, $tag3, $tag4);
      $counter++;
    }

    #Keep collecting until all the multimapping regions of the read are reached.
    next if $counter != $multimap;

    my %multimaps;
    my $indication = 0;

    foreach my $chr (keys %tmp_hash){
      foreach my $str (keys %{ $tmp_hash{$chr} }){
        foreach my $start (keys %{ $tmp_hash{$chr}{$str} }){
          foreach my $stop (keys %{ $tmp_hash{$chr}{$str}{$start} }){

            my $info = $tmp_hash{$chr}{$str}{$start}{$stop};
            my $read_length = $stop - $start + 1;

            #Score from uniquely aligned reads on and around the locus.
            my $score = 0;
            for my $i ($start - $multimap_range .. $stop + $multimap_range){
              my $key = $chr . $SEPARATOR . $str . $SEPARATOR . $i;
              next if !exists $densities{$key};
              $indication = 1;
              if($i >= $start and $i <= $stop){
                $score += $densities{$key};
              }elsif($i < $start){ #Downstream
                $score += $densities{$key}*$range_downstream[($start-$i-1)];
              }else{ #Upstream
                $score += $densities{$key}*$range_upstream[($i-$stop-1)];
              }
            }

            #Annotation overlapping the locus on the same chromosome and strand.
            #The interval tree holds all chromosomes, hence the prefix filter;
            #\Q..\E keeps '+', '.' and similar characters literal.
            my $results = $annotation->fetch($start, $stop);
            my %annotation_tmp;
            foreach my $hit (@{ $results }){
              next if $hit !~ /^\Q$chr\E$SEPARATOR\Q$str\E/;
              $indication = 1;
              $annotation_tmp{$start}{$stop}{$read_length}{$hit} = $hit;
            }

            #Select the best fitted transcript for the specific read.
            my $best_fitted_transcript = %annotation_tmp
              ? best_fitted_transcript(\%annotation_tmp)
              : $no_annotation_flag;

            $multimaps{$info}{'score'} = $score;
            $multimaps{$info}{'annotation'} = $best_fitted_transcript;
          }
        }
      }
    }

    if(!$indication){ #No unique reads or annotation was found for those multimaps.
      foreach my $chr (keys %tmp_hash){
        foreach my $str (keys %{ $tmp_hash{$chr} }){
          foreach my $start (keys %{ $tmp_hash{$chr}{$str} }){
            foreach my $stop (keys %{ $tmp_hash{$chr}{$str}{$start} }){
              my $info = $tmp_hash{$chr}{$str}{$start}{$stop};
              print $NO_INDICATION_FILE "$read_id$SEPARATOR$info$SEPARATOR$multimap\n";
            }
          }
        }
      }
    }

    %tmp_hash = ();
    $counter = 0;
    assign(\%multimaps, \%transcripts, \%densities, $read_id);
  }

  close($TMP_SAM);

}

#Process mapped reads and create temp file with aligned reads and their counts.
#
#Consecutive SAM lines with the same read id are grouped. A read with a single
#alignment is counted as uniquely aligned and passed to densities_and_transcripts();
#a read with several alignments is written to $tmp_sam, one line per locus, as
#"<info><SEPARATOR><number of loci>". $mapped_reads is increased once per mapped read.
#
#  $SAM_FILE               : path to the SAM file to read.
#  $uniquely_aligned_tree  : not used here.
#  $uniquely_aligned_count : reference to the counter of uniquely aligned reads.
#  $densities              : forwarded (by reference) to densities_and_transcripts().
#  $transcripts            : not used here; the file-level %transcripts is passed on.
sub reads_process {

  my($SAM_FILE,$uniquely_aligned_tree, $uniquely_aligned_count, $densities, $transcripts) = @_;

  open(my $sam_fh, "<", $SAM_FILE) or die "Could not open SAM file $SAM_FILE: $!\n";
  open(my $TMP_SAM, ">", $tmp_sam) or die "Could not open temporary alignment file $tmp_sam: $!\n";

  my %tmp;          #Info strings of all alignments of the current read.
  my $prev_read_id;

  #Account for the alignments collected for one read and start a new group.
  my $flush = sub {
    my $mltp = keys %tmp;
    return if $mltp == 0; #No mapped read seen, nothing to count.
    if($mltp == 1){
      $$uniquely_aligned_count++;
      densities_and_transcripts(\$densities, \%tmp, \%transcripts);
    }else{
      #Write multimaps into temp file in Manatee like format.
      foreach my $info (keys %tmp){
        print $TMP_SAM "$info$SEPARATOR$mltp\n";
      }
    }
    $mapped_reads++;
    %tmp = ();
  };

  while (my $line = <$sam_fh>) {

    chomp $line;
    next if $line =~ /^@/; #Header line.

    my @array = split('\t',$line);
    if($array[1] ne 4 && $array[2] ne '*'){ #Count only mapped reads.

      my $read_id = $array[0];
      $read_id =~ s/^\s+|\s+$//g;

      #A new read id closes the group of the previous read.
      if(defined $prev_read_id and $prev_read_id ne $read_id){
        $flush->();
      }

      my $info = generate_info($line, $read_id);
      $tmp{$info} = $info;
      $prev_read_id = $read_id;

    }else{
      $global_unaligned_tag = $array[11];
    }
  }

  #The last read of the input file.
  $flush->();

  close($sam_fh);
  close($TMP_SAM) or die "Could not write temporary alignment file $tmp_sam: $!\n";
}

#Apply the relation for best fitted transcript.
#
#  $tmp : reference to a hash  start -> stop -> read length -> annotation -> annotation
#         holding the annotation candidates of one or more alignments.
#
#If none of the candidates mentions miRNA, all candidates are returned joined by
#$ALTERNATIVE_TRANS_SEPARATOR. Otherwise the single annotation whose length and
#overlap fit the read best is returned: the one minimising
#  abs(1 - ((transcript length + read length) / 2) / overlap).
#Candidates are visited in sorted order and ties go to the later candidate.
#Entries equal to $no_annotation_flag are ignored. Returns "" if nothing is left.
sub best_fitted_transcript {

  my $tmp = shift;

  #Flatten the structure once, in a deterministic order, and check whether any
  #of the alignments is associated with miRNA annotation.
  my @candidates;
  my $mirna_flag = 0;
  foreach my $start_pos (sort {$a <=> $b} keys %{$tmp}){
    foreach my $stop_pos (sort {$a <=> $b} keys %{ $$tmp{$start_pos} }){
      foreach my $read_length (sort {$a <=> $b} keys %{ $$tmp{$start_pos}{$stop_pos} }){
        foreach my $annotation (sort keys %{ $$tmp{$start_pos}{$stop_pos}{$read_length} }){
          next if $annotation eq $no_annotation_flag;
          $mirna_flag = 1 if $annotation =~ /miRNA/;
          push @candidates, [$start_pos, $stop_pos, $read_length, $annotation];
        }
      }
    }
  }

  my $best_fitted_transcript = "";

  if(!$mirna_flag){ #No miRNA annotation was found: keep every candidate.
    foreach my $candidate (@candidates){
      my $annotation = $candidate->[3];
      if($best_fitted_transcript eq ""){
        $best_fitted_transcript = $annotation;
      }else{
        $best_fitted_transcript = $best_fitted_transcript.$ALTERNATIVE_TRANS_SEPARATOR.$annotation;
      }
    }
    return $best_fitted_transcript;
  }

  my $min_ratio = 3000000000;
  foreach my $candidate (@candidates){
    my ($start_pos, $stop_pos, $read_length, $annotation) = @{$candidate};

    #A candidate may itself be a list of alternative transcripts.
    my @arrays;
    if($annotation =~ /$ALTERNATIVE_TRANS_SEPARATOR/){
      @arrays = split($ALTERNATIVE_TRANS_SEPARATOR,$annotation);
    }else{
      $arrays[0] = $annotation;
    }

    foreach my $annotation_sep (@arrays){
      my @arr = split($SEPARATOR, $annotation_sep);
      my ($start_tr, $stop_tr) = @arr[5, 6];
      my $tr_len = $stop_tr - $start_tr + 1;
      my $start = ($start_tr < $start_pos) ? $start_pos : $start_tr; #Max
      my $end = ($stop_tr > $stop_pos) ? $stop_pos : $stop_tr; #Min
      my $cov = $end - $start + 1;
      next if $cov == 0; #Nothing shared with the read; avoids a division by zero.
      my $ratio = abs(1 - (($tr_len+$read_length)/2)/$cov);
      if($ratio <= $min_ratio){
        $best_fitted_transcript = $annotation_sep;
        $min_ratio = $ratio;
      }
    }
  }
  return $best_fitted_transcript;

}

#Save isomir in a proper structure.
#
#  $annotation : $SEPARATOR-joined annotation; field 4 is the transcript name.
#  $key        : $SEPARATOR-joined alignment key; field 4 is the read sequence.
#  $times      : amount added to the isomir count.
#
#The transcript is looked up in %transcript_name; only if field 3 of that record
#is "miRNA", $times is added to $isomirs{transcript name}{sequence}.
sub detect_isomir {

  my ($annotation, $key, $times) = @_;

  my @annotation_fields = split($SEPARATOR, $annotation);
  my $trans_name = $annotation_fields[4];

  my @transcript_fields = split($SEPARATOR, $transcript_name{$trans_name});
  my $biotype = $transcript_fields[3];

  if($biotype eq "miRNA"){
    my @key_fields = split($SEPARATOR, $key);
    $isomirs{$trans_name}{$key_fields[4]} += $times;
  }
}

#Main function for assignment of transcript counts in the designated structure.
#
#  $multimaps   : reference to  info key -> { 'score' => density score,
#                 'annotation' => annotation string or $no_annotation_flag },
#                 one entry per locus of the read.
#  $transcripts : reference to the counts structure; the $non_unique_type
#                 count of the selected annotation(s) is increased.
#  $densities   : not used here.
#  $read_id     : id of the read; in collapsed mode its weight is $read_tags{$read_id}.
#
#Rules, in order of precedence:
#  1. Loci having both annotation and uniquely aligned reads (UARs) win. With one
#     such locus it gets the whole read; with several the read is divided
#     between them proportionally to their scores.
#  2. Otherwise, if any locus has annotation, the read goes to the best fitted
#     transcript among the annotated loci (annotation is preferred over loci
#     supported only by UARs).
#  3. Otherwise nothing is assigned.
#Isomir counts are updated through detect_isomir().
sub assign {

  my ($multimaps, $transcripts, $densities, $read_id) = @_;

  #Number of reads represented by this read id.
  my $weight = $collapsed ? $read_tags{$read_id} : 1;

  #Classify the loci. Keys are visited in sorted order so that results do not
  #depend on the hash ordering.
  my @annotated_with_uniques; #Loci with both annotation and UARs.
  my $annotation = 0;         #Number of loci with annotation only.
  foreach my $key (sort keys %{$multimaps}){
    my $has_uniques = $$multimaps{$key}{'score'} != 0;
    my $has_annotation = $$multimaps{$key}{'annotation'} ne $no_annotation_flag;
    if($has_uniques and $has_annotation){
      push @annotated_with_uniques, $key;
    }elsif($has_annotation){
      $annotation++;
    }
  }

  if(@annotated_with_uniques){ #Annotation and UARs.

    my $sum_score = 0;
    foreach my $key (@annotated_with_uniques){
      $sum_score += $$multimaps{$key}{'score'};
    }

    foreach my $key (@annotated_with_uniques){
      #A single locus takes the whole read, several loci share it by score.
      my $share = (@annotated_with_uniques == 1) ? 1 : $$multimaps{$key}{'score'}/$sum_score;
      my $count = $weight*$share;
      $$transcripts{$$multimaps{$key}{'annotation'}}{$non_unique_type} += $count;
      detect_isomir($$multimaps{$key}{'annotation'}, $key, $count);
    }
    return;
  }

  return if !$annotation; #No annotated locus: the read is not assigned.

  #Only annotation available (loci supported only by UARs are ignored).
  my %annotation_tmp;
  foreach my $key (sort keys %{$multimaps}){
    my $locus_annotation = $$multimaps{$key}{'annotation'};
    next if $locus_annotation eq $no_annotation_flag;
    my @key_fields = split($SEPARATOR, $key);
    my ($start_pos, $stop_pos) = @key_fields[2, 3];
    my $read_length = $stop_pos - $start_pos + 1;
    $annotation_tmp{$start_pos}{$stop_pos}{$read_length}{$locus_annotation} = $locus_annotation;
  }

  my $best_fitted_transcript = best_fitted_transcript(\%annotation_tmp);
  $$transcripts{$best_fitted_transcript}{$non_unique_type} += $weight;

  #If the selected transcript is a miRNA, record the isomir for the first locus carrying it.
  foreach my $key (sort keys %{$multimaps}){
    my $locus_annotation = $$multimaps{$key}{'annotation'};
    next if $locus_annotation eq $no_annotation_flag;
    if($locus_annotation =~ /miRNA/ and $locus_annotation eq $best_fitted_transcript){
      detect_isomir($locus_annotation, $key, $weight);
      last;
    }
  }
  return;
}

#Create densities of uniquely aligned reads and assign uniquely aligned reads to available annotation.
#
#  $densities   : not used; the file-level %densities is updated.
#  $tmp         : reference to a hash whose keys are alignment info strings
#                 (read id, chromosome, strand, start, stop, read, quality, ...).
#  $transcripts : reference to the counts structure; the $unique_type count of
#                 the best fitted transcript is increased.
#
#A read without overlapping annotation is written to $NO_INDICATION_FILE as
#"<info><SEPARATOR>UAR<SEPARATOR>1". In collapsed mode a read counts
#$read_tags{read id} times, otherwise once.
sub densities_and_transcripts {

  my ($densities, $tmp, $transcripts) = @_;

  my ($read_id, $chromosome, $strand, $start_pos, $stop_pos, $read_length);

  foreach my $info (keys %{$tmp}){ #Always only one element because there is only one unique read.

    my @element = split($SEPARATOR, $info);
    ($read_id, $chromosome, $strand, $start_pos, $stop_pos) = @element[0 .. 4];
    $read_length = $stop_pos - $start_pos + 1;

    #Collect the annotation overlapping the read on the same chromosome and strand.
    my $strand_reg = "\\".$strand;
    my $results = $annotation->fetch($start_pos, $stop_pos);
    my %annotation_tmp;
    foreach my $hit (@{ $results }){
      if($hit =~ /^$chromosome$SEPARATOR$strand_reg/){
        $annotation_tmp{$start_pos}{$stop_pos}{$read_length}{$hit} = $hit;
      }
    }

    my $tagged_info = $info.$SEPARATOR.'UAR';

    if(!%annotation_tmp){ #Store uniquely aligned reads without existing annotation in a file.
      my $key = $tagged_info.$SEPARATOR."1";
      print $NO_INDICATION_FILE "$key\n";
      next;
    }

    #Store annotation for UARs
    my $best_fitted_transcript = best_fitted_transcript(\%annotation_tmp);

    if($best_fitted_transcript =~ /miRNA/){
      my @bft = split($SEPARATOR, $best_fitted_transcript);
      my @arr = split($SEPARATOR, $tagged_info);
      my ($sequence, $quality) = @arr[5, 6];

      my $mirna_annotation = $chromosome.$SEPARATOR."miRNA".$SEPARATOR."".$SEPARATOR.$strand.$SEPARATOR.$bft[4];
      my $mirna_key = $chromosome.$SEPARATOR.$strand.$SEPARATOR.$start_pos.$SEPARATOR.$stop_pos.$SEPARATOR.$sequence.$SEPARATOR.$quality;

      detect_isomir($mirna_annotation, $mirna_key, $collapsed ? $read_tags{$read_id} : 1);
    }

    #Count the read for the transcript.
    my $read_count = $collapsed ? $read_tags{$read_id} : 1;
    if(exists $$transcripts{$best_fitted_transcript}{$unique_type}){
      $$transcripts{$best_fitted_transcript}{$unique_type} += $read_count;
    }else{
      $$transcripts{$best_fitted_transcript}{$unique_type} = $read_count;
    }
  }

  #Add the read to the density of every nucleotide it covers.
  my $density_count = $collapsed ? $read_tags{$read_id} : 1;
  for my $offset (0 .. $read_length - 1){

    my $nucleotide = $start_pos + $offset;
    my $key = "$chromosome" . $SEPARATOR . $strand . $SEPARATOR . "$nucleotide";

    if(exists $densities{$key}){
      $densities{$key} += $density_count;
    }else{
      $densities{$key} = $density_count;
    }
  }
}

#Display help options.
#Prints the program version ($version), the two usage forms (configuration file
#or command line parameters) and the description of every option to STDOUT.
sub get_help {
print "\nManatee version $version

USAGE with configuration file
	
manatee -config <file> -i <file> -o <dir>
	
OR 

USAGE with input parameters
	
manatee [options] -i <file> -o <dir> -index <ebwt> -genome <file> -annotation <file>
	
-config <file>       : Path to configuration file.

-i <file>            : Path to pre-processed FASTQ or FASTA file.

-o <dir>             : Path to directory where the output will be stored.

-index <ebwt>        : Path and basename of the genome index to be searched. 
		       The basename is the name of any of the index files 
                       up to but not including the final 
                       .1.ebwt/.rev.1.ebwt/etc. 
	
-genome <file>       : Path to genome fa or fasta file. 
	
-annotation <file>   : Path to annotation file in GFF3/GTF format. File should contain 
		       non coding annotation. 
	
OPTIONS
   
-t_index <ebwt>      : Path and basename of the transcriptome index to be searched. 
                       The basename is the name of any of the index files 
                       up to but not including the final .1.ebwt/.rev.1.ebwt/etc. 
                       If left blank, Manatee will generate transcriptome 
                       index based on the non coding annotation present in provided
                       annotation file and will store that index within the trans-index 
                       directory.
	
-cores <int>         : Number of alignment cores (default: -cores 1)

-m <int>             : Max of multimapping loci, -m in bowtie execution. The mapping algorithm 
                       will be applied only for reads with multi-mapped loci less or equal than m. 
                       Reads with multimapped loci that exceed the -m will be aligned against 
                       transcriptome (default: -m 50).
                       
-mismatches <int>    : Maximun number of mismatches in genomic alignments (default: mismatches=1).
	
-s <yes/no>          : Strand specific mode of the algorithm (default -s yes).
	
-cd <int>            : Minimum number of unannotated read abundances per cluster (default: -cd 5).
	
-cdi <int>           : Clusters of unannotated reads will be merged if the distance between them 
                       is equal or less than cdi (default: -cdi 50).
                       
-collapse <yes/no>   : Collapse reads with the same genomic sequence. This setting 
	               reduces significantly the execution time. Possible values 
	               yes/no (default: -collapse yes).                      
	
";
}

#Print user input parameters.
#Writes a summary of the run settings (indexes, annotation, genome, cores,
#bowtie -m, mismatches, input file, strand specificity) to STDOUT. When no
#transcriptome index was given, check_index() is used to report whether one
#already exists for the provided annotation.
sub print_user_input {

  my $trans_index_line;
  if($bowtie_trans_index ne $NOT_SPECIFIED){
    $trans_index_line = "Transcriptome index: $bowtie_trans_index\n\n";
  }elsif(check_index($bowtie_trans_index) eq $NOT_SPECIFIED){
    $trans_index_line = "Transcriptome index: not specified, index will be generated based on provided annotation.\n\n";
  }else{
    $trans_index_line = "Transcriptome index: not specified, index related to provided annotation file already exists in trans-index directory.\n\n";
  }

  my $strand_line = $strand_specific
    ? "Strand specificity: yes\n\n"
    : "Strand specificity: no\n\n";

  print join('',
    "\n",
    "Manatee $version\n\n",
    "---------- Input ----------\n\n",
    "Bowtie index: $path_to_bowtie_index\n\n",
    "Annotation: $annotation_file\n\n",
    "Reference genome file: $reference_genome\n\n",
    $trans_index_line,
    "Alignment cores: $aln_cores\n\n",
    "Bowtie -m: $m\n\n",
    "Alignment mismatches: $mismatches\n\n",
    "Input data file: $input_file\n\n",
    $strand_line,
    "--------------------------------------\n\n",
    "Processing data ...\n\n",
  );
}

#Check if proper bowtie index exists.
#
#  $path : basename of the transcriptome index, or $NOT_SPECIFIED.
#
#An index exists when all six files <basename>.{1,2,3,4,rev.1,rev.2}.ebwt are
#present and not empty.
#  - $path is $NOT_SPECIFIED: the index named after the annotation file is looked
#    up in the transcriptome index directory next to the program. Its basename
#    is returned if found, $NOT_SPECIFIED otherwise (the index will then be
#    generated based on the provided annotation file).
#  - otherwise: $path is returned if the index exists; if not, a message is
#    printed and the program exits.
sub check_index {

  my($path) = @_;

  my $not_specified = ($path eq $NOT_SPECIFIED);

  #Basename of the index to look for.
  my $index_base = $path;
  if($not_specified){
    my @full_path = split('/',$annotation_file);
    $index_base = $RealBin.$slash.$bowtie_trans_index_dir.$slash.$full_path[$#full_path];
  }

  my $index_found = 1;
  foreach my $suffix ('.1.ebwt', '.2.ebwt', '.3.ebwt', '.4.ebwt', '.rev.1.ebwt', '.rev.2.ebwt'){
    my $index_file = $index_base.$suffix;
    if(!(-s $index_file)){
      $index_found = 0;
      last;
    }
  }

  return $index_base if $index_found;

  #Transcriptome index undefined and not found in transcriptome directory.
  return $NOT_SPECIFIED if $not_specified;

  print "\nThe provided transcriptome index does not exist.
Please provide the correct index or let it blank 
so Manatee will create the it for you. \n\n";
  exit;
}

#Store annotation file into hash structures.
#
#  $ANNOTATION_FILE : path to the GFF3/GTF annotation file.
#  $transcript_name : hash reference, filled as  gene name -> full info.
#  $annotation      : reference to the interval tree object; every feature is
#                     inserted with its full info over [start, end].
#  $gtf             : hash reference, filled as
#                     chromosome -> strand -> start -> end -> transcript info.
#
#transcript info = gene id, gene biotype, gene name; full info = chromosome,
#strand, transcript info, start, end; both joined by $SEPARATOR. A leading
#"chr"/"Chr" is removed from chromosome names. In non strand specific mode the
#strand is $NA. Reading stops at a "##FASTA" line; comment lines and lines with
#fewer than nine columns are skipped.
#Prints a message and exits with status 10 if the file cannot be opened.
sub process_annotation {

  my($ANNOTATION_FILE, $transcript_name, $annotation, $gtf) = @_;

  my $annotation_fh;
  if(!open($annotation_fh, "<", $ANNOTATION_FILE)){ #Open annotation file provided as input or configuration parameter
    print "Could not open annotation file: $ANNOTATION_FILE\n";
    exit 10;
  }

  while (my $line = <$annotation_fh>){
    #Process each line of the annotation file.
    chomp $line;

    last if $line =~ /^##FASTA/; #FASTA section if present resides at the end of the GFF file
    next if $line =~ /^#/;       #Don't read comments.

    my @element = split('\t',$line);
    next if @element < 9; #Not a complete feature line.

    #Only a leading prefix is removed, so names that merely contain "chr" stay intact.
    my $chromosome = $element[0];
    $chromosome =~ s/^[Cc]hr//;

    my $start = $element[3];
    $start =~ s/^\s+|\s+$//g;
    my $end = $element[4];
    $end =~ s/^\s+|\s+$//g;
    my $strand = $strand_specific ? $element[6] : $NA;
    $strand =~ s/^\s+|\s+$//g;

    #Extract gene_name, gene_biotype and gene_id from either key=value (GFF3)
    #or key "value" (GTF) attributes; the last occurrence of a key wins.
    my %value_of = (gene_name => '', gene_biotype => '', gene_id => '');
    foreach my $attribute (split(';', $element[8])){
      foreach my $field ('gene_name', 'gene_biotype', 'gene_id'){
        next if $attribute !~ /$field/;
        my @subattributes = ($attribute =~ /=/) ? split('=', $attribute) : split(' ', $attribute);
        my $value = $subattributes[-1];
        next if !defined $value;
        if($value =~ /"/){
          $value = substr($value,1,-1); #Strip the surrounding quotes.
        }
        $value_of{$field} = $value;
      }
    }

    my $gene_name = $value_of{'gene_name'};
    my $transcript_info = $value_of{'gene_id'}.$SEPARATOR.$value_of{'gene_biotype'}.$SEPARATOR.$gene_name;
    my $full_info = $chromosome.$SEPARATOR.$strand.$SEPARATOR.$transcript_info.$SEPARATOR.$start.$SEPARATOR.$end;
    $$annotation->insert($full_info, $start,$end);

    $$gtf{$chromosome}{$strand}{$start}{$end} = $transcript_info;

    $$transcript_name{$gene_name} = $full_info;
  }

  close($annotation_fh);
  return;
}

#Split the input path to file name and path.
#
#  $input_path     : path to the reads file; must end in .fa, .fq, .fasta or .fastq.
#  $file_name_base : scalar reference, receives the file name without extension.
#  $file_name_path : scalar reference, receives the directory part including
#                    the trailing '/', or '' when there is none.
#
#Prints a message and exits when the extension is not one of the accepted ones.
sub get_input_filename_base_path {
  my($input_path, $file_name_base, $file_name_path)=@_;
  $input_path =~ s/^\s+|\s+$//g; # Removing leading and trailing white spaces.

  #The dot of the extension is matched literally, so a name such as "alfa" is
  #not mistaken for a ".fa" file.
  if($input_path =~ m{\A(.*/)?([^/]*)\.(?:fasta|fastq|fa|fq)\z}s){
    $$file_name_path = defined $1 ? $1 : '';
    $$file_name_base = $2;
  }else{
    print "FATAL: Could not determine the format of the reads file $input_path .. Accepted extensions: .fa, .fasta, .fq, or .fastq\n";
    exit;
  }
}

#Process input fasta/fastq file, merge identical reads, store all the reads to specific hashes.
#
#  $input_file      : path to the reads file; its format is taken from $input_file_format.
#  $output_file     : path of the collapsed reads file to create.
#  $collapsed_reads : not used; the file-level %collapsed_reads is filled.
#  $read_tags       : not used; the file-level %read_tags is filled.
#
#Every distinct sequence is written once under the name "Manatee_<n>", and
#$read_tags{name} holds the number of identical sequences it represents.
#FASTQ: records are read as groups of four lines; qualities are replaced by a
#string of 'A' of the read length, so reads are merged by sequence only.
#FASTA: every second line is taken as a sequence.
#NOTE(review): the FASTA branch therefore expects one-line sequences - confirm
#that multi-line FASTA never reaches this point.
sub process_input{
  my($input_file, $output_file,$collapsed_reads,$read_tags) = @_;
  open(my $output_fh, ">", $output_file) or die "Couldn't open output file for writing: $output_file\n\n";
  open(my $input_fh, "<", $input_file) or die "Couldn't open input file: $input_file\n\n";

  my $line_nr = 1;
  my $counter = 1;

  if($input_file_format eq $fastq_format){ # Fastq file
    my $sequence;
    while(my $line = <$input_fh>){
      chomp $line;
      $line =~ s/^\s+|\s+$//g;
      if($line_nr % 4 == 2){
        $sequence = $line;
      }elsif($line_nr % 4 == 0){
        #The original quality line is ignored on purpose.
        my $quality = 'A' x length($sequence);
        $collapsed_reads{$sequence}{$quality}++;
      }
      $line_nr++;
    }
    foreach my $sequence (keys %collapsed_reads){
      foreach my $quality (keys %{ $collapsed_reads{$sequence} }){
        my $read_name = "Manatee"."_".$counter;
        print $output_fh "\@$read_name\n";
        print $output_fh "$sequence\n";
        print $output_fh "+$read_name\n";
        print $output_fh "$quality\n";
        $read_tags{$read_name} = $collapsed_reads{$sequence}{$quality};
        $counter++;
      }
    }
  }else{ # Fasta file
    while(my $line = <$input_fh>){
      chomp $line;
      $line =~ s/^\s+|\s+$//g;
      if($line_nr % 2 == 0){
        $collapsed_reads{$line}++;
      }
      $line_nr++;
    }
    foreach my $sequence (keys %collapsed_reads){
      my $read_name = "Manatee"."_".$counter;
      print $output_fh ">$read_name\n";
      print $output_fh "$sequence\n";
      $read_tags{$read_name} = $collapsed_reads{$sequence}; #Nr of identical nucleotide sequences.
      $counter++;
    }
  }
  close($input_fh);
  close($output_fh) or die "Couldn't write output file: $output_file\n\n";
}

#Write estimated transcript counts to output file.
#
#Reads the file-level %transcripts (annotation string -> count type -> count) and
#writes a tab separated table to $transcripts_counts with the columns
#  Transcript Name, Biotype, Count, RPM, Unique Reads, Other possible transcripts.
#An annotation string may list several alternative transcripts; the first one
#names the row and the others are reported in the last column. Rows sharing the
#same first transcript are merged: counts are summed and the list of alternatives
#with the highest count is kept. RPM is count * 1,000,000 / $mapped_reads, or
#$NA when there are no mapped reads.
sub write_expressions {

  open(my $TRANSCRIPTS_COUNTS, ">", $transcripts_counts)
    or die "Could not open output file $transcripts_counts: $!\n";

  my $has_mapped_reads = ($mapped_reads > 0);
  my %tmp;

  foreach my $transcript (sort keys %transcripts){
    my @alternatives = split($ALTERNATIVE_TRANS_SEPARATOR, $transcript);
    my $first = shift @alternatives;
    my @first_fields = split($SEPARATOR, $first);
    my $biotype = $first_fields[3];
    my $first_transcript = $first_fields[4];

    my $sum = 0;
    my $unique = 0;
    foreach my $type (sort keys %{ $transcripts{$transcript} }){
      $sum += $transcripts{$transcript}{$type};
      $unique = $transcripts{$transcript}{$type} if $type eq $unique_type;
    }

    #Names of the other possible transcripts, without duplicates. Exact name
    #comparison is used, so a name that is a substring of another one (or that
    #contains regex metacharacters) is handled correctly.
    my %seen;
    $seen{$first_transcript} = 1 if defined $first_transcript;
    my @others;
    foreach my $element (@alternatives){
      my @arr = split($SEPARATOR, $element);
      my $name = $arr[4];
      next if !defined $name or $seen{$name}++;
      push @others, $name;
    }
    my $all_transcripts = @others ? join($ALTERNATIVE_TRANS_SEPARATOR_FILE, @others) : $EMPTY;

    #Reads per million calculation. 1_000_000 is used explicitly: in Perl
    #10^6 is a bitwise XOR (12), not a power.
    my $rpm = $has_mapped_reads ? ($sum*1_000_000)/$mapped_reads : 0;

    $tmp{$first_transcript}{$all_transcripts}{'sum'} += $sum;
    $tmp{$first_transcript}{$all_transcripts}{'rpm'} += $rpm;
    $tmp{$first_transcript}{$all_transcripts}{'unique'} += $unique;
    #The biotype of the first transcript
    $tmp{$first_transcript}{$all_transcripts}{'biotype'} = $biotype;
  }

  print $TRANSCRIPTS_COUNTS "Transcript Name\tBiotype\tCount\tRPM\tUnique Reads\tOther possible transcripts\n";

  foreach my $first_transcript (sort keys %tmp){

    #Merge the rows of this transcript and keep the list of alternatives with
    #the highest read count.
    my $max_reads;
    my $sum_reads = 0;
    my $sum_unique = 0;
    my $sum_rpm = 0;
    my $biotype;
    my $keep_trans = $EMPTY;
    foreach my $all_transcripts (sort keys %{ $tmp{$first_transcript} }){
      my $entry = $tmp{$first_transcript}{$all_transcripts};
      if(!defined $max_reads or $entry->{'sum'} >= $max_reads){
        $max_reads = $entry->{'sum'};
        $keep_trans = $all_transcripts;
        $biotype = $entry->{'biotype'};
      }
      $sum_reads  += $entry->{'sum'};
      $sum_rpm    += $entry->{'rpm'};
      $sum_unique += $entry->{'unique'};
    }

    my $rpm = $has_mapped_reads ? $sum_rpm : $NA;
    print $TRANSCRIPTS_COUNTS "$first_transcript\t$biotype\t";
    print $TRANSCRIPTS_COUNTS "$sum_reads\t$rpm\t$sum_unique\t$keep_trans\n";
  }

  close($TRANSCRIPTS_COUNTS) or die "Could not write output file $transcripts_counts: $!\n";
}

#Remove all the temp files generated during the execution.
#
#Deletes every non hidden file in $output_path except the isomirs, counts and
#clusters result files, then deletes the clusters file ($clusters_output) as
#well when it holds a single line only. Files are handled with Perl built-ins
#instead of ls/rm/wc, so paths containing spaces or shell metacharacters are
#safe and the line count does not depend on the output format of wc.
#$output_path is expected to end with the path separator, as before.
sub clean {

  #Remove all temp files.
  if(opendir(my $output_dir, $output_path)){
    my @files_to_del = readdir($output_dir);
    closedir($output_dir);
    for my $file (@files_to_del){
      next if $file =~ /^\./; #Hidden entries, '.' and '..' are left alone (ls did not list them).
      next if $file =~ /Manatee_isomirs.tsv|Manatee_counts.tsv|Manatee_clusters.tsv/;
      my $ftr = $output_path.$file;
      next if -d $ftr and !-l $ftr; #Directories are not removed.
      unlink($ftr) or warn "Could not remove $ftr: $!\n";
    }
  }else{
    warn "Could not read output directory $output_path: $!\n";
  }

  #Remove clusters file if empty, i.e. if it holds one line only.
  if(-f $clusters_output and open(my $clusters_fh, "<", $clusters_output)){
    my $nr_lines = 0;
    while(my $line = <$clusters_fh>){
      $nr_lines++;
      last if $nr_lines > 1; #More than one line is all that matters.
    }
    close($clusters_fh);
    if($nr_lines == 1){
      unlink($clusters_output) or warn "Could not remove $clusters_output: $!\n";
    }
  }

}