forked from upendrabhattarai/Long_read_genome_assembly
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpurgehaplotigs.sh
28 lines (23 loc) · 1.05 KB
/
purgehaplotigs.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
#!/bin/bash
#
# Slurm batch job: align long reads to a draft assembly with minimap2, build
# the purge_haplotigs read-depth histogram, then run `purge_haplotigs purge`
# with every contig flagged as suspect.
#
# Inputs (in the submission directory): genome_assembly.fasta, reads.fastq
# Outputs written by this script:       aligned.bam, cov_stat.csv, plus
#                                       whatever purge_haplotigs itself emits.
#
# One multithreaded process is launched at a time, so a single task with ten
# CPUs is requested (rather than ten single-CPU tasks).
#SBATCH --nodes 1
#SBATCH --ntasks 1
#SBATCH --cpus-per-task 10
#SBATCH --partition=bigmem
#SBATCH --job-name purgehap.nem
#SBATCH --mem=50G
#SBATCH --time=12:00:00
#SBATCH --account=uoo02752
#SBATCH --output=%x_%j.out
#SBATCH --error=%x_%j.err
#SBATCH --mail-type=ALL
# TODO: the notification address was redacted in this copy of the script.
# Fill it in and change '##SBATCH' back to '#SBATCH' to re-enable the option.
##SBATCH --mail-user=
#SBATCH --hint=nomultithread

# -e: stop at the first failing command.
# pipefail: a minimap2 failure must fail the minimap2 | samtools pipeline
# instead of leaving a truncated BAM behind.
# 'set -u' is intentionally not enabled: environment-module shell functions
# may reference unset variables.
set -eo pipefail

readonly genome="genome_assembly.fasta"
readonly reads="reads.fastq"
readonly bam="aligned.bam"
readonly cov_stats="cov_stat.csv"
# Follow the Slurm allocation; fall back to 10 when run outside of Slurm.
readonly threads="${SLURM_CPUS_PER_TASK:-10}"

module load SAMtools/1.12-GCC-9.2.0
module load minimap2/2.20-GCC-9.2.0

# Map the reads (PacBio preset, no secondary alignments) and coordinate-sort
# the stream straight into a BAM file. samtools sort reads SAM from stdin.
minimap2 -t "${threads}" -ax map-pb "${genome}" "${reads}" --secondary=no \
  | samtools sort -m 5G -o "${bam}" -T tmp.ali -

export PATH="/nesi/nobackup/uoo02752/.conda/envs/purge_haplotigs_env/bin:$PATH"

purge_haplotigs hist -b "${bam}" -g "${genome}" -t "${threads}"

# The histogram from `purge_haplotigs hist` did not show two distinct peaks,
# possibly because coverage is too low or too uneven. Instead of deriving
# cutoffs from it, write a cov_stat.csv that flags every contig as suspect
# ("s") and purge with that file.
#
# The contig names come from the FASTA index; create it here if it does not
# exist yet rather than relying on an earlier tool having written it.
if [[ ! -s "${genome}.fai" ]]; then
  samtools faidx "${genome}"
fi
awk '{print $1",s,"}' "${genome}.fai" > "${cov_stats}"

purge_haplotigs purge -g "${genome}" -c "${cov_stats}" -b "${bam}"