-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdata_access.txt
31 lines (28 loc) · 1022 Bytes
/
data_access.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# Install the SRA Toolkit in Gitpod.
# Refresh the package index first so the install does not fail on a stale
# package list, and pass -y so apt-get does not stop at a confirmation prompt.
sudo apt-get update
sudo apt-get install -y sra-toolkit
# Configure the SRA Toolkit to save downloads in the current directory.
# -i opens the interactive configuration screen; the setting is changed by hand there.
vdb-config -i
# Downloading data
# Select an accession ID from SRA; this example uses ERR2090226 (bacterial genome).
# Work inside a dedicated directory. -p keeps a rerun from failing when the
# directory already exists, and the run stops if the directory cannot be
# entered so that nothing is downloaded into the wrong place.
mkdir -p sra_data
cd sra_data || exit 1
# Gather the data using prefetch.
prefetch ERR2090226 # creates a directory with .sra file in it
# Generate reads from the pre-fetched download.
#   --split-files  write each read of a pair to its own file (_1 / _2)
#   --gzip         compress the output (.fastq.gz)
#   -X 10000       stop after spot 10000 to keep the example small
fastq-dump --split-files --gzip \
  -X 10000 ERR2090226/ERR2090226.sra
# READ QC
# FastQC
# Install FastQC: download the release archive and unpack it in the current
# directory, which creates ./FastQC. -o lets a rerun overwrite the previous
# unpack instead of stopping at unzip's "replace?" prompt.
wget https://www.bioinformatics.babraham.ac.uk/projects/fastqc/fastqc_v0.12.1.zip
unzip -o fastqc_v0.12.1.zip
# The fastqc wrapper script is not executable after unpacking the zip.
chmod a+x ./FastQC/fastqc
# Run FastQC on both read files. Call the copy that was just unpacked rather
# than a hard-coded workspace path, so the command works wherever this runs.
./FastQC/fastqc ERR2090226_1.fastq.gz ERR2090226_2.fastq.gz
# Read trimming - Trimmomatic, fastp, cutadapt (to remove adapters)
# Install fastp: download the prebuilt binary into the current directory and
# make it executable.
wget http://opengene.org/fastp/fastp
chmod a+x ./fastp
# Trim the paired reads. Invoke the same ./fastp that was just downloaded and
# chmod-ed, not a hard-coded workspace path that may point somewhere else.
#   -i / -I  input read 1 / read 2
#   -o / -O  trimmed output for read 1 / read 2
./fastp -i ERR2090226_1.fastq.gz -I ERR2090226_2.fastq.gz \
  -o ERR2090226_1_trimmed.fastq.gz -O ERR2090226_2_trimmed.fastq.gz