# hifiadaptfilt-stats.py
import csv
import os
import sys
# Default locations used when the script is run directly; both can be
# overridden by passing arguments to main().
base_dir = "/scratch/pawsey0812/lhuet/HIFI_ONLY"  # Change this to the correct path
output_csv = "hifiadaptorfilt_stats_summary.csv"

# Text identifying the line of a .stats file that holds the contaminated-read count.
_CONTAM_MARKER = "Number of adapter contaminated ccs reads"


def _parse_contam_reads(line):
    """Return the first whitespace-delimited token after the first ':' in *line*.

    Returns None when *line* has no ':' or nothing but whitespace between the
    first ':' and the next one (or the end of the line).
    """
    fields = line.split(":")
    if len(fields) < 2:
        return None
    tokens = fields[1].split()
    return tokens[0] if tokens else None


def _find_contam_reads(stats_file):
    """Return the contaminated-read count recorded in *stats_file*, or None.

    The file is scanned line by line and the value from the first marker line
    that can be parsed is returned. A marker line without a value is reported
    on stderr and scanning continues, instead of raising IndexError.
    """
    # errors="replace" keeps a stray undecodable byte from aborting the run;
    # the marker text itself is plain ASCII.
    with open(stats_file, "r", encoding="utf-8", errors="replace") as handle:
        for line in handle:
            if _CONTAM_MARKER not in line:
                continue
            value = _parse_contam_reads(line)
            if value is not None:
                return value
            print(
                f"Warning: could not parse contaminated read count in {stats_file}",
                file=sys.stderr,
            )
    return None


def main(input_dir=base_dir, summary_path=output_csv):
    """Collect adapter-contaminated read counts into a summary CSV.

    For every entry ``<sample>`` of *input_dir*, the directory
    ``<sample>/01-data-processing/hifiadaptfilt`` is walked recursively and each
    file ending in ``.stats`` is searched for the contaminated-read line. One
    ``sample, contam_reads`` row is written per stats file in which a count is
    found; files without a parseable count produce no row.

    Args:
        input_dir: Directory whose entries are the sample directories.
        summary_path: Path of the CSV file to write (overwritten if present).

    Raises:
        OSError: If *input_dir* cannot be listed (for example
            FileNotFoundError when it does not exist). This is raised before
            *summary_path* is opened, so an existing summary is left untouched.
    """
    # List the samples first so that a bad input_dir fails before the output
    # file is truncated. Sorting makes the row order reproducible.
    samples = sorted(os.listdir(input_dir))

    with open(summary_path, mode="w", newline="", encoding="utf-8") as csvfile:
        csv_writer = csv.writer(csvfile)
        # Write the header
        csv_writer.writerow(["sample", "contam_reads"])

        for sample in samples:
            sample_dir = os.path.join(
                input_dir, sample, "01-data-processing", "hifiadaptfilt"
            )
            # Entries that are plain files, or samples without this
            # sub-directory, are skipped.
            if not os.path.isdir(sample_dir):
                continue

            for root, dirs, files in os.walk(sample_dir):
                # Sorting in place makes os.walk descend in a stable order.
                dirs.sort()
                for file_name in sorted(files):
                    if not file_name.endswith(".stats"):
                        continue
                    contam_reads = _find_contam_reads(os.path.join(root, file_name))
                    if contam_reads is not None:
                        csv_writer.writerow([sample, contam_reads])

    print(f"Summary CSV file generated: {summary_path}")


if __name__ == "__main__":
    main()