Skip to content

Commit

Permalink
final touches email, fix stats sequence upload to parkour
Browse files Browse the repository at this point in the history
  • Loading branch information
WardDeb committed Jul 12, 2024
1 parent 7c8a249 commit f9b25cf
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 29 deletions.
58 changes: 33 additions & 25 deletions BRB/ET.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,17 +33,17 @@ def getNReads(d):

def getOffSpeciesRate(d, organism = None) -> float:
"""
Parses
Parses kraken report for number of reads mapping to unexpected organisms
"""
fname = glob.glob("{}/*rep".format(d))[0]
if not os.path.exists(fname):
return 0
# match parkour org to kraken db organism/group
org_map = {
'Human (GRCh38)': 'humangrp',
'Human (GRCh37 / hg19)': 'humangrp',
'Mouse (GRCm38 / mm10)': 'mousegrp',
'Mouse (GRCm39)': 'mousegrp',
'mouse': 'mousegrp',
'human': 'humangrp',
'Escherichia phage Lambda':'lambdaphage',
'Caenorhabditis_elegans': 'c-elegans',
'lamprey': 'sea-lamprey',
Expand All @@ -52,6 +52,7 @@ def getOffSpeciesRate(d, organism = None) -> float:
'drosophila': 'flygrp',
}
if organism not in org_map:
log.info(f"getOffSpeciesRate - organism {organism} is not in the org_map!")
return 0
with open(fname) as f:
for line in f:
Expand Down Expand Up @@ -97,7 +98,20 @@ def DNA(config, outputDir, baseDict, sample2lib):
Add % mapped, % dupped, and insert size to baseDict. Filter it for those actually in the output
"""
# baseDict, sample2lib = getBaseStatistics(config, outputDir)

# If we have RELACS, the sample2lib won't match what we find here.
# We can re-parse the sampleSheet to upload actual statistics of the RELACS demuxed samples.
if Path(outputDir, 'RELACS_sampleSheet.txt').exists():
# RELACS is a problem for Parkour, which matches statistics at the sampleID / barcode level.
# Just return a list of dicts with the base statistics already present in baseDict.
m = []
for k, v in baseDict.items():
m.append({'barcode': k,
'reads_pf_sequenced': v[1],
'confident_reads': v[2],
'optical_duplicates': v[3]})
log.info(f"ET - DNA module detected RELACS. Returning {m}")
return m

# % Mapped
for fname in glob.glob("{}/Bowtie2/*.Bowtie2_summary.txt".format(outputDir)):
sampleName = os.path.basename(fname).split(".Bowtie2_summary")[0]
Expand All @@ -117,19 +131,19 @@ def DNA(config, outputDir, baseDict, sample2lib):
medInsertSize = insert_size_df.loc[insert_size_df["Unnamed: 0"]=="filtered_bam/"+sampleName+".filtered.bam"]
medInsertSize = medInsertSize["Frag. Len. Median"].values[0]
baseDict[sample2lib[sampleName]].append(int(medInsertSize))

log.info(f"ET - DNA module parsed {baseDict}")

# # Filter
outputDict = {k: v for k, v in baseDict.items() if len(v) == 8}
# Reformat into a matrix
m = []
for k, v in outputDict.items():
for k, v in baseDict.items():
m.append({'barcode': k,
'reads_pf_sequenced': v[1],
'confident_reads': v[2],
'optical_duplicates': v[3],
'dupped_reads': v[6],
'mapped_reads': v[5],
'insert_size': v[7]})
'dupped_reads': v[5],
'mapped_reads': v[4],
'insert_size': v[6]})
return m


Expand All @@ -139,8 +153,7 @@ def RNA(config, outputDir, baseDict, sample2lib):
Add % mapped to baseDict. Filter it for those actually in the output
"""
# baseDict, sample2lib = getBaseStatistics(config, outputDir)
# % Mapped

for fname in glob.glob("{}/STAR/*/*.Log.final.out".format(outputDir)):
f = open(fname)
tot = 0
Expand Down Expand Up @@ -173,21 +186,19 @@ def RNA(config, outputDir, baseDict, sample2lib):
baseDict[sample2lib[sampleName]].append(assigned_rate)



# Filter
outputDict = {k: v for k, v in baseDict.items() if len(v) == 10}
log.info(f"ET - RNA module parsed {baseDict}")
# Reformat into a matrix
m = []
for k, v in outputDict.items():
for k, v in baseDict.items():
m.append({'barcode': k,
'reads_pf_sequenced': v[1],
'confident_reads': v[2],
'optical_duplicates': v[3],
'mapped_reads': v[5],
'uniq_mapped': v[6],
'multi_mapped': v[7],
'dupped_reads': v[8],
'assigned_reads': v[9]})
'mapped_reads': v[4],
'uniq_mapped': v[5],
'multi_mapped': v[6],
'dupped_reads': v[7],
'assigned_reads': v[8]})
return m


Expand All @@ -210,9 +221,6 @@ def phoneHome(config, outputDir, pipeline, samples_tuples, organism, project, li
"""
samples_id = [row[0] for row in samples_tuples]
baseDict, sample2lib = getBaseStatistics(config, outputDir, samples_id, organism)

log.info("phoneHome: baseDict: {}, sample2lib: {}".format(baseDict, sample2lib))

msg = None
if pipeline == 'DNA':
msg = DNA(config, outputDir, baseDict, sample2lib)
Expand All @@ -226,7 +234,7 @@ def phoneHome(config, outputDir, pipeline, samples_tuples, organism, project, li
'confident_reads': v[2],
'optical_duplicates': v[3]})
msg = m

log.info(f"phoneHome: got msg = {msg}")
if msg is not None:
ret = sendToParkour(config, msg)
else:
Expand Down
1 change: 0 additions & 1 deletion BRB/PushButton.py
Original file line number Diff line number Diff line change
Expand Up @@ -591,7 +591,6 @@ def GetResults(config, project, libraries):
#hence the pacifier is applied on the project in each pipeline separately
outputDir, rv, sambaUpdate = globals()[pipeline](config, group, project, organism, libraryType, tuples)
if rv == 0:
#try:
msg = msg + [BRB.ET.phoneHome(config, outputDir, pipeline, tuples, organism, project, libraryType) + [sambaUpdate]]
log.info(f"Processed project {BRB.misc.pacifier(project)} with the {pipeline} pipeline. {libraryType}, {organism}")
else:
Expand Down
6 changes: 3 additions & 3 deletions BRB/email.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,14 @@ def finishedEmail(config, msg):
if [i[4] for i in msg].count('success') == len(msg):
recipient = config.get("Email","deepSeq")
_html.add(div(
"Post-processing is ready, deepSeq's sambda drive is updated for at least one project.",
f"Post-processing is ready, Samba drive is updated for {[i[6] for i in msg].count(True)} project(s).",
br()
))

mailer['To'] = recipient
# Table
tabHead = ['Project', 'organism', 'libraryType', 'workflow', 'workflow_status', 'parkour_status', 'sambaUpdate']
message = tabulate(
message = _html.render() + '\n\n' + tabulate(
msg, tabHead, tablefmt="html", disable_numparse=True
)

Expand Down

0 comments on commit f9b25cf

Please sign in to comment.