Skip to content

Commit

Permalink
Generate cylinter_report.yml file. (#179)
Browse files Browse the repository at this point in the history
  • Loading branch information
gjbaker authored Mar 14, 2024
1 parent d377773 commit 30ab9a7
Show file tree
Hide file tree
Showing 23 changed files with 3,991 additions and 2,895 deletions.
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@

## An Interactive Image Segmentation Filter for Multiplex Microscopy.

CyLinter is quality control software for identifying and removing cell segmentation instances corrupted by optical and/or image-processing artifacts in multiplex microscopy images. The tool is user-guided and comprises a set of modular and extensible QC modules instantiated in a configurable [Python](https://www.python.org) Class object. Module results are cached to allow for dynamic restarts.
CyLinter is quality control software for identifying and removing cell segmentation instances corrupted by optical and/or image-processing artifacts in multiplex microscopy images. The tool is interactive and comprises a set of modular and extensible QC modules instantiated in a configurable [Python](https://www.python.org) Class object. Module results are cached to allow for progress bookmarking and dynamic restarts.

CyLinter development is led by [Greg Baker](https://github.com/gjbaker) at the [Laboratory of Systems Pharmacology](https://labsyspharm.org/), Harvard Medical School.

**Funding:** This work was supported by Ludwig Cancer Research and the Ludwig Center at Harvard and by NIH NCI grants U2C-CA233280 (Omic and Multidimensional Spatial Atlas of Metastatic Breast and Prostate Cancers) and U2C-CA233262 (Pre-cancer atlases of cutaneous and hematologic origin—PATCH Center) to Peter K. Sorger and Sandro Santagata as part of the Human Tumor Atlas Network. Development of computational methods and image processing software is supported by a Team Science Grant from the Gray Foundation, the Gates Foundation grant INV-027106, the David Liposarcoma Research Initiative, and the Emerson Collective.
**Funding:** This work was supported by Ludwig Cancer Research and the Ludwig Center at Harvard (P.K.S., S.S.) and by NIH NCI grants U2C-CA233280 and U2C-CA233262 (P.K.S., S.S.). Development of computational methods and image processing software is supported by a Team Science Grant from the Gray Foundation (P.K.S., S.S.), the Gates Foundation grant INV-027106 (P.K.S.), the David Liposarcoma Research Initiative at Dana-Farber Cancer Institute supported by KBF Canada via the Rossy Foundation Fund (P.K.S., S.S.), and the Emerson Collective (P.K.S.). S.S. is supported by the BWH President’s Scholars Award.

**Instructions:** https://labsyspharm.github.io/cylinter/
**Project Website:** https://labsyspharm.github.io/cylinter/
17 changes: 4 additions & 13 deletions cylinter/components.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ def __init__(self,
sampleStatuses=None,
sampleReplicates=None,
samplesToExclude=None,
counterstainChannel=None,
markersToExclude=None,

# selectROIs -
Expand Down Expand Up @@ -104,9 +105,7 @@ def __init__(self,
embeddingAlgorithmQC=None,
channelExclusionsClusteringQC=None,
samplesToRemoveClusteringQC=None,
fracForEmbeddingQC=None,
dimensionEmbeddingQC=None,
topMarkersQC=None,
percentDataPerChunk=None,
colormapAnnotationQC=None,
metricQC=None,
perplexityQC=None,
Expand Down Expand Up @@ -142,7 +141,6 @@ def __init__(self,
normalizeTissueCounts=None,
fracForEmbedding=None,
dimensionEmbedding=None,
topMarkers=None,
colormapAnnotationClustering=None,
colormapAnnotation=None,
perplexity=None,
Expand All @@ -163,14 +161,10 @@ def __init__(self,

# curateThumbnails —
numThumbnails=None,
topMarkersThumbnails=None,
windowSize=None,
segOutlines=None,
):

assert topMarkers in ['channels', 'clusters'], \
'Invalid input for topMarkers configuration parameter.'

self.inDir = inDir
self.outDir = outDir
self.startModule = startModule
Expand All @@ -180,6 +174,7 @@ def __init__(self,
self.sampleStatuses = sampleStatuses
self.sampleReplicates = sampleReplicates
self.samplesToExclude = samplesToExclude
self.counterstainChannel = counterstainChannel
self.markersToExclude = markersToExclude

self.delintMode = delintMode
Expand All @@ -203,9 +198,7 @@ def __init__(self,
self.embeddingAlgorithmQC = embeddingAlgorithmQC
self.channelExclusionsClusteringQC = channelExclusionsClusteringQC
self.samplesToRemoveClusteringQC = samplesToRemoveClusteringQC
self.fracForEmbeddingQC = fracForEmbeddingQC
self.dimensionEmbeddingQC = dimensionEmbeddingQC
self.topMarkersQC = topMarkersQC
self.percentDataPerChunk = percentDataPerChunk
self.colormapAnnotationQC = colormapAnnotationQC
self.metricQC = metricQC
self.perplexityQC = perplexityQC
Expand Down Expand Up @@ -237,7 +230,6 @@ def __init__(self,
self.normalizeTissueCounts = normalizeTissueCounts
self.fracForEmbedding = fracForEmbedding
self.dimensionEmbedding = dimensionEmbedding
self.topMarkers = topMarkers
self.colormapAnnotationClustering = colormapAnnotationClustering
self.perplexity = perplexity
self.earlyExaggeration = earlyExaggeration
Expand All @@ -255,7 +247,6 @@ def __init__(self,
self.FDRCorrection = FDRCorrection

self.numThumbnails = numThumbnails
self.topMarkersThumbnails = topMarkersThumbnails
self.windowSize = windowSize
self.segOutlines = segOutlines

Expand Down
11 changes: 4 additions & 7 deletions cylinter/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,9 @@ def from_path(cls, path):
config.inDir = pathlib.Path(data['inDir']).resolve()
config.outDir = pathlib.Path(data['outDir']).resolve()
config._parse_sample_metadata(data['sampleMetadata'])
config.samplesToExclude = (data['samplesToExclude'])
config.markersToExclude = (data['markersToExclude'])
config.samplesToExclude = list(data['samplesToExclude'])
config.counterstainChannel = str(data['counterstainChannel'])
config.markersToExclude = list(data['markersToExclude'])

# CLASS MODULE CONFIGURATIONS

Expand Down Expand Up @@ -96,12 +97,9 @@ def from_path(cls, path):
data['samplesToRemoveClustering']
)
config.normalizeTissueCounts = bool(data['normalizeTissueCounts'])
config.fracForEmbeddingQC = float(data['fracForEmbeddingQC'])
config.percentDataPerChunk = float(data['percentDataPerChunk'])
config.fracForEmbedding = float(data['fracForEmbedding'])
config.dimensionEmbeddingQC = int(data['dimensionEmbeddingQC'])
config.dimensionEmbedding = int(data['dimensionEmbedding'])
config.topMarkersQC = str(data['topMarkersQC'])
config.topMarkers = str(data['topMarkers'])
config.colormapAnnotationQC = str(
data['colormapAnnotationQC'])
config.colormapAnnotationClustering = str(
Expand Down Expand Up @@ -136,7 +134,6 @@ def from_path(cls, path):
config.FDRCorrection = bool(data['FDRCorrection'])

config.numThumbnails = int(data['numThumbnails'])
config.topMarkersThumbnails = str(data['topMarkersThumbnails'])
config.windowSize = int(data['windowSize'])
config.segOutlines = bool(data['segOutlines'])

Expand Down
7 changes: 5 additions & 2 deletions cylinter/config.yml → cylinter/cylinter_config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

inDir: /Users/<username>/Desktop/cylinter_demo
# Path to CyLinter input directory containing multi-channel
# image files (TIFF or OME-TIFF), segmentation outlines (OME-TIFF),
# segmentation masks (TIFF), and corresponding single-cell feature tables (CSV)
# image files (TIF or OME-TIF), segmentation outlines (OME-TIF),
# segmentation masks (TIF), and corresponding single-cell feature tables (CSV)

outDir: /Users/<username>/Desktop/cylinter_demo/output
# CyLinter output directory. Path is created if it does not exist.
Expand All @@ -26,6 +26,9 @@ samplesToExclude: []
# (list of strs) Sample names to exclude from analysis specified
# according to the first elements of sampleMetadata configuration.

counterstainChannel: "DNA1"
# (str) Name of the marker in the markers.csv file to use for visualizing the nuclear counterstain.

markersToExclude: ["Rabbit IgG", "Goat IgG", "Mouse IgG", "CD56", "CD13",
"pAUR", "CCNE", "CDKN2A", "PCNA_1", "CDKN1B_2",
"CD63", "CD32", "CCNA2", "CDKN1C", "PCNA_1",
Expand Down
17 changes: 11 additions & 6 deletions cylinter/modules/PCA.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,10 @@ def PCA(data, self, args):
check, markers_filepath = input_check(self)

# read marker metadata
markers, dna1, dna_moniker, abx_channels = read_markers(
markers_filepath=markers_filepath, markers_to_exclude=self.markersToExclude, data=data
markers, abx_channels = read_markers(
markers_filepath=markers_filepath,
counterstain_channel=self.counterstainChannel,
markers_to_exclude=self.markersToExclude, data=None
)

# drop antibody channel exclusions for PCA
Expand Down Expand Up @@ -188,7 +190,7 @@ def PCA(data, self, args):
markersize=5.0, linewidth=5)
)
ax1.legend(handles=legend_handles, prop={'size': 10.0}, bbox_to_anchor=[0.95, 1.0])
fig1.savefig(os.path.join(pca_dir, 'variance.pdf'), bbox_inches='tight')
fig1.savefig(os.path.join(pca_dir, 'horns_analysis.pdf'), bbox_inches='tight')
plt.close(fig1)

###################################################################
Expand Down Expand Up @@ -216,7 +218,7 @@ def PCA(data, self, args):
ax2.tick_params(axis='both', which='major', labelsize=7.0)

fig2.savefig(
os.path.join(pca_dir, 'pcaScoresPlotCells.png'), dpi=600, bbox_inches='tight'
os.path.join(pca_dir, 'pca_cells.png'), dpi=600, bbox_inches='tight'
)
plt.close(fig2)

Expand Down Expand Up @@ -492,13 +494,16 @@ def get_key(val):

# save figure
plt.savefig(
os.path.join(pca_dir, 'pcaScoresPlotSamples.pdf'),
os.path.join(pca_dir, 'pca_samples.pdf'),
bbox_inches='tight')
plt.close('all')

data = reorganize_dfcolumns(data, markers, self.dimensionEmbedding)
else:
logging.info("n_components = 1, skipping PCA and Horn's parallel analysis.")
logging.info(
"n_components = 1. Only 1 sample (or 1 marker) in analysis. "
"Skipping PCA and Horn's parallel analysis."
)

print()
print()
Expand Down
38 changes: 28 additions & 10 deletions cylinter/modules/aggregateData.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
import os
import sys
import yaml
import logging

import pandas as pd
Expand All @@ -13,10 +16,18 @@ def aggregateData(data, self, args):

check, markers_filepath = input_check(self)

markers, dna1, dna_moniker, abx_channels = read_markers(
markers_filepath=markers_filepath, markers_to_exclude=self.markersToExclude, data=None
markers, abx_channels = read_markers(
markers_filepath=markers_filepath,
counterstain_channel=self.counterstainChannel,
markers_to_exclude=self.markersToExclude, data=None
)

# initialize CyLinter QC report if it hasn't been already
report_path = os.path.join(self.outDir, 'cylinter_report.yml')
if not os.path.exists(report_path):
f = open(report_path, 'w')
yaml.dump({}, f)

df_list = []
channel_setlist = []
sample_keys = [i for i in self.sampleNames.keys()]
Expand All @@ -41,11 +52,10 @@ def aggregateData(data, self, args):

# select boilerplate columns
cols = (
['CellID', 'X_centroid', 'Y_centroid', 'Area',
'MajorAxisLength', 'MinorAxisLength',
'Eccentricity', 'Solidity', 'Extent',
'Orientation'] +
[i for i in markers['marker_name'] if i in csv.columns]
[i for i in [j for j in markers['marker_name']] +
[i for i in ['CellID', 'X_centroid', 'Y_centroid', 'Area', 'MajorAxisLength',
'MinorAxisLength', 'Eccentricity', 'Solidity', 'Extent',
'Orientation'] if i in csv.columns]]
)

# (for BAF project)
Expand Down Expand Up @@ -98,8 +108,16 @@ def aggregateData(data, self, args):
# 'Orientation'] +
# [f'{i}_{mask_dict[i]}' for i
# in markers['marker_name']])

csv = csv[cols]

try:
csv = csv[cols]
except KeyError as e:
logger.info(
'Aborting; some (or all) marker names in markers.csv do not appear '
'as columns in the single-cell data table. Check for spelling and case.'
)
print(e)
sys.exit()

# (for SARDANA)
# trim mask object names from column headers
Expand Down Expand Up @@ -159,7 +177,7 @@ def aggregateData(data, self, args):
data.reset_index(drop=True, inplace=True)

# ensure MCMICRO-generated columns come first and
# are in the same order as csv input
# are in the same order as csv feature tables
data = reorganize_dfcolumns(data, markers, self.dimensionEmbedding)

print()
Expand Down
Loading

0 comments on commit 30ab9a7

Please sign in to comment.