Skip to content

Commit

Permalink
Init commit - alt id support (#87) (#90)
Browse files Browse the repository at this point in the history
Use ALT IDs when assigning sample counter to CMO labels

- Uses samples matching a given ALT ID when assigning sample counters in CMO labels
- Updated mock data and added unit tests
- Updated smile-server version


Signed-off-by: Angelica Ochoa <[email protected]>
  • Loading branch information
ao508 authored Jan 15, 2025
1 parent 3306648 commit 3c6f1cf
Show file tree
Hide file tree
Showing 29 changed files with 1,320 additions and 160 deletions.
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
<smile_commons.version>2.0.0.RELEASE</smile_commons.version>
<!-- smile server model module -->
<smile_server.group>com.github.mskcc.smile-server</smile_server.group>
<smile_server.version>2.0.2.RELEASE</smile_server.version>
<smile_server.version>2.1.0.RELEASE</smile_server.version>
</properties>

<dependencies>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,10 @@
*/
public interface CmoLabelGeneratorService {
String generateCmoSampleLabel(String requestId,
IgoSampleManifest sampleManifest, List<SampleMetadata> existingPatientSamples);
String generateCmoSampleLabel(SampleMetadata sample, List<SampleMetadata> existingPatientSamples);
IgoSampleManifest sampleManifest, List<SampleMetadata> existingPatientSamples,
List<SampleMetadata> samplesByAltId);
String generateCmoSampleLabel(SampleMetadata sample, List<SampleMetadata> existingPatientSamples,
List<SampleMetadata> samplesByAltId);
Status generateSampleStatus(String requestId, IgoSampleManifest sampleManifest,
List<SampleMetadata> existingSamples) throws JsonProcessingException;
Status generateSampleStatus(SampleMetadata sampleMetadata,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -166,7 +168,7 @@ private Boolean compareMatcherGroups(Matcher matcher1, Matcher matcher2, Integer

@Override
public String generateCmoSampleLabel(String requestId, IgoSampleManifest sampleManifest,
List<SampleMetadata> existingSamples) {
List<SampleMetadata> existingSamples, List<SampleMetadata> samplesByAltId) {
// if sample is a cellline sample then generate a cmo cellline label
if (isCmoCelllineSample(sampleManifest)) {
return generateCmoCelllineSampleLabel(requestId, sampleManifest.getInvestigatorSampleId());
Expand All @@ -176,7 +178,8 @@ public String generateCmoSampleLabel(String requestId, IgoSampleManifest sampleM
String sampleTypeAbbreviation = resolveSampleTypeAbbreviation(sampleManifest);

// resolve the sample counter value to use for the cmo label
Integer sampleCounter = resolveSampleIncrementValue(sampleManifest.getIgoId(), existingSamples);
Integer sampleCounter = resolveSampleIncrementValue(sampleManifest.getIgoId(),
existingSamples, samplesByAltId);
String paddedSampleCounter = getPaddedIncrementString(sampleCounter,
CMO_SAMPLE_COUNTER_STRING_PADDING);

Expand All @@ -188,7 +191,8 @@ public String generateCmoSampleLabel(String requestId, IgoSampleManifest sampleM
return null;
}
// get next increment for nucleic acid abbreviation
Integer nextNucAcidCounter = getNextNucleicAcidIncrement(nucleicAcidAbbreviation, existingSamples);
Integer nextNucAcidCounter = getNextNucleicAcidIncrement(sampleManifest.getIgoId(),
nucleicAcidAbbreviation, existingSamples, samplesByAltId);
String paddedNucAcidCounter = getPaddedIncrementString(nextNucAcidCounter,
CMO_SAMPLE_NUCACID_COUNTER_PADDING);

Expand All @@ -200,7 +204,7 @@ public String generateCmoSampleLabel(String requestId, IgoSampleManifest sampleM

@Override
public String generateCmoSampleLabel(SampleMetadata sampleMetadata,
List<SampleMetadata> existingSamples) {
List<SampleMetadata> existingSamples, List<SampleMetadata> samplesByAltId) {
// if sample is a cellline sample then generate a cmo cellline label
if (isCmoCelllineSample(sampleMetadata.getSampleClass(), sampleMetadata.getCmoSampleIdFields())) {
return generateCmoCelllineSampleLabel(sampleMetadata.getIgoRequestId(),
Expand All @@ -218,7 +222,8 @@ public String generateCmoSampleLabel(SampleMetadata sampleMetadata,
}

// resolve the sample counter value to use for the cmo label
Integer sampleCounter = resolveSampleIncrementValue(sampleMetadata.getPrimaryId(), existingSamples);
Integer sampleCounter = resolveSampleIncrementValue(sampleMetadata.getPrimaryId(),
existingSamples, samplesByAltId);
String paddedSampleCounter = getPaddedIncrementString(sampleCounter,
CMO_SAMPLE_COUNTER_STRING_PADDING);

Expand All @@ -234,7 +239,8 @@ public String generateCmoSampleLabel(SampleMetadata sampleMetadata,
return null;
}
// get next increment for nucleic acid abbreviation
Integer nextNucAcidCounter = getNextNucleicAcidIncrement(nucleicAcidAbbreviation, existingSamples);
Integer nextNucAcidCounter = getNextNucleicAcidIncrement(sampleMetadata.getPrimaryId(),
nucleicAcidAbbreviation, existingSamples, samplesByAltId);
String paddedNucAcidCounter = getPaddedIncrementString(nextNucAcidCounter,
CMO_SAMPLE_NUCACID_COUNTER_PADDING);

Expand Down Expand Up @@ -437,11 +443,30 @@ private String getPaddedIncrementString(Integer increment, Integer padding) {
* @param existingSamples
* @return Integer
*/
private Integer resolveSampleIncrementValue(String primaryId, List<SampleMetadata> existingSamples) {
if (existingSamples.isEmpty()) {
private Integer resolveSampleIncrementValue(String primaryId, List<SampleMetadata> existingSamples,
List<SampleMetadata> samplesByAltId) {
if (existingSamples.isEmpty() && samplesByAltId.isEmpty()) {
return 1;
}

// samples sharing the alt id take precedence: reuse the lowest sample counter already
// assigned to any of them (this check runs before the primary id lookup further down)
if (!samplesByAltId.isEmpty()) {
    List<Integer> altIdSampleCounters = new ArrayList<>();
    for (SampleMetadata sample : samplesByAltId) {
        // samples may exist without a cmo label; a null label would make the
        // matcher call below fail so skip them
        if (StringUtils.isBlank(sample.getCmoSampleName())) {
            continue;
        }
        Matcher matcher = CMO_SAMPLE_ID_REGEX.matcher(sample.getCmoSampleName());
        if (matcher.find()) {
            altIdSampleCounters.add(Integer.valueOf(matcher.group(CMO_SAMPLE_COUNTER_GROUP)));
        }
    }
    // Collections.min throws NoSuchElementException on an empty collection, so only
    // return from here when at least one counter could be parsed; otherwise fall
    // through to the remaining resolution logic
    if (!altIdSampleCounters.isEmpty()) {
        return Collections.min(altIdSampleCounters);
    }
}

// if we find a match by the primary id then return the increment parsed from
// the matching sample's current cmo label
for (SampleMetadata sample : existingSamples) {
Expand All @@ -454,8 +479,9 @@ private Integer resolveSampleIncrementValue(String primaryId, List<SampleMetadat
}
}

// assuming that a match by the primary id has not been identified
// then we can use the next sample increment logic like before
// if there aren't any existing samples by the same alt id then this is a new sample specimen for the
// current patient so the sample increment for the sample cmo label will be based on number of other
// existing patient samples
return getNextSampleIncrement(existingSamples);
}

Expand Down Expand Up @@ -499,52 +525,116 @@ private Integer getNextSampleIncrement(List<SampleMetadata> samples) {
* from 01-99 (values less than 10 are left-padded with a zero '0' to preserve the 2-digit format).
* From the time of implementation the first sample for a particular Nucleic Acid gets 01.
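* @param primaryId primary id of the sample whose label is being generated
* @param nucleicAcidAbbreviation
* @param samples
* @param existingSamples
* @param samplesByAltId samples matching the alt id of the sample being labeled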
* @return Integer
*/
private Integer getNextNucleicAcidIncrement(String nucleicAcidAbbreviation,
List<SampleMetadata> samples) {
if (samples.isEmpty()) {
private Integer getNextNucleicAcidIncrement(String primaryId, String nucleicAcidAbbreviation,
List<SampleMetadata> existingSamples, List<SampleMetadata> samplesByAltId) {
if (existingSamples.isEmpty() && samplesByAltId.isEmpty()) {
return 1;
}

if (!samplesByAltId.isEmpty()) {
Integer maxIncrement = 0;
for (SampleMetadata sample : samplesByAltId) {
// ignore samples with empty cmo sample labels
if (StringUtils.isBlank(sample.getCmoSampleName())) {
continue;
}
// skip cell line samples as well
if (CMO_CELLLINE_ID_REGEX.matcher(sample.getCmoSampleName()).find()) {
continue;
}

// if sample cmo label does not meet matcher criteria then skip
Matcher matcher = CMO_SAMPLE_ID_REGEX.matcher(sample.getCmoSampleName());
if (!matcher.find()) {
continue;
}

// skip labels that do not match the current nucleic acid abbreviation
String currentNucAcidAbbreviation = matcher.group(CMO_SAMPLE_NUCACID_ABBREV_GROUP);
if (!currentNucAcidAbbreviation.equals(nucleicAcidAbbreviation)) {
continue;
}

Integer currentIncrement;
if (matcher.group(CMO_SAMPLE_NUCACID_COUNTER_GROUP) == null
|| matcher.group(CMO_SAMPLE_NUCACID_COUNTER_GROUP).isEmpty()) {
currentIncrement = 1;
} else {
currentIncrement = Integer.valueOf(matcher.group(CMO_SAMPLE_NUCACID_COUNTER_GROUP));
}

// if primary id match is found then we know that this sample isn't actually being
// reprocessed and likely received some other metadata update that should not affect
// the label generation as far as the nuc acid counter is concerned
if (sample.getPrimaryId().equals(primaryId)) {
return currentIncrement;
}

// update max increment if needed
if (currentIncrement > maxIncrement) {
maxIncrement = currentIncrement;
}
}
// assuming that this sample primary id does not already exist in the list of samples matching
// the same alt id means that we should use the max increment found + 1
return maxIncrement + 1;
}

// handle cases where alt ID hasn't been backfilled yet or maybe it's the first time
// that this alt ID will be appearing in the database

// otherwise extract the max counter from the current set of samples
// do not rely on the size of the list having the exact same counter
// to prevent accidentally giving samples the same counter
Integer maxIncrement = 0;
for (SampleMetadata sample : samples) {
for (SampleMetadata sample : existingSamples) {
// skip samples with null cmo sample name (possible now that we allow all samples to get in db
// even if they fail validation and/or fail label generation)
if (StringUtils.isBlank(sample.getCmoSampleName())) {
LOG.warn("Skipping patient sample with null CMO sample label: CMO patient ID = "
+ sample.getCmoPatientId() + ", sample primary ID = " + sample.getPrimaryId());
continue;
}
// skip cell line samples
if (CMO_CELLLINE_ID_REGEX.matcher(sample.getCmoSampleName()).find()) {
continue;
}

Matcher matcher = CMO_SAMPLE_ID_REGEX.matcher(sample.getCmoSampleName());

// if label doesn't match the cmo label regex pattern then skip
if (!matcher.find()) {
continue;
}
// increment assigned to the current sample is in group 3 of matcher
if (matcher.find()) {
// nucleic acid abbreviation determines which counters we consider
// when iterating through sample list
if (!matcher.group(CMO_SAMPLE_NUCACID_ABBREV_GROUP)
.equalsIgnoreCase(nucleicAcidAbbreviation)) {
continue;
}
Integer currentIncrement;
if (matcher.group(CMO_SAMPLE_NUCACID_COUNTER_GROUP) == null
|| matcher.group(CMO_SAMPLE_NUCACID_COUNTER_GROUP).isEmpty()) {
currentIncrement = 1;
} else {
currentIncrement = Integer.valueOf(matcher.group(CMO_SAMPLE_NUCACID_COUNTER_GROUP));
}
if (currentIncrement > maxIncrement) {
maxIncrement = currentIncrement;
}
// nucleic acid abbreviation determines which counters we consider

// when iterating through sample list if nuc acid doesn't match then move
// on to the next one
if (!matcher.group(CMO_SAMPLE_NUCACID_ABBREV_GROUP)
.equalsIgnoreCase(nucleicAcidAbbreviation)) {
continue;
}

Integer currentIncrement;
if (matcher.group(CMO_SAMPLE_NUCACID_COUNTER_GROUP) == null
|| matcher.group(CMO_SAMPLE_NUCACID_COUNTER_GROUP).isEmpty()) {
currentIncrement = 1;
} else {
currentIncrement = Integer.valueOf(matcher.group(CMO_SAMPLE_NUCACID_COUNTER_GROUP));
}

// if primary id match is found then we know that this sample isn't actually being
// reprocessed and likely received some other metadata update that should not affect
// the label generation as far as the nuc acid counter is concerned
if (sample.getPrimaryId().equals(primaryId)) {
return currentIncrement;
}
}
return maxIncrement + 1;
// always return 1 by default if a primary id match isn't found and we are resolving the
// nuc acid counter since the nuc acid counter should be based on a per-unique sample
// basis and not by total patient samples
return 1;
}

private String generateCmoCelllineSampleLabel(String requestId, String sampleInvestigatorId) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,9 @@ public class LabelGenMessageHandlingServiceImpl implements MessageHandlingServic
@Value("${request_reply.samples_by_cmo_label_topic}")
private String SAMPLES_BY_CMO_LABEL_REQREPLY_TOPIC;

@Value("${request_reply.samples_by_alt_id_topic}")
private String SAMPLES_BY_ALT_ID_REQREPLY_TOPIC;

@Autowired
private CmoLabelGeneratorService cmoLabelGeneratorService;

Expand All @@ -77,6 +80,7 @@ public class LabelGenMessageHandlingServiceImpl implements MessageHandlingServic
private static boolean initialized = false;
private static volatile boolean shutdownInitiated;
private static final ExecutorService exec = Executors.newCachedThreadPool();

private static final BlockingQueue<String> cmoLabelGeneratorQueue =
new LinkedBlockingQueue<String>();
private static final BlockingQueue<String> cmoPromotedLabelQueue =
Expand All @@ -87,6 +91,7 @@ public class LabelGenMessageHandlingServiceImpl implements MessageHandlingServic
new LinkedBlockingQueue<String>();
private static final BlockingQueue<SampleMetadata> cmoSampleLabelUpdateQueue =
new LinkedBlockingQueue<SampleMetadata>();

private static CountDownLatch cmoLabelGeneratorShutdownLatch;
private static CountDownLatch cmoPromotedLabelShutdownLatch;
private static CountDownLatch newRequestPublisherShutdownLatch;
Expand Down Expand Up @@ -229,9 +234,12 @@ public void run() {
List<SampleMetadata> existingSamples =
patientSamplesMap.getOrDefault(sampleManifest.getCmoPatientId(),
new ArrayList<>());
List<SampleMetadata> samplesByAltId
= getSamplesByAltId(sampleManifest.getAltid());

// TODO resolve any issues that arise with errors in generating cmo label
String newSampleCmoLabel = cmoLabelGeneratorService.generateCmoSampleLabel(
requestId, sampleManifest, existingSamples);
requestId, sampleManifest, existingSamples, samplesByAltId);
if (newSampleCmoLabel == null) {
sampleStatus = cmoLabelGeneratorService.generateSampleStatus(
requestId, sampleManifest, existingSamples);
Expand Down Expand Up @@ -391,6 +399,8 @@ public void run() {
String origSampleJson = mapper.writeValueAsString(sample);
List<SampleMetadata> existingSamples =
getExistingPatientSamples(sample.getCmoPatientId());
List<SampleMetadata> samplesByAltId
= getSamplesByAltId(sample.getAdditionalProperty("altId"));
// Case when sample update json doesn't have status
if (sample.getStatus() == null) {
Status newSampleStatus = cmoLabelGeneratorService
Expand All @@ -400,7 +410,8 @@ public void run() {
if (sample.getStatus().getValidationStatus()) {
// generate new cmo sample label and update sample metadata object
String newCmoSampleLabel =
cmoLabelGeneratorService.generateCmoSampleLabel(sample, existingSamples);
cmoLabelGeneratorService.generateCmoSampleLabel(sample,
existingSamples, samplesByAltId);
if (newCmoSampleLabel == null) {
Status newSampleStatus = cmoLabelGeneratorService
.generateSampleStatus(sample, existingSamples);
Expand Down Expand Up @@ -601,6 +612,15 @@ private List<SampleMetadata> getSamplesByCmoLabel(String cmoLabel) throws Except
return new ArrayList<>(Arrays.asList(samplesByCmoLabel));
}

/**
 * Requests the samples matching the given alt id via the
 * SAMPLES_BY_ALT_ID_REQREPLY_TOPIC request-reply topic and deserializes the
 * reply into a mutable list.
 * @param altId alt id to look up; may be null or blank
 * @return List of samples from the reply, or an empty list when no alt id was provided
 * @throws Exception propagated from the messaging request or from parsing the reply
 */
private List<SampleMetadata> getSamplesByAltId(String altId) throws Exception {
    // without an alt id there is nothing to look up, so skip the request entirely
    // and let callers see "no samples by alt id"
    if (altId == null || altId.trim().isEmpty()) {
        return new ArrayList<>();
    }
    Message reply = messagingGateway.request(SAMPLES_BY_ALT_ID_REQREPLY_TOPIC, altId);
    String replyJson = new String(reply.getData(), StandardCharsets.UTF_8);
    SampleMetadata[] samplesByAltId = mapper.readValue(replyJson, SampleMetadata[].class);
    // copy into an ArrayList since Arrays.asList returns a fixed-size view
    return new ArrayList<>(Arrays.asList(samplesByAltId));
}

private Boolean isCmoLabelAlreadyInUse(String primaryId, String cmoLabel) throws Exception {
List<SampleMetadata> samplesByCmoLabel = getSamplesByCmoLabel(cmoLabel);
for (SampleMetadata sm : samplesByCmoLabel) {
Expand Down
Loading

0 comments on commit 3c6f1cf

Please sign in to comment.