Skip to content

Commit

Permalink
One more fix for the nuc acid counter resolution (#102)
Browse files Browse the repository at this point in the history
These changes allow an existing nuc acid counter to be reused if it doesn't need to be incremented.

Signed-off-by: Angelica Ochoa <[email protected]>
  • Loading branch information
ao508 authored Feb 6, 2025
1 parent b084a22 commit 3f47ec4
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 24 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Objects;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
Expand Down Expand Up @@ -780,18 +781,31 @@ private Set<Integer> parseMatchingNucleicAcidCountersFromSampleLabels(String stA
continue;
}

Integer currentIncrement;
if (matcher.group(CMO_SAMPLE_NUCACID_COUNTER_GROUP) == null
|| matcher.group(CMO_SAMPLE_NUCACID_COUNTER_GROUP).isEmpty()) {
currentIncrement = 1;
} else {
currentIncrement = Integer.valueOf(matcher.group(CMO_SAMPLE_NUCACID_COUNTER_GROUP));
Integer currentIncrement = parseNucleicAcidCounterFromLabel(sample.getCmoSampleName());
if (currentIncrement != null) {
nucAcidCountersByAltId.add(currentIncrement);
}
nucAcidCountersByAltId.add(currentIncrement);
}
return nucAcidCountersByAltId;
}

/**
 * Parses the nucleic acid counter from a CMO sample label.
 * @param cmoLabel CMO sample label to parse, may be null
 * @return Integer the counter parsed from the label, 1 if the label matches
 *     CMO_SAMPLE_ID_REGEX but its counter group is null or empty, or null if the
 *     label is null or does not match CMO_SAMPLE_ID_REGEX
 */
private Integer parseNucleicAcidCounterFromLabel(String cmoLabel) {
    // a missing label has no counter to parse; report it the same way as a
    // non-matching label instead of failing with a NullPointerException
    if (cmoLabel == null) {
        return null;
    }

    // if sample cmo label does not meet matcher criteria then there is no counter to report
    Matcher matcher = CMO_SAMPLE_ID_REGEX.matcher(cmoLabel);
    if (!matcher.find()) {
        return null;
    }

    // read the counter group once; a matching label without a counter resolves to 1
    String nucAcidCounter = matcher.group(CMO_SAMPLE_NUCACID_COUNTER_GROUP);
    if (nucAcidCounter == null || nucAcidCounter.isEmpty()) {
        return 1;
    }
    return Integer.valueOf(nucAcidCounter);
}

/**
* Returns the nucleic acid increment. Counter will be a 2-digit integer value ranging
* from 01-99 (values less than 10 are padded with a leading zero '0' to preserve the 2-digit format).
Expand All @@ -812,22 +826,30 @@ private Integer resolveNextNucleicAcidIncrement(String primaryId, String stAbbre
Set<Integer> nucAcidCountersByAltId
= parseMatchingNucleicAcidCountersFromSampleLabels(stAbbrev, nucAcidAbbrev, samplesByAltId);

// if primary id exists in the set of samples by alt id then store nuc acid counter for reference
Integer existingNucAcidCounter = null;
for (SampleMetadata s : samplesByAltId) {
if (s.getPrimaryId().equals(primaryId)) {
existingNucAcidCounter = parseNucleicAcidCounterFromLabel(s.getCmoSampleName());
}
}

// easy scenario: length of matching samples given an alt id is 1 and sample matches the
// primary id of the sample currently being interrogated then return nucleic acid counter as 1
if (samplesByAltId.size() == 1 && samplesByAltId.get(0).getPrimaryId().equals(primaryId)) {
return 1;
}

// for all other scenarios, resolve next consecutive counter from the parsed set of counters
return getNextNucleicAcidIncrement(nucAcidCountersByAltId);
return getNextNucleicAcidIncrement(nucAcidCountersByAltId, existingNucAcidCounter);
}

/**
* Resolves the next nucleic acid increment from a set of provided counters.
* @param counters
* @return Integer
*/
private Integer getNextNucleicAcidIncrement(Set<Integer> counters) {
private Integer getNextNucleicAcidIncrement(Set<Integer> counters, Integer existingNucAcidCounter) {
if (counters.isEmpty() || Collections.min(counters) != 1) {
return 1;
}
Expand All @@ -839,11 +861,21 @@ private Integer getNextNucleicAcidIncrement(Set<Integer> counters) {
for (int i = 1; i < sortedCounters.size(); i++) {
Integer currentCounter = sortedCounters.get(i);
Integer prevCounter = sortedCounters.get(i - 1);

// if the difference between the counters is > 1 then return the prev counter + 1
if ((currentCounter - prevCounter) > 1) {
return prevCounter + 1;
} else {
refCounter = currentCounter;
}

// if the current counter matches the existing nuc acid counter
// then return since the current counter is +1 from the prev counter
// and therefore is already the next consecutive integer
if (existingNucAcidCounter != null && Objects.equals(existingNucAcidCounter, currentCounter)) {
return existingNucAcidCounter;
}

// move onto the next counter in the list
refCounter = currentCounter;
}
return refCounter + 1;
}
Expand Down
33 changes: 20 additions & 13 deletions src/test/java/org/mskcc/smile/CmoLabelGeneratorServiceTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
Expand Down Expand Up @@ -337,21 +338,17 @@ public void testChangeInSampleCounter() throws Exception {
@Test
public void testNextConsecutiveCounter() throws Exception {
Set<Integer> counters = new HashSet<>(Arrays.asList(1, 2, 7));
Integer nextConsecutiveInt = getNextNucleicAcidIncrement(counters);
Assertions.assertEquals(3, nextConsecutiveInt);
Integer nextConsecutiveInt1 = getNextNucleicAcidIncrement(counters, null);
Assertions.assertEquals(3, nextConsecutiveInt1);

Integer nextConsecutiveInt2 = getNextNucleicAcidIncrement(counters, 2);
Assertions.assertEquals(2, nextConsecutiveInt2);
}

private Integer getNextNucleicAcidIncrement(Set<Integer> counters) {
if (counters.isEmpty()) {
private Integer getNextNucleicAcidIncrement(Set<Integer> counters, Integer existingNucAcidCounter) {
if (counters.isEmpty() || Collections.min(counters) != 1) {
return 1;
}
if (counters.size() == 1) {
if (Collections.min(counters) != 1) {
return 1;
} else {
return 2;
}
}

List<Integer> sortedCounters = Arrays.asList(counters.toArray(Integer[]::new));
Collections.sort(sortedCounters);
Expand All @@ -360,11 +357,21 @@ private Integer getNextNucleicAcidIncrement(Set<Integer> counters) {
for (int i = 1; i < sortedCounters.size(); i++) {
Integer currentCounter = sortedCounters.get(i);
Integer prevCounter = sortedCounters.get(i - 1);

// if the difference between the counters is > 1 then return the prev counter + 1
if ((currentCounter - prevCounter) > 1) {
return prevCounter + 1;
} else {
refCounter = currentCounter;
}

// if the current counter matches the existing nuc acid counter
// then return since the current counter is +1 from the prev counter
// and therefore is already the next consecutive integer
if (existingNucAcidCounter != null && Objects.equals(existingNucAcidCounter, currentCounter)) {
return existingNucAcidCounter;
}

// move onto the next counter in the list
refCounter = currentCounter;
}
return refCounter + 1;
}
Expand Down

0 comments on commit 3f47ec4

Please sign in to comment.