Skip to content

Commit

Permalink
Merge branch 'master' into kj_sv_cleanvcf
Browse files Browse the repository at this point in the history
  • Loading branch information
kjaisingh committed Jan 17, 2025
2 parents a40b193 + 717d8a9 commit 63838fe
Show file tree
Hide file tree
Showing 11 changed files with 200 additions and 94 deletions.
2 changes: 1 addition & 1 deletion .github/actions/upload-gatk-test-results/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ runs:
steps:
- name: Upload test results
if: always()
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
with:
# ternary expression syntax is workaround found here https://github.com/actions/runner/issues/409
name: test-results-${{ inputs.is-docker == 'true' && 'docker-' || '' }}${{ matrix.Java }}-${{ matrix.testType }}
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/gatk-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ jobs:
outputs: type=docker,dest=/tmp/myimage.tar
# By uploading the docker image as an artifact we save ourselves the requirement for the image to be built with ghcr push permission
- name: Upload docker image as artifact
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
with:
name: gatkDockerImage
path: /tmp/myimage.tar
Expand Down Expand Up @@ -204,7 +204,7 @@ jobs:
cp scripts/docker/dockertest.gradle .
- name: Download docker image artifact
uses: actions/download-artifact@v3
uses: actions/download-artifact@v4
with:
name: gatkDockerImage
path: /tmp
Expand Down
4 changes: 2 additions & 2 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -358,8 +358,8 @@ dependencies {
implementation 'org.apache.commons:commons-compress:1.26.0'
implementation 'org.apache.ivy:ivy:2.5.2'
implementation 'org.apache.commons:commons-text:1.10.0' because 'of https://nvd.nist.gov/vuln/detail/CVE-2022-42889'
implementation 'ch.qos.logback:logback-classic:1.4.14'
implementation 'ch.qos.logback:logback-core:1.4.14'
implementation 'ch.qos.logback:logback-classic:1.5.13'
implementation 'ch.qos.logback:logback-core:1.5.13'
implementation 'org.apache.avro:avro:1.12.0'
implementation 'io.airlift:aircompressor:0.27'
implementation 'org.scala-lang:scala-library:2.13.14'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -122,30 +122,35 @@ public enum FlagFieldLogic {
/**
* Comparators used for picking the representative genotype for a given sample
*/
// Priotize non-ref over ref
final Comparator<Genotype> genotypeIsNonRefComparator = (o1, o2) -> {
final long count1 = Math.min(1, o1.getAlleles().stream().filter(Allele::isNonReference).filter(Allele::isCalled).count());
final long count2 = Math.min(1, o2.getAlleles().stream().filter(Allele::isNonReference).filter(Allele::isCalled).count());
return Long.compare(count1, count2);
};

// Priotize fewer ALT alleles over more. When applied after non-ref comparator, hom-ref genotypes will not be encountered.
final Comparator<Genotype> genotypeNonRefCountComparator = (o1, o2) -> {
final long count1 = o1.getAlleles().stream().filter(Allele::isNonReference).filter(Allele::isCalled).count();
final long count2 = o2.getAlleles().stream().filter(Allele::isNonReference).filter(Allele::isCalled).count();
return Long.compare(count1, count2);
return Long.compare(count2, count1);
};

// Priotize called genotypes
final Comparator<Genotype> genotypeCalledComparator = (o1, o2) -> {
final long count1 = o1.getAlleles().stream().filter(Allele::isCalled).count();
final long count2 = o2.getAlleles().stream().filter(Allele::isCalled).count();
return Long.compare(count1, count2);
};

// Priotize higher quality
final Comparator<Genotype> genotypeQualityComparator = (o1, o2) -> {
final int quality1 = VariantContextGetters.getAttributeAsInt(o1, VCFConstants.GENOTYPE_QUALITY_KEY, 0);
final int quality2 = VariantContextGetters.getAttributeAsInt(o2, VCFConstants.GENOTYPE_QUALITY_KEY, 0);
return Integer.compare(quality1, quality2);
};

// Priotize higher depth genotyping quality
final Comparator<Genotype> genotypeCopyNumberQualityComparator = new Comparator<Genotype>() {
@Override
public int compare(Genotype o1, Genotype o2) {
Expand All @@ -155,14 +160,33 @@ public int compare(Genotype o1, Genotype o2) {
}
};

// Priotize depth genotypes closer to reference
final Comparator<Genotype> genotypeCopyNumberComparator = new Comparator<Genotype>() {
@Override
public int compare(Genotype o1, Genotype o2) {
final int expectedQualityNumber1 = VariantContextGetters.getAttributeAsInt(o1, GATKSVVCFConstants.EXPECTED_COPY_NUMBER_FORMAT, 0);
final int copyNumber1 = VariantContextGetters.getAttributeAsInt(o1, GATKSVVCFConstants.COPY_NUMBER_FORMAT, 0);
final int expectedQualityNumber2 = VariantContextGetters.getAttributeAsInt(o2, GATKSVVCFConstants.EXPECTED_COPY_NUMBER_FORMAT, 0);
final int copyNumber2 = VariantContextGetters.getAttributeAsInt(o2, GATKSVVCFConstants.COPY_NUMBER_FORMAT, 0);
return Double.compare(Math.abs(expectedQualityNumber1 - copyNumber1), Math.abs(expectedQualityNumber2 - copyNumber2));
return Double.compare(Math.abs(expectedQualityNumber2 - copyNumber2), Math.abs(expectedQualityNumber1 - copyNumber1));
}
};

// Priotize DEL over DUP as final tiebreaker
final Comparator<Genotype> genotypeDelOverDupComparator = new Comparator<Genotype>() {
@Override
public int compare(Genotype o1, Genotype o2) {
final int expectedCN1 = VariantContextGetters.getAttributeAsInt(o1, GATKSVVCFConstants.EXPECTED_COPY_NUMBER_FORMAT, 0);
final boolean isDel1 = VariantContextGetters.getAttributeAsInt(o1, GATKSVVCFConstants.COPY_NUMBER_FORMAT, expectedCN1) < expectedCN1;
final int expectedCN2 = VariantContextGetters.getAttributeAsInt(o2, GATKSVVCFConstants.EXPECTED_COPY_NUMBER_FORMAT, 0);
final boolean isDel2 = VariantContextGetters.getAttributeAsInt(o2, GATKSVVCFConstants.COPY_NUMBER_FORMAT, expectedCN2) < expectedCN2;
if (isDel1 && !isDel2) {
return 1;
} else if (isDel2 && !isDel1) {
return -1;
} else {
return 0;
}
}
};

Expand Down Expand Up @@ -461,7 +485,8 @@ protected Genotype getRepresentativeGenotype(final Collection<Genotype> genotype
.thenComparing(genotypeQualityComparator)
.thenComparing(genotypeNonRefCountComparator)
.thenComparing(genotypeCopyNumberQualityComparator)
.thenComparing(genotypeCopyNumberComparator)).get();
.thenComparing(genotypeCopyNumberComparator)
.thenComparing(genotypeDelOverDupComparator)).get();
}


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ public double haplotypeReadMatching(final FlowBasedHaplotype haplotype, final Fl
read.getTrimmedEnd()).getLeft();

final int haplotypeLength = haplotypeEnd - haplotypeStart;
final int readLength = read.seqLength();
final int readLength = read.getLength();


//in case there is a deletion on the haplotype and hte read falls inside the deletion (thus length of the read is
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -236,8 +236,9 @@ public HaplotypeCallerEngine(final HaplotypeCallerArgumentCollection hcArgs, Ass

// Parse the user provided custom ploidy regions into ploidyRegions object containing SimpleCounts
if (this.hcArgs.ploidyRegions != null) {
FeatureDataSource<NamedFeature> ploidyDataSource = new FeatureDataSource<>(this.hcArgs.ploidyRegions, FeatureDataSource.DEFAULT_QUERY_LOOKAHEAD_BASES, NamedFeature.class);
ploidyDataSource.forEach(r -> this.ploidyRegions.add(new SimpleCount(r)));
try (FeatureDataSource<NamedFeature> ploidyDataSource = new FeatureDataSource<>(this.hcArgs.ploidyRegions, FeatureDataSource.DEFAULT_QUERY_LOOKAHEAD_BASES, NamedFeature.class)) {
ploidyDataSource.forEach(r -> this.ploidyRegions.add(new SimpleCount(r)));
}
}

for (SimpleCount region : this.ploidyRegions) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@
* is coded in the tags of the BAM and is given in flow space). This code is not used in production, but was used in
* development and testing
*
* A common usage pattern is to covert a GATKRead into a FlowBasedRead. Additionally
* A common usage pattern is to covert a GATKRead into a FlowBasedRead. Additionally,
* a SAMRecord can also be converted into a FlowBasedRead. Follows a common usage pattern:
*
* For a self contained example of a usage pattern, see {@link FlowBasedReadUtils#convertToFlowBasedRead(GATKRead, SAMFileHeader)}
* For a self-contained example of a usage pattern, see {@link FlowBasedReadUtils#convertToFlowBasedRead(GATKRead, SAMFileHeader)}
*
**/

Expand Down Expand Up @@ -72,30 +72,25 @@ public class FlowBasedRead extends SAMRecordToGATKReadAdapter implements GATKRea
/**
* The sam record from which this flow based read originated
*/
private SAMRecord samRecord;
private final SAMRecord samRecord;

/**
* The read's sequence, always in forward direction
*/
private byte[] forwardSequence;

/**
* The flow key for the read - i.e. lengths of hmers in an flow order.
* The flow key for the read - i.e. lengths of hmers in flow order.
*
* For example, assuming a flow order of TGCA, and a forward sequence of GGAAT, the key will be 0,2,0,2,1
*/
private int[] key;

/**
* the maping of key elements to their origin locations in the sequence. Entry n contains the offset in the sequence
* the mapping of key elements to their origin locations in the sequence. Entry n contains the offset in the sequence
* where the hmer described by this key element starts.
*/
private int [] flow2base;

/**
* The maximal length of an hmer that can be encoded (normally in the 10-12 range)
*/
private int maxHmer;
private final int maxHmer;

/**
* The value to fill the flow matrix with. Normally 0.001
Expand All @@ -104,20 +99,20 @@ public class FlowBasedRead extends SAMRecordToGATKReadAdapter implements GATKRea
private double perHmerMinErrorProb;

/**
* The order in which flow key in encoded (See decription for key field). Flow order may be wrapped if a longer one
* The order in which flow key in encoded (See description for key field). Flow order may be wrapped if a longer one
* needed.
*/
private byte[] flowOrder;

/**
* The probability matrix for this read. [n][m] position represents that probablity that an hmer of n length will be
* present at the m key position. Therefore, the first dimention is in the maxHmer order, where the second dimension
* The probability matrix for this read. [n][m] position represents that probability that an hmer of n length will be
* present at the m key position. Therefore, the first dimension is in the maxHmer order, where the second dimension
* is length(key).
*/
private double[][] flowMatrix;

/**
* The validity status of the key. Certain operations may produce undefined/errornous results. This is signaled by
* The validity status of the key. Certain operations may produce undefined/erroneous results. This is signaled by
* the read being marked with a validKey == false
*/
private boolean validKey = true;
Expand Down Expand Up @@ -199,7 +194,7 @@ public FlowBasedRead(final GATKRead read, final String flowOrder, final int maxH
* @param samRecord record from SAM file
* @param flowOrder flow order (single cycle)
* @param maxHmer maximal hmer to keep in the flow matrix
* @param fbargs arguments that control resoltion of the flow matrix
* @param fbargs arguments that control resolution of the flow matrix
*/
public FlowBasedRead(final SAMRecord samRecord, final String flowOrder, final int maxHmer, final FlowBasedArgumentCollection fbargs) {
super(samRecord);
Expand All @@ -208,9 +203,8 @@ public FlowBasedRead(final SAMRecord samRecord, final String flowOrder, final in
this.fbargs = fbargs;
this.maxHmer = maxHmer;
this.samRecord = samRecord;
forwardSequence = getForwardSequence();

// read flow matrix in. note that below code contains accomodates for old formats
// read flow matrix in. note that below code contains accommodates for old formats
if ( samRecord.hasAttribute(FLOW_MATRIX_TAG_NAME) ) {
perHmerMinErrorProb = fbargs.fillingValue;
totalMinErrorProb = perHmerMinErrorProb;
Expand Down Expand Up @@ -408,18 +402,6 @@ public Direction getDirection(){
}


private byte[] getForwardSequence(){
if (!isReverseStrand()) {
return samRecord.getReadBases();
} else {
final byte[] result = new byte[samRecord.getReadBases().length];
System.arraycopy(samRecord.getReadBases(), 0, result, 0, result.length);
SequenceUtil.reverseComplement(result);
return result;
}
}


private int[] getAttributeAsIntArray(final String attributeName, final boolean isSigned) {
ReadUtils.assertAttributeNameIsLegal(attributeName);
final Object attributeValue = this.samRecord.getAttribute(attributeName);
Expand Down Expand Up @@ -460,7 +442,7 @@ public boolean isValid() {
* @return
*/
public double getProb(final int flow, final int hmer) {
double prob = flowMatrix[hmer < maxHmer ? hmer : maxHmer][flow];
double prob = flowMatrix[Math.min(hmer, maxHmer)][flow];
return (prob <= 1) ? prob : 1;
}

Expand Down Expand Up @@ -525,7 +507,7 @@ private void implementMatrixMods(final int[] flowMatrixModsInstructions) {
flowMatrix[hmer2][pos] = flowMatrix[hmer][pos];
}

// if we are copying bacwards, zero out source
// if we are copying backwards, zero out source
if (hmer > hmer2)
flowMatrix[hmer][pos] = 0;
}
Expand Down Expand Up @@ -783,9 +765,6 @@ public int totalKeyBases() {
return sum;
}

public int seqLength(){
return forwardSequence.length;
}
public boolean isBaseClipped() {
return baseClipped;
}
Expand Down
Loading

0 comments on commit 63838fe

Please sign in to comment.