Skip to content

Commit

Permalink
VS-1341 Improve test cost calculation (#8795)
Browse files Browse the repository at this point in the history
* Fixed cost calculation to ignore duplicate rows caused by preemption.
* Resetting tolerances to 5% across the board. We shall see.
  • Loading branch information
gbggrant authored May 6, 2024
1 parent bdf9590 commit 6f3c7bf
Showing 1 changed file with 15 additions and 9 deletions.
24 changes: 15 additions & 9 deletions scripts/variantstore/wdl/GvsQuickstartVcfIntegration.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -285,11 +285,17 @@ task AssertCostIsTrackedAndExpected {
mkdir output

echo "project_id = ~{project_id}" > ~/.bigqueryrc
# Note that in this query we are using the ROW_NUMBER() functionality to ignore extra entries caused
# by preemption (for instance there might be two rows for shard_identifier '*033')
bq --apilog=false query --project_id=~{project_id} --format=csv --use_legacy_sql=false \
'SELECT call, step, event_key, sum(event_bytes)
FROM `~{dataset_name}.cost_observability`
GROUP BY call, step, event_key
ORDER BY call, step, event_key' > output/cost_observability.csv
'SELECT call, step, event_key, sum(event_bytes) FROM (
SELECT *, ROW_NUMBER()
OVER (PARTITION BY call, step, event_key, shard_identifier) row_number
FROM `~{dataset_name}.cost_observability`
)
WHERE row_number = 1
GROUP BY call, step, event_key
ORDER BY call, step, event_key' > output/cost_observability.csv

# Put the exit code in a file because we are using a subshell (while) later and changes to the variable *in* the subshell are lost
echo "0" > ret_val.txt
Expand Down Expand Up @@ -320,15 +326,15 @@ task AssertCostIsTrackedAndExpected {

TOLERANCE=0

# For these two costs, there is non-determinism in the pipeline - we allow a % difference
# For these costs, there is non-determinism in the pipeline - we allow a % difference
if [[ $OBS_KEY == "ExtractFilterTask.GvsCreateFilterSet.BigQuery Query Scanned" ]]; then
TOLERANCE=0.05 # 5% tolerance (Note - have seen as high as: 0.0371646)
TOLERANCE=0.05 # 5% tolerance (Note - have seen as high as: 0.0239439)
elif [[ $OBS_KEY == "ExtractFilterTask.GvsCreateFilterSet.Storage API Scanned" ]]; then
TOLERANCE=0.05 # 5% tolerance (Note - have seen as high as: 0.0281223)
TOLERANCE=0.05 # 5% tolerance (Note - have seen as high as: n/a)
elif [[ $OBS_KEY == "ExtractTask.GvsExtractCallset.BigQuery Query Scanned" ]]; then
TOLERANCE=1.0 # 100% tolerance
TOLERANCE=0.05 # 5% tolerance (Note - have seen as high as: n/a)
elif [[ $OBS_KEY == "ExtractTask.GvsExtractCallset.Storage API Scanned" ]]; then
TOLERANCE=0.1 # 10% tolerance
TOLERANCE=0.05 # 5% tolerance (Note - have seen as high as: 0.00181463
fi

if [[ $OBS_BYTES -ne $EXP_BYTES ]]; then
Expand Down

0 comments on commit 6f3c7bf

Please sign in to comment.