jump-cellpainting · deflaux · May 8, 2024 · May 6, 2024 · May 6, 2024 · May 6, 2024
diff --git a/.dockstore.yml b/.dockstore.yml
@@ -1,5 +1,7 @@
 version: 1.2
 workflows:
   - subclass: WDL
-    primaryDescriptorPath: embedding_creation_script.wdl
-    name: EmbeddingCreation
+    primaryDescriptorPath: /embedding_creation_script.wdl
+    topic: Method to compute per-cell embeddings for all JUMP-CP data.
+    testParameterFiles:
+    - /single_plate_example_inputs.json
diff --git a/embedding_creation_script.wdl b/embedding_creation_script.wdl
@@ -32,10 +32,11 @@ workflow EmbeddingCreation {
         Int tfHubModelInputImageHeight
         Int tfHubModelInputImageWidth
         Int tfHubModelOutputEmbSize
-        String embeddingCreationDockerImage = 'PUBLIC_DOCKER_IMAGE_GOES_HERE'
+        String embeddingCreationDockerImage = 'ghcr.io/deflaux/embedding_creation:20240502_203214'
         Int embeddingCreationCPU = 8
         Int embeddingCreationMemoryGB = 30
         Int embeddingCreationDiskGB = 10
+        Int embeddingCreationBootDiskGB = 15
         Int embeddingCreationMaxRetries = 1
         Int embeddingCreationPreemptibleAttempts = 2
         String embeddingCreationGPUType = "nvidia-tesla-t4"
@@ -52,7 +53,8 @@ workflow EmbeddingCreation {
         input:
             loadDataWithIllum = loadDataWithIllum,
             modulus = modulus,
-            dockerImage = embeddingCreationDockerImage
+            dockerImage = embeddingCreationDockerImage,
+            bootDiskGB = embeddingCreationBootDiskGB
     }
 
     # Run embedding creation scattered by shards of multiple wells.
@@ -72,6 +74,7 @@ workflow EmbeddingCreation {
                 cpu = embeddingCreationCPU,
                 memoryGB = embeddingCreationMemoryGB,
                 diskGB = embeddingCreationDiskGB,
+                bootDiskGB = embeddingCreationBootDiskGB,
                 maxRetries = embeddingCreationMaxRetries,
                 preemptibleAttempts = embeddingCreationPreemptibleAttempts,
                 gpuType = embeddingCreationGPUType,
@@ -103,7 +106,8 @@ task determineShards {
         Int modulus = 24
 
         # Docker image
-        String dockerImage = 'PUBLIC_DOCKER_IMAGE_GOES_HERE'
+        String dockerImage = 'ghcr.io/deflaux/embedding_creation:20240502_203214'
+        Int bootDiskGB = 15
     }
 
     String outputFilename = 'shards_metadata.txt'
@@ -130,6 +134,7 @@ task determineShards {
 
     runtime {
         docker: dockerImage
+        bootDiskSizeGb: bootDiskGB
         maxRetries: 1
         preemptible: 2
     }
@@ -150,10 +155,11 @@ task runEmbeddingCreationScript {
         Int tfHubModelInputImageWidth
         Int tfHubModelOutputEmbSize
 
-        String dockerImage = 'PUBLIC_DOCKER_IMAGE_GOES_HERE'
+        String dockerImage = 'ghcr.io/deflaux/embedding_creation:20240502_203214'
         Int cpu = 8
         Int memoryGB = 30
         Int diskGB = 10
+        Int bootDiskGB = 15
         Int maxRetries = 1
         Int preemptibleAttempts = 2
         String gpuType = 'nvidia-tesla-t4'
@@ -205,6 +211,7 @@ task runEmbeddingCreationScript {
         docker: dockerImage
         memory: memoryGB + ' GB'
         disks: 'local-disk ' + diskGB + ' SSD'
+        bootDiskSizeGb: bootDiskGB
         maxRetries: maxRetries
         preemptible: preemptibleAttempts
         cpu: cpu

diff --git a/single_plate_example_inputs.json b/single_plate_example_inputs.json
@@ -0,0 +1,10 @@
+{
+  "EmbeddingCreation.tfHubModelOutputEmbSize":"${1280}",
+  "EmbeddingCreation.cellCentersPathPrefix":"s3://cellpainting-gallery/cpg0016-jump/source_10/workspace/analysis/2021_05_31_U2OS_48_hr_run1/Dest210531-152149/analysis",
+  "EmbeddingCreation.cellPatchDim":"${128}",
+  "EmbeddingCreation.loadDataWithIllum":"s3://cellpainting-gallery/cpg0016-jump/source_10/workspace/load_data_csv/2021_05_31_U2OS_48_hr_run1/Dest210531-152149/load_data_with_illum.parquet",
+  "EmbeddingCreation.tfHubModelInputImageHeight":"${384}",
+  "EmbeddingCreation.tfHubModelPath":"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_s/feature_vector/2",
+  "EmbeddingCreation.tfHubModelInputImageWidth":"${384}",
+  "EmbeddingCreation.modelBatchDim":"${256}"
+}