marrink-lab · ma3ke · Apr 2, 2024 · Apr 2, 2024 · Apr 3, 2024 · Apr 3, 2024
diff --git a/src/bentopy/pack/pack.py b/src/bentopy/pack/pack.py
@@ -1,8 +1,10 @@
 import argparse
+import itertools
 import json
 import math
-import time
 import pathlib
+from concurrent.futures import ThreadPoolExecutor
+import time
 
 import numpy as np
 from scipy.signal import fftconvolve
@@ -23,6 +25,16 @@ def log(*args, **kwargs):
         print(*args, **kwargs)
 
 
+def trail_off(v: float) -> float:
+    """
+    Takes a value `v` in the range `0.0 <= v <= 1.0` and returns a probability
+    according to a sigmoid density distribution, such that this probability
+    is 1.0 for `v == 0.0` and goes to 0.0 as `v` approaches 1.0.
+    """
+    # See https://www.desmos.com/calculator/cdyqdxvjmy.
+    return 1 - 1 / (1 + np.exp(-12 * (v - 0.5)))
+
+
 def placement_location(valid, selection, segment):
     """
     Returns the valid voxel placement position for the segment.
@@ -32,24 +44,35 @@ def placement_location(valid, selection, segment):
     return valid_pos - segment_center
 
 
-def place(
-    background,
+def place_domain(
     segment_voxels,
+    query,
+    resolution,
+    background,
+    background_size,
+    domain_start,
+    domain_end,
+    domain_axis,
+    half_domain_size,
+    segments_per_domain,
     max_at_once,
     max_tries,
     threshold_coefficient,
-    resolution,
-) -> list | None:
-    """
-    Place a number of segments into the background.
-    """
-
-    start = time.time()
-
-    # First, we convolve the background to reveal the points where we can
+    start,
+    placements,
+):
+    domain_slice = slice(domain_start, domain_end)
+    if domain_axis == 0:
+        domain = background[domain_slice, :, :]
+    elif domain_axis == 1:
+        domain = background[:, domain_slice, :]
+    elif domain_axis == 2:
+        domain = background[:, :, domain_slice]
+    domain_offset = np.roll(np.array([domain_start, 0, 0]), domain_axis)
+    print(f"{domain_offset = }")
+    # First, we convolve the domain to reveal the points where we can
     # safely place a segment without overlapping them.
-    query = np.flip(segment_voxels)
-    collisions = fftconvolve(background, query, mode="valid")
+    collisions = fftconvolve(domain, query, mode="valid")
     valid_offset = np.array(query.shape) // 2
     convolution_duration = time.time() - start
     log(f"        (convolution took {convolution_duration:.3f} s)")
@@ -59,20 +82,22 @@ def place(
     # generous cutoff.
     valid = np.array(np.where(collisions < 1e-4)) + valid_offset[:, None]
     if valid.size == 0:
-        return None
+        # If there are no valid placements, move on the next domain early.
+        placements.append(None)
 
-    placements = []
+    domain_placements = []
     hits = 0
     tries = 0  # Number of times segment placement was unsuccesful.
     start = time.time()
     previously_selected = set()
-    while hits < max_at_once:
+    while hits < segments_per_domain and hits < max_at_once:
         if tries >= max_tries:
             # Give up.
             log("  ! tries is exceeding max_tries")
             break
         if len(previously_selected) == len(valid[0]):
-            return None
+            domain_placements = None
+            break
         while True:
             selection = RNG.integers(0, len(valid[0]))
             if selection not in previously_selected:
@@ -84,24 +109,50 @@ def place(
         location = placement_location(valid, selection, segment_voxels)
         prospect = np.where(segment_voxels) + location[:, None]
         # Check for collisions at the prospective site.
-        free = not np.any(background[prospect[0, :], prospect[1, :], prospect[2, :]])
+        free = not np.any(domain[prospect[0, :], prospect[1, :], prospect[2, :]])
 
         if free:
             start = time.time()
 
-            temp_selected_indices = prospect
-            background[
-                temp_selected_indices[0, :],
-                temp_selected_indices[1, :],
-                temp_selected_indices[2, :],
-            ] = 1.0
-
-            placements.append(tuple(int(a) for a in location * resolution))
-            hits += 1
+            domain_axis_location = location[domain_axis]
+            # FIXME(domains): This is truly weird and can probably all be integrated in one well-formed if statement.
+            is_half_domain = domain_end - domain_start == half_domain_size
+            if is_half_domain:
+                # We need to deal with a half-sized domain at the start or end of the black pass.
+                if domain_start == 0:
+                    acceptance_probability = trail_off(
+                        domain_axis_location / half_domain_size
+                    )
+                elif domain_end == background_size:
+                    acceptance_probability = 1.0
+                else:
+                    assert False, "Unreachable! Encountered a half domain that is not at the start or end of the background. This should be impossible."
+            else:
+                # The common case.
+                if domain_axis_location < half_domain_size:
+                    acceptance_probability = 1.0
+                if domain_axis_location >= half_domain_size:
+                    acceptance_probability = trail_off(
+                        (domain_axis_location - half_domain_size) / half_domain_size
+                    )
+
+            # Only actually place it if we accept it according to the
+            # placement densitity distribution.
+            if RNG.random() < acceptance_probability:
+                domain[
+                    prospect[0, :],
+                    prospect[1, :],
+                    prospect[2, :],
+                ] = True
+
+                domain_placements.append(
+                    tuple(int(a) for a in (location + domain_offset) * resolution)
+                )
+                hits += 1
         else:
             tries += 1
             log(
-                f"        {tries = }/{max_tries},\thits = {hits}/{max_at_once}",
+                f"        {tries = }/{max_tries},\thits = {hits}/{segments_per_domain}({max_at_once})",
                 end="\r",
             )
             placement_duration = time.time() - start
@@ -115,8 +166,128 @@ def place(
                 break
     log("\x1b[K", end="")  # Clear the line since we used \r before.
     log(f"  . placed {hits} segments with {tries}/{max_tries} misses")
+    placements.append(domain_placements)
+
 
-    return placements
+def place(
+    background,
+    segment_voxels,
+    max_at_once,
+    max_tries,
+    threshold_coefficient,
+    resolution,
+) -> list | None:
+    """
+    Place a number of segments into the background.
+    """
+
+    start = time.time()
+
+    # FIXME(domains): Revisit and reformat this comment. Really needs to be correct ;)
+    # We break up the background into a set of layers over the the longest axis.
+    # We call these layers 'domains'. Each of these domains will be packed
+    # sequentially, such that we place the segment over the entire space,
+    # eventually. This breaking up into domains serves to reduce the peak
+    # memory footprint during the convolution. For large spaces, the memory
+    # required for a convolution of the entire space may exceed the available
+    # amount. Hence, we want the ability to break up the space over which
+    # the convolution is applied.
+    # To make sure that the resulting packing remains well-stirred despite the
+    # divided procedure, two passes are performed. The first we call the
+    # 'white' pass, and it leaves clear division planes between the domains.
+    # The second pass, which we call the 'black' pass, goes over the space with
+    # an offset of half a domain size.
+    # To ensure the placement densities are uniform overall, a placement
+    # probability density function is used within a domain. It is shaped such
+    # that the first half of the domain is a constant, full density. In this
+    # first half, every valid placement that is considered will be accepted.
+    # Within the second half of a domain, the placement probability density
+    # will trail off to zero according to a sigmoid distribution. According to
+    # this distribution, the closer a valid placement is to the end of the
+    # domain, the less likely it becomes the placement is accepted.
+    n_domains = 5
+    domain_axis = np.argmax(background.shape)  # The longest axis.
+    segments_per_domain = max_at_once / n_domains
+    # The size of the background in the direction of the domain layers.
+    background_size = background.shape[domain_axis]
+    assert (
+        background.shape[domain_axis] % n_domains == 0
+    ), "For now, make sure we can neatly divide the background into domains"
+    domain_size = background.shape[domain_axis] // n_domains
+    assert (
+        domain_size % 2 == 0
+    ), "For now, make sure we can neatly divide a domain into two halves"
+    half_domain_size = domain_size // 2
+    # # We first pack the domains at the even indices, followed by the odd ones.
+    # domain_indices = (*range(0, n_domains, 2), *range(1, n_domains, 2))
+    white_domains = [
+        ((2 * i) * half_domain_size, (2 * i + 2) * half_domain_size)
+        for i in range(n_domains)
+    ]
+    # The black domains should also cover the first and last 'half' domains.
+    black_domains = [
+        (
+            max(
+                (2 * i - 1) * half_domain_size,
+                0,
+            ),
+            min(
+                (2 * i + 1) * half_domain_size,
+                background_size,
+            ),
+        )
+        for i in range(n_domains + 1)
+    ]
+
+    query = np.flip(segment_voxels)
+    placements = []
+
+    # We place the white and black domains after each other, making sure that
+    # all white domains are placed before placing starts in the black domains.
+    # This is necessary to prevent data races.
+
+    placer = lambda domain_start, domain_end: place_domain(
+        segment_voxels,
+        query,
+        resolution,
+        background,
+        background_size,
+        domain_start,
+        domain_end,
+        domain_axis,
+        half_domain_size,
+        segments_per_domain,
+        max_at_once,
+        max_tries,
+        threshold_coefficient,
+        start,
+        placements,
+    )
+    # placer = lambda domain_start, domain_end: print(f"{domain_start, domain_end = }")
+
+    # Place the white domains.
+    with ThreadPoolExecutor(max_workers=8) as executor:
+        for domain_start, domain_end in white_domains:
+            executor.submit(placer, domain_start, domain_end)
+
+    # Place the black domains.
+    with ThreadPoolExecutor(max_workers=8) as executor:
+        for domain_start, domain_end in black_domains:
+            executor.submit(placer, domain_start, domain_end)
+
+    print(f"placed {len(placements)} domains")
+
+    # If the placements for each domain are all None, we need to let the caller
+    # know. But if only some or none of them are None, we can just sum to
+    # communicate the number of segments that were placed.
+    if all(map(lambda domain_placements: domain_placements is None, placements)):
+        return None
+    else:
+        r = []
+        for domain_placements in placements:
+            if domain_placements is not None:
+                r.extend(domain_placements)
+        return r
 
 
 def pack(

diff --git a/src/bentopy/pack/space.py b/src/bentopy/pack/space.py
@@ -92,7 +92,7 @@ def background(self, compartment_ids=[], onto=None):
         # Adjust size and padding for resolution.
         size = (np.array(self.size) / self.resolution).astype(int)
         if onto is None:
-            background = np.ones(size, dtype=np.float32)
+            background = np.ones(size, dtype=np.bool_)
         else:
             background = onto