HypothesisWorks · DRMacIver · Dec 16, 2024 · tybug · Dec 18, 2024
diff --git a/hypothesis-python/RELEASE.rst b/hypothesis-python/RELEASE.rst
@@ -0,0 +1,6 @@
+RELEASE_TYPE: patch
+
+This release improves the internal representation of integers. This should have relatively
+little user-visible effect, but will improve the performance of both generation and shrinking
+in some cases, and will also improve shrink quality in a few others. In particular, code like
+``st.one_of(st.integers(), st.text())`` should now reliably shrink to ``0`` rather than ``""``.
diff --git a/hypothesis-python/src/hypothesis/database.py b/hypothesis-python/src/hypothesis/database.py
@@ -744,7 +744,14 @@ def ir_to_bytes(ir: Iterable[IRType], /) -> bytes:
             elem = struct.pack("!d", elem)
         elif isinstance(elem, int):
             tag = 2 << 5
-            elem = elem.to_bytes(1 + elem.bit_length() // 8, "big", signed=True)
+            # We represent zero specially as zero bytes wide to
+            # make sure shrinking order makes sense. It's also
+            # a small space saving but we don't really care about
+            # that.
+            if elem != 0:
+                elem = elem.to_bytes(1 + elem.bit_length() // 8, "big", signed=True)
+            else:
+                elem = b""
         elif isinstance(elem, bytes):
             tag = 3 << 5
         else:

@@ -1770,6 +1770,31 @@ def _draw_unbounded_integer(
         self, *, forced: Optional[int] = None, fake_forced: bool = False
     ) -> int:
         assert self._cd is not None
+
+        max_small_integer = 50
+        if forced is None:
+            small_forced = None
+        elif abs(forced) > max_small_integer:
+            small_forced = max_small_integer * 2 + 2
+        elif forced == 0:
+            small_forced = 0
+        else:
+            small_forced = abs(forced) << 1
+            if forced < 0:
+                small_forced |= 1
+
+        small_integer_bits = self._cd.draw_bits(
+            8, forced=small_forced, fake_forced=fake_forced
+        )
+        if small_integer_bits == 0:
+            return 0
+        if small_integer_bits <= (max_small_integer * 2 + 1):
+            value = small_integer_bits >> 1
+            if small_integer_bits & 1:
+                return -value
+            else:
+                return value
+
         forced_i = None
         if forced is not None:
             # Using any bucket large enough to contain this integer would be a

@@ -221,7 +221,11 @@ def sample(
 
 
 INT_SIZES = (8, 16, 32, 64, 128)
-INT_SIZES_SAMPLER = Sampler((4.0, 8.0, 1.0, 1.0, 0.5), observe=False)
+# We draw the 8-bit size relatively rarely, because most of the 8-bit integers
+# we draw come from our special case for integers with absolute value <= 50.
+# We still give it a small weight, both for shrinking purposes and to get
+# better coverage of the interval [51, 256].
+INT_SIZES_SAMPLER = Sampler((1.0, 8.0, 1.0, 1.0, 0.5), observe=False)
 
 
 class many:

diff --git a/hypothesis-python/tests/conjecture/test_shrinker.py b/hypothesis-python/tests/conjecture/test_shrinker.py
@@ -21,6 +21,7 @@
     node_program,
 )
 from hypothesis.internal.conjecture.utils import Sampler
+from hypothesis.internal.intervalsets import IntervalSet
 
 from tests.conjecture.common import SOME_LABEL, ir, run_to_nodes, shrinking_from
 
@@ -518,3 +519,33 @@ def shrinker(data: ConjectureData):
     # shrinking. Since the second draw is forced, this isn't possible to shrink
     # with just this pass.
     assert shrinker.choices == (15, 10)
+
+
+@pytest.mark.parametrize("start,boundary", [(-63, -46)])
+def test_shrink_small_integer_down_to_boundary(start, boundary):
+    @shrinking_from(ir(start))
+    def shrinker(data: ConjectureData):
+        n = data.draw_integer()
+        if boundary == 0:
+            data.mark_interesting()
+        elif boundary < 0:
+            if n <= boundary:
+                data.mark_interesting()
+        elif n >= boundary:
+            data.mark_interesting()
+
+    shrinker.fixate_shrink_passes(["minimize_individual_nodes"])
+    assert shrinker.shrink_target.ir_nodes[0].value == boundary
+
+
+def test_will_prefer_zero_to_the_empty_string():
+    @shrinking_from(ir(True, ""))
+    def shrinker(data: ConjectureData):
+        if data.draw_boolean():
+            data.draw_string(IntervalSet([(0, 127)]))
+        else:
+            data.draw_integer()
+        data.mark_interesting()
+
+    shrinker.greedy_shrink()
+    assert shrinker.shrink_target.ir_nodes[1].value == 0
diff --git a/hypothesis-python/tests/cover/test_replay_logic.py b/hypothesis-python/tests/cover/test_replay_logic.py
@@ -124,6 +124,7 @@ def test_will_shrink_if_the_previous_example_does_not_look_right():
     def test(data):
         nonlocal last
         m = data.draw(st.integers())
+        print(m)
         last = m
         if first_test:
             data.draw(st.integers())

diff --git a/hypothesis-python/tests/nocover/test_integer_shrinking.py b/hypothesis-python/tests/nocover/test_integer_shrinking.py
@@ -0,0 +1,64 @@
+# This file is part of Hypothesis, which may be found at
+# https://github.com/HypothesisWorks/hypothesis/
+#
+# Copyright the Hypothesis Authors.
+# Individual contributors are listed in AUTHORS.rst and the git log.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public License,
+# v. 2.0. If a copy of the MPL was not distributed with this file, You can
+# obtain one at https://mozilla.org/MPL/2.0/.
+
+from random import Random
+
+from hypothesis import assume, example, given, strategies as st
+from hypothesis.errors import StopTest
+from hypothesis.internal.conjecture.data import ConjectureData, Status
+from hypothesis.internal.conjecture.engine import ConjectureRunner
+
+
+@st.composite
+def integer_buffer(draw):
+    for _ in range(100):
+        buf = draw(st.binary(min_size=8))
+        try:
+            data = ConjectureData.for_buffer(buf)
+            data.draw_integer()
+            return bytes(data.buffer)
+        except StopTest:
+            continue
+    assume(False)
+
+
+@example(
+    n=-46,
+    buffer=b"f\x00\x01\x01\x01",
+)
+@given(st.integers(), integer_buffer())
+def test_will_always_shrink_an_integer_to_a_boundary(n, buffer):
+    if n > 0:
+
+        def test_function(data):
+            if data.draw_integer() >= n:
+                data.mark_interesting()
+
+    elif n < 0:
+
+        def test_function(data):
+            if data.draw_integer() <= n:
+                data.mark_interesting()
+
+    else:
+
+        def test_function(data):
+            data.draw_integer()
+            data.mark_interesting()
+
+    runner = ConjectureRunner(test_function, random=Random(0))
+    assume(runner.cached_test_function(buffer).status == Status.INTERESTING)
+
+    runner.shrink_interesting_examples()
+
+    (shrunk,) = runner.interesting_examples.values()
+
+    result = ConjectureData.for_buffer(shrunk.buffer).draw_integer()
+    assert result == n
diff --git a/hypothesis-python/tests/nocover/test_minimal_representations.py b/hypothesis-python/tests/nocover/test_minimal_representations.py
@@ -0,0 +1,27 @@
+# This file is part of Hypothesis, which may be found at
+# https://github.com/HypothesisWorks/hypothesis/
+#
+# Copyright the Hypothesis Authors.
+# Individual contributors are listed in AUTHORS.rst and the git log.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public License,
+# v. 2.0. If a copy of the MPL was not distributed with this file, You can
+# obtain one at https://mozilla.org/MPL/2.0/.
+
+import hypothesis.strategies as st
+from hypothesis.internal.conjecture.data import ConjectureData
+from hypothesis.internal.conjecture.engine import BUFFER_SIZE
+from hypothesis.strategies import SearchStrategy
+
+
+def minimal_buffer_for(strategy: SearchStrategy) -> bytes:
+    data = ConjectureData.for_buffer(bytes(BUFFER_SIZE))
+    # TODO: Not all strategies will actually produce a valid result
+    # from an all-zero buffer. When we want to test one that doesn't,
+    # this helper will need updating to use the shrinker.
+    data.draw(strategy)
+    return bytes(data.buffer)
+
+
+def test_integers_have_a_one_byte_representation():
+    assert len(minimal_buffer_for(st.integers())) == 1