Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add small integer representation #4204

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions hypothesis-python/RELEASE.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
RELEASE_TYPE: patch

This release improves the internal representation of integers. This should make relatively
little user-visible difference, but will improve the performance of both generation and shrinking
in some cases, and will also improve shrink quality in a few others. In particular, code like
``st.one_of(st.integers(), st.text())`` should now reliably prefer ``0`` over ``""``.
9 changes: 8 additions & 1 deletion hypothesis-python/src/hypothesis/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -744,7 +744,14 @@ def ir_to_bytes(ir: Iterable[IRType], /) -> bytes:
elem = struct.pack("!d", elem)
elif isinstance(elem, int):
tag = 2 << 5
elem = elem.to_bytes(1 + elem.bit_length() // 8, "big", signed=True)
# We represent zero specially as zero bytes wide to
# make sure shrinking order makes sense. It's also
# a small space saving but we don't really care about
# that.
if elem != 0:
elem = elem.to_bytes(1 + elem.bit_length() // 8, "big", signed=True)
else:
elem = b""
elif isinstance(elem, bytes):
tag = 3 << 5
else:
Expand Down
25 changes: 25 additions & 0 deletions hypothesis-python/src/hypothesis/internal/conjecture/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -1770,6 +1770,31 @@ def _draw_unbounded_integer(
self, *, forced: Optional[int] = None, fake_forced: bool = False
) -> int:
assert self._cd is not None

max_small_integer = 50
if forced is None:
small_forced = None
elif abs(forced) > max_small_integer:
small_forced = max_small_integer * 2 + 2
elif forced == 0:
small_forced = 0
else:
small_forced = abs(forced) << 1
if forced < 0:
small_forced |= 1

small_integer_bits = self._cd.draw_bits(
8, forced=small_forced, fake_forced=fake_forced
)
if small_integer_bits == 0:
return 0
if small_integer_bits <= (max_small_integer * 2 + 1):
value = small_integer_bits >> 1
if small_integer_bits & 1:
return -value
else:
return value

forced_i = None
if forced is not None:
# Using any bucket large enough to contain this integer would be a
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,11 @@ def sample(


INT_SIZES = (8, 16, 32, 64, 128)
# NOTE(review): this binding is replaced by the reassignment of the same name
# a few lines below — presumably it is the pre-change line of the diff.
INT_SIZES_SAMPLER = Sampler((4.0, 8.0, 1.0, 1.0, 0.5), observe=False)
# We draw an 8-bit integer relatively rarely, because most of the 8-bit
# integers we produce come from our special case for integers with absolute
# value <= 50. We still give the 8-bit size a small weight, both for shrinking
# purposes and to get better coverage of the interval [51, 256].
# NOTE(review): 256 itself does not fit in 8 bits — confirm the intended
# upper bound of that interval.
INT_SIZES_SAMPLER = Sampler((1.0, 8.0, 1.0, 1.0, 0.5), observe=False)


class many:
Expand Down
31 changes: 31 additions & 0 deletions hypothesis-python/tests/conjecture/test_shrinker.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
node_program,
)
from hypothesis.internal.conjecture.utils import Sampler
from hypothesis.internal.intervalsets import IntervalSet

from tests.conjecture.common import SOME_LABEL, ir, run_to_nodes, shrinking_from

Expand Down Expand Up @@ -518,3 +519,33 @@ def shrinker(data: ConjectureData):
# shrinking. Since the second draw is forced, this isn't possible to shrink
# with just this pass.
assert shrinker.choices == (15, 10)


@pytest.mark.parametrize("start,boundary", [(-63, -46)])
def test_shrink_small_integer_down_to_boundary(start, boundary):
    """Shrinking an integer that starts at ``start`` must stop exactly at ``boundary``.

    The test function is interesting for every integer when ``boundary`` is
    zero, for ``n <= boundary`` when it is negative, and for ``n >= boundary``
    when it is positive, so ``boundary`` is the simplest interesting value.
    """

    @shrinking_from(ir(start))
    def shrinker(data: ConjectureData):
        n = data.draw_integer()
        if boundary == 0:
            data.mark_interesting()
        elif boundary < 0:
            if n <= boundary:
                data.mark_interesting()
        elif n >= boundary:
            data.mark_interesting()

    shrinker.fixate_shrink_passes(["minimize_individual_nodes"])
    # Use the shrinker's ``choices`` view, as the other assertions in this
    # file do, rather than reaching into ``shrink_target.ir_nodes``.
    assert shrinker.choices[0] == boundary
Copy link
Member

@tybug tybug Dec 18, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

shrink_target.choices[0] is a nice concise alternative here! I envision .buffer[i] -> .choices[i] being the default migration path for bytestring tests, though of course with tweaked indices.

(or better yet shrinker.choices, using the implicit forwarding to .shrink_target).



def test_will_prefer_zero_to_the_empty_string():
    """Shrinking should move from the string branch to the integer ``0``.

    The starting choices are ``(True, "")``: the boolean selects the string
    branch and the string is empty. Every run is interesting, so after a
    greedy shrink the second choice is expected to be the integer ``0``.
    """

    @shrinking_from(ir(True, ""))
    def shrinker(data: ConjectureData):
        if data.draw_boolean():
            data.draw_string(IntervalSet([(0, 127)]))
        else:
            data.draw_integer()
        data.mark_interesting()

    shrinker.greedy_shrink()
    # Use the shrinker's ``choices`` view, as the other assertions in this
    # file do, rather than reaching into ``shrink_target.ir_nodes``.
    assert shrinker.choices[1] == 0
1 change: 1 addition & 0 deletions hypothesis-python/tests/cover/test_replay_logic.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ def test_will_shrink_if_the_previous_example_does_not_look_right():
def test(data):
nonlocal last
m = data.draw(st.integers())
print(m)
last = m
if first_test:
data.draw(st.integers())
Expand Down
64 changes: 64 additions & 0 deletions hypothesis-python/tests/nocover/test_integer_shrinking.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# This file is part of Hypothesis, which may be found at
# https://github.com/HypothesisWorks/hypothesis/
#
# Copyright the Hypothesis Authors.
# Individual contributors are listed in AUTHORS.rst and the git log.
#
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.

from random import Random

from hypothesis import assume, example, given, strategies as st
from hypothesis.errors import StopTest
from hypothesis.internal.conjecture.data import ConjectureData, Status
from hypothesis.internal.conjecture.engine import ConjectureRunner


@st.composite
def integer_buffer(draw):
    """Draw a byte buffer from which ``draw_integer`` can be read successfully.

    Up to 100 candidate buffers of at least 8 bytes are tried. For the first
    one on which ``draw_integer`` completes without raising ``StopTest``, the
    resulting ``data.buffer`` is returned as ``bytes``. If every attempt
    fails, the example is rejected with ``assume(False)``.
    """
    attempts_left = 100
    while attempts_left > 0:
        attempts_left -= 1
        candidate = draw(st.binary(min_size=8))
        try:
            data = ConjectureData.for_buffer(candidate)
            data.draw_integer()
        except StopTest:
            # Presumably the candidate was too short for this draw; try again.
            continue
        return bytes(data.buffer)
    assume(False)


@example(
    n=-46,
    buffer=b"f\x00\x01\x01\x01",
)
@given(st.integers(), integer_buffer())
def test_will_always_shrink_an_integer_to_a_boundary(n, buffer):
    """An interesting integer must always shrink down to the boundary ``n``.

    A drawn value is interesting when it is ``>= n`` for positive ``n``,
    ``<= n`` for negative ``n``, and always when ``n`` is zero. Starting from
    ``buffer`` (skipped unless it is interesting to begin with), the shrunk
    buffer must replay to exactly ``n``.
    """

    def at_or_beyond_boundary(value):
        # Positive boundaries are approached from above, negative ones from
        # below; with n == 0 every drawn integer counts.
        if n > 0:
            return value >= n
        if n < 0:
            return value <= n
        return True

    def test_function(data):
        if at_or_beyond_boundary(data.draw_integer()):
            data.mark_interesting()

    runner = ConjectureRunner(test_function, random=Random(0))
    initial = runner.cached_test_function(buffer)
    assume(initial.status == Status.INTERESTING)

    runner.shrink_interesting_examples()

    (shrunk,) = runner.interesting_examples.values()

    replayed = ConjectureData.for_buffer(shrunk.buffer)
    assert replayed.draw_integer() == n
27 changes: 27 additions & 0 deletions hypothesis-python/tests/nocover/test_minimal_representations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# This file is part of Hypothesis, which may be found at
# https://github.com/HypothesisWorks/hypothesis/
#
# Copyright the Hypothesis Authors.
# Individual contributors are listed in AUTHORS.rst and the git log.
#
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.

import hypothesis.strategies as st
from hypothesis.internal.conjecture.data import ConjectureData
from hypothesis.internal.conjecture.engine import BUFFER_SIZE
from hypothesis.strategies import SearchStrategy


def minimal_buffer_for(strategy: SearchStrategy) -> bytes:
    """Return the buffer recorded when ``strategy`` is drawn from all-zero input.

    A ``ConjectureData`` is built over ``BUFFER_SIZE`` zero bytes, a single
    value is drawn from ``strategy``, and the data object's ``buffer`` is
    returned as ``bytes``.
    """
    # TODO: Not every strategy will produce a valid result from an all-zero
    # buffer. Once we want to test such a strategy, this helper will need
    # to be updated to use the shrinker instead.
    zeroed_input = bytes(BUFFER_SIZE)
    conjecture_data = ConjectureData.for_buffer(zeroed_input)
    conjecture_data.draw(strategy)
    return bytes(conjecture_data.buffer)


def test_integers_have_a_one_byte_representation():
    """The buffer produced for ``st.integers()`` from zeroed input is one byte long."""
    smallest = minimal_buffer_for(st.integers())
    assert len(smallest) == 1
Loading