From c683d41e024016615e0092bcb9095018ddc82b75 Mon Sep 17 00:00:00 2001
From: Liam DeVoe
Date: Fri, 3 Jan 2025 11:05:43 -0500
Subject: [PATCH] support "T" and "V" dtypes in from_dtype

---
 hypothesis-python/RELEASE.rst                 |  3 ++
 .../src/hypothesis/extra/numpy.py             | 42 ++++++++++++++++++-
 .../tests/numpy/test_from_dtype.py            | 20 +++++++++
 .../tests/numpy/test_from_type.py             |  4 +-
 4 files changed, 66 insertions(+), 3 deletions(-)
 create mode 100644 hypothesis-python/RELEASE.rst

diff --git a/hypothesis-python/RELEASE.rst b/hypothesis-python/RELEASE.rst
new file mode 100644
index 0000000000..b51cce90a4
--- /dev/null
+++ b/hypothesis-python/RELEASE.rst
@@ -0,0 +1,3 @@
+RELEASE_TYPE: minor
+
+:func:`~hypothesis.extra.numpy.from_dtype` now supports the :obj:`numpy.dtypes.VoidDType` (``"V"``) dtype, as well as the new :obj:`numpy:numpy.dtypes.StringDType` (``"T"``) dtype in NumPy 2.0.
diff --git a/hypothesis-python/src/hypothesis/extra/numpy.py b/hypothesis-python/src/hypothesis/extra/numpy.py
index da0b119409..0e1115b0bb 100644
--- a/hypothesis-python/src/hypothesis/extra/numpy.py
+++ b/hypothesis-python/src/hypothesis/extra/numpy.py
@@ -92,8 +92,15 @@ def _try_import(mod_name: str, attr_name: str) -> Any:
 
 TIME_RESOLUTIONS = tuple("Y M D h m s ms us ns ps fs as".split())
 
+numpy_version = tuple(map(int, np.__version__.split(".")[:2]))
 # See https://github.com/HypothesisWorks/hypothesis/pull/3394 and linked discussion.
-NP_FIXED_UNICODE = tuple(int(x) for x in np.__version__.split(".")[:2]) >= (1, 19)
+NP_FIXED_UNICODE = numpy_version >= (1, 19)
+
+maybe_string_dtype = []
+if numpy_version >= (2, 0):  # pragma: no branch  # else covered by oldestnumpy job
+    from numpy.dtypes import StringDType
+
+    maybe_string_dtype = [st.just(StringDType())]
 
 
 @defines_strategy(force_reusable_values=True)
@@ -213,6 +220,17 @@ def compat_kw(*args, **kw):
         else:  # NEP-7 defines the NaT value as integer -(2**63)
             elems = st.integers(-(2**63) + 1, 2**63 - 1)
         result = st.builds(dtype.type, elems, res)
+    elif dtype.kind == "T":
+        result = st.text(**compat_kw("alphabet", "min_size", "max_size"))
+    elif dtype.kind == "V":
+        result = st.binary(
+            **compat_kw(
+                "min_size", max_size=None if dtype.itemsize == 0 else dtype.itemsize
+            )
+        )
+    # we explicitly avoid supporting dtype.kind == "O", because it is easy to
+    # OOM when evaluating e.g. np.array(range(0, n)) for large n (and this is in
+    # fact a thing hypothesis will generate via st.from_type(object)).
     else:
         raise InvalidArgument(f"No strategy inference for {dtype}")
     return result.map(dtype.type)
@@ -927,6 +945,9 @@ def timedelta64_dtypes(
     )
 
 
+# TODO: we should uncap max_len here, and for unicode/void below.
+# Also allow generating undetermined-width dtypes like "S" / "S0"? Possibly with
+# a new parameter allow_undetermined?
 @defines_dtype_strategy
 def byte_string_dtypes(
     *, endianness: str = "?", min_len: int = 1, max_len: int = 16
@@ -957,6 +978,21 @@ def unicode_string_dtypes(
     return dtype_factory("U", list(range(min_len, max_len + 1)), None, endianness)
 
 
+@defines_dtype_strategy
+def void_dtypes(
+    *, endianness: str = "?", min_len: int = 1, max_len: int = 16
+) -> st.SearchStrategy["np.dtype[np.void]"]:
+    """Return a strategy for generating void dtypes, of various lengths
+    and byteorder.
+
+    While Hypothesis' st.binary strategy can generate empty bytestrings, void
+    dtypes with length 0 indicate that size is still to be determined, so
+    the minimum length for void dtypes is 1.
+    """
+    order_check("len", 1, min_len, max_len)
+    return dtype_factory("V", list(range(min_len, max_len + 1)), None, endianness)
+
+
 def _no_title_is_name_of_a_titled_field(ls):
     seen = set()
     for title_and_name, *_ in ls:
@@ -1336,6 +1372,8 @@ def _from_type(thing: type[Ex]) -> Optional[st.SearchStrategy[Ex]]:
         # Note: Parameterized dtypes and DTypeLike are not supported.
         return st.one_of(
             scalar_dtypes(),
+            void_dtypes(),
+            *maybe_string_dtype,
             byte_string_dtypes(),
             unicode_string_dtypes(),
             array_dtypes(),
@@ -1368,7 +1406,7 @@ def _from_type(thing: type[Ex]) -> Optional[st.SearchStrategy[Ex]]:
 
     if isinstance(thing, type) and issubclass(thing, np.generic):
         dtype = np.dtype(thing)
-        return from_dtype(dtype) if dtype.kind not in "OV" else None
+        return from_dtype(dtype) if dtype.kind != "O" else None
 
     real_thing, args = _unpack_generic(thing)
 
diff --git a/hypothesis-python/tests/numpy/test_from_dtype.py b/hypothesis-python/tests/numpy/test_from_dtype.py
index dacbe0af2c..fd0cfb6f5e 100644
--- a/hypothesis-python/tests/numpy/test_from_dtype.py
+++ b/hypothesis-python/tests/numpy/test_from_dtype.py
@@ -42,6 +42,7 @@
     "complex128",
     "datetime64",
     "timedelta64",
+    "void",
     bool,
     str,
     bytes,
@@ -103,6 +104,14 @@ def test_unicode_string_dtypes_generate_unicode_strings(data):
     assert isinstance(result, str)
 
 
+@given(st.data())
+def test_void_dtype_generates_void(data):
+    dtype = data.draw(nps.void_dtypes())
+    value = data.draw(nps.from_dtype(dtype))
+    assert isinstance(value, np.void)
+    assert isinstance(value.tobytes(), bytes)
+
+
 @given(nps.arrays(dtype="U99", shape=(10,)))
 def test_can_unicode_strings_without_decode_error(arr):
     # See https://github.com/numpy/numpy/issues/15363
@@ -129,6 +138,7 @@ def test_byte_string_dtypes_generate_unicode_strings(data):
 
 
 skipif_np2 = pytest.mark.skipif(np_version >= (2, 0), reason="removed in new version")
+skipif_np1 = pytest.mark.skipif(np_version < (2, 0), reason="added in new version")
 
 
 @pytest.mark.parametrize(
@@ -251,6 +261,16 @@ def test_arrays_gives_useful_error_on_inconsistent_time_unit():
         ("U", {"min_size": 1, "max_size": 2}, lambda x: 1 <= len(x) <= 2),
         ("U4", {"min_size": 1, "max_size": 2}, lambda x: 1 <= len(x) <= 2),
         ("U", {"alphabet": "abc"}, lambda x: set(x).issubset("abc")),
+        pytest.param(
+            "T", {"alphabet": "abc"}, lambda x: set(x).issubset("abc"), marks=skipif_np1
+        ),
+        pytest.param(
+            "T",
+            {"min_size": 1, "max_size": 2},
+            lambda x: 1 <= len(x) <= 2,
+            marks=skipif_np1,
+        ),
+        ("V", {"min_size": 1, "max_size": 2}, lambda x: 1 <= len(x.tobytes()) <= 2),
     ],
 )
 @given(data=st.data())
diff --git a/hypothesis-python/tests/numpy/test_from_type.py b/hypothesis-python/tests/numpy/test_from_type.py
index 50d6e597cc..b213429329 100644
--- a/hypothesis-python/tests/numpy/test_from_type.py
+++ b/hypothesis-python/tests/numpy/test_from_type.py
@@ -42,7 +42,9 @@ def test_does_not_resolve_nonscalar_types(typ):
 @pytest.mark.parametrize("typ", STANDARD_TYPES_TYPE)
 def test_resolves_and_varies_numpy_scalar_type(typ):
     # Check that we find an instance that is not equal to the default
-    x = find_any(from_type(typ), lambda x: x != type(x)())
+    # (except for void, which does not have a default)
+    cond = (lambda _: True) if typ is np.void else (lambda x: x != type(x)())
+    x = find_any(from_type(typ), cond)
     assert isinstance(x, typ)
 
 