From c683d41e024016615e0092bcb9095018ddc82b75 Mon Sep 17 00:00:00 2001
From: Liam DeVoe <orionldevoe@gmail.com>
Date: Fri, 3 Jan 2025 11:05:43 -0500
Subject: [PATCH] support "T" and "V" dtypes in from_dtype

---
 hypothesis-python/RELEASE.rst                 |  3 ++
 .../src/hypothesis/extra/numpy.py             | 42 ++++++++++++++++++-
 .../tests/numpy/test_from_dtype.py            | 20 +++++++++
 .../tests/numpy/test_from_type.py             |  4 +-
 4 files changed, 66 insertions(+), 3 deletions(-)
 create mode 100644 hypothesis-python/RELEASE.rst

diff --git a/hypothesis-python/RELEASE.rst b/hypothesis-python/RELEASE.rst
new file mode 100644
index 0000000000..b51cce90a4
--- /dev/null
+++ b/hypothesis-python/RELEASE.rst
@@ -0,0 +1,3 @@
+RELEASE_TYPE: minor
+
+:func:`~hypothesis.extra.numpy.from_dtype` now supports the :obj:`numpy.dtypes.VoidDType` (``"V"``) dtype, as well as the new :obj:`numpy:numpy.dtypes.StringDType` (``"T"``) dtype in NumPy 2.0.
diff --git a/hypothesis-python/src/hypothesis/extra/numpy.py b/hypothesis-python/src/hypothesis/extra/numpy.py
index da0b119409..0e1115b0bb 100644
--- a/hypothesis-python/src/hypothesis/extra/numpy.py
+++ b/hypothesis-python/src/hypothesis/extra/numpy.py
@@ -92,8 +92,15 @@ def _try_import(mod_name: str, attr_name: str) -> Any:
 
 TIME_RESOLUTIONS = tuple("Y  M  D  h  m  s  ms  us  ns  ps  fs  as".split())
 
+numpy_version = tuple(map(int, np.__version__.split(".")[:2]))
 # See https://github.com/HypothesisWorks/hypothesis/pull/3394 and linked discussion.
-NP_FIXED_UNICODE = tuple(int(x) for x in np.__version__.split(".")[:2]) >= (1, 19)
+NP_FIXED_UNICODE = numpy_version >= (1, 19)
+
+maybe_string_dtype = []
+if numpy_version >= (2, 0):  # pragma: no branch # else covered by oldestnumpy job
+    from numpy.dtypes import StringDType
+
+    maybe_string_dtype = [st.just(StringDType())]
 
 
 @defines_strategy(force_reusable_values=True)
@@ -213,6 +220,17 @@ def compat_kw(*args, **kw):
         else:  # NEP-7 defines the NaT value as integer -(2**63)
             elems = st.integers(-(2**63) + 1, 2**63 - 1)
         result = st.builds(dtype.type, elems, res)
+    elif dtype.kind == "T":
+        result = st.text(**compat_kw("alphabet", "min_size", "max_size"))
+    elif dtype.kind == "V":
+        result = st.binary(
+            **compat_kw(
+                "min_size", max_size=None if dtype.itemsize == 0 else dtype.itemsize
+            )
+        )
+    # we explicitly avoid supporting dtype.kind == "O", because it is easy to
+    # OOM when evaluating e.g. np.array(range(0, n)) for large n (and this is in
+    # fact a thing hypothesis will generate via st.from_type(object)).
     else:
         raise InvalidArgument(f"No strategy inference for {dtype}")
     return result.map(dtype.type)
@@ -927,6 +945,9 @@ def timedelta64_dtypes(
     )
 
 
+# TODO: we should uncap max_len here, and for unicode/void below.
+# Also allow generating undetermined-width dtypes like "S" / "S0"? Possibly with
+# a new parameter allow_undetermined?
 @defines_dtype_strategy
 def byte_string_dtypes(
     *, endianness: str = "?", min_len: int = 1, max_len: int = 16
@@ -957,6 +978,21 @@ def unicode_string_dtypes(
     return dtype_factory("U", list(range(min_len, max_len + 1)), None, endianness)
 
 
+@defines_dtype_strategy
+def void_dtypes(
+    *, endianness: str = "?", min_len: int = 1, max_len: int = 16
+) -> st.SearchStrategy["np.dtype[np.void]"]:
+    """Return a strategy for generating void (``"V"``) dtypes, of various
+    lengths (``min_len`` to ``max_len``, inclusive) and byteorder.
+
+    While Hypothesis' st.binary strategy can generate empty bytestrings, void
+    dtypes with length 0 indicate that size is still to be determined, so
+    the minimum length for void dtypes is 1.
+    """
+    order_check("len", 1, min_len, max_len)
+    return dtype_factory("V", list(range(min_len, max_len + 1)), None, endianness)
+
+
 def _no_title_is_name_of_a_titled_field(ls):
     seen = set()
     for title_and_name, *_ in ls:
@@ -1336,6 +1372,8 @@ def _from_type(thing: type[Ex]) -> Optional[st.SearchStrategy[Ex]]:
         # Note: Parameterized dtypes and DTypeLike are not supported.
         return st.one_of(
             scalar_dtypes(),
+            void_dtypes(),
+            *maybe_string_dtype,
             byte_string_dtypes(),
             unicode_string_dtypes(),
             array_dtypes(),
@@ -1368,7 +1406,7 @@ def _from_type(thing: type[Ex]) -> Optional[st.SearchStrategy[Ex]]:
 
     if isinstance(thing, type) and issubclass(thing, np.generic):
         dtype = np.dtype(thing)
-        return from_dtype(dtype) if dtype.kind not in "OV" else None
+        return from_dtype(dtype) if dtype.kind != "O" else None
 
     real_thing, args = _unpack_generic(thing)
 
diff --git a/hypothesis-python/tests/numpy/test_from_dtype.py b/hypothesis-python/tests/numpy/test_from_dtype.py
index dacbe0af2c..fd0cfb6f5e 100644
--- a/hypothesis-python/tests/numpy/test_from_dtype.py
+++ b/hypothesis-python/tests/numpy/test_from_dtype.py
@@ -42,6 +42,7 @@
         "complex128",
         "datetime64",
         "timedelta64",
+        "void",
         bool,
         str,
         bytes,
@@ -103,6 +104,14 @@ def test_unicode_string_dtypes_generate_unicode_strings(data):
     assert isinstance(result, str)
 
 
+@given(st.data())
+def test_void_dtype_generates_void(data):
+    dtype = data.draw(nps.void_dtypes())  # defaults: min_len=1, max_len=16
+    value = data.draw(nps.from_dtype(dtype))
+    assert isinstance(value, np.void)  # scalar type must match the dtype kind
+    assert isinstance(value.tobytes(), bytes)
+
+
 @given(nps.arrays(dtype="U99", shape=(10,)))
 def test_can_unicode_strings_without_decode_error(arr):
     # See https://github.com/numpy/numpy/issues/15363
@@ -129,6 +138,7 @@ def test_byte_string_dtypes_generate_unicode_strings(data):
 
 
 skipif_np2 = pytest.mark.skipif(np_version >= (2, 0), reason="removed in new version")
+skipif_np1 = pytest.mark.skipif(np_version < (2, 0), reason="added in new version")
 
 
 @pytest.mark.parametrize(
@@ -251,6 +261,16 @@ def test_arrays_gives_useful_error_on_inconsistent_time_unit():
         ("U", {"min_size": 1, "max_size": 2}, lambda x: 1 <= len(x) <= 2),
         ("U4", {"min_size": 1, "max_size": 2}, lambda x: 1 <= len(x) <= 2),
         ("U", {"alphabet": "abc"}, lambda x: set(x).issubset("abc")),
+        pytest.param(
+            "T", {"alphabet": "abc"}, lambda x: set(x).issubset("abc"), marks=skipif_np1
+        ),
+        pytest.param(
+            "T",
+            {"min_size": 1, "max_size": 2},
+            lambda x: 1 <= len(x) <= 2,
+            marks=skipif_np1,
+        ),
+        ("V", {"min_size": 1, "max_size": 2}, lambda x: 1 <= len(x.tobytes()) <= 2),
     ],
 )
 @given(data=st.data())
diff --git a/hypothesis-python/tests/numpy/test_from_type.py b/hypothesis-python/tests/numpy/test_from_type.py
index 50d6e597cc..b213429329 100644
--- a/hypothesis-python/tests/numpy/test_from_type.py
+++ b/hypothesis-python/tests/numpy/test_from_type.py
@@ -42,7 +42,9 @@ def test_does_not_resolve_nonscalar_types(typ):
 @pytest.mark.parametrize("typ", STANDARD_TYPES_TYPE)
 def test_resolves_and_varies_numpy_scalar_type(typ):
     # Check that we find an instance that is not equal to the default
-    x = find_any(from_type(typ), lambda x: x != type(x)())
+    # (except for void, which has no default); parenthesized as lambda binds loosest
+    cond = (lambda _: True) if typ is np.void else (lambda x: x != type(x)())
+    x = find_any(from_type(typ), cond)
     assert isinstance(x, typ)