-
Notifications
You must be signed in to change notification settings - Fork 590
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Support "T" and "V" dtypes in from_dtype
#4226
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
RELEASE_TYPE: minor | ||
|
||
:func:`~hypothesis.extra.numpy.from_dtype` now supports the :obj:`numpy.dtypes.VoidDType` (``"V"``) dtype, as well as the new :obj:`numpy:numpy.dtypes.StringDType` (``"T"``) dtype in NumPy 2.0. |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -92,8 +92,15 @@ def _try_import(mod_name: str, attr_name: str) -> Any: | |
|
||
TIME_RESOLUTIONS = tuple("Y M D h m s ms us ns ps fs as".split()) | ||
|
||
numpy_version = tuple(map(int, np.__version__.split(".")[:2])) | ||
# See https://github.com/HypothesisWorks/hypothesis/pull/3394 and linked discussion. | ||
NP_FIXED_UNICODE = tuple(int(x) for x in np.__version__.split(".")[:2]) >= (1, 19) | ||
NP_FIXED_UNICODE = numpy_version >= (1, 19) | ||
|
||
maybe_string_dtype = [] | ||
if numpy_version >= (2, 0): # pragma: no branch # else covered by oldestnumpy job | ||
from numpy.dtypes import StringDType | ||
|
||
maybe_string_dtype = [st.just(StringDType())] | ||
|
||
|
||
@defines_strategy(force_reusable_values=True) | ||
|
@@ -213,6 +220,17 @@ def compat_kw(*args, **kw): | |
else: # NEP-7 defines the NaT value as integer -(2**63) | ||
elems = st.integers(-(2**63) + 1, 2**63 - 1) | ||
result = st.builds(dtype.type, elems, res) | ||
elif dtype.kind == "T": | ||
result = st.text(**compat_kw("alphabet", "min_size", "max_size")) | ||
elif dtype.kind == "V": | ||
result = st.binary( | ||
**compat_kw( | ||
"min_size", max_size=None if dtype.itemsize == 0 else dtype.itemsize | ||
) | ||
) | ||
# we explicitly avoid supporting dtype.kind == "O", because it is easy to | ||
# OOM when evaluating e.g. np.array(range(0, n)) for large n (and this is in | ||
# fact a thing hypothesis will generate via st.from_type(object)). | ||
else: | ||
raise InvalidArgument(f"No strategy inference for {dtype}") | ||
return result.map(dtype.type) | ||
|
@@ -927,6 +945,9 @@ def timedelta64_dtypes( | |
) | ||
|
||
|
||
# TODO: we should uncap max_len here, and for unicode/void below. | ||
# Also allow generating undetermined-width dtypes like "S" / "S0"? Possibly with | ||
# a new parameter allow_undetermined? | ||
Comment on lines
+949
to
+950
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Weakly against uncapped dtypes; they make sense as an aid to interactive use (eg notebooks) but aren't actually a dtype that an array can have. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. On the flipside, they *are* a valid dtype, and I think we should avoid compromising completeness unless there's a reason to. Generating them seems harmless if you're passing directly to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, I see your point, but I can't think of an API that I like more than writing |
||
@defines_dtype_strategy | ||
def byte_string_dtypes( | ||
*, endianness: str = "?", min_len: int = 1, max_len: int = 16 | ||
|
@@ -957,6 +978,21 @@ def unicode_string_dtypes( | |
return dtype_factory("U", list(range(min_len, max_len + 1)), None, endianness) | ||
|
||
|
||
@defines_dtype_strategy
def void_dtypes(
    *, endianness: str = "?", min_len: int = 1, max_len: int = 16
) -> st.SearchStrategy["np.dtype[np.void]"]:
    """Generate void (``"V"``) dtypes of varying length and byteorder.

    Lengths are taken from ``min_len`` to ``max_len`` inclusive.  A void dtype
    of length 0 means that the size is still to be determined, so - unlike
    the bytestrings from Hypothesis' st.binary strategy, which may be empty -
    the smallest length accepted here is 1.
    """
    # Validate the length bounds, passing 1 as the floor.
    order_check("len", 1, min_len, max_len)
    lengths = list(range(min_len, max_len + 1))
    return dtype_factory("V", lengths, None, endianness)
|
||
|
||
def _no_title_is_name_of_a_titled_field(ls): | ||
seen = set() | ||
for title_and_name, *_ in ls: | ||
|
@@ -1336,6 +1372,8 @@ def _from_type(thing: type[Ex]) -> Optional[st.SearchStrategy[Ex]]: | |
# Note: Parameterized dtypes and DTypeLike are not supported. | ||
return st.one_of( | ||
scalar_dtypes(), | ||
void_dtypes(), | ||
*maybe_string_dtype, | ||
byte_string_dtypes(), | ||
unicode_string_dtypes(), | ||
array_dtypes(), | ||
|
@@ -1368,7 +1406,7 @@ def _from_type(thing: type[Ex]) -> Optional[st.SearchStrategy[Ex]]: | |
|
||
if isinstance(thing, type) and issubclass(thing, np.generic): | ||
dtype = np.dtype(thing) | ||
return from_dtype(dtype) if dtype.kind not in "OV" else None | ||
return from_dtype(dtype) if dtype.kind != "O" else None | ||
|
||
real_thing, args = _unpack_generic(thing) | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -32,7 +32,7 @@ def test_resolves_dtype_type(dtype): | |
assert isinstance(dtype, np.dtype) | ||
|
||
|
||
@pytest.mark.parametrize("typ", [np.object_, np.void]) | ||
@pytest.mark.parametrize("typ", [np.object_]) | ||
def test_does_not_resolve_nonscalar_types(typ): | ||
# Comparing the objects directly fails on Windows, | ||
# so compare their reprs instead. | ||
|
@@ -42,7 +42,9 @@ def test_does_not_resolve_nonscalar_types(typ): | |
@pytest.mark.parametrize("typ", STANDARD_TYPES_TYPE) | ||
def test_resolves_and_varies_numpy_scalar_type(typ): | ||
# Check that we find an instance that is not equal to the default | ||
x = find_any(from_type(typ), lambda x: x != type(x)()) | ||
# (except for void, which does not have a default)
# Each lambda is parenthesized so that the conditional chooses between two
# predicates.  Without the parentheses the whole conditional becomes the body
# of the first lambda, which then returns either True or a (truthy) lambda
# object, so every drawn value would be accepted and nothing would be checked.
cond = (lambda _: True) if typ is np.void else (lambda x: x != type(x)())
x = find_any(from_type(typ), cond)
Comment on lines
+45
to
+47
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd just skip the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I left it in because this test doubles as a "can draw at all from from_dtype" - that's probably clearer if I split this into two (albeit mostly redundant) tests, and assume-away void in this one. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's just stick a top-level |
||
assert isinstance(x, typ) | ||
|
||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Seems like we should generate all `itemsize` bytes if that's not None, rather than variable-length?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
totally missed that `Vn` pads to n with `\x00`, thanks