add enum checking for attributes #557

Closed
wants to merge 10 commits
6 changes: 6 additions & 0 deletions src/pynxtools/data/NXtest.nxdl.xml
@@ -53,6 +53,12 @@
<item value="3rd type" />
<item value="4th type" />
</enumeration>
<attribute name="array" type="NX_INT">
<enumeration>
<item value="[0, 1, 2]" />
<item value="[2, 3, 4]" />
</enumeration>
</attribute>
</field>
</group>
<group type="NXnote" name="required_group">
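The hunk above adds an `array` attribute (NX_INT) whose enumeration items are written as bracketed lists. As an illustration only (the template path below is taken from the test changes later in this PR, not prescribed by the NXDL itself), data satisfying the new constraint would look like:

```python
# Illustrative template entry for the new attribute; only values matching one
# of the enumeration items ([0, 1, 2] or [2, 3, 4]) are accepted.
template_entry = {
    "/ENTRY[my_entry]/NXODD_name[nxodd_name]/type/@array": [0, 1, 2],
}
```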
87 changes: 20 additions & 67 deletions src/pynxtools/dataconverter/helpers.py
@@ -85,7 +85,7 @@ def _log(self, path: str, log_type: ValidationProblem, value: Optional[Any], *ar
)
elif log_type == ValidationProblem.InvalidEnum:
logger.warning(
f"The value at {path} should be on of the following strings: {value}"
f"The value at {path} should be one of the following: {value}"
)
elif log_type == ValidationProblem.MissingRequiredGroup:
logger.warning(f"The required group, {path}, hasn't been supplied.")
@@ -96,7 +96,7 @@ def _log(self, path: str, log_type: ValidationProblem, value: Optional[Any], *ar
)
elif log_type == ValidationProblem.InvalidType:
logger.warning(
f"The value at {path} should be one of: {value}"
f"The value at {path} should be one of the following Python types: {value}"
f", as defined in the NXDL as {args[0] if args else '<unknown>'}."
)
elif log_type == ValidationProblem.InvalidDatetime:
@@ -578,66 +578,23 @@ def is_value_valid_element_of_enum(value, elist) -> Tuple[bool, list]:
NUMPY_FLOAT_TYPES = (np.half, np.float16, np.single, np.double, np.longdouble)
NUMPY_INT_TYPES = (np.short, np.intc, np.int_)
NUMPY_UINT_TYPES = (np.ushort, np.uintc, np.uint)
# np int for np version 1.26.0
np_int = (
np.intc,
np.int_,
np.intp,
np.int8,
np.int16,
np.int32,
np.int64,
np.uint8,
np.uint16,
np.uint32,
np.uint64,
np.unsignedinteger,
np.signedinteger,
)
np_float = (np.float16, np.float32, np.float64, np.floating)
np_bytes = (np.bytes_, np.byte, np.ubyte)
np_char = (np.str_, np.char.chararray, *np_bytes)
np_bool = (np.bool_,)
np_complex = (np.complex64, np.complex128, np.cdouble, np.csingle)

NEXUS_TO_PYTHON_DATA_TYPES = {
"ISO8601": (str,),
"NX_BINARY": (
bytes,
bytearray,
np.ndarray,
*np_bytes,
),
"NX_BOOLEAN": (bool, np.ndarray, *np_bool),
"NX_CHAR": (str, np.ndarray, *np_char),
"NX_DATE_TIME": (str,),
"NX_FLOAT": (float, np.ndarray, *np_float),
"NX_INT": (int, np.ndarray, *np_int),
"ISO8601": (str),
"NX_BINARY": (bytes, bytearray, np.byte, np.ubyte, np.ndarray),
"NX_BOOLEAN": (bool, np.ndarray, np.bool_),
"NX_CHAR": (str, np.ndarray, np.chararray),
Collaborator Author

This still allows writing numeric arrays to an NX_CHAR, which is not correct. This issue is also solved by #554.
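A minimal sketch of this concern, assuming the validator boils down to an isinstance check against the mapped tuple (the variable names below are illustrative, not from the code base):

```python
import numpy as np

# NX_CHAR mapping as proposed in this diff
nx_char_types = (str, np.ndarray, np.chararray)

numeric_array = np.array([1.0, 2.0, 3.0])
# Any ndarray passes an isinstance-based check, including purely numeric ones,
# so this numeric array would be accepted as NX_CHAR data.
print(isinstance(numeric_array, nx_char_types))  # True
```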

"NX_DATE_TIME": (str),
"NX_FLOAT": (float, np.ndarray, np.floating),
"NX_INT": (int, np.ndarray, np.integer),
"NX_UINT": (np.ndarray, np.unsignedinteger),
"NX_NUMBER": (
int,
float,
np.ndarray,
*np_int,
*np_float,
dict,
),
"NX_NUMBER": (int, float, np.ndarray, np.integer, np.floating),
"NX_POSINT": (
int,
np.ndarray,
np.signedinteger,
np.integer,
), # > 0 is checked in is_valid_data_field()
"NX_COMPLEX": (complex, np.ndarray, *np_complex),
"NXDL_TYPE_UNAVAILABLE": (str,), # Defaults to a string if a type is not provided.
"NX_CHAR_OR_NUMBER": (
str,
int,
float,
np.ndarray,
*np_char,
*np_int,
*np_float,
dict,
),
"NXDL_TYPE_UNAVAILABLE": (str), # Defaults to a string if a type is not provided.
}


@@ -699,18 +656,14 @@ def is_valid_data_field(value, nxdl_type, path):
accepted_types = NEXUS_TO_PYTHON_DATA_TYPES[nxdl_type]
output_value = value

if isinstance(value, list):
return all(is_valid_data_field(v, nxdl_type, path) for v in value), output_value

Collaborator Author

This produces one warning per element in a list if the datatype is not correct. @RubelMozumder had a different solution for this in #554.
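A rough sketch of the two behaviours, with hypothetical helper names (the real code reports via `collector.collect_and_log`):

```python
# Element-wise recursion, as in this diff: one warning per offending element.
def validate_list_elementwise(values, is_valid, log_warning):
    ok = True
    for v in values:
        if not is_valid(v):
            log_warning(v)
            ok = False
    return ok

# One possible alternative: collect the offenders and log a single warning.
def validate_list_aggregated(values, is_valid, log_warning):
    bad = [v for v in values if not is_valid(v)]
    if bad:
        log_warning(bad)
    return not bad

is_int = lambda v: isinstance(v, int)
validate_list_elementwise(["a", "b", 1], is_int, print)  # prints "a" and "b"
validate_list_aggregated(["a", "b", 1], is_int, print)   # prints ['a', 'b'] once
```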

if not isinstance(value, dict) and not is_valid_data_type(value, accepted_types):
try:
if accepted_types[0] is bool and isinstance(value, str):
value = convert_str_to_bool_safe(value)
if value is None:
raise ValueError
output_value = accepted_types[0](value)
except ValueError:
collector.collect_and_log(
path, ValidationProblem.InvalidType, accepted_types, nxdl_type
)
return False, value
Comment on lines -703 to -713
Collaborator Author

I would keep the boolean string to bool conversion; this certainly makes sense, and we could even think about doing this for all datatypes with safe literal conversion, as you did for the enums.
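A sketch of that idea, i.e. a generic, safe string-to-literal conversion; the helper name and exact behaviour are assumptions for illustration, not part of this diff:

```python
import ast
from typing import Any, Optional

def convert_str_safe(value: str) -> Optional[Any]:
    """Interpret a string as a bool or a Python literal; return None on failure."""
    if value.lower() in ("true", "false"):
        return value.lower() == "true"
    try:
        return ast.literal_eval(value)  # ints, floats, lists, tuples, ...
    except (ValueError, SyntaxError):
        return None

print(convert_str_safe("true"))           # True
print(convert_str_safe("[0, 1, 2]"))      # [0, 1, 2]
print(convert_str_safe("not a literal"))  # None
```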

collector.collect_and_log(
path, ValidationProblem.InvalidType, accepted_types, nxdl_type
)
return False, value

if nxdl_type == "NX_POSINT" and not is_positive_int(value):
collector.collect_and_log(path, ValidationProblem.IsNotPosInt, value)
17 changes: 13 additions & 4 deletions src/pynxtools/dataconverter/nexus_tree.py
@@ -761,7 +761,7 @@ class NexusEntity(NexusNode):
type: Literal["field", "attribute"]
unit: Optional[NexusUnitCategory] = None
dtype: NexusType = "NX_CHAR"
items: Optional[List[str]] = None
items: Optional[List[Any]] = None
shape: Optional[Tuple[Optional[int], ...]] = None

def _set_type(self):
@@ -790,14 +790,23 @@ def _set_items(self):
based on the values in the inheritance chain.
The first vale found is used.
"""
if not self.dtype == "NX_CHAR":
return
for elem in self.inheritance:
enum = elem.find(f"nx:enumeration", namespaces=namespaces)
if enum is not None:
self.items = []
for items in enum.findall(f"nx:item", namespaces=namespaces):
self.items.append(items.attrib["value"])
value = items.attrib["value"]
if value[0] == "[" and value[-1] == "]":
import ast

try:
self.items.append(ast.literal_eval(value))
except (ValueError, SyntaxError):
raise Exception(
f"Error parsing enumeration item in the provided NXDL: {value}"
)
else:
self.items.append(value)
return

def _set_shape(self):
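A standalone sketch of the parsing behaviour that the `_set_items` change above introduces, with the enumeration `value` attributes written out by hand rather than read from the NXDL:

```python
import ast

raw_items = ["1st type", "[0, 1, 2]", "[2, 3, 4]"]  # item "value" attributes

items = []
for value in raw_items:
    if value.startswith("[") and value.endswith("]"):
        # Bracketed values become Python lists, e.g. "[0, 1, 2]" -> [0, 1, 2]
        items.append(ast.literal_eval(value))
    else:
        items.append(value)

print(items)  # ['1st type', [0, 1, 2], [2, 3, 4]]
```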
53 changes: 38 additions & 15 deletions src/pynxtools/dataconverter/validation.py
@@ -168,7 +168,7 @@ def validate_dict_against(
appdef: str, mapping: Mapping[str, Any], ignore_undocumented: bool = False
) -> Tuple[bool, List]:
"""
Validates a mapping against the NeXus tree for applicationd definition `appdef`.
Validates a mapping against the NeXus tree for application definition `appdef`.

Args:
appdef (str): The appdef name to validate against.
@@ -248,6 +248,14 @@ def check_nxdata():
prev_path=prev_path,
)

# check NXdata attributes
for attr in ("signal", "auxiliary_signals", "axes"):
handle_attribute(
node.search_add_child_for(attr),
keys,
prev_path=prev_path,
)

for i, axis in enumerate(axes):
if axis == ".":
continue
@@ -392,17 +400,17 @@ def _follow_link(
def handle_field(node: NexusNode, keys: Mapping[str, Any], prev_path: str):
full_path = remove_from_not_visited(f"{prev_path}/{node.name}")
variants = get_variations_of(node, keys)
if not variants:
if node.optionality == "required" and node.type in missing_type_err:
collector.collect_and_log(
full_path, missing_type_err.get(node.type), None
)

if (
not variants
and node.optionality == "required"
and node.type in missing_type_err
):
collector.collect_and_log(full_path, missing_type_err.get(node.type), None)
return

for variant in variants:
if node.optionality == "required" and isinstance(keys[variant], Mapping):
# Check if all fields in the dict are actual attributes (startwith @)
# Check if all fields in the dict are actual attributes (startswith @)
all_attrs = True
for entry in keys[variant]:
if not entry.startswith("@"):
@@ -460,11 +468,12 @@ def handle_field(node: NexusNode, keys: Mapping[str, Any], prev_path: str):
def handle_attribute(node: NexusNode, keys: Mapping[str, Any], prev_path: str):
full_path = remove_from_not_visited(f"{prev_path}/@{node.name}")
variants = get_variations_of(node, keys)
if not variants:
if node.optionality == "required" and node.type in missing_type_err:
collector.collect_and_log(
full_path, missing_type_err.get(node.type), None
)
if (
not variants
and node.optionality == "required"
and node.type in missing_type_err
):
collector.collect_and_log(full_path, missing_type_err.get(node.type), None)
return

for variant in variants:
@@ -476,6 +485,20 @@ def handle_attribute(node: NexusNode, keys: Mapping[str, Any], prev_path: str):
f"{prev_path}/{variant if variant.startswith('@') else f'@{variant}'}",
)

# Check enumeration
if (
node.items is not None
and mapping[
f"{prev_path}/{variant if variant.startswith('@') else f'@{variant}'}"
]
not in node.items
):
collector.collect_and_log(
f"{prev_path}/{variant if variant.startswith('@') else f'@{variant}'}",
ValidationProblem.InvalidEnum,
node.items,
)

def handle_choice(node: NexusNode, keys: Mapping[str, Any], prev_path: str):
global collector
old_collector = collector
@@ -556,7 +579,7 @@ def check_attributes_of_nonexisting_field(
) -> list:
"""
This method runs through the mapping dictionary and checks if there are any
attributes assigned to the fields (not groups!) which are not expicitly
attributes assigned to the fields (not groups!) which are not explicitly
present in the mapping.
If there are any found, a warning is logged and the corresponding items are
added to the list returned by the method.
@@ -657,7 +680,7 @@ def check_type_with_tree(
if (next_child_class is not None) or (next_child_name is not None):
output = None
for child in node.children:
# regexs to separarte the class and the name from full name of the child
# regexs to separate the class and the name from full name of the child
child_class_from_node = re.sub(
r"(\@.*)*(\[.*?\])*(\(.*?\))*([a-z]\_)*(\_[a-z])*[a-z]*\s*",
"",
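A simplified sketch of the enumeration check added to `handle_attribute` above, assuming the attribute value arrives as a plain Python list (a NumPy array would need e.g. `.tolist()` before an `in` test like this):

```python
items = [[0, 1, 2], [2, 3, 4]]   # as parsed from the NXDL enumeration

print([0, 1, 2] in items)        # True  -> valid, nothing is logged
print([1, 2] in items)           # False -> InvalidEnum would be logged
print(["0", 1, 2] in items)      # False -> InvalidEnum would be logged
```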
3 changes: 2 additions & 1 deletion tests/data/dataconverter/readers/example/testdata.json
@@ -17,5 +17,6 @@
"date_value_units": "",
"required_child": 1,
"optional_child": 1,
"@version": "1.0"
"@version": "1.0",
"@array": [0, 1, 2]
}
48 changes: 33 additions & 15 deletions tests/dataconverter/test_helpers.py
@@ -96,7 +96,7 @@ def listify_template(data_dict: Template):
"type",
"definition",
"date_value",
):
) or isinstance(data_dict[optionality][path], list):
listified_template[optionality][path] = data_dict[optionality][path]
else:
listified_template[optionality][path] = [data_dict[optionality][path]]
@@ -229,6 +229,11 @@ def fixture_filled_test_data(template, tmp_path):
"/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/char_value/@units"
] = ""
TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/type"] = "2nd type" # pylint: disable=E1126
TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/type/@array"] = [
0,
1,
2,
]
TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/date_value"] = (
"2022-01-22T12:14:12.05018+00:00" # pylint: disable=E1126
)
@@ -240,6 +245,7 @@ def fixture_filled_test_data(template, tmp_path):
TEMPLATE["required"]["/ENTRY[my_entry]/definition/@version"] = "2.4.6" # pylint: disable=E1126
TEMPLATE["required"]["/ENTRY[my_entry]/program_name"] = "Testing program" # pylint: disable=E1126
TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/type"] = "2nd type" # pylint: disable=E1126
TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/type/@array"] = [0, 1, 2]
TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/date_value"] = (
"2022-01-22T12:14:12.05018+00:00" # pylint: disable=E1126
)
@@ -279,13 +285,7 @@ def fixture_filled_test_data(template, tmp_path):
),
(
"The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/in"
"t_value should be one of: (<class 'int'>, <class 'numpy"
".ndarray'>, <class 'numpy.int32'>, <class 'numpy.int64'>,"
" <class 'numpy.int64'>, <class 'numpy.int8'>, <class 'numpy"
".int16'>, <class 'numpy.int32'>, <class 'numpy.int64'>, "
"<class 'numpy.uint8'>, <class 'numpy.uint16'>, <class 'numpy"
".uint32'>, <class 'numpy.uint64'>, <class 'numpy.unsignedi"
"nteger'>, <class 'numpy.signedinteger'>), as defined in "
"t_value should be one of the following Python types: (<class 'int'>, <class 'numpy.ndarray'>, <class 'numpy.integer'>), as defined in "
"the NXDL as NX_INT."
),
id="string-instead-of-int",
@@ -298,10 +298,9 @@ def fixture_filled_test_data(template, tmp_path):
),
(
"The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/bool_value sh"
"ould be one of: (<class 'bool'>, <class 'numpy.ndarray'>, <class '"
"numpy.bool"
"ould be one of the following Python types: (<class 'bool'>, <class 'numpy.ndarray'>, <class 'numpy.bool_'>"
),
id="string-instead-of-int",
id="string-instead-of-bool",
),
pytest.param(
alter_dict(
@@ -327,7 +326,7 @@ def fixture_filled_test_data(template, tmp_path):
TEMPLATE, "/ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value", 3
),
(
"The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value should be of Python type:"
"The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value should be one of the following Python types:"
" (<class 'str'>, <class 'numpy.ndarray'>, <class 'numpy.chararray'>),"
" as defined in the NXDL as NX_CHAR."
),
@@ -433,8 +432,8 @@ def fixture_filled_test_data(template, tmp_path):
),
(
"The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/type should "
"be on of the following"
" strings: ['1st type', '2nd type', '3rd type', '4th type']"
"be one of the following"
": ['1st type', '2nd type', '3rd type', '4th type']"
),
id="wrong-enum-choice",
),
@@ -519,6 +518,26 @@ def fixture_filled_test_data(template, tmp_path):
pytest.param(
remove_optional_parent(TEMPLATE), (""), id="opt-group-completely-removed"
),
pytest.param(
alter_dict(
TEMPLATE,
"/ENTRY[my_entry]/NXODD_name[nxodd_name]/type/@array",
["0", 1, 2],
),
(
"The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/type/@array should be one of the following: [[0, 1, 2], [2, 3, 4]]"
),
id="wrong-type-array-in-attribute",
),
pytest.param(
alter_dict(
TEMPLATE, "/ENTRY[my_entry]/NXODD_name[nxodd_name]/type/@array", [1, 2]
),
(
"The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/type/@array should be one of the following: [[0, 1, 2], [2, 3, 4]]"
),
id="wrong-value-array-in-attribute",
),
],
)
def test_validate_data_dict(caplog, data_dict, error_message, request):
@@ -530,7 +549,6 @@ def test_validate_data_dict(caplog, data_dict, error_message, request):
"UTC-with-+00:00",
"UTC-with-Z",
"no-child-provided-optional-parent",
"int-instead-of-chars",
"link-dict-instead-of-bool",
"opt-group-completely-removed",
"required-field-provided-in-variadic-optional-group",