From b9499451e9bfa40a2467c526a5e506903b71cf1d Mon Sep 17 00:00:00 2001 From: Johan Bergsma <29785380+JPBergsma@users.noreply.github.com> Date: Wed, 17 Aug 2022 22:06:15 +0200 Subject: [PATCH 01/20] Alow aliases to be nested fields. --- optimade/server/data/test_structures.json | 7 +++++++ optimade/server/mappers/entries.py | 16 +++++++++++++--- tests/server/test_mappers.py | 16 ++++++++++++++++ tests/test_config.json | 3 ++- 4 files changed, 38 insertions(+), 4 deletions(-) diff --git a/optimade/server/data/test_structures.json b/optimade/server/data/test_structures.json index 18a7eba03..e1f2923e4 100644 --- a/optimade/server/data/test_structures.json +++ b/optimade/server/data/test_structures.json @@ -1222,6 +1222,7 @@ "nelements": 5, "nsites": 24, "pretty_formula": "Ag2C6ClH12N3", + "fancy_formulas": {"hill": "C6H12Ag2ClN3"}, "species": [ { "chemical_symbols": [ @@ -1475,6 +1476,9 @@ "nelements": 5, "nsites": 25, "pretty_formula": "Ag2C2H2N6O13", + "fancy_formulas" : { + "hill": "C2H2Ag2N6O13" + }, "species": [ { "chemical_symbols": [ @@ -1723,6 +1727,7 @@ "nelements": 7, "nsites": 23, "pretty_formula": "Ag2C2ClH8N5O3S2", + "fancy_formulas": {"hill": "C2H8Ag2ClN5O3S2"}, "species": [ { "chemical_symbols": [ @@ -2467,6 +2472,7 @@ "nelements": 8, "nsites": 74, "pretty_formula": "AgB10C15Cl2H40NO3P2", + "fancy_formulas": {"hill": "C15H40AgB10Cl2NO3P2"}, "species": [ { "chemical_symbols": [ @@ -2821,6 +2827,7 @@ "nelements": 7, "nsites": 29, "pretty_formula": "AgC3ClH14N6OS3", + "fancy_formulas":{"hill": "C3H14AgClN6OS3"}, "species": [ { "chemical_symbols": [ diff --git a/optimade/server/mappers/entries.py b/optimade/server/mappers/entries.py index 9ee25cc7c..7605d6805 100644 --- a/optimade/server/mappers/entries.py +++ b/optimade/server/mappers/entries.py @@ -7,6 +7,15 @@ __all__ = ("BaseResourceMapper",) +def get_value(dict, real): + split_real = real.split(".", 1) + if not split_real[0] in dict: + return None, False + if len(split_real) == 1: + return dict[split_real[0]], True + return get_value(dict[split_real[0]], split_real[1]) + + class classproperty(property): """A simple extension of the property decorator that binds to types rather than instances. @@ -335,13 +344,14 @@ def map_back(cls, doc: dict) -> dict: """ mapping = ((real, alias) for alias, real in cls.all_aliases()) newdoc = {} - reals = {real for alias, real in cls.all_aliases()} + reals = {real.split(".", 1)[0] for alias, real in cls.all_aliases()} for key in doc: if key not in reals: newdoc[key] = doc[key] for real, alias in mapping: - if real in doc: - newdoc[alias] = doc[real] + value, found = get_value(doc, real) + if found: + newdoc[alias] = value if "attributes" in newdoc: raise Exception("Will overwrite doc field!") diff --git a/tests/server/test_mappers.py b/tests/server/test_mappers.py index e08a099bd..ed90ad040 100644 --- a/tests/server/test_mappers.py +++ b/tests/server/test_mappers.py @@ -63,3 +63,19 @@ class MyMapper(mapper(MAPPER)): mapper.get_backend_field("_exmpl_dft_parameters_dft_parameters.nested.property") == "_exmpl_dft_parameters_dft_parameters.nested.property" ) + + +def test_map_back_nested_field(mapper): + class MyMapper(mapper(MAPPER)): + ALIASES = (("some_field", "main_field.nested_field.field_we_need"),) + + mapper = MyMapper() + input_dict = { + "main_field": { + "nested_field": {"field_we_need": 42, "other_field": 78}, + "another_nested_field": 89, + }, + "secondary_field": 52, + } + output_dict = mapper.map_back(input_dict) + assert output_dict["attributes"]["some_field"] == 42 diff --git a/tests/test_config.json b/tests/test_config.json index 84e05066c..67180f77f 100644 --- a/tests/test_config.json +++ b/tests/test_config.json @@ -27,7 +27,8 @@ "immutable_id": "_id", "chemical_formula_descriptive": "pretty_formula", "chemical_formula_reduced": "pretty_formula", - "chemical_formula_anonymous": "formula_anonymous" + "chemical_formula_anonymous": "formula_anonymous", + "chemical_formula_hill": "fancy_formulas.hill" } }, "length_aliases": { From 7adbf4adfda84bb87ae13270b18bbb90f464ffa3 Mon Sep 17 00:00:00 2001 From: Johan Bergsma <29785380+JPBergsma@users.noreply.github.com> Date: Wed, 17 Aug 2022 22:43:08 +0200 Subject: [PATCH 02/20] Added a line to docs to explain how to alias nested fields. --- docs/getting_started/setting_up_an_api.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/getting_started/setting_up_an_api.md b/docs/getting_started/setting_up_an_api.md index 32f69c0e6..92d39a576 100644 --- a/docs/getting_started/setting_up_an_api.md +++ b/docs/getting_started/setting_up_an_api.md @@ -37,6 +37,7 @@ Instead, if you are storing chemical formulae as an unreduced count per simulati This would then instead require option 2 above, namely either the addition of auxiliary fields that store the correct (or mappable) OPTIMADE format in the database, or the creation of a secondary database that returns the pre-converted structures. In the simplest case, the mapper classes can be used to define aliases between fields in the database and the OPTIMADE field name; these can be configured via the [`aliases`][optimade.server.config.ServerConfig.aliases] option as a dictionary mapping stored in a dictionary under the appropriate endpoint name, e.g. `"aliases": {"structures": {"chemical_formula_reduced": "my_chem_form"}}`, or defined as part of a custom mapper class. +Incase the alias is a nested field the field names should be separated by ".". Example: `"aliases": { "structures": {"OPTIMADE_field": "field.nested_field"}}` In either option, you should now be able to insert your data into the corresponding MongoDB (or otherwise) collection. From 368792a1d75889466bb8e9b5801d3cc29efbaeac Mon Sep 17 00:00:00 2001 From: Johan Bergsma <29785380+JPBergsma@users.noreply.github.com> Date: Fri, 11 Nov 2022 17:39:44 +0100 Subject: [PATCH 03/20] 1. Added support for more versatile aliassing allowing nested fields. 2.Now only the requested fields are retrieved from the backend. --- optimade/models/links.py | 4 +- optimade/models/references.py | 7 - optimade/server/data/test_structures.json | 1 + .../entry_collections/entry_collections.py | 100 ++++++-- optimade/server/mappers/entries.py | 241 ++++++++++++++---- optimade/server/routers/utils.py | 28 +- optimade/validator/validator.py | 1 + tests/server/routers/test_info.py | 5 +- tests/server/routers/test_references.py | 6 + tests/server/routers/test_structures.py | 10 + tests/server/test_mappers.py | 53 +++- tests/test_config.json | 6 +- 12 files changed, 353 insertions(+), 109 deletions(-) diff --git a/optimade/models/links.py b/optimade/models/links.py index 3c0d322c1..609fcdf79 100644 --- a/optimade/models/links.py +++ b/optimade/models/links.py @@ -39,11 +39,11 @@ class Aggregate(Enum): class LinksResourceAttributes(Attributes): """Links endpoint resource object attributes""" - name: str = StrictField( + name: Optional[str] = StrictField( ..., description="Human-readable name for the OPTIMADE API implementation, e.g., for use in clients to show the name to the end-user.", ) - description: str = StrictField( + description: Optional[str] = StrictField( ..., description="Human-readable description for the OPTIMADE API implementation, e.g., for use in clients to show a description to the end-user.", ) diff --git a/optimade/models/references.py b/optimade/models/references.py index 40dbca3c1..2c9479912 100644 --- a/optimade/models/references.py +++ b/optimade/models/references.py @@ -2,7 +2,6 @@ from pydantic import ( # pylint: disable=no-name-in-module BaseModel, AnyUrl, - validator, ) from typing import List, Optional @@ -264,9 +263,3 @@ class ReferenceResource(EntryResource): queryable=SupportLevel.MUST, ) attributes: ReferenceResourceAttributes - - @validator("attributes") - def validate_attributes(cls, v): - if not any(prop[1] is not None for prop in v): - raise ValueError("reference object must have at least one field defined") - return v diff --git a/optimade/server/data/test_structures.json b/optimade/server/data/test_structures.json index e1f2923e4..4340ed3bb 100644 --- a/optimade/server/data/test_structures.json +++ b/optimade/server/data/test_structures.json @@ -5,6 +5,7 @@ }, "assemblies": null, "chemsys": "Ac", + "dichtheid": 10.07, "cartesian_site_positions": [ [ 0.17570227444196573, diff --git a/optimade/server/entry_collections/entry_collections.py b/optimade/server/entry_collections/entry_collections.py index 4d762b442..97646ffb7 100644 --- a/optimade/server/entry_collections/entry_collections.py +++ b/optimade/server/entry_collections/entry_collections.py @@ -4,7 +4,7 @@ import re from lark import Transformer - +from functools import lru_cache from optimade.filterparser import LarkParser from optimade.models import EntryResource from optimade.server.config import CONFIG, SupportedBackend @@ -117,11 +117,32 @@ def count(self, **kwargs: Any) -> int: """ + def set_field_to_none_if_missing_in_dict(self, entry: dict, field: str): + + split_field = field.split(".", 1) + if split_field[0] in entry: + if len(split_field) > 1: + self.set_field_to_none_if_missing_in_dict( + entry[split_field[0]], split_field[1] + ) + else: + if len(split_field) == 1: + if ( + split_field[0] == "structure_features" + ): # It would be nice if there would be a more universal way to handle special cases like this. + entry[split_field[0]] = [] + else: + entry[split_field[0]] = None + else: + entry[split_field[0]] = {} + self.set_field_to_none_if_missing_in_dict( + entry[split_field[0]], split_field[1] + ) + return entry + def find( self, params: Union[EntryListingQueryParams, SingleEntryQueryParams] - ) -> Tuple[ - Union[List[EntryResource], EntryResource, None], int, bool, Set[str], Set[str] - ]: + ) -> Tuple[Union[List[EntryResource], EntryResource, None], int, bool, Set[str]]: """ Fetches results and indicates if more data is available. @@ -145,6 +166,14 @@ def find( criteria, single_entry ) + exclude_fields = self.all_fields - response_fields + + results = [self.resource_mapper.map_back(doc) for doc in results] + self.check_and_add_missing_fields(results, response_fields) + + if results: + results = self.resource_mapper.deserialize(results) + if single_entry: results = results[0] if results else None @@ -153,10 +182,20 @@ def find( detail=f"Instead of a single entry, {data_returned} entries were found", ) - exclude_fields = self.all_fields - response_fields + return (results, data_returned, more_data_available, exclude_fields) + + def check_and_add_missing_fields(self, results: dict, response_fields: set): + """Checks whether the response_fields and mandatory fields are present. + If they are not present the values are set to None, so the deserialization works correctly. + It also checks whether all fields in the response have been defined either in the model or in the config file. + If not it raises an appropriate error or warning.""" include_fields = ( response_fields - self.resource_mapper.TOP_LEVEL_NON_ATTRIBUTES_FIELDS - ) + ) | set(self.get_non_optional_fields()) + # Include missing fields + for result in results: + for field in include_fields: + self.set_field_to_none_if_missing_in_dict(result["attributes"], field) bad_optimade_fields = set() bad_provider_fields = set() @@ -183,17 +222,6 @@ def find( detail=f"Unrecognised OPTIMADE field(s) in requested `response_fields`: {bad_optimade_fields}." ) - if results: - results = self.resource_mapper.deserialize(results) - - return ( - results, - data_returned, - more_data_available, - exclude_fields, - include_fields, - ) - @abstractmethod def _run_db_query( self, criteria: Dict[str, Any], single_entry: bool = False @@ -236,6 +264,29 @@ def all_fields(self) -> Set[str]: return self._all_fields + @lru_cache(maxsize=4) + def get_non_optional_fields(self) -> List[str]: + """ + Returns those fields that should be set before a response class can be initialized. + + Returns: + Property names. + """ + + schema = self.get_schema() + attributes = schema["properties"]["attributes"] + if "$ref" in attributes: + path = attributes["$ref"].split("/")[1:] + attributes = schema.copy() + while path: + next_key = path.pop(0) + if next_key not in attributes: + return [] + attributes = attributes[next_key] + + return attributes["required"] + + @lru_cache(maxsize=4) def get_attribute_fields(self) -> Set[str]: """Get the set of attribute fields @@ -252,7 +303,7 @@ def get_attribute_fields(self) -> Set[str]: """ - schema = self.resource_cls.schema() + schema = self.get_schema() attributes = schema["properties"]["attributes"] if "allOf" in attributes: allOf = attributes.pop("allOf") @@ -266,6 +317,10 @@ def get_attribute_fields(self) -> Set[str]: attributes = attributes[next_key] return set(attributes["properties"].keys()) + @lru_cache(maxsize=4) + def get_schema(self): + return self.resource_cls.schema() + def handle_query_params( self, params: Union[EntryListingQueryParams, SingleEntryQueryParams] ) -> Dict[str, Any]: @@ -319,16 +374,15 @@ def handle_query_params( cursor_kwargs["limit"] = CONFIG.page_limit # response_fields - cursor_kwargs["projection"] = { - f"{self.resource_mapper.get_backend_field(f)}": True - for f in self.all_fields - } - if getattr(params, "response_fields", False): response_fields = set(params.response_fields.split(",")) response_fields |= self.resource_mapper.get_required_fields() else: response_fields = self.all_fields.copy() + cursor_kwargs["projection"] = { + f"{self.resource_mapper.get_backend_field(f)}": True + for f in response_fields + } cursor_kwargs["fields"] = response_fields diff --git a/optimade/server/mappers/entries.py b/optimade/server/mappers/entries.py index 7605d6805..19142bb86 100644 --- a/optimade/server/mappers/entries.py +++ b/optimade/server/mappers/entries.py @@ -1,7 +1,7 @@ -from typing import Tuple, Optional, Type, Set, Dict, Any, Union, List, Iterable +from typing import Tuple, Optional, Type, Set, Dict, Any, List, Iterable from functools import lru_cache import warnings - +from optimade.server.config import CONFIG from optimade.models.entries import EntryResource __all__ = ("BaseResourceMapper",) @@ -16,6 +16,72 @@ def get_value(dict, real): return get_value(dict[split_real[0]], split_real[1]) +def write_to_nested_dict(dictionary: dict, composite_key: str, value: Any): + """Puts a value into an arbitrary position in a nested dict. + + Arguments: + dictionary: the dictionary to which the value should be added under the composite_key. + composite_key: The key under which the value should be stored. The sub_keys should be separated by a ".". + e.g. "outer_level_key.inner_level_key" + value: The value that should be stored in the dictionary. + + """ + if "." in composite_key: + split_key = composite_key.split(".", 1) + if split_key[0] not in dictionary: + dictionary[split_key[0]] = {} + write_to_nested_dict(dictionary[split_key[0]], split_key[1], value) + else: + dictionary[composite_key] = value + + +def read_from_nested_dict(dictionary: dict, composite_key: str) -> Any: + """Reads a value from an arbitrary position in a nested dict. + + Arguments: + dictionary: the dictionary from which the value under the composite_key should be read . + composite_key: The key under which the value should be read. The sub_keys should be separated by a ".". + e.g. "outer_level_key.inner_level_key" + + Returns: + The value as stored in the dictionary. If the value is not stored in the dictionary it returns None. + A boolean. True indicates that the composite_key was present in the dictionary, False that it is not present. + + """ + split_key = composite_key.split(".", 1) + if split_key[0] in dictionary: + if len(split_key) > 1: + return read_from_nested_dict(dictionary[split_key[0]], split_key[1]) + else: + return dictionary[composite_key], True + return None, False + + +def remove_from_nested_dict(dictionary: dict, composite_key: str): + """Removes an entry from an arbitrary position in a nested dict. + + Arguments: + dictionary: the dictionary from which the composite key should be removed. + composite_key: The key that should be removed. The sub_keys should be separated by a ".". + e.g. "outer_level_key.inner_level_key" + If the removal of key causes the dictionary one level up to be empty it is removed as well. + """ + split_key = composite_key.split(".", 1) + if split_key[0] in dictionary: + if len(split_key) > 1: + empty = remove_from_nested_dict(dictionary[split_key[0]], split_key[1]) + if empty: + return remove_from_nested_dict(dictionary, split_key[0]) + else: + return False + else: + del dictionary[composite_key] + if not dictionary: + return True + else: + return False + + class classproperty(property): """A simple extension of the property decorator that binds to types rather than instances. @@ -87,36 +153,61 @@ class BaseResourceMapper: @classmethod @lru_cache(maxsize=1) - def all_aliases(cls) -> Iterable[Tuple[str, str]]: - """Returns all of the associated aliases for this entry type, + def all_prefixed_fields(cls) -> Iterable[Tuple[str, str]]: + """Returns all of the prefixed, unprefixed field name pairs, including those defined by the server config. The first member - of each tuple is the OPTIMADE-compliant field name, the second - is the backend-specific field name. + of each tuple is the prefixed field name, the second + is the field name as presented in the optimade database without prefix. Returns: - A tuple of alias tuples. + A list of alias tuples. """ - from optimade.server.config import CONFIG - - return ( - tuple( - (f"_{CONFIG.provider.prefix}_{field}", field) + field_list = ( + [ + field for field in CONFIG.provider_fields.get(cls.ENDPOINT, []) if isinstance(field, str) - ) - + tuple( - (f"_{CONFIG.provider.prefix}_{field['name']}", field["name"]) + ] + + [ + field["name"] for field in CONFIG.provider_fields.get(cls.ENDPOINT, []) if isinstance(field, dict) - ) - + tuple( - (f"_{CONFIG.provider.prefix}_{field}", field) - for field in cls.PROVIDER_FIELDS - ) - + tuple(CONFIG.aliases.get(cls.ENDPOINT, {}).items()) - + cls.ALIASES + ] + + list(cls.PROVIDER_FIELDS) ) + prefixed_field_pairs = [] + for field in field_list: + split_field = field.split( + ".", 1 + ) # For now I assume there are no nested dictionaries for the official Optimade fields + if split_field[0] in cls.ENTRY_RESOURCE_ATTRIBUTES: + prefixed_field_pairs.append( + ( + f"{split_field[0]}._{CONFIG.provider.prefix}_{split_field[1]}", + field, + ) + ) + else: + prefixed_field_pairs.append( + (f"_{CONFIG.provider.prefix}_{field}", field) + ) + return prefixed_field_pairs + + @classmethod + @lru_cache(maxsize=1) + def all_aliases(cls) -> Iterable[Tuple[str, str]]: + """Returns all of the associated aliases for this entry type, + including those defined by the server config. The first member + of each tuple is the field name as presented in the optimade database without prefix, the second + is the backend-specific field name. + + Returns: + A tuple of alias tuples. + + """ + + return tuple(CONFIG.aliases.get(cls.ENDPOINT, {}).items()) + cls.ALIASES @classproperty @lru_cache(maxsize=1) @@ -130,14 +221,12 @@ def SUPPORTED_PREFIXES(cls) -> Set[str]: domain-specific terms). """ - from optimade.server.config import CONFIG return {CONFIG.provider.prefix} @classproperty def ALL_ATTRIBUTES(cls) -> Set[str]: """Returns all attributes served by this entry.""" - from optimade.server.config import CONFIG return ( set(cls.ENTRY_RESOURCE_ATTRIBUTES) @@ -185,7 +274,6 @@ def all_length_aliases(cls) -> Iterable[Tuple[str, str]]: A tuple of length alias tuples. """ - from optimade.server.config import CONFIG return cls.LENGTH_ALIASES + tuple( CONFIG.length_aliases.get(cls.ENDPOINT, {}).items() @@ -206,6 +294,24 @@ def length_alias_for(cls, field: str) -> Optional[str]: """ return dict(cls.all_length_aliases()).get(field, None) + @classmethod + def get_map_field_from_dict(cls, field: str, aliases: dict): + """Replaces (part of) the field_name "field" with the matching field in the dictionary dict" + It first tries to find the entire field name(incl. subfields(which are separated by:".")) in the dictionary. + If it is not present it removes the deepest nesting level and checks again. + If the field occurs in the dictionary it is replaced by the value in the dictionary. + Any unmatched subfields are appended. + """ + split = field.split(".") + for i in range(len(split), 0, -1): + field_path = ".".join(split[0:i]) + if field_path in aliases: + field = aliases.get(field_path) + if split[i:]: + field += "." + ".".join(split[i:]) + break + return field + @classmethod @lru_cache(maxsize=128) def get_backend_field(cls, optimade_field: str) -> str: @@ -216,9 +322,12 @@ def get_backend_field(cls, optimade_field: str) -> str: Aliases are read from [`all_aliases()`][optimade.server.mappers.entries.BaseResourceMapper.all_aliases]. - If a dot-separated OPTIMADE field is provided, e.g., `species.mass`, only the first part will be mapped. + If a dot-separated field is provided, the mapper first looks for that field. + If it is not present in the aliases it repeats the search with one nesting level less untill the field is found. + If the field is not found, the unmodified `optimade_field` is returned. + This means for an (OPTIMADE, DB) alias of (`species`, `kinds`), `get_backend_fields("species.mass")` - will return `kinds.mass`. + will return `kinds.mass` as there is no specific entry for "species.mass" in the aliases Arguments: optimade_field: The OPTIMADE field to attempt to map to the backend-specific field. @@ -235,11 +344,9 @@ def get_backend_field(cls, optimade_field: str) -> str: The mapped field name to be used in the query to the backend. """ - split = optimade_field.split(".") - alias = dict(cls.all_aliases()).get(split[0], None) - if alias is not None: - return alias + ("." + ".".join(split[1:]) if len(split) > 1 else "") - return optimade_field + prefixed = dict(cls.all_prefixed_fields()) + optimade_field = cls.get_map_field_from_dict(optimade_field, prefixed) + return cls.get_map_field_from_dict(optimade_field, dict(cls.all_aliases())) @classmethod @lru_cache(maxsize=128) @@ -268,9 +375,11 @@ def alias_for(cls, field: str) -> str: def get_optimade_field(cls, backend_field: str) -> str: """Return the corresponding OPTIMADE field name for the underlying database field, ready to be used to construct the OPTIMADE-compliant JSON response. + !!Warning!! Incase a backend_field maps to multiple OPTIMADE fields, only one of the fields is returned. Aliases are read from [`all_aliases()`][optimade.server.mappers.entries.BaseResourceMapper.all_aliases]. + [`all_prefixed_fields()`][optimade.server.mappers.entries.BaseResourceMapper.all_prefixed_fields] Arguments: backend_field: The backend field to attempt to map to an OPTIMADE field. @@ -287,9 +396,13 @@ def get_optimade_field(cls, backend_field: str) -> str: The mapped field name to be used in an OPTIMADE-compliant response. """ - return {alias: real for real, alias in cls.all_aliases()}.get( - backend_field, backend_field + # first map the property back to the field as presented in the optimade database. + inv_alias_dict = dict((real, alias) for alias, real in cls.all_aliases()) + backend_field = cls.get_map_field_from_dict(backend_field, inv_alias_dict) + inv_prefix_dict = dict( + (real, alias) for alias, real in cls.all_prefixed_fields() ) + return cls.get_map_field_from_dict(backend_field, inv_prefix_dict) @classmethod @lru_cache(maxsize=128) @@ -327,31 +440,22 @@ def get_required_fields(cls) -> set: @classmethod def map_back(cls, doc: dict) -> dict: - """Map properties from MongoDB to OPTIMADE. + """Map properties in a dictionary to the OPTIMADE fields. - Starting from a MongoDB document `doc`, map the DB fields to the corresponding OPTIMADE fields. + Starting from a document `doc`, map the DB fields to the corresponding OPTIMADE fields. Then, the fields are all added to the top-level field "attributes", with the exception of other top-level fields, defined in `cls.TOP_LEVEL_NON_ATTRIBUTES_FIELDS`. All fields not in `cls.TOP_LEVEL_NON_ATTRIBUTES_FIELDS` + "attributes" will be removed. Finally, the `type` is given the value of the specified `cls.ENDPOINT`. Parameters: - doc: A resource object in MongoDB format. + doc: A resource object. Returns: A resource object in OPTIMADE format. """ - mapping = ((real, alias) for alias, real in cls.all_aliases()) - newdoc = {} - reals = {real.split(".", 1)[0] for alias, real in cls.all_aliases()} - for key in doc: - if key not in reals: - newdoc[key] = doc[key] - for real, alias in mapping: - value, found = get_value(doc, real) - if found: - newdoc[alias] = value + newdoc = cls.add_alias_and_prefix(doc) if "attributes" in newdoc: raise Exception("Will overwrite doc field!") @@ -371,10 +475,41 @@ def map_back(cls, doc: dict) -> dict: return newdoc @classmethod - def deserialize( - cls, results: Union[dict, Iterable[dict]] - ) -> Union[List[EntryResource], EntryResource]: - if isinstance(results, dict): - return cls.ENTRY_RESOURCE_CLASS(**cls.map_back(results)) + def add_alias_and_prefix(cls, doc: dict): + """Converts a dictionary with field names that match the backend database with the field names that are presented in the OPTIMADE database. + The way these fields are converted is read from: + [`all_aliases()`][optimade.server.mappers.entries.BaseResourceMapper.all_aliases]. + [`all_prefixed_fields()`][optimade.server.mappers.entries.BaseResourceMapper.all_prefixed_fields] + + Parameters: + doc: A dictionary with the backend fields. - return [cls.ENTRY_RESOURCE_CLASS(**cls.map_back(doc)) for doc in results] + Returns: + A dictionary with the fieldnames as presented by OPTIMADE + """ + newdoc = {} + mod_doc = doc.copy() + # First apply all the aliases (They are sorted so the deepest nesting level occurs first.) + sorted_aliases = sorted(cls.all_aliases(), key=lambda ele: ele[0], reverse=True) + for alias, real in sorted_aliases: + value, found = read_from_nested_dict(mod_doc, real) + if not found: + # Some backend fields are used for more than one optimade field. As they are deleted from mod_doc the first time they are mapped we need a backup option to read the data. + value, found = read_from_nested_dict(doc, real) + if found: + write_to_nested_dict(newdoc, alias, value) + remove_from_nested_dict(mod_doc, real) + # move fields without alias to new doc + newdoc.update(mod_doc) + # apply prefixes + for prefixed_field, unprefixed_field in cls.all_prefixed_fields(): + value, found = read_from_nested_dict(newdoc, unprefixed_field) + if found: + write_to_nested_dict(newdoc, prefixed_field, value) + remove_from_nested_dict(newdoc, unprefixed_field) + + return newdoc + + @classmethod + def deserialize(cls, results: Iterable[dict]) -> List[EntryResource]: + return [cls.ENTRY_RESOURCE_CLASS(**result) for result in results] diff --git a/optimade/server/routers/utils.py b/optimade/server/routers/utils.py index a56d030d4..26820d4b1 100644 --- a/optimade/server/routers/utils.py +++ b/optimade/server/routers/utils.py @@ -26,7 +26,7 @@ __all__ = ( "BASE_URL_PREFIXES", "meta_values", - "handle_response_fields", + "remove_exclude_fields", "get_included_relationships", "get_base_url", "get_entries", @@ -91,10 +91,9 @@ def meta_values( ) -def handle_response_fields( +def remove_exclude_fields( results: Union[List[EntryResource], EntryResource], exclude_fields: Set[str], - include_fields: Set[str], ) -> List[Dict[str, Any]]: """Handle query parameter `response_fields`. @@ -103,8 +102,6 @@ def handle_response_fields( Parameters: exclude_fields: Fields under `attributes` to be excluded from the response. - include_fields: Fields under `attributes` that were requested that should be - set to null if missing in the entry. Returns: List of resulting resources as dictionaries after pruning according to @@ -123,11 +120,6 @@ def handle_response_fields( if field in new_entry["attributes"]: del new_entry["attributes"][field] - # Include missing fields that were requested in `response_fields` - for field in include_fields: - if field not in new_entry["attributes"]: - new_entry["attributes"][field] = None - new_results.append(new_entry) return new_results @@ -203,7 +195,7 @@ def get_included_relationships( ) # still need to handle pagination - ref_results, _, _, _, _ = ENTRY_COLLECTIONS[entry_type].find(params) + ref_results, _, _, _ = ENTRY_COLLECTIONS[entry_type].find(params) included[entry_type] = ref_results # flatten dict by endpoint to list @@ -245,8 +237,7 @@ def get_entries( results, data_returned, more_data_available, - fields, - include_fields, + exclude_fields, ) = collection.find(params) include = [] @@ -265,8 +256,8 @@ def get_entries( else: links = ToplevelLinks(next=None) - if fields or include_fields: - results = handle_response_fields(results, fields, include_fields) + if exclude_fields: + results = remove_exclude_fields(results, exclude_fields) return response( links=links, @@ -299,8 +290,7 @@ def get_single_entry( results, data_returned, more_data_available, - fields, - include_fields, + exclude_fields, ) = collection.find(params) include = [] @@ -315,8 +305,8 @@ def get_single_entry( links = ToplevelLinks(next=None) - if fields or include_fields and results is not None: - results = handle_response_fields(results, fields, include_fields)[0] + if exclude_fields and results is not None: + results = remove_exclude_fields(results, exclude_fields)[0] return response( links=links, diff --git a/optimade/validator/validator.py b/optimade/validator/validator.py index a0444593b..5f7fbe244 100644 --- a/optimade/validator/validator.py +++ b/optimade/validator/validator.py @@ -559,6 +559,7 @@ def _check_response_fields(self, endp: str, fields: List[str]) -> Tuple[bool, st subset_fields = random.sample(fields, min(len(fields) - 1, 3)) test_query = f"{endp}?response_fields={','.join(subset_fields)}&page_limit=1" response, _ = self._get_endpoint(test_query, multistage=True) + subset_fields = [field.split(".")[0] for field in subset_fields] if response and len(response.json()["data"]) >= 0: doc = response.json()["data"][0] diff --git a/tests/server/routers/test_info.py b/tests/server/routers/test_info.py index 397e3cb4d..dbb0419b8 100644 --- a/tests/server/routers/test_info.py +++ b/tests/server/routers/test_info.py @@ -86,7 +86,10 @@ def test_info_structures_unit(self): if field in unit_fields: assert "unit" in info_keys, f"Field: {field}" else: - assert "unit" not in info_keys, f"Field: {field}" + if not field.startswith( + "_" + ): # database specific properties can also have units + assert "unit" not in info_keys, f"Field: {field}" def test_provider_fields(self): """Check the presence of provider-specific fields""" diff --git a/tests/server/routers/test_references.py b/tests/server/routers/test_references.py index c40c8e195..7dd06360a 100644 --- a/tests/server/routers/test_references.py +++ b/tests/server/routers/test_references.py @@ -23,6 +23,12 @@ class TestSingleReferenceEndpointDifficult(RegularEndpointTests): response_cls = ReferenceResponseOne +class TestResponseFields(RegularEndpointTests): + + request_str = "references?response_fields=volume,month,organization&page_limit=1" + response_cls = ReferenceResponseMany + + class TestMissingSingleReferenceEndpoint(RegularEndpointTests): """Tests for /references/ for unknown """ diff --git a/tests/server/routers/test_structures.py b/tests/server/routers/test_structures.py index c83dd4a20..eefdfe4a6 100644 --- a/tests/server/routers/test_structures.py +++ b/tests/server/routers/test_structures.py @@ -67,6 +67,16 @@ def test_structures_endpoint_data(self): assert self.json_response["data"]["type"] == "structures" assert "attributes" in self.json_response["data"] assert "_exmpl_chemsys" in self.json_response["data"]["attributes"] + assert ( + "density" + in self.json_response["data"]["attributes"]["_exmpl_physical_properties"] + ) + assert ( + self.json_response["data"]["attributes"]["_exmpl_physical_properties"][ + "density" + ] + == 10.07 + ) def test_check_response_single_structure(check_response): diff --git a/tests/server/test_mappers.py b/tests/server/test_mappers.py index ed90ad040..9876d5718 100644 --- a/tests/server/test_mappers.py +++ b/tests/server/test_mappers.py @@ -24,14 +24,37 @@ class MyMapper(mapper(MAPPER)): def test_property_aliases(mapper): class MyMapper(mapper(MAPPER)): - PROVIDER_FIELDS = ("dft_parameters", "test_field") + PROVIDER_FIELDS = ( + "dft_parameters", + "test_field", + "database_specific_field", + "species.oxidation_state", + ) LENGTH_ALIASES = (("_exmpl_test_field", "test_field_len"),) - ALIASES = (("field", "completely_different_field"),) + ALIASES = ( + ("field", "completely_different_field"), + ("species", "particle_type"), + ("species.name", "particles.class"), + ("database_specific_field", "backend.subfield.sub_sub_field"), + ) + ENTRY_RESOURCE_ATTRIBUTES = { + "species": 42 + } # This is not a proper value for ENTRY_RESOURCE_ATTRIBUTES but we need ut to test allowing database specific properties within optimade dictionary fields. mapper = MyMapper() assert mapper.get_backend_field("_exmpl_dft_parameters") == "dft_parameters" assert mapper.get_backend_field("_exmpl_test_field") == "test_field" assert mapper.get_backend_field("field") == "completely_different_field" + assert mapper.get_backend_field("species.mass") == "particle_type.mass" + assert ( + mapper.get_backend_field("species.oxidation_state") + == "particle_type.oxidation_state" + ) + assert mapper.get_backend_field("species.name") == "particles.class" + assert ( + mapper.get_backend_field("_exmpl_database_specific_field") + == "backend.subfield.sub_sub_field" + ) assert mapper.length_alias_for("_exmpl_test_field") == "test_field_len" assert mapper.length_alias_for("test_field") is None assert mapper.get_backend_field("test_field") == "test_field" @@ -42,6 +65,16 @@ class MyMapper(mapper(MAPPER)): assert mapper.get_optimade_field("test_field") == "_exmpl_test_field" assert mapper.get_optimade_field("completely_different_field") == "field" assert mapper.get_optimade_field("nonexistent_field") == "nonexistent_field" + assert mapper.get_optimade_field("particles.class") == "species.name" + assert mapper.get_optimade_field("particle_type.mass") == "species.mass" + assert ( + mapper.get_optimade_field("backend.subfield.sub_sub_field") + == "_exmpl_database_specific_field" + ) + assert ( + mapper.get_optimade_field("particle_type.oxidation_state") + == "species._exmpl_oxidation_state" + ) with pytest.warns(DeprecationWarning): assert mapper.alias_of("nonexistent_field") == "nonexistent_field" @@ -79,3 +112,19 @@ class MyMapper(mapper(MAPPER)): } output_dict = mapper.map_back(input_dict) assert output_dict["attributes"]["some_field"] == 42 + + +def test_map_back_to_nested_field(mapper): + class MyMapper(mapper(MAPPER)): + ALIASES = (("some_field.subfield", "main_field.nested_field.field_we_need"),) + + mapper = MyMapper() + input_dict = { + "main_field": { + "nested_field": {"field_we_need": 42, "other_field": 78}, + "another_nested_field": 89, + }, + "secondary_field": 52, + } + output_dict = mapper.map_back(input_dict) + assert output_dict["attributes"]["some_field"]["subfield"] == 42 diff --git a/tests/test_config.json b/tests/test_config.json index 67180f77f..39ce39c07 100644 --- a/tests/test_config.json +++ b/tests/test_config.json @@ -18,7 +18,8 @@ "provider_fields": { "structures": [ "band_gap", - {"name": "chemsys", "type": "string", "description": "A string representing the chemical system in an ordered fashion"} + {"name": "chemsys", "type": "string", "description": "A string representing the chemical system in an ordered fashion"}, + {"name":"physical_properties.density", "type": "float", "description": "The density of the material.", "unit": "kg/L"} ] }, "aliases": { @@ -28,7 +29,8 @@ "chemical_formula_descriptive": "pretty_formula", "chemical_formula_reduced": "pretty_formula", "chemical_formula_anonymous": "formula_anonymous", - "chemical_formula_hill": "fancy_formulas.hill" + "chemical_formula_hill": "fancy_formulas.hill", + "physical_properties.density": "dichtheid" } }, "length_aliases": { From 5cec6d64b907c400efb462e439707715739ba63e Mon Sep 17 00:00:00 2001 From: Johan Bergsma <29785380+JPBergsma@users.noreply.github.com> Date: Mon, 14 Nov 2022 10:08:42 +0100 Subject: [PATCH 04/20] Added type hint to alias_and_prefix function. --- optimade/server/mappers/entries.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/optimade/server/mappers/entries.py b/optimade/server/mappers/entries.py index 19142bb86..b4f21eb79 100644 --- a/optimade/server/mappers/entries.py +++ b/optimade/server/mappers/entries.py @@ -475,7 +475,7 @@ def map_back(cls, doc: dict) -> dict: return newdoc @classmethod - def add_alias_and_prefix(cls, doc: dict): + def add_alias_and_prefix(cls, doc: dict) -> dict: """Converts a dictionary with field names that match the backend database with the field names that are presented in the OPTIMADE database. The way these fields are converted is read from: [`all_aliases()`][optimade.server.mappers.entries.BaseResourceMapper.all_aliases]. From cbba008b625596409a5c90de22e527413b60b3fa Mon Sep 17 00:00:00 2001 From: Johan Bergsma <29785380+JPBergsma@users.noreply.github.com> Date: Mon, 14 Nov 2022 10:28:55 +0100 Subject: [PATCH 05/20] Added missing.. --- optimade/server/mappers/entries.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/optimade/server/mappers/entries.py b/optimade/server/mappers/entries.py index b4f21eb79..4ea3182ee 100644 --- a/optimade/server/mappers/entries.py +++ b/optimade/server/mappers/entries.py @@ -327,7 +327,7 @@ def get_backend_field(cls, optimade_field: str) -> str: If the field is not found, the unmodified `optimade_field` is returned. This means for an (OPTIMADE, DB) alias of (`species`, `kinds`), `get_backend_fields("species.mass")` - will return `kinds.mass` as there is no specific entry for "species.mass" in the aliases + will return `kinds.mass` as there is no specific entry for "species.mass" in the aliases. Arguments: optimade_field: The OPTIMADE field to attempt to map to the backend-specific field. From 67f2dea8be6baaea9b7ab9480f81da8d527e9be5 Mon Sep 17 00:00:00 2001 From: Johan Bergsma <29785380+JPBergsma@users.noreply.github.com> Date: Tue, 15 Nov 2022 17:11:04 +0100 Subject: [PATCH 06/20] Fixed bug in elastic search where the requested fields were determined again eventhough they had already been determined in find in entry_collections.py --- optimade/server/entry_collections/elasticsearch.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/optimade/server/entry_collections/elasticsearch.py b/optimade/server/entry_collections/elasticsearch.py index 75590525a..e7176f491 100644 --- a/optimade/server/entry_collections/elasticsearch.py +++ b/optimade/server/entry_collections/elasticsearch.py @@ -169,9 +169,7 @@ def _run_db_query( page_offset = criteria.get("skip", 0) limit = criteria.get("limit", CONFIG.page_limit) - all_aliased_fields = [ - self.resource_mapper.get_backend_field(field) for field in self.all_fields - ] + all_aliased_fields = [field for field in criteria.get("projection", [])] search = search.source(includes=all_aliased_fields) elastic_sort = [ From 775009de0749b504357b8b09e9a979c92c2fb990 Mon Sep 17 00:00:00 2001 From: Johan Bergsma <29785380+JPBergsma@users.noreply.github.com> Date: Tue, 15 Nov 2022 18:01:29 +0100 Subject: [PATCH 07/20] Use https://github.com/pycqa/flake8 instead of https://gitlab.com/pycqa/flake8 --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 320af4767..a3fc5359d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -23,7 +23,7 @@ repos: - id: trailing-whitespace args: [--markdown-linebreak-ext=md] - - repo: https://gitlab.com/pycqa/flake8 + - repo: https://github.com/pycqa/flake8 rev: '3.9.2' hooks: - id: flake8 From 1358b9d5a9585aafda4b623a91271c7ff3a079f4 Mon Sep 17 00:00:00 2001 From: Johan Bergsma <29785380+JPBergsma@users.noreply.github.com> Date: Wed, 16 Nov 2022 09:26:05 +0100 Subject: [PATCH 08/20] Added pyyaml to install requirements and added python 3.11 classifier. --- setup.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/setup.py b/setup.py index 1bcc37ad7..4aee2b26c 100644 --- a/setup.py +++ b/setup.py @@ -93,6 +93,7 @@ "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", "Intended Audience :: Developers", "Topic :: Database", "Topic :: Database :: Database Engines/Servers", @@ -105,6 +106,7 @@ "pydantic~=1.10,>=1.10.2", "email_validator~=1.2", "requests~=2.28", + "pyyaml~=5.4", ], extras_require={ "all": all_deps, From bb228ee0c1c9731c964b68ae61f282309f16ebd8 Mon Sep 17 00:00:00 2001 From: Johan Bergsma <29785380+JPBergsma@users.noreply.github.com> Date: Wed, 16 Nov 2022 10:06:08 +0100 Subject: [PATCH 09/20] Made a small change to the descriptions of the nested dict functions to trigger tests. --- optimade/server/mappers/entries.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/optimade/server/mappers/entries.py b/optimade/server/mappers/entries.py index 4ea3182ee..adc229c01 100644 --- a/optimade/server/mappers/entries.py +++ b/optimade/server/mappers/entries.py @@ -21,7 +21,7 @@ def write_to_nested_dict(dictionary: dict, composite_key: str, value: Any): Arguments: dictionary: the dictionary to which the value should be added under the composite_key. - composite_key: The key under which the value should be stored. The sub_keys should be separated by a ".". + composite_key: The key under which the value should be stored. The sub keys should be separated by a ".". e.g. "outer_level_key.inner_level_key" value: The value that should be stored in the dictionary. @@ -40,7 +40,7 @@ def read_from_nested_dict(dictionary: dict, composite_key: str) -> Any: Arguments: dictionary: the dictionary from which the value under the composite_key should be read . - composite_key: The key under which the value should be read. The sub_keys should be separated by a ".". + composite_key: The key under which the value should be read. The sub keys should be separated by a ".". e.g. "outer_level_key.inner_level_key" Returns: @@ -62,7 +62,7 @@ def remove_from_nested_dict(dictionary: dict, composite_key: str): Arguments: dictionary: the dictionary from which the composite key should be removed. - composite_key: The key that should be removed. The sub_keys should be separated by a ".". + composite_key: The key that should be removed. The sub keys should be separated by a ".". e.g. "outer_level_key.inner_level_key" If the removal of key causes the dictionary one level up to be empty it is removed as well. """ From 8245884daaa1a302024a8f114769e939829b6d87 Mon Sep 17 00:00:00 2001 From: Johan Bergsma <29785380+JPBergsma@users.noreply.github.com> Date: Wed, 16 Nov 2022 10:33:43 +0100 Subject: [PATCH 10/20] Removed pyyaml from serverdeps as I placed it already under installrequirements. --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 4aee2b26c..eeea7e483 100644 --- a/setup.py +++ b/setup.py @@ -106,7 +106,7 @@ "pydantic~=1.10,>=1.10.2", "email_validator~=1.2", "requests~=2.28", - "pyyaml~=5.4", + "pyyaml>=5.4, <7", ], extras_require={ "all": all_deps, From 7b2320b4fc361d2462ad298a51340e5743ac2e06 Mon Sep 17 00:00:00 2001 From: Johan Bergsma <29785380+JPBergsma@users.noreply.github.com> Date: Wed, 16 Nov 2022 10:55:53 +0100 Subject: [PATCH 11/20] Removed get_value function from entries.py as it was no longer needed and removed fallback value for get_non_optional_fields. --- optimade/server/entry_collections/entry_collections.py | 3 --- optimade/server/mappers/entries.py | 9 --------- 2 files changed, 12 deletions(-) diff --git a/optimade/server/entry_collections/entry_collections.py b/optimade/server/entry_collections/entry_collections.py index 97646ffb7..41f790fea 100644 --- a/optimade/server/entry_collections/entry_collections.py +++ b/optimade/server/entry_collections/entry_collections.py @@ -280,10 +280,7 @@ def get_non_optional_fields(self) -> List[str]: attributes = schema.copy() while path: next_key = path.pop(0) - if next_key not in attributes: - return [] attributes = attributes[next_key] - return attributes["required"] @lru_cache(maxsize=4) diff --git a/optimade/server/mappers/entries.py b/optimade/server/mappers/entries.py index adc229c01..8efae401f 100644 --- a/optimade/server/mappers/entries.py +++ b/optimade/server/mappers/entries.py @@ -7,15 +7,6 @@ __all__ = ("BaseResourceMapper",) -def get_value(dict, real): - split_real = real.split(".", 1) - if not split_real[0] in dict: - return None, False - if len(split_real) == 1: - return dict[split_real[0]], True - return get_value(dict[split_real[0]], split_real[1]) - - def write_to_nested_dict(dictionary: dict, composite_key: str, value: Any): """Puts a value into an arbitrary position in a nested dict. From d87bc1bffb3f787c97393698865b8592fe4a5aeb Mon Sep 17 00:00:00 2001 From: Johan Bergsma <29785380+JPBergsma@users.noreply.github.com> Date: Wed, 16 Nov 2022 10:55:53 +0100 Subject: [PATCH 12/20] Removed get_value function from entries.py as it was no longer needed and removed fallback value for get_non_optional_fields. --- optimade/server/entry_collections/entry_collections.py | 3 --- optimade/server/mappers/entries.py | 9 --------- setup.py | 3 +-- 3 files changed, 1 insertion(+), 14 deletions(-) diff --git a/optimade/server/entry_collections/entry_collections.py b/optimade/server/entry_collections/entry_collections.py index 97646ffb7..41f790fea 100644 --- a/optimade/server/entry_collections/entry_collections.py +++ b/optimade/server/entry_collections/entry_collections.py @@ -280,10 +280,7 @@ def get_non_optional_fields(self) -> List[str]: attributes = schema.copy() while path: next_key = path.pop(0) - if next_key not in attributes: - return [] attributes = attributes[next_key] - return attributes["required"] @lru_cache(maxsize=4) diff --git a/optimade/server/mappers/entries.py b/optimade/server/mappers/entries.py index adc229c01..8efae401f 100644 --- a/optimade/server/mappers/entries.py +++ b/optimade/server/mappers/entries.py @@ -7,15 +7,6 @@ __all__ = ("BaseResourceMapper",) -def get_value(dict, real): - split_real = real.split(".", 1) - if not split_real[0] in dict: - return None, False - if len(split_real) == 1: - return dict[split_real[0]], True - return get_value(dict[split_real[0]], split_real[1]) - - def write_to_nested_dict(dictionary: dict, composite_key: str, value: Any): """Puts a value into an arbitrary position in a nested dict. diff --git a/setup.py b/setup.py index eeea7e483..c8fdd63a1 100644 --- a/setup.py +++ b/setup.py @@ -22,7 +22,6 @@ mongo_deps = ["pymongo>=3.12.1,<5", "mongomock~=4.1"] server_deps = [ "uvicorn~=0.19", - "pyyaml>=5.4,<7", # Keep at pyyaml 5.4 for aiida-core support ] + mongo_deps @@ -106,7 +105,7 @@ "pydantic~=1.10,>=1.10.2", "email_validator~=1.2", "requests~=2.28", - "pyyaml>=5.4, <7", + "pyyaml>=5.4, <7", # Keep at pyyaml 5.4 for aiida-core support ], extras_require={ "all": all_deps, From 6b99822bdeb16296b7424916225ab41721db03c7 Mon Sep 17 00:00:00 2001 From: Johan Bergsma <29785380+JPBergsma@users.noreply.github.com> Date: Wed, 16 Nov 2022 13:15:48 +0100 Subject: [PATCH 13/20] Simplified set_field_to_none_if_missing_in_dict and moved it out of the class. --- .../entry_collections/entry_collections.py | 43 ++++++++----------- 1 file changed, 19 insertions(+), 24 deletions(-) diff --git a/optimade/server/entry_collections/entry_collections.py b/optimade/server/entry_collections/entry_collections.py index 41f790fea..140e17254 100644 --- a/optimade/server/entry_collections/entry_collections.py +++ b/optimade/server/entry_collections/entry_collections.py @@ -18,6 +18,24 @@ ) +def set_field_to_none_if_missing_in_dict(entry: dict, field: str): + from optimade.server.mappers.entries import ( + read_from_nested_dict, + write_to_nested_dict, + ) + + _, present = read_from_nested_dict(entry, field) + if not present: + split_field = field.split(".", 1) + # It would be nice if there would be a more universal way to handle special cases like this. + if split_field[0] == "structure_features": + value = [] + else: + value = None + write_to_nested_dict(entry, field, value) + return entry + + def create_collection( name: str, resource_cls: EntryResource, resource_mapper: BaseResourceMapper ) -> "EntryCollection": @@ -117,29 +135,6 @@ def count(self, **kwargs: Any) -> int: """ - def set_field_to_none_if_missing_in_dict(self, entry: dict, field: str): - - split_field = field.split(".", 1) - if split_field[0] in entry: - if len(split_field) > 1: - self.set_field_to_none_if_missing_in_dict( - entry[split_field[0]], split_field[1] - ) - else: - if len(split_field) == 1: - if ( - split_field[0] == "structure_features" - ): # It would be nice if there would be a more universal way to handle special cases like this. - entry[split_field[0]] = [] - else: - entry[split_field[0]] = None - else: - entry[split_field[0]] = {} - self.set_field_to_none_if_missing_in_dict( - entry[split_field[0]], split_field[1] - ) - return entry - def find( self, params: Union[EntryListingQueryParams, SingleEntryQueryParams] ) -> Tuple[Union[List[EntryResource], EntryResource, None], int, bool, Set[str]]: @@ -195,7 +190,7 @@ def check_and_add_missing_fields(self, results: dict, response_fields: set): # Include missing fields for result in results: for field in include_fields: - self.set_field_to_none_if_missing_in_dict(result["attributes"], field) + set_field_to_none_if_missing_in_dict(result["attributes"], field) bad_optimade_fields = set() bad_provider_fields = set() From 841520d8346db96ce29031519d6206e7cb69cb36 Mon Sep 17 00:00:00 2001 From: Johan Bergsma <29785380+JPBergsma@users.noreply.github.com> Date: Wed, 16 Nov 2022 13:43:21 +0100 Subject: [PATCH 14/20] Moved functions related to handling nested dicts to utils.py. --- .../entry_collections/entry_collections.py | 21 +---- optimade/server/mappers/entries.py | 71 ++-------------- optimade/utils.py | 83 ++++++++++++++++++- 3 files changed, 88 insertions(+), 87 deletions(-) diff --git a/optimade/server/entry_collections/entry_collections.py b/optimade/server/entry_collections/entry_collections.py index 140e17254..57311b779 100644 --- a/optimade/server/entry_collections/entry_collections.py +++ b/optimade/server/entry_collections/entry_collections.py @@ -16,24 +16,7 @@ UnknownProviderProperty, QueryParamNotUsed, ) - - -def set_field_to_none_if_missing_in_dict(entry: dict, field: str): - from optimade.server.mappers.entries import ( - read_from_nested_dict, - write_to_nested_dict, - ) - - _, present = read_from_nested_dict(entry, field) - if not present: - split_field = field.split(".", 1) - # It would be nice if there would be a more universal way to handle special cases like this. - if split_field[0] == "structure_features": - value = [] - else: - value = None - write_to_nested_dict(entry, field, value) - return entry +from optimade.utils import set_field_to_none_if_missing_in_dict def create_collection( @@ -179,7 +162,7 @@ def find( return (results, data_returned, more_data_available, exclude_fields) - def check_and_add_missing_fields(self, results: dict, response_fields: set): + def check_and_add_missing_fields(self, results: List[dict], response_fields: set): """Checks whether the response_fields and mandatory fields are present. If they are not present the values are set to None, so the deserialization works correctly. It also checks whether all fields in the response have been defined either in the model or in the config file. diff --git a/optimade/server/mappers/entries.py b/optimade/server/mappers/entries.py index 8efae401f..9818828c8 100644 --- a/optimade/server/mappers/entries.py +++ b/optimade/server/mappers/entries.py @@ -3,76 +3,15 @@ import warnings from optimade.server.config import CONFIG from optimade.models.entries import EntryResource +from optimade.utils import ( + write_to_nested_dict, + read_from_nested_dict, + remove_from_nested_dict, +) __all__ = ("BaseResourceMapper",) -def write_to_nested_dict(dictionary: dict, composite_key: str, value: Any): - """Puts a value into an arbitrary position in a nested dict. - - Arguments: - dictionary: the dictionary to which the value should be added under the composite_key. - composite_key: The key under which the value should be stored. The sub keys should be separated by a ".". - e.g. "outer_level_key.inner_level_key" - value: The value that should be stored in the dictionary. - - """ - if "." in composite_key: - split_key = composite_key.split(".", 1) - if split_key[0] not in dictionary: - dictionary[split_key[0]] = {} - write_to_nested_dict(dictionary[split_key[0]], split_key[1], value) - else: - dictionary[composite_key] = value - - -def read_from_nested_dict(dictionary: dict, composite_key: str) -> Any: - """Reads a value from an arbitrary position in a nested dict. - - Arguments: - dictionary: the dictionary from which the value under the composite_key should be read . - composite_key: The key under which the value should be read. The sub keys should be separated by a ".". - e.g. "outer_level_key.inner_level_key" - - Returns: - The value as stored in the dictionary. If the value is not stored in the dictionary it returns None. - A boolean. True indicates that the composite_key was present in the dictionary, False that it is not present. - - """ - split_key = composite_key.split(".", 1) - if split_key[0] in dictionary: - if len(split_key) > 1: - return read_from_nested_dict(dictionary[split_key[0]], split_key[1]) - else: - return dictionary[composite_key], True - return None, False - - -def remove_from_nested_dict(dictionary: dict, composite_key: str): - """Removes an entry from an arbitrary position in a nested dict. - - Arguments: - dictionary: the dictionary from which the composite key should be removed. - composite_key: The key that should be removed. The sub keys should be separated by a ".". - e.g. "outer_level_key.inner_level_key" - If the removal of key causes the dictionary one level up to be empty it is removed as well. - """ - split_key = composite_key.split(".", 1) - if split_key[0] in dictionary: - if len(split_key) > 1: - empty = remove_from_nested_dict(dictionary[split_key[0]], split_key[1]) - if empty: - return remove_from_nested_dict(dictionary, split_key[0]) - else: - return False - else: - del dictionary[composite_key] - if not dictionary: - return True - else: - return False - - class classproperty(property): """A simple extension of the property decorator that binds to types rather than instances. diff --git a/optimade/utils.py b/optimade/utils.py index a7343a28c..a5baf4d90 100644 --- a/optimade/utils.py +++ b/optimade/utils.py @@ -1,10 +1,10 @@ """This submodule implements some useful utilities for dealing -with OPTIMADE providers that can be used in server or client code. +with OPTIMADE providers that can be used in server or client code and for handling nested dicts. """ import json -from typing import List, Iterable +from typing import List, Iterable, Any from pydantic import ValidationError from optimade.models.links import LinksResource @@ -164,3 +164,82 @@ def get_all_databases() -> Iterable[str]: yield str(link.attributes.base_url) except RuntimeError: pass + + +def write_to_nested_dict(dictionary: dict, composite_key: str, value: Any): + """Puts a value into an arbitrary position in a nested dict. + + Arguments: + dictionary: the dictionary to which the value should be added under the composite_key. + composite_key: The key under which the value should be stored. The sub keys should be separated by a ".". + e.g. "outer_level_key.inner_level_key" + value: The value that should be stored in the dictionary. + + """ + if "." in composite_key: + split_key = composite_key.split(".", 1) + if split_key[0] not in dictionary: + dictionary[split_key[0]] = {} + write_to_nested_dict(dictionary[split_key[0]], split_key[1], value) + else: + dictionary[composite_key] = value + + +def read_from_nested_dict(dictionary: dict, composite_key: str) -> Any: + """Reads a value from an arbitrary position in a nested dict. + + Arguments: + dictionary: the dictionary from which the value under the composite_key should be read . + composite_key: The key under which the value should be read. The sub keys should be separated by a ".". + e.g. "outer_level_key.inner_level_key" + + Returns: + The value as stored in the dictionary. If the value is not stored in the dictionary it returns None. + A boolean. True indicates that the composite_key was present in the dictionary, False that it is not present. + + """ + split_key = composite_key.split(".", 1) + if split_key[0] in dictionary: + if len(split_key) > 1: + return read_from_nested_dict(dictionary[split_key[0]], split_key[1]) + else: + return dictionary[composite_key], True + return None, False + + +def remove_from_nested_dict(dictionary: dict, composite_key: str): + """Removes an entry from an arbitrary position in a nested dict. + + Arguments: + dictionary: the dictionary from which the composite key should be removed. + composite_key: The key that should be removed. The sub keys should be separated by a ".". + e.g. "outer_level_key.inner_level_key" + If the removal of key causes the dictionary one level up to be empty it is removed as well. + """ + split_key = composite_key.split(".", 1) + if split_key[0] in dictionary: + if len(split_key) > 1: + empty = remove_from_nested_dict(dictionary[split_key[0]], split_key[1]) + if empty: + return remove_from_nested_dict(dictionary, split_key[0]) + else: + return False + else: + del dictionary[composite_key] + if not dictionary: + return True + else: + return False + + +def set_field_to_none_if_missing_in_dict(entry: dict, field: str): + _, present = read_from_nested_dict(entry, field) + if not present: + split_field = field.split(".", 1) + # It would be nice if there would be a more universal way to handle special cases like this. + if split_field[0] == "structure_features": + value = [] + else: + value = None + write_to_nested_dict(entry, field, value) + return entry From c22089f27bfb389b7ebdc371438cf5128e6ef2a2 Mon Sep 17 00:00:00 2001 From: Johan Bergsma <29785380+JPBergsma@users.noreply.github.com> Date: Wed, 16 Nov 2022 14:06:28 +0100 Subject: [PATCH 15/20] Updated docstring remove_exclude_fields and removed unneccesary brackets. --- optimade/server/entry_collections/entry_collections.py | 2 +- optimade/server/routers/utils.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/optimade/server/entry_collections/entry_collections.py b/optimade/server/entry_collections/entry_collections.py index 57311b779..0974e3358 100644 --- a/optimade/server/entry_collections/entry_collections.py +++ b/optimade/server/entry_collections/entry_collections.py @@ -160,7 +160,7 @@ def find( detail=f"Instead of a single entry, {data_returned} entries were found", ) - return (results, data_returned, more_data_available, exclude_fields) + return results, data_returned, more_data_available, exclude_fields def check_and_add_missing_fields(self, results: List[dict], response_fields: set): """Checks whether the response_fields and mandatory fields are present. diff --git a/optimade/server/routers/utils.py b/optimade/server/routers/utils.py index 26820d4b1..91f8cd3ca 100644 --- a/optimade/server/routers/utils.py +++ b/optimade/server/routers/utils.py @@ -95,12 +95,12 @@ def remove_exclude_fields( results: Union[List[EntryResource], EntryResource], exclude_fields: Set[str], ) -> List[Dict[str, Any]]: - """Handle query parameter `response_fields`. + """Removes the fields that are present in exclude_fields from the entries in the results.`. - It is assumed that all fields are under `attributes`. - This is due to all other top-level fields are REQUIRED in the response. + It is assumed that all fields are under `attributes`, because all top-level fields are REQUIRED in the response. Parameters: + results: A list with resources with dictionaries from which the fields in exclude_fields should be removed. exclude_fields: Fields under `attributes` to be excluded from the response. Returns: From 4c46f555a28cbb7b442f175fd605702e328813ab Mon Sep 17 00:00:00 2001 From: Johan Bergsma <29785380+JPBergsma@users.noreply.github.com> Date: Thu, 17 Nov 2022 14:39:23 +0100 Subject: [PATCH 16/20] Updateof the explanation of the handling of nested fields Co-authored-by: Matthew Evans <7916000+ml-evs@users.noreply.github.com> --- docs/getting_started/setting_up_an_api.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/getting_started/setting_up_an_api.md b/docs/getting_started/setting_up_an_api.md index 92d39a576..7efaa13fc 100644 --- a/docs/getting_started/setting_up_an_api.md +++ b/docs/getting_started/setting_up_an_api.md @@ -37,7 +37,7 @@ Instead, if you are storing chemical formulae as an unreduced count per simulati This would then instead require option 2 above, namely either the addition of auxiliary fields that store the correct (or mappable) OPTIMADE format in the database, or the creation of a secondary database that returns the pre-converted structures. In the simplest case, the mapper classes can be used to define aliases between fields in the database and the OPTIMADE field name; these can be configured via the [`aliases`][optimade.server.config.ServerConfig.aliases] option as a dictionary mapping stored in a dictionary under the appropriate endpoint name, e.g. `"aliases": {"structures": {"chemical_formula_reduced": "my_chem_form"}}`, or defined as part of a custom mapper class. -Incase the alias is a nested field the field names should be separated by ".". Example: `"aliases": { "structures": {"OPTIMADE_field": "field.nested_field"}}` +If the alias is a nested field (i.e., a field within a dictionary), the field names should be separated by `"."`, for example: `"aliases": { "structures": {"chemical_formula_reduced": "formulae.reduced"}}`. In either option, you should now be able to insert your data into the corresponding MongoDB (or otherwise) collection. From 1e49fb863f670a580d9affce9ff1ff6ea9d29aa2 Mon Sep 17 00:00:00 2001 From: Johan Bergsma <29785380+JPBergsma@users.noreply.github.com> Date: Wed, 30 Nov 2022 14:36:33 +0100 Subject: [PATCH 17/20] fix bug introduced by merge with master. --- optimade/server/entry_collections/entry_collections.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/optimade/server/entry_collections/entry_collections.py b/optimade/server/entry_collections/entry_collections.py index 1e3b95b2c..5af264148 100644 --- a/optimade/server/entry_collections/entry_collections.py +++ b/optimade/server/entry_collections/entry_collections.py @@ -156,7 +156,7 @@ def find( results = self.resource_mapper.deserialize(results) if single_entry: - raw_results = raw_results[0] if raw_results else None # type: ignore[assignment] + results = results[0] if results else None # type: ignore[assignment] if data_returned > 1: raise NotFound( From 596bb733673fe1c79384a5197c7820b843e38cd4 Mon Sep 17 00:00:00 2001 From: Johan Bergsma <29785380+JPBergsma@users.noreply.github.com> Date: Wed, 30 Nov 2022 15:01:40 +0100 Subject: [PATCH 18/20] Made changes to satisfy mypy. --- optimade/server/entry_collections/entry_collections.py | 2 +- optimade/server/routers/utils.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/optimade/server/entry_collections/entry_collections.py b/optimade/server/entry_collections/entry_collections.py index 5af264148..2a39a94be 100644 --- a/optimade/server/entry_collections/entry_collections.py +++ b/optimade/server/entry_collections/entry_collections.py @@ -149,7 +149,7 @@ def find( exclude_fields = self.all_fields - response_fields - results = [self.resource_mapper.map_back(doc) for doc in raw_results] + results: List = [self.resource_mapper.map_back(doc) for doc in raw_results] self.check_and_add_missing_fields(results, response_fields) if results: diff --git a/optimade/server/routers/utils.py b/optimade/server/routers/utils.py index 761b6f9a1..3cad26124 100644 --- a/optimade/server/routers/utils.py +++ b/optimade/server/routers/utils.py @@ -260,7 +260,7 @@ def get_entries( links = ToplevelLinks(next=None) if exclude_fields: - results = remove_exclude_fields(results, exclude_fields) + results = remove_exclude_fields(results, exclude_fields) # type: ignore[assignment] return response( links=links, @@ -312,7 +312,7 @@ def get_single_entry( links = ToplevelLinks(next=None) if exclude_fields and results is not None: - results = remove_exclude_fields(results, exclude_fields)[0] + results = remove_exclude_fields(results, exclude_fields)[0] # type: ignore[assignment] return response( links=links, From 703a9df90583c0077c49f5de0c7c049ba210b60c Mon Sep 17 00:00:00 2001 From: Johan Bergsma <29785380+JPBergsma@users.noreply.github.com> Date: Wed, 30 Nov 2022 18:38:56 +0100 Subject: [PATCH 19/20] Added option to automatically set missing but required fields to the Attributes class. --- optimade/models/jsonapi.py | 13 +++++++++++++ .../server/entry_collections/entry_collections.py | 15 ++++++++++++--- 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/optimade/models/jsonapi.py b/optimade/models/jsonapi.py index 23e0db241..3c2cb5307 100644 --- a/optimade/models/jsonapi.py +++ b/optimade/models/jsonapi.py @@ -298,6 +298,19 @@ def check_illegal_attributes_fields(cls, values): ) return values + @root_validator(pre=True) + def set_missing_to_none(cls, values): + if "set_missing_to_none" in values and values.pop("set_missing_to_none"): + for field in cls.schema()["required"]: + if field not in values: + if ( + field == "structure_features" + ): # It would be nice if there would be a more universal way to handle special cases like this. + values[field] = [] + else: + values[field] = None + return values + class Resource(BaseResource): """Resource objects appear in a JSON API document to represent resources.""" diff --git a/optimade/server/entry_collections/entry_collections.py b/optimade/server/entry_collections/entry_collections.py index 2a39a94be..d50cf71bd 100644 --- a/optimade/server/entry_collections/entry_collections.py +++ b/optimade/server/entry_collections/entry_collections.py @@ -142,6 +142,7 @@ def find( criteria = self.handle_query_params(params) single_entry = isinstance(params, SingleEntryQueryParams) response_fields = criteria.pop("fields") + response_fields_set = criteria.pop("response_fields_set", False) raw_results, data_returned, more_data_available = self._run_db_query( criteria, single_entry @@ -150,7 +151,8 @@ def find( exclude_fields = self.all_fields - response_fields results: List = [self.resource_mapper.map_back(doc) for doc in raw_results] - self.check_and_add_missing_fields(results, response_fields) + + self.check_and_add_missing_fields(results, response_fields, response_fields_set) if results: results = self.resource_mapper.deserialize(results) @@ -165,19 +167,25 @@ def find( return results, data_returned, more_data_available, exclude_fields - def check_and_add_missing_fields(self, results: List[dict], response_fields: set): + def check_and_add_missing_fields( + self, results: List[dict], response_fields: set, response_fields_set: bool + ): """Checks whether the response_fields and mandatory fields are present. If they are not present the values are set to None, so the deserialization works correctly. It also checks whether all fields in the response have been defined either in the model or in the config file. If not it raises an appropriate error or warning.""" include_fields = ( response_fields - self.resource_mapper.TOP_LEVEL_NON_ATTRIBUTES_FIELDS - ) | set(self.get_non_optional_fields()) + ) # Include missing fields for result in results: for field in include_fields: set_field_to_none_if_missing_in_dict(result["attributes"], field) + if response_fields_set: + for result in results: + result["attributes"]["set_missing_to_none"] = True + bad_optimade_fields = set() bad_provider_fields = set() supported_prefixes = self.resource_mapper.SUPPORTED_PREFIXES @@ -354,6 +362,7 @@ def handle_query_params( # response_fields if getattr(params, "response_fields", False): + cursor_kwargs["response_fields_set"] = True response_fields = set(params.response_fields.split(",")) response_fields |= self.resource_mapper.get_required_fields() else: From f17485073de15dfab75c172386cf565e7e26021c Mon Sep 17 00:00:00 2001 From: Johan Bergsma <29785380+JPBergsma@users.noreply.github.com> Date: Wed, 30 Nov 2022 18:49:50 +0100 Subject: [PATCH 20/20] Removed get_non_optional_fields and get_schema as they are no longer needed. --- .../entry_collections/entry_collections.py | 26 +------------------ 1 file changed, 1 insertion(+), 25 deletions(-) diff --git a/optimade/server/entry_collections/entry_collections.py b/optimade/server/entry_collections/entry_collections.py index d50cf71bd..5790c6365 100644 --- a/optimade/server/entry_collections/entry_collections.py +++ b/optimade/server/entry_collections/entry_collections.py @@ -253,26 +253,6 @@ def all_fields(self) -> Set[str]: return self._all_fields - @lru_cache(maxsize=4) - def get_non_optional_fields(self) -> List[str]: - """ - Returns those fields that should be set before a response class can be initialized. - - Returns: - Property names. - """ - - schema = self.get_schema() - attributes = schema["properties"]["attributes"] - if "$ref" in attributes: - path = attributes["$ref"].split("/")[1:] - attributes = schema.copy() - while path: - next_key = path.pop(0) - attributes = attributes[next_key] - return attributes["required"] - return [] - @lru_cache(maxsize=4) def get_attribute_fields(self) -> Set[str]: """Get the set of attribute fields @@ -290,7 +270,7 @@ def get_attribute_fields(self) -> Set[str]: """ - schema = self.get_schema() + schema = self.resource_cls.schema() attributes = schema["properties"]["attributes"] if "allOf" in attributes: allOf = attributes.pop("allOf") @@ -304,10 +284,6 @@ def get_attribute_fields(self) -> Set[str]: attributes = attributes[next_key] return set(attributes["properties"].keys()) - @lru_cache(maxsize=4) - def get_schema(self): - return self.resource_cls.schema() - def handle_query_params( self, params: Union[EntryListingQueryParams, SingleEntryQueryParams] ) -> Dict[str, Any]: