From 5e998165483449a9c7cf831552cc115869b15fd4 Mon Sep 17 00:00:00 2001 From: John Readey Date: Wed, 25 Dec 2024 12:24:48 +0800 Subject: [PATCH 1/8] get_config compat for h5py --- h5pyd/__init__.py | 3 +- h5pyd/_hl/config.py | 103 ---------------------------- h5pyd/_hl/files.py | 18 +++-- h5pyd/_hl/folders.py | 4 +- h5pyd/_hl/group.py | 64 +++++++++++++----- h5pyd/_hl/httpconn.py | 8 +-- h5pyd/_hl/objectid.py | 16 +++++ h5pyd/_hl/openid.py | 9 +-- h5pyd/_hl/serverinfo.py | 4 +- h5pyd/config.py | 145 +++++++++++++++++++++++++++++++++++----- test/hl/test_config.py | 57 ++++++++++++++++ test/hl/test_file.py | 28 +++++--- test/hl/test_group.py | 128 +++++++++++++++++++---------------- testall.py | 1 + 14 files changed, 368 insertions(+), 220 deletions(-) delete mode 100755 h5pyd/_hl/config.py create mode 100644 test/hl/test_config.py diff --git a/h5pyd/__init__.py b/h5pyd/__init__.py index 732a15a..28da064 100644 --- a/h5pyd/__init__.py +++ b/h5pyd/__init__.py @@ -29,7 +29,8 @@ from . import h5ds -from .config import Config +from .config import get_config + __version__ = version.version diff --git a/h5pyd/_hl/config.py b/h5pyd/_hl/config.py deleted file mode 100755 index 40ac2e8..0000000 --- a/h5pyd/_hl/config.py +++ /dev/null @@ -1,103 +0,0 @@ -############################################################################## -# Copyright by The HDF Group. # -# All rights reserved. # -# # -# This file is part of HSDS (HDF5 Scalable Data Service), Libraries and # -# Utilities. The full HSDS copyright notice, including # -# terms governing use, modification, and redistribution, is contained in # -# the file COPYING, which can be found at the root of the source code # -# distribution tree. If you do not have access to this file, you may # -# request a copy from help@hdfgroup.org. # -############################################################################## -import os -import json -import sys - - -def eprint(*args, **kwargs): - print(*args, file=sys.stderr, **kwargs) - - -class Config: - """ - User Config state - """ - def __init__(self, config_file=None, **kwargs): - self._cfg = {} - if config_file: - self._config_file = config_file - elif os.path.isfile(".hscfg"): - self._config_file = ".hscfg" - else: - self._config_file = os.path.expanduser("~/.hscfg") - # process config file if found - if os.path.isfile(self._config_file): - line_number = 0 - with open(self._config_file) as f: - for line in f: - line_number += 1 - s = line.strip() - if not s: - continue - if s[0] == '#': - # comment line - continue - index = line.find('=') - if index <= 0: - eprint(f"config file: {self._config_file} line: {line_number} is not valid") - continue - k = line[:index].strip() - v = line[(index + 1):].strip() - if v and v.upper() != "NONE": - self._cfg[k] = v - # override any config values with environment variable if found - for k in self._cfg.keys(): - if k.upper() in os.environ: - self._cfg[k] = os.environ[k.upper()] - - # finally update any values that are passed in to the constructor - for k in kwargs.keys(): - self._cfg[k] = kwargs[k] - - def __getitem__(self, name): - """ Get a config item """ - - # Load a variable from environment. It would have only been loaded in - # __init__ if it was also specified in the config file. - env_name = name.upper() - if name not in self._cfg and env_name in os.environ: - self._cfg[name] = os.environ[env_name] - - return self._cfg[name] - - def __setitem__(self, name, obj): - """ set config item """ - self._cfg[name] = obj - - def __delitem__(self, name): - """ Delete option. """ - del self._cfg[name] - - def __len__(self): - return len(self._cfg) - - def __iter__(self): - """ Iterate over config names """ - keys = self._cfg.keys() - for key in keys: - yield key - - def __contains__(self, name): - return name in self._cfg or name.upper() in os.environ - - def __repr__(self): - return json.dumps(self._cfg) - - def keys(self): - return self._cfg.keys() - - def get(self, name, default=None): - if name in self: - return self[name] - else: - return default diff --git a/h5pyd/_hl/files.py b/h5pyd/_hl/files.py index c623e86..5937d04 100644 --- a/h5pyd/_hl/files.py +++ b/h5pyd/_hl/files.py @@ -21,7 +21,7 @@ from .objectid import GroupID from .group import Group from .httpconn import HttpConn -from .config import Config +from .. import config VERBOSE_REFRESH_TIME = 1.0 # 1 second @@ -276,7 +276,7 @@ def __init__( logger=None, owner=None, linked_domain=None, - track_order=False, + track_order=None, retries=10, timeout=180, **kwds, @@ -320,7 +320,8 @@ def __init__( Create new domain using the root of the linked domain track_order Whether to track dataset/group/attribute creation order within this file. Objects will be iterated - in ascending creation order if this is enabled, otherwise in ascending alphanumeric order. + in ascending creation order if this is True, if False in ascending alphanumeric order. + If None use global default get_config().track_order. retries Number of retry attempts to be used if a server request fails timeout @@ -341,7 +342,7 @@ def __init__( if mode is None: mode = "r" - cfg = Config() # pulls in state from a .hscfg file (if found). + cfg = config.get_config() # pulls in state from a .hscfg file (if found). # accept domain values in the form: # http://server:port/home/user/myfile.h5 @@ -406,6 +407,9 @@ def __init__( if swmr: use_cache = False # disable metadata caching in swmr mode + if track_order is None: + track_order = cfg.track_order + http_conn = HttpConn( domain, endpoint=endpoint, @@ -433,8 +437,6 @@ def __init__( if bucket: params["bucket"] = bucket - params["CreateOrder"] = "1" if track_order else "0" - # need some special logic for the first request in local mode # to give the sockets time to initialize @@ -487,6 +489,10 @@ def __init__( body["owner"] = owner if linked_domain: body["linked_domain"] = linked_domain + if track_order: + create_props = {"CreateOrder": 1} + group_body = {"creationProperties": create_props} + body["group"] = group_body rsp = http_conn.PUT(req, params=params, body=body) if rsp.status_code != 201: http_conn.close() diff --git a/h5pyd/_hl/folders.py b/h5pyd/_hl/folders.py index ab37fcf..bfdfe67 100644 --- a/h5pyd/_hl/folders.py +++ b/h5pyd/_hl/folders.py @@ -17,7 +17,7 @@ import time import logging from .httpconn import HttpConn -from .config import Config +from .. import config class Folder: @@ -143,7 +143,7 @@ def __init__( if mode is None: mode = "r" - cfg = Config() # pulls in state from a .hscfg file (if found). + cfg = config.get_config() # pulls in state from a .hscfg file (if found). if endpoint is None and "hs_endpoint" in cfg: endpoint = cfg["hs_endpoint"] diff --git a/h5pyd/_hl/group.py b/h5pyd/_hl/group.py index 2fb9dc0..0983fcc 100644 --- a/h5pyd/_hl/group.py +++ b/h5pyd/_hl/group.py @@ -25,6 +25,7 @@ from .table import Table from .datatype import Datatype from . import h5type +from .. import config def isUUID(name): @@ -49,7 +50,7 @@ class Group(HLObject, MutableMappingHDF5): """ Represents an HDF5 group. """ - def __init__(self, bind, track_order=False, **kwargs): + def __init__(self, bind, track_order=None, **kwargs): # print "group init, bind:", bind """ Create a new Group object by binding to a low-level GroupID. @@ -58,7 +59,20 @@ def __init__(self, bind, track_order=False, **kwargs): if not isinstance(bind, GroupID): raise ValueError(f"{bind} is not a GroupID") HLObject.__init__(self, bind, **kwargs) - self._track_order = track_order + + if track_order is None: + # set order based on group creation props + gcpl = self.id.gcpl_json + if "CreateOrder" in gcpl: + createOrder = gcpl["CreateOrder"] + if not createOrder or createOrder == "0": + self._track_order = False + else: + self._track_order = True + else: + self._track_order = False + else: + self._track_order = track_order self._req_prefix = "/groups/" + self.id.uuid self._link_db = {} # cache for links @@ -182,9 +196,10 @@ def _get_objdb_links(self): group_json = objdb[self.id.id] return group_json["links"] - def _make_group(self, parent_id=None, parent_name=None, link=None): + def _make_group(self, parent_id=None, parent_name=None, link=None, track_order=None): """ helper function to make a group """ + cfg = config.get_config() link_json = {} if parent_id: link_json["id"] = parent_id @@ -195,6 +210,9 @@ def _make_group(self, parent_id=None, parent_name=None, link=None): body = {} if link_json: body["link"] = link_json + if track_order or cfg.track_order: + body["creationProperties"] = {"CreateOrder": 1} + self.log.debug(f"create group with body: {body}") rsp = self.POST('/groups', body=body) @@ -211,7 +229,7 @@ def _make_group(self, parent_id=None, parent_name=None, link=None): return sub_group - def create_group(self, h5path, track_order=False): + def create_group(self, h5path, track_order=None): """ Create and return a new subgroup. Name may be absolute or relative. Fails if the target name already @@ -223,8 +241,7 @@ def create_group(self, h5path, track_order=False): if h5path is None: # anonymous group - sub_group = self._make_group() - sub_group._track_order = track_order + sub_group = self._make_group(track_order=track_order) return sub_group if h5path[-1] == '/': @@ -514,7 +531,7 @@ def require_group(self, name): raise TypeError(f"Incompatible object ({grp.__class__.__name__}) already exists") return grp - def getObjByUuid(self, uuid, collection_type=None, track_order=False): + def getObjByUuid(self, uuid, collection_type=None, track_order=None): """ Utility method to get an obj based on collection type and uuid """ self.log.debug(f"getObjByUuid({uuid})") obj_json = None @@ -549,7 +566,10 @@ def getObjByUuid(self, uuid, collection_type=None, track_order=False): # will need to get JSON from server req = f"/{collection_type}/{uuid}" # make server request - obj_json = self.GET(req, params={"CreateOrder": "1" if track_order else "0"}) + params = {} + if track_order is not None: + params["CreateOrder"] = "1" if track_order else "0" + obj_json = self.GET(req, params=params) if collection_type == 'groups': tgt = Group(GroupID(self, obj_json), track_order=track_order) @@ -568,7 +588,7 @@ def getObjByUuid(self, uuid, collection_type=None, track_order=False): return tgt - def __getitem__(self, name, track_order=False): + def __getitem__(self, name, track_order=None): """ Open an object in the file """ # convert bytes to str for PY3 if isinstance(name, bytes): @@ -655,7 +675,7 @@ def _objectify_link_Json(self, link_json): return link_obj - def get(self, name, default=None, getclass=False, getlink=False, track_order=False, **kwds): + def get(self, name, default=None, getclass=False, getlink=False, track_order=None, **kwds): """ Retrieve an item or other information. "name" given only: @@ -739,8 +759,8 @@ def get(self, name, default=None, getclass=False, getlink=False, track_order=Fal params["pattern"] = pattern if follow_links: params["follow_links"] = 1 - if track_order: - params["CreateOrder"] = 1 + if track_order is not None: + params["CreateOrder"] = "1" if track_order else "0" if name: body = {} @@ -848,7 +868,10 @@ def __setitem__(self, name, obj): raise IOError("cannot create subgroup of softlink") parent_uuid = link_json["id"] req = "/groups/" + parent_uuid - group_json = self.GET(req, params={"CreateOrder": "1" if self._track_order else "0"}) + params = {} + if self._track_order is not None: + params["CreateOrder"] = "1" if self._track_order else "0" + group_json = self.GET(req, params=params) tgt = Group(GroupID(self, group_json)) tgt[basename] = obj @@ -946,7 +969,10 @@ def __len__(self): return len(links_json) req = "/groups/" + self.id.uuid - rsp_json = self.GET(req, params={"CreateOrder": "1" if self._track_order else "0"}) + params = {} + if self._track_order is not None: + params["CreateOrder"] = "1" if self._track_order else "0" + rsp_json = self.GET(req, params=params) return rsp_json['linkCount'] def __iter__(self): @@ -955,7 +981,10 @@ def __iter__(self): if links is None: req = "/groups/" + self.id.uuid + "/links" - rsp_json = self.GET(req, params={"CreateOrder": "1" if self._track_order else "0"}) + params = {} + if self._track_order is not None: + params["CreateOrder"] = "1" if self._track_order else "0" + rsp_json = self.GET(req, params=params) links = rsp_json['links'] # reset the link cache @@ -1180,7 +1209,10 @@ def visititems(self, func): else: # request from server req = "/groups/" + parent.id.uuid + "/links" - rsp_json = self.GET(req, params={"CreateOrder": "1" if self._track_order else "0"}) + params = {} + if self._track_order is not None: + params["CreateOrder"] = "1" if self._track_order else "0" + rsp_json = self.GET(req, params=params) links = rsp_json['links'] for link in links: obj = None diff --git a/h5pyd/_hl/httpconn.py b/h5pyd/_hl/httpconn.py index 84e21e0..8d55d6d 100644 --- a/h5pyd/_hl/httpconn.py +++ b/h5pyd/_hl/httpconn.py @@ -25,7 +25,7 @@ import logging from . import openid -from .config import Config +from .. import config from . import requests_lambda MAX_CACHE_ITEM_SIZE = 10000 # max size of an item to put in the cache @@ -73,7 +73,7 @@ def getAzureApiKey(): api_key = None # if Azure AD ids are set, pass them to HttpConn via api_key dict - cfg = Config() # pulls in state from a .hscfg file (if found). + cfg = config.get_config() # pulls in state from a .hscfg file (if found). ad_app_id = None # Azure AD HSDS Server id if "HS_AD_APP_ID" in os.environ: @@ -114,7 +114,7 @@ def getAzureApiKey(): def getKeycloakApiKey(): # check for keycloak next - cfg = Config() # pulls in state from a .hscfg file (if found). + cfg = config.get_config() # pulls in state from a .hscfg file (if found). api_key = None # check to see if we are configured for keycloak authentication if "HS_KEYCLOAK_URI" in os.environ: @@ -293,7 +293,6 @@ def __init__( if isinstance(api_key, dict): # Maintain Azure-defualt backwards compatibility, but allow # both environment variable and kwarg override. - # provider = Config().get('hs_openid_provider', 'azure') provider = api_key.get("openid_provider", "azure") if provider == "azure": self.log.debug("creating OpenIDHandler for Azure") @@ -327,6 +326,7 @@ def __del__(self): self._s = None def getHeaders(self, username=None, password=None, headers=None): + if headers is None: headers = {} elif "Authorization" in headers: diff --git a/h5pyd/_hl/objectid.py b/h5pyd/_hl/objectid.py index b16b6ca..173f6d5 100644 --- a/h5pyd/_hl/objectid.py +++ b/h5pyd/_hl/objectid.py @@ -168,6 +168,14 @@ def get_type(self): dtype = createDataType(type_json) return dtype + @property + def tcpl_json(self): + if 'creationProperties' in self._obj_json: + tcpl = self._obj_json['creationProperties'] + else: + tcpl = {} + return tcpl + def __init__(self, parent, item, **kwds): """Create a new TypeID. """ @@ -255,3 +263,11 @@ def __init__(self, parent, item, http_conn=None, **kwds): if self.collection_type != "groups": raise IOError(f"Unexpected collection_type: {self._collection_type}") + + @property + def gcpl_json(self): + if 'creationProperties' in self._obj_json: + gcpl = self._obj_json['creationProperties'] + else: + gcpl = {} + return gcpl diff --git a/h5pyd/_hl/openid.py b/h5pyd/_hl/openid.py index aae0a08..e0eb0f0 100644 --- a/h5pyd/_hl/openid.py +++ b/h5pyd/_hl/openid.py @@ -28,7 +28,8 @@ def eprint(*args, **kwargs): pass # change this to the eprint below to see the import error # eprint("Unable to import google auth packages") -from .config import Config + +from .. import config as hsconfig class OpenIDHandler(ABC): @@ -136,7 +137,7 @@ def __init__(self, endpoint, config=None): """Store configuration.""" # Configuration manager - hs_config = Config() + hs_config = hsconfig.get_config() # Config is a dictionary. if isinstance(config, dict): @@ -256,7 +257,7 @@ def __init__(self, endpoint, config=None, scopes=None): raise ModuleNotFoundError(msg) # Configuration manager - hs_config = Config() + hs_config = hsconfig.get_config() if scopes is None: scopes = hs_config.get('hs_google_scopes', 'openid').split() @@ -345,7 +346,7 @@ def __init__(self, endpoint, config=None, scopes=None, username=None, password=N """Store configuration.""" # Configuration manager - hs_config = Config() + hs_config = hsconfig.get_config() if scopes is None: scopes = hs_config.get('hs_keycloak_scopes', 'openid').split() diff --git a/h5pyd/_hl/serverinfo.py b/h5pyd/_hl/serverinfo.py index ff9f602..66c3a04 100644 --- a/h5pyd/_hl/serverinfo.py +++ b/h5pyd/_hl/serverinfo.py @@ -14,12 +14,12 @@ import time from .httpconn import HttpConn -from .config import Config +from .. import config def getServerInfo(endpoint=None, username=None, password=None, api_key=None, **kwds): - cfg = Config() # get credentials from .hscfg file (if found) + cfg = config.get_config() # get credentials from .hscfg file (if found) if endpoint is None and "hs_endpoint" in cfg: endpoint = cfg["hs_endpoint"] diff --git a/h5pyd/config.py b/h5pyd/config.py index 504dfcb..f153171 100755 --- a/h5pyd/config.py +++ b/h5pyd/config.py @@ -17,8 +17,11 @@ class Config: """ User Config state """ + _cfg = {} # global state + def __init__(self, config_file=None, **kwargs): - self._cfg = {} + if Config._cfg: + return # already initialized if config_file: self._config_file = config_file elif os.path.isfile(".hscfg"): @@ -39,51 +42,159 @@ def __init__(self, config_file=None, **kwargs): continue fields = s.split('=') if len(fields) < 2: - print("config file: {} line: {} is not valid".format(self._config_file, line_number)) + print(f"config file: {self._config_file} line: {line_number} is not valid") continue k = fields[0].strip() v = fields[1].strip() - self._cfg[k] = v + if k == "complex_names": + self.complex_names = v + elif k == "bool_names": + self.bool_names = v + elif k == "track_order": + self.track_order = v + else: + Config._cfg[k] = v + # override any config values with environment variable if found - for k in self._cfg.keys(): + for k in Config._cfg.keys(): if k.upper() in os.environ: - self._cfg[k] = os.environ[k.upper()] + Config._cfg[k] = os.environ[k.upper()] - # finally update any values that are passed in to the constructor + # update any values that are passed in to the constructor for k in kwargs.keys(): - self._cfg[k] = kwargs[k] + Config._cfg[k] = kwargs[k] + + # finally, set defaults for any expected keys that are not already set + for k in ("hs_endpoint", "hs_username", "hs_endpoint"): + if k not in Config._cfg: + Config._cfg[k] = None + if "bool_names" not in Config._cfg: + Config._cfg["bool_names"] = (b"FALSE", b"TRUE") + if "complex_names" not in Config._cfg: + Config._cfg["complex_names"] = ("r", "i") + if "track_order" not in Config._cfg: + Config._cfg["track_order"] = False def __getitem__(self, name): """ Get a config item """ - if name not in self._cfg: + if name not in Config._cfg: if name.upper() in os.environ: - self._cfg[name] = os.environ[name.upper()] + Config._cfg[name] = os.environ[name.upper()] else: return None - return self._cfg[name] + return Config._cfg[name] def __setitem__(self, name, obj): """ set config item """ - self._cfg[name] = obj + Config._cfg[name] = obj def __delitem__(self, name): """ Delete option. """ - del self._cfg[name] + del Config._cfg[name] def __len__(self): - return len(self._cfg) + return len(Config._cfg) def __iter__(self): """ Iterate over config names """ - keys = self._cfg.keys() + keys = Config._cfg.keys() for key in keys: yield key def __contains__(self, name): - return name in self._cfg + return name in Config._cfg def __repr__(self): - return json.dumps(self._cfg) + return json.dumps(Config._cfg) def keys(self): - return self._cfg.keys() + return Config._cfg.keys() + + @property + def hs_endpoint(self): + return Config._cfg.get("hs_endpoint") + + @property + def hs_username(self): + return Config._cfg.get("hs_username") + + @property + def hs_password(self): + return Config._cfg.get("hs_password") + + @property + def hs_api_key(self): + return Config._cfg.get("hs_api_key") + + @property + def bool_names(self): + if "bool_names" in Config._cfg: + names = Config._cfg["bool_names"] + else: + names = (b"FALSE", b"TRUE") + return names + + @bool_names.setter + def bool_names(self, value): + if isinstance(value, str): + names = value.split(()) + if len(names) < 2: + raise ValueError("bool_names must have two items") + elif len(names) == 2: + pass + else: + names = names[:2] # just use the first two items + elif len(value) != 2: + raise ValueError("expected two-element list for bool_names") + else: + names = value + Config._cfg["bool_names"] = tuple(names) + + @property + def complex_names(self): + if "complex_names" in Config._cfg: + names = Config._cfg["complex_names"] + else: + names = ("r", "i") + return names + + @complex_names.setter + def complex_names(self, value): + if isinstance(value, str): + names = value.split() + if len(names) < 2: + raise ValueError("complex_names must have two items") + elif len(names) == 2: + pass + else: + names = names[:2] # just use the first two items + elif len(value) != 2: + raise ValueError("complex_names must have two values") + else: + names = value + + Config._cfg["complex_names"] = tuple(names) + + @property + def track_order(self): + if "track_order" in Config._cfg: + track = Config._cfg["track_order"] + else: + track = False + return track + + @track_order.setter + def track_order(self, value): + if isinstance(value, str): + tokens = value.split() + if len(tokens) == 0: + track = False + else: + track = bool(tokens[0]) # strip any comments + else: + track = bool(value) + Config._cfg["track_order"] = track + + +def get_config(config_file=None, **kwargs): + return Config(config_file=config_file, **kwargs) diff --git a/test/hl/test_config.py b/test/hl/test_config.py new file mode 100644 index 0000000..1112e70 --- /dev/null +++ b/test/hl/test_config.py @@ -0,0 +1,57 @@ +############################################################################## +# Copyright by The HDF Group. # +# All rights reserved. # +# # +# This file is part of H5Serv (HDF5 REST Server) Service, Libraries and # +# Utilities. The full HDF5 REST Server copyright notice, including # +# terms governing use, modification, and redistribution, is contained in # +# the file COPYING, which can be found at the root of the source code # +# distribution tree. If you do not have access to this file, you may # +# request a copy from help@hdfgroup.org. # +############################################################################## + +import numpy as np +import logging +import config + + +if config.get("use_h5py"): + import h5py +else: + import h5pyd as h5py + +from common import ut, TestCase + + +class TestConfig(TestCase): + + def test_config_h5py(self): + cfg = h5py.get_config() + + self.assertEqual(cfg.bool_names, (b"FALSE", b"TRUE")) + self.assertEqual(cfg.complex_names, ("r", "i")) + self.assertEqual(cfg.track_order, False) + + cfg.bool_names = ("nope", "yep") + cfg.complex_names = ("real", "imag") + cfg.track_order = True + + cfg2 = h5py.get_config() + self.assertEqual(cfg2.bool_names, ("nope", "yep")) + self.assertEqual(cfg2.complex_names, ("real", "imag")) + self.assertEqual(cfg2.track_order, True) + + def test_config_hs(self): + if config.get("use_h5py"): + return # test with h5pyd only + cfg = h5py.get_config() + self.assertTrue(cfg.hs_endpoint.startswith("http")) + cfg["XYZ"] = 42 + cfg2 = h5py.get_config() + self.assertEqual(cfg2["XYZ"], 42) + + +if __name__ == '__main__': + loglevel = logging.ERROR + logging.basicConfig(format='%(asctime)s %(message)s', level=loglevel) + ut.main() diff --git a/test/hl/test_file.py b/test/hl/test_file.py index 34c6826..d8da2bd 100644 --- a/test/hl/test_file.py +++ b/test/hl/test_file.py @@ -351,28 +351,38 @@ def test_close(self): class TestTrackOrder(TestCase): def populate(self, f): - for i in range(100): + count = 3 + for i in range(count): # Mix group and dataset creation. if i % 10 == 0: f.create_group(str(i)) else: f[str(i)] = [i] + return count def test_track_order(self): filename = self.getFileName("test_track_order_file") print(f"filename: {filename}") - f = h5py.File(filename, 'w', track_order=True) # creation order - self.populate(f) - self.assertEqual(list(f), - [str(i) for i in range(100)]) + # write file using creation order + with h5py.File(filename, 'w', track_order=True) as f: + count = self.populate(f) + self.assertEqual(list(f), [str(i) for i in range(count)]) + + with h5py.File(filename) as f: + # domain/file should have been saved with track_order state + self.assertEqual(list(f), [str(i) for i in range(count)]) def test_no_track_order(self): filename = self.getFileName("test_no_track_order_file") print(f"filename: {filename}") - f = h5py.File(filename, 'w', track_order=False) # name alphanumeric - self.populate(f) - self.assertEqual(list(f), - sorted([str(i) for i in range(100)])) + + # create file using alphanumeric order + with h5py.File(filename, 'w', track_order=False) as f: + count = self.populate(f) + self.assertEqual(list(f), sorted([str(i) for i in range(count)])) + + with h5py.File(filename) as f: # name alphanumeric + self.assertEqual(list(f), sorted([str(i) for i in range(count)])) if __name__ == '__main__': diff --git a/test/hl/test_group.py b/test/hl/test_group.py index b4db405..4b86702 100644 --- a/test/hl/test_group.py +++ b/test/hl/test_group.py @@ -552,38 +552,58 @@ def test_link_get_multi(self): class TestTrackOrder(TestCase): def populate(self, g): - for i in range(100): + count = 10 + for i in range(count): # Mix group and dataset creation. if i % 10 == 0: g.create_group(str(i)) else: g[str(i)] = [i] + return count def populate_attrs(self, d): - for i in range(100): + count = 10 + for i in range(count): d.attrs[str(i)] = i + return count def test_track_order(self): filename = self.getFileName("test_track_order_group") print(f"filename: {filename}") - self.f = h5py.File(filename, 'w') - g = self.f.create_group('order', track_order=True) # creation order - self.populate(g) - - ref = [str(i) for i in range(100)] - self.assertEqual(list(g), ref) - self.assertEqual(list(reversed(g)), list(reversed(ref))) + with h5py.File(filename, 'w') as f: + g = f.create_group('order', track_order=True) # creation order + count = self.populate(g) + + ref = [str(i) for i in range(count)] + self.assertEqual(list(g), ref) + self.assertEqual(list(reversed(g)), list(reversed(ref))) + + # re-opening the file should retain the track_order setting + with h5py.File(filename) as f: + g = f['order'] + count = len(g) + self.assertTrue(count > 0) + ref = [str(i) for i in range(count)] + self.assertEqual(list(g), ref) + self.assertEqual(list(reversed(g)), list(reversed(ref))) def test_no_track_order(self): filename = self.getFileName("test_no_track_order_group") print(f"filename: {filename}") - self.f = h5py.File(filename, 'w') - g = self.f.create_group('order', track_order=False) # name alphanumeric - self.populate(g) - - ref = sorted([str(i) for i in range(100)]) - self.assertEqual(list(g), ref) - self.assertEqual(list(reversed(g)), list(reversed(ref))) + with h5py.File(filename, 'w') as f: + g = f.create_group('order', track_order=False) # name alphanumeric + count = self.populate(g) + ref = sorted([str(i) for i in range(count)]) + self.assertEqual(list(g), ref) + self.assertEqual(list(reversed(g)), list(reversed(ref))) + + with h5py.File(filename) as f: + g = f['order'] # name alphanumeric + count = len(g) + self.assertTrue(count > 0) + ref = sorted([str(i) for i in range(count)]) + self.assertEqual(list(g), ref) + self.assertEqual(list(reversed(g)), list(reversed(ref))) def test_get_dataset_track_order(self): @@ -593,28 +613,27 @@ def test_get_dataset_track_order(self): filename = self.getFileName("test_get_dataset_track_order") print(f"filename: {filename}") - self.f = h5py.File(filename, 'w') - g = self.f.create_group('order') + with h5py.File(filename, 'w') as f: + g = f.create_group('order') - dset = g.create_dataset('dset', (10,), dtype='i4') - dset2 = g.create_dataset('dset2', (10,), dtype='i4') + dset = g.create_dataset('dset', (10,), dtype='i4') + dset2 = g.create_dataset('dset2', (10,), dtype='i4') - self.populate_attrs(dset) - self.populate_attrs(dset2) + count1 = self.populate_attrs(dset) + count2 = self.populate_attrs(dset2) - self.f.close() - self.f = h5py.File(filename, 'r') - g = self.f['order'] + with h5py.File(filename) as f: + g = f['order'] - d = g.get('dset', track_order=True) - ref = [str(i) for i in range(100)] - self.assertEqual(list(d.attrs), ref) - self.assertEqual(list(reversed(d.attrs)), list(reversed(ref))) + d = g.get('dset', track_order=True) + ref = [str(i) for i in range(count1)] + self.assertEqual(list(d.attrs), ref) + self.assertEqual(list(reversed(d.attrs)), list(reversed(ref))) - d2 = g.get('dset2', track_order=False) - ref = sorted([str(i) for i in range(100)]) - self.assertEqual(list(d2.attrs), ref) - self.assertEqual(list(reversed(d2.attrs)), list(reversed(ref))) + d2 = g.get('dset2', track_order=False) + ref = sorted([str(i) for i in range(count2)]) + self.assertEqual(list(d2.attrs), ref) + self.assertEqual(list(reversed(d2.attrs)), list(reversed(ref))) def test_get_group_track_order(self): # h5py does not support track_order on group.get() @@ -622,29 +641,26 @@ def test_get_group_track_order(self): return filename = self.getFileName("test_get_group_track_order") print(f"filename: {filename}") - self.f = h5py.File(filename, 'w') - g = self.f.create_group('order') - - # create subgroup and populate it with links - g.create_group('subgroup') - self.populate(g['subgroup']) - - self.f.close() - self.f = h5py.File(filename, 'r') - g = self.f['order'] - - subg = g.get('subgroup', track_order=True) - ref = [str(i) for i in range(100)] - self.assertEqual(list(subg), ref) - self.assertEqual(list(reversed(subg)), list(reversed(ref))) - - self.f.close() - self.f = h5py.File(filename, 'r') - g = self.f['order'] - subg2 = g.get('subgroup', track_order=False) - ref = sorted([str(i) for i in range(100)]) - self.assertEqual(list(subg2), ref) - self.assertEqual(list(reversed(subg2)), list(reversed(ref))) + with h5py.File(filename, 'w') as f: + g = f.create_group('order') + # create subgroup and populate it with links + g.create_group('subgroup') + count = self.populate(g['subgroup']) + + with h5py.File(filename) as f: + g = f['order'] + subg = g.get('subgroup', track_order=True) + ref = [str(i) for i in range(count)] + self.assertEqual(list(subg), ref) + self.assertEqual(list(reversed(subg)), list(reversed(ref))) + + with h5py.File(filename) as f: + g = f['order'] + subg2 = g.get('subgroup', track_order=False) + count = len(subg2) + ref = sorted([str(i) for i in range(count)]) + self.assertEqual(list(subg2), ref) + self.assertEqual(list(reversed(subg2)), list(reversed(ref))) if __name__ == '__main__': diff --git a/testall.py b/testall.py index 10efbf1..860cafc 100755 --- a/testall.py +++ b/testall.py @@ -16,6 +16,7 @@ hl_tests = ('test_attribute', + 'test_config', 'test_committedtype', 'test_complex_numbers', 'test_dataset', From 9018f6dccbf6e05fa3779d715d6ed1e6b3cb23ef Mon Sep 17 00:00:00 2001 From: John Readey Date: Thu, 26 Dec 2024 18:50:21 +0800 Subject: [PATCH 2/8] support order with cfg.track_order --- h5pyd/_hl/base.py | 9 +++- h5pyd/_hl/files.py | 14 ++---- h5pyd/_hl/group.py | 15 ++++-- test/hl/test_attribute.py | 87 ++++++++++++++++++++-------------- test/hl/test_file.py | 48 ++++++++++++++----- test/hl/test_group.py | 99 +++++++++++++++++++++++---------------- 6 files changed, 170 insertions(+), 102 deletions(-) diff --git a/h5pyd/_hl/base.py b/h5pyd/_hl/base.py index f581480..dd1a78e 100644 --- a/h5pyd/_hl/base.py +++ b/h5pyd/_hl/base.py @@ -14,6 +14,7 @@ import posixpath import os +import sys import json import numpy as np import logging @@ -28,6 +29,10 @@ numpy_float_types = (np.float16, np.float32, np.float64) +def eprint(*args, **kwargs): + print(*args, file=sys.stderr, **kwargs) + + class FakeLock(): def __init__(self): pass @@ -506,7 +511,7 @@ def readElement(buffer, offset, arr, index, dt): e = np.frombuffer(bytes(e_buffer), dtype=dt) arr[index] = e[0] except ValueError: - print(f"ERROR: ValueError setting {e_buffer} and dtype: {dt}") + eprint(f"ERROR: ValueError setting {e_buffer} and dtype: {dt}") raise else: # variable length element @@ -533,7 +538,7 @@ def readElement(buffer, offset, arr, index, dt): try: e = np.frombuffer(bytes(e_buffer), dtype=vlen) except ValueError: - print("ValueError -- e_buffer:", e_buffer, "dtype:", vlen) + eprint("ValueError -- e_buffer:", e_buffer, "dtype:", vlen) raise arr[index] = e diff --git a/h5pyd/_hl/files.py b/h5pyd/_hl/files.py index 5937d04..22ad0b4 100644 --- a/h5pyd/_hl/files.py +++ b/h5pyd/_hl/files.py @@ -327,7 +327,6 @@ def __init__( timeout Timeout value in seconds """ - groupid = None dn_ids = [] # if we're passed a GroupId as domain, just initialize the file object @@ -407,9 +406,6 @@ def __init__( if swmr: use_cache = False # disable metadata caching in swmr mode - if track_order is None: - track_order = cfg.track_order - http_conn = HttpConn( domain, endpoint=endpoint, @@ -489,7 +485,7 @@ def __init__( body["owner"] = owner if linked_domain: body["linked_domain"] = linked_domain - if track_order: + if track_order or cfg.track_order: create_props = {"CreateOrder": 1} group_body = {"creationProperties": create_props} body["group"] = group_body @@ -558,22 +554,20 @@ def __init__( groupid = GroupID(None, group_json, http_conn=http_conn) # end else + self._name = "/" self._id = groupid - self._verboseInfo = None # aditional state we'll get when requested + self._verboseInfo = None # additional state we'll get when requested self._verboseUpdated = None # when the verbose data was fetched self._lastScan = None # when summary stats where last updated by server self._dn_ids = dn_ids - self._track_order = track_order self._swmr_mode = swmr Group.__init__(self, self._id, track_order=track_order) def _getVerboseInfo(self): now = time.time() - if ( - self._verboseUpdated is None or now - self._verboseUpdated > VERBOSE_REFRESH_TIME - ): + if (self._verboseUpdated is None or now - self._verboseUpdated > VERBOSE_REFRESH_TIME): # resynch the verbose data req = "/?verbose=1" rsp_json = self.GET(req, use_cache=False, params={"CreateOrder": "1" if self._track_order else "0"}) diff --git a/h5pyd/_hl/group.py b/h5pyd/_hl/group.py index 0983fcc..21404db 100644 --- a/h5pyd/_hl/group.py +++ b/h5pyd/_hl/group.py @@ -200,6 +200,7 @@ def _make_group(self, parent_id=None, parent_name=None, link=None, track_order=N """ helper function to make a group """ cfg = config.get_config() + link_json = {} if parent_id: link_json["id"] = parent_id @@ -219,6 +220,8 @@ def _make_group(self, parent_id=None, parent_name=None, link=None, track_order=N group_json = rsp groupId = GroupID(self, group_json) sub_group = Group(groupId) + if track_order or cfg.track_order: + sub_group._track_order = True if parent_name: if parent_name[-1] == '/': parent_name = parent_name + link @@ -272,8 +275,12 @@ def create_group(self, h5path, track_order=None): create_group = True if create_group: - sub_group = self._make_group(parent_id=parent_uuid, parent_name=parent_name, link=link) - sub_group._track_order = track_order + kwargs = {} + kwargs["parent_id"] = parent_uuid + kwargs["parent_name"] = parent_name + kwargs["link"] = link + kwargs["track_order"] = track_order + sub_group = self._make_group(**kwargs) parent_uuid = sub_group.id.id else: @@ -593,7 +600,7 @@ def __getitem__(self, name, track_order=None): # convert bytes to str for PY3 if isinstance(name, bytes): name = name.decode('utf-8') - self.log.debug(f"group.__getitem__({name})") + self.log.debug(f"group.__getitem__({name}, track_order={track_order})") tgt = None if isinstance(name, h5type.Reference): @@ -716,7 +723,7 @@ def get(self, name, default=None, getclass=False, getlink=False, track_order=Non """ if not (getclass or getlink): try: - return self.__getitem__(name, track_order) + return self.__getitem__(name, track_order=track_order) except KeyError: return default diff --git a/test/hl/test_attribute.py b/test/hl/test_attribute.py index 21d2290..408ed99 100644 --- a/test/hl/test_attribute.py +++ b/test/hl/test_attribute.py @@ -292,49 +292,68 @@ def test_delete_multiple(self): class TestTrackOrder(TestCase): - def fill_attrs(self, track_order): - attrs = self.f.create_group('test', track_order=track_order).attrs - for i in range(100): - attrs[str(i)] = i - return attrs + titles = ("one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten") + + def fill_attrs(self, obj): + count = len(self.titles) + attrs = obj.attrs + for i in range(count): + title = self.titles[i] + val = i + 1 + attrs[title] = val - # https://forum.hdfgroup.org/t/bug-h5arename-fails-unexpectedly/4881 def test_track_order(self): filename = self.getFileName("test_test_track_order_attribute") print(f"filename: {filename}") - self.f = h5py.File(filename, 'w') - attrs = self.fill_attrs(track_order=True) # creation order - self.assertEqual(list(attrs), - [str(i) for i in range(100)]) + with h5py.File(filename, 'w') as f: + g1 = f.create_group('test', track_order=True) + self.fill_attrs(g1) + self.assertEqual(list(g1.attrs), list(self.titles)) + # group should return track order + with h5py.File(filename) as f: + g1 = f['test'] + self.assertEqual(list(g1.attrs), list(self.titles)) + + def test_track_order_cfg(self): + filename = self.getFileName("test_test_track_order_attribute") + print(f"filename: {filename}") + cfg = h5py.get_config() + with h5py.File(filename, 'w') as f: + cfg.track_order = True + g1 = f.create_group('test') + cfg.track_order = False # reset + + self.fill_attrs(g1) + self.assertEqual(list(g1.attrs), list(self.titles)) + + with h5py.File(filename) as f: + g1 = f['test'] + self.assertEqual(list(g1.attrs), list(self.titles)) def test_no_track_order(self): filename = self.getFileName("test_test_no_track_order_attribute") print(f"filename: {filename}") - self.f = h5py.File(filename, 'w') - attrs = self.fill_attrs(track_order=False) # name alphanumeric - self.assertEqual(list(attrs), - sorted([str(i) for i in range(100)])) - - def fill_attrs2(self, track_order): - group = self.f.create_group('test', track_order=track_order) - for i in range(12): - group.attrs[str(i)] = i - return group + f = h5py.File(filename, 'w') + g1 = f.create_group('test') # name alphanumeric + self.fill_attrs(g1) + self.assertEqual(list(g1.attrs), sorted(list(self.titles))) def test_track_order_overwrite_delete(self): filename = self.getFileName("test_test_track_order_overwrite_delete") print(f"filename: {filename}") - self.f = h5py.File(filename, 'w') - # issue h5py#1385 - group = self.fill_attrs2(track_order=True) # creation order - self.assertEqual(group.attrs["11"], 11) + f = h5py.File(filename, 'w') + + g1 = f.create_group("g1", track_order=True) # creation order + self.fill_attrs(g1) + title = 'three' + self.assertEqual(g1.attrs[title], 3) # overwrite attribute - group.attrs['11'] = 42.0 - self.assertEqual(group.attrs["11"], 42.0) + g1.attrs[title] = 42.0 + self.assertEqual(g1.attrs[title], 42.0) # delete attribute - self.assertIn('10', group.attrs) - del group.attrs['10'] - self.assertNotIn('10', group.attrs) + self.assertIn(title, g1.attrs) + del g1.attrs[title] + self.assertNotIn(title, g1.attrs) def test_track_order_not_inherited(self): """ @@ -343,13 +362,11 @@ def test_track_order_not_inherited(self): """ filename = self.getFileName("test_test_track_order_not_inherited") print(f"filename: {filename}") - self.f = h5py.File(filename, 'w', track_order=True) - group = self.f.create_group('test') - - for i in range(12): - group.attrs[str(i)] = i + f = h5py.File(filename, 'w', track_order=True) + g1 = f.create_group('test') + self.fill_attrs(g1) - self.assertEqual(list(group.attrs), sorted([str(i) for i in range(12)])) + self.assertEqual(list(g1.attrs), sorted(list(self.titles))) if __name__ == '__main__': diff --git a/test/hl/test_file.py b/test/hl/test_file.py index d8da2bd..a8f6760 100644 --- a/test/hl/test_file.py +++ b/test/hl/test_file.py @@ -350,27 +350,53 @@ def test_close(self): class TestTrackOrder(TestCase): + titles = ("one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten") + def populate(self, f): - count = 3 + count = len(self.titles) + # create count datasets/groups for i in range(count): + title = self.titles[i] # Mix group and dataset creation. - if i % 10 == 0: - f.create_group(str(i)) + if i % 2 == 0: + f.create_group(title) else: - f[str(i)] = [i] - return count + f[title] = [i] + # create count attributes + for i in range(count): + title = self.titles[i] + f.attrs[title] = i def test_track_order(self): filename = self.getFileName("test_track_order_file") print(f"filename: {filename}") # write file using creation order with h5py.File(filename, 'w', track_order=True) as f: - count = self.populate(f) - self.assertEqual(list(f), [str(i) for i in range(count)]) + self.populate(f) + self.assertEqual(list(f), list(self.titles)) + self.assertEqual(list(f.attrs), list(self.titles)) + + with h5py.File(filename) as f: + # domain/file should have been saved with track_order state + self.assertEqual(list(f), list(self.titles)) + self.assertEqual(list(f.attrs), list(self.titles)) + + def test_cfg_track_order(self): + filename = self.getFileName("test_cfg_track_order_file") + print(f"filename: {filename}") + # write file using creation order + cfg = h5py.get_config() + cfg.track_order = True + with h5py.File(filename, 'w') as f: + self.populate(f) + self.assertEqual(list(f), list(self.titles)) + self.assertEqual(list(f.attrs), list(self.titles)) + cfg.track_order = False # reset with h5py.File(filename) as f: # domain/file should have been saved with track_order state - self.assertEqual(list(f), [str(i) for i in range(count)]) + self.assertEqual(list(f), list(self.titles)) + self.assertEqual(list(f.attrs), list(self.titles)) def test_no_track_order(self): filename = self.getFileName("test_no_track_order_file") @@ -378,11 +404,11 @@ def test_no_track_order(self): # create file using alphanumeric order with h5py.File(filename, 'w', track_order=False) as f: - count = self.populate(f) - self.assertEqual(list(f), sorted([str(i) for i in range(count)])) + self.populate(f) + self.assertEqual(list(f), sorted(self.titles)) with h5py.File(filename) as f: # name alphanumeric - self.assertEqual(list(f), sorted([str(i) for i in range(count)])) + self.assertEqual(list(f), sorted(self.titles)) if __name__ == '__main__': diff --git a/test/hl/test_group.py b/test/hl/test_group.py index 4b86702..89d391d 100644 --- a/test/hl/test_group.py +++ b/test/hl/test_group.py @@ -313,7 +313,7 @@ def test_link_multi_removal(self): if config.get("use_h5py"): return filename = self.getFileName("test_link_multi_removal") - print(filename) + print(f"filename: {filename}") f = h5py.File(filename, 'w') g1 = f.create_group("g1") @@ -358,7 +358,7 @@ def test_link_multi_create(self): return filename = self.getFileName("test_link_multi_create") - print(filename) + print(f"filename: {filename}") f = h5py.File(filename, 'w') g1 = f.create_group("g1") @@ -438,7 +438,7 @@ def test_link_get_multi(self): return filename = self.getFileName("test_link_get_multi") - print(filename) + print(f"filename: {filename}") f = h5py.File(filename, 'w') g1 = f.create_group("g1") @@ -550,58 +550,85 @@ def test_link_get_multi(self): class TestTrackOrder(TestCase): + titles = ("one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten") def populate(self, g): - count = 10 + count = len(self.titles) for i in range(count): # Mix group and dataset creation. - if i % 10 == 0: - g.create_group(str(i)) + if i % 2 == 0: + g.create_group(self.titles[i]) else: - g[str(i)] = [i] - return count + g[self.titles[i]] = [i] def populate_attrs(self, d): - count = 10 + count = len(self.titles) for i in range(count): - d.attrs[str(i)] = i - return count + d.attrs[self.titles[i]] = i def test_track_order(self): filename = self.getFileName("test_track_order_group") print(f"filename: {filename}") with h5py.File(filename, 'w') as f: g = f.create_group('order', track_order=True) # creation order - count = self.populate(g) + self.populate(g) - ref = [str(i) for i in range(count)] - self.assertEqual(list(g), ref) - self.assertEqual(list(reversed(g)), list(reversed(ref))) + ref = self.titles + self.assertEqual(tuple(g), ref) + i = 0 + for title in g: + self.assertEqual(title, self.titles[i]) + i += 1 # re-opening the file should retain the track_order setting with h5py.File(filename) as f: g = f['order'] - count = len(g) - self.assertTrue(count > 0) - ref = [str(i) for i in range(count)] - self.assertEqual(list(g), ref) - self.assertEqual(list(reversed(g)), list(reversed(ref))) + self.assertEqual(len(g), len(self.titles)) + self.assertEqual(tuple(g), self.titles) + self.assertEqual(tuple(reversed(g)), tuple(reversed(self.titles))) + i = 0 + for title in g: + self.assertEqual(title, self.titles[i]) + i += 1 + + def test_track_order_cfg(self): + filename = self.getFileName("test_track_order_cfg_group") + print(f"filename: {filename}") + cfg = h5py.get_config() + with h5py.File(filename, 'w') as f: + cfg.track_order = True # creation order + g = f.create_group('order') + cfg.track_order = False # reset + self.populate(g) + self.assertEqual(tuple(g), self.titles) + i = 0 + for title in g: + self.assertEqual(title, self.titles[i]) + i += 1 + + # re-opening the file should retain the track_order setting + with h5py.File(filename) as f: + g = f['order'] + self.assertEqual(len(g), len(self.titles)) + self.assertEqual(tuple(g), self.titles) + i = 0 + for title in g: + self.assertEqual(title, self.titles[i]) + i += 1 def test_no_track_order(self): filename = self.getFileName("test_no_track_order_group") print(f"filename: {filename}") with h5py.File(filename, 'w') as f: g = f.create_group('order', track_order=False) # name alphanumeric - count = self.populate(g) - ref = sorted([str(i) for i in range(count)]) + self.populate(g) + ref = sorted(self.titles) self.assertEqual(list(g), ref) self.assertEqual(list(reversed(g)), list(reversed(ref))) with h5py.File(filename) as f: g = f['order'] # name alphanumeric - count = len(g) - self.assertTrue(count > 0) - ref = sorted([str(i) for i in range(count)]) + ref = sorted(self.titles) self.assertEqual(list(g), ref) self.assertEqual(list(reversed(g)), list(reversed(ref))) @@ -619,21 +646,18 @@ def test_get_dataset_track_order(self): dset = g.create_dataset('dset', (10,), dtype='i4') dset2 = g.create_dataset('dset2', (10,), dtype='i4') - count1 = self.populate_attrs(dset) - count2 = self.populate_attrs(dset2) + self.populate_attrs(dset) + self.populate_attrs(dset2) with h5py.File(filename) as f: g = f['order'] d = g.get('dset', track_order=True) - ref = [str(i) for i in range(count1)] - self.assertEqual(list(d.attrs), ref) - self.assertEqual(list(reversed(d.attrs)), list(reversed(ref))) + self.assertEqual(list(d.attrs), list(self.titles)) d2 = g.get('dset2', track_order=False) - ref = sorted([str(i) for i in range(count2)]) + ref = sorted(self.titles) self.assertEqual(list(d2.attrs), ref) - self.assertEqual(list(reversed(d2.attrs)), list(reversed(ref))) def test_get_group_track_order(self): # h5py does not support track_order on group.get() @@ -645,22 +669,17 @@ def test_get_group_track_order(self): g = f.create_group('order') # create subgroup and populate it with links g.create_group('subgroup') - count = self.populate(g['subgroup']) + self.populate(g['subgroup']) with h5py.File(filename) as f: g = f['order'] subg = g.get('subgroup', track_order=True) - ref = [str(i) for i in range(count)] - self.assertEqual(list(subg), ref) - self.assertEqual(list(reversed(subg)), list(reversed(ref))) + self.assertEqual(tuple(subg), self.titles) with h5py.File(filename) as f: g = f['order'] subg2 = g.get('subgroup', track_order=False) - count = len(subg2) - ref = sorted([str(i) for i in range(count)]) - self.assertEqual(list(subg2), ref) - self.assertEqual(list(reversed(subg2)), list(reversed(ref))) + self.assertEqual(list(subg2), sorted(self.titles)) if __name__ == '__main__': From 0699a0869fc16df755c5ad7e70e19f9a3f016e84 Mon Sep 17 00:00:00 2001 From: John Readey Date: Thu, 26 Dec 2024 19:03:35 +0800 Subject: [PATCH 3/8] fix flake8 errors --- h5pyd/config.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/h5pyd/config.py b/h5pyd/config.py index f153171..753c641 100755 --- a/h5pyd/config.py +++ b/h5pyd/config.py @@ -17,7 +17,7 @@ class Config: """ User Config state """ - _cfg = {} # global state + _cfg = {} # global state def __init__(self, config_file=None, **kwargs): if Config._cfg: @@ -109,23 +109,23 @@ def __repr__(self): def keys(self): return Config._cfg.keys() - + @property def hs_endpoint(self): return Config._cfg.get("hs_endpoint") - + @property def hs_username(self): return Config._cfg.get("hs_username") - + @property def hs_password(self): return Config._cfg.get("hs_password") - + @property def hs_api_key(self): return Config._cfg.get("hs_api_key") - + @property def bool_names(self): if "bool_names" in Config._cfg: @@ -133,7 +133,7 @@ def bool_names(self): else: names = (b"FALSE", b"TRUE") return names - + @bool_names.setter def bool_names(self, value): if isinstance(value, str): @@ -149,7 +149,7 @@ def bool_names(self, value): else: names = value Config._cfg["bool_names"] = tuple(names) - + @property def complex_names(self): if "complex_names" in Config._cfg: @@ -157,7 +157,7 @@ def complex_names(self): else: names = ("r", "i") return names - + @complex_names.setter def complex_names(self, value): if isinstance(value, str): @@ -182,7 +182,7 @@ def track_order(self): else: track = False return track - + @track_order.setter def track_order(self, value): if isinstance(value, str): @@ -194,7 +194,7 @@ def track_order(self, value): else: track = bool(value) Config._cfg["track_order"] = track - - + + def get_config(config_file=None, **kwargs): return Config(config_file=config_file, **kwargs) From ede4621d4f3f32debb9ed5e8d8c84b07d53c0c7c Mon Sep 17 00:00:00 2001 From: John Readey Date: Thu, 26 Dec 2024 19:37:46 +0800 Subject: [PATCH 4/8] fix null ref --- h5pyd/_hl/files.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/h5pyd/_hl/files.py b/h5pyd/_hl/files.py index 22ad0b4..57d31a2 100644 --- a/h5pyd/_hl/files.py +++ b/h5pyd/_hl/files.py @@ -383,7 +383,7 @@ def __init__( endpoint = cfg["hs_endpoint"] # remove the trailing slash on endpoint if it exists - if endpoint.endswith('/'): + if endpoint and endpoint.endswith('/'): endpoint = endpoint.strip('/') if username is None: From 69c49f3efba90ac27e2d30757cacfe6f9ea19d7f Mon Sep 17 00:00:00 2001 From: John Readey Date: Thu, 26 Dec 2024 19:44:45 +0800 Subject: [PATCH 5/8] fix null ref --- h5pyd/_hl/files.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/h5pyd/_hl/files.py b/h5pyd/_hl/files.py index 57d31a2..c9d4be5 100644 --- a/h5pyd/_hl/files.py +++ b/h5pyd/_hl/files.py @@ -49,7 +49,7 @@ class H5Image(io.RawIOBase): def __init__(self, domain_path, h5path="h5image", chunks_per_page=1, logger=None): """ verify dataset can be accessed and set logger if supplied """ self._cursor = 0 - if domain_path.startswith("hdf5::/"): + if domain_path and domain_path.startswith("hdf5::/"): self._domain_path = domain_path else: self._domain_path = "hdf5:/" + domain_path @@ -354,7 +354,7 @@ def __init__( # # For http prefixed values, extract the endpont and use the rest as domain path for protocol in ("http://", "https://", "hdf5://", "http+unix://"): - if domain.startswith(protocol): + if domain and domain.startswith(protocol): if protocol.startswith("http"): domain = domain[len(protocol):] # extract the endpoint @@ -436,7 +436,7 @@ def __init__( # need some special logic for the first request in local mode # to give the sockets time to initialize - if endpoint.startswith("local"): + if endpoint and endpoint.startswith("local"): connect_backoff = [0.5, 1, 2, 4, 8, 16] else: connect_backoff = [] From 1228a8b7117208f08bbeca4b82ddae50bd138039 Mon Sep 17 00:00:00 2001 From: John Readey Date: Thu, 26 Dec 2024 20:10:19 +0800 Subject: [PATCH 6/8] fix null ref --- h5pyd/_hl/serverinfo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/h5pyd/_hl/serverinfo.py b/h5pyd/_hl/serverinfo.py index 66c3a04..10203cb 100644 --- a/h5pyd/_hl/serverinfo.py +++ b/h5pyd/_hl/serverinfo.py @@ -40,7 +40,7 @@ def getServerInfo(endpoint=None, username=None, password=None, api_key=None, **k # need some special logic for the first request in local mode # to give the sockets time to initialize - if endpoint.startswith("local"): + if endpoint and endpoint.startswith("local"): connect_backoff = [0.5, 1, 2, 4, 8, 16] else: connect_backoff = [] From ab8c97f4b1f36db1d3a0c80359a0558aa767eac2 Mon Sep 17 00:00:00 2001 From: John Readey Date: Thu, 26 Dec 2024 20:20:27 +0800 Subject: [PATCH 7/8] add expected keys to config dict --- h5pyd/config.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/h5pyd/config.py b/h5pyd/config.py index 753c641..57b6676 100755 --- a/h5pyd/config.py +++ b/h5pyd/config.py @@ -55,6 +55,11 @@ def __init__(self, config_file=None, **kwargs): else: Config._cfg[k] = v + # add standard keys if not already picked up + for k in ("hs_endpoint", "hs_username", "hs_password", "hs_api_key"): + if k not in Config._cfg: + Config._cfg[k] = "" + # override any config values with environment variable if found for k in Config._cfg.keys(): if k.upper() in os.environ: From 4ca47d62b8cadb3f189ca09259add9d0d7b7e523 Mon Sep 17 00:00:00 2001 From: John Readey Date: Fri, 27 Dec 2024 10:35:12 +0800 Subject: [PATCH 8/8] version bump --- h5pyd/version.py | 6 +++--- pyproject.toml | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/h5pyd/version.py b/h5pyd/version.py index f1051e9..db13f9e 100644 --- a/h5pyd/version.py +++ b/h5pyd/version.py @@ -16,7 +16,7 @@ import sys import numpy -version = "0.19.0" +version = "0.20.0" hdf5_version = "REST" @@ -28,8 +28,8 @@ else ("",) ) -api_version_tuple = (0, 19, 0) -api_version = "0.19.0" +api_version_tuple = (0, 20, 0) +api_version = "0.20.0" __doc__ = f"""\ This is h5pyd **{version}** diff --git a/pyproject.toml b/pyproject.toml index fe230ab..28239dc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,7 +30,7 @@ classifiers = [ "Topic :: Software Development :: Libraries :: Python Modules", ] requires-python = ">=3.8" -version = "0.19.0" +version = "0.20.0" dependencies = [ "numpy >=2.0.0rc1; python_version>='3.9'",