Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Multi-Link API #203

Merged
merged 6 commits into from
Jun 7, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
129 changes: 119 additions & 10 deletions h5pyd/_hl/group.py
Original file line number Diff line number Diff line change
Expand Up @@ -679,7 +679,19 @@ def __getitem__(self, name, track_order=False):
tgt._name = name
return tgt

def get(self, name, default=None, getclass=False, getlink=False, track_order=False):
def objectify_link_json(self, link_json):
    """ Convert a link description dict into a link object.

    The kind of link is decided by which keys are present:

    "id" present:
        Return HardLink built from the "id" value.

    "h5path" present, "h5domain" absent:
        Return SoftLink built from the "h5path" value.

    "h5path" and "h5domain" both present:
        Return ExternalLink built from the "h5domain" and "h5path" values.

    Raises ValueError if neither "id" nor "h5path" is present.
    """
    # "id" wins over any path keys that happen to be present as well
    if "id" in link_json:
        return HardLink(link_json["id"])

    # without an id, a path is the minimum needed to describe a link
    if "h5path" not in link_json:
        raise ValueError("Invalid link JSON")

    # a domain alongside the path marks the link as external
    if "h5domain" in link_json:
        return ExternalLink(link_json["h5domain"], link_json["h5path"])

    return SoftLink(link_json["h5path"])

def get(self, name, default=None, getclass=False, getlink=False, track_order=False, **kwds):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The description for "name" seems incorrect.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For methods that aren't intended to be called outside the class, let's prepend with "_".

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the description for name-only operation is accurate - it makes a __getitem__ call, returning the item that the link points to if it exists, or default if it doesn't.

""" Retrieve an item or other information.

"name" given only:
Expand All @@ -697,6 +709,21 @@ def get(self, name, default=None, getclass=False, getlink=False, track_order=Fal
Return HardLink, SoftLink and ExternalLink classes. Return
"default" if nothing with that name exists.

"limit" is an integer:
If "name" is None, this will return the first "limit" links in the group.

"marker" is a string:
If "name" is None, this will return only the links that come after the marker in the group's link ordering.

"pattern" is a string:
If "name" is None, this will return only the links that match the given pattern
in the target group (and subgroups, if follow_links is provided).
Matching is done according to Unix pathname expansion rules.

"follow_links" is True:
If "name" is None, subgroups of the target group will be recursively searched
for links that match the given names or pattern.

Example:

>>> cls = group.get('foo', getclass=True)
Expand All @@ -709,7 +736,7 @@ def get(self, name, default=None, getclass=False, getlink=False, track_order=Fal
except KeyError:
return default

if name not in self:
if name is not None and name not in self:
return default

elif getclass and not getlink:
Expand All @@ -726,6 +753,52 @@ def get(self, name, default=None, getclass=False, getlink=False, track_order=Fal
raise TypeError("Unknown object type")

elif getlink:
if name is None:
# Get all links in target group(s)
# Retrieve "limit", "marker", and "pattern" from kwds
limit = kwds.get("limit", None)
marker = kwds.get("marker", None)
pattern = kwds.get("pattern", None)
follow_links = kwds.get("follow_links", False)

req = "/groups/" + self.id.uuid + "/links"
params = {}

if limit:
params["Limit"] = limit
if marker:
params["Marker"] = marker
if pattern:
params["pattern"] = pattern
if follow_links:
params["follow_links"] = 1
if track_order:
params["CreateOrder"] = 1

rsp = self.GET(req, params=params)

if "links" in rsp:
# Process list of link objects so they may be accessed by name
links = rsp['links']
links_out = {}
if all([isUUID(k) for k in links]):
# Multiple groups queried, links are returned under group ids
for group_id in links:
group_links = {}

for link in links[group_id]:
group_links[link["title"]] = self.objectify_link_json(link)

links_out[group_id] = group_links

else:
for link in links:
links_out[link["title"]] = self.objectify_link_json(link)
else:
raise ValueError("Can't parse server response to links query")

return links_out

parent_uuid, link_json = self._get_link_json(name)
typecode = link_json['class']

Expand All @@ -740,7 +813,7 @@ def get(self, name, default=None, getclass=False, getlink=False, track_order=Fal

return ExternalLink(link_json['h5domain'], link_json['h5path'])
elif typecode == 'H5L_TYPE_HARD':
return HardLink if getclass else HardLink()
return HardLink if getclass else HardLink(link_json['id'])
else:
raise TypeError("Unknown link type")

Expand Down Expand Up @@ -768,7 +841,27 @@ def __setitem__(self, name, obj):
values are stored as scalar datasets. Raise ValueError if we
can't understand the resulting array dtype.
"""
if name.find('/') != -1:
if isinstance(name, list) and isinstance(obj, list):
if len(name) != len(obj):
raise ValueError("name and object list lengths do not match")

links = {}

for i in range(len(name)):
if isinstance(obj[i], HLObject):
links[name[i]] = {"id": obj[i].id.uuid}
elif isinstance(obj[i], SoftLink):
links[name[i]] = {"h5path": obj[i].path}
elif isinstance(obj[i], ExternalLink):
links[name[i]] = {"h5path": obj[i].path, "h5domain": obj[i].filename}
else:
raise ValueError("only links are supported for multiple object creation")

body = {"links": links}
req = "/groups/" + self.id.uuid + "/links"
self.PUT(req, body=body)

elif name.find('/') != -1:
parent_path = op.dirname(name)
basename = op.basename(name)
if not basename:
Expand Down Expand Up @@ -855,12 +948,20 @@ def __delitem__(self, name):
raise IOError("Not found")

else:
# delete the link, not an object
req = "/groups/" + self.id.uuid + "/links/" + name
# delete the link(s), not an object
if isinstance(name, list):
# delete multiple links
req = "/groups/" + self.id.uuid + "/links?titles=" + '/'.join(name)
else:
# delete single link
req = "/groups/" + self.id.uuid + "/links/" + name

self.DELETE(req)
if name.find('/') == -1 and name in self._link_db:
# remove from link cache
del self._link_db[name]

for n in name:
if n.find('/') == -1 and n in self._link_db:
# remove from link cache
del self._link_db[name]

def __len__(self):
""" Number of members attached to this group """
Expand Down Expand Up @@ -1186,8 +1287,16 @@ class HardLink(object):
Represents a hard link in an HDF5 file. Provided only so that
Group.get works in a sensible way. Has no other function.
"""
@property
# The uuid of the target object
def id(self):
return self._id

def __init__(self, id=None):
self._id = id

pass
def __repr__(self):
return f'<HardLink to "{self.id}">'


# TODO: implement equality testing for these
Expand Down
6 changes: 6 additions & 0 deletions h5pyd/_hl/httpconn.py
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,8 @@ def GET(self, req, format="json", params=None, headers=None, use_cache=True):
check_cache = self._cache is not None and use_cache and format == "json"
check_cache = check_cache and params["domain"] == self._domain
check_cache = check_cache and "select" not in params and "query" not in params
check_cache = check_cache and "follow_links" not in params and "pattern" not in params
check_cache = check_cache and "Limit" not in params and "Marker" not in params

if check_cache:
self.log.debug("httpcon - checking cache")
Expand All @@ -448,6 +450,7 @@ def GET(self, req, format="json", params=None, headers=None, use_cache=True):
self.log.info(
f"GET: {self._endpoint + req} [{params['domain']}] timeout: {self._timeout}"
)

for k in params:
if k != "domain":
v = params[k]
Expand All @@ -462,6 +465,7 @@ def GET(self, req, format="json", params=None, headers=None, use_cache=True):
stream = False
else:
stream = True

rsp = s.get(
self._endpoint + req,
params=params,
Expand Down Expand Up @@ -497,6 +501,8 @@ def GET(self, req, format="json", params=None, headers=None, use_cache=True):

add_to_cache = content_type and content_type.startswith("application/json")
add_to_cache = add_to_cache and content_length < MAX_CACHE_ITEM_SIZE and not req.endswith("/value")
add_to_cache = add_to_cache and "follow_links" not in params and "pattern" not in params
add_to_cache = add_to_cache and "Limit" not in params and "Marker" not in params

if add_to_cache:
# add to our _cache
Expand Down
Loading