From 17d831b9e2fcb49f8a15efba4e72e0df5f0ea89c Mon Sep 17 00:00:00 2001 From: Alexander VT Date: Wed, 1 Sep 2021 13:08:50 -0500 Subject: [PATCH 1/6] feat(ga4gh): move DRS access API to fence --- fence/__init__.py | 2 + fence/blueprints/data/indexd.py | 4 +- fence/blueprints/ga4gh.py | 27 ++++ tests/conftest.py | 4 +- tests/test_drs.py | 232 ++++++++++++++++++++++++++++++++ 5 files changed, 265 insertions(+), 4 deletions(-) create mode 100644 fence/blueprints/ga4gh.py create mode 100644 tests/test_drs.py diff --git a/fence/__init__.py b/fence/__init__.py index bc78cca5a..c55a01777 100755 --- a/fence/__init__.py +++ b/fence/__init__.py @@ -53,6 +53,7 @@ import fence.blueprints.google import fence.blueprints.privacy import fence.blueprints.register +import fence.blueprints.ga4gh # for some reason the temp dir does not get created properly if we move @@ -139,6 +140,7 @@ def app_register_blueprints(app): ) app.register_blueprint(fence.blueprints.register.blueprint, url_prefix="/register") + app.register_blueprint(fence.blueprints.ga4gh.blueprint, url_prefix="/ga4gh") fence.blueprints.misc.register_misc(app) diff --git a/fence/blueprints/data/indexd.py b/fence/blueprints/data/indexd.py index fde0e353e..63d94b877 100755 --- a/fence/blueprints/data/indexd.py +++ b/fence/blueprints/data/indexd.py @@ -62,8 +62,8 @@ ANONYMOUS_USERNAME = "anonymous" -def get_signed_url_for_file(action, file_id, file_name=None): - requested_protocol = flask.request.args.get("protocol", None) +def get_signed_url_for_file(action, file_id, file_name=None, requested_protocol=None): + requested_protocol = requested_protocol or flask.request.args.get("protocol", None) r_pays_project = flask.request.args.get("userProject", None) # default to signing the url even if it's a public object diff --git a/fence/blueprints/ga4gh.py b/fence/blueprints/ga4gh.py new file mode 100644 index 000000000..2abcc5dcd --- /dev/null +++ b/fence/blueprints/ga4gh.py @@ -0,0 +1,27 @@ +import flask +from fence.errors import AuthError, UserError + +from fence.blueprints.data.indexd import ( + get_signed_url_for_file, +) + +blueprint = flask.Blueprint("ga4gh", __name__) + + +@blueprint.route( + "/drs/v1/objects//access", + defaults={"access_id": None}, + methods=["GET"], +) +@blueprint.route( + "/drs/v1/objects//access/", + methods=["GET", "POST"], +) +def get_ga4gh_signed_url(object_id, access_id): + if not access_id: + raise UserError("Access ID/Protocol is required.") + + result = get_signed_url_for_file( + "download", object_id, requested_protocol=access_id + ) + return flask.jsonify(result) diff --git a/tests/conftest.py b/tests/conftest.py index f9e7a3fe0..0bbdb86ea 100755 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -568,7 +568,7 @@ def indexd_client(app, request): "created_date": "", "updated_date": "", } - elif request.param == "az": + elif protocol == "az": record = { "did": "", "baseid": "", @@ -584,7 +584,7 @@ def indexd_client(app, request): "created_date": "", "updated_date": "", } - elif request.param == "https": + elif protocol == "https": record = { "did": "", "baseid": "", diff --git a/tests/test_drs.py b/tests/test_drs.py new file mode 100644 index 000000000..2769784de --- /dev/null +++ b/tests/test_drs.py @@ -0,0 +1,232 @@ +import json +import jwt +import pytest +import requests +import responses +from tests import utils + + +def get_doc(has_version=True, urls=list(), drs_list=0): + doc = { + "form": "object", + "size": 123, + "urls": ["s3://endpointurl/bucket/key"], + "hashes": {"md5": "1234"}, + } + if has_version: + doc["version"] = "1" + if urls: + doc["urls"] = urls + + return doc + + +@responses.activate +@pytest.mark.parametrize("indexd_client", ["s3", "gs"], indirect=True) +def test_get_presigned_url_unauthorized( + client, + indexd_client, + kid, + rsa_private_key, + google_proxy_group, + primary_google_service_account, + cloud_manager, + google_signed_url, +): + access_id = indexd_client["indexed_file_location"] + test_guid = "1" + user = {"Authorization": "Bearer INVALID"} + + res = client.get( + "/ga4gh/drs/v1/objects/" + test_guid + f"/access/{access_id}", + headers=user, + ) + assert res.status_code == 401 + + +@responses.activate +@pytest.mark.parametrize("indexd_client", ["s3", "gs"], indirect=True) +def test_get_presigned_url_with_access_id( + client, + user_client, + indexd_client, + kid, + rsa_private_key, + google_proxy_group, + primary_google_service_account, + cloud_manager, + google_signed_url, +): + access_id = indexd_client["indexed_file_location"] + test_guid = "1" + user = { + "Authorization": "Bearer " + + jwt.encode( + utils.authorized_download_context_claims( + user_client.username, user_client.user_id + ), + key=rsa_private_key, + headers={"kid": kid}, + algorithm="RS256", + ).decode("utf-8") + } + + res = client.get( + "/ga4gh/drs/v1/objects/" + test_guid + "/access/" + access_id, + headers=user, + ) + assert res.status_code == 200 + + +@pytest.mark.parametrize("indexd_client", ["s3", "gs"], indirect=True) +def test_get_presigned_url_no_access_id( + client, + user_client, + indexd_client, + kid, + rsa_private_key, + google_proxy_group, + primary_google_service_account, + cloud_manager, + google_signed_url, +): + access_id = indexd_client["indexed_file_location"] + test_guid = "1" + user = { + "Authorization": "Bearer " + + jwt.encode( + utils.authorized_download_context_claims( + user_client.username, user_client.user_id + ), + key=rsa_private_key, + headers={"kid": kid}, + algorithm="RS256", + ).decode("utf-8") + } + + res = client.get( + "/ga4gh/drs/v1/objects/" + test_guid + "/access/", + headers=user, + ) + assert res.status_code == 400 + + +@pytest.mark.parametrize("indexd_client", ["s3", "gs"], indirect=True) +def test_get_presigned_url_no_bearer_token( + client, + indexd_client, +): + access_id = indexd_client["indexed_file_location"] + test_guid = "1" + + res = client.get("/ga4gh/drs/v1/objects/" + test_guid + f"/access/{access_id}") + assert res.status_code == 401 + + +@responses.activate +def test_get_presigned_url_wrong_access_id( + client, + user_client, + indexd_client, + kid, + rsa_private_key, + google_proxy_group, + primary_google_service_account, + cloud_manager, + google_signed_url, +): + test_guid = "1" + user = { + "Authorization": "Bearer " + + jwt.encode( + utils.authorized_download_context_claims( + user_client.username, user_client.user_id + ), + key=rsa_private_key, + headers={"kid": kid}, + algorithm="RS256", + ).decode("utf-8") + } + res = client.get( + "/ga4gh/drs/v1/objects/" + test_guid + "/access/s2", + headers=user, + ) + assert res.status_code == 404 + + +@responses.activate +@pytest.mark.parametrize("indexd_client", ["s3", "gs"], indirect=True) +def test_get_presigned_url_with_encoded_slash( + client, + user_client, + indexd_client, + kid, + rsa_private_key, + google_proxy_group, + primary_google_service_account, + cloud_manager, + google_signed_url, +): + access_id = indexd_client["indexed_file_location"] + test_guid = "1" + user = { + "Authorization": "Bearer " + + jwt.encode( + utils.authorized_download_context_claims( + user_client.username, user_client.user_id + ), + key=rsa_private_key, + headers={"kid": kid}, + algorithm="RS256", + ).decode("utf-8") + } + data = get_doc() + data["did"] = "dg.TEST/ed8f4658-6acd-4f96-9dd8-3709890c959e" + did = "dg.TEST%2Fed8f4658-6acd-4f96-9dd8-3709890c959e" + + res = client.get( + "/ga4gh/drs/v1/objects/" + did + "/access/" + access_id, + headers=user, + ) + assert res.status_code == 200 + + +@responses.activate +@pytest.mark.parametrize("indexd_client", ["s3", "gs"], indirect=True) +def test_get_presigned_url_with_query_params( + client, + user_client, + indexd_client, + kid, + rsa_private_key, + google_proxy_group, + primary_google_service_account, + cloud_manager, + google_signed_url, +): + access_id = indexd_client["indexed_file_location"] + test_guid = "1" + user = { + "Authorization": "Bearer " + + jwt.encode( + utils.authorized_download_context_claims( + user_client.username, user_client.user_id + ), + key=rsa_private_key, + headers={"kid": kid}, + algorithm="RS256", + ).decode("utf-8") + } + data = get_doc() + data["did"] = "dg.TEST/ed8f4658-6acd-4f96-9dd8-3709890c959e" + did = "dg.TEST%2Fed8f4658-6acd-4f96-9dd8-3709890c959e" + + res = client.get( + "/ga4gh/drs/v1/objects/" + + did + + "/access/" + + access_id + + "?userProject=someproject&arbitrary_parameter=val", + headers=user, + ) + assert res.status_code == 200 From e216282a0249aa509602c396d82ebfad2f07e969 Mon Sep 17 00:00:00 2001 From: Alexander VT Date: Wed, 1 Sep 2021 16:16:52 -0500 Subject: [PATCH 2/6] tests(users): change assertion to only rely on information from that test --- tests/admin/test_admin_users_endpoints.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/admin/test_admin_users_endpoints.py b/tests/admin/test_admin_users_endpoints.py index 2d8b15f01..f9083a138 100644 --- a/tests/admin/test_admin_users_endpoints.py +++ b/tests/admin/test_admin_users_endpoints.py @@ -239,7 +239,8 @@ def test_get_user( "/admin/user", headers={"Authorization": "Bearer " + encoded_admin_jwt} ) assert r.status_code == 200 - assert len(r.json["users"]) == 4 + # should at least have the users added from above (may have more from other tests) + assert len(r.json["users"]) >= 4 usernames = [user["name"] for user in r.json["users"]] assert "test_a" in usernames assert "test_b" in usernames From c95130324b8df4dfcdd7cefecc8989a9ecb961d8 Mon Sep 17 00:00:00 2001 From: Alexander VT Date: Wed, 1 Sep 2021 16:30:18 -0500 Subject: [PATCH 3/6] chore(imports): remove unused import --- fence/blueprints/ga4gh.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fence/blueprints/ga4gh.py b/fence/blueprints/ga4gh.py index 2abcc5dcd..c02ba4f7a 100644 --- a/fence/blueprints/ga4gh.py +++ b/fence/blueprints/ga4gh.py @@ -1,5 +1,5 @@ import flask -from fence.errors import AuthError, UserError +from fence.errors import UserError from fence.blueprints.data.indexd import ( get_signed_url_for_file, From 00fff630479da8e81d801ab68903635492132461 Mon Sep 17 00:00:00 2001 From: Alexander VT Date: Thu, 2 Sep 2021 12:21:44 -0500 Subject: [PATCH 4/6] chore(logs): don't mention microservice in logs --- fence/blueprints/data/indexd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fence/blueprints/data/indexd.py b/fence/blueprints/data/indexd.py index 63d94b877..ca02c0fe4 100755 --- a/fence/blueprints/data/indexd.py +++ b/fence/blueprints/data/indexd.py @@ -404,7 +404,7 @@ def get_signed_url( raise Unauthorized( f"Either you weren't logged in or you don't have " f"{action_to_permission[action]} permission " - f"on {self.index_document['authz']} for fence" + f"on authz resource: {self.index_document['authz']}" ) else: if self.public_acl and action == "upload": From 8d33d052d906c4cfa5ae32abed674515bd0ef2ba Mon Sep 17 00:00:00 2001 From: Alexander VT Date: Wed, 8 Sep 2021 09:58:32 -0500 Subject: [PATCH 5/6] docs(ga4gh): add new data access to swagger docs --- openapis/swagger.yaml | 48 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/openapis/swagger.yaml b/openapis/swagger.yaml index 3b0875a5b..a76b0a228 100644 --- a/openapis/swagger.yaml +++ b/openapis/swagger.yaml @@ -489,6 +489,46 @@ paths: description: 'Invalid input: UUID not found or invalid location' '404': description: 'No location found for this file' + '/ga4gh/drs/v1/objects/{object_id}/access/{access_id}': + get: + summary: GA4GH DRS Access API to get a URL for fetching bytes. + description: >- + Returns a URL that can be used to fetch the bytes of a DrsObject. + This method only needs to be called when using an AccessMethod that contains an access_id + (e.g., for servers that use signed URLs for fetching object bytes). + operationId: GetAccessURL + responses: + '200': + description: The access URL was found successfully. + content: + '*/*': + schema: + $ref: '#/components/schemas/AccessURL' + '400': + description: The request is malformed. + '401': + description: The request is unauthorized. + '404': + description: The requested access URL wasn't found + '403': + description: The requester is not authorized to perform this action. + '500': + description: An unexpected error occurred. + parameters: + - name: object_id + schema: + type: string + in: path + required: true + description: An id of a DrsObject + - name: access_id + schema: + type: string + in: path + required: true + description: An access_id from the access_methods list of a DrsObject + tags: + - data '/data/upload': post: tags: @@ -2122,6 +2162,14 @@ components: email: type: string description: 'The email of the end-user' + AccessURL: + type: object + required: + - url + properties: + url: + type: string + description: 'A fully resolvable URL that can be used to fetch the actual object bytes.' UserInfo: type: object required: From ad8fa2187964f5e6a26676dc6ea50083041eb9f9 Mon Sep 17 00:00:00 2001 From: Alexander VT Date: Wed, 8 Sep 2021 10:32:51 -0500 Subject: [PATCH 6/6] chore(version): bump minor version for new feature --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 60ad6420b..0008806f1 100755 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "fence" -version = "5.4.1" +version = "5.5.0" description = "Gen3 AuthN/AuthZ OIDC Service" authors = ["CTDS UChicago "] license = "Apache-2.0"