diff --git a/fence/__init__.py b/fence/__init__.py index bc78cca5a..c55a01777 100755 --- a/fence/__init__.py +++ b/fence/__init__.py @@ -53,6 +53,7 @@ import fence.blueprints.google import fence.blueprints.privacy import fence.blueprints.register +import fence.blueprints.ga4gh # for some reason the temp dir does not get created properly if we move @@ -139,6 +140,7 @@ def app_register_blueprints(app): ) app.register_blueprint(fence.blueprints.register.blueprint, url_prefix="/register") + app.register_blueprint(fence.blueprints.ga4gh.blueprint, url_prefix="/ga4gh") fence.blueprints.misc.register_misc(app) diff --git a/fence/blueprints/data/indexd.py b/fence/blueprints/data/indexd.py index f801083c8..9237a2e95 100755 --- a/fence/blueprints/data/indexd.py +++ b/fence/blueprints/data/indexd.py @@ -62,8 +62,8 @@ ANONYMOUS_USERNAME = "anonymous" -def get_signed_url_for_file(action, file_id, file_name=None): - requested_protocol = flask.request.args.get("protocol", None) +def get_signed_url_for_file(action, file_id, file_name=None, requested_protocol=None): + requested_protocol = requested_protocol or flask.request.args.get("protocol", None) r_pays_project = flask.request.args.get("userProject", None) # default to signing the url even if it's a public object @@ -404,7 +404,7 @@ def get_signed_url( raise Unauthorized( f"Either you weren't logged in or you don't have " f"{action_to_permission[action]} permission " - f"on {self.index_document['authz']} for fence" + f"on authz resource: {self.index_document['authz']}" ) else: if self.public_acl and action == "upload": diff --git a/fence/blueprints/ga4gh.py b/fence/blueprints/ga4gh.py new file mode 100644 index 000000000..c02ba4f7a --- /dev/null +++ b/fence/blueprints/ga4gh.py @@ -0,0 +1,27 @@ +import flask +from fence.errors import UserError + +from fence.blueprints.data.indexd import ( + get_signed_url_for_file, +) + +blueprint = flask.Blueprint("ga4gh", __name__) + + +@blueprint.route( + "/drs/v1/objects//access", + defaults={"access_id": None}, + methods=["GET"], +) +@blueprint.route( + "/drs/v1/objects//access/", + methods=["GET", "POST"], +) +def get_ga4gh_signed_url(object_id, access_id): + if not access_id: + raise UserError("Access ID/Protocol is required.") + + result = get_signed_url_for_file( + "download", object_id, requested_protocol=access_id + ) + return flask.jsonify(result) diff --git a/openapis/swagger.yaml b/openapis/swagger.yaml index 3b0875a5b..a76b0a228 100644 --- a/openapis/swagger.yaml +++ b/openapis/swagger.yaml @@ -489,6 +489,46 @@ paths: description: 'Invalid input: UUID not found or invalid location' '404': description: 'No location found for this file' + '/ga4gh/drs/v1/objects/{object_id}/access/{access_id}': + get: + summary: GA4GH DRS Access API to get a URL for fetching bytes. + description: >- + Returns a URL that can be used to fetch the bytes of a DrsObject. + This method only needs to be called when using an AccessMethod that contains an access_id + (e.g., for servers that use signed URLs for fetching object bytes). + operationId: GetAccessURL + responses: + '200': + description: The access URL was found successfully. + content: + '*/*': + schema: + $ref: '#/components/schemas/AccessURL' + '400': + description: The request is malformed. + '401': + description: The request is unauthorized. + '404': + description: The requested access URL wasn't found + '403': + description: The requester is not authorized to perform this action. + '500': + description: An unexpected error occurred. + parameters: + - name: object_id + schema: + type: string + in: path + required: true + description: An id of a DrsObject + - name: access_id + schema: + type: string + in: path + required: true + description: An access_id from the access_methods list of a DrsObject + tags: + - data '/data/upload': post: tags: @@ -2122,6 +2162,14 @@ components: email: type: string description: 'The email of the end-user' + AccessURL: + type: object + required: + - url + properties: + url: + type: string + description: 'A fully resolvable URL that can be used to fetch the actual object bytes.' UserInfo: type: object required: diff --git a/pyproject.toml b/pyproject.toml index 60ad6420b..0008806f1 100755 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "fence" -version = "5.4.1" +version = "5.5.0" description = "Gen3 AuthN/AuthZ OIDC Service" authors = ["CTDS UChicago "] license = "Apache-2.0" diff --git a/tests/admin/test_admin_users_endpoints.py b/tests/admin/test_admin_users_endpoints.py index 2d8b15f01..f9083a138 100644 --- a/tests/admin/test_admin_users_endpoints.py +++ b/tests/admin/test_admin_users_endpoints.py @@ -239,7 +239,8 @@ def test_get_user( "/admin/user", headers={"Authorization": "Bearer " + encoded_admin_jwt} ) assert r.status_code == 200 - assert len(r.json["users"]) == 4 + # should at least have the users added from above (may have more from other tests) + assert len(r.json["users"]) >= 4 usernames = [user["name"] for user in r.json["users"]] assert "test_a" in usernames assert "test_b" in usernames diff --git a/tests/conftest.py b/tests/conftest.py index f9e7a3fe0..0bbdb86ea 100755 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -568,7 +568,7 @@ def indexd_client(app, request): "created_date": "", "updated_date": "", } - elif request.param == "az": + elif protocol == "az": record = { "did": "", "baseid": "", @@ -584,7 +584,7 @@ def indexd_client(app, request): "created_date": "", "updated_date": "", } - elif request.param == "https": + elif protocol == "https": record = { "did": "", "baseid": "", diff --git a/tests/test_drs.py b/tests/test_drs.py new file mode 100644 index 000000000..2769784de --- /dev/null +++ b/tests/test_drs.py @@ -0,0 +1,232 @@ +import json +import jwt +import pytest +import requests +import responses +from tests import utils + + +def get_doc(has_version=True, urls=list(), drs_list=0): + doc = { + "form": "object", + "size": 123, + "urls": ["s3://endpointurl/bucket/key"], + "hashes": {"md5": "1234"}, + } + if has_version: + doc["version"] = "1" + if urls: + doc["urls"] = urls + + return doc + + +@responses.activate +@pytest.mark.parametrize("indexd_client", ["s3", "gs"], indirect=True) +def test_get_presigned_url_unauthorized( + client, + indexd_client, + kid, + rsa_private_key, + google_proxy_group, + primary_google_service_account, + cloud_manager, + google_signed_url, +): + access_id = indexd_client["indexed_file_location"] + test_guid = "1" + user = {"Authorization": "Bearer INVALID"} + + res = client.get( + "/ga4gh/drs/v1/objects/" + test_guid + f"/access/{access_id}", + headers=user, + ) + assert res.status_code == 401 + + +@responses.activate +@pytest.mark.parametrize("indexd_client", ["s3", "gs"], indirect=True) +def test_get_presigned_url_with_access_id( + client, + user_client, + indexd_client, + kid, + rsa_private_key, + google_proxy_group, + primary_google_service_account, + cloud_manager, + google_signed_url, +): + access_id = indexd_client["indexed_file_location"] + test_guid = "1" + user = { + "Authorization": "Bearer " + + jwt.encode( + utils.authorized_download_context_claims( + user_client.username, user_client.user_id + ), + key=rsa_private_key, + headers={"kid": kid}, + algorithm="RS256", + ).decode("utf-8") + } + + res = client.get( + "/ga4gh/drs/v1/objects/" + test_guid + "/access/" + access_id, + headers=user, + ) + assert res.status_code == 200 + + +@pytest.mark.parametrize("indexd_client", ["s3", "gs"], indirect=True) +def test_get_presigned_url_no_access_id( + client, + user_client, + indexd_client, + kid, + rsa_private_key, + google_proxy_group, + primary_google_service_account, + cloud_manager, + google_signed_url, +): + access_id = indexd_client["indexed_file_location"] + test_guid = "1" + user = { + "Authorization": "Bearer " + + jwt.encode( + utils.authorized_download_context_claims( + user_client.username, user_client.user_id + ), + key=rsa_private_key, + headers={"kid": kid}, + algorithm="RS256", + ).decode("utf-8") + } + + res = client.get( + "/ga4gh/drs/v1/objects/" + test_guid + "/access/", + headers=user, + ) + assert res.status_code == 400 + + +@pytest.mark.parametrize("indexd_client", ["s3", "gs"], indirect=True) +def test_get_presigned_url_no_bearer_token( + client, + indexd_client, +): + access_id = indexd_client["indexed_file_location"] + test_guid = "1" + + res = client.get("/ga4gh/drs/v1/objects/" + test_guid + f"/access/{access_id}") + assert res.status_code == 401 + + +@responses.activate +def test_get_presigned_url_wrong_access_id( + client, + user_client, + indexd_client, + kid, + rsa_private_key, + google_proxy_group, + primary_google_service_account, + cloud_manager, + google_signed_url, +): + test_guid = "1" + user = { + "Authorization": "Bearer " + + jwt.encode( + utils.authorized_download_context_claims( + user_client.username, user_client.user_id + ), + key=rsa_private_key, + headers={"kid": kid}, + algorithm="RS256", + ).decode("utf-8") + } + res = client.get( + "/ga4gh/drs/v1/objects/" + test_guid + "/access/s2", + headers=user, + ) + assert res.status_code == 404 + + +@responses.activate +@pytest.mark.parametrize("indexd_client", ["s3", "gs"], indirect=True) +def test_get_presigned_url_with_encoded_slash( + client, + user_client, + indexd_client, + kid, + rsa_private_key, + google_proxy_group, + primary_google_service_account, + cloud_manager, + google_signed_url, +): + access_id = indexd_client["indexed_file_location"] + test_guid = "1" + user = { + "Authorization": "Bearer " + + jwt.encode( + utils.authorized_download_context_claims( + user_client.username, user_client.user_id + ), + key=rsa_private_key, + headers={"kid": kid}, + algorithm="RS256", + ).decode("utf-8") + } + data = get_doc() + data["did"] = "dg.TEST/ed8f4658-6acd-4f96-9dd8-3709890c959e" + did = "dg.TEST%2Fed8f4658-6acd-4f96-9dd8-3709890c959e" + + res = client.get( + "/ga4gh/drs/v1/objects/" + did + "/access/" + access_id, + headers=user, + ) + assert res.status_code == 200 + + +@responses.activate +@pytest.mark.parametrize("indexd_client", ["s3", "gs"], indirect=True) +def test_get_presigned_url_with_query_params( + client, + user_client, + indexd_client, + kid, + rsa_private_key, + google_proxy_group, + primary_google_service_account, + cloud_manager, + google_signed_url, +): + access_id = indexd_client["indexed_file_location"] + test_guid = "1" + user = { + "Authorization": "Bearer " + + jwt.encode( + utils.authorized_download_context_claims( + user_client.username, user_client.user_id + ), + key=rsa_private_key, + headers={"kid": kid}, + algorithm="RS256", + ).decode("utf-8") + } + data = get_doc() + data["did"] = "dg.TEST/ed8f4658-6acd-4f96-9dd8-3709890c959e" + did = "dg.TEST%2Fed8f4658-6acd-4f96-9dd8-3709890c959e" + + res = client.get( + "/ga4gh/drs/v1/objects/" + + did + + "/access/" + + access_id + + "?userProject=someproject&arbitrary_parameter=val", + headers=user, + ) + assert res.status_code == 200