Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Libpitt/640 register collections doi #641

Merged
merged 8 commits into from
Jan 29, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.4.22
1.4.23
2 changes: 1 addition & 1 deletion ingest-api-spec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ openapi: 3.0.0
info:
description: |
A RESTful web service exposing calls needed for the SenNet Data Sharing Portal.
version: 1.4.22
version: 1.4.23
title: SenNet Ingest API
contact:
name: SenNet Help Desk
Expand Down
137 changes: 137 additions & 0 deletions src/lib/datacite_api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
import requests
from requests.auth import HTTPBasicAuth
import logging

logger = logging.getLogger(__name__)


# DataCite REST API Guide:
# https://support.datacite.org/reference/dois-2
class DataCiteApi:
    """Minimal client for the DataCite REST API DOI endpoints.

    Every request is sent with HTTP basic auth built from the repository
    credentials and the JSON:API content type. Each public request method
    returns the raw ``requests.Response`` without inspecting it; callers are
    responsible for checking the status code.
    """

    # Media type sent as Content-Type on every request made by this class
    JSON_API_CONTENT_TYPE = 'application/vnd.api+json'
    # Default number of seconds to wait on DataCite before giving up, so a
    # stalled connection cannot block the calling worker forever
    DEFAULT_TIMEOUT = 30

    def __init__(self, datacite_repository_id: str, datacite_repository_password: str,
                 datacite_sennet_prefix: str, datacite_api_url: str, entity_api_url: str,
                 timeout: float = DEFAULT_TIMEOUT):
        """Store the credentials and endpoints used by every request.

        :param datacite_repository_id: repository account id used for basic auth
        :param datacite_repository_password: repository account password used for basic auth
        :param datacite_sennet_prefix: DOI prefix placed before the '/' in every DOI name
        :param datacite_api_url: URL that DOIs are POSTed to; ``/<doi>`` is appended for GET/PUT
        :param entity_api_url: base URL used to build the DOI landing page (``<url>/doi/redirect/<uuid>``)
        :param timeout: seconds passed as ``timeout`` to every ``requests`` call
        """
        self.auth = HTTPBasicAuth(datacite_repository_id, datacite_repository_password)
        self.datacite_sennet_prefix = datacite_sennet_prefix
        self.datacite_api_url = datacite_api_url
        self.redirect_prefix = f"{entity_api_url}/doi/redirect"
        self.timeout = timeout
        # NOTE(review): TLS certificate verification is switched off for every request,
        # which allows man-in-the-middle interception of the repository credentials.
        # Confirm whether this is still required. The attribute name is misspelled
        # ("enabed") but is kept as-is because code outside this class may read it.
        self.ssl_verification_enabed = False

    def _request_options(self) -> dict:
        """Return the keyword arguments shared by every DataCite request."""
        return {
            'auth': self.auth,
            'headers': {'Content-Type': self.JSON_API_CONTENT_TYPE},
            'verify': self.ssl_verification_enabed,
            'timeout': self.timeout,
        }

    # https://support.datacite.org/docs/doi-basics
    def build_doi_name(self, entity_sennet_id: str):
        """Return the DOI name ``<prefix>/<entity_sennet_id>``."""
        # Format: prefix/suffix, no need for proxy part
        return f"{self.datacite_sennet_prefix}/{entity_sennet_id}"

    # DOI retrieval
    # https://support.datacite.org/reference/dois-2#get_dois-id
    def get_doi_by_id(self, doi_id: str) -> object:
        """GET ``<datacite_api_url>/<doi_id>`` and return the ``requests.Response``.

        :param doi_id: the DOI name to look up, appended to the API URL as-is
        :raises requests.exceptions.RequestException: on connection failure or timeout
        """
        logger.debug("======Target DOI ID: %s======", doi_id)

        return requests.get(
            url=f"{self.datacite_api_url}/{doi_id}",
            **self._request_options()
        )

    # https://support.datacite.org/reference/dois-2#post_dois
    # and https://docs.python.org/3/library/typing.html
    def create_new_draft_doi(self,
                             sennet_id: str,
                             uuid: str,
                             contributors: list,
                             title: str,
                             publication_year: int,
                             creators: list,
                             entity_type='Dataset') -> object:
        """POST a new DOI without an 'event' attribute and return the ``requests.Response``.

        :param sennet_id: used as the payload 'id' and as the DOI suffix
        :param uuid: appended to the redirect prefix to form the landing page URL
        :param contributors: sent as 'contributors' unless None
        :param title: the single entry of 'titles'
        :param publication_year: sent as 'publicationYear' (integer)
        :param creators: sent as 'creators' unless None
        :param entity_type: sent as 'resourceTypeGeneral'
        :raises requests.exceptions.RequestException: on connection failure or timeout
        """
        publisher = 'SenNet Consortium'

        # Draft DOI doesn't specify the 'event' attribute
        payload = {
            'data': {
                # NOTE(review): update_doi_event_publish() sends the full DOI name as 'id'
                # whereas this sends the bare sennet_id - confirm which one DataCite expects
                'id': sennet_id,
                'type': 'dois',
                'attributes': {
                    # ==============ATTENTION==============
                    # Do NOT add 'event' field in order to create a "Draft" DOI
                    # Do NOT specify 'event': 'register', this creates a "Registered" DOI directly or
                    # triggers a state move from "Draft" to "Registered" and this DOI can not be deleted nor returned to the "Draft" state
                    # Do NOT specify 'event': 'publish', this creates a "Findable" DOI directly or
                    # triggers a state move from "Draft" or "Registered" to "Findable" and this DOI can not be deleted nor returned to a different state
                    # =====================================

                    # Below are all the "Mandatory" properties. See:
                    # https://schema.datacite.org/meta/kernel-4.3/doc/DataCite-MetadataKernel_v4.3.pdf#%5B%7B%22num%22%3A19%2C%22gen%22%3A0%7D%2C%7B%22name%22%3A%22XYZ%22%7D%2C68%2C549%2C0%5D

                    # The globally unique string that identifies the resource and can't be changed
                    'doi': self.build_doi_name(sennet_id),
                    # One or more names or titles by which the resource is known
                    'titles': [{
                        'title': title
                    }],
                    # The name of the entity that holds, archives, publishes prints, distributes,
                    # releases, issues, or produces the resource
                    'publisher': publisher,
                    # The year when the resource was or will be made publicly available
                    'publicationYear': publication_year,  # Integer
                    # The general type of the resource
                    'types': {
                        'resourceTypeGeneral': entity_type
                    },
                    # The location of the landing page with more information about the resource
                    'url': f"{self.redirect_prefix}/{uuid}"
                }
            }
        }

        # <ORCID_ID, first, lastname, name, institution_affiliation> from Dataset.contributors is mapped here (see reference above)
        if contributors is not None:
            payload['data']['attributes']['contributors'] = contributors

        if creators is not None:
            payload['data']['attributes']['creators'] = creators

        logger.debug("======Draft DOI json_to_post======")

        return requests.post(
            url=self.datacite_api_url,
            json=payload,
            **self._request_options()
        )

    # https://support.datacite.org/reference/dois-2#put_dois-id
    def update_doi_event_publish(self, entity_sennet_id: str) -> object:
        """PUT ``'event': 'publish'`` for the entity's DOI and return the ``requests.Response``.

        :param entity_sennet_id: suffix used to build the DOI name being published
        :raises requests.exceptions.RequestException: on connection failure or timeout
        """
        doi = self.build_doi_name(entity_sennet_id)
        payload = {
            'data': {
                'id': doi,
                'type': 'dois',
                'attributes': {
                    # Triggers a state move from "Draft" or "Registered" to "Findable"
                    'event': 'publish'
                }
            }
        }

        logger.debug("====== DataCiteApi.update_doi_event_publish() json ======")
        logger.debug(payload)

        return requests.put(
            url=f"{self.datacite_api_url}/{doi}",
            json=payload,
            **self._request_options()
        )

class DataciteApiException(Exception):
    """Error raised for a failed DataCite interaction.

    ``message`` becomes the exception text; ``error_code`` is kept on the
    instance (``None`` when not supplied) for callers to read.
    """

    def __init__(self, message, error_code=None):
        self.error_code = error_code
        Exception.__init__(self, message)
32 changes: 28 additions & 4 deletions src/lib/datacite_doi_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from requests.packages.urllib3.exceptions import InsecureRequestWarning

from api.datacite_api import DataCiteApi
from lib.datacite_api import DataciteApiException
from lib.services import get_entity_by_id

requests.packages.urllib3.disable_warnings(category=InsecureRequestWarning)
Expand All @@ -38,7 +39,7 @@ def __init__(self):
self.datacite_repository_id = config['DATACITE_REPOSITORY_ID']
self.datacite_repository_password = config['DATACITE_REPOSITORY_PASSWORD']
# Prefix, e.g., 10.80478 for test...
self.datacite_hubmap_prefix = config['DATACITE_SENNET_PREFIX']
self.datacite_sennet_prefix = config['DATACITE_SENNET_PREFIX']
# DataCite TEST API: https://api.test.datacite.org/
self.datacite_api_url = config['DATACITE_API_URL']
self.entity_api_url = config['ENTITY_WEBSERVICE_URL']
Expand Down Expand Up @@ -124,6 +125,24 @@ def build_doi_creators(self, dataset: object) -> list:

return creators

def check_doi_existence_and_state(self, entity: dict):
    """Look up the entity's DOI in DataCite and report whether it is findable.

    :param entity: entity dict; only ``entity['sennet_id']`` is read
    :return: ``True`` when DataCite answers 200 and the DOI state is "findable",
        ``False`` when DataCite answers 200 with any other state,
        ``None`` when DataCite answers with any status other than 200
    :raises DataciteApiException: with ``error_code`` 500 when the request itself fails
    """
    datacite_api = DataCiteApi(self.datacite_repository_id, self.datacite_repository_password,
                               self.datacite_sennet_prefix, self.datacite_api_url, self.entity_api_url)
    doi_name = datacite_api.build_doi_name(entity['sennet_id'])
    try:
        doi_response = datacite_api.get_doi_by_id(doi_name)
    except requests.exceptions.RequestException as e:
        # Chain the original error so the underlying cause stays in the traceback
        raise DataciteApiException(error_code=500, message="Failed to connect to DataCite") from e

    status_code = doi_response.status_code
    if status_code != 200:
        # The return value is the same for every non-200 answer, but anything other than
        # "not found" (e.g. an auth or server error) is logged so it is not silently
        # mistaken for a missing DOI
        if status_code != 404:
            logger.warning("Unexpected status %s from DataCite while looking up DOI %s",
                           status_code, doi_name)
        return None

    logger.debug("==========DOI already exists. Skipping create-draft=========")
    response_data = doi_response.json()
    # `or {}` guards against "data"/"attributes" being present but null
    attributes = (response_data.get("data") or {}).get("attributes") or {}
    return attributes.get("state") == "findable"

"""
Register a draft DOI with DataCite

Expand All @@ -150,7 +169,7 @@ def create_dataset_draft_doi(self, dataset: dict, check_publication_status=True)
raise ValueError('This Dataset is not Published, can not register DOI')

datacite_api = DataCiteApi(self.datacite_repository_id, self.datacite_repository_password,
self.datacite_hubmap_prefix, self.datacite_api_url, self.entity_api_url)
self.datacite_sennet_prefix, self.datacite_api_url, self.entity_api_url)

# Get publication_year, default to the current year
publication_year = int(datetime.now().year)
Expand Down Expand Up @@ -209,7 +228,7 @@ def move_doi_state_from_draft_to_findable(self, entity: dict, user_token: str) -
entity_types = ['Dataset', 'Collection', 'Epicollection']
if ('entity_type' in entity) and (entity['entity_type'] in entity_types):
datacite_api = DataCiteApi(self.datacite_repository_id, self.datacite_repository_password,
self.datacite_hubmap_prefix, self.datacite_api_url, self.entity_api_url)
self.datacite_sennet_prefix, self.datacite_api_url, self.entity_api_url)
response = datacite_api.update_doi_event_publish(entity['sennet_id'])

if response.status_code == 200:
Expand Down Expand Up @@ -262,7 +281,7 @@ def move_doi_state_from_draft_to_findable(self, entity: dict, user_token: str) -

def create_collection_draft_doi(self, collection: dict) -> object:
datacite_api = DataCiteApi(self.datacite_repository_id, self.datacite_repository_password,
self.datacite_hubmap_prefix, self.datacite_api_url, self.entity_api_url)
self.datacite_sennet_prefix, self.datacite_api_url, self.entity_api_url)
publication_year = int(datetime.now().year)
response = datacite_api.create_new_draft_doi(collection['sennet_id'],
collection['uuid'],
Expand All @@ -287,6 +306,11 @@ def create_collection_draft_doi(self, collection: dict) -> object:
# Also bubble up the error message from DataCite
raise requests.exceptions.RequestException(response.text)

def build_doi_name(self, entity):
    """Return the DOI name for ``entity``, built from its ``sennet_id``.

    Delegates to ``DataCiteApi.build_doi_name`` on a client constructed from
    this helper's stored DataCite settings.
    """
    return DataCiteApi(
        self.datacite_repository_id,
        self.datacite_repository_password,
        self.datacite_sennet_prefix,
        self.datacite_api_url,
        self.entity_api_url,
    ).build_doi_name(entity['sennet_id'])

"""
Update the dataset's properties in Entity-API after DOI is published (Draft -> Findable)

Expand Down
4 changes: 2 additions & 2 deletions src/routes/assayclassifier/testing_rule_chain.json
Original file line number Diff line number Diff line change
Expand Up @@ -524,8 +524,8 @@
{
"type": "match",
"match": "is_dcwg and is_primary and dataset_type == 'GeoMx (NGS)'",
"value": "{'assaytype': 'geomx_ngs?', 'vitessce-hints': [], 'dir-schema': 'geomx-ngs-v2', 'description': 'GeoMx (NGS)', 'contains-pii': true, 'primary': true, 'dataset-type': 'GeoMx (NGS)', 'must-contain': ['RNAseq (with probes)'], 'is-multi-assay': true}",
"rule_description": "DCWG geomx_ngs?"
"value": "{'assaytype': 'geomx_ngs', 'vitessce-hints': [], 'dir-schema': 'geomx-ngs-v2', 'description': 'GeoMx (NGS)', 'contains-pii': true, 'primary': true, 'dataset-type': 'GeoMx (NGS)', 'must-contain': ['RNAseq (with probes)'], 'is-multi-assay': true}",
"rule_description": "DCWG geomx_ngs"
},
{
"type": "match",
Expand Down
53 changes: 38 additions & 15 deletions src/routes/collections/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
from hubmap_commons.hm_auth import AuthHelper
from hubmap_commons.exceptions import HTTPException


from lib.datacite_api import DataciteApiException
from lib.datacite_doi_helper import DataCiteDoiHelper
from lib.neo4j_helper import Neo4jHelper

Expand Down Expand Up @@ -92,22 +94,43 @@ def register_collections_doi(collection_id):

entity_dict = vars(entity)
datacite_doi_helper = DataCiteDoiHelper()
# Checks whether a DOI already exists and, if so, whether it is already findable. True: the DOI exists and is findable.
# False: the DOI exists but is not yet findable. None: the DOI does not exist yet.
try:
datacite_doi_helper.create_collection_draft_doi(entity_dict)
except Exception as e:
logger.exception(f"Exception while creating a draft doi for {collection_uuid}")
return jsonify(
{
"error": f"Error occurred while trying to create a draft doi for {collection_uuid}. Check logs."}), 500
# This will make the draft DOI created above 'findable'....
try:
doi_info = datacite_doi_helper.move_doi_state_from_draft_to_findable(entity_dict, auth_tokens)
except Exception as e:
logger.exception \
(f"Exception while creating making doi findable and saving to entity for {collection_uuid}")
return jsonify(
{
"error": f"Error occurred while making doi findable and saving to entity for {collection_uuid}. Check logs."}), 500
doi_exists = datacite_doi_helper.check_doi_existence_and_state(entity_dict)
except DataciteApiException as e:
logger.exception(f"Exception while fetching doi for {collection_uuid}")
return jsonify({"error": f"Error occurred while trying to confirm existence of doi for {collection_uuid}. {e}"}), 500
# Doi does not exist, create draft then make it findable
if doi_exists is None:
try:
datacite_doi_helper.create_collection_draft_doi(entity_dict)
except DataciteApiException as datacite_exception:
return jsonify({"error": str(datacite_exception)}), datacite_exception.error_code
except Exception as e:
logger.exception(f"Exception while creating a draft doi for {collection_uuid}")
return jsonify({"error": f"Error occurred while trying to create a draft doi for {collection_uuid}. Check logs."}), 500
# This will make the draft DOI created above 'findable'....
try:
doi_info = datacite_doi_helper.move_doi_state_from_draft_to_findable(entity_dict, auth_tokens)
except Exception as e:
logger.exception(f"Exception while creating making doi findable and saving to entity for {collection_uuid}")
return jsonify({"error": f"Error occurred while making doi findable and saving to entity for {collection_uuid}. Check logs."}), 500
# Doi exists, but is not yet findable. Just make it findable
elif doi_exists is False:
try:
doi_info = datacite_doi_helper.move_doi_state_from_draft_to_findable(entity_dict, auth_tokens)
except Exception as e:
logger.exception(f"Exception while creating making doi findable and saving to entity for {collection_uuid}")
return jsonify({"error": f"Error occurred while making doi findable and saving to entity for {collection_uuid}. Check logs."}), 500
# The doi exists and it is already findable, skip both steps
elif doi_exists is True:
logger.debug(f"DOI for {collection_uuid} is already findable. Skipping creation and state change.")
doi_name = datacite_doi_helper.build_doi_name(entity_dict)
doi_info = {
'registered_doi': doi_name,
'doi_url': f'https://doi.org/{doi_name}'
}
doi_update_data = ""
if not doi_info is None:
doi_update_data = {"registered_doi": doi_info["registered_doi"], "doi_url": doi_info['doi_url']}
Expand Down