This repository has been archived by the owner on Sep 16, 2022. It is now read-only.

Commit

Merge pull request #490 from CSCfi/stable
Merge stable to master
hannu40k authored Aug 19, 2019
2 parents b8cc2c8 + 8d32fd0 commit 18459d1
Showing 24 changed files with 261 additions and 95 deletions.
39 changes: 20 additions & 19 deletions .travis-deploy.sh
@@ -6,24 +6,25 @@ if [[ "$TRAVIS_BRANCH" == "master" || "$TRAVIS_PULL_REQUEST" != "false" ]]; then
exit 0
fi

pip install ansible
git clone https://github.com/CSCfi/metax-ops
cd metax-ops/ansible/
# pip install ansible
# git clone https://github.com/CSCfi/metax-ops
# cd metax-ops/ansible/

if [[ "$TRAVIS_BRANCH" == "test" && "$TRAVIS_PULL_REQUEST" == "false" ]]; then
echo "Deploying to test.."
ansible-galaxy -r requirements.yml install --roles-path=roles
ansible-playbook -vv -i inventories/test/hosts site_deploy.yml --extra-vars "ssh_user=metax-deploy-user server_domain_name=metax-test.csc.fi"
elif [[ "$TRAVIS_BRANCH" == "stable" && "$TRAVIS_PULL_REQUEST" == "false" ]]; then
echo "Deploying to stable.."
ansible-galaxy -r requirements.yml install --roles-path=roles
ansible-playbook -vv -i inventories/stable/hosts site_deploy.yml --extra-vars "ssh_user=metax-deploy-user server_domain_name=metax-stable.csc.fi"
fi
# if [[ "$TRAVIS_BRANCH" == "test" && "$TRAVIS_PULL_REQUEST" == "false" ]]; then
# echo "Deploying to test.."
# ansible-galaxy -r requirements.yml install --roles-path=roles
# ansible-playbook -vv -i inventories/test/hosts site_deploy.yml --extra-vars "ssh_user=metax-deploy-user server_domain_name=metax-test.csc.fi"
# elif [[ "$TRAVIS_BRANCH" == "stable" && "$TRAVIS_PULL_REQUEST" == "false" ]]; then
# echo "Deploying to stable.."
# ansible-galaxy -r requirements.yml install --roles-path=roles
# ansible-playbook -vv -i inventories/stable/hosts site_deploy.yml --extra-vars "ssh_user=metax-deploy-user server_domain_name=metax-stable.csc.fi"
# fi

# Make sure the last command to run before this part is the ansible-playbook command
if [ $? -eq 0 ]
then
exit 0
else
exit 1
fi
# # Make sure the last command to run before this part is the ansible-playbook command
# if [ $? -eq 0 ]
# then
# exit 0
# else
# exit 1
# fi
exit 0
2 changes: 1 addition & 1 deletion requirements.txt
@@ -1,7 +1,7 @@
coveralls==1.7.0 # code coverage reportin in travis
datacite==1.0.1 # BSD-license. convert datasets to datacite xml. datacite metadata store api wrappers
python-dateutil==2.8.0
Django==2.1.9 # BSD-license
Django==2.1.11 # BSD-license
elasticsearch<6.0.0
hiredis==1.0.0 # Used by redis (redis-py) for parser
djangorestframework==3.9.2 # BSD-license
51 changes: 34 additions & 17 deletions src/metax_api/api/oaipmh/base/metax_oai_server.py
@@ -61,11 +61,19 @@ def _get_urnresolver_record_data(self, set, cursor, batch_size, from_=None, unti
records = proxy.objects_unfiltered.filter(active=True)

if from_ and until:
records = proxy.objects.filter(date_modified__gte=from_, date_modified__lte=until)
records = records.filter(date_modified__gte=from_, date_modified__lte=until)
elif from_:
records = proxy.objects.filter(date_modified__gte=from_)
records = records.filter(date_modified__gte=from_)
elif until:
records = proxy.objects.filter(date_modified__lte=until)
records = records.filter(date_modified__lte=until)

# Fetch only needed values as dict to increase performance.
records = records.values(
'identifier',
'date_created',
'date_modified',
'data_catalog__catalog_json',
'research_dataset')

data = []
for record in records:
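
The switch to QuerySet.values() above means the OAI handler now iterates plain dicts instead of full model instances, avoiding per-row model construction. A minimal sketch of the same pattern, assuming the project's CatalogRecord model and the field names shown in the hunk:

# Illustrative sketch of the .values() pattern; assumes metax_api's CatalogRecord model.
from metax_api.models import CatalogRecord

records = CatalogRecord.objects_unfiltered.filter(active=True).values(
    'identifier',
    'date_created',
    'date_modified',
    'data_catalog__catalog_json',   # related-field lookup pulls the catalog JSON directly
    'research_dataset',
)

for record in records:
    # each row is a dict, so fields are read with ['...'] / .get() instead of attribute access
    preferred_id = record['research_dataset'].get('preferred_identifier')
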
@@ -102,7 +110,6 @@ def _get_filtered_records_data(self, verb, metadata_prefix, set, cursor, batch_s
data_catalog__catalog_json__identifier__in=settings.OAI['SET_MAPPINGS'][set])
else:
query_set = query_set.filter(data_catalog__catalog_json__identifier__in=self._get_default_set_filter())

data = []
for record in query_set:
if verb == 'ListRecords':
@@ -145,10 +152,14 @@ def _get_oai_dc_urnresolver_metadatas_for_record(self, record):
"""
metadatas = []

if isinstance(record, CatalogRecord):
pref_id = record.research_dataset.get('preferred_identifier')
dc_id = record.data_catalog.catalog_json.get('identifier')
other_ids = record.research_dataset.get('other_identifier', [])
if isinstance(record, dict):
pref_id = record['research_dataset'].get('preferred_identifier')
dc_id = record['data_catalog__catalog_json'].get('identifier')
is_harvested = record['data_catalog__catalog_json'].get('harvested', False) is True
if record['research_dataset'].get('other_identifier') is not None:
other_ids = record['research_dataset'].get('other_identifier')
else:
other_ids = []

if dc_id == 'urn:nbn:fi:att:data-catalog-harvest-syke':
for id_obj in other_ids:
@@ -157,9 +168,9 @@ def _get_oai_dc_urnresolver_metadatas_for_record(self, record):
break

elif dc_id not in settings.LEGACY_CATALOGS:
resolution_url = settings.OAI['ETSIN_URL_TEMPLATE'] % record.identifier
if not record.catalog_is_harvested() and (pref_id.startswith('urn:nbn:fi:att:') or
pref_id.startswith('urn:nbn:fi:csc')):
resolution_url = settings.OAI['ETSIN_URL_TEMPLATE'] % record['identifier']
if not is_harvested and (pref_id.startswith('urn:nbn:fi:att:') or
pref_id.startswith('urn:nbn:fi:csc')):
metadatas.append({'identifier': [resolution_url, pref_id]})

for id_obj in other_ids:
@@ -321,10 +332,14 @@ def _get_metadata_for_record(self, record, metadataPrefix):
return self._fix_metadata(meta)

def _get_header_timestamp(self, record):
if record.date_modified:
timestamp = record.date_modified
"""
Can handle record as json or object.
"""
if isinstance(record, dict):
modified = record.get('date_modified', None)
timestamp = modified if modified is not None else record['date_created']
else:
timestamp = record.date_created
timestamp = record.date_modified if record.date_modified else record.date_created
return timezone.make_naive(timestamp)

def _get_oai_item(self, identifier, record, metadata_prefix):
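
_get_header_timestamp now accepts either a .values() dict or a model instance and falls back from date_modified to date_created. A standalone sketch of that logic (the function name here is illustrative; the calls mirror the hunk):

# Illustrative sketch; mirrors the fallback logic in the hunk above.
from django.utils import timezone

def header_timestamp(record):
    """Accept a .values() dict or a model instance; prefer date_modified over date_created."""
    if isinstance(record, dict):
        modified = record.get('date_modified', None)
        timestamp = modified if modified is not None else record['date_created']
    else:
        timestamp = record.date_modified if record.date_modified else record.date_created
    # OAI-PMH headers are built from naive datetimes here
    return timezone.make_naive(timestamp)
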
@@ -344,10 +359,13 @@ def _fix_metadata(self, meta):
return metadata

def _get_record_identifier(self, record, set):
"""
Can handle record as json or object.
"""
if set == DATACATALOGS_SET:
return record.catalog_json['identifier']
return record['catalog_json__identifier'] if isinstance(record, dict) else record.catalog_json['identifier']
else:
return record.identifier
return record['identifier'] if isinstance(record, dict) else record.identifier

# OAI-PMH VERBS

@@ -414,7 +432,6 @@ def listRecords(self, metadataPrefix=None, set=None, cursor=None, from_=None,
data = self._get_urnresolver_record_data(set, cursor, batch_size, from_, until)
else:
data = self._get_filtered_records_data('ListRecords', metadataPrefix, set, cursor, batch_size, from_, until)

return data

def getRecord(self, metadataPrefix, identifier):
5 changes: 3 additions & 2 deletions src/metax_api/middleware/request_logging.py
@@ -72,15 +72,16 @@ def __call__(self, request):

try:
_logger.info(
'%s - [%s] %s - "%s %s" %d %s'
'%s - [%s] %s - "%s %s" %d %s - %.3fs'
% (
request.environ['HTTP_X_REAL_IP'],
user_type,
username,
request.method,
request.get_full_path(),
response.status_code,
response._headers.get('content-length', ['-', '-'])[1]
response._headers.get('content-length', ['-', '-'])[1],
time() - start_time
)
)
except:
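
The added %.3fs field reports request duration, which needs a start timestamp taken before the view runs; that part is outside the visible hunk. A hedged sketch of how such a timing middleware fits together (class and variable names are assumptions, only the log format mirrors the diff):

# Illustrative timing-middleware sketch, not the project's actual request_logging module.
import logging
from time import time

_logger = logging.getLogger(__name__)

class RequestTimingMiddleware:

    def __init__(self, get_response):
        self.get_response = get_response

    def __call__(self, request):
        start_time = time()                      # taken before the view executes
        response = self.get_response(request)
        _logger.info(
            '"%s %s" %d - %.3fs',
            request.method,
            request.get_full_path(),
            response.status_code,
            time() - start_time,                 # elapsed wall-clock seconds
        )
        return response
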
40 changes: 27 additions & 13 deletions src/metax_api/models/catalog_record.py
@@ -663,13 +663,14 @@ def _find_new_dirs_to_add(self, file_description_changes, dirs_to_add_by_project
"""
assert 'directories' in file_description_changes

dir_identifiers = list(file_description_changes['directories']['removed']) + \
list(file_description_changes['directories']['added'])
dir_identifiers = list(file_description_changes['directories']['added']) +\
list(file_description_changes['directories']['removed'])

dir_details = Directory.objects.filter(identifier__in=dir_identifiers) \
.values('project_identifier', 'identifier', 'directory_path')

if len(dir_identifiers) != len(dir_details):
# skip deprecated datasets, since there might be deleted directories
if len(dir_identifiers) != len(dir_details) and not self.deprecated:
existig_dirs = set( d['identifier'] for d in dir_details )
missing_identifiers = [ d for d in dir_identifiers if d not in existig_dirs ]
raise ValidationError({'detail': ['the following directory identifiers were not found:\n%s'
@@ -705,16 +706,23 @@ def _find_new_files_to_add(self, file_description_changes, files_to_add, files_t
"""
assert 'files' in file_description_changes

file_identifiers = list(file_description_changes['files']['removed']) + \
list(file_description_changes['files']['added']) + \
list(file_description_changes['files']['keep'])
add_and_keep_ids = list(file_description_changes['files']['added']) \
+ list(file_description_changes['files']['keep'])

file_details = File.objects.filter(identifier__in=file_identifiers) \
add_and_keep = File.objects.filter(identifier__in=add_and_keep_ids) \
.values('id', 'project_identifier', 'identifier', 'file_path')

if len(file_identifiers) != len(file_details):
removed_ids = list(file_description_changes['files']['removed'])

removed = File.objects_unfiltered.filter(identifier__in=removed_ids) \
.values('id', 'project_identifier', 'identifier', 'file_path')

file_details = add_and_keep | removed

if len(add_and_keep_ids) + len(removed_ids) != len(file_details):
existig_files = set( f['identifier'] for f in file_details )
missing_identifiers = [ f for f in file_identifiers if f not in existig_files ]
missing_identifiers = [ f for f in add_and_keep_ids if f not in existig_files ]
missing_identifiers += [ f for f in removed_ids if f not in existig_files ]
raise ValidationError({'detail': ['the following file identifiers were not found:\n%s'
% '\n'.join(missing_identifiers) ]})
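
The rewrite above splits the lookup: added and kept files go through the default manager, removed files through the unfiltered manager, and the two querysets are merged with the | operator so soft-deleted rows are still validated. A short sketch under those assumptions (identifier values are made up):

# Illustrative sketch of the split lookup; identifiers below are hypothetical.
from metax_api.models import File

add_and_keep_ids = ['pid:urn:file:1', 'pid:urn:file:2']
removed_ids = ['pid:urn:file:3']

add_and_keep = File.objects.filter(identifier__in=add_and_keep_ids) \
    .values('id', 'project_identifier', 'identifier', 'file_path')
removed = File.objects_unfiltered.filter(identifier__in=removed_ids) \
    .values('id', 'project_identifier', 'identifier', 'file_path')

# OR-combining the querysets yields a single queryset that also covers
# files the default manager would hide because they are removed.
file_details = add_and_keep | removed
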

@@ -776,13 +784,14 @@ def delete(self, *args, **kwargs):
else:
super().delete(*args, **kwargs)
log_args['catalogrecord']['date_removed'] = datetime_to_str(self.date_removed)
log_args['catalogrecord']['date_modified'] = datetime_to_str(self.date_modified)

self.add_post_request_callable(DelayedLog(**log_args))

def deprecate(self, timestamp=None):
self.deprecated = True
self.date_deprecated = timestamp or get_tz_aware_now_without_micros()
super().save(update_fields=['deprecated', 'date_deprecated'])
self.date_deprecated = self.date_modified = timestamp or get_tz_aware_now_without_micros()
super().save(update_fields=['deprecated', 'date_deprecated', 'date_modified'])
self.add_post_request_callable(DelayedLog(
event='dataset_deprecated',
catalogrecord={
@@ -1090,7 +1099,7 @@ def _files_added_for_first_time(self):
"""
Find out if this update is the first time files are being added/changed since the dataset's creation.
"""
if self.files.exists():
if self.files(manager='objects_unfiltered').exists():
# current version already has files
return False

@@ -1227,7 +1236,8 @@ def _get_top_level_parent_dirs_by_project(self, dir_identifiers):
.values('project_identifier', 'directory_path', 'identifier') \
.order_by('project_identifier', 'directory_path')

if len(dirs) != len(dir_identifiers):
# skip deprecated datasets, since there might be deleted directories
if len(dirs) != len(dir_identifiers) and not self.deprecated:
missing_identifiers = [ pid for pid in dir_identifiers if pid not in set(d['identifier'] for d in dirs)]
raise ValidationError({ 'detail': [
'some requested directories were not found. directory identifiers not found:\n%s'
@@ -1774,6 +1784,7 @@ def __call__(self):
format(self.cr.identifier, doi)
)

from metax_api.services.datacite_service import DataciteException
try:
if self.action == 'create':
try:
@@ -1788,6 +1799,9 @@
# If metadata is in "findable" state, the operation below should transition the DOI to "registered"
# state
self.dcs.delete_doi_metadata(doi)
except DataciteException as e:
_logger.error(e)
raise Http400(str(e))
except Exception as e:
_logger.error(e)
_logger.exception('Datacite API interaction failed')
5 changes: 3 additions & 2 deletions src/metax_api/models/common.py
@@ -92,7 +92,7 @@ def delete(self):
Mark record as removed, never delete from db.
"""
self._set_removed()
super().save(update_fields=['removed', 'date_removed'])
super().save(update_fields=['removed', 'date_removed', 'date_modified'])
self._update_tracked_field_values()

def user_has_access(self, request):
@@ -150,11 +150,12 @@ def track_fields(self, *fields):

def _set_removed(self):
self.removed = True
self.date_removed = get_tz_aware_now_without_micros()
self.date_removed = self.date_modified = get_tz_aware_now_without_micros()

def _unset_removed(self):
self.removed = False
self.date_removed = None
self.date_modified = get_tz_aware_now_without_micros()

def _track_json_field(self, field_name):
field_name, json_field_name = field_name.split('.')
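
With this change a soft delete stamps date_modified alongside removed and date_removed, and the row itself is never deleted from the database. A usage sketch, assuming (as the objects_unfiltered usage elsewhere in this diff suggests) that the default manager hides removed rows; the identifier below is made up:

# Illustrative soft-delete usage; the identifier is hypothetical.
from metax_api.models import CatalogRecord

cr = CatalogRecord.objects.get(identifier='urn:nbn:fi:att:example-dataset')
cr.delete()   # sets removed, date_removed and date_modified; no DB row is deleted

assert not CatalogRecord.objects.filter(identifier=cr.identifier).exists()
assert CatalogRecord.objects_unfiltered.filter(identifier=cr.identifier).exists()
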
2 changes: 1 addition & 1 deletion src/metax_api/onappstart.py
@@ -63,7 +63,7 @@ def ready(self): # pragma: no cover
if settings.ELASTICSEARCH['ALWAYS_RELOAD_REFERENCE_DATA_ON_RESTART']:
cache.set('reference_data', None)

if not cache.get('reference_data', master=True):
if not cache.get('reference_data', master=True) or not cache.get('ref_data_up_to_date', master=True):
ReferenceDataLoader.populate_cache_reference_data(cache)
json_logger.info(
event='reference_data_loaded',
5 changes: 5 additions & 0 deletions src/metax_api/services/auth_service.py
@@ -9,6 +9,7 @@
import logging

from django.conf import settings
from django.http import Http404


_logger = logging.getLogger(__name__)
@@ -22,6 +23,10 @@ def get_user_projects(request):
token. On local file values must be a list of strings.
"""
user_projects = AuthService.extract_file_projects_from_token(request.user.token)

if request.user.token is None:
raise Http404

username = request.user.token.get('CSCUserName', '')

try:
3 changes: 3 additions & 0 deletions src/metax_api/services/catalog_record_service.py
@@ -70,6 +70,9 @@ def get_queryset_search_params(cls, request):
if CommonService.get_boolean_query_param(request, 'latest'):
queryset_search_params['next_dataset_version_id'] = None

if request.query_params.get('deprecated', None) is not None:
queryset_search_params['deprecated'] = CommonService.get_boolean_query_param(request, 'deprecated')

if request.query_params.get('curator', False):
queryset_search_params['research_dataset__contains'] = \
{'curator': [{ 'identifier': request.query_params['curator']}]}
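
The new block only adds a deprecated filter when the parameter is present, so omitting it returns both deprecated and non-deprecated datasets. Roughly, assuming queryset_search_params is later passed to a filter() call (the endpoint path below is an assumption):

# Illustrative effect of the new query parameter; endpoint path and model use are assumptions.
#   GET /rest/datasets?deprecated=true    -> {'deprecated': True}
#   GET /rest/datasets?deprecated=false   -> {'deprecated': False}
#   GET /rest/datasets                    -> {} (no deprecated filter applied)
from metax_api.models import CatalogRecord

queryset_search_params = {'deprecated': True}       # as built by get_queryset_search_params
records = CatalogRecord.objects.filter(**queryset_search_params)
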
17 changes: 11 additions & 6 deletions src/metax_api/services/datacite_service.py
@@ -9,6 +9,7 @@
from os.path import dirname, join

import jsonschema
import requests
from datacite import schema41 as datacite_schema41, DataCiteMDSClient
from django.conf import settings as django_settings

@@ -67,12 +68,13 @@ def __init__(self, settings=django_settings):

self.user = settings['USERNAME']
self.pw = settings['PASSWORD']
self.url = settings['URL']

self.mds = DataCiteMDSClient(
username=self.user,
password=self.pw,
prefix=settings['PREFIX'],
test_mode=False)
url=self.url)

def create_doi_metadata(self, datacite_xml_metadata):
"""
@@ -114,12 +116,15 @@ def delete_draft_doi(self, doi):
:param doi:
:return:
"""
from requests import delete
try:
delete('https://mds.datacite.org/doi/{0}'.format(doi),
headers={'Content-Type': 'application/plain;charset=UTF-8'}, auth=(self.user, self.pw))
except:
pass
requests.delete(
'{0}/doi/{1}'.format(self.url, doi),
headers={'Content-Type': 'application/plain;charset=UTF-8'},
auth=(self.user, self.pw)
)
except Exception as e:
_logger.warning('Could not delete doi in draft state')
_logger.warning(e)

def get_validated_datacite_json(self, cr_json, is_strict):
if isinstance(cr_json, list):
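
The MDS client now takes its base URL from settings rather than a hard-coded production host, and draft-DOI deletion reuses that same URL. A hedged configuration sketch (credentials, prefix and the test endpoint are assumptions; only the constructor arguments and the delete call mirror the diff):

# Illustrative DataCite MDS configuration; all values below are placeholders.
import requests
from datacite import DataCiteMDSClient

settings = {
    'USERNAME': 'example-user',
    'PASSWORD': 'example-pass',
    'PREFIX': '10.12345',
    'URL': 'https://mds.test.datacite.org',   # production would be https://mds.datacite.org
}

mds = DataCiteMDSClient(
    username=settings['USERNAME'],
    password=settings['PASSWORD'],
    prefix=settings['PREFIX'],
    url=settings['URL'])

# a draft DOI is removed with a plain REST call against the same base URL
doi = '10.12345/example-doi'
requests.delete(
    '{0}/doi/{1}'.format(settings['URL'], doi),
    headers={'Content-Type': 'application/plain;charset=UTF-8'},
    auth=(settings['USERNAME'], settings['PASSWORD']))
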