Skip to content
This repository has been archived by the owner on Sep 16, 2022. It is now read-only.

Commit

Permalink
Merge branch 'stable'
Browse files Browse the repository at this point in the history
  • Loading branch information
tonurmi committed Mar 11, 2022
2 parents 139cce7 + 14e514d commit 2a398b4
Show file tree
Hide file tree
Showing 22 changed files with 395 additions and 33 deletions.
9 changes: 5 additions & 4 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@ clean_gitlab_env:
- echo "Cleaning deleted branches from environments"
rules:
- if: $CI_PIPELINE_SOURCE == "merge_request_event"
when: manual
- when: never
when: never
- when: manual

update_metax:
stage: update
Expand All @@ -66,6 +66,7 @@ clean_previous_build:
script:
- ansible-playbook -i $ANSIBLE_INVENTORY $DELETE_PLAYBOOK -e "build_id=$CI_COMMIT_REF_SLUG repo_version=$CI_COMMIT_REF_NAME"
rules:
- if: $CI_COMMIT_BRANCH =~ /^(demo|stable|staging|test)$/
- if: $CI_COMMIT_BRANCH =~ /^(staging|test|stable)$/
when: never
when: manual
- when: never

28 changes: 28 additions & 0 deletions src/metax_api/api/rpc/base/views/statistic_rpc.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,3 +153,31 @@ def count_files(self, request):
}

return Response(StatisticService.count_files(**params))

@action(detail=False, methods=["get"], url_path="projects_summary")
def projects_summary(self, request):
    """
    GET endpoint that returns StatisticService.projects_summary() as a Response.

    The optional ``projects`` query parameter is parsed into a list with
    CS.get_list_query_param() and handed to the service. When the parameter
    is missing or empty, ``None`` is passed instead.
    """
    selected_projects = None
    if request.query_params.get("projects"):
        selected_projects = list(CS.get_list_query_param(request, "projects"))

    summary = StatisticService.projects_summary(projects=selected_projects)
    return Response(summary)

@action(detail=False, methods=["get"], url_path="organizations_summary")
def organizations_summary(self, request):
    """
    GET endpoint that returns StatisticService.organizations_summary() as a Response.

    The optional ``organizations`` query parameter is parsed into a list with
    CS.get_list_query_param() and handed to the service. When the parameter
    is missing or empty, ``None`` is passed instead.
    """
    selected_organizations = None
    if request.query_params.get("organizations"):
        selected_organizations = list(CS.get_list_query_param(request, "organizations"))

    summary = StatisticService.organizations_summary(organizations=selected_organizations)
    return Response(summary)
47 changes: 47 additions & 0 deletions src/metax_api/management/commands/create_statistic_report.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import logging

from django.core.management.base import BaseCommand
from django.db.models import Sum
from django.db.models.expressions import RawSQL

from metax_api.models import File, CatalogRecordV2, OrganizationStatistics, ProjectStatistics
from metax_api.api.rest.base.views import FileViewSet
from metax_api.services import FileService, StatisticService

logger = logging.getLogger(__name__)

class Command(BaseCommand):
    """
    Management command that rebuilds the ProjectStatistics and
    OrganizationStatistics tables from scratch.
    """

    def handle(self, *args, **options):
        """
        Delete all existing statistics rows and recreate them.

        The whole rebuild runs inside one database transaction, so a failure
        part-way through rolls the deletes back instead of leaving the
        statistics tables empty or half-filled.
        """
        # Local import: keeps this block self-contained without touching the
        # module-level import list.
        from django.db import transaction

        logger.info("Creating statistic summary")

        with transaction.atomic():
            OrganizationStatistics.objects.all().delete()
            ProjectStatistics.objects.all().delete()

            self._create_project_statistics()
            self._create_organization_statistics()

        logger.info("Statistic summary created")

    def _create_project_statistics(self):
        """Save one ProjectStatistics row per distinct File.project_identifier."""
        ida_projects = File.objects.all().values("project_identifier").distinct()
        for project in ida_projects:
            project_id = project["project_identifier"]
            # With include_pids=True the service result is indexed as
            # [0] -> dict with "count" and "byte_size", [1] -> file pids.
            ret = StatisticService.count_files([project_id], include_pids=True)
            file_pids = ret[1]

            if len(file_pids) == 0:
                catalog_records = ""
            else:
                # NOTE(review): presumably the identifiers of the datasets that
                # contain these files - confirm against FileService.get_identifiers.
                catalog_records = FileService.get_identifiers(
                    file_pids, "noparams", True, get_pids=True
                ).data

            ProjectStatistics(
                project_identifier=project_id,
                count=ret[0]["count"],
                byte_size=ret[0]["byte_size"],
                published_datasets=catalog_records,
            ).save()

    def _create_organization_statistics(self):
        """Save one OrganizationStatistics row per distinct metadata_provider_org."""
        # order_by() clears any ordering so that distinct() applies to
        # metadata_provider_org alone.
        organizations = (
            CatalogRecordV2.objects.all().order_by().values("metadata_provider_org").distinct()
        )

        for org in organizations:
            org_id = org["metadata_provider_org"]
            ret = StatisticService.count_datasets(metadata_provider_org=org_id)
            OrganizationStatistics(
                organization=org_id,
                count=ret["count"],
                byte_size=ret["ida_byte_size"],
            ).save()
20 changes: 17 additions & 3 deletions src/metax_api/management/commands/load_data_to_TTV.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,21 +30,35 @@ def __init__(self, user):

class Command(BaseCommand):
    """
    Management command that publishes catalog records to the TTV-datasets
    RabbitMQ exchange: published records with routing key "create" and
    removed records with routing key "delete".
    """

    help = "Upload all existing and removed catalog records to TTV's RabbitMQ queue"

    def handle(self, *args, **options):
        """Publish all published records, then all removed records."""
        # The serializers need a request in their context; a stand-in request
        # built from the module's User/Request helpers is used.
        context = {"request": Request(User())}

        self._publish_all(
            CatalogRecord.objects.filter(state="published"),
            routing_key="create",
            context=context,
        )
        # objects_unfiltered is used here because the removed records are
        # fetched through it rather than through the default manager.
        self._publish_all(
            CatalogRecord.objects_unfiltered.filter(removed=True),
            routing_key="delete",
            context=context,
        )

        logger.info("All catalog records published to TTV exchange")

    @staticmethod
    def _publish_all(catalog_records, routing_key, context):
        """
        Serialize every record in catalog_records and publish it to the
        TTV-datasets exchange using the given routing key.

        Returns the number of records published.
        """
        aff_rows = 0
        for catalog_record in catalog_records:
            serializer = catalog_record.serializer_class
            cr_json = serializer(catalog_record, context=context).data
            # Replace the serialized data catalog with just its catalog_json.
            cr_json["data_catalog"] = {"catalog_json": catalog_record.data_catalog.catalog_json}

            RabbitMQService.publish(cr_json, routing_key=routing_key, exchange="TTV-datasets")
            aff_rows += 1

        logger.info(
            f"Published {aff_rows} records to exchange: TTV-datasets, routing_key: {routing_key}"
        )
        return aff_rows
19 changes: 14 additions & 5 deletions src/metax_api/management/commands/update_orgs.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,10 @@ def get_orgs_from_api() -> List[Organization]:
orgs.append(o)

o = Organization(name_fi, name_en, org_code)
if name_sv:
o.org_name_sv = name_sv
orgs.append(o)

logger.info(f"retrieved {len(orgs)} organizations from research.fi")
return orgs

Expand Down Expand Up @@ -153,9 +156,12 @@ def handle(self, *args, **options):
added += 1
logger.info(f"Added {added} organisations from research.fi to local org list")

# sort orgs alphabetically
s = sorted(union, key=lambda i: (i.org_name_fi, i.unit_name))
with open(settings.ORG_FILE_PATH, "w") as f:
# write new orgs to local csv
with open(settings.ORG_FILE_PATH, "w", newline='') as f:
logger.info("writing updated csv")
# remove duplicates
no_duplicates = []
for c in s:
if c not in no_duplicates:
Expand All @@ -165,6 +171,7 @@ def handle(self, *args, **options):
writer = csv.DictWriter(
f,
fieldnames=CSV_HEADERS,
lineterminator='\n',
)
writer.writeheader()
for i in csv_serialized:
Expand All @@ -174,9 +181,11 @@ def handle(self, *args, **options):
# Malformed values from TTV api
if i["unit_name"] == "LÄÄKETIETEELLINEN TIEDEKUNTA":
continue
if "Ã…bo" in i["unit_name"]:
i["unit_name"] = str(i["unit_name"]).replace("Ã…bo", "Åbo")
if "ÖS" in i["unit_name"]:
i["unit_name"] = str(i["unit_name"]).replace("ÖS", "Ö")
if "Ã…" in i["unit_name"]:
i["unit_name"] = str(i["unit_name"]).replace("Ã…", "Å")
if "Ö" in i["unit_name"]:
i["unit_name"] = str(i["unit_name"]).replace("Ö", "Ö")
if "Ä" in i["unit_name"]:
i["unit_name"] = str(i["unit_name"]).replace("Ä", "Ä")
writer.writerow(i)
logger.info("successfully updated organization csv")
55 changes: 55 additions & 0 deletions src/metax_api/migrations/0047_update_vrk_datasets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
from django.db import migrations

import logging

logger = logging.getLogger(__name__)

def update_vrk_datasets(apps, schema_editor):
    """
    Data migration: rename the creator organization of the hardcoded
    Väestörekisterikeskus catalog records to its new name and append a note
    about the previous name to the dataset descriptions.

    The first entry of research_dataset["creator"] gets the new en/fi/sv
    names. The previous-name note is appended only to description
    translations that exist and do not already contain it, so re-running the
    migration does not duplicate the note and a missing translation does not
    abort it.

    :param apps: historical app registry provided by RunPython
    :param schema_editor: unused
    """
    logger.info("Updating organization info of catalog records by Väestörekisterikeskus")

    CatalogRecord = apps.get_model('metax_api', 'CatalogRecord')
    new_names = {
        "en": "Digital and Population Data Services Agency",
        "fi": "Digi- ja väestötietovirasto",
        "sv": "Myndigheten för digitalisering och befolkningsdata",
    }
    description_suffixes = {
        "en": "\n\nPrevious name of dataset creator organization: Population Register Center.",
        "fi": "\n\nAineiston luojaorganisaation aikaisempi nimi: Väestörekisterikeskus.",
    }

    # Catalog Records by Väestörekisterikeskus
    # Getting these from the database using Django filters would have
    # been too complicated, so instead they are hardcoded
    cr_ids = [
        "a3610de8-73fa-4e25-a89b-320549c71f0a",
        "b77c91cf-a437-4d01-b2ec-efb08605d559",
        "7787c312-3973-4e16-a032-7b89a0257739",
    ]

    crs = CatalogRecord.objects.filter(identifier__in=cr_ids)
    logger.info(f"Found {len(crs)} catalog records to update")
    for cr in crs:
        cr_json = cr.research_dataset
        logger.info(f"Updating catalog record: {cr}")

        cr_json["creator"][0]["name"].update(new_names)

        description = cr_json.get("description") or {}
        for lang, suffix in description_suffixes.items():
            current = description.get(lang)
            # Skip translations the dataset does not have instead of raising
            # KeyError; never create a description that did not exist before.
            if current is not None and suffix not in current:
                description[lang] = current + suffix

        cr.save()


def revert(apps, schema_editor):
    """
    No-op reverse operation: the changes written by the forward data
    migration are left in place when the migration is unapplied.
    """
    return None



class Migration(migrations.Migration):
    """Runs update_vrk_datasets as a data migration; reversing it is a no-op."""

    dependencies = [("metax_api", "0046_replace_dataset_owner")]

    operations = [
        migrations.RunPython(code=update_vrk_datasets, reverse_code=revert),
    ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Generated by Django 3.2.10 on 2021-12-31 07:43

from django.db import migrations, models


class Migration(migrations.Migration):
    # Schema migration that creates the tables for the OrganizationStatistics
    # and ProjectStatistics models.

    dependencies = [
        ('metax_api', '0047_update_vrk_datasets'),
    ]

    operations = [
        migrations.CreateModel(
            name='OrganizationStatistics',
            fields=[
                # The organization identifier doubles as the primary key.
                ('organization', models.CharField(max_length=200, primary_key=True, serialize=False)),
                ('count', models.IntegerField()),
                # NOTE(review): IntegerField is only guaranteed safe up to
                # 2147483647 on all Django backends; byte totals above ~2 GiB
                # may not fit - consider BigIntegerField in a follow-up migration.
                ('byte_size', models.IntegerField()),
            ],
        ),
        migrations.CreateModel(
            name='ProjectStatistics',
            fields=[
                # The project identifier doubles as the primary key.
                ('project_identifier', models.CharField(max_length=200, primary_key=True, serialize=False)),
                ('count', models.IntegerField()),
                # NOTE(review): same 32-bit range concern as
                # OrganizationStatistics.byte_size above.
                ('byte_size', models.IntegerField()),
                ('published_datasets', models.TextField()),
            ],
        ),
    ]
2 changes: 2 additions & 0 deletions src/metax_api/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,6 @@
from .file import File
from .file_storage import FileStorage
from .metax_user import MetaxUser
from .organization_statistics import OrganizationStatistics
from .project_statistics import ProjectStatistics
from .xml_metadata import XmlMetadata
13 changes: 8 additions & 5 deletions src/metax_api/models/catalog_record.py
Original file line number Diff line number Diff line change
Expand Up @@ -611,7 +611,7 @@ def user_has_access(self, request):
raise Http404

# write operation
return self.user_is_owner(request)
return self.user_is_privileged(request)

def user_is_owner(self, request):
if self.state == self.STATE_DRAFT and self.metadata_provider_user != request.user.username:
Expand Down Expand Up @@ -648,11 +648,14 @@ def user_is_privileged(self, request):
return True
else:
return True

users = self.editor_permissions.users
ids = users.all().values_list('user_id', flat=True)
if request.user.username in ids:
return True
elif self.user_is_owner(request):
# can see sensitive fields
return True
else:
# unknown user
return False

def _check_catalog_permissions(self, catalog_groups, catalog_services, request=None):
Expand Down Expand Up @@ -3185,12 +3188,12 @@ def __call__(self):
if self.cr.catalog_publishes_to_ttv():

_logger.info(
"Publishing CatalogRecord %s to RabbitMQ... exchange: ttv-datasets, routing_key: %s"
"Publishing CatalogRecord %s to RabbitMQ... exchange: TTV-datasets, routing_key: %s"
% (self.cr.identifier, self.routing_key)
)

rabbitmq.publish(
cr_json, routing_key=self.routing_key, exchange="ttv-datasets"
cr_json, routing_key=self.routing_key, exchange="TTV-datasets"
)

except:
Expand Down
18 changes: 18 additions & 0 deletions src/metax_api/models/organization_statistics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# This file is part of the Metax API service
#
# Copyright 2017-2018 Ministry of Education and Culture, Finland
#
# :author: CSC - IT Center for Science Ltd., Espoo Finland <[email protected]>
# :license: MIT

import logging

from django.db import models

_logger = logging.getLogger(__name__)

class OrganizationStatistics(models.Model):
    """Per-organization statistics row, keyed by the organization identifier."""

    # Organization identifier; primary key, so there is one row per organization.
    organization = models.CharField(primary_key=True, max_length=200)
    # NOTE(review): presumably the number of datasets of the organization -
    # confirm against the code that populates this table.
    count = models.IntegerField()
    # NOTE(review): IntegerField is only guaranteed safe up to 2147483647 on
    # all Django backends; byte totals above ~2 GiB may not fit - consider
    # BigIntegerField together with a schema migration.
    byte_size = models.IntegerField()

18 changes: 18 additions & 0 deletions src/metax_api/models/project_statistics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# This file is part of the Metax API service
#
# Copyright 2017-2018 Ministry of Education and Culture, Finland
#
# :author: CSC - IT Center for Science Ltd., Espoo Finland <[email protected]>
# :license: MIT

import logging

from django.db import models

_logger = logging.getLogger(__name__)

class ProjectStatistics(models.Model):
    """Per-project statistics row, keyed by the project identifier."""

    # Project identifier; primary key, so there is one row per project.
    project_identifier = models.CharField(primary_key=True, max_length=200)
    # NOTE(review): presumably the number of files in the project - confirm
    # against the code that populates this table.
    count = models.IntegerField()
    # NOTE(review): IntegerField is only guaranteed safe up to 2147483647 on
    # all Django backends; byte totals above ~2 GiB may not fit - consider
    # BigIntegerField together with a schema migration.
    byte_size = models.IntegerField()
    # Free-text field; NOTE(review): presumably holds the identifiers of the
    # project's published datasets in serialized form - confirm with the writer.
    published_datasets = models.TextField()
1 change: 1 addition & 0 deletions src/metax_api/onappstart.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

from metax_api.utils import ReferenceDataLoader, executing_test_case, convert_yaml_to_html


_logger = logging.getLogger(__name__)


Expand Down
Loading

0 comments on commit 2a398b4

Please sign in to comment.