Skip to content

Commit

Permalink
#40: don't validate uri when saving DatasetURI, try/except clause in …
Browse files Browse the repository at this point in the history
…crawl command, and changed a docstring in nansat_ingestor/managers.py
  • Loading branch information
mortenwh committed Jul 25, 2019
1 parent 265fb38 commit af2f2e7
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 5 deletions.
2 changes: 1 addition & 1 deletion geospaas/catalog/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ def protocol(self):
return self.uri.split(':')[0]

def save(self, *args, **kwargs):
validate_uri(self.uri)
# validate_uri(self.uri) -- disabled: this will often fail because of server failures.
# Validation is not usually done in the models but rather via form
# validation. We should discuss if we want it here or not.
super(DatasetURI, self).save(*args, **kwargs)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from thredds_crawler.crawl import Crawl

from django.core.management.base import BaseCommand, CommandError
from django.db.utils import IntegrityError

from geospaas.utils.utils import validate_uri
from geospaas.catalog.models import DatasetURI
Expand All @@ -22,7 +23,8 @@ def crawl(url, **options):
else:
select = None

c = Crawl(url, select=select, skip=['.*ncml'], debug=True)
skips = Crawl.SKIPS + ['.*ncml']
c = Crawl(url, select=select, skip=skips, debug=True)
added = 0
for ds in c.datasets:
url = [s.get('url') for s in ds.services if
Expand All @@ -38,8 +40,16 @@ def crawl(url, **options):
print('Added %s, no. %d/%d'%(url, added, len(c.datasets)))
# Connect all service uris to the dataset
for s in ds.services:
ds_uri, _ = DatasetURI.objects.get_or_create(name=s.get('name'),
try:
ds_uri, _ = DatasetURI.objects.get_or_create(name=s.get('name'),
service=s.get('service'), uri=s.get('url'), dataset=gds)
except IntegrityError:
# There is no standard for the name (and possibly the service). This means that the
# naming defined by geospaas.catalog.managers.DAP_SERVICE_NAME (and assigned to the
# DatasetURI in geospaas.nansat_ingestor.managers.DatasetManager.get_or_create) may
# be different from s.get('name').
# Solution: ignore the error and continue the loop
continue
return added

class Command(BaseCommand):
Expand Down
4 changes: 2 additions & 2 deletions geospaas/nansat_ingestor/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
class DatasetManager(models.Manager):

def get_or_create(self, uri, n_points=10, uri_filter_args=None, *args, **kwargs):
''' Create dataset and corresponding metadata
""" Create dataset and corresponding metadata
Parameters:
----------
Expand All @@ -42,7 +42,7 @@ def get_or_create(self, uri, n_points=10, uri_filter_args=None, *args, **kwargs)
Returns:
-------
dataset and flag
'''
"""
if not uri_filter_args:
uri_filter_args = {}

Expand Down

0 comments on commit af2f2e7

Please sign in to comment.