Skip to content

Commit

Permalink
Merge pull request #48 from HiSPARC/cleanup
Browse files Browse the repository at this point in the history
Cleanup and ignore old events
  • Loading branch information
153957 authored Jun 25, 2024
2 parents 516030f + 0fa4225 commit b09e4ed
Show file tree
Hide file tree
Showing 10 changed files with 87 additions and 119 deletions.
8 changes: 0 additions & 8 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,13 +1,8 @@
# Specific
.remote-sync.json
station_list.csv
config.ini
writer_app.py
*.h5
_build/
build/


# Python
*.pyc
*.pyo
Expand All @@ -23,6 +18,3 @@ build/
ehthumbs.db
Icon?
Thumbs.db

# logfiles generated by tests
tests/hisparc.log*
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ test: unittests linttest doctest

.PHONY: unittests
unittests:
coverage run -m unittest discover tests -v
coverage run -m unittest -v
coverage report

.PHONY: linttest
Expand Down
Empty file added tests/__init__.py
Empty file.
Empty file added tests/fake_datastore/logs/.keep
Empty file.
6 changes: 3 additions & 3 deletions tests/test_data/config.ini
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
[General]
log=hisparc.log
log=tests/fake_datastore/logs/hisparc.log
loglevel=debug
station_list=fake_datastore/station_list.csv
data_dir=fake_datastore
station_list=tests/fake_datastore/station_list.csv
data_dir=tests/fake_datastore/

[Writer]
sleep=5
Expand Down
65 changes: 24 additions & 41 deletions tests/test_writer_acceptance.py
Original file line number Diff line number Diff line change
@@ -1,64 +1,52 @@
"""
Acceptance tests for the writer
"""Acceptance tests for the writer
Check with data pickled by Python 2 and 3.
python 3
"""

import base64
import configparser
import os
import shutil
import sys
import unittest

from pathlib import Path
from unittest import mock

import tables

from numpy import array
from numpy.testing import assert_array_equal

self_path = os.path.dirname(__file__)
test_data_path = os.path.join(self_path, 'test_data/')

# configuration:
WRITER_PATH = os.path.join(self_path, '../')
DATASTORE_PATH = os.path.join(self_path, 'fake_datastore')
CONFIGFILE = os.path.join(test_data_path, 'config.ini')
from writer import writer_app

CONFIG = f"""
[General]
log=hisparc.log
loglevel=debug
station_list={DATASTORE_PATH}/station_list.csv
data_dir={DATASTORE_PATH}
"""

with open(CONFIGFILE, 'w') as f:
f.write(CONFIG)
self_path = Path(__file__).parent
test_data_path = self_path / 'test_data'

# Configuration
DATASTORE_PATH = self_path / 'fake_datastore'
CONFIGFILE = test_data_path / 'config.ini'
STATION_ID = 99
CLUSTER = 'amsterdam'

UPLOAD_CODES = ['CIC', 'SIN', 'WTR', 'CFG']
pickle_data_path = os.path.join(test_data_path, 'incoming_writer/')
PICKLE_DATA_PATH = test_data_path / 'incoming_writer'


def import_writer_app():
"""import the writer"""
sys.path.append(WRITER_PATH)
from writer import writer_app

def configure_writer_app():
"""configure the writer"""
writer_app.config = configparser.ConfigParser()
writer_app.config.read(CONFIGFILE)
return writer_app


def get_writer_app(writer_app=None):
"""return the WSGI application"""
if writer_app is None:
writer_app = import_writer_app()
if not hasattr(writer_app, 'config'):
writer_app = configure_writer_app()
return writer_app


@mock.patch('writer.store_events.MINIMUM_YEAR', 2016)
class TestWriterAcceptancePy2Pickles(unittest.TestCase):
"""Acceptance tests for python 2 pickles"""

Expand All @@ -69,15 +57,13 @@ def setUp(self):
self.station_id = STATION_ID
self.cluster = CLUSTER
self.filepath = '2017/2/2017_2_26.h5'
self.pickle_filename = {}
for upload_code in UPLOAD_CODES:
self.pickle_filename[upload_code] = os.path.join(
pickle_data_path,
f'writer_{self.pickle_version}_{upload_code}',
)
self.pickle_filename = {
upload_code: PICKLE_DATA_PATH / f'writer_{self.pickle_version}_{upload_code}'
for upload_code in UPLOAD_CODES
}

def tearDown(self):
self.clean_datastore()
shutil.rmtree(DATASTORE_PATH / '2017')

def test_event_acceptance(self):
self.writer_app.process_data(self.pickle_filename['CIC'])
Expand Down Expand Up @@ -140,17 +126,14 @@ def test_config_acceptance(self):
self.assertEqual(blobs[1], b'Hardware: 0 FPGA: 0')

def read_table(self, table):
path = os.path.join(DATASTORE_PATH, self.filepath)
path = DATASTORE_PATH / self.filepath
table_path = f'/hisparc/cluster_{self.cluster}/station_{self.station_id}/{table}'
with tables.open_file(path, 'r') as datafile:
t = datafile.get_node(table_path)
data = t.read()

return data

def clean_datastore(self):
shutil.rmtree(os.path.join(DATASTORE_PATH, '2017'))


class TestWriterAcceptancePy3Pickles(TestWriterAcceptancePy2Pickles):
"""Acceptance tests for python 3 pickles"""
Expand Down
71 changes: 30 additions & 41 deletions tests/test_wsgi_app.py
Original file line number Diff line number Diff line change
@@ -1,57 +1,47 @@
"""Acceptance tests for the datastore WSGI app"""

import functools
import glob
import hashlib
import os
import pickle
import sys
import unittest

from webtest import TestApp
from http import HTTPStatus
from pathlib import Path
from unittest import mock

self_path = os.path.dirname(__file__)
test_data_path = os.path.join(self_path, 'test_data/')
from webtest import TestApp

# configuration:
WSGI_APP_PATH = os.path.join(self_path, '../')
DATASTORE_PATH = os.path.join(self_path, 'fake_datastore')
CONFIGFILE = os.path.join(test_data_path, 'config.ini')
from wsgi import wsgi_app

CONFIG = f"""
[General]
log=hisparc.log
loglevel=debug
station_list={DATASTORE_PATH}/station_list.csv
data_dir={DATASTORE_PATH}
"""
self_path = Path(__file__).parent
test_data_path = self_path / 'test_data'

with open(CONFIGFILE, 'w') as f:
f.write(CONFIG)
# configuration:
DATASTORE_PATH = self_path / 'fake_datastore'
CONFIGFILE = test_data_path / 'config.ini'

STATION_ID = 99
PASSWORD = 'fake_station'

EVENTPY2 = os.path.join(test_data_path, 'incoming_http/py2_s510_100events')
EVENTPY3 = os.path.join(test_data_path, 'incoming_http/py3event')
EVENTSUS = os.path.join(test_data_path, 'incoming_http/suspicious_event')
EVENTPY2 = test_data_path / 'incoming_http/py2_s510_100events'
EVENTPY3 = test_data_path / 'incoming_http/py3event'
EVENTSUS = test_data_path / 'incoming_http/suspicious_event'


def import_wsgi_app():
def configure_wsgi_app():
"""return the WSGI application bound to the test config file"""
sys.path.append(WSGI_APP_PATH)
from wsgi import wsgi_app

return functools.partial(wsgi_app.application, configfile=CONFIGFILE)


def get_wsgi_app(wsgi_app=None):
"""return the WSGI application"""
if wsgi_app is None:
wsgi_app = import_wsgi_app()
wsgi_app = configure_wsgi_app()
return wsgi_app


@mock.patch('wsgi.wsgi_app.MINIMUM_YEAR', 2016)
class TestWsgiAppAcceptance(unittest.TestCase):
def setUp(self):
self.station_id = STATION_ID
Expand All @@ -64,6 +54,7 @@ def tearDown(self):
def test_invalid_post_data(self):
resp = self.app.post('/', {})
self.assertEqual(resp.body, b'400') # invalid post data
self.assertEqual(resp.status_code, HTTPStatus.OK)
self.assert_num_files_in_datastore(incoming=0, suspicious=0)

def test_unpickling_error(self):
Expand Down Expand Up @@ -136,30 +127,28 @@ def upload(self, pickled_data, checksum=None):
}

response = self.app.post('/', data)
self.assertEqual(response.status_code, HTTPStatus.OK)
return response.body

def read_pickle(self, fn):
with open(fn, 'rb') as f:
pickle = f.read()
return pickle
event = fn.read_bytes()
return event

def files_in_folder(self, folder):
return glob.glob(folder + '/*')
def files_in_folder(self, path):
return [file for file in path.iterdir() if file.name != '.keep']

def clean_datastore(self):
for folder in ['incoming', 'tmp', 'suspicious']:
for fn in self.files_in_folder(os.path.join(DATASTORE_PATH, folder)):
os.remove(fn)
for folder in ['incoming', 'tmp', 'suspicious', 'logs']:
for filepath in self.files_in_folder(DATASTORE_PATH / folder):
filepath.unlink()

def assert_num_files_in_datastore(self, incoming=None, suspicious=None):
self.assertEqual(len(self.files_in_folder(os.path.join(DATASTORE_PATH, 'tmp'))), 0)
if incoming is not None:
self.assertEqual(len(self.files_in_folder(os.path.join(DATASTORE_PATH, 'incoming'))), incoming)
if suspicious is not None:
self.assertEqual(len(self.files_in_folder(os.path.join(DATASTORE_PATH, 'suspicious'))), suspicious)
def assert_num_files_in_datastore(self, incoming=0, suspicious=0):
self.assertEqual(len(self.files_in_folder(DATASTORE_PATH / 'tmp')), 0)
self.assertEqual(len(self.files_in_folder(DATASTORE_PATH / 'incoming')), incoming)
self.assertEqual(len(self.files_in_folder(DATASTORE_PATH / 'suspicious')), suspicious)

def assert_num_events_written(self, number_of_events):
fn = self.files_in_folder(os.path.join(DATASTORE_PATH, 'incoming'))[0]
fn = self.files_in_folder(DATASTORE_PATH / 'incoming')[0]
with open(fn, 'rb') as f:
data = pickle.load(f)
written_event_list = data['event_list']
Expand Down
7 changes: 6 additions & 1 deletion writer/store_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@

logger = logging.getLogger('writer.store_events')

MINIMUM_YEAR = 2020


def store_event(datafile, cluster, station_id, event):
"""Stores an event in the h5 filesystem
Expand Down Expand Up @@ -38,7 +40,7 @@ def store_event(datafile, cluster, station_id, event):
nanoseconds = eventheader['nanoseconds']
# make an extended timestamp, which is the number of nanoseconds since
# epoch
ext_timestamp = timestamp * int(1e9) + nanoseconds
ext_timestamp = timestamp * 1_000_000_000 + nanoseconds
row['timestamp'] = timestamp

if upload_codes['_has_ext_time']:
Expand Down Expand Up @@ -118,6 +120,9 @@ def store_event_list(data_dir, station_id, cluster, event_list):
timestamp = event['header']['datetime']
if timestamp:
date = timestamp.date()
if date.year < MINIMUM_YEAR:
logger.error(f'Old event ({date}), discarding event (station: {station_id})')
continue
if date != prev_date:
if datafile:
datafile.close()
Expand Down
Loading

0 comments on commit b09e4ed

Please sign in to comment.