
Commit

initial commit
mats-claassen committed Sep 13, 2023
1 parent 2583a2a commit a7a19d6
Showing 23 changed files with 8,062 additions and 1 deletion.
7 changes: 7 additions & 0 deletions .flake8
@@ -0,0 +1,7 @@
[flake8]
max-line-length = 130
ignore = W503
per-file-ignores =
__init__.py: F401
exclude =
.git,__pycache__,.ipynb_checkpoints,models/,dataset/
150 changes: 150 additions & 0 deletions .gitignore
@@ -0,0 +1,150 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# Custom
/datasets/
/models/
clearml.conf
*.parquet

# Intellij
.idea/

# .DS_Store
.DS_Store
8 changes: 8 additions & 0 deletions Dockerfile
@@ -0,0 +1,8 @@
FROM tensorflow/tensorflow:2.13.0-gpu-jupyter

WORKDIR /app
COPY requirements.txt /app

RUN pip install -r requirements.txt

ENTRYPOINT cd src && jupyter notebook --ip=0.0.0.0
9 changes: 9 additions & 0 deletions LICENSE
@@ -0,0 +1,9 @@
The MIT License (MIT)

Copyright (c) 2023 XMARTLABS

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31 changes: 30 additions & 1 deletion README.md
@@ -1 +1,30 @@
# time-series-playground
# Time Series Playground

Welcome to Xmartlabs' time series playground. This repository contains scripts and code to train time series models on weather datasets.

## Instructions

* Download the Jena Climate dataset by running:

```bash
./download_jena_dataset.sh
```

* Build the Docker image:

```bash
./build.sh
```

* Start the Docker container running the Jupyter Notebook server:

```bash
./start.sh
```

* Follow the instructions printed in the terminal to access the notebook in your browser.


## ClearML experiment tracking

If you use the ClearML tracker, make sure your $HOME/clearml.conf file is configured correctly and create a $HOME/.clearml folder, which will store caches and other ClearML data.
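
A minimal `clearml.conf` sketch (hypothetical example, not part of this repository; the server URLs are the hosted ClearML defaults and the credentials are placeholders, and running `clearml-init` can generate this file for you):

```
api {
    web_server: https://app.clear.ml
    api_server: https://api.clear.ml
    files_server: https://files.clear.ml
    credentials {
        "access_key" = "YOUR_ACCESS_KEY"
        "secret_key" = "YOUR_SECRET_KEY"
    }
}
```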
2 changes: 2 additions & 0 deletions build.sh
@@ -0,0 +1,2 @@
#!/bin/bash
docker build -t time_series_playground .
17 changes: 17 additions & 0 deletions download_jena_dataset.sh
@@ -0,0 +1,17 @@
#!/bin/bash

# constants
DIR=datasets/jena_climate/
DATASET_URL="https://storage.googleapis.com/tensorflow/tf-keras-datasets/jena_climate_2009_2016.csv.zip"

mkdir -p $DIR
wget -P $DIR $DATASET_URL
cd $DIR
unzip jena_climate_2009_2016.csv.zip

# Clean up
rm jena_climate_2009_2016.csv.zip
rm -rf __MACOSX/

echo "Goodbye! Here goes a joke:"
curl -s https://api.chucknorris.io/jokes/random?category=dev | jq -r '.value'
6 changes: 6 additions & 0 deletions requirements.txt
@@ -0,0 +1,6 @@
clearml
keras
matplotlib
pandas
scikit-learn
seaborn
Empty file added src/__init__.py
Empty file.
36 changes: 36 additions & 0 deletions src/dataset_loader.py
@@ -0,0 +1,36 @@
import os
import pandas as pd
from clearml import Dataset


class DatasetLoader:
    """Abstract class for loading datasets from different sources (local, ClearML, or another tracker)."""

def get_dataset_folder(self, dataset_project, dataset_name):
        raise NotImplementedError()


class LocalDatasetLoader(DatasetLoader):

def get_dataset_folder(self, dataset_project, dataset_name):
return f"data/{dataset_name}"


class ClearMLDatasetLoader(DatasetLoader):

def get_dataset_folder(self, dataset_project, dataset_name):
return Dataset.get(dataset_project=dataset_project, dataset_name=dataset_name).get_local_copy()


class JenaDatasetLoader(ClearMLDatasetLoader):
    project = 'Time Series PG'
    dataset = 'jena_climate'
    data_folder = None

def load(self):
self.data_folder = self.get_dataset_folder(self.project, self.dataset)

def get_data(self):
assert self.data_folder is not None, "You must call `load` before reading files"
return pd.read_csv(os.path.join(self.data_folder, "jena_climate_2009_2016.csv"))
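
For reference, a usage sketch for the loaders above (hypothetical, not part of the commit; it assumes ClearML is configured and a `jena_climate` dataset is registered under the `Time Series PG` project, and that the repository root is on the Python path):

```python
from src.dataset_loader import JenaDatasetLoader, LocalDatasetLoader

loader = JenaDatasetLoader()
loader.load()              # resolves the ClearML dataset to a local folder
df = loader.get_data()     # pandas DataFrame with the Jena climate CSV
print(df.shape)

# For purely local data, LocalDatasetLoader maps a dataset name to data/<dataset_name>.
print(LocalDatasetLoader().get_dataset_folder("Time Series PG", "jena_climate"))
```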
Empty file added src/models/__init__.py
Empty file.
67 changes: 67 additions & 0 deletions src/models/single_step_models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import tensorflow as tf
from src.models.time_series_model import TimeSeriesModel


class Baseline(tf.keras.Model):
def __init__(self, label_index=None):
super().__init__()
self.label_index = label_index

def call(self, inputs):
if self.label_index is None:
return inputs
result = inputs[:, :, self.label_index]
return result[:, :, tf.newaxis]


class LinearModel(TimeSeriesModel):
def build_model(self, **kwargs):
self.model = tf.keras.Sequential([
tf.keras.layers.Dense(units=1)
])


class DenseModel(TimeSeriesModel):
def build_model(self, **kwargs):
self.model = tf.keras.Sequential([
tf.keras.layers.Dense(units=64, activation='relu'),
tf.keras.layers.Dense(units=64, activation='relu'),
tf.keras.layers.Dense(units=1)
])


class MultiStepDense(TimeSeriesModel):
def build_model(self, **kwargs):
self.model = tf.keras.Sequential([
# Shape: (time, features) => (time*features)
tf.keras.layers.Flatten(),
tf.keras.layers.Dense(units=32, activation='relu'),
tf.keras.layers.Dense(units=32, activation='relu'),
tf.keras.layers.Dense(units=1),
# Add back the time dimension.
# Shape: (outputs) => (1, outputs)
tf.keras.layers.Reshape([1, -1]),
])


class ConvModel(TimeSeriesModel):
def build_model(self, **kwargs):
kernel_size = kwargs.get('conv_width', 3)
self.model = tf.keras.Sequential([
tf.keras.layers.Conv1D(filters=32,
kernel_size=(kernel_size,),
activation='relu'),
tf.keras.layers.Dense(units=32, activation='relu'),
tf.keras.layers.Dense(units=1),
])


class RNNModel(TimeSeriesModel):
def build_model(self, **kwargs):
self.model = tf.keras.models.Sequential([
# Shape [batch, time, features] => [batch, time, lstm_units]
tf.keras.layers.LSTM(32, return_sequences=True),
# Shape => [batch, time, features]
tf.keras.layers.Dense(units=1)
])

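A quick, hypothetical shape check for the models above (not part of the commit; it assumes the repository root is on the Python path, matching the module's own `src.` imports, and that the input has 14 features as in the raw Jena climate columns):

```python
import tensorflow as tf
from src.models.single_step_models import ConvModel

conv = ConvModel(tracker=None)   # the tracker is unused when only building the model
conv.build_model(conv_width=3)

# Dummy batch: 4 windows of 6 timesteps with 14 features.
dummy = tf.zeros([4, 6, 14])
print(conv.model(dummy).shape)   # (4, 4, 1): the conv layer shrinks time by conv_width - 1
```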
30 changes: 30 additions & 0 deletions src/models/time_series_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import tensorflow as tf


class TimeSeriesModel:

model = None

def __init__(self, tracker):
self.tracker = tracker

    # Subclasses build self.model here; hyperparameters (e.g. hidden layer sizes,
    # l2_param, dropout_factor, bias_regularizer) can be passed as keyword arguments.
    def build_model(self, **kwargs):
        raise NotImplementedError()

def compile_and_fit(self, window, patience=2, epochs=20):
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
patience=patience,
mode='min')

self.model.compile(loss=tf.keras.losses.MeanSquaredError(),
optimizer=tf.keras.optimizers.Adam(),
metrics=[tf.keras.metrics.MeanAbsoluteError()])

history = self.model.fit(window.train, epochs=epochs,
validation_data=window.val,
callbacks=[early_stopping])
return history

def predict(self, batch_generator):
pass
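
For reference, a minimal training sketch using `compile_and_fit` (hypothetical, not part of the commit): in the real workflow `window` would be a window generator exposing `.train` and `.val` tf.data datasets; here a toy stand-in is fabricated just to show the call sequence.

```python
import tensorflow as tf
from types import SimpleNamespace
from src.models.single_step_models import DenseModel

# Fabricated stand-in for the window generator: random (inputs, labels) batches
# shaped (batch, time, features) -> (batch, time, 1).
def toy_dataset(samples=128, time=1, features=14, batch=32):
    x = tf.random.normal([samples, time, features])
    y = tf.random.normal([samples, time, 1])
    return tf.data.Dataset.from_tensor_slices((x, y)).batch(batch)

window = SimpleNamespace(train=toy_dataset(), val=toy_dataset(samples=32))

dense = DenseModel(tracker=None)   # no experiment tracker for this dry run
dense.build_model()
history = dense.compile_and_fit(window, patience=2, epochs=3)
print(min(history.history["val_loss"]))
```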
Empty file added src/notebooks/__init__.py
Empty file.
