diff --git a/config/default_resnet.ini b/config/default_resnet.ini new file mode 100644 index 0000000..38efdd5 --- /dev/null +++ b/config/default_resnet.ini @@ -0,0 +1,53 @@ +[info] +experiment: ResNet default settings +name: resnet + +[dataset] +pixels: 64 +n_classes: 2 +channels: 1 +subset: None +filenames_train: ./../../data/cadV2_0.5mm_64x64_xy_xz_yz/subset[0-7]/*/*.pkl.gz +filenames_validation:./../../data/cadV2_0.5mm_64x64_xy_xz_yz/subset[8]/*/*.pkl.gz +data_folder: None ;not used + +[network] +architecture: resnet +input_size: 64 +depth: 2 +branching_factor: 1 +batch_normalization: False +batch_normalization_alpha: 0.3 +dropout: 0.25 +spatial_dropout: 0.5 +gaussian_noise: 0.05 + +[updates] ;Not used +optimization: adam +learning_rate: 0.0001 +momentum: 0.99 +l2_lambda: 1e-5 +batch_size_train: 64 +batch_size_validation: 128 +n_epochs: 200 + +[preprocessing] ;Not used +random_crop: 0 ;no cropping +erode_segmentation: 11 + +[normalization] +zero_center: True +mean_pixel: 0.2606236106512 + +[augmentation] ;Not used +augment: True +flip: True +zoom: 0.08 ;Only if OpenCV2 is available +rotation: 20 +translation: 3 + + +[misc] +multiprocess_load_augmentation: False +save_every_n_epoch: 2 +n_workers_load_augmentation: 4 \ No newline at end of file diff --git a/config/wide_resnet.ini b/config/wide_resnet.ini new file mode 100644 index 0000000..0c63363 --- /dev/null +++ b/config/wide_resnet.ini @@ -0,0 +1,51 @@ +[info] +experiment: Wide Residual Network default settings +name: resnet + +[dataset] +pixels: 224 +n_classes: 3 +channels: 3 +data_level: 0 +subset: None +subset_train: 1352 670 832 ; +subset_validation: 316 135 305 ; +filenames_train: None ;NOT USED +filenames_validation: None :NOT USED +data_folder: /mnt/rdstorage1/Userdata/Guido/BreastDataset + + +[network] +architecture: resnet +input_size: 224 +depth: 5 +branching_factor: 4 +batch_normalization: True + +[updates] +optimization: nesterov +learning_rate: 0 ;Schedule is used instead +momentum: 0.9 ;Not used yet +l2_lambda: 0.0001 ;Not used yet +batch_size_train: 6 +batch_size_validation: 12 +n_epochs: 200 +epoch_samples_train: 600 +epoch_samples_validation: 600 + +[normalization] +zero_center: True +mean_pixel: 0.79704494411170501 0.61885510553571943 0.71202771615037175 + +[augmentation] +augment: False ;All augmentation to be implemented +flip: True +zoom: 0.08 +rotation: 16 +translation: 3 + + +[misc] +multiprocess_load_augmentation: True +n_workers_load_augmentation: 6 +save_every_n_epoch: 2 \ No newline at end of file diff --git a/src/deep/dataset_2D.py b/src/deep/dataset_2D.py index 4b01dfd..0e933be 100644 --- a/src/deep/dataset_2D.py +++ b/src/deep/dataset_2D.py @@ -8,15 +8,20 @@ def load_images (image_paths): X = [] + labels = [] + for image_path in image_paths: with gzip.open(image_path) as file: - X.append(pickle.load(file)) + X.append(pickle.load(file)[0]) + + label = [0,1] if "True" in image_path else [1,0] + labels.append(label) X = np.array(X) X = normalize(X) - + if P.ZERO_CENTER: X -= P.MEAN_PIXEL - return X \ No newline at end of file + return np.array(X, dtype=np.float32), np.array(labels) diff --git a/src/deep/resnet/resnet.py b/src/deep/resnet/resnet.py new file mode 100644 index 0000000..45e7bb4 --- /dev/null +++ b/src/deep/resnet/resnet.py @@ -0,0 +1,311 @@ +import sys +sys.setrecursionlimit(10000) +import lasagne +from lasagne.nonlinearities import rectify, softmax, sigmoid +from lasagne.layers import InputLayer, MaxPool2DLayer, DenseLayer, DropoutLayer, helper, batch_norm, BatchNormLayer +# for ResNet +from lasagne.layers.dnn import Conv2DDNNLayer as ConvLayer +from lasagne.layers import Pool2DLayer, ElemwiseSumLayer, NonlinearityLayer, PadLayer, GlobalPoolLayer, ExpressionLayer +from lasagne.init import Orthogonal, HeNormal, GlorotNormal +from lasagne.updates import nesterov_momentum + +import theano +import theano.tensor as T +import numpy as np + +LR_SCHEDULE = { + 0: 0.01, + 8: 0.1, + 60: 0.01, + 100: 0.001, +} + +PIXELS = 64 +imageSize = PIXELS * PIXELS +num_classes = 2 + +he_norm = HeNormal(gain='relu') + +# Original from https://github.com/FlorianMuellerklein/Identity-Mapping-ResNet-Lasagne + +def ResNet_FullPreActivation(input_var=None, n=18): + ''' + Adapted from https://github.com/Lasagne/Recipes/tree/master/papers/deep_residual_learning. + Tweaked to be consistent with 'Identity Mappings in Deep Residual Networks', Kaiming He et al. 2016 (https://arxiv.org/abs/1603.05027) + + Forumala to figure out depth: 6n + 2 + ''' + # create a residual learning building block with two stacked 3x3 convlayers as in paper + def residual_block(l, increase_dim=False, projection=True, first=False): + input_num_filters = l.output_shape[1] + if increase_dim: + first_stride = (2,2) + out_num_filters = input_num_filters*2 + else: + first_stride = (1,1) + out_num_filters = input_num_filters + + if first: + # hacky solution to keep layers correct + bn_pre_relu = l + else: + # contains the BN -> ReLU portion, steps 1 to 2 + bn_pre_conv = BatchNormLayer(l) + bn_pre_relu = NonlinearityLayer(bn_pre_conv, rectify) + + # contains the weight -> BN -> ReLU portion, steps 3 to 5 + conv_1 = batch_norm(ConvLayer(bn_pre_relu, num_filters=out_num_filters, filter_size=(3,3), stride=first_stride, nonlinearity=rectify, pad='same', W=he_norm)) + + # contains the last weight portion, step 6 + conv_2 = ConvLayer(conv_1, num_filters=out_num_filters, filter_size=(3,3), stride=(1,1), nonlinearity=None, pad='same', W=he_norm) + + # add shortcut connections + if increase_dim: + # projection shortcut, as option B in paper + projection = ConvLayer(l, num_filters=out_num_filters, filter_size=(1,1), stride=(2,2), nonlinearity=None, pad='same', b=None) + block = ElemwiseSumLayer([conv_2, projection]) + else: + block = ElemwiseSumLayer([conv_2, l]) + + return block + + # Building the network + l_in = InputLayer(shape=(None, 3, PIXELS, PIXELS), input_var=input_var) + + # first layer, output is 16 x 32 x 32 + l = batch_norm(ConvLayer(l_in, num_filters=16, filter_size=(3,3), stride=(1,1), nonlinearity=rectify, pad='same', W=he_norm)) + + # first stack of residual blocks, output is 16 x 32 x 32 + l = residual_block(l, first=True) + for _ in range(1,n): + l = residual_block(l) + + # second stack of residual blocks, output is 32 x 16 x 16 + l = residual_block(l, increase_dim=True) + for _ in range(1,n): + l = residual_block(l) + + # third stack of residual blocks, output is 64 x 8 x 8 + l = residual_block(l, increase_dim=True) + for _ in range(1,n): + l = residual_block(l) + + bn_post_conv = BatchNormLayer(l) + bn_post_relu = NonlinearityLayer(bn_post_conv, rectify) + + # average pooling + avg_pool = GlobalPoolLayer(bn_post_relu) + + # fully connected layer + network = DenseLayer(avg_pool, num_units=num_classes, W=HeNormal(), nonlinearity=softmax) + + return network + +# ======================================================================================================================== + +def ResNet_BottleNeck_FullPreActivation(input_var=None, n=18): + ''' + Adapted from https://github.com/Lasagne/Recipes/tree/master/papers/deep_residual_learning. + Tweaked to be consistent with 'Identity Mappings in Deep Residual Networks', Kaiming He et al. 2016 (https://arxiv.org/abs/1603.05027) + + Judging from https://github.com/KaimingHe/resnet-1k-layers/blob/master/resnet-pre-act.lua. + Number of filters go 16 -> 64 -> 128 -> 256 + + Forumala to figure out depth: 9n + 2 + ''' + + # create a residual learning building block with two stacked 3x3 convlayers as in paper + def residual_bottleneck_block(l, increase_dim=False, first=False): + input_num_filters = l.output_shape[1] + + if increase_dim: + first_stride = (2,2) + out_num_filters = input_num_filters * 2 + else: + first_stride = (1,1) + out_num_filters = input_num_filters + + if first: + # hacky solution to keep layers correct + bn_pre_relu = l + out_num_filters = out_num_filters * 4 + else: + # contains the BN -> ReLU portion, steps 1 to 2 + bn_pre_conv = BatchNormLayer(l) + bn_pre_relu = NonlinearityLayer(bn_pre_conv, rectify) + + bottleneck_filters = out_num_filters / 4 + + # contains the weight -> BN -> ReLU portion, steps 3 to 5 + conv_1 = batch_norm(ConvLayer(bn_pre_relu, num_filters=bottleneck_filters, filter_size=(1,1), stride=(1,1), nonlinearity=rectify, pad='same', W=he_norm)) + + conv_2 = batch_norm(ConvLayer(conv_1, num_filters=bottleneck_filters, filter_size=(3,3), stride=first_stride, nonlinearity=rectify, pad='same', W=he_norm)) + + # contains the last weight portion, step 6 + conv_3 = ConvLayer(conv_2, num_filters=out_num_filters, filter_size=(1,1), stride=(1,1), nonlinearity=None, pad='same', W=he_norm) + + if increase_dim: + # projection shortcut, as option B in paper + projection = ConvLayer(l, num_filters=out_num_filters, filter_size=(1,1), stride=(2,2), nonlinearity=None, pad='same', b=None) + block = ElemwiseSumLayer([conv_3, projection]) + + elif first: + # projection shortcut, as option B in paper + projection = ConvLayer(l, num_filters=out_num_filters, filter_size=(1,1), stride=(1,1), nonlinearity=None, pad='same', b=None) + block = ElemwiseSumLayer([conv_3, projection]) + + else: + block = ElemwiseSumLayer([conv_3, l]) + + return block + + # Building the network + l_in = InputLayer(shape=(None, 3, PIXELS, PIXELS), input_var=input_var) + + # first layer, output is 16x16x16 + l = batch_norm(ConvLayer(l_in, num_filters=16, filter_size=(3,3), stride=(1,1), nonlinearity=rectify, pad='same', W=he_norm)) + + # first stack of residual blocks, output is 64x16x16 + l = residual_bottleneck_block(l, first=True) + for _ in range(1,n): + l = residual_bottleneck_block(l) + + # second stack of residual blocks, output is 128x8x8 + l = residual_bottleneck_block(l, increase_dim=True) + for _ in range(1,n): + l = residual_bottleneck_block(l) + + # third stack of residual blocks, output is 256x4x4 + l = residual_bottleneck_block(l, increase_dim=True) + for _ in range(1,n): + l = residual_bottleneck_block(l) + + bn_post_conv = BatchNormLayer(l) + bn_post_relu = NonlinearityLayer(bn_post_conv, rectify) + + # average pooling + avg_pool = GlobalPoolLayer(bn_post_relu) + + # fully connected layer + network = DenseLayer(avg_pool, num_units=num_classes, W=HeNormal(), nonlinearity=softmax) + + return network + +# ======================================================================================================================== + +def ResNet_FullPre_Wide(input_var=None, n=6, k=4): + ''' + Adapted from https://github.com/Lasagne/Recipes/tree/master/papers/deep_residual_learning. + + Tweaked to be consistent with 'Identity Mappings in Deep Residual Networks', Kaiming He et al. 2016 (https://arxiv.org/abs/1603.05027) + + And 'Wide Residual Networks', Sergey Zagoruyko, Nikos Komodakis 2016 (http://arxiv.org/pdf/1605.07146v1.pdf) + + Depth = 6n + 2 + ''' + n_filters = {0:16, 1:16*k, 2:32*k, 3:64*k} + + # create a residual learning building block with two stacked 3x3 convlayers as in paper + def residual_block(l, increase_dim=False, projection=True, first=False, filters=16): + if increase_dim: + first_stride = (2,2) + else: + first_stride = (1,1) + + if first: + # hacky solution to keep layers correct + bn_pre_relu = l + else: + # contains the BN -> ReLU portion, steps 1 to 2 + bn_pre_conv = BatchNormLayer(l) + bn_pre_relu = NonlinearityLayer(bn_pre_conv, rectify) + + # contains the weight -> BN -> ReLU portion, steps 3 to 5 + conv_1 = batch_norm(ConvLayer(bn_pre_relu, num_filters=filters, filter_size=(3,3), stride=first_stride, nonlinearity=rectify, pad='same', W=he_norm)) + + dropout = DropoutLayer(conv_1, p=0.3) + + # contains the last weight portion, step 6 + conv_2 = ConvLayer(dropout, num_filters=filters, filter_size=(3,3), stride=(1,1), nonlinearity=None, pad='same', W=he_norm) + + # add shortcut connections + if increase_dim: + # projection shortcut, as option B in paper + projection = ConvLayer(l, num_filters=filters, filter_size=(1,1), stride=(2,2), nonlinearity=None, pad='same', b=None) + block = ElemwiseSumLayer([conv_2, projection]) + + elif first: + # projection shortcut, as option B in paper + projection = ConvLayer(l, num_filters=filters, filter_size=(1,1), stride=(1,1), nonlinearity=None, pad='same', b=None) + block = ElemwiseSumLayer([conv_2, projection]) + + else: + block = ElemwiseSumLayer([conv_2, l]) + + return block + + # Building the network + l_in = InputLayer(shape=(None, 3, PIXELS, PIXELS), input_var=input_var) + + # first layer, output is 16 x 64 x 64 + l = batch_norm(ConvLayer(l_in, num_filters=n_filters[0], filter_size=(3,3), stride=(1,1), nonlinearity=rectify, pad='same', W=he_norm)) + + # first stack of residual blocks, output is 32 x 64 x 64 + l = residual_block(l, first=True, filters=n_filters[1]) + for _ in range(1,n): + l = residual_block(l, filters=n_filters[1]) + + # second stack of residual blocks, output is 64 x 32 x 32 + l = residual_block(l, increase_dim=True, filters=n_filters[2]) + for _ in range(1,(n+2)): + l = residual_block(l, filters=n_filters[2]) + + # third stack of residual blocks, output is 128 x 16 x 16 + l = residual_block(l, increase_dim=True, filters=n_filters[3]) + for _ in range(1,(n+2)): + l = residual_block(l, filters=n_filters[3]) + + bn_post_conv = BatchNormLayer(l) + bn_post_relu = NonlinearityLayer(bn_post_conv, rectify) + + # average pooling + avg_pool = GlobalPoolLayer(bn_post_relu) + + # fully connected layer + network = DenseLayer(avg_pool, num_units=num_classes, W=HeNormal(), nonlinearity=softmax) + + return network + + +def define_updates(output_layer, X, Y): + output_train = lasagne.layers.get_output(output_layer) + output_test = lasagne.layers.get_output(output_layer, deterministic=True) + + # set up the loss that we aim to minimize when using cat cross entropy our Y should be ints not one-hot + loss = lasagne.objectives.categorical_crossentropy(output_train, Y) + loss = loss.mean() + + acc = T.mean(T.eq(T.argmax(output_train, axis=1), Y), dtype=theano.config.floatX) + + # if using ResNet use L2 regularization + all_layers = lasagne.layers.get_all_layers(output_layer) + l2_penalty = lasagne.regularization.regularize_layer_params(all_layers, lasagne.regularization.l2) * 0.0001 + loss = loss + l2_penalty + + # set up loss functions for validation dataset + test_loss = lasagne.objectives.categorical_crossentropy(output_test, Y) + test_loss = test_loss.mean() + + test_acc = T.mean(T.eq(T.argmax(output_test, axis=1), Y), dtype=theano.config.floatX) + + # get parameters from network and set up sgd with nesterov momentum to update parameters, l_r is shared var so it can be changed + l_r = theano.shared(np.array(LR_SCHEDULE[0], dtype=theano.config.floatX)) + params = lasagne.layers.get_all_params(output_layer, trainable=True) + updates = nesterov_momentum(loss, params, learning_rate=l_r, momentum=0.94) + #updates = adam(loss, params, learning_rate=l_r) + + # set up training and prediction functions + train_fn = theano.function(inputs=[X,Y], outputs=[loss, l2_penalty, acc], updates=updates) + valid_fn = theano.function(inputs=[X,Y], outputs=[test_loss, l2_penalty, test_acc]) + + return train_fn, valid_fn, l_r diff --git a/src/deep/resnet/resnet_trainer.py b/src/deep/resnet/resnet_trainer.py new file mode 100644 index 0000000..4dd86f4 --- /dev/null +++ b/src/deep/resnet/resnet_trainer.py @@ -0,0 +1,107 @@ +from __future__ import division +import time +import numpy as np +import trainer +from params import params as P +import matplotlib +matplotlib.use('Agg') +import matplotlib.pyplot as plt +from functools import partial +import logging +import scipy.misc +from parallel import ParallelBatchIterator +from tqdm import tqdm +import os.path + +import dataset_2D + +import theano +import theano.tensor as T +import lasagne +import resnet +from resnet import LR_SCHEDULE + + +class ResNetTrainer(trainer.Trainer): + def __init__(self): + metric_names = ['Loss','L2','Accuracy'] + super(ResNetTrainer, self).__init__(metric_names) + + input_var = T.tensor4('inputs') + target_var = T.ivector('targets') + + logging.info("Defining network") + net = resnet.ResNet_FullPre_Wide(input_var, P.DEPTH, P.BRANCHING_FACTOR) + self.network = net + train_fn, val_fn, l_r = resnet.define_updates(self.network, input_var, target_var) + + self.train_fn = train_fn + self.val_fn = val_fn + self.l_r = l_r + + def do_batches(self, fn, batch_generator, metrics): + for i, batch in enumerate(tqdm(batch_generator)): + inputs, targets = batch + targets = np.array(np.argmax(targets, axis=1), dtype=np.int32) + err, l2_loss, acc = fn(inputs, targets) + + metrics.append([err, l2_loss, acc]) + #metrics.append_prediction(true, prob_b) + + def train(self, generator_train, X_train, generator_val, X_val): + #filenames_train, filenames_val = patch_sampling.get_filenames() + #generator = partial(patch_sampling.extract_random_patches, patch_size=P.INPUT_SIZE, crop_size=OUTPUT_SIZE) + + + train_true = filter(lambda x: "True" in x, X_train) + train_false = filter(lambda x: "False" in x, X_train) + + val_true = filter(lambda x: "True" in x, X_val) + val_false = filter(lambda x: "False" in x, X_val) + + n_train_true = len(train_true) + n_val_true = len(val_true) + + logging.info("Starting training...") + for epoch in range(P.N_EPOCHS): + self.pre_epoch() + + if epoch in LR_SCHEDULE: + self.l_r.set_value(LR_SCHEDULE[epoch]) + + + np.random.shuffle(train_false) + np.random.shuffle(val_false) + + train_epoch_data = train_true + train_false[:n_train_true] + val_epoch_data = val_true + val_false[:n_val_true*20] + + np.random.shuffle(train_epoch_data) + #np.random.shuffle(val_epoch_data) + + #Full pass over the training data + train_gen = ParallelBatchIterator(generator_train, train_epoch_data, ordered=False, + batch_size=P.BATCH_SIZE_TRAIN, + multiprocess=P.MULTIPROCESS_LOAD_AUGMENTATION, + n_producers=P.N_WORKERS_LOAD_AUGMENTATION) + + self.do_batches(self.train_fn, train_gen, self.train_metrics) + + # And a full pass over the validation data: + val_gen = ParallelBatchIterator(generator_val, val_epoch_data, ordered=False, + batch_size=P.BATCH_SIZE_VALIDATION, + multiprocess=P.MULTIPROCESS_LOAD_AUGMENTATION, + n_producers=P.N_WORKERS_LOAD_AUGMENTATION) + + self.do_batches(self.val_fn, val_gen, self.val_metrics) + self.post_epoch() + +if __name__ == "__main__": + X_train = glob.glob(P.FILENAMES_TRAIN) + X_val = glob.glob(P.FILENAMES_VALIDATION) + + train_generator = dataset_2D.load_images + validation_generator = dataset_2D.load_images + + trainer = ResNetTrainer() + trainer.train(train_generator, X_train, validation_generator, X_val) diff --git a/src/deep/train.py b/src/deep/train.py index add715e..9a70d61 100644 --- a/src/deep/train.py +++ b/src/deep/train.py @@ -1,10 +1,17 @@ from __future__ import division import sys import numpy as np -sys.path.append('./unet') -from unet_trainer import UNetTrainer from params import params as P -import dataset + + +if P.ARCHITECTURE == 'unet': + sys.path.append('./unet') + from unet_trainer import UNetTrainer + import dataset +elif P.ARCHITECTURE == 'resnet': + sys.path.append('./resnet') + from resnet_trainer import ResNetTrainer + import dataset_2D from functools import partial import glob @@ -16,11 +23,25 @@ filenames_train = filenames_train[:P.SUBSET] filenames_val = filenames_val[:P.SUBSET] - generator_train = dataset.load_images - generator_val = partial(dataset.load_images, deterministic=True) + if P.ARCHITECTURE == 'unet': + + generator_train = dataset.load_images + generator_val = partial(dataset.load_images, deterministic=True) + + print "Creating train splits" + train_splits = dataset.train_splits_by_z(filenames_train, 0.3, P.N_EPOCHS) + + trainer = UNetTrainer() + trainer.train(train_splits, filenames_val, generator_train, generator_val) + + elif P.ARCHITECTURE == 'resnet': + + + X_train = glob.glob(P.FILENAMES_TRAIN) + X_val = glob.glob(P.FILENAMES_VALIDATION) - print "Creating train splits" - train_splits = dataset.train_splits_by_z(filenames_train, 0.3, P.N_EPOCHS) + train_generator = dataset_2D.load_images + validation_generator = dataset_2D.load_images - trainer = UNetTrainer() - trainer.train(train_splits, filenames_val, generator_train, generator_val) + trainer = ResNetTrainer() + trainer.train(train_generator, X_train, validation_generator, X_val)