From 3c2da65655ba3dde6bb95922097022eac4bf4ef4 Mon Sep 17 00:00:00 2001
From: Mark Harris
Date: Tue, 27 Oct 2015 15:45:01 +1100
Subject: [PATCH] Adding Source Code for Parallel Forall post on MATLAB Deep Learning

---
 .../PetDetectionRecognitionScript.m           | 123 ++++++++++++++++++
 MATLAB_deeplearning/README.md                 |   9 ++
 MATLAB_deeplearning/cnnPredict.m              | 123 ++++++++++++++++++
 MATLAB_deeplearning/filterRegions.m           |  23 ++++
 MATLAB_deeplearning/findPet.m                 |  22 ++++
 MATLAB_deeplearning/getImageLabels.m          |  11 ++
 6 files changed, 311 insertions(+)
 create mode 100755 MATLAB_deeplearning/PetDetectionRecognitionScript.m
 create mode 100644 MATLAB_deeplearning/README.md
 create mode 100755 MATLAB_deeplearning/cnnPredict.m
 create mode 100755 MATLAB_deeplearning/filterRegions.m
 create mode 100755 MATLAB_deeplearning/findPet.m
 create mode 100755 MATLAB_deeplearning/getImageLabels.m

diff --git a/MATLAB_deeplearning/PetDetectionRecognitionScript.m b/MATLAB_deeplearning/PetDetectionRecognitionScript.m
new file mode 100755
index 0000000..f17bd46
--- /dev/null
+++ b/MATLAB_deeplearning/PetDetectionRecognitionScript.m
@@ -0,0 +1,123 @@
+% Copyright (c) 2015, MathWorks, Inc.
+
+%% Download and predict using a pretrained ImageNet model
+
+% Download the model from the MatConvNet pretrained networks repository.
+% The download is skipped when the file already exists, so re-running the
+% script does not fetch it again.
+modelFile = 'imagenet-vgg-f.mat';
+if ~exist(modelFile,'file')
+    websave(modelFile,'http://www.vlfeat.org/matconvnet/models/imagenet-vgg-f.mat');
+end
+cnnModel.net = load(modelFile);
+
+% Set up MatConvNet, modify the path if it's installed in a different
+% folder
+run(fullfile('matconvnet-1.0-beta15','matlab','vl_setupnn.m'));
+
+% Load and display an example image
+imshow('dog_example.png');
+img = imread('dog_example.png');
+
+% Predict label using ImageNet trained vgg-f CNN model
+label = cnnPredict(cnnModel,img);
+title(label,'FontSize',20)
+
+%% Load images from folder
+
+% Use imageSet to manage images stored in multiple folders
+imset = imageSet('pet_images','recursive');
+
+% Preallocate arrays with fixed size for prediction
+imageSize = cnnModel.net.normalization.imageSize;
+trainingImages = zeros([imageSize sum([imset(:).Count])],'single');
+
+% Load and resize images for prediction. A running index stores the images
+% of all image sets one after another; indexing by jj alone would let each
+% set overwrite the images of the previous one. The resulting order (set by
+% set) matches the label order produced by getImageLabels.
+imageIdx = 0;
+for ii = 1:numel(imset)
+    for jj = 1:imset(ii).Count
+        imageIdx = imageIdx + 1;
+        trainingImages(:,:,:,imageIdx) = imresize(single(read(imset(ii),jj)),imageSize(1:2));
+    end
+end
+
+% Get the image labels directly from the ImageSet object
+trainingLabels = getImageLabels(imset);
+summary(trainingLabels)
+
+%% Extract features using pretrained CNN
+
+% Depending on how much memory you have on your GPU you may use a larger
+% batch size. We have 400 images, I'm going to choose 200 as my batch size
+cnnModel.info.opts.batchSize = 200;
+
+% Make prediction on a CPU
+[~, cnnFeatures, timeCPU] = cnnPredict(cnnModel,trainingImages,'UseGPU',false);
+% Make prediction on a GPU (cnnFeatures is overwritten by the GPU result)
+[~, cnnFeatures, timeGPU] = cnnPredict(cnnModel,trainingImages,'UseGPU',true);
+
+% Compare the performance increase
+bar([sum(timeCPU),sum(timeGPU)],0.5)
+title(sprintf('Approximate speedup: %2.00f x ',sum(timeCPU)/sum(timeGPU)))
+set(gca,'XTickLabel',{'CPU','GPU'},'FontSize',18)
+ylabel('Time(sec)'), grid on, grid minor
+
+%% Train a classifier using extracted features and calculate CV accuracy
+
+% Train and validate a linear support vector machine (SVM) classifier.
+classifierModel = fitcsvm(cnnFeatures, trainingLabels);
+
+% 10 fold crossvalidation accuracy
+cvmdl = crossval(classifierModel,'KFold',10);
+fprintf('kFold CV accuracy: %2.2f\n',1-cvmdl.kfoldLoss)
+
+%% Object Detection
+% Use the findPet function, which relies on filterRegions, a function
+% automatically generated using the Image Region Analyzer App
+
+%% Tying the workflow together
+frameNumber = 0;
+vr = VideoReader(fullfile('PetVideos','videoExample.mov'));
+vw = VideoWriter('test.avi','Motion JPEG AVI');
+opticFlow = opticalFlowFarneback;
+open(vw);
+while hasFrame(vr)
+    % Count frames
+    frameNumber = frameNumber + 1;
+
+    % Step 1. Read Frame
+    videoFrame = readFrame(vr);
+
+    % Step 2. Detect ROI
+    vFrame = imresize(videoFrame,0.25);      % Get video frame
+    frameGray = rgb2gray(vFrame);            % Convert to gray for detection
+    bboxes = findPet(frameGray,opticFlow);   % Find bounding boxes
+    if ~isempty(bboxes)
+        img = zeros([imageSize size(bboxes,1)]);
+        for ii = 1:size(bboxes,1)
+            img(:,:,:,ii) = imresize(imcrop(vFrame,bboxes(ii,:)),imageSize(1:2));
+        end
+
+        % Step 3. Recognize object
+        % (a) Extract features using a CNN
+        [~, scores] = cnnPredict(cnnModel,img,'UseGPU',true,'display',false);
+
+        % (b) Predict using a trained Classifier
+        label = predict(classifierModel,scores);
+
+        % Step 4. Annotate object
+        vFrame = insertObjectAnnotation(vFrame,'Rectangle',bboxes,cellstr(label),'FontSize',40);
+    end
+
+    % Step 5. Write video to file. The downscaled frame vFrame is written
+    % because it is the one carrying the annotations from Step 4; the
+    % full-size videoFrame is never annotated.
+    writeVideo(vw,vFrame);
+
+%    fprintf('Frame: %d of %d\n',frameNumber,ceil(vr.FrameRate*vr.Duration));
+end
+close(vw);
+
diff --git a/MATLAB_deeplearning/README.md b/MATLAB_deeplearning/README.md
new file mode 100644
index 0000000..53c5b21
--- /dev/null
+++ b/MATLAB_deeplearning/README.md
@@ -0,0 +1,9 @@
+Companion Code for "Deep Learning for Computer Vision with MATLAB" by Shashank Prasanna
+============================
+
+This folder contains source code from the NVIDIA Parallel Forall Blog post [Deep Learning for Computer Vision with MATLAB]() by Shashank Prasanna (The MathWorks).
+
+License
+-------
+
+These examples are released under the BSD open source license. Refer to license.txt in this directory for full details.
diff --git a/MATLAB_deeplearning/cnnPredict.m b/MATLAB_deeplearning/cnnPredict.m
new file mode 100755
index 0000000..2ac3a10
--- /dev/null
+++ b/MATLAB_deeplearning/cnnPredict.m
@@ -0,0 +1,123 @@
+function [classLabel, scores, batchTime] = cnnPredict(cnnModel,predImage,varargin)
+%cnnPredict Run images through a MatConvNet network in batches.
+%   [CLASSLABEL, SCORES, BATCHTIME] = cnnPredict(CNNMODEL, PREDIMAGE) passes
+%   the images in PREDIMAGE (stacked along the 4th dimension) through
+%   CNNMODEL.net with vl_simplenn, after replacing the last layer of the
+%   network by a softmax layer. SCORES holds the network output with one
+%   row per image. CLASSLABEL is a cell array with, for each image, the
+%   entry of CNNMODEL.net.classes.description that has the highest score.
+%   BATCHTIME is a column vector with the elapsed time of every batch.
+%
+%   The batch size is read from CNNMODEL.info.opts.batchSize and defaults
+%   to 1 when that field is missing.
+%
+%   cnnPredict(..., 'outputLayer', K) with K smaller than the number of
+%   layers removes all layers after layer K and returns the output of
+%   layer K. In that case CLASSLABEL is [] and SCORES is a 4-D array with
+%   the images along the 4th dimension.
+%
+%   cnnPredict(..., 'UseGPU', true) moves the network and every batch to
+%   the GPU. The default is false.
+%
+%   cnnPredict(..., 'display', false) turns off the progress printout.
+%   The default is true.
+
+% Copyright (c) 2015, MathWorks, Inc.
+
+% Parse inputs
+p = inputParser;
+addParameter(p,'outputLayer',numel(cnnModel.net.layers),@isnumeric);
+addParameter(p,'UseGPU',false,@islogical);
+addParameter(p,'display',true,@islogical);
+parse(p,varargin{:});
+
+% Get batch size and number of images. The batch size falls back to 1 when
+% the model does not carry an info.opts.batchSize setting.
+batchSize = 1;
+if isfield(cnnModel,'info') && isfield(cnnModel.info,'opts') && ...
+        isfield(cnnModel.info.opts,'batchSize')
+    batchSize = cnnModel.info.opts.batchSize;
+end
+n_obs = size(predImage,4);
+isTapLayer = p.Results.outputLayer < numel(cnnModel.net.layers);
+
+if isTapLayer
+    % Drop every layer after the requested output layer
+    cnnModel.net.layers(p.Results.outputLayer+1:end) = [];
+else
+    % Make the network end in a softmax layer
+    cnnModel.net.layers{end} = struct('type', 'softmax');
+end
+
+% Move model to GPU if requested
+if p.Results.UseGPU
+    cnnModel.net = vl_simplenn_move(cnnModel.net,'gpu');
+end
+
+% Make predictions
+batchNumber = 0;
+numBatches = ceil(n_obs/batchSize);
+batchTime = zeros(numBatches,1);
+scores = [];
+if p.Results.display
+    disp(' ')
+    fprintf('Using GPU: %s\n',mat2str(p.Results.UseGPU))
+    fprintf('Number of images: %d\n',n_obs)
+    fprintf('Number of batches: %d\n',numBatches)
+    fprintf('Number of layers in the Network: %d\n',numel(cnnModel.net.layers))
+    disp('-------------------------------------')
+end
+for ii = 1:batchSize:n_obs
+    tic
+    idx = ii:min(ii+batchSize-1,n_obs);
+    batchImages = predImage(:,:,:,idx);
+    im = cnnPreprocess(batchImages);
+
+    % Move batch to GPU if requested
+    if p.Results.UseGPU
+        im = gpuArray(im);
+    end
+    train_res = vl_simplenn(cnnModel.net, im, [], []);
+    batchScores = gather(train_res(end).x);
+
+    % Allocate the output once the size of the network output is known.
+    % The first three dimensions are requested explicitly because size()
+    % alone drops trailing singleton dimensions.
+    if isempty(scores)
+        scores = zeros(size(batchScores,1),size(batchScores,2), ...
+            size(batchScores,3),n_obs);
+    end
+    scores(:,:,:,idx) = batchScores;
+    batchNumber = batchNumber + 1;
+    batchTime(batchNumber) = toc;
+    if p.Results.display
+        fprintf('Batch: %2d/%d. Execution time: %2.4f\n',batchNumber,numBatches,batchTime(batchNumber))
+    end
+end
+
+if p.Results.display
+    fprintf('Avg. execution time/batch: %2.4f\n',mean(batchTime))
+    disp('-------------------------------------')
+    fprintf('Total execution time: %2.4f\n',sum(batchTime))
+    disp('-------------------------------------')
+end
+
+if isTapLayer
+    classLabel = [];
+else
+    % Reshape the scores to one row per image and pick the best class
+    scores = squeeze(scores)';
+    [~, labelId] = max(scores,[],2);
+%     classLabel = categorical(cnnModel.net.classes.description(labelId)');
+    classLabel = cnnModel.net.classes.description(labelId)';
+end
+
+function im = cnnPreprocess(batchImages)
+    % Preprocess images: convert to single, resize to the input size of the
+    % network and subtract the average image stored with the model
+    im = single(batchImages);
+    im = imresize(im, cnnModel.net.normalization.imageSize(1:2));
+    im = bsxfun(@minus,im,cnnModel.net.normalization.averageImage);
+end
+
+end
diff --git a/MATLAB_deeplearning/filterRegions.m b/MATLAB_deeplearning/filterRegions.m
new file mode 100755
index 0000000..6563135
--- /dev/null
+++ b/MATLAB_deeplearning/filterRegions.m
@@ -0,0 +1,23 @@
+function [BW_out,properties] = filterRegions(BW_in)
+% Copyright (c) 2015, MathWorks, Inc.
+%filterRegions Filter BW image using auto-generated code from imageRegionAnalyzer app.
+%   [BW_OUT,PROPERTIES] = filterRegions(BW_IN) filters binary image BW_IN
+%   using auto-generated code from the imageRegionAnalyzer App. BW_OUT has
+%   had all of the options and filtering selections that were specified in
+%   imageRegionAnalyzer applied to it. The PROPERTIES structure contains the
+%   attributes of BW_out that were visible in the App.
+
+% Auto-generated by imageRegionAnalyzer app on 19-Oct-2015
+%---------------------------------------------------------
+
+BW_out = BW_in;
+
+% Filter image based on image properties: keep only the regions whose
+% area is strictly greater than 5000 pixels.
+BW_out = bwpropfilt(BW_out, 'Area', [5000 + eps(5000), Inf]);
+
+% Get properties.
+properties = regionprops(BW_out, {'BoundingBox','Area'});
+
+% Uncomment the following line to return the properties in a table.
+% properties = struct2table(properties);
diff --git a/MATLAB_deeplearning/findPet.m b/MATLAB_deeplearning/findPet.m
new file mode 100755
index 0000000..e2aef65
--- /dev/null
+++ b/MATLAB_deeplearning/findPet.m
@@ -0,0 +1,22 @@
+function bboxes = findPet(frameGray, opticFlow)
+%findPet Find bounding boxes of moving regions in a video frame.
+%   BBOXES = findPet(FRAMEGRAY, OPTICFLOW) estimates the optical flow for
+%   FRAMEGRAY with the optical flow object OPTICFLOW, thresholds the flow
+%   magnitude at 4 and passes the resulting binary image to filterRegions.
+%   BBOXES contains the bounding boxes of the remaining regions, one per
+%   row, or [] when no region remains.
+
+% Copyright (c) 2015, MathWorks, Inc.
+
+flow = estimateFlow(opticFlow,frameGray);
+threshImage = ( flow.Magnitude > 4);
+[~,regions] = filterRegions(threshImage);
+if isempty(regions)
+    bboxes = [];
+else
+    % regions is a struct array; regions.BoundingBox alone expands to a
+    % comma-separated list of which only the first box would be assigned,
+    % so the boxes of all regions are stacked row by row instead.
+    bboxes = vertcat(regions.BoundingBox);
+end
+end
diff --git a/MATLAB_deeplearning/getImageLabels.m b/MATLAB_deeplearning/getImageLabels.m
new file mode 100755
index 0000000..3286127
--- /dev/null
+++ b/MATLAB_deeplearning/getImageLabels.m
@@ -0,0 +1,11 @@
+function imageType = getImageLabels(imset)
+%getImageLabels Build one categorical label per image of an imageSet array.
+%   IMAGETYPE = getImageLabels(IMSET) repeats the Description of every
+%   element of IMSET as many times as given by its Count and returns the
+%   result as a categorical array, ordered set by set.
+
+% Copyright (c) 2015, MathWorks, Inc.
+    setNames = {imset.Description}';
+    imagesPerSet = [imset.Count];
+    imageType = categorical(repelem(setNames, imagesPerSet, 1));
+end
\ No newline at end of file