forked from robertmaynard/code-samples
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Adding Source Code for Parallel Forall post on MATLAB Deep Learning
- Loading branch information
Showing
6 changed files
with
247 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
% Copyright (c) 2015, MathWorks, Inc. | ||
|
||
%% Download and predict using a pretrained ImageNet model

% Download the model from the MatConvNet pretrained networks repository.
% The download is skipped when the file is already available so that
% re-running this script does not fetch the model again.
modelFile = 'imagenet-vgg-f.mat';
if ~exist(modelFile,'file')
    urlwrite('http://www.vlfeat.org/matconvnet/models/imagenet-vgg-f.mat', modelFile);
end
cnnModel.net = load(modelFile);

% Set up MatConvNet; modify the path if it is installed in a different
% folder
run(fullfile('matconvnet-1.0-beta15','matlab','vl_setupnn.m'));

% Load and display an example image
imshow('dog_example.png');
img = imread('dog_example.png');

% Predict the label using the ImageNet-trained vgg-f CNN model and show it
% as the figure title
label = cnnPredict(cnnModel,img);
title(label,'FontSize',20)
|
||
%% Load images from folder

% Use imageSet to manage images stored in multiple folders
imset = imageSet('pet_images','recursive');

% Preallocate one slot per image across ALL image sets, at the input size
% expected by the network
imageSize = cnnModel.net.normalization.imageSize;
trainingImages = zeros([imageSize sum([imset(:).Count])],'single');

% Load and resize images for prediction.
% imgIdx runs across all image sets; indexing with the per-set counter jj
% would make every set overwrite the slots written by the previous one and
% leave the tail of the array as zeros. Images are stored set by set, which
% is the same order in which getImageLabels emits the labels.
imgIdx = 0;
for ii = 1:numel(imset)
    for jj = 1:imset(ii).Count
        imgIdx = imgIdx + 1;
        trainingImages(:,:,:,imgIdx) = imresize(single(read(imset(ii),jj)),imageSize(1:2));
    end
end

% Get the image labels directly from the imageSet object
trainingLabels = getImageLabels(imset);
summary(trainingLabels)
|
||
%% Extract features using pretrained CNN

% The batch size is read by cnnPredict from cnnModel.info.opts.batchSize.
% A larger batch needs more GPU memory; with 400 images, 200 gives two
% batches.
cnnModel.info.opts.batchSize = 200;

% Run the same prediction twice, first on the CPU and then on the GPU,
% keeping the per-batch timings of each run. cnnFeatures ends up holding
% the result of the GPU run.
[~, cnnFeatures, timeCPU] = cnnPredict(cnnModel, trainingImages, 'UseGPU', false);
[~, cnnFeatures, timeGPU] = cnnPredict(cnnModel, trainingImages, 'UseGPU', true);

% Plot total CPU time against total GPU time and report the ratio
bar([sum(timeCPU), sum(timeGPU)], 0.5)
title(sprintf('Approximate speedup: %2.00f x ', sum(timeCPU)/sum(timeGPU)))
set(gca, 'XTickLabel', {'CPU','GPU'}, 'FontSize', 18)
ylabel('Time(sec)')
grid('on')
grid('minor')
|
||
%% Train a classifier using extracted features and calculate CV accuracy

% Fit a support vector machine (SVM) classifier on the CNN features
classifierModel = fitcsvm(cnnFeatures, trainingLabels);

% Estimate accuracy with 10-fold cross-validation (accuracy = 1 - loss)
cvmdl = crossval(classifierModel, 'KFold', 10);
fprintf('kFold CV accuracy: %2.2f\n', 1 - kfoldLoss(cvmdl))
|
||
%% Object Detection
% Use the findPet function, which thresholds optical flow and filters the
% result with filterRegions (generated by the Image Region Analyzer App)

%% Tying the workflow together
frameNumber = 0;
detectScale = 0.25;   % detection runs on a downscaled copy of each frame
vr = VideoReader(fullfile('PetVideos','videoExample.mov'));
vw = VideoWriter('test.avi','Motion JPEG AVI');
opticFlow = opticalFlowFarneback;
open(vw);
while hasFrame(vr)
    % Count frames
    frameNumber = frameNumber + 1;

    % Step 1. Read Frame
    videoFrame = readFrame(vr);

    % Step 2. Detect ROI
    vFrame = imresize(videoFrame,detectScale);  % Downscaled frame for detection
    frameGray = rgb2gray(vFrame);               % Convert to gray for detection
    bboxes = findPet(frameGray,opticFlow);      % Find bounding boxes
    if ~isempty(bboxes)
        % Crop every detection and resize it to the network input size
        img = zeros([imageSize size(bboxes,1)]);
        for ii = 1:size(bboxes,1)
            img(:,:,:,ii) = imresize(imcrop(vFrame,bboxes(ii,:)),imageSize(1:2));
        end

        % Step 3. Recognize object
        % (a) Extract features using a CNN
        [~, scores] = cnnPredict(cnnModel,img,'UseGPU',true,'display',false);

        % (b) Predict using a trained Classifier
        label = predict(classifierModel,scores);

        % Step 4. Annotate object
        % Draw on the frame that is actually written below. The boxes were
        % found on the downscaled frame, so map them back to full
        % resolution first. Previously the annotation was applied to
        % vFrame, which was never written, so the output video showed no
        % detections.
        videoFrame = insertObjectAnnotation(videoFrame,'Rectangle',bboxes/detectScale,cellstr(label),'FontSize',40);
    end

    % Step 5. Write video to file
    writeVideo(vw,videoFrame);

    % fprintf('Frame: %d of %d\n',frameNumber,ceil(vr.FrameRate*vr.Duration));
end
close(vw);
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
Companion Code for "Deep Learning for Computer Vision with MATLAB" by Shashank Prasanna | ||
============================ | ||
|
||
This folder contains source code from the NVIDIA Parallel Forall blog post [Deep Learning for Computer Vision with MATLAB]() by Shashank Prasanna (MathWorks). | ||
|
||
License | ||
------- | ||
|
||
These examples are released under the BSD open source license. Refer to license.txt in this directory for full details. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
function [classLabel, scores, batchTime] = cnnPredict(cnnModel,predImage,varargin)
%cnnPredict Run a MatConvNet network over a stack of images, batch by batch.
%   [classLabel, scores, batchTime] = cnnPredict(cnnModel, predImage, ...)
%
%   Inputs
%     cnnModel   Struct whose .net field is the network handed to
%                vl_simplenn. The fields .net.layers, .net.normalization
%                (imageSize, averageImage) and .net.classes.description
%                are read. If cnnModel has no .info field, a batch size of
%                1 is used; otherwise cnnModel.info.opts.batchSize is used.
%     predImage  Image array indexed as predImage(:,:,:,k); the number of
%                images is size(predImage,4).
%
%   Name-value options
%     'outputLayer'  Numeric, default numel(cnnModel.net.layers). When it
%                    is smaller than the number of layers, all layers
%                    after it are removed and the output of that layer is
%                    returned. Otherwise the last layer is replaced by a
%                    softmax layer.
%     'UseGPU'       Logical, default false. Move the network and each
%                    batch to the GPU.
%     'display'      Logical, default true. Print progress and timing.
%
%   Outputs
%     classLabel  [] when an intermediate layer was requested; otherwise
%                 the entries of cnnModel.net.classes.description selected
%                 by the highest score of each image.
%     scores      Network output. When an intermediate layer was requested
%                 it keeps the layout [size of one image's output, number
%                 of images]; otherwise it is squeezed and transposed
%                 (presumably one row per image for a 1-by-1-by-C network
%                 output).
%     batchTime   Column vector with the elapsed time of each batch.
%
% Copyright (c) 2015, MathWorks, Inc.

% ---- Options ------------------------------------------------------------
layerCount = numel(cnnModel.net.layers);
parser = inputParser;
addParameter(parser,'outputLayer',layerCount,@isnumeric);
addParameter(parser,'UseGPU',false,@islogical);
addParameter(parser,'display',true,@islogical);
parse(parser,varargin{:});
opts = parser.Results;

% ---- Batch size and image count -----------------------------------------
if ~isfield(cnnModel,'info')
    cnnModel.info.opts.batchSize = 1;
end
batchSize = cnnModel.info.opts.batchSize;
n_obs = size(predImage,4);

% ---- Select the output layer --------------------------------------------
isTapLayer = opts.outputLayer < layerCount;
if isTapLayer
    % Drop everything after the requested layer
    cnnModel.net.layers(opts.outputLayer+1:end) = [];
else
    % Full network: make sure the final layer is a softmax
    cnnModel.net.layers{end} = struct('type', 'softmax');
end

% ---- Preallocate scores -------------------------------------------------
% A forward pass on the first image gives the output size for one image.
probe = vl_simplenn(cnnModel.net, cnnPreprocess(predImage(:,:,:,1)), [], []);
scores = zeros([size(probe(end).x), n_obs]);

% ---- Optionally move the model to the GPU -------------------------------
if opts.UseGPU
    cnnModel.net = vl_simplenn_move(cnnModel.net,'gpu');
end

% ---- Predict, one batch at a time ---------------------------------------
numBatches = ceil(n_obs/batchSize);
batchTime = zeros(numBatches,1);
if opts.display
    disp(' ')
    fprintf('Using GPU: %s\n',mat2str(opts.UseGPU))
    fprintf('Number of images: %d\n',n_obs)
    fprintf('Number of batches: %d\n',numBatches)
    fprintf('Number of layers in the Network: %d\n',numel(cnnModel.net.layers))
    disp('-------------------------------------')
end
for batchNumber = 1:numBatches
    batchTimer = tic;
    firstIdx = (batchNumber-1)*batchSize + 1;
    idx = firstIdx:min(firstIdx+batchSize-1,n_obs);
    im = cnnPreprocess(predImage(:,:,:,idx));

    % Move the batch to the GPU if requested
    if opts.UseGPU
        im = gpuArray(im);
    end
    train_res = vl_simplenn(cnnModel.net, im, [], []);

    % gather brings the result back from the GPU (if it is there)
    scores(:,:,:,idx) = squeeze(gather(train_res(end).x));
    batchTime(batchNumber) = toc(batchTimer);
    if opts.display
        fprintf('Batch: %2d/%d. Execution time: %2.4f\n',batchNumber,numBatches,batchTime(batchNumber))
    end
end

if opts.display
    fprintf('Avg. execution time/batch: %2.4f\n',mean(batchTime))
    disp('-------------------------------------')
    fprintf('Total execution time: %2.4f\n',sum(batchTime))
    disp('-------------------------------------')
end

% ---- Labels -------------------------------------------------------------
if isTapLayer
    % Intermediate features carry no class labels
    classLabel = [];
else
    scores = squeeze(gather(scores))';
    [~, labelId] = max(scores,[],2);
    % classLabel = categorical(cnnModel.net.classes.description(labelId)');
    classLabel = cnnModel.net.classes.description(labelId)';
end

    function prepared = cnnPreprocess(rawImages)
        % Convert to single, resize to the network input size and subtract
        % the network's average image. Reads cnnModel from the enclosing
        % function.
        netNorm = cnnModel.net.normalization;
        prepared = imresize(single(rawImages), netNorm.imageSize(1:2));
        prepared = bsxfun(@minus,prepared,netNorm.averageImage);
    end

end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
function [BW_out,properties] = filterRegions(BW_in)
% Copyright (c) 2015, MathWorks, Inc.
%filterRegions Keep only the large regions of a binary image.
%   [BW_OUT,PROPERTIES] = filterRegions(BW_IN) removes from binary image
%   BW_IN every region whose area is not strictly greater than 5000 and
%   returns the result as BW_OUT. PROPERTIES is the structure array
%   returned by regionprops for BW_OUT, holding the 'BoundingBox' and
%   'Area' of each remaining region.
%
%   Derived from code auto-generated by the imageRegionAnalyzer app on
%   19-Oct-2015.

% Lower bound sits just above 5000, so a region of area 5000 is rejected.
minArea = 5000;
areaRange = [minArea + eps(minArea), Inf];

% Filter image based on region area.
BW_out = bwpropfilt(BW_in, 'Area', areaRange);

% Measure the regions that survived the filter.
properties = regionprops(BW_out, {'BoundingBox','Area'});

% Uncomment the following line to return the properties in a table.
% properties = struct2table(properties);
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
function bboxes = findPet(frameGray, opticFlow)
%findPet Locate moving regions in a grayscale frame using optical flow.
%   BBOXES = findPet(FRAMEGRAY, OPTICFLOW) estimates optical flow for
%   FRAMEGRAY with the optical flow object OPTICFLOW, keeps the pixels
%   whose flow magnitude exceeds 4, and passes that mask to filterRegions.
%
%   BBOXES is an M-by-4 matrix with one BoundingBox row per region
%   returned by filterRegions, or [] when no region is returned.
%
% Copyright (c) 2015, MathWorks, Inc.

flow = estimateFlow(opticFlow,frameGray);
threshImage = (flow.Magnitude > 4);
[~,regions] = filterRegions(threshImage);
if isempty(regions)
    bboxes = [];
else
    % regions is a struct array, so regions.BoundingBox expands to a
    % comma-separated list; a plain assignment would keep only the first
    % box. Stack all of them into one row per region.
    bboxes = vertcat(regions.BoundingBox);
end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
function imageType = getImageLabels(imset)
%getImageLabels Build one categorical label per image from image sets.
%   IMAGETYPE = getImageLabels(IMSET) repeats the Description of each
%   element of IMSET as many times as that element's Count and returns the
%   result as a categorical column vector, ordered element by element.
%
% Copyright (c) 2015, MathWorks, Inc.
setNames = {imset.Description}';   % one name per set, as a column cell
setCounts = [imset.Count];         % number of images in each set
repeatedNames = repelem(setNames, setCounts, 1);
imageType = categorical(repeatedNames);
end