ising_neurons_L1reg.m (forked from libertyh/ising-model)
function [all_J, all_logL, modelname] = ising_neurons_L1reg(datafile, model, nchunks)
%ISING_NEURONS_L1REG Implements minimum probability flow learning for
% an Ising model fit to neural data with the connectivity matrix of
% your choice.
%
% [all_J, all_logL, modelname] = ISING_NEURONS_L1REG(datafile),
% where datafile is a mat file containing a binary matrix of
% spike events, a binary matrix of stimulus conditions, and the
% time bin size, returns the Ising model coupling matrices
% all_J, the log-likelihood all_logL of a held-out validation
% set given these couplings, and the modelname (e.g. "fully
% connected").
%
% [all_J, all_logL, modelname] = ISING_NEURONS_L1REG(datafile, model, nchunks)
% allows you to specify a model number that describes which couplings
% should be set to zero, if you are assuming a set connectivity
% matrix. By default, model=0, which assumes a fully connected model
% where all possible pairwise couplings are fitted. See select_zero_nodes.m
% for details on other available models. nchunks is the number of cross-validation
% iterations, so that you can estimate reasonable bounds on the held-out
% log-likelihoods (nchunks = 10 by default).
%
% EXAMPLE:
% datafile = 'sample_data.mat';
% model = 0;
% nchunks = 10;
% [all_J, all_logL, modelname] = ISING_NEURONS_L1REG(datafile, model, nchunks);
% imagesc(all_J(:,:,1), [-2 2]); % plot coupling from 1st cross validation iteration
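% % As a quick illustration (built-in MATLAB functions only), the held-out
% % log-likelihoods can be summarized across cross-validation folds:
% fprintf('mean held-out logL: %g (std %g)\n', mean(all_logL), std(all_logL));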
%
% See also SELECT_ZERO_NODES, K_DK_ISING_L1REG
%
% References:
% Hamilton LS, Sohl-Dickstein J, Huth AG, Carels VM, Bao S (2013). Optogenetic
% Activation of an Inhibitory Network Enhances Functional Connectivity in
% Auditory Cortex. Neuron (in press).
% Sohl-Dickstein J, Battaglino P, DeWeese M (2011). New method for parameter
% estimation in probabilistic models: minimum probability flow. Physical
% Review Letters, 107:22 p220601.
%
% Uses 3rd party code minFunc developed by Mark Schmidt at the Computer Science
% Department at the Ecole Normale Superieure, see his website with code at
% http://www.di.ens.fr/~mschmidt/Software/minFunc.html
%
%
% Copyright ©2013 [See notes below]. The Regents of the University of
% California (Regents). All Rights Reserved. Permission to use, copy
% and distribute this software and its documentation for educational,
% research, and not-for-profit purposes, without fee and without a
% signed licensing agreement, is hereby granted, provided that the above
% copyright notice, this paragraph and the following two paragraphs appear in
% all copies, modifications, and distributions. Contact
% The Office of Technology Licensing, UC Berkeley,
% 2150 Shattuck Avenue, Suite 510,
% Berkeley, CA 94720-1620,
% (510) 643-7201,
% for commercial licensing opportunities.
% Created by Liberty S. Hamilton, Jascha Sohl-Dickstein, and Alexander Huth,
% at the University of California, Berkeley.
% Based on code written by Jascha Sohl-Dickstein (2009) available at
% https://github.com/Sohl-Dickstein/Minimum-Probability-Flow-Learning
%
% IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
% SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS,
% ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
% IF REGENTS HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
%
% REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
% LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
% FOR A PARTICULAR PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION,
% IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION
% TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
%
if nargin < 2
model = 0; % assume fully connected model
end
if nargin < 3
nchunks = 10; % 10 cross-validation iterations
end
% load the data matrices. [datafile] should contain three variables with the
% following names and contents:
% spk: an nneurons x t matrix (nneurons = number of neurons/electrode sites, t = number of time bins)
% stim: an nstims x t matrix (nstims = number of stimulus features, t = number of time bins)
% bin_size: the size of each time bin in seconds (in our sample dataset, bin_size = 0.005 (5 ms))
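% As a sketch, a minimal synthetic datafile with this layout could be built as
% follows (sizes below are arbitrary and illustrative, not from the sample dataset):
%   spk = double(rand(20, 12000) < 0.05);   % 20 sites, 12000 five-ms bins of Bernoulli spikes
%   stim = double(rand(4, 12000) < 0.10);   % 4 binary stimulus features
%   bin_size = 0.005;
%   save('my_synthetic_data.mat', 'spk', 'stim', 'bin_size');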
mydata = load(datafile);
nneurons = size(mydata.spk, 1); % number of neurons or electrode/recording sites
nstims = size(mydata.stim, 1); % number of stimuli or number of stimulus features
t = size(mydata.spk, 2); % number of time bins
X = vertcat(mydata.stim, mydata.spk); % concatenate stimulus and spike matrices
% Initialize variables for the cross-validated log-likelihood measurements
% and the cross-validated coupling matrices.
all_logL = zeros(nchunks,1);
all_J = zeros(nneurons, nneurons+nstims, nchunks);
% Set the length of the validation set to be 10% of the total length
val_len = floor(t/10);
% When we randomize, we want to shuffle whole trials if possible to make sure
% that the noise level does not affect our model. To do this, we choose a
% segment length [spanlen] that will be randomized as a unit within the data
% matrices. A good choice is a multiple of your trial length (for our sample
% data, the trial length is 500 ms and the bin size is 5 ms, i.e. 100 bins per
% trial, so we can shuffle whole trials using a span length of 200).
spanlen = 200; % 200 samples @ 5ms/sample = 1s/span
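% If your trial structure differs, spanlen can be derived from the data instead,
% e.g. (illustrative; trial_len_s is a hypothetical variable holding the trial
% duration in seconds):
%   spanlen = round(trial_len_s / mydata.bin_size); % one trial per span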
nspans = floor(floor(t/spanlen)/nchunks)*nchunks; % truncate data if not exactly divisible
% randomize which data validation is performed on, so it's not all from
% the same time period, since behavior may change with time and we want
% the behavior on the validation set to be qualitatively the same as
% the behavior on the training set
span_inds = reshape(1:(spanlen*nspans), spanlen, [])';
% randomly assign the spans to cross-validation chunks (one row per chunk)
chunk_span_inds = reshape(randperm(nspans), nchunks, []);
chunk_inds = cell(nchunks, 1);
for ii = 1:nchunks
smat = span_inds(chunk_span_inds(ii,:),:)';
chunk_inds{ii} = smat(:);
end
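% chunk_inds{ii} now lists the time-bin indices assigned to fold ii,
% grouped into contiguous spans of length spanlen.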
% set options for minFunc function minimization
T = nneurons+nstims; % total number of units (neurons + stimulus features) per time bin
nsamples = size(X,2); % number of training samples
maxlinesearch = 1000; % this number is excessive just to be safe! Learning works fine if this is just a few hundred
independent_steps = 10*nneurons; % the number of Gibbs sampling steps to take between samples
minf_options = [];
minf_options.display = 'iter'; % set to 'none' to make this faster
minf_options.maxFunEvals = maxlinesearch;
minf_options.maxIter = maxlinesearch;
run_checkgrad = 0; % set to 1 to check that the derivative being calculated in K_dK_ising_L1reg.m
% is correct -- usually you will want this off, as it is mainly for debugging purposes.
if run_checkgrad
nsamples = 2;
minf_options.DerivativeCheck = 'on';
end
% Perform cross validation
for k = 1:nchunks
fprintf(1,'\n.............Running cross validation iteration %d of %d..........\n', k, nchunks);
cinds = chunk_inds{k}; % validation data indices (precomputed above)
X_val = X(:,cinds); % validation data
X_train = X;
X_train(:,cinds) = []; % training data
description = sprintf('d=%d, %d samples, %d learning steps',T,nsamples,maxlinesearch);
fprintf(1,'%s\n',description);
% randomly initialize the parameter matrix we're going to try to learn
% note that the bias units lie on the diagonal of J
Jnew_n = randn(nneurons, nneurons) / sqrt(nneurons) / 100; % site-to-site coupling
Jnew_n = (Jnew_n + Jnew_n')/2; % make weight matrix symmetric
Jnew_s = randn(nneurons, nstims)/sqrt(nneurons)/100; % stimulus-to-site coupling
% choose model connectivity (see select_zero_nodes.m or make your own function here)
[Jnew_all, zeronodes, modelname] = select_zero_nodes(model, Jnew_s, Jnew_n);
% choose lambda (ideally with another round of cross-validation on the training set)
% but for simplicity we choose a value here.
lambda = 5.9948e-05;
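% A minimal sketch of how lambda could instead be chosen on an inner split of
% the training data (illustrative only; it assumes the same K_dK_ising_L1reg and
% L_dL_ising interfaces used below):
%   lambdas = logspace(-6, -3, 8);
%   n_inner = floor(size(X_train,2)/10);
%   inner_val = X_train(:, 1:n_inner);          % inner validation split
%   inner_train = X_train(:, n_inner+1:end);    % inner training split
%   inner_logL = zeros(size(lambdas));
%   for li = 1:numel(lambdas)
%       Jtmp = minFunc(@K_dK_ising_L1reg, Jnew_all(:), minf_options, ...
%           inner_train, nneurons, nstims, lambdas(li), zeronodes);
%       inner_logL(li) = L_dL_ising(reshape(Jtmp, size(Jnew_all)), inner_val, ...
%           nneurons, nstims, zeronodes);
%   end
%   [~, best_li] = max(inner_logL); lambda = lambdas(best_li);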
% perform parameter estimation using L1-regularized Ising model fit with MPF
fprintf(1, '\nRunning minFunc for up to %d learning steps...\n', maxlinesearch );
time_elapsed = tic();
Jnew_all_fit = minFunc( @K_dK_ising_L1reg, Jnew_all(:), minf_options, X_train, nneurons, nstims, lambda, zeronodes);
Jnew_all_fit(zeronodes) = 0; % force couplings to zero based on connectivity provided
Jnew_all_fit = reshape(Jnew_all_fit, size(Jnew_all));
time_elapsed = toc(time_elapsed);
fprintf(1, 'parameter estimation in %f seconds \n', time_elapsed );
% add the new coupling matrix to the 3D matrix of Js for each cross validation iteration
all_J(:,:,k) = -Jnew_all_fit; % flip the sign for plotting so that positive couplings indicate sites fire together
% compute log likelihood of the validation data given the model
[loglik] = L_dL_ising(Jnew_all_fit, X_val, nneurons, nstims, zeronodes);
% set log-likelihood for this cross validation iteration
all_logL(k) = loglik;
fprintf(1,'Log-likelihood for %s: %g\n', modelname, loglik);
end
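% The fold-averaged coupling matrix can then be inspected, e.g. (illustrative,
% built-in functions only):
%   imagesc(mean(all_J, 3), [-2 2]); colorbar;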