-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathwalker.py
284 lines (243 loc) · 12.6 KB
/
walker.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
"""
This file contains the main functionality of the software and is used to
achieve two primary goals and is divided into two sections
1. Approximate the walker density as a function of time using Monte Carlo
simulations
2. Determine the steady state solution of a random walk on a network using the
methodology developed in our paper
"""
from operator import itemgetter
from scipy.integrate import quad # Use quadrature integration
import numpy as np # General mathematics
import scipy.sparse as sparse # Sparse representation of matrices necessary for larger networks
import scipy.sparse.linalg as linalg # Access to eigenvectors and eigenvalues
"""
Section 1: Monte Carlo simulations
The Monte-Carlo simulations are implemented as follows
1. A large number of random walkers is allowed to make steps on a given network
2. The walker density is averaged among the walkers
The details of the Monte-Carlo simulation are explained in the appendix of our
paper.
"""
def make_step(G, origin, wtd='wtd'):
"""
This function assumes that a walker is located on node `origin` of a network
`G`. Furthermore, it assumes that all edges originating from `origin` have
a data attribute `wtd` which is an instance of a waiting time distribution.
This function draws a sample waiting time from all edges originating from
`origin` and chooses the edge that gives the smallest waiting time.
This function returns a tuple (`delta`, `neighbour`), where
- `delta` is the length of time before the step was executed
- `neighbour` is the neighbour that the step was made to from `origin`
"""
# Optain a list of tuples (time, neighbour)
options = [(data[wtd].rvs(), neighbour) for neighbour, data in G[origin].iteritems()]
# Select the neighbour with the smallest waiting time
# The argument `key = itemgetter(0)` ensures that the waiting times are compared
return min(options, key=itemgetter(0))
def make_steps(G, origin, maxtime, wtd='wtd'):
"""
This function assumes that a walker is located on node `origin` of a network
`G`. Furthermore, it assumes that all edges have a data attribute `wtd`
which is an instance of a waiting time distribution.
This function executes steps starting at `origin` until the time elapsed
exceeds the argument `maxtime`. Only the elapsed time is constrained. The
number of steps executed is NOT.
This function returns a list of tuples (`time`, `neighbour`), where
- `time` is the time at which the step was executed
- `neighbour` is the neighbour that the step was made to
This function is an implementation of Algorithm 1 in Appendix 1 on page
9 of our paper.
"""
time = 0 # Set the current time
current = origin # Start with the origin
steps = [(time, current)] # Initialise the list of nodes
while time <= maxtime:
delta, node = make_step(G, current, wtd) # Make one step
time += delta # Update the time...
current = node # ...and the node
steps.append((time, current)) # Extend the step list
return steps # Return the list of steps
def steps2probability(steps, delta, bins):
"""
This function takes a sequence of steps and computes an array representing
the probability to find a walker on a given node in a range of time
intervals. The time intervals are uniformly spaced.
`steps` is the sequence of step tuples obtained from, e.g. calling `make_steps`.
`delta` is the width of a time step.
`bins` is the number of bins.
Hence, the array of probabilities corresponds to a time span
$[0, `delta` * `bins`]$.
This function returns a dictionary keyed by node. The values are arrays of
length `bins` whose $i^{th}% element represents the probability to find the
walker on the associated node in the time interval
$[i * `delta`, (i + 1) * `delta`]$.
This function is an implementation of the algorithm discussed in appendix
2 on page 10 of our paper.
"""
# The ith element of the vector associated with each node shall represent
# the probability to find the walker at the respective node in the time
# interval [i, i + 1] * delta
probabilities = {} # Declare a dictionary of probabilities
# Consider all transitions
for (t_i, _), (t_j, j) in zip(steps[1:], steps):
p = probabilities.setdefault(j, np.zeros(bins)) # Get a default probability
lower = int(t_j / delta) # Index of the lowest bin involved
upper = int(t_i / delta) # Index of the highest bin involved
# Did the step happen in the same bin?
if lower == upper:
frac = (t_i - t_j) / delta
p[lower] += frac
else:
# The fractional time spent in the lower bin is given by
# [(lower + 1) * delta - t_j] / delta and simplifying gives
lowerfrac = lower + 1 - t_j / delta
p[lower] += lowerfrac
# The fractional time spent in the upper bin is given by
# [t_i - upper * delta] / delta and simplifying gives
upperfrac = t_i / delta - upper
if upper < bins:
p[upper] += upperfrac
# The number of bins between the lower and upper bins are
span = upper - lower - 1
# Fill these with ones if there are bins inbetween
if span > 0:
p[lower + 1 : lower + 1 + span] = 1
return probabilities
def probability_moments(probability, bins, run=0, moments=None):
"""
This function calculates the mean and standard deviation of the probability
to find a walker on a given node. It does so iteratively such that the
results of a Monte-Carlo simulation can be discarded after each simulation
is completed.
`probability` is the dictionary of probabilities obtained from `steps2probability`
`bins` is the number of bins passed to `steps2probability`
`run` is the number of iterations (not to be used explicitly).
`moments` is a dictionary to keep track of moments (not to be used explicitly).
This function returns a dictionary keyed by node. The value is a tuple of
arrays. The first element is an array of means $x$ and the second array is the
mean of $x^2$, NOT the variance.
"""
z = np.zeros(bins) # Create a default zero array
probability = dict(probability)
if moments == None: moments = {} # Create empty dictionaries for the moments
for node, (mean, mean2) in moments.iteritems(): # Go over each node part of the mean already
p = probability.pop(node, z) # Get the probability
# Calculate the mean, mean square and update iteratively
mean = (run * mean + p) / (run + 1)
mean2 = (run * mean2 + p ** 2) / (run + 1)
moments[node] = (mean, mean2)
for node, p in probability.iteritems(): # Consider nodes that are not part of the mean already
moments[node] = (p / (run + 1), p ** 2 / (run + 1))
return moments
def walk(G, origin, bins, delta, runs, wtd='wtd', debug=False):
"""
This function is a convenience function which calculates probability moments
for a walker starting on node `origin` of the network `G`. The maximal
simulation time is determined by `bins`*`delta`. The behaviour of the walker
is simulated `runs` times.
This function assumes that each edge of the network has a data attribute
`wtd` which is an instance of a probability distribution.
`debug` is a flag which results in the run number being printed if set to True.
This function returns a dictionary keyed by node. The value is a tuple of
arrays. The first element is an array of means $x$ and the second array is the
mean of $x^2$, NOT the variance.
"""
maxtime = bins * delta # The maximum time to run the simulation up to
moments = {} # The moments of the probability distributions
for run in xrange(runs):
steps = make_steps(G, origin, maxtime, wtd) # Make enough steps
probability = steps2probability(steps, delta, bins) # Get the pdf
moments = probability_moments(probability, bins, run, moments)
if debug:
print run + 1 # , np.sum(probability.values()) / bins, \
# np.sum([x for x,_ in moments.values()], axis = 0)
return moments # Return the moments
"""
Section 2: Steady state solutions
"""
def steady_state(ETM, resting_time):
"""
This function calculates at most `k` steady state solutions of
a random walk on a network with effective transition matrix
`ETM` and mean resting time `resting_time`.
Note that multiple distinct steady state solutions can exist
if the network is not connected.
This function returns a list of at most `k` tuples of the form
(`eigenvalue`, `vector`), where `eigenvalue` is
the eigenvalue associated with `vector`. Steady state solutions
will have $`eigenvalue`\approx 1$ and will be represented by
`vector`. Note that the approximate equality is a result of
numerical errors.
"""
# Get the eigenvectors as discussed in section III on page 5 of our paper
evalues, evectors = linalg.eigs(ETM, k=1, which='LM')
results = []
for i in xrange(len(evalues)):
evalue = evalues[i] # Obtain the ith eigenvalue and eigenvector
evector = evectors[:, i]
# Multiply by the resting times as defined in Eq. (27) on page 6
p = evector * resting_time
p = p / sum(p) # Normalise the solutions
results.append((evalue, p)) # Add to the list of possible solutions
results.sort(reverse=True) # Order the results by largest eigenvalue
return results
def ETM_rest_uniform(G, wtd, max_int=np.inf, cache=None):
"""
This function does the same as `ETM_rest` but assumes that the
WTDs of all edges are identical and equal to `wtd`.
`cache` is a dictionary that is used to speed of the calculation
by caching results. If cache is `None` the functionality is disabled.
"""
n = G.number_of_nodes()
ETM = sparse.lil_matrix((n, n)) # Set up a sparse matrix
resting_time = np.zeros(n) # Set up the resting times
temp = np.zeros(n)
for node in G: # Go over each node
neighbours = G.neighbors(node) # Get all the neighbours
n_neighbours = len(neighbours) # Get the number of neighbours
for neighbour in neighbours:
temp[node] = ETM[neighbour, node] = 1. / n_neighbours
# Check the cache
if cache is not None and n_neighbours in cache:
resting_time[node] = cache[n_neighbours]
else:
# Define the phi element
phi = lambda t: (1 - wtd.cdf(t)) ** n_neighbours
# Calculate the resting time
integral = quad(phi, 0, max_int)[0]
resting_time[node] = integral
# Save it to the cache if desired
if cache is not None: cache[n_neighbours] = integral
return ETM, resting_time
def ETM_rest(G, wtd='wtd', max_int=np.inf, debug=False):
"""
This function calculates the matrix $\mathbb{T}$ and the mean resting times
for a network `G`. It assumes that each edge of the network has a data
attribute `wtd` which is a probability distribution.
In principle, the calculation requires the evaluation of integrals over the
domain $[0, \infty]$. The parameter `max_int` can be used to set a fixed
upper limit to the integration domain if desired.
The flag `debugs` indicates whether to print debug information.
This function returns a tuple ($\mathbb{T}$, mean resting times).
"""
n = G.number_of_nodes() # The number of nodes
ETM = sparse.lil_matrix((n, n)) # Create the effective transition matrix
resting_time = np.zeros(n) # Create a vector of resting times
for node in G:
edges = G[node].items()
for neighbour, data in edges:
# Define the T_{neighbour, node} matrix element
# as in Eq. (2) on page 3 of our paper
T = lambda t: data[wtd].pdf(t) * reduce(lambda a, b: a * b,
[1 - data2[wtd].cdf(t) for neighbour2, data2 in edges if neighbour2 != neighbour], 1)
# Integrate to get the effective transition matrix as in
# Eq. (26) on page 5 of our paper
int1 = ETM[neighbour, node] = quad(T, 0, max_int)[0]
# Carry out the integral to find resting times as in the second column
# of page 5
int2 = quad(lambda t: t * T(t), 0, max_int)[0]
# Add to the resting time on the node
resting_time[node] += int2
if debug: print node, int1, resting_time[node]
return ETM, resting_time