-
Notifications
You must be signed in to change notification settings - Fork 4.2k
/
cache.py
156 lines (116 loc) · 4.65 KB
/
cache.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
########################################################################
#
# Cache-wrapper for a function or class.
#
# Save the result of calling a function or creating an object-instance
# to hard disk. This is used to persist the data so it can be reloaded
# very quickly and easily.
#
# Implemented in Python 3.5
#
########################################################################
#
# This file is part of the TensorFlow Tutorials available at:
#
# https://github.com/Hvass-Labs/TensorFlow-Tutorials
#
# Published under the MIT License. See the file LICENSE for details.
#
# Copyright 2016 by Magnus Erik Hvass Pedersen
#
########################################################################
import os
import pickle
import numpy as np
########################################################################
def cache(cache_path, fn, *args, **kwargs):
    """
    Cache-wrapper for a function or class.

    If the cache-file exists then the pickled data is reloaded and
    returned, and fn is NOT called. Otherwise fn is called with the
    supplied arguments, the result is pickled to the cache-file and
    then returned. The fn-argument can also be a class instead, in
    which case an object-instance is created and saved to the
    cache-file.

    The cache-file is first written to a temporary file next to it
    (cache_path + '.tmp') and then renamed into place. If pickling
    fails, no broken cache-file is left behind that would make every
    later call fail while loading it.

    Note that the arguments are not part of the cache-key: once the
    cache-file exists its contents are returned regardless of args
    and kwargs.

    :param cache_path:
        File-path for the cache-file (a string or a path-like object).

    :param fn:
        Function or class to be called.

    :param args:
        Arguments to the function or class-init.

    :param kwargs:
        Keyword arguments to the function or class-init.

    :return:
        The result of calling the function or creating the object-instance.
    """

    # Normalise once so path-like objects work for both the file
    # operations and the string concatenation in the messages below.
    path = str(cache_path)

    # If the cache-file exists.
    if os.path.exists(path):
        # Load the cached data from the file.
        # NOTE: unpickling can execute arbitrary code, so only use
        # cache-files that come from a trusted source.
        with open(path, mode='rb') as file:
            obj = pickle.load(file)

        print("- Data loaded from cache-file: " + path)
    else:
        # The cache-file does not exist.

        # Call the function / class-init with the supplied arguments.
        obj = fn(*args, **kwargs)

        # Save the data to a temporary file and rename it into place,
        # so a failed or interrupted dump never leaves a truncated
        # cache-file at the real location.
        tmp_path = path + '.tmp'
        try:
            with open(tmp_path, mode='wb') as file:
                pickle.dump(obj, file)

            os.replace(tmp_path, path)
        except BaseException:
            # Remove the partial temporary file, then let the original
            # error propagate to the caller.
            if os.path.exists(tmp_path):
                os.remove(tmp_path)
            raise

        print("- Data saved to cache-file: " + path)

    return obj
########################################################################
def convert_numpy2pickle(in_path, out_path):
    """
    Re-save a numpy-file as a pickle-file.

    Early versions of the cache-function stored their data with numpy.
    This helper converts such a cache-file to the pickle-format so the
    data does not have to be calculated again.

    :param in_path:
        Path of the input file, in the format written by numpy.save().

    :param out_path:
        Path of the pickle-file to be written.

    :return:
        Nothing.
    """

    # Read the numpy-file.
    loaded = np.load(in_path)

    # Write the same data back out with pickle.
    with open(out_path, 'wb') as pickle_file:
        pickle.dump(loaded, pickle_file)
########################################################################
if __name__ == '__main__':
    # Demo 1: caching the return-value of a function.

    # Stand-in for a slow computation. It is only executed when no
    # cache-file with its result exists yet.
    def expensive_function(a, b):
        return a * b

    print('Computing expensive_function() ...')

    # Reuse the cache-file when present; otherwise evaluate
    # expensive_function(a=123, b=456) and store the result
    # in the cache-file for the next run.
    product = cache('cache_expensive_function.pkl',
                    expensive_function, a=123, b=456)

    print('result =', product)

    # Blank line between the two demos.
    print()

    # Demo 2: caching an object-instance.

    # Stand-in for a class whose construction is expensive, or whose
    # instances should be persisted for some other reason.
    # NOTE: the class must stay at module level under this name,
    # because pickle stores a reference to it by name.
    class ExpensiveClass:
        def __init__(self, c, d):
            self.c, self.d = c, d
            self.result = c * d

        def print_result(self):
            lines = (('c =', self.c),
                     ('d =', self.d),
                     ('result = c * d =', self.result))

            for label, value in lines:
                print(label, value)

    print('Creating object from ExpensiveClass() ...')

    # Reuse the cache-file when present; otherwise create the
    # object-instance ExpensiveClass(c=123, d=456) and store it
    # in the cache-file for the next run.
    instance = cache('cache_ExpensiveClass.pkl',
                     ExpensiveClass, c=123, d=456)

    instance.print_result()
########################################################################