-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsimple_cache.py
151 lines (116 loc) · 4.02 KB
/
simple_cache.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
import os, json, shutil, time, hashlib
from dataclasses import dataclass
from base64 import urlsafe_b64encode, urlsafe_b64decode
from rid_lib.core import RID
# Utility Functions
def encode_b64(string: str):
    """Encodes a string as URL-safe base64 text with the padding removed."""
    raw_bytes = string.encode()
    encoded = urlsafe_b64encode(raw_bytes).decode()
    # Trailing "=" padding is dropped; decode_b64 restores it.
    return encoded.rstrip("=")
def decode_b64(string: str):
    """Decodes unpadded URL-safe base64 text back into a string."""
    # Base64 input length must be a multiple of 4, so re-add the "="
    # characters that were stripped when the value was encoded.
    missing_padding = -len(string) % 4
    padded = string + "=" * missing_padding
    return urlsafe_b64decode(padded.encode()).decode()
def hash_json(data: dict):
    """Returns the SHA-256 hex digest of a dict's JSON serialization.

    Keys are sorted before serializing, so dicts with the same contents
    produce the same digest regardless of insertion order.
    """
    canonical_json = json.dumps(data, sort_keys=True)
    return hashlib.sha256(canonical_json.encode()).hexdigest()
# Cache Implementation
@dataclass
class CacheObject:
    """A single RID cache entry.

    Bundles the JSON data cached for an RID together with the metadata
    recorded alongside it. The read and write functions of a
    CacheInterface return instances of this class.
    """

    data: dict
    meta: dict

    @classmethod
    def from_dict(cls, json_object):
        """Builds a CacheObject from a dict with "data" and "meta" keys.

        A missing key results in None for the corresponding field.
        """
        data = json_object.get("data")
        meta = json_object.get("meta")
        return cls(data, meta)

    def to_dict(self):
        """Returns the entry as a dict with "meta" first, then "data"."""
        serialized = {"meta": self.meta}
        serialized["data"] = self.data
        return serialized
class CacheInterface:
    """File-backed cache that stores one JSON file per RID.

    Each entry lives at ``<directory>/<encoded rid>.json``, where the file
    name is the unpadded URL-safe base64 encoding of ``str(rid)``.
    """

    def __init__(self, directory="cache"):
        self.directory = directory

    def file_path_to(self, rid: RID):
        """Returns the path of the JSON file backing the given RID."""
        encoded_rid_str = encode_b64(str(rid))
        return f"{self.directory}/{encoded_rid_str}.json"

    def write(self, rid: RID, data: dict) -> CacheObject:
        """Writes data (dict) to RID cache with autogenerated metadata.

        The metadata records the RID string, the current timestamp and the
        SHA-256 hash of the data. Any existing entry for the RID is
        overwritten. Returns a CacheObject.
        """
        # exist_ok avoids the check-then-create race of a separate
        # os.path.exists() test.
        os.makedirs(self.directory, exist_ok=True)

        cache_entry = CacheObject(
            data=data,
            meta={
                "rid": str(rid),
                "timestamp": time.time(),
                "sha256_hash": hash_json(data),
            },
        )

        with open(self.file_path_to(rid), "w", encoding="utf-8") as f:
            json.dump(cache_entry.to_dict(), f, indent=2)

        return cache_entry

    def exists(self, rid: RID):
        """Returns True if a cache file exists for the given RID."""
        return os.path.exists(self.file_path_to(rid))

    def read(self, rid: RID):
        """Reads and returns CacheObject from RID cache.

        Returns None if there is no cache entry for the RID.
        """
        try:
            with open(self.file_path_to(rid), "r", encoding="utf-8") as f:
                return CacheObject.from_dict(json.load(f))
        except FileNotFoundError:
            return None

    def read_all_rids(self):
        """Returns a list of the RIDs of all entries in the cache.

        Returns an empty list if the cache directory does not exist (for
        example before the first write or after drop()). Directory entries
        without a ``.json`` extension are not cache entries and are skipped.
        """
        try:
            filenames = os.listdir(self.directory)
        except FileNotFoundError:
            return []

        rids = []
        for filename in filenames:
            encoded_rid_str, extension = os.path.splitext(filename)
            if extension != ".json":
                continue
            rid_str = decode_b64(encoded_rid_str)
            rids.append(RID.from_string(rid_str, allow_prov_ctx=True))
        return rids

    def delete(self, rid: RID):
        """Deletes the cache entry for the RID; no-op if it does not exist."""
        try:
            os.remove(self.file_path_to(rid))
        except FileNotFoundError:
            return

    def drop(self):
        """Deletes all RID cache entries by removing the cache directory.

        No-op if the directory does not exist.
        """
        try:
            shutil.rmtree(self.directory)
        except FileNotFoundError:
            return
class TransformationCacheInterface:
    """JSON-file-backed mapping from an RID to related RIDs, keyed by context.

    The file holds a single JSON object of the form
    ``{str(rid): {other.context: str(other), ...}, ...}``.
    """

    def __init__(self, file_path="trans_cache.json"):
        # The path defaults to the previously hard-coded location, so
        # existing callers that pass no arguments are unaffected.
        self.file_path = file_path

    def _read(self):
        """Loads the whole mapping; returns {} if the file does not exist."""
        try:
            with open(self.file_path, "r", encoding="utf-8") as f:
                return json.load(f)
        except FileNotFoundError:
            return {}

    def _write(self, data: dict):
        """Overwrites the backing file with the given mapping."""
        with open(self.file_path, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2)

    def write(self, rid: RID, other: RID):
        """Records ``other`` under ``rid``, keyed by ``other.context``.

        An existing value stored for the same context is replaced.
        """
        data = self._read()
        # A missing (or empty) entry starts from a fresh dict.
        entry = data.get(str(rid)) or {}
        entry[other.context] = str(other)
        data[str(rid)] = entry
        self._write(data)

    def read(self, rid: RID, context: str):
        """Returns the string stored for ``rid`` under ``context``.

        Returns None if nothing is stored for the RID or for that context.
        """
        entry = self._read().get(str(rid))
        if not entry:
            return None
        return entry.get(context)