Skip to content

Commit

Permalink
Merge pull request #6 from Jakeway/check_for_obj_get_cache_key
Browse files Browse the repository at this point in the history
Add CacheHelperCacheable Interface; Clean up existing code
  • Loading branch information
Jakeway authored Dec 19, 2018
2 parents c036ed3 + 3ce3fd0 commit 1a1407f
Show file tree
Hide file tree
Showing 5 changed files with 427 additions and 151 deletions.
30 changes: 16 additions & 14 deletions cache_helper/decorators.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,25 +3,32 @@
except ImportError:
from cache_helper.exceptions import CacheHelperException as CacheSetError


from django.core.cache import cache
from django.utils.functional import wraps

from cache_helper import utils
from cache_helper.exceptions import CacheHelperFunctionError


def cached(timeout):
def get_key(*args, **kwargs):
return utils.sanitize_key(utils._cache_key(*args, **kwargs))

def _cached(func, *args):
func_type = utils._func_type(func)
def _cached(func):
func_type = utils.get_function_type(func)
if func_type is None:
raise CacheHelperFunctionError('Error determining function type of {func}'.format(func=func))

func_name = utils.get_function_name(func)
if func_name is None:
raise CacheHelperFunctionError('Error determining function name of {func}'.format(func=func))

@wraps(func)
def wrapper(*args, **kwargs):
name = utils._func_info(func, args)
key = get_key(name, func_type, args, kwargs)
function_cache_key = utils.get_function_cache_key(func_type, func_name, args, kwargs)
cache_key = utils.get_hashed_cache_key(function_cache_key)

try:
value = cache.get(key)
value = cache.get(cache_key)
except Exception:
value = None

Expand All @@ -31,16 +38,11 @@ def wrapper(*args, **kwargs):
# But if it fails on an error from the underlying
# cache system, handle it.
try:
cache.set(key, value, timeout)
cache.set(cache_key, value, timeout)
except CacheSetError:
pass

return value

def invalidate(*args, **kwargs):
name = utils._func_info(func, args)
key = get_key(name, func_type, args, kwargs)
cache.delete(key)
wrapper.invalidate = invalidate
return wrapper
return _cached
return _cached
9 changes: 7 additions & 2 deletions cache_helper/exceptions.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
class CacheHelperException(Exception):
    """
    Base class of the cache_helper exception hierarchy.

    Catching this type also catches the more specific cache_helper errors
    that derive from it.
    """


class CacheKeyCreationError(CacheHelperException):
    """
    Raised when a cache key cannot be built from a call's arguments, for
    example when the args or kwargs nest collections deeper than MAX_DEPTH.
    """


class CacheHelperFunctionError(CacheHelperException):
    """
    Raised when the type or the name of a function being decorated
    cannot be determined.
    """
13 changes: 13 additions & 0 deletions cache_helper/interfaces.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import abc


class CacheHelperCacheable(abc.ABC):
    """
    Interface for objects that supply their own component of a cache key.

    Subclasses must implement get_cache_helper_key; the abstract base cannot
    be instantiated directly.
    """

    @abc.abstractmethod
    def get_cache_helper_key(self):
        """
        For any two objects of the same class which are considered equal in your application,
        get_cache_helper_key should return the same key. This key should be unique to all objects
        considered equal. This key will be used as a component to the final cache key to get/set
        values from the cache. The key should be a string.
        """
129 changes: 62 additions & 67 deletions cache_helper/utils.py
Original file line number Diff line number Diff line change
@@ -1,89 +1,68 @@
import unicodedata
from hashlib import sha256

from django.core.cache import cache
import inspect

from cache_helper import settings
from cache_helper.exceptions import CacheKeyCreationError
from cache_helper.interfaces import CacheHelperCacheable


def get_function_cache_key(func_type, func_name, func_args, func_kwargs):
    """
    Builds the intermediate (un-hashed) cache key for one function call.

    The key is func_name followed by the sanitized string form of the call's
    positional and keyword arguments.

    :param func_type: one of 'function', 'method' or 'class_method'
    :param func_name: name of the function, used as the key prefix
    :param func_args: positional arguments of the call
    :param func_kwargs: keyword arguments of the call
    :raises CacheKeyCreationError: if func_type is not one of the known types
    """
    if func_type in ('method', 'function'):
        args_string = _sanitize_args(*func_args, **func_kwargs)
    elif func_type == 'class_method':
        # In this case, since we are dealing with a class method, the first arg to the function
        # will be the class. Since the name of the class and function is already built in to the
        # cache key, we can bypass the class variable and instead slice from the first index.
        args_string = _sanitize_args(*func_args[1:], **func_kwargs)
    else:
        # Fail explicitly instead of hitting an unbound args_string below.
        raise CacheKeyCreationError(
            'Unsupported function type {func_type!r} for {func_name}'.format(
                func_type=func_type, func_name=func_name))

    return '{func_name}{args_string}'.format(func_name=func_name, args_string=args_string)

# List of Control Characters not useable by memcached
CONTROL_CHARACTERS = set([chr(i) for i in range(0, 33)])
CONTROL_CHARACTERS.add(chr(127))

def sanitize_key(key, max_length=250):
def get_hashed_cache_key(key):
"""
Truncates key to keep it under memcached char limit. Replaces with hash.
Remove control characters b/c of memcached restriction on control chars.
Given the intermediate key produced by a function call along with its args + kwargs,
performs a sha256 hash on the utf-8 encoded version of the key, and returns the result
"""
key = ''.join([c for c in key if c not in CONTROL_CHARACTERS])
key_length = len(key)
# django memcached backend will, by default, add a prefix. Account for this in max
# key length. '%s:%s:%s'.format()
version_length = len(str(getattr(cache, 'version', '')))
prefix_length = len(settings.CACHE_MIDDLEWARE_KEY_PREFIX)
# +2 for the colons
max_length -= (version_length + prefix_length + 2)
if key_length > max_length:
the_hash = sha256(key.encode('utf-8')).hexdigest()
# sha256 always 64 chars.
key = key[:max_length - 64] + the_hash
return key
key_hash = sha256(key.encode('utf-8', errors='ignore')).hexdigest()
return key_hash


def _sanitize_args(args=[], kwargs={}):
def _sanitize_args(*args, **kwargs):
"""
Creates unicode key from all kwargs/args
-Note: comma separate args in order to prevent poo(1,2), poo(12, None) corner-case collisions...
-Note: comma separate args in order to prevent foo(1,2), foo(12, None) corner-case collisions...
"""
key = ";{0};{1}"
kwargs_key = ""
key = ";{args_key};{kwargs_key}"
args_key = _plumb_collections(args)
kwargs_key = _plumb_collections(kwargs)
return key.format(args_key, kwargs_key)
return key.format(args_key=args_key, kwargs_key=kwargs_key)


def _func_type(func):
argnames = func.__code__.co_varnames[:func.__code__.co_argcount]
if len(argnames) > 0:
if argnames[0] == 'self':
return 'method'
elif argnames[0] == 'cls':
return 'class_method'
return 'function'
def get_function_type(func):
    """
    Gets the type of the given function.

    Returns 'method' when one of the positional parameters is named 'self',
    'class_method' when one is named 'cls', 'function' for any other plain
    Python function, and None when the type cannot be determined.
    """
    try:
        # getfullargspec replaces inspect.getargspec, which was removed in
        # Python 3.11 and raised ValueError for functions with keyword-only
        # arguments or annotations.
        arg_names = inspect.getfullargspec(func).args
    except TypeError:
        # Not an introspectable callable; None signals "unknown" to callers.
        return None

    if 'self' in arg_names:
        return 'method'
    if 'cls' in arg_names:
        return 'class_method'

    if inspect.isfunction(func):
        return 'function'

    return None


def get_normalized_term(term, dash_replacement=''):
    """
    Returns the string form of term, NFKD-normalized, with anything that cannot
    be encoded as ASCII dropped, lower-cased and stripped of surrounding whitespace.
    """
    # NOTE: dash_replacement is accepted but not used by this function.
    term = str(term)
    if isinstance(term, bytes):
        term = term.decode('utf-8')
    term = unicodedata.normalize('NFKD', term).encode('ascii', 'ignore').decode('utf-8')
    term = term.lower()
    term = term.strip()
    return term


def _func_info(func, args):
func_type = _func_type(func)
lineno = ":%s" % func.__code__.co_firstlineno
def get_function_name(func):
func_type = get_function_type(func)

if func_type == 'function':
name = ".".join([func.__module__, func.__name__]) + lineno
if func_type in ['method', 'class_method', 'function']:
name = '{func_module}.{qualified_name}'\
.format(func_module=func.__module__, qualified_name=func.__qualname__)
return name
elif func_type == 'class_method':
class_name = args[0].__name__
else:
class_name = args[0].__class__.__name__
name = ".".join([func.__module__, class_name, func.__name__]) + lineno
return name

return None

def _cache_key(func_name, func_type, args, kwargs):
if func_type in ['method', 'function']:
args_string = _sanitize_args(args, kwargs)
elif func_type == 'class_method':
args_string = _sanitize_args(args[1:], kwargs)
key = '%s%s' % (func_name, args_string)
return key

def _plumb_collections(input_item):
"""
Expand All @@ -96,18 +75,19 @@ def _plumb_collections(input_item):
if hasattr(input_item, '__iter__'):
if isinstance(input_item, dict):
# Py3k Compatibility nonsense...
remains = [[(k,v) for k, v in input_item.items()].__iter__()]
remains = [[(k, v) for k, v in input_item.items()].__iter__()]
# because dictionary iterators yield tuples, it would appear
# to be 2 levels per dictionary, but that seems unexpected.
level -= 1
else:
remains = [input_item.__iter__()]
else:
return get_normalized_term(input_item)
return _get_object_cache_key(input_item)

while len(remains) > 0:
if settings.MAX_DEPTH is not None and level > settings.MAX_DEPTH:
raise CacheKeyCreationError('Function args or kwargs have too many nested collections for current MAX_DEPTH')
raise CacheKeyCreationError(
'Function args or kwargs have too many nested collections for current MAX_DEPTH')
current_iterator = remains.pop()
level += 1
while True:
Expand All @@ -127,7 +107,8 @@ def _plumb_collections(input_item):
hashed_list = []

for k, v in current_item.items():
hashed_list.append((sha256(str(k).encode('utf-8')).hexdigest(), v))
item_cache_key = _get_object_cache_key(k)
hashed_list.append((sha256(item_cache_key.encode('utf-8')).hexdigest(), v))

hashed_list = sorted(hashed_list, key=lambda t: t[0])
remains.append(current_iterator)
Expand All @@ -139,7 +120,8 @@ def _plumb_collections(input_item):
hashed_list = []

for item in current_item:
hashed_list.append(sha256(str(item).encode('utf-8')).hexdigest())
item_cache_key = _get_object_cache_key(item)
hashed_list.append(sha256(item_cache_key.encode('utf-8')).hexdigest())

hashed_list = sorted(hashed_list)
remains.append(current_iterator)
Expand All @@ -150,10 +132,23 @@ def _plumb_collections(input_item):
remains.append(current_item.__iter__())
break
else:
current_item_string = '{0},'.format(get_normalized_term(current_item))
current_item_string = '{0},'.format(_get_object_cache_key(current_item))
return_list.append(current_item_string)
continue
# trim trailing comma
return_string = ''.join(return_list)
# trim last ',' because it lacks significant meaning.
return return_string[:-1]


def _get_object_cache_key(obj):
    """
    Function used to get the individual cache key for objects. Checks if the
    object is an instance of CacheHelperCacheable, which means it will have a
    get_cache_helper_key function defined for it which will be used as the key.
    Otherwise, just uses the string representation of the object.

    The result is always a str, whichever branch produced it.
    """
    if isinstance(obj, CacheHelperCacheable):
        # Coerce to str so a non-string key cannot break code that
        # encodes the returned value.
        return str(obj.get_cache_helper_key())
    return str(obj)
Loading

0 comments on commit 1a1407f

Please sign in to comment.