-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathingredients.py
100 lines (78 loc) · 3.29 KB
/
ingredients.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import os
import re
import itertools
from collections import defaultdict
import numpy as np
from toolbox.strings import remove_range
from toolbox.io import UnicodeDictReader
from toolbox.functions import compose
def ingredient_iterator(directory):
    """
    Lazily yield every ingredient stored in the archives below `directory`.

    The directory tree is walked recursively and every file found is opened
    with `numpy.load`. Each file is expected to be an ``.npz`` archive whose
    first positional array (``arr_0``) is a 0-d array wrapping a mapping with
    an ``'ingredients'`` entry; the items of that entry are yielded one by
    one, file after file.

    :param directory: root directory to scan; ``~`` is expanded and the path
        is made absolute before walking.
    :return: generator over the individual ingredients.
    """
    directory = os.path.abspath(os.path.expanduser(directory))
    for root, _dirs, files in os.walk(directory):
        for filename in files:
            path = os.path.join(root, filename)
            # NOTE(security): the payload is a pickled Python object, so it
            # has to be loaded with allow_pickle=True (NumPy >= 1.16.3 refuses
            # object arrays otherwise). Unpickling can execute arbitrary
            # code -- only point this at directories with trusted files.
            #
            # The archive is used as a context manager so its file handle is
            # closed before we start yielding; otherwise it would stay open
            # for as long as the consumer keeps the generator suspended.
            with np.load(path, allow_pickle=True) as archive:
                data = archive['arr_0'][()]  # [()] unwraps the 0-d array
            for ingredient in data['ingredients']:
                yield ingredient
def get_ingredients(directory):
    """
    Collect every ingredient stored in the archives below `directory`.

    The directory tree is walked recursively and every file found is opened
    with `numpy.load`. Each file is expected to be an ``.npz`` archive whose
    first positional array (``arr_0``) is a 0-d array wrapping a mapping with
    an ``'ingredients'`` entry; those entries are concatenated in the order
    the files are visited.

    :param directory: root directory to scan; ``~`` is expanded and the path
        is made absolute before walking.
    :return: a single list with the ingredients of all files.
    """
    ingredients = []
    directory = os.path.abspath(os.path.expanduser(directory))
    for root, _dirs, files in os.walk(directory):
        for filename in files:
            path = os.path.join(root, filename)
            # NOTE(security): the payload is a pickled Python object, so it
            # has to be loaded with allow_pickle=True (NumPy >= 1.16.3 refuses
            # object arrays otherwise). Unpickling can execute arbitrary
            # code -- only point this at directories with trusted files.
            #
            # Using the archive as a context manager closes its file handle
            # as soon as the data has been read instead of leaking one
            # handle per file.
            with np.load(path, allow_pickle=True) as archive:
                data = archive['arr_0'][()]  # [()] unwraps the 0-d array
            ingredients.extend(data['ingredients'])
    return ingredients
class StandardizedIngredients(object):
    """
    Lists the standardized ingredients of FooDB.

    The table is read from a delimited text file. Every row with a numeric
    ``id`` contributes its ``name`` and, when the name carries a
    parenthesised, comma-separated list, each alias in that list. Names and
    aliases are stripped and lower-cased and can then be used to look up the
    row they came from, e.g. ``table['salt']``.
    """

    # First parenthesised group in a name; it holds the aliases.
    aliases = re.compile(r'(\(.*?\))')

    # Types accepted as an ingredient name by __getitem__. `unicode` only
    # exists on Python 2, hence the guarded lookup.
    try:
        _string_types = (str, unicode)  # noqa: F821
    except NameError:
        _string_types = (str,)

    def __init__(self, filename):
        """
        Load the ingredient table.

        :param filename: path of the Windows-1252 encoded file to read; its
            rows must provide at least the ``id`` and ``name`` columns.
        """
        # One-to-one: id (kept as the string found in the file) -> row.
        self._id_maps_details = defaultdict(list)
        # Many-to-one: normalised name or alias -> id.
        self._ingredient_maps_id = {}

        with open(filename) as fp:
            reader = UnicodeDictReader(fp, encoding='Windows-1252')
            for details in reader:
                if not details or not details['id'].isdigit():
                    continue
                id_ = details['id']
                # Map each alias to an id (many-to-one).
                for alias in self._split_names(details['name']):
                    # NOTE: this is a VERY LAZY fix. There are multiple
                    # ingredients with (almost) the same name and we're just
                    # storing the first one that pops up (for now).
                    self._ingredient_maps_id.setdefault(alias, id_)
                # Map each id to the corresponding details.
                self._id_maps_details[id_] = details

    def _split_names(self, name):
        """
        Return the normalised common name and aliases found in `name`.

        Only the first parenthesised group is treated as an alias list; the
        aliases come first and the common name (the text left once that
        group is removed) comes last. Every entry is stripped of surrounding
        whitespace and lower-cased.
        """
        match = self.aliases.search(name)
        if match:  # Has alias(es)
            # Drop the surrounding parentheses before splitting.
            names = match.group(1)[1:-1].split(',')
            names.append(remove_range(name, match.start(), match.end()))
        else:  # Has no aliases
            names = [name]
        return [entry.strip().lower() for entry in names]

    def ingredients(self):
        """
        Returns ingredients and aliases, making no distinction between the
        two.
        """
        return list(self._ingredient_maps_id)

    def ingredient_mappings(self):
        """
        Returns a list of ``(original name, ingredient-or-alias)`` pairs, one
        for every entry of `ingredients()`.
        """
        return [(self[ingredient]['name'], ingredient)
                for ingredient in self.ingredients()]

    def __getitem__(self, name):
        """
        Look up the details of an ingredient.

        :param name: either a normalised name/alias (string) or a numeric id
            (int).
        :return: the row of the ingredient. For an id that is not in the
            table an empty list is returned (kept for backward
            compatibility); for any other key type ``None`` is returned.
        :raises KeyError: if `name` is a string that is not a known name or
            alias.
        """
        if isinstance(name, self._string_types):
            id_ = self._ingredient_maps_id[name]
            return self._id_maps_details[id_]
        if isinstance(name, int):
            # Ids are stored as the strings read from the file, so an int key
            # has to be converted before it can match anything. `.get` keeps
            # a failed lookup from inserting an empty entry into the mapping.
            return self._id_maps_details.get(str(name), [])
        return None