-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathallrecipes.go
233 lines (220 loc) · 7.28 KB
/
allrecipes.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
package allrecipes
import (
"errors"
"fmt"
"io"
"net/http"
"net/url"
"path"
"strings"
"golang.org/x/net/html"
"golang.org/x/net/html/atom"
)
// Recipe is the result of scraping one allrecipes.com recipe page.
// Every field is serialised to JSON under the snake_case key in its tag.
type Recipe struct {
	// Identity of the recipe and where it was fetched from.
	RecipeID  string `json:"recipe_id"`
	Author    string `json:"author"`
	SourceURL string `json:"source_url"`

	// Headline information shown at the top of the page.
	Name        string `json:"name"`
	ImageURL    string `json:"image_url"`
	Description string `json:"description"`

	// List sections, one entry per scraped item.
	Ingredients []string `json:"ingredients"`
	Directions  []string `json:"directions"`
	Footnotes   []string `json:"footnotes"`
}
// delNewLine returns s with every line-feed ('\n') and carriage-return
// ('\r') byte removed. All other bytes are kept in their original order.
func delNewLine(s string) string {
	// Nothing to strip: hand the input back untouched.
	if !strings.ContainsAny(s, "\r\n") {
		return s
	}

	// Both characters are single ASCII bytes, so a byte-wise scan cannot
	// split a multi-byte UTF-8 sequence.
	kept := make([]byte, 0, len(s))
	for i := 0; i < len(s); i++ {
		switch c := s[i]; c {
		case '\n', '\r':
			// dropped
		default:
			kept = append(kept, c)
		}
	}
	return string(kept)
}
// checkAttr reports whether attr contains at least one attribute whose key
// equals key and whose value equals val. Both comparisons are exact.
func checkAttr(attr []html.Attribute, key, val string) bool {
	for i := range attr {
		if attr[i].Key != key {
			continue
		}
		// Keep scanning on a value mismatch: a later attribute with the
		// same key may still match.
		if attr[i].Val == val {
			return true
		}
	}
	return false
}
// getAttrVal returns the value of the first attribute in attr whose key
// equals key. It returns "" when no attribute has that key.
func getAttrVal(attr []html.Attribute, key string) string {
	for i := range attr {
		if candidate := attr[i]; candidate.Key == key {
			return candidate.Val
		}
	}
	return ""
}
// GetRecipe fetches https://www.allrecipes.com/recipe/<recipeID> and scrapes
// the returned HTML into a Recipe.
//
// The page is tokenized (not fully parsed) and the following elements are
// picked up:
//
//   - <h1 itemprop="name">            -> Name
//   - <span itemprop="author">        -> Author
//   - <div itemprop="description">    -> Description
//   - <span itemprop="ingredients">   -> one entry of Ingredients
//   - <span class="recipe-directions__list--item"> (without
//     ng-bind="model.itemNote")       -> one entry of Directions
//   - <span class="recipe-footnotes__header"> followed by the first <li>
//     -> one entry of Footnotes, formatted "<header> <li text>"
//   - <meta property="og:image">      -> ImageURL (its content attribute)
//
// SourceURL is the URL of the request that produced the response.
//
// An error is returned when the HTTP request fails, when the status is not
// 200, when the tokenizer fails before EOF, or when one of the elements
// above is not immediately followed by a text token. On error the returned
// Recipe is the zero value.
//
// NOTE(review): http.Get uses http.DefaultClient, which sets no timeout;
// callers that need one must configure it there.
func GetRecipe(recipeID string) (Recipe, error) {
	// url.URL.String inserts the "/" between the host and this relative
	// path, yielding https://www.allrecipes.com/recipe/<recipeID>.
	u := url.URL{
		Scheme: "https",
		Host:   "www.allrecipes.com",
		Path:   path.Join("recipe", recipeID),
	}

	resp, err := http.Get(u.String())
	if err != nil {
		return Recipe{}, err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return Recipe{}, fmt.Errorf("allrecipes.com responded with: %s", resp.Status)
	}

	ret := Recipe{RecipeID: recipeID, SourceURL: resp.Request.URL.String()}
	z := html.NewTokenizer(resp.Body)
	for {
		tt := z.Next()
		switch tt {
		case html.ErrorToken:
			// io.EOF is the normal end of the document.
			if errors.Is(z.Err(), io.EOF) {
				return ret, nil
			}
			return Recipe{}, fmt.Errorf("main parser loop error: %w", z.Err())
		case html.StartTagToken, html.SelfClosingTagToken:
			token := z.Token()
			// <meta> is a void element and may be written with or without
			// the trailing "/", so it is accepted as either token type.
			if imgURL, ok := ogImageURL(token); ok {
				ret.ImageURL = imgURL
				continue
			}
			// The remaining elements are followed by their text, so they
			// are only scraped from real start tags.
			if tt != html.StartTagToken {
				continue
			}
			if err := scrapeStartTag(z, token, &ret); err != nil {
				return Recipe{}, err
			}
		}
	}
}

// ogImageURL returns the content attribute of token and true when token is a
// <meta property="og:image"> tag; otherwise it returns "", false.
func ogImageURL(token html.Token) (string, bool) {
	if token.DataAtom != atom.Meta || !checkAttr(token.Attr, "property", "og:image") {
		return "", false
	}
	return getAttrVal(token.Attr, "content"), true
}

// scrapeStartTag stores the text that follows token into the matching field
// of ret when token is one of the recognised recipe elements. Tokens that
// match nothing are ignored and consume no further input.
func scrapeStartTag(z *html.Tokenizer, token html.Token, ret *Recipe) error {
	switch {
	case token.DataAtom == atom.H1 && checkAttr(token.Attr, "itemprop", "name"):
		// <h1 class="recipe-summary__h1" itemprop="name">Spaghetti Pie I</h1>
		text, err := nextTextToken(z, "name", "recipe name")
		if err != nil {
			return err
		}
		ret.Name = delNewLine(text)

	case token.DataAtom == atom.Span && checkAttr(token.Attr, "itemprop", "author"):
		// <span class="submitter__name" itemprop="author">Kimberley</span>
		text, err := nextTextToken(z, "author", "author name")
		if err != nil {
			return err
		}
		ret.Author = delNewLine(text)

	case token.DataAtom == atom.Div && checkAttr(token.Attr, "itemprop", "description"):
		// <div class="submitter__description" itemprop="description">...</div>
		text, err := nextTextToken(z, "description", "description")
		if err != nil {
			return err
		}
		ret.Description = delNewLine(text)

	case token.DataAtom == atom.Span && checkAttr(token.Attr, "itemprop", "ingredients"):
		// <span class="recipe-ingred_txt added" ... itemprop="ingredients">
		text, err := nextTextToken(z, "ingredient", "ingredient")
		if err != nil {
			return err
		}
		ret.Ingredients = append(ret.Ingredients, delNewLine(text))

	case token.DataAtom == atom.Span &&
		checkAttr(token.Attr, "class", "recipe-directions__list--item") &&
		!checkAttr(token.Attr, "ng-bind", "model.itemNote"):
		// <span class="recipe-directions__list--item" ...>
		text, err := nextTextToken(z, "direction", "direction")
		if err != nil {
			return err
		}
		ret.Directions = append(ret.Directions, delNewLine(text))

	case token.DataAtom == atom.Span && checkAttr(token.Attr, "class", "recipe-footnotes__header"):
		// <span class="recipe-footnotes__header">Nutrition:</span>
		// "footnotest" (sic) is kept so the existing error text is unchanged.
		about, err := nextTextToken(z, "footnotest title", "footnotes title")
		if err != nil {
			return err
		}
		// Fast-forward to the first <li> after the header; its text is the
		// footnote body.
		for {
			switch z.Next() {
			case html.StartTagToken:
				if z.Token().DataAtom != atom.Li {
					continue
				}
				note, err := nextTextToken(z, "footnotes", "footnotes")
				if err != nil {
					return err
				}
				ret.Footnotes = append(ret.Footnotes, about+" "+delNewLine(note))
				return nil
			case html.ErrorToken:
				return fmt.Errorf("footnotest <li> err: %w", z.Err())
			}
		}
	}
	return nil
}

// nextTextToken advances z by one token and returns its text when that token
// is a text token. errLabel and wantLabel are spliced into the error returned
// for a tokenizer error and for an unexpected token type respectively.
func nextTextToken(z *html.Tokenizer, errLabel, wantLabel string) (string, error) {
	switch z.Next() {
	case html.TextToken:
		// Token.Data is already entity-unescaped by the tokenizer; running
		// html.UnescapeString over it again would corrupt text such as
		// "&amp;lt;".
		return z.Token().Data, nil
	case html.ErrorToken:
		return "", fmt.Errorf("%s text err: %w", errLabel, z.Err())
	default:
		return "", errors.New("allrecipes parser: " + wantLabel + " text was expected here")
	}
}
/*
func main() {
//url := "http://allrecipes.com/recipe/231495/texas-boiled-beer-shrimp/"
//url := "http://allrecipes.com/recipe/11772/spaghetti-pie-i/?clickId=right%20rail0&internalSource=rr_feed_recipe_sb&referringId=231495%20referringContentType%3Drecipe"
recipe, err := GetRecipe("231495")
if err != nil {
fmt.Fprintf(os.Stderr, "%s\n", err) // TODO stderr
return
}
fmt.Printf("\nrecipe: %+v\n", recipe)
}
*/