-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsample.go
74 lines (64 loc) · 1.97 KB
/
sample.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
package main
import (
"fmt"
"log"
"strings"
"github.com/andybalholm/cascadia"
"golang.org/x/net/html"
)
// feedbackHtml is the HTML fragment that main parses and queries. It holds a
// single <section class="feedback-section"> whose nested divs contain one
// anchor (the review link) wrapping one <img>, followed by a row of star icons.
var feedbackHtml string = `
<section class="feedback-section">
<div class="container">
<div class="row">
<div class="col-md-12 text-center">
<h1 class="single-line">The Customers’ Verdict</h1>
</div>
</div><br>
<div class="row">
<div class="col-md-3">
<div class="feedback-head">
<a href="https://www.google.com/search?q=THE+MARINA+MALL&rlz=1C1CHBD_enIN908IN908&oq=th+&aqs=chrome.1.69i57j69i59l2j69i60l5.2415j0j7&sourceid=chrome&ie=UTF-8#lrd=0x3a525a5ed3d3509d:0x51ba8d5c2f099ebb,1,,," target="_blank">
<img src="http://marinamallchennai.com/wp-content/uploads/2020/09/Googlebusiness.png">
</a>
<p><i class="fa fa-star"></i> <i class="fa fa-star"></i> <i class="fa fa-star"></i> <i class="fa fa-star"></i> <i class="fa fa-star"></i></p><br>
</div>
</div>
</div>
</div>
</section>
`
// Query compiles query as a CSS selector and returns the result of
// cascadia.Query for n and that selector.
//
// If query cannot be parsed, the error is discarded and a pointer to an empty
// html.Node is returned instead.
func Query(n *html.Node, query string) *html.Node {
	if selector, parseErr := cascadia.Parse(query); parseErr == nil {
		return cascadia.Query(n, selector)
	}
	return &html.Node{}
}
// QueryAll compiles query as a CSS selector and returns the result of
// cascadia.QueryAll for n and that selector.
//
// If query cannot be parsed, the error is discarded and an empty, non-nil
// slice is returned instead.
func QueryAll(n *html.Node, query string) []*html.Node {
	if selector, parseErr := cascadia.Parse(query); parseErr == nil {
		return cascadia.QueryAll(n, selector)
	}
	return []*html.Node{}
}
// AttrOr returns the value of the first attribute of n whose key equals
// attrName. It returns or when n is nil or when n carries no such attribute.
func AttrOr(n *html.Node, attrName, or string) string {
	// Query hands back cascadia.Query's result unchanged, which is nil when no
	// element matches; treat that as "attribute absent" instead of panicking
	// on the nil dereference below.
	if n == nil {
		return or
	}
	for _, a := range n.Attr {
		if a.Key == attrName {
			return a.Val
		}
	}
	return or
}
// main parses the embedded feedbackHtml fragment, prints a heading, and then,
// for every element matching "section.feedback-section", prints the href of
// the first "div a" match and the src of the first "div a img" match found
// beneath it. A parse failure terminates the program via log.Fatal.
func main() {
	root, parseErr := html.Parse(strings.NewReader(feedbackHtml))
	if parseErr != nil {
		log.Fatal(parseErr)
	}

	fmt.Print("List of URLS:\n\n")

	sections := QueryAll(root, "section.feedback-section")
	for i := range sections {
		section := sections[i]
		reviewURL := AttrOr(Query(section, "div a "), "href", "")
		imageURL := AttrOr(Query(section, "div a img"), "src", "")
		fmt.Println("Review url", reviewURL, "\n", "Image URl", imageURL)
	}
}