-
Notifications
You must be signed in to change notification settings - Fork 144
/
Copy pathwebanalyze_test.go
90 lines (68 loc) · 1.92 KB
/
webanalyze_test.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
package webanalyze
import (
"fmt"
"net/url"
"strings"
"testing"
"github.com/PuerkitoBio/goquery"
)
// TestParseLinks checks parseLinks against a small HTML fixture using
// http://127.0.0.1 as the base URL and false as the third argument.
//
// The test expects exactly two links, in document order: the relative anchor
// resolved against the base URL and the absolute anchor on the same host. The
// anchors pointing at google.com and robinverton.de are expected to be absent.
func TestParseLinks(t *testing.T) {
	crawlData := `
<html><body>
<a href="./foo.html">Relative Link 1</a>
<a href="https://google.com">google.com</a>
<a href="https://robinverton.de">robinverton.de</a>
<a href="http://127.0.0.1/foobar.html">Same Host</a>
</body></html>
`
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(crawlData))
	if err != nil {
		t.Fatalf("Invalid testing document: %v", err)
	}

	base, err := url.Parse("http://127.0.0.1")
	if err != nil {
		t.Fatalf("Invalid base URL: %v", err)
	}

	want := []string{
		"http://127.0.0.1/foo.html",
		"http://127.0.0.1/foobar.html",
	}

	links := parseLinks(doc, base, false)
	// Check the length first so the index-wise comparison below cannot go
	// out of range.
	if len(links) != len(want) {
		t.Fatalf("Invalid number of links returned: got %d (%v), want %d", len(links), links, len(want))
	}
	for i := range want {
		if links[i] != want[i] {
			t.Fatalf("Invalid link parsed at index %d: got %q, want %q", i, links[i], want[i])
		}
	}
}
// TestParseLinksSubdomain checks how many links parseLinks returns for a
// fixture whose anchors point at example.com, foo.example.com,
// bar.foo.example.com and 127.0.0.1, using http://example.com as the base URL.
//
// The test expects no links when the third argument is false and two links
// when it is true (presumably the two subdomain anchors; confirm against
// parseLinks). Each setting runs as its own subtest so a failure names the
// setting that broke.
func TestParseLinksSubdomain(t *testing.T) {
	crawlData := `
<html><body>
<a href="https://example.com">google.com</a>
<a href="https://foo.example.com">robinverton.de</a>
<a href="https://bar.foo.example.com">robinverton.de</a>
<a href="http://127.0.0.1/foobar.html">Same Host</a>
</body></html>
`
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(crawlData))
	if err != nil {
		t.Fatalf("Invalid testing document: %v", err)
	}

	base, err := url.Parse("http://example.com")
	if err != nil {
		t.Fatalf("Invalid base URL: %v", err)
	}

	cases := []struct {
		subdomains bool // value passed as parseLinks' third argument
		want       int  // expected number of returned links
	}{
		{subdomains: false, want: 0},
		{subdomains: true, want: 2},
	}

	for _, tc := range cases {
		// The subtests run synchronously, so capturing tc in the closure is
		// safe on every Go version.
		t.Run(fmt.Sprintf("subdomains=%t", tc.subdomains), func(t *testing.T) {
			links := parseLinks(doc, base, tc.subdomains)
			if len(links) != tc.want {
				t.Fatalf("Invalid number of subdomain crawl returned: got %d (%v), want %d", len(links), links, tc.want)
			}
		})
	}
}
// TestIsSubdomain checks that isSubdomain returns true when called with
// http://example.com as its first argument and either http://sub.example.com
// or http://sub1.sub2.example.com as its second argument.
func TestIsSubdomain(t *testing.T) {
	base, err := url.Parse("http://example.com")
	if err != nil {
		t.Fatalf("parsing base URL: %v", err)
	}

	candidates := []string{
		"http://sub.example.com",
		"http://sub1.sub2.example.com",
	}

	for _, raw := range candidates {
		sub, err := url.Parse(raw)
		if err != nil {
			t.Fatalf("parsing %q: %v", raw, err)
		}
		// Report the URL actually under test; the earlier version printed the
		// first subdomain even when the second check failed.
		if !isSubdomain(base, sub) {
			t.Fatalf("%v is not a subdomain of %v (but should be)", sub, base)
		}
	}
}