Skip to content

Commit

Permalink
Add tests for automated .gz decompression
Browse files Browse the repository at this point in the history
This is not about Content-Encoding: gzip, which Go's net/http already
decodes automatically.

This is about resources that are themselves gzip files, e.g. /sitemap.xml.gz,
and are served without a Content-Encoding: gzip header.
  • Loading branch information
WGH- committed Oct 16, 2023
1 parent c853cea commit 1b000c4
Showing 1 changed file with 25 additions and 9 deletions.
34 changes: 25 additions & 9 deletions colly_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ package colly
import (
"bufio"
"bytes"
"compress/gzip"
"context"
"errors"
"fmt"
Expand All @@ -43,6 +44,13 @@ Disallow: /disallowed
Disallow: /allowed*q=
`

// testXML is the XML document written by the test server's /xml and
// /test.xml.gz handlers (the latter writes it through a gzip.Writer).
const testXML = `<?xml version="1.0" encoding="UTF-8"?>
<page>
<title>Test Page</title>
<paragraph type="description">This is a test page</paragraph>
<paragraph type="description">This is a test paragraph</paragraph>
</page>`

func newUnstartedTestServer() *httptest.Server {
mux := http.NewServeMux()

Expand All @@ -69,13 +77,13 @@ func newUnstartedTestServer() *httptest.Server {

mux.HandleFunc("/xml", func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/xml")
w.Write([]byte(`<?xml version="1.0" encoding="UTF-8"?>
<page>
<title>Test Page</title>
<paragraph type="description">This is a test page</paragraph>
<paragraph type="description">This is a test paragraph</paragraph>
</page>
`))
w.Write([]byte(testXML))
})

// /test.xml.gz responds with testXML compressed by a gzip.Writer. The handler
// sets no response headers itself, so it does not set Content-Encoding: gzip;
// the body is the gzip stream as-is.
mux.HandleFunc("/test.xml.gz", func(w http.ResponseWriter, r *http.Request) {
ww := gzip.NewWriter(w)
// Close flushes any buffered compressed data and writes the gzip footer,
// so it has to run after the Write below.
defer ww.Close()
ww.Write([]byte(testXML))
})

mux.HandleFunc("/login", func(w http.ResponseWriter, r *http.Request) {
Expand Down Expand Up @@ -1417,7 +1425,7 @@ func TestCollectorOnXMLWithHtml(t *testing.T) {
}
}

func TestCollectorOnXMLWithXML(t *testing.T) {
func testCollectorOnXMLWithXML(t *testing.T, path string) {
ts := newTestServer()
defer ts.Close()

Expand Down Expand Up @@ -1450,7 +1458,7 @@ func TestCollectorOnXMLWithXML(t *testing.T) {
}
})

c.Visit(ts.URL + "/xml")
c.Visit(ts.URL + path)

if !titleCallbackCalled {
t.Error("Failed to call OnXML callback for <title> tag")
Expand All @@ -1461,6 +1469,14 @@ func TestCollectorOnXMLWithXML(t *testing.T) {
}
}

// TestCollectorOnXMLWithXML runs the shared OnXML test body against the
// "/xml" path of the test server.
func TestCollectorOnXMLWithXML(t *testing.T) {
	const endpoint = "/xml"
	testCollectorOnXMLWithXML(t, endpoint)
}

// TestCollectorOnXMLWithXMLCompressed runs the shared OnXML test body against
// the "/test.xml.gz" path of the test server.
func TestCollectorOnXMLWithXMLCompressed(t *testing.T) {
	const endpoint = "/test.xml.gz"
	testCollectorOnXMLWithXML(t, endpoint)
}

func TestCollectorVisitWithTrace(t *testing.T) {
ts := newTestServer()
defer ts.Close()
Expand Down

0 comments on commit 1b000c4

Please sign in to comment.