Skip to content

Commit cc1aeb6

Browse files
committed
#61 add gzip
close #61
1 parent c960c2a commit cc1aeb6

2 files changed

Lines changed: 49 additions & 4 deletions

File tree

query.go

Lines changed: 33 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ package htmlquery
55

66
import (
77
"bufio"
8+
"compress/gzip"
9+
"compress/zlib"
810
"fmt"
911
"io"
1012
"net/http"
@@ -88,15 +90,42 @@ func QuerySelectorAll(top *html.Node, selector *xpath.Expr) []*html.Node {
8890
return elems
8991
}
9092

91-
// LoadURL loads the HTML document from the specified URL.
93+
// LoadURL loads the HTML document from the specified URL. Gzip compression is requested by default.
9294
func LoadURL(url string) (*html.Node, error) {
93-
resp, err := http.Get(url)
95+
req, err := http.NewRequest("GET", url, nil)
9496
if err != nil {
9597
return nil, err
9698
}
97-
defer resp.Body.Close()
99+
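// Request gzip compression explicitly. Setting Accept-Encoding ourselves turns off
// net/http's transparent decompression, so the response body is decoded by hand below.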
100+
req.Header.Add("Accept-Encoding", "gzip")
101+
resp, err := http.DefaultClient.Do(req)
102+
if err != nil {
103+
return nil, err
104+
}
105+
var reader io.ReadCloser
106+
107+
defer func() {
108+
if reader != nil {
109+
reader.Close()
110+
}
111+
}()
112+
113+
switch resp.Header.Get("Content-Encoding") {
114+
case "gzip":
115+
reader, err = gzip.NewReader(resp.Body)
116+
if err != nil {
117+
return nil, err
118+
}
119+
case "deflate":
120+
reader, err = zlib.NewReader(resp.Body)
121+
if err != nil {
122+
return nil, err
123+
}
124+
default:
125+
reader = resp.Body
126+
}
98127

99-
r, err := charset.NewReader(resp.Body, resp.Header.Get("Content-Type"))
128+
r, err := charset.NewReader(reader, resp.Header.Get("Content-Type"))
100129
if err != nil {
101130
return nil, err
102131
}

query_test.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package htmlquery
22

33
import (
4+
"compress/gzip"
45
"fmt"
56
"io/ioutil"
67
"net/http"
@@ -80,6 +81,21 @@ func TestLoadURL(t *testing.T) {
8081
}
8182
}
8283

84+
// TestLoadURLWithGzipResponse checks that LoadURL returns no error when the
// server replies with a gzip-compressed body labelled Content-Encoding: gzip.
// Only the returned error is checked; the parsed document is not inspected.
func TestLoadURLWithGzipResponse(t *testing.T) {
85+
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
86+
w.Header().Add("Content-Encoding", "gzip")
87+
gz := gzip.NewWriter(w)
88+
// Closing the gzip writer flushes any buffered data and writes the gzip footer.
defer gz.Close()
89+
fmt.Fprint(gz, htmlSample)
90+
}))
91+
defer ts.Close()
92+
93+
_, err := LoadURL(ts.URL)
94+
if err != nil {
95+
t.Fatal(err)
96+
}
97+
}
98+
8399
func TestLoadDoc(t *testing.T) {
84100
tempHTMLdoc, err := ioutil.TempFile("", "sample_*.html")
85101
if err != nil {

0 commit comments

Comments
 (0)