File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -5,6 +5,8 @@ package htmlquery
55
66import (
77 "bufio"
8+ "compress/gzip"
9+ "compress/zlib"
810 "fmt"
911 "io"
1012 "net/http"
@@ -88,15 +90,42 @@ func QuerySelectorAll(top *html.Node, selector *xpath.Expr) []*html.Node {
8890 return elems
8991}
9092
91- // LoadURL loads the HTML document from the specified URL.
93+ // LoadURL loads the HTML document from the specified URL. Gzip compression is requested by default on the HTTP request.
9294func LoadURL (url string ) (* html.Node , error ) {
93- resp , err := http .Get ( url )
95+ req , err := http .NewRequest ( "GET" , url , nil )
9496 if err != nil {
9597 return nil , err
9698 }
97- defer resp .Body .Close ()
99+ // Enable gzip compression.
100+ req .Header .Add ("Accept-Encoding" , "gzip" )
101+ resp , err := http .DefaultClient .Do (req )
102+ if err != nil {
103+ return nil , err
104+ }
105+ var reader io.ReadCloser
106+
107+ defer func () {
108+ if reader != nil {
109+ reader .Close ()
110+ }
111+ }()
112+
113+ switch resp .Header .Get ("Content-Encoding" ) {
114+ case "gzip" :
115+ reader , err = gzip .NewReader (resp .Body )
116+ if err != nil {
117+ return nil , err
118+ }
119+ case "deflate" :
120+ reader , err = zlib .NewReader (resp .Body )
121+ if err != nil {
122+ return nil , err
123+ }
124+ default :
125+ reader = resp .Body
126+ }
98127
99- r , err := charset .NewReader (resp . Body , resp .Header .Get ("Content-Type" ))
128+ r , err := charset .NewReader (reader , resp .Header .Get ("Content-Type" ))
100129 if err != nil {
101130 return nil , err
102131 }
Original file line number Diff line number Diff line change 11package htmlquery
22
33import (
4+ "compress/gzip"
45 "fmt"
56 "io/ioutil"
67 "net/http"
@@ -80,6 +81,21 @@ func TestLoadURL(t *testing.T) {
8081 }
8182}
8283
84+ func TestLoadURLWithGzipResponse (t * testing.T ) {
85+ ts := httptest .NewServer (http .HandlerFunc (func (w http.ResponseWriter , r * http.Request ) {
86+ w .Header ().Add ("Content-Encoding" , "gzip" )
87+ gz := gzip .NewWriter (w )
88+ defer gz .Close ()
89+ fmt .Fprint (gz , htmlSample )
90+ }))
91+ defer ts .Close ()
92+
93+ _ , err := LoadURL (ts .URL )
94+ if err != nil {
95+ t .Fatal (err )
96+ }
97+ }
98+
8399func TestLoadDoc (t * testing.T ) {
84100 tempHTMLdoc , err := ioutil .TempFile ("" , "sample_*.html" )
85101 if err != nil {
You can’t perform that action at this time.
0 commit comments