Skip to content

Commit 506f541

Browse files
authored
Merge pull request #18 from carloseabade/feat/keep-spaces-as-they-are
feat: add option to keep spaces as they are
2 parents e0473e1 + 7bf1204 commit 506f541

File tree

2 files changed

+22
-3
lines changed

2 files changed

+22
-3
lines changed

html2text.go

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,14 @@ type options struct {
2525
lbr string
2626
linksInnerText bool
2727
listPrefix string
28+
keepSpaces bool
2829
}
2930

3031
// newOptions returns a pointer to an options value filled with the defaults.
func newOptions() *options {
3132
// apply defaults: WIN_LBR as the line break, space collapsing left enabled
3233
return &options{
3334
lbr: WIN_LBR,
35+
keepSpaces: false, // same as the bool zero value; switched on by WithKeepSpaces
3436
}
3537
}
3638

@@ -64,6 +66,13 @@ func WithListSupport() Option {
6466
return WithListSupportPrefix(" - ")
6567
}
6668

69+
// WithKeepSpaces returns an Option that sets keepSpaces, so that spaces
// (' ' and U+2008..U+200B) are kept as they are instead of being collapsed
// into a single space. New lines are still collapsed into a single space.
70+
func WithKeepSpaces() Option {
71+
return func(o *options) {
72+
o.keepSpaces = true
73+
}
74+
}
75+
6776
func parseHTMLEntity(entName string) (string, bool) {
6877
if r, ok := entity[entName]; ok {
6978
return string(r), true
@@ -193,15 +202,21 @@ func HTML2TextWithOptions(html string, reqOpts ...Option) string {
193202
}
194203

195204
switch {
196-
// skip new lines and spaces adding a single space if not there yet
197-
case r <= 0xD, r == 0x85, r == 0x2028, r == 0x2029, // new lines
198-
r == ' ', r >= 0x2008 && r <= 0x200B: // spaces
205+
// skip new lines adding a single space if not there yet
206+
case r <= 0xD, r == 0x85, r == 0x2028, r == 0x2029: // new lines
199207
if shouldOutput && badTagStackDepth == 0 && !inEnt {
200208
//outBuf.WriteString(fmt.Sprintf("{DBG r:%c, inEnt:%t, tag:%s}", r, inEnt, html[tagStart:i]))
201209
writeSpace(outBuf)
202210
}
203211
continue
204212

213+
// unless keepSpaces is set, skip spaces adding a single space if not there yet
214+
case r == ' ', r >= 0x2008 && r <= 0x200B: // spaces
215+
if !opts.keepSpaces && shouldOutput && badTagStackDepth == 0 && !inEnt {
216+
writeSpace(outBuf)
217+
continue
218+
}
219+
205220
case r == ';' && inEnt: // end of html entity
206221
inEnt = false
207222
continue

html2text_test.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,5 +138,9 @@ func TestHTML2Text(t *testing.T) {
138138
So(HTML2Text(`<aa x="1">hello</aa>`), ShouldEqual, "hello")
139139
})
140140

141+
Convey("Keep spaces as they are", func() {
142+
So(HTML2TextWithOptions("should not ignore spaces", WithKeepSpaces()), ShouldEqual, "should not ignore spaces")
143+
})
144+
141145
})
142146
}

0 commit comments

Comments
 (0)