@@ -25,12 +25,14 @@ type options struct {
2525 lbr string
2626 linksInnerText bool
2727 listPrefix string
28+ keepSpaces bool
2829}
2930
// newOptions returns a pointer to an options value populated with the defaults.
3031func newOptions () * options {
3132 // apply defaults: lbr starts as WIN_LBR, keepSpaces starts disabled
3233 return & options {
3334 lbr : WIN_LBR ,
35+ keepSpaces : false , // same as the bool zero value; spelled out to document the default
3436 }
3537}
3638
@@ -64,6 +66,13 @@ func WithListSupport() Option {
6466 return WithListSupportPrefix (" - " )
6567}
6668
69+ // WithKeepSpaces returns an Option that sets keepSpaces to true, so space runes (' ' and U+2008..U+200B) are no longer collapsed through writeSpace; new-line runes are still collapsed.
70+ func WithKeepSpaces () Option {
71+ return func (o * options ) {
72+ o .keepSpaces = true // read by the space case of the conversion loop's switch
73+ }
74+ }
75+
6776func parseHTMLEntity (entName string ) (string , bool ) {
6877 if r , ok := entity [entName ]; ok {
6978 return string (r ), true
@@ -193,15 +202,21 @@ func HTML2TextWithOptions(html string, reqOpts ...Option) string {
193202 }
194203
195204 switch {
196- // skip new lines and spaces adding a single space if not there yet
197- case r <= 0xD , r == 0x85 , r == 0x2028 , r == 0x2029 , // new lines
198- r == ' ' , r >= 0x2008 && r <= 0x200B : // spaces
205+ // skip new lines adding a single space if not there yet
206+ case r <= 0xD , r == 0x85 , r == 0x2028 , r == 0x2029 : // new lines
199207 if shouldOutput && badTagStackDepth == 0 && ! inEnt {
200208 //outBuf.WriteString(fmt.Sprintf("{DBG r:%c, inEnt:%t, tag:%s}", r, inEnt, html[tagStart:i]))
201209 writeSpace (outBuf )
202210 }
203211 continue
204212
213+ // skip spaces adding a single space if not there yet
214+ case r == ' ' , r >= 0x2008 && r <= 0x200B : // spaces
215+ if ! opts .keepSpaces && shouldOutput && badTagStackDepth == 0 && ! inEnt {
216+ writeSpace (outBuf )
217+ continue
218+ }
219+
205220 case r == ';' && inEnt : // end of html entity
206221 inEnt = false
207222 continue
0 commit comments