@@ -108,8 +108,16 @@ func (s *scanner) scan(lval *sqlSymType) {
108108 lval .pos = int32 (s .pos )
109109 lval .str = "EOF"
110110
111- if _ , ok := s .skipWhitespace (lval , true ); ! ok {
112- return
111+ for {
112+ if comment , _ , ok := s .skipWhitespace (lval , true ); ! ok {
113+ return
114+ } else if comment != "" {
115+ lval .str = comment
116+ lval .id = BLOCK_COMMENT
117+ return
118+ } else {
119+ break
120+ }
113121 }
114122
115123 ch := s .next ()
@@ -352,7 +360,7 @@ func (s *scanner) scan(lval *sqlSymType) {
352360 s .pos ++
353361 lval .id = CONTAINS
354362 return
355- case '@' : //@@
363+ case '@' : // @@
356364 s .pos ++
357365 lval .id = TEXTSEARCHMATCH
358366 }
@@ -439,7 +447,9 @@ func (s *scanner) next() int {
439447 return ch
440448}
441449
442- func (s * scanner ) skipWhitespace (lval * sqlSymType , allowComments bool ) (newline , ok bool ) {
450+ // skipWhitespace skips over whitespace characters (space, tab, newline, etc.) and, when allowComments is true, comments.
451+ // Consecutive block comments are concatenated, separated by a single space, into the returned blockComment; the skipped whitespace itself is not included.
452+ func (s * scanner ) skipWhitespace (lval * sqlSymType , allowComments bool ) (blockComment string , newline , ok bool ) {
443453 newline = false
444454 for {
445455 ch := s .peek ()
@@ -453,37 +463,55 @@ func (s *scanner) skipWhitespace(lval *sqlSymType, allowComments bool) (newline,
453463 continue
454464 }
455465 if allowComments {
456- if present , cok := s .scanComment (lval ); ! cok {
457- return false , false
466+ if cmt , present , cok := s .scanComment (lval ); ! cok {
467+ return "" , false , false
458468 } else if present {
469+ if len (blockComment ) > 0 {
470+ blockComment += " "
471+ }
472+ blockComment += cmt
459473 continue
460474 }
461475 }
462476 break
463477 }
464- return newline , true
478+ return blockComment , newline , true
465479}
466480
467- func (s * scanner ) scanComment (lval * sqlSymType ) (present , ok bool ) {
481+ // scanComment scans for a comment starting at the current position.
482+ // For block-style comments, returns the comment string scanned.
483+ // For line-style comments, returns an empty string.
484+ // In either case, also returns whether a comment was present, and whether scanning succeeded.
485+ func (s * scanner ) scanComment (lval * sqlSymType ) (comment string , present , ok bool ) {
468486 start := s .pos
469487 ch := s .peek ()
470488
471489 if ch == '/' {
490+ sb := strings.Builder {}
491+ sb .WriteRune ('/' )
492+
472493 s .pos ++
473494 if s .peek () != '*' {
474495 s .pos --
475- return false , true
496+ return "" , false , true
476497 }
498+ sb .WriteRune ('*' )
499+
477500 s .pos ++
478501 depth := 1
479502 for {
480- switch s .next () {
503+ next := s .next ()
504+ sb .WriteRune (rune (next ))
505+
506+ switch next {
481507 case '*' :
482508 if s .peek () == '/' {
483509 s .pos ++
484510 depth --
511+ sb .WriteRune (rune ('/' ))
512+
485513 if depth == 0 {
486- return true , true
514+ return sb . String (), true , true
487515 }
488516 continue
489517 }
@@ -492,14 +520,15 @@ func (s *scanner) scanComment(lval *sqlSymType) (present, ok bool) {
492520 if s .peek () == '*' {
493521 s .pos ++
494522 depth ++
523+ sb .WriteRune (rune ('*' ))
495524 continue
496525 }
497526
498527 case eof :
499528 lval .id = ERROR
500529 lval .pos = int32 (start )
501530 lval .str = "unterminated comment"
502- return false , false
531+ return "" , false , false
503532 }
504533 }
505534 }
@@ -508,17 +537,17 @@ func (s *scanner) scanComment(lval *sqlSymType) (present, ok bool) {
508537 s .pos ++
509538 if s .peek () != '-' {
510539 s .pos --
511- return false , true
540+ return "" , false , true
512541 }
513542 for {
514543 switch s .next () {
515544 case eof , '\n' :
516- return true , true
545+ return "" , true , true
517546 }
518547 }
519548 }
520549
521- return false , true
550+ return "" , false , true
522551}
523552
524553func (s * scanner ) scanIdent (lval * sqlSymType ) {
@@ -535,7 +564,7 @@ func (s *scanner) scanIdent(lval *sqlSymType) {
535564 // of whether the string is only ASCII or only ASCII lowercase for later.
536565 for {
537566 ch := s .peek ()
538- //fmt.Println(ch, ch >= utf8.RuneSelf, ch >= 'A' && ch <= 'Z')
567+ // fmt.Println(ch, ch >= utf8.RuneSelf, ch >= 'A' && ch <= 'Z')
539568
540569 if ch >= utf8 .RuneSelf {
541570 isASCII = false
@@ -549,7 +578,7 @@ func (s *scanner) scanIdent(lval *sqlSymType) {
549578
550579 s .pos ++
551580 }
552- //fmt.Println("parsed: ", s.in[start:s.pos], isASCII, isLower)
581+ // fmt.Println("parsed: ", s.in[start:s.pos], isASCII, isLower)
553582
554583 if isLower {
555584 // Already lowercased - nothing to do.
@@ -730,7 +759,7 @@ outer:
730759 b := s .next ()
731760 switch b {
732761 case ch :
733- newline , ok := s .skipWhitespace (lval , false )
762+ _ , newline , ok := s .skipWhitespace (lval , false )
734763 if ! ok {
735764 return false
736765 }
@@ -783,7 +812,7 @@ outer:
783812 b := s .next ()
784813 switch b {
785814 case ch :
786- newline , ok := s .skipWhitespace (lval , false )
815+ _ , newline , ok := s .skipWhitespace (lval , false )
787816 if ! ok {
788817 return false
789818 }
@@ -832,7 +861,7 @@ outer:
832861 continue
833862 }
834863
835- newline , ok := s .skipWhitespace (lval , false )
864+ _ , newline , ok := s .skipWhitespace (lval , false )
836865 if ! ok {
837866 return false
838867 }
0 commit comments