1515package parser
1616
1717import (
18+ "fmt"
19+ "slices"
1820 "strings"
1921 "unicode/utf8"
2022
@@ -31,11 +33,12 @@ type lexer struct {
3133 cursor , count int
3234
3335 braces []token.ID
34- }
3536
36- func (l * lexer ) Push (length int , kind token.Kind ) token.Token {
37- l .count ++
38- return l .Stream .Push (length , kind )
37+ prev token.ID // The last non-skippable token.
38+
39+ firstCommentSincePrev token.ID
40+ firstCommentOnSameLine bool
41+ parStart , parEnd token.ID
3942}
4043
4144func (l * lexer ) Cursor () int {
@@ -107,6 +110,194 @@ func (l *lexer) SpanFrom(start int) report.Span {
107110 return l .Span (start , l .cursor )
108111}
109112
113+ func (l * lexer ) Push (length int , kind token.Kind ) token.Token {
114+ l .count ++
115+ prev := l .prev .In (l .Context )
116+ tok := l .Stream .Push (length , kind )
117+ // NOTE: tok will have the Stream rather than l.Context as its context,
118+ // which will cause issues when we call NewCursor below.
119+ tok = tok .ID ().In (l .Context )
120+
121+ // NOTE: For the purposes of attributing comments, we need to know what line
122+ // certain offsets are at. Although we could track this as we advance cursor,
123+ // we instead use other methods to determine if two tokens are on the same
124+ // line. This is for a couple of reasons.
125+ //
126+ // 1. Getting a line number from the line index is O(log n), but we can
127+ // instead use strings.Index and friends in some places without going
128+ // quadratic.
129+ //
130+ // 2. Having to examine every character directly locks us out of using e.g.
131+ // strings.Index for certain operations, which is much more efficient
132+ // than the naive for loop.
133+
134+ switch {
135+ case tok .Kind () == token .Comment :
136+ isLineComment := strings .HasPrefix (tok .Text (), "//" )
137+
138+ if l .firstCommentSincePrev .Nil () {
139+ l .firstCommentSincePrev = tok .ID ()
140+
141+ if ! prev .Nil () && l .newLinesBetween (prev , tok , 1 ) == 0 {
142+ // The first comment is always in a paragraph by itself if there
143+ // is no newline between it and the comment start.
144+ l .firstCommentOnSameLine = true
145+ break
146+ }
147+ }
148+
149+ if ! isLineComment {
150+ // Block comments cannot be made into paragraphs, so we must
151+ // interrupt the current paragraph.
152+ l .fuseParagraph ()
153+ break
154+ }
155+
156+ // Start building up a line comment paragraph if there isn't one
157+ // currently.
158+ if l .parStart .Nil () {
159+ l .parStart = tok .ID ()
160+ }
161+ l .parEnd = tok .ID ()
162+
163+ case tok .Kind () == token .Space :
164+ // Note that line comments contain their newlines, except for a line
165+ // comment at the end of the file. Thus, seeing a single new line
166+ // means that if we are interrupting a line comment paragraph, and thus
167+ // we must fuse the current paragraph.
168+ if strings .Contains (tok .Text (), "\n " ) {
169+ l .fuseParagraph ()
170+ }
171+
172+ default :
173+ l .fuseParagraph ()
174+ //nolint:dupword // False positive due to comments describing an algorithm.
175+ if ! l .firstCommentSincePrev .Nil () {
176+ fmt .Println (l .firstCommentSincePrev .In (l .Context ), tok )
177+ comments := token .NewCursor (l .firstCommentSincePrev .In (l .Context ), tok )
178+ var first , second , penultimate , last token.Token
179+ for { // Don't use l.Done() here, that tosses comment tokens.
180+ next := comments .PopSkippable ()
181+ if next .Nil () {
182+ break
183+ } else if next .Kind () == token .Comment {
184+ switch {
185+ case first .Nil ():
186+ first = next
187+ case second .Nil ():
188+ second = next
189+ }
190+ penultimate = last
191+ last = next
192+ }
193+ }
194+ fmt .Println (first , second , penultimate , last )
195+
196+ // Determine if we need to donate first to the previous comment.
197+ var donate bool
198+ switch {
199+ case prev .Nil ():
200+ donate = false
201+ case l .firstCommentOnSameLine :
202+ donate = true
203+ case l .newLinesBetween (prev , first , 2 ) < 2 :
204+ // Now we need to check the remaining three criteria for
205+ // donate. These are:
206+ //
207+ // 1. Is there more than one comment.
208+ // 2. Is the token one of the closers ), ], or } (but not
209+ // >).
210+ // 3. The line of the current token minus the end line of
211+ // the first comment is greater than one.
212+ switch {
213+ case ! second .Nil ():
214+ donate = true
215+ case slices .Contains ([]string {")" , "]" , "}" }, tok .Text ()):
216+ donate = true
217+ case l .newLinesBetween (first , tok , 2 ) > 1 :
218+ donate = true
219+ }
220+ }
221+
222+ if donate {
223+ prev .Comments ().SetTrailing (first )
224+ first = second
225+ }
226+
227+ // The leading comment must have precisely one newline between
228+ // it and the new token.
229+ if ! first .Nil () && ! last .Nil () && l .newLinesBetween (last , tok , 2 ) == 1 {
230+ tok .Comments ().SetLeading (last )
231+ last = penultimate
232+ }
233+
234+ // Check if we have any detached comments left. This is the case
235+ // when first and last are both non-nil and <=. If we donated the
236+ // only comment, second will have been nil, so first is now nil.
237+ //
238+ // If we attached the only remaining comment after donating a
239+ // comment, we would have had the following value evolution for
240+ // first, second, penultimate and last:
241+ //
242+ // before donate: a, b, a, b
243+ // after donate: b, b, a, b
244+ // after attach: b, b, a, a
245+ //
246+ // Thus, when we check b < a, we find that we have nothing left to
247+ // attach.
248+ if ! first .Nil () && ! last .Nil () && first .ID () <= last .ID () {
249+ tok .Comments ().SetDetachedRange (first , last )
250+ }
251+
252+ l .firstCommentSincePrev = 0
253+ l .firstCommentOnSameLine = false
254+ }
255+
256+ l .prev = tok .ID ()
257+ }
258+ return tok
259+ }
260+
261+ func (l * lexer ) fuseParagraph () {
262+ if ! l .parStart .Nil () && l .parEnd != l .parStart {
263+ token .Fuse (
264+ l .parStart .In (l .Context ),
265+ l .parEnd .In (l .Context ),
266+ )
267+ }
268+ l .parStart = 0
269+ }
270+
271+ // newLinesBetween counts the number of \n characters between the end of a
272+ // and the start of b, up to max.
273+ //
274+ // The final rune of a is included in this count, since comments may end in a
275+ // \n rune.
276+ //
277+ //nolint:revive,predeclared // Complains about redefining max.
278+ func (l * lexer ) newLinesBetween (a , b token.Token , max int ) int {
279+ end := a .Span ().End
280+ if end != 0 {
281+ // Account for the final rune of a.
282+ end --
283+ }
284+
285+ start := b .Span ().Start
286+ between := l .Text ()[end :start ]
287+
288+ var total int
289+ for total < max {
290+ var found bool
291+ _ , between , found = strings .Cut (between , "\n " )
292+ if ! found {
293+ break
294+ }
295+
296+ total ++
297+ }
298+ return total
299+ }
300+
110301// mustProgress returns a progress checker for this lexer.
111302func (l * lexer ) mustProgress () mustProgress {
112303 return mustProgress {l , - 1 }
0 commit comments