1// Copyright 2012 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package ast
6
7import (
8	"bytes"
9	"cmp"
10	"fmt"
11	"go/token"
12	"slices"
13	"strings"
14)
15
16// sortComments sorts the list of comment groups in source order.
17func sortComments(list []*CommentGroup) {
18	slices.SortFunc(list, func(a, b *CommentGroup) int {
19		return cmp.Compare(a.Pos(), b.Pos())
20	})
21}
22
23// A CommentMap maps an AST node to a list of comment groups
24// associated with it. See [NewCommentMap] for a description of
25// the association.
26type CommentMap map[Node][]*CommentGroup
27
28func (cmap CommentMap) addComment(n Node, c *CommentGroup) {
29	list := cmap[n]
30	if len(list) == 0 {
31		list = []*CommentGroup{c}
32	} else {
33		list = append(list, c)
34	}
35	cmap[n] = list
36}
37
38type byInterval []Node
39
40func (a byInterval) Len() int { return len(a) }
41func (a byInterval) Less(i, j int) bool {
42	pi, pj := a[i].Pos(), a[j].Pos()
43	return pi < pj || pi == pj && a[i].End() > a[j].End()
44}
45func (a byInterval) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
46
47// nodeList returns the list of nodes of the AST n in source order.
48func nodeList(n Node) []Node {
49	var list []Node
50	Inspect(n, func(n Node) bool {
51		// don't collect comments
52		switch n.(type) {
53		case nil, *CommentGroup, *Comment:
54			return false
55		}
56		list = append(list, n)
57		return true
58	})
59
60	// Note: The current implementation assumes that Inspect traverses the
61	//       AST in depth-first and thus _source_ order. If AST traversal
62	//       does not follow source order, the sorting call below will be
63	//       required.
64	// slices.Sort(list, func(a, b Node) int {
65	//       r := cmp.Compare(a.Pos(), b.Pos())
66	//       if r != 0 {
67	//               return r
68	//       }
69	//       return cmp.Compare(a.End(), b.End())
70	// })
71
72	return list
73}
74
75// A commentListReader helps iterating through a list of comment groups.
76type commentListReader struct {
77	fset     *token.FileSet
78	list     []*CommentGroup
79	index    int
80	comment  *CommentGroup  // comment group at current index
81	pos, end token.Position // source interval of comment group at current index
82}
83
84func (r *commentListReader) eol() bool {
85	return r.index >= len(r.list)
86}
87
88func (r *commentListReader) next() {
89	if !r.eol() {
90		r.comment = r.list[r.index]
91		r.pos = r.fset.Position(r.comment.Pos())
92		r.end = r.fset.Position(r.comment.End())
93		r.index++
94	}
95}
96
97// A nodeStack keeps track of nested nodes.
98// A node lower on the stack lexically contains the nodes higher on the stack.
99type nodeStack []Node
100
101// push pops all nodes that appear lexically before n
102// and then pushes n on the stack.
103func (s *nodeStack) push(n Node) {
104	s.pop(n.Pos())
105	*s = append((*s), n)
106}
107
108// pop pops all nodes that appear lexically before pos
109// (i.e., whose lexical extent has ended before or at pos).
110// It returns the last node popped.
111func (s *nodeStack) pop(pos token.Pos) (top Node) {
112	i := len(*s)
113	for i > 0 && (*s)[i-1].End() <= pos {
114		top = (*s)[i-1]
115		i--
116	}
117	*s = (*s)[0:i]
118	return top
119}
120
121// NewCommentMap creates a new comment map by associating comment groups
122// of the comments list with the nodes of the AST specified by node.
123//
124// A comment group g is associated with a node n if:
125//
126//   - g starts on the same line as n ends
127//   - g starts on the line immediately following n, and there is
128//     at least one empty line after g and before the next node
129//   - g starts before n and is not associated to the node before n
130//     via the previous rules
131//
132// NewCommentMap tries to associate a comment group to the "largest"
133// node possible: For instance, if the comment is a line comment
134// trailing an assignment, the comment is associated with the entire
135// assignment rather than just the last operand in the assignment.
136func NewCommentMap(fset *token.FileSet, node Node, comments []*CommentGroup) CommentMap {
137	if len(comments) == 0 {
138		return nil // no comments to map
139	}
140
141	cmap := make(CommentMap)
142
143	// set up comment reader r
144	tmp := make([]*CommentGroup, len(comments))
145	copy(tmp, comments) // don't change incoming comments
146	sortComments(tmp)
147	r := commentListReader{fset: fset, list: tmp} // !r.eol() because len(comments) > 0
148	r.next()
149
150	// create node list in lexical order
151	nodes := nodeList(node)
152	nodes = append(nodes, nil) // append sentinel
153
154	// set up iteration variables
155	var (
156		p     Node           // previous node
157		pend  token.Position // end of p
158		pg    Node           // previous node group (enclosing nodes of "importance")
159		pgend token.Position // end of pg
160		stack nodeStack      // stack of node groups
161	)
162
163	for _, q := range nodes {
164		var qpos token.Position
165		if q != nil {
166			qpos = fset.Position(q.Pos()) // current node position
167		} else {
168			// set fake sentinel position to infinity so that
169			// all comments get processed before the sentinel
170			const infinity = 1 << 30
171			qpos.Offset = infinity
172			qpos.Line = infinity
173		}
174
175		// process comments before current node
176		for r.end.Offset <= qpos.Offset {
177			// determine recent node group
178			if top := stack.pop(r.comment.Pos()); top != nil {
179				pg = top
180				pgend = fset.Position(pg.End())
181			}
182			// Try to associate a comment first with a node group
183			// (i.e., a node of "importance" such as a declaration);
184			// if that fails, try to associate it with the most recent
185			// node.
186			// TODO(gri) try to simplify the logic below
187			var assoc Node
188			switch {
189			case pg != nil &&
190				(pgend.Line == r.pos.Line ||
191					pgend.Line+1 == r.pos.Line && r.end.Line+1 < qpos.Line):
192				// 1) comment starts on same line as previous node group ends, or
193				// 2) comment starts on the line immediately after the
194				//    previous node group and there is an empty line before
195				//    the current node
196				// => associate comment with previous node group
197				assoc = pg
198			case p != nil &&
199				(pend.Line == r.pos.Line ||
200					pend.Line+1 == r.pos.Line && r.end.Line+1 < qpos.Line ||
201					q == nil):
202				// same rules apply as above for p rather than pg,
203				// but also associate with p if we are at the end (q == nil)
204				assoc = p
205			default:
206				// otherwise, associate comment with current node
207				if q == nil {
208					// we can only reach here if there was no p
209					// which would imply that there were no nodes
210					panic("internal error: no comments should be associated with sentinel")
211				}
212				assoc = q
213			}
214			cmap.addComment(assoc, r.comment)
215			if r.eol() {
216				return cmap
217			}
218			r.next()
219		}
220
221		// update previous node
222		p = q
223		pend = fset.Position(p.End())
224
225		// update previous node group if we see an "important" node
226		switch q.(type) {
227		case *File, *Field, Decl, Spec, Stmt:
228			stack.push(q)
229		}
230	}
231
232	return cmap
233}
234
235// Update replaces an old node in the comment map with the new node
236// and returns the new node. Comments that were associated with the
237// old node are associated with the new node.
238func (cmap CommentMap) Update(old, new Node) Node {
239	if list := cmap[old]; len(list) > 0 {
240		delete(cmap, old)
241		cmap[new] = append(cmap[new], list...)
242	}
243	return new
244}
245
246// Filter returns a new comment map consisting of only those
247// entries of cmap for which a corresponding node exists in
248// the AST specified by node.
249func (cmap CommentMap) Filter(node Node) CommentMap {
250	umap := make(CommentMap)
251	Inspect(node, func(n Node) bool {
252		if g := cmap[n]; len(g) > 0 {
253			umap[n] = g
254		}
255		return true
256	})
257	return umap
258}
259
260// Comments returns the list of comment groups in the comment map.
261// The result is sorted in source order.
262func (cmap CommentMap) Comments() []*CommentGroup {
263	list := make([]*CommentGroup, 0, len(cmap))
264	for _, e := range cmap {
265		list = append(list, e...)
266	}
267	sortComments(list)
268	return list
269}
270
271func summary(list []*CommentGroup) string {
272	const maxLen = 40
273	var buf bytes.Buffer
274
275	// collect comments text
276loop:
277	for _, group := range list {
278		// Note: CommentGroup.Text() does too much work for what we
279		//       need and would only replace this innermost loop.
280		//       Just do it explicitly.
281		for _, comment := range group.List {
282			if buf.Len() >= maxLen {
283				break loop
284			}
285			buf.WriteString(comment.Text)
286		}
287	}
288
289	// truncate if too long
290	if buf.Len() > maxLen {
291		buf.Truncate(maxLen - 3)
292		buf.WriteString("...")
293	}
294
295	// replace any invisibles with blanks
296	bytes := buf.Bytes()
297	for i, b := range bytes {
298		switch b {
299		case '\t', '\n', '\r':
300			bytes[i] = ' '
301		}
302	}
303
304	return string(bytes)
305}
306
307func (cmap CommentMap) String() string {
308	// print map entries in sorted order
309	var nodes []Node
310	for node := range cmap {
311		nodes = append(nodes, node)
312	}
313	slices.SortFunc(nodes, func(a, b Node) int {
314		r := cmp.Compare(a.Pos(), b.Pos())
315		if r != 0 {
316			return r
317		}
318		return cmp.Compare(a.End(), b.End())
319	})
320
321	var buf strings.Builder
322	fmt.Fprintln(&buf, "CommentMap {")
323	for _, node := range nodes {
324		comment := cmap[node]
325		// print name of identifiers; print node type for other nodes
326		var s string
327		if ident, ok := node.(*Ident); ok {
328			s = ident.Name
329		} else {
330			s = fmt.Sprintf("%T", node)
331		}
332		fmt.Fprintf(&buf, "\t%p  %20s:  %s\n", node, s, summary(comment))
333	}
334	fmt.Fprintln(&buf, "}")
335	return buf.String()
336}
337