1// Copyright 2024 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package relnote
6
7import (
8	"fmt"
9	"strings"
10	"unicode"
11	"unicode/utf8"
12
13	"golang.org/x/mod/module"
14	md "rsc.io/markdown"
15)
16
17// addSymbolLinks looks for text like [Buffer] and
18// [math.Max] and replaces them with links to standard library
19// symbols and packages.
20// It uses the given default package for links without a package.
21func addSymbolLinks(doc *md.Document, defaultPackage string) {
22	addSymbolLinksBlocks(doc.Blocks, defaultPackage)
23}
24
25func addSymbolLinksBlocks(bs []md.Block, defaultPackage string) {
26	for _, b := range bs {
27		addSymbolLinksBlock(b, defaultPackage)
28	}
29}
30
31func addSymbolLinksBlock(b md.Block, defaultPackage string) {
32	switch b := b.(type) {
33	case *md.Heading:
34		addSymbolLinksBlock(b.Text, defaultPackage)
35	case *md.Text:
36		b.Inline = addSymbolLinksInlines(b.Inline, defaultPackage)
37	case *md.List:
38		addSymbolLinksBlocks(b.Items, defaultPackage)
39	case *md.Item:
40		addSymbolLinksBlocks(b.Blocks, defaultPackage)
41	case *md.Paragraph:
42		addSymbolLinksBlock(b.Text, defaultPackage)
43	case *md.Quote:
44		addSymbolLinksBlocks(b.Blocks, defaultPackage)
45	// no links in these blocks
46	case *md.CodeBlock:
47	case *md.HTMLBlock:
48	case *md.Empty:
49	case *md.ThematicBreak:
50	default:
51		panic(fmt.Sprintf("unknown block type %T", b))
52	}
53}
54
55// addSymbolLinksInlines looks for symbol links in the slice of inline markdown
56// elements. It returns a new slice of inline elements with links added.
57func addSymbolLinksInlines(ins []md.Inline, defaultPackage string) []md.Inline {
58	ins = splitAtBrackets(ins)
59	var res []md.Inline
60	for i := 0; i < len(ins); i++ {
61		if txt := symbolLinkText(i, ins); txt != "" {
62			link, ok := symbolLink(txt, defaultPackage)
63			if ok {
64				res = append(res, link)
65				i += 2
66				continue
67			}
68		}
69
70		// Handle inline elements with nested content.
71		switch in := ins[i].(type) {
72		case *md.Strong:
73			res = append(res, &md.Strong{
74				Marker: in.Marker,
75				Inner:  addSymbolLinksInlines(in.Inner, defaultPackage),
76			})
77
78		case *md.Emph:
79			res = append(res, &md.Emph{
80				Marker: in.Marker,
81				Inner:  addSymbolLinksInlines(in.Inner, defaultPackage),
82			})
83		// Currently we don't support Del nodes because we don't enable the Strikethrough
84		// extension. But this can't hurt.
85		case *md.Del:
86			res = append(res, &md.Del{
87				Marker: in.Marker,
88				Inner:  addSymbolLinksInlines(in.Inner, defaultPackage),
89			})
90		// Don't look for links in anything else.
91		default:
92			res = append(res, in)
93		}
94	}
95	return res
96}
97
98// splitAtBrackets rewrites ins so that every '[' and ']' is the only character
99// of its Plain.
100// For example, the element
101//
102//	[Plain("the [Buffer] is")]
103//
104// is rewritten to
105//
106//	[Plain("the "), Plain("["), Plain("Buffer"), Plain("]"), Plain(" is")]
107//
108// This transformation simplifies looking for symbol links.
109func splitAtBrackets(ins []md.Inline) []md.Inline {
110	var res []md.Inline
111	for _, in := range ins {
112		if p, ok := in.(*md.Plain); ok {
113			text := p.Text
114			for len(text) > 0 {
115				i := strings.IndexAny(text, "[]")
116				// If there are no brackets, the remaining text is a single
117				// Plain and we are done.
118				if i < 0 {
119					res = append(res, &md.Plain{Text: text})
120					break
121				}
122				// There is a bracket; make Plains for it and the text before it (if any).
123				if i > 0 {
124					res = append(res, &md.Plain{Text: text[:i]})
125				}
126				res = append(res, &md.Plain{Text: text[i : i+1]})
127				text = text[i+1:]
128			}
129		} else {
130			res = append(res, in)
131		}
132	}
133	return res
134}
135
136// symbolLinkText returns the text of a possible symbol link.
137// It is given a slice of Inline elements and an index into the slice.
138// If the index refers to a sequence of elements
139//
140//	[Plain("["), Plain_or_Code(text), Plain("]")]
141//
142// and the brackets are adjacent to the right kind of runes for a link, then
143// symbolLinkText returns the text of the middle element.
144// Otherwise it returns the empty string.
145func symbolLinkText(i int, ins []md.Inline) string {
146	// plainText returns the text of ins[j] if it is a Plain element, or "" otherwise.
147	plainText := func(j int) string {
148		if j < 0 || j >= len(ins) {
149			return ""
150		}
151		if p, ok := ins[j].(*md.Plain); ok {
152			return p.Text
153		}
154		return ""
155	}
156
157	// ins[i] must be a "[".
158	if plainText(i) != "[" {
159		return ""
160	}
161	// The open bracket must be preceeded by a link-adjacent rune (or by nothing).
162	if t := plainText(i - 1); t != "" {
163		r, _ := utf8.DecodeLastRuneInString(t)
164		if !isLinkAdjacentRune(r) {
165			return ""
166		}
167	}
168	// The element after the next must be a ']'.
169	if plainText(i+2) != "]" {
170		return ""
171	}
172	// The ']' must be followed by a link-adjacent rune (or by nothing).
173	if t := plainText(i + 3); t != "" {
174		r, _ := utf8.DecodeRuneInString(t)
175		if !isLinkAdjacentRune(r) {
176			return ""
177		}
178	}
179
180	// ins[i+1] must be a Plain or a Code.
181	// Its text is the symbol to link to.
182	if i+1 >= len(ins) {
183		return ""
184	}
185	switch in := ins[i+1].(type) {
186	case *md.Plain:
187		return in.Text
188	case *md.Code:
189		return in.Text
190	default:
191		return ""
192	}
193}
194
195// symbolLink converts s into a Link and returns it and true, or nil and false if
196// s is not a valid link or is surrounded by runes that disqualify it from being
197// converted to a link.
198//
199// The argument s is the text between '[' and ']'.
200func symbolLink(s, defaultPackage string) (md.Inline, bool) {
201	pkg, sym, ok := splitRef(s)
202	if !ok {
203		return nil, false
204	}
205	if pkg == "" {
206		if defaultPackage == "" {
207			return nil, false
208		}
209		pkg = defaultPackage
210	}
211	if sym != "" {
212		sym = "#" + sym
213	}
214	return &md.Link{
215		Inner: []md.Inline{&md.Code{Text: s}},
216		URL:   fmt.Sprintf("/pkg/%s%s", pkg, sym),
217	}, true
218}
219
// isLinkAdjacentRune reports whether r may appear immediately next to a
// symbol link: a space, tab, newline, or any Unicode punctuation.
// The logic is the same as the go/doc/comment package.
func isLinkAdjacentRune(r rune) bool {
	switch r {
	case ' ', '\t', '\n':
		return true
	}
	return unicode.IsPunct(r)
}
225
226// splitRef splits s into a package and possibly a symbol.
227// Examples:
228//
229//	splitRef("math.Max") => ("math", "Max", true)
230//	splitRef("bytes.Buffer.String") => ("bytes", "Buffer.String", true)
231//	splitRef("math") => ("math", "", true)
232func splitRef(s string) (pkg, name string, ok bool) {
233	s = strings.TrimPrefix(s, "*")
234	pkg, name, ok = splitDocName(s)
235	var recv string
236	if ok {
237		pkg, recv, _ = splitDocName(pkg)
238	}
239	if pkg != "" {
240		if err := module.CheckImportPath(pkg); err != nil {
241			return "", "", false
242		}
243	}
244	if recv != "" {
245		name = recv + "." + name
246	}
247	return pkg, name, true
248}
249
250// The following functions were copied from go/doc/comment/parse.go.
251
252// If text is of the form before.Name, where Name is a capitalized Go identifier,
253// then splitDocName returns before, name, true.
254// Otherwise it returns text, "", false.
255func splitDocName(text string) (before, name string, foundDot bool) {
256	i := strings.LastIndex(text, ".")
257	name = text[i+1:]
258	if !isName(name) {
259		return text, "", false
260	}
261	if i >= 0 {
262		before = text[:i]
263	}
264	return before, name, true
265}
266
267// isName reports whether s is a capitalized Go identifier (like Name).
268func isName(s string) bool {
269	t, ok := ident(s)
270	if !ok || t != s {
271		return false
272	}
273	r, _ := utf8.DecodeRuneInString(s)
274	return unicode.IsUpper(r)
275}
276
277// ident checks whether s begins with a Go identifier.
278// If so, it returns the identifier, which is a prefix of s, and ok == true.
279// Otherwise it returns "", false.
280// The caller should skip over the first len(id) bytes of s
281// before further processing.
282func ident(s string) (id string, ok bool) {
283	// Scan [\pL_][\pL_0-9]*
284	n := 0
285	for n < len(s) {
286		if c := s[n]; c < utf8.RuneSelf {
287			if isIdentASCII(c) && (n > 0 || c < '0' || c > '9') {
288				n++
289				continue
290			}
291			break
292		}
293		r, nr := utf8.DecodeRuneInString(s[n:])
294		if unicode.IsLetter(r) {
295			n += nr
296			continue
297		}
298		break
299	}
300	return s[:n], n > 0
301}
302
// isIdentASCII reports whether c is a byte that may appear in an ASCII
// identifier: a letter, a decimal digit, or an underscore.
// It returns false for every byte outside the ASCII range.
func isIdentASCII(c byte) bool {
	switch {
	case 'a' <= c && c <= 'z',
		'A' <= c && c <= 'Z',
		'0' <= c && c <= '9',
		c == '_':
		return true
	}
	return false
}
318