1// Copyright 2022 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package comment
6
7import (
8	"slices"
9	"strings"
10	"unicode"
11	"unicode/utf8"
12)
13
// A Doc is a parsed Go doc comment.
// It is the value returned by [Parser.Parse].
type Doc struct {
	// Content is the sequence of content blocks in the comment.
	Content []Block

	// Links is the link definitions in the comment.
	// The parser appends every definition it recognizes,
	// including repeated definitions of the same link text.
	Links []*LinkDef
}
22
// A LinkDef is a single link definition,
// written in a comment as a line of the form "[Text]: URL".
type LinkDef struct {
	Text string // the link text
	URL  string // the link URL
	Used bool   // whether the comment uses the definition
}
29
// A Block is block-level content in a doc comment,
// one of [*Code], [*Heading], [*List], or [*Paragraph].
type Block interface {
	// block is an unexported marker method with no behavior;
	// it ties the interface to the block types of this package.
	block()
}
35
// A Heading is a doc comment heading.
type Heading struct {
	Text []Text // the heading text
}

// block marks *Heading as a Block.
func (*Heading) block() {}
42
// A List is a numbered or bullet list.
// Lists are always non-empty: len(Items) > 0.
// In a numbered list, every Items[i].Number is a non-empty string.
// In a bullet list, every Items[i].Number is an empty string.
type List struct {
	// Items is the list items.
	Items []*ListItem

	// ForceBlankBefore indicates that the list must be
	// preceded by a blank line when reformatting the comment,
	// overriding the usual conditions. See the BlankBefore method.
	//
	// The comment parser sets ForceBlankBefore for any list
	// that is preceded by a blank line, to make sure
	// the blank line is preserved when printing.
	ForceBlankBefore bool

	// ForceBlankBetween indicates that list items must be
	// separated by blank lines when reformatting the comment,
	// overriding the usual conditions. See the BlankBetween method.
	//
	// The comment parser sets ForceBlankBetween for any list
	// that has a blank line between any two of its items, to make sure
	// the blank lines are preserved when printing.
	ForceBlankBetween bool
}

// block marks *List as a Block.
func (*List) block() {}
71
72// BlankBefore reports whether a reformatting of the comment
73// should include a blank line before the list.
74// The default rule is the same as for [BlankBetween]:
75// if the list item content contains any blank lines
76// (meaning at least one item has multiple paragraphs)
77// then the list itself must be preceded by a blank line.
78// A preceding blank line can be forced by setting [List].ForceBlankBefore.
79func (l *List) BlankBefore() bool {
80	return l.ForceBlankBefore || l.BlankBetween()
81}
82
83// BlankBetween reports whether a reformatting of the comment
84// should include a blank line between each pair of list items.
85// The default rule is that if the list item content contains any blank lines
86// (meaning at least one item has multiple paragraphs)
87// then list items must themselves be separated by blank lines.
88// Blank line separators can be forced by setting [List].ForceBlankBetween.
89func (l *List) BlankBetween() bool {
90	if l.ForceBlankBetween {
91		return true
92	}
93	for _, item := range l.Items {
94		if len(item.Content) != 1 {
95			// Unreachable for parsed comments today,
96			// since the only way to get multiple item.Content
97			// is multiple paragraphs, which must have been
98			// separated by a blank line.
99			return true
100		}
101	}
102	return false
103}
104
// A ListItem is a single item in a numbered or bullet list.
type ListItem struct {
	// Number is a decimal string in a numbered list
	// or an empty string in a bullet list.
	// The parser stores the digits exactly as written in the comment,
	// without the trailing "." or ")".
	Number string // "1", "2", ...; "" for bullet list

	// Content is the list content.
	// Currently, restrictions in the parser and printer
	// require every element of Content to be a *Paragraph.
	Content []Block // Content of this item.
}
116
// A Paragraph is a paragraph of text.
type Paragraph struct {
	// Text is the text-level content of the paragraph.
	Text []Text
}

// block marks *Paragraph as a Block.
func (*Paragraph) block() {}
123
// A Code is a preformatted code block.
type Code struct {
	// Text is the preformatted text, ending with a newline character.
	// It may be multiple lines, each of which ends with a newline character.
	// It is never empty, nor does it start or end with a blank line.
	Text string
}

// block marks *Code as a Block.
func (*Code) block() {}
133
// A Text is text-level content in a doc comment,
// one of [Plain], [Italic], [*Link], or [*DocLink].
type Text interface {
	// text is an unexported marker method with no behavior;
	// it ties the interface to the text types of this package.
	text()
}
139
// A Plain is a string rendered as plain text (not italicized).
type Plain string

// text marks Plain as a Text.
func (Plain) text() {}
144
// An Italic is a string rendered as italicized text.
type Italic string

// text marks Italic as a Text.
func (Italic) text() {}
149
// A Link is a link to a specific URL.
type Link struct {
	Auto bool   // is this an automatic (implicit) link of a literal URL?
	Text []Text // text of link
	URL  string // target URL of link
}

// text marks *Link as a Text.
func (*Link) text() {}
158
// A DocLink is a link to documentation for a Go package or symbol.
type DocLink struct {
	Text []Text // text of link

	// ImportPath, Recv, and Name identify the Go package or symbol
	// that is the link target. The potential combinations of
	// non-empty fields are:
	//  - ImportPath: a link to another package
	//  - ImportPath, Name: a link to a const, func, type, or var in another package
	//  - ImportPath, Recv, Name: a link to a method in another package
	//  - Name: a link to a const, func, type, or var in this package
	//  - Recv, Name: a link to a method in this package
	ImportPath string // import path
	Recv       string // receiver type, without any pointer star, for methods
	Name       string // const, func, type, var, or method name
}

// text marks *DocLink as a Text.
func (*DocLink) text() {}
177
// A Parser is a doc comment parser.
// The fields in the struct can be filled in before calling [Parser.Parse]
// in order to customize the details of the parsing process.
// Each field may be left nil; the effect of a nil value is described
// on the field.
type Parser struct {
	// Words is a map of Go identifier words that
	// should be italicized and potentially linked.
	// If Words[w] is the empty string, then the word w
	// is only italicized. Otherwise it is linked, using
	// Words[w] as the link target.
	// Words corresponds to the [go/doc.ToHTML] words parameter.
	//
	// A nil map behaves like an empty map: no word is italicized or linked.
	Words map[string]string

	// LookupPackage resolves a package name to an import path.
	//
	// If LookupPackage(name) returns ok == true, then [name]
	// (or [name.Sym] or [name.Sym.Method])
	// is considered a documentation link to importPath's package docs.
	// It is valid to return "", true, in which case name is considered
	// to refer to the current package.
	//
	// If LookupPackage(name) returns ok == false,
	// then [name] (or [name.Sym] or [name.Sym.Method])
	// will not be considered a documentation link,
	// except in the case where name is the full (but single-element) import path
	// of a package in the standard library, such as in [math] or [io.Reader].
	// LookupPackage is still called for such names,
	// in order to permit references to imports of other packages
	// with the same package names.
	//
	// Setting LookupPackage to nil is equivalent to setting it to
	// a function that always returns "", false.
	LookupPackage func(name string) (importPath string, ok bool)

	// LookupSym reports whether a symbol name or method name
	// exists in the current package.
	//
	// If LookupSym("", "Name") returns true, then [Name]
	// is considered a documentation link for a const, func, type, or var.
	//
	// Similarly, if LookupSym("Recv", "Name") returns true,
	// then [Recv.Name] is considered a documentation link for
	// type Recv's method Name.
	//
	// Setting LookupSym to nil is equivalent to setting it to a function
	// that always returns false.
	LookupSym func(recv, name string) (ok bool)
}
225
// parseDoc is parsing state for a single doc comment.
// It embeds the Parser that configured the parse and the Doc being built,
// so the fields of both are accessible directly.
type parseDoc struct {
	*Parser
	*Doc
	// links maps link text to its definition.
	// When a text is defined more than once, the first definition is kept.
	links map[string]*LinkDef
	// lines is the comment text, split into lines and unindented.
	lines []string
	// lookupSym is Parser.LookupSym, or a function that always
	// returns false when Parser.LookupSym is nil.
	lookupSym func(recv, name string) bool
}
234
235// lookupPkg is called to look up the pkg in [pkg], [pkg.Name], and [pkg.Name.Recv].
236// If pkg has a slash, it is assumed to be the full import path and is returned with ok = true.
237//
238// Otherwise, pkg is probably a simple package name like "rand" (not "crypto/rand" or "math/rand").
239// d.LookupPackage provides a way for the caller to allow resolving such names with reference
240// to the imports in the surrounding package.
241//
242// There is one collision between these two cases: single-element standard library names
243// like "math" are full import paths but don't contain slashes. We let d.LookupPackage have
244// the first chance to resolve it, in case there's a different package imported as math,
245// and otherwise we refer to a built-in list of single-element standard library package names.
246func (d *parseDoc) lookupPkg(pkg string) (importPath string, ok bool) {
247	if strings.Contains(pkg, "/") { // assume a full import path
248		if validImportPath(pkg) {
249			return pkg, true
250		}
251		return "", false
252	}
253	if d.LookupPackage != nil {
254		// Give LookupPackage a chance.
255		if path, ok := d.LookupPackage(pkg); ok {
256			return path, true
257		}
258	}
259	return DefaultLookupPackage(pkg)
260}
261
262func isStdPkg(path string) bool {
263	_, ok := slices.BinarySearch(stdPkgs, path)
264	return ok
265}
266
267// DefaultLookupPackage is the default package lookup
268// function, used when [Parser.LookupPackage] is nil.
269// It recognizes names of the packages from the standard
270// library with single-element import paths, such as math,
271// which would otherwise be impossible to name.
272//
273// Note that the go/doc package provides a more sophisticated
274// lookup based on the imports used in the current package.
275func DefaultLookupPackage(name string) (importPath string, ok bool) {
276	if isStdPkg(name) {
277		return name, true
278	}
279	return "", false
280}
281
282// Parse parses the doc comment text and returns the *[Doc] form.
283// Comment markers (/* // and */) in the text must have already been removed.
284func (p *Parser) Parse(text string) *Doc {
285	lines := unindent(strings.Split(text, "\n"))
286	d := &parseDoc{
287		Parser:    p,
288		Doc:       new(Doc),
289		links:     make(map[string]*LinkDef),
290		lines:     lines,
291		lookupSym: func(recv, name string) bool { return false },
292	}
293	if p.LookupSym != nil {
294		d.lookupSym = p.LookupSym
295	}
296
297	// First pass: break into block structure and collect known links.
298	// The text is all recorded as Plain for now.
299	var prev span
300	for _, s := range parseSpans(lines) {
301		var b Block
302		switch s.kind {
303		default:
304			panic("go/doc/comment: internal error: unknown span kind")
305		case spanList:
306			b = d.list(lines[s.start:s.end], prev.end < s.start)
307		case spanCode:
308			b = d.code(lines[s.start:s.end])
309		case spanOldHeading:
310			b = d.oldHeading(lines[s.start])
311		case spanHeading:
312			b = d.heading(lines[s.start])
313		case spanPara:
314			b = d.paragraph(lines[s.start:s.end])
315		}
316		if b != nil {
317			d.Content = append(d.Content, b)
318		}
319		prev = s
320	}
321
322	// Second pass: interpret all the Plain text now that we know the links.
323	for _, b := range d.Content {
324		switch b := b.(type) {
325		case *Paragraph:
326			b.Text = d.parseLinkedText(string(b.Text[0].(Plain)))
327		case *List:
328			for _, i := range b.Items {
329				for _, c := range i.Content {
330					p := c.(*Paragraph)
331					p.Text = d.parseLinkedText(string(p.Text[0].(Plain)))
332				}
333			}
334		}
335	}
336
337	return d.Doc
338}
339
// A span represents a single span of comment lines (lines[start:end])
// of an identified kind (code, heading, paragraph, and so on).
type span struct {
	start int      // index of the first line of the span
	end   int      // index just past the last line of the span
	kind  spanKind // what the lines were identified as
}
347
// A spanKind describes the kind of span.
type spanKind int

const (
	// The zero value is left unnamed, so an unset kind
	// matches none of the kinds below.
	_ spanKind = iota
	spanCode       // indented lines that do not start with a list marker
	spanHeading    // a single "# "-style heading line
	spanList       // indented lines starting with a list marker
	spanOldHeading // a single line accepted by isOldHeading
	spanPara       // any other run of unindented lines
)
359
// parseSpans splits lines into a sequence of spans, in order,
// classifying each as code, list, heading, old-style heading, or paragraph.
// Blank lines between spans are skipped and belong to no span.
//
// Indentation decides the basic split: a run of indented lines
// (possibly containing blank lines) is a list or code span,
// and a run of unindented lines is a paragraph or heading span.
// Two heuristics repair common mistakes in comments that have not
// been gofmt'ed: unindented list items or an unindented line ending
// in { or \ directly followed by indented lines are treated as if
// they were indented (tracked by forceIndent), and an unindented line
// starting with } directly after an indented span is added to that span.
func parseSpans(lines []string) []span {
	var spans []span

	// The loop may process a line twice: once as unindented
	// and again forced indented. So the maximum expected
	// number of iterations is 2*len(lines). The repeating logic
	// can be subtle, though, and to protect against introduction
	// of infinite loops in future changes, we watch to see that
	// we are not looping too much. A panic is better than a
	// quiet infinite loop.
	watchdog := 2 * len(lines)

	i := 0
	// Lines before index forceIndent are treated as indented
	// even if they are not.
	forceIndent := 0
Spans:
	for {
		// Skip blank lines.
		for i < len(lines) && lines[i] == "" {
			i++
		}
		if i >= len(lines) {
			break
		}
		if watchdog--; watchdog < 0 {
			panic("go/doc/comment: internal error: not making progress")
		}

		var kind spanKind
		start := i
		end := i
		if i < forceIndent || indented(lines[i]) {
			// Indented (or force indented).
			// Ends before next unindented. (Blank lines are OK.)
			// If this is an unindented list that we are heuristically treating as indented,
			// then accept unindented list item lines up to the first blank lines.
			// The heuristic is disabled at blank lines to contain its effect
			// to non-gofmt'ed sections of the comment.
			unindentedListOK := isList(lines[i]) && i < forceIndent
			i++
			for i < len(lines) && (lines[i] == "" || i < forceIndent || indented(lines[i]) || (unindentedListOK && isList(lines[i]))) {
				if lines[i] == "" {
					unindentedListOK = false
				}
				i++
			}

			// Drop trailing blank lines.
			end = i
			for end > start && lines[end-1] == "" {
				end--
			}

			// If indented lines are followed (without a blank line)
			// by an unindented line ending in a brace,
			// take that one line too. This fixes the common mistake
			// of pasting in something like
			//
			// func main() {
			//	fmt.Println("hello, world")
			// }
			//
			// and forgetting to indent it.
			// The heuristic will never trigger on a gofmt'ed comment,
			// because any gofmt'ed code block or list would be
			// followed by a blank line or end of comment.
			if end < len(lines) && strings.HasPrefix(lines[end], "}") {
				end++
			}

			// The first line alone decides between list and code.
			if isList(lines[start]) {
				kind = spanList
			} else {
				kind = spanCode
			}
		} else {
			// Unindented. Ends at next blank or indented line.
			i++
			for i < len(lines) && lines[i] != "" && !indented(lines[i]) {
				i++
			}
			end = i

			// If unindented lines are followed (without a blank line)
			// by an indented line that would start a code block,
			// check whether the final unindented lines
			// should be left for the indented section.
			// This can happen for the common mistakes of
			// unindented code or unindented lists.
			// The heuristic will never trigger on a gofmt'ed comment,
			// because any gofmt'ed code block would have a blank line
			// preceding it after the unindented lines.
			if i < len(lines) && lines[i] != "" && !isList(lines[i]) {
				switch {
				case isList(lines[i-1]):
					// If the final unindented line looks like a list item,
					// this may be the first indented line wrap of
					// a mistakenly unindented list.
					// Leave all the unindented list items.
					forceIndent = end
					end--
					for end > start && isList(lines[end-1]) {
						end--
					}

				case strings.HasSuffix(lines[i-1], "{") || strings.HasSuffix(lines[i-1], `\`):
					// If the final unindented line ended in { or \
					// it is probably the start of a misindented code block.
					// Give the user a single line fix.
					// Often that's enough; if not, the user can fix the others themselves.
					forceIndent = end
					end--
				}

				// Nothing is left for this span: every line was handed
				// to the indented section. Rescan from start, where the
				// lines now count as indented because of forceIndent.
				if start == end && forceIndent > start {
					i = start
					continue Spans
				}
			}

			// Span is either paragraph or heading.
			if end-start == 1 && isHeading(lines[start]) {
				kind = spanHeading
			} else if end-start == 1 && isOldHeading(lines[start], lines, start) {
				kind = spanOldHeading
			} else {
				kind = spanPara
			}
		}

		spans = append(spans, span{start, end, kind})
		i = end
	}

	return spans
}
495
// indented reports whether line is indented,
// meaning its first byte is a space or a tab.
// The empty line is not indented.
func indented(line string) bool {
	if len(line) == 0 {
		return false
	}
	switch line[0] {
	case ' ', '\t':
		return true
	}
	return false
}
501
502// unindent removes any common space/tab prefix
503// from each line in lines, returning a copy of lines in which
504// those prefixes have been trimmed from each line.
505// It also replaces any lines containing only spaces with blank lines (empty strings).
506func unindent(lines []string) []string {
507	// Trim leading and trailing blank lines.
508	for len(lines) > 0 && isBlank(lines[0]) {
509		lines = lines[1:]
510	}
511	for len(lines) > 0 && isBlank(lines[len(lines)-1]) {
512		lines = lines[:len(lines)-1]
513	}
514	if len(lines) == 0 {
515		return nil
516	}
517
518	// Compute and remove common indentation.
519	prefix := leadingSpace(lines[0])
520	for _, line := range lines[1:] {
521		if !isBlank(line) {
522			prefix = commonPrefix(prefix, leadingSpace(line))
523		}
524	}
525
526	out := make([]string, len(lines))
527	for i, line := range lines {
528		line = strings.TrimPrefix(line, prefix)
529		if strings.TrimSpace(line) == "" {
530			line = ""
531		}
532		out[i] = line
533	}
534	for len(out) > 0 && out[0] == "" {
535		out = out[1:]
536	}
537	for len(out) > 0 && out[len(out)-1] == "" {
538		out = out[:len(out)-1]
539	}
540	return out
541}
542
// isBlank reports whether s is a blank line:
// either the empty string or a lone newline character.
// A line holding only spaces or tabs does not count as blank here.
func isBlank(s string) bool {
	switch s {
	case "", "\n":
		return true
	}
	return false
}
547
// commonPrefix returns the longest common prefix of a and b.
// The comparison is byte by byte and the result is a slice of a.
func commonPrefix(a, b string) string {
	limit := len(a)
	if len(b) < limit {
		limit = len(b)
	}
	for n := 0; n < limit; n++ {
		if a[n] != b[n] {
			return a[:n]
		}
	}
	return a[:limit]
}
556
// leadingSpace returns the longest prefix of s consisting of spaces and tabs.
// If s consists only of spaces and tabs, the result is s itself.
func leadingSpace(s string) string {
	for n := 0; n < len(s); n++ {
		if c := s[n]; c != ' ' && c != '\t' {
			return s[:n]
		}
	}
	return s
}
565
566// isOldHeading reports whether line is an old-style section heading.
567// line is all[off].
568func isOldHeading(line string, all []string, off int) bool {
569	if off <= 0 || all[off-1] != "" || off+2 >= len(all) || all[off+1] != "" || leadingSpace(all[off+2]) != "" {
570		return false
571	}
572
573	line = strings.TrimSpace(line)
574
575	// a heading must start with an uppercase letter
576	r, _ := utf8.DecodeRuneInString(line)
577	if !unicode.IsLetter(r) || !unicode.IsUpper(r) {
578		return false
579	}
580
581	// it must end in a letter or digit:
582	r, _ = utf8.DecodeLastRuneInString(line)
583	if !unicode.IsLetter(r) && !unicode.IsDigit(r) {
584		return false
585	}
586
587	// exclude lines with illegal characters. we allow "(),"
588	if strings.ContainsAny(line, ";:!?+*/=[]{}_^°&§~%#@<\">\\") {
589		return false
590	}
591
592	// allow "'" for possessive "'s" only
593	for b := line; ; {
594		var ok bool
595		if _, b, ok = strings.Cut(b, "'"); !ok {
596			break
597		}
598		if b != "s" && !strings.HasPrefix(b, "s ") {
599			return false // ' not followed by s and then end-of-word
600		}
601	}
602
603	// allow "." when followed by non-space
604	for b := line; ; {
605		var ok bool
606		if _, b, ok = strings.Cut(b, "."); !ok {
607			break
608		}
609		if b == "" || strings.HasPrefix(b, " ") {
610			return false // not followed by non-space
611		}
612	}
613
614	return true
615}
616
617// oldHeading returns the *Heading for the given old-style section heading line.
618func (d *parseDoc) oldHeading(line string) Block {
619	return &Heading{Text: []Text{Plain(strings.TrimSpace(line))}}
620}
621
// isHeading reports whether line is a new-style section heading:
// a '#' in the first column, followed by a space or tab,
// followed by at least one non-space character.
func isHeading(line string) bool {
	if len(line) < 2 || line[0] != '#' {
		return false
	}
	if line[1] != ' ' && line[1] != '\t' {
		return false
	}
	// Reject a marker with nothing but white space after it.
	return strings.TrimSpace(line) != "#"
}
629
630// heading returns the *Heading for the given new-style section heading line.
631func (d *parseDoc) heading(line string) Block {
632	return &Heading{Text: []Text{Plain(strings.TrimSpace(line[1:]))}}
633}
634
635// code returns a code block built from the lines.
636func (d *parseDoc) code(lines []string) *Code {
637	body := unindent(lines)
638	body = append(body, "") // to get final \n from Join
639	return &Code{Text: strings.Join(body, "\n")}
640}
641
642// paragraph returns a paragraph block built from the lines.
643// If the lines are link definitions, paragraph adds them to d and returns nil.
644func (d *parseDoc) paragraph(lines []string) Block {
645	// Is this a block of known links? Handle.
646	var defs []*LinkDef
647	for _, line := range lines {
648		def, ok := parseLink(line)
649		if !ok {
650			goto NoDefs
651		}
652		defs = append(defs, def)
653	}
654	for _, def := range defs {
655		d.Links = append(d.Links, def)
656		if d.links[def.Text] == nil {
657			d.links[def.Text] = def
658		}
659	}
660	return nil
661NoDefs:
662
663	return &Paragraph{Text: []Text{Plain(strings.Join(lines, "\n"))}}
664}
665
666// parseLink parses a single link definition line:
667//
668//	[text]: url
669//
670// It returns the link definition and whether the line was well formed.
671func parseLink(line string) (*LinkDef, bool) {
672	if line == "" || line[0] != '[' {
673		return nil, false
674	}
675	i := strings.Index(line, "]:")
676	if i < 0 || i+3 >= len(line) || (line[i+2] != ' ' && line[i+2] != '\t') {
677		return nil, false
678	}
679
680	text := line[1:i]
681	url := strings.TrimSpace(line[i+3:])
682	j := strings.Index(url, "://")
683	if j < 0 || !isScheme(url[:j]) {
684		return nil, false
685	}
686
687	// Line has right form and has valid scheme://.
688	// That's good enough for us - we are not as picky
689	// about the characters beyond the :// as we are
690	// when extracting inline URLs from text.
691	return &LinkDef{Text: text, URL: url}, true
692}
693
694// list returns a list built from the indented lines,
695// using forceBlankBefore as the value of the List's ForceBlankBefore field.
696func (d *parseDoc) list(lines []string, forceBlankBefore bool) *List {
697	num, _, _ := listMarker(lines[0])
698	var (
699		list *List = &List{ForceBlankBefore: forceBlankBefore}
700		item *ListItem
701		text []string
702	)
703	flush := func() {
704		if item != nil {
705			if para := d.paragraph(text); para != nil {
706				item.Content = append(item.Content, para)
707			}
708		}
709		text = nil
710	}
711
712	for _, line := range lines {
713		if n, after, ok := listMarker(line); ok && (n != "") == (num != "") {
714			// start new list item
715			flush()
716
717			item = &ListItem{Number: n}
718			list.Items = append(list.Items, item)
719			line = after
720		}
721		line = strings.TrimSpace(line)
722		if line == "" {
723			list.ForceBlankBetween = true
724			flush()
725			continue
726		}
727		text = append(text, strings.TrimSpace(line))
728	}
729	flush()
730	return list
731}
732
733// listMarker parses the line as beginning with a list marker.
734// If it can do that, it returns the numeric marker ("" for a bullet list),
735// the rest of the line, and ok == true.
736// Otherwise, it returns "", "", false.
737func listMarker(line string) (num, rest string, ok bool) {
738	line = strings.TrimSpace(line)
739	if line == "" {
740		return "", "", false
741	}
742
743	// Can we find a marker?
744	if r, n := utf8.DecodeRuneInString(line); r == '•' || r == '*' || r == '+' || r == '-' {
745		num, rest = "", line[n:]
746	} else if '0' <= line[0] && line[0] <= '9' {
747		n := 1
748		for n < len(line) && '0' <= line[n] && line[n] <= '9' {
749			n++
750		}
751		if n >= len(line) || (line[n] != '.' && line[n] != ')') {
752			return "", "", false
753		}
754		num, rest = line[:n], line[n+1:]
755	} else {
756		return "", "", false
757	}
758
759	if !indented(rest) || strings.TrimSpace(rest) == "" {
760		return "", "", false
761	}
762
763	return num, rest, true
764}
765
766// isList reports whether the line is the first line of a list,
767// meaning starts with a list marker after any indentation.
768// (The caller is responsible for checking the line is indented, as appropriate.)
769func isList(line string) bool {
770	_, _, ok := listMarker(line)
771	return ok
772}
773
774// parseLinkedText parses text that is allowed to contain explicit links,
775// such as [math.Sin] or [Go home page], into a slice of Text items.
776//
777// A “pkg” is only assumed to be a full import path if it starts with
778// a domain name (a path element with a dot) or is one of the packages
779// from the standard library (“[os]”, “[encoding/json]”, and so on).
780// To avoid problems with maps, generics, and array types, doc links
781// must be both preceded and followed by punctuation, spaces, tabs,
782// or the start or end of a line. An example problem would be treating
783// map[ast.Expr]TypeAndValue as containing a link.
784func (d *parseDoc) parseLinkedText(text string) []Text {
785	var out []Text
786	wrote := 0
787	flush := func(i int) {
788		if wrote < i {
789			out = d.parseText(out, text[wrote:i], true)
790			wrote = i
791		}
792	}
793
794	start := -1
795	var buf []byte
796	for i := 0; i < len(text); i++ {
797		c := text[i]
798		if c == '\n' || c == '\t' {
799			c = ' '
800		}
801		switch c {
802		case '[':
803			start = i
804		case ']':
805			if start >= 0 {
806				if def, ok := d.links[string(buf)]; ok {
807					def.Used = true
808					flush(start)
809					out = append(out, &Link{
810						Text: d.parseText(nil, text[start+1:i], false),
811						URL:  def.URL,
812					})
813					wrote = i + 1
814				} else if link, ok := d.docLink(text[start+1:i], text[:start], text[i+1:]); ok {
815					flush(start)
816					link.Text = d.parseText(nil, text[start+1:i], false)
817					out = append(out, link)
818					wrote = i + 1
819				}
820			}
821			start = -1
822			buf = buf[:0]
823		}
824		if start >= 0 && i != start {
825			buf = append(buf, c)
826		}
827	}
828
829	flush(len(text))
830	return out
831}
832
833// docLink parses text, which was found inside [ ] brackets,
834// as a doc link if possible, returning the DocLink and ok == true
835// or else nil, false.
836// The before and after strings are the text before the [ and after the ]
837// on the same line. Doc links must be preceded and followed by
838// punctuation, spaces, tabs, or the start or end of a line.
839func (d *parseDoc) docLink(text, before, after string) (link *DocLink, ok bool) {
840	if before != "" {
841		r, _ := utf8.DecodeLastRuneInString(before)
842		if !unicode.IsPunct(r) && r != ' ' && r != '\t' && r != '\n' {
843			return nil, false
844		}
845	}
846	if after != "" {
847		r, _ := utf8.DecodeRuneInString(after)
848		if !unicode.IsPunct(r) && r != ' ' && r != '\t' && r != '\n' {
849			return nil, false
850		}
851	}
852	text = strings.TrimPrefix(text, "*")
853	pkg, name, ok := splitDocName(text)
854	var recv string
855	if ok {
856		pkg, recv, _ = splitDocName(pkg)
857	}
858	if pkg != "" {
859		if pkg, ok = d.lookupPkg(pkg); !ok {
860			return nil, false
861		}
862	} else {
863		if ok = d.lookupSym(recv, name); !ok {
864			return nil, false
865		}
866	}
867	link = &DocLink{
868		ImportPath: pkg,
869		Recv:       recv,
870		Name:       name,
871	}
872	return link, true
873}
874
875// If text is of the form before.Name, where Name is a capitalized Go identifier,
876// then splitDocName returns before, name, true.
877// Otherwise it returns text, "", false.
878func splitDocName(text string) (before, name string, foundDot bool) {
879	i := strings.LastIndex(text, ".")
880	name = text[i+1:]
881	if !isName(name) {
882		return text, "", false
883	}
884	if i >= 0 {
885		before = text[:i]
886	}
887	return before, name, true
888}
889
890// parseText parses s as text and returns the result of appending
891// those parsed Text elements to out.
892// parseText does not handle explicit links like [math.Sin] or [Go home page]:
893// those are handled by parseLinkedText.
894// If autoLink is true, then parseText recognizes URLs and words from d.Words
895// and converts those to links as appropriate.
896func (d *parseDoc) parseText(out []Text, s string, autoLink bool) []Text {
897	var w strings.Builder
898	wrote := 0
899	writeUntil := func(i int) {
900		w.WriteString(s[wrote:i])
901		wrote = i
902	}
903	flush := func(i int) {
904		writeUntil(i)
905		if w.Len() > 0 {
906			out = append(out, Plain(w.String()))
907			w.Reset()
908		}
909	}
910	for i := 0; i < len(s); {
911		t := s[i:]
912		if autoLink {
913			if url, ok := autoURL(t); ok {
914				flush(i)
915				// Note: The old comment parser would look up the URL in words
916				// and replace the target with words[URL] if it was non-empty.
917				// That would allow creating links that display as one URL but
918				// when clicked go to a different URL. Not sure what the point
919				// of that is, so we're not doing that lookup here.
920				out = append(out, &Link{Auto: true, Text: []Text{Plain(url)}, URL: url})
921				i += len(url)
922				wrote = i
923				continue
924			}
925			if id, ok := ident(t); ok {
926				url, italics := d.Words[id]
927				if !italics {
928					i += len(id)
929					continue
930				}
931				flush(i)
932				if url == "" {
933					out = append(out, Italic(id))
934				} else {
935					out = append(out, &Link{Auto: true, Text: []Text{Italic(id)}, URL: url})
936				}
937				i += len(id)
938				wrote = i
939				continue
940			}
941		}
942		switch {
943		case strings.HasPrefix(t, "``"):
944			if len(t) >= 3 && t[2] == '`' {
945				// Do not convert `` inside ```, in case people are mistakenly writing Markdown.
946				i += 3
947				for i < len(t) && t[i] == '`' {
948					i++
949				}
950				break
951			}
952			writeUntil(i)
953			w.WriteRune('“')
954			i += 2
955			wrote = i
956		case strings.HasPrefix(t, "''"):
957			writeUntil(i)
958			w.WriteRune('”')
959			i += 2
960			wrote = i
961		default:
962			i++
963		}
964	}
965	flush(len(s))
966	return out
967}
968
969// autoURL checks whether s begins with a URL that should be hyperlinked.
970// If so, it returns the URL, which is a prefix of s, and ok == true.
971// Otherwise it returns "", false.
972// The caller should skip over the first len(url) bytes of s
973// before further processing.
974func autoURL(s string) (url string, ok bool) {
975	// Find the ://. Fast path to pick off non-URL,
976	// since we call this at every position in the string.
977	// The shortest possible URL is ftp://x, 7 bytes.
978	var i int
979	switch {
980	case len(s) < 7:
981		return "", false
982	case s[3] == ':':
983		i = 3
984	case s[4] == ':':
985		i = 4
986	case s[5] == ':':
987		i = 5
988	case s[6] == ':':
989		i = 6
990	default:
991		return "", false
992	}
993	if i+3 > len(s) || s[i:i+3] != "://" {
994		return "", false
995	}
996
997	// Check valid scheme.
998	if !isScheme(s[:i]) {
999		return "", false
1000	}
1001
1002	// Scan host part. Must have at least one byte,
1003	// and must start and end in non-punctuation.
1004	i += 3
1005	if i >= len(s) || !isHost(s[i]) || isPunct(s[i]) {
1006		return "", false
1007	}
1008	i++
1009	end := i
1010	for i < len(s) && isHost(s[i]) {
1011		if !isPunct(s[i]) {
1012			end = i + 1
1013		}
1014		i++
1015	}
1016	i = end
1017
1018	// At this point we are definitely returning a URL (scheme://host).
1019	// We just have to find the longest path we can add to it.
1020	// Heuristics abound.
1021	// We allow parens, braces, and brackets,
1022	// but only if they match (#5043, #22285).
1023	// We allow .,:;?! in the path but not at the end,
1024	// to avoid end-of-sentence punctuation (#18139, #16565).
1025	stk := []byte{}
1026	end = i
1027Path:
1028	for ; i < len(s); i++ {
1029		if isPunct(s[i]) {
1030			continue
1031		}
1032		if !isPath(s[i]) {
1033			break
1034		}
1035		switch s[i] {
1036		case '(':
1037			stk = append(stk, ')')
1038		case '{':
1039			stk = append(stk, '}')
1040		case '[':
1041			stk = append(stk, ']')
1042		case ')', '}', ']':
1043			if len(stk) == 0 || stk[len(stk)-1] != s[i] {
1044				break Path
1045			}
1046			stk = stk[:len(stk)-1]
1047		}
1048		if len(stk) == 0 {
1049			end = i + 1
1050		}
1051	}
1052
1053	return s[:end], true
1054}
1055
// isScheme reports whether s is one of the URL schemes that autoURL links.
// Every scheme listed here is between 3 and 6 bytes long, which is what
// the fast path at the top of autoURL relies on (it looks for the colon
// at indexes 3 through 6 only). Adding a scheme of any other length
// requires updating that fast path as well.
func isScheme(s string) bool {
	switch s {
	case "ftp":
	case "file", "http", "nntp":
	case "https":
	case "gopher", "mailto":
	default:
		return false
	}
	return true
}
1072
// isHost reports whether c is a byte that may occur in the host part
// of a URL, such as www.example.com or user@[::1]:8080.
func isHost(c byte) bool {
	// hostBytes is a 128-bit set in which bit b is 1 exactly when
	// the ASCII byte b is accepted. It is an untyped constant, so it
	// is split into two 64-bit words before being tested at run time.
	const hostBytes = 0 |
		(1<<10-1)<<'0' |
		(1<<26-1)<<'A' |
		(1<<26-1)<<'a' |
		1<<'-' |
		1<<'.' |
		1<<':' |
		1<<'@' |
		1<<'[' |
		1<<']' |
		1<<'_'
	const (
		low  uint64 = hostBytes & (1<<64 - 1)
		high uint64 = hostBytes >> 64
	)

	switch {
	case c < 64:
		return low>>c&1 != 0
	case c < 128:
		return high>>(c-64)&1 != 0
	}
	// Bytes 128 and above are outside the set.
	return false
}
1095
// isPunct reports whether c is one of the punctuation bytes .,:;?!
// which are allowed in the middle of a path but not as its last byte.
func isPunct(c byte) bool {
	// punctBytes is a 128-bit set in which bit b is 1 exactly when
	// the ASCII byte b is accepted. It is an untyped constant, so it
	// is split into two 64-bit words before being tested at run time.
	const punctBytes = 0 |
		1<<'!' |
		1<<',' |
		1<<'.' |
		1<<':' |
		1<<';' |
		1<<'?'
	const (
		low  uint64 = punctBytes & (1<<64 - 1)
		high uint64 = punctBytes >> 64
	)

	switch {
	case c < 64:
		return low>>c&1 != 0
	case c < 128:
		return high>>(c-64)&1 != 0
	}
	// Bytes 128 and above are outside the set.
	return false
}
1114
// isPath reports whether c is a path byte other than
// the punctuation bytes accepted by isPunct.
func isPath(c byte) bool {
	// pathBytes is a 128-bit set in which bit b is 1 exactly when
	// the ASCII byte b is accepted. It is an untyped constant, so it
	// is split into two 64-bit words before being tested at run time.
	const pathBytes = 0 |
		(1<<10-1)<<'0' |
		(1<<26-1)<<'A' |
		(1<<26-1)<<'a' |
		1<<'#' |
		1<<'$' |
		1<<'%' |
		1<<'&' |
		1<<'\'' |
		1<<'(' |
		1<<')' |
		1<<'*' |
		1<<'+' |
		1<<'-' |
		1<<'/' |
		1<<'=' |
		1<<'@' |
		1<<'[' |
		1<<']' |
		1<<'_' |
		1<<'{' |
		1<<'}' |
		1<<'~'
	const (
		low  uint64 = pathBytes & (1<<64 - 1)
		high uint64 = pathBytes >> 64
	)

	switch {
	case c < 64:
		return low>>c&1 != 0
	case c < 128:
		return high>>(c-64)&1 != 0
	}
	// Bytes 128 and above are outside the set.
	return false
}
1148
1149// isName reports whether s is a capitalized Go identifier (like Name).
1150func isName(s string) bool {
1151	t, ok := ident(s)
1152	if !ok || t != s {
1153		return false
1154	}
1155	r, _ := utf8.DecodeRuneInString(s)
1156	return unicode.IsUpper(r)
1157}
1158
1159// ident checks whether s begins with a Go identifier.
1160// If so, it returns the identifier, which is a prefix of s, and ok == true.
1161// Otherwise it returns "", false.
1162// The caller should skip over the first len(id) bytes of s
1163// before further processing.
1164func ident(s string) (id string, ok bool) {
1165	// Scan [\pL_][\pL_0-9]*
1166	n := 0
1167	for n < len(s) {
1168		if c := s[n]; c < utf8.RuneSelf {
1169			if isIdentASCII(c) && (n > 0 || c < '0' || c > '9') {
1170				n++
1171				continue
1172			}
1173			break
1174		}
1175		r, nr := utf8.DecodeRuneInString(s[n:])
1176		if unicode.IsLetter(r) {
1177			n += nr
1178			continue
1179		}
1180		break
1181	}
1182	return s[:n], n > 0
1183}
1184
// isIdentASCII reports whether c is an ASCII byte that may appear
// in an identifier: a letter, a digit, or an underscore.
func isIdentASCII(c byte) bool {
	// identBytes is a 128-bit set in which bit b is 1 exactly when
	// the ASCII byte b is accepted. It is an untyped constant, so it
	// is split into two 64-bit words before being tested at run time.
	const identBytes = 0 |
		(1<<10-1)<<'0' |
		(1<<26-1)<<'A' |
		(1<<26-1)<<'a' |
		1<<'_'
	const (
		low  uint64 = identBytes & (1<<64 - 1)
		high uint64 = identBytes >> 64
	)

	switch {
	case c < 64:
		return low>>c&1 != 0
	case c < 128:
		return high>>(c-64)&1 != 0
	}
	// Bytes 128 and above are outside the set.
	return false
}
1200
1201// validImportPath reports whether path is a valid import path.
1202// It is a lightly edited copy of golang.org/x/mod/module.CheckImportPath.
1203func validImportPath(path string) bool {
1204	if !utf8.ValidString(path) {
1205		return false
1206	}
1207	if path == "" {
1208		return false
1209	}
1210	if path[0] == '-' {
1211		return false
1212	}
1213	if strings.Contains(path, "//") {
1214		return false
1215	}
1216	if path[len(path)-1] == '/' {
1217		return false
1218	}
1219	elemStart := 0
1220	for i, r := range path {
1221		if r == '/' {
1222			if !validImportPathElem(path[elemStart:i]) {
1223				return false
1224			}
1225			elemStart = i + 1
1226		}
1227	}
1228	return validImportPathElem(path[elemStart:])
1229}
1230
1231func validImportPathElem(elem string) bool {
1232	if elem == "" || elem[0] == '.' || elem[len(elem)-1] == '.' {
1233		return false
1234	}
1235	for i := 0; i < len(elem); i++ {
1236		if !importPathOK(elem[i]) {
1237			return false
1238		}
1239	}
1240	return true
1241}
1242
// importPathOK reports whether c is a byte allowed in an import path
// element: an ASCII letter, an ASCII digit, or one of - . ~ _ +
func importPathOK(c byte) bool {
	// okBytes is a 128-bit set in which bit b is 1 exactly when
	// the ASCII byte b is accepted. It is an untyped constant, so it
	// is split into two 64-bit words before being tested at run time.
	const okBytes = 0 |
		(1<<10-1)<<'0' |
		(1<<26-1)<<'A' |
		(1<<26-1)<<'a' |
		1<<'+' |
		1<<'-' |
		1<<'.' |
		1<<'_' |
		1<<'~'
	const (
		low  uint64 = okBytes & (1<<64 - 1)
		high uint64 = okBytes >> 64
	)

	switch {
	case c < 64:
		return low>>c&1 != 0
	case c < 128:
		return high>>(c-64)&1 != 0
	}
	// Bytes 128 and above are outside the set.
	return false
}
1261