1// Copyright 2022 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package comment
6
7import (
8	"bytes"
9	"fmt"
10	"sort"
11	"strings"
12	"unicode/utf8"
13)
14
15// A textPrinter holds the state needed for printing a Doc as plain text.
16type textPrinter struct {
17	*Printer
18	long       strings.Builder
19	prefix     string
20	codePrefix string
21	width      int
22}
23
24// Text returns a textual formatting of the [Doc].
25// See the [Printer] documentation for ways to customize the text output.
26func (p *Printer) Text(d *Doc) []byte {
27	tp := &textPrinter{
28		Printer:    p,
29		prefix:     p.TextPrefix,
30		codePrefix: p.TextCodePrefix,
31		width:      p.TextWidth,
32	}
33	if tp.codePrefix == "" {
34		tp.codePrefix = p.TextPrefix + "\t"
35	}
36	if tp.width == 0 {
37		tp.width = 80 - utf8.RuneCountInString(tp.prefix)
38	}
39
40	var out bytes.Buffer
41	for i, x := range d.Content {
42		if i > 0 && blankBefore(x) {
43			out.WriteString(tp.prefix)
44			writeNL(&out)
45		}
46		tp.block(&out, x)
47	}
48	anyUsed := false
49	for _, def := range d.Links {
50		if def.Used {
51			anyUsed = true
52			break
53		}
54	}
55	if anyUsed {
56		writeNL(&out)
57		for _, def := range d.Links {
58			if def.Used {
59				fmt.Fprintf(&out, "[%s]: %s\n", def.Text, def.URL)
60			}
61		}
62	}
63	return out.Bytes()
64}
65
66// writeNL calls out.WriteByte('\n')
67// but first trims trailing spaces on the previous line.
68func writeNL(out *bytes.Buffer) {
69	// Trim trailing spaces.
70	data := out.Bytes()
71	n := 0
72	for n < len(data) && (data[len(data)-n-1] == ' ' || data[len(data)-n-1] == '\t') {
73		n++
74	}
75	if n > 0 {
76		out.Truncate(len(data) - n)
77	}
78	out.WriteByte('\n')
79}
80
81// block prints the block x to out.
82func (p *textPrinter) block(out *bytes.Buffer, x Block) {
83	switch x := x.(type) {
84	default:
85		fmt.Fprintf(out, "?%T\n", x)
86
87	case *Paragraph:
88		out.WriteString(p.prefix)
89		p.text(out, "", x.Text)
90
91	case *Heading:
92		out.WriteString(p.prefix)
93		out.WriteString("# ")
94		p.text(out, "", x.Text)
95
96	case *Code:
97		text := x.Text
98		for text != "" {
99			var line string
100			line, text, _ = strings.Cut(text, "\n")
101			if line != "" {
102				out.WriteString(p.codePrefix)
103				out.WriteString(line)
104			}
105			writeNL(out)
106		}
107
108	case *List:
109		loose := x.BlankBetween()
110		for i, item := range x.Items {
111			if i > 0 && loose {
112				out.WriteString(p.prefix)
113				writeNL(out)
114			}
115			out.WriteString(p.prefix)
116			out.WriteString(" ")
117			if item.Number == "" {
118				out.WriteString(" - ")
119			} else {
120				out.WriteString(item.Number)
121				out.WriteString(". ")
122			}
123			for i, blk := range item.Content {
124				const fourSpace = "    "
125				if i > 0 {
126					writeNL(out)
127					out.WriteString(p.prefix)
128					out.WriteString(fourSpace)
129				}
130				p.text(out, fourSpace, blk.(*Paragraph).Text)
131			}
132		}
133	}
134}
135
136// text prints the text sequence x to out.
137func (p *textPrinter) text(out *bytes.Buffer, indent string, x []Text) {
138	p.oneLongLine(&p.long, x)
139	words := strings.Fields(p.long.String())
140	p.long.Reset()
141
142	var seq []int
143	if p.width < 0 || len(words) == 0 {
144		seq = []int{0, len(words)} // one long line
145	} else {
146		seq = wrap(words, p.width-utf8.RuneCountInString(indent))
147	}
148	for i := 0; i+1 < len(seq); i++ {
149		if i > 0 {
150			out.WriteString(p.prefix)
151			out.WriteString(indent)
152		}
153		for j, w := range words[seq[i]:seq[i+1]] {
154			if j > 0 {
155				out.WriteString(" ")
156			}
157			out.WriteString(w)
158		}
159		writeNL(out)
160	}
161}
162
163// oneLongLine prints the text sequence x to out as one long line,
164// without worrying about line wrapping.
165// Explicit links have the [ ] dropped to improve readability.
166func (p *textPrinter) oneLongLine(out *strings.Builder, x []Text) {
167	for _, t := range x {
168		switch t := t.(type) {
169		case Plain:
170			out.WriteString(string(t))
171		case Italic:
172			out.WriteString(string(t))
173		case *Link:
174			p.oneLongLine(out, t.Text)
175		case *DocLink:
176			p.oneLongLine(out, t.Text)
177		}
178	}
179}
180
181// wrap wraps words into lines of at most max runes,
182// minimizing the sum of the squares of the leftover lengths
183// at the end of each line (except the last, of course),
184// with a preference for ending lines at punctuation (.,:;).
185//
186// The returned slice gives the indexes of the first words
187// on each line in the wrapped text with a final entry of len(words).
188// Thus the lines are words[seq[0]:seq[1]], words[seq[1]:seq[2]],
189// ..., words[seq[len(seq)-2]:seq[len(seq)-1]].
190//
191// The implementation runs in O(n log n) time, where n = len(words),
192// using the algorithm described in D. S. Hirschberg and L. L. Larmore,
193// “[The least weight subsequence problem],” FOCS 1985, pp. 137-143.
194//
195// [The least weight subsequence problem]: https://doi.org/10.1109/SFCS.1985.60
196func wrap(words []string, max int) (seq []int) {
197	// The algorithm requires that our scoring function be concave,
198	// meaning that for all i₀ ≤ i₁ < j₀ ≤ j₁,
199	// weight(i₀, j₀) + weight(i₁, j₁) ≤ weight(i₀, j₁) + weight(i₁, j₀).
200	//
201	// Our weights are two-element pairs [hi, lo]
202	// ordered by elementwise comparison.
203	// The hi entry counts the weight for lines that are longer than max,
204	// and the lo entry counts the weight for lines that are not.
205	// This forces the algorithm to first minimize the number of lines
206	// that are longer than max, which correspond to lines with
207	// single very long words. Having done that, it can move on to
208	// minimizing the lo score, which is more interesting.
209	//
210	// The lo score is the sum for each line of the square of the
211	// number of spaces remaining at the end of the line and a
212	// penalty of 64 given out for not ending the line in a
213	// punctuation character (.,:;).
214	// The penalty is somewhat arbitrarily chosen by trying
215	// different amounts and judging how nice the wrapped text looks.
216	// Roughly speaking, using 64 means that we are willing to
217	// end a line with eight blank spaces in order to end at a
218	// punctuation character, even if the next word would fit in
219	// those spaces.
220	//
221	// We care about ending in punctuation characters because
222	// it makes the text easier to skim if not too many sentences
223	// or phrases begin with a single word on the previous line.
224
225	// A score is the score (also called weight) for a given line.
226	// add and cmp add and compare scores.
227	type score struct {
228		hi int64
229		lo int64
230	}
231	add := func(s, t score) score { return score{s.hi + t.hi, s.lo + t.lo} }
232	cmp := func(s, t score) int {
233		switch {
234		case s.hi < t.hi:
235			return -1
236		case s.hi > t.hi:
237			return +1
238		case s.lo < t.lo:
239			return -1
240		case s.lo > t.lo:
241			return +1
242		}
243		return 0
244	}
245
246	// total[j] is the total number of runes
247	// (including separating spaces) in words[:j].
248	total := make([]int, len(words)+1)
249	total[0] = 0
250	for i, s := range words {
251		total[1+i] = total[i] + utf8.RuneCountInString(s) + 1
252	}
253
254	// weight returns weight(i, j).
255	weight := func(i, j int) score {
256		// On the last line, there is zero weight for being too short.
257		n := total[j] - 1 - total[i]
258		if j == len(words) && n <= max {
259			return score{0, 0}
260		}
261
262		// Otherwise the weight is the penalty plus the square of the number of
263		// characters remaining on the line or by which the line goes over.
264		// In the latter case, that value goes in the hi part of the score.
265		// (See note above.)
266		p := wrapPenalty(words[j-1])
267		v := int64(max-n) * int64(max-n)
268		if n > max {
269			return score{v, p}
270		}
271		return score{0, v + p}
272	}
273
274	// The rest of this function is “The Basic Algorithm” from
275	// Hirschberg and Larmore's conference paper,
276	// using the same names as in the paper.
277	f := []score{{0, 0}}
278	g := func(i, j int) score { return add(f[i], weight(i, j)) }
279
280	bridge := func(a, b, c int) bool {
281		k := c + sort.Search(len(words)+1-c, func(k int) bool {
282			k += c
283			return cmp(g(a, k), g(b, k)) > 0
284		})
285		if k > len(words) {
286			return true
287		}
288		return cmp(g(c, k), g(b, k)) <= 0
289	}
290
291	// d is a one-ended deque implemented as a slice.
292	d := make([]int, 1, len(words))
293	d[0] = 0
294	bestleft := make([]int, 1, len(words))
295	bestleft[0] = -1
296	for m := 1; m < len(words); m++ {
297		f = append(f, g(d[0], m))
298		bestleft = append(bestleft, d[0])
299		for len(d) > 1 && cmp(g(d[1], m+1), g(d[0], m+1)) <= 0 {
300			d = d[1:] // “Retire”
301		}
302		for len(d) > 1 && bridge(d[len(d)-2], d[len(d)-1], m) {
303			d = d[:len(d)-1] // “Fire”
304		}
305		if cmp(g(m, len(words)), g(d[len(d)-1], len(words))) < 0 {
306			d = append(d, m) // “Hire”
307			// The next few lines are not in the paper but are necessary
308			// to handle two-word inputs correctly. It appears to be
309			// just a bug in the paper's pseudocode.
310			if len(d) == 2 && cmp(g(d[1], m+1), g(d[0], m+1)) <= 0 {
311				d = d[1:]
312			}
313		}
314	}
315	bestleft = append(bestleft, d[0])
316
317	// Recover least weight sequence from bestleft.
318	n := 1
319	for m := len(words); m > 0; m = bestleft[m] {
320		n++
321	}
322	seq = make([]int, n)
323	for m := len(words); m > 0; m = bestleft[m] {
324		n--
325		seq[n] = m
326	}
327	return seq
328}
329
330// wrapPenalty is the penalty for inserting a line break after word s.
331func wrapPenalty(s string) int64 {
332	switch s[len(s)-1] {
333	case '.', ',', ':', ';':
334		return 0
335	}
336	return 64
337}
338