1// Copyright 2024 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5package relnote 6 7import ( 8 "fmt" 9 "strings" 10 "unicode" 11 "unicode/utf8" 12 13 "golang.org/x/mod/module" 14 md "rsc.io/markdown" 15) 16 17// addSymbolLinks looks for text like [Buffer] and 18// [math.Max] and replaces them with links to standard library 19// symbols and packages. 20// It uses the given default package for links without a package. 21func addSymbolLinks(doc *md.Document, defaultPackage string) { 22 addSymbolLinksBlocks(doc.Blocks, defaultPackage) 23} 24 25func addSymbolLinksBlocks(bs []md.Block, defaultPackage string) { 26 for _, b := range bs { 27 addSymbolLinksBlock(b, defaultPackage) 28 } 29} 30 31func addSymbolLinksBlock(b md.Block, defaultPackage string) { 32 switch b := b.(type) { 33 case *md.Heading: 34 addSymbolLinksBlock(b.Text, defaultPackage) 35 case *md.Text: 36 b.Inline = addSymbolLinksInlines(b.Inline, defaultPackage) 37 case *md.List: 38 addSymbolLinksBlocks(b.Items, defaultPackage) 39 case *md.Item: 40 addSymbolLinksBlocks(b.Blocks, defaultPackage) 41 case *md.Paragraph: 42 addSymbolLinksBlock(b.Text, defaultPackage) 43 case *md.Quote: 44 addSymbolLinksBlocks(b.Blocks, defaultPackage) 45 // no links in these blocks 46 case *md.CodeBlock: 47 case *md.HTMLBlock: 48 case *md.Empty: 49 case *md.ThematicBreak: 50 default: 51 panic(fmt.Sprintf("unknown block type %T", b)) 52 } 53} 54 55// addSymbolLinksInlines looks for symbol links in the slice of inline markdown 56// elements. It returns a new slice of inline elements with links added. 
57func addSymbolLinksInlines(ins []md.Inline, defaultPackage string) []md.Inline { 58 ins = splitAtBrackets(ins) 59 var res []md.Inline 60 for i := 0; i < len(ins); i++ { 61 if txt := symbolLinkText(i, ins); txt != "" { 62 link, ok := symbolLink(txt, defaultPackage) 63 if ok { 64 res = append(res, link) 65 i += 2 66 continue 67 } 68 } 69 70 // Handle inline elements with nested content. 71 switch in := ins[i].(type) { 72 case *md.Strong: 73 res = append(res, &md.Strong{ 74 Marker: in.Marker, 75 Inner: addSymbolLinksInlines(in.Inner, defaultPackage), 76 }) 77 78 case *md.Emph: 79 res = append(res, &md.Emph{ 80 Marker: in.Marker, 81 Inner: addSymbolLinksInlines(in.Inner, defaultPackage), 82 }) 83 // Currently we don't support Del nodes because we don't enable the Strikethrough 84 // extension. But this can't hurt. 85 case *md.Del: 86 res = append(res, &md.Del{ 87 Marker: in.Marker, 88 Inner: addSymbolLinksInlines(in.Inner, defaultPackage), 89 }) 90 // Don't look for links in anything else. 91 default: 92 res = append(res, in) 93 } 94 } 95 return res 96} 97 98// splitAtBrackets rewrites ins so that every '[' and ']' is the only character 99// of its Plain. 100// For example, the element 101// 102// [Plain("the [Buffer] is")] 103// 104// is rewritten to 105// 106// [Plain("the "), Plain("["), Plain("Buffer"), Plain("]"), Plain(" is")] 107// 108// This transformation simplifies looking for symbol links. 109func splitAtBrackets(ins []md.Inline) []md.Inline { 110 var res []md.Inline 111 for _, in := range ins { 112 if p, ok := in.(*md.Plain); ok { 113 text := p.Text 114 for len(text) > 0 { 115 i := strings.IndexAny(text, "[]") 116 // If there are no brackets, the remaining text is a single 117 // Plain and we are done. 118 if i < 0 { 119 res = append(res, &md.Plain{Text: text}) 120 break 121 } 122 // There is a bracket; make Plains for it and the text before it (if any). 
123 if i > 0 { 124 res = append(res, &md.Plain{Text: text[:i]}) 125 } 126 res = append(res, &md.Plain{Text: text[i : i+1]}) 127 text = text[i+1:] 128 } 129 } else { 130 res = append(res, in) 131 } 132 } 133 return res 134} 135 136// symbolLinkText returns the text of a possible symbol link. 137// It is given a slice of Inline elements and an index into the slice. 138// If the index refers to a sequence of elements 139// 140// [Plain("["), Plain_or_Code(text), Plain("]")] 141// 142// and the brackets are adjacent to the right kind of runes for a link, then 143// symbolLinkText returns the text of the middle element. 144// Otherwise it returns the empty string. 145func symbolLinkText(i int, ins []md.Inline) string { 146 // plainText returns the text of ins[j] if it is a Plain element, or "" otherwise. 147 plainText := func(j int) string { 148 if j < 0 || j >= len(ins) { 149 return "" 150 } 151 if p, ok := ins[j].(*md.Plain); ok { 152 return p.Text 153 } 154 return "" 155 } 156 157 // ins[i] must be a "[". 158 if plainText(i) != "[" { 159 return "" 160 } 161 // The open bracket must be preceeded by a link-adjacent rune (or by nothing). 162 if t := plainText(i - 1); t != "" { 163 r, _ := utf8.DecodeLastRuneInString(t) 164 if !isLinkAdjacentRune(r) { 165 return "" 166 } 167 } 168 // The element after the next must be a ']'. 169 if plainText(i+2) != "]" { 170 return "" 171 } 172 // The ']' must be followed by a link-adjacent rune (or by nothing). 173 if t := plainText(i + 3); t != "" { 174 r, _ := utf8.DecodeRuneInString(t) 175 if !isLinkAdjacentRune(r) { 176 return "" 177 } 178 } 179 180 // ins[i+1] must be a Plain or a Code. 181 // Its text is the symbol to link to. 
182 if i+1 >= len(ins) { 183 return "" 184 } 185 switch in := ins[i+1].(type) { 186 case *md.Plain: 187 return in.Text 188 case *md.Code: 189 return in.Text 190 default: 191 return "" 192 } 193} 194 195// symbolLink converts s into a Link and returns it and true, or nil and false if 196// s is not a valid link or is surrounded by runes that disqualify it from being 197// converted to a link. 198// 199// The argument s is the text between '[' and ']'. 200func symbolLink(s, defaultPackage string) (md.Inline, bool) { 201 pkg, sym, ok := splitRef(s) 202 if !ok { 203 return nil, false 204 } 205 if pkg == "" { 206 if defaultPackage == "" { 207 return nil, false 208 } 209 pkg = defaultPackage 210 } 211 if sym != "" { 212 sym = "#" + sym 213 } 214 return &md.Link{ 215 Inner: []md.Inline{&md.Code{Text: s}}, 216 URL: fmt.Sprintf("/pkg/%s%s", pkg, sym), 217 }, true 218} 219 220// isLinkAdjacentRune reports whether r can be adjacent to a symbol link. 221// The logic is the same as the go/doc/comment package. 222func isLinkAdjacentRune(r rune) bool { 223 return unicode.IsPunct(r) || r == ' ' || r == '\t' || r == '\n' 224} 225 226// splitRef splits s into a package and possibly a symbol. 227// Examples: 228// 229// splitRef("math.Max") => ("math", "Max", true) 230// splitRef("bytes.Buffer.String") => ("bytes", "Buffer.String", true) 231// splitRef("math") => ("math", "", true) 232func splitRef(s string) (pkg, name string, ok bool) { 233 s = strings.TrimPrefix(s, "*") 234 pkg, name, ok = splitDocName(s) 235 var recv string 236 if ok { 237 pkg, recv, _ = splitDocName(pkg) 238 } 239 if pkg != "" { 240 if err := module.CheckImportPath(pkg); err != nil { 241 return "", "", false 242 } 243 } 244 if recv != "" { 245 name = recv + "." + name 246 } 247 return pkg, name, true 248} 249 250// The following functions were copied from go/doc/comment/parse.go. 

// splitDocName splits text of the form before.Name, where Name is a
// capitalized Go identifier, and returns before, Name, true.
// When text is just a capitalized identifier, before is empty.
// For any other text it returns text, "", false.
func splitDocName(text string) (before, name string, foundDot bool) {
	dot := strings.LastIndexByte(text, '.')
	// With no dot, dot is -1 and the candidate is the whole text.
	candidate := text[dot+1:]
	if !isName(candidate) {
		return text, "", false
	}
	if dot < 0 {
		return "", candidate, true
	}
	return text[:dot], candidate, true
}

// isName reports whether s is a capitalized Go identifier (like Name):
// the whole of s must be one identifier and its first rune must be upper case.
func isName(s string) bool {
	if id, ok := ident(s); !ok || id != s {
		return false
	}
	first, _ := utf8.DecodeRuneInString(s)
	return unicode.IsUpper(first)
}

// ident checks whether s begins with a Go identifier.
// If so, it returns the identifier, which is a prefix of s, and ok == true.
// Otherwise it returns "", false.
// The caller should skip over the first len(id) bytes of s
// before further processing.
func ident(s string) (id string, ok bool) {
	// Scan [\pL_][\pL_0-9]*
	end := 0
scan:
	for end < len(s) {
		b := s[end]
		switch {
		case b >= utf8.RuneSelf:
			// Multi-byte rune: only letters extend the identifier.
			r, size := utf8.DecodeRuneInString(s[end:])
			if !unicode.IsLetter(r) {
				break scan
			}
			end += size
		case !isIdentASCII(b):
			break scan
		case end == 0 && '0' <= b && b <= '9':
			// An identifier cannot start with a digit.
			break scan
		default:
			end++
		}
	}
	return s[:end], end > 0
}

// isIdentASCII reports whether c is an ASCII identifier byte:
// a letter, a decimal digit, or an underscore.
func isIdentASCII(c byte) bool {
	switch {
	case 'a' <= c && c <= 'z',
		'A' <= c && c <= 'Z',
		'0' <= c && c <= '9',
		c == '_':
		return true
	}
	return false
}