1// Copyright 2010 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package token
6
7import (
8	"cmp"
9	"fmt"
10	"slices"
11	"strconv"
12	"sync"
13	"sync/atomic"
14)
15
// If debug is set, invalid offset and position values cause a panic
// (go.dev/issue/57490). When it is unset, File.fixOffset clamps such
// values to the bounds of the file instead of panicking.
const debug = false
19
20// -----------------------------------------------------------------------------
21// Positions
22
// Position describes an arbitrary source position
// including the file, line, and column location.
// A Position is valid if the line number is > 0.
type Position struct {
	Filename string // filename, if any
	Offset   int    // offset, starting at 0
	Line     int    // line number, starting at 1
	Column   int    // column number, starting at 1 (byte count)
}

// IsValid reports whether the position is valid,
// that is, whether its line number is > 0.
func (pos *Position) IsValid() bool { return pos.Line > 0 }

// String returns a string in one of several forms:
//
//	file:line:column    valid position with file name
//	file:line           valid position with file name but no column (column == 0)
//	line:column         valid position without file name
//	line                valid position without file name and no column (column == 0)
//	file                invalid position with file name
//	-                   invalid position without file name
func (pos Position) String() string {
	s := pos.Filename
	if pos.IsValid() {
		if s != "" {
			s += ":"
		}
		s += strconv.Itoa(pos.Line)
		if pos.Column != 0 {
			// Plain integer formatting: strconv.Itoa yields the same text
			// as the %d verb without going through fmt.
			s += ":" + strconv.Itoa(pos.Column)
		}
	}
	if s == "" {
		// Neither a file name nor a valid line is available.
		s = "-"
	}
	return s
}
60
// Pos is a compact encoding of a source position within a file set.
// It can be converted into a [Position] for a more convenient, but much
// larger, representation.
//
// The Pos value for a given file is a number in the range [base, base+size],
// where base and size are specified when a file is added to the file set.
// The difference between a Pos value and the corresponding file base
// corresponds to the byte offset of that position (represented by the Pos value)
// from the beginning of the file. Thus, the file base offset is the Pos value
// representing the first byte in the file.
//
// To create the Pos value for a specific source offset (measured in bytes),
// first add the respective file to the current file set using [FileSet.AddFile]
// and then call [File.Pos](offset) for that file. Given a Pos value p
// for a specific file set fset, the corresponding [Position] value is
// obtained by calling fset.Position(p).
//
// Pos values can be compared directly with the usual comparison operators:
// If two Pos values p and q are in the same file, comparing p and q is
// equivalent to comparing the respective source file offsets. If p and q
// are in different files, p < q is true if the file implied by p was added
// to the respective file set before the file implied by q.
type Pos int

// The zero value for [Pos] is NoPos; there is no file and line information
// associated with it, and NoPos.IsValid() is false. NoPos is always
// smaller than any other [Pos] value. The corresponding [Position] value
// for NoPos is the zero value for [Position].
const NoPos Pos = 0

// IsValid reports whether the position is valid,
// that is, whether p differs from [NoPos].
func (p Pos) IsValid() bool {
	return p != NoPos
}
95
96// -----------------------------------------------------------------------------
97// File
98
// A File is a handle for a file belonging to a [FileSet].
// A File has a name, size, and line offset table.
type File struct {
	// name, base, and size are read by the accessor methods
	// without holding mutex.
	name string // file name as provided to AddFile
	base int    // Pos value range for this file is [base...base+size]
	size int    // file size as provided to AddFile

	// lines and infos are protected by mutex
	mutex sync.Mutex
	lines []int // lines contains the offset of the first character for each line (the first entry is always 0)
	infos []lineInfo // alternative position information; AddLineColumnInfo only appends entries with increasing Offset
}
111
// Name returns the file name of file f as registered with [FileSet.AddFile].
func (f *File) Name() string {
	return f.name
}
116
// Base returns the base offset of file f as registered with [FileSet.AddFile].
// It is the smallest [Pos] value belonging to f.
func (f *File) Base() int {
	return f.base
}
121
// Size returns the size of file f as registered with [FileSet.AddFile].
func (f *File) Size() int {
	return f.size
}
126
127// LineCount returns the number of lines in file f.
128func (f *File) LineCount() int {
129	f.mutex.Lock()
130	n := len(f.lines)
131	f.mutex.Unlock()
132	return n
133}
134
135// AddLine adds the line offset for a new line.
136// The line offset must be larger than the offset for the previous line
137// and smaller than the file size; otherwise the line offset is ignored.
138func (f *File) AddLine(offset int) {
139	f.mutex.Lock()
140	if i := len(f.lines); (i == 0 || f.lines[i-1] < offset) && offset < f.size {
141		f.lines = append(f.lines, offset)
142	}
143	f.mutex.Unlock()
144}
145
146// MergeLine merges a line with the following line. It is akin to replacing
147// the newline character at the end of the line with a space (to not change the
148// remaining offsets). To obtain the line number, consult e.g. [Position.Line].
149// MergeLine will panic if given an invalid line number.
150func (f *File) MergeLine(line int) {
151	if line < 1 {
152		panic(fmt.Sprintf("invalid line number %d (should be >= 1)", line))
153	}
154	f.mutex.Lock()
155	defer f.mutex.Unlock()
156	if line >= len(f.lines) {
157		panic(fmt.Sprintf("invalid line number %d (should be < %d)", line, len(f.lines)))
158	}
159	// To merge the line numbered <line> with the line numbered <line+1>,
160	// we need to remove the entry in lines corresponding to the line
161	// numbered <line+1>. The entry in lines corresponding to the line
162	// numbered <line+1> is located at index <line>, since indices in lines
163	// are 0-based and line numbers are 1-based.
164	copy(f.lines[line:], f.lines[line+1:])
165	f.lines = f.lines[:len(f.lines)-1]
166}
167
168// Lines returns the effective line offset table of the form described by [File.SetLines].
169// Callers must not mutate the result.
170func (f *File) Lines() []int {
171	f.mutex.Lock()
172	lines := f.lines
173	f.mutex.Unlock()
174	return lines
175}
176
177// SetLines sets the line offsets for a file and reports whether it succeeded.
178// The line offsets are the offsets of the first character of each line;
179// for instance for the content "ab\nc\n" the line offsets are {0, 3}.
180// An empty file has an empty line offset table.
181// Each line offset must be larger than the offset for the previous line
182// and smaller than the file size; otherwise SetLines fails and returns
183// false.
184// Callers must not mutate the provided slice after SetLines returns.
185func (f *File) SetLines(lines []int) bool {
186	// verify validity of lines table
187	size := f.size
188	for i, offset := range lines {
189		if i > 0 && offset <= lines[i-1] || size <= offset {
190			return false
191		}
192	}
193
194	// set lines table
195	f.mutex.Lock()
196	f.lines = lines
197	f.mutex.Unlock()
198	return true
199}
200
201// SetLinesForContent sets the line offsets for the given file content.
202// It ignores position-altering //line comments.
203func (f *File) SetLinesForContent(content []byte) {
204	var lines []int
205	line := 0
206	for offset, b := range content {
207		if line >= 0 {
208			lines = append(lines, line)
209		}
210		line = -1
211		if b == '\n' {
212			line = offset + 1
213		}
214	}
215
216	// set lines table
217	f.mutex.Lock()
218	f.lines = lines
219	f.mutex.Unlock()
220}
221
222// LineStart returns the [Pos] value of the start of the specified line.
223// It ignores any alternative positions set using [File.AddLineColumnInfo].
224// LineStart panics if the 1-based line number is invalid.
225func (f *File) LineStart(line int) Pos {
226	if line < 1 {
227		panic(fmt.Sprintf("invalid line number %d (should be >= 1)", line))
228	}
229	f.mutex.Lock()
230	defer f.mutex.Unlock()
231	if line > len(f.lines) {
232		panic(fmt.Sprintf("invalid line number %d (should be < %d)", line, len(f.lines)))
233	}
234	return Pos(f.base + f.lines[line-1])
235}
236
// A lineInfo object describes alternative file, line, and column
// number information (such as provided via a //line directive)
// for a given file offset.
type lineInfo struct {
	// fields are exported to make them accessible to gob
	Offset       int    // file offset at which this information was registered
	Filename     string // alternative file name reported from Offset onward
	Line, Column int    // alternative line and column at Offset; Column == 0 means the column is unknown
}
246
// AddLineInfo is like [File.AddLineColumnInfo] with a column = 1 argument.
// It is here for backward-compatibility for code prior to Go 1.11.
func (f *File) AddLineInfo(offset int, filename string, line int) {
	// Column 1 is the first column of the alternative line.
	f.AddLineColumnInfo(offset, filename, line, 1)
}
252
253// AddLineColumnInfo adds alternative file, line, and column number
254// information for a given file offset. The offset must be larger
255// than the offset for the previously added alternative line info
256// and smaller than the file size; otherwise the information is
257// ignored.
258//
259// AddLineColumnInfo is typically used to register alternative position
260// information for line directives such as //line filename:line:column.
261func (f *File) AddLineColumnInfo(offset int, filename string, line, column int) {
262	f.mutex.Lock()
263	if i := len(f.infos); (i == 0 || f.infos[i-1].Offset < offset) && offset < f.size {
264		f.infos = append(f.infos, lineInfo{offset, filename, line, column})
265	}
266	f.mutex.Unlock()
267}
268
269// fixOffset fixes an out-of-bounds offset such that 0 <= offset <= f.size.
270func (f *File) fixOffset(offset int) int {
271	switch {
272	case offset < 0:
273		if !debug {
274			return 0
275		}
276	case offset > f.size:
277		if !debug {
278			return f.size
279		}
280	default:
281		return offset
282	}
283
284	// only generate this code if needed
285	if debug {
286		panic(fmt.Sprintf("offset %d out of bounds [%d, %d] (position %d out of bounds [%d, %d])",
287			0 /* for symmetry */, offset, f.size,
288			f.base+offset, f.base, f.base+f.size))
289	}
290	return 0
291}
292
// Pos returns the Pos value for the given file offset.
//
// If offset is negative, the result is the file's start
// position; if the offset is too large, the result is
// the file's end position (see also go.dev/issue/57490).
//
// The following invariant, though not true for Pos values
// in general, holds for the result p:
// f.Pos(f.Offset(p)) == p.
func (f *File) Pos(offset int) Pos {
	// fixOffset brings the offset into [0, f.size] before it is rebased.
	return Pos(f.base + f.fixOffset(offset))
}
305
// Offset returns the offset for the given file position p.
//
// If p is before the file's start position (or if p is NoPos),
// the result is 0; if p is past the file's end position,
// the result is the file size (see also go.dev/issue/57490).
//
// The following invariant, though not true for offset values
// in general, holds for the result offset:
// f.Offset(f.Pos(offset)) == offset
func (f *File) Offset(p Pos) int {
	// fixOffset brings the raw difference into [0, f.size].
	return f.fixOffset(int(p) - f.base)
}
318
// Line returns the line number for the given file position p;
// p must be a [Pos] value in that file or [NoPos].
// The result is the Line field of f.Position(p), so it may be adjusted
// by position-altering //line comments, and it is 0 for NoPos.
func (f *File) Line(p Pos) int {
	return f.Position(p).Line
}
324
325func searchLineInfos(a []lineInfo, x int) int {
326	i, found := slices.BinarySearchFunc(a, x, func(a lineInfo, x int) int {
327		return cmp.Compare(a.Offset, x)
328	})
329	if !found {
330		// We want the lineInfo containing x, but if we didn't
331		// find x then i is the next one.
332		i--
333	}
334	return i
335}
336
// unpack returns the filename and line and column number for a file offset.
// If adjusted is set, unpack will return the filename and line information
// possibly adjusted by //line comments; otherwise those comments are ignored.
//
// If the line table has no entry at or before offset, line and column
// are left at 0.
func (f *File) unpack(offset int, adjusted bool) (filename string, line, column int) {
	f.mutex.Lock()
	filename = f.name
	// Locate the line containing offset; lines and columns are 1-based.
	if i := searchInts(f.lines, offset); i >= 0 {
		line, column = i+1, offset-f.lines[i]+1
	}
	if adjusted && len(f.infos) > 0 {
		// few files have extra line infos
		if i := searchLineInfos(f.infos, offset); i >= 0 {
			// alt is the alternative position info in effect at offset.
			alt := &f.infos[i]
			filename = alt.Filename
			if i := searchInts(f.lines, alt.Offset); i >= 0 {
				// i+1 is the line at which the alternative position was recorded
				d := line - (i + 1) // line distance from alternative position base
				line = alt.Line + d
				if alt.Column == 0 {
					// alternative column is unknown => relative column is unknown
					// (the current specification for line directives requires
					// this to apply until the next PosBase/line directive,
					// not just until the new newline)
					column = 0
				} else if d == 0 {
					// the alternative position base is on the current line
					// => column is relative to alternative column
					column = alt.Column + (offset - alt.Offset)
				}
				// Otherwise offset is on a later line and the column
				// computed from the line table above is kept.
			}
		}
	}
	// TODO(mvdan): move Unlock back under Lock with a defer statement once
	// https://go.dev/issue/38471 is fixed to remove the performance penalty.
	f.mutex.Unlock()
	return
}
374
375func (f *File) position(p Pos, adjusted bool) (pos Position) {
376	offset := f.fixOffset(int(p) - f.base)
377	pos.Offset = offset
378	pos.Filename, pos.Line, pos.Column = f.unpack(offset, adjusted)
379	return
380}
381
382// PositionFor returns the Position value for the given file position p.
383// If p is out of bounds, it is adjusted to match the File.Offset behavior.
384// If adjusted is set, the position may be adjusted by position-altering
385// //line comments; otherwise those comments are ignored.
386// p must be a Pos value in f or NoPos.
387func (f *File) PositionFor(p Pos, adjusted bool) (pos Position) {
388	if p != NoPos {
389		pos = f.position(p, adjusted)
390	}
391	return
392}
393
// Position returns the Position value for the given file position p.
// If p is out of bounds, it is adjusted to match the File.Offset behavior.
// Calling f.Position(p) is equivalent to calling f.PositionFor(p, true),
// so position-altering //line comments are taken into account.
func (f *File) Position(p Pos) (pos Position) {
	return f.PositionFor(p, true)
}
400
401// -----------------------------------------------------------------------------
402// FileSet
403
// A FileSet represents a set of source files.
// Methods of file sets are synchronized; multiple goroutines
// may invoke them concurrently.
//
// The byte offsets for each file in a file set are mapped into
// distinct (integer) intervals, one interval [base, base+size]
// per file. Here base represents the first byte in the file, and size
// is the corresponding file size. A [Pos] value is a value in such
// an interval. By determining the interval a [Pos] value belongs
// to, the file, its file base, and thus the byte offset (position)
// the [Pos] value is representing can be computed.
//
// When adding a new file, a file base must be provided. That can
// be any integer value that is past the end of any interval of any
// file already in the file set. For convenience, [FileSet.Base] provides
// such a value, which is simply the end of the Pos interval of the most
// recently added file, plus one. Unless there is a need to extend an
// interval later, the result of [FileSet.Base] should be used as the
// base argument for [FileSet.AddFile].
//
// A [File] may be removed from a FileSet when it is no longer needed.
// This may reduce memory usage in a long-running application.
type FileSet struct {
	mutex sync.RWMutex         // protects the file set
	base  int                  // base offset for the next file
	files []*File              // list of files in the order added to the set
	last  atomic.Pointer[File] // cache of last file looked up
}
432
433// NewFileSet creates a new file set.
434func NewFileSet() *FileSet {
435	return &FileSet{
436		base: 1, // 0 == NoPos
437	}
438}
439
440// Base returns the minimum base offset that must be provided to
441// [FileSet.AddFile] when adding the next file.
442func (s *FileSet) Base() int {
443	s.mutex.RLock()
444	b := s.base
445	s.mutex.RUnlock()
446	return b
447}
448
449// AddFile adds a new file with a given filename, base offset, and file size
450// to the file set s and returns the file. Multiple files may have the same
451// name. The base offset must not be smaller than the [FileSet.Base], and
452// size must not be negative. As a special case, if a negative base is provided,
453// the current value of the [FileSet.Base] is used instead.
454//
455// Adding the file will set the file set's [FileSet.Base] value to base + size + 1
456// as the minimum base value for the next file. The following relationship
457// exists between a [Pos] value p for a given file offset offs:
458//
459//	int(p) = base + offs
460//
461// with offs in the range [0, size] and thus p in the range [base, base+size].
462// For convenience, [File.Pos] may be used to create file-specific position
463// values from a file offset.
464func (s *FileSet) AddFile(filename string, base, size int) *File {
465	// Allocate f outside the critical section.
466	f := &File{name: filename, size: size, lines: []int{0}}
467
468	s.mutex.Lock()
469	defer s.mutex.Unlock()
470	if base < 0 {
471		base = s.base
472	}
473	if base < s.base {
474		panic(fmt.Sprintf("invalid base %d (should be >= %d)", base, s.base))
475	}
476	f.base = base
477	if size < 0 {
478		panic(fmt.Sprintf("invalid size %d (should be >= 0)", size))
479	}
480	// base >= s.base && size >= 0
481	base += size + 1 // +1 because EOF also has a position
482	if base < 0 {
483		panic("token.Pos offset overflow (> 2G of source code in file set)")
484	}
485	// add the file to the file set
486	s.base = base
487	s.files = append(s.files, f)
488	s.last.Store(f)
489	return f
490}
491
492// RemoveFile removes a file from the [FileSet] so that subsequent
493// queries for its [Pos] interval yield a negative result.
494// This reduces the memory usage of a long-lived [FileSet] that
495// encounters an unbounded stream of files.
496//
497// Removing a file that does not belong to the set has no effect.
498func (s *FileSet) RemoveFile(file *File) {
499	s.last.CompareAndSwap(file, nil) // clear last file cache
500
501	s.mutex.Lock()
502	defer s.mutex.Unlock()
503
504	if i := searchFiles(s.files, file.base); i >= 0 && s.files[i] == file {
505		last := &s.files[len(s.files)-1]
506		s.files = append(s.files[:i], s.files[i+1:]...)
507		*last = nil // don't prolong lifetime when popping last element
508	}
509}
510
511// Iterate calls f for the files in the file set in the order they were added
512// until f returns false.
513func (s *FileSet) Iterate(f func(*File) bool) {
514	for i := 0; ; i++ {
515		var file *File
516		s.mutex.RLock()
517		if i < len(s.files) {
518			file = s.files[i]
519		}
520		s.mutex.RUnlock()
521		if file == nil || !f(file) {
522			break
523		}
524	}
525}
526
527func searchFiles(a []*File, x int) int {
528	i, found := slices.BinarySearchFunc(a, x, func(a *File, x int) int {
529		return cmp.Compare(a.base, x)
530	})
531	if !found {
532		// We want the File containing x, but if we didn't
533		// find x then i is the next one.
534		i--
535	}
536	return i
537}
538
539func (s *FileSet) file(p Pos) *File {
540	// common case: p is in last file.
541	if f := s.last.Load(); f != nil && f.base <= int(p) && int(p) <= f.base+f.size {
542		return f
543	}
544
545	s.mutex.RLock()
546	defer s.mutex.RUnlock()
547
548	// p is not in last file - search all files
549	if i := searchFiles(s.files, int(p)); i >= 0 {
550		f := s.files[i]
551		// f.base <= int(p) by definition of searchFiles
552		if int(p) <= f.base+f.size {
553			// Update cache of last file. A race is ok,
554			// but an exclusive lock causes heavy contention.
555			s.last.Store(f)
556			return f
557		}
558	}
559	return nil
560}
561
562// File returns the file that contains the position p.
563// If no such file is found (for instance for p == [NoPos]),
564// the result is nil.
565func (s *FileSet) File(p Pos) (f *File) {
566	if p != NoPos {
567		f = s.file(p)
568	}
569	return
570}
571
572// PositionFor converts a [Pos] p in the fileset into a [Position] value.
573// If adjusted is set, the position may be adjusted by position-altering
574// //line comments; otherwise those comments are ignored.
575// p must be a [Pos] value in s or [NoPos].
576func (s *FileSet) PositionFor(p Pos, adjusted bool) (pos Position) {
577	if p != NoPos {
578		if f := s.file(p); f != nil {
579			return f.position(p, adjusted)
580		}
581	}
582	return
583}
584
// Position converts a [Pos] p in the fileset into a Position value.
// Calling s.Position(p) is equivalent to calling s.PositionFor(p, true),
// so position-altering //line comments are taken into account.
func (s *FileSet) Position(p Pos) (pos Position) {
	return s.PositionFor(p, true)
}
590
591// -----------------------------------------------------------------------------
592// Helper functions
593
// searchInts returns the index of the last element of a that is <= x,
// or -1 if there is no such element. The binary search requires a to be
// sorted in increasing order.
func searchInts(a []int, x int) int {
	// This is a hand-written binary search equivalent to:
	//
	//   return sort.Search(len(a), func(i int) bool { return a[i] > x }) - 1
	//
	// With better compiler optimizations, this may not be needed in the
	// future, but at the moment this change improves the go/printer
	// benchmark performance by ~30%. This has a direct impact on the
	// speed of gofmt and thus seems worthwhile (2011-04-29).
	// TODO(gri): Remove this when compilers have caught up.
	lo, hi := 0, len(a)
	for lo < hi {
		mid := int(uint(lo+hi) >> 1) // avoid overflow when computing mid
		// lo ≤ mid < hi
		if x < a[mid] {
			hi = mid
		} else {
			lo = mid + 1
		}
	}
	// lo is the index of the first element > x.
	return lo - 1
}
616