1// Copyright 2021 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package pkgbits
6
7import (
8	"encoding/binary"
9	"errors"
10	"fmt"
11	"go/constant"
12	"go/token"
13	"io"
14	"math/big"
15	"os"
16	"runtime"
17	"strings"
18)
19
20// A PkgDecoder provides methods for decoding a package's Unified IR
21// export data.
22type PkgDecoder struct {
23	// version is the file format version.
24	version uint32
25
26	// sync indicates whether the file uses sync markers.
27	sync bool
28
29	// pkgPath is the package path for the package to be decoded.
30	//
31	// TODO(mdempsky): Remove; unneeded since CL 391014.
32	pkgPath string
33
34	// elemData is the full data payload of the encoded package.
35	// Elements are densely and contiguously packed together.
36	//
37	// The last 8 bytes of elemData are the package fingerprint.
38	elemData string
39
40	// elemEnds stores the byte-offset end positions of element
41	// bitstreams within elemData.
42	//
43	// For example, element I's bitstream data starts at elemEnds[I-1]
44	// (or 0, if I==0) and ends at elemEnds[I].
45	//
46	// Note: elemEnds is indexed by absolute indices, not
47	// section-relative indices.
48	elemEnds []uint32
49
50	// elemEndsEnds stores the index-offset end positions of relocation
51	// sections within elemEnds.
52	//
53	// For example, section K's end positions start at elemEndsEnds[K-1]
54	// (or 0, if K==0) and end at elemEndsEnds[K].
55	elemEndsEnds [numRelocs]uint32
56
57	scratchRelocEnt []RelocEnt
58}
59
60// PkgPath returns the package path for the package
61//
62// TODO(mdempsky): Remove; unneeded since CL 391014.
63func (pr *PkgDecoder) PkgPath() string { return pr.pkgPath }
64
65// SyncMarkers reports whether pr uses sync markers.
66func (pr *PkgDecoder) SyncMarkers() bool { return pr.sync }
67
68// NewPkgDecoder returns a PkgDecoder initialized to read the Unified
69// IR export data from input. pkgPath is the package path for the
70// compilation unit that produced the export data.
71//
72// TODO(mdempsky): Remove pkgPath parameter; unneeded since CL 391014.
73func NewPkgDecoder(pkgPath, input string) PkgDecoder {
74	pr := PkgDecoder{
75		pkgPath: pkgPath,
76	}
77
78	// TODO(mdempsky): Implement direct indexing of input string to
79	// avoid copying the position information.
80
81	r := strings.NewReader(input)
82
83	assert(binary.Read(r, binary.LittleEndian, &pr.version) == nil)
84
85	switch pr.version {
86	default:
87		panic(fmt.Errorf("unsupported version: %v", pr.version))
88	case 0:
89		// no flags
90	case 1:
91		var flags uint32
92		assert(binary.Read(r, binary.LittleEndian, &flags) == nil)
93		pr.sync = flags&flagSyncMarkers != 0
94	}
95
96	assert(binary.Read(r, binary.LittleEndian, pr.elemEndsEnds[:]) == nil)
97
98	pr.elemEnds = make([]uint32, pr.elemEndsEnds[len(pr.elemEndsEnds)-1])
99	assert(binary.Read(r, binary.LittleEndian, pr.elemEnds[:]) == nil)
100
101	pos, err := r.Seek(0, io.SeekCurrent)
102	assert(err == nil)
103
104	pr.elemData = input[pos:]
105	assert(len(pr.elemData)-8 == int(pr.elemEnds[len(pr.elemEnds)-1]))
106
107	return pr
108}
109
110// NumElems returns the number of elements in section k.
111func (pr *PkgDecoder) NumElems(k RelocKind) int {
112	count := int(pr.elemEndsEnds[k])
113	if k > 0 {
114		count -= int(pr.elemEndsEnds[k-1])
115	}
116	return count
117}
118
119// TotalElems returns the total number of elements across all sections.
120func (pr *PkgDecoder) TotalElems() int {
121	return len(pr.elemEnds)
122}
123
124// Fingerprint returns the package fingerprint.
125func (pr *PkgDecoder) Fingerprint() [8]byte {
126	var fp [8]byte
127	copy(fp[:], pr.elemData[len(pr.elemData)-8:])
128	return fp
129}
130
131// AbsIdx returns the absolute index for the given (section, index)
132// pair.
133func (pr *PkgDecoder) AbsIdx(k RelocKind, idx Index) int {
134	absIdx := int(idx)
135	if k > 0 {
136		absIdx += int(pr.elemEndsEnds[k-1])
137	}
138	if absIdx >= int(pr.elemEndsEnds[k]) {
139		errorf("%v:%v is out of bounds; %v", k, idx, pr.elemEndsEnds)
140	}
141	return absIdx
142}
143
144// DataIdx returns the raw element bitstream for the given (section,
145// index) pair.
146func (pr *PkgDecoder) DataIdx(k RelocKind, idx Index) string {
147	absIdx := pr.AbsIdx(k, idx)
148
149	var start uint32
150	if absIdx > 0 {
151		start = pr.elemEnds[absIdx-1]
152	}
153	end := pr.elemEnds[absIdx]
154
155	return pr.elemData[start:end]
156}
157
158// StringIdx returns the string value for the given string index.
159func (pr *PkgDecoder) StringIdx(idx Index) string {
160	return pr.DataIdx(RelocString, idx)
161}
162
163// NewDecoder returns a Decoder for the given (section, index) pair,
164// and decodes the given SyncMarker from the element bitstream.
165func (pr *PkgDecoder) NewDecoder(k RelocKind, idx Index, marker SyncMarker) Decoder {
166	r := pr.NewDecoderRaw(k, idx)
167	r.Sync(marker)
168	return r
169}
170
171// TempDecoder returns a Decoder for the given (section, index) pair,
172// and decodes the given SyncMarker from the element bitstream.
173// If possible the Decoder should be RetireDecoder'd when it is no longer
174// needed, this will avoid heap allocations.
175func (pr *PkgDecoder) TempDecoder(k RelocKind, idx Index, marker SyncMarker) Decoder {
176	r := pr.TempDecoderRaw(k, idx)
177	r.Sync(marker)
178	return r
179}
180
181func (pr *PkgDecoder) RetireDecoder(d *Decoder) {
182	pr.scratchRelocEnt = d.Relocs
183	d.Relocs = nil
184}
185
186// NewDecoderRaw returns a Decoder for the given (section, index) pair.
187//
188// Most callers should use NewDecoder instead.
189func (pr *PkgDecoder) NewDecoderRaw(k RelocKind, idx Index) Decoder {
190	r := Decoder{
191		common: pr,
192		k:      k,
193		Idx:    idx,
194	}
195
196	r.Data.Reset(pr.DataIdx(k, idx))
197	r.Sync(SyncRelocs)
198	r.Relocs = make([]RelocEnt, r.Len())
199	for i := range r.Relocs {
200		r.Sync(SyncReloc)
201		r.Relocs[i] = RelocEnt{RelocKind(r.Len()), Index(r.Len())}
202	}
203
204	return r
205}
206
207func (pr *PkgDecoder) TempDecoderRaw(k RelocKind, idx Index) Decoder {
208	r := Decoder{
209		common: pr,
210		k:      k,
211		Idx:    idx,
212	}
213
214	r.Data.Reset(pr.DataIdx(k, idx))
215	r.Sync(SyncRelocs)
216	l := r.Len()
217	if cap(pr.scratchRelocEnt) >= l {
218		r.Relocs = pr.scratchRelocEnt[:l]
219		pr.scratchRelocEnt = nil
220	} else {
221		r.Relocs = make([]RelocEnt, l)
222	}
223	for i := range r.Relocs {
224		r.Sync(SyncReloc)
225		r.Relocs[i] = RelocEnt{RelocKind(r.Len()), Index(r.Len())}
226	}
227
228	return r
229}
230
231// A Decoder provides methods for decoding an individual element's
232// bitstream data.
233type Decoder struct {
234	common *PkgDecoder
235
236	Relocs []RelocEnt
237	Data   strings.Reader
238
239	k   RelocKind
240	Idx Index
241}
242
243func (r *Decoder) checkErr(err error) {
244	if err != nil {
245		errorf("unexpected decoding error: %w", err)
246	}
247}
248
249func (r *Decoder) rawUvarint() uint64 {
250	x, err := readUvarint(&r.Data)
251	r.checkErr(err)
252	return x
253}
254
255// readUvarint is a type-specialized copy of encoding/binary.ReadUvarint.
256// This avoids the interface conversion and thus has better escape properties,
257// which flows up the stack.
258func readUvarint(r *strings.Reader) (uint64, error) {
259	var x uint64
260	var s uint
261	for i := 0; i < binary.MaxVarintLen64; i++ {
262		b, err := r.ReadByte()
263		if err != nil {
264			if i > 0 && err == io.EOF {
265				err = io.ErrUnexpectedEOF
266			}
267			return x, err
268		}
269		if b < 0x80 {
270			if i == binary.MaxVarintLen64-1 && b > 1 {
271				return x, overflow
272			}
273			return x | uint64(b)<<s, nil
274		}
275		x |= uint64(b&0x7f) << s
276		s += 7
277	}
278	return x, overflow
279}
280
281var overflow = errors.New("pkgbits: readUvarint overflows a 64-bit integer")
282
283func (r *Decoder) rawVarint() int64 {
284	ux := r.rawUvarint()
285
286	// Zig-zag decode.
287	x := int64(ux >> 1)
288	if ux&1 != 0 {
289		x = ^x
290	}
291	return x
292}
293
294func (r *Decoder) rawReloc(k RelocKind, idx int) Index {
295	e := r.Relocs[idx]
296	assert(e.Kind == k)
297	return e.Idx
298}
299
300// Sync decodes a sync marker from the element bitstream and asserts
301// that it matches the expected marker.
302//
303// If EnableSync is false, then Sync is a no-op.
304func (r *Decoder) Sync(mWant SyncMarker) {
305	if !r.common.sync {
306		return
307	}
308
309	pos, _ := r.Data.Seek(0, io.SeekCurrent)
310	mHave := SyncMarker(r.rawUvarint())
311	writerPCs := make([]int, r.rawUvarint())
312	for i := range writerPCs {
313		writerPCs[i] = int(r.rawUvarint())
314	}
315
316	if mHave == mWant {
317		return
318	}
319
320	// There's some tension here between printing:
321	//
322	// (1) full file paths that tools can recognize (e.g., so emacs
323	//     hyperlinks the "file:line" text for easy navigation), or
324	//
325	// (2) short file paths that are easier for humans to read (e.g., by
326	//     omitting redundant or irrelevant details, so it's easier to
327	//     focus on the useful bits that remain).
328	//
329	// The current formatting favors the former, as it seems more
330	// helpful in practice. But perhaps the formatting could be improved
331	// to better address both concerns. For example, use relative file
332	// paths if they would be shorter, or rewrite file paths to contain
333	// "$GOROOT" (like objabi.AbsFile does) if tools can be taught how
334	// to reliably expand that again.
335
336	fmt.Printf("export data desync: package %q, section %v, index %v, offset %v\n", r.common.pkgPath, r.k, r.Idx, pos)
337
338	fmt.Printf("\nfound %v, written at:\n", mHave)
339	if len(writerPCs) == 0 {
340		fmt.Printf("\t[stack trace unavailable; recompile package %q with -d=syncframes]\n", r.common.pkgPath)
341	}
342	for _, pc := range writerPCs {
343		fmt.Printf("\t%s\n", r.common.StringIdx(r.rawReloc(RelocString, pc)))
344	}
345
346	fmt.Printf("\nexpected %v, reading at:\n", mWant)
347	var readerPCs [32]uintptr // TODO(mdempsky): Dynamically size?
348	n := runtime.Callers(2, readerPCs[:])
349	for _, pc := range fmtFrames(readerPCs[:n]...) {
350		fmt.Printf("\t%s\n", pc)
351	}
352
353	// We already printed a stack trace for the reader, so now we can
354	// simply exit. Printing a second one with panic or base.Fatalf
355	// would just be noise.
356	os.Exit(1)
357}
358
359// Bool decodes and returns a bool value from the element bitstream.
360func (r *Decoder) Bool() bool {
361	r.Sync(SyncBool)
362	x, err := r.Data.ReadByte()
363	r.checkErr(err)
364	assert(x < 2)
365	return x != 0
366}
367
368// Int64 decodes and returns an int64 value from the element bitstream.
369func (r *Decoder) Int64() int64 {
370	r.Sync(SyncInt64)
371	return r.rawVarint()
372}
373
374// Int64 decodes and returns a uint64 value from the element bitstream.
375func (r *Decoder) Uint64() uint64 {
376	r.Sync(SyncUint64)
377	return r.rawUvarint()
378}
379
380// Len decodes and returns a non-negative int value from the element bitstream.
381func (r *Decoder) Len() int { x := r.Uint64(); v := int(x); assert(uint64(v) == x); return v }
382
383// Int decodes and returns an int value from the element bitstream.
384func (r *Decoder) Int() int { x := r.Int64(); v := int(x); assert(int64(v) == x); return v }
385
386// Uint decodes and returns a uint value from the element bitstream.
387func (r *Decoder) Uint() uint { x := r.Uint64(); v := uint(x); assert(uint64(v) == x); return v }
388
389// Code decodes a Code value from the element bitstream and returns
390// its ordinal value. It's the caller's responsibility to convert the
391// result to an appropriate Code type.
392//
393// TODO(mdempsky): Ideally this method would have signature "Code[T
394// Code] T" instead, but we don't allow generic methods and the
395// compiler can't depend on generics yet anyway.
396func (r *Decoder) Code(mark SyncMarker) int {
397	r.Sync(mark)
398	return r.Len()
399}
400
401// Reloc decodes a relocation of expected section k from the element
402// bitstream and returns an index to the referenced element.
403func (r *Decoder) Reloc(k RelocKind) Index {
404	r.Sync(SyncUseReloc)
405	return r.rawReloc(k, r.Len())
406}
407
408// String decodes and returns a string value from the element
409// bitstream.
410func (r *Decoder) String() string {
411	r.Sync(SyncString)
412	return r.common.StringIdx(r.Reloc(RelocString))
413}
414
415// Strings decodes and returns a variable-length slice of strings from
416// the element bitstream.
417func (r *Decoder) Strings() []string {
418	res := make([]string, r.Len())
419	for i := range res {
420		res[i] = r.String()
421	}
422	return res
423}
424
425// Value decodes and returns a constant.Value from the element
426// bitstream.
427func (r *Decoder) Value() constant.Value {
428	r.Sync(SyncValue)
429	isComplex := r.Bool()
430	val := r.scalar()
431	if isComplex {
432		val = constant.BinaryOp(val, token.ADD, constant.MakeImag(r.scalar()))
433	}
434	return val
435}
436
437func (r *Decoder) scalar() constant.Value {
438	switch tag := CodeVal(r.Code(SyncVal)); tag {
439	default:
440		panic(fmt.Errorf("unexpected scalar tag: %v", tag))
441
442	case ValBool:
443		return constant.MakeBool(r.Bool())
444	case ValString:
445		return constant.MakeString(r.String())
446	case ValInt64:
447		return constant.MakeInt64(r.Int64())
448	case ValBigInt:
449		return constant.Make(r.bigInt())
450	case ValBigRat:
451		num := r.bigInt()
452		denom := r.bigInt()
453		return constant.Make(new(big.Rat).SetFrac(num, denom))
454	case ValBigFloat:
455		return constant.Make(r.bigFloat())
456	}
457}
458
459func (r *Decoder) bigInt() *big.Int {
460	v := new(big.Int).SetBytes([]byte(r.String()))
461	if r.Bool() {
462		v.Neg(v)
463	}
464	return v
465}
466
467func (r *Decoder) bigFloat() *big.Float {
468	v := new(big.Float).SetPrec(512)
469	assert(v.UnmarshalText([]byte(r.String())) == nil)
470	return v
471}
472
473// @@@ Helpers
474
475// TODO(mdempsky): These should probably be removed. I think they're a
476// smell that the export data format is not yet quite right.
477
478// PeekPkgPath returns the package path for the specified package
479// index.
480func (pr *PkgDecoder) PeekPkgPath(idx Index) string {
481	var path string
482	{
483		r := pr.TempDecoder(RelocPkg, idx, SyncPkgDef)
484		path = r.String()
485		pr.RetireDecoder(&r)
486	}
487	if path == "" {
488		path = pr.pkgPath
489	}
490	return path
491}
492
493// PeekObj returns the package path, object name, and CodeObj for the
494// specified object index.
495func (pr *PkgDecoder) PeekObj(idx Index) (string, string, CodeObj) {
496	var ridx Index
497	var name string
498	var rcode int
499	{
500		r := pr.TempDecoder(RelocName, idx, SyncObject1)
501		r.Sync(SyncSym)
502		r.Sync(SyncPkg)
503		ridx = r.Reloc(RelocPkg)
504		name = r.String()
505		rcode = r.Code(SyncCodeObj)
506		pr.RetireDecoder(&r)
507	}
508
509	path := pr.PeekPkgPath(ridx)
510	assert(name != "")
511
512	tag := CodeObj(rcode)
513
514	return path, name, tag
515}
516