1// Copyright 2013 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Package archive implements reading of archive files generated by the Go
6// toolchain.
7package archive
8
9import (
10	"bufio"
11	"bytes"
12	"cmd/internal/bio"
13	"cmd/internal/goobj"
14	"errors"
15	"fmt"
16	"io"
17	"log"
18	"os"
19	"strconv"
20	"strings"
21	"time"
22	"unicode/utf8"
23)
24
25/*
26The archive format is:
27
28First, on a line by itself
29	!<arch>
30
31Then zero or more file records. Each file record has a fixed-size one-line header
32followed by data bytes followed by an optional padding byte. The header is:
33
34	%-16s%-12d%-6d%-6d%-8o%-10d`
35	name mtime uid gid mode size
36
37(note the trailing backquote). The %-16s here means at most 16 *bytes* of
38the name, and if shorter, space padded on the right.
39*/
40
// A Data is a reference to data stored in an object file.
// It records the offset and size of the data, so that a client can
// read the data only if necessary.
type Data struct {
	// Offset is the position of the first byte of the data. The parser
	// fills it from its running file offset.
	Offset int64
	// Size is the length of the data in bytes.
	Size   int64
}
48
// An Archive describes an archive (or a single object file) backed by f,
// together with the entries that were parsed from it or added to it.
type Archive struct {
	// f is the underlying file; it is exposed through File.
	f       *os.File
	// Entries lists the file records in the order they were parsed or added.
	Entries []Entry
}
53
54func (a *Archive) File() *os.File { return a.f }
55
// An Entry describes one file record of an archive.
// When produced by Parse, Mtime, Uid, Gid and Mode are only filled in
// if verbose parsing was requested; otherwise they are left zero.
type Entry struct {
	// Name is the record name with trailing padding spaces removed.
	Name  string
	// Type classifies the record contents.
	Type  EntryType
	// Mtime is the modification time in seconds since the Unix epoch.
	Mtime int64
	// Uid and Gid are the numeric owner and group from the header.
	Uid   int
	Gid   int
	// Mode is the file mode from the header; String prints only its
	// permission bits (Mode & 0777).
	Mode  os.FileMode
	// Data locates the record contents (excluding the header) in the file.
	Data
	Obj *GoObj // nil if this entry is not a Go object file
}
66
// An EntryType classifies the contents of an archive Entry.
type EntryType int

const (
	// EntryPkgDef marks the "__.PKGDEF" record.
	EntryPkgDef EntryType = iota
	// EntryGoObj marks a record whose data starts with the Go object header.
	EntryGoObj
	// EntryNativeObj marks any other record; its data is skipped, not parsed.
	EntryNativeObj
	// EntrySentinelNonObj marks a zero-size "preferlinkext" or
	// "dynimportfail" record, which is a marker rather than an object.
	EntrySentinelNonObj
)
75
76func (e *Entry) String() string {
77	return fmt.Sprintf("%s %6d/%-6d %12d %s %s",
78		(e.Mode & 0777).String(),
79		e.Uid,
80		e.Gid,
81		e.Size,
82		time.Unix(e.Mtime, 0).Format(timeFormat),
83		e.Name)
84}
85
// A GoObj describes a Go object file found in an archive or parsed on its own.
type GoObj struct {
	// TextHeader holds the textual header of the object, up to and
	// including the terminating "\n!\n".
	TextHeader []byte
	// Arch is the fourth whitespace-separated field of TextHeader,
	// or empty if the header has fewer than four fields.
	Arch       string
	// Data locates the part of the object that follows TextHeader.
	Data
}
91
const (
	// entryHeader is the fmt format AddEntry uses to write an entry header.
	// The name uses a plain %s because it is padded by exactly16Bytes first.
	entryHeader = "%s%-12d%-6d%-6d%-8o%-10d`\n"
	// In entryHeader the first entry, the name, is always printed as 16 bytes right-padded.
	// entryLen is the header length in bytes: the six fixed-width fields
	// plus the trailing backquote and newline.
	entryLen   = 16 + 12 + 6 + 6 + 8 + 10 + 1 + 1
	// timeFormat is the time layout (*Entry).String uses to print Mtime.
	timeFormat = "Jan _2 15:04 2006"
)
98
var (
	// archiveHeader is the magic line at the very start of an archive.
	archiveHeader = []byte("!<arch>\n")
	// archiveMagic is the two-byte terminator of every entry header.
	archiveMagic  = []byte("`\n")
	// goobjHeader is the prefix that identifies a Go object file.
	goobjHeader   = []byte("go objec") // truncated to size of archiveHeader

	// Errors reported while parsing archives and object files.
	errCorruptArchive   = errors.New("corrupt archive")
	errTruncatedArchive = errors.New("truncated archive")
	errCorruptObject    = errors.New("corrupt object file")
	errNotObject        = errors.New("unrecognized object file format")
)
109
// ErrGoObjOtherVersion is the error reported for a Go object file whose
// magic string belongs to a different toolchain version.
type ErrGoObjOtherVersion struct {
	magic []byte // the magic string found in the object file
}

// Error implements the error interface; the offending magic is shown quoted.
func (e ErrGoObjOtherVersion) Error() string {
	return "go object of a different version: " + fmt.Sprintf("%q", e.magic)
}
115
// An objReader is an object file reader.
// It tracks its position in the input and remembers the first error seen,
// so that later reads become no-ops once an error has occurred.
type objReader struct {
	// a is the archive being built up while parsing.
	a      *Archive
	// b is the buffered reader over the input file.
	b      *bio.Reader
	// err is the first error recorded via the error method, if any.
	err    error
	// offset is the current read position in the input file.
	offset int64
	// limit is the offset of the end of the input file.
	limit  int64
	// tmp is scratch space for headers and for discarding skipped bytes.
	tmp    [256]byte
}
125
126func (r *objReader) init(f *os.File) {
127	r.a = &Archive{f, nil}
128	r.offset, _ = f.Seek(0, io.SeekCurrent)
129	r.limit, _ = f.Seek(0, io.SeekEnd)
130	f.Seek(r.offset, io.SeekStart)
131	r.b = bio.NewReader(f)
132}
133
134// error records that an error occurred.
135// It returns only the first error, so that an error
136// caused by an earlier error does not discard information
137// about the earlier error.
138func (r *objReader) error(err error) error {
139	if r.err == nil {
140		if err == io.EOF {
141			err = io.ErrUnexpectedEOF
142		}
143		r.err = err
144	}
145	// panic("corrupt") // useful for debugging
146	return r.err
147}
148
149// peek returns the next n bytes without advancing the reader.
150func (r *objReader) peek(n int) ([]byte, error) {
151	if r.err != nil {
152		return nil, r.err
153	}
154	if r.offset >= r.limit {
155		r.error(io.ErrUnexpectedEOF)
156		return nil, r.err
157	}
158	b, err := r.b.Peek(n)
159	if err != nil {
160		if err != bufio.ErrBufferFull {
161			r.error(err)
162		}
163	}
164	return b, err
165}
166
167// readByte reads and returns a byte from the input file.
168// On I/O error or EOF, it records the error but returns byte 0.
169// A sequence of 0 bytes will eventually terminate any
170// parsing state in the object file. In particular, it ends the
171// reading of a varint.
172func (r *objReader) readByte() byte {
173	if r.err != nil {
174		return 0
175	}
176	if r.offset >= r.limit {
177		r.error(io.ErrUnexpectedEOF)
178		return 0
179	}
180	b, err := r.b.ReadByte()
181	if err != nil {
182		if err == io.EOF {
183			err = io.ErrUnexpectedEOF
184		}
185		r.error(err)
186		b = 0
187	} else {
188		r.offset++
189	}
190	return b
191}
192
193// readFull reads exactly len(b) bytes from the input file.
194// If an error occurs, read returns the error but also
195// records it, so it is safe for callers to ignore the result
196// as long as delaying the report is not a problem.
197func (r *objReader) readFull(b []byte) error {
198	if r.err != nil {
199		return r.err
200	}
201	if r.offset+int64(len(b)) > r.limit {
202		return r.error(io.ErrUnexpectedEOF)
203	}
204	n, err := io.ReadFull(r.b, b)
205	r.offset += int64(n)
206	if err != nil {
207		return r.error(err)
208	}
209	return nil
210}
211
212// skip skips n bytes in the input.
213func (r *objReader) skip(n int64) {
214	if n < 0 {
215		r.error(fmt.Errorf("debug/goobj: internal error: misuse of skip"))
216	}
217	if n < int64(len(r.tmp)) {
218		// Since the data is so small, a just reading from the buffered
219		// reader is better than flushing the buffer and seeking.
220		r.readFull(r.tmp[:n])
221	} else if n <= int64(r.b.Buffered()) {
222		// Even though the data is not small, it has already been read.
223		// Advance the buffer instead of seeking.
224		for n > int64(len(r.tmp)) {
225			r.readFull(r.tmp[:])
226			n -= int64(len(r.tmp))
227		}
228		r.readFull(r.tmp[:n])
229	} else {
230		// Seek, giving up buffered data.
231		r.b.MustSeek(r.offset+n, io.SeekStart)
232		r.offset += n
233	}
234}
235
236// New writes to f to make a new archive.
237func New(f *os.File) (*Archive, error) {
238	_, err := f.Write(archiveHeader)
239	if err != nil {
240		return nil, err
241	}
242	return &Archive{f: f}, nil
243}
244
245// Parse parses an object file or archive from f.
246func Parse(f *os.File, verbose bool) (*Archive, error) {
247	var r objReader
248	r.init(f)
249	t, err := r.peek(8)
250	if err != nil {
251		if err == io.EOF {
252			err = io.ErrUnexpectedEOF
253		}
254		return nil, err
255	}
256
257	switch {
258	default:
259		return nil, errNotObject
260
261	case bytes.Equal(t, archiveHeader):
262		if err := r.parseArchive(verbose); err != nil {
263			return nil, err
264		}
265	case bytes.Equal(t, goobjHeader):
266		off := r.offset
267		o := &GoObj{}
268		if err := r.parseObject(o, r.limit-off); err != nil {
269			return nil, err
270		}
271		r.a.Entries = []Entry{{
272			Name: f.Name(),
273			Type: EntryGoObj,
274			Data: Data{off, r.limit - off},
275			Obj:  o,
276		}}
277	}
278
279	return r.a, nil
280}
281
// trimSpace returns b as a string with all trailing space characters
// dropped, which is how the right-padded fields of an archive header
// are decoded. Only ' ' is trimmed; other whitespace is kept.
func trimSpace(b []byte) string {
	isPad := func(r rune) bool { return r == ' ' }
	trimmed := bytes.TrimRightFunc(b, isPad)
	return string(trimmed)
}
287
288// parseArchive parses a Unix archive of Go object files.
289func (r *objReader) parseArchive(verbose bool) error {
290	r.readFull(r.tmp[:8]) // consume header (already checked)
291	for r.offset < r.limit {
292		if err := r.readFull(r.tmp[:60]); err != nil {
293			return err
294		}
295		data := r.tmp[:60]
296
297		// Each file is preceded by this text header (slice indices in first column):
298		//	 0:16	name
299		//	16:28 date
300		//	28:34 uid
301		//	34:40 gid
302		//	40:48 mode
303		//	48:58 size
304		//	58:60 magic - `\n
305		// We only care about name, size, and magic, unless in verbose mode.
306		// The fields are space-padded on the right.
307		// The size is in decimal.
308		// The file data - size bytes - follows the header.
309		// Headers are 2-byte aligned, so if size is odd, an extra padding
310		// byte sits between the file data and the next header.
311		// The file data that follows is padded to an even number of bytes:
312		// if size is odd, an extra padding byte is inserted betw the next header.
313		if len(data) < 60 {
314			return errTruncatedArchive
315		}
316		if !bytes.Equal(data[58:60], archiveMagic) {
317			return errCorruptArchive
318		}
319		name := trimSpace(data[0:16])
320		var err error
321		get := func(start, end, base, bitsize int) int64 {
322			if err != nil {
323				return 0
324			}
325			var v int64
326			v, err = strconv.ParseInt(trimSpace(data[start:end]), base, bitsize)
327			return v
328		}
329		size := get(48, 58, 10, 64)
330		var (
331			mtime    int64
332			uid, gid int
333			mode     os.FileMode
334		)
335		if verbose {
336			mtime = get(16, 28, 10, 64)
337			uid = int(get(28, 34, 10, 32))
338			gid = int(get(34, 40, 10, 32))
339			mode = os.FileMode(get(40, 48, 8, 32))
340		}
341		if err != nil {
342			return errCorruptArchive
343		}
344		data = data[60:]
345		fsize := size + size&1
346		if fsize < 0 || fsize < size {
347			return errCorruptArchive
348		}
349		switch name {
350		case "__.PKGDEF":
351			r.a.Entries = append(r.a.Entries, Entry{
352				Name:  name,
353				Type:  EntryPkgDef,
354				Mtime: mtime,
355				Uid:   uid,
356				Gid:   gid,
357				Mode:  mode,
358				Data:  Data{r.offset, size},
359			})
360			r.skip(size)
361		case "preferlinkext", "dynimportfail":
362			if size == 0 {
363				// These are not actual objects, but rather sentinel
364				// entries put into the archive by the Go command to
365				// be read by the linker. See #62036.
366				r.a.Entries = append(r.a.Entries, Entry{
367					Name:  name,
368					Type:  EntrySentinelNonObj,
369					Mtime: mtime,
370					Uid:   uid,
371					Gid:   gid,
372					Mode:  mode,
373					Data:  Data{r.offset, size},
374				})
375				break
376			}
377			fallthrough
378		default:
379			var typ EntryType
380			var o *GoObj
381			offset := r.offset
382			p, err := r.peek(8)
383			if err != nil {
384				return err
385			}
386			if bytes.Equal(p, goobjHeader) {
387				typ = EntryGoObj
388				o = &GoObj{}
389				err := r.parseObject(o, size)
390				if err != nil {
391					return err
392				}
393			} else {
394				typ = EntryNativeObj
395				r.skip(size)
396			}
397			r.a.Entries = append(r.a.Entries, Entry{
398				Name:  name,
399				Type:  typ,
400				Mtime: mtime,
401				Uid:   uid,
402				Gid:   gid,
403				Mode:  mode,
404				Data:  Data{offset, size},
405				Obj:   o,
406			})
407		}
408		if size&1 != 0 {
409			r.skip(1)
410		}
411	}
412	return nil
413}
414
415// parseObject parses a single Go object file.
416// The object file consists of a textual header ending in "\n!\n"
417// and then the part we want to parse begins.
418// The format of that part is defined in a comment at the top
419// of cmd/internal/goobj/objfile.go.
420func (r *objReader) parseObject(o *GoObj, size int64) error {
421	h := make([]byte, 0, 256)
422	var c1, c2, c3 byte
423	for {
424		c1, c2, c3 = c2, c3, r.readByte()
425		h = append(h, c3)
426		// The new export format can contain 0 bytes.
427		// Don't consider them errors, only look for r.err != nil.
428		if r.err != nil {
429			return errCorruptObject
430		}
431		if c1 == '\n' && c2 == '!' && c3 == '\n' {
432			break
433		}
434	}
435	o.TextHeader = h
436	hs := strings.Fields(string(h))
437	if len(hs) >= 4 {
438		o.Arch = hs[3]
439	}
440	o.Offset = r.offset
441	o.Size = size - int64(len(h))
442
443	p, err := r.peek(8)
444	if err != nil {
445		return err
446	}
447	if !bytes.Equal(p, []byte(goobj.Magic)) {
448		if bytes.HasPrefix(p, []byte("\x00go1")) && bytes.HasSuffix(p, []byte("ld")) {
449			return r.error(ErrGoObjOtherVersion{p[1:]}) // strip the \x00 byte
450		}
451		return r.error(errCorruptObject)
452	}
453	r.skip(o.Size)
454	return nil
455}
456
457// AddEntry adds an entry to the end of a, with the content from r.
458func (a *Archive) AddEntry(typ EntryType, name string, mtime int64, uid, gid int, mode os.FileMode, size int64, r io.Reader) {
459	off, err := a.f.Seek(0, io.SeekEnd)
460	if err != nil {
461		log.Fatal(err)
462	}
463	n, err := fmt.Fprintf(a.f, entryHeader, exactly16Bytes(name), mtime, uid, gid, mode, size)
464	if err != nil || n != entryLen {
465		log.Fatal("writing entry header: ", err)
466	}
467	n1, _ := io.CopyN(a.f, r, size)
468	if n1 != size {
469		log.Fatal(err)
470	}
471	if (off+size)&1 != 0 {
472		a.f.Write([]byte{0}) // pad to even byte
473	}
474	a.Entries = append(a.Entries, Entry{
475		Name:  name,
476		Type:  typ,
477		Mtime: mtime,
478		Uid:   uid,
479		Gid:   gid,
480		Mode:  mode,
481		Data:  Data{off + entryLen, size},
482	})
483}
484
// exactly16Bytes returns s as a field of exactly 16 bytes: whole runes are
// dropped from the end while s is longer than 16 bytes, and the remainder
// is padded on the right with spaces.
// The width is counted in bytes, as the entry header requires; fmt would
// count runes.
func exactly16Bytes(s string) string {
	const width = 16
	for len(s) > width {
		_, size := utf8.DecodeLastRuneInString(s)
		s = s[:len(s)-size]
	}
	var field [width]byte
	for i := range field {
		field[i] = ' '
	}
	copy(field[:], s)
	return string(field[:])
}
497
// architecture-independent object file output

// HeaderSize is the size in bytes of an archive entry header.
const HeaderSize = 60

// ReadHeader reads one archive entry header from b and returns the size
// of the entry's data as recorded in the header.
// It returns -1 if a full header cannot be read, if the entry name does
// not start with name, or if the size field is not a valid non-negative
// decimal number.
func ReadHeader(b *bufio.Reader, name string) int {
	var buf [HeaderSize]byte
	if _, err := io.ReadFull(b, buf[:]); err != nil {
		return -1
	}
	aname := strings.Trim(string(buf[0:16]), " ")
	if !strings.HasPrefix(aname, name) {
		return -1
	}
	asize := strings.Trim(string(buf[48:58]), " ")
	size, err := strconv.Atoi(asize)
	if err != nil || size < 0 {
		// A malformed size must not be mistaken for an empty entry.
		return -1
	}
	return size
}
514
// FormatHeader writes an archive entry header for a file with the given
// name and size into arhdr. The mtime, uid and gid fields are written as
// 0 and the mode as 0644. At most len(arhdr) bytes are written.
func FormatHeader(arhdr []byte, name string, size int64) {
	const (
		mtime = 0
		uid   = 0
		gid   = 0
		mode  = 0644
	)
	hdr := fmt.Sprintf("%-16s%-12d%-6d%-6d%-8o%-10d`\n", name, mtime, uid, gid, mode, size)
	copy(arhdr, hdr)
}
518