1// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5/*
6Package pe implements access to PE (Microsoft Windows Portable Executable) files.
7
8# Security
9
10This package is not designed to be hardened against adversarial inputs, and is
11outside the scope of https://go.dev/security/policy. In particular, only basic
12validation is done when parsing object files. As such, care should be taken when
13parsing untrusted inputs, as parsing malformed files may consume significant
14resources, or cause panics.
15*/
16package pe
17
18import (
19	"bytes"
20	"compress/zlib"
21	"debug/dwarf"
22	"encoding/binary"
23	"errors"
24	"fmt"
25	"io"
26	"os"
27	"strings"
28)
29
// A File represents an open PE file.
//
// NewFile fills in the embedded FileHeader, the optional header (left nil
// when the file header reports an optional header size of zero), the
// section list, both symbol views and the string table.
type File struct {
	FileHeader
	OptionalHeader any // of type *OptionalHeader32 or *OptionalHeader64
	Sections       []*Section
	Symbols        []*Symbol    // COFF symbols with auxiliary symbol records removed
	COFFSymbols    []COFFSymbol // all COFF symbols (including auxiliary symbol records)
	StringTable    StringTable

	closer io.Closer // set by Open to the underlying os.File; nil when built by NewFile alone
}
41
42// Open opens the named file using [os.Open] and prepares it for use as a PE binary.
43func Open(name string) (*File, error) {
44	f, err := os.Open(name)
45	if err != nil {
46		return nil, err
47	}
48	ff, err := NewFile(f)
49	if err != nil {
50		f.Close()
51		return nil, err
52	}
53	ff.closer = f
54	return ff, nil
55}
56
57// Close closes the [File].
58// If the [File] was created using [NewFile] directly instead of [Open],
59// Close has no effect.
60func (f *File) Close() error {
61	var err error
62	if f.closer != nil {
63		err = f.closer.Close()
64		f.closer = nil
65	}
66	return err
67}
68
69// TODO(brainman): add Load function, as a replacement for NewFile, that does not call removeAuxSymbols (for performance)
70
71// NewFile creates a new [File] for accessing a PE binary in an underlying reader.
72func NewFile(r io.ReaderAt) (*File, error) {
73	f := new(File)
74	sr := io.NewSectionReader(r, 0, 1<<63-1)
75
76	var dosheader [96]byte
77	if _, err := r.ReadAt(dosheader[0:], 0); err != nil {
78		return nil, err
79	}
80	var base int64
81	if dosheader[0] == 'M' && dosheader[1] == 'Z' {
82		signoff := int64(binary.LittleEndian.Uint32(dosheader[0x3c:]))
83		var sign [4]byte
84		r.ReadAt(sign[:], signoff)
85		if !(sign[0] == 'P' && sign[1] == 'E' && sign[2] == 0 && sign[3] == 0) {
86			return nil, fmt.Errorf("invalid PE file signature: % x", sign)
87		}
88		base = signoff + 4
89	} else {
90		base = int64(0)
91	}
92	sr.Seek(base, io.SeekStart)
93	if err := binary.Read(sr, binary.LittleEndian, &f.FileHeader); err != nil {
94		return nil, err
95	}
96	switch f.FileHeader.Machine {
97	case IMAGE_FILE_MACHINE_AMD64,
98		IMAGE_FILE_MACHINE_ARM64,
99		IMAGE_FILE_MACHINE_ARMNT,
100		IMAGE_FILE_MACHINE_I386,
101		IMAGE_FILE_MACHINE_RISCV32,
102		IMAGE_FILE_MACHINE_RISCV64,
103		IMAGE_FILE_MACHINE_RISCV128,
104		IMAGE_FILE_MACHINE_UNKNOWN:
105		// ok
106	default:
107		return nil, fmt.Errorf("unrecognized PE machine: %#x", f.FileHeader.Machine)
108	}
109
110	var err error
111
112	// Read string table.
113	f.StringTable, err = readStringTable(&f.FileHeader, sr)
114	if err != nil {
115		return nil, err
116	}
117
118	// Read symbol table.
119	f.COFFSymbols, err = readCOFFSymbols(&f.FileHeader, sr)
120	if err != nil {
121		return nil, err
122	}
123	f.Symbols, err = removeAuxSymbols(f.COFFSymbols, f.StringTable)
124	if err != nil {
125		return nil, err
126	}
127
128	// Seek past file header.
129	_, err = sr.Seek(base+int64(binary.Size(f.FileHeader)), io.SeekStart)
130	if err != nil {
131		return nil, err
132	}
133
134	// Read optional header.
135	f.OptionalHeader, err = readOptionalHeader(sr, f.FileHeader.SizeOfOptionalHeader)
136	if err != nil {
137		return nil, err
138	}
139
140	// Process sections.
141	f.Sections = make([]*Section, f.FileHeader.NumberOfSections)
142	for i := 0; i < int(f.FileHeader.NumberOfSections); i++ {
143		sh := new(SectionHeader32)
144		if err := binary.Read(sr, binary.LittleEndian, sh); err != nil {
145			return nil, err
146		}
147		name, err := sh.fullName(f.StringTable)
148		if err != nil {
149			return nil, err
150		}
151		s := new(Section)
152		s.SectionHeader = SectionHeader{
153			Name:                 name,
154			VirtualSize:          sh.VirtualSize,
155			VirtualAddress:       sh.VirtualAddress,
156			Size:                 sh.SizeOfRawData,
157			Offset:               sh.PointerToRawData,
158			PointerToRelocations: sh.PointerToRelocations,
159			PointerToLineNumbers: sh.PointerToLineNumbers,
160			NumberOfRelocations:  sh.NumberOfRelocations,
161			NumberOfLineNumbers:  sh.NumberOfLineNumbers,
162			Characteristics:      sh.Characteristics,
163		}
164		r2 := r
165		if sh.PointerToRawData == 0 { // .bss must have all 0s
166			r2 = &nobitsSectionReader{}
167		}
168		s.sr = io.NewSectionReader(r2, int64(s.SectionHeader.Offset), int64(s.SectionHeader.Size))
169		s.ReaderAt = s.sr
170		f.Sections[i] = s
171	}
172	for i := range f.Sections {
173		var err error
174		f.Sections[i].Relocs, err = readRelocs(&f.Sections[i].SectionHeader, sr)
175		if err != nil {
176			return nil, err
177		}
178	}
179
180	return f, nil
181}
182
// nobitsSectionReader is the reader NewFile installs for sections whose
// header has a zero PointerToRawData, i.e. sections with no bytes in the file.
type nobitsSectionReader struct{}

// ReadAt never reads anything: it always reports zero bytes and an error.
func (*nobitsSectionReader) ReadAt(p []byte, off int64) (n int, err error) {
	err = errors.New("unexpected read from section with uninitialized data")
	return 0, err
}
188
// getString returns the zero-terminated string that begins at offset start
// of section. The boolean result is false when start lies outside section
// or when no terminating zero byte follows start.
func getString(section []byte, start int) (string, bool) {
	if start < 0 || start >= len(section) {
		return "", false
	}

	tail := section[start:]
	n := bytes.IndexByte(tail, 0)
	if n < 0 {
		// Unterminated string.
		return "", false
	}
	return string(tail[:n]), true
}
202
203// Section returns the first section with the given name, or nil if no such
204// section exists.
205func (f *File) Section(name string) *Section {
206	for _, s := range f.Sections {
207		if s.Name == name {
208			return s
209		}
210	}
211	return nil
212}
213
214func (f *File) DWARF() (*dwarf.Data, error) {
215	dwarfSuffix := func(s *Section) string {
216		switch {
217		case strings.HasPrefix(s.Name, ".debug_"):
218			return s.Name[7:]
219		case strings.HasPrefix(s.Name, ".zdebug_"):
220			return s.Name[8:]
221		default:
222			return ""
223		}
224
225	}
226
227	// sectionData gets the data for s and checks its size.
228	sectionData := func(s *Section) ([]byte, error) {
229		b, err := s.Data()
230		if err != nil && uint32(len(b)) < s.Size {
231			return nil, err
232		}
233
234		if 0 < s.VirtualSize && s.VirtualSize < s.Size {
235			b = b[:s.VirtualSize]
236		}
237
238		if len(b) >= 12 && string(b[:4]) == "ZLIB" {
239			dlen := binary.BigEndian.Uint64(b[4:12])
240			dbuf := make([]byte, dlen)
241			r, err := zlib.NewReader(bytes.NewBuffer(b[12:]))
242			if err != nil {
243				return nil, err
244			}
245			if _, err := io.ReadFull(r, dbuf); err != nil {
246				return nil, err
247			}
248			if err := r.Close(); err != nil {
249				return nil, err
250			}
251			b = dbuf
252		}
253		return b, nil
254	}
255
256	// There are many other DWARF sections, but these
257	// are the ones the debug/dwarf package uses.
258	// Don't bother loading others.
259	var dat = map[string][]byte{"abbrev": nil, "info": nil, "str": nil, "line": nil, "ranges": nil}
260	for _, s := range f.Sections {
261		suffix := dwarfSuffix(s)
262		if suffix == "" {
263			continue
264		}
265		if _, ok := dat[suffix]; !ok {
266			continue
267		}
268
269		b, err := sectionData(s)
270		if err != nil {
271			return nil, err
272		}
273		dat[suffix] = b
274	}
275
276	d, err := dwarf.New(dat["abbrev"], nil, nil, dat["info"], dat["line"], nil, dat["ranges"], dat["str"])
277	if err != nil {
278		return nil, err
279	}
280
281	// Look for DWARF4 .debug_types sections and DWARF5 sections.
282	for i, s := range f.Sections {
283		suffix := dwarfSuffix(s)
284		if suffix == "" {
285			continue
286		}
287		if _, ok := dat[suffix]; ok {
288			// Already handled.
289			continue
290		}
291
292		b, err := sectionData(s)
293		if err != nil {
294			return nil, err
295		}
296
297		if suffix == "types" {
298			err = d.AddTypes(fmt.Sprintf("types-%d", i), b)
299		} else {
300			err = d.AddSection(".debug_"+suffix, b)
301		}
302		if err != nil {
303			return nil, err
304		}
305	}
306
307	return d, nil
308}
309
310// TODO(brainman): document ImportDirectory once we decide what to do with it.
311
// ImportDirectory holds one 20-byte import directory entry as decoded by
// [File.ImportedSymbols]: five consecutive little-endian uint32 values, in
// the order of the exported fields below. ImportedSymbols stops decoding at
// the first entry whose OriginalFirstThunk is zero, and uses
// OriginalFirstThunk and Name as addresses that it rebases against the
// VirtualAddress of the section containing the import directory.
type ImportDirectory struct {
	OriginalFirstThunk uint32
	TimeDateStamp      uint32
	ForwarderChain     uint32
	Name               uint32
	FirstThunk         uint32

	dll string // library name looked up from Name by ImportedSymbols
}
321
322// ImportedSymbols returns the names of all symbols
323// referred to by the binary f that are expected to be
324// satisfied by other libraries at dynamic load time.
325// It does not return weak symbols.
326func (f *File) ImportedSymbols() ([]string, error) {
327	if f.OptionalHeader == nil {
328		return nil, nil
329	}
330
331	_, pe64 := f.OptionalHeader.(*OptionalHeader64)
332
333	// grab the number of data directory entries
334	var dd_length uint32
335	if pe64 {
336		dd_length = f.OptionalHeader.(*OptionalHeader64).NumberOfRvaAndSizes
337	} else {
338		dd_length = f.OptionalHeader.(*OptionalHeader32).NumberOfRvaAndSizes
339	}
340
341	// check that the length of data directory entries is large
342	// enough to include the imports directory.
343	if dd_length < IMAGE_DIRECTORY_ENTRY_IMPORT+1 {
344		return nil, nil
345	}
346
347	// grab the import data directory entry
348	var idd DataDirectory
349	if pe64 {
350		idd = f.OptionalHeader.(*OptionalHeader64).DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT]
351	} else {
352		idd = f.OptionalHeader.(*OptionalHeader32).DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT]
353	}
354
355	// figure out which section contains the import directory table
356	var ds *Section
357	ds = nil
358	for _, s := range f.Sections {
359		if s.Offset == 0 {
360			continue
361		}
362		// We are using distance between s.VirtualAddress and idd.VirtualAddress
363		// to avoid potential overflow of uint32 caused by addition of s.VirtualSize
364		// to s.VirtualAddress.
365		if s.VirtualAddress <= idd.VirtualAddress && idd.VirtualAddress-s.VirtualAddress < s.VirtualSize {
366			ds = s
367			break
368		}
369	}
370
371	// didn't find a section, so no import libraries were found
372	if ds == nil {
373		return nil, nil
374	}
375
376	d, err := ds.Data()
377	if err != nil {
378		return nil, err
379	}
380
381	// seek to the virtual address specified in the import data directory
382	d = d[idd.VirtualAddress-ds.VirtualAddress:]
383
384	// start decoding the import directory
385	var ida []ImportDirectory
386	for len(d) >= 20 {
387		var dt ImportDirectory
388		dt.OriginalFirstThunk = binary.LittleEndian.Uint32(d[0:4])
389		dt.TimeDateStamp = binary.LittleEndian.Uint32(d[4:8])
390		dt.ForwarderChain = binary.LittleEndian.Uint32(d[8:12])
391		dt.Name = binary.LittleEndian.Uint32(d[12:16])
392		dt.FirstThunk = binary.LittleEndian.Uint32(d[16:20])
393		d = d[20:]
394		if dt.OriginalFirstThunk == 0 {
395			break
396		}
397		ida = append(ida, dt)
398	}
399	// TODO(brainman): this needs to be rewritten
400	//  ds.Data() returns contents of section containing import table. Why store in variable called "names"?
401	//  Why we are retrieving it second time? We already have it in "d", and it is not modified anywhere.
402	//  getString does not extracts a string from symbol string table (as getString doco says).
403	//  Why ds.Data() called again and again in the loop?
404	//  Needs test before rewrite.
405	names, _ := ds.Data()
406	var all []string
407	for _, dt := range ida {
408		dt.dll, _ = getString(names, int(dt.Name-ds.VirtualAddress))
409		d, _ = ds.Data()
410		// seek to OriginalFirstThunk
411		d = d[dt.OriginalFirstThunk-ds.VirtualAddress:]
412		for len(d) > 0 {
413			if pe64 { // 64bit
414				va := binary.LittleEndian.Uint64(d[0:8])
415				d = d[8:]
416				if va == 0 {
417					break
418				}
419				if va&0x8000000000000000 > 0 { // is Ordinal
420					// TODO add dynimport ordinal support.
421				} else {
422					fn, _ := getString(names, int(uint32(va)-ds.VirtualAddress+2))
423					all = append(all, fn+":"+dt.dll)
424				}
425			} else { // 32bit
426				va := binary.LittleEndian.Uint32(d[0:4])
427				d = d[4:]
428				if va == 0 {
429					break
430				}
431				if va&0x80000000 > 0 { // is Ordinal
432					// TODO add dynimport ordinal support.
433					//ord := va&0x0000FFFF
434				} else {
435					fn, _ := getString(names, int(va-ds.VirtualAddress+2))
436					all = append(all, fn+":"+dt.dll)
437				}
438			}
439		}
440	}
441
442	return all, nil
443}
444
445// ImportedLibraries returns the names of all libraries
446// referred to by the binary f that are expected to be
447// linked with the binary at dynamic link time.
448func (f *File) ImportedLibraries() ([]string, error) {
449	// TODO
450	// cgo -dynimport don't use this for windows PE, so just return.
451	return nil, nil
452}
453
// FormatError is unused.
// The type is retained for compatibility.
type FormatError struct{}

// Error implements the error interface. The message is always the same
// because the type carries no details.
func (e *FormatError) Error() string {
	const msg = "unknown error"
	return msg
}
462
463// readOptionalHeader accepts an io.ReadSeeker pointing to optional header in the PE file
464// and its size as seen in the file header.
465// It parses the given size of bytes and returns optional header. It infers whether the
466// bytes being parsed refer to 32 bit or 64 bit version of optional header.
467func readOptionalHeader(r io.ReadSeeker, sz uint16) (any, error) {
468	// If optional header size is 0, return empty optional header.
469	if sz == 0 {
470		return nil, nil
471	}
472
473	var (
474		// First couple of bytes in option header state its type.
475		// We need to read them first to determine the type and
476		// validity of optional header.
477		ohMagic   uint16
478		ohMagicSz = binary.Size(ohMagic)
479	)
480
481	// If optional header size is greater than 0 but less than its magic size, return error.
482	if sz < uint16(ohMagicSz) {
483		return nil, fmt.Errorf("optional header size is less than optional header magic size")
484	}
485
486	// read reads from io.ReadSeeke, r, into data.
487	var err error
488	read := func(data any) bool {
489		err = binary.Read(r, binary.LittleEndian, data)
490		return err == nil
491	}
492
493	if !read(&ohMagic) {
494		return nil, fmt.Errorf("failure to read optional header magic: %v", err)
495
496	}
497
498	switch ohMagic {
499	case 0x10b: // PE32
500		var (
501			oh32 OptionalHeader32
502			// There can be 0 or more data directories. So the minimum size of optional
503			// header is calculated by subtracting oh32.DataDirectory size from oh32 size.
504			oh32MinSz = binary.Size(oh32) - binary.Size(oh32.DataDirectory)
505		)
506
507		if sz < uint16(oh32MinSz) {
508			return nil, fmt.Errorf("optional header size(%d) is less minimum size (%d) of PE32 optional header", sz, oh32MinSz)
509		}
510
511		// Init oh32 fields
512		oh32.Magic = ohMagic
513		if !read(&oh32.MajorLinkerVersion) ||
514			!read(&oh32.MinorLinkerVersion) ||
515			!read(&oh32.SizeOfCode) ||
516			!read(&oh32.SizeOfInitializedData) ||
517			!read(&oh32.SizeOfUninitializedData) ||
518			!read(&oh32.AddressOfEntryPoint) ||
519			!read(&oh32.BaseOfCode) ||
520			!read(&oh32.BaseOfData) ||
521			!read(&oh32.ImageBase) ||
522			!read(&oh32.SectionAlignment) ||
523			!read(&oh32.FileAlignment) ||
524			!read(&oh32.MajorOperatingSystemVersion) ||
525			!read(&oh32.MinorOperatingSystemVersion) ||
526			!read(&oh32.MajorImageVersion) ||
527			!read(&oh32.MinorImageVersion) ||
528			!read(&oh32.MajorSubsystemVersion) ||
529			!read(&oh32.MinorSubsystemVersion) ||
530			!read(&oh32.Win32VersionValue) ||
531			!read(&oh32.SizeOfImage) ||
532			!read(&oh32.SizeOfHeaders) ||
533			!read(&oh32.CheckSum) ||
534			!read(&oh32.Subsystem) ||
535			!read(&oh32.DllCharacteristics) ||
536			!read(&oh32.SizeOfStackReserve) ||
537			!read(&oh32.SizeOfStackCommit) ||
538			!read(&oh32.SizeOfHeapReserve) ||
539			!read(&oh32.SizeOfHeapCommit) ||
540			!read(&oh32.LoaderFlags) ||
541			!read(&oh32.NumberOfRvaAndSizes) {
542			return nil, fmt.Errorf("failure to read PE32 optional header: %v", err)
543		}
544
545		dd, err := readDataDirectories(r, sz-uint16(oh32MinSz), oh32.NumberOfRvaAndSizes)
546		if err != nil {
547			return nil, err
548		}
549
550		copy(oh32.DataDirectory[:], dd)
551
552		return &oh32, nil
553	case 0x20b: // PE32+
554		var (
555			oh64 OptionalHeader64
556			// There can be 0 or more data directories. So the minimum size of optional
557			// header is calculated by subtracting oh64.DataDirectory size from oh64 size.
558			oh64MinSz = binary.Size(oh64) - binary.Size(oh64.DataDirectory)
559		)
560
561		if sz < uint16(oh64MinSz) {
562			return nil, fmt.Errorf("optional header size(%d) is less minimum size (%d) for PE32+ optional header", sz, oh64MinSz)
563		}
564
565		// Init oh64 fields
566		oh64.Magic = ohMagic
567		if !read(&oh64.MajorLinkerVersion) ||
568			!read(&oh64.MinorLinkerVersion) ||
569			!read(&oh64.SizeOfCode) ||
570			!read(&oh64.SizeOfInitializedData) ||
571			!read(&oh64.SizeOfUninitializedData) ||
572			!read(&oh64.AddressOfEntryPoint) ||
573			!read(&oh64.BaseOfCode) ||
574			!read(&oh64.ImageBase) ||
575			!read(&oh64.SectionAlignment) ||
576			!read(&oh64.FileAlignment) ||
577			!read(&oh64.MajorOperatingSystemVersion) ||
578			!read(&oh64.MinorOperatingSystemVersion) ||
579			!read(&oh64.MajorImageVersion) ||
580			!read(&oh64.MinorImageVersion) ||
581			!read(&oh64.MajorSubsystemVersion) ||
582			!read(&oh64.MinorSubsystemVersion) ||
583			!read(&oh64.Win32VersionValue) ||
584			!read(&oh64.SizeOfImage) ||
585			!read(&oh64.SizeOfHeaders) ||
586			!read(&oh64.CheckSum) ||
587			!read(&oh64.Subsystem) ||
588			!read(&oh64.DllCharacteristics) ||
589			!read(&oh64.SizeOfStackReserve) ||
590			!read(&oh64.SizeOfStackCommit) ||
591			!read(&oh64.SizeOfHeapReserve) ||
592			!read(&oh64.SizeOfHeapCommit) ||
593			!read(&oh64.LoaderFlags) ||
594			!read(&oh64.NumberOfRvaAndSizes) {
595			return nil, fmt.Errorf("failure to read PE32+ optional header: %v", err)
596		}
597
598		dd, err := readDataDirectories(r, sz-uint16(oh64MinSz), oh64.NumberOfRvaAndSizes)
599		if err != nil {
600			return nil, err
601		}
602
603		copy(oh64.DataDirectory[:], dd)
604
605		return &oh64, nil
606	default:
607		return nil, fmt.Errorf("optional header has unexpected Magic of 0x%x", ohMagic)
608	}
609}
610
611// readDataDirectories accepts an io.ReadSeeker pointing to data directories in the PE file,
612// its size and number of data directories as seen in optional header.
613// It parses the given size of bytes and returns given number of data directories.
614func readDataDirectories(r io.ReadSeeker, sz uint16, n uint32) ([]DataDirectory, error) {
615	ddSz := uint64(binary.Size(DataDirectory{}))
616	if uint64(sz) != uint64(n)*ddSz {
617		return nil, fmt.Errorf("size of data directories(%d) is inconsistent with number of data directories(%d)", sz, n)
618	}
619
620	dd := make([]DataDirectory, n)
621	if err := binary.Read(r, binary.LittleEndian, dd); err != nil {
622		return nil, fmt.Errorf("failure to read data directories: %v", err)
623	}
624
625	return dd, nil
626}
627