1// Copyright 2020 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Package pkgpath determines the package path used by gccgo/GoLLVM symbols.
6// This package is not used for the gc compiler.
7package pkgpath
8
9import (
10	"bytes"
11	"errors"
12	"fmt"
13	"os"
14	"os/exec"
15	"strings"
16)
17
18// ToSymbolFunc returns a function that may be used to convert a
19// package path into a string suitable for use as a symbol.
20// cmd is the gccgo/GoLLVM compiler in use, and tmpdir is a temporary
21// directory to pass to os.CreateTemp().
22// For example, this returns a function that converts "net/http"
23// into a string like "net..z2fhttp". The actual string varies for
24// different gccgo/GoLLVM versions, which is why this returns a function
25// that does the conversion appropriate for the compiler in use.
26func ToSymbolFunc(cmd, tmpdir string) (func(string) string, error) {
27	// To determine the scheme used by cmd, we compile a small
28	// file and examine the assembly code. Older versions of gccgo
29	// use a simple mangling scheme where there can be collisions
30	// between packages whose paths are different but mangle to
31	// the same string. More recent versions use a new mangler
32	// that avoids these collisions.
33	const filepat = "*_gccgo_manglechck.go"
34	f, err := os.CreateTemp(tmpdir, filepat)
35	if err != nil {
36		return nil, err
37	}
38	gofilename := f.Name()
39	f.Close()
40	defer os.Remove(gofilename)
41
42	if err := os.WriteFile(gofilename, []byte(mangleCheckCode), 0644); err != nil {
43		return nil, err
44	}
45
46	command := exec.Command(cmd, "-S", "-o", "-", gofilename)
47	buf, err := command.Output()
48	if err != nil {
49		return nil, err
50	}
51
52	// Original mangling: go.l__ufer.Run
53	// Mangling v2: go.l..u00e4ufer.Run
54	// Mangling v3: go_0l_u00e4ufer.Run
55	if bytes.Contains(buf, []byte("go_0l_u00e4ufer.Run")) {
56		return toSymbolV3, nil
57	} else if bytes.Contains(buf, []byte("go.l..u00e4ufer.Run")) {
58		return toSymbolV2, nil
59	} else if bytes.Contains(buf, []byte("go.l__ufer.Run")) {
60		return toSymbolV1, nil
61	} else {
62		return nil, errors.New(cmd + ": unrecognized mangling scheme")
63	}
64}
65
// mangleCheckCode is the source of the package that ToSymbolFunc compiles
// to determine the mangling scheme. The package name contains the
// non-ASCII letter 'ä' (U+00E4) so that the symbol emitted for Run looks
// different under each scheme that ToSymbolFunc checks for.
const mangleCheckCode = `
package läufer
func Run(x int) int {
  return 1
}
`
73
// toSymbolV1 converts a package path using the original mangling scheme.
//
// Every byte of ppath that is not an ASCII letter or digit is replaced by
// a single '_'; letters and digits are kept as they are. The replacement
// is applied to each byte of the UTF-8 encoding rather than to each rune,
// so a multi-byte character yields one underscore per byte. This agrees
// with the symbol ToSymbolFunc looks for when detecting this scheme: the
// package name "läufer" shows up as "l__ufer", with two underscores for
// the two-byte 'ä'.
func toSymbolV1(ppath string) string {
	out := []byte(ppath)
	for i, b := range out {
		switch {
		case 'A' <= b && b <= 'Z', 'a' <= b && b <= 'z', '0' <= b && b <= '9':
			// Kept unchanged.
		default:
			out[i] = '_'
		}
	}
	return string(out)
}
86
// toSymbolV2 converts a package path using the second mangling scheme.
//
// ASCII letters, digits and '_' are copied through unchanged. A '.' is
// written as ".x2e". Every other character is written as an escape
// holding its code point in lowercase hexadecimal: "..zXX" for values
// below 0x80, "..uXXXX" for values below 0x10000, and "..UXXXXXXXX" for
// anything larger. If nothing needed escaping, ppath itself is returned.
func toSymbolV2(ppath string) string {
	var out strings.Builder
	escaped := false
	for _, r := range ppath {
		switch {
		case 'a' <= r && r <= 'z', 'A' <= r && r <= 'Z', '0' <= r && r <= '9', r == '_':
			// Plain character: emit as is and move on without
			// marking the path as changed.
			out.WriteByte(byte(r))
			continue
		case r == '.':
			out.WriteString(".x2e")
		case r < 0x80:
			fmt.Fprintf(&out, "..z%02x", r)
		case r < 0x10000:
			fmt.Fprintf(&out, "..u%04x", r)
		default:
			fmt.Fprintf(&out, "..U%08x", r)
		}
		escaped = true
	}
	if escaped {
		return out.String()
	}
	return ppath
}
115
// v3UnderscoreCodes lists the ASCII characters that the third mangling
// scheme writes as a two-character underscore sequence. The key is the
// input character and the value is the character that follows '_' in the
// output; for example '/' is written as "_1" and '_' itself as "__".
var v3UnderscoreCodes = map[byte]byte{
	'_': '_',
	'.': '0',
	'/': '1',
	'*': '2',
	',': '3',
	'{': '4',
	'}': '5',
	'[': '6',
	']': '7',
	'(': '8',
	')': '9',
	'"': 'a',
	' ': 'b',
	';': 'c',
}

// toSymbolV3 converts a package path using the third mangling scheme.
//
// ASCII letters and digits are copied through unchanged. An ASCII
// character listed in v3UnderscoreCodes is written as '_' followed by
// its code. Every other character is written as an escape holding its
// code point in lowercase hexadecimal: "_xXX" for values below 0x80,
// "_uXXXX" for values below 0x10000, and "_UXXXXXXXX" for anything
// larger. If nothing needed escaping, ppath itself is returned.
func toSymbolV3(ppath string) string {
	var out strings.Builder
	escaped := false
	for _, r := range ppath {
		switch {
		case 'a' <= r && r <= 'z', 'A' <= r && r <= 'Z', '0' <= r && r <= '9':
			// Plain character: emit as is and move on without
			// marking the path as changed.
			out.WriteByte(byte(r))
			continue
		case r >= 0x10000:
			fmt.Fprintf(&out, "_U%08x", r)
		case r >= 0x80:
			fmt.Fprintf(&out, "_u%04x", r)
		default:
			// ASCII from here on: prefer the short underscore
			// code and fall back to a hex escape.
			if code, ok := v3UnderscoreCodes[byte(r)]; ok {
				out.WriteByte('_')
				out.WriteByte(code)
			} else {
				fmt.Fprintf(&out, "_x%02x", r)
			}
		}
		escaped = true
	}
	if escaped {
		return out.String()
	}
	return ppath
}
171