1// Copyright 2021 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// When using GOEXPERIMENT=boringcrypto, the test program links in the boringcrypto syso,
6// which does not respect GOAMD64, so we skip the test if boringcrypto is enabled.
7//go:build !boringcrypto
8
9package amd64_test
10
11import (
12	"bufio"
13	"debug/elf"
14	"debug/macho"
15	"errors"
16	"fmt"
17	"go/build"
18	"internal/testenv"
19	"io"
20	"math"
21	"math/bits"
22	"os"
23	"os/exec"
24	"regexp"
25	"runtime"
26	"strconv"
27	"strings"
28	"testing"
29)
30
31// Test to make sure that when building for GOAMD64=v1, we don't
32// use any >v1 instructions.
33func TestGoAMD64v1(t *testing.T) {
34	if runtime.GOARCH != "amd64" {
35		t.Skip("amd64-only test")
36	}
37	if runtime.GOOS != "linux" && runtime.GOOS != "darwin" {
38		t.Skip("test only works on elf or macho platforms")
39	}
40	for _, tag := range build.Default.ToolTags {
41		if tag == "amd64.v2" {
42			t.Skip("compiling for GOAMD64=v2 or higher")
43		}
44	}
45	if os.Getenv("TESTGOAMD64V1") != "" {
46		t.Skip("recursive call")
47	}
48
49	// Make a binary which will be a modified version of the
50	// currently running binary.
51	dst, err := os.CreateTemp("", "TestGoAMD64v1")
52	if err != nil {
53		t.Fatalf("failed to create temp file: %v", err)
54	}
55	defer os.Remove(dst.Name())
56	dst.Chmod(0500) // make executable
57
58	// Clobber all the non-v1 opcodes.
59	opcodes := map[string]bool{}
60	var features []string
61	for feature, opcodeList := range featureToOpcodes {
62		if runtimeFeatures[feature] {
63			features = append(features, fmt.Sprintf("cpu.%s=off", feature))
64		}
65		for _, op := range opcodeList {
66			opcodes[op] = true
67		}
68	}
69	clobber(t, os.Args[0], dst, opcodes)
70	if err = dst.Close(); err != nil {
71		t.Fatalf("can't close binary: %v", err)
72	}
73
74	// Run the resulting binary.
75	cmd := testenv.Command(t, dst.Name())
76	testenv.CleanCmdEnv(cmd)
77	cmd.Env = append(cmd.Env, "TESTGOAMD64V1=yes")
78	cmd.Env = append(cmd.Env, fmt.Sprintf("GODEBUG=%s", strings.Join(features, ",")))
79	out, err := cmd.CombinedOutput()
80	if err != nil {
81		t.Fatalf("couldn't execute test: %s", err)
82	}
83	// Expect to see output of the form "PASS\n", unless the test binary
84	// was compiled for coverage (in which case there will be an extra line).
85	success := false
86	lines := strings.Split(string(out), "\n")
87	if len(lines) == 2 {
88		success = lines[0] == "PASS" && lines[1] == ""
89	} else if len(lines) == 3 {
90		success = lines[0] == "PASS" &&
91			strings.HasPrefix(lines[1], "coverage") && lines[2] == ""
92	}
93	if !success {
94		t.Fatalf("test reported error: %s lines=%+v", string(out), lines)
95	}
96}
97
98// Clobber copies the binary src to dst, replacing all the instructions in opcodes with
99// faulting instructions.
100func clobber(t *testing.T, src string, dst *os.File, opcodes map[string]bool) {
101	// Run objdump to get disassembly.
102	var re *regexp.Regexp
103	var disasm io.Reader
104	if false {
105		// TODO: go tool objdump doesn't disassemble the bmi1 instructions
106		// in question correctly. See issue 48584.
107		cmd := testenv.Command(t, "go", "tool", "objdump", src)
108		var err error
109		disasm, err = cmd.StdoutPipe()
110		if err != nil {
111			t.Fatal(err)
112		}
113		if err := cmd.Start(); err != nil {
114			t.Fatal(err)
115		}
116		t.Cleanup(func() {
117			if err := cmd.Wait(); err != nil {
118				t.Error(err)
119			}
120		})
121		re = regexp.MustCompile(`^[^:]*:[-\d]+\s+0x([\da-f]+)\s+([\da-f]+)\s+([A-Z]+)`)
122	} else {
123		// TODO: we're depending on platform-native objdump here. Hence the Skipf
124		// below if it doesn't run for some reason.
125		cmd := testenv.Command(t, "objdump", "-d", src)
126		var err error
127		disasm, err = cmd.StdoutPipe()
128		if err != nil {
129			t.Fatal(err)
130		}
131		if err := cmd.Start(); err != nil {
132			if errors.Is(err, exec.ErrNotFound) {
133				t.Skipf("can't run test due to missing objdump: %s", err)
134			}
135			t.Fatal(err)
136		}
137		t.Cleanup(func() {
138			if err := cmd.Wait(); err != nil {
139				t.Error(err)
140			}
141		})
142		re = regexp.MustCompile(`^\s*([\da-f]+):\s*((?:[\da-f][\da-f] )+)\s*([a-z\d]+)`)
143	}
144
145	// Find all the instruction addresses we need to edit.
146	virtualEdits := map[uint64]bool{}
147	scanner := bufio.NewScanner(disasm)
148	for scanner.Scan() {
149		line := scanner.Text()
150		parts := re.FindStringSubmatch(line)
151		if len(parts) == 0 {
152			continue
153		}
154		addr, err := strconv.ParseUint(parts[1], 16, 64)
155		if err != nil {
156			continue // not a hex address
157		}
158		opcode := strings.ToLower(parts[3])
159		if !opcodes[opcode] {
160			continue
161		}
162		t.Logf("clobbering instruction %s", line)
163		n := (len(parts[2]) - strings.Count(parts[2], " ")) / 2 // number of bytes in instruction encoding
164		for i := 0; i < n; i++ {
165			// Only really need to make the first byte faulting, but might
166			// as well make all the bytes faulting.
167			virtualEdits[addr+uint64(i)] = true
168		}
169	}
170
171	// Figure out where in the binary the edits must be done.
172	physicalEdits := map[uint64]bool{}
173	if e, err := elf.Open(src); err == nil {
174		for _, sec := range e.Sections {
175			vaddr := sec.Addr
176			paddr := sec.Offset
177			size := sec.Size
178			for a := range virtualEdits {
179				if a >= vaddr && a < vaddr+size {
180					physicalEdits[paddr+(a-vaddr)] = true
181				}
182			}
183		}
184	} else if m, err2 := macho.Open(src); err2 == nil {
185		for _, sec := range m.Sections {
186			vaddr := sec.Addr
187			paddr := uint64(sec.Offset)
188			size := sec.Size
189			for a := range virtualEdits {
190				if a >= vaddr && a < vaddr+size {
191					physicalEdits[paddr+(a-vaddr)] = true
192				}
193			}
194		}
195	} else {
196		t.Log(err)
197		t.Log(err2)
198		t.Fatal("executable format not elf or macho")
199	}
200	if len(virtualEdits) != len(physicalEdits) {
201		t.Fatal("couldn't find an instruction in text sections")
202	}
203
204	// Copy source to destination, making edits along the way.
205	f, err := os.Open(src)
206	if err != nil {
207		t.Fatal(err)
208	}
209	r := bufio.NewReader(f)
210	w := bufio.NewWriter(dst)
211	a := uint64(0)
212	done := 0
213	for {
214		b, err := r.ReadByte()
215		if err == io.EOF {
216			break
217		}
218		if err != nil {
219			t.Fatal("can't read")
220		}
221		if physicalEdits[a] {
222			b = 0xcc // INT3 opcode
223			done++
224		}
225		err = w.WriteByte(b)
226		if err != nil {
227			t.Fatal("can't write")
228		}
229		a++
230	}
231	if done != len(physicalEdits) {
232		t.Fatal("physical edits remaining")
233	}
234	w.Flush()
235	f.Close()
236}
237
// setOf returns a set containing the given keys, represented as a map in
// which every key maps to true. With no arguments it returns an empty,
// non-nil map.
func setOf(keys ...string) map[string]bool {
	set := make(map[string]bool, len(keys))
	for i := range keys {
		set[keys[i]] = true
	}
	return set
}
245
// runtimeFeatures is the set of CPU feature names for which TestGoAMD64v1
// adds a "cpu.<feature>=off" entry to GODEBUG when it re-runs the clobbered
// binary. A feature in featureToOpcodes that is not listed here still has
// its opcodes clobbered, but no GODEBUG entry is generated for it.
var runtimeFeatures = setOf(
	"adx", "aes", "avx", "avx2", "bmi1", "bmi2", "erms", "fma",
	"pclmulqdq", "popcnt", "rdtscp", "sse3", "sse41", "sse42", "ssse3",
)
250
// featureToOpcodes maps a CPU feature name to the mnemonics of instructions
// that belong to that feature. Mnemonics are lowercase because clobber
// lowercases the disassembler's opcode before looking it up. TestGoAMD64v1
// clobbers every opcode listed here.
var featureToOpcodes = map[string][]string{
	// Note: we include *q, *l, and plain opcodes here.
	// go tool objdump doesn't include a [QL] on popcnt instructions, until CL 351889
	// native objdump doesn't include [QL] on linux.
	"popcnt": {"popcntq", "popcntl", "popcnt"},
	"bmi1": {
		"andnq", "andnl", "andn",
		"blsiq", "blsil", "blsi",
		"blsmskq", "blsmskl", "blsmsk",
		"blsrq", "blsrl", "blsr",
		"tzcntq", "tzcntl", "tzcnt",
	},
	"bmi2": {
		"sarxq", "sarxl", "sarx",
		"shlxq", "shlxl", "shlx",
		"shrxq", "shrxl", "shrx",
	},
	"sse41": {
		"roundsd",
		"pinsrq", "pinsrl", "pinsrd", "pinsrb", "pinsr",
		"pextrq", "pextrl", "pextrd", "pextrb", "pextr",
		"pminsb", "pminsd", "pminuw", "pminud", // Note: ub and sw are ok.
		"pmaxsb", "pmaxsd", "pmaxuw", "pmaxud",
		"pmovzxbw", "pmovzxbd", "pmovzxbq", "pmovzxwd", "pmovzxwq", "pmovzxdq",
		"pmovsxbw", "pmovsxbd", "pmovsxbq", "pmovsxwd", "pmovsxwq", "pmovsxdq",
		"pblendvb",
	},
	"fma":   {"vfmadd231sd"},
	// NOTE(review): movbe and lzcnt are not in runtimeFeatures, so no
	// cpu.<feature>=off GODEBUG entry is generated for them.
	"movbe": {"movbeqq", "movbeq", "movbell", "movbel", "movbe"},
	"lzcnt": {"lzcntq", "lzcntl", "lzcnt"},
}
282
// TestPopCnt checks the 64-bit and 32-bit population counts from math/bits,
// which use the POPCNT instruction if available.
func TestPopCnt(t *testing.T) {
	type popCase struct {
		x    uint64
		want int
	}
	cases := []popCase{
		{x: 0b00001111, want: 4},
		{x: 0b00001110, want: 3},
		{x: 0b00001100, want: 2},
		{x: 0b00000000, want: 0},
	}
	for _, tc := range cases {
		got64 := bits.OnesCount64(tc.x)
		if got64 != tc.want {
			t.Errorf("OnesCount64(%#x) = %d, want %d", tc.x, got64, tc.want)
		}
		got32 := bits.OnesCount32(uint32(tc.x))
		if got32 != tc.want {
			t.Errorf("OnesCount32(%#x) = %d, want %d", tc.x, got32, tc.want)
		}
	}
}
302
// TestAndNot checks the &^ operator at 64 and 32 bits, which uses the ANDN
// instruction if available.
func TestAndNot(t *testing.T) {
	type andNotCase struct {
		x, y, want uint64
	}
	cases := []andNotCase{
		{x: 0b00001111, y: 0b00000011, want: 0b1100},
		{x: 0b00001111, y: 0b00001100, want: 0b0011},
		{x: 0b00000000, y: 0b00000000, want: 0b0000},
	}
	for _, tc := range cases {
		got64 := tc.x &^ tc.y
		if got64 != tc.want {
			t.Errorf("%#x &^ %#x = %#x, want %#x", tc.x, tc.y, got64, tc.want)
		}
		got32 := uint32(tc.x) &^ uint32(tc.y)
		if got32 != uint32(tc.want) {
			t.Errorf("%#x &^ %#x = %#x, want %#x", tc.x, tc.y, got32, tc.want)
		}
	}
}
320
// TestBLSI checks x & -x (isolate the lowest set bit) at 64 and 32 bits,
// which uses the BLSI instruction if available.
func TestBLSI(t *testing.T) {
	type blsiCase struct {
		x, want uint64
	}
	cases := []blsiCase{
		{x: 0b00001111, want: 0b001},
		{x: 0b00001110, want: 0b010},
		{x: 0b00001100, want: 0b100},
		{x: 0b11000110, want: 0b010},
		{x: 0b00000000, want: 0b000},
	}
	for _, tc := range cases {
		got64 := tc.x & -tc.x
		if got64 != tc.want {
			t.Errorf("%#x & (-%#x) = %#x, want %#x", tc.x, tc.x, got64, tc.want)
		}
		got32 := uint32(tc.x) & -uint32(tc.x)
		if got32 != uint32(tc.want) {
			t.Errorf("%#x & (-%#x) = %#x, want %#x", tc.x, tc.x, got32, tc.want)
		}
	}
}
340
// TestBLSMSK checks x ^ (x-1) (mask up to and including the lowest set bit)
// at 64 and 32 bits, which uses the BLSMSK instruction if available.
// For x == 0 the result is all ones at the operand's width.
func TestBLSMSK(t *testing.T) {
	type blsmskCase struct {
		x, want uint64
	}
	cases := []blsmskCase{
		{x: 0b00001111, want: 0b001},
		{x: 0b00001110, want: 0b011},
		{x: 0b00001100, want: 0b111},
		{x: 0b11000110, want: 0b011},
		{x: 0b00000000, want: 1<<64 - 1},
	}
	for _, tc := range cases {
		got64 := tc.x ^ (tc.x - 1)
		if got64 != tc.want {
			t.Errorf("%#x ^ (%#x-1) = %#x, want %#x", tc.x, tc.x, got64, tc.want)
		}
		got32 := uint32(tc.x) ^ (uint32(tc.x) - 1)
		if got32 != uint32(tc.want) {
			t.Errorf("%#x ^ (%#x-1) = %#x, want %#x", tc.x, tc.x, got32, uint32(tc.want))
		}
	}
}
360
// TestBLSR checks x & (x-1) (clear the lowest set bit) at 64 and 32 bits,
// which uses the BLSR instruction if available.
func TestBLSR(t *testing.T) {
	type blsrCase struct {
		x, want uint64
	}
	cases := []blsrCase{
		{x: 0b00001111, want: 0b00001110},
		{x: 0b00001110, want: 0b00001100},
		{x: 0b00001100, want: 0b00001000},
		{x: 0b11000110, want: 0b11000100},
		{x: 0b00000000, want: 0b00000000},
	}
	for _, tc := range cases {
		got64 := tc.x & (tc.x - 1)
		if got64 != tc.want {
			t.Errorf("%#x & (%#x-1) = %#x, want %#x", tc.x, tc.x, got64, tc.want)
		}
		got32 := uint32(tc.x) & (uint32(tc.x) - 1)
		if got32 != uint32(tc.want) {
			t.Errorf("%#x & (%#x-1) = %#x, want %#x", tc.x, tc.x, got32, tc.want)
		}
	}
}
380
// TestTrailingZeros checks bits.TrailingZeros64 and bits.TrailingZeros32,
// including the zero input, for which each function returns its operand
// width (64 and 32 respectively).
func TestTrailingZeros(t *testing.T) {
	for _, tt := range []struct {
		x    uint64
		want int
	}{
		{0b00001111, 0},
		{0b00001110, 1},
		{0b00001100, 2},
		{0b00001000, 3},
		{0b00000000, 64},
	} {
		if got := bits.TrailingZeros64(tt.x); got != tt.want {
			t.Errorf("TrailingZeros64(%#x) = %d, want %d", tt.x, got, tt.want)
		}
		// The table holds the 64-bit expectation; only the all-zero input
		// differs at 32 bits.
		want := tt.want
		if want == 64 {
			want = 32
		}
		if got := bits.TrailingZeros32(uint32(tt.x)); got != want {
			t.Errorf("TrailingZeros32(%#x) = %d, want %d", tt.x, got, want)
		}
	}
}
404
// TestRound checks math.RoundToEven on inputs just below, at, and just above
// a .5 tie; the ties (1.5 and 2.5) must both round to 2.
func TestRound(t *testing.T) {
	cases := []struct {
		x, want float64
	}{
		{x: 1.4, want: 1},
		{x: 1.5, want: 2},
		{x: 1.6, want: 2},
		{x: 2.4, want: 2},
		{x: 2.5, want: 2},
		{x: 2.6, want: 3},
	}
	for _, tc := range cases {
		got := math.RoundToEven(tc.x)
		if got == tc.want {
			continue
		}
		t.Errorf("RoundToEven(%f) = %f, want %f", tc.x, got, tc.want)
	}
}
421
// TestFMA checks math.FMA(x, y, z) against x*y + z for small integer-valued
// inputs whose result is exactly representable.
func TestFMA(t *testing.T) {
	cases := []struct {
		x, y, z, want float64
	}{
		{x: 2, y: 3, z: 4, want: 10},
		{x: 3, y: 4, z: 5, want: 17},
	}
	for _, tc := range cases {
		got := math.FMA(tc.x, tc.y, tc.z)
		if got == tc.want {
			continue
		}
		t.Errorf("FMA(%f,%f,%f) = %f, want %f", tc.x, tc.y, tc.z, got, tc.want)
	}
}
434