1// Copyright 2023 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5package trace_test 6 7import ( 8 "bufio" 9 "bytes" 10 "fmt" 11 "internal/race" 12 "internal/testenv" 13 "internal/trace" 14 "internal/trace/testtrace" 15 "io" 16 "os" 17 "path/filepath" 18 "runtime" 19 "strings" 20 "testing" 21) 22 23func TestTraceAnnotations(t *testing.T) { 24 testTraceProg(t, "annotations.go", func(t *testing.T, tb, _ []byte, _ bool) { 25 type evDesc struct { 26 kind trace.EventKind 27 task trace.TaskID 28 args []string 29 } 30 want := []evDesc{ 31 {trace.EventTaskBegin, trace.TaskID(1), []string{"task0"}}, 32 {trace.EventRegionBegin, trace.TaskID(1), []string{"region0"}}, 33 {trace.EventRegionBegin, trace.TaskID(1), []string{"region1"}}, 34 {trace.EventLog, trace.TaskID(1), []string{"key0", "0123456789abcdef"}}, 35 {trace.EventRegionEnd, trace.TaskID(1), []string{"region1"}}, 36 {trace.EventRegionEnd, trace.TaskID(1), []string{"region0"}}, 37 {trace.EventTaskEnd, trace.TaskID(1), []string{"task0"}}, 38 // Currently, pre-existing region is not recorded to avoid allocations. 39 {trace.EventRegionBegin, trace.BackgroundTask, []string{"post-existing region"}}, 40 } 41 r, err := trace.NewReader(bytes.NewReader(tb)) 42 if err != nil { 43 t.Error(err) 44 } 45 for { 46 ev, err := r.ReadEvent() 47 if err == io.EOF { 48 break 49 } 50 if err != nil { 51 t.Fatal(err) 52 } 53 for i, wantEv := range want { 54 if wantEv.kind != ev.Kind() { 55 continue 56 } 57 match := false 58 switch ev.Kind() { 59 case trace.EventTaskBegin, trace.EventTaskEnd: 60 task := ev.Task() 61 match = task.ID == wantEv.task && task.Type == wantEv.args[0] 62 case trace.EventRegionBegin, trace.EventRegionEnd: 63 reg := ev.Region() 64 match = reg.Task == wantEv.task && reg.Type == wantEv.args[0] 65 case trace.EventLog: 66 log := ev.Log() 67 match = log.Task == wantEv.task && log.Category == wantEv.args[0] && log.Message == wantEv.args[1] 68 } 69 if match { 70 want[i] = want[len(want)-1] 71 want = want[:len(want)-1] 72 break 73 } 74 } 75 } 76 if len(want) != 0 { 77 for _, ev := range want { 78 t.Errorf("no match for %s TaskID=%d Args=%#v", ev.kind, ev.task, ev.args) 79 } 80 } 81 }) 82} 83 84func TestTraceAnnotationsStress(t *testing.T) { 85 testTraceProg(t, "annotations-stress.go", nil) 86} 87 88func TestTraceCgoCallback(t *testing.T) { 89 testenv.MustHaveCGO(t) 90 91 switch runtime.GOOS { 92 case "plan9", "windows": 93 t.Skipf("cgo callback test requires pthreads and is not supported on %s", runtime.GOOS) 94 } 95 testTraceProg(t, "cgo-callback.go", nil) 96} 97 98func TestTraceCPUProfile(t *testing.T) { 99 testTraceProg(t, "cpu-profile.go", func(t *testing.T, tb, stderr []byte, _ bool) { 100 // Parse stderr which has a CPU profile summary, if everything went well. 101 // (If it didn't, we shouldn't even make it here.) 102 scanner := bufio.NewScanner(bytes.NewReader(stderr)) 103 pprofSamples := 0 104 pprofStacks := make(map[string]int) 105 for scanner.Scan() { 106 var stack string 107 var samples int 108 _, err := fmt.Sscanf(scanner.Text(), "%s\t%d", &stack, &samples) 109 if err != nil { 110 t.Fatalf("failed to parse CPU profile summary in stderr: %s\n\tfull:\n%s", scanner.Text(), stderr) 111 } 112 pprofStacks[stack] = samples 113 pprofSamples += samples 114 } 115 if err := scanner.Err(); err != nil { 116 t.Fatalf("failed to parse CPU profile summary in stderr: %v", err) 117 } 118 if pprofSamples == 0 { 119 t.Skip("CPU profile did not include any samples while tracing was active") 120 } 121 122 // Examine the execution tracer's view of the CPU profile samples. Filter it 123 // to only include samples from the single test goroutine. Use the goroutine 124 // ID that was recorded in the events: that should reflect getg().m.curg, 125 // same as the profiler's labels (even when the M is using its g0 stack). 126 totalTraceSamples := 0 127 traceSamples := 0 128 traceStacks := make(map[string]int) 129 r, err := trace.NewReader(bytes.NewReader(tb)) 130 if err != nil { 131 t.Error(err) 132 } 133 var hogRegion *trace.Event 134 var hogRegionClosed bool 135 for { 136 ev, err := r.ReadEvent() 137 if err == io.EOF { 138 break 139 } 140 if err != nil { 141 t.Fatal(err) 142 } 143 if ev.Kind() == trace.EventRegionBegin && ev.Region().Type == "cpuHogger" { 144 hogRegion = &ev 145 } 146 if ev.Kind() == trace.EventStackSample { 147 totalTraceSamples++ 148 if hogRegion != nil && ev.Goroutine() == hogRegion.Goroutine() { 149 traceSamples++ 150 var fns []string 151 ev.Stack().Frames(func(frame trace.StackFrame) bool { 152 if frame.Func != "runtime.goexit" { 153 fns = append(fns, fmt.Sprintf("%s:%d", frame.Func, frame.Line)) 154 } 155 return true 156 }) 157 stack := strings.Join(fns, "|") 158 traceStacks[stack]++ 159 } 160 } 161 if ev.Kind() == trace.EventRegionEnd && ev.Region().Type == "cpuHogger" { 162 hogRegionClosed = true 163 } 164 } 165 if hogRegion == nil { 166 t.Fatalf("execution trace did not identify cpuHogger goroutine") 167 } else if !hogRegionClosed { 168 t.Fatalf("execution trace did not close cpuHogger region") 169 } 170 171 // The execution trace may drop CPU profile samples if the profiling buffer 172 // overflows. Based on the size of profBufWordCount, that takes a bit over 173 // 1900 CPU samples or 19 thread-seconds at a 100 Hz sample rate. If we've 174 // hit that case, then we definitely have at least one full buffer's worth 175 // of CPU samples, so we'll call that success. 176 overflowed := totalTraceSamples >= 1900 177 if traceSamples < pprofSamples { 178 t.Logf("execution trace did not include all CPU profile samples; %d in profile, %d in trace", pprofSamples, traceSamples) 179 if !overflowed { 180 t.Fail() 181 } 182 } 183 184 for stack, traceSamples := range traceStacks { 185 pprofSamples := pprofStacks[stack] 186 delete(pprofStacks, stack) 187 if traceSamples < pprofSamples { 188 t.Logf("execution trace did not include all CPU profile samples for stack %q; %d in profile, %d in trace", 189 stack, pprofSamples, traceSamples) 190 if !overflowed { 191 t.Fail() 192 } 193 } 194 } 195 for stack, pprofSamples := range pprofStacks { 196 t.Logf("CPU profile included %d samples at stack %q not present in execution trace", pprofSamples, stack) 197 if !overflowed { 198 t.Fail() 199 } 200 } 201 202 if t.Failed() { 203 t.Logf("execution trace CPU samples:") 204 for stack, samples := range traceStacks { 205 t.Logf("%d: %q", samples, stack) 206 } 207 t.Logf("CPU profile:\n%s", stderr) 208 } 209 }) 210} 211 212func TestTraceFutileWakeup(t *testing.T) { 213 testTraceProg(t, "futile-wakeup.go", func(t *testing.T, tb, _ []byte, _ bool) { 214 // Check to make sure that no goroutine in the "special" trace region 215 // ends up blocking, unblocking, then immediately blocking again. 216 // 217 // The goroutines are careful to call runtime.Gosched in between blocking, 218 // so there should never be a clean block/unblock on the goroutine unless 219 // the runtime was generating extraneous events. 220 const ( 221 entered = iota 222 blocked 223 runnable 224 running 225 ) 226 gs := make(map[trace.GoID]int) 227 seenSpecialGoroutines := false 228 r, err := trace.NewReader(bytes.NewReader(tb)) 229 if err != nil { 230 t.Error(err) 231 } 232 for { 233 ev, err := r.ReadEvent() 234 if err == io.EOF { 235 break 236 } 237 if err != nil { 238 t.Fatal(err) 239 } 240 // Only track goroutines in the special region we control, so runtime 241 // goroutines don't interfere (it's totally valid in traces for a 242 // goroutine to block, run, and block again; that's not what we care about). 243 if ev.Kind() == trace.EventRegionBegin && ev.Region().Type == "special" { 244 seenSpecialGoroutines = true 245 gs[ev.Goroutine()] = entered 246 } 247 if ev.Kind() == trace.EventRegionEnd && ev.Region().Type == "special" { 248 delete(gs, ev.Goroutine()) 249 } 250 // Track state transitions for goroutines we care about. 251 // 252 // The goroutines we care about will advance through the state machine 253 // of entered -> blocked -> runnable -> running. If in the running state 254 // we block, then we have a futile wakeup. Because of the runtime.Gosched 255 // on these specially marked goroutines, we should end up back in runnable 256 // first. If at any point we go to a different state, switch back to entered 257 // and wait for the next time the goroutine blocks. 258 if ev.Kind() != trace.EventStateTransition { 259 continue 260 } 261 st := ev.StateTransition() 262 if st.Resource.Kind != trace.ResourceGoroutine { 263 continue 264 } 265 id := st.Resource.Goroutine() 266 state, ok := gs[id] 267 if !ok { 268 continue 269 } 270 _, new := st.Goroutine() 271 switch state { 272 case entered: 273 if new == trace.GoWaiting { 274 state = blocked 275 } else { 276 state = entered 277 } 278 case blocked: 279 if new == trace.GoRunnable { 280 state = runnable 281 } else { 282 state = entered 283 } 284 case runnable: 285 if new == trace.GoRunning { 286 state = running 287 } else { 288 state = entered 289 } 290 case running: 291 if new == trace.GoWaiting { 292 t.Fatalf("found futile wakeup on goroutine %d", id) 293 } else { 294 state = entered 295 } 296 } 297 gs[id] = state 298 } 299 if !seenSpecialGoroutines { 300 t.Fatal("did not see a goroutine in a the region 'special'") 301 } 302 }) 303} 304 305func TestTraceGCStress(t *testing.T) { 306 testTraceProg(t, "gc-stress.go", nil) 307} 308 309func TestTraceGOMAXPROCS(t *testing.T) { 310 testTraceProg(t, "gomaxprocs.go", nil) 311} 312 313func TestTraceStacks(t *testing.T) { 314 testTraceProg(t, "stacks.go", func(t *testing.T, tb, _ []byte, stress bool) { 315 type frame struct { 316 fn string 317 line int 318 } 319 type evDesc struct { 320 kind trace.EventKind 321 match string 322 frames []frame 323 } 324 // mainLine is the line number of `func main()` in testprog/stacks.go. 325 const mainLine = 21 326 want := []evDesc{ 327 {trace.EventStateTransition, "Goroutine Running->Runnable", []frame{ 328 {"main.main", mainLine + 82}, 329 }}, 330 {trace.EventStateTransition, "Goroutine NotExist->Runnable", []frame{ 331 {"main.main", mainLine + 11}, 332 }}, 333 {trace.EventStateTransition, "Goroutine Running->Waiting", []frame{ 334 {"runtime.block", 0}, 335 {"main.main.func1", 0}, 336 }}, 337 {trace.EventStateTransition, "Goroutine Running->Waiting", []frame{ 338 {"runtime.chansend1", 0}, 339 {"main.main.func2", 0}, 340 }}, 341 {trace.EventStateTransition, "Goroutine Running->Waiting", []frame{ 342 {"runtime.chanrecv1", 0}, 343 {"main.main.func3", 0}, 344 }}, 345 {trace.EventStateTransition, "Goroutine Running->Waiting", []frame{ 346 {"runtime.chanrecv1", 0}, 347 {"main.main.func4", 0}, 348 }}, 349 {trace.EventStateTransition, "Goroutine Waiting->Runnable", []frame{ 350 {"runtime.chansend1", 0}, 351 {"main.main", mainLine + 84}, 352 }}, 353 {trace.EventStateTransition, "Goroutine Running->Waiting", []frame{ 354 {"runtime.chansend1", 0}, 355 {"main.main.func5", 0}, 356 }}, 357 {trace.EventStateTransition, "Goroutine Waiting->Runnable", []frame{ 358 {"runtime.chanrecv1", 0}, 359 {"main.main", mainLine + 85}, 360 }}, 361 {trace.EventStateTransition, "Goroutine Running->Waiting", []frame{ 362 {"runtime.selectgo", 0}, 363 {"main.main.func6", 0}, 364 }}, 365 {trace.EventStateTransition, "Goroutine Waiting->Runnable", []frame{ 366 {"runtime.selectgo", 0}, 367 {"main.main", mainLine + 86}, 368 }}, 369 {trace.EventStateTransition, "Goroutine Running->Waiting", []frame{ 370 {"sync.(*Mutex).Lock", 0}, 371 {"main.main.func7", 0}, 372 }}, 373 {trace.EventStateTransition, "Goroutine Waiting->Runnable", []frame{ 374 {"sync.(*Mutex).Unlock", 0}, 375 {"main.main", 0}, 376 }}, 377 {trace.EventStateTransition, "Goroutine Running->Waiting", []frame{ 378 {"sync.(*WaitGroup).Wait", 0}, 379 {"main.main.func8", 0}, 380 }}, 381 {trace.EventStateTransition, "Goroutine Waiting->Runnable", []frame{ 382 {"sync.(*WaitGroup).Add", 0}, 383 {"sync.(*WaitGroup).Done", 0}, 384 {"main.main", mainLine + 91}, 385 }}, 386 {trace.EventStateTransition, "Goroutine Running->Waiting", []frame{ 387 {"sync.(*Cond).Wait", 0}, 388 {"main.main.func9", 0}, 389 }}, 390 {trace.EventStateTransition, "Goroutine Waiting->Runnable", []frame{ 391 {"sync.(*Cond).Signal", 0}, 392 {"main.main", 0}, 393 }}, 394 {trace.EventStateTransition, "Goroutine Running->Waiting", []frame{ 395 {"time.Sleep", 0}, 396 {"main.main", 0}, 397 }}, 398 {trace.EventMetric, "/sched/gomaxprocs:threads", []frame{ 399 {"runtime.startTheWorld", 0}, // this is when the current gomaxprocs is logged. 400 {"runtime.startTheWorldGC", 0}, 401 {"runtime.GOMAXPROCS", 0}, 402 {"main.main", 0}, 403 }}, 404 } 405 if !stress { 406 // Only check for this stack if !stress because traceAdvance alone could 407 // allocate enough memory to trigger a GC if called frequently enough. 408 // This might cause the runtime.GC call we're trying to match against to 409 // coalesce with an active GC triggered this by traceAdvance. In that case 410 // we won't have an EventRangeBegin event that matches the stace trace we're 411 // looking for, since runtime.GC will not have triggered the GC. 412 gcEv := evDesc{trace.EventRangeBegin, "GC concurrent mark phase", []frame{ 413 {"runtime.GC", 0}, 414 {"main.main", 0}, 415 }} 416 want = append(want, gcEv) 417 } 418 if runtime.GOOS != "windows" && runtime.GOOS != "plan9" { 419 want = append(want, []evDesc{ 420 {trace.EventStateTransition, "Goroutine Running->Waiting", []frame{ 421 {"internal/poll.(*FD).Accept", 0}, 422 {"net.(*netFD).accept", 0}, 423 {"net.(*TCPListener).accept", 0}, 424 {"net.(*TCPListener).Accept", 0}, 425 {"main.main.func10", 0}, 426 }}, 427 {trace.EventStateTransition, "Goroutine Running->Syscall", []frame{ 428 {"syscall.read", 0}, 429 {"syscall.Read", 0}, 430 {"internal/poll.ignoringEINTRIO", 0}, 431 {"internal/poll.(*FD).Read", 0}, 432 {"os.(*File).read", 0}, 433 {"os.(*File).Read", 0}, 434 {"main.main.func11", 0}, 435 }}, 436 }...) 437 } 438 stackMatches := func(stk trace.Stack, frames []frame) bool { 439 i := 0 440 match := true 441 stk.Frames(func(f trace.StackFrame) bool { 442 if f.Func != frames[i].fn { 443 match = false 444 return false 445 } 446 if line := uint64(frames[i].line); line != 0 && line != f.Line { 447 match = false 448 return false 449 } 450 i++ 451 return true 452 }) 453 return match 454 } 455 r, err := trace.NewReader(bytes.NewReader(tb)) 456 if err != nil { 457 t.Error(err) 458 } 459 for { 460 ev, err := r.ReadEvent() 461 if err == io.EOF { 462 break 463 } 464 if err != nil { 465 t.Fatal(err) 466 } 467 for i, wantEv := range want { 468 if wantEv.kind != ev.Kind() { 469 continue 470 } 471 match := false 472 switch ev.Kind() { 473 case trace.EventStateTransition: 474 st := ev.StateTransition() 475 str := "" 476 switch st.Resource.Kind { 477 case trace.ResourceGoroutine: 478 old, new := st.Goroutine() 479 str = fmt.Sprintf("%s %s->%s", st.Resource.Kind, old, new) 480 } 481 match = str == wantEv.match 482 case trace.EventRangeBegin: 483 rng := ev.Range() 484 match = rng.Name == wantEv.match 485 case trace.EventMetric: 486 metric := ev.Metric() 487 match = metric.Name == wantEv.match 488 } 489 match = match && stackMatches(ev.Stack(), wantEv.frames) 490 if match { 491 want[i] = want[len(want)-1] 492 want = want[:len(want)-1] 493 break 494 } 495 } 496 } 497 if len(want) != 0 { 498 for _, ev := range want { 499 t.Errorf("no match for %s Match=%s Stack=%#v", ev.kind, ev.match, ev.frames) 500 } 501 } 502 }) 503} 504 505func TestTraceStress(t *testing.T) { 506 switch runtime.GOOS { 507 case "js", "wasip1": 508 t.Skip("no os.Pipe on " + runtime.GOOS) 509 } 510 testTraceProg(t, "stress.go", nil) 511} 512 513func TestTraceStressStartStop(t *testing.T) { 514 switch runtime.GOOS { 515 case "js", "wasip1": 516 t.Skip("no os.Pipe on " + runtime.GOOS) 517 } 518 testTraceProg(t, "stress-start-stop.go", nil) 519} 520 521func TestTraceManyStartStop(t *testing.T) { 522 testTraceProg(t, "many-start-stop.go", nil) 523} 524 525func TestTraceWaitOnPipe(t *testing.T) { 526 switch runtime.GOOS { 527 case "dragonfly", "freebsd", "linux", "netbsd", "openbsd", "solaris": 528 testTraceProg(t, "wait-on-pipe.go", nil) 529 return 530 } 531 t.Skip("no applicable syscall.Pipe on " + runtime.GOOS) 532} 533 534func TestTraceIterPull(t *testing.T) { 535 testTraceProg(t, "iter-pull.go", nil) 536} 537 538func testTraceProg(t *testing.T, progName string, extra func(t *testing.T, trace, stderr []byte, stress bool)) { 539 testenv.MustHaveGoRun(t) 540 541 // Check if we're on a builder. 542 onBuilder := testenv.Builder() != "" 543 onOldBuilder := !strings.Contains(testenv.Builder(), "gotip") && !strings.Contains(testenv.Builder(), "go1") 544 545 testPath := filepath.Join("./testdata/testprog", progName) 546 testName := progName 547 runTest := func(t *testing.T, stress bool, extraGODEBUG string) { 548 // Run the program and capture the trace, which is always written to stdout. 549 cmd := testenv.Command(t, testenv.GoToolPath(t), "run") 550 if race.Enabled { 551 cmd.Args = append(cmd.Args, "-race") 552 } 553 cmd.Args = append(cmd.Args, testPath) 554 cmd.Env = append(os.Environ(), "GOEXPERIMENT=rangefunc") 555 // Add a stack ownership check. This is cheap enough for testing. 556 godebug := "tracecheckstackownership=1" 557 if stress { 558 // Advance a generation constantly to stress the tracer. 559 godebug += ",traceadvanceperiod=0" 560 } 561 if extraGODEBUG != "" { 562 // Add extra GODEBUG flags. 563 godebug += "," + extraGODEBUG 564 } 565 cmd.Env = append(cmd.Env, "GODEBUG="+godebug) 566 567 // Capture stdout and stderr. 568 // 569 // The protocol for these programs is that stdout contains the trace data 570 // and stderr is an expectation in string format. 571 var traceBuf, errBuf bytes.Buffer 572 cmd.Stdout = &traceBuf 573 cmd.Stderr = &errBuf 574 // Run the program. 575 if err := cmd.Run(); err != nil { 576 if errBuf.Len() != 0 { 577 t.Logf("stderr: %s", string(errBuf.Bytes())) 578 } 579 t.Fatal(err) 580 } 581 tb := traceBuf.Bytes() 582 583 // Test the trace and the parser. 584 testReader(t, bytes.NewReader(tb), testtrace.ExpectSuccess()) 585 586 // Run some extra validation. 587 if !t.Failed() && extra != nil { 588 extra(t, tb, errBuf.Bytes(), stress) 589 } 590 591 // Dump some more information on failure. 592 if t.Failed() && onBuilder { 593 // Dump directly to the test log on the builder, since this 594 // data is critical for debugging and this is the only way 595 // we can currently make sure it's retained. 596 t.Log("found bad trace; dumping to test log...") 597 s := dumpTraceToText(t, tb) 598 if onOldBuilder && len(s) > 1<<20+512<<10 { 599 // The old build infrastructure truncates logs at ~2 MiB. 600 // Let's assume we're the only failure and give ourselves 601 // up to 1.5 MiB to dump the trace. 602 // 603 // TODO(mknyszek): Remove this when we've migrated off of 604 // the old infrastructure. 605 t.Logf("text trace too large to dump (%d bytes)", len(s)) 606 } else { 607 t.Log(s) 608 } 609 } else if t.Failed() || *dumpTraces { 610 // We asked to dump the trace or failed. Write the trace to a file. 611 t.Logf("wrote trace to file: %s", dumpTraceToFile(t, testName, stress, tb)) 612 } 613 } 614 t.Run("Default", func(t *testing.T) { 615 runTest(t, false, "") 616 }) 617 t.Run("Stress", func(t *testing.T) { 618 if testing.Short() { 619 t.Skip("skipping trace stress tests in short mode") 620 } 621 runTest(t, true, "") 622 }) 623 t.Run("AllocFree", func(t *testing.T) { 624 if testing.Short() { 625 t.Skip("skipping trace alloc/free tests in short mode") 626 } 627 runTest(t, false, "traceallocfree=1") 628 }) 629} 630