1// Copyright 2023 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5package trace 6 7import ( 8 "fmt" 9 "strings" 10 11 "internal/trace/event" 12 "internal/trace/event/go122" 13 "internal/trace/version" 14) 15 16// ordering emulates Go scheduler state for both validation and 17// for putting events in the right order. 18// 19// The interface to ordering consists of two methods: Advance 20// and Next. Advance is called to try and advance an event and 21// add completed events to the ordering. Next is used to pick 22// off events in the ordering. 23type ordering struct { 24 gStates map[GoID]*gState 25 pStates map[ProcID]*pState // TODO: The keys are dense, so this can be a slice. 26 mStates map[ThreadID]*mState 27 activeTasks map[TaskID]taskState 28 gcSeq uint64 29 gcState gcState 30 initialGen uint64 31 queue queue[Event] 32} 33 34// Advance checks if it's valid to proceed with ev which came from thread m. 35// 36// It assumes the gen value passed to it is monotonically increasing across calls. 37// 38// If any error is returned, then the trace is broken and trace parsing must cease. 39// If it's not valid to advance with ev, but no error was encountered, the caller 40// should attempt to advance with other candidate events from other threads. If the 41// caller runs out of candidates, the trace is invalid. 42// 43// If this returns true, Next is guaranteed to return a complete event. However, 44// multiple events may be added to the ordering, so the caller should (but is not 45// required to) continue to call Next until it is exhausted. 46func (o *ordering) Advance(ev *baseEvent, evt *evTable, m ThreadID, gen uint64) (bool, error) { 47 if o.initialGen == 0 { 48 // Set the initial gen if necessary. 49 o.initialGen = gen 50 } 51 52 var curCtx, newCtx schedCtx 53 curCtx.M = m 54 newCtx.M = m 55 56 var ms *mState 57 if m == NoThread { 58 curCtx.P = NoProc 59 curCtx.G = NoGoroutine 60 newCtx = curCtx 61 } else { 62 // Pull out or create the mState for this event. 63 var ok bool 64 ms, ok = o.mStates[m] 65 if !ok { 66 ms = &mState{ 67 g: NoGoroutine, 68 p: NoProc, 69 } 70 o.mStates[m] = ms 71 } 72 curCtx.P = ms.p 73 curCtx.G = ms.g 74 newCtx = curCtx 75 } 76 77 f := orderingDispatch[ev.typ] 78 if f == nil { 79 return false, fmt.Errorf("bad event type found while ordering: %v", ev.typ) 80 } 81 newCtx, ok, err := f(o, ev, evt, m, gen, curCtx) 82 if err == nil && ok && ms != nil { 83 // Update the mState for this event. 84 ms.p = newCtx.P 85 ms.g = newCtx.G 86 } 87 return ok, err 88} 89 90type orderingHandleFunc func(o *ordering, ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) 91 92var orderingDispatch = [256]orderingHandleFunc{ 93 // Procs. 94 go122.EvProcsChange: (*ordering).advanceAnnotation, 95 go122.EvProcStart: (*ordering).advanceProcStart, 96 go122.EvProcStop: (*ordering).advanceProcStop, 97 go122.EvProcSteal: (*ordering).advanceProcSteal, 98 go122.EvProcStatus: (*ordering).advanceProcStatus, 99 100 // Goroutines. 101 go122.EvGoCreate: (*ordering).advanceGoCreate, 102 go122.EvGoCreateSyscall: (*ordering).advanceGoCreateSyscall, 103 go122.EvGoStart: (*ordering).advanceGoStart, 104 go122.EvGoDestroy: (*ordering).advanceGoStopExec, 105 go122.EvGoDestroySyscall: (*ordering).advanceGoDestroySyscall, 106 go122.EvGoStop: (*ordering).advanceGoStopExec, 107 go122.EvGoBlock: (*ordering).advanceGoStopExec, 108 go122.EvGoUnblock: (*ordering).advanceGoUnblock, 109 go122.EvGoSyscallBegin: (*ordering).advanceGoSyscallBegin, 110 go122.EvGoSyscallEnd: (*ordering).advanceGoSyscallEnd, 111 go122.EvGoSyscallEndBlocked: (*ordering).advanceGoSyscallEndBlocked, 112 go122.EvGoStatus: (*ordering).advanceGoStatus, 113 114 // STW. 115 go122.EvSTWBegin: (*ordering).advanceGoRangeBegin, 116 go122.EvSTWEnd: (*ordering).advanceGoRangeEnd, 117 118 // GC events. 119 go122.EvGCActive: (*ordering).advanceGCActive, 120 go122.EvGCBegin: (*ordering).advanceGCBegin, 121 go122.EvGCEnd: (*ordering).advanceGCEnd, 122 go122.EvGCSweepActive: (*ordering).advanceGCSweepActive, 123 go122.EvGCSweepBegin: (*ordering).advanceGCSweepBegin, 124 go122.EvGCSweepEnd: (*ordering).advanceGCSweepEnd, 125 go122.EvGCMarkAssistActive: (*ordering).advanceGoRangeActive, 126 go122.EvGCMarkAssistBegin: (*ordering).advanceGoRangeBegin, 127 go122.EvGCMarkAssistEnd: (*ordering).advanceGoRangeEnd, 128 go122.EvHeapAlloc: (*ordering).advanceHeapMetric, 129 go122.EvHeapGoal: (*ordering).advanceHeapMetric, 130 131 // Annotations. 132 go122.EvGoLabel: (*ordering).advanceAnnotation, 133 go122.EvUserTaskBegin: (*ordering).advanceUserTaskBegin, 134 go122.EvUserTaskEnd: (*ordering).advanceUserTaskEnd, 135 go122.EvUserRegionBegin: (*ordering).advanceUserRegionBegin, 136 go122.EvUserRegionEnd: (*ordering).advanceUserRegionEnd, 137 go122.EvUserLog: (*ordering).advanceAnnotation, 138 139 // Coroutines. Added in Go 1.23. 140 go122.EvGoSwitch: (*ordering).advanceGoSwitch, 141 go122.EvGoSwitchDestroy: (*ordering).advanceGoSwitch, 142 go122.EvGoCreateBlocked: (*ordering).advanceGoCreate, 143 144 // GoStatus event with a stack. Added in Go 1.23. 145 go122.EvGoStatusStack: (*ordering).advanceGoStatus, 146 147 // Experimental events. 148 149 // Experimental heap span events. Added in Go 1.23. 150 go122.EvSpan: (*ordering).advanceAllocFree, 151 go122.EvSpanAlloc: (*ordering).advanceAllocFree, 152 go122.EvSpanFree: (*ordering).advanceAllocFree, 153 154 // Experimental heap object events. Added in Go 1.23. 155 go122.EvHeapObject: (*ordering).advanceAllocFree, 156 go122.EvHeapObjectAlloc: (*ordering).advanceAllocFree, 157 go122.EvHeapObjectFree: (*ordering).advanceAllocFree, 158 159 // Experimental goroutine stack events. Added in Go 1.23. 160 go122.EvGoroutineStack: (*ordering).advanceAllocFree, 161 go122.EvGoroutineStackAlloc: (*ordering).advanceAllocFree, 162 go122.EvGoroutineStackFree: (*ordering).advanceAllocFree, 163} 164 165func (o *ordering) advanceProcStatus(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) { 166 pid := ProcID(ev.args[0]) 167 status := go122.ProcStatus(ev.args[1]) 168 if int(status) >= len(go122ProcStatus2ProcState) { 169 return curCtx, false, fmt.Errorf("invalid status for proc %d: %d", pid, status) 170 } 171 oldState := go122ProcStatus2ProcState[status] 172 if s, ok := o.pStates[pid]; ok { 173 if status == go122.ProcSyscallAbandoned && s.status == go122.ProcSyscall { 174 // ProcSyscallAbandoned is a special case of ProcSyscall. It indicates a 175 // potential loss of information, but if we're already in ProcSyscall, 176 // we haven't lost the relevant information. Promote the status and advance. 177 oldState = ProcRunning 178 ev.args[1] = uint64(go122.ProcSyscall) 179 } else if status == go122.ProcSyscallAbandoned && s.status == go122.ProcSyscallAbandoned { 180 // If we're passing through ProcSyscallAbandoned, then there's no promotion 181 // to do. We've lost the M that this P is associated with. However it got there, 182 // it's going to appear as idle in the API, so pass through as idle. 183 oldState = ProcIdle 184 ev.args[1] = uint64(go122.ProcSyscallAbandoned) 185 } else if s.status != status { 186 return curCtx, false, fmt.Errorf("inconsistent status for proc %d: old %v vs. new %v", pid, s.status, status) 187 } 188 s.seq = makeSeq(gen, 0) // Reset seq. 189 } else { 190 o.pStates[pid] = &pState{id: pid, status: status, seq: makeSeq(gen, 0)} 191 if gen == o.initialGen { 192 oldState = ProcUndetermined 193 } else { 194 oldState = ProcNotExist 195 } 196 } 197 ev.extra(version.Go122)[0] = uint64(oldState) // Smuggle in the old state for StateTransition. 198 199 // Bind the proc to the new context, if it's running. 200 newCtx := curCtx 201 if status == go122.ProcRunning || status == go122.ProcSyscall { 202 newCtx.P = pid 203 } 204 // If we're advancing through ProcSyscallAbandoned *but* oldState is running then we've 205 // promoted it to ProcSyscall. However, because it's ProcSyscallAbandoned, we know this 206 // P is about to get stolen and its status very likely isn't being emitted by the same 207 // thread it was bound to. Since this status is Running -> Running and Running is binding, 208 // we need to make sure we emit it in the right context: the context to which it is bound. 209 // Find it, and set our current context to it. 210 if status == go122.ProcSyscallAbandoned && oldState == ProcRunning { 211 // N.B. This is slow but it should be fairly rare. 212 found := false 213 for mid, ms := range o.mStates { 214 if ms.p == pid { 215 curCtx.M = mid 216 curCtx.P = pid 217 curCtx.G = ms.g 218 found = true 219 } 220 } 221 if !found { 222 return curCtx, false, fmt.Errorf("failed to find sched context for proc %d that's about to be stolen", pid) 223 } 224 } 225 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev}) 226 return newCtx, true, nil 227} 228 229func (o *ordering) advanceProcStart(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) { 230 pid := ProcID(ev.args[0]) 231 seq := makeSeq(gen, ev.args[1]) 232 233 // Try to advance. We might fail here due to sequencing, because the P hasn't 234 // had a status emitted, or because we already have a P and we're in a syscall, 235 // and we haven't observed that it was stolen from us yet. 236 state, ok := o.pStates[pid] 237 if !ok || state.status != go122.ProcIdle || !seq.succeeds(state.seq) || curCtx.P != NoProc { 238 // We can't make an inference as to whether this is bad. We could just be seeing 239 // a ProcStart on a different M before the proc's state was emitted, or before we 240 // got to the right point in the trace. 241 // 242 // Note that we also don't advance here if we have a P and we're in a syscall. 243 return curCtx, false, nil 244 } 245 // We can advance this P. Check some invariants. 246 // 247 // We might have a goroutine if a goroutine is exiting a syscall. 248 reqs := event.SchedReqs{Thread: event.MustHave, Proc: event.MustNotHave, Goroutine: event.MayHave} 249 if err := validateCtx(curCtx, reqs); err != nil { 250 return curCtx, false, err 251 } 252 state.status = go122.ProcRunning 253 state.seq = seq 254 newCtx := curCtx 255 newCtx.P = pid 256 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev}) 257 return newCtx, true, nil 258} 259 260func (o *ordering) advanceProcStop(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) { 261 // We must be able to advance this P. 262 // 263 // There are 2 ways a P can stop: ProcStop and ProcSteal. ProcStop is used when the P 264 // is stopped by the same M that started it, while ProcSteal is used when another M 265 // steals the P by stopping it from a distance. 266 // 267 // Since a P is bound to an M, and we're stopping on the same M we started, it must 268 // always be possible to advance the current M's P from a ProcStop. This is also why 269 // ProcStop doesn't need a sequence number. 270 state, ok := o.pStates[curCtx.P] 271 if !ok { 272 return curCtx, false, fmt.Errorf("event %s for proc (%v) that doesn't exist", go122.EventString(ev.typ), curCtx.P) 273 } 274 if state.status != go122.ProcRunning && state.status != go122.ProcSyscall { 275 return curCtx, false, fmt.Errorf("%s event for proc that's not %s or %s", go122.EventString(ev.typ), go122.ProcRunning, go122.ProcSyscall) 276 } 277 reqs := event.SchedReqs{Thread: event.MustHave, Proc: event.MustHave, Goroutine: event.MayHave} 278 if err := validateCtx(curCtx, reqs); err != nil { 279 return curCtx, false, err 280 } 281 state.status = go122.ProcIdle 282 newCtx := curCtx 283 newCtx.P = NoProc 284 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev}) 285 return newCtx, true, nil 286} 287 288func (o *ordering) advanceProcSteal(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) { 289 pid := ProcID(ev.args[0]) 290 seq := makeSeq(gen, ev.args[1]) 291 state, ok := o.pStates[pid] 292 if !ok || (state.status != go122.ProcSyscall && state.status != go122.ProcSyscallAbandoned) || !seq.succeeds(state.seq) { 293 // We can't make an inference as to whether this is bad. We could just be seeing 294 // a ProcStart on a different M before the proc's state was emitted, or before we 295 // got to the right point in the trace. 296 return curCtx, false, nil 297 } 298 // We can advance this P. Check some invariants. 299 reqs := event.SchedReqs{Thread: event.MustHave, Proc: event.MayHave, Goroutine: event.MayHave} 300 if err := validateCtx(curCtx, reqs); err != nil { 301 return curCtx, false, err 302 } 303 // Smuggle in the P state that let us advance so we can surface information to the event. 304 // Specifically, we need to make sure that the event is interpreted not as a transition of 305 // ProcRunning -> ProcIdle but ProcIdle -> ProcIdle instead. 306 // 307 // ProcRunning is binding, but we may be running with a P on the current M and we can't 308 // bind another P. This P is about to go ProcIdle anyway. 309 oldStatus := state.status 310 ev.extra(version.Go122)[0] = uint64(oldStatus) 311 312 // Update the P's status and sequence number. 313 state.status = go122.ProcIdle 314 state.seq = seq 315 316 // If we've lost information then don't try to do anything with the M. 317 // It may have moved on and we can't be sure. 318 if oldStatus == go122.ProcSyscallAbandoned { 319 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev}) 320 return curCtx, true, nil 321 } 322 323 // Validate that the M we're stealing from is what we expect. 324 mid := ThreadID(ev.args[2]) // The M we're stealing from. 325 326 newCtx := curCtx 327 if mid == curCtx.M { 328 // We're stealing from ourselves. This behaves like a ProcStop. 329 if curCtx.P != pid { 330 return curCtx, false, fmt.Errorf("tried to self-steal proc %d (thread %d), but got proc %d instead", pid, mid, curCtx.P) 331 } 332 newCtx.P = NoProc 333 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev}) 334 return newCtx, true, nil 335 } 336 337 // We're stealing from some other M. 338 mState, ok := o.mStates[mid] 339 if !ok { 340 return curCtx, false, fmt.Errorf("stole proc from non-existent thread %d", mid) 341 } 342 343 // Make sure we're actually stealing the right P. 344 if mState.p != pid { 345 return curCtx, false, fmt.Errorf("tried to steal proc %d from thread %d, but got proc %d instead", pid, mid, mState.p) 346 } 347 348 // Tell the M it has no P so it can proceed. 349 // 350 // This is safe because we know the P was in a syscall and 351 // the other M must be trying to get out of the syscall. 352 // GoSyscallEndBlocked cannot advance until the corresponding 353 // M loses its P. 354 mState.p = NoProc 355 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev}) 356 return newCtx, true, nil 357} 358 359func (o *ordering) advanceGoStatus(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) { 360 gid := GoID(ev.args[0]) 361 mid := ThreadID(ev.args[1]) 362 status := go122.GoStatus(ev.args[2]) 363 364 if int(status) >= len(go122GoStatus2GoState) { 365 return curCtx, false, fmt.Errorf("invalid status for goroutine %d: %d", gid, status) 366 } 367 oldState := go122GoStatus2GoState[status] 368 if s, ok := o.gStates[gid]; ok { 369 if s.status != status { 370 return curCtx, false, fmt.Errorf("inconsistent status for goroutine %d: old %v vs. new %v", gid, s.status, status) 371 } 372 s.seq = makeSeq(gen, 0) // Reset seq. 373 } else if gen == o.initialGen { 374 // Set the state. 375 o.gStates[gid] = &gState{id: gid, status: status, seq: makeSeq(gen, 0)} 376 oldState = GoUndetermined 377 } else { 378 return curCtx, false, fmt.Errorf("found goroutine status for new goroutine after the first generation: id=%v status=%v", gid, status) 379 } 380 ev.extra(version.Go122)[0] = uint64(oldState) // Smuggle in the old state for StateTransition. 381 382 newCtx := curCtx 383 switch status { 384 case go122.GoRunning: 385 // Bind the goroutine to the new context, since it's running. 386 newCtx.G = gid 387 case go122.GoSyscall: 388 if mid == NoThread { 389 return curCtx, false, fmt.Errorf("found goroutine %d in syscall without a thread", gid) 390 } 391 // Is the syscall on this thread? If so, bind it to the context. 392 // Otherwise, we're talking about a G sitting in a syscall on an M. 393 // Validate the named M. 394 if mid == curCtx.M { 395 if gen != o.initialGen && curCtx.G != gid { 396 // If this isn't the first generation, we *must* have seen this 397 // binding occur already. Even if the G was blocked in a syscall 398 // for multiple generations since trace start, we would have seen 399 // a previous GoStatus event that bound the goroutine to an M. 400 return curCtx, false, fmt.Errorf("inconsistent thread for syscalling goroutine %d: thread has goroutine %d", gid, curCtx.G) 401 } 402 newCtx.G = gid 403 break 404 } 405 // Now we're talking about a thread and goroutine that have been 406 // blocked on a syscall for the entire generation. This case must 407 // not have a P; the runtime makes sure that all Ps are traced at 408 // the beginning of a generation, which involves taking a P back 409 // from every thread. 410 ms, ok := o.mStates[mid] 411 if ok { 412 // This M has been seen. That means we must have seen this 413 // goroutine go into a syscall on this thread at some point. 414 if ms.g != gid { 415 // But the G on the M doesn't match. Something's wrong. 416 return curCtx, false, fmt.Errorf("inconsistent thread for syscalling goroutine %d: thread has goroutine %d", gid, ms.g) 417 } 418 // This case is just a Syscall->Syscall event, which needs to 419 // appear as having the G currently bound to this M. 420 curCtx.G = ms.g 421 } else if !ok { 422 // The M hasn't been seen yet. That means this goroutine 423 // has just been sitting in a syscall on this M. Create 424 // a state for it. 425 o.mStates[mid] = &mState{g: gid, p: NoProc} 426 // Don't set curCtx.G in this case because this event is the 427 // binding event (and curCtx represents the "before" state). 428 } 429 // Update the current context to the M we're talking about. 430 curCtx.M = mid 431 } 432 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev}) 433 return newCtx, true, nil 434} 435 436func (o *ordering) advanceGoCreate(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) { 437 // Goroutines must be created on a running P, but may or may not be created 438 // by a running goroutine. 439 reqs := event.SchedReqs{Thread: event.MustHave, Proc: event.MustHave, Goroutine: event.MayHave} 440 if err := validateCtx(curCtx, reqs); err != nil { 441 return curCtx, false, err 442 } 443 // If we have a goroutine, it must be running. 444 if state, ok := o.gStates[curCtx.G]; ok && state.status != go122.GoRunning { 445 return curCtx, false, fmt.Errorf("%s event for goroutine that's not %s", go122.EventString(ev.typ), GoRunning) 446 } 447 // This goroutine created another. Add a state for it. 448 newgid := GoID(ev.args[0]) 449 if _, ok := o.gStates[newgid]; ok { 450 return curCtx, false, fmt.Errorf("tried to create goroutine (%v) that already exists", newgid) 451 } 452 status := go122.GoRunnable 453 if ev.typ == go122.EvGoCreateBlocked { 454 status = go122.GoWaiting 455 } 456 o.gStates[newgid] = &gState{id: newgid, status: status, seq: makeSeq(gen, 0)} 457 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev}) 458 return curCtx, true, nil 459} 460 461func (o *ordering) advanceGoStopExec(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) { 462 // These are goroutine events that all require an active running 463 // goroutine on some thread. They must *always* be advance-able, 464 // since running goroutines are bound to their M. 465 if err := validateCtx(curCtx, event.UserGoReqs); err != nil { 466 return curCtx, false, err 467 } 468 state, ok := o.gStates[curCtx.G] 469 if !ok { 470 return curCtx, false, fmt.Errorf("event %s for goroutine (%v) that doesn't exist", go122.EventString(ev.typ), curCtx.G) 471 } 472 if state.status != go122.GoRunning { 473 return curCtx, false, fmt.Errorf("%s event for goroutine that's not %s", go122.EventString(ev.typ), GoRunning) 474 } 475 // Handle each case slightly differently; we just group them together 476 // because they have shared preconditions. 477 newCtx := curCtx 478 switch ev.typ { 479 case go122.EvGoDestroy: 480 // This goroutine is exiting itself. 481 delete(o.gStates, curCtx.G) 482 newCtx.G = NoGoroutine 483 case go122.EvGoStop: 484 // Goroutine stopped (yielded). It's runnable but not running on this M. 485 state.status = go122.GoRunnable 486 newCtx.G = NoGoroutine 487 case go122.EvGoBlock: 488 // Goroutine blocked. It's waiting now and not running on this M. 489 state.status = go122.GoWaiting 490 newCtx.G = NoGoroutine 491 } 492 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev}) 493 return newCtx, true, nil 494} 495 496func (o *ordering) advanceGoStart(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) { 497 gid := GoID(ev.args[0]) 498 seq := makeSeq(gen, ev.args[1]) 499 state, ok := o.gStates[gid] 500 if !ok || state.status != go122.GoRunnable || !seq.succeeds(state.seq) { 501 // We can't make an inference as to whether this is bad. We could just be seeing 502 // a GoStart on a different M before the goroutine was created, before it had its 503 // state emitted, or before we got to the right point in the trace yet. 504 return curCtx, false, nil 505 } 506 // We can advance this goroutine. Check some invariants. 507 reqs := event.SchedReqs{Thread: event.MustHave, Proc: event.MustHave, Goroutine: event.MustNotHave} 508 if err := validateCtx(curCtx, reqs); err != nil { 509 return curCtx, false, err 510 } 511 state.status = go122.GoRunning 512 state.seq = seq 513 newCtx := curCtx 514 newCtx.G = gid 515 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev}) 516 return newCtx, true, nil 517} 518 519func (o *ordering) advanceGoUnblock(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) { 520 // N.B. These both reference the goroutine to unblock, not the current goroutine. 521 gid := GoID(ev.args[0]) 522 seq := makeSeq(gen, ev.args[1]) 523 state, ok := o.gStates[gid] 524 if !ok || state.status != go122.GoWaiting || !seq.succeeds(state.seq) { 525 // We can't make an inference as to whether this is bad. We could just be seeing 526 // a GoUnblock on a different M before the goroutine was created and blocked itself, 527 // before it had its state emitted, or before we got to the right point in the trace yet. 528 return curCtx, false, nil 529 } 530 state.status = go122.GoRunnable 531 state.seq = seq 532 // N.B. No context to validate. Basically anything can unblock 533 // a goroutine (e.g. sysmon). 534 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev}) 535 return curCtx, true, nil 536} 537 538func (o *ordering) advanceGoSwitch(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) { 539 // GoSwitch and GoSwitchDestroy represent a trio of events: 540 // - Unblock of the goroutine to switch to. 541 // - Block or destroy of the current goroutine. 542 // - Start executing the next goroutine. 543 // 544 // Because it acts like a GoStart for the next goroutine, we can 545 // only advance it if the sequence numbers line up. 546 // 547 // The current goroutine on the thread must be actively running. 548 if err := validateCtx(curCtx, event.UserGoReqs); err != nil { 549 return curCtx, false, err 550 } 551 curGState, ok := o.gStates[curCtx.G] 552 if !ok { 553 return curCtx, false, fmt.Errorf("event %s for goroutine (%v) that doesn't exist", go122.EventString(ev.typ), curCtx.G) 554 } 555 if curGState.status != go122.GoRunning { 556 return curCtx, false, fmt.Errorf("%s event for goroutine that's not %s", go122.EventString(ev.typ), GoRunning) 557 } 558 nextg := GoID(ev.args[0]) 559 seq := makeSeq(gen, ev.args[1]) // seq is for nextg, not curCtx.G. 560 nextGState, ok := o.gStates[nextg] 561 if !ok || nextGState.status != go122.GoWaiting || !seq.succeeds(nextGState.seq) { 562 // We can't make an inference as to whether this is bad. We could just be seeing 563 // a GoSwitch on a different M before the goroutine was created, before it had its 564 // state emitted, or before we got to the right point in the trace yet. 565 return curCtx, false, nil 566 } 567 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev}) 568 569 // Update the state of the executing goroutine and emit an event for it 570 // (GoSwitch and GoSwitchDestroy will be interpreted as GoUnblock events 571 // for nextg). 572 switch ev.typ { 573 case go122.EvGoSwitch: 574 // Goroutine blocked. It's waiting now and not running on this M. 575 curGState.status = go122.GoWaiting 576 577 // Emit a GoBlock event. 578 // TODO(mknyszek): Emit a reason. 579 o.queue.push(makeEvent(evt, curCtx, go122.EvGoBlock, ev.time, 0 /* no reason */, 0 /* no stack */)) 580 case go122.EvGoSwitchDestroy: 581 // This goroutine is exiting itself. 582 delete(o.gStates, curCtx.G) 583 584 // Emit a GoDestroy event. 585 o.queue.push(makeEvent(evt, curCtx, go122.EvGoDestroy, ev.time)) 586 } 587 // Update the state of the next goroutine. 588 nextGState.status = go122.GoRunning 589 nextGState.seq = seq 590 newCtx := curCtx 591 newCtx.G = nextg 592 593 // Queue an event for the next goroutine starting to run. 594 startCtx := curCtx 595 startCtx.G = NoGoroutine 596 o.queue.push(makeEvent(evt, startCtx, go122.EvGoStart, ev.time, uint64(nextg), ev.args[1])) 597 return newCtx, true, nil 598} 599 600func (o *ordering) advanceGoSyscallBegin(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) { 601 // Entering a syscall requires an active running goroutine with a 602 // proc on some thread. It is always advancable. 603 if err := validateCtx(curCtx, event.UserGoReqs); err != nil { 604 return curCtx, false, err 605 } 606 state, ok := o.gStates[curCtx.G] 607 if !ok { 608 return curCtx, false, fmt.Errorf("event %s for goroutine (%v) that doesn't exist", go122.EventString(ev.typ), curCtx.G) 609 } 610 if state.status != go122.GoRunning { 611 return curCtx, false, fmt.Errorf("%s event for goroutine that's not %s", go122.EventString(ev.typ), GoRunning) 612 } 613 // Goroutine entered a syscall. It's still running on this P and M. 614 state.status = go122.GoSyscall 615 pState, ok := o.pStates[curCtx.P] 616 if !ok { 617 return curCtx, false, fmt.Errorf("uninitialized proc %d found during %s", curCtx.P, go122.EventString(ev.typ)) 618 } 619 pState.status = go122.ProcSyscall 620 // Validate the P sequence number on the event and advance it. 621 // 622 // We have a P sequence number for what is supposed to be a goroutine event 623 // so that we can correctly model P stealing. Without this sequence number here, 624 // the syscall from which a ProcSteal event is stealing can be ambiguous in the 625 // face of broken timestamps. See the go122-syscall-steal-proc-ambiguous test for 626 // more details. 627 // 628 // Note that because this sequence number only exists as a tool for disambiguation, 629 // we can enforce that we have the right sequence number at this point; we don't need 630 // to back off and see if any other events will advance. This is a running P. 631 pSeq := makeSeq(gen, ev.args[0]) 632 if !pSeq.succeeds(pState.seq) { 633 return curCtx, false, fmt.Errorf("failed to advance %s: can't make sequence: %s -> %s", go122.EventString(ev.typ), pState.seq, pSeq) 634 } 635 pState.seq = pSeq 636 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev}) 637 return curCtx, true, nil 638} 639 640func (o *ordering) advanceGoSyscallEnd(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) { 641 // This event is always advance-able because it happens on the same 642 // thread that EvGoSyscallStart happened, and the goroutine can't leave 643 // that thread until its done. 644 if err := validateCtx(curCtx, event.UserGoReqs); err != nil { 645 return curCtx, false, err 646 } 647 state, ok := o.gStates[curCtx.G] 648 if !ok { 649 return curCtx, false, fmt.Errorf("event %s for goroutine (%v) that doesn't exist", go122.EventString(ev.typ), curCtx.G) 650 } 651 if state.status != go122.GoSyscall { 652 return curCtx, false, fmt.Errorf("%s event for goroutine that's not %s", go122.EventString(ev.typ), GoRunning) 653 } 654 state.status = go122.GoRunning 655 656 // Transfer the P back to running from syscall. 657 pState, ok := o.pStates[curCtx.P] 658 if !ok { 659 return curCtx, false, fmt.Errorf("uninitialized proc %d found during %s", curCtx.P, go122.EventString(ev.typ)) 660 } 661 if pState.status != go122.ProcSyscall { 662 return curCtx, false, fmt.Errorf("expected proc %d in state %v, but got %v instead", curCtx.P, go122.ProcSyscall, pState.status) 663 } 664 pState.status = go122.ProcRunning 665 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev}) 666 return curCtx, true, nil 667} 668 669func (o *ordering) advanceGoSyscallEndBlocked(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) { 670 // This event becomes advanceable when its P is not in a syscall state 671 // (lack of a P altogether is also acceptable for advancing). 672 // The transfer out of ProcSyscall can happen either voluntarily via 673 // ProcStop or involuntarily via ProcSteal. We may also acquire a new P 674 // before we get here (after the transfer out) but that's OK: that new 675 // P won't be in the ProcSyscall state anymore. 676 // 677 // Basically: while we have a preemptible P, don't advance, because we 678 // *know* from the event that we're going to lose it at some point during 679 // the syscall. We shouldn't advance until that happens. 680 if curCtx.P != NoProc { 681 pState, ok := o.pStates[curCtx.P] 682 if !ok { 683 return curCtx, false, fmt.Errorf("uninitialized proc %d found during %s", curCtx.P, go122.EventString(ev.typ)) 684 } 685 if pState.status == go122.ProcSyscall { 686 return curCtx, false, nil 687 } 688 } 689 // As mentioned above, we may have a P here if we ProcStart 690 // before this event. 691 if err := validateCtx(curCtx, event.SchedReqs{Thread: event.MustHave, Proc: event.MayHave, Goroutine: event.MustHave}); err != nil { 692 return curCtx, false, err 693 } 694 state, ok := o.gStates[curCtx.G] 695 if !ok { 696 return curCtx, false, fmt.Errorf("event %s for goroutine (%v) that doesn't exist", go122.EventString(ev.typ), curCtx.G) 697 } 698 if state.status != go122.GoSyscall { 699 return curCtx, false, fmt.Errorf("%s event for goroutine that's not %s", go122.EventString(ev.typ), GoRunning) 700 } 701 newCtx := curCtx 702 newCtx.G = NoGoroutine 703 state.status = go122.GoRunnable 704 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev}) 705 return newCtx, true, nil 706} 707 708func (o *ordering) advanceGoCreateSyscall(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) { 709 // This event indicates that a goroutine is effectively 710 // being created out of a cgo callback. Such a goroutine 711 // is 'created' in the syscall state. 712 if err := validateCtx(curCtx, event.SchedReqs{Thread: event.MustHave, Proc: event.MayHave, Goroutine: event.MustNotHave}); err != nil { 713 return curCtx, false, err 714 } 715 // This goroutine is effectively being created. Add a state for it. 716 newgid := GoID(ev.args[0]) 717 if _, ok := o.gStates[newgid]; ok { 718 return curCtx, false, fmt.Errorf("tried to create goroutine (%v) in syscall that already exists", newgid) 719 } 720 o.gStates[newgid] = &gState{id: newgid, status: go122.GoSyscall, seq: makeSeq(gen, 0)} 721 // Goroutine is executing. Bind it to the context. 722 newCtx := curCtx 723 newCtx.G = newgid 724 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev}) 725 return newCtx, true, nil 726} 727 728func (o *ordering) advanceGoDestroySyscall(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) { 729 // This event indicates that a goroutine created for a 730 // cgo callback is disappearing, either because the callback 731 // ending or the C thread that called it is being destroyed. 732 // 733 // Also, treat this as if we lost our P too. 734 // The thread ID may be reused by the platform and we'll get 735 // really confused if we try to steal the P is this is running 736 // with later. The new M with the same ID could even try to 737 // steal back this P from itself! 738 // 739 // The runtime is careful to make sure that any GoCreateSyscall 740 // event will enter the runtime emitting events for reacquiring a P. 741 // 742 // Note: we might have a P here. The P might not be released 743 // eagerly by the runtime, and it might get stolen back later 744 // (or never again, if the program is going to exit). 745 if err := validateCtx(curCtx, event.SchedReqs{Thread: event.MustHave, Proc: event.MayHave, Goroutine: event.MustHave}); err != nil { 746 return curCtx, false, err 747 } 748 // Check to make sure the goroutine exists in the right state. 749 state, ok := o.gStates[curCtx.G] 750 if !ok { 751 return curCtx, false, fmt.Errorf("event %s for goroutine (%v) that doesn't exist", go122.EventString(ev.typ), curCtx.G) 752 } 753 if state.status != go122.GoSyscall { 754 return curCtx, false, fmt.Errorf("%s event for goroutine that's not %v", go122.EventString(ev.typ), GoSyscall) 755 } 756 // This goroutine is exiting itself. 757 delete(o.gStates, curCtx.G) 758 newCtx := curCtx 759 newCtx.G = NoGoroutine 760 761 // If we have a proc, then we're dissociating from it now. See the comment at the top of the case. 762 if curCtx.P != NoProc { 763 pState, ok := o.pStates[curCtx.P] 764 if !ok { 765 return curCtx, false, fmt.Errorf("found invalid proc %d during %s", curCtx.P, go122.EventString(ev.typ)) 766 } 767 if pState.status != go122.ProcSyscall { 768 return curCtx, false, fmt.Errorf("proc %d in unexpected state %s during %s", curCtx.P, pState.status, go122.EventString(ev.typ)) 769 } 770 // See the go122-create-syscall-reuse-thread-id test case for more details. 771 pState.status = go122.ProcSyscallAbandoned 772 newCtx.P = NoProc 773 774 // Queue an extra self-ProcSteal event. 775 extra := makeEvent(evt, curCtx, go122.EvProcSteal, ev.time, uint64(curCtx.P)) 776 extra.base.extra(version.Go122)[0] = uint64(go122.ProcSyscall) 777 o.queue.push(extra) 778 } 779 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev}) 780 return newCtx, true, nil 781} 782 783func (o *ordering) advanceUserTaskBegin(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) { 784 // Handle tasks. Tasks are interesting because: 785 // - There's no Begin event required to reference a task. 786 // - End for a particular task ID can appear multiple times. 787 // As a result, there's very little to validate. The only 788 // thing we have to be sure of is that a task didn't begin 789 // after it had already begun. Task IDs are allowed to be 790 // reused, so we don't care about a Begin after an End. 791 id := TaskID(ev.args[0]) 792 if _, ok := o.activeTasks[id]; ok { 793 return curCtx, false, fmt.Errorf("task ID conflict: %d", id) 794 } 795 // Get the parent ID, but don't validate it. There's no guarantee 796 // we actually have information on whether it's active. 797 parentID := TaskID(ev.args[1]) 798 if parentID == BackgroundTask { 799 // Note: a value of 0 here actually means no parent, *not* the 800 // background task. Automatic background task attachment only 801 // applies to regions. 802 parentID = NoTask 803 ev.args[1] = uint64(NoTask) 804 } 805 806 // Validate the name and record it. We'll need to pass it through to 807 // EvUserTaskEnd. 808 nameID := stringID(ev.args[2]) 809 name, ok := evt.strings.get(nameID) 810 if !ok { 811 return curCtx, false, fmt.Errorf("invalid string ID %v for %v event", nameID, ev.typ) 812 } 813 o.activeTasks[id] = taskState{name: name, parentID: parentID} 814 if err := validateCtx(curCtx, event.UserGoReqs); err != nil { 815 return curCtx, false, err 816 } 817 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev}) 818 return curCtx, true, nil 819} 820 821func (o *ordering) advanceUserTaskEnd(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) { 822 id := TaskID(ev.args[0]) 823 if ts, ok := o.activeTasks[id]; ok { 824 // Smuggle the task info. This may happen in a different generation, 825 // which may not have the name in its string table. Add it to the extra 826 // strings table so we can look it up later. 827 ev.extra(version.Go122)[0] = uint64(ts.parentID) 828 ev.extra(version.Go122)[1] = uint64(evt.addExtraString(ts.name)) 829 delete(o.activeTasks, id) 830 } else { 831 // Explicitly clear the task info. 832 ev.extra(version.Go122)[0] = uint64(NoTask) 833 ev.extra(version.Go122)[1] = uint64(evt.addExtraString("")) 834 } 835 if err := validateCtx(curCtx, event.UserGoReqs); err != nil { 836 return curCtx, false, err 837 } 838 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev}) 839 return curCtx, true, nil 840} 841 842func (o *ordering) advanceUserRegionBegin(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) { 843 if err := validateCtx(curCtx, event.UserGoReqs); err != nil { 844 return curCtx, false, err 845 } 846 tid := TaskID(ev.args[0]) 847 nameID := stringID(ev.args[1]) 848 name, ok := evt.strings.get(nameID) 849 if !ok { 850 return curCtx, false, fmt.Errorf("invalid string ID %v for %v event", nameID, ev.typ) 851 } 852 gState, ok := o.gStates[curCtx.G] 853 if !ok { 854 return curCtx, false, fmt.Errorf("encountered EvUserRegionBegin without known state for current goroutine %d", curCtx.G) 855 } 856 if err := gState.beginRegion(userRegion{tid, name}); err != nil { 857 return curCtx, false, err 858 } 859 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev}) 860 return curCtx, true, nil 861} 862 863func (o *ordering) advanceUserRegionEnd(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) { 864 if err := validateCtx(curCtx, event.UserGoReqs); err != nil { 865 return curCtx, false, err 866 } 867 tid := TaskID(ev.args[0]) 868 nameID := stringID(ev.args[1]) 869 name, ok := evt.strings.get(nameID) 870 if !ok { 871 return curCtx, false, fmt.Errorf("invalid string ID %v for %v event", nameID, ev.typ) 872 } 873 gState, ok := o.gStates[curCtx.G] 874 if !ok { 875 return curCtx, false, fmt.Errorf("encountered EvUserRegionEnd without known state for current goroutine %d", curCtx.G) 876 } 877 if err := gState.endRegion(userRegion{tid, name}); err != nil { 878 return curCtx, false, err 879 } 880 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev}) 881 return curCtx, true, nil 882} 883 884// Handle the GC mark phase. 885// 886// We have sequence numbers for both start and end because they 887// can happen on completely different threads. We want an explicit 888// partial order edge between start and end here, otherwise we're 889// relying entirely on timestamps to make sure we don't advance a 890// GCEnd for a _different_ GC cycle if timestamps are wildly broken. 891func (o *ordering) advanceGCActive(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) { 892 seq := ev.args[0] 893 if gen == o.initialGen { 894 if o.gcState != gcUndetermined { 895 return curCtx, false, fmt.Errorf("GCActive in the first generation isn't first GC event") 896 } 897 o.gcSeq = seq 898 o.gcState = gcRunning 899 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev}) 900 return curCtx, true, nil 901 } 902 if seq != o.gcSeq+1 { 903 // This is not the right GC cycle. 904 return curCtx, false, nil 905 } 906 if o.gcState != gcRunning { 907 return curCtx, false, fmt.Errorf("encountered GCActive while GC was not in progress") 908 } 909 o.gcSeq = seq 910 if err := validateCtx(curCtx, event.UserGoReqs); err != nil { 911 return curCtx, false, err 912 } 913 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev}) 914 return curCtx, true, nil 915} 916 917func (o *ordering) advanceGCBegin(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) { 918 seq := ev.args[0] 919 if o.gcState == gcUndetermined { 920 o.gcSeq = seq 921 o.gcState = gcRunning 922 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev}) 923 return curCtx, true, nil 924 } 925 if seq != o.gcSeq+1 { 926 // This is not the right GC cycle. 927 return curCtx, false, nil 928 } 929 if o.gcState == gcRunning { 930 return curCtx, false, fmt.Errorf("encountered GCBegin while GC was already in progress") 931 } 932 o.gcSeq = seq 933 o.gcState = gcRunning 934 if err := validateCtx(curCtx, event.UserGoReqs); err != nil { 935 return curCtx, false, err 936 } 937 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev}) 938 return curCtx, true, nil 939} 940 941func (o *ordering) advanceGCEnd(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) { 942 seq := ev.args[0] 943 if seq != o.gcSeq+1 { 944 // This is not the right GC cycle. 945 return curCtx, false, nil 946 } 947 if o.gcState == gcNotRunning { 948 return curCtx, false, fmt.Errorf("encountered GCEnd when GC was not in progress") 949 } 950 if o.gcState == gcUndetermined { 951 return curCtx, false, fmt.Errorf("encountered GCEnd when GC was in an undetermined state") 952 } 953 o.gcSeq = seq 954 o.gcState = gcNotRunning 955 if err := validateCtx(curCtx, event.UserGoReqs); err != nil { 956 return curCtx, false, err 957 } 958 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev}) 959 return curCtx, true, nil 960} 961 962func (o *ordering) advanceAnnotation(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) { 963 // Handle simple instantaneous events that require a G. 964 if err := validateCtx(curCtx, event.UserGoReqs); err != nil { 965 return curCtx, false, err 966 } 967 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev}) 968 return curCtx, true, nil 969} 970 971func (o *ordering) advanceHeapMetric(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) { 972 // Handle allocation metrics, which don't require a G. 973 if err := validateCtx(curCtx, event.SchedReqs{Thread: event.MustHave, Proc: event.MustHave, Goroutine: event.MayHave}); err != nil { 974 return curCtx, false, err 975 } 976 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev}) 977 return curCtx, true, nil 978} 979 980func (o *ordering) advanceGCSweepBegin(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) { 981 // Handle sweep, which is bound to a P and doesn't require a G. 982 if err := validateCtx(curCtx, event.SchedReqs{Thread: event.MustHave, Proc: event.MustHave, Goroutine: event.MayHave}); err != nil { 983 return curCtx, false, err 984 } 985 if err := o.pStates[curCtx.P].beginRange(makeRangeType(ev.typ, 0)); err != nil { 986 return curCtx, false, err 987 } 988 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev}) 989 return curCtx, true, nil 990} 991 992func (o *ordering) advanceGCSweepActive(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) { 993 pid := ProcID(ev.args[0]) 994 // N.B. In practice Ps can't block while they're sweeping, so this can only 995 // ever reference curCtx.P. However, be lenient about this like we are with 996 // GCMarkAssistActive; there's no reason the runtime couldn't change to block 997 // in the middle of a sweep. 998 pState, ok := o.pStates[pid] 999 if !ok { 1000 return curCtx, false, fmt.Errorf("encountered GCSweepActive for unknown proc %d", pid) 1001 } 1002 if err := pState.activeRange(makeRangeType(ev.typ, 0), gen == o.initialGen); err != nil { 1003 return curCtx, false, err 1004 } 1005 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev}) 1006 return curCtx, true, nil 1007} 1008 1009func (o *ordering) advanceGCSweepEnd(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) { 1010 if err := validateCtx(curCtx, event.SchedReqs{Thread: event.MustHave, Proc: event.MustHave, Goroutine: event.MayHave}); err != nil { 1011 return curCtx, false, err 1012 } 1013 _, err := o.pStates[curCtx.P].endRange(ev.typ) 1014 if err != nil { 1015 return curCtx, false, err 1016 } 1017 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev}) 1018 return curCtx, true, nil 1019} 1020 1021func (o *ordering) advanceGoRangeBegin(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) { 1022 // Handle special goroutine-bound event ranges. 1023 if err := validateCtx(curCtx, event.UserGoReqs); err != nil { 1024 return curCtx, false, err 1025 } 1026 desc := stringID(0) 1027 if ev.typ == go122.EvSTWBegin { 1028 desc = stringID(ev.args[0]) 1029 } 1030 gState, ok := o.gStates[curCtx.G] 1031 if !ok { 1032 return curCtx, false, fmt.Errorf("encountered event of type %d without known state for current goroutine %d", ev.typ, curCtx.G) 1033 } 1034 if err := gState.beginRange(makeRangeType(ev.typ, desc)); err != nil { 1035 return curCtx, false, err 1036 } 1037 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev}) 1038 return curCtx, true, nil 1039} 1040 1041func (o *ordering) advanceGoRangeActive(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) { 1042 gid := GoID(ev.args[0]) 1043 // N.B. Like GoStatus, this can happen at any time, because it can 1044 // reference a non-running goroutine. Don't check anything about the 1045 // current scheduler context. 1046 gState, ok := o.gStates[gid] 1047 if !ok { 1048 return curCtx, false, fmt.Errorf("uninitialized goroutine %d found during %s", gid, go122.EventString(ev.typ)) 1049 } 1050 if err := gState.activeRange(makeRangeType(ev.typ, 0), gen == o.initialGen); err != nil { 1051 return curCtx, false, err 1052 } 1053 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev}) 1054 return curCtx, true, nil 1055} 1056 1057func (o *ordering) advanceGoRangeEnd(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) { 1058 if err := validateCtx(curCtx, event.UserGoReqs); err != nil { 1059 return curCtx, false, err 1060 } 1061 gState, ok := o.gStates[curCtx.G] 1062 if !ok { 1063 return curCtx, false, fmt.Errorf("encountered event of type %d without known state for current goroutine %d", ev.typ, curCtx.G) 1064 } 1065 desc, err := gState.endRange(ev.typ) 1066 if err != nil { 1067 return curCtx, false, err 1068 } 1069 if ev.typ == go122.EvSTWEnd { 1070 // Smuggle the kind into the event. 1071 // Don't use ev.extra here so we have symmetry with STWBegin. 1072 ev.args[0] = uint64(desc) 1073 } 1074 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev}) 1075 return curCtx, true, nil 1076} 1077 1078func (o *ordering) advanceAllocFree(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) { 1079 // Handle simple instantaneous events that may or may not have a P. 1080 if err := validateCtx(curCtx, event.SchedReqs{Thread: event.MustHave, Proc: event.MayHave, Goroutine: event.MayHave}); err != nil { 1081 return curCtx, false, err 1082 } 1083 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev}) 1084 return curCtx, true, nil 1085} 1086 1087// Next returns the next event in the ordering. 1088func (o *ordering) Next() (Event, bool) { 1089 return o.queue.pop() 1090} 1091 1092// schedCtx represents the scheduling resources associated with an event. 1093type schedCtx struct { 1094 G GoID 1095 P ProcID 1096 M ThreadID 1097} 1098 1099// validateCtx ensures that ctx conforms to some reqs, returning an error if 1100// it doesn't. 1101func validateCtx(ctx schedCtx, reqs event.SchedReqs) error { 1102 // Check thread requirements. 1103 if reqs.Thread == event.MustHave && ctx.M == NoThread { 1104 return fmt.Errorf("expected a thread but didn't have one") 1105 } else if reqs.Thread == event.MustNotHave && ctx.M != NoThread { 1106 return fmt.Errorf("expected no thread but had one") 1107 } 1108 1109 // Check proc requirements. 1110 if reqs.Proc == event.MustHave && ctx.P == NoProc { 1111 return fmt.Errorf("expected a proc but didn't have one") 1112 } else if reqs.Proc == event.MustNotHave && ctx.P != NoProc { 1113 return fmt.Errorf("expected no proc but had one") 1114 } 1115 1116 // Check goroutine requirements. 1117 if reqs.Goroutine == event.MustHave && ctx.G == NoGoroutine { 1118 return fmt.Errorf("expected a goroutine but didn't have one") 1119 } else if reqs.Goroutine == event.MustNotHave && ctx.G != NoGoroutine { 1120 return fmt.Errorf("expected no goroutine but had one") 1121 } 1122 return nil 1123} 1124 1125// gcState is a trinary variable for the current state of the GC. 1126// 1127// The third state besides "enabled" and "disabled" is "undetermined." 1128type gcState uint8 1129 1130const ( 1131 gcUndetermined gcState = iota 1132 gcNotRunning 1133 gcRunning 1134) 1135 1136// String returns a human-readable string for the GC state. 1137func (s gcState) String() string { 1138 switch s { 1139 case gcUndetermined: 1140 return "Undetermined" 1141 case gcNotRunning: 1142 return "NotRunning" 1143 case gcRunning: 1144 return "Running" 1145 } 1146 return "Bad" 1147} 1148 1149// userRegion represents a unique user region when attached to some gState. 1150type userRegion struct { 1151 // name must be a resolved string because the string ID for the same 1152 // string may change across generations, but we care about checking 1153 // the value itself. 1154 taskID TaskID 1155 name string 1156} 1157 1158// rangeType is a way to classify special ranges of time. 1159// 1160// These typically correspond 1:1 with "Begin" events, but 1161// they may have an optional subtype that describes the range 1162// in more detail. 1163type rangeType struct { 1164 typ event.Type // "Begin" event. 1165 desc stringID // Optional subtype. 1166} 1167 1168// makeRangeType constructs a new rangeType. 1169func makeRangeType(typ event.Type, desc stringID) rangeType { 1170 if styp := go122.Specs()[typ].StartEv; styp != go122.EvNone { 1171 typ = styp 1172 } 1173 return rangeType{typ, desc} 1174} 1175 1176// gState is the state of a goroutine at a point in the trace. 1177type gState struct { 1178 id GoID 1179 status go122.GoStatus 1180 seq seqCounter 1181 1182 // regions are the active user regions for this goroutine. 1183 regions []userRegion 1184 1185 // rangeState is the state of special time ranges bound to this goroutine. 1186 rangeState 1187} 1188 1189// beginRegion starts a user region on the goroutine. 1190func (s *gState) beginRegion(r userRegion) error { 1191 s.regions = append(s.regions, r) 1192 return nil 1193} 1194 1195// endRegion ends a user region on the goroutine. 1196func (s *gState) endRegion(r userRegion) error { 1197 if len(s.regions) == 0 { 1198 // We do not know about regions that began before tracing started. 1199 return nil 1200 } 1201 if next := s.regions[len(s.regions)-1]; next != r { 1202 return fmt.Errorf("misuse of region in goroutine %v: region end %v when the inner-most active region start event is %v", s.id, r, next) 1203 } 1204 s.regions = s.regions[:len(s.regions)-1] 1205 return nil 1206} 1207 1208// pState is the state of a proc at a point in the trace. 1209type pState struct { 1210 id ProcID 1211 status go122.ProcStatus 1212 seq seqCounter 1213 1214 // rangeState is the state of special time ranges bound to this proc. 1215 rangeState 1216} 1217 1218// mState is the state of a thread at a point in the trace. 1219type mState struct { 1220 g GoID // Goroutine bound to this M. (The goroutine's state is Executing.) 1221 p ProcID // Proc bound to this M. (The proc's state is Executing.) 1222} 1223 1224// rangeState represents the state of special time ranges. 1225type rangeState struct { 1226 // inFlight contains the rangeTypes of any ranges bound to a resource. 1227 inFlight []rangeType 1228} 1229 1230// beginRange begins a special range in time on the goroutine. 1231// 1232// Returns an error if the range is already in progress. 1233func (s *rangeState) beginRange(typ rangeType) error { 1234 if s.hasRange(typ) { 1235 return fmt.Errorf("discovered event already in-flight for when starting event %v", go122.Specs()[typ.typ].Name) 1236 } 1237 s.inFlight = append(s.inFlight, typ) 1238 return nil 1239} 1240 1241// activeRange marks special range in time on the goroutine as active in the 1242// initial generation, or confirms that it is indeed active in later generations. 1243func (s *rangeState) activeRange(typ rangeType, isInitialGen bool) error { 1244 if isInitialGen { 1245 if s.hasRange(typ) { 1246 return fmt.Errorf("found named active range already in first gen: %v", typ) 1247 } 1248 s.inFlight = append(s.inFlight, typ) 1249 } else if !s.hasRange(typ) { 1250 return fmt.Errorf("resource is missing active range: %v %v", go122.Specs()[typ.typ].Name, s.inFlight) 1251 } 1252 return nil 1253} 1254 1255// hasRange returns true if a special time range on the goroutine as in progress. 1256func (s *rangeState) hasRange(typ rangeType) bool { 1257 for _, ftyp := range s.inFlight { 1258 if ftyp == typ { 1259 return true 1260 } 1261 } 1262 return false 1263} 1264 1265// endRange ends a special range in time on the goroutine. 1266// 1267// This must line up with the start event type of the range the goroutine is currently in. 1268func (s *rangeState) endRange(typ event.Type) (stringID, error) { 1269 st := go122.Specs()[typ].StartEv 1270 idx := -1 1271 for i, r := range s.inFlight { 1272 if r.typ == st { 1273 idx = i 1274 break 1275 } 1276 } 1277 if idx < 0 { 1278 return 0, fmt.Errorf("tried to end event %v, but not in-flight", go122.Specs()[st].Name) 1279 } 1280 // Swap remove. 1281 desc := s.inFlight[idx].desc 1282 s.inFlight[idx], s.inFlight[len(s.inFlight)-1] = s.inFlight[len(s.inFlight)-1], s.inFlight[idx] 1283 s.inFlight = s.inFlight[:len(s.inFlight)-1] 1284 return desc, nil 1285} 1286 1287// seqCounter represents a global sequence counter for a resource. 1288type seqCounter struct { 1289 gen uint64 // The generation for the local sequence counter seq. 1290 seq uint64 // The sequence number local to the generation. 1291} 1292 1293// makeSeq creates a new seqCounter. 1294func makeSeq(gen, seq uint64) seqCounter { 1295 return seqCounter{gen: gen, seq: seq} 1296} 1297 1298// succeeds returns true if a is the immediate successor of b. 1299func (a seqCounter) succeeds(b seqCounter) bool { 1300 return a.gen == b.gen && a.seq == b.seq+1 1301} 1302 1303// String returns a debug string representation of the seqCounter. 1304func (c seqCounter) String() string { 1305 return fmt.Sprintf("%d (gen=%d)", c.seq, c.gen) 1306} 1307 1308func dumpOrdering(order *ordering) string { 1309 var sb strings.Builder 1310 for id, state := range order.gStates { 1311 fmt.Fprintf(&sb, "G %d [status=%s seq=%s]\n", id, state.status, state.seq) 1312 } 1313 fmt.Fprintln(&sb) 1314 for id, state := range order.pStates { 1315 fmt.Fprintf(&sb, "P %d [status=%s seq=%s]\n", id, state.status, state.seq) 1316 } 1317 fmt.Fprintln(&sb) 1318 for id, state := range order.mStates { 1319 fmt.Fprintf(&sb, "M %d [g=%d p=%d]\n", id, state.g, state.p) 1320 } 1321 fmt.Fprintln(&sb) 1322 fmt.Fprintf(&sb, "GC %d %s\n", order.gcSeq, order.gcState) 1323 return sb.String() 1324} 1325 1326// taskState represents an active task. 1327type taskState struct { 1328 // name is the type of the active task. 1329 name string 1330 1331 // parentID is the parent ID of the active task. 1332 parentID TaskID 1333} 1334 1335// queue implements a growable ring buffer with a queue API. 1336type queue[T any] struct { 1337 start, end int 1338 buf []T 1339} 1340 1341// push adds a new event to the back of the queue. 1342func (q *queue[T]) push(value T) { 1343 if q.end-q.start == len(q.buf) { 1344 q.grow() 1345 } 1346 q.buf[q.end%len(q.buf)] = value 1347 q.end++ 1348} 1349 1350// grow increases the size of the queue. 1351func (q *queue[T]) grow() { 1352 if len(q.buf) == 0 { 1353 q.buf = make([]T, 2) 1354 return 1355 } 1356 1357 // Create new buf and copy data over. 1358 newBuf := make([]T, len(q.buf)*2) 1359 pivot := q.start % len(q.buf) 1360 first, last := q.buf[pivot:], q.buf[:pivot] 1361 copy(newBuf[:len(first)], first) 1362 copy(newBuf[len(first):], last) 1363 1364 // Update the queue state. 1365 q.start = 0 1366 q.end = len(q.buf) 1367 q.buf = newBuf 1368} 1369 1370// pop removes an event from the front of the queue. If the 1371// queue is empty, it returns an EventBad event. 1372func (q *queue[T]) pop() (T, bool) { 1373 if q.end-q.start == 0 { 1374 return *new(T), false 1375 } 1376 elem := &q.buf[q.start%len(q.buf)] 1377 value := *elem 1378 *elem = *new(T) // Clear the entry before returning, so we don't hold onto old tables. 1379 q.start++ 1380 return value, true 1381} 1382 1383// makeEvent creates an Event from the provided information. 1384// 1385// It's just a convenience function; it's always OK to construct 1386// an Event manually if this isn't quite the right way to express 1387// the contents of the event. 1388func makeEvent(table *evTable, ctx schedCtx, typ event.Type, time Time, args ...uint64) Event { 1389 ev := Event{ 1390 table: table, 1391 ctx: ctx, 1392 base: baseEvent{ 1393 typ: typ, 1394 time: time, 1395 }, 1396 } 1397 copy(ev.base.args[:], args) 1398 return ev 1399} 1400