1// Copyright 2014 Google Inc. All Rights Reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15// Package graph collects a set of samples into a directed graph. 16package graph 17 18import ( 19 "fmt" 20 "math" 21 "path/filepath" 22 "regexp" 23 "sort" 24 "strconv" 25 "strings" 26 27 "github.com/google/pprof/profile" 28) 29 30var ( 31 // Removes package name and method arguments for Java method names. 32 // See tests for examples. 33 javaRegExp = regexp.MustCompile(`^(?:[a-z]\w*\.)*([A-Z][\w\$]*\.(?:<init>|[a-z][\w\$]*(?:\$\d+)?))(?:(?:\()|$)`) 34 // Removes package name and method arguments for Go function names. 35 // See tests for examples. 36 goRegExp = regexp.MustCompile(`^(?:[\w\-\.]+\/)+([^.]+\..+)`) 37 // Removes potential module versions in a package path. 38 goVerRegExp = regexp.MustCompile(`^(.*?)/v(?:[2-9]|[1-9][0-9]+)([./].*)$`) 39 // Strips C++ namespace prefix from a C++ function / method name. 40 // NOTE: Make sure to keep the template parameters in the name. Normally, 41 // template parameters are stripped from the C++ names but when 42 // -symbolize=demangle=templates flag is used, they will not be. 43 // See tests for examples. 44 cppRegExp = regexp.MustCompile(`^(?:[_a-zA-Z]\w*::)+(_*[A-Z]\w*::~?[_a-zA-Z]\w*(?:<.*>)?)`) 45 cppAnonymousPrefixRegExp = regexp.MustCompile(`^\(anonymous namespace\)::`) 46) 47 48// Graph summarizes a performance profile into a format that is 49// suitable for visualization. 50type Graph struct { 51 Nodes Nodes 52} 53 54// Options encodes the options for constructing a graph 55type Options struct { 56 SampleValue func(s []int64) int64 // Function to compute the value of a sample 57 SampleMeanDivisor func(s []int64) int64 // Function to compute the divisor for mean graphs, or nil 58 FormatTag func(int64, string) string // Function to format a sample tag value into a string 59 ObjNames bool // Always preserve obj filename 60 OrigFnNames bool // Preserve original (eg mangled) function names 61 62 CallTree bool // Build a tree instead of a graph 63 DropNegative bool // Drop nodes with overall negative values 64 65 KeptNodes NodeSet // If non-nil, only use nodes in this set 66} 67 68// Nodes is an ordered collection of graph nodes. 69type Nodes []*Node 70 71// Node is an entry on a profiling report. It represents a unique 72// program location. 73type Node struct { 74 // Info describes the source location associated to this node. 75 Info NodeInfo 76 77 // Function represents the function that this node belongs to. On 78 // graphs with sub-function resolution (eg line number or 79 // addresses), two nodes in a NodeMap that are part of the same 80 // function have the same value of Node.Function. If the Node 81 // represents the whole function, it points back to itself. 82 Function *Node 83 84 // Values associated to this node. Flat is exclusive to this node, 85 // Cum includes all descendents. 86 Flat, FlatDiv, Cum, CumDiv int64 87 88 // In and out Contains the nodes immediately reaching or reached by 89 // this node. 90 In, Out EdgeMap 91 92 // LabelTags provide additional information about subsets of a sample. 93 LabelTags TagMap 94 95 // NumericTags provide additional values for subsets of a sample. 96 // Numeric tags are optionally associated to a label tag. The key 97 // for NumericTags is the name of the LabelTag they are associated 98 // to, or "" for numeric tags not associated to a label tag. 99 NumericTags map[string]TagMap 100} 101 102// FlatValue returns the exclusive value for this node, computing the 103// mean if a divisor is available. 104func (n *Node) FlatValue() int64 { 105 if n.FlatDiv == 0 { 106 return n.Flat 107 } 108 return n.Flat / n.FlatDiv 109} 110 111// CumValue returns the inclusive value for this node, computing the 112// mean if a divisor is available. 113func (n *Node) CumValue() int64 { 114 if n.CumDiv == 0 { 115 return n.Cum 116 } 117 return n.Cum / n.CumDiv 118} 119 120// AddToEdge increases the weight of an edge between two nodes. If 121// there isn't such an edge one is created. 122func (n *Node) AddToEdge(to *Node, v int64, residual, inline bool) { 123 n.AddToEdgeDiv(to, 0, v, residual, inline) 124} 125 126// AddToEdgeDiv increases the weight of an edge between two nodes. If 127// there isn't such an edge one is created. 128func (n *Node) AddToEdgeDiv(to *Node, dv, v int64, residual, inline bool) { 129 if n.Out[to] != to.In[n] { 130 panic(fmt.Errorf("asymmetric edges %v %v", *n, *to)) 131 } 132 133 if e := n.Out[to]; e != nil { 134 e.WeightDiv += dv 135 e.Weight += v 136 if residual { 137 e.Residual = true 138 } 139 if !inline { 140 e.Inline = false 141 } 142 return 143 } 144 145 info := &Edge{Src: n, Dest: to, WeightDiv: dv, Weight: v, Residual: residual, Inline: inline} 146 n.Out[to] = info 147 to.In[n] = info 148} 149 150// NodeInfo contains the attributes for a node. 151type NodeInfo struct { 152 Name string 153 OrigName string 154 Address uint64 155 File string 156 StartLine, Lineno int 157 Columnno int 158 Objfile string 159} 160 161// PrintableName calls the Node's Formatter function with a single space separator. 162func (i *NodeInfo) PrintableName() string { 163 return strings.Join(i.NameComponents(), " ") 164} 165 166// NameComponents returns the components of the printable name to be used for a node. 167func (i *NodeInfo) NameComponents() []string { 168 var name []string 169 if i.Address != 0 { 170 name = append(name, fmt.Sprintf("%016x", i.Address)) 171 } 172 if fun := i.Name; fun != "" { 173 name = append(name, fun) 174 } 175 176 switch { 177 case i.Lineno != 0: 178 s := fmt.Sprintf("%s:%d", i.File, i.Lineno) 179 if i.Columnno != 0 { 180 s += fmt.Sprintf(":%d", i.Columnno) 181 } 182 // User requested line numbers, provide what we have. 183 name = append(name, s) 184 case i.File != "": 185 // User requested file name, provide it. 186 name = append(name, i.File) 187 case i.Name != "": 188 // User requested function name. It was already included. 189 case i.Objfile != "": 190 // Only binary name is available 191 name = append(name, "["+filepath.Base(i.Objfile)+"]") 192 default: 193 // Do not leave it empty if there is no information at all. 194 name = append(name, "<unknown>") 195 } 196 return name 197} 198 199// NodeMap maps from a node info struct to a node. It is used to merge 200// report entries with the same info. 201type NodeMap map[NodeInfo]*Node 202 203// NodeSet is a collection of node info structs. 204type NodeSet map[NodeInfo]bool 205 206// NodePtrSet is a collection of nodes. Trimming a graph or tree requires a set 207// of objects which uniquely identify the nodes to keep. In a graph, NodeInfo 208// works as a unique identifier; however, in a tree multiple nodes may share 209// identical NodeInfos. A *Node does uniquely identify a node so we can use that 210// instead. Though a *Node also uniquely identifies a node in a graph, 211// currently, during trimming, graphs are rebuilt from scratch using only the 212// NodeSet, so there would not be the required context of the initial graph to 213// allow for the use of *Node. 214type NodePtrSet map[*Node]bool 215 216// FindOrInsertNode takes the info for a node and either returns a matching node 217// from the node map if one exists, or adds one to the map if one does not. 218// If kept is non-nil, nodes are only added if they can be located on it. 219func (nm NodeMap) FindOrInsertNode(info NodeInfo, kept NodeSet) *Node { 220 if kept != nil { 221 if _, ok := kept[info]; !ok { 222 return nil 223 } 224 } 225 226 if n, ok := nm[info]; ok { 227 return n 228 } 229 230 n := &Node{ 231 Info: info, 232 In: make(EdgeMap), 233 Out: make(EdgeMap), 234 LabelTags: make(TagMap), 235 NumericTags: make(map[string]TagMap), 236 } 237 nm[info] = n 238 if info.Address == 0 && info.Lineno == 0 { 239 // This node represents the whole function, so point Function 240 // back to itself. 241 n.Function = n 242 return n 243 } 244 // Find a node that represents the whole function. 245 info.Address = 0 246 info.Lineno = 0 247 info.Columnno = 0 248 n.Function = nm.FindOrInsertNode(info, nil) 249 return n 250} 251 252// EdgeMap is used to represent the incoming/outgoing edges from a node. 253type EdgeMap map[*Node]*Edge 254 255// Edge contains any attributes to be represented about edges in a graph. 256type Edge struct { 257 Src, Dest *Node 258 // The summary weight of the edge 259 Weight, WeightDiv int64 260 261 // residual edges connect nodes that were connected through a 262 // separate node, which has been removed from the report. 263 Residual bool 264 // An inline edge represents a call that was inlined into the caller. 265 Inline bool 266} 267 268// WeightValue returns the weight value for this edge, normalizing if a 269// divisor is available. 270func (e *Edge) WeightValue() int64 { 271 if e.WeightDiv == 0 { 272 return e.Weight 273 } 274 return e.Weight / e.WeightDiv 275} 276 277// Tag represent sample annotations 278type Tag struct { 279 Name string 280 Unit string // Describe the value, "" for non-numeric tags 281 Value int64 282 Flat, FlatDiv int64 283 Cum, CumDiv int64 284} 285 286// FlatValue returns the exclusive value for this tag, computing the 287// mean if a divisor is available. 288func (t *Tag) FlatValue() int64 { 289 if t.FlatDiv == 0 { 290 return t.Flat 291 } 292 return t.Flat / t.FlatDiv 293} 294 295// CumValue returns the inclusive value for this tag, computing the 296// mean if a divisor is available. 297func (t *Tag) CumValue() int64 { 298 if t.CumDiv == 0 { 299 return t.Cum 300 } 301 return t.Cum / t.CumDiv 302} 303 304// TagMap is a collection of tags, classified by their name. 305type TagMap map[string]*Tag 306 307// SortTags sorts a slice of tags based on their weight. 308func SortTags(t []*Tag, flat bool) []*Tag { 309 ts := tags{t, flat} 310 sort.Sort(ts) 311 return ts.t 312} 313 314// New summarizes performance data from a profile into a graph. 315func New(prof *profile.Profile, o *Options) *Graph { 316 if o.CallTree { 317 return newTree(prof, o) 318 } 319 g, _ := newGraph(prof, o) 320 return g 321} 322 323// newGraph computes a graph from a profile. It returns the graph, and 324// a map from the profile location indices to the corresponding graph 325// nodes. 326func newGraph(prof *profile.Profile, o *Options) (*Graph, map[uint64]Nodes) { 327 nodes, locationMap := CreateNodes(prof, o) 328 seenNode := make(map[*Node]bool) 329 seenEdge := make(map[nodePair]bool) 330 for _, sample := range prof.Sample { 331 var w, dw int64 332 w = o.SampleValue(sample.Value) 333 if o.SampleMeanDivisor != nil { 334 dw = o.SampleMeanDivisor(sample.Value) 335 } 336 if dw == 0 && w == 0 { 337 continue 338 } 339 for k := range seenNode { 340 delete(seenNode, k) 341 } 342 for k := range seenEdge { 343 delete(seenEdge, k) 344 } 345 var parent *Node 346 // A residual edge goes over one or more nodes that were not kept. 347 residual := false 348 349 labels := joinLabels(sample) 350 // Group the sample frames, based on a global map. 351 for i := len(sample.Location) - 1; i >= 0; i-- { 352 l := sample.Location[i] 353 locNodes := locationMap[l.ID] 354 for ni := len(locNodes) - 1; ni >= 0; ni-- { 355 n := locNodes[ni] 356 if n == nil { 357 residual = true 358 continue 359 } 360 // Add cum weight to all nodes in stack, avoiding double counting. 361 if _, ok := seenNode[n]; !ok { 362 seenNode[n] = true 363 n.addSample(dw, w, labels, sample.NumLabel, sample.NumUnit, o.FormatTag, false) 364 } 365 // Update edge weights for all edges in stack, avoiding double counting. 366 if _, ok := seenEdge[nodePair{n, parent}]; !ok && parent != nil && n != parent { 367 seenEdge[nodePair{n, parent}] = true 368 parent.AddToEdgeDiv(n, dw, w, residual, ni != len(locNodes)-1) 369 } 370 parent = n 371 residual = false 372 } 373 } 374 if parent != nil && !residual { 375 // Add flat weight to leaf node. 376 parent.addSample(dw, w, labels, sample.NumLabel, sample.NumUnit, o.FormatTag, true) 377 } 378 } 379 380 return selectNodesForGraph(nodes, o.DropNegative), locationMap 381} 382 383func selectNodesForGraph(nodes Nodes, dropNegative bool) *Graph { 384 // Collect nodes into a graph. 385 gNodes := make(Nodes, 0, len(nodes)) 386 for _, n := range nodes { 387 if n == nil { 388 continue 389 } 390 if n.Cum == 0 && n.Flat == 0 { 391 continue 392 } 393 if dropNegative && isNegative(n) { 394 continue 395 } 396 gNodes = append(gNodes, n) 397 } 398 return &Graph{gNodes} 399} 400 401type nodePair struct { 402 src, dest *Node 403} 404 405func newTree(prof *profile.Profile, o *Options) (g *Graph) { 406 parentNodeMap := make(map[*Node]NodeMap, len(prof.Sample)) 407 for _, sample := range prof.Sample { 408 var w, dw int64 409 w = o.SampleValue(sample.Value) 410 if o.SampleMeanDivisor != nil { 411 dw = o.SampleMeanDivisor(sample.Value) 412 } 413 if dw == 0 && w == 0 { 414 continue 415 } 416 var parent *Node 417 labels := joinLabels(sample) 418 // Group the sample frames, based on a per-node map. 419 for i := len(sample.Location) - 1; i >= 0; i-- { 420 l := sample.Location[i] 421 lines := l.Line 422 if len(lines) == 0 { 423 lines = []profile.Line{{}} // Create empty line to include location info. 424 } 425 for lidx := len(lines) - 1; lidx >= 0; lidx-- { 426 nodeMap := parentNodeMap[parent] 427 if nodeMap == nil { 428 nodeMap = make(NodeMap) 429 parentNodeMap[parent] = nodeMap 430 } 431 n := nodeMap.findOrInsertLine(l, lines[lidx], o) 432 if n == nil { 433 continue 434 } 435 n.addSample(dw, w, labels, sample.NumLabel, sample.NumUnit, o.FormatTag, false) 436 if parent != nil { 437 parent.AddToEdgeDiv(n, dw, w, false, lidx != len(lines)-1) 438 } 439 parent = n 440 } 441 } 442 if parent != nil { 443 parent.addSample(dw, w, labels, sample.NumLabel, sample.NumUnit, o.FormatTag, true) 444 } 445 } 446 447 nodes := make(Nodes, 0, len(prof.Location)) 448 for _, nm := range parentNodeMap { 449 nodes = append(nodes, nm.nodes()...) 450 } 451 return selectNodesForGraph(nodes, o.DropNegative) 452} 453 454// ShortenFunctionName returns a shortened version of a function's name. 455func ShortenFunctionName(f string) string { 456 f = cppAnonymousPrefixRegExp.ReplaceAllString(f, "") 457 f = goVerRegExp.ReplaceAllString(f, `${1}${2}`) 458 for _, re := range []*regexp.Regexp{goRegExp, javaRegExp, cppRegExp} { 459 if matches := re.FindStringSubmatch(f); len(matches) >= 2 { 460 return strings.Join(matches[1:], "") 461 } 462 } 463 return f 464} 465 466// TrimTree trims a Graph in forest form, keeping only the nodes in kept. This 467// will not work correctly if even a single node has multiple parents. 468func (g *Graph) TrimTree(kept NodePtrSet) { 469 // Creates a new list of nodes 470 oldNodes := g.Nodes 471 g.Nodes = make(Nodes, 0, len(kept)) 472 473 for _, cur := range oldNodes { 474 // A node may not have multiple parents 475 if len(cur.In) > 1 { 476 panic("TrimTree only works on trees") 477 } 478 479 // If a node should be kept, add it to the new list of nodes 480 if _, ok := kept[cur]; ok { 481 g.Nodes = append(g.Nodes, cur) 482 continue 483 } 484 485 // If a node has no parents, then delete all of the in edges of its 486 // children to make them each roots of their own trees. 487 if len(cur.In) == 0 { 488 for _, outEdge := range cur.Out { 489 delete(outEdge.Dest.In, cur) 490 } 491 continue 492 } 493 494 // Get the parent. This works since at this point cur.In must contain only 495 // one element. 496 if len(cur.In) != 1 { 497 panic("Get parent assertion failed. cur.In expected to be of length 1.") 498 } 499 var parent *Node 500 for _, edge := range cur.In { 501 parent = edge.Src 502 } 503 504 parentEdgeInline := parent.Out[cur].Inline 505 506 // Remove the edge from the parent to this node 507 delete(parent.Out, cur) 508 509 // Reconfigure every edge from the current node to now begin at the parent. 510 for _, outEdge := range cur.Out { 511 child := outEdge.Dest 512 513 delete(child.In, cur) 514 child.In[parent] = outEdge 515 parent.Out[child] = outEdge 516 517 outEdge.Src = parent 518 outEdge.Residual = true 519 // If the edge from the parent to the current node and the edge from the 520 // current node to the child are both inline, then this resulting residual 521 // edge should also be inline 522 outEdge.Inline = parentEdgeInline && outEdge.Inline 523 } 524 } 525 g.RemoveRedundantEdges() 526} 527 528func joinLabels(s *profile.Sample) string { 529 if len(s.Label) == 0 { 530 return "" 531 } 532 533 var labels []string 534 for key, vals := range s.Label { 535 for _, v := range vals { 536 labels = append(labels, key+":"+v) 537 } 538 } 539 sort.Strings(labels) 540 return strings.Join(labels, `\n`) 541} 542 543// isNegative returns true if the node is considered as "negative" for the 544// purposes of drop_negative. 545func isNegative(n *Node) bool { 546 switch { 547 case n.Flat < 0: 548 return true 549 case n.Flat == 0 && n.Cum < 0: 550 return true 551 default: 552 return false 553 } 554} 555 556// CreateNodes creates graph nodes for all locations in a profile. It 557// returns set of all nodes, plus a mapping of each location to the 558// set of corresponding nodes (one per location.Line). 559func CreateNodes(prof *profile.Profile, o *Options) (Nodes, map[uint64]Nodes) { 560 locations := make(map[uint64]Nodes, len(prof.Location)) 561 nm := make(NodeMap, len(prof.Location)) 562 for _, l := range prof.Location { 563 lines := l.Line 564 if len(lines) == 0 { 565 lines = []profile.Line{{}} // Create empty line to include location info. 566 } 567 nodes := make(Nodes, len(lines)) 568 for ln := range lines { 569 nodes[ln] = nm.findOrInsertLine(l, lines[ln], o) 570 } 571 locations[l.ID] = nodes 572 } 573 return nm.nodes(), locations 574} 575 576func (nm NodeMap) nodes() Nodes { 577 nodes := make(Nodes, 0, len(nm)) 578 for _, n := range nm { 579 nodes = append(nodes, n) 580 } 581 return nodes 582} 583 584func (nm NodeMap) findOrInsertLine(l *profile.Location, li profile.Line, o *Options) *Node { 585 var objfile string 586 if m := l.Mapping; m != nil && m.File != "" { 587 objfile = m.File 588 } 589 590 if ni := nodeInfo(l, li, objfile, o); ni != nil { 591 return nm.FindOrInsertNode(*ni, o.KeptNodes) 592 } 593 return nil 594} 595 596func nodeInfo(l *profile.Location, line profile.Line, objfile string, o *Options) *NodeInfo { 597 if line.Function == nil { 598 return &NodeInfo{Address: l.Address, Objfile: objfile} 599 } 600 ni := &NodeInfo{ 601 Address: l.Address, 602 Lineno: int(line.Line), 603 Columnno: int(line.Column), 604 Name: line.Function.Name, 605 } 606 if fname := line.Function.Filename; fname != "" { 607 ni.File = filepath.Clean(fname) 608 } 609 if o.OrigFnNames { 610 ni.OrigName = line.Function.SystemName 611 } 612 if o.ObjNames || (ni.Name == "" && ni.OrigName == "") { 613 ni.Objfile = objfile 614 ni.StartLine = int(line.Function.StartLine) 615 } 616 return ni 617} 618 619type tags struct { 620 t []*Tag 621 flat bool 622} 623 624func (t tags) Len() int { return len(t.t) } 625func (t tags) Swap(i, j int) { t.t[i], t.t[j] = t.t[j], t.t[i] } 626func (t tags) Less(i, j int) bool { 627 if !t.flat { 628 if t.t[i].Cum != t.t[j].Cum { 629 return abs64(t.t[i].Cum) > abs64(t.t[j].Cum) 630 } 631 } 632 if t.t[i].Flat != t.t[j].Flat { 633 return abs64(t.t[i].Flat) > abs64(t.t[j].Flat) 634 } 635 return t.t[i].Name < t.t[j].Name 636} 637 638// Sum adds the flat and cum values of a set of nodes. 639func (ns Nodes) Sum() (flat int64, cum int64) { 640 for _, n := range ns { 641 flat += n.Flat 642 cum += n.Cum 643 } 644 return 645} 646 647func (n *Node) addSample(dw, w int64, labels string, numLabel map[string][]int64, numUnit map[string][]string, format func(int64, string) string, flat bool) { 648 // Update sample value 649 if flat { 650 n.FlatDiv += dw 651 n.Flat += w 652 } else { 653 n.CumDiv += dw 654 n.Cum += w 655 } 656 657 // Add string tags 658 if labels != "" { 659 t := n.LabelTags.findOrAddTag(labels, "", 0) 660 if flat { 661 t.FlatDiv += dw 662 t.Flat += w 663 } else { 664 t.CumDiv += dw 665 t.Cum += w 666 } 667 } 668 669 numericTags := n.NumericTags[labels] 670 if numericTags == nil { 671 numericTags = TagMap{} 672 n.NumericTags[labels] = numericTags 673 } 674 // Add numeric tags 675 if format == nil { 676 format = defaultLabelFormat 677 } 678 for k, nvals := range numLabel { 679 units := numUnit[k] 680 for i, v := range nvals { 681 var t *Tag 682 if len(units) > 0 { 683 t = numericTags.findOrAddTag(format(v, units[i]), units[i], v) 684 } else { 685 t = numericTags.findOrAddTag(format(v, k), k, v) 686 } 687 if flat { 688 t.FlatDiv += dw 689 t.Flat += w 690 } else { 691 t.CumDiv += dw 692 t.Cum += w 693 } 694 } 695 } 696} 697 698func defaultLabelFormat(v int64, key string) string { 699 return strconv.FormatInt(v, 10) 700} 701 702func (m TagMap) findOrAddTag(label, unit string, value int64) *Tag { 703 l := m[label] 704 if l == nil { 705 l = &Tag{ 706 Name: label, 707 Unit: unit, 708 Value: value, 709 } 710 m[label] = l 711 } 712 return l 713} 714 715// String returns a text representation of a graph, for debugging purposes. 716func (g *Graph) String() string { 717 var s []string 718 719 nodeIndex := make(map[*Node]int, len(g.Nodes)) 720 721 for i, n := range g.Nodes { 722 nodeIndex[n] = i + 1 723 } 724 725 for i, n := range g.Nodes { 726 name := n.Info.PrintableName() 727 var in, out []int 728 729 for _, from := range n.In { 730 in = append(in, nodeIndex[from.Src]) 731 } 732 for _, to := range n.Out { 733 out = append(out, nodeIndex[to.Dest]) 734 } 735 s = append(s, fmt.Sprintf("%d: %s[flat=%d cum=%d] %x -> %v ", i+1, name, n.Flat, n.Cum, in, out)) 736 } 737 return strings.Join(s, "\n") 738} 739 740// DiscardLowFrequencyNodes returns a set of the nodes at or over a 741// specific cum value cutoff. 742func (g *Graph) DiscardLowFrequencyNodes(nodeCutoff int64) NodeSet { 743 return makeNodeSet(g.Nodes, nodeCutoff) 744} 745 746// DiscardLowFrequencyNodePtrs returns a NodePtrSet of nodes at or over a 747// specific cum value cutoff. 748func (g *Graph) DiscardLowFrequencyNodePtrs(nodeCutoff int64) NodePtrSet { 749 cutNodes := getNodesAboveCumCutoff(g.Nodes, nodeCutoff) 750 kept := make(NodePtrSet, len(cutNodes)) 751 for _, n := range cutNodes { 752 kept[n] = true 753 } 754 return kept 755} 756 757func makeNodeSet(nodes Nodes, nodeCutoff int64) NodeSet { 758 cutNodes := getNodesAboveCumCutoff(nodes, nodeCutoff) 759 kept := make(NodeSet, len(cutNodes)) 760 for _, n := range cutNodes { 761 kept[n.Info] = true 762 } 763 return kept 764} 765 766// getNodesAboveCumCutoff returns all the nodes which have a Cum value greater 767// than or equal to cutoff. 768func getNodesAboveCumCutoff(nodes Nodes, nodeCutoff int64) Nodes { 769 cutoffNodes := make(Nodes, 0, len(nodes)) 770 for _, n := range nodes { 771 if abs64(n.Cum) < nodeCutoff { 772 continue 773 } 774 cutoffNodes = append(cutoffNodes, n) 775 } 776 return cutoffNodes 777} 778 779// TrimLowFrequencyTags removes tags that have less than 780// the specified weight. 781func (g *Graph) TrimLowFrequencyTags(tagCutoff int64) { 782 // Remove nodes with value <= total*nodeFraction 783 for _, n := range g.Nodes { 784 n.LabelTags = trimLowFreqTags(n.LabelTags, tagCutoff) 785 for s, nt := range n.NumericTags { 786 n.NumericTags[s] = trimLowFreqTags(nt, tagCutoff) 787 } 788 } 789} 790 791func trimLowFreqTags(tags TagMap, minValue int64) TagMap { 792 kept := TagMap{} 793 for s, t := range tags { 794 if abs64(t.Flat) >= minValue || abs64(t.Cum) >= minValue { 795 kept[s] = t 796 } 797 } 798 return kept 799} 800 801// TrimLowFrequencyEdges removes edges that have less than 802// the specified weight. Returns the number of edges removed 803func (g *Graph) TrimLowFrequencyEdges(edgeCutoff int64) int { 804 var droppedEdges int 805 for _, n := range g.Nodes { 806 for src, e := range n.In { 807 if abs64(e.Weight) < edgeCutoff { 808 delete(n.In, src) 809 delete(src.Out, n) 810 droppedEdges++ 811 } 812 } 813 } 814 return droppedEdges 815} 816 817// SortNodes sorts the nodes in a graph based on a specific heuristic. 818func (g *Graph) SortNodes(cum bool, visualMode bool) { 819 // Sort nodes based on requested mode 820 switch { 821 case visualMode: 822 // Specialized sort to produce a more visually-interesting graph 823 g.Nodes.Sort(EntropyOrder) 824 case cum: 825 g.Nodes.Sort(CumNameOrder) 826 default: 827 g.Nodes.Sort(FlatNameOrder) 828 } 829} 830 831// SelectTopNodePtrs returns a set of the top maxNodes *Node in a graph. 832func (g *Graph) SelectTopNodePtrs(maxNodes int, visualMode bool) NodePtrSet { 833 set := make(NodePtrSet) 834 for _, node := range g.selectTopNodes(maxNodes, visualMode) { 835 set[node] = true 836 } 837 return set 838} 839 840// SelectTopNodes returns a set of the top maxNodes nodes in a graph. 841func (g *Graph) SelectTopNodes(maxNodes int, visualMode bool) NodeSet { 842 return makeNodeSet(g.selectTopNodes(maxNodes, visualMode), 0) 843} 844 845// selectTopNodes returns a slice of the top maxNodes nodes in a graph. 846func (g *Graph) selectTopNodes(maxNodes int, visualMode bool) Nodes { 847 if maxNodes > 0 { 848 if visualMode { 849 var count int 850 // If generating a visual graph, count tags as nodes. Update 851 // maxNodes to account for them. 852 for i, n := range g.Nodes { 853 tags := countTags(n) 854 if tags > maxNodelets { 855 tags = maxNodelets 856 } 857 if count += tags + 1; count >= maxNodes { 858 maxNodes = i + 1 859 break 860 } 861 } 862 } 863 } 864 if maxNodes > len(g.Nodes) { 865 maxNodes = len(g.Nodes) 866 } 867 return g.Nodes[:maxNodes] 868} 869 870// countTags counts the tags with flat count. This underestimates the 871// number of tags being displayed, but in practice is close enough. 872func countTags(n *Node) int { 873 count := 0 874 for _, e := range n.LabelTags { 875 if e.Flat != 0 { 876 count++ 877 } 878 } 879 for _, t := range n.NumericTags { 880 for _, e := range t { 881 if e.Flat != 0 { 882 count++ 883 } 884 } 885 } 886 return count 887} 888 889// RemoveRedundantEdges removes residual edges if the destination can 890// be reached through another path. This is done to simplify the graph 891// while preserving connectivity. 892func (g *Graph) RemoveRedundantEdges() { 893 // Walk the nodes and outgoing edges in reverse order to prefer 894 // removing edges with the lowest weight. 895 for i := len(g.Nodes); i > 0; i-- { 896 n := g.Nodes[i-1] 897 in := n.In.Sort() 898 for j := len(in); j > 0; j-- { 899 e := in[j-1] 900 if !e.Residual { 901 // Do not remove edges heavier than a non-residual edge, to 902 // avoid potential confusion. 903 break 904 } 905 if isRedundantEdge(e) { 906 delete(e.Src.Out, e.Dest) 907 delete(e.Dest.In, e.Src) 908 } 909 } 910 } 911} 912 913// isRedundantEdge determines if there is a path that allows e.Src 914// to reach e.Dest after removing e. 915func isRedundantEdge(e *Edge) bool { 916 src, n := e.Src, e.Dest 917 seen := map[*Node]bool{n: true} 918 queue := Nodes{n} 919 for len(queue) > 0 { 920 n := queue[0] 921 queue = queue[1:] 922 for _, ie := range n.In { 923 if e == ie || seen[ie.Src] { 924 continue 925 } 926 if ie.Src == src { 927 return true 928 } 929 seen[ie.Src] = true 930 queue = append(queue, ie.Src) 931 } 932 } 933 return false 934} 935 936// nodeSorter is a mechanism used to allow a report to be sorted 937// in different ways. 938type nodeSorter struct { 939 rs Nodes 940 less func(l, r *Node) bool 941} 942 943func (s nodeSorter) Len() int { return len(s.rs) } 944func (s nodeSorter) Swap(i, j int) { s.rs[i], s.rs[j] = s.rs[j], s.rs[i] } 945func (s nodeSorter) Less(i, j int) bool { return s.less(s.rs[i], s.rs[j]) } 946 947// Sort reorders a slice of nodes based on the specified ordering 948// criteria. The result is sorted in decreasing order for (absolute) 949// numeric quantities, alphabetically for text, and increasing for 950// addresses. 951func (ns Nodes) Sort(o NodeOrder) error { 952 var s nodeSorter 953 954 switch o { 955 case FlatNameOrder: 956 s = nodeSorter{ns, 957 func(l, r *Node) bool { 958 if iv, jv := abs64(l.Flat), abs64(r.Flat); iv != jv { 959 return iv > jv 960 } 961 if iv, jv := l.Info.PrintableName(), r.Info.PrintableName(); iv != jv { 962 return iv < jv 963 } 964 if iv, jv := abs64(l.Cum), abs64(r.Cum); iv != jv { 965 return iv > jv 966 } 967 return compareNodes(l, r) 968 }, 969 } 970 case FlatCumNameOrder: 971 s = nodeSorter{ns, 972 func(l, r *Node) bool { 973 if iv, jv := abs64(l.Flat), abs64(r.Flat); iv != jv { 974 return iv > jv 975 } 976 if iv, jv := abs64(l.Cum), abs64(r.Cum); iv != jv { 977 return iv > jv 978 } 979 if iv, jv := l.Info.PrintableName(), r.Info.PrintableName(); iv != jv { 980 return iv < jv 981 } 982 return compareNodes(l, r) 983 }, 984 } 985 case NameOrder: 986 s = nodeSorter{ns, 987 func(l, r *Node) bool { 988 if iv, jv := l.Info.Name, r.Info.Name; iv != jv { 989 return iv < jv 990 } 991 return compareNodes(l, r) 992 }, 993 } 994 case FileOrder: 995 s = nodeSorter{ns, 996 func(l, r *Node) bool { 997 if iv, jv := l.Info.File, r.Info.File; iv != jv { 998 return iv < jv 999 } 1000 if iv, jv := l.Info.StartLine, r.Info.StartLine; iv != jv { 1001 return iv < jv 1002 } 1003 return compareNodes(l, r) 1004 }, 1005 } 1006 case AddressOrder: 1007 s = nodeSorter{ns, 1008 func(l, r *Node) bool { 1009 if iv, jv := l.Info.Address, r.Info.Address; iv != jv { 1010 return iv < jv 1011 } 1012 return compareNodes(l, r) 1013 }, 1014 } 1015 case CumNameOrder, EntropyOrder: 1016 // Hold scoring for score-based ordering 1017 var score map[*Node]int64 1018 scoreOrder := func(l, r *Node) bool { 1019 if iv, jv := abs64(score[l]), abs64(score[r]); iv != jv { 1020 return iv > jv 1021 } 1022 if iv, jv := l.Info.PrintableName(), r.Info.PrintableName(); iv != jv { 1023 return iv < jv 1024 } 1025 if iv, jv := abs64(l.Flat), abs64(r.Flat); iv != jv { 1026 return iv > jv 1027 } 1028 return compareNodes(l, r) 1029 } 1030 1031 switch o { 1032 case CumNameOrder: 1033 score = make(map[*Node]int64, len(ns)) 1034 for _, n := range ns { 1035 score[n] = n.Cum 1036 } 1037 s = nodeSorter{ns, scoreOrder} 1038 case EntropyOrder: 1039 score = make(map[*Node]int64, len(ns)) 1040 for _, n := range ns { 1041 score[n] = entropyScore(n) 1042 } 1043 s = nodeSorter{ns, scoreOrder} 1044 } 1045 default: 1046 return fmt.Errorf("report: unrecognized sort ordering: %d", o) 1047 } 1048 sort.Sort(s) 1049 return nil 1050} 1051 1052// compareNodes compares two nodes to provide a deterministic ordering 1053// between them. Two nodes cannot have the same Node.Info value. 1054func compareNodes(l, r *Node) bool { 1055 return fmt.Sprint(l.Info) < fmt.Sprint(r.Info) 1056} 1057 1058// entropyScore computes a score for a node representing how important 1059// it is to include this node on a graph visualization. It is used to 1060// sort the nodes and select which ones to display if we have more 1061// nodes than desired in the graph. This number is computed by looking 1062// at the flat and cum weights of the node and the incoming/outgoing 1063// edges. The fundamental idea is to penalize nodes that have a simple 1064// fallthrough from their incoming to the outgoing edge. 1065func entropyScore(n *Node) int64 { 1066 score := float64(0) 1067 1068 if len(n.In) == 0 { 1069 score++ // Favor entry nodes 1070 } else { 1071 score += edgeEntropyScore(n, n.In, 0) 1072 } 1073 1074 if len(n.Out) == 0 { 1075 score++ // Favor leaf nodes 1076 } else { 1077 score += edgeEntropyScore(n, n.Out, n.Flat) 1078 } 1079 1080 return int64(score*float64(n.Cum)) + n.Flat 1081} 1082 1083// edgeEntropyScore computes the entropy value for a set of edges 1084// coming in or out of a node. Entropy (as defined in information 1085// theory) refers to the amount of information encoded by the set of 1086// edges. A set of edges that have a more interesting distribution of 1087// samples gets a higher score. 1088func edgeEntropyScore(n *Node, edges EdgeMap, self int64) float64 { 1089 score := float64(0) 1090 total := self 1091 for _, e := range edges { 1092 if e.Weight > 0 { 1093 total += abs64(e.Weight) 1094 } 1095 } 1096 if total != 0 { 1097 for _, e := range edges { 1098 frac := float64(abs64(e.Weight)) / float64(total) 1099 score += -frac * math.Log2(frac) 1100 } 1101 if self > 0 { 1102 frac := float64(abs64(self)) / float64(total) 1103 score += -frac * math.Log2(frac) 1104 } 1105 } 1106 return score 1107} 1108 1109// NodeOrder sets the ordering for a Sort operation 1110type NodeOrder int 1111 1112// Sorting options for node sort. 1113const ( 1114 FlatNameOrder NodeOrder = iota 1115 FlatCumNameOrder 1116 CumNameOrder 1117 NameOrder 1118 FileOrder 1119 AddressOrder 1120 EntropyOrder 1121) 1122 1123// Sort returns a slice of the edges in the map, in a consistent 1124// order. The sort order is first based on the edge weight 1125// (higher-to-lower) and then by the node names to avoid flakiness. 1126func (e EdgeMap) Sort() []*Edge { 1127 el := make(edgeList, 0, len(e)) 1128 for _, w := range e { 1129 el = append(el, w) 1130 } 1131 1132 sort.Sort(el) 1133 return el 1134} 1135 1136// Sum returns the total weight for a set of nodes. 1137func (e EdgeMap) Sum() int64 { 1138 var ret int64 1139 for _, edge := range e { 1140 ret += edge.Weight 1141 } 1142 return ret 1143} 1144 1145type edgeList []*Edge 1146 1147func (el edgeList) Len() int { 1148 return len(el) 1149} 1150 1151func (el edgeList) Less(i, j int) bool { 1152 if el[i].Weight != el[j].Weight { 1153 return abs64(el[i].Weight) > abs64(el[j].Weight) 1154 } 1155 1156 from1 := el[i].Src.Info.PrintableName() 1157 from2 := el[j].Src.Info.PrintableName() 1158 if from1 != from2 { 1159 return from1 < from2 1160 } 1161 1162 to1 := el[i].Dest.Info.PrintableName() 1163 to2 := el[j].Dest.Info.PrintableName() 1164 1165 return to1 < to2 1166} 1167 1168func (el edgeList) Swap(i, j int) { 1169 el[i], el[j] = el[j], el[i] 1170} 1171 1172func abs64(i int64) int64 { 1173 if i < 0 { 1174 return -i 1175 } 1176 return i 1177} 1178