1// Copyright 2023 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// This file implements a decision tree for fast matching of requests to
6// patterns.
7//
8// The root of the tree branches on the host of the request.
9// The next level branches on the method.
10// The remaining levels branch on consecutive segments of the path.
11//
12// The "more specific wins" precedence rule can result in backtracking.
13// For example, given the patterns
14//     /a/b/z
15//     /a/{x}/c
16// we will first try to match the path "/a/b/c" with /a/b/z, and
17// when that fails we will try against /a/{x}/c.
18
19package http
20
21import (
22	"strings"
23)
24
25// A routingNode is a node in the decision tree.
26// The same struct is used for leaf and interior nodes.
27type routingNode struct {
28	// A leaf node holds a single pattern and the Handler it was registered
29	// with.
30	pattern *pattern
31	handler Handler
32
33	// An interior node maps parts of the incoming request to child nodes.
34	// special children keys:
35	//     "/"	trailing slash (resulting from {$})
36	//	   ""   single wildcard
37	children   mapping[string, *routingNode]
38	multiChild *routingNode // child with multi wildcard
39	emptyChild *routingNode // optimization: child with key ""
40}
41
42// addPattern adds a pattern and its associated Handler to the tree
43// at root.
44func (root *routingNode) addPattern(p *pattern, h Handler) {
45	// First level of tree is host.
46	n := root.addChild(p.host)
47	// Second level of tree is method.
48	n = n.addChild(p.method)
49	// Remaining levels are path.
50	n.addSegments(p.segments, p, h)
51}
52
53// addSegments adds the given segments to the tree rooted at n.
54// If there are no segments, then n is a leaf node that holds
55// the given pattern and handler.
56func (n *routingNode) addSegments(segs []segment, p *pattern, h Handler) {
57	if len(segs) == 0 {
58		n.set(p, h)
59		return
60	}
61	seg := segs[0]
62	if seg.multi {
63		if len(segs) != 1 {
64			panic("multi wildcard not last")
65		}
66		c := &routingNode{}
67		n.multiChild = c
68		c.set(p, h)
69	} else if seg.wild {
70		n.addChild("").addSegments(segs[1:], p, h)
71	} else {
72		n.addChild(seg.s).addSegments(segs[1:], p, h)
73	}
74}
75
76// set sets the pattern and handler for n, which
77// must be a leaf node.
78func (n *routingNode) set(p *pattern, h Handler) {
79	if n.pattern != nil || n.handler != nil {
80		panic("non-nil leaf fields")
81	}
82	n.pattern = p
83	n.handler = h
84}
85
86// addChild adds a child node with the given key to n
87// if one does not exist, and returns the child.
88func (n *routingNode) addChild(key string) *routingNode {
89	if key == "" {
90		if n.emptyChild == nil {
91			n.emptyChild = &routingNode{}
92		}
93		return n.emptyChild
94	}
95	if c := n.findChild(key); c != nil {
96		return c
97	}
98	c := &routingNode{}
99	n.children.add(key, c)
100	return c
101}
102
103// findChild returns the child of n with the given key, or nil
104// if there is no child with that key.
105func (n *routingNode) findChild(key string) *routingNode {
106	if key == "" {
107		return n.emptyChild
108	}
109	r, _ := n.children.find(key)
110	return r
111}
112
113// match returns the leaf node under root that matches the arguments, and a list
114// of values for pattern wildcards in the order that the wildcards appear.
115// For example, if the request path is "/a/b/c" and the pattern is "/{x}/b/{y}",
116// then the second return value will be []string{"a", "c"}.
117func (root *routingNode) match(host, method, path string) (*routingNode, []string) {
118	if host != "" {
119		// There is a host. If there is a pattern that specifies that host and it
120		// matches, we are done. If the pattern doesn't match, fall through to
121		// try patterns with no host.
122		if l, m := root.findChild(host).matchMethodAndPath(method, path); l != nil {
123			return l, m
124		}
125	}
126	return root.emptyChild.matchMethodAndPath(method, path)
127}
128
129// matchMethodAndPath matches the method and path.
130// Its return values are the same as [routingNode.match].
131// The receiver should be a child of the root.
132func (n *routingNode) matchMethodAndPath(method, path string) (*routingNode, []string) {
133	if n == nil {
134		return nil, nil
135	}
136	if l, m := n.findChild(method).matchPath(path, nil); l != nil {
137		// Exact match of method name.
138		return l, m
139	}
140	if method == "HEAD" {
141		// GET matches HEAD too.
142		if l, m := n.findChild("GET").matchPath(path, nil); l != nil {
143			return l, m
144		}
145	}
146	// No exact match; try patterns with no method.
147	return n.emptyChild.matchPath(path, nil)
148}
149
150// matchPath matches a path.
151// Its return values are the same as [routingNode.match].
152// matchPath calls itself recursively. The matches argument holds the wildcard matches
153// found so far.
154func (n *routingNode) matchPath(path string, matches []string) (*routingNode, []string) {
155	if n == nil {
156		return nil, nil
157	}
158	// If path is empty, then we are done.
159	// If n is a leaf node, we found a match; return it.
160	// If n is an interior node (which means it has a nil pattern),
161	// then we failed to match.
162	if path == "" {
163		if n.pattern == nil {
164			return nil, nil
165		}
166		return n, matches
167	}
168	// Get the first segment of path.
169	seg, rest := firstSegment(path)
170	// First try matching against patterns that have a literal for this position.
171	// We know by construction that such patterns are more specific than those
172	// with a wildcard at this position (they are either more specific, equivalent,
173	// or overlap, and we ruled out the first two when the patterns were registered).
174	if n, m := n.findChild(seg).matchPath(rest, matches); n != nil {
175		return n, m
176	}
177	// If matching a literal fails, try again with patterns that have a single
178	// wildcard (represented by an empty string in the child mapping).
179	// Again, by construction, patterns with a single wildcard must be more specific than
180	// those with a multi wildcard.
181	// We skip this step if the segment is a trailing slash, because single wildcards
182	// don't match trailing slashes.
183	if seg != "/" {
184		if n, m := n.emptyChild.matchPath(rest, append(matches, seg)); n != nil {
185			return n, m
186		}
187	}
188	// Lastly, match the pattern (there can be at most one) that has a multi
189	// wildcard in this position to the rest of the path.
190	if c := n.multiChild; c != nil {
191		// Don't record a match for a nameless wildcard (which arises from a
192		// trailing slash in the pattern).
193		if c.pattern.lastSegment().s != "" {
194			matches = append(matches, pathUnescape(path[1:])) // remove initial slash
195		}
196		return c, matches
197	}
198	return nil, nil
199}
200
201// firstSegment splits path into its first segment, and the rest.
202// The path must begin with "/".
203// If path consists of only a slash, firstSegment returns ("/", "").
204// The segment is returned unescaped, if possible.
205func firstSegment(path string) (seg, rest string) {
206	if path == "/" {
207		return "/", ""
208	}
209	path = path[1:] // drop initial slash
210	i := strings.IndexByte(path, '/')
211	if i < 0 {
212		i = len(path)
213	}
214	return pathUnescape(path[:i]), path[i:]
215}
216
217// matchingMethods adds to methodSet all the methods that would result in a
218// match if passed to routingNode.match with the given host and path.
219func (root *routingNode) matchingMethods(host, path string, methodSet map[string]bool) {
220	if host != "" {
221		root.findChild(host).matchingMethodsPath(path, methodSet)
222	}
223	root.emptyChild.matchingMethodsPath(path, methodSet)
224	if methodSet["GET"] {
225		methodSet["HEAD"] = true
226	}
227}
228
229func (n *routingNode) matchingMethodsPath(path string, set map[string]bool) {
230	if n == nil {
231		return
232	}
233	n.children.eachPair(func(method string, c *routingNode) bool {
234		if p, _ := c.matchPath(path, nil); p != nil {
235			set[method] = true
236		}
237		return true
238	})
239	// Don't look at the empty child. If there were an empty
240	// child, it would match on any method, but we only
241	// call this when we fail to match on a method.
242}
243