1// Copyright 2023 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Patterns for ServeMux routing.
6
7package http
8
9import (
10	"errors"
11	"fmt"
12	"net/url"
13	"strings"
14	"unicode"
15)
16
17// A pattern is something that can be matched against an HTTP request.
18// It has an optional method, an optional host, and a path.
19type pattern struct {
20	str    string // original string
21	method string
22	host   string
23	// The representation of a path differs from the surface syntax, which
24	// simplifies most algorithms.
25	//
26	// Paths ending in '/' are represented with an anonymous "..." wildcard.
27	// For example, the path "a/" is represented as a literal segment "a" followed
28	// by a segment with multi==true.
29	//
30	// Paths ending in "{$}" are represented with the literal segment "/".
31	// For example, the path "a/{$}" is represented as a literal segment "a" followed
32	// by a literal segment "/".
33	segments []segment
34	loc      string // source location of registering call, for helpful messages
35}
36
37func (p *pattern) String() string { return p.str }
38
39func (p *pattern) lastSegment() segment {
40	return p.segments[len(p.segments)-1]
41}
42
43// A segment is a pattern piece that matches one or more path segments, or
44// a trailing slash.
45//
46// If wild is false, it matches a literal segment, or, if s == "/", a trailing slash.
47// Examples:
48//
49//	"a" => segment{s: "a"}
50//	"/{$}" => segment{s: "/"}
51//
52// If wild is true and multi is false, it matches a single path segment.
53// Example:
54//
55//	"{x}" => segment{s: "x", wild: true}
56//
57// If both wild and multi are true, it matches all remaining path segments.
58// Example:
59//
60//	"{rest...}" => segment{s: "rest", wild: true, multi: true}
61type segment struct {
62	s     string // literal or wildcard name or "/" for "/{$}".
63	wild  bool
64	multi bool // "..." wildcard
65}
66
67// parsePattern parses a string into a Pattern.
68// The string's syntax is
69//
70//	[METHOD] [HOST]/[PATH]
71//
72// where:
73//   - METHOD is an HTTP method
74//   - HOST is a hostname
75//   - PATH consists of slash-separated segments, where each segment is either
76//     a literal or a wildcard of the form "{name}", "{name...}", or "{$}".
77//
78// METHOD, HOST and PATH are all optional; that is, the string can be "/".
79// If METHOD is present, it must be followed by at least one space or tab.
80// Wildcard names must be valid Go identifiers.
81// The "{$}" and "{name...}" wildcard must occur at the end of PATH.
82// PATH may end with a '/'.
83// Wildcard names in a path must be distinct.
84func parsePattern(s string) (_ *pattern, err error) {
85	if len(s) == 0 {
86		return nil, errors.New("empty pattern")
87	}
88	off := 0 // offset into string
89	defer func() {
90		if err != nil {
91			err = fmt.Errorf("at offset %d: %w", off, err)
92		}
93	}()
94
95	method, rest, found := s, "", false
96	if i := strings.IndexAny(s, " \t"); i >= 0 {
97		method, rest, found = s[:i], strings.TrimLeft(s[i+1:], " \t"), true
98	}
99	if !found {
100		rest = method
101		method = ""
102	}
103	if method != "" && !validMethod(method) {
104		return nil, fmt.Errorf("invalid method %q", method)
105	}
106	p := &pattern{str: s, method: method}
107
108	if found {
109		off = len(method) + 1
110	}
111	i := strings.IndexByte(rest, '/')
112	if i < 0 {
113		return nil, errors.New("host/path missing /")
114	}
115	p.host = rest[:i]
116	rest = rest[i:]
117	if j := strings.IndexByte(p.host, '{'); j >= 0 {
118		off += j
119		return nil, errors.New("host contains '{' (missing initial '/'?)")
120	}
121	// At this point, rest is the path.
122	off += i
123
124	// An unclean path with a method that is not CONNECT can never match,
125	// because paths are cleaned before matching.
126	if method != "" && method != "CONNECT" && rest != cleanPath(rest) {
127		return nil, errors.New("non-CONNECT pattern with unclean path can never match")
128	}
129
130	seenNames := map[string]bool{} // remember wildcard names to catch dups
131	for len(rest) > 0 {
132		// Invariant: rest[0] == '/'.
133		rest = rest[1:]
134		off = len(s) - len(rest)
135		if len(rest) == 0 {
136			// Trailing slash.
137			p.segments = append(p.segments, segment{wild: true, multi: true})
138			break
139		}
140		i := strings.IndexByte(rest, '/')
141		if i < 0 {
142			i = len(rest)
143		}
144		var seg string
145		seg, rest = rest[:i], rest[i:]
146		if i := strings.IndexByte(seg, '{'); i < 0 {
147			// Literal.
148			seg = pathUnescape(seg)
149			p.segments = append(p.segments, segment{s: seg})
150		} else {
151			// Wildcard.
152			if i != 0 {
153				return nil, errors.New("bad wildcard segment (must start with '{')")
154			}
155			if seg[len(seg)-1] != '}' {
156				return nil, errors.New("bad wildcard segment (must end with '}')")
157			}
158			name := seg[1 : len(seg)-1]
159			if name == "$" {
160				if len(rest) != 0 {
161					return nil, errors.New("{$} not at end")
162				}
163				p.segments = append(p.segments, segment{s: "/"})
164				break
165			}
166			name, multi := strings.CutSuffix(name, "...")
167			if multi && len(rest) != 0 {
168				return nil, errors.New("{...} wildcard not at end")
169			}
170			if name == "" {
171				return nil, errors.New("empty wildcard")
172			}
173			if !isValidWildcardName(name) {
174				return nil, fmt.Errorf("bad wildcard name %q", name)
175			}
176			if seenNames[name] {
177				return nil, fmt.Errorf("duplicate wildcard name %q", name)
178			}
179			seenNames[name] = true
180			p.segments = append(p.segments, segment{s: name, wild: true, multi: multi})
181		}
182	}
183	return p, nil
184}
185
186func isValidWildcardName(s string) bool {
187	if s == "" {
188		return false
189	}
190	// Valid Go identifier.
191	for i, c := range s {
192		if !unicode.IsLetter(c) && c != '_' && (i == 0 || !unicode.IsDigit(c)) {
193			return false
194		}
195	}
196	return true
197}
198
199func pathUnescape(path string) string {
200	u, err := url.PathUnescape(path)
201	if err != nil {
202		// Invalidly escaped path; use the original
203		return path
204	}
205	return u
206}
207
208// relationship is a relationship between two patterns, p1 and p2.
209type relationship string
210
211const (
212	equivalent   relationship = "equivalent"   // both match the same requests
213	moreGeneral  relationship = "moreGeneral"  // p1 matches everything p2 does & more
214	moreSpecific relationship = "moreSpecific" // p2 matches everything p1 does & more
215	disjoint     relationship = "disjoint"     // there is no request that both match
216	overlaps     relationship = "overlaps"     // there is a request that both match, but neither is more specific
217)
218
219// conflictsWith reports whether p1 conflicts with p2, that is, whether
220// there is a request that both match but where neither is higher precedence
221// than the other.
222//
223//	Precedence is defined by two rules:
224//	1. Patterns with a host win over patterns without a host.
225//	2. Patterns whose method and path is more specific win. One pattern is more
226//	   specific than another if the second matches all the (method, path) pairs
227//	   of the first and more.
228//
229// If rule 1 doesn't apply, then two patterns conflict if their relationship
230// is either equivalence (they match the same set of requests) or overlap
231// (they both match some requests, but neither is more specific than the other).
232func (p1 *pattern) conflictsWith(p2 *pattern) bool {
233	if p1.host != p2.host {
234		// Either one host is empty and the other isn't, in which case the
235		// one with the host wins by rule 1, or neither host is empty
236		// and they differ, so they won't match the same paths.
237		return false
238	}
239	rel := p1.comparePathsAndMethods(p2)
240	return rel == equivalent || rel == overlaps
241}
242
243func (p1 *pattern) comparePathsAndMethods(p2 *pattern) relationship {
244	mrel := p1.compareMethods(p2)
245	// Optimization: avoid a call to comparePaths.
246	if mrel == disjoint {
247		return disjoint
248	}
249	prel := p1.comparePaths(p2)
250	return combineRelationships(mrel, prel)
251}
252
253// compareMethods determines the relationship between the method
254// part of patterns p1 and p2.
255//
256// A method can either be empty, "GET", or something else.
257// The empty string matches any method, so it is the most general.
258// "GET" matches both GET and HEAD.
259// Anything else matches only itself.
260func (p1 *pattern) compareMethods(p2 *pattern) relationship {
261	if p1.method == p2.method {
262		return equivalent
263	}
264	if p1.method == "" {
265		// p1 matches any method, but p2 does not, so p1 is more general.
266		return moreGeneral
267	}
268	if p2.method == "" {
269		return moreSpecific
270	}
271	if p1.method == "GET" && p2.method == "HEAD" {
272		// p1 matches GET and HEAD; p2 matches only HEAD.
273		return moreGeneral
274	}
275	if p2.method == "GET" && p1.method == "HEAD" {
276		return moreSpecific
277	}
278	return disjoint
279}
280
281// comparePaths determines the relationship between the path
282// part of two patterns.
283func (p1 *pattern) comparePaths(p2 *pattern) relationship {
284	// Optimization: if a path pattern doesn't end in a multi ("...") wildcard, then it
285	// can only match paths with the same number of segments.
286	if len(p1.segments) != len(p2.segments) && !p1.lastSegment().multi && !p2.lastSegment().multi {
287		return disjoint
288	}
289
290	// Consider corresponding segments in the two path patterns.
291	var segs1, segs2 []segment
292	rel := equivalent
293	for segs1, segs2 = p1.segments, p2.segments; len(segs1) > 0 && len(segs2) > 0; segs1, segs2 = segs1[1:], segs2[1:] {
294		rel = combineRelationships(rel, compareSegments(segs1[0], segs2[0]))
295		if rel == disjoint {
296			return rel
297		}
298	}
299	// We've reached the end of the corresponding segments of the patterns.
300	// If they have the same number of segments, then we've already determined
301	// their relationship.
302	if len(segs1) == 0 && len(segs2) == 0 {
303		return rel
304	}
305	// Otherwise, the only way they could fail to be disjoint is if the shorter
306	// pattern ends in a multi. In that case, that multi is more general
307	// than the remainder of the longer pattern, so combine those two relationships.
308	if len(segs1) < len(segs2) && p1.lastSegment().multi {
309		return combineRelationships(rel, moreGeneral)
310	}
311	if len(segs2) < len(segs1) && p2.lastSegment().multi {
312		return combineRelationships(rel, moreSpecific)
313	}
314	return disjoint
315}
316
317// compareSegments determines the relationship between two segments.
318func compareSegments(s1, s2 segment) relationship {
319	if s1.multi && s2.multi {
320		return equivalent
321	}
322	if s1.multi {
323		return moreGeneral
324	}
325	if s2.multi {
326		return moreSpecific
327	}
328	if s1.wild && s2.wild {
329		return equivalent
330	}
331	if s1.wild {
332		if s2.s == "/" {
333			// A single wildcard doesn't match a trailing slash.
334			return disjoint
335		}
336		return moreGeneral
337	}
338	if s2.wild {
339		if s1.s == "/" {
340			return disjoint
341		}
342		return moreSpecific
343	}
344	// Both literals.
345	if s1.s == s2.s {
346		return equivalent
347	}
348	return disjoint
349}
350
351// combineRelationships determines the overall relationship of two patterns
352// given the relationships of a partition of the patterns into two parts.
353//
354// For example, if p1 is more general than p2 in one way but equivalent
355// in the other, then it is more general overall.
356//
357// Or if p1 is more general in one way and more specific in the other, then
358// they overlap.
359func combineRelationships(r1, r2 relationship) relationship {
360	switch r1 {
361	case equivalent:
362		return r2
363	case disjoint:
364		return disjoint
365	case overlaps:
366		if r2 == disjoint {
367			return disjoint
368		}
369		return overlaps
370	case moreGeneral, moreSpecific:
371		switch r2 {
372		case equivalent:
373			return r1
374		case inverseRelationship(r1):
375			return overlaps
376		default:
377			return r2
378		}
379	default:
380		panic(fmt.Sprintf("unknown relationship %q", r1))
381	}
382}
383
384// If p1 has relationship `r` to p2, then
385// p2 has inverseRelationship(r) to p1.
386func inverseRelationship(r relationship) relationship {
387	switch r {
388	case moreSpecific:
389		return moreGeneral
390	case moreGeneral:
391		return moreSpecific
392	default:
393		return r
394	}
395}
396
397// isLitOrSingle reports whether the segment is a non-dollar literal or a single wildcard.
398func isLitOrSingle(seg segment) bool {
399	if seg.wild {
400		return !seg.multi
401	}
402	return seg.s != "/"
403}
404
405// describeConflict returns an explanation of why two patterns conflict.
406func describeConflict(p1, p2 *pattern) string {
407	mrel := p1.compareMethods(p2)
408	prel := p1.comparePaths(p2)
409	rel := combineRelationships(mrel, prel)
410	if rel == equivalent {
411		return fmt.Sprintf("%s matches the same requests as %s", p1, p2)
412	}
413	if rel != overlaps {
414		panic("describeConflict called with non-conflicting patterns")
415	}
416	if prel == overlaps {
417		return fmt.Sprintf(`%[1]s and %[2]s both match some paths, like %[3]q.
418But neither is more specific than the other.
419%[1]s matches %[4]q, but %[2]s doesn't.
420%[2]s matches %[5]q, but %[1]s doesn't.`,
421			p1, p2, commonPath(p1, p2), differencePath(p1, p2), differencePath(p2, p1))
422	}
423	if mrel == moreGeneral && prel == moreSpecific {
424		return fmt.Sprintf("%s matches more methods than %s, but has a more specific path pattern", p1, p2)
425	}
426	if mrel == moreSpecific && prel == moreGeneral {
427		return fmt.Sprintf("%s matches fewer methods than %s, but has a more general path pattern", p1, p2)
428	}
429	return fmt.Sprintf("bug: unexpected way for two patterns %s and %s to conflict: methods %s, paths %s", p1, p2, mrel, prel)
430}
431
432// writeMatchingPath writes to b a path that matches the segments.
433func writeMatchingPath(b *strings.Builder, segs []segment) {
434	for _, s := range segs {
435		writeSegment(b, s)
436	}
437}
438
439func writeSegment(b *strings.Builder, s segment) {
440	b.WriteByte('/')
441	if !s.multi && s.s != "/" {
442		b.WriteString(s.s)
443	}
444}
445
446// commonPath returns a path that both p1 and p2 match.
447// It assumes there is such a path.
448func commonPath(p1, p2 *pattern) string {
449	var b strings.Builder
450	var segs1, segs2 []segment
451	for segs1, segs2 = p1.segments, p2.segments; len(segs1) > 0 && len(segs2) > 0; segs1, segs2 = segs1[1:], segs2[1:] {
452		if s1 := segs1[0]; s1.wild {
453			writeSegment(&b, segs2[0])
454		} else {
455			writeSegment(&b, s1)
456		}
457	}
458	if len(segs1) > 0 {
459		writeMatchingPath(&b, segs1)
460	} else if len(segs2) > 0 {
461		writeMatchingPath(&b, segs2)
462	}
463	return b.String()
464}
465
466// differencePath returns a path that p1 matches and p2 doesn't.
467// It assumes there is such a path.
468func differencePath(p1, p2 *pattern) string {
469	var b strings.Builder
470
471	var segs1, segs2 []segment
472	for segs1, segs2 = p1.segments, p2.segments; len(segs1) > 0 && len(segs2) > 0; segs1, segs2 = segs1[1:], segs2[1:] {
473		s1 := segs1[0]
474		s2 := segs2[0]
475		if s1.multi && s2.multi {
476			// From here the patterns match the same paths, so we must have found a difference earlier.
477			b.WriteByte('/')
478			return b.String()
479
480		}
481		if s1.multi && !s2.multi {
482			// s1 ends in a "..." wildcard but s2 does not.
483			// A trailing slash will distinguish them, unless s2 ends in "{$}",
484			// in which case any segment will do; prefer the wildcard name if
485			// it has one.
486			b.WriteByte('/')
487			if s2.s == "/" {
488				if s1.s != "" {
489					b.WriteString(s1.s)
490				} else {
491					b.WriteString("x")
492				}
493			}
494			return b.String()
495		}
496		if !s1.multi && s2.multi {
497			writeSegment(&b, s1)
498		} else if s1.wild && s2.wild {
499			// Both patterns will match whatever we put here; use
500			// the first wildcard name.
501			writeSegment(&b, s1)
502		} else if s1.wild && !s2.wild {
503			// s1 is a wildcard, s2 is a literal.
504			// Any segment other than s2.s will work.
505			// Prefer the wildcard name, but if it's the same as the literal,
506			// tweak the literal.
507			if s1.s != s2.s {
508				writeSegment(&b, s1)
509			} else {
510				b.WriteByte('/')
511				b.WriteString(s2.s + "x")
512			}
513		} else if !s1.wild && s2.wild {
514			writeSegment(&b, s1)
515		} else {
516			// Both are literals. A precondition of this function is that the
517			// patterns overlap, so they must be the same literal. Use it.
518			if s1.s != s2.s {
519				panic(fmt.Sprintf("literals differ: %q and %q", s1.s, s2.s))
520			}
521			writeSegment(&b, s1)
522		}
523	}
524	if len(segs1) > 0 {
525		// p1 is longer than p2, and p2 does not end in a multi.
526		// Anything that matches the rest of p1 will do.
527		writeMatchingPath(&b, segs1)
528	} else if len(segs2) > 0 {
529		writeMatchingPath(&b, segs2)
530	}
531	return b.String()
532}
533