1// Copyright 2010 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5package token 6 7import ( 8 "cmp" 9 "fmt" 10 "slices" 11 "strconv" 12 "sync" 13 "sync/atomic" 14) 15 16// If debug is set, invalid offset and position values cause a panic 17// (go.dev/issue/57490). 18const debug = false 19 20// ----------------------------------------------------------------------------- 21// Positions 22 23// Position describes an arbitrary source position 24// including the file, line, and column location. 25// A Position is valid if the line number is > 0. 26type Position struct { 27 Filename string // filename, if any 28 Offset int // offset, starting at 0 29 Line int // line number, starting at 1 30 Column int // column number, starting at 1 (byte count) 31} 32 33// IsValid reports whether the position is valid. 34func (pos *Position) IsValid() bool { return pos.Line > 0 } 35 36// String returns a string in one of several forms: 37// 38// file:line:column valid position with file name 39// file:line valid position with file name but no column (column == 0) 40// line:column valid position without file name 41// line valid position without file name and no column (column == 0) 42// file invalid position with file name 43// - invalid position without file name 44func (pos Position) String() string { 45 s := pos.Filename 46 if pos.IsValid() { 47 if s != "" { 48 s += ":" 49 } 50 s += strconv.Itoa(pos.Line) 51 if pos.Column != 0 { 52 s += fmt.Sprintf(":%d", pos.Column) 53 } 54 } 55 if s == "" { 56 s = "-" 57 } 58 return s 59} 60 61// Pos is a compact encoding of a source position within a file set. 62// It can be converted into a [Position] for a more convenient, but much 63// larger, representation. 64// 65// The Pos value for a given file is a number in the range [base, base+size], 66// where base and size are specified when a file is added to the file set. 
// The difference between a Pos value and the corresponding file base
// corresponds to the byte offset of that position (represented by the Pos value)
// from the beginning of the file. Thus, the file base offset is the Pos value
// representing the first byte in the file.
//
// To create the Pos value for a specific source offset (measured in bytes),
// first add the respective file to the current file set using [FileSet.AddFile]
// and then call [File.Pos](offset) for that file. Given a Pos value p
// for a specific file set fset, the corresponding [Position] value is
// obtained by calling fset.Position(p).
//
// Pos values can be compared directly with the usual comparison operators:
// If two Pos values p and q are in the same file, comparing p and q is
// equivalent to comparing the respective source file offsets. If p and q
// are in different files, p < q is true if the file implied by p was added
// to the respective file set before the file implied by q.
type Pos int

// The zero value for [Pos] is NoPos; there is no file and line information
// associated with it, and NoPos.IsValid() is false. NoPos is always
// smaller than any other [Pos] value. The corresponding [Position] value
// for NoPos is the zero value for [Position].
const NoPos Pos = 0

// IsValid reports whether p is a valid position, that is,
// whether it differs from [NoPos].
func (p Pos) IsValid() bool { return p != NoPos }

// -----------------------------------------------------------------------------
// File

// A File is a handle for a file belonging to a [FileSet].
// A File has a name, size, and line offset table.
101type File struct { 102 name string // file name as provided to AddFile 103 base int // Pos value range for this file is [base...base+size] 104 size int // file size as provided to AddFile 105 106 // lines and infos are protected by mutex 107 mutex sync.Mutex 108 lines []int // lines contains the offset of the first character for each line (the first entry is always 0) 109 infos []lineInfo 110} 111 112// Name returns the file name of file f as registered with AddFile. 113func (f *File) Name() string { 114 return f.name 115} 116 117// Base returns the base offset of file f as registered with AddFile. 118func (f *File) Base() int { 119 return f.base 120} 121 122// Size returns the size of file f as registered with AddFile. 123func (f *File) Size() int { 124 return f.size 125} 126 127// LineCount returns the number of lines in file f. 128func (f *File) LineCount() int { 129 f.mutex.Lock() 130 n := len(f.lines) 131 f.mutex.Unlock() 132 return n 133} 134 135// AddLine adds the line offset for a new line. 136// The line offset must be larger than the offset for the previous line 137// and smaller than the file size; otherwise the line offset is ignored. 138func (f *File) AddLine(offset int) { 139 f.mutex.Lock() 140 if i := len(f.lines); (i == 0 || f.lines[i-1] < offset) && offset < f.size { 141 f.lines = append(f.lines, offset) 142 } 143 f.mutex.Unlock() 144} 145 146// MergeLine merges a line with the following line. It is akin to replacing 147// the newline character at the end of the line with a space (to not change the 148// remaining offsets). To obtain the line number, consult e.g. [Position.Line]. 149// MergeLine will panic if given an invalid line number. 
150func (f *File) MergeLine(line int) { 151 if line < 1 { 152 panic(fmt.Sprintf("invalid line number %d (should be >= 1)", line)) 153 } 154 f.mutex.Lock() 155 defer f.mutex.Unlock() 156 if line >= len(f.lines) { 157 panic(fmt.Sprintf("invalid line number %d (should be < %d)", line, len(f.lines))) 158 } 159 // To merge the line numbered <line> with the line numbered <line+1>, 160 // we need to remove the entry in lines corresponding to the line 161 // numbered <line+1>. The entry in lines corresponding to the line 162 // numbered <line+1> is located at index <line>, since indices in lines 163 // are 0-based and line numbers are 1-based. 164 copy(f.lines[line:], f.lines[line+1:]) 165 f.lines = f.lines[:len(f.lines)-1] 166} 167 168// Lines returns the effective line offset table of the form described by [File.SetLines]. 169// Callers must not mutate the result. 170func (f *File) Lines() []int { 171 f.mutex.Lock() 172 lines := f.lines 173 f.mutex.Unlock() 174 return lines 175} 176 177// SetLines sets the line offsets for a file and reports whether it succeeded. 178// The line offsets are the offsets of the first character of each line; 179// for instance for the content "ab\nc\n" the line offsets are {0, 3}. 180// An empty file has an empty line offset table. 181// Each line offset must be larger than the offset for the previous line 182// and smaller than the file size; otherwise SetLines fails and returns 183// false. 184// Callers must not mutate the provided slice after SetLines returns. 185func (f *File) SetLines(lines []int) bool { 186 // verify validity of lines table 187 size := f.size 188 for i, offset := range lines { 189 if i > 0 && offset <= lines[i-1] || size <= offset { 190 return false 191 } 192 } 193 194 // set lines table 195 f.mutex.Lock() 196 f.lines = lines 197 f.mutex.Unlock() 198 return true 199} 200 201// SetLinesForContent sets the line offsets for the given file content. 202// It ignores position-altering //line comments. 
203func (f *File) SetLinesForContent(content []byte) { 204 var lines []int 205 line := 0 206 for offset, b := range content { 207 if line >= 0 { 208 lines = append(lines, line) 209 } 210 line = -1 211 if b == '\n' { 212 line = offset + 1 213 } 214 } 215 216 // set lines table 217 f.mutex.Lock() 218 f.lines = lines 219 f.mutex.Unlock() 220} 221 222// LineStart returns the [Pos] value of the start of the specified line. 223// It ignores any alternative positions set using [File.AddLineColumnInfo]. 224// LineStart panics if the 1-based line number is invalid. 225func (f *File) LineStart(line int) Pos { 226 if line < 1 { 227 panic(fmt.Sprintf("invalid line number %d (should be >= 1)", line)) 228 } 229 f.mutex.Lock() 230 defer f.mutex.Unlock() 231 if line > len(f.lines) { 232 panic(fmt.Sprintf("invalid line number %d (should be < %d)", line, len(f.lines))) 233 } 234 return Pos(f.base + f.lines[line-1]) 235} 236 237// A lineInfo object describes alternative file, line, and column 238// number information (such as provided via a //line directive) 239// for a given file offset. 240type lineInfo struct { 241 // fields are exported to make them accessible to gob 242 Offset int 243 Filename string 244 Line, Column int 245} 246 247// AddLineInfo is like [File.AddLineColumnInfo] with a column = 1 argument. 248// It is here for backward-compatibility for code prior to Go 1.11. 249func (f *File) AddLineInfo(offset int, filename string, line int) { 250 f.AddLineColumnInfo(offset, filename, line, 1) 251} 252 253// AddLineColumnInfo adds alternative file, line, and column number 254// information for a given file offset. The offset must be larger 255// than the offset for the previously added alternative line info 256// and smaller than the file size; otherwise the information is 257// ignored. 258// 259// AddLineColumnInfo is typically used to register alternative position 260// information for line directives such as //line filename:line:column. 
261func (f *File) AddLineColumnInfo(offset int, filename string, line, column int) { 262 f.mutex.Lock() 263 if i := len(f.infos); (i == 0 || f.infos[i-1].Offset < offset) && offset < f.size { 264 f.infos = append(f.infos, lineInfo{offset, filename, line, column}) 265 } 266 f.mutex.Unlock() 267} 268 269// fixOffset fixes an out-of-bounds offset such that 0 <= offset <= f.size. 270func (f *File) fixOffset(offset int) int { 271 switch { 272 case offset < 0: 273 if !debug { 274 return 0 275 } 276 case offset > f.size: 277 if !debug { 278 return f.size 279 } 280 default: 281 return offset 282 } 283 284 // only generate this code if needed 285 if debug { 286 panic(fmt.Sprintf("offset %d out of bounds [%d, %d] (position %d out of bounds [%d, %d])", 287 0 /* for symmetry */, offset, f.size, 288 f.base+offset, f.base, f.base+f.size)) 289 } 290 return 0 291} 292 293// Pos returns the Pos value for the given file offset. 294// 295// If offset is negative, the result is the file's start 296// position; if the offset is too large, the result is 297// the file's end position (see also go.dev/issue/57490). 298// 299// The following invariant, though not true for Pos values 300// in general, holds for the result p: 301// f.Pos(f.Offset(p)) == p. 302func (f *File) Pos(offset int) Pos { 303 return Pos(f.base + f.fixOffset(offset)) 304} 305 306// Offset returns the offset for the given file position p. 307// 308// If p is before the file's start position (or if p is NoPos), 309// the result is 0; if p is past the file's end position, the 310// the result is the file size (see also go.dev/issue/57490). 311// 312// The following invariant, though not true for offset values 313// in general, holds for the result offset: 314// f.Offset(f.Pos(offset)) == offset 315func (f *File) Offset(p Pos) int { 316 return f.fixOffset(int(p) - f.base) 317} 318 319// Line returns the line number for the given file position p; 320// p must be a [Pos] value in that file or [NoPos]. 
321func (f *File) Line(p Pos) int { 322 return f.Position(p).Line 323} 324 325func searchLineInfos(a []lineInfo, x int) int { 326 i, found := slices.BinarySearchFunc(a, x, func(a lineInfo, x int) int { 327 return cmp.Compare(a.Offset, x) 328 }) 329 if !found { 330 // We want the lineInfo containing x, but if we didn't 331 // find x then i is the next one. 332 i-- 333 } 334 return i 335} 336 337// unpack returns the filename and line and column number for a file offset. 338// If adjusted is set, unpack will return the filename and line information 339// possibly adjusted by //line comments; otherwise those comments are ignored. 340func (f *File) unpack(offset int, adjusted bool) (filename string, line, column int) { 341 f.mutex.Lock() 342 filename = f.name 343 if i := searchInts(f.lines, offset); i >= 0 { 344 line, column = i+1, offset-f.lines[i]+1 345 } 346 if adjusted && len(f.infos) > 0 { 347 // few files have extra line infos 348 if i := searchLineInfos(f.infos, offset); i >= 0 { 349 alt := &f.infos[i] 350 filename = alt.Filename 351 if i := searchInts(f.lines, alt.Offset); i >= 0 { 352 // i+1 is the line at which the alternative position was recorded 353 d := line - (i + 1) // line distance from alternative position base 354 line = alt.Line + d 355 if alt.Column == 0 { 356 // alternative column is unknown => relative column is unknown 357 // (the current specification for line directives requires 358 // this to apply until the next PosBase/line directive, 359 // not just until the new newline) 360 column = 0 361 } else if d == 0 { 362 // the alternative position base is on the current line 363 // => column is relative to alternative column 364 column = alt.Column + (offset - alt.Offset) 365 } 366 } 367 } 368 } 369 // TODO(mvdan): move Unlock back under Lock with a defer statement once 370 // https://go.dev/issue/38471 is fixed to remove the performance penalty. 
371 f.mutex.Unlock() 372 return 373} 374 375func (f *File) position(p Pos, adjusted bool) (pos Position) { 376 offset := f.fixOffset(int(p) - f.base) 377 pos.Offset = offset 378 pos.Filename, pos.Line, pos.Column = f.unpack(offset, adjusted) 379 return 380} 381 382// PositionFor returns the Position value for the given file position p. 383// If p is out of bounds, it is adjusted to match the File.Offset behavior. 384// If adjusted is set, the position may be adjusted by position-altering 385// //line comments; otherwise those comments are ignored. 386// p must be a Pos value in f or NoPos. 387func (f *File) PositionFor(p Pos, adjusted bool) (pos Position) { 388 if p != NoPos { 389 pos = f.position(p, adjusted) 390 } 391 return 392} 393 394// Position returns the Position value for the given file position p. 395// If p is out of bounds, it is adjusted to match the File.Offset behavior. 396// Calling f.Position(p) is equivalent to calling f.PositionFor(p, true). 397func (f *File) Position(p Pos) (pos Position) { 398 return f.PositionFor(p, true) 399} 400 401// ----------------------------------------------------------------------------- 402// FileSet 403 404// A FileSet represents a set of source files. 405// Methods of file sets are synchronized; multiple goroutines 406// may invoke them concurrently. 407// 408// The byte offsets for each file in a file set are mapped into 409// distinct (integer) intervals, one interval [base, base+size] 410// per file. [FileSet.Base] represents the first byte in the file, and size 411// is the corresponding file size. A [Pos] value is a value in such 412// an interval. By determining the interval a [Pos] value belongs 413// to, the file, its file base, and thus the byte offset (position) 414// the [Pos] value is representing can be computed. 415// 416// When adding a new file, a file base must be provided. That can 417// be any integer value that is past the end of any interval of any 418// file already in the file set. 
For convenience, [FileSet.Base] provides 419// such a value, which is simply the end of the Pos interval of the most 420// recently added file, plus one. Unless there is a need to extend an 421// interval later, using the [FileSet.Base] should be used as argument 422// for [FileSet.AddFile]. 423// 424// A [File] may be removed from a FileSet when it is no longer needed. 425// This may reduce memory usage in a long-running application. 426type FileSet struct { 427 mutex sync.RWMutex // protects the file set 428 base int // base offset for the next file 429 files []*File // list of files in the order added to the set 430 last atomic.Pointer[File] // cache of last file looked up 431} 432 433// NewFileSet creates a new file set. 434func NewFileSet() *FileSet { 435 return &FileSet{ 436 base: 1, // 0 == NoPos 437 } 438} 439 440// Base returns the minimum base offset that must be provided to 441// [FileSet.AddFile] when adding the next file. 442func (s *FileSet) Base() int { 443 s.mutex.RLock() 444 b := s.base 445 s.mutex.RUnlock() 446 return b 447} 448 449// AddFile adds a new file with a given filename, base offset, and file size 450// to the file set s and returns the file. Multiple files may have the same 451// name. The base offset must not be smaller than the [FileSet.Base], and 452// size must not be negative. As a special case, if a negative base is provided, 453// the current value of the [FileSet.Base] is used instead. 454// 455// Adding the file will set the file set's [FileSet.Base] value to base + size + 1 456// as the minimum base value for the next file. The following relationship 457// exists between a [Pos] value p for a given file offset offs: 458// 459// int(p) = base + offs 460// 461// with offs in the range [0, size] and thus p in the range [base, base+size]. 462// For convenience, [File.Pos] may be used to create file-specific position 463// values from a file offset. 
464func (s *FileSet) AddFile(filename string, base, size int) *File { 465 // Allocate f outside the critical section. 466 f := &File{name: filename, size: size, lines: []int{0}} 467 468 s.mutex.Lock() 469 defer s.mutex.Unlock() 470 if base < 0 { 471 base = s.base 472 } 473 if base < s.base { 474 panic(fmt.Sprintf("invalid base %d (should be >= %d)", base, s.base)) 475 } 476 f.base = base 477 if size < 0 { 478 panic(fmt.Sprintf("invalid size %d (should be >= 0)", size)) 479 } 480 // base >= s.base && size >= 0 481 base += size + 1 // +1 because EOF also has a position 482 if base < 0 { 483 panic("token.Pos offset overflow (> 2G of source code in file set)") 484 } 485 // add the file to the file set 486 s.base = base 487 s.files = append(s.files, f) 488 s.last.Store(f) 489 return f 490} 491 492// RemoveFile removes a file from the [FileSet] so that subsequent 493// queries for its [Pos] interval yield a negative result. 494// This reduces the memory usage of a long-lived [FileSet] that 495// encounters an unbounded stream of files. 496// 497// Removing a file that does not belong to the set has no effect. 498func (s *FileSet) RemoveFile(file *File) { 499 s.last.CompareAndSwap(file, nil) // clear last file cache 500 501 s.mutex.Lock() 502 defer s.mutex.Unlock() 503 504 if i := searchFiles(s.files, file.base); i >= 0 && s.files[i] == file { 505 last := &s.files[len(s.files)-1] 506 s.files = append(s.files[:i], s.files[i+1:]...) 507 *last = nil // don't prolong lifetime when popping last element 508 } 509} 510 511// Iterate calls f for the files in the file set in the order they were added 512// until f returns false. 
513func (s *FileSet) Iterate(f func(*File) bool) { 514 for i := 0; ; i++ { 515 var file *File 516 s.mutex.RLock() 517 if i < len(s.files) { 518 file = s.files[i] 519 } 520 s.mutex.RUnlock() 521 if file == nil || !f(file) { 522 break 523 } 524 } 525} 526 527func searchFiles(a []*File, x int) int { 528 i, found := slices.BinarySearchFunc(a, x, func(a *File, x int) int { 529 return cmp.Compare(a.base, x) 530 }) 531 if !found { 532 // We want the File containing x, but if we didn't 533 // find x then i is the next one. 534 i-- 535 } 536 return i 537} 538 539func (s *FileSet) file(p Pos) *File { 540 // common case: p is in last file. 541 if f := s.last.Load(); f != nil && f.base <= int(p) && int(p) <= f.base+f.size { 542 return f 543 } 544 545 s.mutex.RLock() 546 defer s.mutex.RUnlock() 547 548 // p is not in last file - search all files 549 if i := searchFiles(s.files, int(p)); i >= 0 { 550 f := s.files[i] 551 // f.base <= int(p) by definition of searchFiles 552 if int(p) <= f.base+f.size { 553 // Update cache of last file. A race is ok, 554 // but an exclusive lock causes heavy contention. 555 s.last.Store(f) 556 return f 557 } 558 } 559 return nil 560} 561 562// File returns the file that contains the position p. 563// If no such file is found (for instance for p == [NoPos]), 564// the result is nil. 565func (s *FileSet) File(p Pos) (f *File) { 566 if p != NoPos { 567 f = s.file(p) 568 } 569 return 570} 571 572// PositionFor converts a [Pos] p in the fileset into a [Position] value. 573// If adjusted is set, the position may be adjusted by position-altering 574// //line comments; otherwise those comments are ignored. 575// p must be a [Pos] value in s or [NoPos]. 576func (s *FileSet) PositionFor(p Pos, adjusted bool) (pos Position) { 577 if p != NoPos { 578 if f := s.file(p); f != nil { 579 return f.position(p, adjusted) 580 } 581 } 582 return 583} 584 585// Position converts a [Pos] p in the fileset into a Position value. 
586// Calling s.Position(p) is equivalent to calling s.PositionFor(p, true). 587func (s *FileSet) Position(p Pos) (pos Position) { 588 return s.PositionFor(p, true) 589} 590 591// ----------------------------------------------------------------------------- 592// Helper functions 593 594func searchInts(a []int, x int) int { 595 // This function body is a manually inlined version of: 596 // 597 // return sort.Search(len(a), func(i int) bool { return a[i] > x }) - 1 598 // 599 // With better compiler optimizations, this may not be needed in the 600 // future, but at the moment this change improves the go/printer 601 // benchmark performance by ~30%. This has a direct impact on the 602 // speed of gofmt and thus seems worthwhile (2011-04-29). 603 // TODO(gri): Remove this when compilers have caught up. 604 i, j := 0, len(a) 605 for i < j { 606 h := int(uint(i+j) >> 1) // avoid overflow when computing h 607 // i ≤ h < j 608 if a[h] <= x { 609 i = h + 1 610 } else { 611 j = h 612 } 613 } 614 return i - 1 615} 616