1// Copyright 2014 Google Inc. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15// This file implements parsers to convert legacy profiles into the
16// profile.proto format.
17
18package profile
19
20import (
21	"bufio"
22	"bytes"
23	"fmt"
24	"io"
25	"math"
26	"regexp"
27	"strconv"
28	"strings"
29)
30
// Regular expressions and pattern fragments used by the legacy
// profile parsers in this file.
var (
	// Go count profiles: the "<type> profile: total <n>" header line
	// (capturing the type) and the "<count> @ 0x<addr> ..." sample lines
	// (capturing the count and the address list). Both are anchored to
	// the start and end of the text they are applied to.
	countStartRE = regexp.MustCompile(`\A(\S+) profile: total \d+\z`)
	countRE      = regexp.MustCompile(`\A(\d+) @(( 0x[0-9a-f]+)+)\z`)

	// Heap profiles: the "heap profile: N: N [N: N] @ heap.../N" header
	// and the "N: N [N: N] @ <addresses>" sample rows. Neither pattern is
	// anchored.
	heapHeaderRE = regexp.MustCompile(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] *@ *(heap[_a-z0-9]*)/?(\d*)`)
	heapSampleRE = regexp.MustCompile(`(-?\d+): *(-?\d+) *\[ *(\d+): *(\d+) *] @([ x0-9a-f]*)`)

	// Contention profile sample rows: two decimal numbers followed by
	// "@" and an address list.
	contentionSampleRE = regexp.MustCompile(`(\d+) *(\d+) @([ x0-9a-f]*)`)

	// A single lowercase hexadecimal number with a mandatory "0x" prefix.
	hexNumberRE = regexp.MustCompile(`0x[0-9a-f]+`)

	// Header of a heap-style profile whose trailer is "@ growth" or
	// "@ growthz".
	growthHeaderRE = regexp.MustCompile(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] @ growthz?`)

	// Header of a heap-style profile whose trailer is "@ fragmentation"
	// or "@ fragmentationz".
	fragmentationHeaderRE = regexp.MustCompile(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] @ fragmentationz?`)

	// Threadz profiles: the "--- threadz N ---" banner and the per-thread
	// "--- Thread <hex> (name: <name>/<n>) stack: ---" header (three
	// capture groups).
	threadzStartRE = regexp.MustCompile(`--- threadz \d+ ---`)
	threadStartRE  = regexp.MustCompile(`--- Thread ([[:xdigit:]]+) \(name: (.*)/(\d+)\) stack: ---`)

	// Regular expressions to parse process mappings. Support the format used by Linux /proc/.../maps and other tools.
	// Recommended format:
	// Start   End     object file name     offset(optional)   linker build id
	// 0x40000-0x80000 /path/to/binary      (@FF00)            abc123456
	//
	// Non-capturing fragments.
	spaceDigits = `\s+[[:digit:]]+`
	hexPair     = `\s+[[:xdigit:]]+:[[:xdigit:]]+`
	oSpace      = `\s*`
	// Capturing expressions. Each optional fragment contributes exactly
	// one capture group; cHexRange contributes two (start and end).
	cHex           = `(?:0x)?([[:xdigit:]]+)`
	cHexRange      = `\s*` + cHex + `[\s-]?` + oSpace + cHex + `:?`
	cSpaceString   = `(?:\s+(\S+))?`
	cSpaceHex      = `(?:\s+([[:xdigit:]]+))?`
	cSpaceAtOffset = `(?:\s+\(@([[:xdigit:]]+)\))?`
	cPerm          = `(?:\s+([-rwxp]+))?`

	// procMapsRE has five capture groups (start, end, permissions,
	// offset, file); briefMapsRE has six (start, end, permissions, file,
	// offset, build ID). parseMappingEntry relies on these counts.
	procMapsRE  = regexp.MustCompile(`^` + cHexRange + cPerm + cSpaceHex + hexPair + spaceDigits + cSpaceString)
	briefMapsRE = regexp.MustCompile(`^` + cHexRange + cPerm + cSpaceString + cSpaceAtOffset + cSpaceHex)

	// Regular expression to parse log data, of the form:
	// ... file:line] msg...
	logInfoRE = regexp.MustCompile(`^[^\[\]]+:[0-9]+]\s`)
)
71
// isSpaceOrComment reports whether line is blank (empty or only
// whitespace) or a comment, i.e. its first non-space character is '#'.
func isSpaceOrComment(line string) bool {
	text := strings.TrimSpace(line)
	if text == "" {
		return true
	}
	return strings.HasPrefix(text, "#")
}
76
77// parseGoCount parses a Go count profile (e.g., threadcreate or
78// goroutine) and returns a new Profile.
79func parseGoCount(b []byte) (*Profile, error) {
80	s := bufio.NewScanner(bytes.NewBuffer(b))
81	// Skip comments at the beginning of the file.
82	for s.Scan() && isSpaceOrComment(s.Text()) {
83	}
84	if err := s.Err(); err != nil {
85		return nil, err
86	}
87	m := countStartRE.FindStringSubmatch(s.Text())
88	if m == nil {
89		return nil, errUnrecognized
90	}
91	profileType := m[1]
92	p := &Profile{
93		PeriodType: &ValueType{Type: profileType, Unit: "count"},
94		Period:     1,
95		SampleType: []*ValueType{{Type: profileType, Unit: "count"}},
96	}
97	locations := make(map[uint64]*Location)
98	for s.Scan() {
99		line := s.Text()
100		if isSpaceOrComment(line) {
101			continue
102		}
103		if strings.HasPrefix(line, "---") {
104			break
105		}
106		m := countRE.FindStringSubmatch(line)
107		if m == nil {
108			return nil, errMalformed
109		}
110		n, err := strconv.ParseInt(m[1], 0, 64)
111		if err != nil {
112			return nil, errMalformed
113		}
114		fields := strings.Fields(m[2])
115		locs := make([]*Location, 0, len(fields))
116		for _, stk := range fields {
117			addr, err := strconv.ParseUint(stk, 0, 64)
118			if err != nil {
119				return nil, errMalformed
120			}
121			// Adjust all frames by -1 to land on top of the call instruction.
122			addr--
123			loc := locations[addr]
124			if loc == nil {
125				loc = &Location{
126					Address: addr,
127				}
128				locations[addr] = loc
129				p.Location = append(p.Location, loc)
130			}
131			locs = append(locs, loc)
132		}
133		p.Sample = append(p.Sample, &Sample{
134			Location: locs,
135			Value:    []int64{n},
136		})
137	}
138	if err := s.Err(); err != nil {
139		return nil, err
140	}
141
142	if err := parseAdditionalSections(s, p); err != nil {
143		return nil, err
144	}
145	return p, nil
146}
147
148// remapLocationIDs ensures there is a location for each address
149// referenced by a sample, and remaps the samples to point to the new
150// location ids.
151func (p *Profile) remapLocationIDs() {
152	seen := make(map[*Location]bool, len(p.Location))
153	var locs []*Location
154
155	for _, s := range p.Sample {
156		for _, l := range s.Location {
157			if seen[l] {
158				continue
159			}
160			l.ID = uint64(len(locs) + 1)
161			locs = append(locs, l)
162			seen[l] = true
163		}
164	}
165	p.Location = locs
166}
167
168func (p *Profile) remapFunctionIDs() {
169	seen := make(map[*Function]bool, len(p.Function))
170	var fns []*Function
171
172	for _, l := range p.Location {
173		for _, ln := range l.Line {
174			fn := ln.Function
175			if fn == nil || seen[fn] {
176				continue
177			}
178			fn.ID = uint64(len(fns) + 1)
179			fns = append(fns, fn)
180			seen[fn] = true
181		}
182	}
183	p.Function = fns
184}
185
186// remapMappingIDs matches location addresses with existing mappings
187// and updates them appropriately. This is O(N*M), if this ever shows
188// up as a bottleneck, evaluate sorting the mappings and doing a
189// binary search, which would make it O(N*log(M)).
190func (p *Profile) remapMappingIDs() {
191	// Some profile handlers will incorrectly set regions for the main
192	// executable if its section is remapped. Fix them through heuristics.
193
194	if len(p.Mapping) > 0 {
195		// Remove the initial mapping if named '/anon_hugepage' and has a
196		// consecutive adjacent mapping.
197		if m := p.Mapping[0]; strings.HasPrefix(m.File, "/anon_hugepage") {
198			if len(p.Mapping) > 1 && m.Limit == p.Mapping[1].Start {
199				p.Mapping = p.Mapping[1:]
200			}
201		}
202	}
203
204	// Subtract the offset from the start of the main mapping if it
205	// ends up at a recognizable start address.
206	if len(p.Mapping) > 0 {
207		const expectedStart = 0x400000
208		if m := p.Mapping[0]; m.Start-m.Offset == expectedStart {
209			m.Start = expectedStart
210			m.Offset = 0
211		}
212	}
213
214	// Associate each location with an address to the corresponding
215	// mapping. Create fake mapping if a suitable one isn't found.
216	var fake *Mapping
217nextLocation:
218	for _, l := range p.Location {
219		a := l.Address
220		if l.Mapping != nil || a == 0 {
221			continue
222		}
223		for _, m := range p.Mapping {
224			if m.Start <= a && a < m.Limit {
225				l.Mapping = m
226				continue nextLocation
227			}
228		}
229		// Work around legacy handlers failing to encode the first
230		// part of mappings split into adjacent ranges.
231		for _, m := range p.Mapping {
232			if m.Offset != 0 && m.Start-m.Offset <= a && a < m.Start {
233				m.Start -= m.Offset
234				m.Offset = 0
235				l.Mapping = m
236				continue nextLocation
237			}
238		}
239		// If there is still no mapping, create a fake one.
240		// This is important for the Go legacy handler, which produced
241		// no mappings.
242		if fake == nil {
243			fake = &Mapping{
244				ID:    1,
245				Limit: ^uint64(0),
246			}
247			p.Mapping = append(p.Mapping, fake)
248		}
249		l.Mapping = fake
250	}
251
252	// Reset all mapping IDs.
253	for i, m := range p.Mapping {
254		m.ID = uint64(i + 1)
255	}
256}
257
// cpuInts lists the word decoders that parseCPU tries, in order, when
// looking for a valid profile header: 32-bit little-endian, 32-bit
// big-endian, 64-bit little-endian and 64-bit big-endian. Each decoder
// returns the decoded word and the remaining bytes, or (0, nil) when
// the input is too short.
var cpuInts = []func([]byte) (uint64, []byte){
	get32l,
	get32b,
	get64l,
	get64b,
}
264
// get32l decodes a 32-bit little-endian word from the front of b and
// returns it together with the remaining bytes. If b holds fewer than
// four bytes it returns (0, nil).
func get32l(b []byte) (uint64, []byte) {
	const width = 4
	if len(b) < width {
		return 0, nil
	}
	var word uint64
	// The last byte is the most significant, so fold from the end.
	for i := width - 1; i >= 0; i-- {
		word = word<<8 | uint64(b[i])
	}
	return word, b[width:]
}
271
// get32b decodes a 32-bit big-endian word from the front of b and
// returns it together with the remaining bytes. If b holds fewer than
// four bytes it returns (0, nil).
func get32b(b []byte) (uint64, []byte) {
	const width = 4
	if len(b) < width {
		return 0, nil
	}
	var word uint64
	// The first byte is the most significant, so fold from the front.
	for _, octet := range b[:width] {
		word = word<<8 | uint64(octet)
	}
	return word, b[width:]
}
278
// get64l decodes a 64-bit little-endian word from the front of b and
// returns it together with the remaining bytes. If b holds fewer than
// eight bytes it returns (0, nil).
func get64l(b []byte) (uint64, []byte) {
	const width = 8
	if len(b) < width {
		return 0, nil
	}
	var word uint64
	// The last byte is the most significant, so fold from the end.
	for i := width - 1; i >= 0; i-- {
		word = word<<8 | uint64(b[i])
	}
	return word, b[width:]
}
285
// get64b decodes a 64-bit big-endian word from the front of b and
// returns it together with the remaining bytes. If b holds fewer than
// eight bytes it returns (0, nil).
func get64b(b []byte) (uint64, []byte) {
	const width = 8
	if len(b) < width {
		return 0, nil
	}
	var word uint64
	// The first byte is the most significant, so fold from the front.
	for _, octet := range b[:width] {
		word = word<<8 | uint64(octet)
	}
	return word, b[width:]
}
292
293// parseCPU parses a profilez legacy profile and returns a newly
294// populated Profile.
295//
296// The general format for profilez samples is a sequence of words in
297// binary format. The first words are a header with the following data:
298//
299//	1st word -- 0
300//	2nd word -- 3
301//	3rd word -- 0 if a c++ application, 1 if a java application.
302//	4th word -- Sampling period (in microseconds).
303//	5th word -- Padding.
304func parseCPU(b []byte) (*Profile, error) {
305	var parse func([]byte) (uint64, []byte)
306	var n1, n2, n3, n4, n5 uint64
307	for _, parse = range cpuInts {
308		var tmp []byte
309		n1, tmp = parse(b)
310		n2, tmp = parse(tmp)
311		n3, tmp = parse(tmp)
312		n4, tmp = parse(tmp)
313		n5, tmp = parse(tmp)
314
315		if tmp != nil && n1 == 0 && n2 == 3 && n3 == 0 && n4 > 0 && n5 == 0 {
316			b = tmp
317			return cpuProfile(b, int64(n4), parse)
318		}
319		if tmp != nil && n1 == 0 && n2 == 3 && n3 == 1 && n4 > 0 && n5 == 0 {
320			b = tmp
321			return javaCPUProfile(b, int64(n4), parse)
322		}
323	}
324	return nil, errUnrecognized
325}
326
327// cpuProfile returns a new Profile from C++ profilez data.
328// b is the profile bytes after the header, period is the profiling
329// period, and parse is a function to parse 8-byte chunks from the
330// profile in its native endianness.
331func cpuProfile(b []byte, period int64, parse func(b []byte) (uint64, []byte)) (*Profile, error) {
332	p := &Profile{
333		Period:     period * 1000,
334		PeriodType: &ValueType{Type: "cpu", Unit: "nanoseconds"},
335		SampleType: []*ValueType{
336			{Type: "samples", Unit: "count"},
337			{Type: "cpu", Unit: "nanoseconds"},
338		},
339	}
340	var err error
341	if b, _, err = parseCPUSamples(b, parse, true, p); err != nil {
342		return nil, err
343	}
344
345	// If *most* samples have the same second-to-the-bottom frame, it
346	// strongly suggests that it is an uninteresting artifact of
347	// measurement -- a stack frame pushed by the signal handler. The
348	// bottom frame is always correct as it is picked up from the signal
349	// structure, not the stack. Check if this is the case and if so,
350	// remove.
351
352	// Remove up to two frames.
353	maxiter := 2
354	// Allow one different sample for this many samples with the same
355	// second-to-last frame.
356	similarSamples := 32
357	margin := len(p.Sample) / similarSamples
358
359	for iter := 0; iter < maxiter; iter++ {
360		addr1 := make(map[uint64]int)
361		for _, s := range p.Sample {
362			if len(s.Location) > 1 {
363				a := s.Location[1].Address
364				addr1[a] = addr1[a] + 1
365			}
366		}
367
368		for id1, count := range addr1 {
369			if count >= len(p.Sample)-margin {
370				// Found uninteresting frame, strip it out from all samples
371				for _, s := range p.Sample {
372					if len(s.Location) > 1 && s.Location[1].Address == id1 {
373						s.Location = append(s.Location[:1], s.Location[2:]...)
374					}
375				}
376				break
377			}
378		}
379	}
380
381	if err := p.ParseMemoryMap(bytes.NewBuffer(b)); err != nil {
382		return nil, err
383	}
384
385	cleanupDuplicateLocations(p)
386	return p, nil
387}
388
389func cleanupDuplicateLocations(p *Profile) {
390	// The profile handler may duplicate the leaf frame, because it gets
391	// its address both from stack unwinding and from the signal
392	// context. Detect this and delete the duplicate, which has been
393	// adjusted by -1. The leaf address should not be adjusted as it is
394	// not a call.
395	for _, s := range p.Sample {
396		if len(s.Location) > 1 && s.Location[0].Address == s.Location[1].Address+1 {
397			s.Location = append(s.Location[:1], s.Location[2:]...)
398		}
399	}
400}
401
402// parseCPUSamples parses a collection of profilez samples from a
403// profile.
404//
405// profilez samples are a repeated sequence of stack frames of the
406// form:
407//
408//	1st word -- The number of times this stack was encountered.
409//	2nd word -- The size of the stack (StackSize).
410//	3rd word -- The first address on the stack.
411//	...
412//	StackSize + 2 -- The last address on the stack
413//
414// The last stack trace is of the form:
415//
416//	1st word -- 0
417//	2nd word -- 1
418//	3rd word -- 0
419//
420// Addresses from stack traces may point to the next instruction after
421// each call. Optionally adjust by -1 to land somewhere on the actual
422// call (except for the leaf, which is not a call).
423func parseCPUSamples(b []byte, parse func(b []byte) (uint64, []byte), adjust bool, p *Profile) ([]byte, map[uint64]*Location, error) {
424	locs := make(map[uint64]*Location)
425	for len(b) > 0 {
426		var count, nstk uint64
427		count, b = parse(b)
428		nstk, b = parse(b)
429		if b == nil || nstk > uint64(len(b)/4) {
430			return nil, nil, errUnrecognized
431		}
432		var sloc []*Location
433		addrs := make([]uint64, nstk)
434		for i := 0; i < int(nstk); i++ {
435			addrs[i], b = parse(b)
436		}
437
438		if count == 0 && nstk == 1 && addrs[0] == 0 {
439			// End of data marker
440			break
441		}
442		for i, addr := range addrs {
443			if adjust && i > 0 {
444				addr--
445			}
446			loc := locs[addr]
447			if loc == nil {
448				loc = &Location{
449					Address: addr,
450				}
451				locs[addr] = loc
452				p.Location = append(p.Location, loc)
453			}
454			sloc = append(sloc, loc)
455		}
456		p.Sample = append(p.Sample,
457			&Sample{
458				Value:    []int64{int64(count), int64(count) * p.Period},
459				Location: sloc,
460			})
461	}
462	// Reached the end without finding the EOD marker.
463	return b, locs, nil
464}
465
466// parseHeap parses a heapz legacy or a growthz profile and
467// returns a newly populated Profile.
468func parseHeap(b []byte) (p *Profile, err error) {
469	s := bufio.NewScanner(bytes.NewBuffer(b))
470	if !s.Scan() {
471		if err := s.Err(); err != nil {
472			return nil, err
473		}
474		return nil, errUnrecognized
475	}
476	p = &Profile{}
477
478	sampling := ""
479	hasAlloc := false
480
481	line := s.Text()
482	p.PeriodType = &ValueType{Type: "space", Unit: "bytes"}
483	if header := heapHeaderRE.FindStringSubmatch(line); header != nil {
484		sampling, p.Period, hasAlloc, err = parseHeapHeader(line)
485		if err != nil {
486			return nil, err
487		}
488	} else if header = growthHeaderRE.FindStringSubmatch(line); header != nil {
489		p.Period = 1
490	} else if header = fragmentationHeaderRE.FindStringSubmatch(line); header != nil {
491		p.Period = 1
492	} else {
493		return nil, errUnrecognized
494	}
495
496	if hasAlloc {
497		// Put alloc before inuse so that default pprof selection
498		// will prefer inuse_space.
499		p.SampleType = []*ValueType{
500			{Type: "alloc_objects", Unit: "count"},
501			{Type: "alloc_space", Unit: "bytes"},
502			{Type: "inuse_objects", Unit: "count"},
503			{Type: "inuse_space", Unit: "bytes"},
504		}
505	} else {
506		p.SampleType = []*ValueType{
507			{Type: "objects", Unit: "count"},
508			{Type: "space", Unit: "bytes"},
509		}
510	}
511
512	locs := make(map[uint64]*Location)
513	for s.Scan() {
514		line := strings.TrimSpace(s.Text())
515
516		if isSpaceOrComment(line) {
517			continue
518		}
519
520		if isMemoryMapSentinel(line) {
521			break
522		}
523
524		value, blocksize, addrs, err := parseHeapSample(line, p.Period, sampling, hasAlloc)
525		if err != nil {
526			return nil, err
527		}
528
529		var sloc []*Location
530		for _, addr := range addrs {
531			// Addresses from stack traces point to the next instruction after
532			// each call. Adjust by -1 to land somewhere on the actual call.
533			addr--
534			loc := locs[addr]
535			if locs[addr] == nil {
536				loc = &Location{
537					Address: addr,
538				}
539				p.Location = append(p.Location, loc)
540				locs[addr] = loc
541			}
542			sloc = append(sloc, loc)
543		}
544
545		p.Sample = append(p.Sample, &Sample{
546			Value:    value,
547			Location: sloc,
548			NumLabel: map[string][]int64{"bytes": {blocksize}},
549		})
550	}
551	if err := s.Err(); err != nil {
552		return nil, err
553	}
554	if err := parseAdditionalSections(s, p); err != nil {
555		return nil, err
556	}
557	return p, nil
558}
559
560func parseHeapHeader(line string) (sampling string, period int64, hasAlloc bool, err error) {
561	header := heapHeaderRE.FindStringSubmatch(line)
562	if header == nil {
563		return "", 0, false, errUnrecognized
564	}
565
566	if len(header[6]) > 0 {
567		if period, err = strconv.ParseInt(header[6], 10, 64); err != nil {
568			return "", 0, false, errUnrecognized
569		}
570	}
571
572	if (header[3] != header[1] && header[3] != "0") || (header[4] != header[2] && header[4] != "0") {
573		hasAlloc = true
574	}
575
576	switch header[5] {
577	case "heapz_v2", "heap_v2":
578		return "v2", period, hasAlloc, nil
579	case "heapprofile":
580		return "", 1, hasAlloc, nil
581	case "heap":
582		return "v2", period / 2, hasAlloc, nil
583	default:
584		return "", 0, false, errUnrecognized
585	}
586}
587
588// parseHeapSample parses a single row from a heap profile into a new Sample.
589func parseHeapSample(line string, rate int64, sampling string, includeAlloc bool) (value []int64, blocksize int64, addrs []uint64, err error) {
590	sampleData := heapSampleRE.FindStringSubmatch(line)
591	if len(sampleData) != 6 {
592		return nil, 0, nil, fmt.Errorf("unexpected number of sample values: got %d, want 6", len(sampleData))
593	}
594
595	// This is a local-scoped helper function to avoid needing to pass
596	// around rate, sampling and many return parameters.
597	addValues := func(countString, sizeString string, label string) error {
598		count, err := strconv.ParseInt(countString, 10, 64)
599		if err != nil {
600			return fmt.Errorf("malformed sample: %s: %v", line, err)
601		}
602		size, err := strconv.ParseInt(sizeString, 10, 64)
603		if err != nil {
604			return fmt.Errorf("malformed sample: %s: %v", line, err)
605		}
606		if count == 0 && size != 0 {
607			return fmt.Errorf("%s count was 0 but %s bytes was %d", label, label, size)
608		}
609		if count != 0 {
610			blocksize = size / count
611			if sampling == "v2" {
612				count, size = scaleHeapSample(count, size, rate)
613			}
614		}
615		value = append(value, count, size)
616		return nil
617	}
618
619	if includeAlloc {
620		if err := addValues(sampleData[3], sampleData[4], "allocation"); err != nil {
621			return nil, 0, nil, err
622		}
623	}
624
625	if err := addValues(sampleData[1], sampleData[2], "inuse"); err != nil {
626		return nil, 0, nil, err
627	}
628
629	addrs, err = parseHexAddresses(sampleData[5])
630	if err != nil {
631		return nil, 0, nil, fmt.Errorf("malformed sample: %s: %v", line, err)
632	}
633
634	return value, blocksize, addrs, nil
635}
636
637// parseHexAddresses extracts hex numbers from a string, attempts to convert
638// each to an unsigned 64-bit number and returns the resulting numbers as a
639// slice, or an error if the string contains hex numbers which are too large to
640// handle (which means a malformed profile).
641func parseHexAddresses(s string) ([]uint64, error) {
642	hexStrings := hexNumberRE.FindAllString(s, -1)
643	var addrs []uint64
644	for _, s := range hexStrings {
645		if addr, err := strconv.ParseUint(s, 0, 64); err == nil {
646			addrs = append(addrs, addr)
647		} else {
648			return nil, fmt.Errorf("failed to parse as hex 64-bit number: %s", s)
649		}
650	}
651	return addrs, nil
652}
653
// scaleHeapSample adjusts the data from a heapz Sample to account for
// its probability of appearing in the collected data. heapz profiles
// are a sampling of the memory allocation requests in a program. The
// unsampled value is estimated by dividing each collected sample by
// its probability of appearing in the profile. heapz v2 profiles rely
// on a poisson process to determine which samples to collect, based
// on the desired average collection rate R. The probability of a
// sample of size S to appear in that profile is 1-exp(-S/R).
//
// A zero count or size yields (0, 0). A rate of 1 means every sample
// was collected and a rate below 1 is treated as unknown; in both
// cases count and size are returned unchanged.
func scaleHeapSample(count, size, rate int64) (int64, int64) {
	switch {
	case count == 0 || size == 0:
		return 0, 0
	case rate <= 1:
		return count, size
	}

	// S in the formula is the average size of the sampled blocks.
	avgSize := float64(size) / float64(count)
	scale := 1 / (1 - math.Exp(-avgSize/float64(rate)))

	return int64(float64(count) * scale), int64(float64(size) * scale)
}
679
// parseContention parses a mutex or contention profile. There are 2 cases:
// "--- contentionz " for legacy C++ profiles (and backwards compatibility)
// "--- mutex:" or "--- contention:" for profiles generated by the Go runtime.
//
// After the header line comes an optional block of "attribute = value"
// lines, then the sample rows, then any additional sections (handed to
// parseAdditionalSections). It returns errUnrecognized when the header
// is not one of the prefixes above, when an attribute is unknown or
// has an unparsable value, or when a sample row does not match.
func parseContention(b []byte) (*Profile, error) {
	s := bufio.NewScanner(bytes.NewBuffer(b))
	if !s.Scan() {
		if err := s.Err(); err != nil {
			return nil, err
		}
		return nil, errUnrecognized
	}

	// Accept only the known header prefixes; the matching cases are
	// intentionally empty.
	switch l := s.Text(); {
	case strings.HasPrefix(l, "--- contentionz "):
	case strings.HasPrefix(l, "--- mutex:"):
	case strings.HasPrefix(l, "--- contention:"):
	default:
		return nil, errUnrecognized
	}

	p := &Profile{
		PeriodType: &ValueType{Type: "contentions", Unit: "count"},
		Period:     1,
		SampleType: []*ValueType{
			{Type: "contentions", Unit: "count"},
			{Type: "delay", Unit: "nanoseconds"},
		},
	}

	var cpuHz int64
	// Parse text of the form "attribute = value" before the samples.
	const delimiter = "="
	for s.Scan() {
		line := s.Text()
		if line = strings.TrimSpace(line); isSpaceOrComment(line) {
			continue
		}
		if strings.HasPrefix(line, "---") {
			break
		}
		attr := strings.SplitN(line, delimiter, 2)
		if len(attr) != 2 {
			// Not an attribute: this is the first sample row. The scanner
			// still holds it, and the sample loop below starts from it.
			break
		}
		key, val := strings.TrimSpace(attr[0]), strings.TrimSpace(attr[1])
		var err error
		switch key {
		case "cycles/second":
			if cpuHz, err = strconv.ParseInt(val, 0, 64); err != nil {
				return nil, errUnrecognized
			}
		case "sampling period":
			if p.Period, err = strconv.ParseInt(val, 0, 64); err != nil {
				return nil, errUnrecognized
			}
		case "ms since reset":
			ms, err := strconv.ParseInt(val, 0, 64)
			if err != nil {
				return nil, errUnrecognized
			}
			// Convert milliseconds to nanoseconds.
			p.DurationNanos = ms * 1000 * 1000
		case "format":
			// CPP contentionz profiles don't have format.
			return nil, errUnrecognized
		case "resolution":
			// CPP contentionz profiles don't have resolution.
			return nil, errUnrecognized
		case "discarded samples":
			// Recognized but ignored.
		default:
			return nil, errUnrecognized
		}
	}
	if err := s.Err(); err != nil {
		return nil, err
	}

	locs := make(map[uint64]*Location)
	// Parse sample rows. The loop examines the scanner's current line
	// before advancing, because the attribute loop above has already
	// read the first line that was not an attribute.
	for {
		line := strings.TrimSpace(s.Text())
		if strings.HasPrefix(line, "---") {
			break
		}
		if !isSpaceOrComment(line) {
			value, addrs, err := parseContentionSample(line, p.Period, cpuHz)
			if err != nil {
				return nil, err
			}
			var sloc []*Location
			for _, addr := range addrs {
				// Addresses from stack traces point to the next instruction after
				// each call. Adjust by -1 to land somewhere on the actual call.
				addr--
				// Share one Location per distinct address across samples.
				loc := locs[addr]
				if locs[addr] == nil {
					loc = &Location{
						Address: addr,
					}
					p.Location = append(p.Location, loc)
					locs[addr] = loc
				}
				sloc = append(sloc, loc)
			}
			p.Sample = append(p.Sample, &Sample{
				Value:    value,
				Location: sloc,
			})
		}
		if !s.Scan() {
			break
		}
	}
	if err := s.Err(); err != nil {
		return nil, err
	}

	if err := parseAdditionalSections(s, p); err != nil {
		return nil, err
	}

	return p, nil
}
801
802// parseContentionSample parses a single row from a contention profile
803// into a new Sample.
804func parseContentionSample(line string, period, cpuHz int64) (value []int64, addrs []uint64, err error) {
805	sampleData := contentionSampleRE.FindStringSubmatch(line)
806	if sampleData == nil {
807		return nil, nil, errUnrecognized
808	}
809
810	v1, err := strconv.ParseInt(sampleData[1], 10, 64)
811	if err != nil {
812		return nil, nil, fmt.Errorf("malformed sample: %s: %v", line, err)
813	}
814	v2, err := strconv.ParseInt(sampleData[2], 10, 64)
815	if err != nil {
816		return nil, nil, fmt.Errorf("malformed sample: %s: %v", line, err)
817	}
818
819	// Unsample values if period and cpuHz are available.
820	// - Delays are scaled to cycles and then to nanoseconds.
821	// - Contentions are scaled to cycles.
822	if period > 0 {
823		if cpuHz > 0 {
824			cpuGHz := float64(cpuHz) / 1e9
825			v1 = int64(float64(v1) * float64(period) / cpuGHz)
826		}
827		v2 = v2 * period
828	}
829
830	value = []int64{v2, v1}
831	addrs, err = parseHexAddresses(sampleData[3])
832	if err != nil {
833		return nil, nil, fmt.Errorf("malformed sample: %s: %v", line, err)
834	}
835
836	return value, addrs, nil
837}
838
// parseThread parses a Threadz profile and returns a new Profile.
//
// The input starts either with a "--- threadz N ---" banner or
// directly with a "--- Thread ..." header; anything else yields
// errUnrecognized. Every thread contributes one sample of value 1,
// except threads marked "same as previous thread", which increment the
// previous sample instead. Parsing of threads stops at a memory map
// sentinel or at a "---- no stack trace for" line; the remainder is
// handed to parseAdditionalSections.
func parseThread(b []byte) (*Profile, error) {
	s := bufio.NewScanner(bytes.NewBuffer(b))
	// Skip past comments and empty lines seeking a real header.
	for s.Scan() && isSpaceOrComment(s.Text()) {
	}

	line := s.Text()
	if m := threadzStartRE.FindStringSubmatch(line); m != nil {
		// Advance over initial comments until first stack trace.
		for s.Scan() {
			if line = s.Text(); isMemoryMapSentinel(line) || strings.HasPrefix(line, "-") {
				break
			}
		}
	} else if t := threadStartRE.FindStringSubmatch(line); len(t) != 4 {
		// Neither a threadz banner nor a thread header.
		return nil, errUnrecognized
	}

	p := &Profile{
		SampleType: []*ValueType{{Type: "thread", Unit: "count"}},
		PeriodType: &ValueType{Type: "thread", Unit: "count"},
		Period:     1,
	}

	locs := make(map[uint64]*Location)
	// Recognize each thread and populate profile samples. On every
	// iteration line holds the header of the thread to parse next, as
	// returned by the previous parseThreadSample call.
	for !isMemoryMapSentinel(line) {
		if strings.HasPrefix(line, "---- no stack trace for") {
			break
		}
		if t := threadStartRE.FindStringSubmatch(line); len(t) != 4 {
			return nil, errUnrecognized
		}

		var addrs []uint64
		var err error
		line, addrs, err = parseThreadSample(s)
		if err != nil {
			return nil, err
		}
		if len(addrs) == 0 {
			// We got a --same as previous threads--. Bump counters.
			if len(p.Sample) > 0 {
				s := p.Sample[len(p.Sample)-1]
				s.Value[0]++
			}
			continue
		}

		var sloc []*Location
		for i, addr := range addrs {
			// Addresses from stack traces point to the next instruction after
			// each call. Adjust by -1 to land somewhere on the actual call
			// (except for the leaf, which is not a call).
			if i > 0 {
				addr--
			}
			// Share one Location per distinct address across samples.
			loc := locs[addr]
			if locs[addr] == nil {
				loc = &Location{
					Address: addr,
				}
				p.Location = append(p.Location, loc)
				locs[addr] = loc
			}
			sloc = append(sloc, loc)
		}

		p.Sample = append(p.Sample, &Sample{
			Value:    []int64{1},
			Location: sloc,
		})
	}

	if err := parseAdditionalSections(s, p); err != nil {
		return nil, err
	}

	cleanupDuplicateLocations(p)
	return p, nil
}
921
922// parseThreadSample parses a symbolized or unsymbolized stack trace.
923// Returns the first line after the traceback, the sample (or nil if
924// it hits a 'same-as-previous' marker) and an error.
925func parseThreadSample(s *bufio.Scanner) (nextl string, addrs []uint64, err error) {
926	var line string
927	sameAsPrevious := false
928	for s.Scan() {
929		line = strings.TrimSpace(s.Text())
930		if line == "" {
931			continue
932		}
933
934		if strings.HasPrefix(line, "---") {
935			break
936		}
937		if strings.Contains(line, "same as previous thread") {
938			sameAsPrevious = true
939			continue
940		}
941
942		curAddrs, err := parseHexAddresses(line)
943		if err != nil {
944			return "", nil, fmt.Errorf("malformed sample: %s: %v", line, err)
945		}
946		addrs = append(addrs, curAddrs...)
947	}
948	if err := s.Err(); err != nil {
949		return "", nil, err
950	}
951	if sameAsPrevious {
952		return line, nil, nil
953	}
954	return line, addrs, nil
955}
956
957// parseAdditionalSections parses any additional sections in the
958// profile, ignoring any unrecognized sections.
959func parseAdditionalSections(s *bufio.Scanner, p *Profile) error {
960	for !isMemoryMapSentinel(s.Text()) && s.Scan() {
961	}
962	if err := s.Err(); err != nil {
963		return err
964	}
965	return p.ParseMemoryMapFromScanner(s)
966}
967
968// ParseProcMaps parses a memory map in the format of /proc/self/maps.
969// ParseMemoryMap should be called after setting on a profile to
970// associate locations to the corresponding mapping based on their
971// address.
972func ParseProcMaps(rd io.Reader) ([]*Mapping, error) {
973	s := bufio.NewScanner(rd)
974	return parseProcMapsFromScanner(s)
975}
976
977func parseProcMapsFromScanner(s *bufio.Scanner) ([]*Mapping, error) {
978	var mapping []*Mapping
979
980	var attrs []string
981	const delimiter = "="
982	r := strings.NewReplacer()
983	for s.Scan() {
984		line := r.Replace(removeLoggingInfo(s.Text()))
985		m, err := parseMappingEntry(line)
986		if err != nil {
987			if err == errUnrecognized {
988				// Recognize assignments of the form: attr=value, and replace
989				// $attr with value on subsequent mappings.
990				if attr := strings.SplitN(line, delimiter, 2); len(attr) == 2 {
991					attrs = append(attrs, "$"+strings.TrimSpace(attr[0]), strings.TrimSpace(attr[1]))
992					r = strings.NewReplacer(attrs...)
993				}
994				// Ignore any unrecognized entries
995				continue
996			}
997			return nil, err
998		}
999		if m == nil {
1000			continue
1001		}
1002		mapping = append(mapping, m)
1003	}
1004	if err := s.Err(); err != nil {
1005		return nil, err
1006	}
1007	return mapping, nil
1008}
1009
1010// removeLoggingInfo detects and removes log prefix entries generated
1011// by the glog package. If no logging prefix is detected, the string
1012// is returned unmodified.
1013func removeLoggingInfo(line string) string {
1014	if match := logInfoRE.FindStringIndex(line); match != nil {
1015		return line[match[1]:]
1016	}
1017	return line
1018}
1019
1020// ParseMemoryMap parses a memory map in the format of
1021// /proc/self/maps, and overrides the mappings in the current profile.
1022// It renumbers the samples and locations in the profile correspondingly.
1023func (p *Profile) ParseMemoryMap(rd io.Reader) error {
1024	return p.ParseMemoryMapFromScanner(bufio.NewScanner(rd))
1025}
1026
// ParseMemoryMapFromScanner parses a memory map in the format of
// /proc/self/maps or a variety of legacy formats, and appends the
// resulting mappings to p.Mapping; mappings already in the profile are
// not discarded by this step. It then runs massageMappings followed by
// the location, function and mapping ID remapping helpers, in that
// order.
//
// If parsing fails, the error is returned and the profile is left
// untouched.
func (p *Profile) ParseMemoryMapFromScanner(s *bufio.Scanner) error {
	mapping, err := parseProcMapsFromScanner(s)
	if err != nil {
		return err
	}
	// Append rather than overwrite: existing mappings stay in the list.
	p.Mapping = append(p.Mapping, mapping...)
	// NOTE(review): the helpers below are defined elsewhere; presumably
	// they tidy the combined mapping list and renumber the IDs that
	// refer to it. The call order is kept as is.
	p.massageMappings()
	p.remapLocationIDs()
	p.remapFunctionIDs()
	p.remapMappingIDs()
	return nil
}
1043
1044func parseMappingEntry(l string) (*Mapping, error) {
1045	var start, end, perm, file, offset, buildID string
1046	if me := procMapsRE.FindStringSubmatch(l); len(me) == 6 {
1047		start, end, perm, offset, file = me[1], me[2], me[3], me[4], me[5]
1048	} else if me := briefMapsRE.FindStringSubmatch(l); len(me) == 7 {
1049		start, end, perm, file, offset, buildID = me[1], me[2], me[3], me[4], me[5], me[6]
1050	} else {
1051		return nil, errUnrecognized
1052	}
1053
1054	var err error
1055	mapping := &Mapping{
1056		File:    file,
1057		BuildID: buildID,
1058	}
1059	if perm != "" && !strings.Contains(perm, "x") {
1060		// Skip non-executable entries.
1061		return nil, nil
1062	}
1063	if mapping.Start, err = strconv.ParseUint(start, 16, 64); err != nil {
1064		return nil, errUnrecognized
1065	}
1066	if mapping.Limit, err = strconv.ParseUint(end, 16, 64); err != nil {
1067		return nil, errUnrecognized
1068	}
1069	if offset != "" {
1070		if mapping.Offset, err = strconv.ParseUint(offset, 16, 64); err != nil {
1071			return nil, errUnrecognized
1072		}
1073	}
1074	return mapping, nil
1075}
1076
// memoryMapSentinels lists the marker strings that isMemoryMapSentinel
// looks for.
var memoryMapSentinels = []string{
	"--- Memory map: ---",
	"MAPPED_LIBRARIES:",
}

// isMemoryMapSentinel reports whether line contains any of the strings
// in memoryMapSentinels. The marker may appear anywhere in the line,
// and the comparison is case-sensitive.
func isMemoryMapSentinel(line string) bool {
	for i := range memoryMapSentinels {
		if !strings.Contains(line, memoryMapSentinels[i]) {
			continue
		}
		return true
	}
	return false
}
1092
1093func (p *Profile) addLegacyFrameInfo() {
1094	switch {
1095	case isProfileType(p, heapzSampleTypes):
1096		p.DropFrames, p.KeepFrames = allocRxStr, allocSkipRxStr
1097	case isProfileType(p, contentionzSampleTypes):
1098		p.DropFrames, p.KeepFrames = lockRxStr, ""
1099	default:
1100		p.DropFrames, p.KeepFrames = cpuProfilerRxStr, ""
1101	}
1102}
1103
// Sample-type signatures consulted by addLegacyFrameInfo through
// isProfileType. Each inner slice is one complete, ordered list of
// sample type names.
var (
	// heapzSampleTypes holds the signatures treated as heap profiles.
	heapzSampleTypes = [][]string{
		// Early Go pprof profiles.
		{"allocations", "size"},
		{"objects", "space"},
		{"inuse_objects", "inuse_space"},
		{"alloc_objects", "alloc_space"},
		// Go pprof legacy profiles.
		{"alloc_objects", "alloc_space", "inuse_objects", "inuse_space"},
	}

	// contentionzSampleTypes holds the signatures treated as contention
	// profiles.
	contentionzSampleTypes = [][]string{
		{"contentions", "delay"},
	}
)
1114
1115func isProfileType(p *Profile, types [][]string) bool {
1116	st := p.SampleType
1117nextType:
1118	for _, t := range types {
1119		if len(st) != len(t) {
1120			continue
1121		}
1122
1123		for i := range st {
1124			if st[i].Type != t[i] {
1125				continue nextType
1126			}
1127		}
1128		return true
1129	}
1130	return false
1131}
1132
// allocRxStr is the source text of a regular expression: the patterns
// listed below joined with "|". addLegacyFrameInfo stores it in
// Profile.DropFrames for profiles whose sample types match
// heapzSampleTypes. The individual patterns carry no anchors of their
// own; how the combined expression is anchored is up to its consumer.
var allocRxStr = strings.Join([]string{
	// POSIX entry points.
	`calloc`,
	`cfree`,
	`malloc`,
	`free`,
	`memalign`,
	`do_memalign`,
	`(__)?posix_memalign`,
	`pvalloc`,
	`valloc`,
	`realloc`,

	// TC malloc.
	`tcmalloc::.*`,
	`tc_calloc`,
	`tc_cfree`,
	`tc_malloc`,
	`tc_free`,
	`tc_memalign`,
	`tc_posix_memalign`,
	`tc_pvalloc`,
	`tc_valloc`,
	`tc_realloc`,
	`tc_new`,
	`tc_delete`,
	`tc_newarray`,
	`tc_deletearray`,
	`tc_new_nothrow`,
	`tc_newarray_nothrow`,

	// Memory-allocation routines on OS X.
	`malloc_zone_malloc`,
	`malloc_zone_calloc`,
	`malloc_zone_valloc`,
	`malloc_zone_realloc`,
	`malloc_zone_memalign`,
	`malloc_zone_free`,

	// Go runtime: "runtime." followed by anything. allocSkipRxStr lists
	// the runtime frames paired with this as KeepFrames.
	`runtime\..*`,

	// Other misc. memory allocation routines
	`BaseArena::.*`,
	`(::)?do_malloc_no_errno`,
	`(::)?do_malloc_pages`,
	`(::)?do_malloc`,
	`DoSampledAllocation`,
	`MallocedMemBlock::MallocedMemBlock`,
	`_M_allocate`,
	`__builtin_(vec_)?delete`,
	`__builtin_(vec_)?new`,
	`__gnu_cxx::new_allocator::allocate`,
	`__libc_malloc`,
	`__malloc_alloc_template::allocate`,
	`allocate`,
	`cpp_alloc`,
	// Matches both "operator new" and "operator new[]".
	`operator new(\[\])?`,
	`simple_alloc::allocate`,
}, `|`)
1193
// allocSkipRxStr is the source text of a regular expression: the
// patterns below joined with "|". addLegacyFrameInfo stores it in
// Profile.KeepFrames, alongside allocRxStr in DropFrames, for profiles
// whose sample types match heapzSampleTypes.
var allocSkipRxStr = strings.Join([]string{
	// Preserve Go runtime frames that appear in the middle/bottom of
	// the stack.
	`runtime\.panic`,
	`runtime\.reflectcall`,
	// "runtime.call" optionally followed by digits.
	`runtime\.call[0-9]*`,
}, `|`)
1201
// cpuProfilerRxStr is the source text of a regular expression: the
// patterns below joined with "|". addLegacyFrameInfo stores it in
// Profile.DropFrames, with an empty KeepFrames, for profiles that match
// neither heapzSampleTypes nor contentionzSampleTypes.
var cpuProfilerRxStr = strings.Join([]string{
	`ProfileData::Add`,
	`ProfileData::prof_handler`,
	`CpuProfiler::prof_handler`,
	`__pthread_sighandler`,
	`__restore`,
}, `|`)
1209
// lockRxStr is the source text of a regular expression: the patterns
// below joined with "|". addLegacyFrameInfo stores it in
// Profile.DropFrames, with an empty KeepFrames, for profiles whose
// sample types match contentionzSampleTypes.
//
// Most entries take the form (Qualifier::)?Name.*, i.e. the name with
// an optional scope prefix and any trailing text.
var lockRxStr = strings.Join([]string{
	// NOTE(review): this entry looks subsumed by the next one, which
	// accepts the same name with an optional prefix and suffix — confirm
	// before removing.
	`RecordLockProfileData`,
	`(base::)?RecordLockProfileData.*`,
	`(base::)?SubmitMutexProfileData.*`,
	`(base::)?SubmitSpinLockProfileData.*`,
	`(base::Mutex::)?AwaitCommon.*`,
	`(base::Mutex::)?Unlock.*`,
	`(base::Mutex::)?UnlockSlow.*`,
	`(base::Mutex::)?ReaderUnlock.*`,
	`(base::MutexLock::)?~MutexLock.*`,
	`(Mutex::)?AwaitCommon.*`,
	`(Mutex::)?Unlock.*`,
	`(Mutex::)?UnlockSlow.*`,
	`(Mutex::)?ReaderUnlock.*`,
	`(MutexLock::)?~MutexLock.*`,
	`(SpinLock::)?Unlock.*`,
	`(SpinLock::)?SlowUnlock.*`,
	`(SpinLockHolder::)?~SpinLockHolder.*`,
}, `|`)
1229