parallel-perf.py - OpenGrok cross reference for /linux-6.14.4/tools/perf/scripts/python/parallel-perf.py

Lines Matching +full:cpu +full:- +full:nr
2 # SPDX-License-Identifier: GPL-2.0
5 # options --cpu and --time so that each job processes a different chunk
20 glb_prog_name = "parallel-perf.py"
88 				return [ f"Non-empty error file {self.stderr_name}" ]
139 		x = "0" * (10 - len(x)) + x
140 	return x[:len(x) - 9] + "." + x[-9:]
150 	max_len = len(str(cpus[-1]))
151 	cpu_dir_fmt = f"cpu-%.{max_len}u"
154 	for cpu in cpus:
155 		if cpu >= 0:
156 			cpu_dir = os.path.join(output_dir, cpu_dir_fmt % cpu)
157 			cpu_option = f"--cpu={cpu}"
162 		tr_dir_fmt = "time-range"
166 			tr_dir_fmt += f"-{pos}"
172 		tr_dir_fmt += f"-%.{max_len}u"
180 				time_option = "--time=" + NanoSecsToSecsStr(r[0]) + "," + NanoSecsToSecsStr(r[1])
263 …process.Popen([perf, "script", "--header-only", "--input", file_name], stdout=subprocess.PIPE).std…
271 			name = line[1:pos-1].strip()
275 				nr = 2
277 					name = f"{orig_name} {nr}"
280 					nr += 1
293 	def Init(self, opt_element=-1, value_element=-1, opt_pos=-1, value_pos=-1, error=None):
305 		pos = -1
308 			if m and opt[:2] == f"-{short_name}":
313 						self.Init(error = f"-{short_name} option missing value")
317 			if opt[:n] == f"--{long_name}":
322 						self.Init(error = f"--{long_name} option missing value")
326 					self.Init(error = f"--{long_name} option expected '='")
328 			if m and opt[:1] == "-" and opt[:2] != "--" and short_name in opt:
330 				if "-" in opt[1:]:
331 					hpos = opt[1:].index("-")
338 						self.Init(error = f"-{short_name} option missing value")
353 		if self.opt_element == -1:
390 	cpu_str = ReadPerfOption(cmd, "C", "cpu")
392 	# Use double-quick sampling to determine trace data density
393 	times_cmd = ["perf", "script", "--ns", "--input", file_name, "--itrace=qqi"]
395 		times_cmd.append(f"--cpu={cpu_str}")
397 		times_cmd.append(f"--time={time_str}")
399 	cnts_cmd.append("-Fcpu")
400 	times_cmd.append("-Fcpu,time")
404 	def __init__(self, cpu):  argument
405 		self.cpu = cpu
413 def CalcTimeRangesByCPU(line, cpu, cpu_time_ranges, max_time):  argument
414 	cpu_time_range = cpu_time_ranges[cpu]
415 	cpu_time_range.remaining -= 1
416 	cpu_time_range.interval_remaining -= 1
421 		time = TimeVal(line[1][:-1], 0)
423 		time_ranges[cpu_time_range.tr_pos][1] = time - 1
428 def CountSamplesByCPU(line, cpu, cpu_time_ranges):  argument
430 		cpu_time_ranges[cpu].sample_cnt += 1
433 		print("cpu", cpu)
438 	# Assume CPU number is at beginning of line and enclosed by []
439 	pat = re.compile(r"\s*\[[0-9]+\]")
444 			line = line.decode("utf-8")
448 					# Assumes CPU number is enclosed by []
449 					cpu = int(line[0][1:-1])
451 					cpu = 0
452 				fn(line, cpu, *x)
490 def SplitTimeRangesByTraceDataDensity(time_ranges, cpus, nr, cmd, file_name, per_cpu, min_size, min…  argument
497 	nr_cpus = cpus[-1] + 1 if per_cpu else 1
499 		nr_cpus = cpus[-1] + 1
500 		cpu_time_ranges = [ CPUTimeRange(cpu) for cpu in range(nr_cpus) ]
503 		cpu_time_ranges = [ CPUTimeRange(-1) ]
510 	# Count the number of "double quick" samples per CPU
521 			print("cpu:", cpu_time_range.cpu, "sample_cnt", cnt)
530 		if nr == 0:
531 			nr = 1
532 		return [ SplitTimeRangesIntoN(time_ranges, nr, min_interval) ]
534 	if nr:
535 		divisor = nr
550 	max_time = time_ranges[-1][1]
557 		# Adjust target interval for CPU to give approximately equal interval sizes
569 		# Init. time ranges for each CPU with the start time
580 	return [cpu_time_ranges[cpu].time_ranges for cpu in cpus]
587 	duration = int((end - start + 1) / n)
592 		time_ranges.append([start, start + duration - 1])
594 	time_ranges[-1][1] = end
598 	return r[1] - r[0] + 1
630 		if new_tr[i - 1][1] + 1 == new_tr[i][0]:
631 			new_tr[i][0] = new_tr[i - 1][0]
632 			del new_tr[i - 1]
640 	if time_ranges[-1][1] >= max_time:
641 		time_ranges[-1][1] = None
644 …e_str}'\nCheck also 'time of first sample' and 'time of last sample' in perf script --header-only")
651 		if i != 0 and start <= time_ranges[i - 1][1]:
672 …raise Exception(f"perf command bad cpu option: '{cpu_str}'\nCheck also 'nrcpus avail' in perf scri…
693 		return [-1]
696 		a = r.split("-")
719 		self.orig_nr = self.nr
724 		if self.jobs < 0 or self.nr < 0 or self.interval < 0:
725 			raise Exception("Bad options (negative values): try -h option for help")
726 		if self.nr != 0 and self.interval != 0:
730 		if self.nr == 0 and self.interval == 0:
732 				self.nr = 1
734 				self.nr = self.jobs
755 			self.cpu_str = ExtractPerfOption(self.cmd, "C", "cpu")
762 			self.cpus = [-1]
775 		elif self.nr:
776 …self.split_time_ranges_for_each_cpu = [ SplitTimeRangesIntoN(self.time_ranges, self.nr, self.min_i…
782 			# Re-combined time ranges should be the same
798 		if "--per-thread" in self.cmd_line.split():
803 		# --no-per-cpu option takes precedence
808 		# Default to per-cpu for Intel PT data that was recorded per-cpu,
809 		# because decoding can be done for each CPU separately.
825 		# Prefer open-ended time range to starting / ending with min_time / max_time resp.
851 --cpu and --time so that each job processes a different chunk of the data.
855 Follow the options by '--' and then the perf script command e.g.
857 	$ perf record -a -- sleep 10
858 	$ parallel-perf.py --nr=4 -- perf script --ns
860 	$ tree parallel-perf-output/
861 	parallel-perf-output/
862 	├── time-range-0
865 	├── time-range-1
868 	├── time-range-2
871 	└── time-range-3
874 	$ find parallel-perf-output -name cmd.txt | sort | xargs grep -H .
875 	parallel-perf-output/time-range-0/cmd.txt:perf script --time=,9466.504461499 --ns
876 	parallel-perf-output/time-range-1/cmd.txt:perf script --time=9466.504461500,9469.005396999 --ns
877 	parallel-perf-output/time-range-2/cmd.txt:perf script --time=9469.005397000,9471.506332499 --ns
878 	parallel-perf-output/time-range-3/cmd.txt:perf script --time=9471.506332500, --ns
881 --dlfilter and --script, so that the benefit of running parallel jobs
884 If option --pipe-to is used, standard output is first piped through that
894 If any job exits with a non-zero exit code, then all jobs are killed and no
895 more are started. A message is printed if any job results in a non-empty
898 There is a separate output subdirectory for each time range. If the --per-cpu
899 option is used, these are further grouped under cpu-n subdirectories, e.g.
901 	$ parallel-perf.py --per-cpu --nr=2 -- perf script --ns --cpu=0,1
903 	$ tree parallel-perf-output
904 	parallel-perf-output/
905 	├── cpu-0
906 	│   ├── time-range-0
909 	│   └── time-range-1
912 	└── cpu-1
913 	    ├── time-range-0
916 	    └── time-range-1
919 	$ find parallel-perf-output -name cmd.txt | sort | xargs grep -H .
920 	parallel-perf-output/cpu-0/time-range-0/cmd.txt:perf script --cpu=0 --time=,9469.005396999 --ns
921 	parallel-perf-output/cpu-0/time-range-1/cmd.txt:perf script --cpu=0 --time=9469.005397000, --ns
922 	parallel-perf-output/cpu-1/time-range-0/cmd.txt:perf script --cpu=1 --time=,9469.005396999 --ns
923 	parallel-perf-output/cpu-1/time-range-1/cmd.txt:perf script --cpu=1 --time=9469.005397000, --ns
925 Subdivisions of time range, and cpus if the --per-cpu option is used, are
926 expressed by the --time and --cpu perf script options respectively. If the
927 supplied perf script command has a --time option, then that time range is
929 'time of last sample' is used (refer perf script --header-only). Similarly, the
930 supplied perf script command may provide a --cpu option, and only those CPUs
933 To prevent time intervals becoming too small, the --min-interval option can
940 double-quick (--itrace=qqi) samples, and choosing time ranges that encompass
942 the same for each CPU processed. For Intel PT, --per-cpu is the default, but
943 that can be overridden by --no-per-cpu. Note, for Intel PT, double-quick
946 perf Intel PT documentation). The minimum number of double-quick samples that
947 will define a time range can be set by the --min_size option, which defaults to
950 …ap.add_argument("-o", "--output-dir", default="parallel-perf-output", help="output directory (defa…
951 …ap.add_argument("-j", "--jobs", type=int, default=0, help="maximum number of jobs to run in parall…
952 …ap.add_argument("-n", "--nr", type=int, default=0, help="number of time subdivisions (default is t…
953 …ap.add_argument("-i", "--interval", type=float, default=0, help="subdivide the time range using th…
954 …ap.add_argument("-c", "--per-cpu", action="store_true", help="process data for each CPU in paralle…
955 …ap.add_argument("-m", "--min-interval", type=float, default=glb_min_interval, help=f"minimum inter…
956 	ap.add_argument("-p", "--pipe-to", help="command to pipe output to (optional)")
957 …ap.add_argument("-N", "--no-per-cpu", action="store_true", help="do not process data for each CPU …
958 …ap.add_argument("-b", "--min_size", type=int, default=glb_min_samples, help="minimum data size (fo…
959 …ap.add_argument("-D", "--dry-run", action="store_true", help="do not run any jobs, just show the p…
960 …ap.add_argument("-q", "--quiet", action="store_true", help="do not print any messages except error…
961 	ap.add_argument("-v", "--verbose", action="store_true", help="print more messages")
962 	ap.add_argument("-d", "--debug", action="store_true", help="print debugging messages")
965 		split_pos = cmd_line.index("--")
979 			raise Exception("Command line must contain '--' before perf command")