import argparse
import datetime
import subprocess
import sys
import time

import torch

from .runner import get_nn_runners


def run_rnn(
    name,
    rnn_creator,
    nloops=5,
    seqLength=100,
    numLayers=1,
    inputSize=512,
    hiddenSize=512,
    miniBatch=64,
    device="cuda",
    seed=None,
):
    """Build a model with rnn_creator and run nloops forward/backward iterations on the GPU."""

    def run_iter(modeldef):
        # Forward
        forward_output = modeldef.forward(*modeldef.inputs)

        # "Loss computation" and backward
        if modeldef.backward_setup is not None:
            backward_input = modeldef.backward_setup(forward_output)
        else:
            backward_input = forward_output
        if modeldef.backward is not None:
            modeldef.backward(*backward_input)

        # "Update" parameters
        if modeldef.backward is not None:
            with torch.no_grad():
                for param in modeldef.params:
                    param.grad.zero_()
        torch.cuda.synchronize()

    assert device == "cuda"
    creator_args = dict(
        seqLength=seqLength,
        numLayers=numLayers,
        inputSize=inputSize,
        hiddenSize=hiddenSize,
        miniBatch=miniBatch,
        device=device,
        seed=seed,
    )
    modeldef = rnn_creator(**creator_args)

    for _ in range(nloops):
        run_iter(modeldef)


def profile(
    rnns,
    sleep_between_seconds=1,
    nloops=5,
    internal_run=True,  # Unused here; absorbs the flag when called with **vars(args). TODO: remove
    seqLength=100,
    numLayers=1,
    inputSize=512,
    hiddenSize=512,
    miniBatch=64,
    device="cuda",
    seed=None,
):
    """Run each requested RNN implementation, sleeping between runs."""
    params = dict(
        seqLength=seqLength,
        numLayers=numLayers,
        inputSize=inputSize,
        hiddenSize=hiddenSize,
        miniBatch=miniBatch,
        device=device,
        seed=seed,
    )
    for name, creator, context in get_nn_runners(*rnns):
        with context():
            run_rnn(name, creator, nloops, **params)
            time.sleep(sleep_between_seconds)


def system(command):
    """Returns (return-code, stdout, stderr)"""
    print(f"[system] {command}")
    p = subprocess.Popen(
        command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True
    )
    output, err = p.communicate()
    rc = p.returncode
    output = output.decode("ascii")
    err = err.decode("ascii")
    return rc, output, err


def describe_sizes(**sizes):
    # seqLength, numLayers, inputSize, hiddenSize, miniBatch
    return "s{}-l{}-i{}-h{}-b{}".format(
        sizes["seqLength"],
        sizes["numLayers"],
        sizes["inputSize"],
        sizes["hiddenSize"],
        sizes["miniBatch"],
    )


OUTPUT_DIR = "~/profout/"


def nvprof_output_filename(rnns, **params):
    rnn_tag = "-".join(rnns)
    size_tag = describe_sizes(**params)
    date_tag = datetime.datetime.now().strftime("%m%d%y-%H%M")
    return f"{OUTPUT_DIR}prof_{rnn_tag}_{size_tag}_{date_tag}.nvvp"


def nvprof(cmd, outpath):
    return system(f"nvprof -o {outpath} {cmd}")


def full_profile(rnns, **args):
    """Re-invoke this script under nvprof with --internal-run so profile() runs inside the profiled process."""
    profile_args = []
    for k, v in args.items():
        # internal_run is forwarded via the explicit --internal-run flag below;
        # passing --internal_run=False would be rejected by the child's argparse
        # because the flag is store_true and takes no value.
        if k == "internal_run":
            continue
        profile_args.append(f"--{k}={v}")
    profile_args.append(f"--rnns {' '.join(rnns)}")
    profile_args.append("--internal-run")

    outpath = nvprof_output_filename(rnns, **args)

    cmd = f"{sys.executable} -m fastrnns.profile {' '.join(profile_args)}"
    rc, stdout, stderr = nvprof(cmd, outpath)
    if rc != 0:
        raise RuntimeError(f"stderr: {stderr}\nstdout: {stdout}")


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Profile RNNs")
    parser.add_argument("--seqLength", default=100, type=int)
    parser.add_argument("--numLayers", default=1, type=int)
    parser.add_argument("--inputSize", default=512, type=int)
    parser.add_argument("--hiddenSize", default=512, type=int)
    parser.add_argument("--miniBatch", default=64, type=int)
    parser.add_argument(
        "--sleep-between-seconds", "--sleep_between_seconds", default=1, type=int
    )
    parser.add_argument("--nloops", default=5, type=int)
    parser.add_argument(
        "--rnns", nargs="*", help="Which runners to profile: cudnn, aten, jit, etc."
    )

    # If --internal-run is set, we actually run the rnns in this process.
    # Otherwise we shell out to nvprof, re-invoking this script with --internal-run set.
    parser.add_argument(
        "--internal-run",
        "--internal_run",
        default=False,
        action="store_true",
        help="Internal flag; set automatically by full_profile. Don't pass it manually.",
    )
    args = parser.parse_args()
    if args.rnns is None:
        args.rnns = ["cudnn", "aten", "jit"]
    print(args)

    if args.internal_run:
        profile(**vars(args))
    else:
        full_profile(**vars(args))
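

# Example invocations (a sketch, not part of the script's behavior): the
# `fastrnns.profile` module path mirrors the command that full_profile() builds
# above and assumes this file is importable under that name from the benchmark
# root; adjust it to your layout.
#
#   # Profile the default runners (cudnn, aten, jit) under nvprof:
#   python -m fastrnns.profile
#
#   # Profile only the JIT runner with a longer sequence:
#   python -m fastrnns.profile --rnns jit --seqLength 512
#
#   # Run the workloads directly in-process (what the nvprof child process does):
#   python -m fastrnns.profile --rnns cudnn --internal-run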