2017-03-29 13:44:02 +00:00
|
|
|
## @package lstm_benchmark
|
|
|
|
|
# Module caffe2.python.lstm_benchmark
|
2020-09-24 00:55:24 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2017-03-01 07:14:11 +00:00
|
|
|
|
|
|
|
|
from caffe2.proto import caffe2_pb2
|
2017-06-29 03:03:46 +00:00
|
|
|
from caffe2.python import workspace, core, utils, rnn_cell, model_helper
|
2017-09-08 23:19:23 +00:00
|
|
|
from caffe2.python import recurrent
|
2017-03-01 07:14:11 +00:00
|
|
|
|
|
|
|
|
import argparse
|
|
|
|
|
import numpy as np
|
|
|
|
|
import time
|
|
|
|
|
|
|
|
|
|
import logging
|
|
|
|
|
|
|
|
|
|
# Module-level logger for benchmark progress/throughput reporting.
logging.basicConfig()
log = logging.getLogger("lstm_bench")
log.setLevel(logging.DEBUG)
|
2017-06-29 21:33:43 +00:00
|
|
|
def generate_data(T, shape, num_labels, fixed_shape):
    '''
    Fill the "inputqueue" and "labelqueue" blob queues with T random batches.

    Returns (queue, label_queue, entry_counts), where entry_counts[i] is
    seq_len * batch_size for the i-th generated batch.
    '''
    log.info("Generating T={} sequence batches".format(T))

    # One-off init net: create the two queues (one blob per entry, with
    # capacity for all T batches so generation never blocks).
    generate_input_init_net = core.Net('generate_input_init')
    queue = generate_input_init_net.CreateBlobsQueue(
        [], "inputqueue", num_blobs=1, capacity=T,
    )
    label_queue = generate_input_init_net.CreateBlobsQueue(
        [], "labelqueue", num_blobs=1, capacity=T,
    )

    workspace.RunNetOnce(generate_input_init_net)

    # Net run once per batch: enqueues whatever is currently fed into the
    # "scratch" / "label_scr" workspace blobs.
    generate_input_net = core.Net('generate_input')
    generate_input_net.EnqueueBlobs([queue, "scratch"], ["scratch"])
    generate_input_net.EnqueueBlobs([label_queue, "label_scr"], ["label_scr"])
    # Fixed seed: every benchmark run sees identical data.
    np.random.seed(2603)

    entry_counts = []
    for t in range(T):
        if (t % (max(10, T // 10)) == 0):
            print("Generating data {}/{}".format(t, T))

        # Randomize the seqlength (first dim) unless fixed_shape is set;
        # the very first batch (t == 0) always uses the full shape.
        random_shape = (
            [np.random.randint(1, shape[0])] + shape[1:]
            if t > 0 and not fixed_shape else shape
        )
        X = np.random.rand(*random_shape).astype(np.float32)
        batch_size = random_shape[1]
        # One random label per timestep, drawn from [0, num_labels * batch).
        L = num_labels * batch_size
        labels = (np.random.rand(random_shape[0]) * L).astype(np.int32)
        workspace.FeedBlob("scratch", X)
        workspace.FeedBlob("label_scr", labels)
        workspace.RunNetOnce(generate_input_net.Proto())
        entry_counts.append(random_shape[0] * random_shape[1])

    log.info("Finished data generation")

    return queue, label_queue, entry_counts
def create_model(args, queue, label_queue, input_shape):
    """Build the benchmark LSTM model that reads batches from the queues.

    Returns (model, output), where output is the last layer's output blob.
    """
    model = model_helper.ModelHelper(name="LSTM_bench")
    seq_lengths, target = \
        model.net.AddExternalInputs(
            'seq_lengths',
            'target',
        )

    # Each net iteration dequeues one pre-generated input batch + labels.
    input_blob = model.net.DequeueBlobs(queue, "input_data")
    labels = model.net.DequeueBlobs(label_queue, "label")

    init_blobs = []
    if args.implementation in ["own", "static", "static_dag"]:
        T = None  # unroll size; None means dynamic-length RNN
        if "static" in args.implementation:
            # Static unrolling bakes the sequence length into the net,
            # so input batches must keep a fixed shape.
            assert args.fixed_shape, \
                "Random input length is not static RNN compatible"
            T = args.seq_length
            print("Using static RNN of size {}".format(T))

        # One (hidden, cell) initial-state pair per layer.
        for i in range(args.num_layers):
            hidden_init, cell_init = model.net.AddExternalInputs(
                "hidden_init_{}".format(i),
                "cell_init_{}".format(i)
            )
            init_blobs.extend([hidden_init, cell_init])

        output, last_hidden, _, last_state = rnn_cell.LSTM(
            model=model,
            input_blob=input_blob,
            seq_lengths=seq_lengths,
            initial_states=init_blobs,
            dim_in=args.input_dim,
            dim_out=[args.hidden_dim] * args.num_layers,
            scope="lstm1",
            memory_optimization=args.memory_optimization,
            forward_only=args.forward_only,
            drop_states=True,
            return_last_layer_only=True,
            static_rnn_unroll_size=T,
        )

        if "dag" in args.implementation:
            print("Using DAG net type")
            model.net.Proto().type = 'dag'
            model.net.Proto().num_workers = 4
    elif args.implementation == "cudnn":
        # We need to feed a placeholder input so that RecurrentInitOp
        # can infer the dimensions.
        init_blobs = model.net.AddExternalInputs("hidden_init", "cell_init")
        model.param_init_net.ConstantFill([], input_blob, shape=input_shape)
        output, last_hidden, _ = rnn_cell.cudnn_LSTM(
            model=model,
            input_blob=input_blob,
            initial_states=init_blobs,
            dim_in=args.input_dim,
            dim_out=args.hidden_dim,
            scope="cudnnlstm",
            num_layers=args.num_layers,
        )

    else:
        assert False, "Unknown implementation"

    # Random per-example weights for the weighted softmax loss.
    weights = model.net.UniformFill(labels, "weights")
    softmax, loss = model.net.SoftmaxWithLoss(
        [model.Flatten(output), labels, weights],
        ['softmax', 'loss'],
    )

    if not args.forward_only:
        model.AddGradientOperators([loss])

    # carry states over
    for init_blob in init_blobs:
        model.net.Copy(last_hidden, init_blob)

        # Zero-initialize each state blob; cudnn packs all layers' states
        # into a single blob, hence the num_layers factor.
        sz = args.hidden_dim
        if args.implementation == "cudnn":
            sz *= args.num_layers
        workspace.FeedBlob(init_blob, np.zeros(
            [1, args.batch_size, sz], dtype=np.float32
        ))

    if args.rnn_executor:
        # Apply executor tuning to every RecurrentNetwork(Gradient) op.
        for op in model.net.Proto().op:
            if op.type.startswith('RecurrentNetwork'):
                recurrent.set_rnn_executor_config(
                    op,
                    num_threads=args.rnn_executor_num_threads,
                    max_cuda_streams=args.rnn_executor_max_cuda_streams,
                )
    return model, output
def Caffe2LSTM(args):
    """Generate data, build the model, and time the benchmark runs.

    Returns wall-clock seconds elapsed since the timed loop started.
    """
    # Total number of timesteps to process over the whole run.
    T = args.data_size // args.batch_size

    input_blob_shape = [args.seq_length, args.batch_size, args.input_dim]
    # NOTE(review): num_labels is passed as args.hidden_dim, presumably so
    # label ids fit the softmax output width — confirm against SoftmaxWithLoss.
    queue, label_queue, entry_counts = generate_data(T // args.seq_length,
                                                     input_blob_shape,
                                                     args.hidden_dim,
                                                     args.fixed_shape)

    workspace.FeedBlob(
        "seq_lengths",
        np.array([args.seq_length] * args.batch_size, dtype=np.int32)
    )

    model, output = create_model(args, queue, label_queue, input_blob_shape)

    workspace.RunNetOnce(model.param_init_net)
    workspace.CreateNet(model.net)

    start_time = time.time()  # NOTE(review): dead — reassigned after warm-up
    num_iters = T // args.seq_length
    total_iters = 0  # NOTE(review): accumulated but never read

    # Run the Benchmark
    log.info("------ Warming up ------")
    workspace.RunNet(model.net.Proto().name)

    if (args.gpu):
        log.info("Memory stats:")
        stats = utils.GetGPUMemoryUsageStats()
        log.info("GPU memory:\t{} MB".format(stats['max_total'] / 1024 / 1024))

    log.info("------ Starting benchmark ------")
    start_time = time.time()
    last_time = time.time()
    # Start at 1 so the warm-up batch is excluded from throughput accounting.
    for iteration in range(1, num_iters, args.iters_to_report):
        iters_once = min(args.iters_to_report, num_iters - iteration)
        total_iters += iters_once
        workspace.RunNet(model.net.Proto().name, iters_once)

        new_time = time.time()
        # "// 100 / 10" truncates entries-per-second/1000 to one decimal.
        log.info(
            "Iter: {} / {}. Entries Per Second: {}k.".format(
                iteration,
                num_iters,
                np.sum(entry_counts[iteration:iteration + iters_once]) /
                (new_time - last_time) // 100 / 10,
            )
        )
        last_time = new_time

    log.info("Done. Total EPS excluding 1st iteration: {}k {}".format(
        np.sum(entry_counts[1:]) / (time.time() - start_time) // 100 / 10,
        " (with RNN executor)" if args.rnn_executor else "",
    ))

    if (args.gpu):
        log.info("Memory stats:")
        stats = utils.GetGPUMemoryUsageStats()
        log.info("GPU memory:\t{} MB".format(stats['max_total'] / 1024 / 1024))
        if (stats['max_total'] != stats['total']):
            log.warning(
                "Max usage differs from current total usage: {} > {}".
                format(stats['max_total'], stats['total'])
            )
            log.warning("This means that costly deallocations occurred.")

    return time.time() - start_time
@utils.debug
def Benchmark(args):
    """Run the Caffe2 LSTM benchmark and return elapsed seconds."""
    elapsed = Caffe2LSTM(args)
    return elapsed
def GetArgumentParser():
    """Build the command-line argument parser for the LSTM benchmark."""
    parser = argparse.ArgumentParser(description="LSTM benchmark.")
    add = parser.add_argument

    # Model / data dimensions.
    add("--hidden_dim", type=int, default=800, help="Hidden dimension")
    add("--input_dim", type=int, default=40, help="Input dimension")
    add("--batch_size", type=int, default=128, help="The batch size.")
    add("--seq_length", type=int, default=20, help="Max sequence length")
    add("--data_size", type=int, default=1000000,
        help="Number of data points to generate")
    add("--iters_to_report", type=int, default=20,
        help="Number of iteration to report progress")

    # Execution configuration.
    add("--gpu", action="store_true", help="Run all on GPU")
    add("--implementation", type=str, default="own",
        help="'cudnn', 'own', 'static' or 'static_dag'")
    add("--fixed_shape", action="store_true",
        help=("Whether to randomize shape of input batches. "
              "Static RNN requires fixed shape"))
    add("--memory_optimization", action="store_true",
        help="Whether to use memory optimized LSTM or not")
    add("--forward_only", action="store_true",
        help="Whether to run only forward pass")
    add("--num_layers", type=int, default=1,
        help="Number of LSTM layers. All output dimensions are going to be"
             "of hidden_dim size")

    # RNN executor tuning.
    add("--rnn_executor", action="store_true",
        help="Whether to use RNN executor")
    add("--rnn_executor_num_threads", type=int, default=None,
        help="Number of threads used by CPU RNN Executor")
    add("--rnn_executor_max_cuda_streams", type=int, default=None,
        help="Maximum number of CUDA streams used by RNN executor on GPU")

    return parser
if __name__ == '__main__':
    # Unknown flags are forwarded to Caffe2's GlobalInit verbatim.
    args, extra_args = GetArgumentParser().parse_known_args()

    global_init_args = [
        'caffe2',
        '--caffe2_log_level=0',
        '--caffe2_print_blob_sizes_at_exit=0',
        '--caffe2_rnn_executor={}'.format(1 if args.rnn_executor else 0),
        '--caffe2_gpu_memory_tracking=1',
    ]
    workspace.GlobalInit(global_init_args + extra_args)

    # Device 4 on the GPU when requested, otherwise CPU.
    device = core.DeviceOption(
        workspace.GpuDeviceType if args.gpu else caffe2_pb2.CPU, 4)

    with core.DeviceScope(device):
        Benchmark(args)