diff --git a/onnxruntime/python/tools/transformers/benchmark.py b/onnxruntime/python/tools/transformers/benchmark.py index 6ceafb2c06..53e80423be 100644 --- a/onnxruntime/python/tools/transformers/benchmark.py +++ b/onnxruntime/python/tools/transformers/benchmark.py @@ -286,9 +286,17 @@ def run_tensorflow(use_gpu, model_names, model_class, precision, batch_sizes, se input_ids = tf.constant(values, shape=(batch_size, sequence_length), dtype=tf.int32) try: - model(input_ids, training=False) + def encoder_forward(): + return model(input_ids, training=False) - runtimes = timeit.repeat(lambda: model(input_ids, training=False), repeat=repeat_times, number=1) + def encoder_decoder_forward(): + return model(input_ids, decoder_input_ids=input_ids, training=False) + + inference = encoder_decoder_forward if config.is_encoder_decoder else encoder_forward + + inference() + + runtimes = timeit.repeat(lambda: inference(), repeat=repeat_times, number=1) result = { "engine": "tensorflow", diff --git a/onnxruntime/python/tools/transformers/huggingface_models.py b/onnxruntime/python/tools/transformers/huggingface_models.py index 04c79ed3b6..91ce85e1fc 100644 --- a/onnxruntime/python/tools/transformers/huggingface_models.py +++ b/onnxruntime/python/tools/transformers/huggingface_models.py @@ -89,11 +89,11 @@ MODELS = { "albert-xlarge-v2": (["input_ids"], 12, True, "bert"), "albert-xxlarge-v2": (["input_ids"], 12, True, "bert"), # T5 - #"t5-small": (["input_ids"], 11, False, "bert"), - #"t5-base": (["input_ids"], 11, False, "bert"), - #"t5-large": (["input_ids"], 11, False, "bert"), - #"t5-3b": (["input_ids"], 11, False, "bert"), - #"t5-11b": (["input_ids"], 11, False, "bert"), + "t5-small": (["input_ids"], 12, False, "bert"), + "t5-base": (["input_ids"], 12, False, "bert"), + "t5-large": (["input_ids"], 12, True, "bert"), + "t5-3b": (["input_ids"], 12, True, "bert"), + "t5-11b": (["input_ids"], 12, True, "bert"), # XLM-RoBERTa "xlm-roberta-base": (["input_ids"], 11, False, "bert"), 
"xlm-roberta-large": (["input_ids"], 11, True, "bert"), diff --git a/onnxruntime/python/tools/transformers/onnx_exporter.py b/onnxruntime/python/tools/transformers/onnx_exporter.py index 0fa9e2d555..6f9ad1fc41 100644 --- a/onnxruntime/python/tools/transformers/onnx_exporter.py +++ b/onnxruntime/python/tools/transformers/onnx_exporter.py @@ -226,7 +226,9 @@ def load_pretrained_model(model_name, config, cache_dir, custom_model_class, is_ transformers_module = __import__("transformers", fromlist=[model_class_name]) model_class = getattr(transformers_module, model_class_name) - return model_class.from_pretrained(model_name, config=config, cache_dir=cache_dir) + use_cdn = False if model_name == 't5-11b' else True + + return model_class.from_pretrained(model_name, config=config, cache_dir=cache_dir, use_cdn=use_cdn) def validate_and_optimize_onnx(model_name, use_external_data_format, model_type, onnx_dir, input_names, use_gpu, diff --git a/onnxruntime/test/platform/threadpool_test.cc b/onnxruntime/test/platform/threadpool_test.cc index fda2cbbc49..34ca40b87e 100644 --- a/onnxruntime/test/platform/threadpool_test.cc +++ b/onnxruntime/test/platform/threadpool_test.cc @@ -137,6 +137,30 @@ void TestBurstScheduling(const std::string& name, int num_tasks) { } } +void TestPoolCreation(const std::string&, int iter) { + // Test creating and destroying thread pools. This can be used with Valgrind to help + // check for memory leaks related to the initialization and clean-up code. For instance + // + // valgrind --leak-check=full ./onnxruntime_test_all --gtest_filter=ThreadPoolTest.TestPoolCreation_10Iter + // + // We create #iter thread pools, and within each of them run a loop of #per_iter steps. 
+ std::atomic<std::ptrdiff_t> ctr{0}; + constexpr std::ptrdiff_t per_iter = 1024; + constexpr int num_threads = 4; + for (auto i = 0; i < iter; i++) { + auto tp = onnxruntime::make_unique<onnxruntime::concurrency::ThreadPool>(&onnxruntime::Env::Default(), + onnxruntime::ThreadOptions(), + nullptr, + num_threads, + true); + tp->ParallelFor(per_iter, 0.0, + [&](std::ptrdiff_t s, std::ptrdiff_t e) { + ctr += e - s; + }); + } + ASSERT_EQ(ctr, iter * per_iter); +} + } // namespace namespace onnxruntime { @@ -253,6 +277,19 @@ TEST(ThreadPoolTest, TestBurstScheduling_65536Task) { // buffer tasks. TestBurstScheduling("TestBurstScheduling_65536Tasks", 65536); } + +TEST(ThreadPoolTest, TestPoolCreation_1Iter) { + TestPoolCreation("TestPoolCreation_1Iter", 1); +} + +TEST(ThreadPoolTest, TestPoolCreation_10Iter) { + TestPoolCreation("TestPoolCreation_10Iter", 10); +} + +TEST(ThreadPoolTest, TestPoolCreation_100Iter) { + TestPoolCreation("TestPoolCreation_100Iter", 100); +} + #ifdef _WIN32 TEST(ThreadPoolTest, TestStackSize) { ThreadOptions to;