Merge remote-tracking branch 'upstream/master' into DmlDev

2026-06-29 03:30:52 +00:00 · 2020-09-30 12:28:36 +00:00 · 2020-09-30 12:28:36 +00:00 · e237b9d7ed
commit e237b9d7ed
parent c443330e98 69dbaaa015
4 changed files with 55 additions and 8 deletions
--- a/onnxruntime/python/tools/transformers/benchmark.py
+++ b/onnxruntime/python/tools/transformers/benchmark.py
@ -286,9 +286,17 @@ def run_tensorflow(use_gpu, model_names, model_class, precision, batch_sizes, se
                input_ids = tf.constant(values, shape=(batch_size, sequence_length), dtype=tf.int32)

                try:
-                    model(input_ids, training=False)
+                    def encoder_forward():
+                        return model(input_ids, training=False)

-                    runtimes = timeit.repeat(lambda: model(input_ids, training=False), repeat=repeat_times, number=1)
+                    def encoder_decoder_forward():
+                        return model(input_ids, decoder_input_ids=input_ids, training=False)
+
+                    inference = encoder_decoder_forward if config.is_encoder_decoder else encoder_forward
+
+                    inference()
+
+                    runtimes = timeit.repeat(lambda: inference(), repeat=repeat_times, number=1)

                    result = {
                        "engine": "tensorflow",
--- a/onnxruntime/python/tools/transformers/huggingface_models.py
+++ b/onnxruntime/python/tools/transformers/huggingface_models.py
@ -89,11 +89,11 @@ MODELS = {
    "albert-xlarge-v2": (["input_ids"], 12, True, "bert"),
    "albert-xxlarge-v2": (["input_ids"], 12, True, "bert"),
    # T5
-    #"t5-small": (["input_ids"], 11, False, "bert"),
-    #"t5-base": (["input_ids"], 11, False, "bert"),
-    #"t5-large": (["input_ids"], 11, False, "bert"),
-    #"t5-3b": (["input_ids"], 11, False, "bert"),
-    #"t5-11b": (["input_ids"], 11, False, "bert"),
+    "t5-small": (["input_ids"], 12, False, "bert"),
+    "t5-base": (["input_ids"], 12, False, "bert"),
+    "t5-large": (["input_ids"], 12, True, "bert"),
+    "t5-3b": (["input_ids"], 12, True, "bert"),
+    "t5-11b": (["input_ids"], 12, True, "bert"),
    # XLM-RoBERTa
    "xlm-roberta-base": (["input_ids"], 11, False, "bert"),
    "xlm-roberta-large": (["input_ids"], 11, True, "bert"),
--- a/onnxruntime/python/tools/transformers/onnx_exporter.py
+++ b/onnxruntime/python/tools/transformers/onnx_exporter.py
@ -226,7 +226,9 @@ def load_pretrained_model(model_name, config, cache_dir, custom_model_class, is_
    transformers_module = __import__("transformers", fromlist=[model_class_name])
    model_class = getattr(transformers_module, model_class_name)

-    return model_class.from_pretrained(model_name, config=config, cache_dir=cache_dir)
+    use_cdn = False if model_name == 't5-11b' else True
+
+    return model_class.from_pretrained(model_name, config=config, cache_dir=cache_dir, use_cdn=use_cdn)


 def validate_and_optimize_onnx(model_name, use_external_data_format, model_type, onnx_dir, input_names, use_gpu,
--- a/onnxruntime/test/platform/threadpool_test.cc
+++ b/onnxruntime/test/platform/threadpool_test.cc
@ -137,6 +137,30 @@ void TestBurstScheduling(const std::string& name, int num_tasks) {
  }
 }

+void TestPoolCreation(const std::string&, int iter) {
+  // Repeatedly creates and destroys thread pools.  Useful under Valgrind to help check
+  // for memory leaks in the pools' initialization and clean-up code.  For instance:
+  //
+  //  valgrind --leak-check=full ./onnxruntime_test_all --gtest_filter=ThreadPoolTest.TestPoolCreation_10Iter
+  //
+  // Creates #iter pools; each runs one ParallelFor over #per_iter steps.  The string argument is unnamed and unused.
+  std::atomic<std::ptrdiff_t> ctr{0};  // running total of steps, accumulated across all pools
+  constexpr std::ptrdiff_t per_iter = 1024;  // number of steps handed to ParallelFor for each pool
+  constexpr int num_threads = 4;  // forwarded to every ThreadPool constructor call below
+  for (auto i = 0; i < iter; i++) {
+    auto tp = onnxruntime::make_unique<ThreadPool>(&onnxruntime::Env::Default(),  // a fresh pool per iteration
+                                                   onnxruntime::ThreadOptions(),  // default-constructed options
+                                                   nullptr,
+                                                   num_threads,
+                                                   true);
+    tp->ParallelFor(per_iter, 0.0,
+                    [&](std::ptrdiff_t s, std::ptrdiff_t e) {
+                      ctr += e - s;  // add this chunk's length; atomic because ctr is captured by reference
+                    });  // NOTE(review): the final check assumes ParallelFor returns only after all chunks ran - confirm
+  }  // tp goes out of scope here, ending this iteration's pool
+  ASSERT_EQ(ctr, iter * per_iter);  // chunk lengths must add up to per_iter for each of the iter pools
+}
+
 }  // namespace

 namespace onnxruntime {
@ -253,6 +277,19 @@ TEST(ThreadPoolTest, TestBurstScheduling_65536Task) {
  // buffer tasks.
  TestBurstScheduling("TestBurstScheduling_65536Tasks", 65536);
 }
+
+TEST(ThreadPoolTest, TestPoolCreation_1Iter) {
+  TestPoolCreation("TestPoolCreation_1Iter", 1);  // one pool: a single create / ParallelFor / destroy cycle
+}
+
+TEST(ThreadPoolTest, TestPoolCreation_10Iter) {
+  TestPoolCreation("TestPoolCreation_10Iter", 10);  // ten pools created and torn down back to back
+}
+
+TEST(ThreadPoolTest, TestPoolCreation_100Iter) {
+  TestPoolCreation("TestPoolCreation_100Iter", 100);  // one hundred pools created and torn down back to back
+}
+
 #ifdef _WIN32
 TEST(ThreadPoolTest, TestStackSize) {
  ThreadOptions to;