From bdd7bd084c8b3b82ca6641ebbd104b66481885ec Mon Sep 17 00:00:00 2001 From: Justin Chu Date: Mon, 20 Mar 2023 09:43:56 -0700 Subject: [PATCH] Remove the use of eval in test code (#15097) ### Description Remove the use of `eval` in test code so we don't (1) use eval and (2) create "unused" local vars that ruff will remove. Predecessor to #15085 --- orttraining/orttraining/eager/test/ort_ops.py | 139 ++++--------- .../python/orttraining_test_ortmodule_api.py | 195 +++++++----------- 2 files changed, 121 insertions(+), 213 deletions(-) diff --git a/orttraining/orttraining/eager/test/ort_ops.py b/orttraining/orttraining/eager/test/ort_ops.py index 9f5fdfdf24..cf694432cd 100644 --- a/orttraining/orttraining/eager/test/ort_ops.py +++ b/orttraining/orttraining/eager/test/ort_ops.py @@ -3,12 +3,12 @@ # pylint: disable=missing-docstring, too-many-public-methods, no-member +import operator import unittest -import numpy as np import onnxruntime_pybind11_state as torch_ort import torch -from parameterized import parameterized, param +from parameterized import param, parameterized class OrtOpTests(unittest.TestCase): @@ -570,11 +570,6 @@ class OrtOpTests(unittest.TestCase): # for floor and erf, the ort produces a roundoff error for NaN input, but cpu keeps it a NaN. # Thus, we use nan_to_num to ensure actual numbers are passed in. - # As many of the following use eval and make it appear to pylint that there are many unused variables, - # we disable those warnings - - # pylint: disable=eval-used, unused-argument, unused-variable, no-self-argument, - ops = [ ["abs", torch.tensor([-1, -2, 3, -6, -7])], ["acos"], @@ -616,20 +611,20 @@ class OrtOpTests(unittest.TestCase): # @parameterized.expand generate test methods for ops and using name_func we renaming the test to be test_{ops} @parameterized.expand(ops, name_func=rename_func) def test_op(self, test_name, tensor_test=torch.rand(6)): - # compile eval- creates a code object that evaluates the operator (for example torch.abs(tensor_test)) and returns its result. - cpu_result = eval(compile("torch." + test_name + "(tensor_test)", "", "eval")) - ort_result = eval(compile("torch." + test_name + "(tensor_test.to(self.get_device()))", "", "eval")) + cpu_result = getattr(torch, test_name)(tensor_test) + ort_result = getattr(torch, test_name)(tensor_test.to(self.get_device())) + assert torch.allclose(cpu_result, ort_result.cpu(), equal_nan=True) @parameterized.expand(ops, name_func=rename_func) - def test_op_(self, test_name, tensor_test=torch.rand(6)): + def test_op_inplace(self, test_name, tensor_test=torch.rand(6)): device = self.get_device() cpu_tensor = tensor_test ort_tensor = cpu_tensor.to(device) - eval(compile("torch." + test_name + "_(cpu_tensor)", "", "eval")) - eval(compile("torch." + test_name + "_(ort_tensor)", "", "eval")) + getattr(torch, test_name + "_")(cpu_tensor) + getattr(torch, test_name + "_")(ort_tensor) assert torch.allclose(cpu_tensor, ort_tensor.cpu(), equal_nan=True) @@ -648,10 +643,8 @@ class OrtOpTests(unittest.TestCase): cpu_out_tensor = torch.tensor([], dtype=tensor_test.dtype) ort_out_tensor = cpu_out_tensor.to(device) - st_cpu = f"torch.{test_name}(cpu_tensor, out=cpu_out_tensor)" - st_ort = f"torch.{test_name}(ort_tensor, out=ort_out_tensor)" - cpu_result = eval(compile(st_cpu, "", "eval")) - ort_result = eval(compile(st_ort, "", "eval")) + cpu_result = getattr(torch, test_name)(cpu_tensor, out=cpu_out_tensor) + ort_result = getattr(torch, test_name)(ort_tensor, out=ort_out_tensor) assert torch.allclose(cpu_result, ort_result.cpu(), equal_nan=True) assert torch.allclose(cpu_out_tensor, ort_out_tensor.cpu(), equal_nan=True) @@ -670,12 +663,9 @@ class OrtOpTests(unittest.TestCase): for tensor_type in {torch.float, torch.bool}: cpu_out_tensor = torch.tensor([], dtype=tensor_type) ort_out_tensor = cpu_out_tensor.to(device) - cpu_a_b_result = eval( - compile("torch." + math_sign_ops + "(cpu_a, cpu_b, out=cpu_out_tensor)", "", "eval") - ) - ort_a_b_result = eval( - compile("torch." + math_sign_ops + "(ort_a, ort_b, out=ort_out_tensor)", "", "eval") - ) + cpu_a_b_result = getattr(torch, math_sign_ops)(cpu_a, cpu_b, out=cpu_out_tensor) + ort_a_b_result = getattr(torch, math_sign_ops)(ort_a, ort_b, out=ort_out_tensor) + assert torch.equal(cpu_a_b_result.to(device), ort_a_b_result) assert torch.equal(cpu_out_tensor, ort_out_tensor.to("cpu")) assert ort_out_tensor.dtype == tensor_type @@ -699,35 +689,15 @@ class OrtOpTests(unittest.TestCase): cpu_out_tensor = torch.tensor([], dtype=torch.bool) ort_out_tensor = cpu_out_tensor.to(device) - cpu_int_int_result = eval( - compile( - "torch." + math_sign_ops + "(cpu_tensor_int, cpu_scalar_int_lt, out=cpu_out_tensor)", "", "eval" - ) - ) - cpu_int_int_gt_result = eval( - compile("torch." + math_sign_ops + "(cpu_tensor_int, cpu_scalar_int_gt)", "", "eval") - ) - cpu_float_float_lt_result = eval( - compile("torch." + math_sign_ops + "(cpu_tensor_float, float_lt)", "", "eval") - ) - cpu_float_float_gt_result = eval( - compile("torch." + math_sign_ops + "(cpu_tensor_float, float_gt)", "", "eval") - ) + cpu_int_int_result = getattr(torch, math_sign_ops)(cpu_tensor_int, cpu_scalar_int_lt, out=cpu_out_tensor) + cpu_int_int_gt_result = getattr(torch, math_sign_ops)(cpu_tensor_int, cpu_scalar_int_gt) + cpu_float_float_lt_result = getattr(torch, math_sign_ops)(cpu_tensor_float, float_lt) + cpu_float_float_gt_result = getattr(torch, math_sign_ops)(cpu_tensor_float, float_gt) - ort_int_int_result = eval( - compile( - "torch." + math_sign_ops + "(ort_tensor_int, ort_scalar_int_lt, out=ort_out_tensor)", "", "eval" - ) - ) - ort_int_int_gt_result = eval( - compile("torch." + math_sign_ops + "(ort_tensor_int, ort_scalar_int_gt)", "", "eval") - ) - ort_float_float_lt_result = eval( - compile("torch." + math_sign_ops + "(ort_tensor_float, float_lt)", "", "eval") - ) - ort_float_float_gt_result = eval( - compile("torch." + math_sign_ops + "(ort_tensor_float, float_gt)", "", "eval") - ) + ort_int_int_result = getattr(torch, math_sign_ops)(ort_tensor_int, ort_scalar_int_lt, out=ort_out_tensor) + ort_int_int_gt_result = getattr(torch, math_sign_ops)(ort_tensor_int, ort_scalar_int_gt) + ort_float_float_lt_result = getattr(torch, math_sign_ops)(ort_tensor_float, float_lt) + ort_float_float_gt_result = getattr(torch, math_sign_ops)(ort_tensor_float, float_gt) assert torch.equal(cpu_out_tensor, ort_out_tensor.to("cpu")) assert torch.equal(cpu_int_int_result, ort_int_int_result.to("cpu")) @@ -735,88 +705,65 @@ class OrtOpTests(unittest.TestCase): assert torch.equal(cpu_float_float_lt_result, ort_float_float_lt_result.to("cpu")) assert torch.equal(cpu_float_float_gt_result, ort_float_float_gt_result.to("cpu")) - binary_ops = [ # [op, op_sign, alpha_supported] - ["add", "+", True], - ["sub", "-", True], - ["mul", "*", False], - ["div", "/", False], + binary_ops = [ # [op, op, alpha_supported] + ["add", operator.add, True], + ["sub", operator.sub, True], + ["mul", operator.mul, False], + ["div", operator.truediv, False], ] @parameterized.expand(binary_ops, name_func=rename_func) - def test_op_binary_tensor(self, binary_op, op_sign, alpha_supported): + def test_op_binary_tensor(self, binary_op, op, alpha_supported): device = self.get_device() cpu_input = torch.rand(3, 1) # use broadcasting in the second dim. ort_input = cpu_input.to(device) cpu_other = torch.rand(3, 3) ort_other = cpu_other.to(device) - # verify op_sign works - cpu_result = eval(compile("cpu_input " + op_sign + " cpu_other", "", "eval")) - ort_result = eval(compile("ort_input " + op_sign + " ort_other", "", "eval")) + # verify op works + cpu_result = op(cpu_input, cpu_other) + ort_result = op(ort_input, ort_other) assert torch.allclose(cpu_result, ort_result.cpu()) # verify torch op with out param works cpu_out_tensor = torch.tensor([]) ort_out_tensor = cpu_out_tensor.to(device) - cpu_result = eval( - compile("torch." + binary_op + "(cpu_input, cpu_other, out=cpu_out_tensor)", "", "eval") - ) - ort_result = eval( - compile("torch." + binary_op + "(ort_input, ort_other, out=ort_out_tensor)", "", "eval") - ) + cpu_result = getattr(torch, binary_op)(cpu_input, cpu_other, out=cpu_out_tensor) + ort_result = getattr(torch, binary_op)(ort_input, ort_other, out=ort_out_tensor) + assert torch.allclose(cpu_result, ort_result.cpu()) assert torch.allclose(cpu_out_tensor, ort_out_tensor.cpu()) if alpha_supported: - cpu_result = eval( - compile( - "torch." + binary_op + "(cpu_input, cpu_other, alpha=2.5, out=cpu_out_tensor)", "", "eval" - ) - ) - ort_result = eval( - compile( - "torch." + binary_op + "(ort_input, ort_other, alpha=2.5, out=ort_out_tensor)", "", "eval" - ) - ) + cpu_result = getattr(torch, binary_op)(cpu_input, cpu_other, alpha=2.5, out=cpu_out_tensor) + ort_result = getattr(torch, binary_op)(ort_input, ort_other, alpha=2.5, out=ort_out_tensor) assert torch.allclose(cpu_result, ort_result.cpu()) assert torch.allclose(cpu_out_tensor, ort_out_tensor.cpu()) @parameterized.expand(binary_ops, name_func=rename_func) - def test_op_binary_scalar(self, binary_op, op_sign, alpha_supported): + def test_op_binary_scalar(self, binary_op, op, alpha_supported): device = self.get_device() cpu_input = torch.ones(3, 3) ort_input = cpu_input.to(device) cpu_other = 3.1 ort_other = 3.1 - # verify op_sign works - cpu_result = eval(compile("cpu_input " + op_sign + " cpu_other", "", "eval")) - ort_result = eval(compile("ort_input " + op_sign + " ort_other", "", "eval")) + # verify op works + cpu_result = op(cpu_input, cpu_other) + ort_result = op(ort_input, ort_other) assert torch.allclose(cpu_result, ort_result.cpu()) # verify torch op with out param works cpu_out_tensor = torch.tensor([]) ort_out_tensor = cpu_out_tensor.to(device) - cpu_result = eval( - compile("torch." + binary_op + "(cpu_input, cpu_other, out=cpu_out_tensor)", "", "eval") - ) - ort_result = eval( - compile("torch." + binary_op + "(ort_input, ort_other, out=ort_out_tensor)", "", "eval") - ) + cpu_result = getattr(torch, binary_op)(cpu_input, cpu_other, out=cpu_out_tensor) + ort_result = getattr(torch, binary_op)(ort_input, ort_other, out=ort_out_tensor) assert torch.allclose(cpu_result, ort_result.cpu()) assert torch.allclose(cpu_out_tensor, ort_out_tensor.cpu()) if alpha_supported: - cpu_result = eval( - compile( - "torch." + binary_op + "(cpu_input, cpu_other, alpha=2.5, out=cpu_out_tensor)", "", "eval" - ) - ) - ort_result = eval( - compile( - "torch." + binary_op + "(ort_input, ort_other, alpha=2.5, out=ort_out_tensor)", "", "eval" - ) - ) + cpu_result = getattr(torch, binary_op)(cpu_input, cpu_other, alpha=2.5, out=cpu_out_tensor) + ort_result = getattr(torch, binary_op)(ort_input, ort_other, alpha=2.5, out=ort_out_tensor) assert torch.allclose(cpu_result, ort_result.cpu()) assert torch.allclose(cpu_out_tensor, ort_out_tensor.cpu()) diff --git a/orttraining/orttraining/test/python/orttraining_test_ortmodule_api.py b/orttraining/orttraining/test/python/orttraining_test_ortmodule_api.py index f39d29a178..b291056f0e 100644 --- a/orttraining/orttraining/test/python/orttraining_test_ortmodule_api.py +++ b/orttraining/orttraining/test/python/orttraining_test_ortmodule_api.py @@ -3,19 +3,20 @@ # orttraining_test_ortmodule_api.py import copy +import inspect import itertools import math import os import pickle import random import tempfile +import time +import unittest.mock import warnings from collections import OrderedDict, namedtuple -from inspect import signature -from time import sleep -from unittest.mock import patch import _test_helpers +import numpy as np import onnx import pytest import torch @@ -380,12 +381,12 @@ def run_before_test_session(request): request.addfinalizer(remove_disable_fallback_from_env) -# TODO: This is a workaround for the problem that pytest is still cleaning up the previous test +# FIXME: This is a workaround for the problem that pytest is still cleaning up the previous test # while the next task already start. @pytest.fixture(autouse=True) def run_before_tests(): # wait for 50ms before starting the next test - sleep(0.05) + time.sleep(0.05) def _get_bert_for_sequence_classification_model( @@ -454,7 +455,7 @@ def test_forward_call_single_positional_argument(): model = NeuralNetSinglePositionalArgument(D_in, H, D_out).to(device) ort_model = ORTModule(model) # Check that the original forward signature is preserved. - assert signature(model.forward) == signature(ort_model.forward) + assert inspect.signature(model.forward) == inspect.signature(ort_model.forward) x = torch.randn(N, D_in, device=device) # Make sure model runs without any exception prediction = ort_model(x) @@ -470,7 +471,7 @@ def test_forward_call_multiple_positional_arguments(): model = NeuralNetMultiplePositionalArguments(input_size=D_in, hidden_size=H, num_classes=D_out).to(device) ort_model = ORTModule(model) # Check that the original forward signature is preserved. - assert signature(model.forward) == signature(ort_model.forward) + assert inspect.signature(model.forward) == inspect.signature(ort_model.forward) x = torch.randn(N, D_in, device=device) y = torch.randn(N, D_in, device=device) @@ -535,41 +536,42 @@ def test_forward_call_positional_and_keyword_arguments(): prediction.backward() +_ONE = torch.FloatTensor([1]) + + @pytest.mark.parametrize( - "forward_statement", + "forward_function", [ - "model(one)", - "model(x=one)", - "model(one, None, None)", - "model(one, None, z=None)", - "model(one, None)", - "model(x=one, y=one)", - "model(y=one, x=one)", - "model(y=one, z=None, x=one)", - "model(one, None, z=one)", - "model(x=one, z=one)", - "model(one, z=one)", - "model(one, z=one, y=one)", - "model(one, one, one)", - "model(one, None, one)", - "model(z=one, x=one, y=one)", - "model(z=one, x=one, y=None)", + lambda model: model(_ONE), + lambda model: model(x=_ONE), + lambda model: model(_ONE, None, None), + lambda model: model(_ONE, None, z=None), + lambda model: model(_ONE, None), + lambda model: model(x=_ONE, y=_ONE), + lambda model: model(y=_ONE, x=_ONE), + lambda model: model(y=_ONE, z=None, x=_ONE), + lambda model: model(_ONE, None, z=_ONE), + lambda model: model(x=_ONE, z=_ONE), + lambda model: model(_ONE, z=_ONE), + lambda model: model(_ONE, z=_ONE, y=_ONE), + lambda model: model(_ONE, _ONE, _ONE), + lambda model: model(_ONE, None, _ONE), + lambda model: model(z=_ONE, x=_ONE, y=_ONE), + lambda model: model(z=_ONE, x=_ONE, y=None), ], ) -def test_compare_pytorch_forward_call_positional_and_keyword_arguments(forward_statement): - one = torch.FloatTensor([1]) - +def test_compare_pytorch_forward_call_positional_and_keyword_arguments(forward_function): model = NeuralNetSimplePositionalAndKeywordArguments() - pytorch_result = eval(forward_statement + ".item()") + pytorch_result = forward_function(model).item() model = NeuralNetSimplePositionalAndKeywordArguments() model = ORTModule(model) - ortmodule_result = eval(forward_statement + ".item()") - ortmodule_result_again = eval(forward_statement + ".item()") + ortmodule_result = forward_function(model).item() + ortmodule_result_again = forward_function(model).item() assert ortmodule_result == ortmodule_result_again assert pytorch_result == ortmodule_result - prediction = eval(forward_statement).sum() + prediction = forward_function(model).sum() prediction.backward() @@ -1661,8 +1663,6 @@ def test_aten_multinomial(input_shape, num_samples, replacement): @pytest.mark.parametrize("input_shape", ([4, 2],)) def test_aten_argmax(input_shape): - import torch.nn.functional as F - class TopKGate(torch.nn.Module): def forward(self, input: torch.Tensor): indices = torch.argmax(input, dim=1) @@ -2189,7 +2189,6 @@ def test_ortmodule_inputs_with_dynamic_shape(): def test_bert_inputs_with_dynamic_shape(): - # create pytorch model with dropout disabled pt_model = _get_bert_for_sequence_classification_model( "cuda", is_training=True, hidden_dropout_prob=0.0, attention_probs_dropout_prob=0.0 @@ -2440,7 +2439,7 @@ def test_gpu_reserved_memory_with_torch_no_grad(): model_without_no_grad = ORTModule(model_without_no_grad) mem_reserved_after_export_without_torch_no_grad = 0 - with patch("torch.no_grad"): + with unittest.mock.patch("torch.no_grad"): model_without_no_grad(x, attention_mask=y, labels=z) mem_reserved_after_export_without_torch_no_grad = torch.cuda.memory_reserved(device) @@ -2608,9 +2607,7 @@ def test_exception_raised_for_custom_class_return_value_module(device): y = torch.randn(N, D_in, device=device) z = torch.randn(N, D_in, device=device) - from onnxruntime.training.ortmodule._fallback import _FallbackPolicy - - if _test_helpers.is_all_or_nothing_fallback_enabled(None, _FallbackPolicy.FALLBACK_UNSUPPORTED_DATA): + if _test_helpers.is_all_or_nothing_fallback_enabled(None, _fallback._FallbackPolicy.FALLBACK_UNSUPPORTED_DATA): # Fallback pt_out = pt_model(x, y, z) ort_out = ort_model(x, y, z) @@ -2664,9 +2661,7 @@ def test_model_with_multiple_devices_cpu_cuda(): pt_model = MultipleDeviceModel() x = torch.randn(20, 10) - from onnxruntime.training.ortmodule._fallback import _FallbackPolicy - - if _test_helpers.is_all_or_nothing_fallback_enabled(None, _FallbackPolicy.FALLBACK_UNSUPPORTED_DEVICE): + if _test_helpers.is_all_or_nothing_fallback_enabled(None, _fallback._FallbackPolicy.FALLBACK_UNSUPPORTED_DEVICE): # Fallback ort_model = ORTModule(copy.deepcopy(pt_model)) with pytest.raises(RuntimeError) as runtime_error: @@ -2695,9 +2690,8 @@ def test_model_with_multiple_devices_to_to(): pt_model = MultipleDeviceModel() x = torch.randn(20, 10) - from onnxruntime.training.ortmodule._fallback import _FallbackPolicy - if _test_helpers.is_all_or_nothing_fallback_enabled(None, _FallbackPolicy.FALLBACK_UNSUPPORTED_DEVICE): + if _test_helpers.is_all_or_nothing_fallback_enabled(None, _fallback._FallbackPolicy.FALLBACK_UNSUPPORTED_DEVICE): # Fallback with pytest.raises(RuntimeError) as runtime_error: ort_model = ORTModule(copy.deepcopy(pt_model)) @@ -2726,9 +2720,8 @@ def test_model_with_multiple_devices_to_cpu(): pt_model = MultipleDeviceModel() x = torch.randn(20, 10) - from onnxruntime.training.ortmodule._fallback import _FallbackPolicy - if _test_helpers.is_all_or_nothing_fallback_enabled(None, _FallbackPolicy.FALLBACK_UNSUPPORTED_DEVICE): + if _test_helpers.is_all_or_nothing_fallback_enabled(None, _fallback._FallbackPolicy.FALLBACK_UNSUPPORTED_DEVICE): # Fallback ort_model = ORTModule(copy.deepcopy(pt_model)) with pytest.raises(RuntimeError) as runtime_error: @@ -2757,9 +2750,8 @@ def test_model_with_multiple_devices_to_cuda(): pt_model = MultipleDeviceModel() x = torch.randn(20, 10) - from onnxruntime.training.ortmodule._fallback import _FallbackPolicy - if _test_helpers.is_all_or_nothing_fallback_enabled(None, _FallbackPolicy.FALLBACK_UNSUPPORTED_DEVICE): + if _test_helpers.is_all_or_nothing_fallback_enabled(None, _fallback._FallbackPolicy.FALLBACK_UNSUPPORTED_DEVICE): # Fallback ort_model = ORTModule(copy.deepcopy(pt_model)) with pytest.raises(RuntimeError) as runtime_error: @@ -2776,7 +2768,6 @@ def test_model_with_multiple_devices_to_cuda(): @pytest.mark.parametrize("device", ["cuda", "cuda:0", "cuda:1", "cuda:2"]) def test_model_with_different_cuda_devices(device): - # Trick to run this test in single GPU machines device_id = _utils.get_device_index(device) if device_id >= torch.cuda.device_count(): @@ -2933,7 +2924,6 @@ def test_nested_return_value_module(device): @pytest.mark.parametrize("data_device, model_device", (["cuda", "cpu"], ["cpu", "cuda"])) def test_forward_data_and_model_on_different_devices(data_device, model_device): - os.environ["ORTMODULE_SKIPCHECK_POLICY"] = "SKIP_CHECK_DISABLED" N, D_in, H, D_out = 64, 784, 500, 10 @@ -2941,13 +2931,12 @@ def test_forward_data_and_model_on_different_devices(data_device, model_device): ort_model = ORTModule(model) # When exporting the model, ensure device is same between input data and model (else pytorch will raise while exporting) x = torch.randn(N, D_in, device=model_device) - output = ort_model(x) + _ = ort_model(x) # Now that the model has been exported, feed in data from device other than the model device x = torch.randn(N, D_in, device=data_device) - from onnxruntime.training.ortmodule._fallback import ORTModuleDeviceException, _FallbackPolicy - if _test_helpers.is_all_or_nothing_fallback_enabled(None, _FallbackPolicy.FALLBACK_UNSUPPORTED_DEVICE): + if _test_helpers.is_all_or_nothing_fallback_enabled(None, _fallback._FallbackPolicy.FALLBACK_UNSUPPORTED_DEVICE): # Fallback with pytest.raises(RuntimeError) as runtime_error: ort_model(x) @@ -2956,7 +2945,7 @@ def test_forward_data_and_model_on_different_devices(data_device, model_device): ) else: # ORT backend - with pytest.raises(ORTModuleDeviceException) as runtime_error: + with pytest.raises(_fallback.ORTModuleDeviceException) as runtime_error: ort_model(x) assert ( f"Input argument to forward found on device {torch.device(x.device)}, but expected it to be on module device {ort_model._torch_module._execution_manager(ort_model._is_training())._device}." @@ -3067,7 +3056,6 @@ def test_model_wrapped_inside_torch_no_grad(): def test_model_initializer_requires_grad_changes_from_one_forward_to_next(): - os.environ["ORTMODULE_SKIPCHECK_POLICY"] = "SKIP_CHECK_DISABLED" device = "cuda" @@ -3129,7 +3117,7 @@ def test_model_with_registered_buffers(): model = NeuralNetWithRegisteredBuffer(D_in, H, D_out).to(device) ort_model = ORTModule(model) # Check that the original forward signature is preserved. - assert signature(model.forward) == signature(ort_model.forward) + assert inspect.signature(model.forward) == inspect.signature(ort_model.forward) x = torch.randn(N, D_in, device=device) # Make sure model runs without any exception output = ort_model(x) @@ -3161,7 +3149,7 @@ def test_model_with_unused_registered_buffers(): model = UnusedBufferNet(D_in, H, D_out).to(device) ort_model = ORTModule(model) # Check that the original forward signature is preserved. - assert signature(model.forward) == signature(ort_model.forward) + assert inspect.signature(model.forward) == inspect.signature(ort_model.forward) x = torch.randn(N, D_in, device=device) # Make sure model runs without any exception output = ort_model(x) @@ -3194,7 +3182,7 @@ def test_model_with_constant_and_registered_parameters(): model = NeuralNetWithRegisteredParamsWithConstant(D_in, H, D_out).to(device) ort_model = ORTModule(model) # Check that the original forward signature is preserved. - assert signature(model.forward) == signature(ort_model.forward) + assert inspect.signature(model.forward) == inspect.signature(ort_model.forward) x = torch.randn(N, D_in, device=device) # Make sure model runs without any exception output = ort_model(x) @@ -3460,7 +3448,6 @@ def test_train_eval_with_various_outputs(): def test_forward_dynamic_args(): - os.environ["ORTMODULE_SKIPCHECK_POLICY"] = "SKIP_CHECK_DISABLED" device = "cuda" @@ -3474,7 +3461,6 @@ def test_forward_dynamic_args(): # Make sure model runs without any exception for i in range(2): - # Test both train and inference mode if i % 2 == 0: model.train() @@ -3506,7 +3492,6 @@ def test_forward_dynamic_args(): def test_forward_dynamic_kwargs(): - os.environ["ORTMODULE_SKIPCHECK_POLICY"] = "SKIP_CHECK_DISABLED" one = torch.FloatTensor([1]) @@ -3515,7 +3500,6 @@ def test_forward_dynamic_kwargs(): # Make sure model runs without any exception for i in range(2): - # Test both train and inference mode if i % 2 == 0: model.train() @@ -3562,46 +3546,48 @@ def test_forward_dynamic_kwargs(): @pytest.mark.parametrize( - "forward_statement", + "forward_function", [ # Only pos_X, pos_X as positionals - "model(pos_0, pos_1)", + lambda model, pos_0, pos_1, kw_0, kw_1, args, kwargs: model(pos_0, pos_1), # Only pos_X, pos_X as keywords - "model(pos_0=pos_0, pos_1=pos_1)", + lambda model, pos_0, pos_1, kw_0, kw_1, args, kwargs: model(pos_0=pos_0, pos_1=pos_1), # pos_X + *args, pos_X as positionals - "model(pos_0, pos_1, *args)", + lambda model, pos_0, pos_1, kw_0, kw_1, args, kwargs: model(pos_0, pos_1, *args), # pos_X + kw_X, pos_X as positionals - "model(pos_0, pos_1, kw_0=kw_0, kw_1=kw_1)", + lambda model, pos_0, pos_1, kw_0, kw_1, args, kwargs: model(pos_0, pos_1, kw_0=kw_0, kw_1=kw_1), # pos_X + kw_X, pos_X as keywords - "model(pos_0=pos_0, pos_1=pos_1, kw_0=kw_0, kw_1=kw_1)", + lambda model, pos_0, pos_1, kw_0, kw_1, args, kwargs: model(pos_0=pos_0, pos_1=pos_1, kw_0=kw_0, kw_1=kw_1), # pos_X + kw_X, pos_X as positionals (missing kw_1) - "model(pos_0, pos_1, kw_0=kw_0)", + lambda model, pos_0, pos_1, kw_0, kw_1, args, kwargs: model(pos_0, pos_1, kw_0=kw_0), # pos_X + kw_X, pos_X as keywords (missing kw_1) - "model(pos_0=pos_0, pos_1=pos_1, kw_0=kw_0)", + lambda model, pos_0, pos_1, kw_0, kw_1, args, kwargs: model(pos_0=pos_0, pos_1=pos_1, kw_0=kw_0), # pos_X + kw_X, pos_X as positionals (missing kw_0) - "model(pos_0, pos_1, kw_1=kw_1)", + lambda model, pos_0, pos_1, kw_0, kw_1, args, kwargs: model(pos_0, pos_1, kw_1=kw_1), # pos_X + kw_X, pos_X as keywords (missing kw_0) - "model(pos_0=pos_0, pos_1=pos_1, kw_1=kw_1)", + lambda model, pos_0, pos_1, kw_0, kw_1, args, kwargs: model(pos_0=pos_0, pos_1=pos_1, kw_1=kw_1), # pos_X + kwargs, pos_X as positionals - "model(pos_0, pos_1, **kwargs)", + lambda model, pos_0, pos_1, kw_0, kw_1, args, kwargs: model(pos_0, pos_1, **kwargs), # pos_X + kwargs, pos_X as keywords - "model(pos_0=pos_0, pos_1=pos_1, **kwargs)", + lambda model, pos_0, pos_1, kw_0, kw_1, args, kwargs: model(pos_0=pos_0, pos_1=pos_1, **kwargs), # pos_X + *args + kw_X, pos_X as positionals - "model(pos_0, pos_1, *args, kw_0=kw_0, kw_1=kw_1)", + lambda model, pos_0, pos_1, kw_0, kw_1, args, kwargs: model(pos_0, pos_1, *args, kw_0=kw_0, kw_1=kw_1), # pos_X + *args + kw_X, pos_X as positionals (missing kw_0) - "model(pos_0, pos_1, *args, kw_1=kw_1)", + lambda model, pos_0, pos_1, kw_0, kw_1, args, kwargs: model(pos_0, pos_1, *args, kw_1=kw_1), # pos_X + *args + kw_X, pos_X as positionals (missing kw_1) - "model(pos_0, pos_1, *args, kw_0=kw_0)", + lambda model, pos_0, pos_1, kw_0, kw_1, args, kwargs: model(pos_0, pos_1, *args, kw_0=kw_0), # pos_X + *args + kwargs, pos_X as positionals - "model(pos_0, pos_1, *args, **kwargs)", + lambda model, pos_0, pos_1, kw_0, kw_1, args, kwargs: model(pos_0, pos_1, *args, **kwargs), # pos_X + *args + kw_X + kwargs, pos_X as positionals - "model(pos_0, pos_1, *args, kw_0=kw_0, kw_1=kw_1, **kwargs)", + lambda model, pos_0, pos_1, kw_0, kw_1, args, kwargs: model( + pos_0, pos_1, *args, kw_0=kw_0, kw_1=kw_1, **kwargs + ), # pos_X + *args + kw_X + kwargs, pos_X as positionals (missing kw_0) - "model(pos_0, pos_1, *args, kw_1=kw_1, **kwargs)", + lambda model, pos_0, pos_1, kw_0, kw_1, args, kwargs: model(pos_0, pos_1, *args, kw_1=kw_1, **kwargs), # pos_X + *args + kw_X + kwargs, pos_X as positionals (missing kw_1) - "model(pos_0, pos_1, *args, kw_0=kw_0, **kwargs)", + lambda model, pos_0, pos_1, kw_0, kw_1, args, kwargs: model(pos_0, pos_1, *args, kw_0=kw_0, **kwargs), ], ) -def test_forward_call_kwargs_input(forward_statement): +def test_forward_call_kwargs_input(forward_function): class KwargsNet(torch.nn.Module): def __init__(self, input_size, hidden_size, num_classes): super(KwargsNet, self).__init__() @@ -3644,7 +3630,7 @@ def test_forward_call_kwargs_input(forward_statement): kwargs = {"kwargs_0": torch.randn(N, D_in, device=device), "kwargs_1": torch.randn(D_in, D_in, device=device)} # Training step - prediction = eval(forward_statement) + prediction = forward_function(model, pos_0, pos_1, kw_0, kw_1, args, kwargs) assert prediction is not None prediction = prediction.sum() prediction.backward() @@ -3669,7 +3655,6 @@ def test_repro_iscontiguous(): def test_forward_call_default_input(): - os.environ["ORTMODULE_SKIPCHECK_POLICY"] = "SKIP_CHECK_DISABLED" class UnusedNet(torch.nn.Module): @@ -3795,7 +3780,6 @@ def test_forward_call_kwargs_input_unexpected_order(): def test_forward_call_lots_None(): - os.environ["ORTMODULE_SKIPCHECK_POLICY"] = "SKIP_CHECK_DISABLED" class NoneNet(torch.nn.Module): @@ -3943,7 +3927,6 @@ def test_primitive_inputs(bool_argument, int_argument, float_argument): @pytest.mark.parametrize("bool_arguments", [(True, False), (False, True)]) def test_changing_bool_input_re_exports_model(bool_arguments): - os.environ["ORTMODULE_SKIPCHECK_POLICY"] = "SKIP_CHECK_DISABLED" class PrimitiveTypesInputNet(torch.nn.Module): @@ -4116,7 +4099,6 @@ def test_output_order(): @pytest.mark.parametrize("device", ["cuda", "cpu", None]) def test_stateless_model_specified_device(device): - N, D_in, H, D_out = 32, 784, 500, 10 pt_model = StatelessModel().to(device) ort_model = ORTModule(copy.deepcopy(pt_model)) @@ -4131,7 +4113,6 @@ def test_stateless_model_specified_device(device): def test_stateless_model_unspecified_device(): - N, D_in, H, D_out = 32, 784, 500, 10 pt_model = StatelessModel() ort_model = ORTModule(copy.deepcopy(pt_model)) @@ -4238,7 +4219,6 @@ def test_hf_save_pretrained(): def test_ortmodule_string_inputs_are_ignored(): - pt_model = MyStrNet() ort_model = ORTModule(copy.deepcopy(pt_model)) x = torch.randn(1, 2) @@ -4346,7 +4326,6 @@ def test_ortmodule_nested_list_input(): @pytest.mark.parametrize("mode", ["training", "inference"]) def test_debug_options_save_onnx_models_os_environment(mode): - device = "cuda" N, D_in, H, D_out = 64, 784, 500, 10 # Create a temporary directory for the onnx_models @@ -4370,7 +4349,6 @@ def test_debug_options_save_onnx_models_os_environment(mode): @pytest.mark.parametrize("mode", ["training", "inference"]) def test_debug_options_save_onnx_models_cwd(mode): - device = "cuda" N, D_in, H, D_out = 64, 784, 500, 10 model = NeuralNetSinglePositionalArgument(D_in, H, D_out).to(device) @@ -4395,7 +4373,6 @@ def test_debug_options_save_onnx_models_cwd(mode): def test_debug_options_save_onnx_models_validate_fail_on_non_writable_dir(): - os.environ["ORTMODULE_SAVE_ONNX_PATH"] = "/non/existent/directory" with pytest.raises(Exception) as ex_info: _ = DebugOptions(save_onnx=True, onnx_prefix="my_model") @@ -4793,7 +4770,6 @@ def test_ortmodule_setattr_ortmodule_attribute(): def test_ortmodule_setattr_signals_model_changed(): - os.environ["ORTMODULE_SKIPCHECK_POLICY"] = "SKIP_CHECK_DISABLED" class UserNet(torch.nn.Module): @@ -4928,7 +4904,6 @@ def test_ortmodule_skip_check_load_from_os_env(policy_str, policy): @pytest.mark.parametrize("is_training,deterministic", list(itertools.product([True, False], repeat=2))) def test_ortmodule_determinism_flag(is_training, deterministic): - torch.use_deterministic_algorithms(deterministic) N, D_in, H, D_out = 64, 784, 500, 10 @@ -4940,9 +4915,7 @@ def test_ortmodule_determinism_flag(is_training, deterministic): x = torch.randn(N, D_in) _ = model(x) - from onnxruntime.training.ortmodule import _are_deterministic_algorithms_enabled - - assert _are_deterministic_algorithms_enabled() is torch.are_deterministic_algorithms_enabled() + assert ortmodule_module._are_deterministic_algorithms_enabled() is torch.are_deterministic_algorithms_enabled() def test_ortmodule_gradient_builder(): @@ -5053,7 +5026,6 @@ def test_override_pytorch_exporter_kwargs_using_ortmodule_extension(): def test_ortmodule_fused_adam_optimizer_correctness(): - torch.manual_seed(8888) device = "cuda" @@ -5102,7 +5074,6 @@ def test_ortmodule_fused_adam_optimizer_correctness(): def test_ortmodule_fused_adam_optimizer_correctness_torch(): - torch.manual_seed(8888) device = "cuda" @@ -5225,13 +5196,11 @@ def test_tanh_grad(): def test__defined_from_envvar(): - from onnxruntime.training import ortmodule - os.environ["DUMMY_ORTMODULE"] = "15" - assert ortmodule._defined_from_envvar("DUMMY_ORTMODULE", 14) == 15 + assert ortmodule_module._defined_from_envvar("DUMMY_ORTMODULE", 14) == 15 os.environ["DUMMY_ORTMODULE"] = "15j" with warnings.catch_warnings(record=True) as w: - assert ortmodule._defined_from_envvar("DUMMY_ORTMODULE", 14) == 14 + assert ortmodule_module._defined_from_envvar("DUMMY_ORTMODULE", 14) == 14 assert len(w) == 1 assert issubclass(w[-1].category, UserWarning) assert "Unable to overwrite constant" in str(w[-1].message) @@ -5262,12 +5231,10 @@ def test_sigmoid_grad_opset13(): N, D_in, H, D_out = 120, 15360, 500, 15360 pt_model = NeuralNetSigmoid(D_in, H, D_out).to(device) - from onnxruntime.training import ortmodule - - old_opst_cst = ortmodule.ONNX_OPSET_VERSION + old_opst_cst = ortmodule_module.ONNX_OPSET_VERSION old_opset = os.getenv("ORTMODULE_ONNX_OPSET_VERSION", None) os.environ["ORTMODULE_ONNX_OPSET_VERSION"] = "13" - assert ortmodule.ONNX_OPSET_VERSION == 15 + assert ortmodule_module.ONNX_OPSET_VERSION == 15 ort_model = ORTModule(copy.deepcopy(pt_model)) @@ -5293,8 +5260,8 @@ def test_sigmoid_grad_opset13(): del os.environ["ORTMODULE_ONNX_OPSET_VERSION"] else: os.environ["ORTMODULE_ONNX_OPSET_VERSION"] = old_opset - assert ortmodule.ONNX_OPSET_VERSION == 13 - ortmodule.ONNX_OPSET_VERSION = old_opst_cst + assert ortmodule_module.ONNX_OPSET_VERSION == 13 + ortmodule_module.ONNX_OPSET_VERSION = old_opst_cst @pytest.mark.parametrize("opset_version", [12, 13, 14, 15]) @@ -5307,10 +5274,7 @@ def test_opset_version_change(opset_version): ort_model = ORTModule(model) - # Must import a namespace containing ONNX_OPSET_VERSION, not ONNX_OPSET_VERSION directly - from onnxruntime.training import ortmodule - - ortmodule.ONNX_OPSET_VERSION = opset_version + ortmodule_module.ONNX_OPSET_VERSION = opset_version # Make sure model runs without any exception prediction = ort_model(x) @@ -5324,7 +5288,6 @@ def test_opset_version_change(opset_version): def test_serialize_ortmodule(): - device = "cuda" N, D_in, H, D_out = 64, 784, 500, 10 pt_model = SerializationNet(D_in, H, D_out).to(device) @@ -5455,8 +5418,6 @@ def test_check_opset_is_default_opset_after_training(): def test_random_states_unchanged_for_ortmodule(): - import numpy - os.environ["ORTMODULE_FALLBACK_RETRY"] = "False" class NeuralNetSlice(torch.nn.Module): @@ -5473,8 +5434,8 @@ def test_random_states_unchanged_for_ortmodule(): if isinstance(a, tuple): assert len(a) == len(b) return all([random_state_equal(a_i, b_i) for a_i, b_i in zip(a, b)]) - if isinstance(a, numpy.ndarray): - return numpy.array_equal(a, b) + if isinstance(a, np.ndarray): + return np.array_equal(a, b) if isinstance(a, torch.Tensor): return torch.equal(a, b) return a == b