From 9fae8fcdff03aeaab39f418f2ca71e9e83957ddb Mon Sep 17 00:00:00 2001 From: Ansha Yu Date: Thu, 30 Aug 2018 22:38:42 -0700 Subject: [PATCH] framework for committed serialized tests (#10594) Summary: Generate serialized test inputs/outputs/backward graphs of tests inside `caffe2/python/operator_test` that call assertSerializedOperatorCheck(). Tests should be decorated with serialized_test.serialized_test_util.given_and_seeded to run both hypothesis tests that are actually random and a single fixed-seed hypothesis test. To use: 1. Refactor your test to be a SerializedTestCase 1a. Decorate it with given_and_seeded 1b. Call testWithArgs in main 2. Run your test with -g to generate the output. Check it in. 3. Subsequent runs of the test without generating the output will check against the checked-in test case. Details: Run your test with `python caffe2/python/operator_test/[your_test].py -g` Outputs are in `caffe2/python/serialized_test/data`. The operator test outputs are in a further subdirectory `operator_test`, to allow for other tests in the future (model zoo tests?) Currently, we've only refactored weighted_sum_test to use this, but in the next diff, we'll refactor as many as possible. The directory structure may also change since there are usually multiple tests in a single file, so we may create more structure to account for that. 
Pull Request resolved: https://github.com/pytorch/pytorch/pull/10594 Reviewed By: ezyang Differential Revision: D9370359 Pulled By: ajyu fbshipit-source-id: 2ce77389cd8bcc0255d3bccd61569833e545ede8 --- .jenkins/caffe2/test.sh | 4 +- caffe2/python/gradient_checker.py | 8 +- caffe2/python/hypothesis_test_util.py | 33 +++ caffe2/python/operator_test/__init__.py | 0 .../python/operator_test/weighted_sum_test.py | 18 +- caffe2/python/serialized_test/README.md | 12 + caffe2/python/serialized_test/__init__.py | 0 .../gradient_0.pb | Bin 0 -> 67 bytes .../inputs.npz | Bin 0 -> 390 bytes .../operator_0.pb | Bin 0 -> 43 bytes .../outputs.npz | Bin 0 -> 234 bytes .../serialized_test/serialized_test_util.py | 208 ++++++++++++++++++ 12 files changed, 274 insertions(+), 9 deletions(-) create mode 100644 caffe2/python/operator_test/__init__.py create mode 100644 caffe2/python/serialized_test/README.md create mode 100644 caffe2/python/serialized_test/__init__.py create mode 100644 caffe2/python/serialized_test/data/operator_test/weighted_sum_test.test_weighted_sum/gradient_0.pb create mode 100644 caffe2/python/serialized_test/data/operator_test/weighted_sum_test.test_weighted_sum/inputs.npz create mode 100644 caffe2/python/serialized_test/data/operator_test/weighted_sum_test.test_weighted_sum/operator_0.pb create mode 100644 caffe2/python/serialized_test/data/operator_test/weighted_sum_test.test_weighted_sum/outputs.npz create mode 100644 caffe2/python/serialized_test/serialized_test_util.py diff --git a/.jenkins/caffe2/test.sh b/.jenkins/caffe2/test.sh index 40e3e21417b..b0f9c413826 100755 --- a/.jenkins/caffe2/test.sh +++ b/.jenkins/caffe2/test.sh @@ -49,7 +49,7 @@ fi mkdir -p $TEST_DIR/{cpp,python} -cd ${INSTALL_PREFIX} +cd "${WORKSPACE}" # C++ tests echo "Running C++ tests.." @@ -137,6 +137,8 @@ echo "Running Python tests.." 
"$CAFFE2_PYPATH/python" \ "${EXTRA_TESTS[@]}" +cd ${INSTALL_PREFIX} + if [[ -n "$INTEGRATED" ]]; then pip install --user torchvision "$ROOT_DIR/scripts/onnx/test.sh" diff --git a/caffe2/python/gradient_checker.py b/caffe2/python/gradient_checker.py index 6b93aeceb88..f4aaf9297d3 100644 --- a/caffe2/python/gradient_checker.py +++ b/caffe2/python/gradient_checker.py @@ -11,6 +11,11 @@ from caffe2.python import core, workspace, net_drawer from caffe2.proto import caffe2_pb2 +def getGradientForOp(op): + return core.GradientRegistry.GetGradientForOp( + op, [s + '_grad' for s in op.output]) + + def _get_grad_blob(grad_map, input_to_check): grad_blob = grad_map[input_to_check] @@ -257,8 +262,7 @@ class GradientChecker: if grad_ops is None: # TODO(jiayq): use the gradient registration instead of the old # hack. - grad_ops, g_input = core.GradientRegistry.GetGradientForOp( - op, [s + '_grad' for s in op.output]) + grad_ops, g_input = getGradientForOp(op) dims_to_check = inputs[input_to_check].size _input_device_options = input_device_options or \ diff --git a/caffe2/python/hypothesis_test_util.py b/caffe2/python/hypothesis_test_util.py index f640f6db20e..5cc18f99bd9 100644 --- a/caffe2/python/hypothesis_test_util.py +++ b/caffe2/python/hypothesis_test_util.py @@ -318,6 +318,38 @@ def runOpBenchmark( return ret +def runOpOnInput( + device_option, + op, + inputs, + input_device_options=None, +): + op = copy.deepcopy(op) + op.device_option.CopyFrom(device_option) + + with temp_workspace(): + if (len(op.input) > len(inputs)): + raise ValueError( + 'must supply an input for each input on the op: %s vs %s' % + (op.input, inputs)) + _input_device_options = input_device_options or \ + core.InferOpBlobDevicesAsDict(op)[0] + for (n, b) in zip(op.input, inputs): + workspace.FeedBlob( + n, + b, + device_option=_input_device_options.get(n, device_option) + ) + workspace.RunOperatorOnce(op) + outputs_to_check = list(range(len(op.output))) + outs = [] + for output_index in 
outputs_to_check: + output_blob_name = op.output[output_index] + output = workspace.FetchBlob(output_blob_name) + outs.append(output) + return outs + + class HypothesisTestCase(test_util.TestCase): """ A unittest.TestCase subclass with some helper functions for @@ -594,6 +626,7 @@ class HypothesisTestCase(test_util.TestCase): op, inputs, reference_outputs, output_to_grad, grad_reference, threshold=threshold) + return outs def assertValidationChecks( diff --git a/caffe2/python/operator_test/__init__.py b/caffe2/python/operator_test/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/caffe2/python/operator_test/weighted_sum_test.py b/caffe2/python/operator_test/weighted_sum_test.py index 9fe052612cd..007f208de5e 100644 --- a/caffe2/python/operator_test/weighted_sum_test.py +++ b/caffe2/python/operator_test/weighted_sum_test.py @@ -4,20 +4,22 @@ from __future__ import print_function from __future__ import unicode_literals from caffe2.python import core -from hypothesis import given import caffe2.python.hypothesis_test_util as hu +import caffe2.python.serialized_test.serialized_test_util as serial import hypothesis.strategies as st import numpy as np -class TestWeightedSumOp(hu.HypothesisTestCase): +class TestWeightedSumOp(serial.SerializedTestCase): - @given(n=st.integers(5, 8), m=st.integers(1, 1), - d=st.integers(2, 4), grad_on_w=st.booleans(), - **hu.gcs_cpu_only) - def test_weighted_sum(self, n, m, d, grad_on_w, gc, dc): + @serial.given_and_seeded( + n=st.integers(5, 8), m=st.integers(1, 1), d=st.integers(2, 4), + grad_on_w=st.booleans(), seed=st.integers(min_value=0, max_value=65535), + **hu.gcs_cpu_only) + def test_weighted_sum(self, n, m, d, grad_on_w, seed, gc, dc): input_names = [] input_vars = [] + np.random.seed(seed) for i in range(m): X_name = 'X' + str(i) w_name = 'w' + str(i) @@ -59,3 +61,7 @@ class TestWeightedSumOp(hu.HypothesisTestCase): outputs_to_check=i, outputs_with_grads=[0], ) + + +if __name__ == "__main__": + 
serial.testWithArgs() diff --git a/caffe2/python/serialized_test/README.md b/caffe2/python/serialized_test/README.md new file mode 100644 index 00000000000..00d104d309f --- /dev/null +++ b/caffe2/python/serialized_test/README.md @@ -0,0 +1,12 @@ +# Serialized operator test framework + +Major functionality lives in `serialized_test_util.py` + +## How to use +1. Extend the test case class from `SerializedTestCase` +2. Change the `@given` decorator to `@given_and_seeded`. This runs a seeded hypothesis test instance which will generate outputs if desired in addition to the unseeded hypothesis tests normally run. +3. Change a call to `unittest.main()` in `__main__` to `testWithArgs`. +4. Run your test `python caffe2/python/operator_test/my_test.py -g` to generate serialized outputs. They will live in `caffe2/python/serialized_test/data/operator_test`, one folder per test function +5. Thereafter, runs of the test without the flag will load serialized outputs and gradient operators for comparison against the seeded run. If for any reason the seeded run's inputs are different (this can happen with different hypothesis versions or different setups), then we'll run the serialized inputs through the serialized operator to get a runtime output for comparison. + +If we'd like to extend the test framework beyond that for operator tests, we can create a new subfolder for them inside `caffe2/python/serialized_test/data`. 
diff --git a/caffe2/python/serialized_test/__init__.py b/caffe2/python/serialized_test/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/caffe2/python/serialized_test/data/operator_test/weighted_sum_test.test_weighted_sum/gradient_0.pb b/caffe2/python/serialized_test/data/operator_test/weighted_sum_test.test_weighted_sum/gradient_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..ba59745bd14a7ba83499096655b8daf060377400 GIT binary patch literal 67 zcmd;Li;Pb%N=)HmiZI|}DmM^fk1zlWNHHi0ho@$yXOyI-1efNz1Jz`v=9Osia&dyx Q$LHt8mrF1hF?cWn0Bc?nyZ`_I literal 0 HcmV?d00001 diff --git a/caffe2/python/serialized_test/data/operator_test/weighted_sum_test.test_weighted_sum/inputs.npz b/caffe2/python/serialized_test/data/operator_test/weighted_sum_test.test_weighted_sum/inputs.npz new file mode 100644 index 0000000000000000000000000000000000000000..3f35572017ab82066d16477130bd4512b1aaca86 GIT binary patch literal 390 zcmWIWW@Zs#U|`^2(3&CU%Og9rTY!;)p&N*~fTEds1*Ij$dU*wvv)_onk`_35BH&oS z+If?r=EN+?Te=|b@%nX(7tEU$9}+ZWhWPyWDU-N_%Dm5L`muJ2r>lr9VfrcMW;M%- zYb)0jhIMSqp6t1Fr^##YlRt|>_TITPXVRG?ixwU6TK8kol_!7pJesuV(i5R|PHr7q z>-LCr$$)ln;4(OYBk{#Ns5bL9ZW)otd*8p*x$ z=g+@hufQOD@0+UU+>Na{Qj$HA9E)!_x^1j7-kQqrb@@z(uf+{tZ!dV2&hU!=>Wv+r zT4Oh!bFsemHS^oj2JLz1-yC4a`W(w zSqCplo?2vYDf(jCl}ReL3<2JZOd`y1RM)k zJ8x3doR}qfOBdu#PFu8Y@q&5N;zNR_%n+X+KV=e^P`URRO+VHy@pKijB}_l1+N`*i zS#i0|vf|px^@MTVzop`Dba-dI;d%G_W_}>g=IPreY