From d5f8c8f3babd27bd448aeafcc4e890098bf31b17 Mon Sep 17 00:00:00 2001
From: Edward Yang <ezyang@fb.com>
Date: Thu, 12 Mar 2020 20:22:23 -0700
Subject: [PATCH] Revert D20121169: [pytorch][PR] ONNX Export Support for
 CrossEntropyLoss

Test Plan: revert-hammer

Differential Revision:
D20121169

Original commit changeset: 7b56617e8c60

fbshipit-source-id: d7f302d1e54f3c978c3be0a0ad1ee600790a5b27
---
 aten/src/ATen/core/interned_strings.h         |   3 -
 caffe2/python/onnx/tests/onnx_backend_test.py |  13 --
 ...tOperators.test_softmaxcrossentropy.expect |  60 ------
 ...st_softmaxcrossentropy_ignore_index.expect | 178 ------------------
 test/onnx/test_operators.py                   |  10 -
 test/onnx/test_pytorch_onnx_onnxruntime.py    |  57 ------
 third_party/onnx                              |   2 +-
 torch/csrc/jit/passes/onnx/peephole.cpp       |  29 ---
 8 files changed, 1 insertion(+), 351 deletions(-)
 delete mode 100644 test/onnx/expect/TestOperators.test_softmaxcrossentropy.expect
 delete mode 100644 test/onnx/expect/TestOperators.test_softmaxcrossentropy_ignore_index.expect

diff --git a/aten/src/ATen/core/interned_strings.h b/aten/src/ATen/core/interned_strings.h
index 2e98dd47795..e868985b158 100644
--- a/aten/src/ATen/core/interned_strings.h
+++ b/aten/src/ATen/core/interned_strings.h
@@ -221,9 +221,6 @@ namespace c10 {
   _(onnx, SequenceInsert)            \
   _(onnx, ConcatFromSequence)        \
   _(onnx, Identity)                  \
-  _(onnx, SoftmaxCrossEntropyLoss)   \
-  _(onnx, NegativeLogLikelihoodLoss) \
-  _(onnx, LogSoftmax)                \
   FORALL_ATTR_BASE_SYMBOLS(_)        \
   _(attr, Subgraph)                  \
   _(attr, ReverseSubgraph)           \
diff --git a/caffe2/python/onnx/tests/onnx_backend_test.py b/caffe2/python/onnx/tests/onnx_backend_test.py
index 4dc130c98d8..45bf6762f9d 100644
--- a/caffe2/python/onnx/tests/onnx_backend_test.py
+++ b/caffe2/python/onnx/tests/onnx_backend_test.py
@@ -96,18 +96,6 @@ backend_test.exclude(r'(test_hardsigmoid'  # Does not support Hardsigmoid.
                      '|test_.*negative_ind.*'  # negative axis is not supported yet
                      '|test_argmax_.*select_last_index.*'  # unsupported case
                      '|test_argmin_.*select_last_index_.*'  # unsupported case
-                     '|test_gradient_of_add_and_mul_cpu'  # cannot translate Gradient op
-                     '|test_celu_cpu'  # cannot translate Celu op
-                     '|test_celu_expanded_cpu'
-                     '|test_gradient_of_add_cpu' # cannot translate Gradient op
-                     '|test_greater_equal.*'
-                     '|test_min_.*_cpu'  # Tensor type mismatch
-                     '|test_max_.*_cpu'  # Tensor type mismatch
-                     '|test_less_equal.*'
-                     '|test_inverse.*'
-                     '|test_softmax_cross_entropy.*'
-                     '|test_mean_square_distance.*'
-                     '|test_unfoldtodepth.*'
                      ')')
 
 # Quick patch to unbreak master CI, is working on the debugging.
@@ -130,7 +118,6 @@ if 'JENKINS_URL' in os.environ:
     backend_test.exclude(r'(test_vgg19|test_vgg)')
 
 # import all test cases at global scope to make them visible to python.unittest
-print(backend_test)
 globals().update(backend_test
                  .enable_report()
                  .test_cases)
diff --git a/test/onnx/expect/TestOperators.test_softmaxcrossentropy.expect b/test/onnx/expect/TestOperators.test_softmaxcrossentropy.expect
deleted file mode 100644
index c6de0f9d58d..00000000000
--- a/test/onnx/expect/TestOperators.test_softmaxcrossentropy.expect
+++ /dev/null
@@ -1,60 +0,0 @@
-ir_version: 6
-producer_name: "pytorch"
-producer_version: "1.4"
-graph {
-  node {
-    input: "input"
-    input: "target"
-    output: "2"
-    name: "SoftmaxCrossEntropyLoss_0"
-    op_type: "SoftmaxCrossEntropyLoss"
-    attribute {
-      name: "reduction"
-      s: "mean"
-      type: STRING
-    }
-  }
-  name: "torch-jit-export"
-  input {
-    name: "input"
-    type {
-      tensor_type {
-        elem_type: 1
-        shape {
-          dim {
-            dim_value: 3
-          }
-          dim {
-            dim_value: 5
-          }
-        }
-      }
-    }
-  }
-  input {
-    name: "target"
-    type {
-      tensor_type {
-        elem_type: 7
-        shape {
-          dim {
-            dim_value: 3
-          }
-        }
-      }
-    }
-  }
-  output {
-    name: "2"
-    type {
-      tensor_type {
-        elem_type: 1
-        shape {
-        }
-      }
-    }
-  }
-}
-opset_import {
-  version: 12
-}
diff --git a/test/onnx/expect/TestOperators.test_softmaxcrossentropy_ignore_index.expect b/test/onnx/expect/TestOperators.test_softmaxcrossentropy_ignore_index.expect
deleted file mode 100644
index 68965c57bd9..00000000000
--- a/test/onnx/expect/TestOperators.test_softmaxcrossentropy_ignore_index.expect
+++ /dev/null
@@ -1,178 +0,0 @@
-ir_version: 6
-producer_name: "pytorch"
-producer_version: "1.4"
-graph {
-  node {
-    input: "input"
-    input: "target"
-    output: "2"
-    name: "SoftmaxCrossEntropyLoss_0"
-    op_type: "SoftmaxCrossEntropyLoss"
-    attribute {
-      name: "reduction"
-      s: "none"
-      type: STRING
-    }
-  }
-  node {
-    input: "2"
-    output: "3"
-    name: "Shape_1"
-    op_type: "Shape"
-  }
-  node {
-    input: "3"
-    output: "4"
-    name: "ConstantOfShape_2"
-    op_type: "ConstantOfShape"
-    attribute {
-      name: "value"
-      t {
-        dims: 1
-        data_type: 1
-        raw_data: "\000\000\000\000"
-      }
-      type: TENSOR
-    }
-  }
-  node {
-    output: "5"
-    name: "Constant_3"
-    op_type: "Constant"
-    attribute {
-      name: "value"
-      t {
-        data_type: 7
-        raw_data: "\001\000\000\000\000\000\000\000"
-      }
-      type: TENSOR
-    }
-  }
-  node {
-    input: "target"
-    input: "5"
-    output: "6"
-    name: "Equal_4"
-    op_type: "Equal"
-  }
-  node {
-    input: "6"
-    input: "4"
-    input: "2"
-    output: "7"
-    name: "Where_5"
-    op_type: "Where"
-  }
-  node {
-    input: "7"
-    output: "8"
-    name: "ReduceSum_6"
-    op_type: "ReduceSum"
-  }
-  node {
-    input: "target"
-    output: "9"
-    name: "Shape_7"
-    op_type: "Shape"
-  }
-  node {
-    input: "9"
-    output: "10"
-    name: "ConstantOfShape_8"
-    op_type: "ConstantOfShape"
-    attribute {
-      name: "value"
-      t {
-        dims: 1
-        data_type: 1
-        raw_data: "\000\000\000\000"
-      }
-      type: TENSOR
-    }
-  }
-  node {
-    input: "target"
-    output: "11"
-    name: "Shape_9"
-    op_type: "Shape"
-  }
-  node {
-    input: "11"
-    output: "12"
-    name: "ConstantOfShape_10"
-    op_type: "ConstantOfShape"
-    attribute {
-      name: "value"
-      t {
-        dims: 1
-        data_type: 1
-        raw_data: "\000\000\200?"
-      }
-      type: TENSOR
-    }
-  }
-  node {
-    input: "6"
-    input: "10"
-    input: "12"
-    output: "13"
-    name: "Where_11"
-    op_type: "Where"
-  }
-  node {
-    input: "13"
-    output: "14"
-    name: "ReduceSum_12"
-    op_type: "ReduceSum"
-  }
-  node {
-    input: "8"
-    input: "14"
-    output: "15"
-    name: "Div_13"
-    op_type: "Div"
-  }
-  name: "torch-jit-export"
-  input {
-    name: "input"
-    type {
-      tensor_type {
-        elem_type: 1
-        shape {
-          dim {
-            dim_value: 3
-          }
-          dim {
-            dim_value: 5
-          }
-        }
-      }
-    }
-  }
-  input {
-    name: "target"
-    type {
-      tensor_type {
-        elem_type: 7
-        shape {
-          dim {
-            dim_value: 3
-          }
-        }
-      }
-    }
-  }
-  output {
-    name: "15"
-    type {
-      tensor_type {
-        elem_type: 1
-        shape {
-        }
-      }
-    }
-  }
-}
-opset_import {
-  version: 12
-}
diff --git a/test/onnx/test_operators.py b/test/onnx/test_operators.py
index 923a4b14fe1..b9e8b9da433 100644
--- a/test/onnx/test_operators.py
+++ b/test/onnx/test_operators.py
@@ -861,16 +861,6 @@ class TestOperators(TestCase):
         x = torch.randn(2, 3, 5, 5, device=torch.device('cpu'))
         self.assertONNX(lambda x: torch.det(x), x, opset_version=11)
 
-    def test_softmaxcrossentropy(self):
-        x = torch.randn(3, 5)
-        y = torch.empty(3, dtype=torch.long).random_(5)
-        self.assertONNX(torch.nn.CrossEntropyLoss(), (x, y), opset_version=12)
-
-    def test_softmaxcrossentropy_ignore_index(self):
-        x = torch.randn(3, 5)
-        y = torch.empty(3, dtype=torch.long).random_(5)
-        self.assertONNX(torch.nn.CrossEntropyLoss(ignore_index=1), (x, y), opset_version=12)
-
 
 if __name__ == '__main__':
     no_onnx_dep_flag = '--no-onnx'
diff --git a/test/onnx/test_pytorch_onnx_onnxruntime.py b/test/onnx/test_pytorch_onnx_onnxruntime.py
index b5426f3cb92..6740c243894 100644
--- a/test/onnx/test_pytorch_onnx_onnxruntime.py
+++ b/test/onnx/test_pytorch_onnx_onnxruntime.py
@@ -2692,63 +2692,6 @@ class TestONNXRuntime(unittest.TestCase):
         x = torch.randn(3, 4)
         self.run_test(EinsumModelTranspose(), input=(x,))
 
-    @unittest.skip("Enable this once ORT version is updated")
-    @skipIfUnsupportedMinOpsetVersion(12)
-    def test_crossentropyloss(self):
-        class CrossEntropyLossNone(torch.nn.Module):
-            def forward(self, input, target):
-                loss = torch.nn.CrossEntropyLoss(reduction='none')
-                return loss(input, target)
-
-        x = torch.randn(3, 5)
-        y = torch.empty(3, dtype=torch.long).random_(5)
-        self.run_test(CrossEntropyLossNone(), input=(x, y))
-
-        class CrossEntropyLossNoneWeight(torch.nn.Module):
-            def forward(self, input, target):
-                loss = torch.nn.CrossEntropyLoss(reduction='none', weight=torch.randn(5))
-                return loss(input, target)
-
-        x = torch.randn(3, 5)
-        y = torch.empty(3, dtype=torch.long).random_(5)
-        self.run_test(CrossEntropyLossNoneWeight(), input=(x, y))
-
-        class CrossEntropyLossSum(torch.nn.Module):
-            def forward(self, input, target):
-                loss = torch.nn.CrossEntropyLoss(reduction='sum')
-                return loss(input, target)
-
-        x = torch.randn(3, 5, 2)
-        y = torch.empty(3, 2, dtype=torch.long).random_(5)
-        self.run_test(CrossEntropyLossSum(), input=(x, y))
-
-        class CrossEntropyLossSumWeight(torch.nn.Module):
-            def forward(self, input, target, weight):
-                loss = torch.nn.CrossEntropyLoss(reduction='sum', weight=torch.randn(5))
-                return loss(input, target)
-
-        x = torch.randn(3, 5, 2)
-        y = torch.empty(3, 2, dtype=torch.long).random_(5)
-        self.run_test(CrossEntropyLossSumWeight(), input=(x, y))
-
-        class CrossEntropyLossMean(torch.nn.Module):
-            def forward(self, input, target):
-                loss = torch.nn.CrossEntropyLoss()
-                return loss(input, target)
-
-        x = torch.randn(3, 5, 2)
-        y = torch.empty(3, 2, dtype=torch.long).random_(5)
-        self.run_test(CrossEntropyLossMean(), input=(x, y))
-
-        class CrossEntropyLossMeanWeight(torch.nn.Module):
-            def forward(self, input, target, weight):
-                loss = torch.nn.CrossEntropyLoss(weight=torch.randn(5))
-                return loss(input, target)
-
-        x = torch.randn(3, 5, 2)
-        y = torch.empty(3, 2, dtype=torch.long).random_(5)
-        self.run_test(CrossEntropyLossMeanWeight(), input=(x, y))
-
     def test_empty_branch(self):
         class EmptyBranchModel(torch.jit.ScriptModule):
             @torch.jit.script_method
diff --git a/third_party/onnx b/third_party/onnx
index 4cd2538df04..9fdae4c6896 160000
--- a/third_party/onnx
+++ b/third_party/onnx
@@ -1 +1 @@
-Subproject commit 4cd2538df044d7637492ec5635bf47a3057749e9
+Subproject commit 9fdae4c68960a2d44cd1cc871c74a6a9d469fa1f
diff --git a/torch/csrc/jit/passes/onnx/peephole.cpp b/torch/csrc/jit/passes/onnx/peephole.cpp
index de29e4f8f42..a27e4b200bd 100644
--- a/torch/csrc/jit/passes/onnx/peephole.cpp
+++ b/torch/csrc/jit/passes/onnx/peephole.cpp
@@ -815,34 +815,6 @@ void removeMaxPoolUnusedOutput(Block* b) {
   }
 }
 
-// This optimization fuses LogSoftmax and NegativeLogLikelihoodLoss operators into
-// one operator: SoftmaxCrossEntropyLoss.
-static void fuseLogSoftmaxNllLoss(Block* b) {
-  for (auto it = b->nodes().begin(), end = b->nodes().end(); it != end; ++it) {
-    for (auto* child_block : it->blocks()) {
-      fuseLogSoftmaxNllLoss(child_block);
-    }
-    if (it->kind() == onnx::NegativeLogLikelihoodLoss &&
-        it->input(0)->node()->kind() == onnx::LogSoftmax) {
-      auto origLogSoftmaxNode= it->input(0)->node();
-      auto origNllLossNode = *it;
-
-      Node* softmaxCrossEntropyNode = b->owningGraph()->create(onnx::SoftmaxCrossEntropyLoss, it->outputs().size());
-      for (size_t i = 0; i < softmaxCrossEntropyNode->outputs().size(); ++i) {
-         softmaxCrossEntropyNode->outputs()[i]->copyMetadata(it->outputs()[i]);
-      }
-      softmaxCrossEntropyNode->copyAttributes(*origNllLossNode);
-      softmaxCrossEntropyNode->insertBefore(origLogSoftmaxNode);
-      softmaxCrossEntropyNode->addInput(origLogSoftmaxNode->inputs().at(0));
-      softmaxCrossEntropyNode->addInput(origNllLossNode->inputs().at(1));
-      it->replaceAllUsesWith(softmaxCrossEntropyNode);
-      it->removeAllInputs();
-      origLogSoftmaxNode->destroy();
-      continue;
-    }
-  }
-}
-
 // This optimization does ONNX-specific peephole optimizations.
 //
 // At the moment, here are the optimizations it does:
@@ -885,7 +857,6 @@ void PeepholeOptimizeONNX(std::shared_ptr<Graph>& graph, int opset_version, bool
   convertSplitToDynamic(graph->block(), opset_version);
   eraseListConstruct(graph->block(), opset_version);
   removeMaxPoolUnusedOutput(graph->block());
-  fuseLogSoftmaxNllLoss(graph->block());
 }
 
 } // namespace jit