Revert D20121169: [pytorch][PR] ONNX Export Support for CrossEntropyLoss

Test Plan: revert-hammer

Differential Revision: D20121169

Original commit changeset: 7b56617e8c60

fbshipit-source-id: d7f302d1e54f3c978c3be0a0ad1ee600790a5b27
2026-05-14 20:57:59 +00:00 · 2020-03-12 20:22:23 -07:00 · 2020-03-12 20:22:23 -07:00 · d5f8c8f3ba
commit d5f8c8f3ba
parent 4ae74b3b25
8 changed files with 1 additions and 351 deletions
--- a/aten/src/ATen/core/interned_strings.h
+++ b/aten/src/ATen/core/interned_strings.h
@@ -221,9 +221,6 @@ namespace c10 {
  _(onnx, SequenceInsert)            \
  _(onnx, ConcatFromSequence)        \
  _(onnx, Identity)                  \
-  _(onnx, SoftmaxCrossEntropyLoss)   \
-  _(onnx, NegativeLogLikelihoodLoss) \
-  _(onnx, LogSoftmax)                \
  FORALL_ATTR_BASE_SYMBOLS(_)        \
  _(attr, Subgraph)                  \
  _(attr, ReverseSubgraph)           \
--- a/caffe2/python/onnx/tests/onnx_backend_test.py
+++ b/caffe2/python/onnx/tests/onnx_backend_test.py
@@ -96,18 +96,6 @@ backend_test.exclude(r'(test_hardsigmoid'  # Does not support Hardsigmoid.
                     '|test_.*negative_ind.*'  # negative axis is not supported yet
                     '|test_argmax_.*select_last_index.*'  # unsupported case
                     '|test_argmin_.*select_last_index_.*'  # unsupported case
-                     '|test_gradient_of_add_and_mul_cpu'  # cannot translate Gradient op
-                     '|test_celu_cpu'  # cannot translate Celu op
-                     '|test_celu_expanded_cpu'
-                     '|test_gradient_of_add_cpu' # cannot translate Gradient op
-                     '|test_greater_equal.*'
-                     '|test_min_.*_cpu'  # Tensor type mismatch
-                     '|test_max_.*_cpu'  # Tensor type mismatch
-                     '|test_less_equal.*'
-                     '|test_inverse.*'
-                     '|test_softmax_cross_entropy.*'
-                     '|test_mean_square_distance.*'
-                     '|test_unfoldtodepth.*'
                     ')')

 # Quick patch to unbreak master CI, is working on the debugging.
@@ -130,7 +118,6 @@ if 'JENKINS_URL' in os.environ:
    backend_test.exclude(r'(test_vgg19|test_vgg)')

 # import all test cases at global scope to make them visible to python.unittest
-print(backend_test)
 globals().update(backend_test
                 .enable_report()
                 .test_cases)
--- a/test/onnx/expect/TestOperators.test_softmaxcrossentropy.expect
+++ b/test/onnx/expect/TestOperators.test_softmaxcrossentropy.expect
@@ -1,60 +0,0 @@
-ir_version: 6
-producer_name: "pytorch"
-producer_version: "1.4"
-graph {
-  node {
-    input: "input"
-    input: "target"
-    output: "2"
-    name: "SoftmaxCrossEntropyLoss_0"
-    op_type: "SoftmaxCrossEntropyLoss"
-    attribute {
-      name: "reduction"
-      s: "mean"
-      type: STRING
-    }
-  }
-  name: "torch-jit-export"
-  input {
-    name: "input"
-    type {
-      tensor_type {
-        elem_type: 1
-        shape {
-          dim {
-            dim_value: 3
-          }
-          dim {
-            dim_value: 5
-          }
-        }
-      }
-    }
-  }
-  input {
-    name: "target"
-    type {
-      tensor_type {
-        elem_type: 7
-        shape {
-          dim {
-            dim_value: 3
-          }
-        }
-      }
-    }
-  }
-  output {
-    name: "2"
-    type {
-      tensor_type {
-        elem_type: 1
-        shape {
-        }
-      }
-    }
-  }
-}
-opset_import {
-  version: 12
-}
--- a/test/onnx/expect/TestOperators.test_softmaxcrossentropy_ignore_index.expect
+++ b/test/onnx/expect/TestOperators.test_softmaxcrossentropy_ignore_index.expect
@@ -1,178 +0,0 @@
-ir_version: 6
-producer_name: "pytorch"
-producer_version: "1.4"
-graph {
-  node {
-    input: "input"
-    input: "target"
-    output: "2"
-    name: "SoftmaxCrossEntropyLoss_0"
-    op_type: "SoftmaxCrossEntropyLoss"
-    attribute {
-      name: "reduction"
-      s: "none"
-      type: STRING
-    }
-  }
-  node {
-    input: "2"
-    output: "3"
-    name: "Shape_1"
-    op_type: "Shape"
-  }
-  node {
-    input: "3"
-    output: "4"
-    name: "ConstantOfShape_2"
-    op_type: "ConstantOfShape"
-    attribute {
-      name: "value"
-      t {
-        dims: 1
-        data_type: 1
-        raw_data: "\000\000\000\000"
-      }
-      type: TENSOR
-    }
-  }
-  node {
-    output: "5"
-    name: "Constant_3"
-    op_type: "Constant"
-    attribute {
-      name: "value"
-      t {
-        data_type: 7
-        raw_data: "\001\000\000\000\000\000\000\000"
-      }
-      type: TENSOR
-    }
-  }
-  node {
-    input: "target"
-    input: "5"
-    output: "6"
-    name: "Equal_4"
-    op_type: "Equal"
-  }
-  node {
-    input: "6"
-    input: "4"
-    input: "2"
-    output: "7"
-    name: "Where_5"
-    op_type: "Where"
-  }
-  node {
-    input: "7"
-    output: "8"
-    name: "ReduceSum_6"
-    op_type: "ReduceSum"
-  }
-  node {
-    input: "target"
-    output: "9"
-    name: "Shape_7"
-    op_type: "Shape"
-  }
-  node {
-    input: "9"
-    output: "10"
-    name: "ConstantOfShape_8"
-    op_type: "ConstantOfShape"
-    attribute {
-      name: "value"
-      t {
-        dims: 1
-        data_type: 1
-        raw_data: "\000\000\000\000"
-      }
-      type: TENSOR
-    }
-  }
-  node {
-    input: "target"
-    output: "11"
-    name: "Shape_9"
-    op_type: "Shape"
-  }
-  node {
-    input: "11"
-    output: "12"
-    name: "ConstantOfShape_10"
-    op_type: "ConstantOfShape"
-    attribute {
-      name: "value"
-      t {
-        dims: 1
-        data_type: 1
-        raw_data: "\000\000\200?"
-      }
-      type: TENSOR
-    }
-  }
-  node {
-    input: "6"
-    input: "10"
-    input: "12"
-    output: "13"
-    name: "Where_11"
-    op_type: "Where"
-  }
-  node {
-    input: "13"
-    output: "14"
-    name: "ReduceSum_12"
-    op_type: "ReduceSum"
-  }
-  node {
-    input: "8"
-    input: "14"
-    output: "15"
-    name: "Div_13"
-    op_type: "Div"
-  }
-  name: "torch-jit-export"
-  input {
-    name: "input"
-    type {
-      tensor_type {
-        elem_type: 1
-        shape {
-          dim {
-            dim_value: 3
-          }
-          dim {
-            dim_value: 5
-          }
-        }
-      }
-    }
-  }
-  input {
-    name: "target"
-    type {
-      tensor_type {
-        elem_type: 7
-        shape {
-          dim {
-            dim_value: 3
-          }
-        }
-      }
-    }
-  }
-  output {
-    name: "15"
-    type {
-      tensor_type {
-        elem_type: 1
-        shape {
-        }
-      }
-    }
-  }
-}
-opset_import {
-  version: 12
-}
--- a/test/onnx/test_operators.py
+++ b/test/onnx/test_operators.py
@@ -861,16 +861,6 @@ class TestOperators(TestCase):
        x = torch.randn(2, 3, 5, 5, device=torch.device('cpu'))
        self.assertONNX(lambda x: torch.det(x), x, opset_version=11)

-    def test_softmaxcrossentropy(self):
-        x = torch.randn(3, 5)
-        y = torch.empty(3, dtype=torch.long).random_(5)
-        self.assertONNX(torch.nn.CrossEntropyLoss(), (x, y), opset_version=12)
-
-    def test_softmaxcrossentropy_ignore_index(self):
-        x = torch.randn(3, 5)
-        y = torch.empty(3, dtype=torch.long).random_(5)
-        self.assertONNX(torch.nn.CrossEntropyLoss(ignore_index=1), (x, y), opset_version=12)
-

 if __name__ == '__main__':
    no_onnx_dep_flag = '--no-onnx'
--- a/test/onnx/test_pytorch_onnx_onnxruntime.py
+++ b/test/onnx/test_pytorch_onnx_onnxruntime.py
@@ -2692,63 +2692,6 @@ class TestONNXRuntime(unittest.TestCase):
        x = torch.randn(3, 4)
        self.run_test(EinsumModelTranspose(), input=(x,))

-    @unittest.skip("Enable this once ORT version is updated")
-    @skipIfUnsupportedMinOpsetVersion(12)
-    def test_crossentropyloss(self):
-        class CrossEntropyLossNone(torch.nn.Module):
-            def forward(self, input, target):
-                loss = torch.nn.CrossEntropyLoss(reduction='none')
-                return loss(input, target)
-
-        x = torch.randn(3, 5)
-        y = torch.empty(3, dtype=torch.long).random_(5)
-        self.run_test(CrossEntropyLossNone(), input=(x, y))
-
-        class CrossEntropyLossNoneWeight(torch.nn.Module):
-            def forward(self, input, target):
-                loss = torch.nn.CrossEntropyLoss(reduction='none', weight=torch.randn(5))
-                return loss(input, target)
-
-        x = torch.randn(3, 5)
-        y = torch.empty(3, dtype=torch.long).random_(5)
-        self.run_test(CrossEntropyLossNoneWeight(), input=(x, y))
-
-        class CrossEntropyLossSum(torch.nn.Module):
-            def forward(self, input, target):
-                loss = torch.nn.CrossEntropyLoss(reduction='sum')
-                return loss(input, target)
-
-        x = torch.randn(3, 5, 2)
-        y = torch.empty(3, 2, dtype=torch.long).random_(5)
-        self.run_test(CrossEntropyLossSum(), input=(x, y))
-
-        class CrossEntropyLossSumWeight(torch.nn.Module):
-            def forward(self, input, target, weight):
-                loss = torch.nn.CrossEntropyLoss(reduction='sum', weight=torch.randn(5))
-                return loss(input, target)
-
-        x = torch.randn(3, 5, 2)
-        y = torch.empty(3, 2, dtype=torch.long).random_(5)
-        self.run_test(CrossEntropyLossSumWeight(), input=(x, y))
-
-        class CrossEntropyLossMean(torch.nn.Module):
-            def forward(self, input, target):
-                loss = torch.nn.CrossEntropyLoss()
-                return loss(input, target)
-
-        x = torch.randn(3, 5, 2)
-        y = torch.empty(3, 2, dtype=torch.long).random_(5)
-        self.run_test(CrossEntropyLossMean(), input=(x, y))
-
-        class CrossEntropyLossMeanWeight(torch.nn.Module):
-            def forward(self, input, target, weight):
-                loss = torch.nn.CrossEntropyLoss(weight=torch.randn(5))
-                return loss(input, target)
-
-        x = torch.randn(3, 5, 2)
-        y = torch.empty(3, 2, dtype=torch.long).random_(5)
-        self.run_test(CrossEntropyLossMeanWeight(), input=(x, y))
-
    def test_empty_branch(self):
        class EmptyBranchModel(torch.jit.ScriptModule):
            @torch.jit.script_method
--- a/third_party/onnx
+++ b/third_party/onnx
@@ -1 +1 @@
-Subproject commit 4cd2538df044d7637492ec5635bf47a3057749e9
+Subproject commit 9fdae4c68960a2d44cd1cc871c74a6a9d469fa1f
--- a/torch/csrc/jit/passes/onnx/peephole.cpp
+++ b/torch/csrc/jit/passes/onnx/peephole.cpp
@@ -815,34 +815,6 @@ void removeMaxPoolUnusedOutput(Block* b) {
  }
 }

-// This optimization fuses LogSoftmax and NegativeLogLikelihoodLoss operators into
-// one operator: SoftmaxCrossEntropyLoss.
-static void fuseLogSoftmaxNllLoss(Block* b) {
-  for (auto it = b->nodes().begin(), end = b->nodes().end(); it != end; ++it) {
-    for (auto* child_block : it->blocks()) {
-      fuseLogSoftmaxNllLoss(child_block);
-    }
-    if (it->kind() == onnx::NegativeLogLikelihoodLoss &&
-        it->input(0)->node()->kind() == onnx::LogSoftmax) {
-      auto origLogSoftmaxNode= it->input(0)->node();
-      auto origNllLossNode = *it;
-
-      Node* softmaxCrossEntropyNode = b->owningGraph()->create(onnx::SoftmaxCrossEntropyLoss, it->outputs().size());
-      for (size_t i = 0; i < softmaxCrossEntropyNode->outputs().size(); ++i) {
-         softmaxCrossEntropyNode->outputs()[i]->copyMetadata(it->outputs()[i]);
-      }
-      softmaxCrossEntropyNode->copyAttributes(*origNllLossNode);
-      softmaxCrossEntropyNode->insertBefore(origLogSoftmaxNode);
-      softmaxCrossEntropyNode->addInput(origLogSoftmaxNode->inputs().at(0));
-      softmaxCrossEntropyNode->addInput(origNllLossNode->inputs().at(1));
-      it->replaceAllUsesWith(softmaxCrossEntropyNode);
-      it->removeAllInputs();
-      origLogSoftmaxNode->destroy();
-      continue;
-    }
-  }
-}
-
 // This optimization does ONNX-specific peephole optimizations.
 //
 // At the moment, here are the optimizations it does:
@@ -885,7 +857,6 @@ void PeepholeOptimizeONNX(std::shared_ptr<Graph>& graph, int opset_version, bool
  convertSplitToDynamic(graph->block(), opset_version);
  eraseListConstruct(graph->block(), opset_version);
  removeMaxPoolUnusedOutput(graph->block());
-  fuseLogSoftmaxNllLoss(graph->block());
 }

 } // namespace jit