diff --git a/orttraining/orttraining/eager/opgen/opgen/atenops.py b/orttraining/orttraining/eager/opgen/opgen/atenops.py index 58bb63fb97..0dd125030d 100644 --- a/orttraining/orttraining/eager/opgen/opgen/atenops.py +++ b/orttraining/orttraining/eager/opgen/opgen/atenops.py @@ -138,7 +138,8 @@ hand_implemented = { "aten::addmm": Gemm("mat1", "mat2", "self", alpha="alpha", beta="beta"), "aten::add_.Tensor": SignatureOnly(), "aten::t": Transpose("self"), - "aten::mm.out": MatMul("self", "mat2"), + # MatMul("self", "mat2"), fails since it resizes based on self but should be based on result shape of the mult + "aten::mm.out": MakeTorchFallback(), "aten::zeros_like": ConstantOfShape( Shape("self") ), # the default constant is 0, so don't need to speicify attribute @@ -169,6 +170,10 @@ hand_implemented = { "aten::argmax.out": SignatureOnly(), "aten::nonzero": Transpose(NonZero("self")), "aten::nonzero.out": SignatureOnly(), + "aten::_log_softmax.out": MakeTorchFallback(), + "aten::nll_loss_forward.output": MakeTorchFallback(), + "aten::nll_loss_backward.grad_input": MakeTorchFallback(), + "aten::_log_softmax_backward_data.out": MakeTorchFallback(), } # If the aten op expects a specific output type that differs from self diff --git a/orttraining/orttraining/eager/test_model/mnist_fc_training.py b/orttraining/orttraining/eager/test_model_OrtModule/mnist_fc_training.py similarity index 100% rename from orttraining/orttraining/eager/test_model/mnist_fc_training.py rename to orttraining/orttraining/eager/test_model_OrtModule/mnist_fc_training.py diff --git a/orttraining/orttraining/eager/test_models/mnist_fc_training.py b/orttraining/orttraining/eager/test_models/mnist_fc_training.py index 54b5e0a771..95ba3bf060 100644 --- a/orttraining/orttraining/eager/test_models/mnist_fc_training.py +++ b/orttraining/orttraining/eager/test_models/mnist_fc_training.py @@ -1,26 +1,27 @@ ## This code is from https://github.com/pytorch/examples/blob/master/mnist/main.py -## with modification 
to do training using onnxruntime as backend on cuda device. -## A private PyTorch build from https://aiinfra.visualstudio.com/Lotus/_git/pytorch (ORTTraining branch) is needed to run the demo. +## with modification to do training using onnxruntime as backend. -## Model testing is not complete. +# pylint: disable=missing-docstring +# pylint: disable=C0103 from __future__ import print_function + import argparse -import torch -import onnxruntime_pybind11_state as torch_ort -import torch.nn as nn -import torch.nn.functional as F -import torch.optim as optim -from torchvision import datasets, transforms -import numpy as np import os -dataset_root_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "data") +import onnxruntime_pybind11_state as torch_ort +import torch +import torch.nn.functional as F +from torch import nn, optim +from torchvision import datasets, transforms + +# we use the build directory so gitignore applies. +dataset_root_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "build/data") class NeuralNet(nn.Module): def __init__(self, input_size, hidden_size, num_classes): - super(NeuralNet, self).__init__() + super().__init__() self.fc1 = nn.Linear(input_size, hidden_size) self.relu = nn.ReLU() self.fc2 = nn.Linear(hidden_size, num_classes) @@ -40,9 +41,10 @@ def train_with_eager(args, model, optimizer, device, train_loader, epoch): for batch_idx, (data, target) in enumerate(train_loader): data_cpu = data.reshape(data.shape[0], -1) data = data_cpu.to(device) + target_ort = target.to(device) x = model(data) - loss = my_loss(x.cpu(), target) + loss = my_loss(x, target_ort) loss.backward() optimizer.step() @@ -70,9 +72,8 @@ def main(): parser.add_argument( "--test-batch-size", type=int, default=1000, metavar="N", help="input batch size for testing (default: 1000)" ) - parser.add_argument("--epochs", type=int, default=10, metavar="N", help="number of epochs to train (default: 10)") + parser.add_argument("--epochs", type=int, 
default=1, metavar="N", help="number of epochs to train (default: 1)") parser.add_argument("--lr", type=float, default=0.01, metavar="LR", help="learning rate (default: 0.01)") - parser.add_argument("--no-cuda", action="store_true", default=False, help="disables CUDA training") parser.add_argument("--seed", type=int, default=1, metavar="S", help="random seed (default: 1)") parser.add_argument( "--log-interval", @@ -83,7 +84,6 @@ def main(): ) args = parser.parse_args() - use_cuda = not args.no_cuda and torch.cuda.is_available() torch.manual_seed(args.seed) @@ -110,18 +110,18 @@ def main(): **kwargs, ) - device = torch.device("ort") + device_ort = torch_ort.device() input_size = 784 hidden_size = 500 num_classes = 10 - model = NeuralNet(input_size, hidden_size, num_classes) - model.to(device) - optimizer = optim.SGD(model.parameters(), lr=0.01) + model_nn = NeuralNet(input_size, hidden_size, num_classes) + model_nn.to(device_ort) + optimizer = optim.SGD(model_nn.parameters(), lr=0.01) print("\nStart Training.") for epoch in range(1, args.epochs + 1): - train_with_eager(args, model, optimizer, device, train_loader, epoch) + train_with_eager(args, model_nn, optimizer, device_ort, train_loader, epoch) if __name__ == "__main__": diff --git a/orttraining/orttraining/eager/test_models/training_test.py b/orttraining/orttraining/eager/test_models/training_test.py new file mode 100644 index 0000000000..c0103e55cf --- /dev/null +++ b/orttraining/orttraining/eager/test_models/training_test.py @@ -0,0 +1,99 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +# pylint: disable=missing-docstring +# pylint: disable=C0103 +# pylint: disable=R0903 + +# The following is a simple neural network trained and tested using FashionMNIST data. +# It is using eager mode targeting the ort device.
After building eager mode, run +# PYTHONPATH=~/{repo root}/build/Linux/Debug python ~/{repo root}/orttraining/orttraining/eager/test_models/training_test.py + +import os + +import onnxruntime_pybind11_state as torch_ort +import torch +from torch import nn +from torch.utils.data import DataLoader +from torchvision import datasets +from torchvision.transforms import ToTensor + +# we keep the training data under the build folder as it is gitignored +dataset_root_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "build/data") +training_data = datasets.FashionMNIST(root=dataset_root_dir, train=True, download=True, transform=ToTensor()) +test_data = datasets.FashionMNIST(root=dataset_root_dir, train=False, download=True, transform=ToTensor()) + +train_dataloader = DataLoader(training_data, batch_size=64) +test_dataloader = DataLoader(test_data, batch_size=64) + +device = torch_ort.device() + + +class NeuralNetwork(nn.Module): + def __init__(self): + super().__init__() + self.flatten = nn.Flatten() + self.linear_relu_stack = nn.Sequential( + nn.Linear(28 * 28, 512), + nn.ReLU(), + nn.Linear(512, 512), + nn.ReLU(), + nn.Linear(512, 10), + ) + + def forward(self, sample): + sample = self.flatten(sample) + logits = self.linear_relu_stack(sample) + return logits + + +def train_loop(dataloader, model, loss_fn, optimizer): + size = len(dataloader.dataset) + for batch, (X, y) in enumerate(dataloader): + # Compute prediction and loss + x_ort = X.to(device) + y_ort = y.to(device) + pred = model(x_ort) + loss = loss_fn(pred, y_ort) + + # Backpropagation + optimizer.zero_grad() + loss.backward() + optimizer.step() + + if batch % 100 == 0: + loss, current = loss.item(), batch * len(X) + print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]") + + +def test_loop(dataloader, model, loss_fn): + size = len(dataloader.dataset) + num_batches = len(dataloader) + test_loss, correct = 0, 0 + + with torch.no_grad(): + for X, y in dataloader: + x_ort = X.to(device) + y_ort = y.to(device) + pred = 
model(x_ort) + test_loss += loss_fn(pred, y_ort).item() + correct += (pred.argmax(1) == y_ort).type(torch.float).sum().item() + + test_loss /= num_batches + correct /= size + print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n") + + +model_nn = NeuralNetwork().to(device) +learning_rate = 1e-3 + +loss_fn_nn = nn.CrossEntropyLoss().to(device) +optimizer_nn = torch.optim.SGD(model_nn.parameters(), lr=learning_rate) + +batch_size = 64 +epochs = 1 +for t in range(epochs): + print(f"Epoch {t+1}\n-------------------------------") + train_loop(train_dataloader, model_nn, loss_fn_nn, optimizer_nn) + test_loop(test_dataloader, model_nn, loss_fn_nn) +print("Done!")