mirror of
https://github.com/saymrwulf/pytorch.git
synced 2026-05-15 21:00:47 +00:00
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/19705 Optimizes the case where a run of consecutive dims that are not broadcast is followed by a run of consecutive dims that are broadcast. For example, MulGradient(["dC", "A", "B"], ["dA", "dB"], broadcast=True, axis=0) where A.shape == dC.shape == [9508, 80] and B.shape == [9508] (this matches the `np.random.rand(args.m)` blob the benchmark feeds for B; the original text said [80]). Test Plan: On SKL T6, running mul_gradient_benchmark without this optimization: Operator #0 (dA, MulGradient) 11.9119 ms/iter. After this optimization: Operator #0 (dA, MulGradient) 0.672759 ms/iter. D15291800 needs to land first to fix the unit test error. Reviewed By: dmudiger Differential Revision: D15075415 fbshipit-source-id: 0f97be17cf8f1dacbafa34cd637fb8bc1c5e5387
38 lines
1.2 KiB
Python
38 lines
1.2 KiB
Python
from __future__ import absolute_import
|
|
from __future__ import division
|
|
from __future__ import print_function
|
|
from __future__ import unicode_literals
|
|
|
|
import argparse
|
|
import numpy as np
|
|
|
|
from caffe2.python import core, workspace
|
|
|
|
|
|
def benchmark_mul_gradient(args):
    """Feed random inputs and benchmark a single MulGradient operator.

    Three float32 blobs are placed in the Caffe2 workspace: "dC" and "A"
    with shape (args.m, args.n), and "B" with shape (args.m,). A net holding
    one MulGradient op (broadcast=True, axis=0) producing "dA" and "dB" is
    then created and handed to workspace.BenchmarkNet.

    Args:
        args: parsed command-line options; the attributes ``m``, ``n`` and
            ``iteration`` are read.
    """
    matrix_shape = (args.m, args.n)
    vector_shape = (args.m,)

    # Feed order (dC, A, B) is kept so the random draws happen in the same
    # sequence as the blobs are listed in the operator inputs.
    blob_shapes = (
        ("dC", matrix_shape),
        ("A", matrix_shape),
        ("B", vector_shape),
    )
    for blob_name, shape in blob_shapes:
        values = np.random.rand(*shape).astype(np.float32)
        workspace.FeedBlob(blob_name, values)

    net = core.Net("mynet")
    net.MulGradient(
        ["dC", "A", "B"],
        ["dA", "dB"],
        broadcast=True,
        axis=0,
    )
    workspace.CreateNet(net)

    # NOTE(review): the positional arguments after the net name are presumably
    # the warm-up run count (1), the measured run count, and a flag enabling
    # per-operator timings -- confirm against the caffe2 workspace API.
    workspace.BenchmarkNet(net.Name(), 1, args.iteration, True)
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: read the problem size and iteration count from the
    # command line, initialise Caffe2, then run the benchmark.
    parser = argparse.ArgumentParser(description="benchmark for MulGradient.")
    parser.add_argument(
        "-m", default=9508, type=int, help="The number of rows of A"
    )
    parser.add_argument(
        "-n", default=80, type=int, help="The number of columns of A"
    )
    parser.add_argument(
        "-i",
        "--iteration",
        default=100,
        type=int,
        help="The number of iterations.",
    )

    # parse_known_args keeps unrecognised flags instead of erroring out, so
    # they can be forwarded to Caffe2's global initialisation below.
    args, extra_args = parser.parse_known_args()
    core.GlobalInit(["python"] + extra_args)

    benchmark_mul_gradient(args)
|