mirror of
https://github.com/saymrwulf/pytorch.git
synced 2026-05-14 20:57:59 +00:00
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/19705 Optimizes the case where a run of consecutive dims that are not broadcast is followed by a run of consecutive dims that are broadcast. For example, MulGradient(["dC", "A", "B"], ["dA", "dB"], broadcast=True, axis=0) where A.shape == dC.shape == [9508, 80] and B.shape == [80]. Test Plan: On SKL T6, running mul_gradient_benchmark without this optimization: Operator #0 (dA, MulGradient) 11.9119 ms/iter. After this optimization: Operator #0 (dA, MulGradient) 0.672759 ms/iter. Need to land D15291800 first to fix the unit test error. Reviewed By: dmudiger Differential Revision: D15075415 fbshipit-source-id: 0f97be17cf8f1dacbafa34cd637fb8bc1c5e5387
48 lines
1 KiB
C++
48 lines
1 KiB
C++
#ifndef CAFFE2_OPERATORS_ELEMENTWISE_MUL_OP_H_
|
|
#define CAFFE2_OPERATORS_ELEMENTWISE_MUL_OP_H_
|
|
|
|
#include <vector>
|
|
|
|
#include "caffe2/operators/elementwise_ops.h"
|
|
#include "caffe2/utils/math.h"
|
|
|
|
namespace caffe2 {
|
|
|
|
template <class Context>
|
|
struct MulFunctor {
|
|
template <typename TIn, typename TOut>
|
|
bool Forward(
|
|
const std::vector<int>& A_dims,
|
|
const std::vector<int>& B_dims,
|
|
const TIn* A,
|
|
const TIn* B,
|
|
TOut* C,
|
|
Context* context) const {
|
|
math::Mul(
|
|
A_dims.size(),
|
|
A_dims.data(),
|
|
B_dims.size(),
|
|
B_dims.data(),
|
|
A,
|
|
B,
|
|
C,
|
|
context);
|
|
return true;
|
|
}
|
|
|
|
template <typename TGrad, typename TIn, typename TOut>
|
|
bool Backward(
|
|
const std::vector<int>& A_dims,
|
|
const std::vector<int>& B_dims,
|
|
const TGrad* dC_data,
|
|
const TIn* A_data,
|
|
const TIn* B_data,
|
|
const TOut* C_data,
|
|
TGrad* dA_data,
|
|
TGrad* dB_data,
|
|
Context* context) const;
|
|
};
|
|
|
|
} // namespace caffe2
|
|
|
|
#endif // CAFFE2_OPERATORS_ELEMENTWISE_MUL_OP_H_
|