pytorch/caffe2/utils/math.h
Yangqing Jia 7b8c7b11d2 Changes for Windows build to pass.
Summary:
After this, we should have contbuild guarding the Windows build both with
and without CUDA.

This includes a series of changes that are needed to make Windows build,
specifically:

(1) Various flags that are needed in the cmake system, especially dealing
with /MD, /MT, cuda, cudnn, whole static linking, etc.
(2) Contbuild scripts based on AppVeyor.
(3) For Windows build, note that one will need to use "cmake --build" to
build stuff so that the build type is consistent between configuration and
actual build. see scripts\build_windows.bat for details.
(4) In logging.h, ERROR is already defined by Windows. I don't have a good
solution now, and as a result, LOG(ERROR) on windows is going to be
LOG(INFO).
(5) variable length array is not supported by MSVC (and it is not part of
C++ standard). As a result I replaced them with vectors.
(6) sched.h is not available on Windows, so akyrola's awesome simple
async net might encounter some slowdown due to no affinity setting on
Windows.
(7) MSVC has a
Closes https://github.com/caffe2/caffe2/pull/183

Reviewed By: ajtulloch

Differential Revision: D4657831

Pulled By: Yangqing

fbshipit-source-id: 070ded372ed78a7e3e3919fdffa1d337640f146e
2017-03-06 20:03:37 -08:00

318 lines
11 KiB
C++

#ifndef CAFFE2_UTILS_MATH_H_
#define CAFFE2_UTILS_MATH_H_
// This is a simple translation from the old Caffe math interfaces. We aim to
// still keep it simple, so all platforms would be able to support it fairly
// easily.
// We include the cblas header here so that we can obtain the macros from cblas.
extern "C" {
#include "caffe2/utils/cblas.h"
}
#ifdef CAFFE2_USE_ACCELERATE
#include <Accelerate/Accelerate.h>
#endif // CAFFE2_USE_ACCELERATE
#include "caffe2/core/common.h"
#include "caffe2/core/types.h"
#ifndef __CUDACC__
#include "Eigen/Core"
#include "Eigen/Dense"
#endif
namespace caffe2 {
// An empty class as a placeholder for a math function that has no specific
// engine specified. It is the default value of the `Engine` template
// parameter of Gemm/GemmEx/Gemv below, selecting the generic implementation.
class DefaultEngine {};
#ifndef __CUDACC__
// Common Eigen types that we will often use.
//
// Eigen::Map wraps an existing, externally owned buffer in an Eigen
// expression without copying; the Const* variants below map read-only
// memory. Matrix types carry linear-algebra semantics while Array types are
// coefficient-wise; the *Vector* aliases fix the column count to 1.
// (Excluded under __CUDACC__ since Eigen headers are not nvcc-friendly.)
template <typename T>
using EigenMatrixMap =
Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic> >;
// 2-D coefficient-wise (element-wise) view.
template <typename T>
using EigenArrayMap =
Eigen::Map<Eigen::Array<T, Eigen::Dynamic, Eigen::Dynamic> >;
// Column-vector linear-algebra view.
template <typename T>
using EigenVectorMap = Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, 1> >;
// Column-vector element-wise view.
template <typename T>
using EigenVectorArrayMap = Eigen::Map<Eigen::Array<T, Eigen::Dynamic, 1> >;
// Read-only counterparts of the four maps above.
template <typename T>
using ConstEigenMatrixMap =
Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic> >;
template <typename T>
using ConstEigenArrayMap =
Eigen::Map<const Eigen::Array<T, Eigen::Dynamic, Eigen::Dynamic> >;
template <typename T>
using ConstEigenVectorMap =
Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, 1> >;
template <typename T>
using ConstEigenVectorArrayMap =
Eigen::Map<const Eigen::Array<T, Eigen::Dynamic, 1> >;
#endif
namespace math {
// Element-wise unary functions over N contiguous elements: y[i] = f(x[i]).
// `context` carries the per-device execution state (streams, RNG handles,
// etc. -- see the Context classes for specifics).
// Exp: y[i] = exp(x[i]).
template <typename T, class Context>
void Exp(const int N, const T* x, T* y, Context* context);
// Log: y[i] = log(x[i]).
template <typename T, class Context>
void Log(const int N, const T* x, T* y, Context* context);
// Sqr: presumably y[i] = x[i] * x[i] -- confirm against the implementation.
template <typename T, class Context>
void Sqr(const int N, const T* x, T* y, Context* context);
// Not: logical negation of each element (intended for bool-like T).
template <typename T, class Context>
void Not(const int N, const T* x, T* y, Context* context);
// Powx: raises every element of a to the scalar power b: y[i] = pow(a[i], b).
template <typename T, class Context>
void Powx(const int N, const T* a, const T b, T* y, Context* context);
// Declares binary operations whose output is boolean:
//   name(N, a, b, y, ctx):        y[i] = a[i] OP b[i] over N elements.
//   name##ToRow(M, N, a, b, y, ctx): variant with explicit M, N dimensions
//     -- presumably broadcasts the N-element row b across an M x N input a;
//     confirm against the implementation.
// (Comments cannot live inside the macro body: a // before the trailing
// backslash would swallow the line continuation.)
#define CAFFE2_DECLARE_BINARY_OP_BINARY_RESULT(name) \
template <typename T, class Context> \
void name(const int N, const T* a, const T* b, bool* y, Context* context); \
template <typename T, class Context> \
void name##ToRow( \
const int M, \
const int N, \
const T* a, \
const T* b, \
bool* y, \
Context* context);
// Comparisons: less-than, less-or-equal, greater-than, greater-or-equal.
CAFFE2_DECLARE_BINARY_OP_BINARY_RESULT(LT);
CAFFE2_DECLARE_BINARY_OP_BINARY_RESULT(LE);
CAFFE2_DECLARE_BINARY_OP_BINARY_RESULT(GT);
CAFFE2_DECLARE_BINARY_OP_BINARY_RESULT(GE);
// Element-wise logical operations.
CAFFE2_DECLARE_BINARY_OP_BINARY_RESULT(And);
CAFFE2_DECLARE_BINARY_OP_BINARY_RESULT(Or);
CAFFE2_DECLARE_BINARY_OP_BINARY_RESULT(Xor);
#undef CAFFE2_DECLARE_BINARY_OP_BINARY_RESULT
// Declares arithmetic binary operations with a same-typed output:
//   name(N, a, b, y, ctx):            y[i] = a[i] OP b[i] over N elements.
//   name##ToRow(M, N, a, b, y, ctx):  two-input variant with explicit M, N.
//   name##ToRow(M, N, x, y, ctx) and name##ToCol(M, N, x, y, ctx):
//     single-input overloads -- presumably apply the N- (resp. M-) element
//     vector x across the rows (resp. columns) of the M x N output y;
//     confirm against the implementation.
#define CAFFE2_DECLARE_BINARY_OP(name) \
template <typename T, class Context> \
void name(const int N, const T* a, const T* b, T* y, Context* context); \
template <typename T, class Context> \
void name##ToRow( \
const int M, \
const int N, \
const T* a, \
const T* b, \
T* y, \
Context* context); \
template <typename T, class Context> \
void name##ToRow( \
const int M, const int N, const T* x, T* y, Context* context); \
template <typename T, class Context> \
void name##ToCol( \
const int M, const int N, const T* x, T* y, Context* context);
CAFFE2_DECLARE_BINARY_OP(Add);
CAFFE2_DECLARE_BINARY_OP(Sub);
CAFFE2_DECLARE_BINARY_OP(Mul);
CAFFE2_DECLARE_BINARY_OP(Div);
#undef CAFFE2_DECLARE_BINARY_OP
// Compute the row-wise max of a N*D matrix X, and write it to a N
// dimensional vector y. (X is presumably row-major, i.e. x[i * D + j];
// confirm against the implementation.)
template <typename T, class Context>
void RowwiseMax(const int N, const int D, const T* x, T* y,
Context* context);
// Compute the column-wise max of a N*D matrix X, and write it to a D
// dimensional vector y.
template <typename T, class Context>
void ColwiseMax(const int N, const int D, const T* x, T* y,
Context* context);
// Decaf gemm provides a simpler interface to the gemm functions, with the
// limitation that the data has to be contiguous in memory.
// Following the BLAS gemm convention: C = alpha * op(A) * op(B) + beta * C,
// where op() transposes its argument when the corresponding TransA/TransB is
// CblasTrans. A is M x K, B is K x N, C is M x N (after op()). The Engine
// template parameter selects an alternative backend; DefaultEngine picks the
// generic one.
template <typename T, class Context, class Engine=DefaultEngine>
void Gemm(const CBLAS_TRANSPOSE TransA, const CBLAS_TRANSPOSE TransB,
const int M, const int N, const int K, const T alpha, const T* A,
const T* B, const T beta, T* C, Context* context);
// We also provide a gemm that has explicit lda, ldb and ldc specified.
// In most cases you probably want to use the function above, though.
template <typename T, class Context, class Engine = DefaultEngine>
void GemmEx(
const CBLAS_TRANSPOSE TransA,
const CBLAS_TRANSPOSE TransB,
const int M,
const int N,
const int K,
const T alpha,
const T* A,
const int lda,
const T* B,
const int ldb,
const T beta,
T* C,
const int ldc,
Context* context);
// Gemv always takes in a M*N matrix A, and depending on whether we set TransA
// to Trans, the output is:
// CblasNoTrans: x is an N dim vector and y is an M dim vector.
// CblasTrans: x is an M dim vector and y is an N dim vector.
// Following the BLAS gemv convention: y = alpha * op(A) * x + beta * y.
template <typename T, class Context, class Engine=DefaultEngine>
void Gemv(const CBLAS_TRANSPOSE TransA, const int M, const int N,
const T alpha, const T* A, const T* x, const T beta,
T* y, Context* context);
// Fills X[0..N) with the constant value alpha.
template <typename T, class Context>
void Set(const TIndex N, const T alpha, T* X, Context* context);
// Fills r[0..n) with samples drawn uniformly from [a, b].
// (Whether the upper bound is inclusive may differ per backend -- confirm.)
template <typename T, class Context>
void RandUniform(const int n, const T a, const T b, T* r,
Context* context);
// Like RandUniform, but presumably the n generated values are pairwise
// distinct and also avoid the m values in `avoid` -- confirm against the
// implementation.
template <typename T, class Context>
void RandUniformUnique(
const size_t n,
const T a,
const T b,
T* r,
const size_t m,
const T* avoid,
Context* context);
// Fills r[0..n) with samples from a Gaussian distribution with the given
// mean and standard deviation.
template <typename T, class Context>
void RandGaussian(
const int n,
const T mean,
const T std,
T* r,
Context* context);
// Dot product of vectors a and b; writes the single scalar result to y.
template <typename T, class Context>
void Dot(const int N, const T* a, const T* b, T* y, Context* context);
// Sum of vector x, and writes the result to a single value y.
template <typename T, class Context>
void Sum(const int N, const T* x, T* y, Context* context);
// Select does index selection of the rows a N*D matrix x, and gives the N
// dimensional vector y that contains the selected data.
// Presumably y[i] = x[i * D + idx[i]] -- confirm against the implementation.
template <typename T, class Context>
void Select(const int N, const int D, const T* x, const int* idx, T* y,
Context* context);
// Scales x by the scalar alpha into y (alpha passed by value, on the host).
template <typename T, class Context>
void Scale(const int N, const T alpha, const T* x, T* y,
Context* context);
// Different from the Scale function above, if alpha is passed in
// as a pointer, we will assume that it lives on the Context device,
// for example on GPU.
template <typename T, class Context>
void Scale(const int N, const T* alpha, const T* x, T* y,
Context* context);
// BLAS-style axpy: accumulates alpha * x into y (alpha on the host).
template <typename T, class Context>
void Axpy(const int N, const T alpha, const T* x, T* y,
Context* context);
// Different from the Axpy function above, if alpha is passed in
// as a pointer, we will assume that it lives on the Context device,
// for example on GPU.
template <typename T, class Context>
void Axpy(const int N, const T* alpha, const T* x, T* y,
Context* context);
// Axpby: y = alpha * x + b * y (both scalars passed by value).
template <typename T, class Context>
void Axpby(const int N, const T alpha, const T* x, const T b, T* y,
Context* context);
// Im2col: unpacks sliding patches of an input image into columns of a
// matrix, so a convolution can be computed as a single GEMM. `order` is the
// storage order (presumably the NCHW/NHWC StorageOrder enum -- confirm).
// Padding is given per side (top/left/bottom/right); kernel size, dilation
// and stride are given per spatial dimension (h/w).
template <typename T, class Context, int order>
void Im2col(
const T* data_im,
const int channels,
const int height,
const int width,
const int kernel_h,
const int kernel_w,
const int dilation_h,
const int dilation_w,
const int pad_t,
const int pad_l,
const int pad_b,
const int pad_r,
const int stride_h,
const int stride_w,
T* data_col,
Context* context);
// Col2im: the inverse of Im2col -- folds the column matrix back into the
// image buffer (overlapping patches presumably accumulate; confirm against
// the implementation). Used for convolution backward passes.
template <typename T, class Context, int order>
void Col2im(
const T* data_col,
const int channels,
const int height,
const int width,
const int patch_h,
const int patch_w,
const int dilation_h,
const int dilation_w,
const int pad_t,
const int pad_l,
const int pad_b,
const int pad_r,
const int stride_h,
const int stride_w,
T* data_im,
Context* context);
// Applies a per-channel bias value to each channel of the input
// image. image_size is H * W; bias holds one value per channel
// (bias_channels entries). The image is modified in place.
template <typename T, class Context>
void BiasCHW(
const T* bias,
const int bias_channels,
const int image_size,
T* image,
Context* context);
// Copies an M x N matrix of `item_size`-byte elements from A to B with
// leading dimensions lda and ldb. (Whether lda/ldb count elements or bytes
// is not visible here -- confirm against the implementation.)
template <class Context>
void CopyMatrix(const size_t item_size, const int M, const int N, const void* A,
const int lda, void* B, const int ldb, Context* context);
// Produces a seed for initializing random number generators.
uint32_t randomNumberSeed();
// Returns true iff 0 <= a < b, using a single unsigned comparison.
//
// Casting a negative `a` to unsigned wraps it to a value >= 0x800...0.
// Callers pass a non-negative `b`, whose unsigned image is always below
// 0x800...0, so the wrapped value can never compare less than it. One
// unsigned compare therefore replaces the pair (a >= 0) && (a < b).
inline bool is_a_ge_zero_and_a_lt_b(int a, int b) {
  const unsigned ua = static_cast<unsigned>(a);
  const unsigned ub = static_cast<unsigned>(b);
  return ua < ub;
}
// Calculates ceil(a / b) for integral T without floating point. User must be
// careful to ensure that there is no overflow or underflow in the
// calculation of a + b - 1 (and that b > 0); the result is unspecified
// otherwise.
template <typename T>
inline T divUp(T a, T b) {
  // static_cast instead of the old C-style `(T) 1`: named casts are
  // intent-revealing and greppable (CppCoreGuidelines ES.49).
  return (a + b - static_cast<T>(1)) / b;
}
// Rounds a up to the next highest multiple of b. User must be careful
// to ensure that there is no overflow or underflow in the calculation
// of divUp.
template <typename T>
inline T roundUp(T a, T b) {
  return divUp<T>(a, b) * b;
}
} // namespace math
} // namespace caffe2
#include "caffe2/utils/math-detail.h"
#endif // CAFFE2_UTILS_MATH_H_