From 92fbf7cf97fb09af2b2a627bb35cd928eaf734f6 Mon Sep 17 00:00:00 2001 From: Jongsoo Park Date: Thu, 13 Feb 2020 21:11:58 -0800 Subject: [PATCH] [caffe2] use JIT'ed fp16 SLS (#32432) Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/32432 Use JIT'ed fp16 SLS in D19477209 from Caffe2 operators Test Plan: CI Reviewed By: jianyuh Differential Revision: D19477208 fbshipit-source-id: ef2ccba10f5f4c475166141bf09c266dedb92d38 --- caffe2/operators/lengths_reducer_ops.h | 158 ++++++++++++++++--------- 1 file changed, 105 insertions(+), 53 deletions(-) diff --git a/caffe2/operators/lengths_reducer_ops.h b/caffe2/operators/lengths_reducer_ops.h index 3dbbd9919f2..518ced55b9f 100644 --- a/caffe2/operators/lengths_reducer_ops.h +++ b/caffe2/operators/lengths_reducer_ops.h @@ -80,32 +80,56 @@ class CPUSparseLengthsReductionOp : public Operator { } #ifdef USE_FBGEMM - if (std::is_same::value) { - // If this is the first call or block size has changed (should never - // happen actually), generate a kernel. - if (D != last_block_size) { - last_block_size = D; + // If this is the first call or block size has changed (should never + // happen actually), generate a kernel. + if (D != last_block_size) { + last_block_size = D; + if (std::is_same::value) { if (std::is_same::value) { - kernel32_ = fbgemm::GenerateEmbeddingSpMDM( - D, - USE_WEIGHT, - USE_MEAN, - /*prefetch distance*/ 16, - USE_POSITIONAL_WEIGHT); + kernel_fp32_i32_ = + fbgemm::GenerateEmbeddingSpMDM( + D, + USE_WEIGHT, + USE_MEAN, + /*prefetch distance*/ 16, + USE_POSITIONAL_WEIGHT); } else { CAFFE_ENFORCE((std::is_same::value)); - kernel64_ = fbgemm::GenerateEmbeddingSpMDM( - D, - USE_WEIGHT, - USE_MEAN, - /*prefetch distance*/ 16, - USE_POSITIONAL_WEIGHT); + kernel_fp32_i64_ = + fbgemm::GenerateEmbeddingSpMDM( + D, + USE_WEIGHT, + USE_MEAN, + /*prefetch distance*/ 16, + USE_POSITIONAL_WEIGHT); + } + } else { + CAFFE_ENFORCE((std::is_same::value)); + if (std::is_same::value) { + kernel_fp16_i32_ = + fbgemm::GenerateEmbeddingSpMDM( + D, + USE_WEIGHT, + USE_MEAN, + /*prefetch distance*/ 16, + USE_POSITIONAL_WEIGHT); + } else { + CAFFE_ENFORCE((std::is_same::value)); + kernel_fp16_i64_ = + fbgemm::GenerateEmbeddingSpMDM( + D, + USE_WEIGHT, + USE_MEAN, + /*prefetch distance*/ 16, + USE_POSITIONAL_WEIGHT); } } + } - bool success; + bool success; + if (std::is_same::value) { if (std::is_same::value) { - success = kernel32_( + success = kernel_fp32_i32_( M, indices_size, N, @@ -115,7 +139,7 @@ class CPUSparseLengthsReductionOp : public Operator { in_weight, out_data); } else { - success = kernel64_( + success = kernel_fp32_i64_( M, indices_size, N, @@ -125,39 +149,61 @@ class CPUSparseLengthsReductionOp : public Operator { in_weight, out_data); } - - if (success) { - return true; + } else { + if (std::is_same::value) { + success = kernel_fp16_i32_( + M, + indices_size, + N, + reinterpret_cast(in_data), + indicesInput.template data(), + lengths, + in_weight, + out_data); + } else { + success = kernel_fp16_i64_( + M, + indices_size, + N, + reinterpret_cast(in_data), + indicesInput.template data(), + lengths, + in_weight, + out_data); } - - int64_t current = 0; - for (int m = 0; m < M; ++m) { - for (int i = 0; i < lengths[m]; ++i) { - CAFFE_ENFORCE_LT( - current, - indices_size, - "Your input seems to be incorrect: the sum of lengths values " - "should be the size of the indices tensor, but it appears not."); - IndexType idx = indices[current]; - CAFFE_ENFORCE( - 0 <= idx && idx < N, - "Index ", - current, - " is out of bounds: ", - idx, - ", range 0 to ", - N); - ++current; - } - } - CAFFE_ENFORCE_EQ( - current, - indices_size, - "Your input seems to be incorrect: the sum of lengths values should be " - "the size of the indices tensor, but it appears not."); - - return false; } + + if (success) { + return true; + } + + int64_t current = 0; + for (int m = 0; m < M; ++m) { + for (int i = 0; i < lengths[m]; ++i) { + CAFFE_ENFORCE_LT( + current, + indices_size, + "Your input seems to be incorrect: the sum of lengths values " + "should be the size of the indices tensor, but it appears not."); + IndexType idx = indices[current]; + CAFFE_ENFORCE( + 0 <= idx && idx < N, + "Index ", + current, + " is out of bounds: ", + idx, + ", range 0 to ", + N); + ++current; + } + } + CAFFE_ENFORCE_EQ( + current, + indices_size, + "Your input seems to be incorrect: the sum of lengths values should be " + "the size of the indices tensor, but it appears not."); + + return false; #endif // delegate work to perfkernel that branches based on architecture @@ -188,8 +234,14 @@ class CPUSparseLengthsReductionOp : public Operator { #ifdef USE_FBGEMM private: std::int64_t last_block_size{-1}; - fbgemm::EmbeddingSpMDMKernelSignature::Type kernel32_; - fbgemm::EmbeddingSpMDMKernelSignature::Type kernel64_; + fbgemm::EmbeddingSpMDMKernelSignature::Type + kernel_fp32_i32_; + fbgemm::EmbeddingSpMDMKernelSignature::Type + kernel_fp32_i64_; + fbgemm::EmbeddingSpMDMKernelSignature::Type + kernel_fp16_i32_; + fbgemm::EmbeddingSpMDMKernelSignature::Type + kernel_fp16_i64_; #endif };