mirror of
https://github.com/saymrwulf/pytorch.git
synced 2026-05-14 20:57:59 +00:00
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/49105 (1) Add a safety check `C10_CUDA_KERNEL_LAUNCH_CHECK()` after each kernel launch. This diff only changes the files inside the directory /fbsource/fbcode/caffe2/modules/, /fbsource/fbcode/caffe2/fb/, /fbsource/fbcode/caffe2/test/. (2) Get rid of old check `AT_CUDA_CHECK(cudaGetLastError())` when necessary. Test Plan: Test build: ``` buck build //caffe2/modules/detectron: buck build //caffe2/torch/fb/: ``` To check for launches without checks: ``` python3 caffe2/torch/testing/check_kernel_launches.py ``` Make sure none of the updated files are in the returned list. Reviewed By: r-barnes Differential Revision: D25325039 fbshipit-source-id: 2043d6e63c7d029c35576d3101c18247ffe92f01
24 lines
743 B
Text
24 lines
743 B
Text
#include <cuda.h>
|
|
#include <cuda_runtime.h>
|
|
|
|
#include <ATen/ATen.h>
|
|
|
|
// Elementwise kernel: output[i] = sigmoid(x[i]) + sigmoid(y[i]).
// Expects a 1-D launch whose grid covers at least `size` threads; each
// thread handles at most one element. Uses the fast-math __expf
// intrinsic, so results may differ from expf by a few ulps.
__global__ void sigmoid_add_kernel(
    const float* __restrict__ x,
    const float* __restrict__ y,
    float* __restrict__ output,
    const int size) {
  const int i = blockDim.x * blockIdx.x + threadIdx.x;
  // Tail guard: the grid is rounded up, so trailing threads do nothing.
  if (i >= size) {
    return;
  }
  const float sig_x = 1.0f / (1.0f + __expf(-x[i]));
  const float sig_y = 1.0f / (1.0f + __expf(-y[i]));
  output[i] = sig_x + sig_y;
}
|
|
|
|
// Host launcher for sigmoid_add_kernel: computes
// output[i] = sigmoid(x[i]) + sigmoid(y[i]) for i in [0, size).
// All three pointers must be device pointers. The launch goes to the
// current stream; launch-configuration errors are surfaced through
// C10_CUDA_KERNEL_LAUNCH_CHECK().
void sigmoid_add_cuda(const float* x, const float* y, float* output, int size) {
  // Guard: with size == 0 the ceil-div below yields blocks == 0, and a
  // zero-sized grid is an invalid launch configuration (cudaErrorInvalidConfiguration).
  // Treat empty (or negative) input as a no-op instead of erroring.
  if (size <= 0) {
    return;
  }
  const int threads = 1024;
  const int blocks = (size + threads - 1) / threads;  // ceil-div to cover the tail
  sigmoid_add_kernel<<<blocks, threads>>>(x, y, output, size);
  C10_CUDA_KERNEL_LAUNCH_CHECK();
}
|