mirror of
https://github.com/saymrwulf/pytorch.git
synced 2026-05-14 20:57:59 +00:00
Test Plan: revert-hammer
Differential Revision:
D25325039 (f5e9ffbc27)
Original commit changeset: 2043d6e63c7d
fbshipit-source-id: 5377dd2aa7c6f58c8641c956b7642c7c559bbc40
23 lines | 709 B | Text
#include <cuda.h>
#include <cuda_runtime.h>

#include <ATen/ATen.h>
#include <c10/cuda/CUDAException.h>
// Elementwise kernel: output[i] = sigmoid(x[i]) + sigmoid(y[i]).
//
// Expects a 1-D launch where each thread handles at most one element;
// threads whose global index falls past `size` exit via the guard clause.
// Uses the fast-math intrinsic __expf, so results may differ from expf()
// by a few ulps — a deliberate speed/accuracy trade-off.
__global__ void sigmoid_add_kernel(
    const float* __restrict__ x,
    const float* __restrict__ y,
    float* __restrict__ output,
    const int size) {
  const int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i >= size) {
    return;
  }
  // sigmoid(v) = 1 / (1 + e^{-v})
  const float sig_x = 1.0f / (1.0f + __expf(-x[i]));
  const float sig_y = 1.0f / (1.0f + __expf(-y[i]));
  output[i] = sig_x + sig_y;
}
|
|
|
|
// Host-side launcher for sigmoid_add_kernel.
//
// Preconditions: x, y, and output are DEVICE pointers to at least `size`
// floats. Launches on the current stream and returns without synchronizing;
// the caller is responsible for any synchronization it needs.
void sigmoid_add_cuda(const float* x, const float* y, float* output, int size) {
  if (size <= 0) {
    // blocks would be 0 below — a grid dimension of 0 is an invalid launch
    // configuration. Nothing to compute anyway.
    return;
  }
  const int threads = 1024;
  // Ceiling division so the final partial block still covers the tail.
  const int blocks = (size + threads - 1) / threads;
  sigmoid_add_kernel<<<blocks, threads>>>(x, y, output, size);
  // Kernel launches do not return errors directly; surface launch-config
  // failures here instead of letting a sticky error corrupt later CUDA calls.
  C10_CUDA_KERNEL_LAUNCH_CHECK();
}