mirror of
https://github.com/saymrwulf/pytorch.git
synced 2026-05-15 21:00:47 +00:00
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/17456. Uses an instruction sequence similar to the function in fbgemm/src/QuantUtilAvx2.cc; elementwise_sum_benchmark added. Reviewed By: protonu. Differential Revision: D14205695. fbshipit-source-id: 84939c9d3551f123deec3baf7086c8d31fbc873e
36 lines
820 B
C++
36 lines
820 B
C++
#include <chrono>
#include <climits>
#include <cstdint>
#include <cstdlib>
#include <iostream>
#include <vector>

#include "utility_dnnlowp_ops.h"
|
|
|
|
using namespace std;
|
|
|
|
// Micro-benchmark for caffe2::internal::ElementWiseSumAVX2 on uint8_t buffers.
//
// Usage: <binary> [LEN]
//   LEN  number of elements in each buffer. Defaults to 65536 and must be a
//        positive integer that fits in an int. As with atoi, characters after
//        the leading number are ignored.
//
// Fills two input buffers with a deterministic pattern, runs the kernel once,
// and prints the throughput in GB/s, counting three buffers of LEN bytes
// (two inputs plus one output). Returns 0 on success, 1 if LEN is invalid.
int main(int argc, const char* argv[]) {
  int LEN = 65536;
  if (argc > 1) {
    // strtol rather than atoi: atoi cannot signal a parse failure and has
    // undefined behavior when the value does not fit in an int.
    char* end = nullptr;
    const long requested = std::strtol(argv[1], &end, 10);
    if (end == argv[1] || requested <= 0 || requested > INT_MAX) {
      std::cerr << "LEN must be a positive integer no larger than " << INT_MAX
                << ", got '" << argv[1] << "'\n";
      return 1;
    }
    LEN = static_cast<int>(requested);
  }

  std::vector<std::uint8_t> a(LEN), b(LEN), c_avx2(LEN);
  for (int i = 0; i < LEN; ++i) {
    a[i] = static_cast<std::uint8_t>(i % 256);
    // Reduce modulo 256 before doubling so the product cannot overflow int
    // for large LEN; the stored value equals (i * 2) % 256.
    b[i] = static_cast<std::uint8_t>(((i % 256) * 2) % 256);
  }

  // steady_clock is monotonic, so the measured interval cannot be distorted
  // by wall-clock adjustments the way a system_clock interval can.
  const auto start = std::chrono::steady_clock::now();
  // NOTE(review): the trailing float/int arguments are presumably
  // (scale, zero_point) pairs for a, b and the output — confirm against
  // utility_dnnlowp_ops.h.
  caffe2::internal::ElementWiseSumAVX2<std::uint8_t, false>(
      a.data(),
      b.data(),
      c_avx2.data(),
      a.size(),
      1.0f,
      11,
      2.0f,
      22,
      3.0f,
      33);
  const double dt =
      std::chrono::duration<double>(std::chrono::steady_clock::now() - start)
          .count();

  const double bytes = 3. * LEN * sizeof(a[0]);
  std::cout << bytes / dt / 1e9 << " GB/s" << '\n';

  return 0;
}
|