diff --git a/benchmarks/static_runtime/deep_wide_pt.h b/benchmarks/static_runtime/deep_wide_pt.h index 73a943146f2..5b18c96364b 100644 --- a/benchmarks/static_runtime/deep_wide_pt.h +++ b/benchmarks/static_runtime/deep_wide_pt.h @@ -60,7 +60,7 @@ struct DeepAndWideFast : torch::nn::Module { auto dp_unflatten = at::cpu::bmm(ad_emb_packed, user_emb_t); // auto dp = at::native::flatten(dp_unflatten, 1); auto dp = dp_unflatten.view({dp_unflatten.size(0), 1}); - auto input = at::native::_cat_cpu({dp, wide_preproc}, 1); + auto input = at::cpu::cat({dp, wide_preproc}, 1); // fc1 = torch::nn::functional::linear(input, fc_w_, fc_b_); fc_w_t_ = torch::t(fc_w_); @@ -114,7 +114,7 @@ struct DeepAndWideFast : torch::nn::Module { // Potential optimization: we can replace cat with carefully constructed // tensor views on the output that are passed to the _out ops above. - at::native::_cat_out_cpu( + at::cpu::cat_outf( {prealloc_tensors[5], prealloc_tensors[2]}, 1, prealloc_tensors[6]); at::cpu::addmm_out( prealloc_tensors[7], fc_b_, prealloc_tensors[6], fc_w_t_, 1, 1);