Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/30144

Create a script to produce a libtorch that contains only the ops needed by specific models. Developers can use this workflow to further optimize mobile build size. We need to keep a dummy stub for unused (stripped) ops because some JIT-side logic requires certain function schemas to exist in the JIT op registry.

Test Steps:

1. Build the "dump_operator_names" binary and use it to dump the root ops needed by a specific model:
```
build/bin/dump_operator_names --model=mobilenetv2.pk --output=mobilenetv2.yaml
```
2. The MobileNetV2 model should use the following ops:
```
- aten::t
- aten::dropout
- aten::mean.dim
- aten::add.Tensor
- prim::ListConstruct
- aten::addmm
- aten::_convolution
- aten::batch_norm
- aten::hardtanh_
- aten::mm
```
NOTE: for some reason the tool outputs "aten::addmm" while the model actually uses "aten::mm"; you need to fix this manually for now.
3. Run the custom build script locally (using Android as an example):
```
SELECTED_OP_LIST=mobilenetv2.yaml scripts/build_pytorch_android.sh armeabi-v7a
```
4. Check out the demo app that uses the locally built library instead of downloading it from the jcenter repo:
```
git clone --single-branch --branch custom_build git@github.com:ljk53/android-demo-app.git
```
5. Copy the locally built libraries into the demo app folder:
```
find ${HOME}/src/pytorch/android -name '*.aar' -exec cp {} ${HOME}/src/android-demo-app/HelloWorldApp/app/libs/ \;
```
6. Build the demo app with the locally built libtorch:
```
cd ${HOME}/src/android-demo-app/HelloWorldApp
./gradlew clean && ./gradlew assembleDebug
```
7. Install and run the demo app. The in-APK arm-v7 libpytorch_jni.so build size is reduced from 5.5M to 2.9M.

Test Plan: Imported from OSS

Differential Revision: D18612127

Pulled By: ljk53

fbshipit-source-id: fa8d5e1d3259143c7346abd1c862773be8c7e29a
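For context on the dummy stub mentioned above: a stripped operator keeps its schema in the registry but is wired to a failing implementation. A minimal sketch of what a generated entry for a stripped op might look like (the exact form is emitted by the codegen; the aten::addmm schema here is only an example):
```cpp
// Hypothetical generated entry for an op excluded from SELECTED_OP_LIST:
// the schema remains queryable by JIT passes, but executing the op fails
// with "Operator has been stripped in the custom build."
Operator(
    "aten::addmm(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor",
    DUMMY_OPERATION,
    atenOperatorOptions())
```
DUMMY_OPERATION and atenOperatorOptions are defined in the template file below.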
#include "torch/csrc/jit/operator.h"
|
|
#include "torch/csrc/jit/custom_operator.h"
|
|
|
|
#include "torch/csrc/autograd/profiler.h"
|
|
#include "torch/csrc/autograd/generated/variable_factories.h"
|
|
|
|
#include <ATen/ATen.h>
|
|
#include <ATen/core/functional.h>
|
|
#include <ATen/core/interned_strings.h>
|
|
|
|
#include <algorithm>
|
|
#include <array>
|
|
#include <cstddef>
|
|
#include <cstring>
|
|
#include <sstream>
|
|
#include <stdexcept>
|
|
#include <tuple>
|
|
#include <unordered_map>
|
|
#include <unordered_set>
|
|
#include <utility>
|
|
#include <vector>
|
|
|
|
// ${generated_comment}
|
|
|
|
// NOTE [Sharded File]: This file is generated in a sharded fashion to speed up
|
|
// incremental rebuilds. See the comment at the top of
|
|
// templates/VariableType.cpp for an analogous, in-depth discussion.
|
|
//
|
|
// Note that unlike VariableType.cpp, when sharding this file we take
|
|
// care to generate all overloads of a particular name in a single
|
|
// file and in a particular order. See gen_jit_dispatch.py for
|
|
// details.
|
|
|
|
namespace torch { namespace jit {

using autograd::Variable;
using autograd::variable_list;
using at::Scalar;
using at::ScalarType;
using at::Tensor;
using at::TensorOptions;
using at::DeviceGuard;
using at::MemoryFormat;

using ::c10::fmap;
using ::c10::filter;
namespace {

// TODO: remove toOptionalTensor and toListOfOptionalTensor
// when we remove the undefined tensor semantics from TH.

// XXX: This function specializes IValue for the tensor type in the
// interpreter; it should only be used in this file.
at::Tensor toOptionalTensor(const IValue& v) {
  if (v.isNone()) {
    return at::Tensor();
  }
  return v.toTensor();
}

// XXX: This function specializes IValue for the list-of-optional-tensor
// type in the interpreter; it should only be used in this file.
std::vector<Tensor> toListOfOptionalTensor(const IValue& v) {
  // v is a list of optional tensors; iterate over it as a generic list.
  auto vlist = v.toGenericListRef();
  std::vector<Tensor> res;

  for (const IValue& v : vlist) {
    res.emplace_back(toOptionalTensor(v));
  }
  return res;
}
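// Example (illustrative only, not part of the generated file): a None
// argument maps to an undefined tensor, matching the undefined tensor
// semantics mentioned in the TODO above:
//
//   IValue none;                              // default IValue is None
//   at::Tensor t = toOptionalTensor(none);
//   AT_ASSERT(!t.defined());
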
template<size_t N>
std::array<bool, N> as_bool_array(const c10::List<bool>& list) {
  std::array<bool, N> res;
  AT_ASSERT(list.size() == N);
  std::copy(list.begin(), list.end(), res.begin());
  return res;
}
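// Example (illustrative only): unpacking a fixed-size bool list argument,
// such as the output_mask taken by some backward ops:
//
//   c10::List<bool> mask({true, false, true});
//   std::array<bool, 3> arr = as_bool_array<3>(mask);
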
// All ATen ops registered below derive their alias analysis information
// from the annotations in their schema strings.
c10::OperatorOptions atenOperatorOptions() {
  c10::OperatorOptions result;
  result.setAliasAnalysis(c10::AliasAnalysisKind::FROM_SCHEMA);
  return result;
}
// Stub implementation registered in place of ops that were stripped from
// a custom build; the op's schema stays in the registry, but calling it
// fails with a clear error.
int (*DUMMY_OPERATION)(Stack&) = [](Stack& stack) -> int {
  TORCH_CHECK(false, "Operator has been stripped in the custom build.");
  return 0;
};
RegisterOperators reg(
    {Operator(
         "aten::get_device(Tensor self) -> int",
         [](Stack& stack) {
           RECORD_FUNCTION("get_device", std::vector<c10::IValue>());
           auto result =
               at::get_device((std::move(peek(stack, 0, 1))).toTensor());
           drop(stack, 1);
           pack(stack, std::move(result));
           return 0;
         },
         atenOperatorOptions()),
     Operator(
         "aten::storage_offset(Tensor self) -> int",
         [](Stack& stack) {
           RECORD_FUNCTION("storage_offset", std::vector<c10::IValue>());
           auto result =
               ((std::move(peek(stack, 0, 1))).toTensor()).storage_offset();
           drop(stack, 1);
           pack(stack, std::move(result));
           return 0;
         },
         atenOperatorOptions()),
     Operator(
         "aten::is_contiguous(Tensor self) -> bool",
         [](Stack& stack) {
           RECORD_FUNCTION("is_contiguous", std::vector<c10::IValue>());
           auto result =
               ((std::move(peek(stack, 0, 1))).toTensor()).is_contiguous();
           drop(stack, 1);
           pack(stack, std::move(result));
           return 0;
         },
         atenOperatorOptions()),

     // Generated operators
     ${constructors}});
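// Note on the stack calling convention used above (illustrative only):
// peek(stack, i, n) reads argument i of n from the top of the stack,
// drop(stack, n) pops the n arguments, and pack pushes the result back.
// For example, invoking aten::is_contiguous through the interpreter:
//
//   Stack stack;
//   push(stack, torch::ones({2, 3}));  // single Tensor argument
//   // ...the lambda above runs, leaving one bool IValue (true) on top...
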
} // anon namespace

}} // namespace torch::jit