[NNAPI EP] Add support of Elu, merge in NNAPI updates for API level 30 (#8001)

* Add elu, integrate new Android NNAPI API changes * add slice check * update previous typo * Move sdk level check to nnapi feature level check * update readme
2026-07-05 04:17:53 +00:00 · 2021-06-09 12:39:02 -07:00 · 2021-06-09 12:39:02 -07:00 · f013b0c0eb
commit f013b0c0eb
parent aa45545af7
17 changed files with 1182 additions and 149 deletions
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc
@ -197,9 +197,9 @@ bool HasValidQuantizationScales(const InitializedTensorSet& initializers, const
        return false;
      }

-      if (params.android_sdk_ver < 29) {
+      if (params.android_feature_level < ANEURALNETWORKS_FEATURE_LEVEL_3) {
        LOGS_DEFAULT(VERBOSE) << op_type << " only supports per-channel quantization on Android API 29+, "
-                              << "system API level: " << params.android_sdk_ver;
+                              << "system NNAPI feature level: " << params.android_feature_level;
        return false;
      }

@ -476,9 +476,11 @@ bool IsInputSupported(const NodeArg& input, const std::string& parent_name) {

 std::vector<std::vector<size_t>> GetSupportedNodes(const GraphViewer& graph_viewer, const OpSupportCheckParams& params) {
  std::vector<std::vector<size_t>> supported_node_groups;
-  if (params.android_sdk_ver < ORT_NNAPI_MIN_API_LEVEL) {
-    LOGS_DEFAULT(WARNING) << "All ops will fallback to CPU EP, because Android API level [" << params.android_sdk_ver
-                          << "] is lower than minimal supported API level [" << ORT_NNAPI_MIN_API_LEVEL
+  if (params.android_feature_level < ORT_NNAPI_MIN_API_LEVEL) {
+    LOGS_DEFAULT(WARNING) << "All ops will fallback to CPU EP, because system NNAPI feature level ["
+                          << params.android_feature_level
+                          << "] is lower than minimal supported NNAPI API feature level ["
+                          << ORT_NNAPI_MIN_API_LEVEL
                          << "] of this build for NNAPI";
    return supported_node_groups;
  }
@ -537,5 +539,15 @@ std::string Shape2String(const std::vector<uint32_t>& shape) {
  return os.str();
 }

+// Returns true when the input of `node` at position `input_idx` is found in `initializers`.
+// `input_name` is a human-readable label, it is only used in the VERBOSE log written when the check fails.
+// An `input_idx` past the end of the node's inputs is reported as "not an initializer".
+bool CheckIsInitializerTensor(const InitializedTensorSet& initializers, const Node& node,
+                              size_t input_idx, const char* input_name) {
+  const auto input_defs(node.InputDefs());
+  // Check the index before dereferencing, indexing past the end of the inputs is undefined behavior
+  if (input_idx >= input_defs.size() ||
+      !Contains(initializers, input_defs[input_idx]->Name())) {
+    LOGS_DEFAULT(VERBOSE) << input_name << " of " << node.OpType() << " must be an initializer tensor";
+    return false;
+  }
+
+  return true;
+}
+
 }  // namespace nnapi
 }  // namespace onnxruntime
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h
@ -10,7 +10,7 @@
 // This is the minimal Android API Level required by ORT NNAPI EP to run
 // ORT running on any host system with Android API level less than this will fall back to CPU EP
 #ifndef ORT_NNAPI_MIN_API_LEVEL
-#define ORT_NNAPI_MIN_API_LEVEL 27
+#define ORT_NNAPI_MIN_API_LEVEL ANEURALNETWORKS_FEATURE_LEVEL_1
 #endif

 // This is the maximum Android API level supported in the ort model conversion for NNAPI EP
@ -132,5 +132,9 @@ std::vector<std::vector<size_t>> GetSupportedNodes(const GraphViewer& graph_view
 // Get string representation of a Shape
 std::string Shape2String(const std::vector<uint32_t>& shape);

+// Check that the input of the given node at position input_idx is an initializer tensor
+// input_name is only used to identify the input in the log message written when the check fails
+bool CheckIsInitializerTensor(const InitializedTensorSet& initializers, const Node& node,
+                              size_t input_idx, const char* input_name) ORT_MUST_USE_RESULT;
+
 }  // namespace nnapi
 }  // namespace onnxruntime
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc
@ -22,8 +22,8 @@ using std::vector;
 ModelBuilder::ModelBuilder(const GraphViewer& graph_viewer)
    : nnapi_(NnApiImplementation()), graph_viewer_(graph_viewer) {}

-int32_t ModelBuilder::GetAndroidSdkVer() const {
-  return nnapi_ ? nnapi_->android_sdk_version : 0;
+int32_t ModelBuilder::GetNNAPIFeatureLevel() const {
+  return nnapi_ ? nnapi_->nnapi_runtime_feature_level : 0;
 }

 // Scalar operand is copied into the model, no need to persist
@ -78,7 +78,7 @@ static size_t GetPaddedByteSize(size_t size) {

 Status ModelBuilder::GetTargetDevices() {
  // GetTargetDevices is only supported on API 29+
-  if (GetAndroidSdkVer() < 29)
+  if (GetNNAPIFeatureLevel() < ANEURALNETWORKS_FEATURE_LEVEL_3)
    return Status::OK();

  if (target_device_option_ == TargetDeviceOption::ALL_DEVICES)
@ -417,10 +417,10 @@ Status ModelBuilder::AddNewNNAPIOperand(const OperandType& operand_type, uint32_
  index = next_index_++;

  if (operand_type.channelQuant) {
-    if (GetAndroidSdkVer() < 29) {
+    if (GetNNAPIFeatureLevel() < ANEURALNETWORKS_FEATURE_LEVEL_3) {
      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
                             "Per-channel quantization is only supported on Android API level 29+,",
-                             " system API level: ", GetAndroidSdkVer());
+                             " system NNAPI feature level: ", GetNNAPIFeatureLevel());
    }

    RETURN_STATUS_ON_ERROR(nnapi_->ANeuralNetworksModel_setOperandSymmPerChannelQuantParams(
@ -535,7 +535,7 @@ Status ModelBuilder::Compile(std::unique_ptr<Model>& model) {
      "on identifyInputsAndOutputs");

  // relax fp32tofp16 is only available on API 28+
-  if (use_fp16_ && GetAndroidSdkVer() > 27) {
+  if (use_fp16_ && GetNNAPIFeatureLevel() > ANEURALNETWORKS_FEATURE_LEVEL_1) {
    RETURN_STATUS_ON_ERROR_WITH_NOTE(
        nnapi_->ANeuralNetworksModel_relaxComputationFloat32toFloat16(
            nnapi_model_->model_, true),
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h
@ -37,7 +37,7 @@ class ModelBuilder {

  Status Compile(std::unique_ptr<Model>& model) ORT_MUST_USE_RESULT;

-  int32_t GetAndroidSdkVer() const;
+  int32_t GetNNAPIFeatureLevel() const;

  // Add an NNAPI operation (operator)
  Status AddOperation(int op, const std::vector<uint32_t>& input_indices,
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc
@ -223,9 +223,9 @@ static Status AddSqueezeOp(ModelBuilder& model_builder,
                           const std::string& node_name,
                           const std::string& input, const std::string& output,
                           vector<int32_t> axes) {
-  if (model_builder.GetAndroidSdkVer() < 28) {
+  if (model_builder.GetNNAPIFeatureLevel() < ANEURALNETWORKS_FEATURE_LEVEL_2) {
    return ORT_MAKE_STATUS(
-        ONNXRUNTIME, FAIL, "Squeeze is not supported on API level ", model_builder.GetAndroidSdkVer());
+        ONNXRUNTIME, FAIL, "Squeeze is not supported on API level ", model_builder.GetNNAPIFeatureLevel());
  }

  auto& shaper(model_builder.GetShaper());
@ -739,7 +739,7 @@ class BaseOpBuilder : public IOpBuilder {

 Status BaseOpBuilder::AddToModelBuilder(ModelBuilder& model_builder, const Node& node) const {
  OpSupportCheckParams params{
-      model_builder.GetAndroidSdkVer(),
+      model_builder.GetNNAPIFeatureLevel(),
      model_builder.UseNCHW(),
  };

@ -1310,7 +1310,7 @@ Status PoolOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N
  ADD_SCALAR_OPERAND(model_builder, input_indices, kernel_shape[0]);
  ADD_SCALAR_OPERAND(model_builder, input_indices, fuse_code);

-  if (model_builder.GetAndroidSdkVer() > 28) {  // nchw only supported on api 29+
+  if (model_builder.GetNNAPIFeatureLevel() > ANEURALNETWORKS_FEATURE_LEVEL_2) {  // nchw only supported on api 29+
    ADD_SCALAR_OPERAND(model_builder, input_indices, use_nchw);
  }

@ -1554,7 +1554,7 @@ Status ConvOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N
  int32_t fuse_code = model_builder.FindActivation(node, *node.OutputDefs()[0]);
  ADD_SCALAR_OPERAND(model_builder, input_indices, fuse_code);

-  if (model_builder.GetAndroidSdkVer() > 28) {
+  if (model_builder.GetNNAPIFeatureLevel() > ANEURALNETWORKS_FEATURE_LEVEL_2) {
    ADD_SCALAR_OPERAND(model_builder, input_indices, use_nchw);

    // 1. NNAPI Grouped Conv does not support dilations
@ -1644,13 +1644,13 @@ Status SoftMaxOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, cons
  auto& shaper(model_builder.GetShaper());
  const auto& operand_indices(model_builder.GetOperandIndices());
  const auto& operand_types(model_builder.GetOperandTypes());
-  const auto android_sdk_ver = model_builder.GetAndroidSdkVer();
+  const auto android_feature_level = model_builder.GetNNAPIFeatureLevel();
  NodeAttrHelper helper(node);

  auto input = node.InputDefs()[0]->Name();
  bool input_is_nhwc = model_builder.IsOperandNHWC(input);
  bool output_is_nhwc = input_is_nhwc;
-  if (android_sdk_ver < 29) {
+  if (android_feature_level < ANEURALNETWORKS_FEATURE_LEVEL_3) {
    if (model_builder.IsOperandNHWC(input)) {
      output_is_nhwc = false;
      // We want to transpose nhwc operand back to nchw before softmax
@ -1670,7 +1670,7 @@ Status SoftMaxOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, cons
  input_indices.push_back(operand_indices.at(input));
  ADD_SCALAR_OPERAND(model_builder, input_indices, beta);

-  if (android_sdk_ver > 28) {
+  if (android_feature_level > ANEURALNETWORKS_FEATURE_LEVEL_2) {
    // you can only specify axis for android api level 29+
    ADD_SCALAR_OPERAND(model_builder, input_indices, axis);
  }
@ -2227,12 +2227,12 @@ Status LRNOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const No
  const auto& operand_indices(model_builder.GetOperandIndices());
  const auto& operand_types(model_builder.GetOperandTypes());
  NodeAttrHelper helper(node);
-  const auto android_sdk_ver = model_builder.GetAndroidSdkVer();
+  const auto android_feature_level = model_builder.GetNNAPIFeatureLevel();

  auto input = node.InputDefs()[0]->Name();
  const auto& output = node.OutputDefs()[0]->Name();
  bool output_is_nhwc = model_builder.IsOperandNHWC(input);
-  if (android_sdk_ver < 29) {
+  if (android_feature_level < ANEURALNETWORKS_FEATURE_LEVEL_3) {
    // on android api level 28, we need to transpose the nchw input to nhwc
    output_is_nhwc = true;
    if (!model_builder.IsOperandNHWC(input)) {
@ -2256,7 +2256,7 @@ Status LRNOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const No
  ADD_SCALAR_OPERAND(model_builder, input_indices, beta);

  // specify axis is only available on api level >= 29
-  if (android_sdk_ver > 28) {
+  if (android_feature_level > ANEURALNETWORKS_FEATURE_LEVEL_2) {
    // ONNX LRN is always performed on C dimension
    int32_t axis = output_is_nhwc
                       ? 3   // nhwc
@ -2359,7 +2359,7 @@ Status ResizeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const
  const auto& initializers(model_builder.GetInitializerTensors());
  NodeAttrHelper helper(node);
  const auto input_defs = node.InputDefs();
-  const auto android_sdk_ver = model_builder.GetAndroidSdkVer();
+  const auto android_feature_level = model_builder.GetNNAPIFeatureLevel();
  const auto& output = node.OutputDefs()[0]->Name();

  auto input = input_defs[0]->Name();
@ -2409,7 +2409,7 @@ Status ResizeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const
  ADD_SCALAR_OPERAND(model_builder, input_indices, output_w);
  ADD_SCALAR_OPERAND(model_builder, input_indices, output_h);

-  if (android_sdk_ver > 28) {
+  if (android_feature_level > ANEURALNETWORKS_FEATURE_LEVEL_2) {
    // using nchw is only available on API level 29
    ADD_SCALAR_OPERAND(model_builder, input_indices, use_nchw);
  }
@ -2417,7 +2417,7 @@ Status ResizeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const
  // Currently we only support align_corners and half_pixel on bilinear resize
  // TODO, investigate nearest neighbor resize difference between NNAPI(based on TF) and ONNX
  if (is_linear_resize) {
-    if (android_sdk_ver > 29 && (using_align_corners || using_half_pixel)) {
+    if (android_feature_level > ANEURALNETWORKS_FEATURE_LEVEL_3 && (using_align_corners || using_half_pixel)) {
      ADD_SCALAR_OPERAND(model_builder, input_indices, using_align_corners);
      if (using_half_pixel)
        ADD_SCALAR_OPERAND(model_builder, input_indices, using_half_pixel);
@ -2533,6 +2533,34 @@ Status MinMaxOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const

 #pragma endregion

+#pragma region op_elu
+
+// Op builder for the ONNX Elu operator.
+// Only AddToModelBuilderImpl is overridden, everything else is inherited from BaseOpBuilder
+class EluOpBuilder : public BaseOpBuilder {
+ private:
+  Status AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node) const override ORT_MUST_USE_RESULT;
+};
+
+// Adds an ANEURALNETWORKS_ELU operation for the given Elu node to the model builder.
+// The operation inputs are the node's first input followed by the scalar alpha
+// (node attribute "alpha", 1.0f when the attribute is absent).
+// The output reuses the input's operand type, shape and NHWC flag.
+Status EluOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node) const {
+  const auto& input = node.InputDefs()[0]->Name();
+  const auto& output = node.OutputDefs()[0]->Name();
+
+  // The output keeps whatever layout the input operand currently has
+  const bool output_is_nhwc = model_builder.IsOperandNHWC(input);
+
+  // The output shape is the input shape, it must be registered before shaper[output] is read
+  auto& shaper(model_builder.GetShaper());
+  ORT_RETURN_IF_ERROR(shaper.Identity(input, output));
+  const OperandType output_operand_type(model_builder.GetOperandTypes().at(input).type, shaper[output]);
+
+  NodeAttrHelper helper(node);
+  const auto alpha = helper.Get("alpha", 1.0f);
+
+  // Operand order: input tensor first, then alpha
+  std::vector<uint32_t> input_indices;
+  input_indices.push_back(model_builder.GetOperandIndices().at(input));
+  ADD_SCALAR_OPERAND(model_builder, input_indices, alpha);
+
+  return model_builder.AddOperation(ANEURALNETWORKS_ELU, input_indices,
+                                    {output}, {output_operand_type}, {output_is_nhwc});
+}
+
+#pragma endregion
+
 #pragma region CreateGetOpBuilders

 // The reason we use macros to create OpBuilders is for easy exclusion in build if certain op(s) are not used
@ -2615,6 +2643,8 @@ static OpBuilderRegistrations CreateOpBuilderRegistrations() {
    NNAPI_EP_ADD_SHARED_OP_BUILDER("Max", MinMaxOpBuilder);
  }

+  NNAPI_EP_ADD_SINGLE_OP_BUILDER("Elu", EluOpBuilder);
+
  return op_registrations;
 }

--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.cc
@ -76,10 +76,10 @@ class BaseOpSupportChecker : public IOpSupportChecker {
    return true;
  }

-  virtual int32_t GetMinSupportedSdkVer(const Node& /* node */, const OpSupportCheckParams& /* params */) const {
-    // Android API level 27 is the baseline version of NNAPI,
+  virtual int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, const OpSupportCheckParams& /* params */) const {
+    // ANEURALNETWORKS_FEATURE_LEVEL_1 is the baseline version of NNAPI,
    // There is no NNAPI support for Android API level 26-
-    return 27;
+    return ANEURALNETWORKS_FEATURE_LEVEL_1;
  }

  virtual bool HasSupportedInputsImpl(const Node& node) const;
@ -104,11 +104,11 @@ class BaseOpSupportChecker : public IOpSupportChecker {

 bool BaseOpSupportChecker::IsOpSupported(const InitializedTensorSet& initializers, const Node& node,
                                         const OpSupportCheckParams& params) const {
-  int32_t required_sdk_ver = GetMinSupportedSdkVer(node, params);
-  if (required_sdk_ver > params.android_sdk_ver) {
-    LOGS_DEFAULT(VERBOSE) << "Current Android API level [" << params.android_sdk_ver
+  int32_t required_feature_level = GetMinSupportedNNAPIFeatureLevel(node, params);
+  if (required_feature_level > params.android_feature_level) {
+    LOGS_DEFAULT(VERBOSE) << "Current Android API level [" << params.android_feature_level
                          << "], Operator [" << node.OpType()
-                          << "] is only supported on API >" << required_sdk_ver;
+                          << "] is only supported on API >" << required_feature_level;
    return false;
  }

@ -180,7 +180,7 @@ class BinaryOpSupportChecker : public BaseOpSupportChecker {
      const std::string& op_type, OpSupportCheckerRegistrations& op_registrations);

 private:
-  int32_t GetMinSupportedSdkVer(const Node& node, const OpSupportCheckParams& params) const override;
+  int32_t GetMinSupportedNNAPIFeatureLevel(const Node& node, const OpSupportCheckParams& params) const override;
  bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node,
                         const OpSupportCheckParams& params) const override;
  bool HasSupportedInputsImpl(const Node& node) const override;
@ -201,18 +201,18 @@ class BinaryOpSupportChecker : public BaseOpSupportChecker {
      });
 }

-int32_t BinaryOpSupportChecker::GetMinSupportedSdkVer(
+int32_t BinaryOpSupportChecker::GetMinSupportedNNAPIFeatureLevel(
    const Node& node, const OpSupportCheckParams& /* params */) const {
  const auto& op(node.OpType());
  if (op == "Sub" || op == "Div") {
-    return 28;
+    return ANEURALNETWORKS_FEATURE_LEVEL_2;
  }

  if (op == "Pow") {
-    return 29;
+    return ANEURALNETWORKS_FEATURE_LEVEL_3;
  }

-  return 27;
+  return ANEURALNETWORKS_FEATURE_LEVEL_1;
 }

 int BinaryOpSupportChecker::GetMinSupportedOpSet(const Node& node) const {
@ -319,8 +319,8 @@ class TransposeOpSupportChecker : public BaseOpSupportChecker {
  bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node,
                         const OpSupportCheckParams& params) const override;

-  int32_t GetMinSupportedSdkVer(const Node& /* node */, const OpSupportCheckParams& /* params */) const override {
-    return 28;
+  int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, const OpSupportCheckParams& /* params */) const override {
+    return ANEURALNETWORKS_FEATURE_LEVEL_2;
  }

  bool HasSupportedInputsImpl(const Node& node) const override;
@ -490,8 +490,8 @@ class PoolOpSupportChecker : public BaseOpSupportChecker {
  bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node,
                         const OpSupportCheckParams& params) const override;

-  int32_t GetMinSupportedSdkVer(const Node& /* node */, const OpSupportCheckParams& params) const override {
-    return params.use_nchw ? 29 : 28;
+  int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, const OpSupportCheckParams& params) const override {
+    return params.use_nchw ? ANEURALNETWORKS_FEATURE_LEVEL_3 : ANEURALNETWORKS_FEATURE_LEVEL_2;
  }

  bool HasSupportedInputsImpl(const Node& node) const override;
@ -667,8 +667,8 @@ class ConvOpSupportChecker : public BaseOpSupportChecker {
  bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node,
                         const OpSupportCheckParams& params) const override;

-  int32_t GetMinSupportedSdkVer(const Node& /* node */, const OpSupportCheckParams& params) const override {
-    return params.use_nchw ? 29 : 28;
+  int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, const OpSupportCheckParams& params) const override {
+    return params.use_nchw ? ANEURALNETWORKS_FEATURE_LEVEL_3 : ANEURALNETWORKS_FEATURE_LEVEL_2;
  }

  bool HasSupportedInputsImpl(const Node& node) const override;
@ -725,9 +725,9 @@ bool ConvOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initial
        return false;
      }

-      if (params.android_sdk_ver < 29) {
+      if (params.android_feature_level < ANEURALNETWORKS_FEATURE_LEVEL_3) {
        LOGS_DEFAULT(VERBOSE) << op_type << " dilations is only supported on Android API level 29+, "
-                              << "actual API level: " << params.android_sdk_ver;
+                              << "actual API level: " << params.android_feature_level;
        return false;
      }
    }
@ -775,8 +775,8 @@ class CastOpSupportChecker : public BaseOpSupportChecker {
  bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node,
                         const OpSupportCheckParams& params) const override;

-  int32_t GetMinSupportedSdkVer(const Node& /* node */, const OpSupportCheckParams& /* params */) const override {
-    return 29;
+  int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, const OpSupportCheckParams& /* params */) const override {
+    return ANEURALNETWORKS_FEATURE_LEVEL_3;
  }

  // Cast opset 5- uses string attribute for to type, is not supported for now
@ -805,8 +805,8 @@ class SoftMaxOpSupportChecker : public BaseOpSupportChecker {
  bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node,
                         const OpSupportCheckParams& params) const override;

-  int32_t GetMinSupportedSdkVer(const Node& /* node */, const OpSupportCheckParams& /* params */) const override {
-    return 28;
+  int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, const OpSupportCheckParams& /* params */) const override {
+    return ANEURALNETWORKS_FEATURE_LEVEL_2;
  }
 };

@ -823,12 +823,12 @@ bool SoftMaxOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& /* i
    return false;
  }

-  if (params.android_sdk_ver < 29) {
+  if (params.android_feature_level < ANEURALNETWORKS_FEATURE_LEVEL_3) {
    NodeAttrHelper helper(node);
    int32_t axis = helper.Get("axis", 1);
    if (axis != 1) {
      LOGS_DEFAULT(VERBOSE)
-          << "SoftMax only support axis 1 on Android API level: " << params.android_sdk_ver
+          << "SoftMax only support axis 1 on Android API level: " << params.android_feature_level
          << " input axis: " << axis;
      return false;
    }
@ -878,7 +878,7 @@ bool GemmOpSupportChecker::HasSupportedInputsImpl(const Node& node) const {
 // Get the bias size (C) of Gemm op
 // ANEURALNETWORKS_FULLY_CONNECTED only supports 1d bias
 // Will test if C of Gemm can be squeezed and return the 1d vector size after squeeze
-static bool GetBiasSize(const Shape& c_shape, int32_t android_sdk_ver, uint32_t& size) {
+static bool GetBiasSize(const Shape& c_shape, int32_t android_feature_level, uint32_t& size) {
  // TODO add support of scalar C for Gemm
  size_t c_dim = c_shape.size();
  if (c_dim == 0) {
@ -886,8 +886,8 @@ static bool GetBiasSize(const Shape& c_shape, int32_t android_sdk_ver, uint32_t&
    return false;
  }

-  if (c_dim != 1 && android_sdk_ver < 28) {
-    LOGS_DEFAULT(VERBOSE) << "C of Gemm can only be 1d tensor for API level " << android_sdk_ver
+  if (c_dim != 1 && android_feature_level < ANEURALNETWORKS_FEATURE_LEVEL_2) {
+    LOGS_DEFAULT(VERBOSE) << "C of Gemm can only be 1d tensor for API level " << android_feature_level
                          << " shape of C, " << Shape2String(c_shape);
    return false;
  }
@ -982,7 +982,7 @@ bool GemmOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initial
        return false;

      uint32_t c_size;
-      if (!GetBiasSize(c_shape, params.android_sdk_ver, c_size))
+      if (!GetBiasSize(c_shape, params.android_feature_level, c_size))
        return false;

      if (c_size != (transB == 0 ? b_shape[1] : b_shape[0])) {
@ -1043,7 +1043,7 @@ class UnaryOpSupportChecker : public BaseOpSupportChecker {
  bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node,
                         const OpSupportCheckParams& params) const override;

-  int32_t GetMinSupportedSdkVer(const Node& node, const OpSupportCheckParams& params) const override;
+  int32_t GetMinSupportedNNAPIFeatureLevel(const Node& node, const OpSupportCheckParams& params) const override;

  bool HasSupportedInputsImpl(const Node& node) const override;

@ -1079,7 +1079,7 @@ bool UnaryOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initia
    return true;
 }

-int32_t UnaryOpSupportChecker::GetMinSupportedSdkVer(
+int32_t UnaryOpSupportChecker::GetMinSupportedNNAPIFeatureLevel(
    const Node& node, const OpSupportCheckParams& /* params */) const {
  const auto& op(node.OpType());
  if (op == "Abs" ||
@ -1088,10 +1088,10 @@ int32_t UnaryOpSupportChecker::GetMinSupportedSdkVer(
      op == "Sin" ||
      op == "Sqrt" ||
      op == "Log") {
-    return 29;
+    return ANEURALNETWORKS_FEATURE_LEVEL_3;
  }

-  return 27;
+  return ANEURALNETWORKS_FEATURE_LEVEL_1;
 }

 bool UnaryOpSupportChecker::HasSupportedInputsImpl(const Node& node) const {
@ -1216,8 +1216,8 @@ class SqueezeOpSupportChecker : public BaseOpSupportChecker {
  bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node,
                         const OpSupportCheckParams& params) const override;

-  int32_t GetMinSupportedSdkVer(const Node& /* node */, const OpSupportCheckParams& /* params */) const override {
-    return 28;
+  int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, const OpSupportCheckParams& /* params */) const override {
+    return ANEURALNETWORKS_FEATURE_LEVEL_2;
  }
 };

@ -1255,8 +1255,8 @@ class QuantizeLinearOpSupportChecker : public BaseOpSupportChecker {
  bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node,
                         const OpSupportCheckParams& params) const override;

-  int32_t GetMinSupportedSdkVer(const Node& /* node */, const OpSupportCheckParams& /* params */) const override {
-    return 29;
+  int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, const OpSupportCheckParams& /* params */) const override {
+    return ANEURALNETWORKS_FEATURE_LEVEL_3;
  }
 };

@ -1296,8 +1296,8 @@ class DequantizeLinearOpSupportChecker : public BaseOpSupportChecker {
  bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node,
                         const OpSupportCheckParams& params) const override;

-  int32_t GetMinSupportedSdkVer(const Node& /* node */, const OpSupportCheckParams& /* params */) const override {
-    return 27;
+  int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, const OpSupportCheckParams& /* params */) const override {
+    return ANEURALNETWORKS_FEATURE_LEVEL_1;
  }
  bool HasSupportedInputsImpl(const Node& node) const override;
 };
@ -1340,8 +1340,8 @@ class LRNOpSupportChecker : public BaseOpSupportChecker {
  bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node,
                         const OpSupportCheckParams& params) const override;

-  int32_t GetMinSupportedSdkVer(const Node& /* node */, const OpSupportCheckParams& /* params */) const override {
-    return 28;
+  int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, const OpSupportCheckParams& /* params */) const override {
+    return ANEURALNETWORKS_FEATURE_LEVEL_2;
  }
 };

@ -1397,7 +1397,7 @@ class ResizeOpSupportChecker : public BaseOpSupportChecker {
  bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node,
                         const OpSupportCheckParams& params) const override;

-  int32_t GetMinSupportedSdkVer(const Node& /* node */, const OpSupportCheckParams& /* params */) const override;
+  int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, const OpSupportCheckParams& /* params */) const override;

  // Resize opset 10- is very different than Resize opset 11+, with many key attributes missing
  // We only support Resize opset 11+ here
@ -1445,9 +1445,9 @@ bool ResizeOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initi
        return false;
      }

-      if (params.android_sdk_ver < 30 && (using_half_pixel || using_align_corners)) {
+      // half_pixel/align_corners for bilinear resize need ANEURALNETWORKS_FEATURE_LEVEL_4 (Android API level 30),
+      // use the named feature level like the other checks instead of the raw API level number
+      if (params.android_feature_level < ANEURALNETWORKS_FEATURE_LEVEL_4 && (using_half_pixel || using_align_corners)) {
        LOGS_DEFAULT(VERBOSE) << "Resize bilinear only support half_pixel/align_corners on API level 30+, current API level is "
-                              << params.android_sdk_ver;
+                              << params.android_feature_level;
        return false;
      }
    } else {
@ -1516,7 +1516,7 @@ bool ResizeOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initi
  return true;
 }

-int32_t ResizeOpSupportChecker::GetMinSupportedSdkVer(const Node& node, const OpSupportCheckParams& /* params */) const {
+int32_t ResizeOpSupportChecker::GetMinSupportedNNAPIFeatureLevel(const Node& node, const OpSupportCheckParams& /* params */) const {
  int32_t input_type;

  // This should not happen, but if it happens make sure this will require an impossible version
@ -1524,9 +1524,9 @@ int32_t ResizeOpSupportChecker::GetMinSupportedSdkVer(const Node& node, const Op
    return std::numeric_limits<int32_t>::max();

  if (input_type != ONNX_NAMESPACE::TensorProto_DataType_UINT8)
-    return 29;
+    return ANEURALNETWORKS_FEATURE_LEVEL_3;

-  return 28;
+  return ANEURALNETWORKS_FEATURE_LEVEL_2;
 }

 bool ResizeOpSupportChecker::HasSupportedInputsImpl(const Node& node) const {
@ -1590,8 +1590,8 @@ class MinMaxOpSupportChecker : public BaseOpSupportChecker {
      const std::string& op_type, OpSupportCheckerRegistrations& op_registrations);

 private:
-  int32_t GetMinSupportedSdkVer(const Node& /* node */, const OpSupportCheckParams& /* params */) const override {
-    return 29;
+  int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */, const OpSupportCheckParams& /* params */) const override {
+    return ANEURALNETWORKS_FEATURE_LEVEL_3;
  }

  // Min/Max opset 5- uses consumed_inputs attribute which is not supported for now
@ -1625,6 +1625,20 @@ bool MinMaxOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& /* in

 #pragma endregion

+#pragma region op_elu
+
+// Support checker for the ONNX Elu operator.
+// Only the minimum opset and the minimum NNAPI feature level differ from BaseOpSupportChecker
+class EluOpSupportChecker : public BaseOpSupportChecker {
+ private:
+  // Elu opset 5- uses consumed_inputs attribute which is not supported for now
+  int GetMinSupportedOpSet(const Node& /* node */) const override { return 6; }
+
+  // Elu is only reported as supported from ANEURALNETWORKS_FEATURE_LEVEL_4 onwards
+  int32_t GetMinSupportedNNAPIFeatureLevel(const Node& /* node */,
+                                           const OpSupportCheckParams& /* params */) const override {
+    return ANEURALNETWORKS_FEATURE_LEVEL_4;
+  }
+};
+
+#pragma endregion
+
 #pragma region CreateGetOpSupportCheckers

 // The reason we use macros to create OpBuilders is for easy exclusion in build if certain op(s) are not used
@ -1710,6 +1724,9 @@ static OpSupportCheckerRegistrations CreateOpSupportCheckerRegistrations() {
    NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Min", MinMaxOpSupportChecker);
    NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Max", MinMaxOpSupportChecker);
  }
+
+  NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Elu", EluOpSupportChecker);
+
  return op_registrations;
 }

--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.h
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.h
@ -9,12 +9,12 @@ namespace onnxruntime {
 namespace nnapi {

 struct OpSupportCheckParams {
-  OpSupportCheckParams(int32_t android_sdk_ver, bool use_nchw)
-      : android_sdk_ver(android_sdk_ver),
+  OpSupportCheckParams(int32_t android_feature_level, bool use_nchw)
+      : android_feature_level(android_feature_level),
        use_nchw(use_nchw) {
  }

-  int32_t android_sdk_ver = 0;
+  int32_t android_feature_level = 0;
  bool use_nchw = false;
 };

--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/model.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/model.cc
@ -87,8 +87,8 @@ size_t Model::GetMappedOutputIdx(const std::string& name) const {
 }

 bool Model::SupportsDynamicOutputShape() const {
-  // dynamic output shape is only supported on Android API level 29+
-  return GetAndroidSdkVer() >= 29 && dynamic_output_buffer_size_ > 0;
+  // dynamic output shape is only supported on Android API level 29+ (ANEURALNETWORKS_FEATURE_LEVEL_3)
+  return GetNNAPIFeatureLevel() >= ANEURALNETWORKS_FEATURE_LEVEL_3 && dynamic_output_buffer_size_ > 0;
 }

 Status Model::PrepareForExecution(std::unique_ptr<Execution>& execution) {
@ -103,8 +103,8 @@ Status Model::PrepareForExecution(std::unique_ptr<Execution>& execution) {
  return Status::OK();
 }

-int32_t Model::GetAndroidSdkVer() const {
-  return nnapi_ ? nnapi_->android_sdk_version : 0;
+int32_t Model::GetNNAPIFeatureLevel() const {  // yields 0 when nnapi_ is null
+  return nnapi_ ? static_cast<int32_t>(nnapi_->nnapi_runtime_feature_level) : 0;  // field is int64_t
 }

 #pragma region Model::NNMemory
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/model.h
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/model.h
@ -145,7 +145,7 @@ class Model {

  void SetShaper(const Shaper shaper) { shaper_ = shaper; }

-  int32_t GetAndroidSdkVer() const;
+  int32_t GetNNAPIFeatureLevel() const;
 };

 class Execution {
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.cc
@ -63,16 +63,16 @@ NnapiExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph_view
  // If we are actually running on Android system, we can get the API level by querying the system
  // However, since we also allow the NNAPI EP run GetCapability for model conversion on a non-Android system,
  // since we cannot get the runtime system API level, we have to specify it using complie definition.
-  int32_t android_sdk_ver;
+  int32_t android_feature_level;
 #ifdef __ANDROID__
  const auto* _nnapi = NnApiImplementation();
-  android_sdk_ver = _nnapi->android_sdk_version;
+  android_feature_level = _nnapi->nnapi_runtime_feature_level;
 #else
-  android_sdk_ver = ORT_NNAPI_MAX_SUPPORTED_API_LEVEL;
+  android_feature_level = ORT_NNAPI_MAX_SUPPORTED_API_LEVEL;
 #endif

  nnapi::OpSupportCheckParams params{
-      android_sdk_ver,
+      android_feature_level,
      !!(nnapi_flags_ & NNAPI_FLAG_USE_NCHW),
  };
  const auto supported_nodes_vector = GetSupportedNodes(graph_view, params);
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_lib/NeuralNetworksTypes.h
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_lib/NeuralNetworksTypes.h
@ -1,11 +1,8 @@
 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
-
    http://www.apache.org/licenses/LICENSE-2.0
-
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -49,6 +46,8 @@ enum {
  ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL = 11,
  ANEURALNETWORKS_TENSOR_QUANT16_ASYMM = 12,
  ANEURALNETWORKS_TENSOR_QUANT8_SYMM = 13,
+  ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED = 14,
+  ANEURALNETWORKS_MODEL = 15,
 };

 /**
@ -140,6 +139,13 @@ enum {
  ANEURALNETWORKS_UNIDIRECTIONAL_SEQUENCE_LSTM = 92,
  ANEURALNETWORKS_UNIDIRECTIONAL_SEQUENCE_RNN = 93,
  ANEURALNETWORKS_RESIZE_NEAREST_NEIGHBOR = 94,
+  ANEURALNETWORKS_QUANTIZED_LSTM = 95,
+  ANEURALNETWORKS_IF = 96,
+  ANEURALNETWORKS_WHILE = 97,
+  ANEURALNETWORKS_ELU = 98,
+  ANEURALNETWORKS_HARD_SWISH = 99,
+  ANEURALNETWORKS_FILL = 100,
+  ANEURALNETWORKS_RANK = 101,
 };

 /**
@ -212,6 +218,46 @@ enum {
  ANEURALNETWORKS_DEVICE_ACCELERATOR = 4,
 };

+/**
+ * Relative execution priority.
+ *
+ * Available since API level 30.
+ */
+enum {
+  ANEURALNETWORKS_PRIORITY_LOW = 90,
+  ANEURALNETWORKS_PRIORITY_MEDIUM = 100,
+  ANEURALNETWORKS_PRIORITY_HIGH = 110,
+  ANEURALNETWORKS_PRIORITY_DEFAULT = ANEURALNETWORKS_PRIORITY_MEDIUM,
+};
+
+/**
+ * NNAPI feature levels.
+ *
+ * Each update of the NNAPI specification yields a new NNAPI feature level enum
+ * value. NNAPI feature level corresponds to an NNAPI specification version that
+ * a driver and/or the NNAPI runtime can implement.
+ */
+enum {
+  /** NNAPI specification available in Android O-MR1, Android NNAPI feature
+     level 1 */
+  ANEURALNETWORKS_FEATURE_LEVEL_1 = 27,
+  /** NNAPI specification available in Android P, Android NNAPI feature level 2
+   */
+  ANEURALNETWORKS_FEATURE_LEVEL_2 = 28,
+  /** NNAPI specification available in Android Q, Android NNAPI feature level 3
+   */
+  ANEURALNETWORKS_FEATURE_LEVEL_3 = 29,
+  /** NNAPI specification available in Android R, Android NNAPI feature level 4
+   */
+  ANEURALNETWORKS_FEATURE_LEVEL_4 = 30,
+  /**
+   * NNAPI specification available in Android S, Android NNAPI feature level 5.
+   * After Android S, the NNAPI specification can be updated between Android
+   * API releases.
+   */
+  ANEURALNETWORKS_FEATURE_LEVEL_5 = 31,
+};
+
 /**
 * For ANeuralNetworksModel_setOperandValue,
 * values with a length smaller or equal to this
@ -223,6 +269,51 @@ enum {
  ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES = 128
 };

+/**
+ * ANeuralNetworksMemoryDesc is an opaque type that represents a memory
+ * descriptor.
+ *
+ * A memory descriptor describes the properties of a memory object, and is used
+ * by
+ * {@link ANeuralNetworksMemory_createFromDesc}.
+ *
+ * To use:
+ *   - Create a new memory descriptor by calling
+ *     {@link ANeuralNetworksMemoryDesc_create}.
+ *   - Specify all of the intended input and output roles by calling
+ *     {@link ANeuralNetworksMemoryDesc_addInputRole} and
+ *     {@link ANeuralNetworksMemoryDesc_addOutputRole}.
+ *   - Optionally, specify the memory dimensions by calling
+ *     {@link ANeuralNetworksMemoryDesc_setDimensions}.
+ *   - Complete the memory descriptor with {@link
+ * ANeuralNetworksMemoryDesc_finish}.
+ *   - Use the memory descriptor as many times as needed with
+ *     {@link ANeuralNetworksMemory_createFromDesc}.
+ *   - Destroy the memory descriptor with {@link
+ * ANeuralNetworksMemoryDesc_free}.
+ *
+ * A memory descriptor is completed by calling {@link
+ * ANeuralNetworksMemoryDesc_finish}. A memory descriptor is destroyed by
+ * calling {@link ANeuralNetworksMemoryDesc_free}.
+ *
+ * A memory descriptor must not be modified once
+ * {@link ANeuralNetworksMemoryDesc_finish}
+ * has been called on it.
+ *
+ * It is the application's responsibility to make sure that only
+ * one thread modifies a memory descriptor at a given time. It is however
+ * safe for more than one thread to use the memory descriptor once
+ * {@link ANeuralNetworksMemoryDesc_finish} has returned.
+ *
+ * It is also the application's responsibility to ensure that there are no other
+ * uses of the memory descriptor after calling {@link
+ * ANeuralNetworksMemoryDesc_free}. It is however safe to continue using a
+ * {@link ANeuralNetworksMemory} object created from the memory descriptor.
+ *
+ * Available since API level 30.
+ */
+typedef struct ANeuralNetworksMemoryDesc ANeuralNetworksMemoryDesc;
+
 /**
 * ANeuralNetworksMemory is an opaque type that represents memory.
 *
@ -536,9 +627,21 @@ typedef int (*ANeuralNetworksCompilation_setCaching_fn)(
    ANeuralNetworksCompilation* compilation, const char* cacheDir,
    const uint8_t* token);

+typedef int (*ANeuralNetworksCompilation_setTimeout_fn)(
+    ANeuralNetworksCompilation* compilation, uint64_t duration);
+
+typedef int (*ANeuralNetworksCompilation_setPriority_fn)(
+    ANeuralNetworksCompilation* compilation, int priority);
+
 typedef int (*ANeuralNetworksExecution_compute_fn)(
    ANeuralNetworksExecution* execution);

+typedef int (*ANeuralNetworksExecution_setTimeout_fn)(
+    ANeuralNetworksExecution* execution, uint64_t duration);
+
+typedef int (*ANeuralNetworksExecution_setLoopTimeout_fn)(
+    ANeuralNetworksExecution* execution, uint64_t duration);
+
 typedef int (*ANeuralNetworksExecution_getOutputOperandRank_fn)(
    ANeuralNetworksExecution* execution, int32_t index, uint32_t* rank);

@ -566,6 +669,26 @@ typedef enum {
  // such as that of the runtime itself and the IPC needed for the runtime to
  // communicate with the driver.
  ANEURALNETWORKS_DURATION_IN_DRIVER = 1,
+  // Execution time on hardware, after all dependencies have been signaled.
+  // If no dependencies specified (for example, if the execution was scheduled
+  // other
+  // than with {@link ANeuralNetworksExecution_startComputeWithDependencies}),
+  // the
+  // reported time will be the same as ANEURALNETWORKS_DURATION_ON_HARDWARE.
+  // Available since API level 30.
+  ANEURALNETWORKS_FENCED_DURATION_ON_HARDWARE = 2,
+  // Execution time in driver, after all dependencies have been signaled.
+  // Excludes
+  // overhead such as that of the runtime itself and the IPC needed for the
+  // runtime
+  // to communicate with the driver.
+  // If no dependencies specified (for example, if the execution was scheduled
+  // other
+  // than with {@link ANeuralNetworksExecution_startComputeWithDependencies}),
+  // the
+  // reported time will be the same as ANEURALNETWORKS_DURATION_IN_DRIVER.
+  // Available since API level 30.
+  ANEURALNETWORKS_FENCED_DURATION_IN_DRIVER = 3,
 } DurationCode;

 typedef int (*ANeuralNetworksExecution_getDuration_fn)(
@ -588,4 +711,50 @@ typedef int (*ANeuralNetworksModel_setOperandExtensionData_fn)(
    ANeuralNetworksModel* model, int32_t index, const void* data,
    size_t length);

+typedef int (*ANeuralNetworksMemoryDesc_create_fn)(
+    ANeuralNetworksMemoryDesc** desc);
+
+typedef void (*ANeuralNetworksMemoryDesc_free_fn)(
+    ANeuralNetworksMemoryDesc* desc);
+
+typedef int (*ANeuralNetworksMemoryDesc_addInputRole_fn)(
+    ANeuralNetworksMemoryDesc* desc,
+    const ANeuralNetworksCompilation* compilation, uint32_t index,
+    float frequency);
+
+typedef int (*ANeuralNetworksMemoryDesc_addOutputRole_fn)(
+    ANeuralNetworksMemoryDesc* desc,
+    const ANeuralNetworksCompilation* compilation, uint32_t index,
+    float frequency);
+
+typedef int (*ANeuralNetworksMemoryDesc_setDimensions_fn)(
+    ANeuralNetworksMemoryDesc* desc, uint32_t rank, const uint32_t* dimensions);
+
+typedef int (*ANeuralNetworksMemoryDesc_finish_fn)(
+    ANeuralNetworksMemoryDesc* desc);
+
+typedef int (*ANeuralNetworksMemory_createFromDesc_fn)(
+    const ANeuralNetworksMemoryDesc* desc, ANeuralNetworksMemory** memory);
+
+typedef int (*ANeuralNetworksMemory_copy_fn)(const ANeuralNetworksMemory* src,
+                                             const ANeuralNetworksMemory* dst);
+
+typedef int (*ANeuralNetworksEvent_createFromSyncFenceFd_fn)(
+    int sync_fence_fd, ANeuralNetworksEvent** event);
+
+typedef int (*ANeuralNetworksEvent_getSyncFenceFd_fn)(
+    const ANeuralNetworksEvent* event, int* sync_fence_fd);
+
+typedef int (*ANeuralNetworksExecution_startComputeWithDependencies_fn)(
+    ANeuralNetworksExecution* execution,
+    const ANeuralNetworksEvent* const* dependencies, uint32_t num_dependencies,
+    uint64_t duration, ANeuralNetworksEvent** event);
+
+typedef int (*ANeuralNetworksExecution_enableInputAndOutputPadding_fn)(
+    ANeuralNetworksExecution* execution, bool enable);
+
+typedef int (*ANeuralNetworksExecution_setReusable_fn)(
+    ANeuralNetworksExecution* execution, bool reusable);
+
+typedef int64_t (*ANeuralNetworks_getRuntimeFeatureLevel_fn)();
 #endif  // TENSORFLOW_LITE_NNAPI_NEURALNETWORKSTYPES_H_
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_lib/NeuralNetworksWrapper.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_lib/NeuralNetworksWrapper.cc
@ -71,6 +71,7 @@ size_t OperandType::GetElementByteSize() const {
      element_size = 1;
      break;
    case Type::TENSOR_FLOAT16:
+    case Type::FLOAT16:
      element_size = 2;
      break;
    case Type::TENSOR_FLOAT32:
@ -80,15 +81,12 @@ size_t OperandType::GetElementByteSize() const {
    case Type::TENSOR_INT32:
      element_size = 4;
      break;
+    case Type::TENSOR_QUANT8_ASYMM:
+    case Type::TENSOR_QUANT8_ASYMM_SIGNED:
    case Type::TENSOR_QUANT8_SYMM_PER_CHANNEL:
      element_size = 1;
      break;
-    case Type::TENSOR_QUANT8_ASYMM:
-      element_size = 1;
-      break;
    case Type::TENSOR_QUANT16_SYMM:
-      element_size = 2;
-      break;
    case Type::TENSOR_QUANT16_ASYMM:
      element_size = 2;
      break;
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_lib/NeuralNetworksWrapper.h
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_lib/NeuralNetworksWrapper.h
@ -41,6 +41,7 @@ enum class Type {
  FLOAT16 = ANEURALNETWORKS_FLOAT16,
  TENSOR_QUANT8_SYMM_PER_CHANNEL = ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL,
  TENSOR_QUANT16_ASYMM = ANEURALNETWORKS_TENSOR_QUANT16_ASYMM,
+  TENSOR_QUANT8_ASYMM_SIGNED = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED,
 };

 enum class ExecutePreference {
@ -87,10 +88,10 @@ inline std::string TypeToStr(const Type& type) {
    return "TENSOR_BOOL8";
  } else if (type == Type::FLOAT16) {
    return "FLOAT16";
-  } else if (type == Type::FLOAT16) {
-    return "FLOAT16";
  } else if (type == Type::TENSOR_QUANT8_SYMM_PER_CHANNEL) {
    return "TENSOR_QUANT8_SYMM_PER_CHANNEL";
+  } else if (type == Type::TENSOR_QUANT8_ASYMM_SIGNED) {
+    return "TENSOR_QUANT8_ASYMM_SIGNED";
  } else {
    return "Unknown type";
  }
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_lib/nnapi_implementation.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_lib/nnapi_implementation.cc
@ -1,11 +1,8 @@
 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
-
    http://www.apache.org/licenses/LICENSE-2.0
-
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -20,8 +17,11 @@ limitations under the License.
 #include <sys/stat.h>
 #include <unistd.h>

+#include <algorithm>
 #include <cstdlib>

+#include "NeuralNetworksTypes.h"
+
 #ifdef __ANDROID__
 #include <sys/system_properties.h>
 #endif  // __ANDROID__
@ -45,19 +45,6 @@ int32_t GetAndroidSdkVersion() {
      }
      result = result * 10 + digit;
    }
-    // TODO(levp): remove once SDK gets updated to 29th level
-    // Upgrade SDK version for pre-release Q to be able to test functionality
-    // available from SDK level 29.
-    if (result == 28) {
-      char versionCodename[PROP_VALUE_MAX];
-      const char* versionCodenameProp = "ro.build.version.codename";
-      length = __system_property_get(versionCodenameProp, versionCodename);
-      if (length != 0) {
-        if (versionCodename[0] == 'Q') {
-          return 29;
-        }
-      }
-    }
    return result;
  }
  return 0;
@ -78,7 +65,14 @@ void* LoadFunction(void* handle, const char* name, bool optional) {
 #ifndef __ANDROID__
 // Add /dev/shm implementation of shared memory for non-Android platforms
 int ASharedMemory_create(const char* name, size_t size) {
-  int fd = shm_open(name, O_RDWR | O_CREAT, 0644);
+  // Each call to ASharedMemory_create produces a unique memory space, hence
+  // name should be unique, otherwise two calls to create memory regions using
+  // the same 'name', will collide.
+  // Caller is responsible to provide a unique name.
+
+  // Make sure new shared memory region is created: shm_open returns an error if
+  // shm object with given name already exists (O_CREAT | O_EXCL)
+  int fd = shm_open(name, O_RDWR | O_CREAT | O_EXCL, 0644);
  if (fd < 0) {
    return fd;
  }
@ -89,6 +83,63 @@ int ASharedMemory_create(const char* name, size_t size) {
  }
  return fd;
 }
+
+// Determine the NnApi version from loaded entry points
+uint32_t CalculateAndroidSdkVersion(NnApi const& nnapi) {
+  // Test for specific NNAPI 1.0, 1.1, 1.2, 1.3 and API 31 (feature level 5) functions
+  bool has_10 = nnapi.ANeuralNetworksMemory_createFromFd != nullptr;
+  bool has_11 =
+      nnapi.ANeuralNetworksModel_relaxComputationFloat32toFloat16 != nullptr;
+  bool has_12 = nnapi.ANeuralNetworks_getDeviceCount != nullptr;
+  bool has_13 = nnapi.ANeuralNetworksCompilation_setTimeout != nullptr;
+  bool has_14 = nnapi.ANeuralNetworks_getRuntimeFeatureLevel != nullptr;
+
+  uint32_t sdk_version = 0;
+  if (has_10) {
+    sdk_version = 27;
+  }
+  if (sdk_version == 27 && has_11) {
+    sdk_version = 28;
+  }
+  if (sdk_version == 28 && has_12) {
+    sdk_version = 29;
+  }
+  if (sdk_version == 29 && has_13) {
+    sdk_version = 30;
+  }
+  if (sdk_version == 30 && has_14) {
+    sdk_version = 31;
+  }
+  return sdk_version;
+}
+#else
+
+ASharedMemory_create_fn getASharedMemory_create() {
+  // ASharedMemory_create has different implementations in Android depending on
+  // the partition. Generally it can be loaded from libandroid.so but in vendor
+  // partition (e.g. if a HAL wants to use NNAPI) it is only accessible through
+  // libcutils.
+  void* libandroid = nullptr;
+  libandroid = dlopen("libandroid.so", RTLD_LAZY | RTLD_LOCAL);
+  if (libandroid != nullptr) {
+    return reinterpret_cast<ASharedMemory_create_fn>(
+        LoadFunction(libandroid, "ASharedMemory_create", false));
+  }
+
+  std::string libandroid_error = dlerror();
+  void* cutils_handle = dlopen("libcutils.so", RTLD_LAZY | RTLD_LOCAL);
+  if (cutils_handle != nullptr) {
+    return reinterpret_cast<ASharedMemory_create_fn>(
+        LoadFunction(cutils_handle, "ashmem_create_region", false));
+  }
+
+  NNAPI_LOG(
+      "nnapi error: unable to open both library %s (%s) and library %s "
+      "(%s)",
+      "libandroid.so", libandroid_error.c_str(), "libcutils.so", dlerror());
+  return nullptr;
+}
+
 #endif  // __ANDROID__

 #define LOAD_FUNCTION(handle, name)         \
@ -120,10 +171,22 @@ const NnApi LoadNnApi() {
  void* libneuralnetworks = nullptr;
  // TODO(b/123243014): change RTLD_LOCAL? Assumes there can be multiple
  // instances of nn api RT
-  libneuralnetworks = dlopen("libneuralnetworks.so", RTLD_LAZY | RTLD_LOCAL);
+  static const char nnapi_library_name[] = "libneuralnetworks.so";
+  libneuralnetworks = dlopen(nnapi_library_name, RTLD_LAZY | RTLD_LOCAL);
+#ifdef __ANDROID__
+  // Note: If there is a problem trying to open the NNAPI library on a
+  // non-Android system, the error message is suppressed. This is to avoid
+  // showing confusing errors when running in environments that do not support
+  // NNAPI. As more platforms support NNAPI, the #ifdef logic above can be
+  // expanded.
  if (libneuralnetworks == nullptr) {
-    NNAPI_LOG("nnapi error: unable to open library %s", "libneuralnetworks.so");
+    const char* error = dlerror();
+    if (error) {
+      NNAPI_LOG("%s\n", error);
+    }
+    NNAPI_LOG("nnapi error: unable to open library %s", nnapi_library_name);
  }
+#endif  // __ANDROID__

  nnapi.nnapi_exists = libneuralnetworks != nullptr;

@ -158,25 +221,8 @@ const NnApi LoadNnApi() {
  LOAD_FUNCTION(libneuralnetworks, ANeuralNetworksEvent_wait);
  LOAD_FUNCTION(libneuralnetworks, ANeuralNetworksEvent_free);

-  // ASharedMemory_create has different implementations in Android depending on
-  // the partition. Generally it can be loaded from libandroid.so but in vendor
-  // partition (e.g. if a HAL wants to use NNAPI) it is only accessible through
-  // libcutils.
 #ifdef __ANDROID__
-  void* libandroid = nullptr;
-  libandroid = dlopen("libandroid.so", RTLD_LAZY | RTLD_LOCAL);
-  if (libandroid != nullptr) {
-    LOAD_FUNCTION(libandroid, ASharedMemory_create);
-  } else {
-    void* cutils_handle = dlopen("libcutils.so", RTLD_LAZY | RTLD_LOCAL);
-    if (cutils_handle != nullptr) {
-      LOAD_FUNCTION_RENAME(cutils_handle, ASharedMemory_create,
-                           "ashmem_create_region");
-    } else {
-      NNAPI_LOG("nnapi error: unable to open neither libraries %s and %s",
-                "libandroid.so", "libcutils.so");
-    }
-  }
+  nnapi.ASharedMemory_create = getASharedMemory_create();
 #else
  // Mock ASharedMemory_create only if libneuralnetworks.so was successfully
  // loaded. This ensures identical behaviour on platforms which use this
@ -228,6 +274,58 @@ const NnApi LoadNnApi() {
                         ANeuralNetworksModel_getExtensionOperationType);
  LOAD_FUNCTION_OPTIONAL(libneuralnetworks,
                         ANeuralNetworksModel_setOperandExtensionData);
+
+  // API 30 (NNAPI 1.3) methods.
+  LOAD_FUNCTION_OPTIONAL(libneuralnetworks,
+                         ANeuralNetworksCompilation_setTimeout);
+  LOAD_FUNCTION_OPTIONAL(libneuralnetworks,
+                         ANeuralNetworksCompilation_setPriority);
+  LOAD_FUNCTION_OPTIONAL(libneuralnetworks,
+                         ANeuralNetworksExecution_setTimeout);
+  LOAD_FUNCTION_OPTIONAL(libneuralnetworks,
+                         ANeuralNetworksExecution_setLoopTimeout);
+  LOAD_FUNCTION_OPTIONAL(libneuralnetworks, ANeuralNetworksMemoryDesc_create);
+  LOAD_FUNCTION_OPTIONAL(libneuralnetworks, ANeuralNetworksMemoryDesc_free);
+  LOAD_FUNCTION_OPTIONAL(libneuralnetworks,
+                         ANeuralNetworksMemoryDesc_addInputRole);
+  LOAD_FUNCTION_OPTIONAL(libneuralnetworks,
+                         ANeuralNetworksMemoryDesc_addOutputRole);
+  LOAD_FUNCTION_OPTIONAL(libneuralnetworks,
+                         ANeuralNetworksMemoryDesc_setDimensions);
+  LOAD_FUNCTION_OPTIONAL(libneuralnetworks, ANeuralNetworksMemoryDesc_finish);
+  LOAD_FUNCTION_OPTIONAL(libneuralnetworks,
+                         ANeuralNetworksMemory_createFromDesc);
+  LOAD_FUNCTION_OPTIONAL(libneuralnetworks, ANeuralNetworksMemory_copy);
+  LOAD_FUNCTION_OPTIONAL(libneuralnetworks,
+                         ANeuralNetworksEvent_createFromSyncFenceFd);
+  LOAD_FUNCTION_OPTIONAL(libneuralnetworks,
+                         ANeuralNetworksEvent_getSyncFenceFd);
+  LOAD_FUNCTION_OPTIONAL(libneuralnetworks,
+                         ANeuralNetworksExecution_startComputeWithDependencies);
+
+  // API 31 methods
+  LOAD_FUNCTION_OPTIONAL(libneuralnetworks,
+                         ANeuralNetworks_getRuntimeFeatureLevel);
+  LOAD_FUNCTION_OPTIONAL(libneuralnetworks,
+                         ANeuralNetworksExecution_enableInputAndOutputPadding);
+  LOAD_FUNCTION_OPTIONAL(libneuralnetworks,
+                         ANeuralNetworksExecution_setReusable);
+#ifndef __ANDROID__
+  // If libneuralnetworks.so is loaded, but android_sdk_version is not set,
+  // then determine android_sdk_version by testing which functions are
+  // available.
+  if (nnapi.nnapi_exists && nnapi.android_sdk_version == 0) {
+    nnapi.android_sdk_version = CalculateAndroidSdkVersion(nnapi);
+  }
+#endif  // __ANDROID__
+  // Determine NNAPI Runtime feature level.
+  if (nnapi.ANeuralNetworks_getRuntimeFeatureLevel) {
+    nnapi.nnapi_runtime_feature_level =
+        nnapi.ANeuralNetworks_getRuntimeFeatureLevel();
+  } else {
+    nnapi.nnapi_runtime_feature_level = nnapi.android_sdk_version;
+  }
+
  return nnapi;
 }

--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_lib/nnapi_implementation.h
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_lib/nnapi_implementation.h
@ -1,11 +1,8 @@
 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
-
    http://www.apache.org/licenses/LICENSE-2.0
-
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -19,11 +16,18 @@ limitations under the License.
 #include <stdio.h>
 #include <stdlib.h>

+#include <memory>
+
 #include "NeuralNetworksTypes.h"

 struct NnApi {
  bool nnapi_exists;
  int32_t android_sdk_version;
+  // NNAPI feature level should be used when deciding which NNAPI feature to
+  // use, as feature levels after Android API level 31 have no association with
+  // API level because the NNAPI specification can be updated between Android
+  // API releases.
+  int64_t nnapi_runtime_feature_level;

  /**
   * Creates a shared memory object from a file descriptor.
@ -272,6 +276,7 @@ struct NnApi {
   *
   * See {@link ANeuralNetworksModel} for information on multithreaded usage.
   *
+   * @return ANEURALNETWORKS_NO_ERROR if successful.
   */
  int (*ANeuralNetworksModel_identifyInputsAndOutputs)(
      ANeuralNetworksModel* model, uint32_t inputCount, const uint32_t* inputs,
@ -298,6 +303,8 @@ struct NnApi {
   * Available since API level 28.
   *
   * See {@link ANeuralNetworksModel} for information on multithreaded usage.
+   *
+   * @return ANEURALNETWORKS_NO_ERROR if successful.
   */
  int (*ANeuralNetworksModel_relaxComputationFloat32toFloat16)(
      ANeuralNetworksModel* model, bool allow);
@ -789,6 +796,76 @@ struct NnApi {
      ANeuralNetworksCompilation* compilation, const char* cacheDir,
      const uint8_t* token);

+  /**
+   * Set the maximum expected duration for compiling the model.
+   *
+   * If the device is not able to complete the compilation within the specified
+   * duration, the compilation may be aborted. The timeout duration begins at
+   * the call to {@link ANeuralNetworksCompilation_finish}.
+   *
+   * This timeout duration acts as a hint to drivers, and can be used to both
+   * free up compute resources within the driver and return control back to the
+   * application quicker than is possible without the hint. It enables drivers
+   * that are able to estimate how long a compilation will take to abort the
+   * compilation before it has even started if the driver believes the
+   * compilation cannot be completed within the timeout duration. Similarly, it
+   * enables drivers to abort an ongoing compilation if it is taking too long.
+   * However, this call does not guarantee that the compilation will complete or
+   * abort within the timeout duration.
+   *
+   * By default (i.e., unless ANeuralNetworksCompilation_setTimeout is called),
+   * the timeout duration for compiling the model is considered infinite.
+   *
+   * The {@link ANeuralNetworksCompilation} must have been created with
+   * {@link ANeuralNetworksCompilation_createForDevices} with numDevices = 1,
+   * otherwise this function will fail with ANEURALNETWORKS_BAD_DATA. If the
+   * device has a feature level reported by
+   * {@link ANeuralNetworksDevice_getFeatureLevel} that is lower than 30, then
+   * the timeout duration hint will be ignored.
+   *
+   * See {@link ANeuralNetworksCompilation} for information on multithreaded
+   * usage.
+   *
+   * @param compilation The compilation to be modified.
+   * @param duration The maximum amount of time in nanoseconds that is expected
+   * to be spent finishing a compilation. If this duration is exceeded, the
+   *     compilation may be aborted. If set to 0, the timeout duration is
+   *     considered infinite.
+   *
+   * @return ANEURALNETWORKS_NO_ERROR if successful.
+   *
+   * Available since API level 30.
+   */
+  int (*ANeuralNetworksCompilation_setTimeout)(
+      ANeuralNetworksCompilation* compilation, uint64_t duration);
+
+  /**
+   * Set the execution priority.
+   *
+   * Execution priorities are relative to other executions created by the same
+   * application (specifically same uid) for the same device. Specifically,
+   * priorities of executions from one application will not affect executions
+   * from another application. Similarly, priorities of executions on one device
+   * will not affect executions on another device.
+   *
+   * Higher priority executions may use more compute resources than lower
+   * priority executions, and may preempt or starve lower priority executions.
+   *
+   * See {@link ANeuralNetworksCompilation} for information on multithreaded
+   * usage.
+   *
+   * Available since API level 30.
+   *
+   * @param compilation The compilation to be modified.
+   * @param priority The relative priority of the execution compared to other
+   *     executions created by the application. Must be one of
+   *     ANEURALNETWORKS_PRIORITY_*.
+   *
+   * @return ANEURALNETWORKS_NO_ERROR if successful.
+   */
+  int (*ANeuralNetworksCompilation_setPriority)(
+      ANeuralNetworksCompilation* compilation, int priority);
+
  /**
   * Schedule synchronous evaluation of the execution.
   *
@ -813,6 +890,84 @@ struct NnApi {
   */
  int (*ANeuralNetworksExecution_compute)(ANeuralNetworksExecution* execution);

+  /**
+   * Set the maximum expected duration of the specified execution.
+   *
+   * If the device is not able to complete the execution within the specified
+   * duration, the execution may be aborted. The timeout duration begins at a
+   * call to one of:
+   * - {@link ANeuralNetworksExecution_burstCompute}
+   * - {@link ANeuralNetworksExecution_compute}
+   * - {@link ANeuralNetworksExecution_startCompute}
+   * - {@link ANeuralNetworksExecution_startComputeWithDependencies}
+   *
+   * This timeout duration acts as a hint to drivers, and can be used to both
+   * free up compute resources within the driver and return control back to the
+   * application quicker than is possible without the hint. It enables drivers
+   * that are able to estimate how long an execution will take to abort the
+   * execution before it has even started if the driver believes the execution
+   * cannot be completed within the timeout duration. Similarly, it enables
+   * drivers to abort an ongoing execution if it is taking too long. However,
+   * this call does not guarantee that the execution will complete or abort
+   * within the timeout duration.
+   *
+   * By default (i.e., unless ANeuralNetworksExecution_setTimeout is called),
+   * the timeout duration for execution is considered infinite.
+   *
+   * The {@link ANeuralNetworksExecution} must have been created from an
+   * {@link ANeuralNetworksCompilation} which in turn was created from
+   * {@link ANeuralNetworksCompilation_createForDevices} with numDevices = 1,
+   * otherwise this function will fail with ANEURALNETWORKS_BAD_DATA. If the
+   * device has a feature level reported by
+   * {@link ANeuralNetworksDevice_getFeatureLevel} that is lower than 30, then
+   * the timeout duration hint will be ignored.
+   *
+   * See {@link ANeuralNetworksExecution} for information on multithreaded
+   * usage.
+   *
+   * @param execution The execution to be modified.
+   * @param duration The maximum amount of time in nanoseconds that is expected
+   * to be spent executing a model. If this duration is exceeded, the execution
+   *     may be aborted. If set to 0, the timeout duration is considered
+   * infinite.
+   *
+   * @return ANEURALNETWORKS_NO_ERROR if successful.
+   *
+   * Available since API level 30.
+   */
+  int (*ANeuralNetworksExecution_setTimeout)(
+      ANeuralNetworksExecution* execution, uint64_t duration);
+
+  /**
+   * Set the maximum duration of WHILE loops in the specified execution.
+   *
+   * This is a fuzzy per-loop timeout intended to prevent infinite loops.
+   *
+   * If a WHILE loop condition model does not output false within the specified
+   * duration, the execution will be aborted.
+   *
+   * See {@link ANeuralNetworks_getDefaultLoopTimeout} and
+   * {@link ANeuralNetworks_getMaximumLoopTimeout} for the default
+   * and maximum timeout values.
+   *
+   * See {@link ANeuralNetworksExecution} for information on multithreaded
+   * usage.
+   *
+   * @param execution The execution to be modified.
+   * @param duration The maximum amount of time in nanoseconds that can be spent
+   *     executing a WHILE loop. If the specified duration value exceeds the
+   * value produced by {@link ANeuralNetworks_getMaximumLoopTimeout}, it will be
+   *     overridden by that value.
+   *
+   * @return ANEURALNETWORKS_NO_ERROR if successful.
+   *         ANEURALNETWORKS_BAD_STATE if execution has started.
+   *         ANEURALNETWORKS_UNEXPECTED_NULL if execution is NULL.
+   *
+   * Available since API level 30.
+   */
+  int (*ANeuralNetworksExecution_setLoopTimeout)(
+      ANeuralNetworksExecution* execution, uint64_t duration);
+
  /**
   * Get the dimensional information of the specified output operand of the
   * model of the
@ -1077,7 +1232,555 @@ struct NnApi {
      ANeuralNetworksModel* model, int32_t index, const void* data,
      size_t length);

-  /**/
+  /**
+   * Create a {@link ANeuralNetworksMemoryDesc} with no properties.
+   *
+   * This only creates the memory descriptor. Its properties should be set with
+   * calls to
+   * {@link ANeuralNetworksMemoryDesc_addInputRole},
+   * {@link ANeuralNetworksMemoryDesc_addOutputRole}, and
+   * {@link ANeuralNetworksMemoryDesc_setDimensions}.
+   *
+   * {@link ANeuralNetworksMemoryDesc_finish} must be called once all properties
+   * have been set.
+   *
+   * {@link ANeuralNetworksMemoryDesc_free} must be called once the memory
+   * descriptor is no longer needed.
+   *
+   * Available since API level 30.
+   *
+   * @param desc The {@link ANeuralNetworksMemoryDesc} to be created.
+   *             Set to NULL if unsuccessful.
+   *
+   * @return ANEURALNETWORKS_NO_ERROR if successful.
+   */
+  int (*ANeuralNetworksMemoryDesc_create)(ANeuralNetworksMemoryDesc** desc);
+
+  /**
+   * Destroy a memory descriptor.
+   *
+   * The memory descriptor need not have been finished by a call to
+   * {@link ANeuralNetworksMemoryDesc_finish}.
+   *
+   * See {@link ANeuralNetworksMemoryDesc} for information on multithreaded
+   * usage.
+   *
+   * Available since API level 30.
+   *
+   * @param desc The memory descriptor to be destroyed. Passing NULL is
+   * acceptable and results in no operation.
+   */
+  void (*ANeuralNetworksMemoryDesc_free)(ANeuralNetworksMemoryDesc* desc);
+
+  /**
+   * Specify that a memory object will be playing the role of an input to an
+   * execution created from a particular compilation.
+   *
+   * The compilation and the input index fully specify an input operand. This
+   * function may be invoked multiple times on the same memory descriptor with
+   * different input operands, and the same input operand may be specified on
+   * multiple memory descriptors. However, specifying the same input operand on
+   * the same memory descriptor more than once will return an error.
+   *
+   * The dimensions of the corresponding model operands of all the roles
+   * specified by
+   * {@link ANeuralNetworksMemoryDesc_addInputRole} and
+   * {@link ANeuralNetworksMemoryDesc_addOutputRole} must be compatible with
+   * each other. Two dimensions are incompatible if both ranks are fully
+   * specified but have different values, or if there is at least one axis that
+   * is fully specified in both but has different values.
+   *
+   * At least one of {@link ANeuralNetworksMemoryDesc_addInputRole} and
+   * {@link ANeuralNetworksMemoryDesc_addOutputRole} must be called on a memory
+   * descriptor before invoking {@link ANeuralNetworksMemoryDesc_finish}.
+   *
+   * Attempting to modify a memory descriptor once
+   * {@link ANeuralNetworksMemoryDesc_finish} has been called will return an
+   * error.
+   *
+   * See {@link ANeuralNetworksMemoryDesc} for information on multithreaded
+   * usage.
+   *
+   * Available since API level 30.
+   *
+   * @param desc The memory descriptor to be modified.
+   * @param compilation The compilation object. It must already have been
+   * finished by calling {@link ANeuralNetworksCompilation_finish}, and must
+   * outlive the memory descriptor.
+   * @param index The index of the input argument we are referencing from the
+   * compilation. It is an index into the inputs list passed to
+   *              {@link ANeuralNetworksModel_identifyInputsAndOutputs}. It is
+   * not the index associated with {@link ANeuralNetworksModel_addOperand}.
+   * @param frequency A floating-point value within the range (0.0, 1.0].
+   * Describes how likely the memory is to be used in the specified role. This
+   * is provided as a hint to optimize the case when different roles prefer
+   * different memory locations or data layouts.
+   *
+   * @return ANEURALNETWORKS_NO_ERROR if successful.
+   */
+  int (*ANeuralNetworksMemoryDesc_addInputRole)(
+      ANeuralNetworksMemoryDesc* desc,
+      const ANeuralNetworksCompilation* compilation, uint32_t index,
+      float frequency);
+
+  /**
+   * Specify that a memory object will be playing the role of an output to an
+   * execution created from a particular compilation.
+   *
+   * The compilation and the output index fully specify an output operand. This
+   * function may be invoked multiple times on the same memory descriptor with
+   * different output operands, and the same output operand may be specified on
+   * multiple memory descriptors. However, specifying the same output operand on
+   * the same memory descriptor object more than once will return an error.
+   *
+   * The dimensions of the corresponding model operands of all the roles
+   * specified by
+   * {@link ANeuralNetworksMemoryDesc_addInputRole} and
+   * {@link ANeuralNetworksMemoryDesc_addOutputRole} must be compatible with
+   * each other. Two dimensions are incompatible if both ranks are fully
+   * specified but have different values, or if there is at least one axis that
+   * is fully specified in both but has different values.
+   *
+   * At least one of {@link ANeuralNetworksMemoryDesc_addInputRole} and
+   * {@link ANeuralNetworksMemoryDesc_addOutputRole} must be called on the
+   * memory descriptor before invoking {@link ANeuralNetworksMemoryDesc_finish}.
+   *
+   * Attempting to modify a memory descriptor once
+   * {@link ANeuralNetworksMemoryDesc_finish} has been called will return an
+   * error.
+   *
+   * See {@link ANeuralNetworksMemoryDesc} for information on multithreaded
+   * usage.
+   *
+   * Available since API level 30.
+   *
+   * @param desc The memory descriptor to be modified.
+   * @param compilation The compilation object. It must already have been
+   * finished by calling {@link ANeuralNetworksCompilation_finish}, and must
+   * outlive the memory descriptor.
+   * @param index The index of the output argument we are referencing from the
+   *              compilation. It is an index into the outputs list passed to
+   *              {@link ANeuralNetworksModel_identifyInputsAndOutputs}. It is
+   * not the index associated with {@link ANeuralNetworksModel_addOperand}.
+   * @param frequency A floating-point value within the range (0.0, 1.0].
+   * Describes how likely the memory is to be used in the specified role. This
+   * is provided as a hint to optimize the case when multiple roles prefer
+   * different memory locations or data layouts.
+   *
+   * @return ANEURALNETWORKS_NO_ERROR if successful.
+   */
+  int (*ANeuralNetworksMemoryDesc_addOutputRole)(
+      ANeuralNetworksMemoryDesc* desc,
+      const ANeuralNetworksCompilation* compilation, uint32_t index,
+      float frequency);
+
+  /**
+   * Set the dimensional information of the memory descriptor.
+   *
+   * The specified dimensions must be compatible with the dimensions of the
+   * corresponding model operands of all the roles specified by
+   * {@link ANeuralNetworksMemoryDesc_addInputRole} and
+   * {@link ANeuralNetworksMemoryDesc_addOutputRole}. Two dimensions are
+   * incompatible if both ranks are fully specified but have different values,
+   * or if there is at least one axis that is fully specified in both but has
+   * different values.
+   *
+   * Attempting to modify a memory descriptor once
+   * {@link ANeuralNetworksMemoryDesc_finish} has been called will return an
+   * error.
+   *
+   * See {@link ANeuralNetworksMemoryDesc} for information on multithreaded
+   * usage.
+   *
+   * Available since API level 30.
+   *
+   * @param desc The memory descriptor to be modified.
+   * @param rank The number of dimensions. Must be 0 for scalars.
+   * @param dimensions An array of dimensions. An entry with the value 0
+   * indicates that the corresponding axis has an unknown size.
+   *
+   * @return ANEURALNETWORKS_NO_ERROR if successful.
+   */
+  int (*ANeuralNetworksMemoryDesc_setDimensions)(
+      ANeuralNetworksMemoryDesc* desc, uint32_t rank,
+      const uint32_t* dimensions);
+
+  /**
+   * Indicate that we have finished modifying a memory descriptor. Required
+   * before calling
+   * {@link ANeuralNetworksMemory_createFromDesc}.
+   *
+   * This function must only be called once for a given memory descriptor.
+   *
+   * See {@link ANeuralNetworksMemoryDesc} for information on multithreaded
+   * usage.
+   *
+   * Available since API level 30.
+   *
+   * @param desc The memory descriptor to be finished.
+   *
+   * @return ANEURALNETWORKS_NO_ERROR if successful.
+   */
+  int (*ANeuralNetworksMemoryDesc_finish)(ANeuralNetworksMemoryDesc* desc);
+
+  /**
+   * Creates a memory object from a memory descriptor.
+   *
+   * The memory object is created with an uninitialized buffer. A memory object
+   * with an uninitialized buffer may only be used according to the roles
+   * specified by
+   * {@link ANeuralNetworksMemoryDesc_addOutputRole}, or as the destination
+   * memory in
+   * {@link ANeuralNetworksMemory_copy}. The buffer of a memory object is
+   * initialized after the memory object is used as an output in a successful
+   * execution, or used as the destination memory in a successful {@link
+   * ANeuralNetworksMemory_copy}. A memory object with an initialized buffer may
+   * be used according to all roles specified in
+   * {@link ANeuralNetworksMemoryDesc}, or as the source or destination memory
+   * in
+   * {@link ANeuralNetworksMemory_copy}. The buffer of a memory object will
+   * return to the uninitialized state if the memory object is used as an output
+   * in a failed execution, or used as the destination memory in a failed {@link
+   * ANeuralNetworksMemory_copy}.
+   *
+   * The dimensions of the memory descriptor are deduced from the dimensions of
+   * the corresponding model operands of all the roles specified by
+   * {@link ANeuralNetworksMemoryDesc_addInputRole} and
+   * {@link ANeuralNetworksMemoryDesc_addOutputRole}, as well as the dimensions
+   * set by the call to {@link ANeuralNetworksMemoryDesc_setDimensions}, if any.
+   * The memory descriptor may have unspecified dimensions or rank. In such a
+   * case, the same memory object may be used with different shapes of outputs
+   * in different executions. When the memory is used as an input, the input
+   * shape must be the same as the output shape from the last execution using
+   * this memory object as an output, or the last
+   * {@link ANeuralNetworksMemory_copy} using this memory object as the
+   * destination memory. Creating a memory object with unspecified dimensions or
+   * rank may fail for certain sets of roles.
+   *
+   * Using the memory in roles or shapes that are not compatible with the rules
+   * specified above will return an error.
+   *
+   * When calling {@link ANeuralNetworksExecution_setInputFromMemory} or
+   * {@link ANeuralNetworksExecution_setOutputFromMemory} with the memory
+   * object, both offset and length must be set to zero and the entire memory
+   * region will be associated with the specified input or output operand.
+   *
+   * Calling {@link ANeuralNetworksModel_setOperandValueFromMemory} with the
+   * memory created from this function will return an error.
+   *
+   * {@link ANeuralNetworksMemory_free} must be called once the memory is no
+   * longer needed.
+   *
+   * Attempting to create memory from an unfinished memory descriptor will
+   * return an error.
+   *
+   * The provided {@link ANeuralNetworksMemoryDesc} need not outlive the
+   * {@link ANeuralNetworksMemory} object.
+   *
+   * Available since API level 30.
+   *
+   * @param desc The memory descriptor.
+   * @param memory The memory object to be created.
+   *               Set to NULL if unsuccessful.
+   *
+   * @return ANEURALNETWORKS_NO_ERROR if successful; ANEURALNETWORKS_OP_FAILED
+   * if the memory is created with unspecified dimensions or rank and it is not
+   * supported for this set of roles.
+   */
+  int (*ANeuralNetworksMemory_createFromDesc)(
+      const ANeuralNetworksMemoryDesc* desc, ANeuralNetworksMemory** memory);
+
+  /**
+   * Copies data from one memory object to another.
+   *
+   * If at most one of the src and dst is created from
+   * {@link ANeuralNetworksMemory_createFromDesc}, the src and dst must have the
+   * same logical size:
+   * - If the memory is created from {@link ANeuralNetworksMemory_createFromFd},
+   * or if it is created from {@link
+   * ANeuralNetworksMemory_createFromAHardwareBuffer} with format of
+   * AHARDWAREBUFFER_FORMAT_BLOB, the logical size equals the size of the
+   * memory.
+   * - If the memory is created from
+   *   {@link ANeuralNetworksMemory_createFromAHardwareBuffer} with a format
+   * other than AHARDWAREBUFFER_FORMAT_BLOB, the logical size equals the size
+   * when there is no padding and the data is tightly packed. This function may
+   * fail if the AHardwareBuffer cannot be accessed.
+   * - If the memory is created from {@link
+   * ANeuralNetworksMemory_createFromDesc}, the logical size equals the size
+   * indicated by the {@link OperandCode} multiplied by the number of elements.
+   * This function will fail if the number of elements is unknown.
+   *
+   * If both src and dst are created from {@link
+   * ANeuralNetworksMemory_createFromDesc}, they must have compatible
+   * dimensions. Two dimensions are incompatible if both ranks are fully
+   * specified but have different values, or if there is at least one axis that
+   * is fully specified in both but has different values. The dst may have
+   * unspecified dimensions or rank. In such a case, the dimensions of dst will
+   * get updated according to the dimensions of the src.
+   *
+   * In both cases, if the src is created from
+   * {@link ANeuralNetworksMemory_createFromDesc}, it must have been used as an
+   * output in a successful execution, or used as the destination memory in a
+   * successful
+   * {@link ANeuralNetworksMemory_copy}.
+   *
+   * The src and dst may have different data layout, in which case the data
+   * copying is performed logically with data layout transformation.
+   *
+   * Available since API level 30.
+   *
+   * @param src The source memory object.
+   * @param dst The destination memory object.
+   *
+   * @return ANEURALNETWORKS_NO_ERROR if successful.
+   */
+  int (*ANeuralNetworksMemory_copy)(const ANeuralNetworksMemory* src,
+                                    const ANeuralNetworksMemory* dst);
+
+  /**
+   * Create a {@link ANeuralNetworksEvent} from a sync_fence file descriptor.
+   *
+   * The newly created ANeuralNetworksEvent does not take ownership of the
+   * provided sync_fence_fd, it will instead dup the provided sync_fence_fd and
+   * own the duplicate.
+   *
+   * @param sync_fence_fd The sync_fence file descriptor.
+   * @param event The newly created object or NULL if unsuccessful.
+   *
+   * @return ANEURALNETWORKS_NO_ERROR if successful.
+   *
+   * Available since API level 30.
+   */
+  int (*ANeuralNetworksEvent_createFromSyncFenceFd)(
+      int sync_fence_fd, ANeuralNetworksEvent** event);
+
+  /**
+   * Get sync_fence file descriptor from the event.
+   *
+   * If the ANeuralNetworksEvent is not backed by a sync fence, the
+   * sync_fence_fd will be set to -1, and ANEURALNETWORKS_BAD_DATA will be
+   * returned.
+   *
+   * See {@link ANeuralNetworksEvent_createFromSyncFenceFd} and
+   * {@link ANeuralNetworksExecution_startComputeWithDependencies} to see how to
+   * create an event backed by a sync fence.
+   *
+   * The user takes ownership of the returned fd, and must close the returned
+   * file descriptor when it is no longer needed.
+   *
+   * @param event An event that is backed by a sync fence.
+   * @param sync_fence_fd The sync_fence file descriptor. The file descriptor
+   * will be set to -1 if there is an error.
+   *
+   * @return ANEURALNETWORKS_NO_ERROR if successful.
+   *
+   * Available since API level 30.
+   */
+  int (*ANeuralNetworksEvent_getSyncFenceFd)(const ANeuralNetworksEvent* event,
+                                             int* sync_fence_fd);
+
+  /**
+   * Schedule asynchronous evaluation of the execution with dependencies.
+   *
+   * The execution will wait for all the depending events to be signaled before
+   * starting the evaluation. Once the execution has completed and the outputs
+   * are ready to be consumed, the returned event will be signaled. Depending on
+   * which devices are handling the execution, the event could be backed by a
+   * sync fence. Use {@link ANeuralNetworksEvent_wait} to wait for that event.
+   *
+   * ANeuralNetworksEvent_wait must be called to recuperate the resources used
+   * by the execution.
+   *
+   * If parts of the execution are scheduled on devices that do not support
+   * fenced execution, the function call may wait for such parts to finish
+   * before returning.
+   *
+   * The function will return an error if any of the events in dependencies is
+   * already in a bad state. After the execution is scheduled, if any of the
+   * events in dependencies does not complete normally, the execution will fail,
+   * and {@link ANeuralNetworksEvent_wait} on the returned event will return an
+   * error.
+   *
+   * The function will return an error if any of the execution outputs has a
+   * tensor operand type that is not fully specified.
+   *
+   * The function can be passed a timeout duration in nanoseconds. This timeout
+   * duration acts as a hint to drivers in the same way that the timeout
+   * durations in {@link ANeuralNetworksCompilation_setTimeout} and {@link
+   * ANeuralNetworksExecution_setTimeout} act as hints to drivers. The duration
+   * begins when all waitFor sync fences have been signaled, and can be used
+   * together with {@link ANeuralNetworksExecution_setTimeout} which specifies
+   * the maximum timeout duration beginning at the call to
+   * {@link ANeuralNetworksExecution_startComputeWithDependencies}.
+   * If the duration is non-zero, the {@link ANeuralNetworksExecution} must have
+   * been created from an {@link ANeuralNetworksCompilation} which in turn was
+   * created from
+   * {@link ANeuralNetworksCompilation_createForDevices} with numDevices = 1,
+   * otherwise this function will fail with ANEURALNETWORKS_BAD_DATA. If either
+   * the timeout duration from {@link ANeuralNetworksExecution_setTimeout} or
+   * the timeout duration passed to this call is exceeded, the execution may be
+   * aborted, in which case {@link ANEURALNETWORKS_MISSED_DEADLINE_*} will be
+   * returned through {@link
+   * ANeuralNetworksExecution_startComputeWithDependencies} or {@link
+   * ANeuralNetworksEvent_wait} on the event object. If the device has a feature
+   * level reported by {@link ANeuralNetworksDevice_getFeatureLevel} that is
+   * lower than 30, then the timeout duration hints will be ignored.
+   *
+   * If this execution contains a {@link ANEURALNETWORKS_WHILE} operation, and
+   * the condition model does not output false within the loop timeout duration,
+   * then execution will be aborted and {@link
+   * ANEURALNETWORKS_MISSED_DEADLINE_*} will be returned through {@link
+   * ANeuralNetworksEvent_wait} on the event object.
+   *
+   * See {@link ANeuralNetworksExecution} for information on multithreaded
+   * usage.
+   *
+   * See {@link ANeuralNetworksExecution_compute} for synchronous execution.
+   * See {@link ANeuralNetworksExecution_burstCompute} for burst synchronous
+   * execution. See {@link ANeuralNetworksExecution_startCompute} for regular
+   * asynchronous execution.
+   *
+   * @param execution The execution to be scheduled and executed.
+   * @param dependencies A set of depending events. The actual evaluation will
+   * not start until all the events are signaled.
+   * @param num_dependencies The number of events in the dependencies set.
+   * @param duration The maximum amount of time in nanoseconds that is expected
+   * to be spent executing the model after all dependencies are signaled. If set
+   * to 0, the timeout duration is considered infinite.
+   * @param event The event that will be signaled on completion. event is set to
+   *              NULL if there's an error.
+   *
+   * @return ANEURALNETWORKS_NO_ERROR if the evaluation is successfully
+   * scheduled.
+   *
+   * Available since API level 30.
+   */
+  int (*ANeuralNetworksExecution_startComputeWithDependencies)(
+      ANeuralNetworksExecution* execution,
+      const ANeuralNetworksEvent* const* dependencies,
+      uint32_t num_dependencies, uint64_t duration,
+      ANeuralNetworksEvent** event);
+
+  /**
+   * Specifies whether the {@link ANeuralNetworksExecution} is able to accept
+   * padded input and output buffers and memory objects.
+   *
+   * By default, the input and output buffers and memory objects of {@link
+   * ANeuralNetworksExecution} do not allow padding.
+   *
+   * Setting the execution to accept padded input and output buffers and memory
+   * objects enables the length argument of {@link
+   * ANeuralNetworksExecution_setInput},
+   * {@link ANeuralNetworksExecution_setInputFromMemory}, {@link
+   * ANeuralNetworksExecution_setOutput}, and {@link
+   * ANeuralNetworksExecution_setOutputFromMemory} to be greater than the raw
+   * size of the operand (i.e. the size of an element multiplied by the number
+   * of elements). The extra bytes at the end of the buffer or memory region may
+   * be used by the driver to access data in chunks, for efficiency.
+   *
+   * This method must not be called after {@link
+   * ANeuralNetworksExecution_setInput},
+   * {@link ANeuralNetworksExecution_setInputFromMemory}, {@link
+   * ANeuralNetworksExecution_setOutput}, or {@link
+   * ANeuralNetworksExecution_setOutputFromMemory}.
+   *
+   * See {@link ANeuralNetworksExecution} for information on multithreaded
+   * usage.
+   *
+   * @param execution The execution to be modified.
+   * @param enable 'true' if the execution is to be able to accept padded input
+   * and output buffers and memory objects, 'false' if not.
+   *
+   * @return ANEURALNETWORKS_NO_ERROR if successful.
+   *         ANEURALNETWORKS_UNEXPECTED_NULL if execution is NULL.
+   *         ANEURALNETWORKS_BAD_STATE if
+   *         {@link ANeuralNetworksExecution_setInput},
+   *         {@link ANeuralNetworksExecution_setInputFromMemory},
+   *         {@link ANeuralNetworksExecution_setOutput}, or
+   *         {@link ANeuralNetworksExecution_setOutputFromMemory} has been
+   *         called on the execution.
+   *
+   * Available since API level 31.
+   */
+  int (*ANeuralNetworksExecution_enableInputAndOutputPadding)(
+      ANeuralNetworksExecution* execution, bool enable);
+
+  /**
+   * Specifies whether the {@link ANeuralNetworksExecution} can be reused for
+   * multiple computations.
+   *
+   * By default, the {@link ANeuralNetworksExecution} is not reusable.
+   *
+   * Setting the execution to be reusable enables multiple computations to be
+   * scheduled and evaluated on the same execution sequentially, either by means
+   * of
+   * {@link ANeuralNetworksExecution_burstCompute}, {@link
+   * ANeuralNetworksExecution_compute},
+   * {@link ANeuralNetworksExecution_startCompute} or
+   * {@link ANeuralNetworksExecution_startComputeWithDependencies}.
+   *
+   * This function may only be invoked when the execution is in the preparation
+   * state.
+   *
+   * See {@link ANeuralNetworksExecution} for information on execution states
+   * and multithreaded usage.
+   *
+   * @param execution The execution to be modified.
+   * @param reusable 'true' if the execution is to be reusable, 'false' if not.
+   *
+   * @return ANEURALNETWORKS_NO_ERROR if successful.
+   *         ANEURALNETWORKS_UNEXPECTED_NULL if execution is NULL.
+   *         ANEURALNETWORKS_BAD_STATE if the execution is not in the
+   *         preparation state.
+   *
+   * Available since API level 31.
+   */
+  int (*ANeuralNetworksExecution_setReusable)(
+      ANeuralNetworksExecution* execution, bool reusable);
+
+  /**
+   * Get the NNAPI runtime feature level.
+   *
+   * Since API level 31 (NNAPI feature level 5), the NNAPI runtime
+   * (libneuralnetworks.so) and its API specification can be updated between
+   * Android API releases.
+   *
+   * On Android devices with API level 31 and newer, for NNAPI runtime feature
+   * discovery, the NNAPI runtime feature level must be used instead of the
+   * Android device API level.
+   *
+   * On Android devices with API level 30 and older, the Android API level of
+   * the Android device must be used for NNAPI runtime feature discovery. Enum
+   * values in
+   * {@link FeatureLevelCode} from feature level 1 to 5 have their corresponding
+   * Android API levels listed in their documentation, and each such enum value
+   * equals the corresponding API level. This allows using the Android API level
+   * as the feature level. This mapping between enum value and Android API level
+   * does not exist for feature levels after NNAPI feature level 5 and API
+   * levels after S (31).
+   *
+   * Example usage:
+   * int device_api_level = android_get_device_api_level();
+   * int64_t runtime_feature_level = (device_api_level < __ANDROID_API_S__) ?
+   *                                  device_api_level :
+   *                                  ANeuralNetworks_getRuntimeFeatureLevel();
+   *
+   * Runtime feature level is closely related to NNAPI device feature level
+   * ({@link ANeuralNetworksDevice_getFeatureLevel}), which indicates an NNAPI
+   * device feature level (the most advanced NNAPI specification and features
+   * that the driver implements). This function expresses NNAPI runtime feature
+   * level, which indicates the most advanced NNAPI specification and features
+   * the runtime implements. An NNAPI device feature level is always less than
+   * or equal to the runtime feature level.
+   *
+   * This function returns a {@link FeatureLevelCode} enum value,
+   * which is the NNAPI specification version that this NNAPI runtime
+   * implements. It is NOT an Android API level.
+   *
+   * Available since NNAPI feature level 5.
+   */
+  int64_t (*ANeuralNetworks_getRuntimeFeatureLevel)();
 };

 /**
@ -1087,8 +1790,4 @@ struct NnApi {
 */
 const NnApi* NnApiImplementation();

-#ifdef __ANDROID__
-int32_t GetAndroidSdkVersion();
-#endif // __ANDROID__
-
-#endif  // TENSORFLOW_LITE_NNAPI_NNAPI_IMPLEMENTATION_H_
+#endif  // TENSORFLOW_LITE_NNAPI_NNAPI_IMPLEMENTATION_H_
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_lib/readme.txt
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_lib/readme.txt
@ -7,9 +7,13 @@ https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/nnapi
 These files do not need to be updated frequently, unless new functionalities are
 introduced in new Android OS versions, and we will integrate the new functionalities.

-The only modification to these files is,
-The enum ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES was added
-to the NeuralNetworksTypes.h.
+The modifications to these files are:
+NeuralNetworksTypes.h
+* The enum ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES was added.
+* The operation ANEURALNETWORKS_GROUPED_CONV_2D was added.
+nnapi_implementation.h/cc
+* CreateNnApiFromSupportLibrary was removed.
+[TODO: add support for CreateNnApiFromSupportLibrary on Android 12]

 2. Files: NeuralNetworksWrapper.h
          NeuralNetworksWrapper.cc
--- a/onnxruntime/python/tools/quantization/notebooks/imagenet_v2/mobilenet.ipynb
+++ b/onnxruntime/python/tools/quantization/notebooks/imagenet_v2/mobilenet.ipynb
@ -359,7 +359,7 @@
    "* mobilenet_v2_float.ort\n",
    "* mobilenet_v2_uint8.ort\n",
    "\n",
-    "The above models are used in [ONNX Runtime Mobile image classification Android sample application](https://github.com/microsoft/onnxruntime-inference-examples/tree/gwang-msft/update_mobile_example/mobile/examples/image_classifications/android).\n",
+    "The above models are used in [ONNX Runtime Mobile image classification Android sample application](https://github.com/microsoft/onnxruntime-inference-examples/tree/main/mobile/examples/image_classifications/android).\n",
    "\n",
    "Please note, there are temporary ONNX model files generated by the quantization process, which are converted to ORT format as well, please ignore these files."
   ]
@ -416,3 +416,4 @@
 "nbformat": 4,
 "nbformat_minor": 2
 }
+