Support of sparse initializers with smaller indices data type (#8834)

Support sparse initializers with smaller indices data types to save space. Make the script more efficient by selecting the indices data type and checking the resulting sparse bytes. Exclude new code from SPARSE_TENSORS.
2026-07-18 18:52:16 +00:00 · 2021-08-27 14:02:48 -07:00 · 2021-08-27 14:02:48 -07:00 · f3083f4bf3
commit f3083f4bf3
parent 775f862067
6 changed files with 453 additions and 197 deletions
--- a/onnxruntime/core/framework/tensorprotoutils.cc
+++ b/onnxruntime/core/framework/tensorprotoutils.cc
@ -842,37 +842,93 @@ common::Status ConstantNodeProtoToTensorProto(const ONNX_NAMESPACE::NodeProto& n
 }

 #if !defined(DISABLE_SPARSE_TENSORS)
-template <typename T>
 static Status CopySparseData(size_t n_sparse_elements,
                             const ONNX_NAMESPACE::TensorProto& indices,
+                             const Path& model_path,
                             gsl::span<const int64_t> dims,
                             std::function<void(size_t from_idx, size_t to_idx)> copier) {
  Status status = Status::OK();
  TensorShape indices_shape(indices.dims().data(), indices.dims().size());
+  const auto elements = gsl::narrow<size_t>(indices_shape.Size());

-  ORT_RETURN_IF_NOT(indices.data_type() == ONNX_NAMESPACE ::TensorProto_DataType_INT64, "Indicies expected to be INT64");
-
+  std::vector<int64_t> indices_values;  // used for conversion of smaller size indices
+  std::vector<uint8_t> unpack_buffer;
  gsl::span<const int64_t> indices_data;
-  const auto elements = static_cast<size_t>(indices_shape.Size());
-  if (indices.int64_data_size() > 0) {
-    indices_data = gsl::make_span<const int64_t>(indices.int64_data().data(), elements);
-  } else if (indices.has_raw_data()) {
-    ORT_RETURN_IF_NOT(indices.raw_data().size() == (elements * sizeof(int64_t)),
-                      "Sparse Indicies raw data size does not match expected.");
-    indices_data = gsl::make_span<const int64_t>(reinterpret_cast<const int64_t*>(indices.raw_data().data()), elements);
-  } else {
-    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_GRAPH, "Invalid SparseTensor indices. Should either have raw or int64 data");
+  const bool has_raw_data = indices.has_raw_data();
+  switch (indices.data_type()) {
+    case ONNX_NAMESPACE::TensorProto_DataType_INT64:
+      if (has_raw_data) {
+        ORT_RETURN_IF_NOT(indices.raw_data().size() == (elements * sizeof(int64_t)),
+                          "Sparse Indices raw data size does not match expected.");
+        ORT_RETURN_IF_ERROR(UnpackInitializerData(indices, model_path, unpack_buffer));
+        indices_data = gsl::make_span(unpack_buffer).as_span<const int64_t>();
+      } else {
+        ORT_RETURN_IF_NOT(indices.int64_data_size() == static_cast<int64_t>(elements), "Sparse indices int64 data size does not match expected");
+        indices_data = gsl::make_span(indices.int64_data().data(), elements);
+      }
+      break;
+    case ONNX_NAMESPACE::TensorProto_DataType_INT32: {
+      if (has_raw_data) {
+        ORT_RETURN_IF_NOT(indices.raw_data().size() == (elements * sizeof(int32_t)),
+                          "Sparse Indices raw data size does not match expected.");
+        ORT_RETURN_IF_ERROR(UnpackInitializerData(indices, model_path, unpack_buffer));
+        auto int32_span = gsl::make_span(unpack_buffer).as_span<const int32_t>();
+        indices_values.insert(indices_values.cend(), int32_span.cbegin(), int32_span.cend());
+        unpack_buffer.clear();
+        unpack_buffer.shrink_to_fit();
+      } else {
+        ORT_RETURN_IF_NOT(indices.int32_data_size() == static_cast<int64_t>(elements), "Sparse indices int32 data size does not match expected");
+        indices_values.insert(indices_values.cend(), indices.int32_data().cbegin(), indices.int32_data().cend());
+      }
+      indices_data = gsl::make_span(indices_values);
+      break;
+    }
+    case ONNX_NAMESPACE::TensorProto_DataType_INT16: {
+      if (has_raw_data) {
+        ORT_RETURN_IF_NOT(indices.raw_data().size() == (elements * sizeof(int16_t)),
+                          "Sparse Indices raw data size does not match expected.");
+        ORT_RETURN_IF_ERROR(UnpackInitializerData(indices, model_path, unpack_buffer));
+        auto int16_span = gsl::make_span(unpack_buffer).as_span<const int16_t>();
+        indices_values.insert(indices_values.cend(), int16_span.cbegin(), int16_span.cend());
+        indices_data = gsl::make_span(indices_values);
+        unpack_buffer.clear();
+        unpack_buffer.shrink_to_fit();
+      } else {
+        return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_GRAPH,
+                               "Invalid SparseTensor indices. INT16 indices must be in the raw data of indices tensor");
+      }
+      break;
+    }
+    case ONNX_NAMESPACE::TensorProto_DataType_INT8: {
+      if (has_raw_data) {
+        ORT_RETURN_IF_NOT(indices.raw_data().size() == elements,
+                          "Sparse Indices raw data size does not match expected.");
+        ORT_RETURN_IF_ERROR(UnpackInitializerData(indices, model_path, unpack_buffer));
+        auto int8_span = gsl::make_span(unpack_buffer).as_span<const int8_t>();
+        indices_values.insert(indices_values.cend(), int8_span.cbegin(), int8_span.cend());
+        indices_data = gsl::make_span(indices_values);
+        unpack_buffer.clear();
+        unpack_buffer.shrink_to_fit();
+      } else {
+        return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_GRAPH,
+                               "Invalid SparseTensor indices. INT8 indices must be in the raw data of indices tensor");
+      }
+      break;
+    }
+    default:
+      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_GRAPH,
+                             "Invalid SparseTensor indices. Should one of the following types: int8, int16, int32 or int64");
  }

  if (indices_shape.NumDimensions() == 1) {
    // flattened indexes
    for (size_t i = 0; i < n_sparse_elements; ++i) {
-      copier(i, static_cast<size_t>(indices_data[i]));
+      copier(i, gsl::narrow<size_t>(indices_data[i]));
    }
  } else if (indices_shape.NumDimensions() == 2) {
    // entries in format {NNZ, rank}
-    size_t rank = static_cast<size_t>(indices_shape[1]);
-    ORT_ENFORCE(rank == dims.size() && rank > 0);
+    ORT_ENFORCE(indices_shape[1] > 0 && static_cast<size_t>(indices_shape[1]) == dims.size());
+    auto rank = static_cast<size_t>(indices_shape[1]);
    const int64_t* cur_index = indices_data.data();
    std::vector<size_t> multipliers;
    multipliers.resize(rank);
@ -880,20 +936,20 @@ static Status CopySparseData(size_t n_sparse_elements,
    // calculate sum of inner dimension elements for each dimension.
    // e.g. if shape {2,3,4}, the result should be {3*4, 4, 1}
    multipliers[rank - 1] = 1;
-    for (int32_t r = static_cast<int32_t>(rank) - 2; r >= 0; --r) {
-      multipliers[r] = static_cast<size_t>(dims[r + 1]) * multipliers[r + 1];
+    for (auto r = rank - 1; r > 0; --r) {
+      multipliers[r - 1] = SafeInt<size_t>(dims[r]) * multipliers[r];
    }

    // calculate the offset for the entry
    // e.g. if shape was {2,3,4} and entry was (1, 0, 2) the offset is 14
    // as there are 2 rows, each with 12 entries per row
    for (size_t i = 0; i < n_sparse_elements; ++i) {
-      size_t idx = 0;
+      SafeInt<int64_t> idx = 0;
      for (size_t j = 0; j < rank; ++j) {
-        idx += static_cast<size_t>(cur_index[j]) * multipliers[j];
+        idx += SafeInt<int64_t>(cur_index[j]) * multipliers[j];
      }

-      copier(i, idx);
+      copier(i, static_cast<size_t>(idx));
      cur_index += rank;
    }

@ -905,29 +961,7 @@ static Status CopySparseData(size_t n_sparse_elements,

  return status;
 }
-#endif  // !defined(DISABLE_SPARSE_TENSORS)

-namespace conversion_internal {
-#if !defined(DISABLE_SPARSE_TENSORS)
-struct UnsupportedSparseDataType {
-  void operator()(int32_t dt_type, Status& status) const {
-    status = ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Unsupported sparse tensor data type of ", dt_type);
-  }
-};
-#endif
-template <typename T>
-struct GetElementSize {
-  Status operator()(size_t& element_size) const {
-    element_size = sizeof(T);
-    return Status::OK();
-  }
-};
-
-using SupportedConversionTypeList = onnxruntime::TypeList<float, double, MLFloat16, BFloat16,
-                                                          int8_t, uint8_t, int16_t, uint16_t, int32_t, uint32_t, int64_t, uint64_t>;
-}  // namespace conversion_internal
-
-#if !defined(DISABLE_SPARSE_TENSORS)
 common::Status SparseTensorProtoToDenseTensorProto(const ONNX_NAMESPACE::SparseTensorProto& sparse,
                                                   const Path& model_path,
                                                   ONNX_NAMESPACE::TensorProto& dense) {
@ -953,74 +987,70 @@ common::Status SparseTensorProtoToDenseTensorProto(const ONNX_NAMESPACE::SparseT
  auto dims = gsl::make_span<const int64_t>(dense.dims().data(), dense.dims().size());

  if (type != TensorProto_DataType_STRING) {
+    auto ml_data = DataTypeImpl::TensorTypeFromONNXEnum(type)->GetElementType();
+    size_t element_size = ml_data->Size();
+
    // need to read in sparse data first as it could be in a type specific field, in raw data, or in external data
    std::vector<uint8_t> sparse_data_storage;
    ORT_RETURN_IF_ERROR(UnpackInitializerData(sparse_values, model_path, sparse_data_storage));
    void* sparse_data = sparse_data_storage.data();
-    size_t element_size = 0;
-    // We want to this list to match the one used below in DenseTensorToSparseTensorProto()
-    MLTypeCallDispatcherFromTypeList<conversion_internal::SupportedConversionTypeList> type_disp(type);
-    ORT_RETURN_IF_ERROR(
-        (type_disp.InvokeRetWithUnsupportedPolicy<Status, conversion_internal::GetElementSize, conversion_internal::UnsupportedSparseDataType>(element_size)));

    // by putting the data into a std::string we can avoid a copy as set_raw_data can do a std::move
-    // into the TensorProto. however to actually write to the buffer we have created in the std::string we need
-    // this somewhat dirty hack to get a mutable pointer. we could alternatively use &dense_data_storage.front()
-    // but using const_cast makes it more obvious we're doing something ugly.
-    // C++17 add non-const data() where we could remove const_cast
+    // into the TensorProto.
    std::string dense_data_storage(n_dense_elements * element_size, 0);
    if (n_sparse_elements > 0) {
-      void* dense_data = const_cast<char*>(dense_data_storage.data());
+      void* dense_data = dense_data_storage.data();

      switch (element_size) {
        case 1: {
-          auto dense_data_span = gsl::make_span<uint8_t>(static_cast<uint8_t*>(dense_data), n_dense_elements);
-          status = CopySparseData<uint8_t>(
+          status = CopySparseData(
              n_sparse_elements,
-              indices, dims,
-              [sparse_data, dense_data_span](size_t from_idx, size_t to_idx) {
-                dense_data_span[to_idx] = static_cast<const uint8_t*>(sparse_data)[from_idx];
+              indices, model_path, dims,
+              [sparse_data, dense_data](size_t from_idx, size_t to_idx) {
+                static_cast<uint8_t*>(dense_data)[to_idx] = static_cast<const uint8_t*>(sparse_data)[from_idx];
              });

          break;
        }
        case 2: {
-          auto dense_data_span = gsl::make_span<uint16_t>(static_cast<uint16_t*>(dense_data), n_dense_elements);
-          status = CopySparseData<uint16_t>(
+          status = CopySparseData(
              n_sparse_elements,
-              indices, dims,
-              [sparse_data, dense_data_span](size_t from_idx, size_t to_idx) {
-                dense_data_span[to_idx] = static_cast<const uint16_t*>(sparse_data)[from_idx];
+              indices, model_path, dims,
+              [sparse_data, dense_data](size_t from_idx, size_t to_idx) {
+                const auto* src = static_cast<const uint16_t*>(sparse_data) + from_idx;
+                auto* dst = static_cast<uint16_t*>(dense_data) + to_idx;
+                memcpy(dst, src, sizeof(uint16_t));
              });

          break;
        }
        case 4: {
-          auto dense_data_span = gsl::make_span<uint32_t>(static_cast<uint32_t*>(dense_data), n_dense_elements);
-          status = CopySparseData<uint32_t>(
+          status = CopySparseData(
              n_sparse_elements,
-              indices, dims,
-              [sparse_data, dense_data_span](size_t from_idx, size_t to_idx) {
-                dense_data_span[to_idx] = static_cast<const uint32_t*>(sparse_data)[from_idx];
+              indices, model_path, dims,
+              [sparse_data, dense_data](size_t from_idx, size_t to_idx) {
+                const auto* src = static_cast<const uint32_t*>(sparse_data) + from_idx;
+                auto* dst = static_cast<uint32_t*>(dense_data) + to_idx;
+                memcpy(dst, src, sizeof(uint32_t));
              });

          break;
        }
        case 8: {
-          auto dense_data_span = gsl::make_span<uint64_t>(static_cast<uint64_t*>(dense_data), n_dense_elements);
-          status = CopySparseData<uint64_t>(
+          status = CopySparseData(
              n_sparse_elements,
-              indices, dims,
-              [sparse_data, dense_data_span](size_t from_idx, size_t to_idx) {
-                dense_data_span[to_idx] = static_cast<const uint64_t*>(sparse_data)[from_idx];
+              indices, model_path, dims,
+              [sparse_data, dense_data](size_t from_idx, size_t to_idx) {
+                const auto* src = static_cast<const uint64_t*>(sparse_data) + from_idx;
+                auto* dst = static_cast<uint64_t*>(dense_data) + to_idx;
+                memcpy(dst, src, sizeof(uint64_t));
              });
          break;
        }

        default:
          return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL,
-                                 " BUG! Report to onnxruntime team. element_size of: ",
-                                 element_size, " is not supported.", " type: ", type);
+                                 "Element_size of: ", element_size, " is not supported.", " type: ", type);
      }

      ORT_RETURN_IF_ERROR(status);
@ -1029,7 +1059,8 @@ common::Status SparseTensorProtoToDenseTensorProto(const ONNX_NAMESPACE::SparseT

  } else {
    // No request for std::string
-    conversion_internal::UnsupportedSparseDataType()(ONNX_NAMESPACE::TensorProto_DataType_STRING, status);
+    status = ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Unsupported sparse tensor data type of ",
+                             ONNX_NAMESPACE::TensorProto_DataType_STRING);
  }
  return status;
 }
@ -1040,36 +1071,6 @@ using IsZeroFunc = bool (*)(const void*);
 // Copy element
 using CopyElementFunc = void (*)(void* dest, const void* src, int64_t dest_index, int64_t src_index);

-static void SparsifyGeneric(const void* dense_raw_data, size_t n_dense_elements, size_t element_size,
-                            IsZeroFunc is_zero, CopyElementFunc copy,
-                            TensorProto& values, TensorProto& indices) {
-  auto advance = [element_size](const void* start, size_t elements) -> const void* {
-    return (reinterpret_cast<const uint8_t*>(start) + elements * element_size);
-  };
-
-  const auto* cbegin = dense_raw_data;
-  const auto* const cend = advance(cbegin, n_dense_elements);
-  auto& indices_data = *indices.mutable_int64_data();
-  int64_t index = 0;
-  while (cbegin != cend) {
-    if (!is_zero(cbegin)) {
-      indices_data.Add(index);
-    }
-    ++index;
-    cbegin = advance(cbegin, 1U);
-  }
-
-  auto& raw_data = *values.mutable_raw_data();
-  raw_data.resize(indices.int64_data_size() * element_size);
-  void* data_dest = const_cast<char*>(raw_data.data());
-
-  int64_t dest_index = 0;
-  for (auto src_index : indices.int64_data()) {
-    copy(data_dest, dense_raw_data, dest_index, src_index);
-    ++dest_index;
-  }
-}
-
 // Here we are not using tolerance for FP types since these dense tensors were
 // created from sparse initializers where zeros were absolute
 template <typename T>
@ -1079,7 +1080,85 @@ inline bool IsZero(const void* p) {

 template <typename T>
 inline void CopyElement(void* dst, const void* src, int64_t dst_index, int64_t src_index) {
-  reinterpret_cast<T*>(dst)[dst_index] = reinterpret_cast<const T*>(src)[src_index];
+  const auto* src_p = reinterpret_cast<const T*>(src) + src_index;
+  auto* dst_p = reinterpret_cast<T*>(dst) + dst_index;
+  memcpy(dst_p, src_p, sizeof(T));
+}
+
+template <>
+inline void CopyElement<uint8_t>(void* dst, const void* src, int64_t dst_index, int64_t src_index) {
+  reinterpret_cast<uint8_t*>(dst)[dst_index] = reinterpret_cast<const uint8_t*>(src)[src_index];
+}
+
+
+template <typename T>
+static void SetIndices(gsl::span<int64_t> gathered_indices,
+                       std::string& raw_indices,
+                       TensorProto& indices) {
+  raw_indices.resize(gathered_indices.size() * sizeof(T));
+  auto* ind_dest = reinterpret_cast<T*>(raw_indices.data());
+  size_t dest_index = 0;
+  for (auto src_index : gathered_indices) {
+    ORT_IF_CONSTEXPR(sizeof(T) == sizeof(int8_t)) {
+      ind_dest[dest_index] = static_cast<T>(src_index);
+    } else {
+      auto* dst = ind_dest + dest_index;
+      T v = static_cast<T>(src_index);
+      memcpy(dst, &v, sizeof(T));
+    }
+    ++dest_index;
+  }
+  indices.set_data_type(utils::ToTensorProtoElementType<T>());
+}
+
+static void SparsifyGeneric(const void* dense_raw_data, size_t n_dense_elements, size_t element_size,
+                            IsZeroFunc is_zero, CopyElementFunc copy,
+                            TensorProto& values, TensorProto& indices,
+                            size_t& nnz) {
+  auto advance = [element_size](const void* start, size_t elements) -> const void* {
+    return (reinterpret_cast<const uint8_t*>(start) + elements * element_size);
+  };
+
+  const auto* cbegin = dense_raw_data;
+  const auto* const cend = advance(cbegin, n_dense_elements);
+  std::vector<int64_t> gathered_indices;
+  int64_t index = 0;
+  while (cbegin != cend) {
+    if (!is_zero(cbegin)) {
+      gathered_indices.push_back(index);
+    }
+    ++index;
+    cbegin = advance(cbegin, 1U);
+  }
+
+  if (!gathered_indices.empty()) {
+    auto& raw_data = *values.mutable_raw_data();
+    raw_data.resize(gathered_indices.size() * element_size);
+    void* data_dest = raw_data.data();
+
+    int64_t dest_index = 0;
+    for (auto src_index : gathered_indices) {
+      copy(data_dest, dense_raw_data, dest_index, src_index);
+      ++dest_index;
+    }
+
+    auto gathered_span = gsl::make_span(gathered_indices);
+    auto& raw_indices = *indices.mutable_raw_data();
+    const auto max_index = gathered_indices.back();
+    if (max_index <= std::numeric_limits<int8_t>::max()) {
+      SetIndices<int8_t>(gathered_span, raw_indices, indices);
+    } else if (max_index <= std::numeric_limits<int16_t>::max()) {
+      SetIndices<int16_t>(gathered_span, raw_indices, indices);
+    } else if (max_index <= std::numeric_limits<int32_t>::max()) {
+      SetIndices<int32_t>(gathered_span, raw_indices, indices);
+    } else {
+      SetIndices<int64_t>(gathered_span, raw_indices, indices);
+    }
+  } else {
+    indices.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT8);
+    indices.set_raw_data(std::string());
+  }
+  nnz = gathered_indices.size();
 }

 common::Status DenseTensorToSparseTensorProto(const ONNX_NAMESPACE::TensorProto& dense_proto,
@ -1087,11 +1166,9 @@ common::Status DenseTensorToSparseTensorProto(const ONNX_NAMESPACE::TensorProto&
                                              ONNX_NAMESPACE::SparseTensorProto& result) {
  ORT_ENFORCE(HasDataType(dense_proto), "Must have a valid data type");

-  const bool is_string_data = dense_proto.data_type() == ONNX_NAMESPACE::TensorProto_DataType_STRING;
-  if (is_string_data) {
-    Status status{};
-    conversion_internal::UnsupportedSparseDataType()(ONNX_NAMESPACE::TensorProto_DataType_STRING, status);
-    return status;
+  if (dense_proto.data_type() == ONNX_NAMESPACE::TensorProto_DataType_STRING) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Unsupported sparse tensor data type of ",
+                           ONNX_NAMESPACE::TensorProto_DataType_STRING);
  }

  const auto data_type = dense_proto.data_type();
@ -1101,51 +1178,47 @@ common::Status DenseTensorToSparseTensorProto(const ONNX_NAMESPACE::TensorProto&
  values.set_data_type(data_type);

  auto& indices = *sparse_proto.mutable_indices();
-  indices.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64);

  SafeInt<size_t> n_dense_elements = 1;
  for (auto dim : dense_proto.dims()) {
    n_dense_elements *= dim;
  }

+  auto ml_data = DataTypeImpl::TensorTypeFromONNXEnum(data_type)->GetElementType();
+  size_t element_size = ml_data->Size();
+
  std::vector<uint8_t> dense_raw_data;
  ORT_RETURN_IF_ERROR(UnpackInitializerData(dense_proto, model_path, dense_raw_data));
-  size_t element_size = 0;
-  // We want this type list to match the one above in SparseTensorProtoToDenseTensorProto
-  MLTypeCallDispatcherFromTypeList<conversion_internal::SupportedConversionTypeList> type_disp(data_type);
-  ORT_RETURN_IF_ERROR(
-      (type_disp.InvokeRetWithUnsupportedPolicy<Status, conversion_internal::GetElementSize, conversion_internal::UnsupportedSparseDataType>(element_size)));

+  size_t nnz = 0;
  void* dense_data = dense_raw_data.data();
  switch (element_size) {
    case 1: {
      SparsifyGeneric(dense_data, n_dense_elements, element_size,
-                      IsZero<uint8_t>, CopyElement<uint8_t>, values, indices);
+                      IsZero<uint8_t>, CopyElement<uint8_t>, values, indices, nnz);
      break;
    }
    case 2: {
      SparsifyGeneric(dense_data, n_dense_elements, element_size,
-                      IsZero<uint16_t>, CopyElement<uint16_t>, values, indices);
+                      IsZero<uint16_t>, CopyElement<uint16_t>, values, indices, nnz);
      break;
    }
    case 4: {
      SparsifyGeneric(dense_data, n_dense_elements, element_size,
-                      IsZero<uint32_t>, CopyElement<uint32_t>, values, indices);
+                      IsZero<uint32_t>, CopyElement<uint32_t>, values, indices, nnz);
      break;
    }
    case 8: {
      SparsifyGeneric(dense_data, n_dense_elements, element_size,
-                      IsZero<uint64_t>, CopyElement<uint64_t>, values, indices);
+                      IsZero<uint64_t>, CopyElement<uint64_t>, values, indices, nnz);
      break;
    }
    default:
      return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL,
-                             " BUG! Report to onnxruntime team. element_size of: ",
-                             element_size, " is not supported.", " data_type: ", data_type);
+                             "Element_size of: ", element_size, " is not supported.", " data_type: ", data_type);
  }

  // Fix up shapes
-  const auto nnz = indices.int64_data_size();
  values.add_dims(nnz);
  indices.add_dims(nnz);

--- a/onnxruntime/test/framework/sparse_kernels_test.cc
+++ b/onnxruntime/test/framework/sparse_kernels_test.cc
@ -669,8 +669,51 @@ static void CreateTensorWithExternalData(
  tensor_proto.set_data_type(type);
 }

+namespace {
+
+void insert_indices_data(bool indices_1D,
+                         size_t values_size, size_t shape_size,
+                         std::vector<int8_t>& indices_data,
+                         TensorProto& indices_tp) {
+  if (indices_1D) {
+    indices_data = {2, 5, 6, 10};
+    indices_tp.add_dims(indices_data.size());
+  } else {
+    // indices are shape {NNZ, rank} so convert flattened values of 2, 5, 6 and 10 to rank 3 values
+    indices_tp.add_dims(values_size);
+    indices_tp.add_dims(shape_size);
+    indices_data = {
+        0, 1, 0,
+        0, 2, 1,
+        1, 0, 0,
+        1, 2, 0};
+  }
+}
+
 template <typename T>
-static NodeProto CreateConstantNode(bool indices_1D,
+struct InsertIndices {
+  void operator()(bool indices_1D, size_t values_size, size_t shape_size, TensorProto& indices_tp) const {
+    static_assert(std::is_integral_v<T>, "indices data must be integral data type");
+    static_assert(std::is_signed_v<T>, "indices must be signed data type");
+    std::vector<int8_t> indices_data;
+    insert_indices_data(indices_1D, values_size, shape_size, indices_data, indices_tp);
+    indices_tp.set_data_type(utils::ToTensorProtoElementType<T>());
+    ORT_IF_CONSTEXPR (sizeof(T) == sizeof(int8_t)) {
+      indices_tp.mutable_raw_data()->assign(reinterpret_cast<const char*>(indices_data.data()), indices_data.size());
+    } else {
+      // Conversion on the fly to the target data type
+      std::vector<T> indices(indices_data.cbegin(), indices_data.cend());
+      indices_tp.mutable_raw_data()->assign(reinterpret_cast<const char*>(indices.data()), indices.size() * sizeof(T));
+    }
+  }
+};
+
+using SupportedIndicesTypeList = onnxruntime::TypeList<int8_t, int16_t, int32_t, int64_t>;
+
+}  // namespace
+
+template <typename T>
+static NodeProto CreateConstantNode(bool indices_1D, int32_t indices_type,
                                    std::function<void(const std::vector<T>& values, TensorProto& tp)> inserter,
                                    std::vector<T>& expected_data) {
  NodeProto constant_node;
@ -678,7 +721,6 @@ static NodeProto CreateConstantNode(bool indices_1D,
  constant_node.add_output("dense_tensor_output");

  std::vector<T> values = CreateValues<T>();
-  std::vector<int64_t> indices;
  std::vector<int64_t> shape{2, 3, 2};

  AttributeProto& attrib = *constant_node.mutable_attribute()->Add();
@ -686,26 +728,11 @@ static NodeProto CreateConstantNode(bool indices_1D,
  attrib.set_type(AttributeProto_AttributeType_SPARSE_TENSOR);

  SparseTensorProto& stp = *attrib.mutable_sparse_tensor();
-  TensorProto& indices_tp = *stp.mutable_indices();
-
  stp.mutable_dims()->Add(shape.cbegin(), shape.cend());

-  if (indices_1D) {
-    indices = {2, 5, 6, 10};
-    indices_tp.add_dims(indices.size());
-  } else {
-    // indices are shape {NNZ, rank} so convert flattened values of 2, 5, 6 and 10 to rank 3 values
-    indices_tp.add_dims(values.size());
-    indices_tp.add_dims(shape.size());
-    indices = {
-        0, 1, 0,
-        0, 2, 1,
-        1, 0, 0,
-        1, 2, 0};
-  }
-
-  indices_tp.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64);
-  indices_tp.mutable_int64_data()->Add(indices.cbegin(), indices.cend());
+  TensorProto& indices_tp = *stp.mutable_indices();
+  utils::MLTypeCallDispatcherFromTypeList<SupportedIndicesTypeList> type_disp(indices_type);
+  type_disp.Invoke<InsertIndices>(indices_1D, values.size(), shape.size(), indices_tp);

  expected_data.resize(2 * 3 * 2);
  expected_data[2] = values[0];
@ -733,10 +760,9 @@ static NodeProto CreateConstantNodeAllZeros(bool indices_1D, std::vector<T>& exp
  attrib.set_type(AttributeProto_AttributeType_SPARSE_TENSOR);

  SparseTensorProto& stp = *attrib.mutable_sparse_tensor();
-  TensorProto& indices_tp = *stp.mutable_indices();
-
  stp.mutable_dims()->Add(shape.cbegin(), shape.cend());

+  TensorProto& indices_tp = *stp.mutable_indices();
  if (indices_1D) {
    indices_tp.add_dims(0);
  } else {
@ -759,11 +785,11 @@ static NodeProto CreateConstantNodeAllZeros(bool indices_1D, std::vector<T>& exp
 }

 template <typename T>
-static void TestConversion(bool use_1D_indices,
+static void TestConversion(bool use_1D_indices, int32_t indices_type,
                           std::function<void(const std::vector<T>& values, TensorProto& tp)> inserter,
                           std::function<void(gsl::span<const T> expected, const TensorProto& actual)> checker) {
  std::vector<T> expected;
-  auto node = CreateConstantNode<T>(use_1D_indices, inserter, expected);
+  auto node = CreateConstantNode<T>(use_1D_indices, indices_type, inserter, expected);

  TensorProto dense;
  // Path is required for loading external data (if any)
@ -793,8 +819,17 @@ template <typename T>
 static void TestConversion(
    std::function<void(const std::vector<T>& values, TensorProto& tp)> inserter,
    std::function<void(gsl::span<const T> expected, const TensorProto& actual)> checker) {
-  TestConversion(true, inserter, checker);
-  TestConversion(false, inserter, checker);
+  std::vector<TensorProto_DataType> indices_types{
+      TensorProto_DataType_INT8,
+      TensorProto_DataType_INT16,
+      TensorProto_DataType_INT32,
+      TensorProto_DataType_INT64
+  };
+
+  for (auto dt : indices_types) {
+    TestConversion(true, dt, inserter, checker);
+    TestConversion(false, dt, inserter, checker);
+  }
  TestConversionAllZeros(true, checker);
  TestConversionAllZeros(false, checker);
 }
@ -820,7 +855,7 @@ static void RawDataChecker(gsl::span<const T> expected, const TensorProto& actua
  const T* raw_data = reinterpret_cast<const T*>(actual.raw_data().data());
  auto actual_span = gsl::make_span<const T>(raw_data, actual_size);

-  EXPECT_THAT(actual_span, testing::ContainerEq(expected));
+  ASSERT_THAT(actual_span, testing::ContainerEq(expected));
 }

 template <>
@ -831,7 +866,7 @@ void RawDataChecker<MLFloat16>(gsl::span<const MLFloat16> expected_bfloat, const
  const uint16_t* raw_data = reinterpret_cast<const uint16_t*>(actual.raw_data().data());
  auto actual_span = gsl::make_span<const uint16_t>(raw_data, actual_size);

-  EXPECT_THAT(actual_span, testing::ContainerEq(expected));
+  ASSERT_THAT(actual_span, testing::ContainerEq(expected));
 }

 template <>
@ -842,7 +877,7 @@ void RawDataChecker<BFloat16>(gsl::span<const BFloat16> expected_bfloat, const T
  const uint16_t* raw_data = reinterpret_cast<const uint16_t*>(actual.raw_data().data());
  auto actual_span = gsl::make_span<const uint16_t>(raw_data, actual_size);

-  EXPECT_THAT(actual_span, testing::ContainerEq(expected));
+  ASSERT_THAT(actual_span, testing::ContainerEq(expected));
 }

 TEST(SparseTensorConversionTests, TestConstantNodeConversion) {
@ -938,6 +973,7 @@ TEST(SparseTensorConversionTests, TestConstantNodeConversion) {
  PathString tensor_filename(ORT_TSTR("tensor_XXXXXX"));
  TestConversion<float>(
      true,
+      TensorProto_DataType_INT64,
      [&tensor_filename](const std::vector<float>& values, TensorProto& tp) {
        CreateTensorWithExternalData<float>(TensorProto_DataType_FLOAT, values, tensor_filename, tp);
      },
@ -950,8 +986,11 @@ TEST(SparseTensorConversionTests, TestConstantNodeConversion) {
 #if !defined(ORT_MINIMAL_BUILD)

 template <typename T>
-static std::vector<T> CreateSparseValues() {
-  return {0, 2, 3, 0};
+static std::vector<T> CreateSparseValues(size_t indices_start) {
+  std::vector<T> result(indices_start + 2);
+  result[indices_start] = 2;
+  result[indices_start + 1] = 3;
+  return result;
 }

 /* std::string support in the future
@ -962,13 +1001,19 @@ std::vector<std::string> CreateSparseValues<std::string>() {
 */

 template <>
-std::vector<BFloat16> CreateSparseValues<BFloat16>() {
-  return {BFloat16(0.f), BFloat16(2.f), BFloat16(3.f), BFloat16(0.f)};
+std::vector<BFloat16> CreateSparseValues<BFloat16>(size_t indices_start) {
+  std::vector<BFloat16> result(indices_start + 2);
+  result[indices_start] = BFloat16(2.f);
+  result[indices_start + 1] = BFloat16(3.f);
+  return result;
 }

 template <>
-std::vector<MLFloat16> CreateSparseValues<MLFloat16>() {
-  return {MLFloat16(0.f), MLFloat16(2.f), MLFloat16(3.f), MLFloat16(0.f)};
+std::vector<MLFloat16> CreateSparseValues<MLFloat16>(size_t indices_start) {
+  std::vector<MLFloat16> result(indices_start + 2);
+  result[indices_start] = MLFloat16(2.f);
+  result[indices_start + 1] = MLFloat16(3.f);
+  return result;
 }

 template <typename T>
@ -987,11 +1032,13 @@ std::vector<MLFloat16> CreateSparseValuesAllZeros<MLFloat16>() {
 }

 template <typename T>
-TensorProto CreateDenseTensor(std::function<void(const std::vector<T>& values, TensorProto& tp)> inserter,
+TensorProto CreateDenseTensor(size_t indices_start,
+                              std::function<void(const std::vector<T>& values, TensorProto& tp)> inserter,
                              std::vector<T>& expected_values, std::vector<int64_t>& expected_indicies) {
  TensorProto result;
-  std::vector<T> values = CreateSparseValues<T>();
-  expected_indicies = {1, 2};
+  std::vector<T> values = CreateSparseValues<T>(indices_start);
+  auto ind_start = static_cast<int64_t>(indices_start); 
+  expected_indicies = {ind_start, ind_start + 1};
  for (const auto& ind : expected_indicies) {
    expected_values.push_back(values[ind]);
  }
@ -1026,12 +1073,9 @@ static void RawSparseDataChecker(gsl::span<const T> expected_values,
  const T* raw_data = reinterpret_cast<const T*>(actual.values().raw_data().data());
  auto actual_span = gsl::make_span<const T>(raw_data, actual_size);

-  EXPECT_THAT(actual_span, testing::ContainerEq(expected_values));
+  ASSERT_THAT(actual_span, testing::ContainerEq(expected_values));

-  // Check indicies
-  EXPECT_THAT(actual.indices().data_type(), ONNX_NAMESPACE::TensorProto_DataType_INT64);
-  auto actual_indicies = gsl::make_span<const int64_t>(actual.indices().int64_data().data(), actual.indices().int64_data_size());
-  EXPECT_THAT(actual_indicies, testing::ContainerEq(expected_indicies));
+  SparseIndicesChecker(actual.indices(), expected_indicies);
 }

 template <>
@ -1045,11 +1089,8 @@ void RawSparseDataChecker<BFloat16>(gsl::span<const BFloat16> expected_bfloat,
  const uint16_t* raw_data = reinterpret_cast<const uint16_t*>(actual.values().raw_data().data());
  auto actual_span = gsl::make_span<const uint16_t>(raw_data, actual_size);

-  EXPECT_THAT(actual_span, testing::ContainerEq(expected));
-  // Check indicies
-  EXPECT_THAT(actual.indices().data_type(), ONNX_NAMESPACE::TensorProto_DataType_INT64);
-  auto actual_indicies = gsl::make_span<const int64_t>(actual.indices().int64_data().data(), actual.indices().int64_data_size());
-  EXPECT_THAT(actual_indicies, testing::ContainerEq(expected_indicies));
+  ASSERT_THAT(actual_span, testing::ContainerEq(expected));
+  SparseIndicesChecker(actual.indices(), expected_indicies);
 }

 template <>
@ -1063,15 +1104,12 @@ void RawSparseDataChecker<MLFloat16>(gsl::span<const MLFloat16> expected_bfloat,
  const uint16_t* raw_data = reinterpret_cast<const uint16_t*>(actual.values().raw_data().data());
  auto actual_span = gsl::make_span<const uint16_t>(raw_data, actual_size);

-  EXPECT_THAT(actual_span, testing::ContainerEq(expected));
-  // Check indicies
-  EXPECT_THAT(actual.indices().data_type(), ONNX_NAMESPACE::TensorProto_DataType_INT64);
-  auto actual_indicies = gsl::make_span<const int64_t>(actual.indices().int64_data().data(), actual.indices().int64_data_size());
-  EXPECT_THAT(actual_indicies, testing::ContainerEq(expected_indicies));
+  ASSERT_THAT(actual_span, testing::ContainerEq(expected));
+  SparseIndicesChecker(actual.indices(), expected_indicies);
 }

 template <typename T>
-static void TestDenseToSparseConversionValues(
+static void TestDenseToSparseConversionValues(size_t indices_start,
    std::function<void(const std::vector<T>& values, TensorProto& tp)> inserter,
    std::function<void(gsl::span<const T> expected,
                       gsl::span<const int64_t> expected_indicies,
@ -1082,7 +1120,7 @@ static void TestDenseToSparseConversionValues(
  // Path is required for loading external data
  // Using empty path here since the data is not external
  Path model_path;
-  TensorProto dense_tensor = CreateDenseTensor(inserter, expected_values, expected_indicies);
+  TensorProto dense_tensor = CreateDenseTensor(indices_start, inserter, expected_values, expected_indicies);

  SparseTensorProto sparse_tensor;
  utils::DenseTensorToSparseTensorProto(dense_tensor, model_path, sparse_tensor);
@ -1117,17 +1155,21 @@ static void TestDenseAllZerosToSparseConversion(
 }

 template <typename T>
-static void TestDenseToSparseConversion(std::function<void(const std::vector<T>& values, TensorProto& tp)> inserter,
+static void TestDenseToSparseConversion(size_t indices_start,
+                                        std::function<void(const std::vector<T>& values, TensorProto& tp)> inserter,
                                        std::function<void(gsl::span<const T> expected,
                                                           gsl::span<const int64_t> expected_indicies,
                                                           const SparseTensorProto& actual)>
                                            checker) {
-  TestDenseToSparseConversionValues<T>(inserter, checker);
+  TestDenseToSparseConversionValues<T>(indices_start, inserter, checker);
  TestDenseAllZerosToSparseConversion<T>(inserter, checker);
 }

 TEST(SparseTensorConversionTests, TestDenseToSparseConversion) {
+  // This one will test indices that are less than max int8 value
+  // which should result in int8 indices
  TestDenseToSparseConversion<float>(
+      20U,
      [](const std::vector<float>& values, TensorProto& tp) {
        tp.set_data_type(TensorProto_DataType_FLOAT);
        tp.set_name("dense_float");
@ -1135,7 +1177,10 @@ TEST(SparseTensorConversionTests, TestDenseToSparseConversion) {
      },
      RawSparseDataChecker<float>);

+  // This one will test indices that are max(int8) < ind < max(int16) value
+  // which should result in int16 indices
  TestDenseToSparseConversion<double>(
+      static_cast<size_t>(std::numeric_limits<int8_t>::max()) + 20U,
      [](const std::vector<double>& values, TensorProto& tp) {
        tp.set_data_type(TensorProto_DataType_DOUBLE);
        tp.set_name("dense_double");
@ -1143,7 +1188,10 @@ TEST(SparseTensorConversionTests, TestDenseToSparseConversion) {
      },
      RawSparseDataChecker<double>);

+  // This one will test indices that are max(int16) < ind < max(int32) value
+  // which should result in int32 indices
  TestDenseToSparseConversion<BFloat16>(
+      static_cast<size_t>(std::numeric_limits<int16_t>::max()) + 20U,
      [](const std::vector<BFloat16>& values, TensorProto& tp) {
        tp.set_data_type(TensorProto_DataType_BFLOAT16);
        tp.set_name("dense_bfloat16");
@ -1153,7 +1201,11 @@ TEST(SparseTensorConversionTests, TestDenseToSparseConversion) {
      },
      RawSparseDataChecker<BFloat16>);

+  // Protobuf cannot hold more than 2GB (the size overflows), so 64-bit indices can't be
+  // tested on conversion unless they are created explicitly.
+  // The small indices used here should result in int8 indices.
  TestDenseToSparseConversion<MLFloat16>(
+      20U,
      [](const std::vector<MLFloat16>& values, TensorProto& tp) {
        tp.set_data_type(TensorProto_DataType_FLOAT16);
        tp.set_name("dense_float16");
@ -1164,6 +1216,7 @@ TEST(SparseTensorConversionTests, TestDenseToSparseConversion) {
      RawSparseDataChecker<MLFloat16>);

  TestDenseToSparseConversion<int16_t>(
+      20U,
      [](const std::vector<int16_t>& values, TensorProto& tp) {
        tp.set_name("dense_int16");
        tp.set_data_type(TensorProto_DataType_INT16);
@ -1172,6 +1225,7 @@ TEST(SparseTensorConversionTests, TestDenseToSparseConversion) {
      RawSparseDataChecker<int16_t>);

  TestDenseToSparseConversion<uint16_t>(
+      20U,
      [](const std::vector<uint16_t>& values, TensorProto& tp) {
        tp.set_name("dense_uint16");
        tp.set_data_type(TensorProto_DataType_UINT16);
@ -1180,6 +1234,7 @@ TEST(SparseTensorConversionTests, TestDenseToSparseConversion) {
      RawSparseDataChecker<uint16_t>);

  TestDenseToSparseConversion<int32_t>(
+      20U,
      [](const std::vector<int32_t>& values, TensorProto& tp) {
        tp.set_name("dense_int32");
        tp.set_data_type(TensorProto_DataType_INT32);
@ -1188,6 +1243,7 @@ TEST(SparseTensorConversionTests, TestDenseToSparseConversion) {
      RawSparseDataChecker<int32_t>);

  TestDenseToSparseConversion<uint32_t>(
+      20U,
      [](const std::vector<uint32_t>& values, TensorProto& tp) {
        tp.set_name("dense_uint32");
        tp.set_data_type(TensorProto_DataType_UINT32);
@ -1196,6 +1252,7 @@ TEST(SparseTensorConversionTests, TestDenseToSparseConversion) {
      RawSparseDataChecker<uint32_t>);

  TestDenseToSparseConversion<int64_t>(
+      20U,
      [](const std::vector<int64_t>& values, TensorProto& tp) {
        tp.set_name("dense_int64");
        tp.set_data_type(TensorProto_DataType_INT64);
@ -1204,6 +1261,7 @@ TEST(SparseTensorConversionTests, TestDenseToSparseConversion) {
      RawSparseDataChecker<int64_t>);

  TestDenseToSparseConversion<uint64_t>(
+      20U,
      [](const std::vector<uint64_t>& values, TensorProto& tp) {
        tp.set_name("dense_uint64");
        tp.set_data_type(TensorProto_DataType_UINT64);
@ -1212,6 +1270,7 @@ TEST(SparseTensorConversionTests, TestDenseToSparseConversion) {
      RawSparseDataChecker<uint64_t>);

  TestDenseToSparseConversion<int8_t>(
+      20U,
      [](const std::vector<int8_t>& values, TensorProto& tp) {
        tp.set_name("dense_int8");
        tp.set_data_type(TensorProto_DataType_INT8);
@ -1220,6 +1279,7 @@ TEST(SparseTensorConversionTests, TestDenseToSparseConversion) {
      RawSparseDataChecker<int8_t>);

  TestDenseToSparseConversion<uint8_t>(
+      20U,
      [](const std::vector<uint8_t>& values, TensorProto& tp) {
        tp.set_name("dense_int64");
        RawDataWriter(values, tp, TensorProto_DataType_UINT8);
--- a/onnxruntime/test/framework/test_utils.h
+++ b/onnxruntime/test/framework/test_utils.h
@ -97,5 +97,9 @@ void AllocateMLValue(AllocatorPtr alloc, const std::vector<int64_t>& dims, OrtVa
 // Helper function to check that the graph transformations have been successfully applied.
 std::map<std::string, int> CountOpsInGraph(const Graph& graph, bool recurse_into_subgraphs = true);

+#if !defined(DISABLE_SPARSE_TENSORS)
+void SparseIndicesChecker(const ONNX_NAMESPACE::TensorProto& indices_proto, gsl::span<const int64_t> expected_indicies);
+#endif // DISABLE_SPARSE_TENSORS
+
 }  // namespace test
 }  // namespace onnxruntime
--- a/onnxruntime/test/ir/graph_test.cc
+++ b/onnxruntime/test/ir/graph_test.cc
@ -10,6 +10,7 @@
 #include "gmock/gmock.h"
 #include "onnx/defs/function.h"
 #include "core/graph/function_impl.h"
+#include "test/framework/test_utils.h"

 #ifdef __GNUC__
 #define UNUSED __attribute__((unused))
@ -233,6 +234,7 @@ static void ConstructSparseTensor(const std::string& name,
  std::copy(values.cbegin(), values.cend(), dest_span.begin());

  const std::vector<int64_t>& indices = sparse_details::indices;  // Not to exceed 59
+
  auto& m_indicies = *sparse_proto.mutable_indices();
  m_indicies.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64);
  *m_indicies.mutable_dims()->Add() = static_cast<int64_t>(indices.size());
@ -264,10 +266,9 @@ static void ValidateSparseTensorProto(const SparseTensorProto& proto) {
    ++expected_begin;
  }
  // Check indices
-  EXPECT_EQ(proto.indices().data_type(), ONNX_NAMESPACE::TensorProto_DataType_INT64);
+  const auto& indices = proto.indices();
  auto expected_indices = gsl::make_span(sparse_details::indices);
-  auto actual_indices = gsl::make_span<const int64_t>(proto.indices().int64_data().data(), proto.indices().int64_data_size());
-  EXPECT_THAT(actual_indices, testing::ContainerEq(expected_indices));
+  SparseIndicesChecker(indices, expected_indices);
  // check shape
  const auto& dims = proto.dims();
  auto actual_shape = gsl::make_span<const int64_t>(dims.data(), dims.size());
--- a/onnxruntime/test/util/test_utils.cc
+++ b/onnxruntime/test/util/test_utils.cc
@ -4,7 +4,9 @@
 #include "test/util/include/test_utils.h"

 #include "core/framework/ort_value.h"
+#include "core/graph/onnx_protobuf.h"
 #include "core/session/inference_session.h"
+#include "core/framework/tensorprotoutils.h"

 #include "test/util/include/asserts.h"
 #include "test/util/include/test/test_environment.h"
@ -115,5 +117,68 @@ void RunAndVerifyOutputsWithEP(const ORTCHAR_T* model_path, const char* log_id,
  VerifyOutputs(output_names, expected_fetches, fetches);
 }

+#if !defined(DISABLE_SPARSE_TENSORS)
+void SparseIndicesChecker(const ONNX_NAMESPACE::TensorProto& indices_proto, gsl::span<const int64_t> expected_indicies) {
+  using namespace ONNX_NAMESPACE;
+  Path model_path;
+  std::vector<uint8_t> unpack_buffer;
+  gsl::span<const int64_t> ind_span;
+  std::vector<int64_t> converted_indices;
+  TensorShape ind_shape(indices_proto.dims().data(), indices_proto.dims().size());
+  const auto elements = gsl::narrow<size_t>(ind_shape.Size());
+  const bool has_raw_data = indices_proto.has_raw_data();
+  switch (indices_proto.data_type()) {
+    case ONNX_NAMESPACE::TensorProto_DataType_INT64: {
+      if (has_raw_data) {
+        const auto& rd = indices_proto.raw_data();
+        ASSERT_EQ(rd.size(), elements * sizeof(int64_t));
+        ASSERT_STATUS_OK(utils::UnpackInitializerData(indices_proto, model_path, unpack_buffer));
+        ind_span = gsl::make_span(unpack_buffer).as_span<const int64_t>();
+      } else {
+        ind_span = gsl::make_span(indices_proto.int64_data().cbegin(), indices_proto.int64_data().cend());
+      }
+      break;
+    }
+    case ONNX_NAMESPACE::TensorProto_DataType_INT32: {
+      if (has_raw_data) {
+        const auto& rd = indices_proto.raw_data();
+        ASSERT_EQ(rd.size(), elements * sizeof(int32_t));
+        ASSERT_STATUS_OK(utils::UnpackInitializerData(indices_proto, model_path, unpack_buffer));
+        auto int32_span = gsl::make_span(unpack_buffer).as_span<const int32_t>();
+        converted_indices.insert(converted_indices.cend(), int32_span.cbegin(), int32_span.cend());
+      } else {
+        converted_indices.insert(converted_indices.cend(), indices_proto.int32_data().cbegin(), indices_proto.int32_data().cend());
+      }
+      ind_span = gsl::make_span(converted_indices);
+      break;
+    }
+    case ONNX_NAMESPACE::TensorProto_DataType_INT16: {
+      ASSERT_TRUE(has_raw_data);
+      const auto& rd = indices_proto.raw_data();
+      ASSERT_EQ(rd.size(), elements * sizeof(int16_t));
+      ASSERT_STATUS_OK(utils::UnpackInitializerData(indices_proto, model_path, unpack_buffer));
+      auto int16_span = gsl::make_span(unpack_buffer).as_span<const int16_t>();
+      converted_indices.insert(converted_indices.cend(), int16_span.cbegin(), int16_span.cend());
+      ind_span = gsl::make_span(converted_indices);
+      break;
+    }
+    case ONNX_NAMESPACE::TensorProto_DataType_INT8: {
+      ASSERT_TRUE(has_raw_data);
+      const auto& rd = indices_proto.raw_data();
+      ASSERT_EQ(rd.size(), elements);
+      ASSERT_STATUS_OK(utils::UnpackInitializerData(indices_proto, model_path, unpack_buffer));
+      auto int8_span = gsl::make_span(unpack_buffer).as_span<const int8_t>();
+      converted_indices.insert(converted_indices.cend(), int8_span.cbegin(), int8_span.cend());
+      ind_span = gsl::make_span(converted_indices);
+      break;
+    }
+    default:
+      ASSERT_TRUE(false);
+  }
+  ASSERT_THAT(ind_span, testing::ContainerEq(expected_indicies));
+}
+
+#endif // DISABLE_SPARSE_TENSORS
+
 }  // namespace test
 }  // namespace onnxruntime
--- a/tools/python/sparsify_initializers.py
+++ b/tools/python/sparsify_initializers.py
@ -26,7 +26,7 @@ def parse_arguments():
    parser.add_argument('--exclude', required=False, type=str,
                        help='semicolon separated list of initializer names to exclude')
    parser.add_argument('--tolerance', required=False, type=float, default=1e-6,
-                        help='FP absolute tolerance. If not given simple compare to 0')
+                        help='FP absolute tolerance.')
    parser.add_argument('--sparsity_threshold', required=False,
                        type=float, default=0.5,
                        help='convert to sparse initializers if sparsity is at least this much')
@ -49,11 +49,13 @@ def setup_logging(verbose):  # type: (bool)  -> None
    logger.setLevel(logging_level)


-def convert_tensor_to_sparse(tensor, tolerance):  # type: (TensorProto) -> Tuple[SparseTensorProto, float]
+def convert_tensor_to_sparse(tensor,
+                             sparsity_threshold,
+                             tolerance):  # type: (TensorProto, float, float) -> Tuple[SparseTensorProto, float]
    """ returns a tuple of sparse_tensor and sparsity level
    """
    values = []
-    indicies = []
+    indices = []
    nnz_count = 0
    tensor_data = numpy_helper.to_array(tensor).flatten()
    data_len = len(tensor_data)
@ -62,25 +64,76 @@ def convert_tensor_to_sparse(tensor, tolerance):  # type: (TensorProto) -> Tuple
             el = tensor_data[index]
-            if abs(el) <= tolerance:
+            if abs(el) > tolerance:
                 values.append(el)
-                indicies.append(index)
+                indices.append(index)
                 nnz_count += 1
     else:
         for index in range(data_len):
             el = tensor_data[index]
             if el != 0:
                 values.append(el)
-                indicies.append(index)
+                indices.append(index)
                 nnz_count += 1

     sparsity = float(1.) - float(nnz_count)/data_len
-    logger.debug(f"initializer={tensor.name}, dtype={tensor_data.dtype}, \
-                 len={data_len}, nnz={nnz_count}, sparsity={sparsity}")

-    values_tensor = onnx.helper.make_tensor(tensor.name, tensor.data_type,
-                                            [len(values)], np.array(values).astype(tensor_data.dtype))
+    ind_data_type = TensorProto.INT8
+    ind_dtype = np.int8
+    ind_len = len(indices)
+    max_indices_value = 0
+    if ind_len > 0:
+        max_indices_value = indices[-1]
+        if max_indices_value <= np.iinfo(np.int8).max:
+            ind_data_type = TensorProto.INT8
+            ind_dtype = np.int8
+        elif max_indices_value <= np.iinfo(np.int16).max:
+            ind_data_type = TensorProto.INT16
+            ind_dtype = np.int16
+        elif max_indices_value <= np.iinfo(np.int32).max:
+            ind_data_type = TensorProto.INT32
+            ind_dtype = np.int32
+        else:
+            ind_data_type = TensorProto.INT64
+            ind_dtype = np.int64
+
+    logger.debug(f"initializer={tensor.name}, dtype={tensor_data.dtype}, \
+                 data_len={data_len}, nnz={nnz_count}, sparsity={sparsity}, \
+                 max_indices_value={max_indices_value}, sparse_indices_type={ind_dtype}")
+
+    if sparsity < sparsity_threshold:
+        return (object(), sparsity)
+
+    tensor_data_bytes = tensor_data.nbytes
+    # create np array and cast data to the appropriate type
+    np_values = np.array(values).astype(tensor_data.dtype)
+    # create np array and cast data to the inferred index type
+    np_indices = np.array(indices).astype(ind_dtype)
+    total_sparse_bytes = np_values.nbytes + np_indices.nbytes
+
+    logger.debug(f"initializer={tensor.name}, initializer_bytes={tensor_data_bytes}, \
+                sparse_initializer_bytes={total_sparse_bytes}")
+
+    # This check matters most near sparsity_threshold=0.5, where the outcome depends on
+    # the size of the indices entries relative to the original tensor. Big dense tensors
+    # need a wider indices data type (often int32 for large float32 tensors), so guard
+    # against losing space. The adjusted sparsity below is <= 0, so the caller's
+    # `sparsity >= sparsity_threshold` check rejects it for any positive threshold.
+    if tensor_data_bytes <= total_sparse_bytes:
+        sparsity = float(1.) - float(total_sparse_bytes)/tensor_data_bytes
+        logger.debug(f"initializer={tensor.name}, adjusted_sparsity={sparsity}")
+        return (object(), sparsity)
+
+    values_tensor = onnx.helper.make_tensor(tensor.name,
+                                            tensor.data_type,
+                                            [len(values)],
+                                            np_values.tobytes(),
+                                            raw=True)
+
     indicies_tensor = onnx.helper.make_tensor(tensor.name + '_indicies',
-                                              TensorProto.INT64,
-                                              [len(indicies)], np.array(indicies).astype(np.int64))
+                                              ind_data_type,
+                                              [ind_len],
+                                              np_indices.tobytes(),
+                                              raw=True)
+
     sparse_tensor = onnx.helper.make_sparse_tensor(values_tensor, indicies_tensor, tensor.dims)
     return (sparse_tensor, sparsity)

@ -88,7 +141,7 @@ def convert_tensor_to_sparse(tensor, tolerance):  # type: (TensorProto) -> Tuple
 def convert_initializers(model,
                         exclude_names,
                         sparsity_threshold,
-                         tolerance):  # type: (ModelProto, List[str], float) -> None
+                         tolerance):  # type: (ModelProto, List[str], float, float) -> None
    graph = model.graph
    converted_sparse = []
    remaining_initializers = []
@ -100,7 +153,7 @@ def convert_initializers(model,
            logger.info(f"initializer={initializer.name} contains bool, not converted")
            remaining_initializers.append(initializer)
            continue
-        sparse_tensor, sparsity = convert_tensor_to_sparse(initializer, tolerance)
+        sparse_tensor, sparsity = convert_tensor_to_sparse(initializer, sparsity_threshold, tolerance)
        if sparsity >= sparsity_threshold:
            logger.info(f"initializer={initializer.name} converted. sparsity={sparsity}")
            converted_sparse.append(sparse_tensor)