From 8713d76dd14baeb00328b9a20d246912bf70ea77 Mon Sep 17 00:00:00 2001
From: Dmitri Smirnov <yuslepukhin@users.noreply.github.com>
Date: Mon, 16 Aug 2021 16:33:47 -0700
Subject: [PATCH] Introduce C and C++ APIs for Sparse Tensors (#8621)

Add IsSparseTensor
  Add CreateSparseTensor
 Add utilities and test fully sparse instantiation
 Fully sparse blocksparse
 Add test and docs for fully sparse tensor instantiation
 Rework creation API
 Use API
 Non string API
 Retrofit of existing String API
 Add tests
 Add documentation
 Address build issues (Winml pending)
 Add inference test
 Bump binary size
 Add ifdef DISABLE CONTRIB
---
 .../core/framework/sparse_tensor.h            | 154 ++++-
 .../core/session/onnxruntime_c_api.h          | 303 ++++++++-
 .../core/session/onnxruntime_cxx_api.h        | 261 ++++++++
 .../core/session/onnxruntime_cxx_inline.h     | 101 +++
 onnxruntime/core/framework/execution_frame.cc |   4 +-
 .../core/framework/onnxruntime_typeinfo.cc    |   2 +-
 onnxruntime/core/framework/sparse_tensor.cc   | 180 ++++-
 .../core/framework/tensor_type_and_shape.cc   |  60 +-
 onnxruntime/core/framework/utils.cc           |   8 +-
 .../optimizer/optimizer_execution_frame.cc    |   4 +-
 onnxruntime/core/session/onnxruntime_c_api.cc | 393 +++++++++--
 onnxruntime/core/session/ort_apis.h           |  27 +
 .../test/framework/sparse_kernels_test.cc     | 255 +++++++-
 onnxruntime/test/shared_lib/test_inference.cc | 120 ++++
 .../test/shared_lib/test_nontensor_types.cc   | 616 ++++++++++++++++++
 ...rt_android_baseline_and_report_bin_size.sh |   2 +-
 16 files changed, 2335 insertions(+), 155 deletions(-)
diff --git a/include/onnxruntime/core/framework/sparse_tensor.h b/include/onnxruntime/core/framework/sparse_tensor.h
index af66a80874..6a0596882a 100644
--- a/include/onnxruntime/core/framework/sparse_tensor.h
+++ b/include/onnxruntime/core/framework/sparse_tensor.h
@@ -7,13 +7,15 @@
 #include "core/framework/tensor_shape.h"
 #include "core/framework/tensor.h"
 
+struct OrtValue;
+
 namespace onnxruntime {
 
 class IDataTransfer;
 class DataTransferManager;
 
 /**
- * @brief This is a Sparse Format enumeration representing bitflags
+ * @brief This is a Sparse Format enumeration
  * 
  * 
  */
@@ -59,8 +61,8 @@ class SparseTensor final {
   /// </summary>
   /// <param name="elt_type">MlDataType</param>
   /// <param name="dense_shape">a shape of original tensor in dense form</param>
-  /// <param name="values_shape">shape for user supplied values</param>
-  /// <param name="values_data">a pointer to values</param>
+  /// <param name="values_shape">shape for user supplied values. Use {0} shape for fully sparse tensors.</param>
+  /// <param name="values_data">a pointer to values. Use nullptr for fully sparse tensors.</param>
   /// <param name="location">description of the user allocated memory</param>
   SparseTensor(MLDataType elt_type,
                const TensorShape& dense_shape,
@@ -70,7 +72,7 @@ class SparseTensor final {
 
   /// <summary>
   /// Use this constructor to hold sparse data in the buffer
-  /// allocated with the specificed allocator. Use Make*() methods
+  /// allocated with the specified allocator. Use Make*() methods
   /// to populate the instance with data which will be copied into the
   /// allocated buffer.
   /// </summary>
@@ -87,6 +89,57 @@ class SparseTensor final {
 
   ORT_DISALLOW_COPY_AND_ASSIGNMENT(SparseTensor);
 
+  /// <summary>
+  /// The factory function creates an instance of SparseTensor on the heap
+  /// using appropriate constructor and initializes OrtValue instance with it.
+  /// </summary>
+  /// <param name="elt_type">element data type</param>
+  /// <param name="dense_shape">dense shape of the sparse tensor</param>
+  /// <param name="values_shape">values shape. Use {0} for fully sparse tensors.</param>
+  /// <param name="values_data">pointer to a user allocated buffer. Use nullptr for fully sparse tensors.</param>
+  /// <param name="location">description of the user allocated buffer</param>
+  /// <param name="ort_value">default constructed input/output ort_value</param>
+  static void InitOrtValue(MLDataType elt_type,
+                           const TensorShape& dense_shape,
+                           const TensorShape& values_shape,
+                           void* values_data,
+                           const OrtMemoryInfo& location,
+                           OrtValue& ort_value);
+
+  /// <summary>
+  /// The factory function creates an instance of SparseTensor on the heap
+  /// using appropriate constructor and initializes OrtValue instance with it.
+  /// </summary>
+  /// <param name="elt_type">element data type</param>
+  /// <param name="dense_shape">dense shape of the sparse tensor</param>
+  /// <param name="allocator">allocator to use</param>
+  /// <param name="ort_value">default constructed input/output ort_value</param>
+  static void InitOrtValue(MLDataType elt_type,
+                           const TensorShape& dense_shape,
+                           std::shared_ptr<IAllocator> allocator,
+                           OrtValue& ort_value);
+
+  /// <summary>
+  /// The function will check if the OrtValue is allocated
+  /// fetch the containing SparseTensor instance or throw if it
+  /// does not contain one. It will check that the SparseTensor has
+  /// sparse format set (i.e. fully constructed).
+  /// </summary>
+  /// <param name="v">OrtValue instance</param>
+  /// <returns>const SparseTensor Reference</returns>
+  static const SparseTensor& GetSparseTensorFromOrtValue(const OrtValue& v);
+
+  /// <summary>
+  /// The function will check if the OrtValue is allocated,
+  /// fetch the containing SparseTensor instance or throw if it
+  /// does not contain one. It will check that the SparseTensor does not
+  /// have sparse format set and will return a non-const ref so indices
+  /// can be added to it.
+  /// </summary>
+  /// <param name="v">OrtValue</param>
+  /// <returns>non-const reference to SparseTensor</returns>
+  static SparseTensor& GetSparseTensorFromOrtValue(OrtValue& v);
+
   /// <summary>
   // Returns the number of non-zero values (aka "NNZ")
   // For block sparse formats this may include some zeros in the blocks
@@ -195,7 +248,7 @@ class SparseTensor final {
   /// index shape would be 1-D (values_count) or it must be twice the number of values
   /// in which case its shape would be 2-D (values_count, 2)
   /// </summary>
-  /// <param name="indices">user allocated buffer span</param>
+  /// <param name="indices">user allocated buffer span. Use empty span for fully sparse tensors.</param>
   /// <returns>Status</returns>
   Status UseCooIndices(gsl::span<int64_t> indices);
 
@@ -209,13 +262,25 @@ class SparseTensor final {
   ///
   /// Values shape is supplied at construction time and its Size() must match values_count.
   /// </summary>
-  /// <param name="values_count"></param>
-  /// <param name="values_data"></param>
+  /// <param name="values_count">Use 0 for fully sparse tensors.</param>
+  /// <param name="values_data">pointer to a buffer to be copied. Use nullptr for fully sparse tensors.</param>
   /// <param name="indices"></param>
   /// <returns></returns>
   Status MakeCooData(const IDataTransfer& data_transfer, const OrtMemoryInfo& data_location,
                      size_t values_count, const void* values_data, gsl::span<const int64_t> indices);
 
+  /// <summary>
+  /// The method allocates a single contiguous buffer and creates instances of std::strings in it, with
+  /// copies of the supplied zero-terminated strings followed by COO indices.
+  /// All data is assumed to be on CPU and the allocator supplied must be
+  /// a CPU based allocator.
+  /// </summary>
+  /// <param name="string_count">use 0 for fully sparse tensors</param>
+  /// <param name="strings">array of char* pointers. use nullptr for fully sparse tensors</param>
+  /// <param name="indices">span of indices. Use empty span for fully sparse tensors.</param>
+  /// <returns>Status</returns>
+  Status MakeCooStrings(size_t string_count, const char* const* strings, gsl::span<const int64_t> indices);
+
   /// <summary>
   /// Gives mutable access to Coo buffers so they can be populated
   /// </summary>
@@ -234,8 +299,8 @@ class SparseTensor final {
   /// Allocates memory for values and index and returns a mutator so
   /// data can be copied into the buffer.
   /// </summary>
-  /// <param name="values_count"></param>
-  /// <param name="index_count"></param>
+  /// <param name="values_count">use 0 for fully sparse tensors</param>
+  /// <param name="index_count">use 0 for fully sparse tensors</param>
   /// <returns></returns>
   CooMutator MakeCooData(size_t values_count, size_t index_count);
 
@@ -255,17 +320,17 @@ class SparseTensor final {
   };
 
   /// <summary>
-  /// Returns Csr indices readonly view
+  /// Returns Csr indices read only view
   /// </summary>
   /// <returns></returns>
   CsrView AsCsr() const;
 
   /// <summary>
   /// This function will use Csr indices contained within the user allocated buffers.
-  /// The lifespan of the buffers must exclipse the lifespan of sparse tensor instance.
+  /// The lifespan of the buffers must eclipse the lifespan of sparse tensor instance.
   /// </summary>
-  /// <param name="inner_index"></param>
-  /// <param name="outer_index"></param>
+  /// <param name="inner_index">User allocated buffer span. use empty span for fully sparse tensors</param>
+  /// <param name="outer_index">User allocated buffer span. Use empty span for fully sparse tensors</param>
   /// <returns></returns>
   Status UseCsrIndices(gsl::span<int64_t> inner_index, gsl::span<int64_t> outer_index);
 
@@ -275,10 +340,10 @@ class SparseTensor final {
   /// </summary>
   /// <param name="data_transfer"></param>
   /// <param name="data_location"></param>
-  /// <param name="values_count"></param>
-  /// <param name="values_data"></param>
-  /// <param name="inner_index"></param>
-  /// <param name="outer_index"></param>
+  /// <param name="values_count">use 0 for fully sparse tensors</param>
+  /// <param name="values_data">pointer to data to be copied. Use nullptr for fully sparse tensors.</param>
+  /// <param name="inner_index">inner index to be copied. Use empty span for fully sparse tensors.</param>
+  /// <param name="outer_index">outer index to be copied. Use empty span for fully sparse tensors.</param>
   /// <returns></returns>
   Status MakeCsrData(const IDataTransfer& data_transfer,
                      const OrtMemoryInfo& data_location,
@@ -286,6 +351,21 @@ class SparseTensor final {
                      gsl::span<const int64_t> inner_index,
                      gsl::span<const int64_t> outer_index);
 
+  /// <summary>
+  /// The method allocates a single contiguous buffer and creates instances of std::strings in it, with
+  /// copies of the supplied zero-terminated strings followed by CSR inner and outer indices.
+  /// All data is assumed to be on CPU and the allocator supplied must be
+  /// a CPU based allocator
+  /// </summary>
+  /// <param name="string_count"></param>
+  /// <param name="strings">array of char* pointers</param>
+  /// <param name="inner_index">inner index to be copied. Use empty span for fully sparse tensors.</param>
+  /// <param name="outer_index">outer index to be copied. Use empty span for fully sparse tensors.</param>
+  /// <returns></returns>
+  Status MakeCsrStrings(size_t string_count, const char* const* strings,
+                        gsl::span<const int64_t> inner_index,
+                        gsl::span<const int64_t> outer_index);
+
   /// <summary>
   /// Give writable access to Csr values and indices
   /// </summary>
@@ -307,9 +387,9 @@ class SparseTensor final {
   /// Allocates memory for values and index and returns mutator so
   /// data can be populated.
   /// </summary>
-  /// <param name="values_count"></param>
-  /// <param name="inner_index_count"></param>
-  /// <param name="outer_index_count"></param>
+  /// <param name="values_count">Use 0 for fully sparse tensors.</param>
+  /// <param name="inner_index_count">Use 0 for fully sparse tensors.</param>
+  /// <param name="outer_index_count">Use 0 for fully sparse tensors.</param>
   /// <returns></returns>
   CsrMutator MakeCsrData(size_t values_count, size_t inner_index_count, size_t outer_index_count);
 
@@ -338,8 +418,8 @@ class SparseTensor final {
   /// were supplied to the constructor. The supplied buffer lifespan must eclipse the life
   /// of sparse tensor instance.
   /// </summary>
-  /// <param name="indices_shape"></param>
-  /// <param name="indices_data"></param>
+  /// <param name="indices_shape">Use {0} for fully sparse tensors.</param>
+  /// <param name="indices_data">Ptr to user allocated buffer. Use nullptr for fully sparse tensors.</param>
   /// <returns></returns>
   Status UseBlockSparseIndices(const TensorShape& indices_shape, int32_t* indices_data);
 
@@ -350,20 +430,35 @@ class SparseTensor final {
   ///
   // The shape of the index is must be at least 2-D and must contain one tuple per each of
   // the value blocks that  were supplied to the constructor. Each index tuple is a
-  // (row, col) coordindate of the values block in a dense matrix.
+  // (row, col) coordinates of the values block in a dense matrix.
   /// </summary>
   /// <param name="data_transfer"></param>
   /// <param name="data_location"></param>
-  /// <param name="values_shape"></param>
-  /// <param name="values_data"></param>
-  /// <param name="indices_shape"></param>
-  /// <param name="indices_data"></param>
+  /// <param name="values_shape">The shape is expected to be at least 3-D. However, use {0} for fully sparse tensors.</param>
+  /// <param name="values_data">Pointer to a data to be copied. Use nullptr for fully sparse tensors.</param>
+  /// <param name="indices_shape">The shape is expected to be 2-D. However, you can use {0} for fully sparse tensors.</param>
+  /// <param name="indices_data">Pointer to index data to be copied. Use nullptr for fully sparse tensors.</param>
   /// <returns></returns>
   Status MakeBlockSparseData(const IDataTransfer& data_transfer,
                              const OrtMemoryInfo& data_location,
                              const TensorShape& values_shape, const void* values_data,
                              const TensorShape& indices_shape, const int32_t* indices_data);
 
+
+  /// <summary>
+  /// The method allocates a single contiguous buffer and creates instances of std::strings in it, with
+  /// copies of the supplied zero-terminated strings followed by block sparse indices.
+  /// All data is assumed to be on CPU and the allocator supplied must be
+  /// a CPU based allocator.
+  /// </summary>
+  /// <param name="values_shape">Use {0} shape for fully sparse tensors</param>
+  /// <param name="strings">array of char* ptrs, use nullptr for fully sparse tensor</param>
+  /// <param name="indices_shape">Use {0} for fully sparse tensors</param>
+  /// <param name="indices_data">use nullptr for fully sparse tensors</param>
+  /// <returns></returns>
+  Status MakeBlockSparseStrings(const TensorShape& values_shape, const char* const* strings,
+                                const TensorShape& indices_shape, const int32_t* indices_data);
+
   /// <summary>
   /// Mutable data access
   /// </summary>
@@ -383,8 +478,8 @@ class SparseTensor final {
   /// Allocates memory for values and index and returns mutator so
   /// data can be populated
   /// </summary>
-  /// <param name="values_shape"></param>
-  /// <param name="indices_shape"></param>
+  /// <param name="values_shape">Shape is expected to be 3-D, use {0} for fully sparse tensors</param>
+  /// <param name="indices_shape">Shape is expected to be 2-D, use {0} for fully sparse tensors </param>
   /// <returns></returns>
   BlockSparseMutator MakeBlockSparseData(const TensorShape& values_shape, const TensorShape& indices_shape);
 
@@ -416,6 +511,7 @@ class SparseTensor final {
 
   Status ValidateCsrIndices(size_t values_count, size_t inner_size, size_t outer_size) const;
   void InitCsrIndices(size_t inner_size, const int64_t* inner, size_t outer_size, const int64_t* outer);
+  void InitBlockSparseIndices(const TensorShape& indices_shape, int32_t* indices_data);
 
   SparseFormat format_;                        // sparse format enum value
   TensorShape dense_shape_;                    // a shape of a corresponding dense tensor
diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h
index 868695c7f7..dda996234e 100644
--- a/include/onnxruntime/core/session/onnxruntime_c_api.h
+++ b/include/onnxruntime/core/session/onnxruntime_c_api.h
@@ -120,7 +120,6 @@ typedef enum ONNXTensorElementDataType {
   ONNX_TENSOR_ELEMENT_DATA_TYPE_BFLOAT16     // Non-IEEE floating-point format based on IEEE754 single-precision
 } ONNXTensorElementDataType;
 
-
 // Synced with onnx TypeProto oneof
 typedef enum ONNXType {
   ONNX_TYPE_UNKNOWN,
@@ -132,8 +131,7 @@ typedef enum ONNXType {
 } ONNXType;
 
 // These types are synced with internal
-// SparseFormatFlags but are not exposed
-// as flags
+// SparseFormatFlags
 typedef enum OrtSparseFormat {
   ORT_SPARSE_UNDEFINED = 0,
   ORT_SPARSE_COO = 0x1,
@@ -141,6 +139,13 @@ typedef enum OrtSparseFormat {
   ORT_SPARSE_BLOCK_SPARSE = 0x4
 } OrtSparseFormat;
 
+// This enum selects which sparse tensor indices to query
+enum OrtSparseIndicesFormat {
+  ORT_SPARSE_COO_INDICES,
+  ORT_SPARSE_CSR_INNER_INDICES,
+  ORT_SPARSE_CSR_OUTER_INDICES,
+  ORT_SPARSE_BLOCK_SPARSE_INDICES
+};
 
 typedef enum OrtLoggingLevel {
   ORT_LOGGING_LEVEL_VERBOSE,
@@ -589,23 +594,36 @@ struct OrtApi {
   ORT_API2_STATUS(FillStringTensor, _Inout_ OrtValue* value, _In_ const char* const* s, size_t s_len);
 
   /**
-     * \param value A tensor created from OrtCreateTensor... function.
-     * \param len total data length, not including the trailing '\0' chars.
+     * Obtain a total length of strings contained within a tensor.
+     * For sparse tensors it returns the total length of values (nnz) strings.
+     * \param[in] value A tensor created from OrtCreateTensor... function.
+     * \param[out] len total data length, not including the trailing '\0' chars.
      */
   ORT_API2_STATUS(GetStringTensorDataLength, _In_ const OrtValue* value, _Out_ size_t* len);
 
   /**
-     * \param s string contents. Each string is NOT null-terminated.
-     * \param value A tensor created from OrtCreateTensor... function.
-     * \param s_len total data length, get it from OrtGetStringTensorDataLength
+     * This API returns all of the UTF-8 encoded strings that are contained within a tensor
+     * or in non-empty values of a sparse tensor in one single buffer. Use offsets to calculate
+     * the length of each string such as len[i] = offsets[i + 1] - offsets[i] except the last
+     * string for which the length is calculated as s_len - offsets[i].
+     * 
+     * \param[in] value A tensor created from OrtCreateTensor... API or a sparse tensor
+     *   created with OrtCreateSparseTensor... API.
+     * \param[in,out] s string contents. Each string is NOT null-terminated.
+     * \param[in] s_len total data length, get it from OrtGetStringTensorDataLength
+     * \param[in,out] offsets pointer to a preallocated buffer where offsets for each of the string
+     *        element are returned. The number of offsets must match the number of string elements.
+     * \param[in] offsets_len number of offsets expected in the buffer.
      */
   ORT_API2_STATUS(GetStringTensorContent, _In_ const OrtValue* value, _Out_writes_bytes_all_(s_len) void* s,
                   size_t s_len, _Out_writes_all_(offsets_len) size_t* offsets, size_t offsets_len);
 
-  /**
-     * Don't free the 'out' value
-     */
-  ORT_API2_STATUS(CastTypeInfoToTensorInfo, _In_ const OrtTypeInfo*,
+  /** Retrieves OrtTensorTypeAndShapeInfo part of the OrtTypeInfo
+    * 
+    * \param[in] type_info
+    * \param[out] out a returned ptr. Don't free the 'out' value, it is owned by type_info
+    */
+  ORT_API2_STATUS(CastTypeInfoToTensorInfo, _In_ const OrtTypeInfo* type_info,
                   _Outptr_result_maybenull_ const OrtTensorTypeAndShapeInfo** out);
 
   /**
@@ -647,25 +665,39 @@ struct OrtApi {
   ORT_API2_STATUS(GetTensorShapeElementCount, _In_ const OrtTensorTypeAndShapeInfo* info, _Out_ size_t* out);
 
   /**
- * \param out Should be freed by ReleaseTensorTypeAndShapeInfo after use
- */
+   * Returns data type and shape iff OrtValue contains a Tensor or a SparseTensor.
+   * For sparse tensors it returns a dense shape of the tensor.
+   * 
+   * \param[in] value OrtValue that contains tensor or a sparse tensor
+   * \param[out] out Should be freed by ReleaseTensorTypeAndShapeInfo after use
+   */
   ORT_API2_STATUS(GetTensorTypeAndShape, _In_ const OrtValue* value, _Outptr_ OrtTensorTypeAndShapeInfo** out);
 
   /**
- * Get the type information of an OrtValue
- * \param value
- * \param out The returned value should be freed by ReleaseTypeInfo after use
- */
+   * Get the type information of an OrtValue. API works for tensors and sparse tensors.
+   * 
+   * \param[in] value
+   * \param[out] out The returned value should be freed by ReleaseTypeInfo after use
+   */
   ORT_API2_STATUS(GetTypeInfo, _In_ const OrtValue* value, _Outptr_result_maybenull_ OrtTypeInfo** out);
 
   ORT_API2_STATUS(GetValueType, _In_ const OrtValue* value, _Out_ enum ONNXType* out);
 
-  ORT_API2_STATUS(CreateMemoryInfo, _In_ const char* name1, enum OrtAllocatorType type, int id1,
-                  enum OrtMemType mem_type1, _Outptr_ OrtMemoryInfo** out);
+  /**
+   * Creates an instance of OrtMemoryInfo. It must be freed by ReleaseMemoryInfo after use.
+   * This may describe one of the existing ORT allocator types OR a custom allocator.
+   * 
+   * \param[in] name such as "cpu", "gpu"
+   * \param[in] type one of the enum values
+   * \param[in] id device ID. For GPU, the GPU id.
+   * \param[in] mem_type Memory type enum value.
+   */
+  ORT_API2_STATUS(CreateMemoryInfo, _In_ const char* name, enum OrtAllocatorType type, int id,
+                  enum OrtMemType mem_type, _Outptr_ OrtMemoryInfo** out);
 
   /**
- * Convenience function for special case of CreateMemoryInfo, for the CPU allocator. Uses name = "Cpu" and id = 0.
- */
+   * Convenience function for special case of CreateMemoryInfo, for the CPU allocator. Uses name = "Cpu" and id = 0.
+   */
   ORT_API2_STATUS(CreateCpuMemoryInfo, enum OrtAllocatorType type, enum OrtMemType mem_type1,
                   _Outptr_ OrtMemoryInfo** out);
 
@@ -990,13 +1022,21 @@ struct OrtApi {
                   _In_ int providers_length);
 
   /**
-     * \param value - A tensor created from OrtCreateTensor... function.
-     * \param index - index of string tensor element, length of element at index will be returned.
-     * \param out - number of UTF-8 bytes that the string contains
+     * This API returns a length of string element at [index]. For sparse tensors
+     * it will return the length of a string element of sparse values. It is an error to request
+     * an out of bounds element.
+     * 
+     * \param[in] value - A tensor created from OrtCreateTensor... function.
+     * \param[in] index - flat index of string tensor element, length of element at index will be returned.
+     * \param[out] out - number of UTF-8 bytes that the string contains
      */
   ORT_API2_STATUS(GetStringTensorElementLength, _In_ const OrtValue* value, size_t index, _Out_ size_t* out);
 
   /**
+     * This API will return a copy of the UTF-8 data contained within a string element at the specified index.
+     * For sparse tensors it would return a string element of sparse values. It is an error to request an out
+     * of bounds element.
+     * 
      * \param s string element contents in UTF-8 encoding. The string is NOT null-terminated.
      * \param value A tensor created from OrtCreateTensor... function.
      * \param s_len element length, get it from OrtGetStringTensorElementLength.
@@ -1472,12 +1512,15 @@ struct OrtApi {
    * Registers a custom allocator instance with the env to enable
    * sharing between multiple sessions that use the same env instance.
    * Returns an error if an allocator with the same OrtMemoryInfo is already registered.
-   * \param env OrtEnv instance (must be non-null).
-   * \param allocator user provided allocator (must be non-null).
+   * 
    * The behavior of this API is exactly the same as CreateAndRegisterAllocator() except
    * instead of ORT creating an allocator based on provided info, in this case 
    * ORT uses the user-provided custom allocator.
    * See docs/C_API.md for details.
+   * 
+   * \param[in,out] env OrtEnv instance (must be non-null).
+   * \param[in] allocator user provided allocator (must be non-null).
+   * 
   */
   ORT_API2_STATUS(RegisterAllocator, _Inout_ OrtEnv* env, _In_ OrtAllocator* allocator);
 
@@ -1489,6 +1532,212 @@ struct OrtApi {
   */
   ORT_API2_STATUS(UnregisterAllocator, _Inout_ OrtEnv* env,
                   _In_ const OrtMemoryInfo* mem_info);
+
+  /**
+   * Sets *out to 1 iff an OrtValue is a SparseTensor, and 0 otherwise
+   * 
+   * \param[in] value existing OrtValue
+   * \param[out] out unless an error occurs, contains 1 iff the value contains an instance
+   *  of sparse tensor or 0 otherwise.
+   */
+  ORT_API2_STATUS(IsSparseTensor, _In_ const OrtValue* value, _Out_ int* out);
+
+  /**
+   * Create an OrtValue with a sparse tensor that is empty.
+   * Use FillSparseTensor<Format>() functions to populate sparse tensor with non-zero values and
+   * format specific indices data.
+   * Use ReleaseValue to destroy the sparse tensor, this will also release the buffer inside the output value
+   * if any was allocated.
+   * \param[in,out] allocator allocator to use when performing an allocation. Allocation will be performed
+   *   by FillSparseTensor<Format>() APIs. The lifespan of the allocator instance must eclipse the lifespan of
+   *   this sparse tensor instance as the same allocator will be used to free memory.
+   * \param[in] dense_shape shape of the original dense tensor
+   * \param[in] dense_shape_len number of shape dimensions being passed
+   * \param[in] type must be one of TENSOR_ELEMENT_DATA_TYPE_xxxx
+   * \param[out] out Should be freed by calling ReleaseValue
+   * \return OrtStatus*
+   */
+  ORT_API2_STATUS(CreateSparseTensorAsOrtValue, _Inout_ OrtAllocator* allocator, _In_ const int64_t* dense_shape,
+                  size_t dense_shape_len, ONNXTensorElementDataType type, _Outptr_ OrtValue** out);
+
+  /**
+   * This API populates an empty sparse tensor that was created using the CreateSparseTensorAsOrtValue API.
+   * The API will allocate required memory and copy the supplied NNZ values and COO indices into that memory allocation.
+   * Memory allocation is performed using the allocator that was specified with CreateSparseTensorAsOrtValue.
+   * 
+   * \param[in,out] ort_value OrtValue to populate with data
+   * \param[in] data_mem_info serves to identify the location of the data to be copied. If the allocator specified
+   *  at the creation time has memory info that is not the same as the data_mem_info argument to this function, a cross-device copy will be performed.
+   *  String data is assumed to be on CPU and will only be copied into a CPU allocated buffer.
+   * \param[in] values_shape pointer to values shape array
+   * \param[in] values_shape_len length of the values_shape
+   * \param[in] values pointer to an array of values. For strings, pass const char**.
+   * \param[in] indices_data pointer to a location of COO indices
+   * \param[in] indices_num number of COO indices
+   */
+  ORT_API2_STATUS(FillSparseTensorCoo, _Inout_ OrtValue* ort_value, _In_ const OrtMemoryInfo* data_mem_info,
+                  _In_ const int64_t* values_shape, size_t values_shape_len, _In_ const void* values,
+                  _In_ const int64_t* indices_data, size_t indices_num);
+
+  /**
+   * This API populates an empty sparse tensor that was created using the CreateSparseTensorAsOrtValue API.
+   * The API will allocate required memory and copy the supplied NNZ values and CSR indices into that memory allocation.
+   * Memory allocation is performed using the allocator that was specified with CreateSparseTensorAsOrtValue.
+   * 
+   * \param[in,out] ort_value OrtValue to populate with data
+   * \param[in] data_mem_info serves to identify the location of the data to be copied. If the allocator specified
+   *  at the creation time has memory info that is not the same as the data_mem_info argument to this function, a cross-device copy will be performed.
+   *  String data is assumed to be on CPU and will only be copied into a CPU allocated buffer.
+   * \param[in] values_shape pointer to values shape array
+   * \param[in] values_shape_len length of the values_shape
+   * \param[in] values - pointer to an array of values. For strings, pass const char**.
+   * \param[in] inner_indices_data pointer to a location of CSR inner indices
+   * \param[in] inner_indices_num number of CSR inner indices
+   * \param[in] outer_indices_data pointer to a location of CSR outer indices
+   * \param[in] outer_indices_num number of CSR outer indices
+   */
+  ORT_API2_STATUS(FillSparseTensorCsr, _Inout_ OrtValue* ort_value, _In_ const OrtMemoryInfo* data_mem_info,
+                  _In_ const int64_t* values_shape, size_t values_shape_len, _In_ const void* values,
+                  _In_ const int64_t* inner_indices_data, size_t inner_indices_num,
+                  _In_ const int64_t* outer_indices_data, size_t outer_indices_num);
+
+  /**
+   * This API populates an empty sparse tensor that was created using the CreateSparseTensorAsOrtValue API.
+   * The API will allocate required memory and copy the supplied NNZ values and BlockSparse indices into that memory allocation.
+   * Memory allocation is performed using the allocator that was specified with CreateSparseTensorAsOrtValue.
+   * 
+   * \param[in,out] ort_value OrtValue to populate with data
+   * \param[in] data_mem_info serves to identify the location of the data to be copied. If the allocator specified
+   *  at the creation time has memory info that is not the same as the data_mem_info argument to this function, a cross-device copy will be performed.
+   *  String data is assumed to be on CPU and will only be copied into a CPU allocated buffer.
+   * \param[in] values_shape,values_shape_len,values shape of the values, number of values shape dimensions, and pointer to an array of values
+   * \param[in] indices_shape_data pointer to a location of indices shape
+   * \param[in] indices_shape_len length of the block sparse indices shape
+   * \param[in] indices_data pointer to a location of indices data. Shape will determine the length of the indices data.
+   */
+  ORT_API2_STATUS(FillSparseTensorBlockSparse, _Inout_ OrtValue* ort_value, _In_ const OrtMemoryInfo* data_mem_info,
+                  _In_ const int64_t* values_shape, size_t values_shape_len, _In_ const void* values,
+                  _In_ const int64_t* indices_shape_data, size_t indices_shape_len,
+                  _In_ const int32_t* indices_data);
+
+  /**
+   * Create an OrtValue with a sparse tensor. This is the first step.
+   * Next, use Use<Format>Indices() functions to supply sparse tensor with
+   * format specific indices data and set its sparse format to a specific enum value.
+   * This API will not perform memory allocations. It will
+   * use supplied user buffer which should outlive the created sparse tensor.
+   * Use ReleaseValue to destroy the sparse tensor. It would not release the supplied values buffer.
+   * This API can not be used to map strings from the user allocated memory. Strings must always be copied
+   * and have UTF-8 encoding. Therefore, use CreateSparseTensorAsOrtValue() API above and then fill it with data
+   * using the appropriate FillSparseTensor<Format>() function.
+   * 
+   * \param[in] info memory info where sparse values reside.
+   * \param[in,out] p_data pointer to a user allocated buffer with values. To create a fully sparse tensor with no non-zero
+   *   values, pass nullptr
+   * \param[in] dense_shape shape of the original dense tensor
+   * \param[in] dense_shape_len number of shape dimensions being passed
+   * \param[in] values_shape shape of the values data. To create a fully sparse tensor with no non-zero values,
+   *   pass {0} shape.
+   * \param[in] values_shape_len number of values shape dimensions
+   * \param[in] type must be one of TENSOR_ELEMENT_DATA_TYPE_xxxx
+   * \param[out] out Should be freed by calling ReleaseValue
+   * \return OrtStatus*
+   */
+  ORT_API2_STATUS(CreateSparseTensorWithValuesAsOrtValue, _In_ const OrtMemoryInfo* info, _Inout_ void* p_data,
+                  _In_ const int64_t* dense_shape, size_t dense_shape_len,
+                  _In_ const int64_t* values_shape, size_t values_shape_len,
+                  ONNXTensorElementDataType type, _Outptr_ OrtValue** out);
+
+  /**
+   * The API assigns Coo format indices to the SparseTensor that was created by 
+   * CreateSparseTensorWithValuesAsOrtValue API above. It also sets OrtSparseFormat to 
+   * ORT_SPARSE_COO. The API will not allocate any additional memory for data. The life span of
+   * indices_data buffer should eclipse the life span of this OrtValue.
+   * 
+   * \param[in,out] ort_value OrtValue instance constructed with CreateSparseTensorWithValuesAsOrtValue
+   * \param[in,out] indices_data pointer to a user pre-allocated buffer or nullptr for fully sparse tensors.
+   * \param[in] indices_num  number of COO indices. Should either be 0 for fully sparse tensors, be equal
+   *  to the number of nnz values specified to CreateSparseTensorWithValuesAsOrtValue for 1-D {nnz} indices or
+   *  be twice the number of nnz values for 2-D indices {nnz, 2}
+   */
+  ORT_API2_STATUS(UseCooIndices, _Inout_ OrtValue* ort_value, _Inout_ int64_t* indices_data, size_t indices_num);
+
+  /**
+   * The API assigns CSR format indices to the SparseTensor that was created by 
+   * CreateSparseTensorWithValuesAsOrtValue API above. It also sets OrtSparseFormat to 
+   * ORT_SPARSE_CSRC. The API will not allocate any additional memory for data. The life spans of
+   * inner_data and outer_data buffers should eclipse the life span of this OrtValue.
+   * 
+   * \param[in,out] ort_value OrtValue instance constructed with CreateSparseTensorWithValuesAsOrtValue
+   * \param[in,out] inner_data pointer to a user pre-allocated buffer or nullptr for fully sparse tensors.
+   * \param[in] inner_num  number of inner CSR indices. Should either be 0 for fully sparse tensors or be equal
+   * to the number of nnz values specified to CreateSparseTensorWithValuesAsOrtValue.
+   * \param[in,out] outer_data pointer to user pre-allocated buffer or nullptr for fully sparse tensors.
+   * \param[in] outer_num number of CSR outer indices. Should either be 0 for fully sparse tensors or
+   * equal to rows + 1 of the dense shape.
+   */
+  ORT_API2_STATUS(UseCsrIndices, _Inout_ OrtValue* ort_value, _Inout_ int64_t* inner_data, size_t inner_num,
+                  _Inout_ int64_t* outer_data, size_t outer_num);
+
+  /**
+   * The API assigns BlockSparse format indices to the SparseTensor that was created by 
+   * CreateSparseTensorWithValuesAsOrtValue API above. It also sets OrtSparseFormat to 
+   * ORT_SPARSE_BLOCK_SPARSE. The API will not allocate any additional memory for data. The life span of
+   * indices_data buffer must eclipse the lifespan of this OrtValue.
+   * 
+   * \param[in,out] ort_value OrtValue instance constructed with CreateSparseTensorWithValuesAsOrtValue
+   * \param[in] indices_shape pointer to indices shape. Use {0} for fully sparse tensors
+   * \param[in] indices_shape_len length of the indices shape
+   * \param[in,out] indices_data pointer to user pre-allocated buffer or nullptr for fully sparse tensors.
+   */
+  ORT_API2_STATUS(UseBlockSparseIndices, _Inout_ OrtValue* ort_value, const int64_t* indices_shape, size_t indices_shape_len, _Inout_ int32_t* indices_data);
+
+  /**
+   * The API returns sparse tensor format enum iff a given ort value contains an instance of sparse tensor.
+   * 
+   * \param[in] ort_value OrtValue that contains an instance of sparse tensor
+   * \param[out] out pointer to out parameter
+   */
+  ORT_API2_STATUS(GetSparseTensorFormat, _In_ const OrtValue* ort_value, _Out_ enum OrtSparseFormat* out);
+
+  /**
+   * The API returns the data type and shape of sparse tensor values (nnz) iff OrtValue contains a SparseTensor.
+   * 
+   * \param[in] ort_value an OrtValue that contains a fully constructed sparse tensor
+   * \param[out] out Should be freed by ReleaseTensorTypeAndShapeInfo after use
+   */
+  ORT_API2_STATUS(GetSparseTensorValuesTypeAndShape, _In_ const OrtValue* ort_value, _Outptr_ OrtTensorTypeAndShapeInfo** out);
+
+  /**
+   * The API returns numeric data for sparse tensor values (nnz). For string values use GetStringTensor*() API.
+   * 
+   * \param[in] ort_value an instance of OrtValue containing sparse tensor
+   * \param[out] out returns a pointer to values data.  Do not attempt to free this ptr.
+   */
+  ORT_API2_STATUS(GetSparseTensorValues, _In_ const OrtValue* ort_value, _Outptr_ const void** out);
+
+  /**
+   * The API returns data type, shape for the type of indices specified by
+   * indices_format.
+   * 
+   * \param[in] ort_value OrtValue containing sparse tensor.
+   * \param[in] indices_format - one of the indices formats. It is an error to request a format that the sparse
+   * tensor does not contain.
+   * \param[out] out an instance of OrtTensorTypeAndShapeInfo. Must be freed by ReleaseTensorTypeAndShapeInfo.
+   */
+  ORT_API2_STATUS(GetSparseTensorIndicesTypeShape, _In_ const OrtValue* ort_value, enum OrtSparseIndicesFormat indices_format, _Outptr_ OrtTensorTypeAndShapeInfo** out);
+
+  /**
+   * The API returns indices data for the type of the indices specified by indices_format.
+   * Do not free the returned ptr as it points directly to the internal sparse tensor buffer.
+   * 
+   * \param[in] ort_value OrtValue containing sparse tensor.
+   * \param[in] indices_format - one of the indices formats. It is an error to request a format that the sparse
+   * tensor does not contain.
+   * \param[out] num_indices ptr where the number of indices entries is returned
+   * \param[out] indices out param where the pointer to the internal buffer is returned. Do not free this buffer.
+   */
+  ORT_API2_STATUS(GetSparseTensorIndices, _In_ const OrtValue* ort_value, enum OrtSparseIndicesFormat indices_format, _Out_ size_t* num_indices, _Outptr_ const void** indices);
 };
 
 /*
diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_api.h b/include/onnxruntime/core/session/onnxruntime_cxx_api.h
index 63f2202357..0ae27590c6 100644
--- a/include/onnxruntime/core/session/onnxruntime_cxx_api.h
+++ b/include/onnxruntime/core/session/onnxruntime_cxx_api.h
@@ -420,14 +420,208 @@ struct TypeInfo : Base<OrtTypeInfo> {
 };
 
 struct Value : Base<OrtValue> {
+  // This structure is used to feed  sparse tensor values
+  // information for use with FillSparseTensor<Format>() API
+  // if the data type for the sparse tensor values is numeric
+  // use data.p_data, otherwise, use data.str pointer to feed
+  // values. data.str is an array of const char* that are zero terminated.
+  // number of strings in the array must match shape size.
+  // For fully sparse tensors use shape {0} and set p_data/str
+  // to nullptr.
+  struct OrtSparseValuesParam {
+    const int64_t* values_shape;
+    size_t values_shape_len;
+    union {
+      const void* p_data;
+      const char** str;
+    } data;
+  };
+
+  // Provides a way to pass shape in a single
+  // argument
+  struct Shape {
+    const int64_t* shape;
+    size_t shape_len;
+  };
+
   template <typename T>
   static Value CreateTensor(const OrtMemoryInfo* info, T* p_data, size_t p_data_element_count, const int64_t* shape, size_t shape_len);
   static Value CreateTensor(const OrtMemoryInfo* info, void* p_data, size_t p_data_byte_count, const int64_t* shape, size_t shape_len,
                             ONNXTensorElementDataType type);
+
+
+  /// <summary>
+  /// This is a simple forwarding method to the other overload that helps deducing
+  /// data type enum value from the type of the buffer.
+  /// </summary>
+  /// <typeparam name="T">numeric datatype. This API is not suitable for strings.</typeparam>
+  /// <param name="info">Memory description where the user buffers reside (CPU vs GPU etc)</param>
+  /// <param name="p_data">pointer to the user supplied buffer, use nullptr for fully sparse tensors</param>
+  /// <param name="dense_shape">a would be dense shape of the tensor</param>
+  /// <param name="values_shape">non zero values shape. Use a single 0 shape for fully sparse tensors.</param>
+  /// <returns></returns>
+  template <typename T>
+  static Value CreateSparseTensor(const OrtMemoryInfo* info, T* p_data, const Shape& dense_shape,
+                                  const Shape& values_shape);
+
+  /// <summary>
+  /// Creates an OrtValue instance containing SparseTensor. This constructs
+  /// a sparse tensor that makes use of user allocated buffers. It does not make copies
+  /// of the user provided data and does not modify it. The lifespan of user provided buffers should
+  /// eclipse the life span of the resulting OrtValue. This call constructs an instance that only contains
+  /// a pointer to non-zero values. To fully populate the sparse tensor call Use<Format>Indices() API below
+  /// to supply a sparse format specific indices.
+  /// This API is not suitable for string data. Use CreateSparseTensor() with allocator specified so strings
+  /// can be properly copied into the allocated buffer.
+  /// </summary>
+  /// <param name="info">Memory description where the user buffers reside (CPU vs GPU etc)</param>
+  /// <param name="p_data">pointer to the user supplied buffer, use nullptr for fully sparse tensors</param>
+  /// <param name="dense_shape">a would be dense shape of the tensor</param>
+  /// <param name="values_shape">non zero values shape. Use a single 0 shape for fully sparse tensors.</param>
+  /// <param name="type">data type</param>
+  /// <returns>Ort::Value instance containing SparseTensor</returns>
+  static Value CreateSparseTensor(const OrtMemoryInfo* info, void* p_data, const Shape& dense_shape,
+                                  const Shape& values_shape, ONNXTensorElementDataType type);
+
+  /// <summary>
+  /// Supplies COO format specific indices and marks the contained sparse tensor as being a COO format tensor.
+  /// Values are supplied with a CreateSparseTensor() API. The supplied indices are not copied and the user
+  /// allocated buffers lifespan must eclipse that of the OrtValue.
+  /// The location of the indices is assumed to be the same as specified by OrtMemoryInfo argument at the creation time.
+  /// </summary>
+  /// <param name="indices_data">pointer to the user allocated buffer with indices. Use nullptr for fully sparse tensors.</param>
+  /// <param name="indices_num">number of indices entries. Use 0 for fully sparse tensors</param>
+  void UseCooIndices(int64_t* indices_data, size_t indices_num);
+
+  /// <summary>
+  /// Supplies CSR format specific indices and marks the contained sparse tensor as being a CSR format tensor.
+  /// Values are supplied with a CreateSparseTensor() API. The supplied indices are not copied and the user
+  /// allocated buffers lifespan must eclipse that of the OrtValue.
+  /// The location of the indices is assumed to be the same as specified by OrtMemoryInfo argument at the creation time.
+  /// </summary>
+  /// <param name="inner_data">pointer to the user allocated buffer with inner indices or nullptr for fully sparse tensors</param>
+  /// <param name="inner_num">number of csr inner indices or 0 for fully sparse tensors</param>
+  /// <param name="outer_data">pointer to the user allocated buffer with outer indices or nullptr for fully sparse tensors</param>
+  /// <param name="outer_num">number of csr outer indices or 0 for fully sparse tensors</param>
+  void UseCsrIndices(int64_t* inner_data, size_t inner_num, int64_t* outer_data, size_t outer_num);
+
+  /// <summary>
+  /// Supplies BlockSparse format specific indices and marks the contained sparse tensor as being a BlockSparse format tensor.
+  /// Values are supplied with a CreateSparseTensor() API. The supplied indices are not copied and the user
+  /// allocated buffers lifespan must eclipse that of the OrtValue.
+  /// The location of the indices is assumed to be the same as specified by OrtMemoryInfo argument at the creation time.
+  /// </summary>
+  /// <param name="indices_shape">indices shape or a {0} for fully sparse</param>
+  /// <param name="indices_data">user allocated buffer with indices or nullptr for fully sparse tensors</param>
+  void UseBlockSparseIndices(const Shape& indices_shape, int32_t* indices_data);
+
   template <typename T>
   static Value CreateTensor(OrtAllocator* allocator, const int64_t* shape, size_t shape_len);
   static Value CreateTensor(OrtAllocator* allocator, const int64_t* shape, size_t shape_len, ONNXTensorElementDataType type);
 
+  /// <summary>
+  /// This is a simple forwarding method to the CreateSparseTensor overload below.
+  /// This helps to specify data type enum in terms of C++ data type.
+  /// Use CreateSparseTensor<T>
+  /// </summary>
+  /// <typeparam name="T">numeric data type only. String data enum must be specified explicitly.</typeparam>
+  /// <param name="allocator">allocator to use</param>
+  /// <param name="dense_shape">a would be dense shape of the tensor</param>
+  /// <returns>Ort::Value</returns>
+  template <typename T>
+  static Value CreateSparseTensor(OrtAllocator* allocator, const Shape& dense_shape);
+
+  /// <summary>
+  /// Creates an instance of OrtValue containing sparse tensor. The created instance has no data.
+  /// The data must be supplied by one of the FillSparseTensor<Format>() methods that take both non-zero values
+  /// and indices. The data will be copied into a buffer that would be allocated using the supplied allocator.
+  /// Use this API to create OrtValues that contain sparse tensors with all supported data types including
+  /// strings.
+  /// </summary>
+  /// <param name="allocator">allocator to use. The allocator lifespan must eclipse that of the resulting OrtValue</param>
+  /// <param name="dense_shape">a would be dense shape of the tensor</param>
+  /// <param name="type">data type</param>
+  /// <returns>an instance of Ort::Value</returns>
+  static Value CreateSparseTensor(OrtAllocator* allocator, const Shape& dense_shape, ONNXTensorElementDataType type);
+
+  /// <summary>
+  /// The API will allocate memory using the allocator instance supplied to the CreateSparseTensor() API
+  /// and copy the values and COO indices into it. If data_mem_info specifies that the data is located
+  /// on a different device than the allocator, a cross-device copy will be performed if possible.
+  /// </summary>
+  /// <param name="data_mem_info">specified buffer memory description</param>
+  /// <param name="values_param">values buffer information.</param>
+  /// <param name="indices_data">coo indices buffer or nullptr for fully sparse data</param>
+  /// <param name="indices_num">number of COO indices or 0 for fully sparse data</param>
+  void FillSparseTensorCoo(const OrtMemoryInfo* data_mem_info, const OrtSparseValuesParam& values_param,
+                           const int64_t* indices_data, size_t indices_num);
+
+  /// <summary>
+  /// The API will allocate memory using the allocator instance supplied to the CreateSparseTensor() API
+  /// and copy the values and CSR indices into it. If data_mem_info specifies that the data is located
+  /// on a different device than the allocator, a cross-device copy will be performed if possible.
+  /// </summary>
+  /// <param name="data_mem_info">specified buffer memory description</param>
+  /// <param name="values">values buffer information</param>
+  /// <param name="inner_indices_data">csr inner indices pointer or nullptr for fully sparse tensors</param>
+  /// <param name="inner_indices_num">number of csr inner indices or 0 for fully sparse tensors</param>
+  /// <param name="outer_indices_data">pointer to csr indices data or nullptr for fully sparse tensors</param>
+  /// <param name="outer_indices_num">number of csr outer indices or 0</param>
+  void FillSparseTensorCsr(const OrtMemoryInfo* data_mem_info,
+                           const OrtSparseValuesParam& values,
+                           const int64_t* inner_indices_data, size_t inner_indices_num,
+                           const int64_t* outer_indices_data, size_t outer_indices_num);
+
+  /// <summary>
+  /// The API will allocate memory using the allocator instance supplied to the CreateSparseTensor() API
+  /// and copy the values and BlockSparse indices into it. If data_mem_info specifies that the data is located
+  /// on a different device than the allocator, a cross-device copy will be performed if possible.
+  /// </summary>
+  /// <param name="data_mem_info">specified buffer memory description</param>
+  /// <param name="values">values buffer information</param>
+  /// <param name="indices_shape">indices shape. use {0} for fully sparse tensors</param>
+  /// <param name="indices_data">pointer to indices data or nullptr for fully sparse tensors</param>
+  void FillSparseTensorBlockSparse(const OrtMemoryInfo* data_mem_info,
+                                   const OrtSparseValuesParam& values,
+                                   const Shape& indices_shape,
+                                   const int32_t* indices_data);
+
+  /// <summary>
+  /// The API returns the sparse data format this OrtValue holds in a sparse tensor.
+  /// If the sparse tensor was not fully constructed, i.e. Use*() or Fill*() API were not used
+  /// the value returned is ORT_SPARSE_UNDEFINED.
+  /// </summary>
+  /// <returns>Format enum</returns>
+  OrtSparseFormat GetSparseFormat() const;
+
+  /// <summary>
+  /// The API returns type and shape information for stored non-zero values of the
+  /// sparse tensor. Use GetSparseTensorValues() to obtain values buffer pointer.
+  /// </summary>
+  /// <returns>TensorTypeAndShapeInfo values information</returns>
+  TensorTypeAndShapeInfo GetSparseTensorValuesTypeAndShapeInfo() const;
+
+  /// <summary>
+  /// The API returns type and shape information for the specified indices. Each supported
+  /// indices have their own enum values even if a given format has more than one kind of indices.
+  /// Use GetSparseTensorIndicesData() to obtain pointer to indices buffer.
+  /// </summary>
+  /// <param name="">enum requested</param>
+  /// <returns>type and shape information</returns>
+  TensorTypeAndShapeInfo GetSparseTensorIndicesTypeShapeInfo(OrtSparseIndicesFormat) const;
+
+  /// <summary>
+  /// The API retrieves a pointer to the internal indices buffer. The API merely performs
+  /// a convenience data type casting on the return type pointer. Make sure you are requesting
+  /// the right type, use GetSparseTensorIndicesTypeShapeInfo();
+  /// </summary>
+  /// <typeparam name="T">type to cast to</typeparam>
+  /// <param name="indices_format">requested indices kind</param>
+  /// <param name="num_indices">number of indices entries</param>
+  /// <returns>Pointer to the internal sparse tensor buffer containing indices. Do not free this pointer.</returns>
+  template <typename T>
+  const T* GetSparseTensorIndicesData(OrtSparseIndicesFormat indices_format, size_t& num_indices) const;
+
   static Value CreateMap(Value& keys, Value& values);
   static Value CreateSequence(std::vector<Value>& values);
 
@@ -443,10 +637,38 @@ struct Value : Base<OrtValue> {
   Value& operator=(Value&&) = default;
 
   bool IsTensor() const;
+
+  /// <summary>
+  /// Returns true if the OrtValue contains a sparse tensor
+  /// </summary>
+  /// <returns></returns>
+  bool IsSparseTensor() const;
+
   size_t GetCount() const;  // If a non tensor, returns 2 for map and N for sequence, where N is the number of elements
   Value GetValue(int index, OrtAllocator* allocator) const;
 
+  /// <summary>
+  /// This API returns a full length of string data contained within either a tensor or a sparse Tensor.
+  /// For sparse tensor it returns a full length of stored non-empty strings (values). The API is useful
+  /// for allocating necessary memory and calling GetStringTensorContent().
+  /// </summary>
+  /// <returns>total length of UTF-8 encoded bytes contained. No zero terminators counted.</returns>
   size_t GetStringTensorDataLength() const;
+
+  /// <summary>
+  /// The API copies all of the UTF-8 encoded string data contained within a tensor or a sparse tensor
+  /// into a supplied buffer. Use GetStringTensorDataLength() to find out the length of the buffer to allocate.
+  /// The user must also allocate offsets buffer with the number of entries equal to that of the contained
+  /// strings.
+  /// 
+  /// Strings are always assumed to be on CPU, no X-device copy.
+  /// </summary>
+  /// <param name="buffer">user allocated buffer</param>
+  /// <param name="buffer_length">length in bytes of the allocated buffer</param>
+  /// <param name="offsets">a pointer to the offsets user allocated buffer</param>
+  /// <param name="offsets_count">count of offsets, must be equal to the number of strings contained.
+  ///   that can be obtained from the shape of the tensor or from GetSparseTensorValuesTypeAndShapeInfo()
+  ///   for sparse tensors</param>
   void GetStringTensorContent(void* buffer, size_t buffer_length, size_t* offsets, size_t offsets_count) const;
 
   template <typename T>
@@ -455,13 +677,52 @@ struct Value : Base<OrtValue> {
   template <typename T>
   const T* GetTensorData() const;
 
+  /// <summary>
+  /// The API returns a pointer to an internal buffer of the sparse tensor
+  /// containing non-zero values. The API merely does casting. Make sure you
+  /// are requesting the right data type by calling GetSparseTensorValuesTypeAndShapeInfo()
+  /// first.
+  /// </summary>
+  /// <typeparam name="T">numeric data types only. Use GetStringTensor*() to retrieve strings.</typeparam>
+  /// <returns>a pointer to the internal values buffer. Do not free this pointer.</returns>
+  template <typename T>
+  const T* GetSparseTensorValues() const;
+
   template <typename T>
   T& At(const std::vector<int64_t>& location);
 
+  /// <summary>
+  /// The API returns type information for data contained in a tensor. For sparse
+  /// tensors it returns type information for contained non-zero values.
+  /// It returns dense shape for sparse tensors.
+  /// </summary>
+  /// <returns>TypeInfo</returns>
   TypeInfo GetTypeInfo() const;
+
+  /// <summary>
+  /// The API returns type information for data contained in a tensor. For sparse
+  /// tensors it returns type information for contained non-zero values.
+  /// It returns dense shape for sparse tensors.
+  /// </summary>
+  /// <returns>TensorTypeAndShapeInfo</returns>
   TensorTypeAndShapeInfo GetTensorTypeAndShapeInfo() const;
 
+  /// <summary>
+  /// The API returns a byte length of UTF-8 encoded string element
+  /// contained in either a tensor or a sparse tensor values.
+  /// </summary>
+  /// <param name="element_index"></param>
+  /// <returns>byte length for the specified string element</returns>
   size_t GetStringTensorElementLength(size_t element_index) const;
+
+  /// <summary>
+  /// The API copies UTF-8 encoded bytes for the requested string element
+  /// contained within a tensor or a sparse tensor into a provided buffer.
+  /// Use GetStringTensorElementLength() to obtain the length of the buffer to allocate.
+  /// </summary>
+  /// <param name="buffer_length"></param>
+  /// <param name="element_index"></param>
+  /// <param name="buffer"></param>
   void GetStringTensorElement(size_t buffer_length, size_t element_index, void* buffer) const;
 
   void FillStringTensor(const char* const* s, size_t s_len);
diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h
index a4596fc205..684c8fbaa4 100644
--- a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h
+++ b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h
@@ -755,6 +755,82 @@ inline Value Value::CreateTensor(const OrtMemoryInfo* info, void* p_data, size_t
   return Value{out};
 }
 
+template <typename T>
+inline Value Value::CreateSparseTensor(const OrtMemoryInfo* info, T* p_data, const Shape& dense_shape,
+                                       const Shape& values_shape) {
+  return CreateSparseTensor(info, p_data, dense_shape, values_shape, TypeToTensorType<T>::type);
+}
+
+inline Value Value::CreateSparseTensor(const OrtMemoryInfo* info, void* p_data, const Shape& dense_shape,
+                                       const Shape& values_shape, ONNXTensorElementDataType type) {
+  OrtValue* out;
+  ThrowOnError(GetApi().CreateSparseTensorWithValuesAsOrtValue(info, p_data, dense_shape.shape, dense_shape.shape_len,
+                                                               values_shape.shape, values_shape.shape_len, type, &out));
+  return Value{out};
+}
+
+inline void Value::FillSparseTensorCoo(const OrtMemoryInfo* mem_info, const OrtSparseValuesParam& values_param,
+                                       const int64_t* indices_data, size_t indices_num) {
+  ThrowOnError(GetApi().FillSparseTensorCoo(p_, mem_info, values_param.values_shape,
+                                            values_param.values_shape_len, values_param.data.p_data,
+                                            indices_data, indices_num));
+}
+
+inline void Value::FillSparseTensorCsr(const OrtMemoryInfo* data_mem_info,
+                                       const OrtSparseValuesParam& values,
+                                       const int64_t* inner_indices_data, size_t inner_indices_num,
+                                       const int64_t* outer_indices_data, size_t outer_indices_num) {
+  ThrowOnError(GetApi().FillSparseTensorCsr(p_, data_mem_info, values.values_shape, values.values_shape_len, values.data.p_data,
+                                            inner_indices_data, inner_indices_num,
+                                            outer_indices_data, outer_indices_num));
+}
+
+inline void Value::FillSparseTensorBlockSparse(const OrtMemoryInfo* data_mem_info,
+                                               const OrtSparseValuesParam& values,
+                                               const Shape& indices_shape,
+                                               const int32_t* indices_data) {
+  ThrowOnError(GetApi().FillSparseTensorBlockSparse(p_, data_mem_info, values.values_shape, values.values_shape_len, values.data.p_data,
+                                                    indices_shape.shape, indices_shape.shape_len,
+                                                    indices_data));
+}
+
+inline void Value::UseCooIndices(int64_t* indices_data, size_t indices_num) {
+  ThrowOnError(GetApi().UseCooIndices(p_, indices_data, indices_num));
+}
+
+inline void Value::UseCsrIndices(int64_t* inner_data, size_t inner_num, int64_t* outer_data, size_t outer_num) {
+  ThrowOnError(GetApi().UseCsrIndices(p_, inner_data, inner_num, outer_data, outer_num));
+}
+
+inline void Value::UseBlockSparseIndices(const Shape& indices_shape, int32_t* indices_data) {
+  ThrowOnError(GetApi().UseBlockSparseIndices(p_, indices_shape.shape, indices_shape.shape_len, indices_data));
+}
+
+inline OrtSparseFormat Value::GetSparseFormat() const {
+  OrtSparseFormat format;
+  ThrowOnError(GetApi().GetSparseTensorFormat(p_, &format));
+  return format;
+}
+
+inline TensorTypeAndShapeInfo Value::GetSparseTensorValuesTypeAndShapeInfo() const {
+  OrtTensorTypeAndShapeInfo* output;
+  ThrowOnError(GetApi().GetSparseTensorValuesTypeAndShape(p_, &output));
+  return TensorTypeAndShapeInfo{output};
+}
+
+inline TensorTypeAndShapeInfo Value::GetSparseTensorIndicesTypeShapeInfo(OrtSparseIndicesFormat indices_format) const {
+  OrtTensorTypeAndShapeInfo* output;
+  ThrowOnError(GetApi().GetSparseTensorIndicesTypeShape(p_, indices_format, &output));
+  return TensorTypeAndShapeInfo{output};
+}
+
+template <typename T>
+inline const T* Value::GetSparseTensorIndicesData(OrtSparseIndicesFormat indices_format, size_t& num_indices) const {
+  const void* out;
+  ThrowOnError(GetApi().GetSparseTensorIndices(p_, indices_format, &num_indices, &out));
+  return reinterpret_cast<const T*>(out);
+}
+
 template <typename T>
 inline Value Value::CreateTensor(OrtAllocator* allocator, const int64_t* shape, size_t shape_len) {
   return CreateTensor(allocator, shape, shape_len, TypeToTensorType<T>::type);
@@ -766,6 +842,18 @@ inline Value Value::CreateTensor(OrtAllocator* allocator, const int64_t* shape,
   return Value{out};
 }
 
+template <typename T>
+inline Value Value::CreateSparseTensor(OrtAllocator* allocator, const Shape& dense_shape) {
+  return CreateSparseTensor(allocator, dense_shape, TypeToTensorType<T>::type);
+}
+
+inline Value Value::CreateSparseTensor(OrtAllocator* allocator, const Shape& dense_shape,
+                                       ONNXTensorElementDataType type) {
+  OrtValue* out;
+  ThrowOnError(GetApi().CreateSparseTensorAsOrtValue(allocator, dense_shape.shape, dense_shape.shape_len, type, &out));
+  return Value{out};
+}
+
 inline Value Value::CreateMap(Value& keys, Value& values) {
   OrtValue* out;
   OrtValue* inputs[2] = {keys, values};
@@ -798,6 +886,12 @@ inline bool Value::IsTensor() const {
   return out != 0;
 }
 
+inline bool Value::IsSparseTensor() const {
+  int out;
+  ThrowOnError(GetApi().IsSparseTensor(p_, &out));
+  return out != 0;
+}
+
 inline size_t Value::GetCount() const {
   size_t out;
   ThrowOnError(GetApi().GetValueCount(p_, &out));
@@ -852,6 +946,13 @@ const T* Value::GetTensorData() const {
   return out;
 }
 
+template <typename T>
+inline const T* Value::GetSparseTensorValues() const {
+  const void* out;
+  ThrowOnError(GetApi().GetSparseTensorValues(p_, &out));
+  return reinterpret_cast<const T*>(out);
+}
+
 template <typename T>
 inline T& Value::At(const std::vector<int64_t>& location) {
   static_assert(!std::is_same<T, std::string>::value, "this api does not support std::string");
diff --git a/onnxruntime/core/framework/execution_frame.cc b/onnxruntime/core/framework/execution_frame.cc
index caaa78f63f..7a0798782a 100644
--- a/onnxruntime/core/framework/execution_frame.cc
+++ b/onnxruntime/core/framework/execution_frame.cc
@@ -618,9 +618,7 @@ static Status AllocateSparseTensor(OrtValue& mlvalue, const DataTypeImpl& ml_typ
                                    const TensorShape& shape, bool create_fence,
                                    const SessionState& session_state) {
   auto element_type = ml_type.AsSparseTensorType()->GetElementType();
-  auto sparse = std::make_unique<SparseTensor>(element_type, shape, allocator);
-  auto deleter = DataTypeImpl::GetType<SparseTensor>()->GetDeleteFunc();
-  mlvalue.Init(sparse.release(), DataTypeImpl::GetType<SparseTensor>(), deleter);
+  SparseTensor::InitOrtValue(element_type, shape, std::move(allocator), mlvalue);
 
   // create fence if needed
   if (create_fence) {
diff --git a/onnxruntime/core/framework/onnxruntime_typeinfo.cc b/onnxruntime/core/framework/onnxruntime_typeinfo.cc
index 9b2e5db7e8..bcfc36d131 100644
--- a/onnxruntime/core/framework/onnxruntime_typeinfo.cc
+++ b/onnxruntime/core/framework/onnxruntime_typeinfo.cc
@@ -56,7 +56,7 @@ ORT_API_STATUS_IMPL(OrtApis::GetOnnxTypeFromTypeInfo, _In_ const struct OrtTypeI
 
 ORT_API_STATUS_IMPL(OrtApis::CastTypeInfoToTensorInfo, _In_ const struct OrtTypeInfo* input,
                     _Outptr_result_maybenull_ const struct OrtTensorTypeAndShapeInfo** out) {
-  *out = input->type == ONNX_TYPE_TENSOR ? input->data : nullptr;
+  *out = (input->type == ONNX_TYPE_TENSOR || input->type == ONNX_TYPE_SPARSETENSOR) ? input->data : nullptr;
   return nullptr;
 }
 
diff --git a/onnxruntime/core/framework/sparse_tensor.cc b/onnxruntime/core/framework/sparse_tensor.cc
index 640b62b168..817ee8b7af 100644
--- a/onnxruntime/core/framework/sparse_tensor.cc
+++ b/onnxruntime/core/framework/sparse_tensor.cc
@@ -4,6 +4,7 @@
 #include "core/framework/data_types.h"
 #include "core/framework/sparse_tensor.h"
 #include "core/framework/data_transfer_manager.h"
+#include "core/framework/ort_value.h"
 #include "core/framework/utils.h"
 
 #include <safeint/SafeInt.hpp>
@@ -42,13 +43,13 @@ inline std::vector<std::reference_wrapper<const Tensor>> MakeListConst(const T&.
   return std::vector{std::cref(t)...};
 }
 
-void CopyStrings(const Tensor& src, Tensor& dst) {
-  auto src_span = src.DataAsSpan<std::string>();
-  auto* dst_iter = dst.MutableData<std::string>();
-  std::copy(src_span.cbegin(), src_span.cend(), dst_iter);
+void CopyStrings(const Tensor& src_t, Tensor& dst_t) {
+  auto src_span = src_t.DataAsSpan<std::string>();
+  std::string* dst = dst_t.MutableData<std::string>();
+  std::copy(src_span.cbegin(), src_span.cend(), dst);
 }
 
-Status CopyData(const IDataTransfer& data_transfer,
+Status CopyData(const IDataTransfer* data_transfer,
                 const std::vector<std::reference_wrapper<const Tensor>>& src,
                 const std::vector<std::reference_wrapper<Tensor>>& dst) {
   ORT_RETURN_IF_NOT(src.size() == dst.size(), "Must have the same size. Got src_size: ",
@@ -59,12 +60,26 @@ Status CopyData(const IDataTransfer& data_transfer,
     if (src_t.IsDataTypeString()) {
       CopyStrings(src_t, dst_t);
     } else {
-      ORT_RETURN_IF_ERROR(data_transfer.CopyTensor(src_t, dst_t));
+      if (data_transfer != nullptr) {
+        ORT_RETURN_IF_ERROR(data_transfer->CopyTensor(src_t, dst_t));
+      } else {
+        memcpy(dst_t.MutableDataRaw(), src_t.DataRaw(), src_t.SizeInBytes());
+      }
     }
   }
   return Status::OK();
 }
 
+Status CopyStringsAndIndices(size_t string_count, const char* const strings[], Tensor& values,
+                             const std::vector<std::reference_wrapper<const Tensor>>& src_ind,
+                             const std::vector<std::reference_wrapper<Tensor>>& dst_ind) {
+  auto* str_dest = values.MutableData<std::string>();
+  for (size_t i = 0; i < string_count; ++i) {
+    str_dest[i] = strings[i];
+  }
+
+  return CopyData(nullptr, src_ind, dst_ind);
+}
 }  // namespace
 
 const void* SparseTensor::IndicesStart(int64_t values_bytes) const {
@@ -149,12 +164,58 @@ SparseTensor::~SparseTensor() {
   ReleaseBuffer();
 }
 
+void SparseTensor::InitOrtValue(MLDataType elt_type,
+                                const TensorShape& dense_shape,
+                                const TensorShape& values_shape,
+                                void* values_data,
+                                const OrtMemoryInfo& location,
+                                OrtValue& ort_value) {
+  auto sparse_tensor = std::make_unique<SparseTensor>(elt_type, dense_shape, values_shape, values_data, location);
+  auto ml_tensor = DataTypeImpl::GetType<SparseTensor>();
+  ort_value.Init(sparse_tensor.release(),
+                 ml_tensor,
+                 ml_tensor->GetDeleteFunc());
+}
+
+void SparseTensor::InitOrtValue(MLDataType elt_type,
+                                const TensorShape& dense_shape,
+                                std::shared_ptr<IAllocator> allocator,
+                                OrtValue& ort_value) {
+  auto sparse_tensor = std::make_unique<SparseTensor>(elt_type, dense_shape, std::move(allocator));
+  auto ml_tensor = DataTypeImpl::GetType<SparseTensor>();
+  ort_value.Init(sparse_tensor.release(),
+                 ml_tensor,
+                 ml_tensor->GetDeleteFunc());
+}
+
+const SparseTensor& SparseTensor::GetSparseTensorFromOrtValue(const OrtValue& v) {
+  if (!v.IsAllocated()) {
+    ORT_THROW("the ort_value must contain a constructed sparse tensor");
+  }
+  const auto& sparse_tensor = v.Get<onnxruntime::SparseTensor>();
+  if (sparse_tensor.Format() == onnxruntime::SparseFormat::kUndefined) {
+    ORT_THROW("Sparse Tensor does not contain sparse data");
+  }
+  return sparse_tensor;
+}
+
+SparseTensor& SparseTensor::GetSparseTensorFromOrtValue(OrtValue& v) {
+  if (!v.IsAllocated()) {
+    ORT_THROW("the ort_value must contain a constructed sparse tensor");
+  }
+  auto& sparse_tensor = *v.GetMutable<SparseTensor>();
+  if (sparse_tensor.Format() != SparseFormat::kUndefined) {
+    ORT_THROW("this tensor already has populated sparse_indices");
+  }
+  return sparse_tensor;
+}
+
 Status SparseTensor::AllocateBuffer(int64_t buffer_size, size_t num_values) {
   if (buffer_size > 0) {
     SafeInt<size_t> buffer_size_t(buffer_size);
     const auto values_bytes = SafeInt<size_t>(num_values) * ml_data_type_->Size();
     ORT_RETURN_IF_NOT(buffer_size_t > values_bytes,
-                "Values size ", static_cast<size_t>(values_bytes), " must be less than total buffer size: ", buffer_size);
+                      "Values size ", static_cast<size_t>(values_bytes), " must be less than total buffer size: ", buffer_size);
     auto data_ptr = IAllocator::MakeUniquePtr<void>(allocator_, buffer_size_t);
     ORT_RETURN_IF(data_ptr == nullptr, "SparseTensor Allocation failed for size: ", buffer_size);
     if (IsDataTypeString()) {
@@ -206,6 +267,7 @@ void SparseTensor::InitCooIndex(const TensorShape& index_shape, int64_t* index_d
 }
 
 Status SparseTensor::UseCooIndices(gsl::span<int64_t> indices) {
+  ORT_RETURN_IF_NOT(Format() == SparseFormat::kUndefined, "Sparse format must not be set. Already contains format: ", Format());
   ORT_RETURN_IF_NOT(allocator_ == nullptr, "Not expecting an allocator set");
   TensorShape index_shape(GetCooIndexDims(NumValues(), indices.size()));
   InitCooIndex(index_shape, indices.data());
@@ -216,6 +278,7 @@ Status SparseTensor::MakeCooData(const IDataTransfer& data_transfer,
                                  const OrtMemoryInfo& data_location,
                                  size_t values_count, const void* values_data,
                                  gsl::span<const int64_t> indices) {
+  ORT_RETURN_IF(IsDataTypeString(), "Use MakeCooStrings");
   auto mutator = MakeCooData(values_count, indices.size());
   if (values_count > 0) {
     auto& dst_values = mutator.Values();
@@ -223,12 +286,26 @@ Status SparseTensor::MakeCooData(const IDataTransfer& data_transfer,
 
     Tensor src_values(dst_values.DataType(), dst_values.Shape(), const_cast<void*>(values_data), data_location);
     Tensor src_index(dst_index.DataType(), dst_index.Shape(), const_cast<int64_t*>(indices.data()), data_location);
-    ORT_RETURN_IF_ERROR(CopyData(data_transfer, MakeListConst(src_values, src_index), MakeListNonConst(dst_values, dst_index)));
+    ORT_RETURN_IF_ERROR(CopyData(&data_transfer, MakeListConst(src_values, src_index), MakeListNonConst(dst_values, dst_index)));
+  }
+  return Status::OK();
+}
+
+Status SparseTensor::MakeCooStrings(size_t string_count, const char* const* strings,
+                                    gsl::span<const int64_t> indices) {
+  ORT_RETURN_IF_NOT(IsDataTypeString(), "Expecting data type to be set as string");
+  auto mutator = MakeCooData(string_count, indices.size());
+  if (string_count > 0) {
+    auto& dst_values = mutator.Values();
+    auto& dst_indices = mutator.Indices();
+    Tensor src_indices(dst_indices.DataType(), dst_indices.Shape(), const_cast<int64_t*>(indices.data()), Location());
+    ORT_RETURN_IF_ERROR(CopyStringsAndIndices(string_count, strings, dst_values, {std::cref(src_indices)}, {std::ref(dst_indices)}));
   }
   return Status::OK();
 }
 
 SparseTensor::CooMutator SparseTensor::MakeCooData(size_t values_count, size_t index_count) {
+  ORT_ENFORCE(Format() == SparseFormat::kUndefined, "Sparse format must not be set. Already contains format: ", Format());
   ORT_ENFORCE(allocator_ != nullptr, "This method should follow a call to constructor that supplies the allocator");
   const auto num_values = gsl::narrow<int64_t>(values_count);
   TensorShape values_shape{num_values};
@@ -253,11 +330,13 @@ SparseTensor::CsrView SparseTensor::AsCsr() const {
 
 Status SparseTensor::ValidateCsrIndices(size_t values_count, size_t inner_size, size_t outer_size) const {
   ORT_RETURN_IF_NOT(dense_shape_.NumDimensions() == 2U, "dense shape must 2-D. Got: ", dense_shape_.NumDimensions());
+  ORT_RETURN_IF_NOT((inner_size == 0 && outer_size == 0) || (inner_size > 0 && outer_size > 0),
+                    "Inner and Outer indices must either be both zero or non-zero");
   ORT_RETURN_IF_NOT(inner_size == values_count,
-              "Expecting inner index size: ", inner_size, " the same as values size: ", values_count);
+                    "Expecting inner index size: ", inner_size, " the same as values size: ", values_count);
   const auto rows = dense_shape_.GetDims()[0];
   ORT_RETURN_IF_NOT(outer_size == 0 || outer_size == static_cast<size_t>(rows + 1),
-              "Outer index count must be rows + 1 or zero. Got: ", outer_size, " rows: ", rows);
+                    "Outer index count must be rows + 1 or zero. Got: ", outer_size, " rows: ", rows);
   return Status::OK();
 }
 
@@ -274,6 +353,7 @@ void SparseTensor::InitCsrIndices(size_t inner_size, const int64_t* inner, size_
 
 Status SparseTensor::UseCsrIndices(gsl::span<int64_t> inner_index, gsl::span<int64_t> outer_index) {
   ORT_RETURN_IF_NOT(allocator_ == nullptr, "This method does not expect allocator to be set");
+  ORT_RETURN_IF_NOT(Format() == SparseFormat::kUndefined, "Sparse format must not be set. Already contains format: ", Format());
   ORT_RETURN_IF_ERROR(ValidateCsrIndices(NumValues(), inner_index.size(), outer_index.size()));
   InitCsrIndices(inner_index.size(), inner_index.data(), outer_index.size(), outer_index.data());
   return Status::OK();
@@ -282,6 +362,7 @@ Status SparseTensor::UseCsrIndices(gsl::span<int64_t> inner_index, gsl::span<int
 Status SparseTensor::MakeCsrData(const IDataTransfer& data_transfer, const OrtMemoryInfo& data_location,
                                  size_t values_count, const void* values_data,
                                  gsl::span<const int64_t> inner_index, gsl::span<const int64_t> outer_index) {
+  ORT_RETURN_IF(IsDataTypeString(), "Use MakeCsrStrings");
   auto mutator = MakeCsrData(values_count, inner_index.size(), outer_index.size());
   if (values_count > 0) {
     auto& dst_values = mutator.Values();
@@ -291,16 +372,34 @@ Status SparseTensor::MakeCsrData(const IDataTransfer& data_transfer, const OrtMe
     Tensor src_values(dst_values.DataType(), dst_values.Shape(), const_cast<void*>(values_data), data_location);
     Tensor src_inner(dst_inner.DataType(), dst_inner.Shape(), const_cast<int64_t*>(inner_index.data()), data_location);
     Tensor src_outer(dst_outer.DataType(), dst_outer.Shape(), const_cast<int64_t*>(outer_index.data()), data_location);
-    ORT_RETURN_IF_ERROR(CopyData(data_transfer, MakeListConst(src_values, src_inner, src_outer),
+    ORT_RETURN_IF_ERROR(CopyData(&data_transfer, MakeListConst(src_values, src_inner, src_outer),
                                  MakeListNonConst(dst_values, dst_inner, dst_outer)));
   }
   return Status::OK();
 }
 
+Status SparseTensor::MakeCsrStrings(size_t string_count, const char* const* strings,
+                                    gsl::span<const int64_t> inner_index, gsl::span<const int64_t> outer_index) {
+  ORT_RETURN_IF_NOT(IsDataTypeString(), "Expecting data type to be set as string");
+  auto mutator = MakeCsrData(string_count, inner_index.size(), outer_index.size());
+  if (string_count > 0) {
+    auto& dst_values = mutator.Values();
+    auto& dst_inner = mutator.Inner();
+    auto& dst_outer = mutator.Outer();
+    Tensor src_inner(dst_inner.DataType(), dst_inner.Shape(), const_cast<int64_t*>(inner_index.data()), Location());
+    Tensor src_outer(dst_outer.DataType(), dst_outer.Shape(), const_cast<int64_t*>(outer_index.data()), Location());
+    ORT_RETURN_IF_ERROR(CopyStringsAndIndices(string_count, strings, dst_values,
+                                              MakeListConst(src_inner, src_outer),
+                                              MakeListNonConst(dst_inner, dst_outer)));
+  }
+  return Status::OK();
+}
+
 SparseTensor::CsrMutator SparseTensor::MakeCsrData(size_t values_count,
                                                    size_t inner_index_count,
                                                    size_t outer_index_count) {
   ORT_ENFORCE(allocator_ != nullptr, "This method should follow a call to constructor that supplies the allocator");
+  ORT_ENFORCE(Format() == SparseFormat::kUndefined, "Sparse format must not be set. Already contains format: ", Format());
   ORT_THROW_IF_ERROR(ValidateCsrIndices(values_count, inner_index_count, outer_index_count));
 
   if (values_count > 0) {
@@ -326,44 +425,70 @@ SparseTensor::BlockSparseView SparseTensor::AsBlockSparse() const {
 }
 
 Status SparseTensor::ValidateBlockSparseShapes(const TensorShape& values_shape, const TensorShape& indices_shape) const {
-  ORT_RETURN_IF_NOT(values_shape.NumDimensions() >= 3,
-                    "Expecting values dimensions to be at least 3. Got:", values_shape.NumDimensions());
-  ORT_RETURN_IF_NOT(indices_shape.NumDimensions() == 2,
-                    "Expecting index dimensions to be 2. Got: ", indices_shape.NumDimensions());
-  const auto values_blocks = values_shape.SizeFromDimension(2);
-  const auto index_blocks = indices_shape.Size() / 2;  // Two integers per block
-  ORT_RETURN_IF_NOT(values_blocks == index_blocks,
-                    "Expecting index blocks: ", index_blocks, " to be equal to values blocks: ", values_blocks);
+  if (values_shape.Size() > 0) {
+    ORT_RETURN_IF_NOT(values_shape.NumDimensions() >= 3,
+                      "Expecting to have at lest 3-D shape. Got:", values_shape.NumDimensions());
+    ORT_RETURN_IF_NOT(indices_shape.NumDimensions() == 2,
+                      "Expecting indices to have 2-D shape . Got: ", indices_shape.NumDimensions());
+    ORT_RETURN_IF_NOT(indices_shape.GetDims()[0] == 2, "Indices shape must have dim[0] == 2");
+    const auto values_blocks = values_shape.SizeFromDimension(2);
+    const auto index_blocks = indices_shape.Size() / 2;  // Two integers per block
+    ORT_RETURN_IF_NOT(values_blocks == index_blocks,
+                      "Expecting index blocks: ", index_blocks, " to be equal to values blocks: ", values_blocks);
+  } else {
+    ORT_RETURN_IF_NOT(values_shape.GetDims().size() == 1, "Expecting fully sparse tensors to have value shape {0}");
+    ORT_RETURN_IF_NOT(indices_shape.GetDims().size() == 1, "Expecting fully sparse tensors to have indices shape {0}");
+  }
   return Status::OK();
 }
 
-Status SparseTensor::UseBlockSparseIndices(const TensorShape& index_shape, int32_t* indices_data) {
-  ORT_RETURN_IF_NOT(allocator_ == nullptr, "Not expecting an allocator set");
-  ORT_RETURN_IF_ERROR(ValidateBlockSparseShapes(Values().Shape(), index_shape));
-
+void SparseTensor::InitBlockSparseIndices(const TensorShape& indices_shape, int32_t* indices_data) {
   format_data_.resize(1);
-  format_data_[0] = Tensor(DataTypeImpl::GetType<int32_t>(), index_shape,
+  format_data_[0] = Tensor(DataTypeImpl::GetType<int32_t>(), indices_shape,
                            indices_data, Location());
   format_ = SparseFormat::kBlockSparse;
+}
+
+Status SparseTensor::UseBlockSparseIndices(const TensorShape& indices_shape, int32_t* indices_data) {
+  ORT_RETURN_IF_NOT(allocator_ == nullptr, "Not expecting an allocator set");
+  ORT_RETURN_IF_NOT(Format() == SparseFormat::kUndefined, "Sparse format must not be set. Already contains format: ", Format());
+  ORT_RETURN_IF_ERROR(ValidateBlockSparseShapes(Values().Shape(), indices_shape));
+  InitBlockSparseIndices(indices_shape, indices_data);
   return Status::OK();
 }
 
 Status SparseTensor::MakeBlockSparseData(const IDataTransfer& data_transfer, const OrtMemoryInfo& data_location,
                                          const TensorShape& values_shape, const void* values_data,
                                          const TensorShape& indices_shape, const int32_t* indices_data) {
+  ORT_RETURN_IF(IsDataTypeString(), "Use MakeBlockSparseStrings");
   auto mutator = MakeBlockSparseData(values_shape, indices_shape);
   if (values_shape.Size() > 0) {
     auto& dst_values = mutator.Values();
     auto& dst_indices = mutator.Indices();
     Tensor src_values(dst_values.DataType(), dst_values.Shape(), const_cast<void*>(values_data), data_location);
     Tensor src_index(dst_indices.DataType(), dst_indices.Shape(), const_cast<int32_t*>(indices_data), data_location);
-    ORT_RETURN_IF_ERROR(CopyData(data_transfer, MakeListConst(src_values, src_index), MakeListNonConst(dst_values, dst_indices)));
+    ORT_RETURN_IF_ERROR(CopyData(&data_transfer, MakeListConst(src_values, src_index), MakeListNonConst(dst_values, dst_indices)));
+  }
+  return Status::OK();
+}
+
+Status SparseTensor::MakeBlockSparseStrings(const TensorShape& values_shape, const char* const* strings,
+                                            const TensorShape& indices_shape, const int32_t* indices_data) {
+  ORT_RETURN_IF_NOT(IsDataTypeString(), "Expecting data type to be set as string");
+  auto mutator = MakeBlockSparseData(values_shape, indices_shape);
+  auto string_count = gsl::narrow<size_t>(values_shape.Size());
+  if (string_count > 0) {
+    auto& dst_values = mutator.Values();
+    auto& dst_indices = mutator.Indices();
+    Tensor src_indices(dst_indices.DataType(), dst_indices.Shape(), const_cast<int32_t*>(indices_data), Location());
+    ORT_RETURN_IF_ERROR(CopyStringsAndIndices(string_count, strings, dst_values, {std::cref(src_indices)}, {std::ref(dst_indices)}));
   }
   return Status::OK();
 }
 
 SparseTensor::BlockSparseMutator SparseTensor::MakeBlockSparseData(const TensorShape& values_shape, const TensorShape& indices_shape) {
   ORT_ENFORCE(allocator_ != nullptr, "This method should follow a call to constructor that supplies the allocator");
+  ORT_ENFORCE(Format() == SparseFormat::kUndefined, "Sparse format must not be set. Already contains format: ", Format());
   ORT_THROW_IF_ERROR(ValidateBlockSparseShapes(values_shape, indices_shape));
   if (values_shape.Size() > 0) {
     const auto data_size = SafeInt<int64_t>(values_shape.Size()) * ml_data_type_->Size();
@@ -372,10 +497,9 @@ SparseTensor::BlockSparseMutator SparseTensor::MakeBlockSparseData(const TensorS
                                                                   gsl::narrow<int64_t>(index_size));
     ORT_THROW_IF_ERROR(AllocateBuffer(required_buffer_size, static_cast<size_t>(data_size / ml_data_type_->Size())));
   }
+
   values_ = Tensor(DataType(), values_shape, p_data_, Location());
-  format_data_.resize(1);
-  format_data_[0] = Tensor(DataTypeImpl::GetType<int32_t>(), indices_shape, IndicesStart(values_.SizeInBytes()), Location());
-  format_ = SparseFormat::kBlockSparse;
+  InitBlockSparseIndices(indices_shape, reinterpret_cast<int32_t*>(IndicesStart(values_.SizeInBytes())));
   return BlockSparseMutator(values_, format_data_[0]);
 }
 
diff --git a/onnxruntime/core/framework/tensor_type_and_shape.cc b/onnxruntime/core/framework/tensor_type_and_shape.cc
index fa512f629b..f818ef7a26 100644
--- a/onnxruntime/core/framework/tensor_type_and_shape.cc
+++ b/onnxruntime/core/framework/tensor_type_and_shape.cc
@@ -203,12 +203,13 @@ OrtStatus* OrtTensorTypeAndShapeInfo::Clone(OrtTensorTypeAndShapeInfo** out) {
 
 ORT_API_STATUS_IMPL(OrtApis::GetTensorTypeAndShape, _In_ const OrtValue* v, _Outptr_ OrtTensorTypeAndShapeInfo** out) {
   API_IMPL_BEGIN
-  onnxruntime::MLDataType type = v->Type();
-  ORT_ENFORCE(type != nullptr, "OrtValue is not a Tensor");
-  if (type->IsTensorType() || type->IsSparseTensorType()) {
+  if (!v->IsAllocated()) {
+    return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, "the ort_value must contain a constructed tensor or sparse tensor");
+  }
+  if (v->IsTensor() || v->IsSparseTensor()) {
     const onnxruntime::TensorShape* shape = nullptr;
     onnxruntime::MLDataType data_type = nullptr;
-    if (type->IsTensorType()) {
+    if (v->IsTensor()) {
       const Tensor& tensor = v->Get<onnxruntime::Tensor>();
       shape = &tensor.Shape();
       data_type = tensor.DataType();
@@ -224,6 +225,57 @@ ORT_API_STATUS_IMPL(OrtApis::GetTensorTypeAndShape, _In_ const OrtValue* v, _Out
   API_IMPL_END
 }
 
+ORT_API_STATUS_IMPL(OrtApis::GetSparseTensorValuesTypeAndShape, _In_ const OrtValue* v,
+                    _Outptr_ OrtTensorTypeAndShapeInfo** out) {
+  API_IMPL_BEGIN
+  const auto& sparse_tensor = SparseTensor::GetSparseTensorFromOrtValue(*v);
+  const auto& values = sparse_tensor.Values();
+  return GetTensorShapeAndType(values.Shape(), *values.DataType(), out);
+  API_IMPL_END
+}
+
+namespace {
+const Tensor& GetIndicesTensor(const OrtValue& v, OrtSparseIndicesFormat indices_format) {
+  const auto& sparse_tensor = SparseTensor::GetSparseTensorFromOrtValue(v);
+  const Tensor* indices_tensor = nullptr;
+  switch (indices_format) {
+    case OrtSparseIndicesFormat::ORT_SPARSE_COO_INDICES:
+      indices_tensor = &sparse_tensor.AsCoo().Indices();
+      break;
+    case OrtSparseIndicesFormat::ORT_SPARSE_CSR_INNER_INDICES:
+      indices_tensor = &sparse_tensor.AsCsr().Inner();
+      break;
+    case OrtSparseIndicesFormat::ORT_SPARSE_CSR_OUTER_INDICES:
+      indices_tensor = &sparse_tensor.AsCsr().Outer();
+      break;
+    case OrtSparseIndicesFormat::ORT_SPARSE_BLOCK_SPARSE_INDICES:
+      indices_tensor = &sparse_tensor.AsBlockSparse().Indices();
+      break;
+    default:
+      ORT_THROW(ORT_INVALID_ARGUMENT, "Unsupported indices_format passed");
+  }
+  return *indices_tensor;
+}
+}  // namespace
+
+ORT_API_STATUS_IMPL(OrtApis::GetSparseTensorIndicesTypeShape, _In_ const OrtValue* v,
+                    OrtSparseIndicesFormat indices_format, _Outptr_ OrtTensorTypeAndShapeInfo** out) {
+  API_IMPL_BEGIN
+  const Tensor& indices_tensor = GetIndicesTensor(*v, indices_format);
+  return GetTensorShapeAndType(indices_tensor.Shape(), *indices_tensor.DataType(), out);
+  API_IMPL_END
+}
+
+ORT_API_STATUS_IMPL(OrtApis::GetSparseTensorIndices, _In_ const OrtValue* v,
+                    enum OrtSparseIndicesFormat indices_format, _Out_ size_t* num_indices, _Outptr_ const void** indices) {
+  API_IMPL_BEGIN
+  const Tensor& indices_tensor = GetIndicesTensor(*v, indices_format);
+  *num_indices = gsl::narrow<size_t>(indices_tensor.Shape().Size());
+  *indices = indices_tensor.DataRaw();
+  return nullptr;
+  API_IMPL_END
+}
+
 ORT_API_STATUS_IMPL(OrtApis::GetValueType, _In_ const OrtValue* v, _Out_ ONNXType* out) {
   API_IMPL_BEGIN
   OrtTypeInfo* type_info;
diff --git a/onnxruntime/core/framework/utils.cc b/onnxruntime/core/framework/utils.cc
index 2766573e1d..022182062e 100644
--- a/onnxruntime/core/framework/utils.cc
+++ b/onnxruntime/core/framework/utils.cc
@@ -140,13 +140,7 @@ static common::Status AllocateHelper(const AllocatorPtr& allocator,
                          allocator, target_mlvalue);
   } else if (source_mlvalue.IsSparseTensor()) {
     const SparseTensor& source_tensor = source_mlvalue.Get<SparseTensor>();
-    auto p_tensor = std::make_unique<SparseTensor>(source_tensor.DataType(),
-                                                   source_tensor.DenseShape(),
-                                                   allocator);
-    auto ml_tensor = DataTypeImpl::GetType<SparseTensor>();
-    target_mlvalue.Init(p_tensor.release(),
-                        ml_tensor,
-                        ml_tensor->GetDeleteFunc());
+    SparseTensor::InitOrtValue(source_tensor.DataType(), source_tensor.DenseShape(), allocator, target_mlvalue);
   } else if (source_mlvalue.IsTensorSequence()) {
     const TensorSeq& source_tensor_seq = source_mlvalue.Get<TensorSeq>();
     auto target_tensor_seq = std::make_unique<TensorSeq>(source_tensor_seq.DataType());
diff --git a/onnxruntime/core/optimizer/optimizer_execution_frame.cc b/onnxruntime/core/optimizer/optimizer_execution_frame.cc
index 3b5ee4f98b..8bc84056b6 100644
--- a/onnxruntime/core/optimizer/optimizer_execution_frame.cc
+++ b/onnxruntime/core/optimizer/optimizer_execution_frame.cc
@@ -146,9 +146,7 @@ Status OptimizerExecutionFrame::CreateNodeOutputMLValueImpl(OrtValue& ort_value,
                   "Tried to allocate without valid type information, ort_value index=" + std::to_string(ort_value_idx));
   if (ml_type->IsSparseTensorType()) {
     auto element_type = ml_type->AsSparseTensorType()->GetElementType();
-    auto container_type = DataTypeImpl::GetType<SparseTensor>();
-    auto sparse = std::make_unique<SparseTensor>(element_type, *shape, info_.GetAllocator());
-    ort_value.Init(sparse.release(), container_type, container_type->GetDeleteFunc());
+    SparseTensor::InitOrtValue(element_type, *shape, info_.GetAllocator(), ort_value);
     return Status::OK();
   }
 
diff --git a/onnxruntime/core/session/onnxruntime_c_api.cc b/onnxruntime/core/session/onnxruntime_c_api.cc
index 6a027e403f..7fffb25682 100644
--- a/onnxruntime/core/session/onnxruntime_c_api.cc
+++ b/onnxruntime/core/session/onnxruntime_c_api.cc
@@ -37,6 +37,14 @@
 #include "core/framework/TensorSeq.h"
 #include "core/platform/ort_mutex.h"
 
+#ifdef USE_CUDA
+#include "core/providers/cuda/cuda_provider_factory.h"
+#include "core/providers/cuda/cuda_execution_provider_info.h"
+namespace onnxruntime {
+ProviderInfo_CUDA* TryGetProviderInfo_CUDA();
+}
+#endif
+
 #ifdef ENABLE_EXTENSION_CUSTOM_OPS
 #include "ortcustomops.h"
 #endif
@@ -220,6 +228,224 @@ ORT_API_STATUS_IMPL(OrtApis::CreateTensorAsOrtValue, _Inout_ OrtAllocator* alloc
   API_IMPL_END
 }
 
+ORT_API_STATUS_IMPL(OrtApis::CreateSparseTensorAsOrtValue, _Inout_ OrtAllocator* allocator, _In_ const int64_t* dense_shape,
+                    size_t dense_shape_len, ONNXTensorElementDataType type, _Outptr_ OrtValue** out) {
+  API_IMPL_BEGIN
+  // The element type is taken from the sparse tensor type that matches the requested ONNX enum.
+  const auto elem_type = DataTypeImpl::SparseTensorTypeFromONNXEnum(type)->GetElementType();
+  assert(elem_type->AsPrimitiveDataType() != nullptr);
+  const TensorShape dense_t_shape(dense_shape, dense_shape_len);
+  const auto& dense_dims = dense_t_shape.GetDims();
+  if (std::any_of(dense_dims.cbegin(), dense_dims.cend(), [](int64_t dim) { return dim < 0; })) {
+    return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, "tried creating tensor with negative value in shape");
+  }
+  // Wrap the caller's OrtAllocator and hand the newly initialized OrtValue back through out.
+  auto wrapped_allocator = std::make_shared<onnxruntime::IAllocatorImplWrappingOrtAllocator>(allocator);
+  auto ort_value = std::make_unique<OrtValue>();
+  SparseTensor::InitOrtValue(elem_type, dense_t_shape, std::move(wrapped_allocator), *ort_value);
+  *out = ort_value.release();
+  return nullptr;
+  API_IMPL_END
+}
+
+namespace {
+std::unique_ptr<IDataTransfer> GetDataTransfer(const OrtDevice& src_device, const OrtDevice& dst_device) {
+  // Picks the IDataTransfer used to copy sparse data between the two devices; throws when there is none.
+  const auto src_type = src_device.Type();
+  const auto dst_type = dst_device.Type();
+  if (src_type == OrtDevice::CPU && dst_type == OrtDevice::CPU) return std::make_unique<CPUDataTransfer>();
+#ifdef USE_CUDA
+  if (src_type == OrtDevice::GPU || dst_type == OrtDevice::GPU) {
+    auto* cuda_info = TryGetProviderInfo_CUDA();
+    if (cuda_info != nullptr) return cuda_info->CreateGPUDataTransfer(nullptr);
+  }
+#endif
+  ORT_THROW("Not able to find appropriate IDataTransfer to copy sparse data");
+}
+
+SparseTensor& ValidateFillInputArgs(OrtValue* v, const TensorShape& values_shape, const OrtMemoryInfo* data_mem_info) {
+  // Shared argument checks of the FillSparseTensor* entry points; returns the sparse tensor obtained from v.
+  auto& sparse_tensor = SparseTensor::GetSparseTensorFromOrtValue(*v);
+  // For string data both the source memory and the tensor itself have to be on CPU.
+  if (sparse_tensor.IsDataTypeString() &&
+      (data_mem_info->device.Type() != OrtDevice::CPU || sparse_tensor.Location().device.Type() != OrtDevice::CPU)) {
+    ORT_THROW("Strings can only reside in CPU memory");
+  }
+  const auto& values_dims = values_shape.GetDims();
+  if (std::any_of(values_dims.cbegin(), values_dims.cend(), [](int64_t dim) { return dim < 0; })) {
+    ORT_THROW("tried Filling sparse tensor with negative value in values shape");
+  }
+  return sparse_tensor;
+}
+
+struct PtrConvert {  // explicit casts instead of a union: reading a union member other than the one last written is undefined behavior in C++
+  explicit PtrConvert(const void* p_p) : p(p_p), strings(static_cast<const char**>(const_cast<void*>(p_p))) {}
+  const void* p;         // the untyped pointer as it was passed in
+  const char** strings;  // the same address viewed as an array of C string pointers
+};
+
+}  // namespace
+
+ORT_API_STATUS_IMPL(OrtApis::FillSparseTensorCoo, _Inout_ OrtValue* ort_value, _In_ const OrtMemoryInfo* data_mem_info,
+                    _In_ const int64_t* values_shape, size_t values_shape_len, _In_ const void* values,
+                    _In_ const int64_t* indices_data, size_t indices_num) {
+  API_IMPL_BEGIN
+  // Copies the caller's values and COO indices into the sparse tensor held by ort_value.
+  const TensorShape values_dims(values_shape, values_shape_len);
+  auto& dst = ValidateFillInputArgs(ort_value, values_dims, data_mem_info);
+  const auto num_values = gsl::narrow<size_t>(values_dims.Size());
+  const auto coo_indices = gsl::make_span(indices_data, indices_num);
+
+  if (!dst.IsDataTypeString()) {
+    // Non-string values go through a data transfer picked for the source and destination devices.
+    const auto transfer = GetDataTransfer(data_mem_info->device, dst.Location().device);
+    ORT_THROW_IF_ERROR(dst.MakeCooData(*transfer, *data_mem_info, num_values, values, coo_indices));
+  } else {
+    // String values are passed as an array of C string pointers.
+    ORT_THROW_IF_ERROR(dst.MakeCooStrings(num_values, PtrConvert(values).strings, coo_indices));
+  }
+  return nullptr;
+  API_IMPL_END
+}
+
+ORT_API_STATUS_IMPL(OrtApis::FillSparseTensorCsr, _Inout_ OrtValue* ort_value, _In_ const OrtMemoryInfo* data_mem_info,
+                    _In_ const int64_t* values_shape, size_t values_shape_len, _In_ const void* values,
+                    _In_ const int64_t* inner_indices_data, size_t inner_indices_num,
+                    _In_ const int64_t* outer_indices_data, size_t outer_indices_num) {
+  API_IMPL_BEGIN
+  // Copies the caller's values plus CSR inner and outer indices into the sparse tensor held by ort_value.
+  const TensorShape values_dims(values_shape, values_shape_len);
+  auto& dst = ValidateFillInputArgs(ort_value, values_dims, data_mem_info);
+  const auto num_values = gsl::narrow<size_t>(values_dims.Size());
+  const auto inner = gsl::make_span(inner_indices_data, inner_indices_num);
+  const auto outer = gsl::make_span(outer_indices_data, outer_indices_num);
+
+  if (!dst.IsDataTypeString()) {
+    // Non-string values go through a data transfer picked for the source and destination devices.
+    const auto transfer = GetDataTransfer(data_mem_info->device, dst.Location().device);
+    ORT_THROW_IF_ERROR(dst.MakeCsrData(*transfer, *data_mem_info, num_values, values, inner, outer));
+  } else {
+    ORT_THROW_IF_ERROR(dst.MakeCsrStrings(num_values, PtrConvert(values).strings, inner, outer));
+  }
+  return nullptr;
+  API_IMPL_END
+}
+
+ORT_API_STATUS_IMPL(OrtApis::FillSparseTensorBlockSparse, _Inout_ OrtValue* ort_value, _In_ const OrtMemoryInfo* data_mem_info,
+                    _In_ const int64_t* values_shape, size_t values_shape_len, _In_ const void* values,
+                    _In_ const int64_t* indices_shape_data, size_t indices_shape_len,
+                    _In_ const int32_t* indices_data) {
+  API_IMPL_BEGIN
+  // Copies the caller's values and block sparse indices into the sparse tensor held by ort_value.
+  const TensorShape values_dims(values_shape, values_shape_len);
+  auto& dst = ValidateFillInputArgs(ort_value, values_dims, data_mem_info);
+
+  // The values shape was checked by the call above; the indices shape is checked here.
+  const TensorShape indices_dims(indices_shape_data, indices_shape_len);
+  const auto& index_dim_values = indices_dims.GetDims();
+  if (std::any_of(index_dim_values.cbegin(), index_dim_values.cend(), [](int64_t dim) { return dim < 0; })) {
+    ORT_THROW("tried Filling sparse tensor with negative value in block sparse indices shape");
+  }
+
+  if (!dst.IsDataTypeString()) {
+    const auto transfer = GetDataTransfer(data_mem_info->device, dst.Location().device);
+    ORT_THROW_IF_ERROR(dst.MakeBlockSparseData(*transfer, *data_mem_info, values_dims, values, indices_dims, indices_data));
+  } else {
+    ORT_THROW_IF_ERROR(dst.MakeBlockSparseStrings(values_dims, PtrConvert(values).strings, indices_dims, indices_data));
+  }
+  return nullptr;
+  API_IMPL_END
+}
+
+ORT_API_STATUS_IMPL(OrtApis::CreateSparseTensorWithValuesAsOrtValue, _In_ const OrtMemoryInfo* info, _Inout_ void* p_data,
+                    _In_ const int64_t* dense_shape, size_t dense_shape_len,
+                    _In_ const int64_t* values_shape, size_t values_shape_len,
+                    ONNXTensorElementDataType type, _Outptr_ OrtValue** out) {
+  API_IMPL_BEGIN
+  // Creates a sparse tensor OrtValue over the caller's p_data values buffer; string element types are refused.
+  auto element_type = DataTypeImpl::SparseTensorTypeFromONNXEnum(type)->GetElementType();
+  assert(element_type->AsPrimitiveDataType() != nullptr);
+  if (utils::IsDataTypeString(element_type)) {
+    return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT,
+                                 "Can not use strings in pre-allocated memory. Use CreateSparseTensorAsOrtValue() to allocate memory inside and copy");
+  }
+  TensorShape tensor_dense_shape(dense_shape, dense_shape_len);
+  TensorShape tensor_values_shape(values_shape, values_shape_len);
+  // Both shapes come from the caller, so a negative dimension is rejected in either of them, not only in the values shape.
+  const auto has_negative_dim = [](const TensorShape& s) { return std::any_of(s.GetDims().cbegin(), s.GetDims().cend(), [](int64_t v) { return v < 0; }); };
+  if (has_negative_dim(tensor_dense_shape) || has_negative_dim(tensor_values_shape)) {
+    return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, "tried creating tensor with negative value in shape");
+  }
+  auto value = std::make_unique<OrtValue>();
+  SparseTensor::InitOrtValue(element_type, tensor_dense_shape, tensor_values_shape, p_data, *info, *value);
+  *out = value.release();
+  return nullptr;
+  API_IMPL_END
+}
+
+ORT_API_STATUS_IMPL(OrtApis::UseCooIndices, _Inout_ OrtValue* ort_value, _Inout_ int64_t* indices_data, size_t indices_num) {
+  API_IMPL_BEGIN
+  auto& sparse = SparseTensor::GetSparseTensorFromOrtValue(*ort_value);
+  // A null pointer or a zero count both stand for "no indices" and leave the span empty.
+  gsl::span<int64_t> coo_indices;
+  if (indices_num != 0 && indices_data != nullptr) {
+    coo_indices = gsl::make_span(indices_data, indices_num);
+  }
+  ORT_THROW_IF_ERROR(sparse.UseCooIndices(coo_indices));
+  return nullptr;
+  API_IMPL_END
+}
+
+ORT_API_STATUS_IMPL(OrtApis::UseCsrIndices, _Inout_ OrtValue* ort_value,
+                    _Inout_ int64_t* inner_data, size_t inner_num,
+                    _Inout_ int64_t* outer_data, size_t outer_num) {
+  API_IMPL_BEGIN
+  auto& sparse = SparseTensor::GetSparseTensorFromOrtValue(*ort_value);
+  // Builds a span from a (pointer, count) pair; a null pointer or a zero count gives an empty span.
+  const auto as_span = [](int64_t* data, size_t count) {
+    return (count != 0 && data != nullptr) ? gsl::make_span(data, count) : gsl::span<int64_t>();
+  };
+  const auto inner_indices = as_span(inner_data, inner_num);
+  const auto outer_indices = as_span(outer_data, outer_num);
+  ORT_THROW_IF_ERROR(sparse.UseCsrIndices(inner_indices, outer_indices));
+  return nullptr;
+  API_IMPL_END
+}
+
+ORT_API_STATUS_IMPL(OrtApis::UseBlockSparseIndices, _Inout_ OrtValue* ort_value, const int64_t* indices_shape, size_t indices_shape_len,
+                    _Inout_ int32_t* indices_data) {
+  API_IMPL_BEGIN
+  // Passes the caller's block sparse indices buffer, together with its shape, to the sparse tensor held by ort_value.
+  auto& sparse = SparseTensor::GetSparseTensorFromOrtValue(*ort_value);
+  ORT_THROW_IF_ERROR(sparse.UseBlockSparseIndices(TensorShape(indices_shape, indices_shape_len), indices_data));
+  return nullptr;
+  API_IMPL_END
+}
+
+ORT_API_STATUS_IMPL(OrtApis::GetSparseTensorFormat, _In_ const OrtValue* ort_value, _Out_ enum OrtSparseFormat* out) {
+  API_IMPL_BEGIN
+  // An OrtValue for which IsAllocated() is false is rejected before its content is looked at.
+  if (!ort_value->IsAllocated()) {
+    return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, "the ort_value must contain a constructed tensor");
+  }
+  // The format reported by the sparse tensor is converted to the C API enum with a plain cast.
+  *out = static_cast<OrtSparseFormat>(ort_value->Get<SparseTensor>().Format());
+  return nullptr;
+  API_IMPL_END
+}
+
+ORT_API_STATUS_IMPL(OrtApis::GetSparseTensorValues, _In_ const OrtValue* ort_value, _Outptr_ const void** out) {
+  API_IMPL_BEGIN
+  const auto& sparse = SparseTensor::GetSparseTensorFromOrtValue(*ort_value);
+  // String values are not handed out as a raw buffer; the error names the API to use for them.
+  if (sparse.IsDataTypeString()) {
+    return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, "Use GetStringTensor*() API to retrieve strings");
+  }
+  *out = sparse.Values().DataRaw();
+  return nullptr;
+  API_IMPL_END
+}
+
 ORT_API_STATUS_IMPL(OrtApis::CreateCustomOpDomain, _In_ const char* domain, _Outptr_ OrtCustomOpDomain** out) {
   API_IMPL_BEGIN
   auto custom_op_domain = std::make_unique<OrtCustomOpDomain>();
@@ -656,9 +882,18 @@ ORT_API_STATUS_IMPL(OrtApis::IsTensor, _In_ const OrtValue* value, _Out_ int* ou
   return nullptr;
 }
 
+ORT_API_STATUS_IMPL(OrtApis::IsSparseTensor, _In_ const OrtValue* value, _Out_ int* out) {
+  // Writes 1 to out when value holds a sparse tensor and 0 otherwise; a null status is always returned.
+  *out = value->IsSparseTensor() ? 1 : 0;
+  return nullptr;
+}
+
 ORT_API_STATUS_IMPL(OrtApis::GetTensorMutableData, _Inout_ OrtValue* value, _Outptr_ void** output) {
   TENSOR_READWRITE_API_BEGIN
-  //TODO: test if it's a string tensor
+  // String tensors are not rejected yet: uncomment the check below once WinML has fixed its code.
+  //if (tensor->IsDataTypeString()) {
+  //  return OrtApis::CreateStatus(ORT_NOT_IMPLEMENTED, "this API does not support strings");
+  //}
   *output = tensor->MutableDataRaw();
   return nullptr;
   API_IMPL_END
@@ -693,79 +928,127 @@ ORT_API_STATUS_IMPL(OrtApis::FillStringTensorElement, _Inout_ OrtValue* value, _
   API_IMPL_END
 }
 
-ORT_API_STATUS_IMPL(OrtApis::GetStringTensorDataLength, _In_ const OrtValue* value, _Out_ size_t* out) {
-  TENSOR_READ_API_BEGIN
-  const auto* src = tensor.Data<std::string>();
-  int64_t len = tensor.Shape().Size();
-  if (len >= 0) {
-    size_t ret = 0;
-    for (int64_t i = 0; i != len; ++i) {
-      ret += src[i].size();
+namespace {
+
+OrtStatusPtr GetTensorStringSpan(const ::OrtValue& v, gsl::span<const std::string>& span) {  // on success sets span to the strings of the Tensor, or of the SparseTensor values, held by v and returns nullptr
+  if (!v.IsAllocated()) {
+    return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, "OrtValue should contain a Tensor or a Sparse Tensor");
+  }
+  gsl::span<const std::string> str_span;
+  int64_t items = 0;  // element count taken from the shape; a negative count is reported as an invalid shape below
+  // Data type will be enforced on DataAsSpan() call.
+  if (v.IsTensor()) {
+    const auto& tensor = v.Get<onnxruntime::Tensor>();
+    items = tensor.Shape().Size();
+    if (items >= 0) {
+      str_span = tensor.DataAsSpan<std::string>();
     }
-    *out = ret;
-  } else
+  } else if (v.IsSparseTensor()) {
+    const auto& sparse_tensor = v.Get<SparseTensor>();
+    if (sparse_tensor.Format() == onnxruntime::SparseFormat::kUndefined) {
+      return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, "Sparse Tensor does not contain sparse data");
+    }
+    items = sparse_tensor.Values().Shape().Size();
+    if (items >= 0) {
+      str_span = sparse_tensor.Values().DataAsSpan<std::string>();
+    }
+  } else {
+    return OrtApis::CreateStatus(ORT_NOT_IMPLEMENTED, "This API supports Tensors or SparseTensors");
+  }
+
+  if (items < 0) {
     return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, "shape is invalid");
+  }
+  span = str_span;  // the output argument is written only when every check above has passed
+  return nullptr;
+}
+}  // namespace
+
+ORT_API_STATUS_IMPL(OrtApis::GetStringTensorDataLength, _In_ const OrtValue* value, _Out_ size_t* out) {  // writes to out the sum of the sizes of all strings in value
+  API_IMPL_BEGIN
+  gsl::span<const std::string> str_span;
+  if (auto* status = GetTensorStringSpan(*value, str_span)) {  // a non-null status is an error and is returned as is
+    return status;
+  }
+
+  size_t ret = 0;
+  for (const auto& s : str_span) {
+    ret += s.size();  // std::string::size(), so no terminating zeros are counted
+  }
+
+  *out = ret;
   return nullptr;
   API_IMPL_END
 }
 
 ORT_API_STATUS_IMPL(OrtApis::GetStringTensorElementLength, _In_ const OrtValue* value, size_t index, _Out_ size_t* out) {
-  TENSOR_READ_API_BEGIN
-  const auto* src = tensor.Data<std::string>();
-  auto len = static_cast<size_t>(tensor.Shape().Size());
-  if (index < len) {
-    *out = src[index].size();
-  } else
-    return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, "shape is invalid");
+  API_IMPL_BEGIN  // writes to out the size of the string at position index
+  gsl::span<const std::string> str_span;
+  if (auto* status = GetTensorStringSpan(*value, str_span)) {  // a non-null status is an error and is returned as is
+    return status;
+  }
+
+  if (index < str_span.size()) {  // index addresses the flat sequence of strings filled in by GetTensorStringSpan()
+    *out = str_span[index].size();
+  } else {
+    return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, "index is out of bounds");
+  }
+
   return nullptr;
   API_IMPL_END
 }
 
 ORT_API_STATUS_IMPL(OrtApis::GetStringTensorContent, _In_ const OrtValue* value, _Out_writes_bytes_all_(s_len) void* s,
                     size_t s_len, _Out_writes_all_(offsets_len) size_t* offsets, size_t offsets_len) {
-  TENSOR_READ_API_BEGIN
-  const auto* input = tensor.Data<std::string>();
-  auto len = static_cast<size_t>(tensor.Shape().Size());
-  if (offsets_len != len) {
+  API_IMPL_BEGIN  // copies all strings back to back into s and records in offsets where each one starts
+
+  gsl::span<const std::string> str_span;
+  if (auto* status = GetTensorStringSpan(*value, str_span)) {  // a non-null status is an error and is returned as is
+    return status;
+  }
+
+  if (offsets_len != str_span.size()) {  // exactly one offset slot per string is required
     return OrtApis::CreateStatus(ORT_FAIL, "offsets buffer is not equal to tensor size");
   }
-  {
-    size_t ret = 0;
-    for (size_t i = 0; i != len; ++i) {
-      ret += input[i].size();
-    }
-    if (s_len < ret) {
-      return OrtApis::CreateStatus(ORT_FAIL, "output buffer is too small");
-    }
+
+  size_t total_size = 0;  // bytes needed in s for all strings
+  for (const auto& str : str_span) {
+    total_size += str.size();
   }
+
+  if (s_len < total_size) {
+    return OrtApis::CreateStatus(ORT_FAIL, "output buffer is too small. Use GetStringTensorDataLength.");
+  }
+
   size_t f = 0;
   char* p = static_cast<char*>(s);
-  for (size_t i = 0; i != len; ++i, ++offsets) {
-    memcpy(p, input[i].data(), input[i].size());
-    p += input[i].size();
-    *offsets = f;
-    f += input[i].size();
+  for (const auto& str : str_span) {
+    memcpy(p, str.data(), str.size());  // copies the characters only, no terminating zero is written
+    p += str.size();
+    *offsets++ = f;  // offset of this string from the start of s
+    f += str.size();
   }
   return nullptr;
   API_IMPL_END
 }
 
-ORT_API_STATUS_IMPL(OrtApis::GetStringTensorElement, _In_ const OrtValue* value, size_t s_len, size_t index, _Out_writes_bytes_all_(s_len) void* s) {
-  TENSOR_READ_API_BEGIN
-  const auto* input = tensor.Data<std::string>();
-  auto len = static_cast<size_t>(tensor.Shape().Size());
+ORT_API_STATUS_IMPL(OrtApis::GetStringTensorElement, _In_ const OrtValue* value,
+                    size_t s_len, size_t index, _Out_writes_bytes_all_(s_len) void* s) {
+  API_IMPL_BEGIN  // copies the string at position index into the s_len byte buffer s
+  gsl::span<const std::string> str_span;
+  if (auto* status = GetTensorStringSpan(*value, str_span)) {  // a non-null status is an error and is returned as is
+    return status;
+  }
 
-  if (index >= len) {
+  if (index < str_span.size()) {
+    const auto& str = str_span[index];
+    if (s_len < str.size()) {
+      return OrtApis::CreateStatus(ORT_FAIL, "buffer size is too small for string element");
+    }
+    memcpy(s, str.data(), str.size());  // copies the characters only, no terminating zero is written
+  } else {
     return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, "element index is out of bounds");
   }
-
-  size_t ret = input[index].size();
-  if (s_len < ret) {
-    return OrtApis::CreateStatus(ORT_FAIL, "buffer size is too small for string");
-  }
-
-  memcpy(s, input[index].data(), input[index].size());
-
   return nullptr;
   API_IMPL_END
 }
@@ -2097,6 +2380,20 @@ static constexpr OrtApi ort_api_1_to_9 = {
     &OrtApis::EnableOrtCustomOps,
     &OrtApis::RegisterAllocator,
     &OrtApis::UnregisterAllocator,
+    &OrtApis::IsSparseTensor,
+    &OrtApis::CreateSparseTensorAsOrtValue,
+    &OrtApis::FillSparseTensorCoo,
+    &OrtApis::FillSparseTensorCsr,
+    &OrtApis::FillSparseTensorBlockSparse,
+    &OrtApis::CreateSparseTensorWithValuesAsOrtValue,
+    &OrtApis::UseCooIndices,
+    &OrtApis::UseCsrIndices,
+    &OrtApis::UseBlockSparseIndices,
+    &OrtApis::GetSparseTensorFormat,
+    &OrtApis::GetSparseTensorValuesTypeAndShape,
+    &OrtApis::GetSparseTensorValues,
+    &OrtApis::GetSparseTensorIndicesTypeShape,
+    &OrtApis::GetSparseTensorIndices,
 };
 
 // Asserts to do a some checks to ensure older Versions of the OrtApi never change (will detect an addition or deletion but not if they cancel out each other)
diff --git a/onnxruntime/core/session/ort_apis.h b/onnxruntime/core/session/ort_apis.h
index 080431028f..6a6b3fa817 100644
--- a/onnxruntime/core/session/ort_apis.h
+++ b/onnxruntime/core/session/ort_apis.h
@@ -288,4 +288,31 @@ ORT_API(void, ReleaseTensorRTProviderOptions, _Frees_ptr_opt_ OrtTensorRTProvide
 ORT_API_STATUS_IMPL(EnableOrtCustomOps, _Inout_ OrtSessionOptions* options);
 ORT_API_STATUS_IMPL(RegisterAllocator, _Inout_ OrtEnv* env, _In_ OrtAllocator* allocator);
 ORT_API_STATUS_IMPL(UnregisterAllocator, _Inout_ OrtEnv* env, _In_ const OrtMemoryInfo* mem_info);
+// SparseTensor related API
+ORT_API_STATUS_IMPL(IsSparseTensor, _In_ const OrtValue* value, _Out_ int* out);
+ORT_API_STATUS_IMPL(CreateSparseTensorAsOrtValue, _Inout_ OrtAllocator* allocator, _In_ const int64_t* dense_shape,
+                    size_t dense_shape_len, ONNXTensorElementDataType type, _Outptr_ OrtValue** out);
+ORT_API_STATUS_IMPL(FillSparseTensorCoo, _Inout_ OrtValue* ort_value, _In_ const OrtMemoryInfo* data_mem_info,
+                    _In_ const int64_t* values_shape, size_t values_shape_len, _In_ const void* values,
+                    _In_ const int64_t* indices_data, size_t indices_num);  // parameter names match the definition in onnxruntime_c_api.cc
+ORT_API_STATUS_IMPL(FillSparseTensorCsr, _Inout_ OrtValue* ort_value, _In_ const OrtMemoryInfo* data_mem_info,
+                    _In_ const int64_t* values_shape, size_t values_shape_len, _In_ const void* values,
+                    _In_ const int64_t* inner_indices_data, size_t inner_indices_num,
+                    _In_ const int64_t* outer_indices_data, size_t outer_indices_num);  // annotations match the definition in onnxruntime_c_api.cc
+ORT_API_STATUS_IMPL(FillSparseTensorBlockSparse, _Inout_ OrtValue* ort_value, _In_ const OrtMemoryInfo* data_mem_info,
+                    _In_ const int64_t* values_shape, size_t values_shape_len, _In_ const void* values,
+                    _In_ const int64_t* indices_shape_data, size_t indices_shape_len,
+                    _In_ const int32_t* indices_data);
+ORT_API_STATUS_IMPL(CreateSparseTensorWithValuesAsOrtValue, _In_ const OrtMemoryInfo* info, _Inout_ void* p_data,
+                    _In_ const int64_t* dense_shape, size_t dense_shape_len,
+                    _In_ const int64_t* values_shape, size_t values_shape_len,
+                    ONNXTensorElementDataType type, _Outptr_ OrtValue** out);
+ORT_API_STATUS_IMPL(UseCooIndices, _Inout_ OrtValue* ort_value, _Inout_ int64_t* indices_data, size_t indices_num);
+ORT_API_STATUS_IMPL(UseCsrIndices, _Inout_ OrtValue*, _Inout_ int64_t* inner_data, size_t inner_num, _Inout_ int64_t* outer_data, size_t outer_num);
+ORT_API_STATUS_IMPL(UseBlockSparseIndices, _Inout_ OrtValue* ort_value, const int64_t* indices_shape, size_t indices_shape_len, _Inout_ int32_t* indices_data);
+ORT_API_STATUS_IMPL(GetSparseTensorFormat, _In_ const OrtValue* ort_value, _Out_ enum OrtSparseFormat* out);
+ORT_API_STATUS_IMPL(GetSparseTensorValuesTypeAndShape, _In_ const OrtValue* ort_value, _Outptr_ OrtTensorTypeAndShapeInfo** out);
+ORT_API_STATUS_IMPL(GetSparseTensorValues, _In_ const OrtValue* ort_value, _Outptr_ const void** out);
+ORT_API_STATUS_IMPL(GetSparseTensorIndicesTypeShape, _In_ const OrtValue* ort_value, enum OrtSparseIndicesFormat indices_format, _Outptr_ OrtTensorTypeAndShapeInfo** out);
+ORT_API_STATUS_IMPL(GetSparseTensorIndices, _In_ const OrtValue* ort_value, enum OrtSparseIndicesFormat indices_format, _Out_ size_t* num_indices, _Outptr_ const void** indices);
 }  // namespace OrtApis
diff --git a/onnxruntime/test/framework/sparse_kernels_test.cc b/onnxruntime/test/framework/sparse_kernels_test.cc
index c2d0f86090..01a5adc387 100644
--- a/onnxruntime/test/framework/sparse_kernels_test.cc
+++ b/onnxruntime/test/framework/sparse_kernels_test.cc
@@ -36,7 +36,6 @@ inline int64_t vector_len(const std::vector<T>& v) {
   return static_cast<int64_t>(v.size());
 }
 
-
 // This file contains sample implementations of several ops with sparse-tensor inputs/outputs.
 // Each op is implemented as a struct with the following signature:
 // struct SparseOp {
@@ -1209,9 +1208,6 @@ TEST(SparseTensorConversionTests, TestDenseToSparseConversion) {
       RawSparseDataChecker<uint8_t>);
 }
 
-template <class T>
-using SparseMatrixRowMajor = Eigen::SparseMatrix<T, Eigen::RowMajor, int64_t>;
-
 TEST(SparseTensorConversionTests, CsrConversion) {
   auto* cpu_provider = TestCPUExecutionProvider();
   auto cpu_allocator = cpu_provider->GetAllocator(0, OrtMemTypeDefault);
@@ -1234,6 +1230,7 @@ TEST(SparseTensorConversionTests, CsrConversion) {
 
   const std::vector<int32_t> expected_values = {1, 1, 1};
   const std::vector<std::string> expected_values_str = {"1", "1", "1"};
+  const char* const strings[] = {"1", "1", "1"};
   const std::vector<int64_t> expected_inner = {2, 0, 2};
   const std::vector<int64_t> expected_outer = {0, 1, 3, 3};
 
@@ -1242,6 +1239,49 @@ TEST(SparseTensorConversionTests, CsrConversion) {
     auto cpu_transfer = cpu_provider->GetDataTransfer();
     dtm.RegisterDataTransfer(std::move(cpu_transfer));
   }
+  {
+    {
+      // Test CSR initialization of 100% sparse tensor, passing 0 in the shape
+      SparseTensor fully_sparse(DataTypeImpl::GetType<int32_t>(), TensorShape{3, 3}, cpu_allocator);
+      ASSERT_STATUS_OK(fully_sparse.MakeCsrData(*cpu_provider->GetDataTransfer(), cpu_allocator->Info(),
+                                                0U, nullptr, gsl::span<int64_t>(), gsl::span<int64_t>()));
+      ASSERT_EQ(fully_sparse.Format(), SparseFormat::kCsrc);
+      ASSERT_EQ(0, fully_sparse.RequiredAllocationSize());
+      ASSERT_EQ(0U, fully_sparse.NumValues());
+      ASSERT_EQ(1U, fully_sparse.Values().Shape().GetDims().size());
+      ASSERT_EQ(0, fully_sparse.Values().Shape().Size());
+      ASSERT_TRUE(fully_sparse.Values().DataAsSpan<int32_t>().empty());
+      auto csr_view = fully_sparse.AsCsr();
+      const auto& inner = csr_view.Inner();
+      ASSERT_EQ(0, inner.Shape().Size());
+      ASSERT_EQ(1U, inner.Shape().GetDims().size());
+      ASSERT_TRUE(inner.DataAsSpan<int64_t>().empty());
+      const auto& outer = csr_view.Outer();
+      ASSERT_EQ(0, outer.Shape().Size());
+      ASSERT_EQ(1U, outer.Shape().GetDims().size());
+      ASSERT_TRUE(outer.DataAsSpan<int64_t>().empty());
+    }
+    {
+      // Test CSR initialization of 100% sparse tensor on top of a null values buffer with values shape {0}, via UseCsrIndices()
+      SparseTensor fully_sparse(DataTypeImpl::GetType<int32_t>(), TensorShape{3, 3}, TensorShape{0}, nullptr, cpu_allocator->Info());
+      ASSERT_STATUS_OK(fully_sparse.UseCsrIndices(gsl::span<int64_t>(), gsl::span<int64_t>()));
+      ASSERT_EQ(fully_sparse.Format(), SparseFormat::kCsrc);
+      ASSERT_EQ(0, fully_sparse.RequiredAllocationSize());
+      ASSERT_EQ(0U, fully_sparse.NumValues());
+      ASSERT_EQ(1U, fully_sparse.Values().Shape().GetDims().size());
+      ASSERT_EQ(0, fully_sparse.Values().Shape().Size());
+      ASSERT_TRUE(fully_sparse.Values().DataAsSpan<int32_t>().empty());
+      auto csr_view = fully_sparse.AsCsr();
+      const auto& inner = csr_view.Inner();
+      ASSERT_EQ(0, inner.Shape().Size());
+      ASSERT_EQ(1U, inner.Shape().GetDims().size());
+      ASSERT_TRUE(inner.DataAsSpan<int64_t>().empty());
+      const auto& outer = csr_view.Outer();
+      ASSERT_EQ(0, outer.Shape().Size());
+      ASSERT_EQ(1U, outer.Shape().GetDims().size());
+      ASSERT_TRUE(outer.DataAsSpan<int64_t>().empty());
+    }
+  }
 
   Tensor dense_cpu_src(DataTypeImpl::GetType<int32_t>(), dense_shape, dense_data.data(), cpu_allocator->Info());
   {
@@ -1309,6 +1349,28 @@ TEST(SparseTensorConversionTests, CsrConversion) {
     ASSERT_TRUE(std::equal(dense_values_dst.cbegin(), dense_values_dst.cend(), dense_data_str.cbegin(), dense_data_str.cend()));
   }
 
+  {
+    // Use MakeCsrStrings()
+    SparseTensor str_cpu_src(DataTypeImpl::GetType<std::string>(), dense_shape, cpu_allocator);
+    ASSERT_STATUS_OK(str_cpu_src.MakeCsrStrings(expected_values_str.size(), strings,
+                                                gsl::make_span(expected_inner), gsl::make_span(expected_outer)));
+    ASSERT_EQ(str_cpu_src.Format(), SparseFormat::kCsrc);
+    ASSERT_TRUE(str_cpu_src.IsDataTypeString());
+    ASSERT_EQ(str_cpu_src.DenseShape().GetDims(), dense_shape);
+    ASSERT_EQ(str_cpu_src.NumValues(), expected_values_str.size());
+    auto values = str_cpu_src.Values().DataAsSpan<std::string>();
+    ASSERT_TRUE(std::equal(expected_values_str.cbegin(), expected_values_str.cend(), values.cbegin(), values.cend()));
+
+    auto csr_view = str_cpu_src.AsCsr();
+    auto inner = csr_view.Inner().DataAsSpan<int64_t>();
+    ASSERT_EQ(expected_inner.size(), inner.size());
+    ASSERT_TRUE(std::equal(expected_inner.cbegin(), expected_inner.cend(), inner.cbegin(), inner.cend()));
+
+    auto outer = csr_view.Outer().DataAsSpan<int64_t>();
+    ASSERT_EQ(expected_outer.size(), outer.size());
+    ASSERT_TRUE(std::equal(expected_outer.cbegin(), expected_outer.cend(), outer.cbegin(), outer.cend()));
+  }
+
 #ifdef USE_CUDA
   auto cuda_provider = DefaultCudaExecutionProvider();
   auto cuda_allocator = cuda_provider->GetAllocator(0, OrtMemTypeDefault);
@@ -1387,6 +1449,7 @@ TEST(SparseTensorConversionTests, CooConversion) {
 
   const std::vector<int32_t> expected_values = {1, 1, 1};
   const std::vector<std::string> expected_values_str = {"1", "1", "1"};
+  const char* const strings[] = {"1", "1", "1"};
   const std::vector<int64_t> expected_linear_indices = {2, 3, 5};
   const std::vector<int64_t> expected_2d_indices = {0, 2, 1, 0, 1, 2};
 
@@ -1395,6 +1458,43 @@ TEST(SparseTensorConversionTests, CooConversion) {
     auto cpu_transfer = cpu_provider->GetDataTransfer();
     dtm.RegisterDataTransfer(std::move(cpu_transfer));
   }
+
+  {
+    // Test COO initialization of 100% sparse tensor, passing 0 in the shape
+    SparseTensor fully_sparse(DataTypeImpl::GetType<int32_t>(), TensorShape{3, 3}, cpu_allocator);
+    ASSERT_STATUS_OK(fully_sparse.MakeCooData(*cpu_provider->GetDataTransfer(), cpu_allocator->Info(), 0, nullptr, gsl::span<int64_t>()));
+    ASSERT_EQ(fully_sparse.Format(), SparseFormat::kCoo);
+    ASSERT_EQ(0, fully_sparse.RequiredAllocationSize());
+    ASSERT_EQ(0U, fully_sparse.NumValues());
+    ASSERT_EQ(1U, fully_sparse.Values().Shape().GetDims().size());
+    ASSERT_EQ(0, fully_sparse.Values().Shape().Size());
+    ASSERT_TRUE(fully_sparse.Values().DataAsSpan<int32_t>().empty());
+    auto coo_view = fully_sparse.AsCoo();
+    const auto& indices = coo_view.Indices();
+    ASSERT_EQ(0, indices.Shape().Size());
+    // For fully sparse we assume a 2-D indices.
+    ASSERT_EQ(2U, indices.Shape().GetDims().size());
+    ASSERT_TRUE(indices.DataAsSpan<int64_t>().empty());
+  }
+
+  {
+    // Test COO initialization of 100% sparse tensor on top of a null values buffer with values shape {0}, via UseCooIndices()
+    SparseTensor fully_sparse(DataTypeImpl::GetType<int32_t>(), TensorShape{3, 3}, TensorShape{0}, nullptr, cpu_allocator->Info());
+    ASSERT_STATUS_OK(fully_sparse.UseCooIndices(gsl::span<int64_t>()));
+    ASSERT_EQ(fully_sparse.Format(), SparseFormat::kCoo);
+    ASSERT_EQ(0, fully_sparse.RequiredAllocationSize());
+    ASSERT_EQ(0U, fully_sparse.NumValues());
+    ASSERT_EQ(1U, fully_sparse.Values().Shape().GetDims().size());
+    ASSERT_EQ(0, fully_sparse.Values().Shape().Size());
+    ASSERT_TRUE(fully_sparse.Values().DataAsSpan<int32_t>().empty());
+    auto coo_view = fully_sparse.AsCoo();
+    const auto& indices = coo_view.Indices();
+    ASSERT_EQ(0, indices.Shape().Size());
+    // For fully sparse we assume a 2-D indices.
+    ASSERT_EQ(2U, indices.Shape().GetDims().size());
+    ASSERT_TRUE(indices.DataAsSpan<int64_t>().empty());
+  }
+
   Tensor dense_cpu_src(DataTypeImpl::GetType<int32_t>(), dense_shape, dense_data.data(), cpu_allocator->Info());
   {
     // test where both src and destination are on CPU. Linear index.
@@ -1452,6 +1552,25 @@ TEST(SparseTensorConversionTests, CooConversion) {
     ASSERT_TRUE(std::equal(dense_values_dst.cbegin(), dense_values_dst.cend(), dense_data_str.cbegin(), dense_data_str.cend()));
   }
 
+  {
+    // Use MakeCooStrings()
+    SparseTensor str_cpu_src(DataTypeImpl::GetType<std::string>(), dense_shape, cpu_allocator);
+    ASSERT_STATUS_OK(str_cpu_src.MakeCooStrings(expected_values_str.size(), strings,
+                                                gsl::make_span(expected_linear_indices)));
+    ASSERT_EQ(str_cpu_src.Format(), SparseFormat::kCoo);
+    ASSERT_TRUE(str_cpu_src.IsDataTypeString());
+    ASSERT_EQ(str_cpu_src.DenseShape().GetDims(), dense_shape);
+    ASSERT_EQ(str_cpu_src.NumValues(), expected_values_str.size());
+    auto values = str_cpu_src.Values().DataAsSpan<std::string>();
+    ASSERT_TRUE(std::equal(expected_values_str.cbegin(), expected_values_str.cend(), values.cbegin(), values.cend()));
+
+    auto coo_view = str_cpu_src.AsCoo();
+    auto indices = coo_view.Indices().DataAsSpan<int64_t>();
+    ASSERT_EQ(expected_linear_indices.size(), indices.size());
+    ASSERT_TRUE(std::equal(expected_linear_indices.cbegin(), expected_linear_indices.cend(), indices.cbegin(), indices.cend()));
+  }
+
+
   {
     // test where both src and destination are on CPU. 2-D index
     SparseTensor dst;
@@ -1539,5 +1658,133 @@ TEST(SparseTensorConversionTests, CooConversion) {
 #endif
 }
 #endif  // !ORT_MINIMAL_BUILD
+
+TEST(SparseTensorConversionTests, BlockSparse) {  // Block-sparse creation paths: fully sparse, int32 data, string data.
+  auto* cpu_provider = TestCPUExecutionProvider();
+  auto cpu_allocator = cpu_provider->GetAllocator(0, OrtMemTypeDefault);
+
+  DataTransferManager dtm;  // NOTE(review): populated just below but not referenced again in this test.
+  {
+    auto cpu_transfer = cpu_provider->GetDataTransfer();
+    dtm.RegisterDataTransfer(std::move(cpu_transfer));
+  }
+
+  {
+    // Fully sparse, allocator-constructed tensor: MakeBlockSparseData() with {0} shapes and null data pointers.
+    SparseTensor fully_sparse(DataTypeImpl::GetType<int32_t>(), TensorShape{3, 3}, cpu_allocator);
+    ASSERT_STATUS_OK(fully_sparse.MakeBlockSparseData(*cpu_provider->GetDataTransfer(), cpu_allocator->Info(),
+                                                      TensorShape{0}, nullptr, TensorShape{0}, nullptr));
+    ASSERT_EQ(fully_sparse.Format(), SparseFormat::kBlockSparse);
+    ASSERT_EQ(0, fully_sparse.RequiredAllocationSize());
+    ASSERT_EQ(0U, fully_sparse.NumValues());
+    ASSERT_EQ(1U, fully_sparse.Values().Shape().GetDims().size());
+    ASSERT_EQ(0, fully_sparse.Values().Shape().Size());
+    ASSERT_TRUE(fully_sparse.Values().DataAsSpan<int32_t>().empty());
+    auto blocksparse_view = fully_sparse.AsBlockSparse();
+    const auto& indices = blocksparse_view.Indices();
+    ASSERT_EQ(0, indices.Shape().Size());
+    ASSERT_EQ(1U, indices.Shape().GetDims().size());  // Empty indices keep the 1-D {0} shape that was passed in.
+    ASSERT_TRUE(indices.DataAsSpan<int32_t>().empty());
+  }
+
+  {
+    // Fully sparse, tensor constructed over a null values pointer: constructor + UseBlockSparseIndices().
+    SparseTensor fully_sparse(DataTypeImpl::GetType<int32_t>(), TensorShape{3, 3},
+                              TensorShape{0}, nullptr, cpu_allocator->Info());
+    ASSERT_STATUS_OK(fully_sparse.UseBlockSparseIndices(TensorShape{0}, nullptr));
+    ASSERT_EQ(fully_sparse.Format(), SparseFormat::kBlockSparse);
+    ASSERT_EQ(0, fully_sparse.RequiredAllocationSize());
+    ASSERT_EQ(0U, fully_sparse.NumValues());
+    ASSERT_EQ(1U, fully_sparse.Values().Shape().GetDims().size());
+    ASSERT_EQ(0, fully_sparse.Values().Shape().Size());
+    ASSERT_TRUE(fully_sparse.Values().DataAsSpan<int32_t>().empty());
+    auto blocksparse_view = fully_sparse.AsBlockSparse();
+    const auto& indices = blocksparse_view.Indices();
+    ASSERT_EQ(0, indices.Shape().Size());
+    ASSERT_EQ(1U, indices.Shape().GetDims().size());  // Empty indices keep the 1-D {0} shape that was passed in.
+    ASSERT_TRUE(indices.DataAsSpan<int32_t>().empty());
+  }
+
+  const TensorShape dense_shape{8, 8};
+  constexpr int64_t block_size = 2;
+  const TensorShape values_shape{2, block_size, block_size};
+  // Two dense blocks of block_size x block_size elements each, 8 values in total.
+  std::vector<int32_t> data_blocks{
+      1, 2, 3, 4, 5, 6, 7, 8};
+
+  const char* const strings[] = {  // C-string input for MakeBlockSparseStrings() below.
+      "1", "2", "3", "4", "5", "6", "7", "8"};
+
+  const std::string expected_strings[] = {  // The same values as std::string, used for comparison.
+      "1", "2", "3", "4", "5", "6", "7", "8"};
+
+
+  const TensorShape indices_shape{2, 2};  // two blocks by two coordinates
+  // Block coordinates (0, 0) and (0, 1), flattened to match indices_shape.
+  std::vector<int32_t> blocksparse_indices = {
+      0, 0, 0, 1};
+
+  {
+    // Instantiation only, allocator-constructed tensor: populated through MakeBlockSparseData().
+    SparseTensor own_buffer_tensor(DataTypeImpl::GetType<int32_t>(), dense_shape, cpu_allocator);
+    ASSERT_STATUS_OK(own_buffer_tensor.MakeBlockSparseData(*cpu_provider->GetDataTransfer(), cpu_allocator->Info(),
+                                                           values_shape, data_blocks.data(),
+                                                           indices_shape, blocksparse_indices.data()));
+    ASSERT_EQ(own_buffer_tensor.Format(), SparseFormat::kBlockSparse);
+    ASSERT_EQ(dense_shape, own_buffer_tensor.DenseShape());
+    ASSERT_EQ(data_blocks.size(), own_buffer_tensor.NumValues());
+    ASSERT_EQ(values_shape, own_buffer_tensor.Values().Shape());
+    auto data_span = own_buffer_tensor.Values().DataAsSpan<int32_t>();
+    ASSERT_EQ(data_blocks.size(), data_span.size());
+    ASSERT_TRUE(std::equal(data_blocks.cbegin(), data_blocks.cend(), data_span.cbegin(), data_span.cend()));
+
+    const auto& indices = own_buffer_tensor.AsBlockSparse().Indices();
+    ASSERT_EQ(indices_shape, indices.Shape());
+    auto indices_span = indices.DataAsSpan<int32_t>();
+    ASSERT_TRUE(std::equal(blocksparse_indices.cbegin(), blocksparse_indices.cend(),
+                           indices_span.cbegin(), indices_span.cend()));
+  }
+
+  {
+    // Instantiation only, tensor constructed over data_blocks.data(): constructor + UseBlockSparseIndices().
+    SparseTensor user_buffer_tensor(DataTypeImpl::GetType<int32_t>(), dense_shape, values_shape, data_blocks.data(), cpu_allocator->Info());
+    ASSERT_STATUS_OK(user_buffer_tensor.UseBlockSparseIndices(indices_shape, blocksparse_indices.data()));
+    ASSERT_EQ(user_buffer_tensor.Format(), SparseFormat::kBlockSparse);
+    ASSERT_EQ(dense_shape, user_buffer_tensor.DenseShape());
+    ASSERT_EQ(data_blocks.size(), user_buffer_tensor.NumValues());
+    ASSERT_EQ(values_shape, user_buffer_tensor.Values().Shape());
+    auto data_span = user_buffer_tensor.Values().DataAsSpan<int32_t>();
+    ASSERT_EQ(data_blocks.size(), data_span.size());
+    ASSERT_TRUE(std::equal(data_blocks.cbegin(), data_blocks.cend(), data_span.cbegin(), data_span.cend()));
+
+    const auto& indices = user_buffer_tensor.AsBlockSparse().Indices();
+    ASSERT_EQ(indices_shape, indices.Shape());
+    auto indices_span = indices.DataAsSpan<int32_t>();
+    ASSERT_TRUE(std::equal(blocksparse_indices.cbegin(), blocksparse_indices.cend(),
+                           indices_span.cbegin(), indices_span.cend()));
+  }
+
+  {
+    // String data: populate via MakeBlockSparseStrings() and compare against expected_strings.
+    SparseTensor own_buffer_tensor(DataTypeImpl::GetType<std::string>(), dense_shape, cpu_allocator);
+    ASSERT_STATUS_OK(own_buffer_tensor.MakeBlockSparseStrings(values_shape, strings, indices_shape, blocksparse_indices.data()));
+    ASSERT_TRUE(own_buffer_tensor.IsDataTypeString());
+    ASSERT_EQ(own_buffer_tensor.Format(), SparseFormat::kBlockSparse);
+    ASSERT_EQ(dense_shape, own_buffer_tensor.DenseShape());
+    ASSERT_EQ(data_blocks.size(), own_buffer_tensor.NumValues());  // data_blocks and strings both hold 8 elements.
+    ASSERT_EQ(values_shape, own_buffer_tensor.Values().Shape());
+    auto data_span = own_buffer_tensor.Values().DataAsSpan<std::string>();
+    auto expected_span = gsl::make_span(expected_strings);
+    ASSERT_EQ(expected_span.size(), data_span.size());
+    ASSERT_TRUE(std::equal(expected_span.cbegin(), expected_span.cend(), data_span.cbegin(), data_span.cend()));
+
+    const auto& indices = own_buffer_tensor.AsBlockSparse().Indices();
+    ASSERT_EQ(indices_shape, indices.Shape());
+    auto indices_span = indices.DataAsSpan<int32_t>();
+    ASSERT_TRUE(std::equal(blocksparse_indices.cbegin(), blocksparse_indices.cend(),
+                           indices_span.cbegin(), indices_span.cend()));
+
+  }
+}
 }  // namespace test
 }  // namespace onnxruntime
diff --git a/onnxruntime/test/shared_lib/test_inference.cc b/onnxruntime/test/shared_lib/test_inference.cc
index 4c43a575b2..4c9316e6dd 100644
--- a/onnxruntime/test/shared_lib/test_inference.cc
+++ b/onnxruntime/test/shared_lib/test_inference.cc
@@ -23,6 +23,7 @@
 #include "test_fixture.h"
 #include "utils.h"
 #include "custom_op_utils.h"
+#include <gsl/gsl>
 
 #ifdef _WIN32
 #include <Windows.h>
@@ -175,6 +176,10 @@ static constexpr PATH_TYPE VARIED_INPUT_CUSTOM_OP_MODEL_URI_2 = TSTR("testdata/f
 static constexpr PATH_TYPE OPTIONAL_INPUT_OUTPUT_CUSTOM_OP_MODEL_URI = TSTR("testdata/foo_bar_1.onnx");
 static constexpr PATH_TYPE OPTIONAL_INPUT_OUTPUT_CUSTOM_OP_MODEL_URI_2 = TSTR("testdata/foo_bar_2.onnx");
 static constexpr PATH_TYPE CUSTOM_OP_MODEL_WITH_ATTRIBUTES_URI = TSTR("testdata/foo_bar_3.onnx");
+static constexpr PATH_TYPE SPARSE_OUTPUT_MODEL_URI = TSTR("testdata/sparse_initializer_as_output.onnx");  // Loaded by CApiTest.SparseOutputModel
+#ifndef DISABLE_CONTRIB_OPS
+static constexpr PATH_TYPE SPARSE_INPUT_MATMUL_MODEL_URI = TSTR("testdata/sparse_to_dense_matmul.onnx");  // Loaded by CApiTest.SparseInputModel
+#endif  // !DISABLE_CONTRIB_OPS
 
 #ifdef ENABLE_EXTENSION_CUSTOM_OPS
 static constexpr PATH_TYPE ORT_CUSTOM_OPS_MODEL_URI = TSTR("testdata/custom_op_string_lower.onnx");
@@ -239,6 +244,121 @@ INSTANTIATE_TEST_SUITE_P(CApiTestWithProviders,
                          CApiTestWithProvider,
                          ::testing::Values(0, 1, 2, 3, 4));
 
+TEST(CApiTest, SparseOutputModel) {  // Runs a model with no inputs and verifies its single COO sparse output.
+  std::vector<int64_t> dense_shape{3, 3};  // Expected dense shape of the output.
+  std::vector<float> values{1.764052391052246, 0.40015721321105957, 0.978738009929657};  // Expected non-zero values.
+  std::vector<int64_t> values_shape{3};
+  std::vector<int64_t> coo_indices{2, 3, 5};  // Expected COO indices; 1-D, see indices_shape.
+  std::vector<int64_t> indices_shape{3};
+
+  std::vector<Ort::Value> ort_inputs;  // Left empty: Run() below is called with zero inputs.
+  std::vector<const char*> input_names;
+  const char* const output_names[] = {"values"};
+  Ort::Session session(*ort_env, SPARSE_OUTPUT_MODEL_URI, Ort::SessionOptions{});
+  auto ort_outputs = session.Run(Ort::RunOptions{}, input_names.data(), ort_inputs.data(), ort_inputs.size(),
+                                   output_names, 1);
+  ASSERT_EQ(ort_outputs.size(), 1U);
+  const auto& sparse_output = ort_outputs[0];
+  auto ti = sparse_output.GetTypeInfo();
+  ASSERT_EQ(ONNX_TYPE_SPARSETENSOR, ti.GetONNXType());
+  auto tensor_type_shape = ti.GetTensorTypeAndShapeInfo();
+  ASSERT_EQ(dense_shape, tensor_type_shape.GetShape());
+  ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, tensor_type_shape.GetElementType());
+
+  ASSERT_EQ(ORT_SPARSE_COO, sparse_output.GetSparseFormat());
+  auto values_ts = sparse_output.GetSparseTensorValuesTypeAndShapeInfo();
+  ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, values_ts.GetElementType());
+  ASSERT_EQ(values_shape, values_ts.GetShape());
+
+  const auto* values_fetch = sparse_output.GetSparseTensorValues<float>();
+  auto val_span = gsl::make_span(values_fetch, values.size());  // Length comes from the expected data; the {3} values shape is asserted above.
+  ASSERT_TRUE(std::equal(values.cbegin(), values.cend(), val_span.cbegin(), val_span.cend()));
+
+  auto indices_ts = sparse_output.GetSparseTensorIndicesTypeShapeInfo(ORT_SPARSE_COO_INDICES);
+  ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64, indices_ts.GetElementType());
+  ASSERT_EQ(indices_shape, indices_ts.GetShape());
+
+  size_t num_indices = 0;  // Filled in by GetSparseTensorIndicesData() below.
+  const int64_t* indices = sparse_output.GetSparseTensorIndicesData<int64_t>(ORT_SPARSE_COO_INDICES, num_indices);
+  ASSERT_EQ(num_indices, static_cast<size_t>(indices_shape[0]));
+  auto ind_span = gsl::make_span(indices, num_indices);
+  ASSERT_TRUE(std::equal(coo_indices.cbegin(), coo_indices.cend(), ind_span.cbegin(), ind_span.cend()));
+}
+
+#ifndef DISABLE_CONTRIB_OPS
+TEST(CApiTest, SparseInputModel) {
+  // Feeds a COO sparse tensor ("sparse_A") and a dense tensor ("dense_B") to the model and checks "dense_Y".
+  std::vector<int64_t> common_shape{9, 9};  // inputs and outputs same shape
+  std::vector<float> A_values{1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0,
+                              10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+                              18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0,
+                              26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0, 33.0,
+                              34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0,
+                              42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0, 49.0,
+                              50.0, 51.0, 52.0, 53.0};
+
+  // 2-D indices: shape is {number of values, 2}, i.e. one coordinate pair per value.
+  std::vector<int64_t> indices_shape{gsl::narrow<int64_t>(A_values.size()), 2};
+  std::vector<int64_t> A_indices{0, 1, 0, 2, 0, 6, 0, 7, 0, 8, 1, 0, 1,
+                                 1, 1, 2, 1, 6, 1, 7, 1, 8, 2, 0, 2, 1,
+                                 2, 2, 2, 6, 2, 7, 2, 8, 3, 3, 3, 4, 3,
+                                 5, 3, 6, 3, 7, 3, 8, 4, 3, 4, 4, 4, 5,
+                                 4, 6, 4, 7, 4, 8, 5, 3, 5, 4, 5, 5, 5,
+                                 6, 5, 7, 5, 8, 6, 0, 6, 1, 6, 2, 6, 3,
+                                 6, 4, 6, 5, 7, 0, 7, 1, 7, 2, 7, 3, 7,
+                                 4, 7, 5, 8, 0, 8, 1, 8, 2, 8, 3, 8, 4,
+                                 8, 5};
+
+  std::vector<float> B_data{0, 1, 2, 0, 0, 0, 3, 4, 5,
+                            6, 7, 8, 0, 0, 0, 9, 10, 11,
+                            12, 13, 14, 0, 0, 0, 15, 16, 17,
+                            0, 0, 0, 18, 19, 20, 21, 22, 23,
+                            0, 0, 0, 24, 25, 26, 27, 28, 29,
+                            0, 0, 0, 30, 31, 32, 33, 34, 35,
+                            36, 37, 38, 39, 40, 41, 0, 0, 0,
+                            42, 43, 44, 45, 46, 47, 0, 0, 0,
+                            48, 49, 50, 51, 52, 53, 0, 0, 0};
+
+   std::vector<float> Y_result{546, 561, 576, 552, 564, 576, 39, 42, 45,
+                              1410, 1461, 1512, 1362, 1392, 1422, 201, 222, 243,
+                              2274, 2361, 2448, 2172, 2220, 2268, 363, 402, 441,
+                              2784, 2850, 2916, 4362, 4485, 4608, 1551, 1608, 1665,
+                              3540, 3624, 3708, 5604, 5763, 5922, 2037, 2112, 2187,
+                              4296, 4398, 4500, 6846, 7041, 7236, 2523, 2616, 2709,
+                              678, 789, 900, 2892, 3012, 3132, 4263, 4494, 4725,
+                              786, 915, 1044, 3324, 3462, 3600, 4911, 5178, 5445,
+                              894, 1041, 1188, 3756, 3912, 4068, 5559, 5862, 6165};
+
+   Ort::MemoryInfo info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault);
+   Ort::Value::Shape ort_dense_shape{common_shape.data(), common_shape.size()};
+   Ort::Value::Shape ort_values_shape{&indices_shape[0], 1U};  // indices_shape[0] == A_values.size(), so this is a 1-D values shape.
+   auto a_st = Ort::Value::CreateSparseTensor(info, A_values.data(), ort_dense_shape, ort_values_shape);
+   a_st.UseCooIndices(A_indices.data(), A_indices.size());  // Passes the flat element count (two per value).
+
+   auto b_tensor = Ort::Value::CreateTensor(info, B_data.data(), B_data.size(), common_shape.data(), common_shape.size());
+
+   std::vector<Ort::Value> ort_inputs;
+   ort_inputs.push_back(std::move(a_st));
+   ort_inputs.push_back(std::move(b_tensor));
+   const char* input_names[] = {"sparse_A", "dense_B"};  // Same order as ort_inputs above.
+   const char* const output_names[] = {"dense_Y"};
+   Ort::Session session(*ort_env, SPARSE_INPUT_MATMUL_MODEL_URI, Ort::SessionOptions{});
+   auto ort_outputs = session.Run(Ort::RunOptions{}, input_names, ort_inputs.data(), ort_inputs.size(),
+                                  output_names, 1);
+   ASSERT_EQ(ort_outputs.size(), 1U);
+   const auto& dense_Y = ort_outputs[0];
+   ASSERT_TRUE(dense_Y.IsTensor());
+
+   auto result_ts = dense_Y.GetTensorTypeAndShapeInfo();
+   ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, result_ts.GetElementType());
+   ASSERT_EQ(common_shape, result_ts.GetShape());
+
+   const auto* result_vals = dense_Y.GetTensorData<float>();
+   auto result_span = gsl::make_span(result_vals, Y_result.size());  // Length comes from the expected data; the output shape is asserted above.
+   ASSERT_TRUE(std::equal(Y_result.cbegin(), Y_result.cend(), result_span.cbegin(), result_span.cend()));
+}
+#endif // !DISABLE_CONTRIB_OPS
+
 TEST(CApiTest, custom_op_handler) {
   std::cout << "Running custom op inference" << std::endl;
 
diff --git a/onnxruntime/test/shared_lib/test_nontensor_types.cc b/onnxruntime/test/shared_lib/test_nontensor_types.cc
index 8110074638..232b1d8a62 100644
--- a/onnxruntime/test/shared_lib/test_nontensor_types.cc
+++ b/onnxruntime/test/shared_lib/test_nontensor_types.cc
@@ -9,6 +9,8 @@
 #include "core/session/onnxruntime_cxx_api.h"
 #include "test_allocator.h"
 
+#include <gsl/gsl>
+
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 
@@ -306,3 +308,617 @@ TEST(CApiTest, TypeInfoSequence) {
   ASSERT_EQ(seq_type_info.GetSequenceElementType().GetTensorTypeAndShapeInfo().GetElementType(),
             ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64);
 }
+
+TEST(CApiTest, SparseTensorUsingAPI) {  // CreateSparseTensor(info, data, ...) + Use*Indices() for COO, CSR and block-sparse.
+  Ort::MemoryInfo info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault);
+
+  {
+    // COO: 1-D (linear) indices, one per value.
+    const std::vector<int64_t> dense_shape{3, 3};
+    const std::vector<int64_t> values_shape{3};
+    std::vector<int32_t> expected_values = {1, 1, 1};
+    constexpr int64_t values_len = 3;
+    std::vector<int64_t> expected_linear_indices = {2, 3, 5};
+    const std::vector<int64_t> indices_shape{3};
+
+    Ort::Value::Shape ort_dense_shape{dense_shape.data(), dense_shape.size()};
+    Ort::Value::Shape ort_values_shape{&values_len, 1U};  // 1-D values shape {values_len}.
+    auto coo_st = Ort::Value::CreateSparseTensor(info, expected_values.data(), ort_dense_shape, ort_values_shape);
+    coo_st.UseCooIndices(expected_linear_indices.data(), expected_linear_indices.size());
+
+    {  // Type info: sparse tensor carrying the dense shape and element type.
+      auto ti = coo_st.GetTypeInfo();
+      ASSERT_EQ(ONNX_TYPE_SPARSETENSOR, ti.GetONNXType());
+      auto tensor_type_shape = ti.GetTensorTypeAndShapeInfo();
+      ASSERT_EQ(dense_shape, tensor_type_shape.GetShape());
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32, tensor_type_shape.GetElementType());
+      ASSERT_EQ(dense_shape.size(), tensor_type_shape.GetDimensionsCount());
+    }
+
+    {  // The same dense shape/type must be reported by GetTensorTypeAndShapeInfo() on the value itself.
+      auto t_type_shape = coo_st.GetTensorTypeAndShapeInfo();
+      ASSERT_EQ(dense_shape, t_type_shape.GetShape());
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32, t_type_shape.GetElementType());
+      ASSERT_EQ(dense_shape.size(), t_type_shape.GetDimensionsCount());
+    }
+
+    ASSERT_EQ(ORT_SPARSE_COO, coo_st.GetSparseFormat());
+
+    {  // Values: type and shape.
+      auto values_ts = coo_st.GetSparseTensorValuesTypeAndShapeInfo();
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32, values_ts.GetElementType());
+      ASSERT_EQ(values_shape, values_ts.GetShape());
+    }
+
+    {  // Values: contents.
+      const auto* values = coo_st.GetSparseTensorValues<int32_t>();
+      auto val_span = gsl::make_span(values, values_shape[0]);
+      ASSERT_TRUE(std::equal(expected_values.cbegin(), expected_values.cend(), val_span.cbegin(), val_span.cend()));
+    }
+
+    {  // Indices: type, shape and contents.
+      auto indices_ts = coo_st.GetSparseTensorIndicesTypeShapeInfo(ORT_SPARSE_COO_INDICES);
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64, indices_ts.GetElementType());
+      ASSERT_EQ(indices_shape, indices_ts.GetShape());
+
+      size_t num_indices = 0;
+      const int64_t* indices = coo_st.GetSparseTensorIndicesData<int64_t>(ORT_SPARSE_COO_INDICES, num_indices);
+      ASSERT_EQ(num_indices, static_cast<size_t>(indices_shape[0]));
+      auto ind_span = gsl::make_span(indices, num_indices);
+      ASSERT_TRUE(std::equal(expected_linear_indices.cbegin(), expected_linear_indices.cend(), ind_span.cbegin(), ind_span.cend()));
+    }
+  }
+
+  {
+    // CSR test: 3 inner indices (one per value) and 4 outer indices for the {3, 3} dense shape.
+    const std::vector<int64_t> dense_shape{3, 3};
+    const std::vector<int64_t> values_shape{3};
+    const std::vector<int64_t> inner_shape{3};
+    const std::vector<int64_t> outer_shape{4};
+    std::vector<int32_t> expected_values = {1, 1, 1};
+    const std::vector<std::string> expected_values_str = {"1", "1", "1"};  // NOTE(review): not referenced in this section.
+    std::vector<int64_t> expected_inner = {2, 0, 2};
+    std::vector<int64_t> expected_outer = {0, 1, 3, 3};
+
+    Ort::Value::Shape ort_dense_shape{dense_shape.data(), dense_shape.size()};
+    constexpr int64_t values_len = 3;
+    Ort::Value::Shape ort_values_shape{&values_len, 1U};  // 1-D values shape {values_len}.
+    auto csr_st = Ort::Value::CreateSparseTensor(info, expected_values.data(), ort_dense_shape, ort_values_shape);
+    csr_st.UseCsrIndices(expected_inner.data(), expected_inner.size(), expected_outer.data(), expected_outer.size());
+    {  // Type info: sparse tensor carrying the dense shape and element type.
+      auto ti = csr_st.GetTypeInfo();
+      ASSERT_EQ(ONNX_TYPE_SPARSETENSOR, ti.GetONNXType());
+      auto tensor_type_shape = ti.GetTensorTypeAndShapeInfo();
+      ASSERT_EQ(dense_shape, tensor_type_shape.GetShape());
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32, tensor_type_shape.GetElementType());
+      ASSERT_EQ(dense_shape.size(), tensor_type_shape.GetDimensionsCount());
+    }
+
+    {  // The same dense shape/type must be reported by GetTensorTypeAndShapeInfo() on the value itself.
+      auto t_type_shape = csr_st.GetTensorTypeAndShapeInfo();
+      ASSERT_EQ(dense_shape, t_type_shape.GetShape());
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32, t_type_shape.GetElementType());
+      ASSERT_EQ(dense_shape.size(), t_type_shape.GetDimensionsCount());
+    }
+
+    ASSERT_EQ(ORT_SPARSE_CSRC, csr_st.GetSparseFormat());  // After UseCsrIndices() the format enum is ORT_SPARSE_CSRC.
+
+    {  // Values: type and shape.
+      auto values_ts = csr_st.GetSparseTensorValuesTypeAndShapeInfo();
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32, values_ts.GetElementType());
+      ASSERT_EQ(values_shape, values_ts.GetShape());
+    }
+
+    {  // Values: contents.
+      const auto* values = csr_st.GetSparseTensorValues<int32_t>();
+      auto val_span = gsl::make_span(values, expected_values.size());
+      ASSERT_TRUE(std::equal(expected_values.cbegin(), expected_values.cend(), val_span.cbegin(), val_span.cend()));
+    }
+
+    {  // Inner indices: type, shape and contents.
+      auto indices_ts = csr_st.GetSparseTensorIndicesTypeShapeInfo(ORT_SPARSE_CSR_INNER_INDICES);
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64, indices_ts.GetElementType());
+      ASSERT_EQ(inner_shape, indices_ts.GetShape());
+
+      size_t num_indices = 0;
+      const int64_t* indices = csr_st.GetSparseTensorIndicesData<int64_t>(ORT_SPARSE_CSR_INNER_INDICES, num_indices);
+      ASSERT_EQ(num_indices, expected_inner.size());
+      auto ind_span = gsl::make_span(indices, num_indices);
+      ASSERT_TRUE(std::equal(expected_inner.cbegin(), expected_inner.cend(), ind_span.cbegin(), ind_span.cend()));
+    }
+
+    {  // Outer indices: type, shape and contents.
+      auto indices_ts = csr_st.GetSparseTensorIndicesTypeShapeInfo(ORT_SPARSE_CSR_OUTER_INDICES);
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64, indices_ts.GetElementType());
+      ASSERT_EQ(outer_shape, indices_ts.GetShape());
+
+      size_t num_indices = 0;
+      const int64_t* indices = csr_st.GetSparseTensorIndicesData<int64_t>(ORT_SPARSE_CSR_OUTER_INDICES, num_indices);
+      ASSERT_EQ(num_indices, expected_outer.size());
+      auto ind_span = gsl::make_span(indices, num_indices);
+      ASSERT_TRUE(std::equal(expected_outer.cbegin(), expected_outer.cend(), ind_span.cbegin(), ind_span.cend()));
+    }
+  }
+
+  {
+    // BlockSparse test: values shaped {blocks, block_size, block_size} with int32 block coordinates.
+    const std::vector<int64_t> dense_shape{8, 8};
+    constexpr int64_t block_size = 2;
+    const std::vector<int64_t> values_shape{2, block_size, block_size};
+    // Two dense blocks of block_size x block_size elements each, 8 values in total.
+    std::vector<int32_t> data_blocks{
+        1, 2, 3, 4, 5, 6, 7, 8};
+    const std::vector<int64_t> indices_shape{2, 2};  // two blocks by two coordinates
+    // Block coordinates (0, 0) and (0, 1), flattened to match indices_shape.
+    std::vector<int32_t> blocksparse_indices = {
+        0, 0, 0, 1};
+
+    Ort::Value::Shape ort_dense_shape{dense_shape.data(), dense_shape.size()};
+    Ort::Value::Shape ort_values_shape{values_shape.data(), values_shape.size()};
+    auto bsp_st = Ort::Value::CreateSparseTensor(info, data_blocks.data(), ort_dense_shape, ort_values_shape);
+    bsp_st.UseBlockSparseIndices({indices_shape.data(), indices_shape.size()}, blocksparse_indices.data());
+    {  // Type info: sparse tensor carrying the dense shape and element type.
+      auto ti = bsp_st.GetTypeInfo();
+      ASSERT_EQ(ONNX_TYPE_SPARSETENSOR, ti.GetONNXType());
+      auto tensor_type_shape = ti.GetTensorTypeAndShapeInfo();
+      ASSERT_EQ(dense_shape, tensor_type_shape.GetShape());
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32, tensor_type_shape.GetElementType());
+      ASSERT_EQ(dense_shape.size(), tensor_type_shape.GetDimensionsCount());
+    }
+    {  // The same dense shape/type must be reported by GetTensorTypeAndShapeInfo() on the value itself.
+      auto t_type_shape = bsp_st.GetTensorTypeAndShapeInfo();
+      ASSERT_EQ(dense_shape, t_type_shape.GetShape());
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32, t_type_shape.GetElementType());
+      ASSERT_EQ(dense_shape.size(), t_type_shape.GetDimensionsCount());
+    }
+    ASSERT_EQ(ORT_SPARSE_BLOCK_SPARSE, bsp_st.GetSparseFormat());
+    {  // Values: type and shape.
+      auto values_ts = bsp_st.GetSparseTensorValuesTypeAndShapeInfo();
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32, values_ts.GetElementType());
+      ASSERT_EQ(values_shape, values_ts.GetShape());
+    }
+    {  // Values: contents.
+      const auto* values = bsp_st.GetSparseTensorValues<int32_t>();
+      auto val_span = gsl::make_span(values, data_blocks.size());
+      ASSERT_TRUE(std::equal(data_blocks.cbegin(), data_blocks.cend(), val_span.cbegin(), val_span.cend()));
+    }
+    {  // Indices: type, shape and contents.
+      auto indices_ts = bsp_st.GetSparseTensorIndicesTypeShapeInfo(ORT_SPARSE_BLOCK_SPARSE_INDICES);
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32, indices_ts.GetElementType());
+      ASSERT_EQ(indices_shape, indices_ts.GetShape());
+
+      size_t num_indices = 0;
+      const int32_t* indices = bsp_st.GetSparseTensorIndicesData<int32_t>(ORT_SPARSE_BLOCK_SPARSE_INDICES, num_indices);
+      ASSERT_EQ(num_indices, blocksparse_indices.size());  // Counts individual int32 elements (4), not blocks.
+      auto ind_span = gsl::make_span(indices, num_indices);
+      ASSERT_TRUE(std::equal(blocksparse_indices.cbegin(), blocksparse_indices.cend(), ind_span.cbegin(), ind_span.cend()));
+    }
+  }
+}
+
+TEST(CApiTest, SparseTensorFillSparseTensorFormatAPI) {
+  auto allocator = Ort::AllocatorWithDefaultOptions();
+  Ort::MemoryInfo info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault);
+  {
+    // COO
+    const std::vector<int64_t> dense_shape{3, 3};
+    const std::vector<int64_t> values_shape{3};
+    std::vector<int32_t> expected_values = {1, 1, 1};
+    constexpr int64_t values_len = 3;
+    std::vector<int64_t> expected_linear_indices = {2, 3, 5};
+    const std::vector<int64_t> indices_shape{3};
+
+    Ort::Value::Shape ort_dense_shape{dense_shape.data(), dense_shape.size()};
+    auto coo_st = Ort::Value::CreateSparseTensor<int32_t>(allocator, ort_dense_shape);
+    coo_st.FillSparseTensorCoo(info, {&values_len, 1U, {expected_values.data()}},
+                               expected_linear_indices.data(), expected_linear_indices.size());
+    {
+      auto ti = coo_st.GetTypeInfo();
+      ASSERT_EQ(ONNX_TYPE_SPARSETENSOR, ti.GetONNXType());
+      auto tensor_type_shape = ti.GetTensorTypeAndShapeInfo();
+      ASSERT_EQ(dense_shape, tensor_type_shape.GetShape());
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32, tensor_type_shape.GetElementType());
+      ASSERT_EQ(dense_shape.size(), tensor_type_shape.GetDimensionsCount());
+    }
+
+    {
+      auto t_type_shape = coo_st.GetTensorTypeAndShapeInfo();
+      ASSERT_EQ(dense_shape, t_type_shape.GetShape());
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32, t_type_shape.GetElementType());
+      ASSERT_EQ(dense_shape.size(), t_type_shape.GetDimensionsCount());
+    }
+
+    ASSERT_EQ(ORT_SPARSE_COO, coo_st.GetSparseFormat());
+
+    {
+      auto values_ts = coo_st.GetSparseTensorValuesTypeAndShapeInfo();
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32, values_ts.GetElementType());
+      ASSERT_EQ(values_shape, values_ts.GetShape());
+    }
+
+    {
+      const auto* values = coo_st.GetSparseTensorValues<int32_t>();
+      auto val_span = gsl::make_span(values, values_shape[0]);
+      ASSERT_TRUE(std::equal(expected_values.cbegin(), expected_values.cend(), val_span.cbegin(), val_span.cend()));
+    }
+
+    {
+      auto indices_ts = coo_st.GetSparseTensorIndicesTypeShapeInfo(ORT_SPARSE_COO_INDICES);
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64, indices_ts.GetElementType());
+      ASSERT_EQ(indices_shape, indices_ts.GetShape());
+
+      size_t num_indices = 0;
+      const int64_t* indices = coo_st.GetSparseTensorIndicesData<int64_t>(ORT_SPARSE_COO_INDICES, num_indices);
+      ASSERT_EQ(num_indices, static_cast<size_t>(indices_shape[0]));
+      auto ind_span = gsl::make_span(indices, num_indices);
+      ASSERT_TRUE(std::equal(expected_linear_indices.cbegin(), expected_linear_indices.cend(), ind_span.cbegin(), ind_span.cend()));
+    }
+  }
+  {
+    // CSR test
+    const std::vector<int64_t> dense_shape{3, 3};
+    const std::vector<int64_t> values_shape{3};
+    const std::vector<int64_t> inner_shape{3};
+    const std::vector<int64_t> outer_shape{4};
+    const std::vector<int32_t> expected_values = {1, 1, 1};
+    const std::vector<int64_t> expected_inner = {2, 0, 2};
+    const std::vector<int64_t> expected_outer = {0, 1, 3, 3};
+
+    Ort::Value::Shape ort_dense_shape{dense_shape.data(), dense_shape.size()};
+    auto csr_st = Ort::Value::CreateSparseTensor<int32_t>(allocator, ort_dense_shape);
+    csr_st.FillSparseTensorCsr(info, {values_shape.data(), values_shape.size(), {expected_values.data()}},
+                               expected_inner.data(), expected_inner.size(),
+                               expected_outer.data(), expected_outer.size());
+    {
+      auto ti = csr_st.GetTypeInfo();
+      ASSERT_EQ(ONNX_TYPE_SPARSETENSOR, ti.GetONNXType());
+      auto tensor_type_shape = ti.GetTensorTypeAndShapeInfo();
+      ASSERT_EQ(dense_shape, tensor_type_shape.GetShape());
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32, tensor_type_shape.GetElementType());
+      ASSERT_EQ(dense_shape.size(), tensor_type_shape.GetDimensionsCount());
+    }
+
+    {
+      auto t_type_shape = csr_st.GetTensorTypeAndShapeInfo();
+      ASSERT_EQ(dense_shape, t_type_shape.GetShape());
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32, t_type_shape.GetElementType());
+      ASSERT_EQ(dense_shape.size(), t_type_shape.GetDimensionsCount());
+    }
+
+    ASSERT_EQ(ORT_SPARSE_CSRC, csr_st.GetSparseFormat());
+
+    {
+      auto values_ts = csr_st.GetSparseTensorValuesTypeAndShapeInfo();
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32, values_ts.GetElementType());
+      ASSERT_EQ(values_shape, values_ts.GetShape());
+    }
+
+    {
+      const auto* values = csr_st.GetSparseTensorValues<int32_t>();
+      auto val_span = gsl::make_span(values, expected_values.size());
+      ASSERT_TRUE(std::equal(expected_values.cbegin(), expected_values.cend(), val_span.cbegin(), val_span.cend()));
+    }
+
+    {
+      auto indices_ts = csr_st.GetSparseTensorIndicesTypeShapeInfo(ORT_SPARSE_CSR_INNER_INDICES);
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64, indices_ts.GetElementType());
+      ASSERT_EQ(inner_shape, indices_ts.GetShape());
+
+      size_t num_indices = 0;
+      const int64_t* indices = csr_st.GetSparseTensorIndicesData<int64_t>(ORT_SPARSE_CSR_INNER_INDICES, num_indices);
+      ASSERT_EQ(num_indices, expected_inner.size());
+      auto ind_span = gsl::make_span(indices, num_indices);
+      ASSERT_TRUE(std::equal(expected_inner.cbegin(), expected_inner.cend(), ind_span.cbegin(), ind_span.cend()));
+    }
+
+    {
+      auto indices_ts = csr_st.GetSparseTensorIndicesTypeShapeInfo(ORT_SPARSE_CSR_OUTER_INDICES);
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64, indices_ts.GetElementType());
+      ASSERT_EQ(outer_shape, indices_ts.GetShape());
+
+      size_t num_indices = 0;
+      const int64_t* indices = csr_st.GetSparseTensorIndicesData<int64_t>(ORT_SPARSE_CSR_OUTER_INDICES, num_indices);
+      ASSERT_EQ(num_indices, expected_outer.size());
+      auto ind_span = gsl::make_span(indices, num_indices);
+      ASSERT_TRUE(std::equal(expected_outer.cbegin(), expected_outer.cend(), ind_span.cbegin(), ind_span.cend()));
+    }
+  }
+  {
+    // BlockSparse test
+    const std::vector<int64_t> dense_shape{8, 8};
+    constexpr int64_t block_size = 2;
+    const std::vector<int64_t> values_shape{2, block_size, block_size};
+    // Two dense blocks
+    std::vector<int32_t> data_blocks{
+        1, 2, 3, 4, 5, 6, 7, 8};
+    const std::vector<int64_t> indices_shape{2, 2};  // two blocks by two coordinates
+    // (0, 0), (0,1)
+    std::vector<int32_t> blocksparse_indices = {
+        0, 0, 0, 1};
+
+    Ort::Value::Shape ort_dense_shape{dense_shape.data(), dense_shape.size()};
+    auto bsp_st = Ort::Value::CreateSparseTensor<int32_t>(allocator, ort_dense_shape);
+    bsp_st.FillSparseTensorBlockSparse(info, {values_shape.data(), values_shape.size(), {data_blocks.data()}},
+                                       {indices_shape.data(), indices_shape.size()}, blocksparse_indices.data());
+    {
+      auto ti = bsp_st.GetTypeInfo();
+      ASSERT_EQ(ONNX_TYPE_SPARSETENSOR, ti.GetONNXType());
+      auto tensor_type_shape = ti.GetTensorTypeAndShapeInfo();
+      ASSERT_EQ(dense_shape, tensor_type_shape.GetShape());
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32, tensor_type_shape.GetElementType());
+      ASSERT_EQ(dense_shape.size(), tensor_type_shape.GetDimensionsCount());
+    }
+    {
+      auto t_type_shape = bsp_st.GetTensorTypeAndShapeInfo();
+      ASSERT_EQ(dense_shape, t_type_shape.GetShape());
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32, t_type_shape.GetElementType());
+      ASSERT_EQ(dense_shape.size(), t_type_shape.GetDimensionsCount());
+    }
+    ASSERT_EQ(ORT_SPARSE_BLOCK_SPARSE, bsp_st.GetSparseFormat());
+    {
+      auto values_ts = bsp_st.GetSparseTensorValuesTypeAndShapeInfo();
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32, values_ts.GetElementType());
+      ASSERT_EQ(values_shape, values_ts.GetShape());
+    }
+    {
+      const auto* values = bsp_st.GetSparseTensorValues<int32_t>();
+      auto val_span = gsl::make_span(values, data_blocks.size());
+      ASSERT_TRUE(std::equal(data_blocks.cbegin(), data_blocks.cend(), val_span.cbegin(), val_span.cend()));
+    }
+    {
+      auto indices_ts = bsp_st.GetSparseTensorIndicesTypeShapeInfo(ORT_SPARSE_BLOCK_SPARSE_INDICES);
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32, indices_ts.GetElementType());
+      ASSERT_EQ(indices_shape, indices_ts.GetShape());
+
+      size_t num_indices = 0;
+      const int32_t* indices = bsp_st.GetSparseTensorIndicesData<int32_t>(ORT_SPARSE_BLOCK_SPARSE_INDICES, num_indices);
+      ASSERT_EQ(num_indices, blocksparse_indices.size());
+      auto ind_span = gsl::make_span(indices, num_indices);
+      ASSERT_TRUE(std::equal(blocksparse_indices.cbegin(), blocksparse_indices.cend(), ind_span.cbegin(), ind_span.cend()));
+    }
+  }
+}
+
+TEST(CApiTest, SparseTensorFillSparseFormatStringsAPI) {
+  auto allocator = Ort::AllocatorWithDefaultOptions();
+  Ort::MemoryInfo info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault);
+  // Each scope fills a string sparse tensor in one format, then reads back type info, values and indices.
+  {
+    // COO strings: three values in a 3x3 dense shape, addressed by a 1-D list of indices
+    const std::vector<int64_t> dense_shape{3, 3};
+    const std::vector<int64_t> values_shape{3};
+    const std::vector<std::string> expected_values{"1", "1", "1"};
+    const char* const strings[] = {"1", "1", "1"};
+    constexpr int64_t values_len = 3;
+    std::vector<int64_t> expected_linear_indices = {2, 3, 5};
+    const std::vector<int64_t> indices_shape{3};
+
+    Ort::Value::Shape ort_dense_shape{dense_shape.data(), dense_shape.size()};
+    auto coo_st = Ort::Value::CreateSparseTensor(allocator, ort_dense_shape, ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING);
+    coo_st.FillSparseTensorCoo(info, {&values_len, 1U, {strings}},
+                               expected_linear_indices.data(), expected_linear_indices.size());
+    {
+      auto ti = coo_st.GetTypeInfo();
+      ASSERT_EQ(ONNX_TYPE_SPARSETENSOR, ti.GetONNXType());
+      auto tensor_type_shape = ti.GetTensorTypeAndShapeInfo();
+      ASSERT_EQ(dense_shape, tensor_type_shape.GetShape());
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING, tensor_type_shape.GetElementType());
+      ASSERT_EQ(dense_shape.size(), tensor_type_shape.GetDimensionsCount());
+    }
+
+    {
+      auto t_type_shape = coo_st.GetTensorTypeAndShapeInfo();
+      ASSERT_EQ(dense_shape, t_type_shape.GetShape());
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING, t_type_shape.GetElementType());
+      ASSERT_EQ(dense_shape.size(), t_type_shape.GetDimensionsCount());
+    }
+
+    ASSERT_EQ(ORT_SPARSE_COO, coo_st.GetSparseFormat());
+
+    {
+      auto values_ts = coo_st.GetSparseTensorValuesTypeAndShapeInfo();
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING, values_ts.GetElementType());
+      ASSERT_EQ(values_shape, values_ts.GetShape());
+
+      for (size_t i = 0; i < static_cast<size_t>(values_len); ++i) {
+        const auto& ex = expected_values[i];
+        size_t len = coo_st.GetStringTensorElementLength(i);
+        ASSERT_EQ(ex.size(), len);
+        auto buffer = std::make_unique<char[]>(len);
+        coo_st.GetStringTensorElement(len, i, buffer.get());
+        ASSERT_EQ(0, ex.compare(0U, ex.size(), buffer.get(), len));
+      }
+
+      size_t data_len = coo_st.GetStringTensorDataLength();
+      auto buffer = std::make_unique<char[]>(data_len);
+      auto offsets = std::make_unique<size_t[]>(expected_values.size());
+      /// XXX: Do something about this API: only N start offsets are requested here,
+      /// so the last string's length is derived from data_len in the loop below.
+      /// It should add an N + 1 terminating offset, or skip the first zero offset and add the N + 1.
+      coo_st.GetStringTensorContent(buffer.get(), data_len, offsets.get(), expected_values.size());
+      for (size_t i = 0, limit = expected_values.size(); i < limit; ++i) {
+        const auto& ex = expected_values[i];
+        const char* p = &buffer[offsets[i]];
+        size_t len = (i == (limit - 1)) ? (data_len - offsets[i]) : offsets[i + 1] - offsets[i];
+        ASSERT_EQ(ex.size(), len);
+        std::string s(p, len);
+        ASSERT_EQ(ex, s);
+      }
+    }
+
+    {
+      auto indices_ts = coo_st.GetSparseTensorIndicesTypeShapeInfo(ORT_SPARSE_COO_INDICES);
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64, indices_ts.GetElementType());
+      ASSERT_EQ(indices_shape, indices_ts.GetShape());
+
+      size_t num_indices = 0;
+      const int64_t* indices = coo_st.GetSparseTensorIndicesData<int64_t>(ORT_SPARSE_COO_INDICES, num_indices);
+      ASSERT_EQ(num_indices, static_cast<size_t>(indices_shape[0]));
+      auto ind_span = gsl::make_span(indices, num_indices);
+      ASSERT_TRUE(std::equal(expected_linear_indices.cbegin(), expected_linear_indices.cend(), ind_span.cbegin(), ind_span.cend()));
+    }
+  }
+  {
+    // CSR strings: three values in a 3x3 dense shape, with 3 inner and 4 outer indices
+    const std::vector<int64_t> dense_shape{3, 3};
+    const std::vector<int64_t> values_shape{3};
+    const std::vector<int64_t> inner_shape{3};
+    const std::vector<int64_t> outer_shape{4};
+    const std::vector<std::string> expected_values{"1", "1", "1"};
+    const char* const strings[] = {"1", "1", "1"};
+    const std::vector<int64_t> expected_inner{2, 0, 2};
+    const std::vector<int64_t> expected_outer{0, 1, 3, 3};
+
+    Ort::Value::Shape ort_dense_shape{dense_shape.data(), dense_shape.size()};
+    const int64_t values_len = static_cast<int64_t>(expected_values.size());
+    auto csr_st = Ort::Value::CreateSparseTensor(allocator, ort_dense_shape, ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING);
+    csr_st.FillSparseTensorCsr(info, {values_shape.data(), values_shape.size(), {strings}},
+                               expected_inner.data(), expected_inner.size(),
+                               expected_outer.data(), expected_outer.size());
+    {
+      auto ti = csr_st.GetTypeInfo();
+      ASSERT_EQ(ONNX_TYPE_SPARSETENSOR, ti.GetONNXType());
+      auto tensor_type_shape = ti.GetTensorTypeAndShapeInfo();
+      ASSERT_EQ(dense_shape, tensor_type_shape.GetShape());
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING, tensor_type_shape.GetElementType());
+      ASSERT_EQ(dense_shape.size(), tensor_type_shape.GetDimensionsCount());
+    }
+
+    {
+      auto t_type_shape = csr_st.GetTensorTypeAndShapeInfo();
+      ASSERT_EQ(dense_shape, t_type_shape.GetShape());
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING, t_type_shape.GetElementType());
+      ASSERT_EQ(dense_shape.size(), t_type_shape.GetDimensionsCount());
+    }
+
+    ASSERT_EQ(ORT_SPARSE_CSRC, csr_st.GetSparseFormat());
+    {
+      auto values_ts = csr_st.GetSparseTensorValuesTypeAndShapeInfo();
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING, values_ts.GetElementType());
+      ASSERT_EQ(values_shape, values_ts.GetShape());
+
+      for (size_t i = 0; i < static_cast<size_t>(values_len); ++i) {
+        const auto& ex = expected_values[i];
+        size_t len = csr_st.GetStringTensorElementLength(i);
+        ASSERT_EQ(ex.size(), len);
+        auto buffer = std::make_unique<char[]>(len);
+        csr_st.GetStringTensorElement(len, i, buffer.get());
+        ASSERT_EQ(0, ex.compare(0U, ex.size(), buffer.get(), len));
+      }
+
+      size_t data_len = csr_st.GetStringTensorDataLength();
+      auto buffer = std::make_unique<char[]>(data_len);
+      auto offsets = std::make_unique<size_t[]>(expected_values.size());
+      /// XXX: Do something about this API: only N start offsets are requested here,
+      /// so the last string's length is derived from data_len in the loop below.
+      /// It should add an N + 1 terminating offset, or skip the first zero offset and add the N + 1.
+      csr_st.GetStringTensorContent(buffer.get(), data_len, offsets.get(), expected_values.size());
+      for (size_t i = 0, limit = expected_values.size(); i < limit; ++i) {
+        const auto& ex = expected_values[i];
+        const char* p = &buffer[offsets[i]];
+        size_t len = (i == (limit - 1)) ? (data_len - offsets[i]) : offsets[i + 1] - offsets[i];
+        ASSERT_EQ(ex.size(), len);
+        std::string s(p, len);
+        ASSERT_EQ(ex, s);
+      }
+    }
+    {
+      auto indices_ts = csr_st.GetSparseTensorIndicesTypeShapeInfo(ORT_SPARSE_CSR_INNER_INDICES);
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64, indices_ts.GetElementType());
+      ASSERT_EQ(inner_shape, indices_ts.GetShape());
+
+      size_t num_indices = 0;
+      const int64_t* indices = csr_st.GetSparseTensorIndicesData<int64_t>(ORT_SPARSE_CSR_INNER_INDICES, num_indices);
+      ASSERT_EQ(num_indices, expected_inner.size());
+      auto ind_span = gsl::make_span(indices, num_indices);
+      ASSERT_TRUE(std::equal(expected_inner.cbegin(), expected_inner.cend(), ind_span.cbegin(), ind_span.cend()));
+    }
+
+    {
+      auto indices_ts = csr_st.GetSparseTensorIndicesTypeShapeInfo(ORT_SPARSE_CSR_OUTER_INDICES);
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64, indices_ts.GetElementType());
+      ASSERT_EQ(outer_shape, indices_ts.GetShape());
+
+      size_t num_indices = 0;
+      const int64_t* indices = csr_st.GetSparseTensorIndicesData<int64_t>(ORT_SPARSE_CSR_OUTER_INDICES, num_indices);
+      ASSERT_EQ(num_indices, expected_outer.size());
+      auto ind_span = gsl::make_span(indices, num_indices);
+      ASSERT_TRUE(std::equal(expected_outer.cbegin(), expected_outer.cend(), ind_span.cbegin(), ind_span.cend()));
+    }
+  }
+  {
+    // BlockSparse strings: two 2x2 blocks of values in an 8x8 dense shape
+    const std::vector<int64_t> dense_shape{8, 8};
+    constexpr int64_t block_size = 2;
+    const std::vector<int64_t> values_shape{2, block_size, block_size};
+    // Two dense blocks
+    const std::vector<std::string> data_blocks{
+        "1", "2", "3", "4", "5", "6", "7", "8"};
+    const char* const strings[] = {"1", "2", "3", "4", "5", "6", "7", "8"};
+    const std::vector<int64_t> indices_shape{2, 2};  // two blocks by two coordinates
+    // (0, 0), (0,1)
+    std::vector<int32_t> blocksparse_indices = {
+        0, 0, 0, 1};
+
+    Ort::Value::Shape ort_dense_shape{dense_shape.data(), dense_shape.size()};
+    auto bsp_st = Ort::Value::CreateSparseTensor(allocator, ort_dense_shape, ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING);
+    bsp_st.FillSparseTensorBlockSparse(info, {values_shape.data(), values_shape.size(), {strings}},
+                                       {indices_shape.data(), indices_shape.size()}, blocksparse_indices.data());
+    {
+      auto ti = bsp_st.GetTypeInfo();
+      ASSERT_EQ(ONNX_TYPE_SPARSETENSOR, ti.GetONNXType());
+      auto tensor_type_shape = ti.GetTensorTypeAndShapeInfo();
+      ASSERT_EQ(dense_shape, tensor_type_shape.GetShape());
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING, tensor_type_shape.GetElementType());
+      ASSERT_EQ(dense_shape.size(), tensor_type_shape.GetDimensionsCount());
+    }
+    {
+      auto t_type_shape = bsp_st.GetTensorTypeAndShapeInfo();
+      ASSERT_EQ(dense_shape, t_type_shape.GetShape());
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING, t_type_shape.GetElementType());
+      ASSERT_EQ(dense_shape.size(), t_type_shape.GetDimensionsCount());
+    }
+    ASSERT_EQ(ORT_SPARSE_BLOCK_SPARSE, bsp_st.GetSparseFormat());
+    {
+      auto values_ts = bsp_st.GetSparseTensorValuesTypeAndShapeInfo();
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING, values_ts.GetElementType());
+      ASSERT_EQ(values_shape, values_ts.GetShape());
+
+      for (size_t i = 0, limit = data_blocks.size(); i < limit; ++i) {
+        const auto& ex = data_blocks[i];
+        size_t len = bsp_st.GetStringTensorElementLength(i);
+        ASSERT_EQ(ex.size(), len);
+        auto buffer = std::make_unique<char[]>(len);
+        bsp_st.GetStringTensorElement(len, i, buffer.get());
+        ASSERT_EQ(0, ex.compare(0U, ex.size(), buffer.get(), len));
+      }
+
+      size_t data_len = bsp_st.GetStringTensorDataLength();
+      auto buffer = std::make_unique<char[]>(data_len);
+      /// XXX: Do something about this API: only N start offsets are requested here,
+      /// so the last string's length is derived from data_len in the loop below.
+      /// It should add an N + 1 terminating offset, or skip the first zero offset and add the N + 1.
+      auto offsets = std::make_unique<size_t[]>(data_blocks.size());
+      bsp_st.GetStringTensorContent(buffer.get(), data_len, offsets.get(), data_blocks.size());
+      for (size_t i = 0, limit = data_blocks.size(); i < limit; ++i) {
+        const auto& ex = data_blocks[i];
+        const char* p = &buffer[offsets[i]];
+        size_t len = (i == (limit - 1)) ? (data_len - offsets[i]) : offsets[i + 1] - offsets[i];
+        ASSERT_EQ(ex.size(), len);
+        std::string s(p, len);
+        ASSERT_EQ(ex, s);
+      }
+    }
+    {
+      auto indices_ts = bsp_st.GetSparseTensorIndicesTypeShapeInfo(ORT_SPARSE_BLOCK_SPARSE_INDICES);
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32, indices_ts.GetElementType());
+      ASSERT_EQ(indices_shape, indices_ts.GetShape());
+
+      size_t num_indices = 0;
+      const int32_t* indices = bsp_st.GetSparseTensorIndicesData<int32_t>(ORT_SPARSE_BLOCK_SPARSE_INDICES, num_indices);
+      ASSERT_EQ(num_indices, blocksparse_indices.size());
+      auto ind_span = gsl::make_span(indices, num_indices);
+      ASSERT_TRUE(std::equal(blocksparse_indices.cbegin(), blocksparse_indices.cend(), ind_span.cbegin(), ind_span.cend()));
+    }
+  }
+}
\ No newline at end of file
diff --git a/tools/ci_build/github/linux/ort_minimal/build_minimal_ort_android_baseline_and_report_bin_size.sh b/tools/ci_build/github/linux/ort_minimal/build_minimal_ort_android_baseline_and_report_bin_size.sh
index 13a0c79144..8da5048e43 100644
--- a/tools/ci_build/github/linux/ort_minimal/build_minimal_ort_android_baseline_and_report_bin_size.sh
+++ b/tools/ci_build/github/linux/ort_minimal/build_minimal_ort_android_baseline_and_report_bin_size.sh
@@ -29,7 +29,7 @@ python3 /onnxruntime_src/tools/ci_build/build.py \
     --include_ops_by_config /home/onnxruntimedev/.test_data/include_no_operators.config
 
 # set current size limit to BINARY_SIZE_LIMIT_IN_BYTES.
-BINARY_SIZE_LIMIT_IN_BYTES=1235000
+BINARY_SIZE_LIMIT_IN_BYTES=1255000
 echo "The current preset binary size limit is $BINARY_SIZE_LIMIT_IN_BYTES"
 python3 /onnxruntime_src/tools/ci_build/github/linux/ort_minimal/check_build_binary_size.py \
     --threshold=$BINARY_SIZE_LIMIT_IN_BYTES \