diff --git a/include/onnxruntime/core/framework/sparse_tensor.h b/include/onnxruntime/core/framework/sparse_tensor.h
index af66a80874..6a0596882a 100644
--- a/include/onnxruntime/core/framework/sparse_tensor.h
+++ b/include/onnxruntime/core/framework/sparse_tensor.h
@@ -7,13 +7,15 @@
 #include "core/framework/tensor_shape.h"
 #include "core/framework/tensor.h"
 
+struct OrtValue;
+
 namespace onnxruntime {
 
 class IDataTransfer;
 class DataTransferManager;
 
 /**
- * @brief This is a Sparse Format enumeration representing bitflags
+ * @brief This is a Sparse Format enumeration
  * 
  * 
  */
@@ -59,8 +61,8 @@ class SparseTensor final {
   /// </summary>
   /// <param name="elt_type">MlDataType</param>
   /// <param name="dense_shape">a shape of original tensor in dense form</param>
-  /// <param name="values_shape">shape for user supplied values</param>
-  /// <param name="values_data">a pointer to values</param>
+  /// <param name="values_shape">shape for user supplied values. Use {0} shape for fully sparse tensors.</param>
+  /// <param name="values_data">a pointer to values. Use nullptr for fully sparse tensors.</param>
   /// <param name="location">description of the user allocated memory</param>
   SparseTensor(MLDataType elt_type,
                const TensorShape& dense_shape,
@@ -70,7 +72,7 @@ class SparseTensor final {
 
   /// <summary>
   /// Use this constructor to hold sparse data in the buffer
-  /// allocated with the specificed allocator. Use Make*() methods
+  /// allocated with the specified allocator. Use Make*() methods
   /// to populate the instance with data which will be copied into the
   /// allocated buffer.
   /// </summary>
@@ -87,6 +89,57 @@ class SparseTensor final {
 
   ORT_DISALLOW_COPY_AND_ASSIGNMENT(SparseTensor);
 
+  /// <summary>
+  /// The factory function creates an instance of SparseTensor on the heap
+  /// using appropriate constructor and initializes OrtValue instance with it.
+  /// </summary>
+  /// <param name="elt_type">element data type</param>
+  /// <param name="dense_shape">dense shape of the sparse tensor</param>
+  /// <param name="values_shape">values shape. Use {0} for fully sparse tensors.</param>
+  /// <param name="values_data">pointer to a user allocated buffer. Use nullptr for fully sparse tensors.</param>
+  /// <param name="location">description of the user allocated buffer</param>
+  /// <param name="ort_value">default constructed input/output ort_value</param>
+  static void InitOrtValue(MLDataType elt_type,
+                           const TensorShape& dense_shape,
+                           const TensorShape& values_shape,
+                           void* values_data,
+                           const OrtMemoryInfo& location,
+                           OrtValue& ort_value);
+
+  /// <summary>
+  /// The factory function creates an instance of SparseTensor on the heap
+  /// using appropriate constructor and initializes OrtValue instance with it.
+  /// </summary>
+  /// <param name="elt_type">element data type</param>
+  /// <param name="dense_shape">dense shape of the sparse tensor</param>
+  /// <param name="allocator">allocator to use</param>
+  /// <param name="ort_value">default constructed input/output ort_value</param>
+  static void InitOrtValue(MLDataType elt_type,
+                           const TensorShape& dense_shape,
+                           std::shared_ptr<IAllocator> allocator,
+                           OrtValue& ort_value);
+
+  /// <summary>
+  /// The function will check if the OrtValue is allocated,
+  /// fetch the containing SparseTensor instance or throw if it
+  /// does not contain one. It will check that the SparseTensor has
+  /// sparse format set (i.e. fully constructed).
+  /// </summary>
+  /// <param name="v">OrtValue instance</param>
+  /// <returns>const SparseTensor Reference</returns>
+  static const SparseTensor& GetSparseTensorFromOrtValue(const OrtValue& v);
+
+  /// <summary>
+  /// The function will check if the OrtValue is allocated,
+  /// fetch the containing SparseTensor instance or throw if it
+  /// does not contain one. It will check that the SparseTensor does not
+  /// have sparse format set and will return a non-const reference so indices
+  /// can be added to it.
+  /// </summary>
+  /// <param name="v">OrtValue</param>
+  /// <returns>non-const reference to SparseTensor</returns>
+  static SparseTensor& GetSparseTensorFromOrtValue(OrtValue& v);
+
   /// <summary>
   // Returns the number of non-zero values (aka "NNZ")
   // For block sparse formats this may include some zeros in the blocks
@@ -195,7 +248,7 @@ class SparseTensor final {
   /// index shape would be 1-D (values_count) or it must be twice the number of values
   /// in which case its shape would be 2-D (values_count, 2)
   /// </summary>
-  /// <param name="indices">user allocated buffer span</param>
+  /// <param name="indices">user allocated buffer span. Use empty span for fully sparse tensors.</param>
   /// <returns>Status</returns>
   Status UseCooIndices(gsl::span<int64_t> indices);
 
@@ -209,13 +262,25 @@ class SparseTensor final {
   ///
   /// Values shape is supplied at construction time and its Size() must match values_count.
   /// </summary>
-  /// <param name="values_count"></param>
-  /// <param name="values_data"></param>
+  /// <param name="values_count">Use 0 for fully sparse tensors.</param>
+  /// <param name="values_data">pointer to a buffer to be copied. Use nullptr for fully sparse tensors.</param>
   /// <param name="indices"></param>
   /// <returns></returns>
   Status MakeCooData(const IDataTransfer& data_transfer, const OrtMemoryInfo& data_location,
                      size_t values_count, const void* values_data, gsl::span<const int64_t> indices);
 
+  /// <summary>
+  /// The method allocates a single contiguous buffer and creates instances of std::strings in it, with
+  /// copies of the supplied zero-terminated strings followed by COO indices.
+  /// All data is assumed to be on CPU and the allocator supplied must be
+  /// a CPU based allocator.
+  /// </summary>
+  /// <param name="string_count">use 0 for fully sparse tensors</param>
+  /// <param name="strings">array of char* pointers. use nullptr for fully sparse tensors</param>
+  /// <param name="indices">span of indices. Use empty span for fully sparse tensors.</param>
+  /// <returns>Status</returns>
+  Status MakeCooStrings(size_t string_count, const char* const* strings, gsl::span<const int64_t> indices);
+
   /// <summary>
   /// Gives mutable access to Coo buffers so they can be populated
   /// </summary>
@@ -234,8 +299,8 @@ class SparseTensor final {
   /// Allocates memory for values and index and returns a mutator so
   /// data can be copied into the buffer.
   /// </summary>
-  /// <param name="values_count"></param>
-  /// <param name="index_count"></param>
+  /// <param name="values_count">use 0 for fully sparse tensors</param>
+  /// <param name="index_count">use 0 for fully sparse tensors</param>
   /// <returns></returns>
   CooMutator MakeCooData(size_t values_count, size_t index_count);
 
@@ -255,17 +320,17 @@ class SparseTensor final {
   };
 
   /// <summary>
-  /// Returns Csr indices readonly view
+  /// Returns Csr indices read only view
   /// </summary>
   /// <returns></returns>
   CsrView AsCsr() const;
 
   /// <summary>
   /// This function will use Csr indices contained within the user allocated buffers.
-  /// The lifespan of the buffers must exclipse the lifespan of sparse tensor instance.
+  /// The lifespan of the buffers must eclipse the lifespan of sparse tensor instance.
   /// </summary>
-  /// <param name="inner_index"></param>
-  /// <param name="outer_index"></param>
+  /// <param name="inner_index">User allocated buffer span. Use empty span for fully sparse tensors</param>
+  /// <param name="outer_index">User allocated buffer span. Use empty span for fully sparse tensors</param>
   /// <returns></returns>
   Status UseCsrIndices(gsl::span<int64_t> inner_index, gsl::span<int64_t> outer_index);
 
@@ -275,10 +340,10 @@ class SparseTensor final {
   /// </summary>
   /// <param name="data_transfer"></param>
   /// <param name="data_location"></param>
-  /// <param name="values_count"></param>
-  /// <param name="values_data"></param>
-  /// <param name="inner_index"></param>
-  /// <param name="outer_index"></param>
+  /// <param name="values_count">use 0 for fully sparse tensors</param>
+  /// <param name="values_data">pointer to data to be copied. Use nullptr for fully sparse tensors.</param>
+  /// <param name="inner_index">inner index to be copied. Use empty span for fully sparse tensors.</param>
+  /// <param name="outer_index">outer index to be copied. Use empty span for fully sparse tensors.</param>
   /// <returns></returns>
   Status MakeCsrData(const IDataTransfer& data_transfer,
                      const OrtMemoryInfo& data_location,
@@ -286,6 +351,21 @@ class SparseTensor final {
                      gsl::span<const int64_t> inner_index,
                      gsl::span<const int64_t> outer_index);
 
+  /// <summary>
+  /// The method allocates a single contiguous buffer and creates instances of std::strings in it, with
+  /// copies of the supplied zero-terminated strings followed by CSR inner and outer indices.
+  /// All data is assumed to be on CPU and the allocator supplied must be
+  /// a CPU based allocator
+  /// </summary>
+  /// <param name="string_count"></param>
+  /// <param name="strings">array of char* pointers</param>
+  /// <param name="inner_index">inner index to be copied. Use empty span for fully sparse tensors.</param>
+  /// <param name="outer_index">outer index to be copied. Use empty span for fully sparse tensors.</param>
+  /// <returns></returns>
+  Status MakeCsrStrings(size_t string_count, const char* const* strings,
+                        gsl::span<const int64_t> inner_index,
+                        gsl::span<const int64_t> outer_index);
+
   /// <summary>
   /// Give writable access to Csr values and indices
   /// </summary>
@@ -307,9 +387,9 @@ class SparseTensor final {
   /// Allocates memory for values and index and returns mutator so
   /// data can be populated.
   /// </summary>
-  /// <param name="values_count"></param>
-  /// <param name="inner_index_count"></param>
-  /// <param name="outer_index_count"></param>
+  /// <param name="values_count">Use 0 for fully sparse tensors.</param>
+  /// <param name="inner_index_count">Use 0 for fully sparse tensors.</param>
+  /// <param name="outer_index_count">Use 0 for fully sparse tensors.</param>
   /// <returns></returns>
   CsrMutator MakeCsrData(size_t values_count, size_t inner_index_count, size_t outer_index_count);
 
@@ -338,8 +418,8 @@ class SparseTensor final {
   /// were supplied to the constructor. The supplied buffer lifespan must eclipse the life
   /// of sparse tensor instance.
   /// </summary>
-  /// <param name="indices_shape"></param>
-  /// <param name="indices_data"></param>
+  /// <param name="indices_shape">Use {0} for fully sparse tensors.</param>
+  /// <param name="indices_data">Ptr to user allocated buffer. Use nullptr for fully sparse tensors.</param>
   /// <returns></returns>
   Status UseBlockSparseIndices(const TensorShape& indices_shape, int32_t* indices_data);
 
@@ -350,20 +430,35 @@ class SparseTensor final {
   ///
   // The shape of the index is must be at least 2-D and must contain one tuple per each of
   // the value blocks that  were supplied to the constructor. Each index tuple is a
-  // (row, col) coordindate of the values block in a dense matrix.
+  // (row, col) coordinates of the values block in a dense matrix.
   /// </summary>
   /// <param name="data_transfer"></param>
   /// <param name="data_location"></param>
-  /// <param name="values_shape"></param>
-  /// <param name="values_data"></param>
-  /// <param name="indices_shape"></param>
-  /// <param name="indices_data"></param>
+  /// <param name="values_shape">The shape is expected to be at least 3-D. However, use {0} for fully sparse tensors.</param>
+  /// <param name="values_data">Pointer to a data to be copied. Use nullptr for fully sparse tensors.</param>
+  /// <param name="indices_shape">The shape is expected to be 2-D. However, you can use {0} for fully sparse tensors.</param>
+  /// <param name="indices_data">Pointer to index data to be copied. Use nullptr for fully sparse tensors.</param>
   /// <returns></returns>
   Status MakeBlockSparseData(const IDataTransfer& data_transfer,
                              const OrtMemoryInfo& data_location,
                              const TensorShape& values_shape, const void* values_data,
                              const TensorShape& indices_shape, const int32_t* indices_data);
 
+
+  /// <summary>
+  /// The method allocates a single contiguous buffer and creates instances of std::strings in it, with
+  /// copies of the supplied zero-terminated strings followed by block sparse indices.
+  /// All data is assumed to be on CPU and the allocator supplied must be
+  /// a CPU based allocator.
+  /// </summary>
+  /// <param name="values_shape">Use {0} shape for fully sparse tensors</param>
+  /// <param name="strings">array of char* ptrs, use nullptr for fully sparse tensor</param>
+  /// <param name="indices_shape">Use {0} for fully sparse tensors</param>
+  /// <param name="indices_data">use nullptr for fully sparse tensors</param>
+  /// <returns></returns>
+  Status MakeBlockSparseStrings(const TensorShape& values_shape, const char* const* strings,
+                                const TensorShape& indices_shape, const int32_t* indices_data);
+
   /// <summary>
   /// Mutable data access
   /// </summary>
@@ -383,8 +478,8 @@ class SparseTensor final {
   /// Allocates memory for values and index and returns mutator so
   /// data can be populated
   /// </summary>
-  /// <param name="values_shape"></param>
-  /// <param name="indices_shape"></param>
+  /// <param name="values_shape">Shape is expected to be 3-D, use {0} for fully sparse tensors</param>
+  /// <param name="indices_shape">Shape is expected to be 2-D, use {0} for fully sparse tensors </param>
   /// <returns></returns>
   BlockSparseMutator MakeBlockSparseData(const TensorShape& values_shape, const TensorShape& indices_shape);
 
@@ -416,6 +511,7 @@ class SparseTensor final {
 
   Status ValidateCsrIndices(size_t values_count, size_t inner_size, size_t outer_size) const;
   void InitCsrIndices(size_t inner_size, const int64_t* inner, size_t outer_size, const int64_t* outer);
+  void InitBlockSparseIndices(const TensorShape& indices_shape, int32_t* indices_data);
 
   SparseFormat format_;                        // sparse format enum value
   TensorShape dense_shape_;                    // a shape of a corresponding dense tensor
diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h
index 868695c7f7..dda996234e 100644
--- a/include/onnxruntime/core/session/onnxruntime_c_api.h
+++ b/include/onnxruntime/core/session/onnxruntime_c_api.h
@@ -120,7 +120,6 @@ typedef enum ONNXTensorElementDataType {
   ONNX_TENSOR_ELEMENT_DATA_TYPE_BFLOAT16     // Non-IEEE floating-point format based on IEEE754 single-precision
 } ONNXTensorElementDataType;
 
-
 // Synced with onnx TypeProto oneof
 typedef enum ONNXType {
   ONNX_TYPE_UNKNOWN,
@@ -132,8 +131,7 @@ typedef enum ONNXType {
 } ONNXType;
 
 // These types are synced with internal
-// SparseFormatFlags but are not exposed
-// as flags
+// SparseFormatFlags
 typedef enum OrtSparseFormat {
   ORT_SPARSE_UNDEFINED = 0,
   ORT_SPARSE_COO = 0x1,
@@ -141,6 +139,13 @@ typedef enum OrtSparseFormat {
   ORT_SPARSE_BLOCK_SPARSE = 0x4
 } OrtSparseFormat;
 
+// Identifies which set of sparse tensor indices is being queried
+enum OrtSparseIndicesFormat {
+  ORT_SPARSE_COO_INDICES,  // COO format indices
+  ORT_SPARSE_CSR_INNER_INDICES,  // CSR format inner indices
+  ORT_SPARSE_CSR_OUTER_INDICES,  // CSR format outer indices
+  ORT_SPARSE_BLOCK_SPARSE_INDICES  // BlockSparse format indices
+};
 
 typedef enum OrtLoggingLevel {
   ORT_LOGGING_LEVEL_VERBOSE,
@@ -589,23 +594,36 @@ struct OrtApi {
   ORT_API2_STATUS(FillStringTensor, _Inout_ OrtValue* value, _In_ const char* const* s, size_t s_len);
 
   /**
-     * \param value A tensor created from OrtCreateTensor... function.
-     * \param len total data length, not including the trailing '\0' chars.
+     * Obtain a total length of strings contained within a tensor.
+     * For sparse tensors it returns the total length of values (nnz) strings.
+     * \param[in] value A tensor created from OrtCreateTensor... function.
+     * \param[out] len total data length, not including the trailing '\0' chars.
      */
   ORT_API2_STATUS(GetStringTensorDataLength, _In_ const OrtValue* value, _Out_ size_t* len);
 
   /**
-     * \param s string contents. Each string is NOT null-terminated.
-     * \param value A tensor created from OrtCreateTensor... function.
-     * \param s_len total data length, get it from OrtGetStringTensorDataLength
+     * This API returns all of the UTF-8 encoded strings that are contained within a tensor
+     * or in non-empty values of a sparse tensor in one single buffer. Use offsets to calculate
+     * the length of each string such as len[i] = offsets[i + 1] - offsets[i] except the last
+     * string for which the length is calculated as total_len - offsets[i].
+     * 
+     * \param[in] value A tensor created from OrtCreateTensor... API or a sparse tensor
+     *   created with OrtCreateSparseTensor... API.
+     * \param[in,out] s string contents. Each string is NOT null-terminated.
+     * \param[in] s_len total data length, get it from OrtGetStringTensorDataLength
+     * \param[in,out] offsets pointer to a preallocated buffer where offsets for each of the string
+     *        element are returned. The number of offsets must match the number of string elements.
+     * \param[in] offsets_len number of offsets expected in the buffer.
      */
   ORT_API2_STATUS(GetStringTensorContent, _In_ const OrtValue* value, _Out_writes_bytes_all_(s_len) void* s,
                   size_t s_len, _Out_writes_all_(offsets_len) size_t* offsets, size_t offsets_len);
 
-  /**
-     * Don't free the 'out' value
-     */
-  ORT_API2_STATUS(CastTypeInfoToTensorInfo, _In_ const OrtTypeInfo*,
+  /** Retrieves OrtTensorTypeAndShapeInfo part of the OrtTypeInfo
+    * 
+    * \param[in] type_info
+    * \param[out] out a returned ptr. Don't free the 'out' value, it is owned by type_info
+    */
+  ORT_API2_STATUS(CastTypeInfoToTensorInfo, _In_ const OrtTypeInfo* type_info,
                   _Outptr_result_maybenull_ const OrtTensorTypeAndShapeInfo** out);
 
   /**
@@ -647,25 +665,39 @@ struct OrtApi {
   ORT_API2_STATUS(GetTensorShapeElementCount, _In_ const OrtTensorTypeAndShapeInfo* info, _Out_ size_t* out);
 
   /**
- * \param out Should be freed by ReleaseTensorTypeAndShapeInfo after use
- */
+   * Returns data type and shape iff OrtValue contains a Tensor or a SparseTensor.
+   * For sparse tensors it returns a dense shape of the tensor.
+   * 
+   * \param[in] value OrtValue that contains tensor or a sparse tensor
+   * \param[out] out Should be freed by ReleaseTensorTypeAndShapeInfo after use
+   */
   ORT_API2_STATUS(GetTensorTypeAndShape, _In_ const OrtValue* value, _Outptr_ OrtTensorTypeAndShapeInfo** out);
 
   /**
- * Get the type information of an OrtValue
- * \param value
- * \param out The returned value should be freed by ReleaseTypeInfo after use
- */
+   * Get the type information of an OrtValue. API works for tensors and sparse tensors.
+   * 
+   * \param[in] value
+   * \param[out] out The returned value should be freed by ReleaseTypeInfo after use
+   */
   ORT_API2_STATUS(GetTypeInfo, _In_ const OrtValue* value, _Outptr_result_maybenull_ OrtTypeInfo** out);
 
   ORT_API2_STATUS(GetValueType, _In_ const OrtValue* value, _Out_ enum ONNXType* out);
 
-  ORT_API2_STATUS(CreateMemoryInfo, _In_ const char* name1, enum OrtAllocatorType type, int id1,
-                  enum OrtMemType mem_type1, _Outptr_ OrtMemoryInfo** out);
+  /**
+   * Creates an instance of OrtMemoryInfo. It must be freed by ReleaseMemoryInfo after use.
+   * This may describe one of the existing ORT allocator types OR a custom allocator.
+   * 
+   * \param[in] name such as "cpu", "gpu"
+   * \param[in] type one of the enum values
+   * \param[in] id device ID. For GPU, the gpu id.
+   * \param[in] mem_type Memory type enum value.
+   */
+  ORT_API2_STATUS(CreateMemoryInfo, _In_ const char* name, enum OrtAllocatorType type, int id,
+                  enum OrtMemType mem_type, _Outptr_ OrtMemoryInfo** out);
 
   /**
- * Convenience function for special case of CreateMemoryInfo, for the CPU allocator. Uses name = "Cpu" and id = 0.
- */
+   * Convenience function for special case of CreateMemoryInfo, for the CPU allocator. Uses name = "Cpu" and id = 0.
+   */
   ORT_API2_STATUS(CreateCpuMemoryInfo, enum OrtAllocatorType type, enum OrtMemType mem_type1,
                   _Outptr_ OrtMemoryInfo** out);
 
@@ -990,13 +1022,21 @@ struct OrtApi {
                   _In_ int providers_length);
 
   /**
-     * \param value - A tensor created from OrtCreateTensor... function.
-     * \param index - index of string tensor element, length of element at index will be returned.
-     * \param out - number of UTF-8 bytes that the string contains
+     * This API returns the length of the string element at [index]. For sparse tensors
+     * it will return the length of a string element of sparse values. It is an error to request
+     * an out of bounds element.
+     * 
+     * \param[in] value - A tensor created from OrtCreateTensor... function.
+     * \param[in] index - flat index of string tensor element, length of element at index will be returned.
+     * \param[out] out - number of UTF-8 bytes that the string contains
      */
   ORT_API2_STATUS(GetStringTensorElementLength, _In_ const OrtValue* value, size_t index, _Out_ size_t* out);
 
   /**
+     * This API will return a copy of the UTF-8 data contained within a string element at the specified index.
+     * For sparse tensors it would return a string element of sparse values. It is an error to request an out
+     * of bounds element.
+     * 
      * \param s string element contents in UTF-8 encoding. The string is NOT null-terminated.
      * \param value A tensor created from OrtCreateTensor... function.
      * \param s_len element length, get it from OrtGetStringTensorElementLength.
@@ -1472,12 +1512,15 @@ struct OrtApi {
    * Registers a custom allocator instance with the env to enable
    * sharing between multiple sessions that use the same env instance.
    * Returns an error if an allocator with the same OrtMemoryInfo is already registered.
-   * \param env OrtEnv instance (must be non-null).
-   * \param allocator user provided allocator (must be non-null).
+   * 
    * The behavior of this API is exactly the same as CreateAndRegisterAllocator() except
    * instead of ORT creating an allocator based on provided info, in this case 
    * ORT uses the user-provided custom allocator.
    * See docs/C_API.md for details.
+   * 
+   * \param[in,out] env OrtEnv instance (must be non-null).
+   * \param[in] allocator user provided allocator (must be non-null).
+   * 
   */
   ORT_API2_STATUS(RegisterAllocator, _Inout_ OrtEnv* env, _In_ OrtAllocator* allocator);
 
@@ -1489,6 +1532,212 @@ struct OrtApi {
   */
   ORT_API2_STATUS(UnregisterAllocator, _Inout_ OrtEnv* env,
                   _In_ const OrtMemoryInfo* mem_info);
+
+  /**
+   * Sets *out to 1 iff an OrtValue is a SparseTensor, and 0 otherwise
+   * 
+   * \param[in] value existing OrtValue
+   * \param[out] out unless an error occurs, contains 1 iff the value contains an instance
+   *  of sparse tensor or 0 otherwise.
+   */
+  ORT_API2_STATUS(IsSparseTensor, _In_ const OrtValue* value, _Out_ int* out);
+
+  /**
+   * Create an OrtValue with a sparse tensor that is empty.
+   * Use FillSparseTensor<Format>() functions to populate sparse tensor with non-zero values and
+   * format specific indices data.
+   * Use ReleaseValue to destroy the sparse tensor, this will also release the buffer inside the output value
+   * if any was allocated.
+   * \param[in,out] allocator allocator to use when performing an allocation. Allocation will be performed
+   *   by FillSparseTensor<Format>() APIs. The lifespan of the allocator instance must eclipse the lifespan
+   *   of this sparse tensor instance as the same allocator will be used to free memory.
+   * \param[in] dense_shape shape of the original dense tensor
+   * \param[in] dense_shape_len number of shape dimensions being passed
+   * \param[in] type must be one of TENSOR_ELEMENT_DATA_TYPE_xxxx
+   * \param[out] out Should be freed by calling ReleaseValue
+   * \return OrtStatus*
+   */
+  ORT_API2_STATUS(CreateSparseTensorAsOrtValue, _Inout_ OrtAllocator* allocator, _In_ const int64_t* dense_shape,
+                  size_t dense_shape_len, ONNXTensorElementDataType type, _Outptr_ OrtValue** out);
+
+  /**
+   * This API populates an empty sparse tensor that was created using CreateSparseTensorAsOrtValue API.
+   * The API will allocate required memory and copy the supplied NNZ values and COO indices into that memory allocation.
+   * Memory allocation is performed using the allocator that was specified with CreateSparseTensorAsOrtValue.
+   * 
+   * \param[in,out] ort_value OrtValue to populate with data
+   * \param[in] data_mem_info serves to identify the location of the data to be copied. If the allocator specified
+   *  at the creation time has memory info that is not the same as data_mem_info argument to this function a cross-device copy will be performed.
+   *  String data is assumed to be on CPU and will only be copied into a CPU allocated buffer.
+   * \param[in] values_shape pointer to values shape array
+   * \param[in] values_shape_len length of the values_shape
+   * \param[in] values pointer to an array of values. For strings, pass const char**.
+   * \param[in] indices_data pointer to a location of COO indices
+   * \param[in] indices_num number of COO indices
+   */
+  ORT_API2_STATUS(FillSparseTensorCoo, _Inout_ OrtValue* ort_value, _In_ const OrtMemoryInfo* data_mem_info,
+                  _In_ const int64_t* values_shape, size_t values_shape_len, _In_ const void* values,
+                  _In_ const int64_t* indices_data, size_t indices_num);
+
+  /**
+   * This API populates an empty sparse tensor that was created using CreateSparseTensorAsOrtValue API.
+   * The API will allocate required memory and copy the supplied NNZ values and CSR indices into that memory allocation.
+   * Memory allocation is performed using the allocator that was specified with CreateSparseTensorAsOrtValue.
+   * 
+   * \param[in,out] ort_value OrtValue to populate with data
+   * \param[in] data_mem_info serves to identify the location of the data to be copied. If the allocator specified
+   *  at the creation time has memory info that is not the same as data_mem_info argument to this function a cross-device copy will be performed.
+   *  String data is assumed to be on CPU and will only be copied into a CPU allocated buffer.
+   * \param[in] values_shape pointer to values shape array
+   * \param[in] values_shape_len length of the values_shape
+   * \param[in] values - pointer to an array of values. For strings, pass const char**.
+   * \param[in] inner_indices_data pointer to a location of CSR inner indices
+   * \param[in] inner_indices_num number of CSR inner indices
+   * \param[in] outer_indices_data pointer to a location of CSR outer indices
+   * \param[in] outer_indices_num number of CSR outer indices
+   */
+  ORT_API2_STATUS(FillSparseTensorCsr, _Inout_ OrtValue* ort_value, _In_ const OrtMemoryInfo* data_mem_info,
+                  _In_ const int64_t* values_shape, size_t values_shape_len, _In_ const void* values,
+                  _In_ const int64_t* inner_indices_data, size_t inner_indices_num,
+                  _In_ const int64_t* outer_indices_data, size_t outer_indices_num);
+
+  /**
+   * This API populates an empty sparse tensor that was created using CreateSparseTensorAsOrtValue API.
+   * The API will allocate required memory and copy the supplied NNZ values and BlockSparse indices into that memory allocation.
+   * Memory allocation is performed using the allocator that was specified with CreateSparseTensorAsOrtValue.
+   * 
+   * \param[in,out] ort_value OrtValue to populate with data
+   * \param[in] data_mem_info serves to identify the location of the data to be copied. If the allocator specified
+   *  at the creation time has memory info that is not the same as data_mem_info argument to this function a cross-device copy will be performed.
+   *  String data is assumed to be on CPU and will only be copied into a CPU allocated buffer.
+   * \param[in] values pointer to an array of values (shape given by values_shape / values_shape_len). For strings, pass const char**.
+   * \param[in] indices_shape_data pointer to a location of indices shape
+   * \param[in] indices_shape_len length of the block sparse indices shape
+   * \param[in] indices_data pointer to a location of indices data. Shape will determine the length of the indices data.
+   */
+  ORT_API2_STATUS(FillSparseTensorBlockSparse, _Inout_ OrtValue* ort_value, _In_ const OrtMemoryInfo* data_mem_info,
+                  _In_ const int64_t* values_shape, size_t values_shape_len, _In_ const void* values,
+                  _In_ const int64_t* indices_shape_data, size_t indices_shape_len,
+                  _In_ const int32_t* indices_data);
+
+  /**
+   * Create an OrtValue with a sparse tensor. This is the first step.
+   * Next, use Use<Format>Indices() functions to supply sparse tensor with
+   * format specific indices data and set its sparse format to a specific enum value.
+   * This API will not perform memory allocations. It will
+   * use supplied user buffer which should outlive the created sparse tensor.
+   * Use ReleaseValue to destroy the sparse tensor. It would not release the supplied values buffer.
+   * This API can not be used to map strings from the user allocated memory. Strings must always be copied
+   * and have UTF-8 encoding. Therefore, use CreateSparseTensorAsOrtValue() API above and then fill it with data
+   * using the appropriate FillSparseTensor<Format>() API.
+   * 
+   * \param[in] info memory info where sparse values reside.
+   * \param[in,out] p_data pointer to a user allocated buffer with values. To create a fully sparse tensor with no non-zero
+   *   values, pass nullptr
+   * \param[in] dense_shape shape of the original dense tensor
+   * \param[in] dense_shape_len number of shape dimensions being passed
+   * \param[in] values_shape shape of the values data. To create a fully sparse tensor with no non-zero values,
+   *   pass {0} shape.
+   * \param[in] values_shape_len number of values shape dimensions
+   * \param[in] type must be one of TENSOR_ELEMENT_DATA_TYPE_xxxx
+   * \param[out] out Should be freed by calling ReleaseValue
+   * \return OrtStatus*
+   */
+  ORT_API2_STATUS(CreateSparseTensorWithValuesAsOrtValue, _In_ const OrtMemoryInfo* info, _Inout_ void* p_data,
+                  _In_ const int64_t* dense_shape, size_t dense_shape_len,
+                  _In_ const int64_t* values_shape, size_t values_shape_len,
+                  ONNXTensorElementDataType type, _Outptr_ OrtValue** out);
+
+  /**
+   * The API assigns Coo format indices to the SparseTensor that was created by 
+   * CreateSparseTensorWithValuesAsOrtValue API above. It also sets OrtSparseFormat to 
+   * ORT_SPARSE_COO. The API will not allocate any additional memory for data. The life span of
+   * indices_data buffer should eclipse the life span of this OrtValue.
+   * 
+   * \param[in,out] ort_value OrtValue instance constructed with CreateSparseTensorWithValuesAsOrtValue
+   * \param[in,out] indices_data pointer to a user pre-allocated buffer or nullptr for fully sparse tensors.
+   * \param[in] indices_num  number of COO indices. Should either be 0 for fully sparse tensors, be equal
+   *  to the number of nnz values specified to CreateSparseTensorWithValuesAsOrtValue for 1-D {nnz} indices or
+   *  be twice the number of nnz values for 2-D indices {nnz, 2}
+   */
+  ORT_API2_STATUS(UseCooIndices, _Inout_ OrtValue* ort_value, _Inout_ int64_t* indices_data, size_t indices_num);
+
+  /**
+   * The API assigns CSR format indices to the SparseTensor that was created by 
+   * CreateSparseTensorWithValuesAsOrtValue API above. It also sets OrtSparseFormat to 
+   * ORT_SPARSE_CSRC. The API will not allocate any additional memory for data. The life spans of
+   * inner_data and outer_data buffers should eclipse the life span of this OrtValue.
+   * 
+   * \param[in,out] ort_value OrtValue instance constructed with CreateSparseTensorWithValuesAsOrtValue
+   * \param[in,out] inner_data pointer to a user pre-allocated buffer or nullptr for fully sparse tensors.
+   * \param[in] inner_num  number of inner CSR indices. Should either be 0 for fully sparse tensors or be equal
+   * to the number of nnz values specified to CreateSparseTensorWithValuesAsOrtValue.
+   * \param[in,out] outer_data pointer to user pre-allocated buffer or nullptr for fully sparse tensors.
+   * \param[in] outer_num number of CSR outer indices. Should either be 0 for fully sparse tensors or
+   * equal to rows + 1 of the dense shape.
+   */
+  ORT_API2_STATUS(UseCsrIndices, _Inout_ OrtValue* ort_value, _Inout_ int64_t* inner_data, size_t inner_num,
+                  _Inout_ int64_t* outer_data, size_t outer_num);
+
+  /**
+   * The API assigns BlockSparse format indices to the SparseTensor that was created by 
+   * CreateSparseTensorWithValuesAsOrtValue API above. It also sets OrtSparseFormat to 
+   * ORT_SPARSE_BLOCK_SPARSE. The API will not allocate any additional memory for data. The life span of
+   * indices_data buffer must eclipse the lifespan of this OrtValue.
+   * 
+   * \param[in,out] ort_value OrtValue instance constructed with CreateSparseTensorWithValuesAsOrtValue
+   * \param[in] indices_shape pointer to indices shape. Use {0} for fully sparse tensors
+   * \param[in] indices_shape_len length of the indices shape
+   * \param[in,out] indices_data pointer to user pre-allocated buffer or nullptr for fully sparse tensors.
+   */
+  ORT_API2_STATUS(UseBlockSparseIndices, _Inout_ OrtValue* ort_value, const int64_t* indices_shape, size_t indices_shape_len, _Inout_ int32_t* indices_data);
+
+  /**
+   * The API returns sparse tensor format enum iff a given ort value contains an instance of sparse tensor.
+   * 
+   * \param[in] ort_value OrtValue that contains an instance of sparse tensor
+   * \param[out] out pointer to out parameter
+   */
+  ORT_API2_STATUS(GetSparseTensorFormat, _In_ const OrtValue* ort_value, _Out_ enum OrtSparseFormat* out);
+
+  /**
+   * The API returns data type and shape of sparse tensor values (nnz) iff OrtValue contains a SparseTensor.
+   * 
+   * \param[in] ort_value an OrtValue that contains a fully constructed sparse tensor
+   * \param[out] out Should be freed by ReleaseTensorTypeAndShapeInfo after use
+   */
+  ORT_API2_STATUS(GetSparseTensorValuesTypeAndShape, _In_ const OrtValue* ort_value, _Outptr_ OrtTensorTypeAndShapeInfo** out);
+
+  /**
+   * The API returns numeric data for sparse tensor values (nnz). For string values use GetStringTensor*() API.
+   * 
+   * \param[in] ort_value an instance of OrtValue containing sparse tensor
+   * \param[out] out returns a pointer to values data.  Do not attempt to free this ptr.
+   */
+  ORT_API2_STATUS(GetSparseTensorValues, _In_ const OrtValue* ort_value, _Outptr_ const void** out);
+
+  /**
+   * The API returns data type, shape for the type of indices specified by
+   * indices_format.
+   * 
+   * \param[in] ort_value OrtValue containing sparse tensor.
+   * \param[in] indices_format - one of the indices formats. It is an error to request a format that the sparse
+   * tensor does not contain.
+   * \param[out] out an instance of OrtTensorTypeAndShapeInfo. Must be freed by ReleaseTensorTypeAndShapeInfo.
+   */
+  ORT_API2_STATUS(GetSparseTensorIndicesTypeShape, _In_ const OrtValue* ort_value, enum OrtSparseIndicesFormat indices_format, _Outptr_ OrtTensorTypeAndShapeInfo** out);
+
+  /**
+   * The API returns indices data for the type of the indices specified by indices_format.
+   * Do not free the returned ptr as it points directly to the internal sparse tensor buffer.
+   * 
+   * \param[in] ort_value OrtValue containing sparse tensor.
+   * \param[in] indices_format - one of the indices formats. It is an error to request a format that the sparse
+   * tensor does not contain.
+   * \param[out] num_indices ptr where the number of indices entries is returned
+   * \param[out] indices out param where the pointer to the internal buffer is returned. Do not free this buffer.
+   */
+  ORT_API2_STATUS(GetSparseTensorIndices, _In_ const OrtValue* ort_value, enum OrtSparseIndicesFormat indices_format, _Out_ size_t* num_indices, _Outptr_ const void** indices);
 };
 
 /*
diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_api.h b/include/onnxruntime/core/session/onnxruntime_cxx_api.h
index 63f2202357..0ae27590c6 100644
--- a/include/onnxruntime/core/session/onnxruntime_cxx_api.h
+++ b/include/onnxruntime/core/session/onnxruntime_cxx_api.h
@@ -420,14 +420,208 @@ struct TypeInfo : Base<OrtTypeInfo> {
 };
 
 struct Value : Base<OrtValue> {
+  // This structure is used to feed sparse tensor values
+  // information to the FillSparseTensor<Format>() API.
+  // If the data type for the sparse tensor values is numeric,
+  // use data.p_data; otherwise, use the data.str pointer to feed
+  // values. data.str is an array of zero-terminated const char*.
+  // The number of strings in the array must match the shape size.
+  // For fully sparse tensors use shape {0} and set p_data/str
+  // to nullptr.
+  struct OrtSparseValuesParam {
+    const int64_t* values_shape;
+    size_t values_shape_len;
+    union {
+      const void* p_data;
+      const char** str;
+    } data;
+  };
+
+  // Provides a way to pass shape in a single
+  // argument
+  struct Shape {
+    const int64_t* shape;
+    size_t shape_len;
+  };
+
   template <typename T>
   static Value CreateTensor(const OrtMemoryInfo* info, T* p_data, size_t p_data_element_count, const int64_t* shape, size_t shape_len);
   static Value CreateTensor(const OrtMemoryInfo* info, void* p_data, size_t p_data_byte_count, const int64_t* shape, size_t shape_len,
                             ONNXTensorElementDataType type);
+
+
+  /// <summary>
+  /// This is a simple forwarding method to the other overload that helps deducing
+  /// data type enum value from the type of the buffer.
+  /// </summary>
+  /// <typeparam name="T">numeric datatype. This API is not suitable for strings.</typeparam>
+  /// <param name="info">Memory description where the user buffers reside (CPU vs GPU etc)</param>
+  /// <param name="p_data">pointer to the user supplied buffer, use nullptr for fully sparse tensors</param>
+  /// <param name="dense_shape">a would be dense shape of the tensor</param>
+  /// <param name="values_shape">non zero values shape. Use a single 0 shape for fully sparse tensors.</param>
+  /// <returns></returns>
+  template <typename T>
+  static Value CreateSparseTensor(const OrtMemoryInfo* info, T* p_data, const Shape& dense_shape,
+                                  const Shape& values_shape);
+
+  /// <summary>
+  /// Creates an OrtValue instance containing SparseTensor. This constructs
+  /// a sparse tensor that makes use of user allocated buffers. It does not make copies
+  /// of the user provided data and does not modify it. The lifespan of user provided buffers should
+  /// eclipse the life span of the resulting OrtValue. This call constructs an instance that only contains
+  /// a pointer to non-zero values. To fully populate the sparse tensor call the Use<Format>Indices() API below
+  /// to supply sparse format specific indices.
+  /// This API is not suitable for string data. Use CreateSparseTensor() with allocator specified so strings
+  /// can be properly copied into the allocated buffer.
+  /// </summary>
+  /// <param name="info">Memory description where the user buffers reside (CPU vs GPU etc)</param>
+  /// <param name="p_data">pointer to the user supplied buffer, use nullptr for fully sparse tensors</param>
+  /// <param name="dense_shape">a would be dense shape of the tensor</param>
+  /// <param name="values_shape">non zero values shape. Use a single 0 shape for fully sparse tensors.</param>
+  /// <param name="type">data type</param>
+  /// <returns>Ort::Value instance containing SparseTensor</returns>
+  static Value CreateSparseTensor(const OrtMemoryInfo* info, void* p_data, const Shape& dense_shape,
+                                  const Shape& values_shape, ONNXTensorElementDataType type);
+
+  /// <summary>
+  /// Supplies COO format specific indices and marks the contained sparse tensor as being a COO format tensor.
+  /// Values are supplied with a CreateSparseTensor() API. The supplied indices are not copied and the user
+  /// allocated buffers lifespan must eclipse that of the OrtValue.
+  /// The location of the indices is assumed to be the same as specified by OrtMemoryInfo argument at the creation time.
+  /// </summary>
+  /// <param name="indices_data">pointer to the user allocated buffer with indices. Use nullptr for fully sparse tensors.</param>
+  /// <param name="indices_num">number of indices entries. Use 0 for fully sparse tensors</param>
+  void UseCooIndices(int64_t* indices_data, size_t indices_num);
+
+  /// <summary>
+  /// Supplies CSR format specific indices and marks the contained sparse tensor as being a CSR format tensor.
+  /// Values are supplied with a CreateSparseTensor() API. The supplied indices are not copied and the user
+  /// allocated buffers lifespan must eclipse that of the OrtValue.
+  /// The location of the indices is assumed to be the same as specified by OrtMemoryInfo argument at the creation time.
+  /// </summary>
+  /// <param name="inner_data">pointer to the user allocated buffer with inner indices or nullptr for fully sparse tensors</param>
+  /// <param name="inner_num">number of csr inner indices or 0 for fully sparse tensors</param>
+  /// <param name="outer_data">pointer to the user allocated buffer with outer indices or nullptr for fully sparse tensors</param>
+  /// <param name="outer_num">number of csr outer indices or 0 for fully sparse tensors</param>
+  void UseCsrIndices(int64_t* inner_data, size_t inner_num, int64_t* outer_data, size_t outer_num);
+
+  /// <summary>
+  /// Supplies BlockSparse format specific indices and marks the contained sparse tensor as being a BlockSparse format tensor.
+  /// Values are supplied with a CreateSparseTensor() API. The supplied indices are not copied and the user
+  /// allocated buffers lifespan must eclipse that of the OrtValue.
+  /// The location of the indices is assumed to be the same as specified by OrtMemoryInfo argument at the creation time.
+  /// </summary>
+  /// <param name="indices_shape">indices shape or a {0} for fully sparse</param>
+  /// <param name="indices_data">user allocated buffer with indices or nullptr for fully sparse tensors</param>
+  void UseBlockSparseIndices(const Shape& indices_shape, int32_t* indices_data);
+
   template <typename T>
   static Value CreateTensor(OrtAllocator* allocator, const int64_t* shape, size_t shape_len);
   static Value CreateTensor(OrtAllocator* allocator, const int64_t* shape, size_t shape_len, ONNXTensorElementDataType type);
 
+  /// <summary>
+  /// This is a simple forwarding method to the CreateSparseTensor overload below.
+  /// This helps to specify data type enum in terms of C++ data type.
+  /// Use CreateSparseTensor<T>
+  /// </summary>
+  /// <typeparam name="T">numeric data type only. String data enum must be specified explicitly.</typeparam>
+  /// <param name="allocator">allocator to use</param>
+  /// <param name="dense_shape">a would be dense shape of the tensor</param>
+  /// <returns>Ort::Value</returns>
+  template <typename T>
+  static Value CreateSparseTensor(OrtAllocator* allocator, const Shape& dense_shape);
+
+  /// <summary>
+  /// Creates an instance of OrtValue containing sparse tensor. The created instance has no data.
+  /// The data must be supplied by one of the FillSparseTensor<Format>() methods that take both non-zero values
+  /// and indices. The data will be copied into a buffer that would be allocated using the supplied allocator.
+  /// Use this API to create OrtValues that contain sparse tensors with all supported data types including
+  /// strings.
+  /// </summary>
+  /// <param name="allocator">allocator to use. The allocator lifespan must eclipse that of the resulting OrtValue</param>
+  /// <param name="dense_shape">a would be dense shape of the tensor</param>
+  /// <param name="type">data type</param>
+  /// <returns>an instance of Ort::Value</returns>
+  static Value CreateSparseTensor(OrtAllocator* allocator, const Shape& dense_shape, ONNXTensorElementDataType type);
+
+  /// <summary>
+  /// The API will allocate memory using the allocator instance supplied to the CreateSparseTensor() API
+  /// and copy the values and COO indices into it. If data_mem_info specifies that the data is located
+  /// on a different device than the allocator, a cross-device copy will be performed if possible.
+  /// </summary>
+  /// <param name="data_mem_info">specified buffer memory description</param>
+  /// <param name="values_param">values buffer information.</param>
+  /// <param name="indices_data">coo indices buffer or nullptr for fully sparse data</param>
+  /// <param name="indices_num">number of COO indices or 0 for fully sparse data</param>
+  void FillSparseTensorCoo(const OrtMemoryInfo* data_mem_info, const OrtSparseValuesParam& values_param,
+                           const int64_t* indices_data, size_t indices_num);
+
+  /// <summary>
+  /// The API will allocate memory using the allocator instance supplied to the CreateSparseTensor() API
+  /// and copy the values and CSR indices into it. If data_mem_info specifies that the data is located
+  /// on a different device than the allocator, a cross-device copy will be performed if possible.
+  /// </summary>
+  /// <param name="data_mem_info">specified buffer memory description</param>
+  /// <param name="values">values buffer information</param>
+  /// <param name="inner_indices_data">csr inner indices pointer or nullptr for fully sparse tensors</param>
+  /// <param name="inner_indices_num">number of csr inner indices or 0 for fully sparse tensors</param>
+  /// <param name="outer_indices_data">pointer to csr indices data or nullptr for fully sparse tensors</param>
+  /// <param name="outer_indices_num">number of csr outer indices or 0</param>
+  void FillSparseTensorCsr(const OrtMemoryInfo* data_mem_info,
+                           const OrtSparseValuesParam& values,
+                           const int64_t* inner_indices_data, size_t inner_indices_num,
+                           const int64_t* outer_indices_data, size_t outer_indices_num);
+
+  /// <summary>
+  /// The API will allocate memory using the allocator instance supplied to the CreateSparseTensor() API
+  /// and copy the values and BlockSparse indices into it. If data_mem_info specifies that the data is located
+  /// on a different device than the allocator, a cross-device copy will be performed if possible.
+  /// </summary>
+  /// <param name="data_mem_info">specified buffer memory description</param>
+  /// <param name="values">values buffer information</param>
+  /// <param name="indices_shape">indices shape. use {0} for fully sparse tensors</param>
+  /// <param name="indices_data">pointer to indices data or nullptr for fully sparse tensors</param>
+  void FillSparseTensorBlockSparse(const OrtMemoryInfo* data_mem_info,
+                                   const OrtSparseValuesParam& values,
+                                   const Shape& indices_shape,
+                                   const int32_t* indices_data);
+
+  /// <summary>
+  /// The API returns the sparse data format this OrtValue holds in a sparse tensor.
+  /// If the sparse tensor was not fully constructed, i.e. Use*() or Fill*() API were not used
+  /// the value returned is ORT_SPARSE_UNDEFINED.
+  /// </summary>
+  /// <returns>Format enum</returns>
+  OrtSparseFormat GetSparseFormat() const;
+
+  /// <summary>
+  /// The API returns type and shape information for stored non-zero values of the
+  /// sparse tensor. Use GetSparseTensorValues() to obtain values buffer pointer.
+  /// </summary>
+  /// <returns>TensorTypeAndShapeInfo values information</returns>
+  TensorTypeAndShapeInfo GetSparseTensorValuesTypeAndShapeInfo() const;
+
+  /// <summary>
+  /// The API returns type and shape information for the specified indices. Each supported
+  /// indices have their own enum values even if a given format has more than one kind of indices.
+  /// Use GetSparseTensorIndicesData() to obtain pointer to indices buffer.
+  /// </summary>
+  /// <param name="">enum requested</param>
+  /// <returns>type and shape information</returns>
+  TensorTypeAndShapeInfo GetSparseTensorIndicesTypeShapeInfo(OrtSparseIndicesFormat) const;
+
+  /// <summary>
+  /// The API retrieves a pointer to the internal indices buffer. The API merely performs
+  /// a convenience data type casting on the return type pointer. Make sure you are requesting
+  /// the right type, use GetSparseTensorIndicesTypeShapeInfo();
+  /// </summary>
+  /// <typeparam name="T">type to cast to</typeparam>
+  /// <param name="indices_format">requested indices kind</param>
+  /// <param name="num_indices">number of indices entries</param>
+  /// <returns>Pointer to the internal sparse tensor buffer containing indices. Do not free this pointer.</returns>
+  template <typename T>
+  const T* GetSparseTensorIndicesData(OrtSparseIndicesFormat indices_format, size_t& num_indices) const;
+
   static Value CreateMap(Value& keys, Value& values);
   static Value CreateSequence(std::vector<Value>& values);
 
@@ -443,10 +637,38 @@ struct Value : Base<OrtValue> {
   Value& operator=(Value&&) = default;
 
   bool IsTensor() const;
+
+  /// <summary>
+  /// Returns true if the OrtValue contains a sparse tensor
+  /// </summary>
+  /// <returns></returns>
+  bool IsSparseTensor() const;
+
   size_t GetCount() const;  // If a non tensor, returns 2 for map and N for sequence, where N is the number of elements
   Value GetValue(int index, OrtAllocator* allocator) const;
 
+  /// <summary>
+  /// This API returns a full length of string data contained within either a tensor or a sparse Tensor.
+  /// For sparse tensor it returns a full length of stored non-empty strings (values). The API is useful
+  /// for allocating necessary memory and calling GetStringTensorContent().
+  /// </summary>
+  /// <returns>total length of UTF-8 encoded bytes contained. No zero terminators counted.</returns>
   size_t GetStringTensorDataLength() const;
+
+  /// <summary>
+  /// The API copies all of the UTF-8 encoded string data contained within a tensor or a sparse tensor
+  /// into a supplied buffer. Use GetStringTensorDataLength() to find out the length of the buffer to allocate.
+  /// The user must also allocate offsets buffer with the number of entries equal to that of the contained
+  /// strings.
+  /// 
+  /// Strings are always assumed to be on CPU, no X-device copy.
+  /// </summary>
+  /// <param name="buffer">user allocated buffer</param>
+  /// <param name="buffer_length">length in bytes of the allocated buffer</param>
+  /// <param name="offsets">a pointer to the offsets user allocated buffer</param>
+  /// <param name="offsets_count">count of offsets, must be equal to the number of strings contained,
+  ///   which can be obtained from the shape of the tensor or from GetSparseTensorValuesTypeAndShapeInfo()
+  ///   for sparse tensors</param>
   void GetStringTensorContent(void* buffer, size_t buffer_length, size_t* offsets, size_t offsets_count) const;
 
   template <typename T>
@@ -455,13 +677,52 @@ struct Value : Base<OrtValue> {
   template <typename T>
   const T* GetTensorData() const;
 
+  /// <summary>
+  /// The API returns a pointer to an internal buffer of the sparse tensor
+  /// containing non-zero values. The API merely does casting. Make sure you
+  /// are requesting the right data type by calling GetSparseTensorValuesTypeAndShapeInfo()
+  /// first.
+  /// </summary>
+  /// <typeparam name="T">numeric data types only. Use GetStringTensor*() to retrieve strings.</typeparam>
+  /// <returns>a pointer to the internal values buffer. Do not free this pointer.</returns>
+  template <typename T>
+  const T* GetSparseTensorValues() const;
+
   template <typename T>
   T& At(const std::vector<int64_t>& location);
 
+  /// <summary>
+  /// The API returns type information for data contained in a tensor. For sparse
+  /// tensors it returns type information for contained non-zero values.
+  /// It returns dense shape for sparse tensors.
+  /// </summary>
+  /// <returns>TypeInfo</returns>
   TypeInfo GetTypeInfo() const;
+
+  /// <summary>
+  /// The API returns type information for data contained in a tensor. For sparse
+  /// tensors it returns type information for contained non-zero values.
+  /// It returns dense shape for sparse tensors.
+  /// </summary>
+  /// <returns>TensorTypeAndShapeInfo</returns>
   TensorTypeAndShapeInfo GetTensorTypeAndShapeInfo() const;
 
+  /// <summary>
+  /// The API returns a byte length of UTF-8 encoded string element
+  /// contained in either a tensor or in sparse tensor values.
+  /// </summary>
+  /// <param name="element_index"></param>
+  /// <returns>byte length for the specified string element</returns>
   size_t GetStringTensorElementLength(size_t element_index) const;
+
+  /// <summary>
+  /// The API copies UTF-8 encoded bytes for the requested string element
+  /// contained within a tensor or a sparse tensor into a provided buffer.
+  /// Use GetStringTensorElementLength() to obtain the length of the buffer to allocate.
+  /// </summary>
+  /// <param name="buffer_length"></param>
+  /// <param name="element_index"></param>
+  /// <param name="buffer"></param>
   void GetStringTensorElement(size_t buffer_length, size_t element_index, void* buffer) const;
 
   void FillStringTensor(const char* const* s, size_t s_len);
diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h
index a4596fc205..684c8fbaa4 100644
--- a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h
+++ b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h
@@ -755,6 +755,82 @@ inline Value Value::CreateTensor(const OrtMemoryInfo* info, void* p_data, size_t
   return Value{out};
 }
 
+template <typename T>
+inline Value Value::CreateSparseTensor(const OrtMemoryInfo* info, T* p_data, const Shape& dense_shape,
+                                       const Shape& values_shape) {
+  return CreateSparseTensor(info, p_data, dense_shape, values_shape, TypeToTensorType<T>::type);
+}
+
+inline Value Value::CreateSparseTensor(const OrtMemoryInfo* info, void* p_data, const Shape& dense_shape,
+                                       const Shape& values_shape, ONNXTensorElementDataType type) {
+  OrtValue* out;
+  ThrowOnError(GetApi().CreateSparseTensorWithValuesAsOrtValue(info, p_data, dense_shape.shape, dense_shape.shape_len,
+                                                               values_shape.shape, values_shape.shape_len, type, &out));
+  return Value{out};
+}
+
+inline void Value::FillSparseTensorCoo(const OrtMemoryInfo* mem_info, const OrtSparseValuesParam& values_param,
+                                       const int64_t* indices_data, size_t indices_num) {
+  ThrowOnError(GetApi().FillSparseTensorCoo(p_, mem_info, values_param.values_shape,
+                                            values_param.values_shape_len, values_param.data.p_data,
+                                            indices_data, indices_num));
+}
+
+inline void Value::FillSparseTensorCsr(const OrtMemoryInfo* data_mem_info,
+                                       const OrtSparseValuesParam& values,
+                                       const int64_t* inner_indices_data, size_t inner_indices_num,
+                                       const int64_t* outer_indices_data, size_t outer_indices_num) {
+  ThrowOnError(GetApi().FillSparseTensorCsr(p_, data_mem_info, values.values_shape, values.values_shape_len, values.data.p_data,
+                                            inner_indices_data, inner_indices_num,
+                                            outer_indices_data, outer_indices_num));
+}
+
+inline void Value::FillSparseTensorBlockSparse(const OrtMemoryInfo* data_mem_info,
+                                               const OrtSparseValuesParam& values,
+                                               const Shape& indices_shape,
+                                               const int32_t* indices_data) {
+  ThrowOnError(GetApi().FillSparseTensorBlockSparse(p_, data_mem_info, values.values_shape, values.values_shape_len, values.data.p_data,
+                                                    indices_shape.shape, indices_shape.shape_len,
+                                                    indices_data));
+}
+
+inline void Value::UseCooIndices(int64_t* indices_data, size_t indices_num) {
+  ThrowOnError(GetApi().UseCooIndices(p_, indices_data, indices_num));
+}
+
+inline void Value::UseCsrIndices(int64_t* inner_data, size_t inner_num, int64_t* outer_data, size_t outer_num) {
+  ThrowOnError(GetApi().UseCsrIndices(p_, inner_data, inner_num, outer_data, outer_num));
+}
+
+inline void Value::UseBlockSparseIndices(const Shape& indices_shape, int32_t* indices_data) {
+  ThrowOnError(GetApi().UseBlockSparseIndices(p_, indices_shape.shape, indices_shape.shape_len, indices_data));
+}
+
+inline OrtSparseFormat Value::GetSparseFormat() const {
+  OrtSparseFormat format;
+  ThrowOnError(GetApi().GetSparseTensorFormat(p_, &format));
+  return format;
+}
+
+inline TensorTypeAndShapeInfo Value::GetSparseTensorValuesTypeAndShapeInfo() const {
+  OrtTensorTypeAndShapeInfo* output;
+  ThrowOnError(GetApi().GetSparseTensorValuesTypeAndShape(p_, &output));
+  return TensorTypeAndShapeInfo{output};
+}
+
+inline TensorTypeAndShapeInfo Value::GetSparseTensorIndicesTypeShapeInfo(OrtSparseIndicesFormat indices_format) const {
+  OrtTensorTypeAndShapeInfo* output;
+  ThrowOnError(GetApi().GetSparseTensorIndicesTypeShape(p_, indices_format, &output));
+  return TensorTypeAndShapeInfo{output};
+}
+
+template <typename T>
+inline const T* Value::GetSparseTensorIndicesData(OrtSparseIndicesFormat indices_format, size_t& num_indices) const {
+  const void* out;
+  ThrowOnError(GetApi().GetSparseTensorIndices(p_, indices_format, &num_indices, &out));
+  return reinterpret_cast<const T*>(out);
+}
+
 template <typename T>
 inline Value Value::CreateTensor(OrtAllocator* allocator, const int64_t* shape, size_t shape_len) {
   return CreateTensor(allocator, shape, shape_len, TypeToTensorType<T>::type);
@@ -766,6 +842,18 @@ inline Value Value::CreateTensor(OrtAllocator* allocator, const int64_t* shape,
   return Value{out};
 }
 
+template <typename T>
+inline Value Value::CreateSparseTensor(OrtAllocator* allocator, const Shape& dense_shape) {
+  return CreateSparseTensor(allocator, dense_shape, TypeToTensorType<T>::type);
+}
+
+inline Value Value::CreateSparseTensor(OrtAllocator* allocator, const Shape& dense_shape,
+                                       ONNXTensorElementDataType type) {
+  OrtValue* out;
+  ThrowOnError(GetApi().CreateSparseTensorAsOrtValue(allocator, dense_shape.shape, dense_shape.shape_len, type, &out));
+  return Value{out};
+}
+
 inline Value Value::CreateMap(Value& keys, Value& values) {
   OrtValue* out;
   OrtValue* inputs[2] = {keys, values};
@@ -798,6 +886,12 @@ inline bool Value::IsTensor() const {
   return out != 0;
 }
 
+inline bool Value::IsSparseTensor() const {
+  int out;
+  ThrowOnError(GetApi().IsSparseTensor(p_, &out));
+  return out != 0;
+}
+
 inline size_t Value::GetCount() const {
   size_t out;
   ThrowOnError(GetApi().GetValueCount(p_, &out));
@@ -852,6 +946,13 @@ const T* Value::GetTensorData() const {
   return out;
 }
 
+template <typename T>
+inline const T* Value::GetSparseTensorValues() const {
+  const void* out;
+  ThrowOnError(GetApi().GetSparseTensorValues(p_, &out));
+  return reinterpret_cast<const T*>(out);
+}
+
 template <typename T>
 inline T& Value::At(const std::vector<int64_t>& location) {
   static_assert(!std::is_same<T, std::string>::value, "this api does not support std::string");
diff --git a/onnxruntime/core/framework/execution_frame.cc b/onnxruntime/core/framework/execution_frame.cc
index caaa78f63f..7a0798782a 100644
--- a/onnxruntime/core/framework/execution_frame.cc
+++ b/onnxruntime/core/framework/execution_frame.cc
@@ -618,9 +618,7 @@ static Status AllocateSparseTensor(OrtValue& mlvalue, const DataTypeImpl& ml_typ
                                    const TensorShape& shape, bool create_fence,
                                    const SessionState& session_state) {
   auto element_type = ml_type.AsSparseTensorType()->GetElementType();
-  auto sparse = std::make_unique<SparseTensor>(element_type, shape, allocator);
-  auto deleter = DataTypeImpl::GetType<SparseTensor>()->GetDeleteFunc();
-  mlvalue.Init(sparse.release(), DataTypeImpl::GetType<SparseTensor>(), deleter);
+  SparseTensor::InitOrtValue(element_type, shape, std::move(allocator), mlvalue);
 
   // create fence if needed
   if (create_fence) {
diff --git a/onnxruntime/core/framework/onnxruntime_typeinfo.cc b/onnxruntime/core/framework/onnxruntime_typeinfo.cc
index 9b2e5db7e8..bcfc36d131 100644
--- a/onnxruntime/core/framework/onnxruntime_typeinfo.cc
+++ b/onnxruntime/core/framework/onnxruntime_typeinfo.cc
@@ -56,7 +56,7 @@ ORT_API_STATUS_IMPL(OrtApis::GetOnnxTypeFromTypeInfo, _In_ const struct OrtTypeI
 
 ORT_API_STATUS_IMPL(OrtApis::CastTypeInfoToTensorInfo, _In_ const struct OrtTypeInfo* input,
                     _Outptr_result_maybenull_ const struct OrtTensorTypeAndShapeInfo** out) {
-  *out = input->type == ONNX_TYPE_TENSOR ? input->data : nullptr;
+  *out = (input->type == ONNX_TYPE_TENSOR || input->type == ONNX_TYPE_SPARSETENSOR) ? input->data : nullptr;
   return nullptr;
 }
 
diff --git a/onnxruntime/core/framework/sparse_tensor.cc b/onnxruntime/core/framework/sparse_tensor.cc
index 640b62b168..817ee8b7af 100644
--- a/onnxruntime/core/framework/sparse_tensor.cc
+++ b/onnxruntime/core/framework/sparse_tensor.cc
@@ -4,6 +4,7 @@
 #include "core/framework/data_types.h"
 #include "core/framework/sparse_tensor.h"
 #include "core/framework/data_transfer_manager.h"
+#include "core/framework/ort_value.h"
 #include "core/framework/utils.h"
 
 #include <safeint/SafeInt.hpp>
@@ -42,13 +43,13 @@ inline std::vector<std::reference_wrapper<const Tensor>> MakeListConst(const T&.
   return std::vector{std::cref(t)...};
 }
 
-void CopyStrings(const Tensor& src, Tensor& dst) {
-  auto src_span = src.DataAsSpan<std::string>();
-  auto* dst_iter = dst.MutableData<std::string>();
-  std::copy(src_span.cbegin(), src_span.cend(), dst_iter);
+void CopyStrings(const Tensor& src_t, Tensor& dst_t) {
+  auto src_span = src_t.DataAsSpan<std::string>();
+  std::string* dst = dst_t.MutableData<std::string>();
+  std::copy(src_span.cbegin(), src_span.cend(), dst);
 }
 
-Status CopyData(const IDataTransfer& data_transfer,
+Status CopyData(const IDataTransfer* data_transfer,
                 const std::vector<std::reference_wrapper<const Tensor>>& src,
                 const std::vector<std::reference_wrapper<Tensor>>& dst) {
   ORT_RETURN_IF_NOT(src.size() == dst.size(), "Must have the same size. Got src_size: ",
@@ -59,12 +60,26 @@ Status CopyData(const IDataTransfer& data_transfer,
     if (src_t.IsDataTypeString()) {
       CopyStrings(src_t, dst_t);
     } else {
-      ORT_RETURN_IF_ERROR(data_transfer.CopyTensor(src_t, dst_t));
+      if (data_transfer != nullptr) {
+        ORT_RETURN_IF_ERROR(data_transfer->CopyTensor(src_t, dst_t));
+      } else {
+        memcpy(dst_t.MutableDataRaw(), src_t.DataRaw(), src_t.SizeInBytes());
+      }
     }
   }
   return Status::OK();
 }
 
+// Copies string_count C strings into values, then copies the index tensors via CopyData().
+Status CopyStringsAndIndices(size_t string_count, const char* const strings[], Tensor& values,
+                             const std::vector<std::reference_wrapper<const Tensor>>& src_ind,
+                             const std::vector<std::reference_wrapper<Tensor>>& dst_ind) {
+  // Element-wise assignment converts each C string into the destination std::string.
+  std::copy(strings, strings + string_count, values.MutableData<std::string>());
+
+  // A null IDataTransfer makes CopyData() use memcpy for non-string tensors.
+  return CopyData(nullptr, src_ind, dst_ind);
+}
 }  // namespace
 
 const void* SparseTensor::IndicesStart(int64_t values_bytes) const {
@@ -149,12 +164,58 @@ SparseTensor::~SparseTensor() {
   ReleaseBuffer();
 }
 
+void SparseTensor::InitOrtValue(MLDataType elt_type,
+                                const TensorShape& dense_shape,
+                                const TensorShape& values_shape,
+                                void* values_data,
+                                const OrtMemoryInfo& location,
+                                OrtValue& ort_value) {
+  // This overload forwards the caller's values pointer and memory info; no allocator is involved.
+  const auto container_type = DataTypeImpl::GetType<SparseTensor>();
+  auto instance = std::make_unique<SparseTensor>(elt_type, dense_shape, values_shape, values_data, location);
+  // ort_value receives the released pointer together with the container type's delete function.
+  ort_value.Init(instance.release(), container_type, container_type->GetDeleteFunc());
+}
+
+void SparseTensor::InitOrtValue(MLDataType elt_type,
+                                const TensorShape& dense_shape,
+                                std::shared_ptr<IAllocator> allocator,
+                                OrtValue& ort_value) {
+  // The allocator is moved into the new SparseTensor; populate it afterwards with the Make*() methods.
+  const auto container_type = DataTypeImpl::GetType<SparseTensor>();
+  auto instance = std::make_unique<SparseTensor>(elt_type, dense_shape, std::move(allocator));
+  // ort_value receives the released pointer together with the container type's delete function.
+  ort_value.Init(instance.release(), container_type, container_type->GetDeleteFunc());
+}
+
+const SparseTensor& SparseTensor::GetSparseTensorFromOrtValue(const OrtValue& v) {
+  // Read-only access: the value must be allocated and its sparse tensor must already have a format.
+  if (!v.IsAllocated()) {
+    ORT_THROW("the ort_value must contain a constructed sparse tensor");
+  }
+  const SparseTensor& result = v.Get<onnxruntime::SparseTensor>();
+  const bool has_format = result.Format() != onnxruntime::SparseFormat::kUndefined;
+  if (!has_format) ORT_THROW("Sparse Tensor does not contain sparse data");
+  return result;
+}
+
+SparseTensor& SparseTensor::GetSparseTensorFromOrtValue(OrtValue& v) {
+  // Mutable access: unlike the const overload, the sparse tensor must NOT have a format assigned yet.
+  if (!v.IsAllocated()) {
+    ORT_THROW("the ort_value must contain a constructed sparse tensor");
+  }
+  SparseTensor* const target = v.GetMutable<SparseTensor>();
+  const bool already_populated = target->Format() != SparseFormat::kUndefined;
+  if (already_populated) ORT_THROW("this tensor already has populated sparse_indices");
+  return *target;
+}
+
 Status SparseTensor::AllocateBuffer(int64_t buffer_size, size_t num_values) {
   if (buffer_size > 0) {
     SafeInt<size_t> buffer_size_t(buffer_size);
     const auto values_bytes = SafeInt<size_t>(num_values) * ml_data_type_->Size();
     ORT_RETURN_IF_NOT(buffer_size_t > values_bytes,
-                "Values size ", static_cast<size_t>(values_bytes), " must be less than total buffer size: ", buffer_size);
+                      "Values size ", static_cast<size_t>(values_bytes), " must be less than total buffer size: ", buffer_size);
     auto data_ptr = IAllocator::MakeUniquePtr<void>(allocator_, buffer_size_t);
     ORT_RETURN_IF(data_ptr == nullptr, "SparseTensor Allocation failed for size: ", buffer_size);
     if (IsDataTypeString()) {
@@ -206,6 +267,7 @@ void SparseTensor::InitCooIndex(const TensorShape& index_shape, int64_t* index_d
 }
 
 Status SparseTensor::UseCooIndices(gsl::span<int64_t> indices) {
+  ORT_RETURN_IF_NOT(Format() == SparseFormat::kUndefined, "Sparse format must not be set. Already contains format: ", Format());
   ORT_RETURN_IF_NOT(allocator_ == nullptr, "Not expecting an allocator set");
   TensorShape index_shape(GetCooIndexDims(NumValues(), indices.size()));
   InitCooIndex(index_shape, indices.data());
@@ -216,6 +278,7 @@ Status SparseTensor::MakeCooData(const IDataTransfer& data_transfer,
                                  const OrtMemoryInfo& data_location,
                                  size_t values_count, const void* values_data,
                                  gsl::span<const int64_t> indices) {
+  ORT_RETURN_IF(IsDataTypeString(), "Use MakeCooStrings");
   auto mutator = MakeCooData(values_count, indices.size());
   if (values_count > 0) {
     auto& dst_values = mutator.Values();
@@ -223,12 +286,26 @@ Status SparseTensor::MakeCooData(const IDataTransfer& data_transfer,
 
     Tensor src_values(dst_values.DataType(), dst_values.Shape(), const_cast<void*>(values_data), data_location);
     Tensor src_index(dst_index.DataType(), dst_index.Shape(), const_cast<int64_t*>(indices.data()), data_location);
-    ORT_RETURN_IF_ERROR(CopyData(data_transfer, MakeListConst(src_values, src_index), MakeListNonConst(dst_values, dst_index)));
+    ORT_RETURN_IF_ERROR(CopyData(&data_transfer, MakeListConst(src_values, src_index), MakeListNonConst(dst_values, dst_index)));
+  }
+  return Status::OK();
+}
+
+Status SparseTensor::MakeCooStrings(size_t string_count, const char* const* strings,
+                                    gsl::span<const int64_t> indices) {
+  ORT_RETURN_IF_NOT(IsDataTypeString(), "Expecting data type to be set as string");
+  auto mutator = MakeCooData(string_count, indices.size());
+  if (string_count > 0) {
+    // Present the caller's indices as a Tensor with the destination's type and shape for the copy.
+    auto& indices_dst = mutator.Indices();
+    Tensor indices_src(indices_dst.DataType(), indices_dst.Shape(), const_cast<int64_t*>(indices.data()), Location());
+    ORT_RETURN_IF_ERROR(CopyStringsAndIndices(string_count, strings, mutator.Values(), MakeListConst(indices_src), MakeListNonConst(indices_dst)));
   }
   return Status::OK();
 }
 
 SparseTensor::CooMutator SparseTensor::MakeCooData(size_t values_count, size_t index_count) {
+  ORT_ENFORCE(Format() == SparseFormat::kUndefined, "Sparse format must not be set. Already contains format: ", Format());
   ORT_ENFORCE(allocator_ != nullptr, "This method should follow a call to constructor that supplies the allocator");
   const auto num_values = gsl::narrow<int64_t>(values_count);
   TensorShape values_shape{num_values};
@@ -253,11 +330,13 @@ SparseTensor::CsrView SparseTensor::AsCsr() const {
 
 Status SparseTensor::ValidateCsrIndices(size_t values_count, size_t inner_size, size_t outer_size) const {
   ORT_RETURN_IF_NOT(dense_shape_.NumDimensions() == 2U, "dense shape must 2-D. Got: ", dense_shape_.NumDimensions());
+  ORT_RETURN_IF_NOT((inner_size == 0 && outer_size == 0) || (inner_size > 0 && outer_size > 0),
+                    "Inner and Outer indices must either be both zero or non-zero");  // rejects one empty, one non-empty
   ORT_RETURN_IF_NOT(inner_size == values_count,
-              "Expecting inner index size: ", inner_size, " the same as values size: ", values_count);
+                    "Expecting inner index size: ", inner_size, " the same as values size: ", values_count);
   const auto rows = dense_shape_.GetDims()[0];
   ORT_RETURN_IF_NOT(outer_size == 0 || outer_size == static_cast<size_t>(rows + 1),
-              "Outer index count must be rows + 1 or zero. Got: ", outer_size, " rows: ", rows);
+                    "Outer index count must be rows + 1 or zero. Got: ", outer_size, " rows: ", rows);
   return Status::OK();
 }
 
@@ -274,6 +353,7 @@ void SparseTensor::InitCsrIndices(size_t inner_size, const int64_t* inner, size_
 
 Status SparseTensor::UseCsrIndices(gsl::span<int64_t> inner_index, gsl::span<int64_t> outer_index) {
   ORT_RETURN_IF_NOT(allocator_ == nullptr, "This method does not expect allocator to be set");
+  ORT_RETURN_IF_NOT(Format() == SparseFormat::kUndefined, "Sparse format must not be set. Already contains format: ", Format());
   ORT_RETURN_IF_ERROR(ValidateCsrIndices(NumValues(), inner_index.size(), outer_index.size()));
   InitCsrIndices(inner_index.size(), inner_index.data(), outer_index.size(), outer_index.data());
   return Status::OK();
@@ -282,6 +362,7 @@ Status SparseTensor::UseCsrIndices(gsl::span<int64_t> inner_index, gsl::span<int
 Status SparseTensor::MakeCsrData(const IDataTransfer& data_transfer, const OrtMemoryInfo& data_location,
                                  size_t values_count, const void* values_data,
                                  gsl::span<const int64_t> inner_index, gsl::span<const int64_t> outer_index) {
+  ORT_RETURN_IF(IsDataTypeString(), "Use MakeCsrStrings");
   auto mutator = MakeCsrData(values_count, inner_index.size(), outer_index.size());
   if (values_count > 0) {
     auto& dst_values = mutator.Values();
@@ -291,16 +372,34 @@ Status SparseTensor::MakeCsrData(const IDataTransfer& data_transfer, const OrtMe
     Tensor src_values(dst_values.DataType(), dst_values.Shape(), const_cast<void*>(values_data), data_location);
     Tensor src_inner(dst_inner.DataType(), dst_inner.Shape(), const_cast<int64_t*>(inner_index.data()), data_location);
     Tensor src_outer(dst_outer.DataType(), dst_outer.Shape(), const_cast<int64_t*>(outer_index.data()), data_location);
-    ORT_RETURN_IF_ERROR(CopyData(data_transfer, MakeListConst(src_values, src_inner, src_outer),
+    ORT_RETURN_IF_ERROR(CopyData(&data_transfer, MakeListConst(src_values, src_inner, src_outer),
                                  MakeListNonConst(dst_values, dst_inner, dst_outer)));
   }
   return Status::OK();
 }
 
+Status SparseTensor::MakeCsrStrings(size_t string_count, const char* const* strings,
+                                    gsl::span<const int64_t> inner_index, gsl::span<const int64_t> outer_index) {
+  ORT_RETURN_IF_NOT(IsDataTypeString(), "Expecting data type to be set as string");
+  auto mutator = MakeCsrData(string_count, inner_index.size(), outer_index.size());
+  // Without strings there is nothing further to copy.
+  if (string_count == 0) {
+    return Status::OK();
+  }
+  auto& inner_dst = mutator.Inner();
+  auto& outer_dst = mutator.Outer();
+  // Present the caller's index buffers as Tensors with the destination's type and shape.
+  Tensor inner_src(inner_dst.DataType(), inner_dst.Shape(), const_cast<int64_t*>(inner_index.data()), Location());
+  Tensor outer_src(outer_dst.DataType(), outer_dst.Shape(), const_cast<int64_t*>(outer_index.data()), Location());
+  return CopyStringsAndIndices(string_count, strings, mutator.Values(),
+                               MakeListConst(inner_src, outer_src), MakeListNonConst(inner_dst, outer_dst));
+}
+
 SparseTensor::CsrMutator SparseTensor::MakeCsrData(size_t values_count,
                                                    size_t inner_index_count,
                                                    size_t outer_index_count) {
   ORT_ENFORCE(allocator_ != nullptr, "This method should follow a call to constructor that supplies the allocator");
+  ORT_ENFORCE(Format() == SparseFormat::kUndefined, "Sparse format must not be set. Already contains format: ", Format());
   ORT_THROW_IF_ERROR(ValidateCsrIndices(values_count, inner_index_count, outer_index_count));
 
   if (values_count > 0) {
@@ -326,44 +425,70 @@ SparseTensor::BlockSparseView SparseTensor::AsBlockSparse() const {
 }
 
 Status SparseTensor::ValidateBlockSparseShapes(const TensorShape& values_shape, const TensorShape& indices_shape) const {
-  ORT_RETURN_IF_NOT(values_shape.NumDimensions() >= 3,
-                    "Expecting values dimensions to be at least 3. Got:", values_shape.NumDimensions());
-  ORT_RETURN_IF_NOT(indices_shape.NumDimensions() == 2,
-                    "Expecting index dimensions to be 2. Got: ", indices_shape.NumDimensions());
-  const auto values_blocks = values_shape.SizeFromDimension(2);
-  const auto index_blocks = indices_shape.Size() / 2;  // Two integers per block
-  ORT_RETURN_IF_NOT(values_blocks == index_blocks,
-                    "Expecting index blocks: ", index_blocks, " to be equal to values blocks: ", values_blocks);
+  if (values_shape.Size() > 0) {  // populated tensor: check ranks and that block counts agree
+    ORT_RETURN_IF_NOT(values_shape.NumDimensions() >= 3,
+                      "Expecting values to have at least 3-D shape. Got: ", values_shape.NumDimensions());
+    ORT_RETURN_IF_NOT(indices_shape.NumDimensions() == 2,
+                      "Expecting indices to have 2-D shape. Got: ", indices_shape.NumDimensions());
+    ORT_RETURN_IF_NOT(indices_shape.GetDims()[0] == 2, "Indices shape must have dim[0] == 2");
+    const auto values_blocks = values_shape.SizeFromDimension(2);
+    const auto index_blocks = indices_shape.Size() / 2;  // Two integers per block
+    ORT_RETURN_IF_NOT(values_blocks == index_blocks,
+                      "Expecting index blocks: ", index_blocks, " to be equal to values blocks: ", values_blocks);
+  } else {  // no values: both shapes must be 1-D
+    ORT_RETURN_IF_NOT(values_shape.GetDims().size() == 1, "Expecting fully sparse tensors to have value shape {0}");
+    ORT_RETURN_IF_NOT(indices_shape.GetDims().size() == 1, "Expecting fully sparse tensors to have indices shape {0}");
+  }
   return Status::OK();
 }
 
-Status SparseTensor::UseBlockSparseIndices(const TensorShape& index_shape, int32_t* indices_data) {
-  ORT_RETURN_IF_NOT(allocator_ == nullptr, "Not expecting an allocator set");
-  ORT_RETURN_IF_ERROR(ValidateBlockSparseShapes(Values().Shape(), index_shape));
-
+void SparseTensor::InitBlockSparseIndices(const TensorShape& indices_shape, int32_t* indices_data) {  // shared by UseBlockSparseIndices() and MakeBlockSparseData()
   format_data_.resize(1);
-  format_data_[0] = Tensor(DataTypeImpl::GetType<int32_t>(), index_shape,
+  format_data_[0] = Tensor(DataTypeImpl::GetType<int32_t>(), indices_shape,
                            indices_data, Location());
   format_ = SparseFormat::kBlockSparse;
+}
+
+Status SparseTensor::UseBlockSparseIndices(const TensorShape& indices_shape, int32_t* indices_data) {
+  ORT_RETURN_IF_NOT(allocator_ == nullptr, "Not expecting an allocator set");
+  ORT_RETURN_IF_NOT(Format() == SparseFormat::kUndefined, "Sparse format must not be set. Already contains format: ", Format());
+  ORT_RETURN_IF_ERROR(ValidateBlockSparseShapes(Values().Shape(), indices_shape));
+  InitBlockSparseIndices(indices_shape, indices_data);  // shapes were validated on the line above
   return Status::OK();
 }
 
 Status SparseTensor::MakeBlockSparseData(const IDataTransfer& data_transfer, const OrtMemoryInfo& data_location,
                                          const TensorShape& values_shape, const void* values_data,
                                          const TensorShape& indices_shape, const int32_t* indices_data) {
+  ORT_RETURN_IF(IsDataTypeString(), "Use MakeBlockSparseStrings");  // string data has its own entry point
   auto mutator = MakeBlockSparseData(values_shape, indices_shape);
   if (values_shape.Size() > 0) {
     auto& dst_values = mutator.Values();
     auto& dst_indices = mutator.Indices();
     Tensor src_values(dst_values.DataType(), dst_values.Shape(), const_cast<void*>(values_data), data_location);
     Tensor src_index(dst_indices.DataType(), dst_indices.Shape(), const_cast<int32_t*>(indices_data), data_location);
-    ORT_RETURN_IF_ERROR(CopyData(data_transfer, MakeListConst(src_values, src_index), MakeListNonConst(dst_values, dst_indices)));
+    ORT_RETURN_IF_ERROR(CopyData(&data_transfer, MakeListConst(src_values, src_index), MakeListNonConst(dst_values, dst_indices)));  // CopyData() now takes a pointer
+  }
+  return Status::OK();
+}
+
+Status SparseTensor::MakeBlockSparseStrings(const TensorShape& values_shape, const char* const* strings,
+                                            const TensorShape& indices_shape, const int32_t* indices_data) {
+  ORT_RETURN_IF_NOT(IsDataTypeString(), "Expecting data type to be set as string");
+  auto mutator = MakeBlockSparseData(values_shape, indices_shape);
+  const auto num_strings = gsl::narrow<size_t>(values_shape.Size());
+  if (num_strings > 0) {
+    // The string count is the element count of values_shape; the indices are copied alongside the strings.
+    auto& indices_dst = mutator.Indices();
+    Tensor indices_src(indices_dst.DataType(), indices_dst.Shape(), const_cast<int32_t*>(indices_data), Location());
+    ORT_RETURN_IF_ERROR(CopyStringsAndIndices(num_strings, strings, mutator.Values(), MakeListConst(indices_src), MakeListNonConst(indices_dst)));
   }
   return Status::OK();
 }
 
 SparseTensor::BlockSparseMutator SparseTensor::MakeBlockSparseData(const TensorShape& values_shape, const TensorShape& indices_shape) {
   ORT_ENFORCE(allocator_ != nullptr, "This method should follow a call to constructor that supplies the allocator");
+  ORT_ENFORCE(Format() == SparseFormat::kUndefined, "Sparse format must not be set. Already contains format: ", Format());
   ORT_THROW_IF_ERROR(ValidateBlockSparseShapes(values_shape, indices_shape));
   if (values_shape.Size() > 0) {
     const auto data_size = SafeInt<int64_t>(values_shape.Size()) * ml_data_type_->Size();
@@ -372,10 +497,9 @@ SparseTensor::BlockSparseMutator SparseTensor::MakeBlockSparseData(const TensorS
                                                                   gsl::narrow<int64_t>(index_size));
     ORT_THROW_IF_ERROR(AllocateBuffer(required_buffer_size, static_cast<size_t>(data_size / ml_data_type_->Size())));
   }
+
   values_ = Tensor(DataType(), values_shape, p_data_, Location());
-  format_data_.resize(1);
-  format_data_[0] = Tensor(DataTypeImpl::GetType<int32_t>(), indices_shape, IndicesStart(values_.SizeInBytes()), Location());
-  format_ = SparseFormat::kBlockSparse;
+  InitBlockSparseIndices(indices_shape, reinterpret_cast<int32_t*>(IndicesStart(values_.SizeInBytes())));
   return BlockSparseMutator(values_, format_data_[0]);
 }
 
diff --git a/onnxruntime/core/framework/tensor_type_and_shape.cc b/onnxruntime/core/framework/tensor_type_and_shape.cc
index fa512f629b..f818ef7a26 100644
--- a/onnxruntime/core/framework/tensor_type_and_shape.cc
+++ b/onnxruntime/core/framework/tensor_type_and_shape.cc
@@ -203,12 +203,13 @@ OrtStatus* OrtTensorTypeAndShapeInfo::Clone(OrtTensorTypeAndShapeInfo** out) {
 
 ORT_API_STATUS_IMPL(OrtApis::GetTensorTypeAndShape, _In_ const OrtValue* v, _Outptr_ OrtTensorTypeAndShapeInfo** out) {
   API_IMPL_BEGIN
-  onnxruntime::MLDataType type = v->Type();
-  ORT_ENFORCE(type != nullptr, "OrtValue is not a Tensor");
-  if (type->IsTensorType() || type->IsSparseTensorType()) {
+  if (!v->IsAllocated()) {
+    return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, "the ort_value must contain a constructed tensor or sparse tensor");
+  }
+  if (v->IsTensor() || v->IsSparseTensor()) {
     const onnxruntime::TensorShape* shape = nullptr;
     onnxruntime::MLDataType data_type = nullptr;
-    if (type->IsTensorType()) {
+    if (v->IsTensor()) {
       const Tensor& tensor = v->Get<onnxruntime::Tensor>();
       shape = &tensor.Shape();
       data_type = tensor.DataType();
@@ -224,6 +225,57 @@ ORT_API_STATUS_IMPL(OrtApis::GetTensorTypeAndShape, _In_ const OrtValue* v, _Out
   API_IMPL_END
 }
 
+ORT_API_STATUS_IMPL(OrtApis::GetSparseTensorValuesTypeAndShape, _In_ const OrtValue* v,
+                    _Outptr_ OrtTensorTypeAndShapeInfo** out) {
+  API_IMPL_BEGIN
+  // Describes the values tensor of the sparse tensor: its own shape and element type.
+  const Tensor& values = SparseTensor::GetSparseTensorFromOrtValue(*v).Values();
+  return GetTensorShapeAndType(values.Shape(), *values.DataType(), out);
+  API_IMPL_END
+}
+
+namespace {
+// Returns the indices tensor that indices_format selects from the sparse tensor held by v.
+// GetSparseTensorFromOrtValue() throws when v is not allocated or has no sparse format assigned.
+// This function itself does not compare indices_format with the tensor's actual format.
+const Tensor& GetIndicesTensor(const OrtValue& v, OrtSparseIndicesFormat indices_format) {
+  const auto& sparse_tensor = SparseTensor::GetSparseTensorFromOrtValue(v);
+  // Every case returns directly and the default case throws, so no path leaves the switch.
+  switch (indices_format) {
+    case OrtSparseIndicesFormat::ORT_SPARSE_COO_INDICES:
+      return sparse_tensor.AsCoo().Indices();
+    case OrtSparseIndicesFormat::ORT_SPARSE_CSR_INNER_INDICES:
+      return sparse_tensor.AsCsr().Inner();
+    case OrtSparseIndicesFormat::ORT_SPARSE_CSR_OUTER_INDICES:
+      return sparse_tensor.AsCsr().Outer();
+    case OrtSparseIndicesFormat::ORT_SPARSE_BLOCK_SPARSE_INDICES:
+      return sparse_tensor.AsBlockSparse().Indices();
+    default:
+      // ORT_THROW builds the message from all of its arguments, so an error code passed first
+      // would end up as text inside the message instead of acting as a status code.
+      ORT_THROW("Unsupported indices_format passed");
+  }
+}
+}  // namespace
+
+ORT_API_STATUS_IMPL(OrtApis::GetSparseTensorIndicesTypeShape, _In_ const OrtValue* v,
+                    OrtSparseIndicesFormat indices_format, _Outptr_ OrtTensorTypeAndShapeInfo** out) {
+  API_IMPL_BEGIN
+  const Tensor& selected = GetIndicesTensor(*v, indices_format);  // throws for an unsupported indices_format
+  return GetTensorShapeAndType(selected.Shape(), *selected.DataType(), out);
+  API_IMPL_END
+}
+
+ORT_API_STATUS_IMPL(OrtApis::GetSparseTensorIndices, _In_ const OrtValue* v,
+                    enum OrtSparseIndicesFormat indices_format, _Out_ size_t* num_indices, _Outptr_ const void** indices) {
+  API_IMPL_BEGIN
+  const Tensor& selected = GetIndicesTensor(*v, indices_format);
+  *num_indices = gsl::narrow<size_t>(selected.Shape().Size());  // element count of the indices shape
+  *indices = selected.DataRaw();                                // raw pointer as reported by the tensor
+  return nullptr;
+  API_IMPL_END
+}
+
 ORT_API_STATUS_IMPL(OrtApis::GetValueType, _In_ const OrtValue* v, _Out_ ONNXType* out) {
   API_IMPL_BEGIN
   OrtTypeInfo* type_info;
diff --git a/onnxruntime/core/framework/utils.cc b/onnxruntime/core/framework/utils.cc
index 2766573e1d..022182062e 100644
--- a/onnxruntime/core/framework/utils.cc
+++ b/onnxruntime/core/framework/utils.cc
@@ -140,13 +140,7 @@ static common::Status AllocateHelper(const AllocatorPtr& allocator,
                          allocator, target_mlvalue);
   } else if (source_mlvalue.IsSparseTensor()) {
     const SparseTensor& source_tensor = source_mlvalue.Get<SparseTensor>();
-    auto p_tensor = std::make_unique<SparseTensor>(source_tensor.DataType(),
-                                                   source_tensor.DenseShape(),
-                                                   allocator);
-    auto ml_tensor = DataTypeImpl::GetType<SparseTensor>();
-    target_mlvalue.Init(p_tensor.release(),
-                        ml_tensor,
-                        ml_tensor->GetDeleteFunc());
+    SparseTensor::InitOrtValue(source_tensor.DataType(), source_tensor.DenseShape(), allocator, target_mlvalue);
   } else if (source_mlvalue.IsTensorSequence()) {
     const TensorSeq& source_tensor_seq = source_mlvalue.Get<TensorSeq>();
     auto target_tensor_seq = std::make_unique<TensorSeq>(source_tensor_seq.DataType());
diff --git a/onnxruntime/core/optimizer/optimizer_execution_frame.cc b/onnxruntime/core/optimizer/optimizer_execution_frame.cc
index 3b5ee4f98b..8bc84056b6 100644
--- a/onnxruntime/core/optimizer/optimizer_execution_frame.cc
+++ b/onnxruntime/core/optimizer/optimizer_execution_frame.cc
@@ -146,9 +146,7 @@ Status OptimizerExecutionFrame::CreateNodeOutputMLValueImpl(OrtValue& ort_value,
                   "Tried to allocate without valid type information, ort_value index=" + std::to_string(ort_value_idx));
   if (ml_type->IsSparseTensorType()) {
     auto element_type = ml_type->AsSparseTensorType()->GetElementType();
-    auto container_type = DataTypeImpl::GetType<SparseTensor>();
-    auto sparse = std::make_unique<SparseTensor>(element_type, *shape, info_.GetAllocator());
-    ort_value.Init(sparse.release(), container_type, container_type->GetDeleteFunc());
+    SparseTensor::InitOrtValue(element_type, *shape, info_.GetAllocator(), ort_value);
     return Status::OK();
   }
 
diff --git a/onnxruntime/core/session/onnxruntime_c_api.cc b/onnxruntime/core/session/onnxruntime_c_api.cc
index 6a027e403f..7fffb25682 100644
--- a/onnxruntime/core/session/onnxruntime_c_api.cc
+++ b/onnxruntime/core/session/onnxruntime_c_api.cc
@@ -37,6 +37,14 @@
 #include "core/framework/TensorSeq.h"
 #include "core/platform/ort_mutex.h"
 
+#ifdef USE_CUDA
+#include "core/providers/cuda/cuda_provider_factory.h"
+#include "core/providers/cuda/cuda_execution_provider_info.h"
+namespace onnxruntime {
+ProviderInfo_CUDA* TryGetProviderInfo_CUDA();
+}
+#endif
+
 #ifdef ENABLE_EXTENSION_CUSTOM_OPS
 #include "ortcustomops.h"
 #endif
@@ -220,6 +228,224 @@ ORT_API_STATUS_IMPL(OrtApis::CreateTensorAsOrtValue, _Inout_ OrtAllocator* alloc
   API_IMPL_END
 }
 
+ORT_API_STATUS_IMPL(OrtApis::CreateSparseTensorAsOrtValue, _Inout_ OrtAllocator* allocator, _In_ const int64_t* dense_shape,
+                    size_t dense_shape_len, ONNXTensorElementDataType type, _Outptr_ OrtValue** out) {
+  API_IMPL_BEGIN
+  // Resolve the element type through the sparse tensor container type for the requested ONNX enum.
+  const auto element_type = DataTypeImpl::SparseTensorTypeFromONNXEnum(type)->GetElementType();
+  assert(element_type->AsPrimitiveDataType() != nullptr);
+  TensorShape shape(dense_shape, dense_shape_len);
+  const auto& dims = shape.GetDims();
+  const bool has_negative_dim = std::any_of(dims.cbegin(), dims.cend(), [](int64_t dim) { return dim < 0; });
+  if (has_negative_dim) {
+    return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, "tried creating tensor with negative value in shape");
+  }
+  // The user's OrtAllocator is wrapped as an IAllocator and handed to the new sparse tensor.
+  auto value = std::make_unique<OrtValue>();
+  SparseTensor::InitOrtValue(element_type, shape, std::make_shared<onnxruntime::IAllocatorImplWrappingOrtAllocator>(allocator), *value);
+  *out = value.release();
+  return nullptr;
+  API_IMPL_END
+}
+
+namespace {
+std::unique_ptr<IDataTransfer> GetDataTransfer(const OrtDevice& src_device, const OrtDevice& dst_device) {
+  const bool cpu_to_cpu = src_device.Type() == OrtDevice::CPU && dst_device.Type() == OrtDevice::CPU;
+  if (cpu_to_cpu) return std::make_unique<CPUDataTransfer>();
+#ifdef USE_CUDA
+  // With a GPU on either side, use the CUDA provider's transfer object when the provider info is available.
+  if (src_device.Type() == OrtDevice::GPU || dst_device.Type() == OrtDevice::GPU) {
+    auto* cuda_info = TryGetProviderInfo_CUDA();
+    if (cuda_info != nullptr) return cuda_info->CreateGPUDataTransfer(nullptr);
+  }
+#endif
+  // Reached when no transfer implementation fits this pair of devices.
+  ORT_THROW("Not able to find appropriate IDataTransfer to copy sparse data");
+}
+
+SparseTensor& ValidateFillInputArgs(OrtValue* v, const TensorShape& values_shape, const OrtMemoryInfo* data_mem_info) {
+  auto& sparse_tensor = SparseTensor::GetSparseTensorFromOrtValue(*v);
+  // String elements are accepted only when both the source data and the destination are on CPU.
+  const auto on_cpu = [](const OrtMemoryInfo& info) { return info.device.Type() == OrtDevice::CPU; };
+  if (sparse_tensor.IsDataTypeString() && !(on_cpu(*data_mem_info) && on_cpu(sparse_tensor.Location()))) {
+    ORT_THROW("Strings can only reside in CPU memory");
+  }
+  const auto& dims = values_shape.GetDims();
+  if (std::any_of(dims.cbegin(), dims.cend(), [](int64_t dim) { return dim < 0; })) {
+    ORT_THROW("tried Filling sparse tensor with negative value in values shape");
+  }
+
+  return sparse_tensor;
+}
+
+struct PtrConvert {  // Typed view of an opaque values pointer as an array of C-string pointers.
+  explicit PtrConvert(const void* p_p) : p(p_p), strings(static_cast<const char* const*>(p_p)) {}
+  const void* p;               // the pointer exactly as received from the caller
+  const char* const* strings;  // same address; static_cast avoids reading an inactive union member
+};
+
+}  // namespace
+
+ORT_API_STATUS_IMPL(OrtApis::FillSparseTensorCoo, _Inout_ OrtValue* ort_value, _In_ const OrtMemoryInfo* data_mem_info,
+                    _In_ const int64_t* values_shape, size_t values_shape_len, _In_ const void* values,
+                    _In_ const int64_t* indices_data, size_t indices_num) {
+  API_IMPL_BEGIN
+  TensorShape values_t_shape(values_shape, values_shape_len);
+  auto& sparse_tensor = ValidateFillInputArgs(ort_value, values_t_shape, data_mem_info);
+  const auto num_values = gsl::narrow<size_t>(values_t_shape.Size());
+  const auto coo_indices = gsl::make_span(indices_data, indices_num);
+
+  // Strings arrive as an array of C-string pointers; all other element types are copied via IDataTransfer.
+  if (!sparse_tensor.IsDataTypeString()) {
+    auto data_transfer = GetDataTransfer(data_mem_info->device, sparse_tensor.Location().device);
+    ORT_THROW_IF_ERROR(sparse_tensor.MakeCooData(*data_transfer, *data_mem_info, num_values,
+                                                 values, coo_indices));
+  } else {
+    PtrConvert conv(values);
+    ORT_THROW_IF_ERROR(sparse_tensor.MakeCooStrings(num_values, conv.strings, coo_indices));
+  }
+  return nullptr;
+  API_IMPL_END
+}
+
+ORT_API_STATUS_IMPL(OrtApis::FillSparseTensorCsr, _Inout_ OrtValue* ort_value, _In_ const OrtMemoryInfo* data_mem_info,
+                    _In_ const int64_t* values_shape, size_t values_shape_len, _In_ const void* values,
+                    _In_ const int64_t* inner_indices_data, size_t inner_indices_num,
+                    _In_ const int64_t* outer_indices_data, size_t outer_indices_num) {
+  API_IMPL_BEGIN
+  TensorShape values_t_shape(values_shape, values_shape_len);
+  auto& sparse_tensor = ValidateFillInputArgs(ort_value, values_t_shape, data_mem_info);
+  const auto num_values = gsl::narrow<size_t>(values_t_shape.Size());
+  // Strings arrive as an array of C-string pointers; all other element types are copied via IDataTransfer.
+  const auto inner_span = gsl::make_span(inner_indices_data, inner_indices_num);
+  const auto outer_span = gsl::make_span(outer_indices_data, outer_indices_num);
+  if (!sparse_tensor.IsDataTypeString()) {
+    auto data_transfer = GetDataTransfer(data_mem_info->device, sparse_tensor.Location().device);
+    ORT_THROW_IF_ERROR(sparse_tensor.MakeCsrData(*data_transfer, *data_mem_info, num_values,
+                                                 values, inner_span, outer_span));
+  } else {
+    PtrConvert conv(values);
+    ORT_THROW_IF_ERROR(sparse_tensor.MakeCsrStrings(num_values, conv.strings, inner_span, outer_span));
+  }
+  return nullptr;
+  API_IMPL_END
+}
+
+ORT_API_STATUS_IMPL(OrtApis::FillSparseTensorBlockSparse, _Inout_ OrtValue* ort_value, _In_ const OrtMemoryInfo* data_mem_info,
+                    _In_ const int64_t* values_shape, size_t values_shape_len, _In_ const void* values,
+                    _In_ const int64_t* indices_shape_data, size_t indices_shape_len,
+                    _In_ const int32_t* indices_data) {
+  API_IMPL_BEGIN
+  TensorShape values_t_shape(values_shape, values_shape_len);
+  auto& sparse_tensor = ValidateFillInputArgs(ort_value, values_t_shape, data_mem_info);
+
+  TensorShape indices_t_shape(indices_shape_data, indices_shape_len);
+  const auto& index_dims = indices_t_shape.GetDims();
+  if (std::any_of(index_dims.cbegin(), index_dims.cend(), [](int64_t dim) { return dim < 0; })) {
+    ORT_THROW("tried Filling sparse tensor with negative value in block sparse indices shape");
+  }
+  // Strings arrive as an array of C-string pointers; all other element types are copied via IDataTransfer.
+  if (!sparse_tensor.IsDataTypeString()) {
+    auto data_transfer = GetDataTransfer(data_mem_info->device, sparse_tensor.Location().device);
+    ORT_THROW_IF_ERROR(sparse_tensor.MakeBlockSparseData(*data_transfer, *data_mem_info, values_t_shape,
+                                                         values, indices_t_shape, indices_data));
+  } else {
+    PtrConvert conv(values);
+    ORT_THROW_IF_ERROR(sparse_tensor.MakeBlockSparseStrings(values_t_shape, conv.strings, indices_t_shape, indices_data));
+  }
+  return nullptr;
+  API_IMPL_END
+}
+
+ORT_API_STATUS_IMPL(OrtApis::CreateSparseTensorWithValuesAsOrtValue, _In_ const OrtMemoryInfo* info, _Inout_ void* p_data,
+                    _In_ const int64_t* dense_shape, size_t dense_shape_len,
+                    _In_ const int64_t* values_shape, size_t values_shape_len,
+                    ONNXTensorElementDataType type, _Outptr_ OrtValue** out) {
+  API_IMPL_BEGIN
+  auto element_type = DataTypeImpl::SparseTensorTypeFromONNXEnum(type)->GetElementType();
+  assert(element_type->AsPrimitiveDataType() != nullptr);
+  if (utils::IsDataTypeString(element_type)) {
+    return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT,
+                                 "Can not use strings in pre-allocated memory."
+                                 " Use CreateSparseTensorAsOrtValue() to allocate memory inside and copy");
+  }
+  // Negative dimensions are rejected in both shapes, as CreateSparseTensorAsOrtValue() does for the dense shape.
+  const auto has_negative_dim = [](const auto& dims) { return std::any_of(dims.cbegin(), dims.cend(), [](int64_t d) { return d < 0; }); };
+  TensorShape tensor_dense_shape(dense_shape, dense_shape_len);
+  TensorShape tensor_values_shape(values_shape, values_shape_len);
+  if (has_negative_dim(tensor_dense_shape.GetDims()) || has_negative_dim(tensor_values_shape.GetDims())) {
+    return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, "tried creating tensor with negative value in shape");
+  }
+  auto value = std::make_unique<OrtValue>();
+  SparseTensor::InitOrtValue(element_type, tensor_dense_shape, tensor_values_shape, p_data, *info, *value);
+  *out = value.release();
+  return nullptr;
+  API_IMPL_END
+}
+
+ORT_API_STATUS_IMPL(OrtApis::UseCooIndices, _Inout_ OrtValue* ort_value, _Inout_ int64_t* indices_data, size_t indices_num) {
+  API_IMPL_BEGIN
+  // The mutable accessor throws if the sparse tensor already has a format assigned.
+  auto& sparse_tensor = SparseTensor::GetSparseTensorFromOrtValue(*ort_value);
+  gsl::span<int64_t> indices_span;  // stays empty for a null pointer or a zero count
+  if (indices_num != 0 && indices_data != nullptr) {
+    indices_span = gsl::make_span(indices_data, indices_num);
+  }
+  ORT_THROW_IF_ERROR(sparse_tensor.UseCooIndices(indices_span));
+  return nullptr;
+  API_IMPL_END
+}
+
+ORT_API_STATUS_IMPL(OrtApis::UseCsrIndices, _Inout_ OrtValue* ort_value,
+                    _Inout_ int64_t* inner_data, size_t inner_num,
+                    _Inout_ int64_t* outer_data, size_t outer_num) {
+  API_IMPL_BEGIN
+  auto& sparse_tensor = SparseTensor::GetSparseTensorFromOrtValue(*ort_value);
+  // A null pointer or a zero count yields an empty span for that index array.
+  const auto as_span = [](int64_t* data, size_t count) {
+    const bool empty = (count == 0 || data == nullptr);
+    return empty ? gsl::span<int64_t>() : gsl::make_span(data, count);
+  };
+  ORT_THROW_IF_ERROR(sparse_tensor.UseCsrIndices(as_span(inner_data, inner_num),
+                                                 as_span(outer_data, outer_num)));
+  return nullptr;
+  API_IMPL_END
+}
+
+ORT_API_STATUS_IMPL(OrtApis::UseBlockSparseIndices, _Inout_ OrtValue* ort_value, const int64_t* indices_shape, size_t indices_shape_len,
+                    _Inout_ int32_t* indices_data) {
+  API_IMPL_BEGIN
+  auto& target = SparseTensor::GetSparseTensorFromOrtValue(*ort_value);
+  // Shape validation happens inside SparseTensor::UseBlockSparseIndices().
+  ORT_THROW_IF_ERROR(target.UseBlockSparseIndices(TensorShape(indices_shape, indices_shape_len), indices_data));
+  return nullptr;
+  API_IMPL_END
+}
+
+ORT_API_STATUS_IMPL(OrtApis::GetSparseTensorFormat, _In_ const OrtValue* ort_value, _Out_ enum OrtSparseFormat* out) {
+  API_IMPL_BEGIN
+  // No check for an undefined format is made here: whatever Format() returns is reported.
+  if (!ort_value->IsAllocated()) {
+    return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, "the ort_value must contain a constructed tensor");
+  }
+  const auto format = ort_value->Get<SparseTensor>().Format();
+  *out = static_cast<OrtSparseFormat>(format);
+  return nullptr;
+  API_IMPL_END
+}
+
+ORT_API_STATUS_IMPL(OrtApis::GetSparseTensorValues, _In_ const OrtValue* ort_value, _Outptr_ const void** out) {
+  API_IMPL_BEGIN
+  const auto& sparse_tensor = SparseTensor::GetSparseTensorFromOrtValue(*ort_value);
+  // String values are not handed out as a raw pointer by this call.
+  if (sparse_tensor.IsDataTypeString()) {
+    return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, "Use GetStringTensor*() API to retrieve strings");
+  }
+  *out = sparse_tensor.Values().DataRaw();
+  return nullptr;
+  API_IMPL_END
+}
+
 ORT_API_STATUS_IMPL(OrtApis::CreateCustomOpDomain, _In_ const char* domain, _Outptr_ OrtCustomOpDomain** out) {
   API_IMPL_BEGIN
   auto custom_op_domain = std::make_unique<OrtCustomOpDomain>();
@@ -656,9 +882,18 @@ ORT_API_STATUS_IMPL(OrtApis::IsTensor, _In_ const OrtValue* value, _Out_ int* ou
   return nullptr;
 }
 
+ORT_API_STATUS_IMPL(OrtApis::IsSparseTensor, _In_ const OrtValue* value, _Out_ int* out) {
+  auto v = reinterpret_cast<const ::OrtValue*>(value);
+  *out = v->IsSparseTensor() ? 1 : 0;
+  return nullptr;
+}
+
 ORT_API_STATUS_IMPL(OrtApis::GetTensorMutableData, _Inout_ OrtValue* value, _Outptr_ void** output) {
   TENSOR_READWRITE_API_BEGIN
-  //TODO: test if it's a string tensor
+  // TODO: re-enable the string tensor check below once WinML has fixed their code.
+  //if (tensor->IsDataTypeString()) {
+  //  return OrtApis::CreateStatus(ORT_NOT_IMPLEMENTED, "this API does not support strings");
+  //}
   *output = tensor->MutableDataRaw();
   return nullptr;
   API_IMPL_END
@@ -693,79 +928,127 @@ ORT_API_STATUS_IMPL(OrtApis::FillStringTensorElement, _Inout_ OrtValue* value, _
   API_IMPL_END
 }
 
-ORT_API_STATUS_IMPL(OrtApis::GetStringTensorDataLength, _In_ const OrtValue* value, _Out_ size_t* out) {
-  TENSOR_READ_API_BEGIN
-  const auto* src = tensor.Data<std::string>();
-  int64_t len = tensor.Shape().Size();
-  if (len >= 0) {
-    size_t ret = 0;
-    for (int64_t i = 0; i != len; ++i) {
-      ret += src[i].size();
+namespace {
+
+OrtStatusPtr GetTensorStringSpan(const ::OrtValue& v, gsl::span<const std::string>& span) {
+  if (!v.IsAllocated()) {
+    return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, "OrtValue should contain a Tensor or a Sparse Tensor");
+  }
+  gsl::span<const std::string> str_span;
+  int64_t items = 0;
+  // Data type will be enforced on DataAsSpan() call.
+  if (v.IsTensor()) {
+    const auto& tensor = v.Get<onnxruntime::Tensor>();
+    items = tensor.Shape().Size();
+    if (items >= 0) {
+      str_span = tensor.DataAsSpan<std::string>();
     }
-    *out = ret;
-  } else
+  } else if (v.IsSparseTensor()) {
+    const auto& sparse_tensor = v.Get<SparseTensor>();
+    if (sparse_tensor.Format() == onnxruntime::SparseFormat::kUndefined) {
+      return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, "Sparse Tensor does not contain sparse data");
+    }
+    items = sparse_tensor.Values().Shape().Size();
+    if (items >= 0) {
+      str_span = sparse_tensor.Values().DataAsSpan<std::string>();
+    }
+  } else {
+    return OrtApis::CreateStatus(ORT_NOT_IMPLEMENTED, "This API supports Tensors or SparseTensors");
+  }
+
+  if (items < 0) {
     return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, "shape is invalid");
+  }
+  span = str_span;
+  return nullptr;
+}
+}  // namespace
+
+ORT_API_STATUS_IMPL(OrtApis::GetStringTensorDataLength, _In_ const OrtValue* value, _Out_ size_t* out) {
+  API_IMPL_BEGIN
+  gsl::span<const std::string> str_span;
+  if (auto* status = GetTensorStringSpan(*value, str_span)) {
+    return status;
+  }
+
+  size_t ret = 0;
+  for (const auto& s : str_span) {
+    ret += s.size();
+  }
+
+  *out = ret;
   return nullptr;
   API_IMPL_END
 }
 
 ORT_API_STATUS_IMPL(OrtApis::GetStringTensorElementLength, _In_ const OrtValue* value, size_t index, _Out_ size_t* out) {
-  TENSOR_READ_API_BEGIN
-  const auto* src = tensor.Data<std::string>();
-  auto len = static_cast<size_t>(tensor.Shape().Size());
-  if (index < len) {
-    *out = src[index].size();
-  } else
-    return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, "shape is invalid");
+  API_IMPL_BEGIN
+  gsl::span<const std::string> str_span;
+  if (auto* status = GetTensorStringSpan(*value, str_span)) {
+    return status;
+  }
+
+  if (index < str_span.size()) {
+    *out = str_span[index].size();
+  } else {
+    return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, "index is out of bounds");
+  }
+
   return nullptr;
   API_IMPL_END
 }
 
 ORT_API_STATUS_IMPL(OrtApis::GetStringTensorContent, _In_ const OrtValue* value, _Out_writes_bytes_all_(s_len) void* s,
                     size_t s_len, _Out_writes_all_(offsets_len) size_t* offsets, size_t offsets_len) {
-  TENSOR_READ_API_BEGIN
-  const auto* input = tensor.Data<std::string>();
-  auto len = static_cast<size_t>(tensor.Shape().Size());
-  if (offsets_len != len) {
+  API_IMPL_BEGIN
+
+  gsl::span<const std::string> str_span;
+  if (auto* status = GetTensorStringSpan(*value, str_span)) {
+    return status;
+  }
+
+  if (offsets_len != str_span.size()) {
     return OrtApis::CreateStatus(ORT_FAIL, "offsets buffer is not equal to tensor size");
   }
-  {
-    size_t ret = 0;
-    for (size_t i = 0; i != len; ++i) {
-      ret += input[i].size();
-    }
-    if (s_len < ret) {
-      return OrtApis::CreateStatus(ORT_FAIL, "output buffer is too small");
-    }
+
+  size_t total_size = 0;
+  for (const auto& str : str_span) {
+    total_size += str.size();
   }
+
+  if (s_len < total_size) {
+    return OrtApis::CreateStatus(ORT_FAIL, "output buffer is too small. Use GetStringTensorDataLength.");
+  }
+
   size_t f = 0;
   char* p = static_cast<char*>(s);
-  for (size_t i = 0; i != len; ++i, ++offsets) {
-    memcpy(p, input[i].data(), input[i].size());
-    p += input[i].size();
-    *offsets = f;
-    f += input[i].size();
+  for (const auto& str : str_span) {
+    memcpy(p, str.data(), str.size());
+    p += str.size();
+    *offsets++ = f;
+    f += str.size();
   }
   return nullptr;
   API_IMPL_END
 }
 
-ORT_API_STATUS_IMPL(OrtApis::GetStringTensorElement, _In_ const OrtValue* value, size_t s_len, size_t index, _Out_writes_bytes_all_(s_len) void* s) {
-  TENSOR_READ_API_BEGIN
-  const auto* input = tensor.Data<std::string>();
-  auto len = static_cast<size_t>(tensor.Shape().Size());
+ORT_API_STATUS_IMPL(OrtApis::GetStringTensorElement, _In_ const OrtValue* value,
+                    size_t s_len, size_t index, _Out_writes_bytes_all_(s_len) void* s) {
+  API_IMPL_BEGIN
+  gsl::span<const std::string> str_span;
+  if (auto* status = GetTensorStringSpan(*value, str_span)) {
+    return status;
+  }
 
-  if (index >= len) {
+  if (index < str_span.size()) {
+    const auto& str = str_span[index];
+    if (s_len < str.size()) {
+      return OrtApis::CreateStatus(ORT_FAIL, "buffer size is too small for string element");
+    }
+    memcpy(s, str.data(), str.size());
+  } else {
     return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, "element index is out of bounds");
   }
-
-  size_t ret = input[index].size();
-  if (s_len < ret) {
-    return OrtApis::CreateStatus(ORT_FAIL, "buffer size is too small for string");
-  }
-
-  memcpy(s, input[index].data(), input[index].size());
-
   return nullptr;
   API_IMPL_END
 }
@@ -2097,6 +2380,20 @@ static constexpr OrtApi ort_api_1_to_9 = {
     &OrtApis::EnableOrtCustomOps,
     &OrtApis::RegisterAllocator,
     &OrtApis::UnregisterAllocator,
+    &OrtApis::IsSparseTensor,
+    &OrtApis::CreateSparseTensorAsOrtValue,
+    &OrtApis::FillSparseTensorCoo,
+    &OrtApis::FillSparseTensorCsr,
+    &OrtApis::FillSparseTensorBlockSparse,
+    &OrtApis::CreateSparseTensorWithValuesAsOrtValue,
+    &OrtApis::UseCooIndices,
+    &OrtApis::UseCsrIndices,
+    &OrtApis::UseBlockSparseIndices,
+    &OrtApis::GetSparseTensorFormat,
+    &OrtApis::GetSparseTensorValuesTypeAndShape,
+    &OrtApis::GetSparseTensorValues,
+    &OrtApis::GetSparseTensorIndicesTypeShape,
+    &OrtApis::GetSparseTensorIndices,
 };
 
 // Asserts to do a some checks to ensure older Versions of the OrtApi never change (will detect an addition or deletion but not if they cancel out each other)
diff --git a/onnxruntime/core/session/ort_apis.h b/onnxruntime/core/session/ort_apis.h
index 080431028f..6a6b3fa817 100644
--- a/onnxruntime/core/session/ort_apis.h
+++ b/onnxruntime/core/session/ort_apis.h
@@ -288,4 +288,31 @@ ORT_API(void, ReleaseTensorRTProviderOptions, _Frees_ptr_opt_ OrtTensorRTProvide
 ORT_API_STATUS_IMPL(EnableOrtCustomOps, _Inout_ OrtSessionOptions* options);
 ORT_API_STATUS_IMPL(RegisterAllocator, _Inout_ OrtEnv* env, _In_ OrtAllocator* allocator);
 ORT_API_STATUS_IMPL(UnregisterAllocator, _Inout_ OrtEnv* env, _In_ const OrtMemoryInfo* mem_info);
+// SparseTensor related API
+ORT_API_STATUS_IMPL(IsSparseTensor, _In_ const OrtValue* value, _Out_ int* out);
+ORT_API_STATUS_IMPL(CreateSparseTensorAsOrtValue, _Inout_ OrtAllocator* allocator, _In_ const int64_t* dense_shape,
+                    size_t dense_shape_len, ONNXTensorElementDataType type, _Outptr_ OrtValue** out);
+ORT_API_STATUS_IMPL(FillSparseTensorCoo, _Inout_ OrtValue* ort_value, _In_ const OrtMemoryInfo* mem_info,
+                    _In_ const int64_t* values_shape, size_t values_shape_len, _In_ const void* values,
+                    _In_ const int64_t* indices_data, size_t indices_num);
+ORT_API_STATUS_IMPL(FillSparseTensorCsr, _Inout_ OrtValue* ort_value, _In_ const OrtMemoryInfo* data_mem_info,
+                    _In_ const int64_t* values_shape, size_t values_shape_len, const void* values,
+                    _In_ const int64_t* inner_indices_data, size_t inner_indices_num,
+                    _In_ const int64_t* outer_indices_data, size_t outer_indices_num);
+ORT_API_STATUS_IMPL(FillSparseTensorBlockSparse, _Inout_ OrtValue* ort_value, _In_ const OrtMemoryInfo* data_mem_info,
+                    _In_ const int64_t* values_shape, size_t values_shape_len, _In_ const void* values,
+                    _In_ const int64_t* indices_shape_data, size_t indices_shape_len,
+                    _In_ const int32_t* indices_data);
+ORT_API_STATUS_IMPL(CreateSparseTensorWithValuesAsOrtValue, _In_ const OrtMemoryInfo* info, _Inout_ void* p_data,
+                    _In_ const int64_t* dense_shape, size_t dense_shape_len,
+                    _In_ const int64_t* values_shape, size_t values_shape_len,
+                    ONNXTensorElementDataType type, _Outptr_ OrtValue** out);
+ORT_API_STATUS_IMPL(UseCooIndices, _Inout_ OrtValue* ort_value, _Inout_ int64_t* indices_data, size_t indices_num);
+ORT_API_STATUS_IMPL(UseCsrIndices, _Inout_ OrtValue*, _Inout_ int64_t* inner_data, size_t inner_num, _Inout_ int64_t* outer_data, size_t outer_num);
+ORT_API_STATUS_IMPL(UseBlockSparseIndices, _Inout_ OrtValue* ort_value, const int64_t* indices_shape, size_t indices_shape_len, _Inout_ int32_t* indices_data);
+ORT_API_STATUS_IMPL(GetSparseTensorFormat, _In_ const OrtValue* ort_value, _Out_ enum OrtSparseFormat* out);
+ORT_API_STATUS_IMPL(GetSparseTensorValuesTypeAndShape, _In_ const OrtValue* ort_value, _Outptr_ OrtTensorTypeAndShapeInfo** out);
+ORT_API_STATUS_IMPL(GetSparseTensorValues, _In_ const OrtValue* ort_value, _Outptr_ const void** out);
+ORT_API_STATUS_IMPL(GetSparseTensorIndicesTypeShape, _In_ const OrtValue* ort_value, enum OrtSparseIndicesFormat indices_format, _Outptr_ OrtTensorTypeAndShapeInfo** out);
+ORT_API_STATUS_IMPL(GetSparseTensorIndices, _In_ const OrtValue* ort_value, enum OrtSparseIndicesFormat indices_format, _Out_ size_t* num_indices, _Outptr_ const void** indices);
 }  // namespace OrtApis
diff --git a/onnxruntime/test/framework/sparse_kernels_test.cc b/onnxruntime/test/framework/sparse_kernels_test.cc
index c2d0f86090..01a5adc387 100644
--- a/onnxruntime/test/framework/sparse_kernels_test.cc
+++ b/onnxruntime/test/framework/sparse_kernels_test.cc
@@ -36,7 +36,6 @@ inline int64_t vector_len(const std::vector<T>& v) {
   return static_cast<int64_t>(v.size());
 }
 
-
 // This file contains sample implementations of several ops with sparse-tensor inputs/outputs.
 // Each op is implemented as a struct with the following signature:
 // struct SparseOp {
@@ -1209,9 +1208,6 @@ TEST(SparseTensorConversionTests, TestDenseToSparseConversion) {
       RawSparseDataChecker<uint8_t>);
 }
 
-template <class T>
-using SparseMatrixRowMajor = Eigen::SparseMatrix<T, Eigen::RowMajor, int64_t>;
-
 TEST(SparseTensorConversionTests, CsrConversion) {
   auto* cpu_provider = TestCPUExecutionProvider();
   auto cpu_allocator = cpu_provider->GetAllocator(0, OrtMemTypeDefault);
@@ -1234,6 +1230,7 @@ TEST(SparseTensorConversionTests, CsrConversion) {
 
   const std::vector<int32_t> expected_values = {1, 1, 1};
   const std::vector<std::string> expected_values_str = {"1", "1", "1"};
+  const char* const strings[] = {"1", "1", "1"};
   const std::vector<int64_t> expected_inner = {2, 0, 2};
   const std::vector<int64_t> expected_outer = {0, 1, 3, 3};
 
@@ -1242,6 +1239,49 @@ TEST(SparseTensorConversionTests, CsrConversion) {
     auto cpu_transfer = cpu_provider->GetDataTransfer();
     dtm.RegisterDataTransfer(std::move(cpu_transfer));
   }
+  {
+    {
+      // Fully sparse CSR tensor in an allocator-owned buffer: MakeCsrData() with zero values and empty indices.
+      SparseTensor fully_sparse(DataTypeImpl::GetType<int32_t>(), TensorShape{3, 3}, cpu_allocator);
+      ASSERT_STATUS_OK(fully_sparse.MakeCsrData(*cpu_provider->GetDataTransfer(), cpu_allocator->Info(),
+                                                0U, nullptr, gsl::span<int64_t>(), gsl::span<int64_t>()));
+      ASSERT_EQ(fully_sparse.Format(), SparseFormat::kCsrc);
+      ASSERT_EQ(0, fully_sparse.RequiredAllocationSize());
+      ASSERT_EQ(0U, fully_sparse.NumValues());
+      ASSERT_EQ(1U, fully_sparse.Values().Shape().GetDims().size());
+      ASSERT_EQ(0, fully_sparse.Values().Shape().Size());
+      ASSERT_TRUE(fully_sparse.Values().DataAsSpan<int32_t>().empty());
+      auto csr_view = fully_sparse.AsCsr();
+      const auto& inner = csr_view.Inner();
+      ASSERT_EQ(0, inner.Shape().Size());
+      ASSERT_EQ(1U, inner.Shape().GetDims().size());
+      ASSERT_TRUE(inner.DataAsSpan<int64_t>().empty());
+      const auto& outer = csr_view.Outer();
+      ASSERT_EQ(0, outer.Shape().Size());
+      ASSERT_EQ(1U, outer.Shape().GetDims().size());
+      ASSERT_TRUE(outer.DataAsSpan<int64_t>().empty());
+    }
+    {
+      // Fully sparse CSR tensor over a user-supplied buffer: {0} values shape, nullptr data, empty UseCsrIndices().
+      SparseTensor fully_sparse(DataTypeImpl::GetType<int32_t>(), TensorShape{3, 3}, TensorShape{0}, nullptr, cpu_allocator->Info());
+      ASSERT_STATUS_OK(fully_sparse.UseCsrIndices(gsl::span<int64_t>(), gsl::span<int64_t>()));
+      ASSERT_EQ(fully_sparse.Format(), SparseFormat::kCsrc);
+      ASSERT_EQ(0, fully_sparse.RequiredAllocationSize());
+      ASSERT_EQ(0U, fully_sparse.NumValues());
+      ASSERT_EQ(1U, fully_sparse.Values().Shape().GetDims().size());
+      ASSERT_EQ(0, fully_sparse.Values().Shape().Size());
+      ASSERT_TRUE(fully_sparse.Values().DataAsSpan<int32_t>().empty());
+      auto csr_view = fully_sparse.AsCsr();
+      const auto& inner = csr_view.Inner();
+      ASSERT_EQ(0, inner.Shape().Size());
+      ASSERT_EQ(1U, inner.Shape().GetDims().size());
+      ASSERT_TRUE(inner.DataAsSpan<int64_t>().empty());
+      const auto& outer = csr_view.Outer();
+      ASSERT_EQ(0, outer.Shape().Size());
+      ASSERT_EQ(1U, outer.Shape().GetDims().size());
+      ASSERT_TRUE(outer.DataAsSpan<int64_t>().empty());
+    }
+  }
 
   Tensor dense_cpu_src(DataTypeImpl::GetType<int32_t>(), dense_shape, dense_data.data(), cpu_allocator->Info());
   {
@@ -1309,6 +1349,28 @@ TEST(SparseTensorConversionTests, CsrConversion) {
     ASSERT_TRUE(std::equal(dense_values_dst.cbegin(), dense_values_dst.cend(), dense_data_str.cbegin(), dense_data_str.cend()));
   }
 
+  {
+    // Use MakeCsrStrings()
+    SparseTensor str_cpu_src(DataTypeImpl::GetType<std::string>(), dense_shape, cpu_allocator);
+    ASSERT_STATUS_OK(str_cpu_src.MakeCsrStrings(expected_values_str.size(), strings,
+                                                gsl::make_span(expected_inner), gsl::make_span(expected_outer)));
+    ASSERT_EQ(str_cpu_src.Format(), SparseFormat::kCsrc);
+    ASSERT_TRUE(str_cpu_src.IsDataTypeString());
+    ASSERT_EQ(str_cpu_src.DenseShape().GetDims(), dense_shape);
+    ASSERT_EQ(str_cpu_src.NumValues(), expected_values_str.size());
+    auto values = str_cpu_src.Values().DataAsSpan<std::string>();
+    ASSERT_TRUE(std::equal(expected_values_str.cbegin(), expected_values_str.cend(), values.cbegin(), values.cend()));
+
+    auto csr_view = str_cpu_src.AsCsr();
+    auto inner = csr_view.Inner().DataAsSpan<int64_t>();
+    ASSERT_EQ(expected_inner.size(), inner.size());
+    ASSERT_TRUE(std::equal(expected_inner.cbegin(), expected_inner.cend(), inner.cbegin(), inner.cend()));
+
+    auto outer = csr_view.Outer().DataAsSpan<int64_t>();
+    ASSERT_EQ(expected_outer.size(), outer.size());
+    ASSERT_TRUE(std::equal(expected_outer.cbegin(), expected_outer.cend(), outer.cbegin(), outer.cend()));
+  }
+
 #ifdef USE_CUDA
   auto cuda_provider = DefaultCudaExecutionProvider();
   auto cuda_allocator = cuda_provider->GetAllocator(0, OrtMemTypeDefault);
@@ -1387,6 +1449,7 @@ TEST(SparseTensorConversionTests, CooConversion) {
 
   const std::vector<int32_t> expected_values = {1, 1, 1};
   const std::vector<std::string> expected_values_str = {"1", "1", "1"};
+  const char* const strings[] = {"1", "1", "1"};
   const std::vector<int64_t> expected_linear_indices = {2, 3, 5};
   const std::vector<int64_t> expected_2d_indices = {0, 2, 1, 0, 1, 2};
 
@@ -1395,6 +1458,43 @@ TEST(SparseTensorConversionTests, CooConversion) {
     auto cpu_transfer = cpu_provider->GetDataTransfer();
     dtm.RegisterDataTransfer(std::move(cpu_transfer));
   }
+
+  {
+    // Fully sparse COO tensor in an allocator-owned buffer: MakeCooData() with zero values and empty indices.
+    SparseTensor fully_sparse(DataTypeImpl::GetType<int32_t>(), TensorShape{3, 3}, cpu_allocator);
+    ASSERT_STATUS_OK(fully_sparse.MakeCooData(*cpu_provider->GetDataTransfer(), cpu_allocator->Info(), 0, nullptr, gsl::span<int64_t>()));
+    ASSERT_EQ(fully_sparse.Format(), SparseFormat::kCoo);
+    ASSERT_EQ(0, fully_sparse.RequiredAllocationSize());
+    ASSERT_EQ(0U, fully_sparse.NumValues());
+    ASSERT_EQ(1U, fully_sparse.Values().Shape().GetDims().size());
+    ASSERT_EQ(0, fully_sparse.Values().Shape().Size());
+    ASSERT_TRUE(fully_sparse.Values().DataAsSpan<int32_t>().empty());
+    auto coo_view = fully_sparse.AsCoo();
+    const auto& indices = coo_view.Indices();
+    ASSERT_EQ(0, indices.Shape().Size());
+    // A fully sparse COO tensor is expected to report 2-D indices.
+    ASSERT_EQ(2U, indices.Shape().GetDims().size());
+    ASSERT_TRUE(indices.DataAsSpan<int64_t>().empty());
+  }
+
+  {
+    // Fully sparse COO tensor over a user-supplied buffer: {0} values shape, nullptr data, empty UseCooIndices().
+    SparseTensor fully_sparse(DataTypeImpl::GetType<int32_t>(), TensorShape{3, 3}, TensorShape{0}, nullptr, cpu_allocator->Info());
+    ASSERT_STATUS_OK(fully_sparse.UseCooIndices(gsl::span<int64_t>()));
+    ASSERT_EQ(fully_sparse.Format(), SparseFormat::kCoo);
+    ASSERT_EQ(0, fully_sparse.RequiredAllocationSize());
+    ASSERT_EQ(0U, fully_sparse.NumValues());
+    ASSERT_EQ(1U, fully_sparse.Values().Shape().GetDims().size());
+    ASSERT_EQ(0, fully_sparse.Values().Shape().Size());
+    ASSERT_TRUE(fully_sparse.Values().DataAsSpan<int32_t>().empty());
+    auto coo_view = fully_sparse.AsCoo();
+    const auto& indices = coo_view.Indices();
+    ASSERT_EQ(0, indices.Shape().Size());
+    // A fully sparse COO tensor is expected to report 2-D indices.
+    ASSERT_EQ(2U, indices.Shape().GetDims().size());
+    ASSERT_TRUE(indices.DataAsSpan<int64_t>().empty());
+  }
+
   Tensor dense_cpu_src(DataTypeImpl::GetType<int32_t>(), dense_shape, dense_data.data(), cpu_allocator->Info());
   {
     // test where both src and destination are on CPU. Linear index.
@@ -1452,6 +1552,25 @@ TEST(SparseTensorConversionTests, CooConversion) {
     ASSERT_TRUE(std::equal(dense_values_dst.cbegin(), dense_values_dst.cend(), dense_data_str.cbegin(), dense_data_str.cend()));
   }
 
+  {
+    // Use MakeCooStrings()
+    SparseTensor str_cpu_src(DataTypeImpl::GetType<std::string>(), dense_shape, cpu_allocator);
+    ASSERT_STATUS_OK(str_cpu_src.MakeCooStrings(expected_values_str.size(), strings,
+                                                gsl::make_span(expected_linear_indices)));
+    ASSERT_EQ(str_cpu_src.Format(), SparseFormat::kCoo);
+    ASSERT_TRUE(str_cpu_src.IsDataTypeString());
+    ASSERT_EQ(str_cpu_src.DenseShape().GetDims(), dense_shape);
+    ASSERT_EQ(str_cpu_src.NumValues(), expected_values_str.size());
+    auto values = str_cpu_src.Values().DataAsSpan<std::string>();
+    ASSERT_TRUE(std::equal(expected_values_str.cbegin(), expected_values_str.cend(), values.cbegin(), values.cend()));
+
+    auto coo_view = str_cpu_src.AsCoo();
+    auto indices = coo_view.Indices().DataAsSpan<int64_t>();
+    ASSERT_EQ(expected_linear_indices.size(), indices.size());
+    ASSERT_TRUE(std::equal(expected_linear_indices.cbegin(), expected_linear_indices.cend(), indices.cbegin(), indices.cend()));
+  }
+
+
   {
     // test where both src and destination are on CPU. 2-D index
     SparseTensor dst;
@@ -1539,5 +1658,133 @@ TEST(SparseTensorConversionTests, CooConversion) {
 #endif
 }
 #endif  // !ORT_MINIMAL_BUILD
+
+TEST(SparseTensorConversionTests, BlockSparse) {
+  auto* cpu_provider = TestCPUExecutionProvider();
+  auto cpu_allocator = cpu_provider->GetAllocator(0, OrtMemTypeDefault);
+
+  DataTransferManager dtm;
+  {
+    auto cpu_transfer = cpu_provider->GetDataTransfer();
+    dtm.RegisterDataTransfer(std::move(cpu_transfer));
+  }
+
+  {
+    // Fully sparse block-sparse tensor in an allocator-owned buffer: MakeBlockSparseData() with {0} shapes.
+    SparseTensor fully_sparse(DataTypeImpl::GetType<int32_t>(), TensorShape{3, 3}, cpu_allocator);
+    ASSERT_STATUS_OK(fully_sparse.MakeBlockSparseData(*cpu_provider->GetDataTransfer(), cpu_allocator->Info(),
+                                                      TensorShape{0}, nullptr, TensorShape{0}, nullptr));
+    ASSERT_EQ(fully_sparse.Format(), SparseFormat::kBlockSparse);
+    ASSERT_EQ(0, fully_sparse.RequiredAllocationSize());
+    ASSERT_EQ(0U, fully_sparse.NumValues());
+    ASSERT_EQ(1U, fully_sparse.Values().Shape().GetDims().size());
+    ASSERT_EQ(0, fully_sparse.Values().Shape().Size());
+    ASSERT_TRUE(fully_sparse.Values().DataAsSpan<int32_t>().empty());
+    auto blocksparse_view = fully_sparse.AsBlockSparse();
+    const auto& indices = blocksparse_view.Indices();
+    ASSERT_EQ(0, indices.Shape().Size());
+    ASSERT_EQ(1U, indices.Shape().GetDims().size());
+    ASSERT_TRUE(indices.DataAsSpan<int32_t>().empty());
+  }
+
+  {
+    // Fully sparse block-sparse tensor over a user-supplied buffer: UseBlockSparseIndices() with a {0} shape.
+    SparseTensor fully_sparse(DataTypeImpl::GetType<int32_t>(), TensorShape{3, 3},
+                              TensorShape{0}, nullptr, cpu_allocator->Info());
+    ASSERT_STATUS_OK(fully_sparse.UseBlockSparseIndices(TensorShape{0}, nullptr));
+    ASSERT_EQ(fully_sparse.Format(), SparseFormat::kBlockSparse);
+    ASSERT_EQ(0, fully_sparse.RequiredAllocationSize());
+    ASSERT_EQ(0U, fully_sparse.NumValues());
+    ASSERT_EQ(1U, fully_sparse.Values().Shape().GetDims().size());
+    ASSERT_EQ(0, fully_sparse.Values().Shape().Size());
+    ASSERT_TRUE(fully_sparse.Values().DataAsSpan<int32_t>().empty());
+    auto blocksparse_view = fully_sparse.AsBlockSparse();
+    const auto& indices = blocksparse_view.Indices();
+    ASSERT_EQ(0, indices.Shape().Size());
+    ASSERT_EQ(1U, indices.Shape().GetDims().size());
+    ASSERT_TRUE(indices.DataAsSpan<int32_t>().empty());
+  }
+
+  const TensorShape dense_shape{8, 8};
+  constexpr int64_t block_size = 2;
+  const TensorShape values_shape{2, block_size, block_size};
+  // Two dense blocks
+  std::vector<int32_t> data_blocks{
+      1, 2, 3, 4, 5, 6, 7, 8};
+
+  const char* const strings[] = {
+      "1", "2", "3", "4", "5", "6", "7", "8"};
+
+  const std::string expected_strings[] = {
+      "1", "2", "3", "4", "5", "6", "7", "8"};
+
+
+  const TensorShape indices_shape{2, 2};  // two blocks by two coordinates
+  // Block coordinates: (0, 0) and (0, 1)
+  std::vector<int32_t> blocksparse_indices = {
+      0, 0, 0, 1};
+
+  {
+    // Instantiation only: values and indices are copied into the tensor's own buffer via MakeBlockSparseData().
+    SparseTensor own_buffer_tensor(DataTypeImpl::GetType<int32_t>(), dense_shape, cpu_allocator);
+    ASSERT_STATUS_OK(own_buffer_tensor.MakeBlockSparseData(*cpu_provider->GetDataTransfer(), cpu_allocator->Info(),
+                                                           values_shape, data_blocks.data(),
+                                                           indices_shape, blocksparse_indices.data()));
+    ASSERT_EQ(own_buffer_tensor.Format(), SparseFormat::kBlockSparse);
+    ASSERT_EQ(dense_shape, own_buffer_tensor.DenseShape());
+    ASSERT_EQ(data_blocks.size(), own_buffer_tensor.NumValues());
+    ASSERT_EQ(values_shape, own_buffer_tensor.Values().Shape());
+    auto data_span = own_buffer_tensor.Values().DataAsSpan<int32_t>();
+    ASSERT_EQ(data_blocks.size(), data_span.size());
+    ASSERT_TRUE(std::equal(data_blocks.cbegin(), data_blocks.cend(), data_span.cbegin(), data_span.cend()));
+
+    const auto& indices = own_buffer_tensor.AsBlockSparse().Indices();
+    ASSERT_EQ(indices_shape, indices.Shape());
+    auto indices_span = indices.DataAsSpan<int32_t>();
+    ASSERT_TRUE(std::equal(blocksparse_indices.cbegin(), blocksparse_indices.cend(),
+                           indices_span.cbegin(), indices_span.cend()));
+  }
+
+  {
+    // Instantiation only: tensor wraps user-supplied values and indices via UseBlockSparseIndices().
+    SparseTensor user_buffer_tensor(DataTypeImpl::GetType<int32_t>(), dense_shape, values_shape, data_blocks.data(), cpu_allocator->Info());
+    ASSERT_STATUS_OK(user_buffer_tensor.UseBlockSparseIndices(indices_shape, blocksparse_indices.data()));
+    ASSERT_EQ(user_buffer_tensor.Format(), SparseFormat::kBlockSparse);
+    ASSERT_EQ(dense_shape, user_buffer_tensor.DenseShape());
+    ASSERT_EQ(data_blocks.size(), user_buffer_tensor.NumValues());
+    ASSERT_EQ(values_shape, user_buffer_tensor.Values().Shape());
+    auto data_span = user_buffer_tensor.Values().DataAsSpan<int32_t>();
+    ASSERT_EQ(data_blocks.size(), data_span.size());
+    ASSERT_TRUE(std::equal(data_blocks.cbegin(), data_blocks.cend(), data_span.cbegin(), data_span.cend()));
+
+    const auto& indices = user_buffer_tensor.AsBlockSparse().Indices();
+    ASSERT_EQ(indices_shape, indices.Shape());
+    auto indices_span = indices.DataAsSpan<int32_t>();
+    ASSERT_TRUE(std::equal(blocksparse_indices.cbegin(), blocksparse_indices.cend(),
+                           indices_span.cbegin(), indices_span.cend()));
+  }
+
+  {
+    // Use MakeBlockSparseStrings()
+    SparseTensor own_buffer_tensor(DataTypeImpl::GetType<std::string>(), dense_shape, cpu_allocator);
+    ASSERT_STATUS_OK(own_buffer_tensor.MakeBlockSparseStrings(values_shape, strings, indices_shape, blocksparse_indices.data()));
+    ASSERT_TRUE(own_buffer_tensor.IsDataTypeString());
+    ASSERT_EQ(own_buffer_tensor.Format(), SparseFormat::kBlockSparse);
+    ASSERT_EQ(dense_shape, own_buffer_tensor.DenseShape());
+    ASSERT_EQ(data_blocks.size(), own_buffer_tensor.NumValues());
+    ASSERT_EQ(values_shape, own_buffer_tensor.Values().Shape());
+    auto data_span = own_buffer_tensor.Values().DataAsSpan<std::string>();
+    auto expected_span = gsl::make_span(expected_strings);
+    ASSERT_EQ(expected_span.size(), data_span.size());
+    ASSERT_TRUE(std::equal(expected_span.cbegin(), expected_span.cend(), data_span.cbegin(), data_span.cend()));
+
+    const auto& indices = own_buffer_tensor.AsBlockSparse().Indices();
+    ASSERT_EQ(indices_shape, indices.Shape());
+    auto indices_span = indices.DataAsSpan<int32_t>();
+    ASSERT_TRUE(std::equal(blocksparse_indices.cbegin(), blocksparse_indices.cend(),
+                           indices_span.cbegin(), indices_span.cend()));
+
+  }
+}
 }  // namespace test
 }  // namespace onnxruntime
diff --git a/onnxruntime/test/shared_lib/test_inference.cc b/onnxruntime/test/shared_lib/test_inference.cc
index 4c43a575b2..4c9316e6dd 100644
--- a/onnxruntime/test/shared_lib/test_inference.cc
+++ b/onnxruntime/test/shared_lib/test_inference.cc
@@ -23,6 +23,7 @@
 #include "test_fixture.h"
 #include "utils.h"
 #include "custom_op_utils.h"
+#include <gsl/gsl>
 
 #ifdef _WIN32
 #include <Windows.h>
@@ -175,6 +176,10 @@ static constexpr PATH_TYPE VARIED_INPUT_CUSTOM_OP_MODEL_URI_2 = TSTR("testdata/f
 static constexpr PATH_TYPE OPTIONAL_INPUT_OUTPUT_CUSTOM_OP_MODEL_URI = TSTR("testdata/foo_bar_1.onnx");
 static constexpr PATH_TYPE OPTIONAL_INPUT_OUTPUT_CUSTOM_OP_MODEL_URI_2 = TSTR("testdata/foo_bar_2.onnx");
 static constexpr PATH_TYPE CUSTOM_OP_MODEL_WITH_ATTRIBUTES_URI = TSTR("testdata/foo_bar_3.onnx");
+static constexpr PATH_TYPE SPARSE_OUTPUT_MODEL_URI = TSTR("testdata/sparse_initializer_as_output.onnx");
+#ifndef DISABLE_CONTRIB_OPS
+static constexpr PATH_TYPE SPARSE_INPUT_MATMUL_MODEL_URI = TSTR("testdata/sparse_to_dense_matmul.onnx");
+#endif
 
 #ifdef ENABLE_EXTENSION_CUSTOM_OPS
 static constexpr PATH_TYPE ORT_CUSTOM_OPS_MODEL_URI = TSTR("testdata/custom_op_string_lower.onnx");
@@ -239,6 +244,121 @@ INSTANTIATE_TEST_SUITE_P(CApiTestWithProviders,
                          CApiTestWithProvider,
                          ::testing::Values(0, 1, 2, 3, 4));
 
+TEST(CApiTest, SparseOutputModel) {
+  std::vector<int64_t> dense_shape{3, 3};
+  std::vector<float> values{1.764052391052246, 0.40015721321105957, 0.978738009929657};
+  std::vector<int64_t> values_shape{3};
+  std::vector<int64_t> coo_indices{2, 3, 5};
+  std::vector<int64_t> indices_shape{3};
+
+  std::vector<Ort::Value> ort_inputs;
+  std::vector<const char*> input_names;
+  const char* const output_names[] = {"values"};
+  Ort::Session session(*ort_env, SPARSE_OUTPUT_MODEL_URI, Ort::SessionOptions{});
+  auto ort_outputs = session.Run(Ort::RunOptions{}, input_names.data(), ort_inputs.data(), ort_inputs.size(),
+                                   output_names, 1);
+  ASSERT_EQ(ort_outputs.size(), 1U);
+  const auto& sparse_output = ort_outputs[0];
+  auto ti = sparse_output.GetTypeInfo();
+  ASSERT_EQ(ONNX_TYPE_SPARSETENSOR, ti.GetONNXType());
+  auto tensor_type_shape = ti.GetTensorTypeAndShapeInfo();
+  ASSERT_EQ(dense_shape, tensor_type_shape.GetShape());
+  ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, tensor_type_shape.GetElementType());
+
+  ASSERT_EQ(ORT_SPARSE_COO, sparse_output.GetSparseFormat());
+  auto values_ts = sparse_output.GetSparseTensorValuesTypeAndShapeInfo();
+  ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, values_ts.GetElementType());
+  ASSERT_EQ(values_shape, values_ts.GetShape());
+
+  const auto* values_fetch = sparse_output.GetSparseTensorValues<float>();
+  auto val_span = gsl::make_span(values_fetch, values.size());
+  ASSERT_TRUE(std::equal(values.cbegin(), values.cend(), val_span.cbegin(), val_span.cend()));
+
+  auto indices_ts = sparse_output.GetSparseTensorIndicesTypeShapeInfo(ORT_SPARSE_COO_INDICES);
+  ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64, indices_ts.GetElementType());
+  ASSERT_EQ(indices_shape, indices_ts.GetShape());
+
+  size_t num_indices = 0;
+  const int64_t* indices = sparse_output.GetSparseTensorIndicesData<int64_t>(ORT_SPARSE_COO_INDICES, num_indices);
+  ASSERT_EQ(num_indices, static_cast<size_t>(indices_shape[0]));
+  auto ind_span = gsl::make_span(indices, num_indices);
+  ASSERT_TRUE(std::equal(coo_indices.cbegin(), coo_indices.cend(), ind_span.cbegin(), ind_span.cend()));
+}
+
+#ifndef DISABLE_CONTRIB_OPS
+TEST(CApiTest, SparseInputModel) {
+
+  std::vector<int64_t> common_shape{9, 9};  // inputs and outputs same shape
+  std::vector<float> A_values{1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0,
+                              10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+                              18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0,
+                              26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0, 33.0,
+                              34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0,
+                              42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0, 49.0,
+                              50.0, 51.0, 52.0, 53.0};
+
+  // 2-D COO indices: one (row, col) pair per value
+  std::vector<int64_t> indices_shape{gsl::narrow<int64_t>(A_values.size()), 2};
+  std::vector<int64_t> A_indices{0, 1, 0, 2, 0, 6, 0, 7, 0, 8, 1, 0, 1,
+                                 1, 1, 2, 1, 6, 1, 7, 1, 8, 2, 0, 2, 1,
+                                 2, 2, 2, 6, 2, 7, 2, 8, 3, 3, 3, 4, 3,
+                                 5, 3, 6, 3, 7, 3, 8, 4, 3, 4, 4, 4, 5,
+                                 4, 6, 4, 7, 4, 8, 5, 3, 5, 4, 5, 5, 5,
+                                 6, 5, 7, 5, 8, 6, 0, 6, 1, 6, 2, 6, 3,
+                                 6, 4, 6, 5, 7, 0, 7, 1, 7, 2, 7, 3, 7,
+                                 4, 7, 5, 8, 0, 8, 1, 8, 2, 8, 3, 8, 4,
+                                 8, 5};
+
+  std::vector<float> B_data{0, 1, 2, 0, 0, 0, 3, 4, 5,
+                            6, 7, 8, 0, 0, 0, 9, 10, 11,
+                            12, 13, 14, 0, 0, 0, 15, 16, 17,
+                            0, 0, 0, 18, 19, 20, 21, 22, 23,
+                            0, 0, 0, 24, 25, 26, 27, 28, 29,
+                            0, 0, 0, 30, 31, 32, 33, 34, 35,
+                            36, 37, 38, 39, 40, 41, 0, 0, 0,
+                            42, 43, 44, 45, 46, 47, 0, 0, 0,
+                            48, 49, 50, 51, 52, 53, 0, 0, 0};
+
+   std::vector<float> Y_result{546, 561, 576, 552, 564, 576, 39, 42, 45,
+                              1410, 1461, 1512, 1362, 1392, 1422, 201, 222, 243,
+                              2274, 2361, 2448, 2172, 2220, 2268, 363, 402, 441,
+                              2784, 2850, 2916, 4362, 4485, 4608, 1551, 1608, 1665,
+                              3540, 3624, 3708, 5604, 5763, 5922, 2037, 2112, 2187,
+                              4296, 4398, 4500, 6846, 7041, 7236, 2523, 2616, 2709,
+                              678, 789, 900, 2892, 3012, 3132, 4263, 4494, 4725,
+                              786, 915, 1044, 3324, 3462, 3600, 4911, 5178, 5445,
+                              894, 1041, 1188, 3756, 3912, 4068, 5559, 5862, 6165};
+
+   Ort::MemoryInfo info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault);
+   Ort::Value::Shape ort_dense_shape{common_shape.data(), common_shape.size()};
+   Ort::Value::Shape ort_values_shape{&indices_shape[0], 1U};
+   auto a_st = Ort::Value::CreateSparseTensor(info, A_values.data(), ort_dense_shape, ort_values_shape);
+   a_st.UseCooIndices(A_indices.data(), A_indices.size());
+
+   auto b_tensor = Ort::Value::CreateTensor(info, B_data.data(), B_data.size(), common_shape.data(), common_shape.size());
+
+   std::vector<Ort::Value> ort_inputs;
+   ort_inputs.push_back(std::move(a_st));
+   ort_inputs.push_back(std::move(b_tensor));
+   const char* input_names[] = {"sparse_A", "dense_B"};
+   const char* const output_names[] = {"dense_Y"};
+   Ort::Session session(*ort_env, SPARSE_INPUT_MATMUL_MODEL_URI, Ort::SessionOptions{});
+   auto ort_outputs = session.Run(Ort::RunOptions{}, input_names, ort_inputs.data(), ort_inputs.size(),
+                                  output_names, 1);
+   ASSERT_EQ(ort_outputs.size(), 1U);
+   const auto& dense_Y = ort_outputs[0];
+   ASSERT_TRUE(dense_Y.IsTensor());
+
+   auto result_ts = dense_Y.GetTensorTypeAndShapeInfo();
+   ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, result_ts.GetElementType());
+   ASSERT_EQ(common_shape, result_ts.GetShape());
+
+   const auto* result_vals = dense_Y.GetTensorData<float>();
+   auto result_span = gsl::make_span(result_vals, Y_result.size());
+   ASSERT_TRUE(std::equal(Y_result.cbegin(), Y_result.cend(), result_span.cbegin(), result_span.cend()));
+}
+#endif  // !DISABLE_CONTRIB_OPS
+
 TEST(CApiTest, custom_op_handler) {
   std::cout << "Running custom op inference" << std::endl;
 
diff --git a/onnxruntime/test/shared_lib/test_nontensor_types.cc b/onnxruntime/test/shared_lib/test_nontensor_types.cc
index 8110074638..232b1d8a62 100644
--- a/onnxruntime/test/shared_lib/test_nontensor_types.cc
+++ b/onnxruntime/test/shared_lib/test_nontensor_types.cc
@@ -9,6 +9,8 @@
 #include "core/session/onnxruntime_cxx_api.h"
 #include "test_allocator.h"
 
+#include <gsl/gsl>
+
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 
@@ -306,3 +308,617 @@ TEST(CApiTest, TypeInfoSequence) {
   ASSERT_EQ(seq_type_info.GetSequenceElementType().GetTensorTypeAndShapeInfo().GetElementType(),
             ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64);
 }
+
+// Builds COO, CSR and BlockSparse tensors over caller-provided value/index
+// buffers (CreateSparseTensor + Use*Indices) and checks that type info, format,
+// values and indices read back through the C++ API match what was supplied.
+TEST(CApiTest, SparseTensorUsingAPI) {
+  Ort::MemoryInfo info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault);
+
+  {
+    // COO: three values addressed by 1-D (linear) indices
+    const std::vector<int64_t> dense_shape{3, 3};
+    const std::vector<int64_t> values_shape{3};
+    std::vector<int32_t> expected_values = {1, 1, 1};
+    constexpr int64_t values_len = 3;
+    std::vector<int64_t> expected_linear_indices = {2, 3, 5};
+    const std::vector<int64_t> indices_shape{3};
+
+    Ort::Value::Shape ort_dense_shape{dense_shape.data(), dense_shape.size()};
+    Ort::Value::Shape ort_values_shape{&values_len, 1U};
+    auto coo_st = Ort::Value::CreateSparseTensor(info, expected_values.data(), ort_dense_shape, ort_values_shape);
+    coo_st.UseCooIndices(expected_linear_indices.data(), expected_linear_indices.size());
+
+    {
+      auto ti = coo_st.GetTypeInfo();
+      ASSERT_EQ(ONNX_TYPE_SPARSETENSOR, ti.GetONNXType());
+      auto tensor_type_shape = ti.GetTensorTypeAndShapeInfo();
+      ASSERT_EQ(dense_shape, tensor_type_shape.GetShape());
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32, tensor_type_shape.GetElementType());
+      ASSERT_EQ(dense_shape.size(), tensor_type_shape.GetDimensionsCount());
+    }
+
+    {
+      auto t_type_shape = coo_st.GetTensorTypeAndShapeInfo();
+      ASSERT_EQ(dense_shape, t_type_shape.GetShape());
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32, t_type_shape.GetElementType());
+      ASSERT_EQ(dense_shape.size(), t_type_shape.GetDimensionsCount());
+    }
+
+    ASSERT_EQ(ORT_SPARSE_COO, coo_st.GetSparseFormat());
+
+    {
+      auto values_ts = coo_st.GetSparseTensorValuesTypeAndShapeInfo();
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32, values_ts.GetElementType());
+      ASSERT_EQ(values_shape, values_ts.GetShape());
+    }
+
+    {
+      const auto* values = coo_st.GetSparseTensorValues<int32_t>();
+      auto val_span = gsl::make_span(values, expected_values.size());
+      ASSERT_TRUE(std::equal(expected_values.cbegin(), expected_values.cend(), val_span.cbegin(), val_span.cend()));
+    }
+
+    {
+      auto indices_ts = coo_st.GetSparseTensorIndicesTypeShapeInfo(ORT_SPARSE_COO_INDICES);
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64, indices_ts.GetElementType());
+      ASSERT_EQ(indices_shape, indices_ts.GetShape());
+
+      size_t num_indices = 0;
+      const int64_t* indices = coo_st.GetSparseTensorIndicesData<int64_t>(ORT_SPARSE_COO_INDICES, num_indices);
+      ASSERT_EQ(num_indices, static_cast<size_t>(indices_shape[0]));
+      auto ind_span = gsl::make_span(indices, num_indices);
+      ASSERT_TRUE(std::equal(expected_linear_indices.cbegin(), expected_linear_indices.cend(), ind_span.cbegin(), ind_span.cend()));
+    }
+  }
+
+  {
+    // CSR test: 3 inner indices (one per value) and 4 outer indices
+    const std::vector<int64_t> dense_shape{3, 3};
+    const std::vector<int64_t> values_shape{3};
+    const std::vector<int64_t> inner_shape{3};
+    const std::vector<int64_t> outer_shape{4};
+    std::vector<int32_t> expected_values = {1, 1, 1};
+    std::vector<int64_t> expected_inner = {2, 0, 2};
+    std::vector<int64_t> expected_outer = {0, 1, 3, 3};
+
+    Ort::Value::Shape ort_dense_shape{dense_shape.data(), dense_shape.size()};
+    constexpr int64_t values_len = 3;
+    Ort::Value::Shape ort_values_shape{&values_len, 1U};
+    auto csr_st = Ort::Value::CreateSparseTensor(info, expected_values.data(), ort_dense_shape, ort_values_shape);
+    csr_st.UseCsrIndices(expected_inner.data(), expected_inner.size(), expected_outer.data(), expected_outer.size());
+    {
+      auto ti = csr_st.GetTypeInfo();
+      ASSERT_EQ(ONNX_TYPE_SPARSETENSOR, ti.GetONNXType());
+      auto tensor_type_shape = ti.GetTensorTypeAndShapeInfo();
+      ASSERT_EQ(dense_shape, tensor_type_shape.GetShape());
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32, tensor_type_shape.GetElementType());
+      ASSERT_EQ(dense_shape.size(), tensor_type_shape.GetDimensionsCount());
+    }
+
+    {
+      auto t_type_shape = csr_st.GetTensorTypeAndShapeInfo();
+      ASSERT_EQ(dense_shape, t_type_shape.GetShape());
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32, t_type_shape.GetElementType());
+      ASSERT_EQ(dense_shape.size(), t_type_shape.GetDimensionsCount());
+    }
+
+    ASSERT_EQ(ORT_SPARSE_CSRC, csr_st.GetSparseFormat());
+
+    {
+      auto values_ts = csr_st.GetSparseTensorValuesTypeAndShapeInfo();
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32, values_ts.GetElementType());
+      ASSERT_EQ(values_shape, values_ts.GetShape());
+    }
+
+    {
+      const auto* values = csr_st.GetSparseTensorValues<int32_t>();
+      auto val_span = gsl::make_span(values, expected_values.size());
+      ASSERT_TRUE(std::equal(expected_values.cbegin(), expected_values.cend(), val_span.cbegin(), val_span.cend()));
+    }
+
+    {
+      auto indices_ts = csr_st.GetSparseTensorIndicesTypeShapeInfo(ORT_SPARSE_CSR_INNER_INDICES);
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64, indices_ts.GetElementType());
+      ASSERT_EQ(inner_shape, indices_ts.GetShape());
+
+      size_t num_indices = 0;
+      const int64_t* indices = csr_st.GetSparseTensorIndicesData<int64_t>(ORT_SPARSE_CSR_INNER_INDICES, num_indices);
+      ASSERT_EQ(num_indices, expected_inner.size());
+      auto ind_span = gsl::make_span(indices, num_indices);
+      ASSERT_TRUE(std::equal(expected_inner.cbegin(), expected_inner.cend(), ind_span.cbegin(), ind_span.cend()));
+    }
+
+    {
+      auto indices_ts = csr_st.GetSparseTensorIndicesTypeShapeInfo(ORT_SPARSE_CSR_OUTER_INDICES);
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64, indices_ts.GetElementType());
+      ASSERT_EQ(outer_shape, indices_ts.GetShape());
+
+      size_t num_indices = 0;
+      const int64_t* indices = csr_st.GetSparseTensorIndicesData<int64_t>(ORT_SPARSE_CSR_OUTER_INDICES, num_indices);
+      ASSERT_EQ(num_indices, expected_outer.size());
+      auto ind_span = gsl::make_span(indices, num_indices);
+      ASSERT_TRUE(std::equal(expected_outer.cbegin(), expected_outer.cend(), ind_span.cbegin(), ind_span.cend()));
+    }
+  }
+
+  {
+    // BlockSparse test
+    const std::vector<int64_t> dense_shape{8, 8};
+    constexpr int64_t block_size = 2;
+    const std::vector<int64_t> values_shape{2, block_size, block_size};
+    // Two dense blocks
+    std::vector<int32_t> data_blocks{1, 2, 3, 4, 5, 6, 7, 8};
+    const std::vector<int64_t> indices_shape{2, 2};  // two blocks by two coordinates
+    // (0, 0), (0,1)
+    std::vector<int32_t> blocksparse_indices = {0, 0, 0, 1};
+
+    Ort::Value::Shape ort_dense_shape{dense_shape.data(), dense_shape.size()};
+    Ort::Value::Shape ort_values_shape{values_shape.data(), values_shape.size()};
+    auto bsp_st = Ort::Value::CreateSparseTensor(info, data_blocks.data(), ort_dense_shape, ort_values_shape);
+    bsp_st.UseBlockSparseIndices({indices_shape.data(), indices_shape.size()}, blocksparse_indices.data());
+    {
+      auto ti = bsp_st.GetTypeInfo();
+      ASSERT_EQ(ONNX_TYPE_SPARSETENSOR, ti.GetONNXType());
+      auto tensor_type_shape = ti.GetTensorTypeAndShapeInfo();
+      ASSERT_EQ(dense_shape, tensor_type_shape.GetShape());
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32, tensor_type_shape.GetElementType());
+      ASSERT_EQ(dense_shape.size(), tensor_type_shape.GetDimensionsCount());
+    }
+    {
+      auto t_type_shape = bsp_st.GetTensorTypeAndShapeInfo();
+      ASSERT_EQ(dense_shape, t_type_shape.GetShape());
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32, t_type_shape.GetElementType());
+      ASSERT_EQ(dense_shape.size(), t_type_shape.GetDimensionsCount());
+    }
+    ASSERT_EQ(ORT_SPARSE_BLOCK_SPARSE, bsp_st.GetSparseFormat());
+    {
+      auto values_ts = bsp_st.GetSparseTensorValuesTypeAndShapeInfo();
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32, values_ts.GetElementType());
+      ASSERT_EQ(values_shape, values_ts.GetShape());
+    }
+    {
+      const auto* values = bsp_st.GetSparseTensorValues<int32_t>();
+      auto val_span = gsl::make_span(values, data_blocks.size());
+      ASSERT_TRUE(std::equal(data_blocks.cbegin(), data_blocks.cend(), val_span.cbegin(), val_span.cend()));
+    }
+    {
+      auto indices_ts = bsp_st.GetSparseTensorIndicesTypeShapeInfo(ORT_SPARSE_BLOCK_SPARSE_INDICES);
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32, indices_ts.GetElementType());
+      ASSERT_EQ(indices_shape, indices_ts.GetShape());
+
+      size_t num_indices = 0;
+      const int32_t* indices = bsp_st.GetSparseTensorIndicesData<int32_t>(ORT_SPARSE_BLOCK_SPARSE_INDICES, num_indices);
+      ASSERT_EQ(num_indices, blocksparse_indices.size());
+      auto ind_span = gsl::make_span(indices, num_indices);
+      ASSERT_TRUE(std::equal(blocksparse_indices.cbegin(), blocksparse_indices.cend(), ind_span.cbegin(), ind_span.cend()));
+    }
+  }
+}
+
+// Creates int32 sparse tensors from the default allocator and a dense shape,
+// populates them with FillSparseTensorCoo/Csr/BlockSparse from local buffers,
+// and checks that type info, format, values and indices read back as supplied.
+TEST(CApiTest, SparseTensorFillSparseTensorFormatAPI) {
+  auto allocator = Ort::AllocatorWithDefaultOptions();
+  Ort::MemoryInfo info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault);
+  {
+    // COO: three values addressed by 1-D (linear) indices
+    const std::vector<int64_t> dense_shape{3, 3};
+    const std::vector<int64_t> values_shape{3};
+    std::vector<int32_t> expected_values = {1, 1, 1};
+    std::vector<int64_t> expected_linear_indices = {2, 3, 5};
+    const std::vector<int64_t> indices_shape{3};
+
+    Ort::Value::Shape ort_dense_shape{dense_shape.data(), dense_shape.size()};
+    auto coo_st = Ort::Value::CreateSparseTensor<int32_t>(allocator, ort_dense_shape);
+    coo_st.FillSparseTensorCoo(info, {values_shape.data(), values_shape.size(), {expected_values.data()}},
+                               expected_linear_indices.data(), expected_linear_indices.size());
+    {
+      auto ti = coo_st.GetTypeInfo();
+      ASSERT_EQ(ONNX_TYPE_SPARSETENSOR, ti.GetONNXType());
+      auto tensor_type_shape = ti.GetTensorTypeAndShapeInfo();
+      ASSERT_EQ(dense_shape, tensor_type_shape.GetShape());
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32, tensor_type_shape.GetElementType());
+      ASSERT_EQ(dense_shape.size(), tensor_type_shape.GetDimensionsCount());
+    }
+
+    {
+      auto t_type_shape = coo_st.GetTensorTypeAndShapeInfo();
+      ASSERT_EQ(dense_shape, t_type_shape.GetShape());
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32, t_type_shape.GetElementType());
+      ASSERT_EQ(dense_shape.size(), t_type_shape.GetDimensionsCount());
+    }
+
+    ASSERT_EQ(ORT_SPARSE_COO, coo_st.GetSparseFormat());
+
+    {
+      auto values_ts = coo_st.GetSparseTensorValuesTypeAndShapeInfo();
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32, values_ts.GetElementType());
+      ASSERT_EQ(values_shape, values_ts.GetShape());
+    }
+
+    {
+      const auto* values = coo_st.GetSparseTensorValues<int32_t>();
+      auto val_span = gsl::make_span(values, expected_values.size());
+      ASSERT_TRUE(std::equal(expected_values.cbegin(), expected_values.cend(), val_span.cbegin(), val_span.cend()));
+    }
+
+    {
+      auto indices_ts = coo_st.GetSparseTensorIndicesTypeShapeInfo(ORT_SPARSE_COO_INDICES);
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64, indices_ts.GetElementType());
+      ASSERT_EQ(indices_shape, indices_ts.GetShape());
+
+      size_t num_indices = 0;
+      const int64_t* indices = coo_st.GetSparseTensorIndicesData<int64_t>(ORT_SPARSE_COO_INDICES, num_indices);
+      ASSERT_EQ(num_indices, static_cast<size_t>(indices_shape[0]));
+      auto ind_span = gsl::make_span(indices, num_indices);
+      ASSERT_TRUE(std::equal(expected_linear_indices.cbegin(), expected_linear_indices.cend(), ind_span.cbegin(), ind_span.cend()));
+    }
+  }
+  {
+    // CSR test: 3 inner indices (one per value) and 4 outer indices
+    const std::vector<int64_t> dense_shape{3, 3};
+    const std::vector<int64_t> values_shape{3};
+    const std::vector<int64_t> inner_shape{3};
+    const std::vector<int64_t> outer_shape{4};
+    const std::vector<int32_t> expected_values = {1, 1, 1};
+    const std::vector<int64_t> expected_inner = {2, 0, 2};
+    const std::vector<int64_t> expected_outer = {0, 1, 3, 3};
+
+    Ort::Value::Shape ort_dense_shape{dense_shape.data(), dense_shape.size()};
+    auto csr_st = Ort::Value::CreateSparseTensor<int32_t>(allocator, ort_dense_shape);
+    csr_st.FillSparseTensorCsr(info, {values_shape.data(), values_shape.size(), {expected_values.data()}},
+                               expected_inner.data(), expected_inner.size(),
+                               expected_outer.data(), expected_outer.size());
+    {
+      auto ti = csr_st.GetTypeInfo();
+      ASSERT_EQ(ONNX_TYPE_SPARSETENSOR, ti.GetONNXType());
+      auto tensor_type_shape = ti.GetTensorTypeAndShapeInfo();
+      ASSERT_EQ(dense_shape, tensor_type_shape.GetShape());
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32, tensor_type_shape.GetElementType());
+      ASSERT_EQ(dense_shape.size(), tensor_type_shape.GetDimensionsCount());
+    }
+
+    {
+      auto t_type_shape = csr_st.GetTensorTypeAndShapeInfo();
+      ASSERT_EQ(dense_shape, t_type_shape.GetShape());
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32, t_type_shape.GetElementType());
+      ASSERT_EQ(dense_shape.size(), t_type_shape.GetDimensionsCount());
+    }
+
+    ASSERT_EQ(ORT_SPARSE_CSRC, csr_st.GetSparseFormat());
+
+    {
+      auto values_ts = csr_st.GetSparseTensorValuesTypeAndShapeInfo();
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32, values_ts.GetElementType());
+      ASSERT_EQ(values_shape, values_ts.GetShape());
+    }
+
+    {
+      const auto* values = csr_st.GetSparseTensorValues<int32_t>();
+      auto val_span = gsl::make_span(values, expected_values.size());
+      ASSERT_TRUE(std::equal(expected_values.cbegin(), expected_values.cend(), val_span.cbegin(), val_span.cend()));
+    }
+
+    {
+      auto indices_ts = csr_st.GetSparseTensorIndicesTypeShapeInfo(ORT_SPARSE_CSR_INNER_INDICES);
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64, indices_ts.GetElementType());
+      ASSERT_EQ(inner_shape, indices_ts.GetShape());
+
+      size_t num_indices = 0;
+      const int64_t* indices = csr_st.GetSparseTensorIndicesData<int64_t>(ORT_SPARSE_CSR_INNER_INDICES, num_indices);
+      ASSERT_EQ(num_indices, expected_inner.size());
+      auto ind_span = gsl::make_span(indices, num_indices);
+      ASSERT_TRUE(std::equal(expected_inner.cbegin(), expected_inner.cend(), ind_span.cbegin(), ind_span.cend()));
+    }
+
+    {
+      auto indices_ts = csr_st.GetSparseTensorIndicesTypeShapeInfo(ORT_SPARSE_CSR_OUTER_INDICES);
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64, indices_ts.GetElementType());
+      ASSERT_EQ(outer_shape, indices_ts.GetShape());
+
+      size_t num_indices = 0;
+      const int64_t* indices = csr_st.GetSparseTensorIndicesData<int64_t>(ORT_SPARSE_CSR_OUTER_INDICES, num_indices);
+      ASSERT_EQ(num_indices, expected_outer.size());
+      auto ind_span = gsl::make_span(indices, num_indices);
+      ASSERT_TRUE(std::equal(expected_outer.cbegin(), expected_outer.cend(), ind_span.cbegin(), ind_span.cend()));
+    }
+  }
+  {
+    // BlockSparse test
+    const std::vector<int64_t> dense_shape{8, 8};
+    constexpr int64_t block_size = 2;
+    const std::vector<int64_t> values_shape{2, block_size, block_size};
+    // Two dense blocks
+    std::vector<int32_t> data_blocks{1, 2, 3, 4, 5, 6, 7, 8};
+    const std::vector<int64_t> indices_shape{2, 2};  // two blocks by two coordinates
+    // (0, 0), (0,1)
+    std::vector<int32_t> blocksparse_indices = {0, 0, 0, 1};
+
+    Ort::Value::Shape ort_dense_shape{dense_shape.data(), dense_shape.size()};
+    auto bsp_st = Ort::Value::CreateSparseTensor<int32_t>(allocator, ort_dense_shape);
+    bsp_st.FillSparseTensorBlockSparse(info, {values_shape.data(), values_shape.size(), {data_blocks.data()}},
+                                       {indices_shape.data(), indices_shape.size()}, blocksparse_indices.data());
+    {
+      auto ti = bsp_st.GetTypeInfo();
+      ASSERT_EQ(ONNX_TYPE_SPARSETENSOR, ti.GetONNXType());
+      auto tensor_type_shape = ti.GetTensorTypeAndShapeInfo();
+      ASSERT_EQ(dense_shape, tensor_type_shape.GetShape());
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32, tensor_type_shape.GetElementType());
+      ASSERT_EQ(dense_shape.size(), tensor_type_shape.GetDimensionsCount());
+    }
+    {
+      auto t_type_shape = bsp_st.GetTensorTypeAndShapeInfo();
+      ASSERT_EQ(dense_shape, t_type_shape.GetShape());
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32, t_type_shape.GetElementType());
+      ASSERT_EQ(dense_shape.size(), t_type_shape.GetDimensionsCount());
+    }
+    ASSERT_EQ(ORT_SPARSE_BLOCK_SPARSE, bsp_st.GetSparseFormat());
+    {
+      auto values_ts = bsp_st.GetSparseTensorValuesTypeAndShapeInfo();
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32, values_ts.GetElementType());
+      ASSERT_EQ(values_shape, values_ts.GetShape());
+    }
+    {
+      const auto* values = bsp_st.GetSparseTensorValues<int32_t>();
+      auto val_span = gsl::make_span(values, data_blocks.size());
+      ASSERT_TRUE(std::equal(data_blocks.cbegin(), data_blocks.cend(), val_span.cbegin(), val_span.cend()));
+    }
+    {
+      auto indices_ts = bsp_st.GetSparseTensorIndicesTypeShapeInfo(ORT_SPARSE_BLOCK_SPARSE_INDICES);
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32, indices_ts.GetElementType());
+      ASSERT_EQ(indices_shape, indices_ts.GetShape());
+
+      size_t num_indices = 0;
+      const int32_t* indices = bsp_st.GetSparseTensorIndicesData<int32_t>(ORT_SPARSE_BLOCK_SPARSE_INDICES, num_indices);
+      ASSERT_EQ(num_indices, blocksparse_indices.size());
+      auto ind_span = gsl::make_span(indices, num_indices);
+      ASSERT_TRUE(std::equal(blocksparse_indices.cbegin(), blocksparse_indices.cend(), ind_span.cbegin(), ind_span.cend()));
+    }
+  }
+}
+
+// String flavour of the Fill* test: creates string sparse tensors from the
+// default allocator, fills COO, CSR and BlockSparse data from C strings, and
+// reads the values back per element and as one packed buffer with offsets.
+TEST(CApiTest, SparseTensorFillSparseFormatStringsAPI) {
+  auto allocator = Ort::AllocatorWithDefaultOptions();
+  Ort::MemoryInfo info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault);
+
+  {
+    // COO
+    const std::vector<int64_t> dense_shape{3, 3};
+    const std::vector<int64_t> values_shape{3};
+    std::vector<std::string> expected_values = {"1", "1", "1"};
+    const char* const strings[] = {"1", "1", "1"};
+    std::vector<int64_t> expected_linear_indices = {2, 3, 5};
+    const std::vector<int64_t> indices_shape{3};
+
+    Ort::Value::Shape ort_dense_shape{dense_shape.data(), dense_shape.size()};
+    auto coo_st = Ort::Value::CreateSparseTensor(allocator, ort_dense_shape, ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING);
+    coo_st.FillSparseTensorCoo(info, {values_shape.data(), values_shape.size(), {strings}},
+                               expected_linear_indices.data(), expected_linear_indices.size());
+    {
+      auto ti = coo_st.GetTypeInfo();
+      ASSERT_EQ(ONNX_TYPE_SPARSETENSOR, ti.GetONNXType());
+      auto tensor_type_shape = ti.GetTensorTypeAndShapeInfo();
+      ASSERT_EQ(dense_shape, tensor_type_shape.GetShape());
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING, tensor_type_shape.GetElementType());
+      ASSERT_EQ(dense_shape.size(), tensor_type_shape.GetDimensionsCount());
+    }
+
+    {
+      auto t_type_shape = coo_st.GetTensorTypeAndShapeInfo();
+      ASSERT_EQ(dense_shape, t_type_shape.GetShape());
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING, t_type_shape.GetElementType());
+      ASSERT_EQ(dense_shape.size(), t_type_shape.GetDimensionsCount());
+    }
+
+    ASSERT_EQ(ORT_SPARSE_COO, coo_st.GetSparseFormat());
+
+    {
+      auto values_ts = coo_st.GetSparseTensorValuesTypeAndShapeInfo();
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING, values_ts.GetElementType());
+      ASSERT_EQ(values_shape, values_ts.GetShape());
+
+      for (size_t i = 0, limit = expected_values.size(); i < limit; ++i) {
+        const auto& ex = expected_values[i];
+        size_t len = coo_st.GetStringTensorElementLength(i);
+        ASSERT_EQ(ex.size(), len);
+        auto buffer = std::make_unique<char[]>(len);
+        coo_st.GetStringTensorElement(len, i, buffer.get());
+        ASSERT_EQ(0, ex.compare(0U, ex.size(), buffer.get(), len));
+      }
+
+      size_t data_len = coo_st.GetStringTensorDataLength();
+      auto buffer = std::make_unique<char[]>(data_len);
+      auto offsets = std::make_unique<size_t[]>(expected_values.size());
+      // XXX: Do something about this API. Only the N start offsets are read here,
+      // so the last element's length has to be derived from data_len below.
+      // It should add an N + 1 terminating offset (or skip the first zero offset).
+      coo_st.GetStringTensorContent(buffer.get(), data_len, offsets.get(), expected_values.size());
+      for (size_t i = 0, limit = expected_values.size(); i < limit; ++i) {
+        const auto& ex = expected_values[i];
+        const char* p = &buffer[offsets[i]];
+        size_t len = (i == (limit - 1)) ? (data_len - offsets[i]) : offsets[i + 1] - offsets[i];
+        ASSERT_EQ(ex.size(), len);
+        std::string s(p, len);
+        ASSERT_EQ(ex, s);
+      }
+    }
+
+    {
+      auto indices_ts = coo_st.GetSparseTensorIndicesTypeShapeInfo(ORT_SPARSE_COO_INDICES);
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64, indices_ts.GetElementType());
+      ASSERT_EQ(indices_shape, indices_ts.GetShape());
+
+      size_t num_indices = 0;
+      const int64_t* indices = coo_st.GetSparseTensorIndicesData<int64_t>(ORT_SPARSE_COO_INDICES, num_indices);
+      ASSERT_EQ(num_indices, static_cast<size_t>(indices_shape[0]));
+      auto ind_span = gsl::make_span(indices, num_indices);
+      ASSERT_TRUE(std::equal(expected_linear_indices.cbegin(), expected_linear_indices.cend(), ind_span.cbegin(), ind_span.cend()));
+    }
+  }
+  {
+    // CSR strings
+    const std::vector<int64_t> dense_shape{3, 3};
+    const std::vector<int64_t> values_shape{3};
+    const std::vector<int64_t> inner_shape{3};
+    const std::vector<int64_t> outer_shape{4};
+    const std::vector<std::string> expected_values{"1", "1", "1"};
+    const char* const strings[] = {"1", "1", "1"};
+    const std::vector<int64_t> expected_inner{2, 0, 2};
+    const std::vector<int64_t> expected_outer{0, 1, 3, 3};
+
+    Ort::Value::Shape ort_dense_shape{dense_shape.data(), dense_shape.size()};
+    auto csr_st = Ort::Value::CreateSparseTensor(allocator, ort_dense_shape, ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING);
+    csr_st.FillSparseTensorCsr(info, {values_shape.data(), values_shape.size(), {strings}},
+                               expected_inner.data(), expected_inner.size(),
+                               expected_outer.data(), expected_outer.size());
+    {
+      auto ti = csr_st.GetTypeInfo();
+      ASSERT_EQ(ONNX_TYPE_SPARSETENSOR, ti.GetONNXType());
+      auto tensor_type_shape = ti.GetTensorTypeAndShapeInfo();
+      ASSERT_EQ(dense_shape, tensor_type_shape.GetShape());
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING, tensor_type_shape.GetElementType());
+      ASSERT_EQ(dense_shape.size(), tensor_type_shape.GetDimensionsCount());
+    }
+
+    {
+      auto t_type_shape = csr_st.GetTensorTypeAndShapeInfo();
+      ASSERT_EQ(dense_shape, t_type_shape.GetShape());
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING, t_type_shape.GetElementType());
+      ASSERT_EQ(dense_shape.size(), t_type_shape.GetDimensionsCount());
+    }
+
+    ASSERT_EQ(ORT_SPARSE_CSRC, csr_st.GetSparseFormat());
+    {
+      auto values_ts = csr_st.GetSparseTensorValuesTypeAndShapeInfo();
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING, values_ts.GetElementType());
+      ASSERT_EQ(values_shape, values_ts.GetShape());
+
+      for (size_t i = 0, limit = expected_values.size(); i < limit; ++i) {
+        const auto& ex = expected_values[i];
+        size_t len = csr_st.GetStringTensorElementLength(i);
+        ASSERT_EQ(ex.size(), len);
+        auto buffer = std::make_unique<char[]>(len);
+        csr_st.GetStringTensorElement(len, i, buffer.get());
+        ASSERT_EQ(0, ex.compare(0U, ex.size(), buffer.get(), len));
+      }
+
+      size_t data_len = csr_st.GetStringTensorDataLength();
+      auto buffer = std::make_unique<char[]>(data_len);
+      auto offsets = std::make_unique<size_t[]>(expected_values.size());
+      // XXX: Do something about this API. Only the N start offsets are read here,
+      // so the last element's length has to be derived from data_len below.
+      // It should add an N + 1 terminating offset (or skip the first zero offset).
+      csr_st.GetStringTensorContent(buffer.get(), data_len, offsets.get(), expected_values.size());
+      for (size_t i = 0, limit = expected_values.size(); i < limit; ++i) {
+        const auto& ex = expected_values[i];
+        const char* p = &buffer[offsets[i]];
+        size_t len = (i == (limit - 1)) ? (data_len - offsets[i]) : offsets[i + 1] - offsets[i];
+        ASSERT_EQ(ex.size(), len);
+        std::string s(p, len);
+        ASSERT_EQ(ex, s);
+      }
+    }
+    {
+      auto indices_ts = csr_st.GetSparseTensorIndicesTypeShapeInfo(ORT_SPARSE_CSR_INNER_INDICES);
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64, indices_ts.GetElementType());
+      ASSERT_EQ(inner_shape, indices_ts.GetShape());
+
+      size_t num_indices = 0;
+      const int64_t* indices = csr_st.GetSparseTensorIndicesData<int64_t>(ORT_SPARSE_CSR_INNER_INDICES, num_indices);
+      ASSERT_EQ(num_indices, expected_inner.size());
+      auto ind_span = gsl::make_span(indices, num_indices);
+      ASSERT_TRUE(std::equal(expected_inner.cbegin(), expected_inner.cend(), ind_span.cbegin(), ind_span.cend()));
+    }
+
+    {
+      auto indices_ts = csr_st.GetSparseTensorIndicesTypeShapeInfo(ORT_SPARSE_CSR_OUTER_INDICES);
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64, indices_ts.GetElementType());
+      ASSERT_EQ(outer_shape, indices_ts.GetShape());
+
+      size_t num_indices = 0;
+      const int64_t* indices = csr_st.GetSparseTensorIndicesData<int64_t>(ORT_SPARSE_CSR_OUTER_INDICES, num_indices);
+      ASSERT_EQ(num_indices, expected_outer.size());
+      auto ind_span = gsl::make_span(indices, num_indices);
+      ASSERT_TRUE(std::equal(expected_outer.cbegin(), expected_outer.cend(), ind_span.cbegin(), ind_span.cend()));
+    }
+  }
+  {
+    // BlockSparse test
+    const std::vector<int64_t> dense_shape{8, 8};
+    constexpr int64_t block_size = 2;
+    const std::vector<int64_t> values_shape{2, block_size, block_size};
+    // Two dense blocks
+    const std::vector<std::string> data_blocks{
+        "1", "2", "3", "4", "5", "6", "7", "8"};
+    const char* const strings[] = {"1", "2", "3", "4", "5", "6", "7", "8"};
+    const std::vector<int64_t> indices_shape{2, 2};  // two blocks by two coordinates
+    // (0, 0), (0,1)
+    std::vector<int32_t> blocksparse_indices = {0, 0, 0, 1};
+
+    Ort::Value::Shape ort_dense_shape{dense_shape.data(), dense_shape.size()};
+    auto bsp_st = Ort::Value::CreateSparseTensor(allocator, ort_dense_shape, ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING);
+    bsp_st.FillSparseTensorBlockSparse(info, {values_shape.data(), values_shape.size(), {strings}},
+                                       {indices_shape.data(), indices_shape.size()}, blocksparse_indices.data());
+    {
+      auto ti = bsp_st.GetTypeInfo();
+      ASSERT_EQ(ONNX_TYPE_SPARSETENSOR, ti.GetONNXType());
+      auto tensor_type_shape = ti.GetTensorTypeAndShapeInfo();
+      ASSERT_EQ(dense_shape, tensor_type_shape.GetShape());
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING, tensor_type_shape.GetElementType());
+      ASSERT_EQ(dense_shape.size(), tensor_type_shape.GetDimensionsCount());
+    }
+    {
+      auto t_type_shape = bsp_st.GetTensorTypeAndShapeInfo();
+      ASSERT_EQ(dense_shape, t_type_shape.GetShape());
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING, t_type_shape.GetElementType());
+      ASSERT_EQ(dense_shape.size(), t_type_shape.GetDimensionsCount());
+    }
+    ASSERT_EQ(ORT_SPARSE_BLOCK_SPARSE, bsp_st.GetSparseFormat());
+    {
+      auto values_ts = bsp_st.GetSparseTensorValuesTypeAndShapeInfo();
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING, values_ts.GetElementType());
+      ASSERT_EQ(values_shape, values_ts.GetShape());
+
+      for (size_t i = 0, limit = data_blocks.size(); i < limit; ++i) {
+        const auto& ex = data_blocks[i];
+        size_t len = bsp_st.GetStringTensorElementLength(i);
+        ASSERT_EQ(ex.size(), len);
+        auto buffer = std::make_unique<char[]>(len);
+        bsp_st.GetStringTensorElement(len, i, buffer.get());
+        ASSERT_EQ(0, ex.compare(0U, ex.size(), buffer.get(), len));
+      }
+
+      size_t data_len = bsp_st.GetStringTensorDataLength();
+      auto buffer = std::make_unique<char[]>(data_len);
+      // XXX: Do something about this API. Only the N start offsets are read here,
+      // so the last element's length has to be derived from data_len below.
+      // It should add an N + 1 terminating offset (or skip the first zero offset).
+      auto offsets = std::make_unique<size_t[]>(data_blocks.size());
+      bsp_st.GetStringTensorContent(buffer.get(), data_len, offsets.get(), data_blocks.size());
+      for (size_t i = 0, limit = data_blocks.size(); i < limit; ++i) {
+        const auto& ex = data_blocks[i];
+        const char* p = &buffer[offsets[i]];
+        size_t len = (i == (limit - 1)) ? (data_len - offsets[i]) : offsets[i + 1] - offsets[i];
+        ASSERT_EQ(ex.size(), len);
+        std::string s(p, len);
+        ASSERT_EQ(ex, s);
+      }
+    }
+    {
+      auto indices_ts = bsp_st.GetSparseTensorIndicesTypeShapeInfo(ORT_SPARSE_BLOCK_SPARSE_INDICES);
+      ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32, indices_ts.GetElementType());
+      ASSERT_EQ(indices_shape, indices_ts.GetShape());
+
+      size_t num_indices = 0;
+      const int32_t* indices = bsp_st.GetSparseTensorIndicesData<int32_t>(ORT_SPARSE_BLOCK_SPARSE_INDICES, num_indices);
+      ASSERT_EQ(num_indices, blocksparse_indices.size());
+      auto ind_span = gsl::make_span(indices, num_indices);
+      ASSERT_TRUE(std::equal(blocksparse_indices.cbegin(), blocksparse_indices.cend(), ind_span.cbegin(), ind_span.cend()));
+    }
+  }
+}
\ No newline at end of file
diff --git a/tools/ci_build/github/linux/ort_minimal/build_minimal_ort_android_baseline_and_report_bin_size.sh b/tools/ci_build/github/linux/ort_minimal/build_minimal_ort_android_baseline_and_report_bin_size.sh
index 13a0c79144..8da5048e43 100644
--- a/tools/ci_build/github/linux/ort_minimal/build_minimal_ort_android_baseline_and_report_bin_size.sh
+++ b/tools/ci_build/github/linux/ort_minimal/build_minimal_ort_android_baseline_and_report_bin_size.sh
@@ -29,7 +29,7 @@ python3 /onnxruntime_src/tools/ci_build/build.py \
     --include_ops_by_config /home/onnxruntimedev/.test_data/include_no_operators.config
 
 # set current size limit to BINARY_SIZE_LIMIT_IN_BYTES.
-BINARY_SIZE_LIMIT_IN_BYTES=1235000
+BINARY_SIZE_LIMIT_IN_BYTES=1255000
 echo "The current preset binary size limit is $BINARY_SIZE_LIMIT_IN_BYTES"
 python3 /onnxruntime_src/tools/ci_build/github/linux/ort_minimal/check_build_binary_size.py \
     --threshold=$BINARY_SIZE_LIMIT_IN_BYTES \