diff --git a/aten/src/ATen/Context.h b/aten/src/ATen/Context.h
index eb9d021fc21..7fad49ab661 100644
--- a/aten/src/ATen/Context.h
+++ b/aten/src/ATen/Context.h
@@ -73,6 +73,8 @@ class TORCH_API Context {
       return at::detail::getPrivateUse1Hooks();
     } else if (device_type == at::kMTIA) {
       return at::detail::getMTIAHooks();
+    } else if (device_type == at::kHIP) {
+      return at::detail::getHIPHooks();
     } else {
       AT_ERROR(
           c10::DeviceTypeName(device_type), " device type not an accelerator.");
@@ -94,8 +96,22 @@ class TORCH_API Context {
       AT_ERROR(c10::DeviceTypeName(device_type), " device type not enabled.");
     }
   }
-  static bool isPinnedPtr(const void* data) {
-    return detail::getCUDAHooks().isPinnedPtr(data);
+  bool isPinnedPtr(
+      const void* data,
+      std::optional<c10::DeviceType> device_type = std::nullopt) {
+    auto opt_device_type =
+        device_type.has_value() ? device_type.value() : at::getAccelerator();
+    if (!opt_device_type.has_value() || // there is no accelerator
+        !at::isAccelerator(
+            opt_device_type.value())) { // passed device not an accelerator
+      return false;
+    }
+    return getAcceleratorHooksInterface(opt_device_type.value())
+        .isPinnedPtr(data);
+  }
+  Allocator* getPinnedMemoryAllocator(
+      std::optional<c10::DeviceType> device_type = std::nullopt) {
+    return getAcceleratorHooksInterface(device_type).getPinnedMemoryAllocator();
+  }
   static bool hasOpenMP();
   static bool hasMKL();
diff --git a/aten/src/ATen/DeviceAccelerator.cpp b/aten/src/ATen/DeviceAccelerator.cpp
index 5b093cc9cbc..18025a9962a 100644
--- a/aten/src/ATen/DeviceAccelerator.cpp
+++ b/aten/src/ATen/DeviceAccelerator.cpp
@@ -2,7 +2,7 @@
 #include <ATen/DeviceAccelerator.h>
 
 namespace at {
-C10_API std::optional<c10::DeviceType> getAccelerator(bool checked) {
+std::optional<c10::DeviceType> getAccelerator(bool checked) {
 #define DETECT_AND_ASSIGN_ACCELERATOR(device_name) \
   if (at::has##device_name()) {                    \
     device_type = k##device_name;                  \
@@ -20,11 +20,13 @@ C10_API std::optional<c10::DeviceType> getAccelerator(bool checked) {
     // first.
     return kPrivateUse1;
   }
-  std::optional<DeviceType> device_type = std::nullopt;
+  std::optional<c10::DeviceType> device_type = std::nullopt;
   bool is_accelerator_detected = false;
   DETECT_AND_ASSIGN_ACCELERATOR(CUDA)
   DETECT_AND_ASSIGN_ACCELERATOR(MTIA)
   DETECT_AND_ASSIGN_ACCELERATOR(XPU)
+  DETECT_AND_ASSIGN_ACCELERATOR(HIP)
+  DETECT_AND_ASSIGN_ACCELERATOR(MPS)
   if (checked) {
     TORCH_CHECK(
         device_type, "Cannot access accelerator device when none is available.")
@@ -34,4 +36,18 @@ C10_API std::optional<c10::DeviceType> getAccelerator(bool checked) {
 #undef DETECT_AND_ASSIGN_ACCELERATOR
 }
 
+bool isAccelerator(c10::DeviceType d) {
+  switch (d) {
+    case at::kCUDA:
+    case at::kMTIA:
+    case at::kXPU:
+    case at::kHIP:
+    case at::kMPS:
+    case at::kPrivateUse1:
+      return true;
+    default:
+      return false;
+  }
+}
+
 } // namespace at
diff --git a/aten/src/ATen/DeviceAccelerator.h b/aten/src/ATen/DeviceAccelerator.h
index 3eedb9945ef..7840911bd6b 100644
--- a/aten/src/ATen/DeviceAccelerator.h
+++ b/aten/src/ATen/DeviceAccelerator.h
@@ -13,9 +13,7 @@
 // - It provides a set of common APIs as defined by AcceleratorHooksInterface
 //
 // As of today, accelerator devices are (in no particular order):
-// CUDA, MTIA, XPU, PrivateUse1
-// We want to add once all the proper APIs are supported and tested:
-// HIP, MPS
+// CUDA, MTIA, XPU, HIP, MPS, PrivateUse1
 
 namespace at {
 
@@ -24,4 +22,6 @@ namespace at {
 // When checked is true, the returned optional always has a value.
 TORCH_API std::optional<c10::DeviceType> getAccelerator(bool checked = false);
+
+TORCH_API bool isAccelerator(c10::DeviceType d);
 
 } // namespace at
diff --git a/aten/src/ATen/cuda/PinnedMemoryAllocator.cpp b/aten/src/ATen/cuda/PinnedMemoryAllocator.cpp
deleted file mode 100644
index 0c3e3782564..00000000000
--- a/aten/src/ATen/cuda/PinnedMemoryAllocator.cpp
+++ /dev/null
@@ -1,32 +0,0 @@
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-namespace at::native {
-
-bool is_pinned_cuda(const Tensor& self, std::optional<Device> device) {
-  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(!device.has_value() || device->is_cuda());
-  // TODO: unhook this
-  return detail::getCUDAHooks().isPinnedPtr(self.storage().data());
-}
-
-Tensor _pin_memory_cuda(const Tensor& self, std::optional<Device> device) {
-  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(!device.has_value() || device->is_cuda());
-  auto* allocator = at::cuda::getPinnedMemoryAllocator();
-  auto storage = Storage(
-      Storage::use_byte_size_t(),
-      detail::computeStorageNbytes(
-          self.sizes(), self.strides(), self.dtype().itemsize()),
-      allocator,
-      /*resizable=*/false);
-  auto tensor = at::cpu::empty({0}, self.options()).set_(storage, 0, self.sizes(), self.strides());
-  tensor.copy_(self);
-  return tensor;
-}
-
-
-} // namespace at::native
diff --git a/aten/src/ATen/detail/AcceleratorHooksInterface.h b/aten/src/ATen/detail/AcceleratorHooksInterface.h
index d36e1c3f10c..7eefdfc7269 100644
--- a/aten/src/ATen/detail/AcceleratorHooksInterface.h
+++ b/aten/src/ATen/detail/AcceleratorHooksInterface.h
@@ -2,6 +2,7 @@
 #include <c10/core/Device.h>
 #include <c10/util/Exception.h>
+#include <c10/core/Allocator.h>
 
 C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wunused-parameter")
 namespace at {
@@ -40,6 +41,15 @@ struct TORCH_API AcceleratorHooksInterface {
     TORCH_CHECK(false, "Backend doesn't support maybeExchangeDevice()");
     return -1;
   }
+
+  virtual bool isPinnedPtr(const void* data) const {
+    return false;
+  }
+
+  virtual Allocator* getPinnedMemoryAllocator() const {
+    TORCH_CHECK(false, "Backend doesn't support getPinnedMemoryAllocator()");
+    return nullptr;
+  }
 };
 
 } // namespace at
diff --git a/aten/src/ATen/detail/CUDAHooksInterface.h b/aten/src/ATen/detail/CUDAHooksInterface.h
index 1349a580bca..9c37f6a82d7 100644
--- a/aten/src/ATen/detail/CUDAHooksInterface.h
+++ b/aten/src/ATen/detail/CUDAHooksInterface.h
@@ -77,7 +77,7 @@ struct TORCH_API CUDAHooksInterface : AcceleratorHooksInterface {
     TORCH_CHECK(false, "Cannot get device of pointer on CUDA without ATen_cuda library. ", CUDA_HELP);
   }
 
-  virtual bool isPinnedPtr(const void* /*data*/) const {
+  virtual bool isPinnedPtr(const void* data) const override {
     return false;
   }
 
@@ -121,7 +121,7 @@ struct TORCH_API CUDAHooksInterface : AcceleratorHooksInterface {
     return -1;
   }
 
-  virtual Allocator* getPinnedMemoryAllocator() const {
+  virtual Allocator* getPinnedMemoryAllocator() const override {
     TORCH_CHECK(false, "Pinned memory requires CUDA. ", CUDA_HELP);
   }
 
diff --git a/aten/src/ATen/detail/HIPHooksInterface.h b/aten/src/ATen/detail/HIPHooksInterface.h
index 72a90ba5f9d..2d60a5f4048 100644
--- a/aten/src/ATen/detail/HIPHooksInterface.h
+++ b/aten/src/ATen/detail/HIPHooksInterface.h
@@ -6,6 +6,8 @@
 #include <c10/util/Registry.h>
 
+#include <ATen/detail/AcceleratorHooksInterface.h>
+
 #include <memory>
 
 namespace at {
@@ -19,7 +21,7 @@ namespace at {
 // which we may want to call into from CPU code (and thus must be dynamically
 // dispatched, to allow for separate compilation of HIP code). See
 // CUDAHooksInterface for more detailed motivation.
-struct TORCH_API HIPHooksInterface {
+struct TORCH_API HIPHooksInterface : AcceleratorHooksInterface {
   // This should never actually be implemented, but it is used to
   // squelch -Werror=non-virtual-dtor
   virtual ~HIPHooksInterface() = default;
@@ -41,7 +43,11 @@ struct TORCH_API HIPHooksInterface {
     return -1;
   }
 
-  virtual Allocator* getPinnedMemoryAllocator() const {
+  virtual bool isPinnedPtr(const void* data) const override {
+    return false;
+  }
+
+  virtual Allocator* getPinnedMemoryAllocator() const override {
     AT_ERROR("Pinned memory requires HIP.");
   }
 
@@ -52,6 +58,10 @@ struct TORCH_API HIPHooksInterface {
   virtual int getNumGPUs() const {
     return 0;
   }
+
+  virtual bool hasPrimaryContext(DeviceIndex device_index) const override {
+    AT_ERROR("Cannot check primary context without ATen_hip library.");
+  }
 };
 
 // NB: dummy argument to suppress "ISO C++11 requires at least one argument
diff --git a/aten/src/ATen/detail/MPSHooksInterface.h b/aten/src/ATen/detail/MPSHooksInterface.h
index a06ee40c255..869bf8134ea 100644
--- a/aten/src/ATen/detail/MPSHooksInterface.h
+++ b/aten/src/ATen/detail/MPSHooksInterface.h
@@ -94,6 +94,12 @@ struct TORCH_API MPSHooksInterface : AcceleratorHooksInterface {
   bool hasPrimaryContext(DeviceIndex device_index) const override {
     FAIL_MPSHOOKS_FUNC(__func__);
   }
+  virtual bool isPinnedPtr(const void* data) const override {
+    return false;
+  }
+  virtual Allocator* getPinnedMemoryAllocator() const override {
+    FAIL_MPSHOOKS_FUNC(__func__);
+  }
 #undef FAIL_MPSHOOKS_FUNC
 };
 
diff --git a/aten/src/ATen/detail/MTIAHooksInterface.h b/aten/src/ATen/detail/MTIAHooksInterface.h
index 9b93d30fcc8..c55ac3437e6 100644
--- a/aten/src/ATen/detail/MTIAHooksInterface.h
+++ b/aten/src/ATen/detail/MTIAHooksInterface.h
@@ -6,6 +6,8 @@
 #include <c10/core/Device.h>
 #include <c10/util/Exception.h>
+#include <c10/core/Allocator.h>
+
 #include <ATen/detail/AcceleratorHooksInterface.h>
 #include <string>
 
 namespace at {
@@ -88,6 +90,15 @@ struct TORCH_API MTIAHooksInterface : AcceleratorHooksInterface {
   virtual void setCurrentStream(const c10::Stream& stream) const {
     FAIL_MTIAHOOKS_FUNC(__func__);
   }
+
+  virtual bool isPinnedPtr(const void* data) const override {
+    return false;
+  }
+
+  virtual Allocator* getPinnedMemoryAllocator() const override {
+    FAIL_MTIAHOOKS_FUNC(__func__);
+    return nullptr;
+  }
 };
 
 struct TORCH_API MTIAHooksArgs {};
diff --git a/aten/src/ATen/detail/PrivateUse1HooksInterface.h b/aten/src/ATen/detail/PrivateUse1HooksInterface.h
index b6310ec66b3..0b6b84f8bf8 100644
--- a/aten/src/ATen/detail/PrivateUse1HooksInterface.h
+++ b/aten/src/ATen/detail/PrivateUse1HooksInterface.h
@@ -24,7 +24,11 @@ struct TORCH_API PrivateUse1HooksInterface : AcceleratorHooksInterface {
         "You should register `PrivateUse1HooksInterface` for PrivateUse1 before call `getDeviceFromPtr`.");
   }
 
-  virtual Allocator* getPinnedMemoryAllocator() const {
+  virtual bool isPinnedPtr(const void* data) const override {
+    return false;
+  }
+
+  virtual Allocator* getPinnedMemoryAllocator() const override {
     TORCH_CHECK(
         false,
         "You should register `PrivateUse1HooksInterface` for PrivateUse1 before call `getPinnedMemoryAllocator`.");
diff --git a/aten/src/ATen/detail/XPUHooksInterface.h b/aten/src/ATen/detail/XPUHooksInterface.h
index b3e1f175c27..320808907f0 100644
--- a/aten/src/ATen/detail/XPUHooksInterface.h
+++ b/aten/src/ATen/detail/XPUHooksInterface.h
@@ -58,15 +58,15 @@ struct TORCH_API XPUHooksInterface : AcceleratorHooksInterface{
     TORCH_CHECK(false, "Cannot synchronize XPU device without ATen_xpu library.");
   }
 
-  virtual Allocator* getPinnedMemoryAllocator() const {
+  virtual Allocator* getPinnedMemoryAllocator() const override {
TORCH_CHECK(false, "Cannot get XPU pinned memory allocator without ATen_xpu library."); } - virtual bool isPinnedPtr(const void* /*data*/) const { + virtual bool isPinnedPtr(const void* data) const override { return false; } - virtual bool hasPrimaryContext(DeviceIndex /*device_index*/) const override{ + virtual bool hasPrimaryContext(DeviceIndex device_index) const override { TORCH_CHECK(false, "Cannot query primary context without ATen_xpu library."); } }; diff --git a/aten/src/ATen/mps/MPSAllocator.mm b/aten/src/ATen/mps/MPSAllocator.mm index 87a1e2c1b18..f546d986354 100644 --- a/aten/src/ATen/mps/MPSAllocator.mm +++ b/aten/src/ATen/mps/MPSAllocator.mm @@ -3,8 +3,6 @@ #include #include #include -#include -#include #include #include @@ -860,31 +858,12 @@ IMPSAllocator* getIMPSAllocator(bool sharedAllocator) { return nullptr; } -} // namespace at::mps - -namespace at::native { - // torch.is_pinned() implementation // Pinned memory will be helpful on Apple Silicon Macs with Unified memory as we // will be able to use SharedStorageMode for MTLBuffer allocations. This will // avoid extra copies on DataLoading operations. -bool is_pinned_mps(const Tensor& self, std::optional device) { - TORCH_INTERNAL_ASSERT_DEBUG_ONLY(!device.has_value() || device->is_mps()); - return at::mps::_getSharedAllocator().isSharedBuffer(self.storage().data()); +bool isMPSPinnedPtr(const void* data) { + return at::mps::_getSharedAllocator().isSharedBuffer(data); } -// torch.pin_memory() implementation -Tensor _pin_memory_mps(const Tensor& self, std::optional device) { - TORCH_INTERNAL_ASSERT_DEBUG_ONLY(!device.has_value() || device->is_mps()); - auto* shared_allocator = at::mps::getIMPSAllocator(true); - TORCH_CHECK(shared_allocator, "unable to pin memory on a non-unified memory device"); - - const size_t storage_size = at::detail::computeStorageNbytes(self.sizes(), self.strides(), self.dtype().itemsize()); - std::cerr << "Pinning memory of size " << storage_size / 1024UL << " KB\n"; - auto storage = Storage(Storage::use_byte_size_t(), storage_size, shared_allocator, false); - auto tensor = at::cpu::empty({0}, self.options()).set_(storage, 0, self.sizes(), self.strides()); - tensor.copy_(self); - return tensor; -} - -} // namespace at::native +} // namespace at::mps diff --git a/aten/src/ATen/mps/MPSAllocatorInterface.h b/aten/src/ATen/mps/MPSAllocatorInterface.h index cce232fd693..9aa4769f76e 100644 --- a/aten/src/ATen/mps/MPSAllocatorInterface.h +++ b/aten/src/ATen/mps/MPSAllocatorInterface.h @@ -59,4 +59,6 @@ C10_DECLARE_REGISTRY(MPSAllocatorCallbacksRegistry, IMpsAllocatorCallback); IMPSAllocator* getIMPSAllocator(bool sharedAllocator = false); +bool isMPSPinnedPtr(const void* data); + } // namespace at::mps diff --git a/aten/src/ATen/mps/MPSGuardImpl.h b/aten/src/ATen/mps/MPSGuardImpl.h index 1b57d296676..cb50df2faea 100644 --- a/aten/src/ATen/mps/MPSGuardImpl.h +++ b/aten/src/ATen/mps/MPSGuardImpl.h @@ -68,6 +68,11 @@ struct TORCH_API MPSGuardImpl final : public c10::impl::DeviceGuardImplInterface return Stream(Stream::DEFAULT, Device(c10::DeviceType::MPS, 0)); } + Stream getNewStream(Device, int priority = 0) const override { + (void)priority; + return Stream(Stream::DEFAULT, Device(c10::DeviceType::MPS, 0)); + } + Stream getDefaultStream(Device d) const override { return Stream(Stream::DEFAULT, Device(c10::DeviceType::MPS, 0)); } diff --git a/aten/src/ATen/mps/MPSHooks.h b/aten/src/ATen/mps/MPSHooks.h index 7a9a24e8017..4858c0609f5 100644 --- a/aten/src/ATen/mps/MPSHooks.h +++ b/aten/src/ATen/mps/MPSHooks.h 
@@ -34,6 +34,8 @@ struct MPSHooks : public at::MPSHooksInterface {
   size_t getDriverAllocatedMemory() const override;
   size_t getRecommendedMaxMemory() const override;
   void setMemoryFraction(double ratio) const override;
+  bool isPinnedPtr(const void* data) const override;
+  Allocator* getPinnedMemoryAllocator() const override;
 
   // MPSProfiler interface
   void profilerStartTrace(const std::string& mode, bool waitUntilCompleted) const override;
diff --git a/aten/src/ATen/mps/MPSHooks.mm b/aten/src/ATen/mps/MPSHooks.mm
index 285c0771c3c..e5bf149e5bd 100644
--- a/aten/src/ATen/mps/MPSHooks.mm
+++ b/aten/src/ATen/mps/MPSHooks.mm
@@ -124,6 +124,14 @@ double MPSHooks::elapsedTimeOfEvents(uint32_t start_event_id, uint32_t end_event_id) const {
   return at::mps::getMPSEventPool()->elapsedTime(start_event_id, end_event_id);
 }
 
+bool MPSHooks::isPinnedPtr(const void* data) const {
+  return at::mps::isMPSPinnedPtr(data);
+}
+
+Allocator* MPSHooks::getPinnedMemoryAllocator() const {
+  return at::mps::getIMPSAllocator(true);
+}
+
 using at::MPSHooksRegistry;
 using at::RegistererMPSHooksRegistry;
 
diff --git a/aten/src/ATen/native/Memory.cpp b/aten/src/ATen/native/Memory.cpp
index fefe9ab5a8d..dd69c2fd251 100644
--- a/aten/src/ATen/native/Memory.cpp
+++ b/aten/src/ATen/native/Memory.cpp
@@ -1,15 +1,21 @@
 #define TORCH_ASSERT_ONLY_METHOD_OPERATORS
 #include <ATen/core/Tensor.h>
 #include <ATen/MemoryOverlap.h>
+#include <ATen/Context.h>
+#include <ATen/DeviceAccelerator.h>
+#include <ATen/EmptyTensor.h>
 
 #ifndef AT_PER_OPERATOR_HEADERS
 #include <ATen/Functions.h>
 #include <ATen/NativeFunctions.h>
+#include <ATen/CPUFunctions.h>
 #else
 #include <ATen/ops/_debug_has_internal_overlap_native.h>
 #include <ATen/ops/_pin_memory.h>
 #include <ATen/ops/is_pinned_native.h>
 #include <ATen/ops/pin_memory_native.h>
+#include <ATen/ops/_pin_memory_native.h>
+#include <ATen/ops/empty_cpu_dispatch.h>
 #endif
 
 namespace at::native {
 
@@ -19,15 +25,28 @@ int64_t _debug_has_internal_overlap(const Tensor& self) {
   return static_cast<int64_t>(at::has_internal_overlap(self));
 }
 
-// Technically, we could force backends to explicitly say "no, we don't support
-// pinned memory, always return false", but this makes life a little easier when
-// you haven't loaded the backend extension at all (which can happen, e.g., on a
-// CPU build of PyTorch and you try to check if something is CUDA pinned)
-bool is_pinned_default(const Tensor& self, std::optional<Device> device) {
-  return false;
+bool is_pinned(const Tensor& self, std::optional<c10::Device> device) {
+  std::optional<c10::DeviceType> opt_device_type;
+  if (device.has_value()) {
+    TORCH_WARN_DEPRECATION(
+        "The argument 'device' of Tensor.is_pinned() ",
+        "is deprecated. Please do not pass this argument.")
+    opt_device_type = device.value().type();
+  }
+  // Only CPU tensors can be pinned
+  if (!self.is_cpu()) {
+    return false;
+  }
+  // Use getAcceleratorHooksInterface to make is_pinned device-agnostic
+  return at::globalContext().isPinnedPtr(self.storage().data(), opt_device_type);
 }
 
-Tensor pin_memory(const Tensor& self, std::optional<Device> device) {
+Tensor pin_memory(const Tensor& self, std::optional<c10::Device> device) {
+  if (device.has_value()) {
+    TORCH_WARN_DEPRECATION(
+        "The argument 'device' of Tensor.pin_memory() ",
+        "is deprecated. Please do not pass this argument.")
+  }
   // Kind of mad that I have to do two dynamic dispatches here, pretty
   // annoying
   if (self.is_pinned(device)) {
@@ -36,4 +55,21 @@ Tensor pin_memory(const Tensor& self, std::optional<c10::Device> device) {
   return at::_pin_memory(self, device);
 }
 
+Tensor _pin_memory(const Tensor& self, std::optional<c10::Device> device) {
+  TORCH_CHECK(self.device().is_cpu(), "cannot pin '", self.toString(), "' only dense CPU tensors can be pinned");
+  // Use getAcceleratorHooksInterface to make pin_memory device-agnostic
+  auto* allocator = device.has_value() ?
+      at::globalContext().getPinnedMemoryAllocator(device.value().type()) :
+      at::globalContext().getPinnedMemoryAllocator();
+  auto storage = Storage(
+      Storage::use_byte_size_t(),
+      detail::computeStorageNbytes(
+          self.sizes(), self.strides(), self.dtype().itemsize()),
+      allocator,
+      /*resizable=*/false);
+  auto tensor = at::cpu::empty({0}, self.options()).set_(storage, 0, self.sizes(), self.strides());
+  tensor.copy_(self);
+  return tensor;
+}
+
 } // namespace at::native
diff --git a/aten/src/ATen/native/native_functions.yaml b/aten/src/ATen/native/native_functions.yaml
index d5138fe0e52..32fef6f23fd 100644
--- a/aten/src/ATen/native/native_functions.yaml
+++ b/aten/src/ATen/native/native_functions.yaml
@@ -4545,9 +4545,10 @@
 - func: is_pinned(Tensor self, Device? device=None) -> bool
   variants: method
   dispatch:
-    NestedTensorCUDA, CUDA: is_pinned_cuda
-    MPS: is_pinned_mps
-    CompositeExplicitAutograd: is_pinned_default
+    # the NestedTensor keys are necessary because NestedTensor has been removed
+    # from the CompositeExplicitAutograd keyset; see Note [NestedTensor Not Included in Backend Keys]
+    CompositeExplicitAutograd, NestedTensorCPU: is_pinned
+    SparseCPU, SparseCsrCPU: is_pinned_sparse
 
 # TODO: add a copy kwarg that guarantees that the tensor is put into fresh
 # pinned memory
@@ -4557,9 +4558,9 @@
 # Unlike pin_memory, this is guaranteed to give a new non-aliasing tensor
 - func: _pin_memory(Tensor self, Device? device=None) -> Tensor
   dispatch:
-    CUDA: _pin_memory_cuda
-    MPS: _pin_memory_mps
-    NestedTensorCUDA, NestedTensorCPU: _pin_memory_nested
+    NestedTensorCPU: _pin_memory_nested
+    SparseCPU, SparseCsrCPU: _pin_memory_sparse
+    CompositeExplicitAutograd: _pin_memory
   autogen: _pin_memory.out
 
 - func: pinverse(Tensor self, float rcond=1e-15) -> Tensor
diff --git a/aten/src/ATen/native/sparse/SparseUnaryOps.cpp b/aten/src/ATen/native/sparse/SparseUnaryOps.cpp
index e1e1c05b567..71bb05d95de 100644
--- a/aten/src/ATen/native/sparse/SparseUnaryOps.cpp
+++ b/aten/src/ATen/native/sparse/SparseUnaryOps.cpp
@@ -71,6 +71,8 @@
 #include
 #include
 #include
+#include <ATen/ops/is_pinned_native.h>
+#include <ATen/ops/_pin_memory_native.h>
 #endif
 
 namespace at::native {
 
@@ -280,4 +282,23 @@ Tensor& nan_to_num_sparse_(
     return nan_to_num_sparse_out(self, nan, posinf, neginf, self);
 }
 
+bool is_pinned_sparse(const Tensor& self, std::optional<c10::Device> device) {
+  if (device.has_value()) {
+    TORCH_WARN_DEPRECATION(
+        "The argument 'device' of Tensor.is_pinned() ",
+        "is deprecated. Please do not pass this argument.")
+  }
+  // Currently, pinning memory is not supported for sparse tensors,
+  // so always return false.
+  return false;
+}
+
+Tensor _pin_memory_sparse(const Tensor& self, std::optional<c10::Device> device) {
+  // Here, we throw an error rather than returning the input tensor,
+  // because we always return a pinned-memory tensor and handing back
+  // an unpinned one could mislead users.
+  TORCH_CHECK_NOT_IMPLEMENTED(
+      false, "'aten::_pin_memory' is not implemented for sparse tensor.");
+}
+
 } // namespace at::native
diff --git a/aten/src/ATen/templates/RegisterBackendSelect.cpp b/aten/src/ATen/templates/RegisterBackendSelect.cpp
index aab49224895..018cf358f11 100644
--- a/aten/src/ATen/templates/RegisterBackendSelect.cpp
+++ b/aten/src/ATen/templates/RegisterBackendSelect.cpp
@@ -11,8 +11,6 @@
 #ifndef AT_PER_OPERATOR_HEADERS
 #include <ATen/Operators.h>
 #else
-#include <ATen/ops/is_pinned_ops.h>
-#include <ATen/ops/_pin_memory_ops.h>
 ${ops_headers}
 #endif
 
@@ -23,31 +21,8 @@ namespace {
 
 ${backend_select_method_definitions}
 
-bool is_pinned(const Tensor& self, std::optional<Device> device) {
-  // Only CPU tensors can be pinned
-  if (!self.is_cpu()) {
-    return false;
-  }
-  // TODO: fetch scalar type from Tensor? But it doesn't really matter...
-  DispatchKeySet _dk = c10::DispatchKeySet(c10::computeDispatchKey(std::nullopt, self.layout(), device.value_or(at::kCUDA)));
-  return at::_ops::is_pinned::redispatch(_dk, self, device);
-}
-
-at::Tensor _pin_memory(const Tensor& self, std::optional<Device> device) {
-  TORCH_CHECK(self.device().is_cpu(), "cannot pin '", self.toString(), "' only dense CPU tensors can be pinned");
-  DispatchKeySet _dk = c10::DispatchKeySet(c10::computeDispatchKey(std::nullopt, self.layout(), device.value_or(at::kCUDA)));
-  if (self.is_nested()) {
-    constexpr auto nested_key_set = c10::DispatchKeySet(
-        {c10::DispatchKey::NestedTensor, c10::DispatchKey::AutogradNestedTensor});
-    _dk = _dk.add(self.key_set() & nested_key_set);
-  }
-  return at::_ops::_pin_memory::redispatch(_dk, self, device);
-}
-
 TORCH_LIBRARY_IMPL(aten, BackendSelect, m) {
   ${backend_select_function_registrations};
-  m.impl(TORCH_SELECTIVE_NAME("aten::is_pinned"), TORCH_FN(is_pinned));
-  m.impl(TORCH_SELECTIVE_NAME("aten::_pin_memory"), TORCH_FN(_pin_memory));
 }
 
 } // namespace
diff --git a/test/cpp_extensions/open_registration_extension.cpp b/test/cpp_extensions/open_registration_extension.cpp
index 99bf97833cc..cf66c38976d 100644
--- a/test/cpp_extensions/open_registration_extension.cpp
+++ b/test/cpp_extensions/open_registration_extension.cpp
@@ -418,38 +418,6 @@ at::Tensor& custom_set_source_Storage_storage_offset(at::Tensor& result,
   return result;
 }
 
-// basic dummy functions related to pin_memory.
-std::vector<void*> custom_pinned_data_ptr;
-
-at::Tensor custom__pin_memory(const at::Tensor& self, std::optional<at::Device> device) {
-  TORCH_CHECK(
-      self.device().is_cpu(),
-      "cannot pin '",
-      self.toString(),
-      "' only dense CPU tensors can be pinned");
-
-  // record pinned data ptr
-  at::Tensor dump_pinned_tensor = self * 1.0;
-  custom_pinned_data_ptr.push_back(dump_pinned_tensor.storage().data_ptr().get());
-
-  return dump_pinned_tensor;
-}
-
-bool custom_is_pinned(const at::Tensor& self, std::optional<at::Device> device) {
-  // Only CPU tensors can be pinned
-  if (!self.is_cpu()) {
-    return false;
-  }
-
-  void* query_pinned_ptr = self.storage().data_ptr().get();
-  for (const auto& iter_ptr : custom_pinned_data_ptr) {
-    if (iter_ptr == query_pinned_ptr) {
-      return true;
-    }
-  }
-  return false;
-}
-
 const at::Tensor& custom_resize_(const at::Tensor& self, at::IntArrayRef size,
                                  std::optional<at::MemoryFormat> optional_memory_format) {
   at::TensorImpl* tensor_impl = self.unsafeGetTensorImpl();
@@ -545,8 +513,6 @@ TORCH_LIBRARY_IMPL(aten, PrivateUse1, m) {
   m.impl("empty_strided", &custom_empty_strided);
   m.impl("set_.source_Storage", &custom_set_source_Storage);
   m.impl("set_.source_Storage_storage_offset",&custom_set_source_Storage_storage_offset);
-  m.impl("_pin_memory", &custom__pin_memory);
-  m.impl("is_pinned", &custom_is_pinned);
   m.impl("resize_", &custom_resize_);
   m.impl("as_strided", at::native::as_strided_tensorimpl);
   m.impl("quantize_per_tensor", at::native::quantize_per_tensor);
@@ -612,6 +578,9 @@ void set_custom_device_index(c10::DeviceIndex device_index) {
   custom_device_index = device_index;
 }
 
+// a global flag used by the dummy pin_memory of the custom device
+bool custom_pinned_flag = false;
+
 struct FooHooksArgs : public at::PrivateUse1HooksArgs {};
 
 struct FooHooksInterface : public at::PrivateUse1HooksInterface {
@@ -621,6 +590,16 @@ struct FooHooksInterface : public at::PrivateUse1HooksInterface {
     static auto device_gen = make_generator_privateuse1(device_index);
     return device_gen;
   }
+  // This is a simple implementation: custom_pinned_flag is set to true
+  // once tensor.pin_memory() is called, and tensor.is_pinned() then
+  // always returns true no matter which tensor it is called on.
+  bool isPinnedPtr(const void* data) const override {
+    return custom_pinned_flag;
+  }
+  c10::Allocator* getPinnedMemoryAllocator() const override {
+    custom_pinned_flag = true;
+    return c10::GetCPUAllocator();
+  }
 };
 
 TORCH_DECLARE_REGISTRY(PrivateUse1HooksRegistry, FooHooksInterface, FooHooksArgs);
diff --git a/test/test_cpp_extensions_open_device_registration.py b/test/test_cpp_extensions_open_device_registration.py
index 23cd281ba56..4c9d36af1f0 100644
--- a/test/test_cpp_extensions_open_device_registration.py
+++ b/test/test_cpp_extensions_open_device_registration.py
@@ -343,71 +343,24 @@ class TestCppExtensionOpenRgistration(common.TestCase):
         cpu_tensor_pin = cpu_tensor.pin_memory("foo")
         self.assertTrue(cpu_tensor_pin.is_pinned("foo"))
 
-        # Test storage pin_memory on custom device string
+        # Test storage pin_memory and is_pinned
         cpu_storage = cpu_tensor.storage()
-        foo_device = torch.device("foo")
-        self.assertFalse(cpu_storage.is_pinned("foo"))
+        # We implement a dummy pin_memory for the custom device with no
+        # practical significance: once tensor.pin_memory() has been called,
+        # tensor.is_pinned() will always return True no matter which
+        # tensor it is called on.
+        self.assertTrue(cpu_storage.is_pinned("foo"))
 
-        cpu_storage_pin = cpu_storage.pin_memory("foo")
-        self.assertFalse(cpu_storage.is_pinned())
-        self.assertFalse(cpu_storage.is_pinned("foo"))
-        self.assertFalse(cpu_storage.is_pinned(foo_device))
-        self.assertFalse(cpu_storage_pin.is_pinned())
-        self.assertTrue(cpu_storage_pin.is_pinned("foo"))
-        self.assertTrue(cpu_storage_pin.is_pinned(foo_device))
-
-        cpu_storage_pin_already = cpu_storage_pin.pin_memory("foo")
-        self.assertTrue(cpu_storage_pin.is_pinned("foo"))
-        self.assertTrue(cpu_storage_pin.is_pinned(foo_device))
-        self.assertTrue(cpu_storage_pin_already.is_pinned("foo"))
-        self.assertTrue(cpu_storage_pin_already.is_pinned(foo_device))
-        self.assertFalse(cpu_storage.is_pinned("foo"))
-
-        cpu_storage_pinned = cpu_storage.pin_memory(foo_device)
-        self.assertFalse(cpu_storage.is_pinned())
-        self.assertFalse(cpu_storage.is_pinned("foo"))
-        self.assertFalse(cpu_storage.is_pinned(foo_device))
-        self.assertFalse(cpu_storage_pinned.is_pinned())
+        cpu_storage_pinned = cpu_storage.pin_memory("foo")
         self.assertTrue(cpu_storage_pinned.is_pinned("foo"))
-        self.assertTrue(cpu_storage_pinned.is_pinned(foo_device))
 
         # Test untyped storage pin_memory and is_pin
         cpu_tensor = torch.randn([3, 2, 1, 4])
         cpu_untyped_storage = cpu_tensor.untyped_storage()
-        self.assertFalse(cpu_untyped_storage.is_pinned())
-        self.assertFalse(cpu_untyped_storage.is_pinned("foo"))
+        self.assertTrue(cpu_untyped_storage.is_pinned("foo"))
         cpu_untyped_storage_pinned = cpu_untyped_storage.pin_memory("foo")
-        self.assertFalse(cpu_untyped_storage_pinned.is_pinned())
         self.assertTrue(cpu_untyped_storage_pinned.is_pinned("foo"))
-        self.assertTrue(cpu_untyped_storage_pinned.is_pinned(foo_device))
-
-        cpu_untyped_storage_pinned = cpu_untyped_storage.pin_memory(foo_device)
-        self.assertFalse(cpu_untyped_storage_pinned.is_pinned())
-        self.assertTrue(cpu_untyped_storage_pinned.is_pinned("foo"))
-        self.assertTrue(cpu_untyped_storage_pinned.is_pinned(foo_device))
-
-        with self.assertRaisesRegex(TypeError, "positional arguments but 3 were given"):
-            cpu_untyped_storage_pinned.is_pinned("foo1", "foo2")
-
-        # Test storage pin_memory on error device
-        self.assertFalse(cpu_storage_pinned.is_pinned("hpu"))
-        self.assertFalse(cpu_untyped_storage_pinned.is_pinned("hpu"))
-        invalid_device = torch.device("hpu")
-        self.assertFalse(cpu_untyped_storage_pinned.is_pinned(invalid_device))
-
-        with self.assertRaisesRegex(
-            NotImplementedError, "with arguments from the 'HPU' backend"
-        ):
-            cpu_storage.pin_memory("hpu")
-        with self.assertRaisesRegex(
-            NotImplementedError, "with arguments from the 'HPU' backend"
-        ):
-            cpu_untyped_storage.pin_memory("hpu")
-        with self.assertRaisesRegex(
-            NotImplementedError, "with arguments from the 'HPU' backend"
-        ):
-            cpu_untyped_storage.pin_memory(invalid_device)
 
     @unittest.skip(
         "Temporarily disable due to the tiny differences between clang++ and g++ in defining static variable in inline function"
diff --git a/test/test_cpp_extensions_stream_and_event.py b/test/test_cpp_extensions_stream_and_event.py
index 9f2290fea5a..c26e8b2b1a8 100644
--- a/test/test_cpp_extensions_stream_and_event.py
+++ b/test/test_cpp_extensions_stream_and_event.py
@@ -14,6 +14,7 @@ from torch.testing._internal.common_utils import (
     IS_LINUX,
     skipIfTorchDynamo,
     TEST_CUDA,
+    TEST_MPS,
     TEST_PRIVATEUSE1,
     TEST_XPU,
 )
@@ -37,7 +38,13 @@ def remove_build_path():
 # Since we use a fake MTIA device backend to test generic Stream/Event, device backends are mutual exclusive to each other.
 # The test will be skipped if any of the following conditions are met:
 @unittest.skipIf(
-    IS_ARM64 or not IS_LINUX or TEST_CUDA or TEST_XPU or TEST_PRIVATEUSE1 or TEST_ROCM,
+    IS_ARM64
+    or not IS_LINUX
+    or TEST_CUDA
+    or TEST_XPU
+    or TEST_MPS
+    or TEST_PRIVATEUSE1
+    or TEST_ROCM,
     "Only on linux platform and mutual exclusive to other backends",
 )
 @torch.testing._internal.common_utils.markDynamoStrictTest
diff --git a/test/test_torch.py b/test/test_torch.py
index 6cb28f12307..3e782eabe92 100644
--- a/test/test_torch.py
+++ b/test/test_torch.py
@@ -8432,9 +8432,7 @@ tensor([[[1.+1.j, 1.+1.j, 1.+1.j,  ..., 1.+1.j, 1.+1.j, 1.+1.j],
     def test_pin_memory(self):
         x = torch.randn(3, 5)
         self.assertFalse(x.is_pinned())
-        if not torch.cuda.is_available():
-            self.assertRaises(RuntimeError, lambda: x.pin_memory())
-        else:
+        if torch.cuda.is_available():
             pinned = x.pin_memory()
             self.assertTrue(pinned.is_pinned())
             self.assertEqual(pinned, x)
diff --git a/torch/csrc/Stream.cpp b/torch/csrc/Stream.cpp
index 179f4f1390a..cff0dce5194 100644
--- a/torch/csrc/Stream.cpp
+++ b/torch/csrc/Stream.cpp
@@ -29,7 +29,7 @@ static PyObject* THPStream_pynew(
   int64_t priority = 0;
 
   static torch::PythonArgParser parser({
-      "Steram(Device device=None, *, int64_t priority=0)",
+      "Stream(Device device=None, *, int64_t priority=0)",
       "Stream(int64_t stream_id, int64_t device_index, int64_t device_type, *, int64_t priority=0)",
   });
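
The net user-visible effect of this patch: Tensor.pin_memory() and Tensor.is_pinned() no longer hard-code CUDA, and instead route through the AcceleratorHooksInterface of whichever accelerator at::getAccelerator() detects. A minimal sketch of the expected Python-level behavior, assuming a build where CUDA is the detected accelerator:

    import torch

    x = torch.randn(3, 5)              # dense CPU tensor
    assert not x.is_pinned()           # pageable CPU memory is not pinned

    if torch.cuda.is_available():      # any detected accelerator behaves the same way
        pinned = x.pin_memory()        # allocated via the hooks' pinned-memory allocator
        assert pinned.is_pinned()      # checked through the same hooks, no CUDA-specific path
        assert torch.equal(pinned, x)  # a fresh, non-aliasing copy of the data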
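
The device argument of both methods is deprecated rather than removed: passing it still works but now goes through TORCH_WARN_DEPRECATION (see the Memory.cpp hunks above), which should surface as a Python warning. A sketch of that behavior, under the same CUDA-build assumption:

    import warnings

    import torch

    x = torch.randn(3, 5)
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        x.is_pinned(torch.device("cuda"))  # deprecated; prefer x.is_pinned()
    assert any("deprecated" in str(w.message) for w in caught)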