enable memory tracker metrics for npu (#27280)

2026-05-14 20:58:08 +00:00 · 2023-11-06 21:44:21 +08:00 · 2023-11-06 21:44:21 +08:00 · 1ffc4dee5b
commit 1ffc4dee5b
parent d7dcfa8917
2 changed files with 18 additions and 3 deletions
--- a/src/transformers/trainer_utils.py
+++ b/src/transformers/trainer_utils.py
@@ -459,6 +459,11 @@ class TrainerMemoryTracker:
        elif is_torch_xpu_available():
            import torch

+            self.torch = torch
+            self.gpu = {}
+        elif is_torch_npu_available():
+            import torch
+
            self.torch = torch
            self.gpu = {}
        else:
@@ -517,6 +522,9 @@ class TrainerMemoryTracker:
            elif is_torch_xpu_available():
                self.torch.xpu.reset_peak_memory_stats()
                self.torch.xpu.empty_cache()
+            elif is_torch_npu_available():
+                self.torch.npu.reset_peak_memory_stats()
+                self.torch.npu.empty_cache()

        # gpu
        if self.torch is not None:
@@ -524,6 +532,8 @@ class TrainerMemoryTracker:
                self.gpu_mem_used_at_start = self.torch.cuda.memory_allocated()
            elif is_torch_xpu_available():
                self.gpu_mem_used_at_start = self.torch.xpu.memory_allocated()
+            elif is_torch_npu_available():
+                self.gpu_mem_used_at_start = self.torch.npu.memory_allocated()

        # cpu
        self.cpu_mem_used_at_start = self.cpu_mem_used()
@@ -551,6 +561,8 @@ class TrainerMemoryTracker:
                self.torch.cuda.empty_cache()
            elif is_torch_xpu_available():
                self.torch.xpu.empty_cache()
+            elif is_torch_npu_available():
+                self.torch.npu.empty_cache()

        # concepts:
        # - alloc_delta:  the difference of allocated memory between the end and the start
@@ -565,6 +577,9 @@ class TrainerMemoryTracker:
            elif is_torch_xpu_available():
                self.gpu_mem_used_now = self.torch.xpu.memory_allocated()
                self.gpu_mem_used_peak = self.torch.xpu.max_memory_allocated()
+            elif is_torch_npu_available():
+                self.gpu_mem_used_now = self.torch.npu.memory_allocated()
+                self.gpu_mem_used_peak = self.torch.npu.max_memory_allocated()
            else:
                raise ValueError("No available GPU device found!")

--- a/tests/trainer/test_trainer.py
+++ b/tests/trainer/test_trainer.py
@@ -1944,18 +1944,18 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
        metrics = trainer.train().metrics
        check_func("init_mem_cpu_alloc_delta", metrics)
        check_func("train_mem_cpu_alloc_delta", metrics)
-        if torch.cuda.device_count() > 0:
+        if backend_device_count(torch_device) > 0:
            check_func("init_mem_gpu_alloc_delta", metrics)
            check_func("train_mem_gpu_alloc_delta", metrics)

        metrics = trainer.evaluate()
        check_func("eval_mem_cpu_alloc_delta", metrics)
-        if torch.cuda.device_count() > 0:
+        if backend_device_count(torch_device) > 0:
            check_func("eval_mem_gpu_alloc_delta", metrics)

        metrics = trainer.predict(RegressionDataset()).metrics
        check_func("test_mem_cpu_alloc_delta", metrics)
-        if torch.cuda.device_count() > 0:
+        if backend_device_count(torch_device) > 0:
            check_func("test_mem_gpu_alloc_delta", metrics)

    def test_mem_metrics(self):