From bf00d26debc3a313bf3a8b6c5b517e6800690276 Mon Sep 17 00:00:00 2001
From: Nathan John Sircombe <nathan.sircombe@arm.com>
Date: Thu, 20 May 2021 07:42:48 -0700
Subject: [PATCH] Enables builds with Compute Library backend for oneDNN
 (#55913)

Summary:
Since v1.7, oneDNN (MKL-DNN) has supported the use of Compute Library
for the Arm architecture to provide optimised convolution primitives
on AArch64.

This change enables the use of Compute Library in the PyTorch build.
Following the approach used to enable the use of CBLAS in MKLDNN,
it is enabled by setting the env vars USE_MKLDNN and USE_MKLDNN_ACL.
The location of the Compute Library build must be set using `ACL_ROOT_DIR`.

This is an extension of the work in https://github.com/pytorch/pytorch/pull/50400
which added support for the oneDNN/MKL-DNN backend on AArch64.

_Note: this assumes that Compute Library has been built and installed at
ACL_ROOT_DIR. Compute Library can be downloaded here:
`https://github.com/ARM-software/ComputeLibrary`_

Fixes #{issue number}

Pull Request resolved: https://github.com/pytorch/pytorch/pull/55913

Reviewed By: ailzhang

Differential Revision: D28559516

Pulled By: malfet

fbshipit-source-id: 29d24996097d0a54efc9ab754fb3f0bded290005
---
 CMakeLists.txt                    |  3 +++
 cmake/Summary.cmake               |  1 +
 cmake/public/ComputeLibrary.cmake | 34 +++++++++++++++++++++++++++++++
 cmake/public/mkldnn.cmake         |  4 ++++
 setup.py                          | 11 ++++++++++
 5 files changed, 53 insertions(+)
 create mode 100644 cmake/public/ComputeLibrary.cmake

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9786d36b854..5f308a75f07 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -269,6 +269,9 @@ option(USE_ZSTD "Use ZSTD" OFF)
 cmake_dependent_option(
   USE_MKLDNN "Use MKLDNN. Only available on x86, x86_64, and AArch64." "${CPU_INTEL}"
   "CPU_INTEL OR CPU_AARCH64" OFF)
+cmake_dependent_option(
+  USE_MKLDNN_ACL "Use Compute Library for the Arm architecture." OFF
+  "USE_MKLDNN AND CPU_AARCH64" OFF)
 set(MKLDNN_ENABLE_CONCURRENT_EXEC ${USE_MKLDNN})
 cmake_dependent_option(
     USE_MKLDNN_CBLAS "Use CBLAS in MKLDNN" OFF
diff --git a/cmake/Summary.cmake b/cmake/Summary.cmake
index 795da7cc428..8ff39bc458b 100644
--- a/cmake/Summary.cmake
+++ b/cmake/Summary.cmake
@@ -131,6 +131,7 @@ function(caffe2_print_configuration_summary)
   message(STATUS "  USE_MKL               : ${CAFFE2_USE_MKL}")
   message(STATUS "  USE_MKLDNN            : ${USE_MKLDNN}")
   if(${CAFFE2_USE_MKLDNN})
+    message(STATUS "  USE_MKLDNN_ACL        : ${USE_MKLDNN_ACL}")
     message(STATUS "  USE_MKLDNN_CBLAS      : ${USE_MKLDNN_CBLAS}")
   endif()
   message(STATUS "  USE_NCCL              : ${USE_NCCL}")
diff --git a/cmake/public/ComputeLibrary.cmake b/cmake/public/ComputeLibrary.cmake
new file mode 100644
index 00000000000..d0b3b56ff53
--- /dev/null
+++ b/cmake/public/ComputeLibrary.cmake
@@ -0,0 +1,34 @@
+# Build with Compute Library backend for the Arm architecture
+# Note: Compute Library is available from: https://github.com/ARM-software/ComputeLibrary
+#   and must be built separately; its location must be set with the env var ACL_ROOT_DIR.
+#   This path will be checked later as part of FindACL.cmake in oneDNN.
+
+if(NOT USE_MKLDNN_ACL)
+  return()
+endif()
+
+set(DNNL_AARCH64_USE_ACL ON CACHE BOOL "" FORCE)
+
+# Check the Compute Library version number. oneDNN / MKL-DNN v2.2 onwards performs
+# this check itself, so it can be removed once PyTorch transitions to v2.2.
+set(ACL_MINIMUM_VERSION "21.02")
+
+set(ACL_VERSION "")
+if(NOT "$ENV{ACL_ROOT_DIR}" STREQUAL "")  # an unset root would make the glob start at "/"
+  file(GLOB_RECURSE ACL_VERSION_FILE "$ENV{ACL_ROOT_DIR}/*/arm_compute_version.embed")
+  if(NOT "${ACL_VERSION_FILE}" STREQUAL "")
+    list(GET ACL_VERSION_FILE 0 ACL_VERSION_FILE)  # the glob may match several copies
+    file(READ "${ACL_VERSION_FILE}" ACL_VERSION_STRING)
+    string(REGEX MATCH "v([0-9]+\\.[0-9]+)" ACL_VERSION "${ACL_VERSION_STRING}")
+    set(ACL_VERSION "${CMAKE_MATCH_1}")
+  endif()
+endif()
+
+if("${ACL_VERSION}" STREQUAL "")
+  message(WARNING "Build may fail: Could not determine ACL version (minimum required is ${ACL_MINIMUM_VERSION})")
+elseif("${ACL_VERSION}" VERSION_EQUAL "0.0")
+  # Unreleased ACL reports "v0.0-unreleased" and may not be compatible with oneDNN; prefer the latest release.
+  message(WARNING "Build may fail: Using unreleased ACL version (minimum required is ${ACL_MINIMUM_VERSION})")
+elseif("${ACL_VERSION}" VERSION_LESS "${ACL_MINIMUM_VERSION}")
+  message(FATAL_ERROR "Detected ACL version ${ACL_VERSION}, but minimum required is ${ACL_MINIMUM_VERSION}")
+endif()
diff --git a/cmake/public/mkldnn.cmake b/cmake/public/mkldnn.cmake
index 50e862a6b2e..87935625f9b 100644
--- a/cmake/public/mkldnn.cmake
+++ b/cmake/public/mkldnn.cmake
@@ -1,5 +1,9 @@
 set(MKLDNN_USE_NATIVE_ARCH ${USE_NATIVE_ARCH})
 
+if(CPU_AARCH64)
+  include(${CMAKE_CURRENT_LIST_DIR}/ComputeLibrary.cmake)
+endif()
+
 find_package(MKLDNN QUIET)
 
 if(NOT TARGET caffe2::mkldnn)
diff --git a/setup.py b/setup.py
index 9c21e01e89a..d6dff1cef0a 100644
--- a/setup.py
+++ b/setup.py
@@ -43,6 +43,10 @@
 #   USE_MKLDNN=0
 #     disables use of MKLDNN
 #
+#   USE_MKLDNN_ACL
+#     enables use of Compute Library backend for MKLDNN on Arm;
+#     USE_MKLDNN must be explicitly enabled.
+#
 #   MKLDNN_CPU_RUNTIME
 #     MKL-DNN threading mode: TBB or OMP (default)
 #
@@ -156,6 +160,9 @@
 #   NVTOOLSEXT_PATH (Windows only)
 #     specify where nvtoolsext is installed
 #
+#   ACL_ROOT_DIR
+#     specify where Compute Library is installed
+#
 #   LIBRARY_PATH
 #   LD_LIBRARY_PATH
 #     we will search for libraries in these paths
@@ -460,6 +467,10 @@ class build_ext(setuptools.command.build_ext.build_ext):
             report('-- Not using CUDA')
         if cmake_cache_vars['USE_MKLDNN']:
             report('-- Using MKLDNN')
+            if cmake_cache_vars['USE_MKLDNN_ACL']:
+                report('-- Using Compute Library for the Arm architecture with MKLDNN')
+            else:
+                report('-- Not using Compute Library for the Arm architecture with MKLDNN')
             if cmake_cache_vars['USE_MKLDNN_CBLAS']:
                 report('-- Using CBLAS in MKLDNN')
             else: