From 6a7ea5c896ed458ed1308a0da9b6914b794b299c Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Wed, 15 Jan 2025 09:42:15 -0800 Subject: [PATCH] Update xnnpack, cpuinfo and pthreadpool (#23362) ### Description Update xnnpack to remove the dependency on psimd and fp16 libraries. However, coremltools still depends on them, which will be addressed later. Also, update CPUINFO because the latest xnnpack requires CPUINFO's avx10 support. ### Motivation and Context The fewer dependencies the better. --- cgmanifests/generated/cgmanifest.json | 8 ++-- cmake/deps.txt | 8 ++-- cmake/external/xnnpack.cmake | 41 +------------------ cmake/onnxruntime_providers_coreml.cmake | 2 +- .../xnnpack/AddEmscriptenAndIosSupport.patch | 24 +++++------ .../templates/download-deps.yml | 4 +- 6 files changed, 24 insertions(+), 63 deletions(-) diff --git a/cgmanifests/generated/cgmanifest.json b/cgmanifests/generated/cgmanifest.json index da79918310..044588c080 100644 --- a/cgmanifests/generated/cgmanifest.json +++ b/cgmanifests/generated/cgmanifest.json @@ -136,7 +136,7 @@ "component": { "type": "git", "git": { - "commitHash": "309b75c9e56e0a674bf78d59872ce131f814dfb6", + "commitHash": "fe98e0b93565382648129271381c14d6205255e3", "repositoryUrl": "https://github.com/google/XNNPACK.git" }, "comments": "googlexnnpack" @@ -226,8 +226,8 @@ "component": { "type": "git", "git": { - "commitHash": "4fe0e1e183925bf8cfa6aae24237e724a96479b8", - "repositoryUrl": "https://github.com/Maratyszcza/pthreadpool.git" + "commitHash": "4e80ca24521aa0fb3a746f9ea9c3eaa20e9afbb0", + "repositoryUrl": "https://github.com/google/pthreadpool.git" }, "comments": "pthreadpool" } @@ -246,7 +246,7 @@ "component": { "type": "git", "git": { - "commitHash": "ca678952a9a8eaa6de112d154e8e104b22f9ab3f", + "commitHash": "8a1772a0c5c447df2d18edf33ec4603a8c9c04a6", "repositoryUrl": "https://github.com/pytorch/cpuinfo.git" }, "comments": "pytorch_cpuinfo" diff --git a/cmake/deps.txt b/cmake/deps.txt index 1082331974..d1a528bd6b 
100644 --- a/cmake/deps.txt +++ b/cmake/deps.txt @@ -29,7 +29,7 @@ fxdiv;https://github.com/Maratyszcza/FXdiv/archive/63058eff77e11aa15bf531df5dd34 google_benchmark;https://github.com/google/benchmark/archive/refs/tags/v1.8.5.zip;cd47d3d272faf353600c8cc2fdec2b52d6f69177 googletest;https://github.com/google/googletest/archive/refs/tags/v1.15.0.zip;9d2d0af8d77ac726ea55d44a8fa727ec98311349 #xnnpack 2024.09.04 -googlexnnpack;https://github.com/google/XNNPACK/archive/309b75c9e56e0a674bf78d59872ce131f814dfb6.zip;39FA5259EAEACE0547284B63D5CEDC4F05553F5A +googlexnnpack;https://github.com/google/XNNPACK/archive/fe98e0b93565382648129271381c14d6205255e3.zip;14f61dcf17cec2cde34ba2dcf61d6f24bf6059f3 json;https://github.com/nlohmann/json/archive/refs/tags/v3.10.5.zip;f257f8dc27c5b8c085dc887b40cddd18ae1f725c microsoft_gsl;https://github.com/microsoft/GSL/archive/refs/tags/v4.0.0.zip;cf368104cd22a87b4dd0c80228919bb2df3e2a14 microsoft_wil;https://github.com/microsoft/wil/archive/refs/tags/v1.0.230629.1.zip;e4a542a323c070376f7c2d1973d0f7ddbc1d2fa5 @@ -46,9 +46,9 @@ protoc_linux_x86;https://github.com/protocolbuffers/protobuf/releases/download/v protoc_linux_aarch64;https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-linux-aarch_64.zip;df9d45470b0b8cf939dd2f0ec6b88e9cafc4d617 protoc_mac_universal;https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-osx-universal_binary.zip;23710c3d1c2036d8d65a6a22234372fa2d7af9ef psimd;https://github.com/Maratyszcza/psimd/archive/072586a71b55b7f8c584153d223e95687148a900.zip;1f5454b01f06f9656b77e4a5e2e31d7422487013 -pthreadpool;https://github.com/Maratyszcza/pthreadpool/archive/4fe0e1e183925bf8cfa6aae24237e724a96479b8.zip;07a0aa91dd9bf86f31b95497e00f31d8a261a4bd +pthreadpool;https://github.com/google/pthreadpool/archive/4e80ca24521aa0fb3a746f9ea9c3eaa20e9afbb0.zip;bd4ea65c8292801e9555b527a0ecbb2e0092c917 
pybind11;https://github.com/pybind/pybind11/archive/refs/tags/v2.13.1.zip;9255d5c8568debcc329dd42ed8f410ee139ac7b1 -pytorch_cpuinfo;https://github.com/pytorch/cpuinfo/archive/ca678952a9a8eaa6de112d154e8e104b22f9ab3f.zip;138bf57d2a110935330d1048dce6d7b82d17d377 +pytorch_cpuinfo;https://github.com/pytorch/cpuinfo/archive/8a1772a0c5c447df2d18edf33ec4603a8c9c04a6.zip;85bf8a60dae026b99b6ccd78606c85ed83bfb2cd re2;https://github.com/google/re2/archive/refs/tags/2024-07-02.zip;646e1728269cde7fcef990bf4a8e87b047882e88 safeint;https://github.com/dcleblanc/SafeInt/archive/refs/tags/3.0.28.zip;23f252040ff6cb9f1fd18575b32fa8fb5928daac tensorboard;https://github.com/tensorflow/tensorboard/archive/373eb09e4c5d2b3cc2493f0949dc4be6b6a45e81.zip;67b833913605a4f3f499894ab11528a702c2b381 @@ -59,4 +59,4 @@ composable_kernel;https://github.com/ROCmSoftwarePlatform/composable_kernel/arch directx_headers;https://github.com/microsoft/DirectX-Headers/archive/refs/tags/v1.613.1.zip;47653509a3371eabb156360f42faf582f314bf2e cudnn_frontend;https://github.com/NVIDIA/cudnn-frontend/archive/refs/tags/v1.7.0.zip;d0753d8d5b39947ca0729d7773cb84653a129eb1 dawn;https://github.com/google/dawn/archive/12a3b24c456cebd9fd11f23ac0164f78129b00c6.zip;ad428f6dc16f1336d584f7bad5714e1097dafc43 -kleidiai;https://gitlab.arm.com/kleidi/kleidiai/-/archive/v0.2.0/kleidiai-v0.2.0.zip;B1E3173992FD91F20DB904AB77D6E901778C2681 +kleidiai;https://gitlab.arm.com/kleidi/kleidiai/-/archive/d15722976120710080ca098fe8ddabf4556cb40f/kleidiai-d15722976120710080ca098fe8ddabf4556cb40f.zip;d6c840d00c3b05aedf06e957ddaece1013d1f40b diff --git a/cmake/external/xnnpack.cmake b/cmake/external/xnnpack.cmake index 3298c078b5..0be2c7082b 100644 --- a/cmake/external/xnnpack.cmake +++ b/cmake/external/xnnpack.cmake @@ -1,8 +1,7 @@ set(XNNPACK_USE_SYSTEM_LIBS ON CACHE INTERNAL "") set(XNNPACK_BUILD_TESTS OFF CACHE INTERNAL "") set(XNNPACK_BUILD_BENCHMARKS OFF CACHE INTERNAL "") -set(FP16_BUILD_TESTS OFF CACHE INTERNAL "") 
-set(FP16_BUILD_BENCHMARKS OFF CACHE INTERNAL "") + set(PTHREADPOOL_BUILD_TESTS OFF CACHE INTERNAL "") set(PTHREADPOOL_BUILD_BENCHMARKS OFF CACHE INTERNAL "") set(KLEIDIAI_BUILD_TESTS OFF CACHE INTERNAL "") @@ -17,44 +16,6 @@ if(CMAKE_ANDROID_ARCH_ABI STREQUAL armeabi-v7a) set(XNNPACK_ENABLE_ARM_BF16 OFF) endif() -# fp16 depends on psimd -FetchContent_Declare(psimd URL ${DEP_URL_psimd} URL_HASH SHA1=${DEP_SHA1_psimd}) -onnxruntime_fetchcontent_makeavailable(psimd) -set(PSIMD_SOURCE_DIR ${psimd_SOURCE_DIR}) - -block(PROPAGATE fp16_PATCH_COMMAND) - # only apply fp16 patch for Apple x86_64 targets - - if(APPLE) - if(NOT "${CMAKE_OSX_ARCHITECTURES}" STREQUAL "") - if ("x86_64" IN_LIST CMAKE_OSX_ARCHITECTURES) - set(fp16_PATCH_REQUIRED 1) - endif() - else() - # CMAKE_OSX_ARCHITECTURES unspecified, check host - if(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") - set(fp16_PATCH_REQUIRED 1) - endif() - endif() - endif() - - if(fp16_PATCH_REQUIRED) - message(STATUS "Applying fp16 patch.") - set(fp16_PATCH_FILE ${PROJECT_SOURCE_DIR}/patches/fp16/remove_math_h_dependency_from_fp16_h.patch) - set(fp16_PATCH_COMMAND ${Patch_EXECUTABLE} --binary --ignore-whitespace -p1 < ${fp16_PATCH_FILE}) - else() - set(fp16_PATCH_COMMAND "") - endif() -endblock() - -FetchContent_Declare( - fp16 - URL ${DEP_URL_fp16} - URL_HASH SHA1=${DEP_SHA1_fp16} - PATCH_COMMAND ${fp16_PATCH_COMMAND} - ) -onnxruntime_fetchcontent_makeavailable(fp16) - # pthreadpool depends on fxdiv FetchContent_Declare(fxdiv URL ${DEP_URL_fxdiv} URL_HASH SHA1=${DEP_SHA1_fxdiv}) onnxruntime_fetchcontent_makeavailable(fxdiv) diff --git a/cmake/onnxruntime_providers_coreml.cmake b/cmake/onnxruntime_providers_coreml.cmake index 0aa25a221b..ec7bc7a989 100644 --- a/cmake/onnxruntime_providers_coreml.cmake +++ b/cmake/onnxruntime_providers_coreml.cmake @@ -177,7 +177,7 @@ endif() if (_enable_ML_PROGRAM) # Setup coremltools fp16 and json dependencies for creating an mlpackage. # - # These are also used by external/xnnpack.cmake. 
fp16 depends on psimd + # fp16 depends on psimd FetchContent_Declare(psimd URL ${DEP_URL_psimd} URL_HASH SHA1=${DEP_SHA1_psimd}) onnxruntime_fetchcontent_makeavailable(psimd) set(PSIMD_SOURCE_DIR ${psimd_SOURCE_DIR}) diff --git a/cmake/patches/xnnpack/AddEmscriptenAndIosSupport.patch b/cmake/patches/xnnpack/AddEmscriptenAndIosSupport.patch index 3abf2d3afe..c9cb4bcad9 100644 --- a/cmake/patches/xnnpack/AddEmscriptenAndIosSupport.patch +++ b/cmake/patches/xnnpack/AddEmscriptenAndIosSupport.patch @@ -1,8 +1,8 @@ diff --git a/CMakeLists.txt b/CMakeLists.txt -index 1ff85b538..c3ef2183f 100644 +index f0b3410ae..1e3cb8178 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt -@@ -253,7 +253,7 @@ ENDIF() +@@ -337,7 +337,7 @@ ENDIF() # ---[ Build flags IF(NOT CMAKE_SYSTEM_NAME) MESSAGE(FATAL_ERROR "CMAKE_SYSTEM_NAME not defined") @@ -11,21 +11,21 @@ index 1ff85b538..c3ef2183f 100644 MESSAGE(FATAL_ERROR "Unrecognized CMAKE_SYSTEM_NAME value \"${CMAKE_SYSTEM_NAME}\"") ENDIF() IF(CMAKE_SYSTEM_NAME MATCHES "Windows") -@@ -763,7 +763,12 @@ IF(XNNPACK_BUILD_LIBRARY) - TARGET_LINK_LIBRARIES(operator-run PRIVATE xnnpack-base logging) +@@ -848,7 +848,12 @@ IF(XNNPACK_BUILD_LIBRARY) TARGET_LINK_LIBRARIES(operator-utils PRIVATE xnnpack-base logging) - TARGET_LINK_LIBRARIES(subgraph PRIVATE xnnpack-base allocator logging memory mutex operators operator-run) -- TARGET_LINK_LIBRARIES(XNNPACK PRIVATE allocator cache hardware-config indirection logging memory microkernel-utils microparams-init mutex normalization operators operator-run operator-utils packing microkernels-prod subgraph) + TARGET_LINK_LIBRARIES(reference-ukernels PRIVATE xnnpack-base) + TARGET_LINK_LIBRARIES(subgraph PRIVATE xnnpack-base allocator logging memory mutex operators operator-run datatype) +- TARGET_LINK_LIBRARIES(XNNPACK PRIVATE xnnpack-base allocator cache hardware-config indirection memory microkernel-utils microparams-init mutex normalization operators operator-run operator-utils packing microkernels-prod subgraph 
datatype reference-ukernels) + IF(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") -+ # omit microkernels-prod as the list is manually created by ORT in cmake/external/xnnpack.cmake -+ TARGET_LINK_LIBRARIES(XNNPACK PRIVATE allocator cache hardware-config indirection logging memory microkernel-utils microparams-init mutex normalization operators operator-run operator-utils packing subgraph) ++ # omit microkernels-prod as the list is manually created by ORT in cmake/external/xnnpack.cmake ++ TARGET_LINK_LIBRARIES(XNNPACK PRIVATE xnnpack-base allocator cache hardware-config indirection memory microkernel-utils microparams-init mutex normalization operators operator-run operator-utils packing subgraph datatype reference-ukernels) + ELSE() -+ TARGET_LINK_LIBRARIES(XNNPACK PRIVATE allocator cache hardware-config indirection logging memory microkernel-utils microparams-init mutex normalization operators operator-run operator-utils packing microkernels-prod subgraph) -+ ENDIF() - TARGET_LINK_LIBRARIES(XNNPACK PUBLIC xnnpack-base) ++ TARGET_LINK_LIBRARIES(XNNPACK PRIVATE xnnpack-base allocator cache hardware-config indirection memory microkernel-utils microparams-init mutex normalization operators operator-run operator-utils packing microkernels-prod subgraph datatype reference-ukernels) ++ ENDIF() + TARGET_LINK_LIBRARIES(XNNPACK PUBLIC pthreadpool logging) SET_TARGET_PROPERTIES(XNNPACK PROPERTIES C_EXTENSIONS YES) ENDIF() -@@ -772,7 +777,8 @@ IF(NOT MSVC) +@@ -857,7 +862,8 @@ IF(NOT MSVC) ENDIF() IF(XNNPACK_TARGET_PROCESSOR STREQUAL "arm") SET_PROPERTY(SOURCE ${ALL_MICROKERNEL_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS " -marm ") diff --git a/tools/ci_build/github/azure-pipelines/templates/download-deps.yml b/tools/ci_build/github/azure-pipelines/templates/download-deps.yml index 7e7ab29e3e..1a53ce6a42 100644 --- a/tools/ci_build/github/azure-pipelines/templates/download-deps.yml +++ b/tools/ci_build/github/azure-pipelines/templates/download-deps.yml @@ -11,7 +11,7 @@ steps: 
packageType: upack feed: '/7424c8e4-5c62-490e-95c4-79446f31017c' definition: '517c4f6f-5437-4392-a70d-4f15ec5be2f0' - version: 1.0.206 + version: 1.0.208 downloadPath: $(Build.BinariesDirectory)/deps # The private ADO project @@ -22,7 +22,7 @@ steps: packageType: upack feed: '/4c7631f5-24c0-4307-8822-1aa8f180c325' definition: 'fd9dd5ad-b73e-4678-890e-edcf680dbc1a' - version: 1.0.206 + version: 1.0.208 downloadPath: $(Build.BinariesDirectory)/deps # You can add more ADO accounts at here.