From ee74c2e5bec8beacbf1ec87f10fe4e726a920c26 Mon Sep 17 00:00:00 2001
From: Xiang Gao <qasdfgtyuiop@gmail.com>
Date: Tue, 18 Aug 2020 09:46:42 -0700
Subject: [PATCH] Compress fatbin to fit into 32bit indexing (#43074)

Summary:
Fixes https://github.com/pytorch/pytorch/issues/39968

Tested with `TORCH_CUDA_ARCH_LIST='3.5 5.2 6.0 6.1 7.0 7.5 8.0+PTX'`: before this PR the build was failing, and with this PR the build succeeds.

With `TORCH_CUDA_ARCH_LIST='7.0 7.5 8.0+PTX'`, `libtorch_cuda.so` (with symbols) shrinks from 2.9GB to 2.2GB.

cc: ptrblck mcarilli jjsjann123

Pull Request resolved: https://github.com/pytorch/pytorch/pull/43074

Reviewed By: mrshenli

Differential Revision: D23176095

Pulled By: malfet

fbshipit-source-id: 7b3e6d049fc080e519f21e80df05ef68e7bea57e
---
 CMakeLists.txt | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 90dcd9696d0..aeb87eaa7e3 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -357,6 +357,10 @@ if(MSVC)
   list(APPEND CUDA_NVCC_FLAGS "-Xcompiler /w -w")
 endif(MSVC)
 
+list(APPEND CUDA_NVCC_FLAGS "-Xfatbin" "-compress-all")
+list(APPEND CUDA_NVCC_FLAGS_DEBUG "-Xfatbin" "-compress-all")
+list(APPEND CUDA_NVCC_FLAGS_RELWITHDEBINFO "-Xfatbin" "-compress-all")
+
 if(NOT MSVC)
   list(APPEND CUDA_NVCC_FLAGS_DEBUG "-g" "-lineinfo" "--source-in-ptx")
   list(APPEND CUDA_NVCC_FLAGS_RELWITHDEBINFO "-g" "-lineinfo" "--source-in-ptx")