From bb4011b2b14cb2702a4922ccd0b070d9ecc49a93 Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Sun, 14 Jan 2024 11:36:49 -0800 Subject: [PATCH] Set default flags nvcc and do not set default compile flags for ROCM EP (#19124) ### Description Set default flags for nvcc and do not set the default compile flags for ROCM EP. ### Motivation and Context 1. To meet a BinSkim requirement for CUDA EP. https://github.com/microsoft/binskim/blob/main/docs/BinSkimRules.md#rule-BA2024EnableSpectreMitigations 2. The ROCM EP's pipeline has been broken since PR #19073. Unit tests failed to load the EP with the following error message: "Failed to load library libonnxruntime_providers_rocm.so with error: /build/Release/libonnxruntime_providers_rocm.so: undefined symbol: vtable for onnxruntime::InsertMaxPoolOutput". This PR is a hot fix to bring the pipeline back. So far I don't know why the error happened. The symbol "InsertMaxPoolOutput" is in onnxruntime_optimizers. I don't see any EP code that references it directly. --- tools/ci_build/build.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index 315b9a237b..0da4adb517 100644 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -1474,15 +1474,18 @@ def generate_build_tree( cflags = None cxxflags = None ldflags = None + cudaflags = [] for config in configs: # Setup default values for cflags/cxxflags/ldflags. # The values set here are purely for security and compliance purposes. ONNX Runtime should work fine without these flags. 
if ( "CFLAGS" not in os.environ and "CXXFLAGS" not in os.environ + and (not args.use_cuda or "CUDAFLAGS" not in os.environ) and not args.ios and not args.android and not args.build_wasm + and not args.use_rocm and not (is_linux() and platform.machine() != "aarch64" and platform.machine() != "x86_64") ): if is_windows(): @@ -1515,9 +1518,19 @@ def generate_build_tree( cxxflags = cflags.copy() if not args.disable_exceptions: cxxflags += ["/EHsc"] + if args.use_cuda: + # On Windows, nvcc passes /EHsc to the host compiler by default. + cuda_compile_flags_str = "" + for compile_flag in cflags: + if compile_flag.startswith("/D"): + cudaflags.append(compile_flag) + else: + cuda_compile_flags_str = cuda_compile_flags_str + " " + compile_flag + if len(cuda_compile_flags_str) != 0: + cudaflags.append('-Xcompiler="%s"' % cuda_compile_flags_str) elif is_linux() or is_macOS(): if is_linux(): - ldflags = ["-Wl,-Bsymbolic-functions", "-Wl,-z,relro", "-Wl,-z,now"] + ldflags = ["-Wl,-Bsymbolic-functions", "-Wl,-z,relro", "-Wl,-z,now", "-Wl,-z,noexecstack"] else: ldflags = [] if config == "Release": @@ -1560,7 +1573,8 @@ def generate_build_tree( # The following flags needs GCC 8 and newer cflags += ["-fstack-clash-protection", "-fcf-protection"] cxxflags = cflags.copy() - + if args.use_cuda: + cudaflags = cflags.copy() config_build_dir = get_config_build_dir(build_dir, config) os.makedirs(config_build_dir, exist_ok=True) if args.use_tvm: @@ -1580,6 +1594,8 @@ def generate_build_tree( "-DCMAKE_C_FLAGS=%s" % (" ".join(cflags)), "-DCMAKE_CXX_FLAGS=%s" % (" ".join(cxxflags)), ] + if cudaflags is not None and len(cudaflags) != 0: + temp_cmake_args += ["-DCMAKE_CUDA_FLAGS_INIT=%s" % (" ".join(cudaflags))] if ldflags is not None and len(ldflags) != 0: temp_cmake_args += [ "-DCMAKE_EXE_LINKER_FLAGS_INIT=%s" % (" ".join(ldflags)),