mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-07-01 03:45:06 +00:00
Remove CUDA 10.2 support (#12541)
This commit is contained in:
parent
819c36701f
commit
ac7538b909
17 changed files with 23 additions and 95 deletions
|
|
@ -1245,6 +1245,9 @@ endif()
|
|||
function(onnxruntime_set_compile_flags target_name)
|
||||
target_compile_definitions(${target_name} PUBLIC EIGEN_USE_THREADS)
|
||||
if (MSVC)
|
||||
foreach(CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORY ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
|
||||
target_compile_options(${target_name} PRIVATE "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/external:I${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORY}>")
|
||||
endforeach()
|
||||
target_compile_definitions(${target_name} PUBLIC -DPLATFORM_WINDOWS -DNOGDI -DNOMINMAX -D_USE_MATH_DEFINES -D_SILENCE_ALL_CXX17_DEPRECATION_WARNINGS)
|
||||
if (onnxruntime_ENABLE_MEMLEAK_CHECKER)
|
||||
target_compile_definitions(${target_name} PUBLIC -DONNXRUNTIME_ENABLE_MEMLEAK_CHECK)
|
||||
|
|
@ -1794,15 +1797,7 @@ if (onnxruntime_USE_CUDA)
|
|||
set(CMAKE_CUDA_RUNTIME_LIBRARY Shared)
|
||||
enable_language(CUDA)
|
||||
message( STATUS "CMAKE_CUDA_COMPILER_VERSION: ${CMAKE_CUDA_COMPILER_VERSION}")
|
||||
if (WIN32)
|
||||
set(CMAKE_CUDA_STANDARD 17)
|
||||
foreach(CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORY ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
|
||||
string(APPEND CMAKE_CXX_FLAGS " /external:I\"${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORY}\"")
|
||||
endforeach()
|
||||
else()
|
||||
#CUDA 10.2 on Linux doesn't support C++17
|
||||
set(CMAKE_CUDA_STANDARD 14)
|
||||
endif()
|
||||
set(CMAKE_CUDA_STANDARD 17)
|
||||
file(TO_CMAKE_PATH ${onnxruntime_CUDNN_HOME} onnxruntime_CUDNN_HOME)
|
||||
|
||||
if (NOT CMAKE_CUDA_ARCHITECTURES)
|
||||
|
|
|
|||
|
|
@ -278,12 +278,6 @@ inline std::wstring ToWideString(const std::wstring& s) { return s; }
|
|||
inline std::string ToWideString(const std::string& s) { return s; }
|
||||
#endif
|
||||
|
||||
#if ((__cplusplus >= 201703L) || (defined(_MSVC_LANG) && (_MSVC_LANG >= 201703L)))
|
||||
#define ORT_IF_CONSTEXPR if constexpr
|
||||
#else
|
||||
#define ORT_IF_CONSTEXPR if
|
||||
#endif
|
||||
|
||||
constexpr size_t kMaxStrLen = 2048;
|
||||
|
||||
// Returns whether `key` is in `container`.
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ namespace onnxruntime {
|
|||
*/
|
||||
template <typename T>
|
||||
bool TryParseStringWithClassicLocale(const std::string& str, T& value) {
|
||||
ORT_IF_CONSTEXPR (std::is_integral<T>::value && std::is_unsigned<T>::value) {
|
||||
if constexpr (std::is_integral<T>::value && std::is_unsigned<T>::value) {
|
||||
// if T is unsigned integral type, reject negative values which will wrap
|
||||
if (!str.empty() && str[0] == '-') {
|
||||
return false;
|
||||
|
|
|
|||
|
|
@ -146,7 +146,7 @@ class IAllocator {
|
|||
size_t alloc_size = count_or_bytes;
|
||||
|
||||
// if T is not void, 'count_or_bytes' == number of items so allow for that
|
||||
ORT_IF_CONSTEXPR(!std::is_void<T>::value) {
|
||||
if constexpr(!std::is_void<T>::value) {
|
||||
// sizeof(void) isn't valid, but the compiler isn't smart enough to ignore that this line isn't
|
||||
// reachable if T is void. use std::conditional to 'use' void* in the sizeof call
|
||||
if (!CalcMemSizeForArray(
|
||||
|
|
|
|||
|
|
@ -498,7 +498,7 @@ class ContainerChecker {
|
|||
ORT_ENFORCE(++index < c.size(), "Sequence is missing type entry for its element");
|
||||
constexpr int32_t prim_type = ToTensorProtoElementType<T>();
|
||||
// Check if this is a primitive type and it matches
|
||||
ORT_IF_CONSTEXPR(prim_type != ONNX_NAMESPACE::TensorProto_DataType_UNDEFINED) {
|
||||
if constexpr(prim_type != ONNX_NAMESPACE::TensorProto_DataType_UNDEFINED) {
|
||||
return c[index].IsType(data_types_internal::ContainerType::kTensor) &&
|
||||
c[index].IsPrimType(prim_type);
|
||||
}
|
||||
|
|
@ -528,7 +528,7 @@ class ContainerChecker {
|
|||
}
|
||||
ORT_ENFORCE(++index < c.size(), "Map is missing type entry for its value");
|
||||
constexpr int32_t val_type = ToTensorProtoElementType<V>();
|
||||
ORT_IF_CONSTEXPR(val_type != ONNX_NAMESPACE::TensorProto_DataType_UNDEFINED) {
|
||||
if constexpr(val_type != ONNX_NAMESPACE::TensorProto_DataType_UNDEFINED) {
|
||||
return c[index].IsType(data_types_internal::ContainerType::kTensor) &&
|
||||
c[index].IsPrimType(val_type);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -69,7 +69,7 @@ struct BFloat16 {
|
|||
val = static_cast<uint16_t>((U32 + rounding_bias) >> 16);
|
||||
}
|
||||
#else
|
||||
ORT_IF_CONSTEXPR(endian::native == endian::little) {
|
||||
if constexpr(endian::native == endian::little) {
|
||||
std::memcpy(&val, reinterpret_cast<char*>(&v) + sizeof(uint16_t), sizeof(uint16_t));
|
||||
}
|
||||
else {
|
||||
|
|
@ -93,7 +93,7 @@ struct BFloat16 {
|
|||
float result;
|
||||
char* const first = reinterpret_cast<char*>(&result);
|
||||
char* const second = first + sizeof(uint16_t);
|
||||
ORT_IF_CONSTEXPR(endian::native == endian::little) {
|
||||
if constexpr(endian::native == endian::little) {
|
||||
std::memset(first, 0, sizeof(uint16_t));
|
||||
std::memcpy(second, &val, sizeof(uint16_t));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -56,7 +56,7 @@ Status CopyLittleEndian(size_t element_size_in_bytes,
|
|||
ORT_RETURN_IF(source_bytes.size_bytes() != destination_bytes.size_bytes(),
|
||||
"source and destination buffer size mismatch");
|
||||
|
||||
ORT_IF_CONSTEXPR (endian::native == endian::little) {
|
||||
if constexpr (endian::native == endian::little) {
|
||||
std::memcpy(destination_bytes.data(), source_bytes.data(), source_bytes.size_bytes());
|
||||
} else {
|
||||
SwapByteOrderCopy(element_size_in_bytes, source_bytes, destination_bytes);
|
||||
|
|
|
|||
|
|
@ -779,7 +779,7 @@ ONNXTensorElementDataType GetTensorElementType(const ONNX_NAMESPACE::TensorProto
|
|||
|
||||
ONNX_NAMESPACE::TensorProto TensorToTensorProto(const Tensor& tensor, const std::string& tensor_proto_name) {
|
||||
// Given we are using the raw_data field in the protobuf, this will work only for little-endian format.
|
||||
ORT_IF_CONSTEXPR(endian::native != endian::little) {
|
||||
if constexpr(endian::native != endian::little) {
|
||||
ORT_THROW("Big endian not supported");
|
||||
}
|
||||
|
||||
|
|
@ -1127,7 +1127,7 @@ static void SetIndices(gsl::span<int64_t> gathered_indices,
|
|||
auto* ind_dest = reinterpret_cast<T*>(raw_indices.data());
|
||||
size_t dest_index = 0;
|
||||
for (auto src_index : gathered_indices) {
|
||||
ORT_IF_CONSTEXPR(sizeof(T) == sizeof(int8_t)) {
|
||||
if constexpr(sizeof(T) == sizeof(int8_t)) {
|
||||
ind_dest[dest_index] = static_cast<T>(src_index);
|
||||
}
|
||||
else {
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@ ADD_IN_TYPE_TREE_ENSEMBLE_CLASSIFIER_OP(int32_t);
|
|||
|
||||
template <typename T>
|
||||
TreeEnsembleClassifier<T>::TreeEnsembleClassifier(const OpKernelInfo& info) : OpKernel(info) {
|
||||
ORT_IF_CONSTEXPR(std::is_same<T, double>::value) {
|
||||
if constexpr(std::is_same<T, double>::value) {
|
||||
p_tree_ensemble_ = std::make_unique<detail::TreeEnsembleCommonClassifier<T, double, OutputType>>();
|
||||
}
|
||||
else {
|
||||
|
|
|
|||
|
|
@ -38,7 +38,7 @@ ONNX_CPU_OPERATOR_TYPED_ML_KERNEL(
|
|||
|
||||
template <typename T>
|
||||
TreeEnsembleRegressor<T>::TreeEnsembleRegressor(const OpKernelInfo& info) : OpKernel(info) {
|
||||
ORT_IF_CONSTEXPR(std::is_same<T, double>::value) {
|
||||
if constexpr(std::is_same<T, double>::value) {
|
||||
p_tree_ensemble_ = std::make_unique<detail::TreeEnsembleCommon<T, double, OutputType>>();
|
||||
}
|
||||
else {
|
||||
|
|
|
|||
|
|
@ -257,7 +257,7 @@ Status Conv<T>::UpdateState(OpKernelContext* context, bool bias_expected) const
|
|||
|
||||
if (!s_.cached_benchmark_results.contains(x_dims_cudnn)) {
|
||||
// set math type to tensor core before algorithm search
|
||||
ORT_IF_CONSTEXPR(std::is_same<T, MLFloat16>::value)
|
||||
if constexpr(std::is_same<T, MLFloat16>::value)
|
||||
CUDNN_RETURN_IF_ERROR(cudnnSetConvolutionMathType(s_.conv_desc, CUDNN_TENSOR_OP_MATH));
|
||||
|
||||
cudnnConvolutionFwdAlgoPerf_t perf;
|
||||
|
|
|
|||
|
|
@ -205,7 +205,7 @@ Status ReduceKernel<allow_multi_axes>::ReduceKernelShared(
|
|||
}
|
||||
|
||||
CudnnReduceDescriptor reduce_desc;
|
||||
ORT_IF_CONSTEXPR (std::is_same<T, MLFloat16>::value)
|
||||
if constexpr (std::is_same<T, MLFloat16>::value)
|
||||
ORT_RETURN_IF_ERROR(reduce_desc.Set(cudnn_reduce_op, CudnnTensor::GetDataType<float>(), ReduceTensorIndices));
|
||||
else
|
||||
ORT_RETURN_IF_ERROR(reduce_desc.Set(cudnn_reduce_op, cudnn_type_X, ReduceTensorIndices));
|
||||
|
|
@ -524,7 +524,7 @@ Status ReduceComputeCore(CUDAExecutionProvider& cuda_ep, const Tensor& input, Pr
|
|||
}
|
||||
|
||||
CudnnReduceDescriptor reduce_desc;
|
||||
ORT_IF_CONSTEXPR (std::is_same<T, MLFloat16>::value || std::is_same<T, BFloat16>::value) {
|
||||
if constexpr (std::is_same<T, MLFloat16>::value || std::is_same<T, BFloat16>::value) {
|
||||
ORT_RETURN_IF_ERROR(reduce_desc.Set(cudnn_reduce_op, CudnnTensor::GetDataType<float>(), ReduceTensorIndices));
|
||||
} else {
|
||||
ORT_RETURN_IF_ERROR(reduce_desc.Set(cudnn_reduce_op, cudnn_type_X, ReduceTensorIndices));
|
||||
|
|
|
|||
|
|
@ -202,7 +202,7 @@ Status ReduceKernel<allow_multi_axes>::ReduceKernelShared(
|
|||
}
|
||||
|
||||
MiopenReduceDescriptor reduce_desc;
|
||||
ORT_IF_CONSTEXPR(std::is_same<T, MLFloat16>::value)
|
||||
if constexpr(std::is_same<T, MLFloat16>::value)
|
||||
ORT_RETURN_IF_ERROR(reduce_desc.Set(miopen_reduce_op, MiopenTensor::GetDataType<float>(), ReduceTensorIndices));
|
||||
else ORT_RETURN_IF_ERROR(reduce_desc.Set(miopen_reduce_op, miopen_type_X, ReduceTensorIndices));
|
||||
|
||||
|
|
@ -523,7 +523,7 @@ Status ReduceComputeCore(ROCMExecutionProvider& rocm_ep, const Tensor& input, Pr
|
|||
}
|
||||
|
||||
MiopenReduceDescriptor reduce_desc;
|
||||
ORT_IF_CONSTEXPR (std::is_same<T, MLFloat16>::value || std::is_same<T, BFloat16>::value) {
|
||||
if constexpr (std::is_same<T, MLFloat16>::value || std::is_same<T, BFloat16>::value) {
|
||||
ORT_RETURN_IF_ERROR(reduce_desc.Set(miopen_reduce_op, MiopenTensor::GetDataType<float>(), ReduceTensorIndices));
|
||||
} else {
|
||||
ORT_RETURN_IF_ERROR(reduce_desc.Set(miopen_reduce_op, miopen_type_X, ReduceTensorIndices));
|
||||
|
|
|
|||
|
|
@ -698,7 +698,7 @@ struct InsertIndices {
|
|||
std::vector<int8_t> indices_data;
|
||||
insert_indices_data(indices_1D, values_size, shape_size, indices_data, indices_tp);
|
||||
indices_tp.set_data_type(utils::ToTensorProtoElementType<T>());
|
||||
ORT_IF_CONSTEXPR(sizeof(T) == sizeof(int8_t)) {
|
||||
if constexpr(sizeof(T) == sizeof(int8_t)) {
|
||||
indices_tp.mutable_raw_data()->assign(reinterpret_cast<const char*>(indices_data.data()), indices_data.size());
|
||||
}
|
||||
else {
|
||||
|
|
|
|||
|
|
@ -1,5 +1,3 @@
|
|||
#This file is for CUDA 10.2 and 11.4 even the filename just says 11
|
||||
|
||||
resources:
|
||||
repositories:
|
||||
- repository: manylinux # The name used to reference this repository in the checkout step
|
||||
|
|
@ -9,63 +7,6 @@ resources:
|
|||
ref: a8099af1b3e25f0489717ad9c4f9a2e25a8c5b36
|
||||
|
||||
jobs:
|
||||
- job: Linux_Build_CUDA10_NV6
|
||||
timeoutInMinutes: 180
|
||||
workspace:
|
||||
clean: all
|
||||
pool: Onnxruntime-Linux-GPU-NV6
|
||||
steps:
|
||||
- checkout: self
|
||||
clean: true
|
||||
submodules: recursive
|
||||
|
||||
- template: templates/get-docker-image-steps.yml
|
||||
parameters:
|
||||
Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.manylinux2014_cuda11
|
||||
Context: tools/ci_build/github/linux/docker
|
||||
DockerBuildArgs: "--network=host --build-arg POLICY=manylinux2014 --build-arg PLATFORM=x86_64 --build-arg BASEIMAGE=nvcr.io/nvidia/cuda:10.2-cudnn8-devel-centos7 --build-arg DEVTOOLSET_ROOTPATH=/opt/rh/devtoolset-8/root --build-arg PREPEND_PATH=/opt/rh/devtoolset-8/root/usr/bin: --build-arg LD_LIBRARY_PATH_ARG=/opt/rh/devtoolset-8/root/usr/lib64:/opt/rh/devtoolset-8/root/usr/lib:/opt/rh/devtoolset-8/root/usr/lib64/dyninst:/opt/rh/devtoolset-8/root/usr/lib/dyninst:/usr/local/lib64 --build-arg BUILD_UID=$( id -u )"
|
||||
Repository: onnxruntimegpubuild
|
||||
- task: CmdLine@2
|
||||
inputs:
|
||||
script: |
|
||||
mkdir -p $HOME/.onnx
|
||||
docker run --gpus all -e CC=/opt/rh/devtoolset-8/root/usr/bin/cc -e CXX=/opt/rh/devtoolset-8/root/usr/bin/c++ -e CFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -O3 -Wl,--strip-all" -e CXXFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -O3 -Wl,--strip-all" --rm \
|
||||
--volume /data/onnx:/data/onnx:ro \
|
||||
--volume $(Build.SourcesDirectory):/onnxruntime_src \
|
||||
--volume $(Build.BinariesDirectory):/build \
|
||||
--volume /data/models:/build/models:ro \
|
||||
--volume $HOME/.onnx:/home/onnxruntimedev/.onnx \
|
||||
-e ALLOW_RELEASED_ONNX_OPSET_ONLY=0 \
|
||||
-e NIGHTLY_BUILD \
|
||||
-e BUILD_BUILDNUMBER \
|
||||
onnxruntimegpubuild \
|
||||
/opt/python/cp37-cp37m/bin/python3 /onnxruntime_src/tools/ci_build/build.py \
|
||||
--build_dir /build --cmake_generator Ninja \
|
||||
--config Release \
|
||||
--skip_submodule_sync \
|
||||
--build_shared_lib \
|
||||
--parallel \
|
||||
--build_wheel \
|
||||
--enable_onnx_tests --use_cuda --cuda_version=10.2 --cuda_home=/usr/local/cuda-10.2 --cudnn_home=/usr/local/cuda-10.2 \
|
||||
--enable_pybind --build_java --build_nodejs \
|
||||
--cmake_extra_defines CMAKE_CUDA_HOST_COMPILER=/opt/rh/devtoolset-8/root/usr/bin/cc CMAKE_CUDA_ARCHITECTURES=52
|
||||
workingDirectory: $(Build.SourcesDirectory)
|
||||
- task: PublishTestResults@2
|
||||
displayName: 'Publish unit test results'
|
||||
inputs:
|
||||
testResultsFiles: '**/*.results.xml'
|
||||
searchFolder: '$(Build.BinariesDirectory)'
|
||||
testRunTitle: 'Unit Test Run'
|
||||
condition: succeededOrFailed()
|
||||
|
||||
- template: templates/component-governance-component-detection-steps.yml
|
||||
parameters:
|
||||
condition: 'succeeded'
|
||||
|
||||
- task: mspremier.PostBuildCleanup.PostBuildCleanup-task.PostBuildCleanup@3
|
||||
displayName: 'Clean Agent Directories'
|
||||
condition: always()
|
||||
|
||||
- job: Linux_Build_CUDA11_NV6
|
||||
timeoutInMinutes: 180
|
||||
workspace:
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ jobs:
|
|||
MsbuildArguments: '-detailedsummary -maxcpucount -consoleloggerparameters:PerformanceSummary'
|
||||
OnnxRuntimeBuildDirectory: '$(Build.BinariesDirectory)'
|
||||
DOTNET_SKIP_FIRST_TIME_EXPERIENCE: true
|
||||
EnvSetupScript: setup_env_cuda.bat
|
||||
EnvSetupScript: setup_env_cuda_11.bat
|
||||
buildArch: x64
|
||||
setVcvars: true
|
||||
timeoutInMinutes: 120
|
||||
|
|
@ -52,7 +52,7 @@ jobs:
|
|||
displayName: 'Build and test'
|
||||
inputs:
|
||||
scriptPath: '$(Build.SourcesDirectory)\tools\ci_build\build.py'
|
||||
arguments: '--config $(BuildConfig) --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --cmake_generator "Visual Studio 16 2019" --build_wheel --use_cuda --cuda_version=10.2 --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2" --cudnn_home="C:\local\cudnn-10.2-windows10-x64-v8.0.3.33\cuda" --cmake_extra_defines CMAKE_SYSTEM_VERSION=10.0.18362.0 "CMAKE_CUDA_ARCHITECTURES=52" --include_ops_by_config="$(Build.SourcesDirectory)\onnxruntime\test\testdata\required_ops.config"'
|
||||
arguments: '--config $(BuildConfig) --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --cmake_generator "Visual Studio 16 2019" --build_wheel --use_cuda --cuda_version=11.4 --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.4" --cudnn_home="C:\local\cudnn-11.4-windows-x64-v8.2.2.26\cuda" --cmake_extra_defines CMAKE_SYSTEM_VERSION=10.0.18362.0 "CMAKE_CUDA_ARCHITECTURES=52" --include_ops_by_config="$(Build.SourcesDirectory)\onnxruntime\test\testdata\required_ops.config"'
|
||||
workingDirectory: '$(Build.BinariesDirectory)'
|
||||
|
||||
- template: templates/component-governance-component-detection-steps.yml
|
||||
|
|
|
|||
|
|
@ -1,2 +0,0 @@
|
|||
set PATH=C:\azcopy;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2\bin;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2\extras\CUPTI\lib64;%PATH%
|
||||
set GRADLE_OPTS=-Dorg.gradle.daemon=false
|
||||
Loading…
Reference in a new issue