Fix build errors on Dev machines after PR #7626 merge (#7781)

* two fixes

* more Fixes

* Disable mpi by default

* Revert "Disable mpi by default"

This reverts commit 46c774ad9c6fcb0f3c1a81cd08b7d5e0ba09a985.
This commit is contained in:
ashbhandare 2021-05-21 09:45:49 -07:00 committed by GitHub
parent 7c4a5faef5
commit db0d608ff0
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 7 additions and 3 deletions

View file

@@ -40,6 +40,9 @@ endif()
# Needed for the provider interface, as it includes training headers when training is enabled
if (onnxruntime_ENABLE_TRAINING OR onnxruntime_ENABLE_TRAINING_OPS)
target_include_directories(onnxruntime_framework PRIVATE ${ORTTRAINING_ROOT})
if (onnxruntime_USE_NCCL OR onnxruntime_USE_MPI)
target_include_directories(onnxruntime_framework PUBLIC ${MPI_CXX_INCLUDE_DIRS})
endif()
endif()
onnxruntime_add_include_to_target(onnxruntime_framework onnxruntime_common onnx onnx_proto protobuf::libprotobuf flatbuffers)
set_target_properties(onnxruntime_framework PROPERTIES FOLDER "ONNXRuntime")

View file

@@ -63,7 +63,7 @@ Status LongformerAttentionBase__CheckInputs(const LongformerAttentionBase* p, co
#include "orttraining/training_ops/cpu/loss/softmax_cross_entropy_loss.h"
#include "orttraining/training_ops/cpu/tensor/split.h"
#endif
#if defined(USE_CUDA) && defined(ORT_USE_NCCL) && defined(USE_NCCL_P2P)
#if defined(USE_CUDA) && defined(ORT_USE_NCCL)
#include "orttraining/training_ops/cuda/communication/nccl_service.h"
#include "orttraining/core/framework/distributed_run_context.h"
#endif
@@ -883,7 +883,7 @@ struct ProviderHostImpl : ProviderHost {
Status contrib__PrepareForTrainingCompute(const TensorShape& input_shape, int num_outputs, int64_t& axis, int& before_dims, int& after_dims_including_split_axis, int& after_dims_excluding_split, std::vector<int64_t>& split_sizes) override { return contrib::PrepareForTrainingCompute(input_shape, num_outputs, axis, before_dims, after_dims_including_split_axis, after_dims_excluding_split, split_sizes); }
Status contrib__YieldOp__Compute(const contrib::YieldOp* p, OpKernelContext* context) override { return p->YieldOp::Compute(context); }
#if defined(ORT_USE_NCCL) && defined(USE_NCCL_P2P)
#if defined(ORT_USE_NCCL)
training::DistributedRunContext& GetDistributedRunContextInstance() override { return training::DistributedRunContext::GetInstance(); }
#endif
#endif

View file

@@ -40,6 +40,7 @@ struct DeleteOnUnloadPtr {
#include <gsl/gsl>
#include <unordered_map>
#include <unordered_set>
#include <stddef.h>
#include "onnx/common/stl_backports.h"
#include "core/common/common.h"
#include "core/common/const_pointer_container.h"

View file

@@ -791,7 +791,7 @@ struct ProviderHost {
virtual Status contrib__PrepareForTrainingCompute(const TensorShape& input_shape, int num_outputs, int64_t& axis, int& before_dims, int& after_dims_including_split_axis, int& after_dims_excluding_split, std::vector<int64_t>& split_sizes) = 0;
virtual Status contrib__YieldOp__Compute(const contrib::YieldOp* p, OpKernelContext* context) = 0;
#if defined(ORT_USE_NCCL) && defined(USE_NCCL_P2P)
#if defined(ORT_USE_NCCL)
virtual training::DistributedRunContext& GetDistributedRunContextInstance() = 0;
#endif
#endif