mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-07-02 03:55:34 +00:00
Install ssh in builder image, fix segfault in TrainingRunnerTest.Basic. (#5186)
This commit is contained in:
parent
400ac85565
commit
a20f8037f6
2 changed files with 5 additions and 3 deletions
|
|
@ -19,12 +19,13 @@ FROM nvidia/cuda:10.2-cudnn7-devel-ubuntu18.04 as builder
|
|||
# set location for builds
|
||||
WORKDIR /stage
|
||||
|
||||
# install curl and git
|
||||
# install curl, git, ssh (required by MPI when running ORT tests)
|
||||
RUN apt-get -y update &&\
|
||||
apt-get -y --no-install-recommends install \
|
||||
curl \
|
||||
git \
|
||||
language-pack-en \
|
||||
openssh-client \
|
||||
unattended-upgrades
|
||||
|
||||
# update existing packages to minimize security vulnerabilities
|
||||
|
|
|
|||
|
|
@ -262,7 +262,8 @@ void NcclService::Launch() {
|
|||
{
|
||||
std::lock_guard<std::mutex> guard(mutex_);
|
||||
// All tasks must be ready with a valid time.
|
||||
if (time_ > schedule_.size() - 1 ||
|
||||
if (schedule_.empty() ||
|
||||
time_ > schedule_.size() - 1 ||
|
||||
!schedule_[time_].IsAllTasksEqueued() ||
|
||||
schedule_[time_].IsAllTasksFinished()) {
|
||||
continue;
|
||||
|
|
@ -337,7 +338,7 @@ void NcclService::Terminate() {
|
|||
WaitForLaunch();
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(mutex_);
|
||||
cv_.wait(lock, [this] { return total_time_ > 0 && time_ == 0; });
|
||||
cv_.wait(lock, [this] { return schedule_.empty() || total_time_ > 0 && time_ == 0; });
|
||||
}
|
||||
|
||||
is_running_ = false;
|
||||
|
|
|
|||
Loading…
Reference in a new issue