--shm-size=1024m to fix nccl shared memory issue (#5214)

* --shm-size=256m to fix nccl shared memory issue

Co-authored-by: liqun <liqun@OrtTrainingDev4.af05slrtruoetgaxwwjv5nsq5e.px.internal.cloudapp.net>
This commit is contained in:
liqunfu 2020-09-17 17:21:47 -07:00 committed by GitHub
parent 8156e0dd10
commit f37e1292a1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -107,6 +107,8 @@ fi
if [ $BUILD_DEVICE = "cpu" ] || [ $BUILD_DEVICE = "ngraph" ] || [ $BUILD_DEVICE = "openvino" ] || [ $BUILD_DEVICE = "nnapi" ] || [ $BUILD_DEVICE = "arm" ]; then
RUNTIME=
elif [[ $BUILD_EXTR_PAR = *--enable_training_python_frontend_e2e_tests* ]]; then
RUNTIME="--gpus all --shm-size=256m"
else
RUNTIME="--gpus all"
fi