mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-26 22:35:43 +00:00
Merge orttraining and ortmodule single gpu ci pipelines (#8022)
* Merge orttraining and ortmodule single gpu ci pipelines
* Remove Debug from orttrainer build config
This commit is contained in:
parent
4d1b48632c
commit
b2ed4fb0a4
5 changed files with 56 additions and 67 deletions
|
|
@ -2548,7 +2548,7 @@ def test_primitive_inputs(bool_argument, int_argument, float_argument):
|
|||
input1 = torch.randn(N, D_in, device=device)
|
||||
pt_out = pt_model(input1, bool_argument, int_argument, float_argument)
|
||||
ort_out = ort_model(input1, bool_argument, int_argument, float_argument)
|
||||
assert torch.equal(pt_out, ort_out)
|
||||
_test_helpers.assert_values_are_close(pt_out, ort_out)
|
||||
|
||||
@pytest.mark.parametrize("bool_arguments", [(True, False), (False, True)])
|
||||
def test_changing_bool_input_re_exports_model(bool_arguments):
|
||||
|
|
|
|||
|
|
@ -8,6 +8,8 @@ jobs:
|
|||
SubmoduleCheckoutMode: 'recursive'
|
||||
RunDockerBuildArgs: >
|
||||
-o ubuntu20.04 -p 3.8 -d gpu -r $(Build.BinariesDirectory)
|
||||
-t onnxruntime_orttraining_ortmodule_tests_image
|
||||
-e
|
||||
-x "
|
||||
--enable_training
|
||||
--config $(buildConfig)
|
||||
|
|
@ -16,6 +18,10 @@ jobs:
|
|||
--cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=70
|
||||
"
|
||||
DoNugetPack: 'false'
|
||||
RunInjectedPipeline: 'true'
|
||||
InjectedPipeline: 'orttraining-linux-gpu-ortmodule-test-ci-pipeline.yml'
|
||||
DockerImageTag: 'onnxruntime_orttraining_ortmodule_tests_image'
|
||||
BuildConfig: $(buildConfig)
|
||||
ArtifactName: 'drop-linux'
|
||||
TimeoutInMinutes: 120
|
||||
# Enable unreleased onnx opsets in CI builds
|
||||
|
|
@ -24,8 +30,6 @@ jobs:
|
|||
Strategy:
|
||||
maxParallel: 2
|
||||
matrix:
|
||||
Debug:
|
||||
buildConfig: Debug
|
||||
Release:
|
||||
buildConfig: Release
|
||||
|
||||
|
|
|
|||
|
|
@ -1,64 +0,0 @@
|
|||
trigger: none
|
||||
|
||||
jobs:
|
||||
- job: Onnxruntime_Linux_GPU_ORTModule_Test
|
||||
|
||||
timeoutInMinutes: 120
|
||||
pool: 'Linux-Single-GPU-V100'
|
||||
|
||||
steps:
|
||||
- checkout: self
|
||||
clean: true
|
||||
submodules: recursive
|
||||
|
||||
- template: templates/run-docker-build-steps.yml
|
||||
parameters:
|
||||
RunDockerBuildArgs: |
|
||||
-o ubuntu20.04 -p 3.8 -p 3.8 -d gpu -r $(Build.BinariesDirectory) \
|
||||
-t onnxruntime_ortmodule_tests_image \
|
||||
-x " \
|
||||
--config RelWithDebInfo \
|
||||
--enable_training \
|
||||
--update --build \
|
||||
--build_wheel --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=70 \
|
||||
" \
|
||||
-u \
|
||||
-e
|
||||
DisplayName: 'Build'
|
||||
|
||||
- bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdata-storage-key) -s "//orttrainingtestdata.file.core.windows.net/mnist" -d "/mnist"
|
||||
displayName: 'Mount MNIST'
|
||||
condition: succeededOrFailed()
|
||||
|
||||
- bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdata-storage-key) -s "//orttrainingtestdata.file.core.windows.net/bert-data" -d "/bert_data"
|
||||
displayName: 'Mount bert-data'
|
||||
condition: succeededOrFailed()
|
||||
|
||||
- bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdata-storage-key) -s "//orttrainingtestdata.file.core.windows.net/hf-models-cache" -d "/hf_models_cache"
|
||||
displayName: 'Mount hf-models-cache'
|
||||
condition: succeededOrFailed()
|
||||
|
||||
# Entry point for all ORTModule tests
|
||||
# The onnxruntime folder is deleted in the build directory
|
||||
# to enforce use of the onnxruntime wheel
|
||||
- script: |
|
||||
docker run \
|
||||
--gpus all \
|
||||
--shm-size=1024m \
|
||||
--rm \
|
||||
--volume $(Build.SourcesDirectory):/onnxruntime_src \
|
||||
--volume $(Build.BinariesDirectory):/build \
|
||||
--volume /mnist:/mnist \
|
||||
--volume /bert_data:/bert_data \
|
||||
--volume /hf_models_cache:/hf_models_cache \
|
||||
onnxruntime_ortmodule_tests_image \
|
||||
bash -c "python3 -m pip install /build/RelWithDebInfo/dist/onnxruntime*.whl ; rm -rf /build/RelWithDebInfo/onnxruntime/ ; /build/RelWithDebInfo/launch_test.py --cmd_line_with_args 'python orttraining_ortmodule_tests.py --mnist /mnist --bert_data /bert_data/hf_data/glue_data/CoLA/original/raw --transformers_cache /hf_models_cache/huggingface/transformers' --cwd /build/RelWithDebInfo" \
|
||||
displayName: 'Run orttraining_ortmodule_tests.py'
|
||||
condition: succeededOrFailed()
|
||||
timeoutInMinutes: 60
|
||||
|
||||
- template: templates/component-governance-component-detection-steps.yml
|
||||
parameters:
|
||||
condition: 'succeeded'
|
||||
|
||||
- template: templates/clean-agent-build-directory-step.yml
|
||||
|
|
@ -6,6 +6,10 @@ parameters:
|
|||
DoNodejsPack: 'false'
|
||||
DoNugetPack: 'false'
|
||||
NuPackScript: ''
|
||||
RunInjectedPipeline: 'false'
|
||||
InjectedPipeline: ''
|
||||
DockerImageTag: ''
|
||||
BuildConfig: ''
|
||||
ArtifactName: 'drop-linux'
|
||||
TimeoutInMinutes: 120
|
||||
# Controls whether unreleased onnx opsets are allowed. Default is set to 1
|
||||
|
|
@ -64,6 +68,12 @@ jobs:
|
|||
inputs:
|
||||
artifactName: ${{ parameters.ArtifactName }}
|
||||
targetPath: '$(Build.ArtifactStagingDirectory)'
|
||||
- ${{ if eq(parameters['RunInjectedPipeline'], 'true') }}:
|
||||
- template: |
|
||||
${{ parameters.InjectedPipeline }}
|
||||
parameters:
|
||||
DockerImageTag: ${{ parameters.DockerImageTag }}
|
||||
BuildConfig: ${{ parameters.BuildConfig }}
|
||||
- template: component-governance-component-detection-steps.yml
|
||||
parameters :
|
||||
condition : 'succeeded'
|
||||
|
|
|
|||
|
|
@ -0,0 +1,39 @@
|
|||
parameters:
|
||||
- name: DockerImageTag
|
||||
type: string
|
||||
- name: BuildConfig
|
||||
type: string
|
||||
|
||||
steps:
|
||||
|
||||
- bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdata-storage-key) -s "//orttrainingtestdata.file.core.windows.net/mnist" -d "/mnist"
|
||||
displayName: 'Mount MNIST'
|
||||
condition: succeededOrFailed()
|
||||
|
||||
- bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdata-storage-key) -s "//orttrainingtestdata.file.core.windows.net/bert-data" -d "/bert_data"
|
||||
displayName: 'Mount bert-data'
|
||||
condition: succeededOrFailed()
|
||||
|
||||
- bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdata-storage-key) -s "//orttrainingtestdata.file.core.windows.net/hf-models-cache" -d "/hf_models_cache"
|
||||
displayName: 'Mount hf-models-cache'
|
||||
condition: succeededOrFailed()
|
||||
|
||||
# Entry point for all ORTModule tests
|
||||
# The onnxruntime folder is deleted in the build directory
|
||||
# to enforce use of the onnxruntime wheel
|
||||
# Uninstall the orttraining requirements.txt packages, then install the ortmodule stage1 (torch cu11.1) and stage2 requirements before running tests.
|
||||
- script: |
|
||||
docker run \
|
||||
--gpus all \
|
||||
--shm-size=1024m \
|
||||
--rm \
|
||||
--volume $(Build.SourcesDirectory):/onnxruntime_src \
|
||||
--volume $(Build.BinariesDirectory)/${{ parameters.BuildConfig }}:/build \
|
||||
--volume /mnist:/mnist \
|
||||
--volume /bert_data:/bert_data \
|
||||
--volume /hf_models_cache:/hf_models_cache \
|
||||
${{ parameters.DockerImageTag }} \
|
||||
bash -c "python3 -m pip uninstall -y -r /onnxruntime_src/tools/ci_build/github/linux/docker/scripts/training/requirements.txt && python3 -m pip install -r /onnxruntime_src/tools/ci_build/github/linux/docker/scripts/training/ortmodule/stage1/requirements_torch_cu11.1.txt && python3 -m pip install -r /onnxruntime_src/tools/ci_build/github/linux/docker/scripts/training/ortmodule/stage2/requirements.txt && python3 -m pip install /build/dist/onnxruntime*.whl && rm -rf /build/onnxruntime/ && /build/launch_test.py --cmd_line_with_args 'python orttraining_ortmodule_tests.py --mnist /mnist --bert_data /bert_data/hf_data/glue_data/CoLA/original/raw --transformers_cache /hf_models_cache/huggingface/transformers' --cwd /build" \
|
||||
displayName: 'Run orttraining_ortmodule_tests.py'
|
||||
condition: succeededOrFailed()
|
||||
timeoutInMinutes: 60
|
||||
Loading…
Reference in a new issue