mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-26 22:35:43 +00:00
Implement CloudEP for hybrid inferencing. This PR introduces no new API; customers can configure session and run options to run inference against an Azure [Triton endpoint](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-deploy-with-triton?tabs=azure-cli%2Cendpoint). A sample configuration in Python looks like:

```
sess_opt.add_session_config_entry('cloud.endpoint_type', 'triton')
sess_opt.add_session_config_entry('cloud.uri', 'https://cloud.com')
sess_opt.add_session_config_entry('cloud.model_name', 'detection2')
sess_opt.add_session_config_entry('cloud.model_version', '7')  # optional, default 1
sess_opt.add_session_config_entry('cloud.verbose', '1')  # optional, default '0', meaning no verbose
...
run_opt.add_run_config_entry('use_cloud', '1')  # 0 for local inferencing, 1 for cloud endpoint.
run_opt.add_run_config_entry('cloud.auth_key', '...')
...
sess.run(None, {'input':input_}, run_opt)
```

Co-authored-by: Randy Shuai <rashuai@microsoft.com>
71 lines
2.2 KiB
YAML
71 lines
2.2 KiB
YAML
# Template parameters for the Linux Python wheel build job.
# Every parameter is a string; only `device` has a default.
parameters:
  # Target architecture. Interpolated into the job name, the docker repository
  # name, the test-binaries artifact name, and the PLATFORM docker build arg.
  - name: arch
    type: string

  # Agent pool the job runs on.
  - name: machine_pool
    type: string

  # Forwarded to the docker build as the BASEIMAGE build arg.
  - name: base_image
    type: string

  # Forwarded to the docker build as the DEVTOOLSET_ROOTPATH build arg.
  - name: devtoolset_rootpath
    type: string

  # Forwarded to the docker build as the LD_LIBRARY_PATH_ARG build arg.
  - name: ld_library_path_arg
    type: string

  # Forwarded to the docker build as the PREPEND_PATH build arg.
  - name: prepend_path
    type: string

  # Passed (quoted) as the value of the `-x` option of
  # run_python_dockerbuild.sh.
  - name: device
    type: string
    default: '-d CPU'

# Single job: build the docker image, build the Python wheel inside it,
# publish the wheel and the test binaries, then clean the agent.
jobs:
  - job: Linux_py_Wheels_${{ parameters.arch }}
    timeoutInMinutes: 240
    workspace:
      clean: all
    pool: ${{ parameters.machine_pool }}
    variables:
      # The build machine pool doesn't have dotnet, so it can't run CG.
      skipComponentGovernanceDetection: true
    steps:
      - checkout: self
        clean: true
        submodules: none

      - template: set-nightly-build-option-variable-step.yml

      # Build the docker image used by the wheel build step below.
      - template: get-docker-image-steps.yml
        parameters:
          Dockerfile: tools/ci_build/github/linux/docker/inference/x64/python/cpu/Dockerfile.manylinux2014_cpu
          Context: tools/ci_build/github/linux/docker/inference/x64/python/cpu
          # Folded scalar: the lines below are joined with single spaces into
          # one command-line string.
          DockerBuildArgs: >-
            --build-arg BUILD_UID=$( id -u )
            --build-arg BASEIMAGE=${{ parameters.base_image }}
            --build-arg PLATFORM=${{ parameters.arch }}
            --build-arg PREPEND_PATH=${{ parameters.prepend_path }}
            --build-arg LD_LIBRARY_PATH_ARG=${{ parameters.ld_library_path_arg }}
            --build-arg DEVTOOLSET_ROOTPATH=${{ parameters.devtoolset_rootpath }}
          # Must match the image name passed via `-i` to the build script.
          Repository: onnxruntimecpubuilpython${{ parameters.arch }}
          # Conditional insertion: only aarch64 builds pass UpdateDepsTxt.
          ${{ if eq(parameters.arch, 'aarch64') }}:
            UpdateDepsTxt: false

      - task: Bash@3
        displayName: 'Build Python Wheel'
        inputs:
          targetType: filePath
          filePath: tools/ci_build/github/linux/run_python_dockerbuild.sh
          arguments: -i onnxruntimecpubuilpython${{ parameters.arch }} -x "${{ parameters.device }}"

      - task: PublishBuildArtifacts@1
        displayName: 'Publish Artifact: ONNXRuntime python wheel'
        inputs:
          PathtoPublish: '$(Build.BinariesDirectory)/dist'
          ArtifactName: onnxruntime

      - task: PublishPipelineArtifact@0
        displayName: 'Publish Test Binaries'
        inputs:
          artifactName: 'drop-linux-cpu-${{ parameters.arch }}'
          targetPath: '$(Build.BinariesDirectory)/Release'

      # Runs regardless of the outcome of earlier steps.
      - task: mspremier.PostBuildCleanup.PostBuildCleanup-task.PostBuildCleanup@3
        displayName: 'Clean Agent Directories'
        condition: always()