mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-06-26 03:00:54 +00:00
* Revert "Fix nightly CI pipeline to generate ROCm 4.2 wheels and add ROCm 4.3.1 wheels (#9101)"
This reverts commit 47888392ab.
* Add BatchNorm kernel for ROCm (#9014)
* Add BatchNorm kernel for ROCm, update BN test
* correct epsilon_ setting; limit min epsilon
* Upgrade ROCm CI pipeline for ROCm 4.3.1 and permit run inside container (#9070)
* try to run inside 4.3.1 container
* no \ in container run command
* remove networking options
* try with adding video render groups
* add job to build docker image
* try without 1st stage
* change alpha, beta to float
* try adding service connection
* retain huggingface directory
* static video and render gid
* use runtime expression for variables
* install torch-ort
* pin sacrebleu==1.5.1
* update curves for rocm 4.3.1
* try again
* disable determinism and only check tail of loss curve and with a much larger threshold of 0.05
* disable RoBERTa due to high run variability on ROCm 4.3.1
* put reduction unit tests back in
* Fix nightly CI pipeline to generate ROCm 4.2 wheels and add ROCm 4.3.1 wheels (#9101)
* make work for both rocm 4.2 and rocm 4.3.1
* fix rocm 4.3.1 docker image reference
* fix CUDA_VERSION to ROCM_VERSION
* fix ReduceConsts conflict def
* add ifdef to miopen_common.h as well
* trailing ws
Co-authored-by: wangye <wangye@microsoft.com>
Co-authored-by: mindest <30493312+mindest@users.noreply.github.com>
42 lines
1.3 KiB
Python
Executable file
42 lines
1.3 KiB
Python
Executable file
import sys
|
|
import json
|
|
import collections
|
|
|
|
# Command line: argv[1] is the path of the actual results JSON,
# argv[2] the path of the expected results JSON.
actual, expect = sys.argv[1], sys.argv[2]

# Parse each file into a Python object.
with open(actual) as actual_fp:
    json_actual = json.load(actual_fp)

with open(expect) as expect_fp:
    json_expect = json.load(expect_fp)
|
|
|
|
def almost_equal(x, y, threshold=0.05):
    """Return True when x and y differ by strictly less than threshold."""
    gap = abs(x - y)
    return gap < threshold
|
|
|
|
# loss curve tail match
# Every logged step of the actual run is compared and printed, but only the
# last loss_tail_length comparisons decide the outcome: the bounded deque
# discards older results as newer ones are appended.
loss_tail_length = 4
loss_tail_matches = collections.deque(maxlen=loss_tail_length)
logged_steps = len(json_actual['steps'])
for i, step_actual in enumerate(json_actual['steps']):
    # Steps are paired by position; an expected file with fewer steps than
    # the actual run raises IndexError here.
    step_expect = json_expect['steps'][i]

    same_step = step_actual['step'] == step_expect['step']
    close_loss = almost_equal(step_actual['loss'], step_expect['loss'])
    is_match = same_step and close_loss
    loss_tail_matches.append(is_match)

    # Steps before the tail are reported as 'n/a' because they do not
    # influence the final verdict.
    in_tail = logged_steps - i <= loss_tail_length
    print('step {} loss actual {:.6f} expected {:.6f} match {}'.format(
        step_actual['step'], step_actual['loss'], step_expect['loss'],
        is_match if in_tail else 'n/a'))

# all() is True for an empty sequence, so a run that logged no steps must be
# rejected explicitly instead of passing the loss check vacuously.
if not loss_tail_matches:
    print('no steps were logged; loss curve check failed')
success = bool(loss_tail_matches) and all(loss_tail_matches)
|
|
|
|
# performance match
# The actual throughput must reach at least this fraction of the expected one.
threshold = 0.97
is_performant = (
    json_actual['samples_per_second']
    >= threshold * json_expect['samples_per_second']
)
success = success and is_performant
print('samples_per_second actual {:.3f} expected {:.3f} in-range {}'.format(
    json_actual['samples_per_second'], json_expect['samples_per_second'], is_performant))

# Raise explicitly instead of using an assert statement: asserts are removed
# when Python runs with -O, which would let a failed comparison exit with
# status 0. The exception type stays AssertionError.
if not success:
    raise AssertionError(
        'loss curve tail or samples_per_second did not match the expected results')
|