From a8cbc4269cedfff492eb417106d313a3429d8dbd Mon Sep 17 00:00:00 2001
From: Stas Bekman
Date: Thu, 24 Sep 2020 14:10:26 -0700
Subject: [PATCH] [fsmt] build/test scripts (#7257)

Co-authored-by: Sam Shleifer
---
 scripts/fsmt/convert-allenai-wmt16.sh  |   2 +-
 scripts/fsmt/convert-allenai-wmt19.sh  |   2 +-
 scripts/fsmt/convert-facebook-wmt19.sh |   2 +-
 scripts/fsmt/eval-allenai-wmt16.sh     |   2 +-
 scripts/fsmt/eval-allenai-wmt19.sh     |   2 +-
 scripts/fsmt/eval-facebook-wmt19.sh    |   2 +-
 scripts/fsmt/s3-move.sh                | 103 +++++++++++++++++++++++++
 scripts/fsmt/tests-to-run.sh           |   6 ++
 8 files changed, 115 insertions(+), 6 deletions(-)
 create mode 100644 scripts/fsmt/s3-move.sh
 create mode 100755 scripts/fsmt/tests-to-run.sh

diff --git a/scripts/fsmt/convert-allenai-wmt16.sh b/scripts/fsmt/convert-allenai-wmt16.sh
index 549919032..5f45c883e 100755
--- a/scripts/fsmt/convert-allenai-wmt16.sh
+++ b/scripts/fsmt/convert-allenai-wmt16.sh
@@ -1,4 +1,4 @@
-#/usr/bin/env bash
+#!/usr/bin/env bash
 
 # this script acquires data and converts it to fsmt model
 # it covers:
diff --git a/scripts/fsmt/convert-allenai-wmt19.sh b/scripts/fsmt/convert-allenai-wmt19.sh
index 3ece67d21..25f1fec94 100755
--- a/scripts/fsmt/convert-allenai-wmt19.sh
+++ b/scripts/fsmt/convert-allenai-wmt19.sh
@@ -1,4 +1,4 @@
-#/usr/bin/env bash
+#!/usr/bin/env bash
 
 # this script acquires data and converts it to fsmt model
 # it covers:
diff --git a/scripts/fsmt/convert-facebook-wmt19.sh b/scripts/fsmt/convert-facebook-wmt19.sh
index 89621ae6f..6edf51d2a 100755
--- a/scripts/fsmt/convert-facebook-wmt19.sh
+++ b/scripts/fsmt/convert-facebook-wmt19.sh
@@ -1,4 +1,4 @@
-#/usr/bin/env bash
+#!/usr/bin/env bash
 
 # this script acquires data and converts it to fsmt model
 # it covers:
diff --git a/scripts/fsmt/eval-allenai-wmt16.sh b/scripts/fsmt/eval-allenai-wmt16.sh
index 513245a48..4f6705a67 100755
--- a/scripts/fsmt/eval-allenai-wmt16.sh
+++ b/scripts/fsmt/eval-allenai-wmt16.sh
@@ -1,4 +1,4 @@
-#/usr/bin/env bash
+#!/usr/bin/env bash
 
 # this script evals the following fsmt models
 # it covers:
diff --git a/scripts/fsmt/eval-allenai-wmt19.sh b/scripts/fsmt/eval-allenai-wmt19.sh
index 07da60b26..b4b7205a5 100755
--- a/scripts/fsmt/eval-allenai-wmt19.sh
+++ b/scripts/fsmt/eval-allenai-wmt19.sh
@@ -1,4 +1,4 @@
-#/usr/bin/env bash
+#!/usr/bin/env bash
 
 # this script evals the following fsmt models
 # it covers:
diff --git a/scripts/fsmt/eval-facebook-wmt19.sh b/scripts/fsmt/eval-facebook-wmt19.sh
index a47051489..ab197e173 100755
--- a/scripts/fsmt/eval-facebook-wmt19.sh
+++ b/scripts/fsmt/eval-facebook-wmt19.sh
@@ -1,4 +1,4 @@
-#/usr/bin/env bash
+#!/usr/bin/env bash
 
 # this script evals the following fsmt models
 # it covers:
diff --git a/scripts/fsmt/s3-move.sh b/scripts/fsmt/s3-move.sh
new file mode 100644
index 000000000..6c1e3eb16
--- /dev/null
+++ b/scripts/fsmt/s3-move.sh
@@ -0,0 +1,103 @@
+
+# this is the process of uploading the updated models to s3. As I can't upload them directly to the correct orgs, this script shows how this is done
+
+1. upload updated models to my account
+
+transformers-cli upload -y wmt19-ru-en
+transformers-cli upload -y wmt19-en-ru
+transformers-cli upload -y wmt19-de-en
+transformers-cli upload -y wmt19-en-de
+transformers-cli upload -y wmt19-de-en-6-6-base
+transformers-cli upload -y wmt19-de-en-6-6-big
+transformers-cli upload -y wmt16-en-de-dist-12-1
+transformers-cli upload -y wmt16-en-de-dist-6-1
+transformers-cli upload -y wmt16-en-de-12-1
+
+
+2. ask someone to move them to:
+
+* to facebook: "wmt19-ru-en", "wmt19-en-ru", "wmt19-en-de", "wmt19-de-en"
+* to allenai: "wmt16-en-de-dist-12-1", "wmt16-en-de-dist-6-1", "wmt16-en-de-12-1", "wmt19-de-en-6-6-base", "wmt19-de-en-6-6-big"
+
+export b="s3://models.huggingface.co/bert"
+stas_to_fb () {
+    src=$1
+    shift
+    aws s3 sync $b/stas/$src $b/facebook/$src $@
+}
+
+stas_to_allenai () {
+    src=$1
+    shift
+    aws s3 sync $b/stas/$src $b/allenai/$src $@
+}
+
+stas_to_fb wmt19-en-ru
+stas_to_fb wmt19-ru-en
+stas_to_fb wmt19-en-de
+stas_to_fb wmt19-de-en
+
+stas_to_allenai wmt16-en-de-dist-12-1
+stas_to_allenai wmt16-en-de-dist-6-1
+stas_to_allenai wmt16-en-de-6-1
+stas_to_allenai wmt16-en-de-12-1
+stas_to_allenai wmt19-de-en-6-6-base
+stas_to_allenai wmt19-de-en-6-6-big
+
+
+3. and then remove all these model files from my account
+
+transformers-cli s3 rm wmt16-en-de-12-1/config.json
+transformers-cli s3 rm wmt16-en-de-12-1/merges.txt
+transformers-cli s3 rm wmt16-en-de-12-1/pytorch_model.bin
+transformers-cli s3 rm wmt16-en-de-12-1/tokenizer_config.json
+transformers-cli s3 rm wmt16-en-de-12-1/vocab-src.json
+transformers-cli s3 rm wmt16-en-de-12-1/vocab-tgt.json
+transformers-cli s3 rm wmt16-en-de-dist-12-1/config.json
+transformers-cli s3 rm wmt16-en-de-dist-12-1/merges.txt
+transformers-cli s3 rm wmt16-en-de-dist-12-1/pytorch_model.bin
+transformers-cli s3 rm wmt16-en-de-dist-12-1/tokenizer_config.json
+transformers-cli s3 rm wmt16-en-de-dist-12-1/vocab-src.json
+transformers-cli s3 rm wmt16-en-de-dist-12-1/vocab-tgt.json
+transformers-cli s3 rm wmt16-en-de-dist-6-1/config.json
+transformers-cli s3 rm wmt16-en-de-dist-6-1/merges.txt
+transformers-cli s3 rm wmt16-en-de-dist-6-1/pytorch_model.bin
+transformers-cli s3 rm wmt16-en-de-dist-6-1/tokenizer_config.json
+transformers-cli s3 rm wmt16-en-de-dist-6-1/vocab-src.json
+transformers-cli s3 rm wmt16-en-de-dist-6-1/vocab-tgt.json
+transformers-cli s3 rm wmt19-de-en-6-6-base/config.json
+transformers-cli s3 rm wmt19-de-en-6-6-base/merges.txt
+transformers-cli s3 rm wmt19-de-en-6-6-base/pytorch_model.bin
+transformers-cli s3 rm wmt19-de-en-6-6-base/tokenizer_config.json
+transformers-cli s3 rm wmt19-de-en-6-6-base/vocab-src.json
+transformers-cli s3 rm wmt19-de-en-6-6-base/vocab-tgt.json
+transformers-cli s3 rm wmt19-de-en-6-6-big/config.json
+transformers-cli s3 rm wmt19-de-en-6-6-big/merges.txt
+transformers-cli s3 rm wmt19-de-en-6-6-big/pytorch_model.bin
+transformers-cli s3 rm wmt19-de-en-6-6-big/tokenizer_config.json
+transformers-cli s3 rm wmt19-de-en-6-6-big/vocab-src.json
+transformers-cli s3 rm wmt19-de-en-6-6-big/vocab-tgt.json
+transformers-cli s3 rm wmt19-de-en/config.json
+transformers-cli s3 rm wmt19-de-en/merges.txt
+transformers-cli s3 rm wmt19-de-en/pytorch_model.bin
+transformers-cli s3 rm wmt19-de-en/tokenizer_config.json
+transformers-cli s3 rm wmt19-de-en/vocab-src.json
+transformers-cli s3 rm wmt19-de-en/vocab-tgt.json
+transformers-cli s3 rm wmt19-en-de/config.json
+transformers-cli s3 rm wmt19-en-de/merges.txt
+transformers-cli s3 rm wmt19-en-de/pytorch_model.bin
+transformers-cli s3 rm wmt19-en-de/tokenizer_config.json
+transformers-cli s3 rm wmt19-en-de/vocab-src.json
+transformers-cli s3 rm wmt19-en-de/vocab-tgt.json
+transformers-cli s3 rm wmt19-en-ru/config.json
+transformers-cli s3 rm wmt19-en-ru/merges.txt
+transformers-cli s3 rm wmt19-en-ru/pytorch_model.bin
+transformers-cli s3 rm wmt19-en-ru/tokenizer_config.json
+transformers-cli s3 rm wmt19-en-ru/vocab-src.json
+transformers-cli s3 rm wmt19-en-ru/vocab-tgt.json
+transformers-cli s3 rm wmt19-ru-en/config.json
+transformers-cli s3 rm wmt19-ru-en/merges.txt
+transformers-cli s3 rm wmt19-ru-en/pytorch_model.bin
+transformers-cli s3 rm wmt19-ru-en/tokenizer_config.json
+transformers-cli s3 rm wmt19-ru-en/vocab-src.json
+transformers-cli s3 rm wmt19-ru-en/vocab-tgt.json
diff --git a/scripts/fsmt/tests-to-run.sh b/scripts/fsmt/tests-to-run.sh
new file mode 100755
index 000000000..d3a74fd76
--- /dev/null
+++ b/scripts/fsmt/tests-to-run.sh
@@ -0,0 +1,6 @@
+#!/usr/bin/env bash
+
+# these scripts need to be run before any changes to FSMT-related code - it should cover all bases
+
+USE_CUDA=0 RUN_SLOW=1 pytest --disable-warnings tests/test_tokenization_fsmt.py tests/test_configuration_auto.py tests/test_modeling_fsmt.py examples/seq2seq/test_fsmt_bleu_score.py
+USE_CUDA=1 RUN_SLOW=1 pytest --disable-warnings tests/test_tokenization_fsmt.py tests/test_configuration_auto.py tests/test_modeling_fsmt.py examples/seq2seq/test_fsmt_bleu_score.py