[fsmt] build/test scripts (#7257)

Co-authored-by: Sam Shleifer <sshleifer@gmail.com>
This commit is contained in:
Stas Bekman 2020-09-24 14:10:26 -07:00 committed by GitHub
parent a8e7982f84
commit a8cbc4269c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 115 additions and 6 deletions

View file

@ -1,4 +1,4 @@
#/usr/bin/env bash
#!/usr/bin/env bash
# this script acquires data and converts it to fsmt model
# it covers:

View file

@ -1,4 +1,4 @@
#/usr/bin/env bash
#!/usr/bin/env bash
# this script acquires data and converts it to fsmt model
# it covers:

View file

@ -1,4 +1,4 @@
#/usr/bin/env bash
#!/usr/bin/env bash
# this script acquires data and converts it to fsmt model
# it covers:

View file

@ -1,4 +1,4 @@
#/usr/bin/env bash
#!/usr/bin/env bash
# this script evals the following fsmt models
# it covers:

View file

@ -1,4 +1,4 @@
#/usr/bin/env bash
#!/usr/bin/env bash
# this script evals the following fsmt models
# it covers:

View file

@ -1,4 +1,4 @@
#/usr/bin/env bash
#!/usr/bin/env bash
# this script evals the following fsmt models
# it covers:

103
scripts/fsmt/s3-move.sh Normal file
View file

@ -0,0 +1,103 @@
# this is the process of uploading the updated models to s3. As I can't upload them directly to the correct orgs, this script shows how this is done
1. upload updated models to my account
transformers-cli upload -y wmt19-ru-en
transformers-cli upload -y wmt19-en-ru
transformers-cli upload -y wmt19-de-en
transformers-cli upload -y wmt19-en-de
transformers-cli upload -y wmt19-de-en-6-6-base
transformers-cli upload -y wmt19-de-en-6-6-big
transformers-cli upload -y wmt16-en-de-dist-12-1
transformers-cli upload -y wmt16-en-de-dist-6-1
transformers-cli upload -y wmt16-en-de-12-1
2. ask someone to move them to:
* to facebook: "wmt19-ru-en", "wmt19-en-ru", "wmt19-en-de", "wmt19-de-en"
* to allenai: "wmt16-en-de-dist-12-1", "wmt16-en-de-dist-6-1", "wmt16-en-de-12-1", "wmt19-de-en-6-6-base", "wmt19-de-en-6-6-big"
export b="s3://models.huggingface.co/bert"
stas_to_fb () {
src=$1
shift
aws s3 sync $b/stas/$src $b/facebook/$src $@
}
stas_to_allenai () {
src=$1
shift
aws s3 sync $b/stas/$src $b/allenai/$src $@
}
stas_to_fb wmt19-en-ru
stas_to_fb wmt19-ru-en
stas_to_fb wmt19-en-de
stas_to_fb wmt19-de-en
stas_to_allenai wmt16-en-de-dist-12-1
stas_to_allenai wmt16-en-de-dist-6-1
stas_to_allenai wmt16-en-de-6-1
stas_to_allenai wmt16-en-de-12-1
stas_to_allenai wmt19-de-en-6-6-base
stas_to_allenai wmt19-de-en-6-6-big
3. and then remove all these model files from my account
transformers-cli s3 rm wmt16-en-de-12-1/config.json
transformers-cli s3 rm wmt16-en-de-12-1/merges.txt
transformers-cli s3 rm wmt16-en-de-12-1/pytorch_model.bin
transformers-cli s3 rm wmt16-en-de-12-1/tokenizer_config.json
transformers-cli s3 rm wmt16-en-de-12-1/vocab-src.json
transformers-cli s3 rm wmt16-en-de-12-1/vocab-tgt.json
transformers-cli s3 rm wmt16-en-de-dist-12-1/config.json
transformers-cli s3 rm wmt16-en-de-dist-12-1/merges.txt
transformers-cli s3 rm wmt16-en-de-dist-12-1/pytorch_model.bin
transformers-cli s3 rm wmt16-en-de-dist-12-1/tokenizer_config.json
transformers-cli s3 rm wmt16-en-de-dist-12-1/vocab-src.json
transformers-cli s3 rm wmt16-en-de-dist-12-1/vocab-tgt.json
transformers-cli s3 rm wmt16-en-de-dist-6-1/config.json
transformers-cli s3 rm wmt16-en-de-dist-6-1/merges.txt
transformers-cli s3 rm wmt16-en-de-dist-6-1/pytorch_model.bin
transformers-cli s3 rm wmt16-en-de-dist-6-1/tokenizer_config.json
transformers-cli s3 rm wmt16-en-de-dist-6-1/vocab-src.json
transformers-cli s3 rm wmt16-en-de-dist-6-1/vocab-tgt.json
transformers-cli s3 rm wmt19-de-en-6-6-base/config.json
transformers-cli s3 rm wmt19-de-en-6-6-base/merges.txt
transformers-cli s3 rm wmt19-de-en-6-6-base/pytorch_model.bin
transformers-cli s3 rm wmt19-de-en-6-6-base/tokenizer_config.json
transformers-cli s3 rm wmt19-de-en-6-6-base/vocab-src.json
transformers-cli s3 rm wmt19-de-en-6-6-base/vocab-tgt.json
transformers-cli s3 rm wmt19-de-en-6-6-big/config.json
transformers-cli s3 rm wmt19-de-en-6-6-big/merges.txt
transformers-cli s3 rm wmt19-de-en-6-6-big/pytorch_model.bin
transformers-cli s3 rm wmt19-de-en-6-6-big/tokenizer_config.json
transformers-cli s3 rm wmt19-de-en-6-6-big/vocab-src.json
transformers-cli s3 rm wmt19-de-en-6-6-big/vocab-tgt.json
transformers-cli s3 rm wmt19-de-en/config.json
transformers-cli s3 rm wmt19-de-en/merges.txt
transformers-cli s3 rm wmt19-de-en/pytorch_model.bin
transformers-cli s3 rm wmt19-de-en/tokenizer_config.json
transformers-cli s3 rm wmt19-de-en/vocab-src.json
transformers-cli s3 rm wmt19-de-en/vocab-tgt.json
transformers-cli s3 rm wmt19-en-de/config.json
transformers-cli s3 rm wmt19-en-de/merges.txt
transformers-cli s3 rm wmt19-en-de/pytorch_model.bin
transformers-cli s3 rm wmt19-en-de/tokenizer_config.json
transformers-cli s3 rm wmt19-en-de/vocab-src.json
transformers-cli s3 rm wmt19-en-de/vocab-tgt.json
transformers-cli s3 rm wmt19-en-ru/config.json
transformers-cli s3 rm wmt19-en-ru/merges.txt
transformers-cli s3 rm wmt19-en-ru/pytorch_model.bin
transformers-cli s3 rm wmt19-en-ru/tokenizer_config.json
transformers-cli s3 rm wmt19-en-ru/vocab-src.json
transformers-cli s3 rm wmt19-en-ru/vocab-tgt.json
transformers-cli s3 rm wmt19-ru-en/config.json
transformers-cli s3 rm wmt19-ru-en/merges.txt
transformers-cli s3 rm wmt19-ru-en/pytorch_model.bin
transformers-cli s3 rm wmt19-ru-en/tokenizer_config.json
transformers-cli s3 rm wmt19-ru-en/vocab-src.json
transformers-cli s3 rm wmt19-ru-en/vocab-tgt.json

6
scripts/fsmt/tests-to-run.sh Executable file
View file

@ -0,0 +1,6 @@
#!/usr/bin/env bash
# these scripts need to be run before any changes to FSMT-related code - it should cover all bases
USE_CUDA=0 RUN_SLOW=1 pytest --disable-warnings tests/test_tokenization_fsmt.py tests/test_configuration_auto.py tests/test_modeling_fsmt.py examples/seq2seq/test_fsmt_bleu_score.py
USE_CUDA=1 RUN_SLOW=1 pytest --disable-warnings tests/test_tokenization_fsmt.py tests/test_configuration_auto.py tests/test_modeling_fsmt.py examples/seq2seq/test_fsmt_bleu_score.py