diff --git a/scripts/pegasus/build_test_sample_spm_no_bos.py b/scripts/pegasus/build_test_sample_spm_no_bos.py
new file mode 100755
index 000000000..92ec94c42
--- /dev/null
+++ b/scripts/pegasus/build_test_sample_spm_no_bos.py
@@ -0,0 +1,20 @@
+#!/usr/bin/env python
+
+# this script builds a small sample spm file tests/fixtures/test_sentencepiece_no_bos.model, with features needed by pegasus
+
+# 1. pip install sentencepiece
+#
+# 2. wget https://raw.githubusercontent.com/google/sentencepiece/master/data/botchan.txt
+
+# 3. build
+import sentencepiece as spm
+
+# pegasus:
+# 1. no bos
+# 2. eos_id is 1
+# 3. unk_id is 2
+# build a sample spm file accordingly
+spm.SentencePieceTrainer.train('--input=botchan.txt --model_prefix=test_sentencepiece_no_bos --bos_id=-1 --unk_id=2 --eos_id=1 --vocab_size=1000')
+
+# 4. now update the fixture
+# mv test_sentencepiece_no_bos.model ../../tests/fixtures/
diff --git a/src/transformers/testing_utils.py b/src/transformers/testing_utils.py
index 1dfbf66e5..afc70672e 100644
--- a/src/transformers/testing_utils.py
+++ b/src/transformers/testing_utils.py
@@ -184,13 +184,23 @@ def require_faiss(test_case):
     return test_case
 
 
-def get_tests_dir():
+def get_tests_dir(append_path=None):
     """
-    returns the full path to the `tests` dir, so that the tests can be invoked from anywhere
+    Args:
+        append_path: optional path to append to the tests dir path
+
+    Return:
+        The full path to the `tests` dir, so that the tests can be invoked from anywhere.
+        Optionally `append_path` is joined after the `tests` dir if the former is provided.
+
     """
     # this function caller's __file__
     caller__file__ = inspect.stack()[1][1]
-    return os.path.abspath(os.path.dirname(caller__file__))
+    tests_dir = os.path.abspath(os.path.dirname(caller__file__))
+    if append_path:
+        return os.path.join(tests_dir, append_path)
+    else:
+        return tests_dir
 
 
 #
diff --git a/src/transformers/tokenization_pegasus.py b/src/transformers/tokenization_pegasus.py
index 346bcdb58..e29d7439d 100644
--- a/src/transformers/tokenization_pegasus.py
+++ b/src/transformers/tokenization_pegasus.py
@@ -49,7 +49,7 @@ class PegasusTokenizer(ReformerTokenizer):
 
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        # Dont use reserved words added_token_encoder, added_tokens_decoder because of
+        # Don't use reserved words added_token_encoder, added_tokens_decoder because of
         # AssertionError: Non-consecutive added token '1' found. in from_pretrained
         assert len(self.added_tokens_decoder) == 0
         self.encoder: Dict[int, str] = {0: self.pad_token, 1: self.eos_token}
@@ -58,7 +58,7 @@ class PegasusTokenizer(ReformerTokenizer):
         self.decoder: Dict[str, int] = {v: k for k, v in self.encoder.items()}
 
     def _convert_token_to_id(self, token: str) -> int:
-        """ Converts a token (str) in an id using the vocab. """
+        """ Converts a token (str) to an id using the vocab. """
""" if token in self.decoder: return self.decoder[token] elif token in self.added_tokens_decoder: @@ -67,7 +67,7 @@ class PegasusTokenizer(ReformerTokenizer): return sp_id + self.offset def _convert_id_to_token(self, index: int) -> str: - """Converts an index (integer) in a token (str) using the vocab.""" + """Converts an index (integer) to a token (str) using the vocab.""" if index in self.encoder: return self.encoder[index] elif index in self.added_tokens_encoder: @@ -81,11 +81,6 @@ class PegasusTokenizer(ReformerTokenizer): def vocab_size(self) -> int: return len(self.sp_model) + self.offset - def get_vocab(self) -> Dict[str, int]: - vocab = {self.convert_ids_to_tokens(i): i for i in range(self.vocab_size)} - vocab.update(self.added_tokens_encoder) - return vocab - def num_special_tokens_to_add(self, pair=False): """Just EOS""" return 1 @@ -109,12 +104,12 @@ class PegasusTokenizer(ReformerTokenizer): def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None) -> List[int]: """ - Build model inputs from a sequence or a pair of sequence for sequence classification tasks + Build model inputs from a sequence or a pair of sequences for sequence classification tasks by concatenating and adding special tokens. A Pegasus sequence has the following format, where ``X`` represents the sequence: - single sequence: ``X `` - - pair of sequences: ``A B `` (not intended use) + - pair of sequences: ``A B `` (not intended use) BOS is never used. Pairs of sequences are not the expected use case, but they will be handled without a separator. diff --git a/src/transformers/tokenization_reformer.py b/src/transformers/tokenization_reformer.py index 017e4a346..a2b2e7856 100644 --- a/src/transformers/tokenization_reformer.py +++ b/src/transformers/tokenization_reformer.py @@ -17,6 +17,7 @@ import os from shutil import copyfile +from typing import Dict from .tokenization_utils import PreTrainedTokenizer from .tokenization_utils_fast import PreTrainedTokenizerFast @@ -119,7 +120,7 @@ class ReformerTokenizer(PreTrainedTokenizer): def vocab_size(self): return self.sp_model.get_piece_size() - def get_vocab(self): + def get_vocab(self) -> Dict[str, int]: vocab = {self.convert_ids_to_tokens(i): i for i in range(self.vocab_size)} vocab.update(self.added_tokens_encoder) return vocab diff --git a/src/transformers/tokenization_utils.py b/src/transformers/tokenization_utils.py index 188b8ac76..b4d8829f4 100644 --- a/src/transformers/tokenization_utils.py +++ b/src/transformers/tokenization_utils.py @@ -186,7 +186,7 @@ class PreTrainedTokenizer(PreTrainedTokenizerBase): num_added_toks = tokenizer.add_tokens(['new_tok1', 'my_new-tok2']) print('We have added', num_added_toks, 'tokens') - # Notice: resize_token_embeddings expect to receive the full size of the new vocabulary, i.e. the length of the tokenizer. + # Note: resize_token_embeddings expects to receive the full size of the new vocabulary, i.e. the length of the tokenizer. 
             model.resize_token_embeddings(len(tokenizer))
 
         """
         new_tokens = [str(tok) for tok in new_tokens]
diff --git a/tests/fixtures/test_sentencepiece_no_bos.model b/tests/fixtures/test_sentencepiece_no_bos.model
new file mode 100644
index 000000000..c3336ae60
Binary files /dev/null and b/tests/fixtures/test_sentencepiece_no_bos.model differ
diff --git a/tests/test_tokenization_pegasus.py b/tests/test_tokenization_pegasus.py
index 3943322bf..ba3e84058 100644
--- a/tests/test_tokenization_pegasus.py
+++ b/tests/test_tokenization_pegasus.py
@@ -1,13 +1,15 @@
 import unittest
-from pathlib import Path
 
 from transformers.file_utils import cached_property
-from transformers.testing_utils import require_torch
+from transformers.testing_utils import get_tests_dir, require_torch
 from transformers.tokenization_pegasus import PegasusTokenizer, PegasusTokenizerFast
 
 from .test_tokenization_common import TokenizerTesterMixin
 
 
+SAMPLE_VOCAB = get_tests_dir("fixtures/test_sentencepiece_no_bos.model")
+
+
 class PegasusTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
 
     tokenizer_class = PegasusTokenizer
@@ -17,11 +19,9 @@ class PegasusTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
 
     def setUp(self):
         super().setUp()
-        save_dir = Path(self.tmpdirname)
-        spm_file = PegasusTokenizer.vocab_files_names["vocab_file"]
-        if not (save_dir / spm_file).exists():
-            tokenizer = self.pegasus_large_tokenizer
-            tokenizer.save_pretrained(self.tmpdirname)
+        # We have a SentencePiece fixture for testing
+        tokenizer = PegasusTokenizer(SAMPLE_VOCAB)
+        tokenizer.save_pretrained(self.tmpdirname)
 
     @cached_property
     def pegasus_large_tokenizer(self):
@@ -32,10 +32,7 @@ class PegasusTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
         pass
 
     def get_tokenizer(self, **kwargs) -> PegasusTokenizer:
-        if not kwargs:
-            return self.pegasus_large_tokenizer
-        else:
-            return PegasusTokenizer.from_pretrained(self.tmpdirname, **kwargs)
+        return PegasusTokenizer.from_pretrained(self.tmpdirname, **kwargs)
 
     def get_input_output_texts(self, tokenizer):
         return ("This is a test", "This is a test")
diff --git a/tests/test_tokenization_t5.py b/tests/test_tokenization_t5.py
index 234e9f91f..9b670478c 100644
--- a/tests/test_tokenization_t5.py
+++ b/tests/test_tokenization_t5.py
@@ -14,19 +14,18 @@
 # limitations under the License.
 
 
-import os
 import unittest
 
 from transformers import BatchEncoding
 from transformers.file_utils import cached_property
-from transformers.testing_utils import _torch_available
+from transformers.testing_utils import _torch_available, get_tests_dir
 from transformers.tokenization_t5 import T5Tokenizer, T5TokenizerFast
 from transformers.tokenization_xlnet import SPIECE_UNDERLINE
 
 from .test_tokenization_common import TokenizerTesterMixin
 
 
-SAMPLE_VOCAB = os.path.join(os.path.dirname(os.path.abspath(__file__)), "fixtures/test_sentencepiece.model")
+SAMPLE_VOCAB = get_tests_dir("fixtures/test_sentencepiece.model")
 
 FRAMEWORK = "pt" if _torch_available else "tf"
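
For reference, a minimal usage sketch of the reworked `get_tests_dir` helper (it only restates what the patch introduces; note the helper locates the caller's file via `inspect.stack()`, so it must be called from a module living under `tests/`):

# usage sketch: both forms resolve against the calling test module's tests/ dir
from transformers.testing_utils import get_tests_dir

tests_dir = get_tests_dir()  # e.g. /path/to/repo/tests
# with the new optional append_path argument, a fixture path is a single call:
sample_vocab = get_tests_dir("fixtures/test_sentencepiece_no_bos.model")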
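And a quick sanity check for the generated fixture — a sketch, assuming `pip install sentencepiece` and a model built with the exact trainer flags from the build script above — confirming the three Pegasus-specific properties (no BOS, eos_id 1, unk_id 2):

import sentencepiece as spm

sp = spm.SentencePieceProcessor()
sp.load("tests/fixtures/test_sentencepiece_no_bos.model")
assert sp.bos_id() == -1  # BOS disabled via --bos_id=-1
assert sp.eos_id() == 1   # --eos_id=1
assert sp.unk_id() == 2   # --unk_id=2
assert sp.get_piece_size() == 1000  # --vocab_size=1000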