From 699bc7e86ea2253bfb2f011f006180b2e49f0703 Mon Sep 17 00:00:00 2001
From: thomwolf <thomwolf@gmail.com>
Date: Fri, 12 Jul 2019 11:46:57 +0200
Subject: [PATCH] fix gpt-2 unk token test

---
 docs/README.md                            | 2 +-
 pytorch_transformers/tokenization_gpt2.py | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/docs/README.md b/docs/README.md
index c39ecda0d..1b3c1fead 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -57,4 +57,4 @@ It should build the static app that will be available under `/docs/_build/html`
 ## Adding a new element to the tree (toc-tree)
 
 Accepted files are reStructuredText (.rst) and Markdown (.md). Create a file with its extension and put it
-in the source directory. You can then link it to the toc-tree by putting the filename without the extension.
\ No newline at end of file
+in the source directory. You can then link it to the toc-tree by putting the filename without the extension.
diff --git a/pytorch_transformers/tokenization_gpt2.py b/pytorch_transformers/tokenization_gpt2.py
index 6084dc3e0..bd90a9225 100644
--- a/pytorch_transformers/tokenization_gpt2.py
+++ b/pytorch_transformers/tokenization_gpt2.py
@@ -177,7 +177,9 @@ class GPT2Tokenizer(PreTrainedTokenizer):
 
     def _convert_token_to_id(self, token):
         """ Converts a token (str/unicode) in an id using the vocab. """
-        return self.encoder.get(token)
+        # Out-of-vocabulary tokens resolve to the id of the unknown token.
+        unk_id = self.encoder.get(self.unk_token)
+        return self.encoder.get(token, unk_id)
 
     def _convert_id_to_token(self, index):
         """Converts an index (integer) in a token (string/unicode) using the vocab."""