diff --git a/examples/test_examples.py b/examples/test_examples.py
index 66a345f36..c6e1d34f8 100644
--- a/examples/test_examples.py
+++ b/examples/test_examples.py
@@ -22,7 +22,8 @@ from unittest.mock import patch

 import torch

-from transformers.testing_utils import TestCasePlus
+from transformers.file_utils import is_apex_available
+from transformers.testing_utils import TestCasePlus, torch_device


 SRC_DIRS = [
@@ -52,6 +53,11 @@ def get_setup_file():
     return args.f


+def is_cuda_and_apex_available():
+    is_using_cuda = torch.cuda.is_available() and torch_device == "cuda"
+    return is_using_cuda and is_apex_available()
+
+
 class ExamplesTests(TestCasePlus):
     def test_run_glue(self):
         stream_handler = logging.StreamHandler(sys.stdout)
@@ -74,7 +80,13 @@ class ExamplesTests(TestCasePlus):
             --warmup_steps=2
             --seed=42
             --max_seq_length=128
-            """.split()
+            """
+        output_dir = "./tests/fixtures/tests_samples/temp_dir_{}".format(hash(testargs))
+        testargs += "--output_dir " + output_dir
+        testargs = testargs.split()
+
+        if is_cuda_and_apex_available():
+            testargs.append("--fp16")

         with patch.object(sys, "argv", testargs):
             result = run_glue.main()
@@ -135,8 +147,13 @@ class ExamplesTests(TestCasePlus):
             --do_train
             --do_eval
             --num_train_epochs=1
-            --no_cuda
-            """.split()
+            """
+        output_dir = "./tests/fixtures/tests_samples/temp_dir_{}".format(hash(testargs))
+        testargs += "--output_dir " + output_dir
+        testargs = testargs.split()
+
+        if torch_device != "cuda":
+            testargs.append("--no_cuda")

         with patch.object(sys, "argv", testargs):
             result = run_language_modeling.main()
@@ -175,7 +192,14 @@ class ExamplesTests(TestCasePlus):
         logger.addHandler(stream_handler)

         testargs = ["run_generation.py", "--prompt=Hello", "--length=10", "--seed=42"]
-        model_type, model_name = ("--model_type=gpt2", "--model_name_or_path=sshleifer/tiny-gpt2")
+
+        if is_cuda_and_apex_available():
+            testargs.append("--fp16")
+
+        model_type, model_name = (
+            "--model_type=gpt2",
+            "--model_name_or_path=sshleifer/tiny-gpt2",
+        )
         with patch.object(sys, "argv", testargs + [model_type, model_name]):
             result = run_generation.main()
             self.assertGreaterEqual(len(result[0]), 10)
diff --git a/examples/text-generation/run_generation.py b/examples/text-generation/run_generation.py
index 40017733e..12acd0cb0 100644
--- a/examples/text-generation/run_generation.py
+++ b/examples/text-generation/run_generation.py
@@ -186,11 +186,20 @@ def main():
     parser.add_argument("--seed", type=int, default=42, help="random seed for initialization")
     parser.add_argument("--no_cuda", action="store_true", help="Avoid using CUDA when available")
     parser.add_argument("--num_return_sequences", type=int, default=1, help="The number of samples to generate.")
+    parser.add_argument(
+        "--fp16",
+        action="store_true",
+        help="Whether to use 16-bit (mixed) precision (through NVIDIA apex) instead of 32-bit",
+    )
     args = parser.parse_args()

     args.device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
     args.n_gpu = 0 if args.no_cuda else torch.cuda.device_count()

+    logger.warning(
+        "device: %s, n_gpu: %s, 16-bits training: %s", args.device, args.n_gpu, args.fp16,
+    )
+
     set_seed(args)

     # Initialize the model and tokenizer
@@ -204,6 +213,9 @@
     model = model_class.from_pretrained(args.model_name_or_path)
     model.to(args.device)

+    if args.fp16:
+        model.half()
+
     args.length = adjust_length_to_model(args.length, max_sequence_length=model.config.max_position_embeddings)
     logger.info(args)
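
Note (not part of the patch): a minimal sketch of what the new `--fp16` path does at inference time, assuming a CUDA machine; it uses the same tiny checkpoint as the test above, and the snippet itself is illustrative rather than code from the PR.

    # Sketch only: mirrors the new `if args.fp16: model.half()` branch in
    # run_generation.py. Gated on CUDA availability, since half-precision
    # inference is only useful (and reliably supported) on GPU.
    import torch
    from transformers import GPT2LMHeadModel, GPT2Tokenizer

    tokenizer = GPT2Tokenizer.from_pretrained("sshleifer/tiny-gpt2")
    model = GPT2LMHeadModel.from_pretrained("sshleifer/tiny-gpt2")

    if torch.cuda.is_available():
        model.to("cuda")
        model.half()  # cast all weights and buffers to float16
        input_ids = tokenizer.encode("Hello", return_tensors="pt").to("cuda")
        output = model.generate(input_ids, max_length=10, do_sample=True)
        print(tokenizer.decode(output[0], skip_special_tokens=True))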
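To exercise the flag end to end from the command line, with the same arguments the test passes:

    python examples/text-generation/run_generation.py \
        --model_type=gpt2 \
        --model_name_or_path=sshleifer/tiny-gpt2 \
        --prompt=Hello --length=10 --seed=42 --fp16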