diff --git a/examples/pytorch/text-generation/requirements.txt b/examples/pytorch/text-generation/requirements.txt
index 0ef50f181..324a8cfb1 100644
--- a/examples/pytorch/text-generation/requirements.txt
+++ b/examples/pytorch/text-generation/requirements.txt
@@ -1,3 +1,4 @@
+accelerate >= 0.21.0
 sentencepiece != 0.1.92
 protobuf
 torch >= 1.3
diff --git a/examples/pytorch/text-generation/run_generation.py b/examples/pytorch/text-generation/run_generation.py
index 75221934d..557b75572 100755
--- a/examples/pytorch/text-generation/run_generation.py
+++ b/examples/pytorch/text-generation/run_generation.py
@@ -23,8 +23,9 @@ import inspect
 import logging
 from typing import Tuple
 
-import numpy as np
 import torch
+from accelerate import PartialState
+from accelerate.utils import set_seed
 
 from transformers import (
     AutoTokenizer,
@@ -88,13 +89,6 @@ the Virgin Mary, prompting him to become a priest. Rasputin quickly becomes famo
 with people, even a bishop, begging for his blessing. """
 
 
-def set_seed(args):
-    np.random.seed(args.seed)
-    torch.manual_seed(args.seed)
-    if args.n_gpu > 0:
-        torch.cuda.manual_seed_all(args.seed)
-
-
 #
 # Functions to prepare models' input
 #
@@ -327,7 +321,11 @@
     parser.add_argument("--xlm_language", type=str, default="", help="Optional language when used with the XLM model.")
 
     parser.add_argument("--seed", type=int, default=42, help="random seed for initialization")
-    parser.add_argument("--no_cuda", action="store_true", help="Avoid using CUDA when available")
+    parser.add_argument(
+        "--use_cpu",
+        action="store_true",
+        help="Whether or not to use cpu. If set to False, " "we will use gpu/npu or mps device if available",
+    )
     parser.add_argument("--num_return_sequences", type=int, default=1, help="The number of samples to generate.")
     parser.add_argument(
         "--fp16",
@@ -337,12 +335,13 @@
     parser.add_argument("--jit", action="store_true", help="Whether or not to use jit trace to accelerate inference")
     args = parser.parse_args()
 
-    args.device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
-    args.n_gpu = 0 if args.no_cuda else torch.cuda.device_count()
+    # Initialize the distributed state.
+    distributed_state = PartialState(cpu=args.use_cpu)
 
-    logger.warning(f"device: {args.device}, n_gpu: {args.n_gpu}, 16-bits training: {args.fp16}")
+    logger.warning(f"device: {distributed_state.device}, 16-bits inference: {args.fp16}")
 
-    set_seed(args)
+    if args.seed is not None:
+        set_seed(args.seed)
 
     # Initialize the model and tokenizer
     try:
@@ -355,7 +354,9 @@
     if tokenizer.pad_token is None:
         tokenizer.pad_token = tokenizer.eos_token
     model = model_class.from_pretrained(args.model_name_or_path)
-    model.to(args.device)
+
+    # Set the model to the right device
+    model.to(distributed_state.device)
 
     if args.fp16:
         model.half()
@@ -382,7 +383,7 @@
     else:
         prefix = args.prefix if args.prefix else args.padding_text
     encoded_prompt = tokenizer.encode(prefix + prompt_text, add_special_tokens=False, return_tensors="pt")
-    encoded_prompt = encoded_prompt.to(args.device)
+    encoded_prompt = encoded_prompt.to(distributed_state.device)
 
     if encoded_prompt.size()[-1] == 0:
         input_ids = None
diff --git a/examples/pytorch/text-generation/run_generation_contrastive_search.py b/examples/pytorch/text-generation/run_generation_contrastive_search.py
index 117f063a6..91781f051 100755
--- a/examples/pytorch/text-generation/run_generation_contrastive_search.py
+++ b/examples/pytorch/text-generation/run_generation_contrastive_search.py
@@ -23,8 +23,8 @@ python run_generation_contrastive_search.py --model_name_or_path=gpt2-large --pe
 import argparse
 import logging
 
-import numpy as np
-import torch
+from accelerate import PartialState
+from accelerate.utils import set_seed
 
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
@@ -37,13 +37,6 @@ logging.basicConfig(
 logger = logging.getLogger(__name__)
 
 
-def set_seed(args):
-    np.random.seed(args.seed)
-    torch.manual_seed(args.seed)
-    if args.n_gpu > 0:
-        torch.cuda.manual_seed_all(args.seed)
-
-
 def main():
     parser = argparse.ArgumentParser()
     parser.add_argument(
@@ -73,7 +66,11 @@
     parser.add_argument("--xlm_language", type=str, default="", help="Optional language when used with the XLM model.")
 
     parser.add_argument("--seed", type=int, default=42, help="random seed for initialization")
-    parser.add_argument("--no_cuda", action="store_true", help="Avoid using CUDA when available")
+    parser.add_argument(
+        "--use_cpu",
+        action="store_true",
+        help="Whether or not to use cpu. If set to False, " "we will use gpu/npu or mps device if available",
+    )
     parser.add_argument(
         "--fp16",
         action="store_true",
@@ -81,12 +78,13 @@
     )
     args = parser.parse_args()
 
-    args.device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
-    args.n_gpu = 0 if args.no_cuda else torch.cuda.device_count()
+    # Initialize the distributed state.
+    distributed_state = PartialState(cpu=args.use_cpu)
 
-    logger.warning(f"device: {args.device}, n_gpu: {args.n_gpu}, 16-bits training: {args.fp16}")
+    logger.warning(f"device: {distributed_state.device}, 16-bits inference: {args.fp16}")
 
-    set_seed(args)
+    if args.seed is not None:
+        set_seed(args.seed)
 
     # Initialize the model and tokenizer
     tokenizer = AutoTokenizer.from_pretrained(args.model_name_or_path)
@@ -94,7 +92,8 @@
     # tokenizer = GPT2Tokenizer.from_pretrained(args.model_name_or_path)
     # model = OPTForCausalLM.from_pretrained(args.model_name_or_path)
 
-    model.to(args.device)
+    # Set the model to the right device
+    model.to(distributed_state.device)
 
     if args.fp16:
         model.half()
@@ -103,7 +102,7 @@
     prompt_text = args.prompt if args.prompt else input("Model prompt >>> ")
 
     inputs = tokenizer(prompt_text, return_tensors="pt", add_special_tokens=False)
-    inputs = {key: value.to(args.device) for key, value in inputs.items()}
+    inputs = {key: value.to(distributed_state.device) for key, value in inputs.items()}
 
     output_sequences = model.generate(
         **inputs,
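
Taken together, the two script changes swap the hand-rolled device/seed plumbing (torch.device, n_gpu, a local set_seed) for accelerate's equivalents. The snippet below is a minimal, self-contained sketch of the resulting pattern, not part of the patch: it assumes only that accelerate >= 0.21.0, transformers, and torch are installed, and the "gpt2" checkpoint and example prompt are illustrative placeholders.

# Minimal sketch of the pattern this patch migrates to.
from accelerate import PartialState
from accelerate.utils import set_seed
from transformers import AutoModelForCausalLM, AutoTokenizer

# PartialState picks the best available device (GPU/NPU/MPS, else CPU);
# PartialState(cpu=True) mirrors the new --use_cpu flag.
distributed_state = PartialState()
set_seed(42)  # seeds Python, NumPy, and torch (incl. CUDA) in one call

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")
model.to(distributed_state.device)  # replaces the old model.to(args.device)

inputs = tokenizer("Hello, my name is", return_tensors="pt").to(distributed_state.device)
output_sequences = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(output_sequences[0], skip_special_tokens=True))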