diff --git a/docs/python/notebooks/onnxruntime-nuphar-tutorial.ipynb b/docs/python/notebooks/onnxruntime-nuphar-tutorial.ipynb index ee9f5c762b..ccc6a19044 100644 --- a/docs/python/notebooks/onnxruntime-nuphar-tutorial.ipynb +++ b/docs/python/notebooks/onnxruntime-nuphar-tutorial.ipynb @@ -297,7 +297,7 @@ "metadata": {}, "outputs": [], "source": [ - "SymbolicShapeInference.infer_shapes(input_model=lstm_model, output_model=lstm_model)" + "onnx.save(SymbolicShapeInference.infer_shapes(onnx.load(lstm_model)), lstm_model)" ] }, { @@ -559,7 +559,7 @@ "bert_model_with_shape_inference = os.path.join(bert_model_dir, 'bertsquad10_shaped.onnx')\n", "\n", "# run symbolic shape inference\n", - "SymbolicShapeInference.infer_shapes(bert_model, bert_model_with_shape_inference, auto_merge=True, int_max=100000)" + "onnx.save(SymbolicShapeInference.infer_shapes(onnx.load(bert_model), auto_merge=True, int_max=100000), bert_model_with_shape_inference)" ] }, { @@ -692,7 +692,7 @@ "gpt2_model_with_shape_inference = os.path.join(gpt2_model_dir, 'model_shaped.onnx')\n", "\n", "# run symbolic shape inference\n", - "SymbolicShapeInference.infer_shapes(gpt2_model, gpt2_model_with_shape_inference, auto_merge=True)" + "onnx.save(SymbolicShapeInference.infer_shapes(onnx.load(gpt2_model), auto_merge=True), gpt2_model_with_shape_inference)" ] }, { @@ -892,7 +892,7 @@ "source": [ "# editing\n", "bidaf_converted = 'bidaf_mod.onnx'\n", - "SymbolicShapeInference.infer_shapes(bidaf, bidaf_converted)\n", + "onnx.save(SymbolicShapeInference.infer_shapes(onnx.load(bidaf)), bidaf_converted)\n", "convert_to_scan_model(bidaf_converted, bidaf_converted)\n", "# When quantizing, there's an only_for_scan option to quantize only the GEMV inside Scan ops.\n", "# This is useful when the input dims of LSTM being much bigger than hidden dims.\n", diff --git a/onnxruntime/core/providers/nuphar/scripts/model_editor.py b/onnxruntime/core/providers/nuphar/scripts/model_editor.py index 3b715761bc..6a2050730d 100644 --- a/onnxruntime/core/providers/nuphar/scripts/model_editor.py +++ b/onnxruntime/core/providers/nuphar/scripts/model_editor.py @@ -831,5 +831,7 @@ if __name__ == '__main__': else: raise NotImplementedError('Unknown mode') print('Running symbolic shape inference on output model') - SymbolicShapeInference.infer_shapes(args.output, args.output, auto_merge=True) + mp = onnx.load(args.output) + mp = SymbolicShapeInference.infer_shapes(mp, auto_merge=True) + onnx.save(mp, args.output) print('Done!') diff --git a/onnxruntime/core/providers/nuphar/scripts/model_quantizer.py b/onnxruntime/core/providers/nuphar/scripts/model_quantizer.py index 3efb7dc392..986b98ef36 100644 --- a/onnxruntime/core/providers/nuphar/scripts/model_quantizer.py +++ b/onnxruntime/core/providers/nuphar/scripts/model_quantizer.py @@ -9,7 +9,6 @@ import numpy as np import onnx from onnx import helper, numpy_helper from .node_factory import NodeFactory, ensure_opset -from ..tools.symbolic_shape_infer import SymbolicShapeInference class QuantizeConfig: def __init__(self, signed, reserved_bits, type_bits): diff --git a/onnxruntime/core/providers/nuphar/scripts/rnn_benchmark.py b/onnxruntime/core/providers/nuphar/scripts/rnn_benchmark.py index dfc35e06e3..0514f1d2a2 100644 --- a/onnxruntime/core/providers/nuphar/scripts/rnn_benchmark.py +++ b/onnxruntime/core/providers/nuphar/scripts/rnn_benchmark.py @@ -120,7 +120,7 @@ def perf_test(rnn_type, num_threads, input_dim, hidden_dim, bidirectional, layer scan_model_name = os.path.splitext(model_name)[0] + '_scan.onnx' convert_to_scan_model(model_name, scan_model_name) # note that symbolic shape inference is needed because model has symbolic batch dim, thus init_state is ConstantOfShape - SymbolicShapeInference.infer_shapes(scan_model_name, scan_model_name) + onnx.save(SymbolicShapeInference.infer_shapes(onnx.load(scan_model_name)), scan_model_name) sess = onnxruntime.InferenceSession(scan_model_name) count, duration, per_iter_cost = perf_run(sess, feeds, min_counts=top_n, min_duration_seconds=min_duration_seconds) avg_scan = top_n_avg(per_iter_cost, top_n) @@ -130,7 +130,7 @@ def perf_test(rnn_type, num_threads, input_dim, hidden_dim, bidirectional, layer from .model_quantizer import convert_matmul_model int8_model_name = os.path.splitext(model_name)[0] + '_int8.onnx' convert_matmul_model(scan_model_name, int8_model_name) - SymbolicShapeInference.infer_shapes(int8_model_name, int8_model_name) + onnx.save(SymbolicShapeInference.infer_shapes(onnx.load(int8_model_name)), int8_model_name) sess = onnxruntime.InferenceSession(int8_model_name) count, duration, per_iter_cost = perf_run(sess, feeds, min_counts=top_n, min_duration_seconds=min_duration_seconds) avg_int8 = top_n_avg(per_iter_cost, top_n) diff --git a/onnxruntime/python/tools/symbolic_shape_infer.py b/onnxruntime/python/tools/symbolic_shape_infer.py index e1986dfc3a..fc7ee89061 100755 --- a/onnxruntime/python/tools/symbolic_shape_infer.py +++ b/onnxruntime/python/tools/symbolic_shape_infer.py @@ -81,6 +81,7 @@ class SymbolicShapeInference: 'CategoryMapper' : self._infer_CategoryMapper, 'Compress' : self._infer_Compress, 'Concat' : self._infer_Concat, + 'Constant' : self._infer_Constant, 'ConstantOfShape' : self._infer_ConstantOfShape, 'Conv' : self._infer_Conv, 'CumSum' : self._pass_on_shape_and_type, @@ -91,6 +92,7 @@ class SymbolicShapeInference: 'Gather' : self._infer_Gather, 'GatherElements' : self._infer_GatherElements, 'GatherND' : self._infer_GatherND, + 'Gelu' : self._pass_on_shape_and_type, 'If' : self._infer_If, 'Loop' : self._infer_Loop, 'MatMul' : self._infer_MatMul, @@ -113,6 +115,7 @@ class SymbolicShapeInference: 'Shape' : self._infer_Shape, 'Size' : self._infer_Size, 'Slice' : self._infer_Slice, + 'SoftmaxCrossEntropyLoss':self._infer_SoftmaxCrossEntropyLoss, 'Split' : self._infer_Split, 'SplitToSequence' : self._infer_SplitToSequence, 'Squeeze' : self._infer_Squeeze, @@ -189,43 +192,8 @@ class SymbolicShapeInference: d.dim_param = v def _preprocess(self, in_mp): - out_mp = onnx.ModelProto() - out_mp.CopyFrom(in_mp) - out_mp.graph.ClearField('node') - self.out_mp_ = out_mp - - defined = set([i.name for i in list(in_mp.graph.input) + list(in_mp.graph.initializer)]) - pending_nodes = [] - - # returns True if no more ready nodes - def _insert_ready_nodes(): - ready_nodes = [pn for pn in pending_nodes if all([i in defined for i in pn.input if i])] - for rn in ready_nodes: - self.out_mp_.graph.node.add().CopyFrom(rn) - for o in rn.output: - defined.add(o) - pending_nodes.remove(rn) - return not ready_nodes - - # constant op -> initializer, topological sort - for in_n in in_mp.graph.node: - if in_n.op_type == 'Constant': - t = get_attribute(in_n, 'value') - t.name = in_n.output[0] - self.out_mp_.graph.initializer.add().CopyFrom(t) - defined.add(t.name) - else: - pending_nodes.append(in_n) - _insert_ready_nodes() - - while pending_nodes: - if _insert_ready_nodes(): - break - - if pending_nodes and self.verbose_ > 0: - print('SymbolicShapeInference: orphaned nodes discarded: ') - print(*[n.op_type + ': ' + n.output[0] for n in pending_nodes], sep='\n') - + self.out_mp_ = onnx.ModelProto() + self.out_mp_.CopyFrom(in_mp) self.initializers_ = dict([(i.name, i) for i in self.out_mp_.graph.initializer]) self.known_vi_ = dict([(i.name, i) for i in list(self.out_mp_.graph.input)]) self.known_vi_.update(dict([(i.name, helper.make_tensor_value_info(i.name, i.data_type, list(i.dims))) for i in self.out_mp_.graph.initializer])) @@ -370,8 +338,6 @@ class SymbolicShapeInference: symbolic_shape_inference = SymbolicShapeInference(self.int_max_, self.auto_merge_, self.guess_output_rank_, self.verbose_) all_shapes_inferred = False symbolic_shape_inference._preprocess(self.tmp_mp_) - # note that after _preprocess, Constant node will be converted to initializer and should be appended to subgraph.initializer - subgraph.initializer.extend([i for i in symbolic_shape_inference.out_mp_.graph.initializer if i.name not in subgraph_implicit_input and i.name not in subgraph_inputs]) symbolic_shape_inference.suggested_merge_ = self.suggested_merge_.copy() while symbolic_shape_inference.run_: all_shapes_inferred = symbolic_shape_inference._infer_impl(self.tmp_mp_, self.sympy_data_.copy()) @@ -638,11 +604,9 @@ class SymbolicShapeInference: vi = self.known_vi_[node.output[0]] vi.CopyFrom(helper.make_tensor_value_info(node.output[0], self.known_vi_[node.input[0]].type.tensor_type.elem_type, get_shape_from_sympy_shape(sympy_shape))) - def _infer_Conv(self, node): - sympy_shape = self._compute_conv_pool_shape(node) - self._update_computed_dims(sympy_shape) - vi = self.known_vi_[node.output[0]] - vi.CopyFrom(helper.make_tensor_value_info(node.output[0], vi.type.tensor_type.elem_type, get_shape_from_sympy_shape(sympy_shape))) + def _infer_Constant(self, node): + t = get_attribute(node, 'value') + self.sympy_data_[node.output[0]] = numpy_helper.to_array(t) def _infer_ConstantOfShape(self, node): sympy_shape = self._get_int_values(node)[0] @@ -662,6 +626,12 @@ class SymbolicShapeInference: vi.type.tensor_type.elem_type, get_shape_from_sympy_shape(sympy_shape))) + def _infer_Conv(self, node): + sympy_shape = self._compute_conv_pool_shape(node) + self._update_computed_dims(sympy_shape) + vi = self.known_vi_[node.output[0]] + vi.CopyFrom(helper.make_tensor_value_info(node.output[0], vi.type.tensor_type.elem_type, get_shape_from_sympy_shape(sympy_shape))) + def _infer_Expand(self, node): expand_to_shape = self._try_get_value(node, 1) if expand_to_shape is not None: @@ -680,8 +650,8 @@ class SymbolicShapeInference: vi.CopyFrom(helper.make_tensor_value_info(node.output[0], vi.type.tensor_type.elem_type, data_shape[:axis] + indices_shape + data_shape[axis+1:])) - if node.input[0] in self.sympy_data_: - assert 0 == get_attribute(node, 'axis', 0) # only handle 1D sympy compute + # for 1D input, do some sympy compute + if node.input[0] in self.sympy_data_ and len(data_shape) == 1 and 0 == get_attribute(node, 'axis', 0): idx = self._get_value(node, 1) data = self.sympy_data_[node.input[0]] if type(data) == list: @@ -1037,11 +1007,20 @@ class SymbolicShapeInference: get_shape_from_sympy_shape(new_sympy_shape))) # handle sympy_data if needed, for slice in shape computation - if node.input[0] in self.sympy_data_: - assert [0] == axes - assert len(starts) == 1 - assert len(ends) == 1 - self.sympy_data_[node.output[0]] = self.sympy_data_[node.input[0]][starts[0]:ends[0]] + if node.input[0] in self.sympy_data_ and [0] == axes and len(starts) == 1 and len(ends) == 1: + input_sympy_data = self.sympy_data_[node.input[0]] + if type(input_sympy_data) == list or (type(input_sympy_data) == np.array and len(input_sympy_data.shape) == 1): + self.sympy_data_[node.output[0]] = input_sympy_data[starts[0]:ends[0]] + + def _infer_SoftmaxCrossEntropyLoss(self, node): + vi = self.known_vi_[node.output[0]] + elem_type = self.known_vi_[node.input[0]].type.tensor_type.elem_type + vi.type.tensor_type.elem_type = elem_type + + if len(node.output) > 1: + data_shape = self._get_shape(node, 0) + vi = self.known_vi_[node.output[1]] + vi.CopyFrom(helper.make_tensor_value_info(vi.name, elem_type, data_shape)) def _infer_Split_Common(self, node, make_value_info_func): input_sympy_shape = self._get_sympy_shape(node, 0) @@ -1276,22 +1255,20 @@ class SymbolicShapeInference: output.CopyFrom(self.known_vi_[output.name]) @staticmethod - def infer_shapes(input_model, output_model, int_max=2**31 - 1, auto_merge=False, guess_output_rank=False, verbose=0): - in_mp = onnx.load(input_model) + def infer_shapes(in_mp, int_max=2**31 - 1, auto_merge=False, guess_output_rank=False, verbose=0): onnx_opset = get_opset(in_mp) if not onnx_opset or onnx_opset < 7: print('Only support models of onnx opset 7 and above.') - return + return None symbolic_shape_inference = SymbolicShapeInference(int_max, auto_merge, guess_output_rank, verbose) all_shapes_inferred = False symbolic_shape_inference._preprocess(in_mp) while symbolic_shape_inference.run_: all_shapes_inferred = symbolic_shape_inference._infer_impl(in_mp) symbolic_shape_inference._update_output_from_vi() - if output_model: - onnx.save(symbolic_shape_inference.out_mp_, output_model) if not all_shapes_inferred: - sys.exit(1) + raise Exception("Incomplete symbolic shape inference") + return symbolic_shape_inference.out_mp_ def parse_arguments(): parser = argparse.ArgumentParser() @@ -1309,5 +1286,7 @@ if __name__ == '__main__': if args.output: print('output model ' + args.output) print('Doing symbolic shape inference...') - out_mp = SymbolicShapeInference.infer_shapes(args.input, args.output, args.int_max, args.auto_merge, args.guess_output_rank, args.verbose) - print('Done!') + out_mp = SymbolicShapeInference.infer_shapes(onnx.load(args.input), args.int_max, args.auto_merge, args.guess_output_rank, args.verbose) + if args.output and out_mp: + onnx.save(out_mp, args.output) + print('Done!') diff --git a/onnxruntime/test/python/onnxruntime_test_python_symbolic_shape_infer.py b/onnxruntime/test/python/onnxruntime_test_python_symbolic_shape_infer.py index 19f455ff06..7a82bf9422 100644 --- a/onnxruntime/test/python/onnxruntime_test_python_symbolic_shape_infer.py +++ b/onnxruntime/test/python/onnxruntime_test_python_symbolic_shape_infer.py @@ -2,11 +2,12 @@ # Licensed under the MIT License. # -*- coding: UTF-8 -*- -import unittest +import onnx import os from onnxruntime.tools.symbolic_shape_infer import SymbolicShapeInference -import sys from pathlib import Path +import sys +import unittest class TestSymbolicShapeInference(unittest.TestCase): def test_symbolic_shape_infer(self): @@ -17,8 +18,7 @@ class TestSymbolicShapeInference(unittest.TestCase): continue # skip some bad model files print("Running symbolic shape inference on : " + str(filename)) SymbolicShapeInference.infer_shapes( - input_model=str(filename), - output_model=None, + in_mp=onnx.load(str(filename)), auto_merge=True, int_max=100000, guess_output_rank=True) diff --git a/orttraining/orttraining/python/ort_trainer.py b/orttraining/orttraining/python/ort_trainer.py index 58ca53e470..56766ce0d9 100644 --- a/orttraining/orttraining/python/ort_trainer.py +++ b/orttraining/orttraining/python/ort_trainer.py @@ -16,6 +16,8 @@ import warnings from .checkpointing_utils import list_checkpoint_files, get_checkpoint_name, CombineZeroCheckpoint import onnxruntime.capi.pt_patch +from onnxruntime.tools.symbolic_shape_infer import SymbolicShapeInference + DEFAULT_OPSET_VERSION = 12 class IODescription(): @@ -320,8 +322,14 @@ def convert_model_loss_fn_to_onnx(model, loss_fn, model_desc, device, inputs, op import copy # Deepcopy inputs, since input values may change after model run. sample_inputs_copy = copy.deepcopy(sample_inputs) - # Deepcopy model, in case model is stateful and changes after model run. - model_copy = copy.deepcopy(model) + try: + # Deepcopy model, in case model is stateful and changes after model run. + model_copy = copy.deepcopy(model) + except Exception: + model_copy = model + warnings.warn("This model cannot be deep copied (or pickled), which is a required step for stateful models to be properly exported to ONNX." + " Compute will continue, but unexpected results may occur!") + sample_outputs = model_copy(*sample_inputs_copy) if isinstance(sample_outputs, torch.Tensor): sample_outputs = [sample_outputs] @@ -539,7 +547,7 @@ class ORTTrainer(): global_step=0, get_lr_this_step=None, loss_scaler=None, deepspeed_zero_stage=0, enable_grad_norm_clip=True, frozen_weights=[], _opset_version=DEFAULT_OPSET_VERSION, _enable_internal_postprocess=True, _extra_postprocess=None, _use_deterministic_compute=False, - use_invertible_layernorm_grad=False): + use_invertible_layernorm_grad=False, run_symbolic_shape_infer=False): super(ORTTrainer, self).__init__() """ Initialize ORTTrainer. @@ -607,6 +615,8 @@ class ORTTrainer(): Defaults to None use_invertible_layernorm_grad: use invertible layernorm grad Defaults to False + run_symbolic_shape_infer: run symbolic shape inference + Defaults to False """ warnings.warn('DISCLAIMER: This is an early version of an experimental training API and it is subject to change. DO NOT create production applications with it') self.is_train = True @@ -669,6 +679,7 @@ class ORTTrainer(): self.state_dict_ = None self._use_deterministic_compute = _use_deterministic_compute self.use_invertible_layernorm_grad = use_invertible_layernorm_grad + self.run_symbolic_shape_infer = run_symbolic_shape_infer # use this special string to workaround a corner case that external loss_scale is passed into train_step as kwargs. # see prepare_input_and_fetches for more details. @@ -681,6 +692,10 @@ class ORTTrainer(): return self._verify_fully_optimized_model(self.onnx_model_) + + if self.run_symbolic_shape_infer: + self.onnx_model_ = SymbolicShapeInference.infer_shapes(self.onnx_model_, auto_merge=True, guess_output_rank=True) + self.session, self.train_io_binding, self.eval_io_binding, self.output_name, _, self.output_types = \ create_ort_training_session_with_optimizer( self.onnx_model_, self.device_, diff --git a/orttraining/orttraining/python/training/orttrainer.py b/orttraining/orttraining/python/training/orttrainer.py index df70853d46..755173dffd 100644 --- a/orttraining/orttraining/python/training/orttrainer.py +++ b/orttraining/orttraining/python/training/orttrainer.py @@ -10,6 +10,8 @@ import onnxruntime as ort from . import _utils, amp, checkpoint, optim, postprocess, ORTTrainerOptions from .model_desc_validation import _ORTTrainerModelDesc +from onnxruntime.tools.symbolic_shape_infer import SymbolicShapeInference + class TrainStepInfo(object): r"""Private class used to store runtime information from current train step. @@ -671,6 +673,9 @@ class ORTTrainer(object): if self._onnx_model is None: return + if self.options.utils.run_symbolic_shape_infer: + self._onnx_model = SymbolicShapeInference.infer_shapes(self._onnx_model, auto_merge=True, guess_output_rank=True) + # Create training session used by train_step self._create_ort_training_session() diff --git a/orttraining/orttraining/python/training/orttrainer_options.py b/orttraining/orttraining/python/training/orttrainer_options.py index ad57205c31..0ed9a68f5f 100644 --- a/orttraining/orttraining/python/training/orttrainer_options.py +++ b/orttraining/orttraining/python/training/orttrainer_options.py @@ -132,6 +132,10 @@ class ORTTrainerOptions(object): 'invertible_layer_norm_gradient' : { 'type' : 'boolean', 'default' : False + }, + 'run_symbolic_shape_infer' : { + 'type' : 'boolean', + 'default' : False } } }, @@ -225,6 +229,8 @@ class ORTTrainerOptions(object): enables gradient norm clipping for 'AdamOptimizer' and 'LambOptimizer' utils.invertible_layer_norm_gradient (bool, default is False): enables use of invertible layer norm gradients + utils.run_symbolic_shape_infer (bool, default is False): + runs symbolic shape inference on the model debug (dict): debug options debug.deterministic_compute (bool, default is False) @@ -445,6 +451,10 @@ _ORTTRAINER_OPTIONS_SCHEMA = { 'invertible_layer_norm_gradient' : { 'type': 'boolean', 'default': False + }, + 'run_symbolic_shape_infer' : { + 'type': 'boolean', + 'default': False } } }, diff --git a/orttraining/orttraining/test/python/orttraining_test_orttrainer_frontend.py b/orttraining/orttraining/test/python/orttraining_test_orttrainer_frontend.py index fcef3af1bd..1039a72703 100644 --- a/orttraining/orttraining/test/python/orttraining_test_orttrainer_frontend.py +++ b/orttraining/orttraining/test/python/orttraining_test_orttrainer_frontend.py @@ -98,6 +98,7 @@ def testORTTrainerOptionsDefaultValues(test_input): 'frozen_weights': [], 'grad_norm_clip': True, 'invertible_layer_norm_gradient': False, + 'run_symbolic_shape_infer': False }, 'debug': { 'deterministic_compute': False, @@ -1290,3 +1291,58 @@ def testLossScalerLegacyAndExperimentalRandomAllFinite(): assert_allclose(new_loss_scale, old_loss_scale) out.append(new_loss_scale) assert new_loss_scale > 1e-7 + +def testORTTrainerRunSymbolicShapeInfer(): + # Common data + seed = 0 + total_steps = 12 + device = 'cuda' + torch.set_printoptions(precision=10) + + # Setup without symbolic shape inference + torch.manual_seed(seed) + set_seed(seed) + options = orttrainer.ORTTrainerOptions({'device' : {'id' : device}, + 'debug' : {'deterministic_compute' : True}}) + model, model_desc, my_loss, batcher_fn, train_data, _, _ = _load_pytorch_transformer_model(device) + optim_config = optim.LambConfig(lr=0.001) + trainer = orttrainer.ORTTrainer(model, model_desc, optim_config, loss_fn=my_loss, options=options) + # Training loop + expected_loss = [] + for i in range(total_steps): + data, targets = batcher_fn(train_data, i) + loss, _ = trainer.train_step(data, targets) + expected_loss.append(loss.cpu()) + + # Setup with symbolic shape inference + torch.manual_seed(seed) + set_seed(seed) + model, model_desc, my_loss, batcher_fn, train_data, _, _ = _load_pytorch_transformer_model(device) + optim_config = optim.LambConfig(lr=0.001) + options.utils.run_symbolic_shape_infer = True + trainer = orttrainer.ORTTrainer(model, model_desc, optim_config, loss_fn=my_loss, options=options) + # Training loop + new_loss = [] + for i in range(total_steps): + data, targets = batcher_fn(train_data, i) + loss, _ = trainer.train_step(data, targets) + new_loss.append(loss.cpu()) + + # Setup with symbolic shape inference in legacy API + torch.manual_seed(seed) + set_seed(seed) + model, (model_desc, lr_desc), _, _, _, _, _ = _load_pytorch_transformer_model(device, legacy_api=True) + legacy_trainer = Legacy_ORTTrainer(model, my_loss, model_desc, "LambOptimizer", + None, lr_desc, device=device, + run_symbolic_shape_infer=True, + _use_deterministic_compute=True) + # Training loop + legacy_loss = [] + for i in range(total_steps): + data, targets = batcher_fn(train_data, i) + loss, _ = legacy_trainer.train_step(data, targets, torch.tensor([optim_config.lr])) + legacy_loss.append(loss.cpu()) + + # Compare losses + _test_helpers.assert_model_outputs(new_loss, expected_loss) + _test_helpers.assert_model_outputs(legacy_loss, expected_loss) \ No newline at end of file