Update the comparison notebook

This commit is contained in:
VictorSanh 2018-11-03 09:08:05 -04:00
parent 391a4ec2f3
commit 8c932e37f9

View file

@ -4,26 +4,72 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# TensorFlow code"
"# Comparing TensorFlow (original) and PyTorch models\n",
"\n",
"We use this small notebook to test the conversion of the model's weights and to make sure that the TensorFlow and PyTorch models are coherent. In particular, we compare the outputs of the last layer of both models on a simple example (in `input.txt`).\n",
"\n",
"To run this notebook, please make sure that your Python environment has both TensorFlow and PyTorch installed.\n",
"You should follow the instructions in the `README.md` and make sure that you have:\n",
"- the original TensorFlow implementation\n",
"- the `BERT-base, Uncased` model\n",
"- run the script `convert_tf_checkpoint_to_pytorch.py` to convert the weights to PyTorch\n",
"\n",
"Please modify the relative paths accordingly (at the beginning of Sections 1 and 2)."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 1/ TensorFlow code"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"original_tf_inplem_dir = \"../bert/\"\n",
"model_dir = \"../uncased_L-12_H-768_A-12/\"\n",
"\n",
"vocab_file = model_dir + \"vocab.txt\"\n",
"bert_config_file = model_dir + \"bert_config.json\"\n",
"init_checkpoint = model_dir + \"bert_model.ckpt\"\n",
"\n",
"input_file = \"input.txt\"\n",
"max_seq_length = 128"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"ExecuteTime": {
"end_time": "2018-11-03T02:09:37.498678Z",
"start_time": "2018-11-03T02:09:36.366672Z"
}
},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.6/site-packages/h5py/__init__.py:34: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n",
" from ._conv import register_converters as _register_converters\n"
]
}
],
"source": [
"import sys\n",
"sys.path.append(original_tf_inplem_dir)\n",
"\n",
"from extract_features import *"
]
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 3,
"metadata": {
"ExecuteTime": {
"end_time": "2018-11-03T02:09:37.621865Z",
@ -45,13 +91,6 @@
}
],
"source": [
"data_dir=\"/Users/thomaswolf/Documents/Thomas/Code/HF/BERT/data/glue_data/MRPC/\"\n",
"vocab_file=\"/Users/thomaswolf/Documents/Thomas/Code/HF/BERT/google_models/uncased_L-12_H-768_A-12/vocab.txt\"\n",
"bert_config_file=\"/Users/thomaswolf/Documents/Thomas/Code/HF/BERT/google_models/uncased_L-12_H-768_A-12/bert_config.json\"\n",
"init_checkpoint=\"/Users/thomaswolf/Documents/Thomas/Code/HF/BERT/google_models/uncased_L-12_H-768_A-12/bert_model.ckpt\"\n",
"max_seq_length=128\n",
"input_file=\"/Users/thomaswolf/Documents/Thomas/Code/HF/BERT/pytorch-pretrained-BERT/input.txt\"\n",
"\n",
"layer_indexes = list(range(12))\n",
"bert_config = modeling.BertConfig.from_json_file(bert_config_file)\n",
"tokenizer = tokenization.FullTokenizer(\n",
@ -67,7 +106,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 4,
"metadata": {
"ExecuteTime": {
"end_time": "2018-11-03T02:09:40.831618Z",
@ -79,15 +118,15 @@
"name": "stdout",
"output_type": "stream",
"text": [
"WARNING:tensorflow:Estimator's model_fn (<function model_fn_builder.<locals>.model_fn at 0x12b0bcc80>) includes params argument, but params are not passed to Estimator.\n",
"WARNING:tensorflow:Using temporary folder as model directory: /var/folders/yx/cw8n_njx3js5jksyw_qlp8p00000gn/T/tmpgpb5nz3u\n",
"INFO:tensorflow:Using config: {'_model_dir': '/var/folders/yx/cw8n_njx3js5jksyw_qlp8p00000gn/T/tmpgpb5nz3u', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true\n",
"WARNING:tensorflow:Estimator's model_fn (<function model_fn_builder.<locals>.model_fn at 0x1289c1a60>) includes params argument, but params are not passed to Estimator.\n",
"WARNING:tensorflow:Using temporary folder as model directory: /var/folders/y2/py87pn6115bdsdftbc6394nh0000gn/T/tmpmcfk2tyr\n",
"INFO:tensorflow:Using config: {'_model_dir': '/var/folders/y2/py87pn6115bdsdftbc6394nh0000gn/T/tmpmcfk2tyr', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true\n",
"graph_options {\n",
" rewrite_options {\n",
" meta_optimizer_iterations: ONE\n",
" }\n",
"}\n",
", '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': None, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x12e1160f0>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1, '_tpu_config': TPUConfig(iterations_per_loop=2, num_shards=1, num_cores_per_replica=None, per_host_input_for_training=3, tpu_job_name=None, initial_infeed_sleep_secs=None, input_partition_dims=None), '_cluster': None}\n",
", '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': None, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x12c242470>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1, '_tpu_config': TPUConfig(iterations_per_loop=2, num_shards=1, num_cores_per_replica=None, per_host_input_for_training=3, tpu_job_name=None, initial_infeed_sleep_secs=None, input_partition_dims=None), '_cluster': None}\n",
"WARNING:tensorflow:Setting TPUConfig.num_shards==1 is an unsupported behavior. Please fix as soon as possible (leaving num_shards as None.\n",
"INFO:tensorflow:_TPUContext: eval_on_tpu True\n",
"WARNING:tensorflow:eval_on_tpu ignored because use_tpu is False.\n"
@ -123,7 +162,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 5,
"metadata": {
"ExecuteTime": {
"end_time": "2018-11-03T02:09:46.413197Z",
@ -135,7 +174,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow:Could not find trained model in model_dir: /var/folders/yx/cw8n_njx3js5jksyw_qlp8p00000gn/T/tmpgpb5nz3u, running initialization to predict.\n",
"INFO:tensorflow:Could not find trained model in model_dir: /var/folders/y2/py87pn6115bdsdftbc6394nh0000gn/T/tmpmcfk2tyr, running initialization to predict.\n",
"INFO:tensorflow:Calling model_fn.\n",
"INFO:tensorflow:Running infer on CPU\n",
"INFO:tensorflow:Done calling model_fn.\n",
@ -186,7 +225,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 6,
"metadata": {
"ExecuteTime": {
"end_time": "2018-11-03T02:09:46.460128Z",
@ -211,7 +250,7 @@
"(128, 768)"
]
},
"execution_count": 5,
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
@ -227,7 +266,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 7,
"metadata": {
"ExecuteTime": {
"end_time": "2018-11-03T02:09:46.498637Z",
@ -243,12 +282,12 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# PyTorch code"
"## 2/ PyTorch code"
]
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 8,
"metadata": {
"ExecuteTime": {
"end_time": "2018-11-03T02:09:46.660303Z",
@ -263,12 +302,22 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"init_checkpoint_pt = \"../pytorch_model/uncased_L-12_H-768_A-12/pytorch_model.bin\""
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"ExecuteTime": {
"end_time": "2018-11-03T02:09:48.292135Z",
"start_time": "2018-11-03T02:09:46.661921Z"
}
},
"scrolled": true
},
"outputs": [
{
@ -569,14 +618,12 @@
")"
]
},
"execution_count": 8,
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"init_checkpoint_pt=\"/Users/thomaswolf/Documents/Thomas/Code/HF/BERT/google_models/uncased_L-12_H-768_A-12/pytorch_model.bin\"\n",
"\n",
"device = torch.device(\"cpu\")\n",
"model = extract_features_pytorch.BertModel(bert_config)\n",
"model.load_state_dict(torch.load(init_checkpoint_pt, map_location='cpu'))\n",
@ -585,7 +632,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 11,
"metadata": {
"ExecuteTime": {
"end_time": "2018-11-03T02:09:48.332982Z",
@ -892,7 +939,7 @@
")"
]
},
"execution_count": 9,
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
@ -912,7 +959,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 12,
"metadata": {
"ExecuteTime": {
"end_time": "2018-11-03T02:09:54.371188Z",
@ -1000,7 +1047,7 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 13,
"metadata": {
"ExecuteTime": {
"end_time": "2018-11-03T02:09:57.139854Z",
@ -1026,7 +1073,7 @@
"(128, 768)"
]
},
"execution_count": 17,
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
@ -1043,7 +1090,7 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 14,
"metadata": {
"ExecuteTime": {
"end_time": "2018-11-03T02:09:59.000058Z",
@ -1068,7 +1115,7 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": 15,
"metadata": {
"ExecuteTime": {
"end_time": "2018-11-03T02:09:59.462123Z",
@ -1090,9 +1137,16 @@
"print(tensorflow_outputs[1].shape)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 3/ Comparing the last-layer outputs of both models (root-mean-square difference)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 16,
"metadata": {
"ExecuteTime": {
"end_time": "2018-11-03T02:10:00.014784Z",
@ -1106,7 +1160,7 @@
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": 17,
"metadata": {
"ExecuteTime": {
"end_time": "2018-11-03T02:10:09.582557Z",
@ -1127,7 +1181,7 @@
"4.1671223e-07"
]
},
"execution_count": 24,
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
@ -1137,21 +1191,14 @@
"print(np.array(tensorflow_outputs[i]).shape, np.array(pytorch_outputs[i]).shape)\n",
"np.sqrt(np.mean((np.array(tensorflow_outputs[i]) - np.array(pytorch_outputs[i]))**2.0))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"hide_input": false,
"kernelspec": {
"display_name": "Python [conda env:bert]",
"display_name": "Python 3",
"language": "python",
"name": "conda-env-bert-py"
"name": "python3"
},
"language_info": {
"codemirror_mode": {
@ -1163,7 +1210,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.7"
"version": "3.6.5"
},
"toc": {
"colors": {