From 62427d0815825436fa55b43725f44776e94abb65 Mon Sep 17 00:00:00 2001 From: Nikita Date: Fri, 15 May 2020 17:33:08 +0300 Subject: [PATCH] rerun notebook 02-transformers (#4341) --- notebooks/02-transformers.ipynb | 138 ++++++++++++-------------------- 1 file changed, 53 insertions(+), 85 deletions(-) diff --git a/notebooks/02-transformers.ipynb b/notebooks/02-transformers.ipynb index 9ff777ce5..7164e957b 100644 --- a/notebooks/02-transformers.ipynb +++ b/notebooks/02-transformers.ipynb @@ -3,7 +3,6 @@ { "cell_type": "markdown", "metadata": { - "collapsed": true, "pycharm": { "is_executing": false, "name": "#%% md\n" @@ -77,7 +76,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": { "pycharm": { "is_executing": false, @@ -85,77 +84,7 @@ }, "scrolled": true }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: transformers in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (2.5.1)\n", - "Requirement already satisfied: filelock in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from transformers) (3.0.12)\n", - "Requirement already satisfied: sentencepiece in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from transformers) (0.1.83)\n", - "Requirement already satisfied: boto3 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from transformers) (1.12.0)\n", - "Requirement already satisfied: requests in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from transformers) (2.22.0)\n", - "Requirement already satisfied: numpy in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from transformers) (1.18.1)\n", - "Requirement already satisfied: sacremoses in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from transformers) (0.0.35)\n", - 
"Requirement already satisfied: tokenizers==0.5.2 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from transformers) (0.5.2)\n", - "Requirement already satisfied: regex!=2019.12.17 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from transformers) (2020.1.8)\n", - "Requirement already satisfied: tqdm>=4.27 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from transformers) (4.42.1)\n", - "Requirement already satisfied: s3transfer<0.4.0,>=0.3.0 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from boto3->transformers) (0.3.3)\n", - "Requirement already satisfied: botocore<1.16.0,>=1.15.0 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from boto3->transformers) (1.15.0)\n", - "Requirement already satisfied: jmespath<1.0.0,>=0.7.1 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from boto3->transformers) (0.9.4)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from requests->transformers) (2019.11.28)\n", - "Requirement already satisfied: idna<2.9,>=2.5 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from requests->transformers) (2.8)\n", - "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from requests->transformers) (1.25.8)\n", - "Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from requests->transformers) (3.0.4)\n", - "Requirement already satisfied: joblib in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from sacremoses->transformers) (0.14.0)\n", - "Requirement already satisfied: click in 
/usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from sacremoses->transformers) (7.0)\n", - "Requirement already satisfied: six in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from sacremoses->transformers) (1.14.0)\n", - "Requirement already satisfied: docutils<0.16,>=0.10 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from botocore<1.16.0,>=1.15.0->boto3->transformers) (0.15.2)\n", - "Requirement already satisfied: python-dateutil<3.0.0,>=2.1 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from botocore<1.16.0,>=1.15.0->boto3->transformers) (2.8.1)\n", - "Requirement already satisfied: tensorflow==2.1.0 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (2.1.0)\n", - "Requirement already satisfied: termcolor>=1.1.0 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from tensorflow==2.1.0) (1.1.0)\n", - "Requirement already satisfied: keras-preprocessing>=1.1.0 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from tensorflow==2.1.0) (1.1.0)\n", - "Requirement already satisfied: opt-einsum>=2.3.2 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from tensorflow==2.1.0) (3.1.0)\n", - "Requirement already satisfied: protobuf>=3.8.0 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from tensorflow==2.1.0) (3.11.4)\n", - "Requirement already satisfied: numpy<2.0,>=1.16.0 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from tensorflow==2.1.0) (1.18.1)\n", - "Requirement already satisfied: tensorboard<2.2.0,>=2.1.0 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from tensorflow==2.1.0) (2.1.0)\n", - "Requirement already satisfied: keras-applications>=1.0.8 in 
/usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from tensorflow==2.1.0) (1.0.8)\n", - "Requirement already satisfied: wrapt>=1.11.1 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from tensorflow==2.1.0) (1.11.2)\n", - "Requirement already satisfied: six>=1.12.0 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from tensorflow==2.1.0) (1.14.0)\n", - "Requirement already satisfied: tensorflow-estimator<2.2.0,>=2.1.0rc0 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from tensorflow==2.1.0) (2.1.0)\n", - "Requirement already satisfied: scipy==1.4.1; python_version >= \"3\" in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from tensorflow==2.1.0) (1.4.1)\n", - "Requirement already satisfied: google-pasta>=0.1.6 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from tensorflow==2.1.0) (0.1.8)\n", - "Requirement already satisfied: wheel>=0.26; python_version >= \"3\" in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from tensorflow==2.1.0) (0.34.2)\n", - "Requirement already satisfied: grpcio>=1.8.6 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from tensorflow==2.1.0) (1.16.1)\n", - "Requirement already satisfied: absl-py>=0.7.0 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from tensorflow==2.1.0) (0.9.0)\n", - "Requirement already satisfied: gast==0.2.2 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from tensorflow==2.1.0) (0.2.2)\n", - "Requirement already satisfied: astor>=0.6.0 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from tensorflow==2.1.0) (0.8.0)\n", - "Requirement already satisfied: setuptools in 
/usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from protobuf>=3.8.0->tensorflow==2.1.0) (45.2.0.post20200210)\n", - "Requirement already satisfied: google-auth<2,>=1.6.3 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from tensorboard<2.2.0,>=2.1.0->tensorflow==2.1.0) (1.11.2)\n", - "Requirement already satisfied: google-auth-oauthlib<0.5,>=0.4.1 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from tensorboard<2.2.0,>=2.1.0->tensorflow==2.1.0) (0.4.1)\n", - "Requirement already satisfied: markdown>=2.6.8 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from tensorboard<2.2.0,>=2.1.0->tensorflow==2.1.0) (3.1.1)\n", - "Requirement already satisfied: werkzeug>=0.11.15 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from tensorboard<2.2.0,>=2.1.0->tensorflow==2.1.0) (1.0.0)\n", - "Requirement already satisfied: requests<3,>=2.21.0 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from tensorboard<2.2.0,>=2.1.0->tensorflow==2.1.0) (2.22.0)\n", - "Requirement already satisfied: h5py in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from keras-applications>=1.0.8->tensorflow==2.1.0) (2.10.0)\n", - "Requirement already satisfied: rsa<4.1,>=3.1.4 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from google-auth<2,>=1.6.3->tensorboard<2.2.0,>=2.1.0->tensorflow==2.1.0) (4.0)\n", - "Requirement already satisfied: cachetools<5.0,>=2.0.0 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from google-auth<2,>=1.6.3->tensorboard<2.2.0,>=2.1.0->tensorflow==2.1.0) (4.0.0)\n", - "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from 
google-auth<2,>=1.6.3->tensorboard<2.2.0,>=2.1.0->tensorflow==2.1.0) (0.2.8)\n", - "Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from google-auth-oauthlib<0.5,>=0.4.1->tensorboard<2.2.0,>=2.1.0->tensorflow==2.1.0) (1.3.0)\n", - "Requirement already satisfied: idna<2.9,>=2.5 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from requests<3,>=2.21.0->tensorboard<2.2.0,>=2.1.0->tensorflow==2.1.0) (2.8)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from requests<3,>=2.21.0->tensorboard<2.2.0,>=2.1.0->tensorflow==2.1.0) (2019.11.28)\n", - "Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from requests<3,>=2.21.0->tensorboard<2.2.0,>=2.1.0->tensorflow==2.1.0) (3.0.4)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from requests<3,>=2.21.0->tensorboard<2.2.0,>=2.1.0->tensorflow==2.1.0) (1.25.8)\r\n", - "Requirement already satisfied: pyasn1>=0.1.3 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from rsa<4.1,>=3.1.4->google-auth<2,>=1.6.3->tensorboard<2.2.0,>=2.1.0->tensorflow==2.1.0) (0.4.8)\r\n", - "Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<0.5,>=0.4.1->tensorboard<2.2.0,>=2.1.0->tensorflow==2.1.0) (3.1.0)\r\n" - ] - } - ], + "outputs": [], "source": [ "!pip install transformers\n", "!pip install tensorflow==2.1.0" @@ -174,7 +103,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 2, @@ -441,7 
+370,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": { "pycharm": { "is_executing": false @@ -458,13 +387,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": { "pycharm": { "is_executing": false } }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "output differences: 1.6236e-05\n", + "pooled differences: -1.3039e-08\n" + ] + } + ], "source": [ "# transformers generates a ready to use dictionary with all the required parameters for the specific framework.\n", "input_tf = tokenizer.encode_plus(\"This is a sample input\", return_tensors=\"tf\")\n", @@ -476,7 +414,7 @@ "# Models outputs 2 values (The value for each tokens, the pooled representation of the input sentence)\n", "# Here we compare the output differences between PyTorch and TensorFlow.\n", "for name, o_tf, o_pt in zip([\"output\", \"pooled\"], output_tf, output_pt):\n", - " print(\"{} differences: {}\".format(name, (o_tf.numpy() - o_pt.numpy()).sum()))" + " print(\"{} differences: {:.5}\".format(name, (o_tf.numpy() - o_pt.numpy()).sum()))" ] }, { @@ -504,13 +442,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": { "pycharm": { "is_executing": false } }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 232 ms, sys: 0 ns, total: 232 ms\n", + "Wall time: 21.1 ms\n", + "CPU times: user 511 ms, sys: 0 ns, total: 511 ms\n", + "Wall time: 43.9 ms\n" + ] + } + ], "source": [ "from transformers import DistilBertModel\n", "\n", @@ -541,13 +490,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": { "pycharm": { "is_executing": false } }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Tokens (int) : [102, 12272, 9355, 5746, 30881, 215, 261, 5945, 4118, 212, 2414, 153, 1942, 232, 
3532, 566, 103]\n", + "Tokens (str) : ['[CLS]', 'Hug', '##ging', 'Fac', '##e', 'ist', 'eine', 'französische', 'Firma', 'mit', 'Sitz', 'in', 'New', '-', 'York', '.', '[SEP]']\n", + "Tokens (attn_mask): [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]\n", + "\n", + "Token wise output: torch.Size([1, 17, 768]), Pooled output: torch.Size([1, 768])\n" + ] + } + ], "source": [ "# Let's load German BERT from the Bavarian State Library\n", "de_bert = BertModel.from_pretrained(\"dbmdz/bert-base-german-cased\")\n", @@ -557,7 +518,14 @@ " \"Hugging Face ist eine französische Firma mit Sitz in New-York.\",\n", " return_tensors=\"pt\"\n", ")\n", - "output_de, pooled_de = de_bert(**de_input)" + "print(\"Tokens (int) : {}\".format(de_input['input_ids'].tolist()[0]))\n", + "print(\"Tokens (str) : {}\".format([de_tokenizer.convert_ids_to_tokens(s) for s in de_input['input_ids'].tolist()[0]]))\n", + "print(\"Tokens (attn_mask): {}\".format(de_input['attention_mask'].tolist()[0]))\n", + "print()\n", + "\n", + "output_de, pooled_de = de_bert(**de_input)\n", + "\n", + "print(\"Token wise output: {}, Pooled output: {}\".format(output_de.shape, pooled_de.shape))" ] } ], @@ -577,7 +545,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.6" + "version": "3.7.4" }, "pycharm": { "stem_cell": { @@ -590,5 +558,5 @@ } }, "nbformat": 4, - "nbformat_minor": 1 + "nbformat_minor": 4 }