# -*- coding: utf-8 -*-
"""transformers matrices demo.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1MwZPSHeluIEjyXYMzhjraYH4-Z2Blne3
"""

!pip install transformers
!pip freeze

from transformers import BertTokenizer, BertModel, BertForMaskedLM

our_sentence = "The quick brown fox jumped over the lazy dog."

t = BertTokenizer.from_pretrained("bert-base-uncased")

# Load both the plain BERT encoder and the masked-language-model variant,
# and run the sentence through each one once.
for m in [BertModel, BertForMaskedLM]:
    model = m.from_pretrained('bert-base-uncased')
    model.eval()
    tokenized = t(our_sentence, return_tensors='pt')
    embeds = model(**tokenized)

tokenized

t.convert_ids_to_tokens(tokenized['input_ids'][0])

# Out-of-vocabulary words are split into WordPiece subwords.
t.tokenize("The quick brown fox jumped over the lazyyyyy dog.")

t.tokenize("speediness")

plain_model = BertModel.from_pretrained('bert-base-uncased')
plain_model.eval()
tokenized = t(our_sentence, return_tensors='pt')

plain_model

embeds = plain_model(**tokenized, output_attentions=True)

embeds.keys()

embeds['last_hidden_state']

len(embeds['attentions'])            # one attention tensor per layer

embeds['attentions'][0].shape

embeds['attentions'][-1].shape

# self-attention weights for every head in the last layer:
# shape (batch_size, num_heads, seq_len, seq_len)
embeds['attentions'][-1].shape

embeds['attentions'][-1][0][0]       # attention weights (softmax of QK^T / sqrt(d_k)) for the first attention head

embeds['attentions'][-1][0][0][4]    # first head: attention from the query token "fox" (position 4) at the last layer

embeds['attentions'][-1][0][4][0]    # fifth head: attention from the [CLS] query token

embeds['attentions'][-1][0][-3][0]   # head at index -3: attention from the [CLS] query token

our_sentence

our_sentence_with_subwords = "The quick brown fox jumped over the lazyyyy dog."
tokenized = t(our_sentence_with_subwords, return_tensors='pt')
# note: `model` here is still the BertForMaskedLM instance left over from the loop above
embeds = model(**tokenized, output_attentions=True)

t.convert_ids_to_tokens(tokenized['input_ids'][0])

embeds['attentions'][-1][0][-7][0]   # head at index -7: attention from the [CLS] query token

embeds['attentions'][-1][0][0][-7]   # first head: attention from the query token at position -7 (near "lazy" in the subword split)

masked_language_model = BertForMaskedLM.from_pretrained('bert-base-uncased')
masked_language_model.eval()
tokenized = t(our_sentence, return_tensors='pt')
embeds = masked_language_model(**tokenized)

embeds.keys()
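
# A minimal follow-up sketch of what the masked-LM head is for, assuming the
# standard Hugging Face BertForMaskedLM API and reusing `t` and
# `masked_language_model` from the cells above: mask out "fox" and look at the
# model's top guesses for that position.
import torch

masked_sentence = "The quick brown [MASK] jumped over the lazy dog."
masked_inputs = t(masked_sentence, return_tensors='pt')
with torch.no_grad():
    logits = masked_language_model(**masked_inputs).logits        # (1, seq_len, vocab_size)
mask_position = (masked_inputs['input_ids'][0] == t.mask_token_id).nonzero(as_tuple=True)[0]
top_ids = logits[0, mask_position].topk(5, dim=-1).indices[0]
t.convert_ids_to_tokens(top_ids.tolist())                         # top-5 candidate fillers for [MASK]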
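
# A small sanity-check sketch, assuming `plain_model`, `t`, and `our_sentence`
# from the cells above are still in scope: the 'attentions' tensors returned by
# the model are already softmax-normalized, so each query row should sum to 1
# over the key positions.
import torch

with torch.no_grad():
    check = plain_model(**t(our_sentence, return_tensors='pt'), output_attentions=True)
last_layer_attn = check['attentions'][-1]      # (batch, num_heads, seq_len, seq_len)
last_layer_attn.sum(dim=-1)                    # every entry should be ~1.0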