[TensorFlow 2.0] Word Embeddings in TensorFlow

Photo by Adrian Pereira on Unsplash

I. Technical Setup

from __future__ import absolute_import, division, print_function, unicode_literalstry:# %tensorflow_version only exists in Colab.
!pip install tf-nightly
except Exception:
pass
import tensorflow as tffrom tensorflow import keras
from tensorflow.keras import layers
import tensorflow_datasets as tfds
tfds.disable_progress_bar()

II. Load IMDB data set

# Fetch the IMDB reviews dataset, pre-tokenized with an ~8k-subword
# vocabulary. `info` carries dataset metadata, including the encoder.
wanted_splits = (tfds.Split.TRAIN, tfds.Split.TEST)
(train_data, test_data), info = tfds.load(
    'imdb_reviews/subwords8k',
    split=wanted_splits,
    with_info=True,
    as_supervised=True,
)

III. Standardize the length of the reviews

# Reviews have different lengths, so each batch pads its members up to
# the longest review in that batch: [None] = variable-length text,
# () = scalar label.
batch_shapes = ([None], ())
train_batches = (
    train_data.shuffle(1000).padded_batch(10, padded_shapes=batch_shapes)
)
test_batches = (
    test_data.shuffle(1000).padded_batch(10, padded_shapes=batch_shapes)
)

# Peek at one padded batch to confirm the shapes.
train_batch, train_labels = next(iter(train_batches))
train_batch.numpy()

Some Python basics: `__iter__` and `__next__`; a for loop is the more elegant method. 😎

# Python iterator basics: iter() produces an iterator, next() advances it.
# Fixes vs. the pasted source: iter() was called on an undefined name
# `my_list`; an exhausting next() call was fused into a comment line; the
# list literal and the for loop were fused onto one line.
my_birthday = [12, 30, 'cecil', 'kim']
birthday_iter = iter(my_birthday)
# iterate through it using next()
print(next(birthday_iter))        # 12
print(next(birthday_iter))        # 30
print(birthday_iter.__next__())   # cecil
print(birthday_iter.__next__())   # kim
# next(obj) is the same as obj.__next__(). Calling next() again here
# would raise StopIteration — the iterator is exhausted:
# next(birthday_iter)  # StopIteration!

# A for loop is the idiomatic way to consume an iterable.
my_birthday = [12, 30, 'cecil', 'kim']
for item in my_birthday:
    print(item)

IV. Modelling

# Inspect the dataset's subword text encoder.
encoder = info.features['text'].encoder
encoder.subwords[5:20]
encoder.vocab_size  # 8185

# Round-trip a sample sentence through the encoder.
# fixed: the pasted source had a mangled quote (''love ... yourself.')
# and fused the next assignment onto the same line.
sample_string = 'love your neighbor as yourself.'
encoded_string = encoder.encode(sample_string)
print('Encoded string is {}'.format(encoded_string))
original_string = encoder.decode(encoded_string)
print('The original string: "{}"'.format(original_string))

# Show which subword each token id maps to.
for ts in encoded_string:
    print('{} ----> {}'.format(ts, encoder.decode([ts])))
encoder.subwords[173]  # 'love_'
encoder.subwords[154]  # 'your_'
encoder.subwords[19]   # 'as_'

# Size of the learned embedding vectors, used by the model below.
embedding_dim = 16

# A small continuous-bag-of-words sentiment classifier:
# embed tokens -> average over the sequence -> dense -> sigmoid score.
model = keras.Sequential()
model.add(layers.Embedding(encoder.vocab_size, embedding_dim))
model.add(layers.GlobalAveragePooling1D())
model.add(layers.Dense(16, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))

model.summary()

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Train for 10 epochs, validating on 20 batches of the test split.
history = model.fit(
    train_batches,
    epochs=10,
    validation_data=test_batches,
    validation_steps=20,
)
import matplotlib.pyplot as plt

# Pull the per-epoch metrics recorded during model.fit().
history_dict = history.history

acc = history_dict['accuracy']
val_acc = history_dict['val_accuracy']
loss = history_dict['loss']
val_loss = history_dict['val_loss']

epochs = range(1, len(acc) + 1)


def _plot_curves(train_vals, val_vals, ylabel, train_label, val_label,
                 title, legend_loc=None, ylim=None):
    """Draw one training-vs-validation curve pair and show the figure."""
    plt.figure(figsize=(4, 3))
    plt.plot(epochs, train_vals, 'bo', label=train_label)
    plt.plot(epochs, val_vals, 'b', label=val_label)
    plt.title(title)
    plt.xlabel('Epochs')
    plt.ylabel(ylabel)
    if legend_loc is None:
        plt.legend()
    else:
        plt.legend(loc=legend_loc)
    if ylim is not None:
        plt.ylim(ylim)
    plt.show()


_plot_curves(loss, val_loss, 'Loss',
             'Training loss', 'Validation loss',
             'Training and validation loss')
_plot_curves(acc, val_acc, 'Accuracy',
             'Training acc', 'Validation acc',
             'Training and validation accuracy',
             legend_loc='lower right', ylim=(0.5, 1))

--

--

Get the Medium app

A button that says 'Download on the App Store', and if clicked it will lead you to the iOS App store
A button that says 'Get it on, Google Play', and if clicked it will lead you to the Google Play store