import plotly.io as pio
# Select the Plotly renderer used to display figures inside the notebook.
pio.renderers.default = 'notebook'  # For example
import numpy as np
def f(x):
    """Evaluate the noiseless target function |x| * (1 + sin(x)^2) + 1.

    Uses NumPy ufuncs, so `x` may be a scalar or an array; the result is
    computed elementwise.
    """
    return np.abs(x)*(1+np.sin(x) ** 2) + 1
import plotly.graph_objs as go
# Sample the function on a regular grid over D = [-5, 5] and add unit-variance
# Gaussian noise to obtain the training targets.
n = 1000
X = np.linspace(-5,5,n)
val = f(X)
y = val + np.random.normal(0,1, size=n)
# Noisy observations as markers, noiseless function as a line.
fig = go.Figure(go.Scatter(x = X, y = y, mode="markers", name="Data points"))
fig.add_trace(go.Scatter(x=X, y=val, name="True function"))
fig.update_layout(title="Data and its underlying function",
                  width=800, height=500,
                  xaxis_title=dict(text='x'),
                  yaxis_title=dict(text='y=f(x)'))
fig.show()
TP3 - Deep Neural Networks (DNN)
Course: Advanced Machine Learning
Lecturer: Sothea HAS, PhD
Objective: We have studied how to detect the connection between inputs and the target. It is important to truly estimate this connection through various models. This practical session (TP) is designed to familiarize you with the concept and application of Deep Neural Networks, a powerful class of models that is theoretically able to reconstruct any reasonably complex input-output relationship.
- The notebook of this TP can be downloaded here: TP3_DNN.ipynb.
1. Universal Approximation Theorem
DNNs have been proven to be universal approximators, which means that they can approximate any reasonably complex continuous function.
A. Create your own function \(f\), for example:
\[f(x)=|x|(1+\sin^2(x))+1+\epsilon, \epsilon\sim{\cal N}(0,\sigma^2).\]
- Define a Python function to evaluate your function \(f\).
- Plot the graph of this function at \(n=1000\) values over some domain \(D\), for example \(D=[-5,5]\).
B. Build a \(2\)-layer DNN with your favorite hyperparameters to estimate this function on the domain \(D\) (You can use your favorite Python
modules such as Keras or Pytorch
to build the model).
Vary the hyperparameters such as mini-batch, number of epochs, penalty strength,… of the network for better approximation.
Plot the learning curves as you make changes to the network.
Plot the fitted curve and compare to the true data above.
# This is an example with Keras
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense, Input
from keras import regularizers
# Input dimension
d = 1
model = Sequential()
model.add(Input(shape=(d,)))
# To do: two hidden ReLU layers followed by a linear output for regression
model.add(Dense(64, activation="relu"))
model.add(Dense(64, activation="relu"))
model.add(Dense(1, activation="linear"))
# Set up optimizer for our model
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mse'])
# Training the network
# NOTE: Keras takes the validation split from the LAST 10% of the samples,
# before shuffling. If X is an ordered grid (e.g. built with np.linspace), the
# validation set is the right end of the domain, not a random subset.
history = model.fit(X, y, epochs=2000, batch_size=32, validation_split=0.1, verbose=0)
# Extract loss values
train_loss = history.history['loss']
val_loss = history.history['val_loss']
import plotly.io as pio
pio.renderers.default = 'notebook'
# Plot the learning curves (epoch axis on a log scale)
epochs = list(range(1, len(train_loss) + 1))
fig1 = go.Figure(go.Scatter(x=epochs, y=train_loss, name="Training loss"))
# The second trace is the validation loss; label it as such in the legend.
fig1.add_trace(go.Scatter(x=epochs, y=val_loss, name="Validation loss"))
fig1.update_layout(title="Training and Validation Loss",
                   width=800, height=500,
                   xaxis=dict(title="Epoch", type="log"),
                   yaxis=dict(title="Loss"))
fig1.show()
import plotly.io as pio
pio.renderers.default = 'notebook'
from sklearn.metrics import mean_squared_error
y_pred = model.predict(X)
# The printed value is the MSE divided by the squared mean of y, i.e. a
# normalised MSE rather than the raw one.
print(f'MSE: {mean_squared_error(y, y_pred)/np.mean(y) ** 2}')
# Reuse the data and true-function traces from the first figure.
fig_pred = go.Figure(data=[
    fig.data[0],
    fig.data[1]
])
fig_pred.add_trace(go.Scatter(x=X, y=y_pred.reshape(-1), mode='lines', name="prediction", line=dict(color="green")))
fig_pred.update_layout(width= 800, height= 600, title="True vs Prediction")
fig_pred.show()
32/32 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step
MSE: 0.039028134998251314
- Using the trained network to predict \(x\) outside the domain \(D\), for example, on the interval
x_test = np.linspace(5,7,50)
. What do you observe?
import plotly.io as pio
pio.renderers.default = 'notebook'
# x_new lies inside the training domain [-5, 5]; x_out extends beyond it.
x_full = np.linspace(-5, 8, 100)
x_new = np.linspace(-5, 5, 100)
x_out = np.linspace(5, 8, 100)
y_new = model.predict(x_new).reshape(-1)
y_out = model.predict(x_out).reshape(-1)
fig_out = go.Figure(go.Scatter(x=x_full, y=f(x_full), name="True function"))
fig_out.add_trace(go.Scatter(x=x_new, y=y_new, name="On-domain predictions", line=dict(color="red")))
fig_out.add_trace(go.Scatter(x=x_out, y=y_out, name="Outside domain predictions", line=dict(color="green", dash = "dash")))
fig_out.update_layout(width=800, height=600, title='Outside domain prediction', xaxis = dict(title="x"), yaxis=dict(title="y"))
fig_out.show()
4/4 ━━━━━━━━━━━━━━━━━━━━ 0s 14ms/step
4/4 ━━━━━━━━━━━━━━━━━━━━ 0s 13ms/step
Neural networks are compositions of matrix multiplications and nonlinear activation functions. They learn by adjusting weights and biases to minimize a loss function. The resulting network is not the true input-output relationship on the entire domain but an approximation of what it has seen within the given range of inputs. The generalization outside the domain/range is poor because, by construction, the network is almost linear beyond some threshold of inputs (outside the domain). In other words, it is poor at extrapolating but great at interpolating.
- Let’s try to modify some hyperparameters and observe the performance of the network.
- Normally, increasing the batch size has a similar effect to decaying the learning rate (see: Smith et al. (2018)). We shall explore that with the number of epochs.
# We set up a model with 2 hidden layers (64 units each)
model = Sequential()
model.add(Input(shape=(d,)))
# To do
model.add(Dense(64, activation="relu"))
model.add(Dense(64, activation="relu"))
model.add(Dense(1, activation="linear"))
# Set up optimizer for our model
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mse'])
# Training the network with a larger mini-batch (120 instead of 32)
# NOTE: validation_split holds out the last 10% of samples, before shuffling.
history = model.fit(X, y, epochs=2000, batch_size=120, validation_split=0.1, verbose=0)
# Extract loss values
train_loss = history.history['loss']
val_loss = history.history['val_loss']
import plotly.io as pio
pio.renderers.default = 'notebook'
# Plot the learning curves (epoch axis on a log scale)
epochs = list(range(1, len(train_loss) + 1))
fig1 = go.Figure(go.Scatter(x=epochs, y=train_loss, name="Training loss"))
# The second trace is the validation loss; label it as such in the legend.
fig1.add_trace(go.Scatter(x=epochs, y=val_loss, name="Validation loss"))
# Last expression of the cell: the notebook displays the returned figure.
fig1.update_layout(title="Training and Validation Loss",
                   width=800, height=500,
                   xaxis=dict(title="Epoch", type="log"),
                   yaxis=dict(title="Loss"))
import plotly.io as pio
pio.renderers.default = 'notebook'
# x_new lies inside the training domain [-5, 5]; x_out extends beyond it.
x_full = np.linspace(-5, 8, 100)
x_new = np.linspace(-5, 5, 100)
x_out = np.linspace(5, 8, 100)
y_new = model.predict(x_new).reshape(-1)
y_out = model.predict(x_out).reshape(-1)
fig_out = go.Figure(go.Scatter(x=x_full, y=f(x_full), name="True function"))
fig_out.add_trace(go.Scatter(x=x_new, y=y_new, name="On-domain predictions", line=dict(color="red")))
fig_out.add_trace(go.Scatter(x=x_out, y=y_out, name="Outside domain predictions", line=dict(color="green", dash = "dash")))
fig_out.update_layout(width=800, height=600, title='Outside domain prediction', xaxis = dict(title="x"), yaxis=dict(title="y"))
fig_out.show()
4/4 ━━━━━━━━━━━━━━━━━━━━ 0s 44ms/step
4/4 ━━━━━━━━━━━━━━━━━━━━ 0s 14ms/step
- The learning curve suggests that the validation curve converges more slowly towards the stable region compared to the previous case. This is what we would observe with a small learning rate. However, increasing the batch size provides a more stable learning process than the case of a small batch size and a small learning rate.
- Let’s increase the number of neurons.
# We set up a model with 2 hidden layers, now with 128 units each
model = Sequential()
model.add(Input(shape=(d,)))
# To do
model.add(Dense(128, activation="relu"))
model.add(Dense(128, activation="relu"))
model.add(Dense(1, activation="linear"))
# Set up optimizer for our model
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mse'])
# Training the network
# NOTE: validation_split holds out the last 10% of samples, before shuffling.
history = model.fit(X, y, epochs=2000, batch_size=120, validation_split=0.1, verbose=0)
# Extract loss values
train_loss = history.history['loss']
val_loss = history.history['val_loss']
import plotly.io as pio
pio.renderers.default = 'notebook'
# Plot the learning curves (epoch axis on a log scale)
epochs = list(range(1, len(train_loss) + 1))
fig1 = go.Figure(go.Scatter(x=epochs, y=train_loss, name="Training loss"))
# The second trace is the validation loss; label it as such in the legend.
fig1.add_trace(go.Scatter(x=epochs, y=val_loss, name="Validation loss"))
# Last expression of the cell: the notebook displays the returned figure.
fig1.update_layout(title="Training and Validation Loss",
                   width=800, height=500,
                   xaxis=dict(title="Epoch", type="log"),
                   yaxis=dict(title="Loss"))
import plotly.io as pio
pio.renderers.default = 'notebook'
# x_new lies inside the training domain [-5, 5]; x_out extends beyond it.
x_full = np.linspace(-5, 8, 100)
x_new = np.linspace(-5, 5, 100)
x_out = np.linspace(5, 8, 100)
y_new = model.predict(x_new).reshape(-1)
y_out = model.predict(x_out).reshape(-1)
fig_out = go.Figure(go.Scatter(x=x_full, y=f(x_full), name="True function"))
fig_out.add_trace(go.Scatter(x=x_new, y=y_new, name="On-domain predictions", line=dict(color="red")))
fig_out.add_trace(go.Scatter(x=x_out, y=y_out, name="Outside domain predictions", line=dict(color="green", dash = "dash")))
fig_out.update_layout(width=800, height=600, title='Outside domain prediction', xaxis = dict(title="x"), yaxis=dict(title="y"))
fig_out.show()
4/4 ━━━━━━━━━━━━━━━━━━━━ 0s 28ms/step
4/4 ━━━━━━━━━━━━━━━━━━━━ 0s 13ms/step
C. Increase the number of layers and observe the change in the network approximation power.
Compare the approximation to the true function.
Make outside domain prediction. Comment.
# We set up a model with 3 hidden layers (128 units each)
model = Sequential()
model.add(Input(shape=(d,)))
# To do
model.add(Dense(128, activation="relu"))
model.add(Dense(128, activation="relu"))
model.add(Dense(128, activation="relu"))
model.add(Dense(1, activation="linear"))
# Set up optimizer for our model
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mse'])
# Training the network
# NOTE: validation_split holds out the last 10% of samples, before shuffling.
history = model.fit(X, y, epochs=2000, batch_size=128, validation_split=0.1, verbose=0)
# Extract loss values
train_loss = history.history['loss']
val_loss = history.history['val_loss']
The learning curves
import plotly.io as pio
pio.renderers.default = 'notebook'
# Plot the learning curves (epoch axis on a log scale)
epochs = list(range(1, len(train_loss) + 1))
fig2 = go.Figure(go.Scatter(x=epochs, y=train_loss, name="Training loss"))
# The second trace is the validation loss; label it as such in the legend.
fig2.add_trace(go.Scatter(x=epochs, y=val_loss, name="Validation loss"))
fig2.update_layout(title="Training and Validation Loss",
                   width=800, height=500,
                   xaxis=dict(title="Epoch", type="log"),
                   yaxis=dict(title="Loss"))
fig2.show()
import plotly.io as pio
pio.renderers.default = 'notebook'
# Reuses the x_full / x_new / x_out grids defined in the earlier cells.
y_new2 = model.predict(x_new).reshape(-1)
y_out2 = model.predict(x_out).reshape(-1)
fig_out2 = go.Figure(go.Scatter(x=x_full, y=f(x_full), name="True function"))
fig_out2.add_trace(go.Scatter(x=x_new, y=y_new2, name="On-domain predictions", line=dict(color="red")))
fig_out2.add_trace(go.Scatter(x=x_out, y=y_out2, name="Outside domain predictions", line=dict(color="green", dash = "dash")))
fig_out2.update_layout(width=800, height=600, title='Outside domain prediction', xaxis = dict(title="x"), yaxis=dict(title="y"))
fig_out2.show()
4/4 ━━━━━━━━━━━━━━━━━━━━ 0s 23ms/step
4/4 ━━━━━━━━━━━━━━━━━━━━ 0s 11ms/step
- Let’s train the network even longer; we would like to see whether the network overfits the data.
# We set up a model with 3 hidden layers (128 units each)
model = Sequential()
model.add(Input(shape=(d,)))
# To do
model.add(Dense(128, activation="relu"))
model.add(Dense(128, activation="relu"))
model.add(Dense(128, activation="relu"))
model.add(Dense(1, activation="linear"))
# We use Adam optimizer
# Set up optimizer for our model
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mse'])
# Training the network for longer (5000 epochs) to look for overfitting
# NOTE: validation_split holds out the last 10% of samples, before shuffling.
history = model.fit(X, y, epochs=5000, batch_size=128, validation_split=0.1, verbose=0)
# Extract loss values
train_loss = history.history['loss']
val_loss = history.history['val_loss']
import plotly.io as pio
pio.renderers.default = 'notebook'
# Plot the learning curves (epoch axis on a log scale)
epochs = list(range(1, len(train_loss) + 1))
fig3 = go.Figure(go.Scatter(x=epochs, y=train_loss, name="Training loss"))
# The second trace is the validation loss; label it as such in the legend.
fig3.add_trace(go.Scatter(x=epochs, y=val_loss, name="Validation loss"))
fig3.update_layout(title="Training and Validation Loss",
                   width=800, height=500,
                   xaxis=dict(title="Epoch", type="log"),
                   yaxis=dict(title="Loss"))
fig3.show()
import plotly.io as pio
pio.renderers.default = 'notebook'
# Reuses the x_full / x_new / x_out grids defined in the earlier cells.
y_new3 = model.predict(x_new).reshape(-1)
y_out3 = model.predict(x_out).reshape(-1)
fig_out3 = go.Figure(go.Scatter(x=x_full, y=f(x_full), name="True function"))
fig_out3.add_trace(go.Scatter(x=x_new, y=y_new3, name="On-domain predictions", line=dict(color="red")))
fig_out3.add_trace(go.Scatter(x=x_out, y=y_out3, name="Outside domain predictions", line=dict(color="green", dash = "dash")))
fig_out3.update_layout(width=800, height=600, title='Outside domain prediction', xaxis = dict(title="x"), yaxis=dict(title="y"))
fig_out3.show()
4/4 ━━━━━━━━━━━━━━━━━━━━ 0s 25ms/step
4/4 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step
- We observe that the network barely overfits the data probably due to symmetric noise with constant variance.
2. Heart disease dataset
Your mission here is to recreate what we had done in the course on the Heart Disease Dataset.
Report test performance metrics.
import numpy as np
import pandas as pd
import kagglehub
# Download latest version of the dataset; `path` is the local download directory
path = kagglehub.dataset_download("johnsmith88/heart-disease-dataset")
data = pd.read_csv(path + "/heart.csv")
# Preview the first five rows
data.head(5)
age | sex | cp | trestbps | chol | fbs | restecg | thalach | exang | oldpeak | slope | ca | thal | target | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 52 | 1 | 0 | 125 | 212 | 0 | 1 | 168 | 0 | 1.0 | 2 | 2 | 3 | 0 |
1 | 53 | 1 | 0 | 140 | 203 | 1 | 0 | 155 | 1 | 3.1 | 0 | 0 | 3 | 0 |
2 | 70 | 1 | 0 | 145 | 174 | 0 | 1 | 125 | 1 | 2.6 | 0 | 0 | 3 | 0 |
3 | 61 | 1 | 0 | 148 | 203 | 0 | 1 | 161 | 0 | 0.0 | 2 | 1 | 3 | 0 |
4 | 62 | 0 | 0 | 138 | 294 | 1 | 1 | 106 | 0 | 1.9 | 1 | 3 | 2 | 0 |
This can be found in the course: Deep Learning.
3. Mnist dataset
In this section, you will work with the MNIST dataset. It can be imported using the following code.
from keras.datasets import mnist
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
import matplotlib.pyplot as plt
import numpy as np
# Draw 10 random training indices and show the digits on a 2 x 5 grid.
digit = np.random.choice(train_images.shape[0], size=10)
_ , axs = plt.subplots(2,5, figsize=(9, 3))
for i in range(10):
    axs[i//5, i%5].imshow(train_images[digit[i]])
    axs[i//5, i%5].axis("off")
    axs[i//5, i%5].set_title(f"True label: {train_labels[digit[i]]}")
plt.tight_layout()
plt.show()
- Build your own designed DNN to identify the digits of testing images.
- Evaluate its performance using suitable metrics and conclude.
# Processing the data: flatten each 28 x 28 image to a 784-vector and scale
# the pixel values by 1/255.
train_images = train_images.reshape((60000, 28 * 28))
train_images = train_images.astype("float32") / 255
test_images = test_images.reshape((10000, 28 * 28))
test_images = test_images.astype("float32") / 255

print(f'Train images shape: {train_images.shape}')
print(f'Train labels shape: {train_labels.shape}')
from tensorflow.keras.utils import to_categorical
# One-hot encode the labels for the categorical cross-entropy loss.
train_labels = to_categorical(train_labels)
test_labels = to_categorical(test_labels)
Train images shape: (60000, 784)
Train labels shape: (60000,)
# We set up a model with 3 hidden layers and a 10-way softmax output
model = Sequential()
model.add(Input(shape=(train_images.shape[1],)))
# To do
model.add(Dense(128, activation="relu"))
model.add(Dense(128, activation="relu"))
model.add(Dense(128, activation="relu"))
model.add(Dense(10, activation="softmax"))
# Set up optimizer for our model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
# Training the network
history = model.fit(train_images, train_labels, epochs=50, batch_size=200, validation_split=0.1, verbose=0)
# Extract loss values
train_loss = history.history['loss']
val_loss = history.history['val_loss']
import plotly.io as pio
pio.renderers.default = 'notebook'
# Plot the learning curves (epoch axis on a log scale)
epochs = list(range(1, len(train_loss) + 1))
fig3 = go.Figure(go.Scatter(x=epochs, y=train_loss, name="Training loss"))
# The second trace is the validation loss; label it as such in the legend.
fig3.add_trace(go.Scatter(x=epochs, y=val_loss, name="Validation loss"))
# Last expression of the cell: the notebook displays the returned figure.
fig3.update_layout(title="Training and Validation Loss on Mnist dataset",
                   width=800, height=500,
                   xaxis=dict(title="Epoch", type="log"),
                   yaxis=dict(title="Loss"))
# Test accuracy: compare the arg-max class of the predictions with the
# arg-max of the one-hot encoded labels.
pred_test = model.predict(test_images).argmax(axis=1)
true_test = test_labels.argmax(axis=1)
print(f'Test accuracy on Mnist: {np.mean(pred_test==true_test)}')
313/313 ━━━━━━━━━━━━━━━━━━━━ 0s 1ms/step
Test accuracy on Mnist: 0.9839
Let’s see what the mispredicted digits look like.
_ , axs = plt.subplots(2,5, figsize=(15, 5))
# Indices of the test images whose predicted class differs from the true one.
mispredicted_id = np.where(pred_test!=true_test)[0]
# Restore the 28 x 28 image layout for display.
test_images = test_images.reshape(10000, 28, 28)
# Show at most 10 errors; the guard avoids an IndexError if there are fewer.
for i in range(min(10, len(mispredicted_id))):
    axs[i//5, i%5].imshow(test_images[mispredicted_id[i],:,:])
    axs[i//5, i%5].axis("off")
    axs[i//5, i%5].set_title(f"True label: {true_test[mispredicted_id[i]]}; Pred: {pred_test[mispredicted_id[i]]}")
plt.tight_layout()
plt.show()
References
\(^{\text{📚}}\) Deep Learning, Ian Goodfellow (2016).
\(^{\text{📚}}\) Hands-on ML with Sklearn, Keras & Tensorflow, Aurélien Géron (2017).
\(^{\text{📚}}\) Heart Disease Dataset.
\(^{\text{📚}}\) Backpropagation, 3B1B.