CNNs are designed for grid-structured data (images). A convolutional layer applies \(K\) learnable filters to local neighborhoods. Conceptually, for an input image \(X\) and filter \(W\):
CNNs exploit:
In this lesson we:
# Load Fashion-MNIST; swap in dataset_mnist() if this dataset is not
# available in your keras install.
fashion <- dataset_fashion_mnist()
x_train <- fashion$train$x
y_train <- fashion$train$y
x_test  <- fashion$test$x
y_test  <- fashion$test$y
dim(x_train); length(y_train)
## [1] 60000 28 28
## [1] 60000
## [1] 10000 28 28
## [1] 10000
CNN expects a 4D tensor: \((n, H, W, C)\).
# Reshape each image set to the (n, H, W, C) tensor layout that keras
# expects, and rescale pixel intensities from [0, 255] into [0, 1].
prep_images <- function(imgs) {
  array_reshape(imgs, dim = c(dim(imgs)[1], 28, 28, 1)) / 255
}
x_train <- prep_images(x_train)
x_test  <- prep_images(x_test)

# One-hot encode the 10 class labels for categorical cross-entropy.
num_classes <- 10
y_train_oh <- to_categorical(y_train, num_classes)
y_test_oh  <- to_categorical(y_test, num_classes)
dim(x_train); dim(y_train_oh)
## [1] 60000 28 28 1
## [1] 60000 10
# Reproducible 90/10 train/validation split of the training set.
set.seed(123)
n <- dim(x_train)[1]
perm <- sample.int(n)
n_tr <- floor(0.9 * n)
tr_idx <- perm[seq_len(n_tr)]
va_idx <- perm[seq(n_tr + 1, n)]
# drop = FALSE keeps the 4D image / 2D label array structure intact
# even though we subset along the first dimension only.
X_tr <- x_train[tr_idx, , , , drop = FALSE]
Y_tr <- y_train_oh[tr_idx, , drop = FALSE]
X_va <- x_train[va_idx, , , , drop = FALSE]
Y_va <- y_train_oh[va_idx, , drop = FALSE]

We use:
# VGG-style architecture: two conv blocks (conv-BN-relu twice, then
# max-pool and dropout) followed by a dense classification head.
conv_block <- function(tensor, n_filters, drop_rate) {
  tensor |>
    layer_conv_2d(filters = n_filters, kernel_size = c(3, 3), padding = "same") |>
    layer_batch_normalization() |>
    layer_activation("relu") |>
    layer_conv_2d(filters = n_filters, kernel_size = c(3, 3), padding = "same") |>
    layer_batch_normalization() |>
    layer_activation("relu") |>
    layer_max_pooling_2d(pool_size = c(2, 2)) |>
    layer_dropout(rate = drop_rate)
}

input <- layer_input(shape = c(28, 28, 1), name = "image")
x <- input |>
  conv_block(32, 0.25) |>
  conv_block(64, 0.30) |>
  layer_flatten() |>
  layer_dense(units = 128, activation = "relu") |>
  layer_dropout(rate = 0.40)
output <- x |>
  layer_dense(units = num_classes, activation = "softmax", name = "class_prob")
cnn <- keras_model(inputs = input, outputs = output)

# Adam optimizer with the keras default learning rate; cross-entropy
# loss matches the one-hot targets, and we track accuracy.
cnn |>
  compile(
    optimizer = optimizer_adam(learning_rate = 1e-3),
    loss = "categorical_crossentropy",
    metrics = "accuracy"
  )
cnn
## Model: "functional_10"
## ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓
## ┃ Layer (type) ┃ Output Shape ┃ Param # ┃ Trainable ┃
## ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩
## │ image (InputLayer) │ (None, 28, 28, 1) │ 0 │ - │
## ├────────────────────────────────────────────┼───────────────────────────────────┼──────────────────┼───────────┤
## │ conv2d (Conv2D) │ (None, 28, 28, 32) │ 320 │ Y │
## ├────────────────────────────────────────────┼───────────────────────────────────┼──────────────────┼───────────┤
## │ batch_normalization (BatchNormalization) │ (None, 28, 28, 32) │ 128 │ Y │
## ├────────────────────────────────────────────┼───────────────────────────────────┼──────────────────┼───────────┤
## │ activation (Activation) │ (None, 28, 28, 32) │ 0 │ - │
## ├────────────────────────────────────────────┼───────────────────────────────────┼──────────────────┼───────────┤
## │ conv2d_1 (Conv2D) │ (None, 28, 28, 32) │ 9,248 │ Y │
## ├────────────────────────────────────────────┼───────────────────────────────────┼──────────────────┼───────────┤
## │ batch_normalization_1 (BatchNormalization) │ (None, 28, 28, 32) │ 128 │ Y │
## ├────────────────────────────────────────────┼───────────────────────────────────┼──────────────────┼───────────┤
## │ activation_1 (Activation) │ (None, 28, 28, 32) │ 0 │ - │
## ├────────────────────────────────────────────┼───────────────────────────────────┼──────────────────┼───────────┤
## │ max_pooling2d (MaxPooling2D) │ (None, 14, 14, 32) │ 0 │ - │
## ├────────────────────────────────────────────┼───────────────────────────────────┼──────────────────┼───────────┤
## │ dropout_5 (Dropout) │ (None, 14, 14, 32) │ 0 │ - │
## ├────────────────────────────────────────────┼───────────────────────────────────┼──────────────────┼───────────┤
## │ conv2d_2 (Conv2D) │ (None, 14, 14, 64) │ 18,496 │ Y │
## ├────────────────────────────────────────────┼───────────────────────────────────┼──────────────────┼───────────┤
## │ batch_normalization_2 (BatchNormalization) │ (None, 14, 14, 64) │ 256 │ Y │
## ├────────────────────────────────────────────┼───────────────────────────────────┼──────────────────┼───────────┤
## │ activation_2 (Activation) │ (None, 14, 14, 64) │ 0 │ - │
## ├────────────────────────────────────────────┼───────────────────────────────────┼──────────────────┼───────────┤
## │ conv2d_3 (Conv2D) │ (None, 14, 14, 64) │ 36,928 │ Y │
## ├────────────────────────────────────────────┼───────────────────────────────────┼──────────────────┼───────────┤
## │ batch_normalization_3 (BatchNormalization) │ (None, 14, 14, 64) │ 256 │ Y │
## ├────────────────────────────────────────────┼───────────────────────────────────┼──────────────────┼───────────┤
## │ activation_3 (Activation) │ (None, 14, 14, 64) │ 0 │ - │
## ├────────────────────────────────────────────┼───────────────────────────────────┼──────────────────┼───────────┤
## │ max_pooling2d_1 (MaxPooling2D) │ (None, 7, 7, 64) │ 0 │ - │
## ├────────────────────────────────────────────┼───────────────────────────────────┼──────────────────┼───────────┤
## │ dropout_6 (Dropout) │ (None, 7, 7, 64) │ 0 │ - │
## ├────────────────────────────────────────────┼───────────────────────────────────┼──────────────────┼───────────┤
## │ flatten (Flatten) │ (None, 3136) │ 0 │ - │
## ├────────────────────────────────────────────┼───────────────────────────────────┼──────────────────┼───────────┤
## │ dense_16 (Dense) │ (None, 128) │ 401,536 │ Y │
## ├────────────────────────────────────────────┼───────────────────────────────────┼──────────────────┼───────────┤
## │ dropout_7 (Dropout) │ (None, 128) │ 0 │ - │
## ├────────────────────────────────────────────┼───────────────────────────────────┼──────────────────┼───────────┤
## │ class_prob (Dense) │ (None, 10) │ 1,290 │ Y │
## └────────────────────────────────────────────┴───────────────────────────────────┴──────────────────┴───────────┘
## Total params: 468,586 (1.79 MB)
## Trainable params: 468,202 (1.79 MB)
## Non-trainable params: 384 (1.50 KB)
# Early stopping: watch validation accuracy and restore the weights
# from the best epoch once it has not improved for 5 epochs.
cb_es <- callback_early_stopping(
  monitor = "val_accuracy", mode = "max",
  patience = 5, restore_best_weights = TRUE
)
# Halve the learning rate whenever validation loss plateaus for
# 2 epochs, down to a floor of 1e-5.
cb_rlr <- callback_reduce_lr_on_plateau(
  monitor = "val_loss", factor = 0.5,
  patience = 2, min_lr = 1e-5
)
# Fix the RNG seed so the training shuffle order is reproducible.
set.seed(123)
# Up to 50 epochs; early stopping will usually end training sooner.
history <- fit(
  cnn,
  x = X_tr, y = Y_tr,
  validation_data = list(X_va, Y_va),
  epochs = 50,
  batch_size = 128,
  callbacks = list(cb_es, cb_rlr),
  verbose = 2
)
plot(history) +
  theme_minimal() +
  ggtitle("CNN learning curves — Fashion-MNIST")

metrics <- cnn |>
  evaluate(x_test, y_test_oh, verbose = 0)
setNames(as.numeric(metrics), cnn$metrics_names) |>
  round(4)
## loss compile_metrics
## 0.9379 0.1879
## pred
## truth 0 1 2 3 4 5 6 7 8 9
## 0 896 0 11 9 0 0 82 0 2 0
## 1 1 988 0 6 1 0 3 0 1 0
## 2 14 2 906 8 34 0 36 0 0 0
## 3 8 2 8 936 21 0 25 0 0 0
## 4 0 0 15 18 925 0 42 0 0 0
## 5 0 0 0 0 0 985 0 10 0 5
## 6 79 0 37 22 59 0 800 0 3 0
## 7 0 0 0 0 0 2 0 986 0 12
## 8 2 0 0 2 2 1 0 0 993 0
## 9 0 0 0 0 0 4 0 32 0 964
A work by Gianluca Sottile
gianluca.sottile@unipa.it