# 5-fold EfficientNet-B3 model using Fastai

## Setting up dependencies

In [None]:
!rm -r ../root/.kaggle
!mkdir ../root/.kaggle
!cp kaggle.json ../root/.kaggle
!chmod 600 ../root/.kaggle/kaggle.json
!rm -r sample_data
!pip install efficientnet-pytorch

## Importing packages for our experiment

In [None]:
from fastai import *
from fastai.vision import *
from fastai.vision.models import efficientnet
from fastai.utils.ipython import *
from fastai.callbacks.tracker import SaveModelCallback
from sklearn.model_selection import StratifiedKFold

## Setting up k-fold training strategy

In [None]:
# Use the current working directory as the project root.
path = Path(".")
# List the contents of the root directory.
path.ls()

In [None]:
# Collect every image under the dataset folder, keep them all in a single
# (unsplit) set, and label each image from its folder. The result is only
# used to obtain items and labels for the stratified fold split.
data_init = ImageList.from_folder(path/"sipakmed_wsi_pap_smear")
data_init = data_init.split_none()
data_init = data_init.label_from_folder()
data_init

In [None]:
# 5-fold stratified splitter; shuffling with a fixed random_state keeps the
# fold assignment reproducible across runs.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
skf

In [None]:
!mkdir sipakmed_wsi_pap_smear/models

In [None]:
def model_callback(model, model_name, monitor="accuracy", every="improvement"):
    """Build the callback list that checkpoints a learner during training.

    Args:
        model: Object handed to ``SaveModelCallback`` as its first argument;
            every call in this notebook passes the ``Learner`` being trained.
        model_name: Checkpoint name passed to the callback as ``name``.
        monitor: Name of the metric the callback watches. Defaults to
            ``"accuracy"``.
        every: When the callback saves. Defaults to ``"improvement"``.

    Returns:
        A one-element list suitable for the ``callbacks=`` argument of
        ``fit_one_cycle``.
    """
    return [SaveModelCallback(model, every=every, monitor=monitor, name=model_name)]

In [None]:
# Augmentation settings handed to fastai's get_transforms: vertical flips
# enabled, rotation limit 60, zoom limit 1.0, warp limit 0.0.
tfms = get_transforms(
    max_rotate=60.0,
    max_zoom=1.0,
    max_warp=0.0,
    flip_vert=True,
)

In [None]:
# Metrics attached to the learner: accuracy, macro-averaged precision,
# recall and F-beta, plus quadratic-weighted Cohen's kappa.
our_metrics = [
    accuracy,
    Precision(average="macro"),
    Recall(average="macro"),
    FBeta(average="macro"),
    KappaScore(weights="quadratic"),
]
our_metrics

## K-Fold Training Process
* Loop through fold indices
* Load the fold data
* Initialize the learner with the fold data (with freezing)
* Train the learner
* Fine-tune the learner
* Repeat for each remaining fold

In [None]:
# Materialize the splitter's output once: one [train indices, validation
# indices] pair per fold, stratified on the folder-derived labels.
idxs = [list(split) for split in skf.split(data_init.x.items, data_init.y.items)]

## First Fold

In [None]:
# Fold 1: idxs[0] holds the [train indices, validation indices] pair.
fold_idxs = idxs[0]
# Build the fold's data: split the image list by the fold indices, label by
# folder, apply the `tfms` augmentations at size 224, batch by 16 and
# normalize with the ImageNet statistics.
fold_data = (ImageList.from_folder(path/"sipakmed_wsi_pap_smear")
 .split_by_idxs(fold_idxs[0], fold_idxs[1])
 .label_from_folder()
 .transform(tfms, size=224)
 .databunch(bs=16)
 .normalize(imagenet_stats))

In [None]:
# Build an EfficientNet-B3 learner on the fold data and switch it to mixed
# precision (fp16).
learner = Learner(fold_data, efficientnet.EfficientNetB3(fold_data), metrics=our_metrics).to_fp16()
# Define the layer groups from the stem, the blocks and the head of the model;
# freeze()/unfreeze() later operate on these groups.
# NOTE(review): no learner.freeze() is called for fold 1, unlike folds 2-5,
# so fold 1 stage 1 appears to train every layer group - confirm this is intended.
learner = learner.split([learner.model._conv_stem, learner.model._blocks, learner.model._conv_head])

In [None]:
# Re-run this cell if no graph is generated.
# Run the learning-rate range test and plot the recorded loss curve.
learner.lr_find()
learner.recorder.plot()

In [None]:
# Choose a learning rate with a low loss value that still has a good downward slope.
# Fold 1, stage 1: 30 one-cycle epochs. The callback saves the checkpoint
# "best-effb3-fold1-stage1" whenever accuracy improves.
learner.fit_one_cycle(30, max_lr=slice(2e-03), callbacks=model_callback(learner, "best-effb3-fold1-stage1"))

In [None]:
# Re-run this cell if no graph is generated.
# Restore the best stage-1 checkpoint, unfreeze the learner for fine-tuning,
# and rerun the learning-rate range test for stage 2.
learner.load("best-effb3-fold1-stage1")
learner.unfreeze()
learner.lr_find()
learner.recorder.plot()

In [None]:
# Choose a learning rate with a low loss value that still has an appropriate downward slope.
# Fold 1, stage 2 (fine-tuning): 30 one-cycle epochs. The callback saves the
# checkpoint "best-effb3-fold1-stage2" whenever accuracy improves.
learner.fit_one_cycle(30, max_lr=slice(2e-06), callbacks=model_callback(learner, "best-effb3-fold1-stage2"))

## Second Fold

In [None]:
# Fold 2: idxs[1] holds the [train indices, validation indices] pair.
fold_idxs = idxs[1]
# Build the fold's data: split the image list by the fold indices, label by
# folder, apply the `tfms` augmentations at size 224, batch by 16 and
# normalize with the ImageNet statistics.
fold_data = (ImageList.from_folder(path/"sipakmed_wsi_pap_smear")
 .split_by_idxs(fold_idxs[0], fold_idxs[1])
 .label_from_folder()
 .transform(tfms, size=224)
 .databunch(bs=16)
 .normalize(imagenet_stats))

In [None]:
# Transfer the weights of the model from the previous fold to the new fold.
# Re-run this cell if no graph is generated.
# NOTE(review): fold 2's validation images belonged to fold 1's training
# split, so a model warm-started from fold 1 has already seen them. The
# per-fold metrics are therefore not independent estimates - confirm this
# warm start is intended.
learner.load("best-effb3-fold1-stage2")
# Swap in the fold-2 data.
learner.data = fold_data
# Freeze the learner for stage 1.
learner.freeze()
# Re-apply mixed precision (fp16) now that the data has been replaced.
learner = to_fp16(learner)
learner.lr_find()
learner.recorder.plot()

In [None]:
# Choose a learning rate with a low loss value that still has an appropriate downward slope.
# Fold 2, stage 1: 30 one-cycle epochs. The callback saves the checkpoint
# "best-effb3-fold2-stage1" whenever accuracy improves.
learner.fit_one_cycle(30, max_lr=slice(3e-05), callbacks=model_callback(learner, "best-effb3-fold2-stage1"))
# Also keep the weights as they are after the final epoch.
learner.save("last-effb3-fold2-stage1")

In [None]:
# Re-run this cell if no graph is generated.
# NOTE(review): this resumes from the final-epoch weights ("last-"), whereas
# fold 1 resumed stage 2 from the best checkpoint ("best-") - confirm which
# is intended.
learner.load("last-effb3-fold2-stage1")
# Unfreeze the learner for fine-tuning, then rerun the learning-rate range test.
learner.unfreeze()
learner.lr_find()
learner.recorder.plot()

In [None]:
# Choose a learning rate with a low loss value that still has an appropriate downward slope.
# Fold 2, stage 2 (fine-tuning): 30 one-cycle epochs with the same start and
# stop value in the max_lr slice. The callback saves "best-effb3-fold2-stage2"
# whenever accuracy improves.
learner.fit_one_cycle(30, max_lr=slice(7e-07, 7e-07), callbacks=model_callback(learner, "best-effb3-fold2-stage2"))
# Also keep the weights as they are after the final epoch.
learner.save("last-effb3-fold2-stage2")

## Third Fold

In [None]:
# Fold 3: idxs[2] holds the [train indices, validation indices] pair.
fold_idxs = idxs[2]
# Build the fold's data: split the image list by the fold indices, label by
# folder, apply the `tfms` augmentations at size 224, batch by 16 and
# normalize with the ImageNet statistics.
fold_data = (ImageList.from_folder(path/"sipakmed_wsi_pap_smear")
 .split_by_idxs(fold_idxs[0], fold_idxs[1])
 .label_from_folder()
 .transform(tfms, size=224)
 .databunch(bs=16)
 .normalize(imagenet_stats))

In [None]:
# Transfer the weights of the model from the previous fold to the new fold.
# Re-run this cell if no graph is generated.
# NOTE(review): the model is warm-started from fold 2, whose training split
# contained this fold's validation images - confirm this is intended.

# Purge the learner before loading (presumably to release cached state).
learner.purge()
learner.load("best-effb3-fold2-stage2")
# Swap in the fold-3 data.
learner.data = fold_data
# Freeze the learner for stage 1.
learner.freeze()
# Re-apply mixed precision (fp16) now that the data has been replaced.
learner = to_fp16(learner)
learner.lr_find()
learner.recorder.plot()

In [None]:
# Choose a learning rate with a low loss value that still has an appropriate downward slope.
# Fold 3, stage 1: 30 one-cycle epochs. The callback saves the checkpoint
# "best-effb3-fold3-stage1" whenever accuracy improves.
learner.fit_one_cycle(30, max_lr=slice(5e-06, 6e-06), callbacks=model_callback(learner, "best-effb3-fold3-stage1"))
# Also keep the weights as they are after the final epoch.
learner.save("last-effb3-fold3-stage1")

In [None]:
# Re-run this cell if no graph is generated.
# Resume from the final-epoch stage-1 weights ("last-"), unfreeze the learner
# for fine-tuning, and rerun the learning-rate range test.
learner.load("last-effb3-fold3-stage1")
learner.unfreeze()
learner.lr_find()
learner.recorder.plot()

In [None]:
# Choose a learning rate with a low loss value that still has an appropriate downward slope.
# Fold 3, stage 2 (fine-tuning): 20 one-cycle epochs. The callback saves the
# checkpoint "best-effb3-fold3-stage2" whenever accuracy improves.
learner.fit_one_cycle(20, max_lr=slice(7e-07), callbacks=model_callback(learner, "best-effb3-fold3-stage2"))
# Also keep the weights as they are after the final epoch.
learner.save("last-effb3-fold3-stage2")

## Fourth Fold

In [None]:
# Fold 4: idxs[3] holds the [train indices, validation indices] pair.
fold_idxs = idxs[3]
# Build the fold's data: split the image list by the fold indices, label by
# folder, apply the `tfms` augmentations at size 224, batch by 16 and
# normalize with the ImageNet statistics.
fold_data = (ImageList.from_folder(path/"sipakmed_wsi_pap_smear")
 .split_by_idxs(fold_idxs[0], fold_idxs[1])
 .label_from_folder()
 .transform(tfms, size=224)
 .databunch(bs=16)
 .normalize(imagenet_stats))

In [None]:
# Transfer the weights of the model from the previous fold to the new fold.
# Re-run this cell if no graph is generated.
# NOTE(review): this starts from the final-epoch weights ("last-") of fold 3,
# whereas folds 2 and 3 started from the previous fold's "best-" checkpoint -
# confirm which is intended.

# Purge the learner before loading (presumably to release cached state).
learner.purge()
learner.load("last-effb3-fold3-stage2")
# Swap in the fold-4 data.
learner.data = fold_data
# Freeze the learner for stage 1.
learner.freeze()
# Re-apply mixed precision (fp16) now that the data has been replaced.
learner = to_fp16(learner)
learner.lr_find()
learner.recorder.plot()

In [None]:
# Choose a learning rate with a low loss value that still has an appropriate downward slope.
# Fold 4, stage 1: 20 one-cycle epochs. The callback saves the checkpoint
# "best-effb3-fold4-stage1" whenever accuracy improves.
learner.fit_one_cycle(20, max_lr=slice(1.5e-05), callbacks=model_callback(learner, "best-effb3-fold4-stage1"))
# Also keep the weights as they are after the final epoch.
learner.save("last-effb3-fold4-stage1")

In [None]:
# Re-run this cell if no graph is generated.
# Resume from the best stage-1 checkpoint ("best-"), unfreeze the learner for
# fine-tuning, and rerun the learning-rate range test.
learner.load("best-effb3-fold4-stage1")
learner.unfreeze()
learner.lr_find()
learner.recorder.plot()

In [None]:
# Choose a learning rate with a low loss value that still has an appropriate downward slope.
# Fold 4, stage 2 (fine-tuning): 20 one-cycle epochs. The callback saves the
# checkpoint "best-effb3-fold4-stage2" whenever accuracy improves.
learner.fit_one_cycle(20, max_lr=slice(2e-06), callbacks=model_callback(learner, "best-effb3-fold4-stage2"))
# Also keep the weights as they are after the final epoch.
learner.save("last-effb3-fold4-stage2")

## Fifth Fold

In [None]:
# Fold 5: idxs[4] holds the [train indices, validation indices] pair.
fold_idxs = idxs[4]
# Build the fold's data: split the image list by the fold indices, label by
# folder, apply the `tfms` augmentations at size 224, batch by 16 and
# normalize with the ImageNet statistics.
fold_data = (ImageList.from_folder(path/"sipakmed_wsi_pap_smear")
 .split_by_idxs(fold_idxs[0], fold_idxs[1])
 .label_from_folder()
 .transform(tfms, size=224)
 .databunch(bs=16)
 .normalize(imagenet_stats))

In [None]:
# Transfer the weights of the model from the previous fold to the new fold.
# Re-run this cell if no graph is generated.
# NOTE(review): the model is warm-started from fold 4, whose training split
# contained this fold's validation images - confirm this is intended.

learner.load("last-effb3-fold4-stage2")
# Swap in the fold-5 data.
learner.data = fold_data
# Freeze the learner for stage 1.
learner.freeze()
# Re-apply mixed precision (fp16) now that the data has been replaced.
learner = to_fp16(learner)
learner.lr_find()
learner.recorder.plot()

In [None]:
# Choose a learning rate with a low loss value that still has an appropriate downward slope.
# Fold 5, stage 1: 20 one-cycle epochs. The callback saves the checkpoint
# "best-effb3-fold5-stage1" whenever accuracy improves.
learner.fit_one_cycle(20, max_lr=slice(7.5e-07), callbacks=model_callback(learner, "best-effb3-fold5-stage1"))
# Also keep the weights as they are after the final epoch.
learner.save("last-effb3-fold5-stage1")

In [None]:
# Re-run this cell if no graph is generated.
# Purge the learner (presumably to release cached state), resume from the
# final-epoch stage-1 weights, unfreeze the learner for fine-tuning, and rerun
# the learning-rate range test.
learner.purge()
learner.load("last-effb3-fold5-stage1")
learner.unfreeze()
learner.lr_find()
learner.recorder.plot()

In [None]:
# Fold 5, stage 2 (fine-tuning): 20 one-cycle epochs with the same start and
# stop value in the max_lr slice. The callback saves "best-effb3-fold5-stage2"
# whenever accuracy improves.
learner.fit_one_cycle(20, max_lr=slice(1e-06, 1e-06), callbacks=model_callback(learner, "best-effb3-fold5-stage2"))
# Also keep the weights as they are after the final epoch.
learner.save("last-effb3-fold5-stage2")

# Model Validation after k-fold training

In [None]:
# Build a data bunch over the whole dataset with no train/validation split,
# using the same labelling, transforms, size, batch size and normalization as
# the fold data.
# NOTE(review): every image here was part of the training split of at least
# one fold, so scores computed on this data are not held-out estimates.
all_data = (ImageList.from_folder(path/"sipakmed_wsi_pap_smear")
 .split_none()
 .label_from_folder()
 .transform(tfms, size=224)
 .databunch(bs=16)
 .normalize(imagenet_stats))
all_data

In [None]:
# Restore the weights saved after the final epoch of fold 5, stage 2.
learner.load("last-effb3-fold5-stage2")

In [None]:
# Convert the learner back to full precision (fp32) for evaluation.
# NOTE(review): to_fp32 presumably returns the same learner object, so
# all_learner and learner would refer to one learner - confirm.
all_learner = to_fp32(learner)
# Point the learner at the unsplit, whole-dataset data bunch.
all_learner.data = all_data
all_learner.freeze()

In [None]:
# Interpret predictions over the whole dataset. The data bunch assigned to
# all_learner was built with split_none(), so every image sits in its
# training set. DatasetType.Fix serves that training set without shuffling,
# without dropping the last partial batch and without the training-time
# augmentations. DatasetType.Train would shuffle, silently skip the
# incomplete final batch and apply random augmentation, so the confusion
# matrix would neither cover every image nor be reproducible.
interp = ClassificationInterpretation.from_learner(all_learner, ds_type=DatasetType.Fix)
interp.plot_confusion_matrix(figsize=(7, 7))

In [None]:
# Same confusion matrix, normalized.
interp.plot_confusion_matrix(figsize=(7, 7), normalize=True)

In [None]:
# Make sure the learner is in full precision (fp32) before scoring.
val_learner = to_fp32(all_learner)
# Fresh metric instances for this evaluation pass.
test_mets = [accuracy, Precision(average="macro"), Recall(average="macro"), FBeta(average="macro"), KappaScore(weights="quadratic")]
# Score on fix_dl rather than train_dl. fix_dl iterates the same training
# items in order, keeps the last partial batch and skips the random
# training-time augmentations, so every image is scored exactly once and the
# reported metrics are reproducible.
val_learner.validate(dl=all_data.fix_dl, metrics=test_mets)

In [None]:
# Export the learner to "5fold-effb3-new.pkl".
# NOTE(review): the weights in the learner at this point are the ones loaded
# from "last-effb3-fold5-stage2" (final epoch, not the best-accuracy
# checkpoint) - confirm that is the model meant to be shipped.
all_learner.export("5fold-effb3-new.pkl")