In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Dense, Activation,Dropout,Conv2D, MaxPooling2D,BatchNormalization, Flatten
from tensorflow.keras.optimizers import Adam, Adamax
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras import regularizers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Model, load_model, Sequential
import numpy as np
import pandas as pd
import shutil
import time
import cv2 as cv2
from tqdm import tqdm
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
import os
import seaborn as sns
sns.set_style('darkgrid')
from PIL import Image
from sklearn.metrics import confusion_matrix, classification_report
from IPython.core.display import display, HTML

#
from tensorflow.keras.callbacks import EarlyStopping

In [None]:
# Root folders of the two image collections; every sub-folder name is used as the class label.
colon_dir=r'../input/lung-and-colon-cancer-histopathological-images/lung_colon_image_set/colon_image_sets'
lung_dir=r'../input/lung-and-colon-cancer-histopathological-images/lung_colon_image_set/lung_image_sets'

per_root_frames=[]
for root_dir in (colon_dir, lung_dir):
    # One (file path, class name) record for every entry inside every class sub-folder.
    records=[
        (os.path.join(root_dir, klass, fname), klass)
        for klass in os.listdir(root_dir)
        if os.path.isdir(os.path.join(root_dir, klass))
        for fname in os.listdir(os.path.join(root_dir, klass))
    ]
    per_root_frames.append(pd.DataFrame(records, columns=['filepaths', 'labels']))
colon_df, lung_df = per_root_frames

# Stack both collections into one frame with a fresh 0..n-1 index.
df=pd.concat(per_root_frames, axis =0).reset_index(drop=True)
print (df.head())
print(df['labels'].value_counts())

In [None]:
# Keep at most `sample_size` randomly chosen images per class so the classes stay balanced.
sample_size=5000
sample_list=[]
# sort=False walks the classes in order of first appearance, like df['labels'].unique().
for label, label_group in df.groupby('labels', sort=False):
    # Sampling without replacement raises if more rows are requested than exist,
    # so cap the request at the size of the class.
    n_keep=min(sample_size, len(label_group))
    sample_list.append(label_group.sample(n=n_keep, replace=False, random_state=123, axis=0))
df=pd.concat(sample_list, axis=0).reset_index(drop=True)
print (len(df))

In [None]:
# Fractions of the full data set used for training and testing; what is left becomes validation.
train_split, test_split = .8, .1
# Share of the held-out remainder (1 - train_split) that is assigned to the test set.
holdout_test_share = test_split/(1-train_split)
split_options = dict(shuffle=True, random_state=123)
train_df, holdout_df = train_test_split(df, train_size=train_split, **split_options)
test_df, valid_df = train_test_split(holdout_df, train_size=holdout_test_share, **split_options)
print ('train_df length: ', len(train_df), ' _test_df length: ', len(test_df), '  valid_df length: ', len(valid_df))

In [None]:
# Input geometry shared by the generators and the models.
height, width, channels = 128, 128, 3
batch_size=128
img_size=(height, width)
img_shape=img_size + (channels,)

# Pick the largest batch size <= 80 that divides the test set exactly, so that
# test_steps batches cover every test sample exactly once.
length=len(test_df)
exact_batch_sizes=[length//n for n in range(1,length+1) if length % n ==0 and length/n<=80]
exact_batch_sizes.sort(reverse=True)
test_batch_size=exact_batch_sizes[0]
test_steps=length//test_batch_size
print ( 'test batch size: ' ,test_batch_size, '  test steps: ', test_steps)


#------------------------------------------------
import cv2
from skimage import io
from matplotlib import pyplot as plt
def scalar(img):
    """Contrast-enhance an RGB image with CLAHE and rescale it to [-1, 1].

    Used as ``preprocessing_function`` of the ``ImageDataGenerator``.

    Args:
        img: rank-3 RGB image array with pixel values in 0..255. Keras hands
            this function a float array by default; 8-bit input works as well.

    Returns:
        float32 array of the same shape, RGB channel order, values in [-1, 1].
    """
    # cv2.equalizeHist / CLAHE only accept 8-bit (CLAHE also 16-bit) single-channel
    # data, so the float image coming from Keras has to be converted first.
    rgb_u8 = np.clip(np.asarray(img), 0, 255).astype(np.uint8)
    lab_img = cv2.cvtColor(rgb_u8, cv2.COLOR_RGB2LAB)
    l, a, b = cv2.split(lab_img)
    # Equalise only the lightness channel so the colours (a, b) are left untouched.
    clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8,8))
    clahe_img = clahe.apply(l)
    updated_lab_img2 = cv2.merge((clahe_img,a,b))
    # Convert back to RGB (not BGR): the input is RGB and the networks expect RGB,
    # so the round trip must not swap the red and blue channels.
    CLAHE_img = cv2.cvtColor(updated_lab_img2, cv2.COLOR_LAB2RGB)
    return CLAHE_img.astype(np.float32)/127.5-1  # scale pixel between -1 and +1
#return img/127.5-1  # scale pixel between -1 and +1
#------------------------------------------------

# A single generator object is shared by all three flows, so every split is passed through `scalar`.
gen=ImageDataGenerator(preprocessing_function=scalar)
# Arguments common to the train / test / validation flows.
flow_options=dict(x_col='filepaths', y_col='labels', target_size=img_size,
                  class_mode='categorical', color_mode='rgb')
# Only the training flow is shuffled; test and validation keep the data-frame order.
train_gen=gen.flow_from_dataframe(train_df, shuffle=True, batch_size=batch_size, **flow_options)
test_gen=gen.flow_from_dataframe(test_df, shuffle=False, batch_size=test_batch_size, **flow_options)
valid_gen=gen.flow_from_dataframe(valid_df, shuffle=False, batch_size=batch_size, **flow_options)
# Class names in the order of the generator's class indices.
classes=list(train_gen.class_indices)
class_count=len(classes)

In [None]:
import cv2
import numpy as np
from PIL import Image
def myFunc(image):
    """Return a CLAHE contrast-enhanced copy of an RGB image as a PIL image.

    Args:
        image: anything ``np.array`` turns into an 8-bit RGB array of shape
            (height, width, 3), e.g. a PIL image in RGB mode.

    Returns:
        ``PIL.Image.Image`` holding the enhanced image in RGB channel order.
    """
    img = np.array(image)
    lab_img= cv2.cvtColor(img, cv2.COLOR_RGB2LAB)
    l, a, b = cv2.split(lab_img)
    # Equalise only the lightness channel so the colours (a, b) are left untouched.
    clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8,8))
    clahe_img = clahe.apply(l)
    updated_lab_img2 = cv2.merge((clahe_img,a,b))
    # Back to RGB (not BGR): Image.fromarray interprets a 3-channel array as RGB,
    # so a BGR result would come out with red and blue swapped.
    CLAHE_img = cv2.cvtColor(updated_lab_img2, cv2.COLOR_LAB2RGB)
    return Image.fromarray(CLAHE_img )

In [None]:
from tensorflow.keras.preprocessing import image
# One example image per class, shown side by side.
SAMPLES = ['../input/lung-and-colon-cancer-histopathological-images/lung_colon_image_set/lung_image_sets/lung_scc/lungscc1.jpeg',
           '../input/lung-and-colon-cancer-histopathological-images/lung_colon_image_set/lung_image_sets/lung_n/lungn1.jpeg',
           '../input/lung-and-colon-cancer-histopathological-images/lung_colon_image_set/lung_image_sets/lung_aca/lungaca1.jpeg',
           '../input/lung-and-colon-cancer-histopathological-images/lung_colon_image_set/colon_image_sets/colon_n/colonn1.jpeg',
           '../input/lung-and-colon-cancer-histopathological-images/lung_colon_image_set/colon_image_sets/colon_aca/colonca1.jpeg']

plt.figure(figsize=(22, 10))
# `c` is the 1-based subplot slot of the image being drawn.
c = 0
for c, i in enumerate(SAMPLES, start=1):
    plt.subplot(1, 5, c)
    # Path component 5 is the class folder name (e.g. 'lung_scc').
    plt.title(i.split('/')[5])
    plt.imshow(image.load_img(i))
    plt.axis('off')
plt.show()

In [None]:
    from tensorflow.keras.preprocessing import image
    import cv2
    import numpy as np
    from PIL import Image
    # Single sample image that is analysed step by step in this cell.
    sample_path = '../input/lung-and-colon-cancer-histopathological-images/lung_colon_image_set/lung_image_sets/lung_n/lungn1.jpeg'
    sample_bgr = cv2.imread(sample_path)
    if sample_bgr is None:
        # cv2.imread reports a missing/unreadable file by returning None instead of raising.
        raise FileNotFoundError(sample_path)
    # cv2.imread returns the channels in BGR order; convert once so that matplotlib,
    # the per-channel histogram labels and COLOR_RGB2LAB below all see real RGB data.
    img = cv2.cvtColor(sample_bgr, cv2.COLOR_BGR2RGB)

    plt.figure(figsize=(15, 8))
    # Title = class folder of the image that is actually displayed.
    plt.title(os.path.basename(os.path.dirname(sample_path)))
    plt.imshow(img)
    plt.axis('off')

    # Intensity histogram of all pixels and of each colour channel.
    plt.figure(figsize=(15, 8))
    plt.hist(img.ravel(), bins = 256, color = 'orange', )
    plt.hist(img[:, :, 0].ravel(), bins = 256, color = 'red', alpha = 0.5)
    plt.hist(img[:, :, 1].ravel(), bins = 256, color = 'Green', alpha = 0.5)
    plt.hist(img[:, :, 2].ravel(), bins = 256, color = 'Blue', alpha = 0.5)
    plt.xlabel('Intensity Value')
    plt.ylabel('Count')
    plt.legend(['Total', 'red_Channel', 'Green_Channel', 'Blue_Channel'])
    plt.show()

    plt.figure(figsize=(15, 8))
    plt.hist(img.flat, bins=100, range=(0,255))
    plt.xlabel('image hisgram Values')
    plt.show()

    # RGB -> LAB so that contrast can be changed on the lightness channel only.
    plt.figure(figsize=(15, 8))
    lab_img= cv2.cvtColor(img, cv2.COLOR_RGB2LAB)
    plt.imshow(lab_img)
    plt.xlabel('image  after  after convert  from RGB_COLOR to LAB_COLOR')
    plt.show()

    plt.figure(figsize=(15, 8))
    l, a, b = cv2.split(lab_img)
    plt.hist(l.flat, bins=100, range=(0,255))
    plt.xlabel('L chanel hisgram Values')
    plt.show()

    # Step 1: global histogram equalisation of the lightness channel.
    plt.figure(figsize=(15, 8))
    equ = cv2.equalizeHist(l)
    plt.hist(equ.flat, bins=100, range=(0,255))
    plt.xlabel(' equalizeHist of L chanel hisgram Values')
    plt.show()

    plt.figure(figsize=(15, 8))
    updated_lab_img1 = cv2.merge((equ,a,b))
    # LAB -> RGB keeps the channel order matplotlib expects.
    hist_eq_img = cv2.cvtColor(updated_lab_img1, cv2.COLOR_LAB2RGB)
    plt.hist(hist_eq_img.flat, bins=100, range=(0,255))
    plt.xlabel('image hisgram Values after  equalizeHist of L')
    plt.show()

    plt.figure(figsize=(15, 8))
    plt.imshow(hist_eq_img)
    plt.xlabel('image  after  equalizeHist of L')
    plt.show()

    # Step 2: CLAHE.
    # NOTE(review): here CLAHE runs on the already equalised channel with clipLimit=4.0,
    # whereas `scalar` (the training preprocessing) applies it to the raw L channel
    # with clipLimit=3.0 - confirm which variant this walkthrough should illustrate.
    plt.figure(figsize=(15, 8))
    clahe = cv2.createCLAHE(clipLimit=4.0, tileGridSize=(8,8))
    clahe_img = clahe.apply(equ)
    plt.hist(clahe_img.flat, bins=100, range=(0,255))
    plt.xlabel('clahe hisgram Values for equalizeHist ')
    plt.show()

    plt.figure(figsize=(15, 8))
    updated_lab_img2 = cv2.merge((clahe_img,a,b))
    CLAHE_img = cv2.cvtColor(updated_lab_img2, cv2.COLOR_LAB2RGB)
    plt.hist(CLAHE_img.flat, bins=100, range=(0,255))
    plt.xlabel('image hisgram Values after CLAHE')
    plt.show()

    plt.figure(figsize=(15, 8))
    plt.imshow(CLAHE_img)
    plt.xlabel('image  after CLAHE')
    plt.show()

In [None]:
def print_in_color(txt_msg,fore_tupple,back_tupple,):
    """Print ``txt_msg`` using 24-bit ANSI foreground and background colours.

    Args:
        txt_msg: text to print; it is emitted verbatim, braces included.
        fore_tupple: foreground colour as an (r, g, b) tuple.
        back_tupple: background colour as an (r, g, b) tuple.

    Returns:
        None. Writes the coloured message followed by a reset sequence to stdout.
    """
    rf,gf,bf=fore_tupple
    rb,gb,bb=back_tupple
    # Escape sequence selecting the foreground (38;2;r;g;b) and background (48;2;r;g;b) colours.
    mat='\33[38;2;' + str(rf) +';' + str(gf) + ';' + str(bf) + ';48;2;' + str(rb) + ';' +str(gb) + ';' + str(bb) +'m'
    # Concatenate instead of str.format so that '{' / '}' inside the message are
    # printed as-is rather than being interpreted as format placeholders.
    print(mat + txt_msg, flush=True)
    print('\33[0m', flush=True) # reset the terminal colours to their defaults
    return

In [None]:
model_name='InceptionResNetV2'
# ImageNet-pretrained backbone without its classification head; pooling='max'
# reduces the final feature maps to one feature vector per image.
base_model=tf.keras.applications.InceptionResNetV2(include_top=False, weights="imagenet",input_shape=img_shape, pooling='max')
x=base_model.output
x=keras.layers.BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001 )(x)
# The regularisation factor is passed positionally: the `l=` keyword is a legacy
# alias that newer Keras releases no longer accept.
x = Dense(256, kernel_regularizer = regularizers.l2(0.016),activity_regularizer=regularizers.l1(0.006),
                bias_regularizer=regularizers.l1(0.006) ,activation='relu')(x)
x=Dropout(rate=.45, seed=123)(x)
# One softmax output per class found by the training generator.
output=Dense(class_count, activation='softmax')(x)
model=Model(inputs=base_model.input, outputs=output)
# `learning_rate` replaces the deprecated `lr` argument, which newer Keras releases reject.
model.compile(Adamax(learning_rate=.001), loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Stop once val_loss has not improved for 3 epochs. restore_best_weights=True rolls
# the model back to the best epoch when early stopping triggers; without it the
# weights that get saved below are the ones from `patience` epochs after the best one.
early_stopping=EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
history = model.fit(
    train_gen,
    validation_data=valid_gen,
    epochs=20,
    callbacks=[early_stopping])
# ".h5" is an extension Keras recognises for HDF5 files; the previous ".hd5" is not
# a recognised extension (older versions write a SavedModel directory with that
# name, newer ones reject the path).
model.save("InceptionResNetV2.h5")
print("Saved model to disk")

In [None]:
# Per-epoch curves recorded by model.fit.
curves = {key: history.history[key] for key in ('accuracy', 'val_accuracy', 'loss', 'val_loss')}

# One row per panel:
# (subplot slot, train key, validation key, train label, validation label,
#  legend position, y-axis label, title, title font size)
panels = [
    (1, 'accuracy', 'val_accuracy', 'Training Accuracy', 'Validation Accuracy',
     'lower right', 'Accuracy', 'Training & Validation Acc.', 16),
    (2, 'loss', 'val_loss', 'Training Loss', 'Validation Loss',
     'upper right', 'Cross Entropy', 'Training & Validation Loss', 15),
]

plt.figure(figsize=(10, 10))
for slot, train_key, val_key, train_label, val_label, legend_loc, y_label, panel_title, title_size in panels:
    plt.subplot(2, 1, slot)
    plt.plot(curves[train_key], label=train_label, color='r')
    plt.plot(curves[val_key], label=val_label, color='b')
    plt.xticks(fontsize=14)
    plt.yticks(fontsize=14)
    plt.legend(loc=legend_loc, fontsize=13)
    plt.ylabel(y_label, fontsize=16, weight='bold')
    plt.title(panel_title, fontsize=title_size, weight='bold')
# Only the lower (loss) panel gets the epoch label; it is the current axes after the loop.
plt.xlabel('Epoch', fontsize=15, weight='bold')
plt.show()

In [None]:
from sklearn.metrics import classification_report

# Class probabilities for every test image; test_gen is not shuffled, so the rows
# line up with test_gen.labels.
Y_pred = model.predict(test_gen)
y_pred = np.argmax(Y_pred, axis=1)

# Passing labels + target_names prints the class names instead of bare indices and
# keeps one row per class even if a class is missing from the predictions.
print(classification_report(test_gen.labels, y_pred,
                            labels=list(range(len(classes))), target_names=classes))

In [None]:
import itertools
def plot_confusion_matrix(cm, classes,
                          normalize=True,
                          title='Confusion matrix',
                          cmap=plt.cm.Greys):
    """Draw a confusion matrix as an annotated heat map on the current figure.

    Args:
        cm: square confusion matrix (rows = true class, columns = predicted class).
        classes: class names used as tick labels, in matrix order.
        normalize: if True, every row is divided by its total so that each cell
            shows the fraction of that true class; otherwise raw counts are shown.
        title: title of the plot.
        cmap: matplotlib colour map for the heat map.

    Returns:
        None. Draws with pyplot; the caller creates and shows the figure.
    """
    cm = np.asarray(cm)
    if normalize:
        cm = cm.astype('float')
        row_totals = cm.sum(axis=1, keepdims=True)
        # A class without any true sample has a zero row total; keep that row at 0
        # instead of dividing by zero and filling it with NaN.
        cm = np.divide(cm, row_totals, out=np.zeros_like(cm), where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title, weight='bold', fontsize=16)
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, fontsize=14)
    plt.yticks(tick_marks, classes, fontsize=14)

    fmt = '.2f' if normalize else 'd'
    # Cells darker than half the maximum get white text so the number stays readable.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center", fontsize=16, weight='bold',
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label', fontsize=20, weight='bold')
    plt.xlabel('Predicted label', fontsize=16, weight='bold')
    # Run the layout last so the axis labels are taken into account.
    plt.tight_layout()

# Compute confusion matrix; `labels` pins the matrix to one row/column per class
# even if a class never occurs in the test labels or predictions.
cnf_matrix = confusion_matrix(test_gen.labels, y_pred, labels=list(range(len(classes))))
np.set_printoptions(precision=2)

# Plot the non-normalized (raw count) confusion matrix. The tick labels come from
# `classes` so they always follow the generator's class order, and the title no
# longer claims the matrix is normalized.
plt.figure(figsize=(10, 10))
plot_confusion_matrix(cnf_matrix, classes=classes, normalize=False,
                      title='Confusion Matrix')
plt.show()

In [None]:
model_name='EfficientNetB0'
# ImageNet-pretrained backbone without its classification head; pooling='max'
# reduces the final feature maps to one feature vector per image.
# NOTE(review): train_gen / valid_gen feed images rescaled to [-1, 1] by `scalar`,
# while the Keras EfficientNet models are documented to take raw [0, 255] pixels
# (rescaling is part of the model) - confirm the input range used here.
base_model=tf.keras.applications.EfficientNetB0(include_top=False, weights="imagenet",input_shape=img_shape, pooling='max')
x=base_model.output
x=keras.layers.BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001 )(x)
# The regularisation factor is passed positionally: the `l=` keyword is a legacy
# alias that newer Keras releases no longer accept.
x = Dense(256, kernel_regularizer = regularizers.l2(0.016),activity_regularizer=regularizers.l1(0.006),
                bias_regularizer=regularizers.l1(0.006) ,activation='relu')(x)
x=Dropout(rate=.45, seed=123)(x)
# One softmax output per class found by the training generator.
output=Dense(class_count, activation='softmax')(x)
model=Model(inputs=base_model.input, outputs=output)
# `learning_rate` replaces the deprecated `lr` argument, which newer Keras releases reject.
model.compile(Adamax(learning_rate=.001), loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Stop once val_loss has not improved for 3 epochs. restore_best_weights=True rolls
# the model back to the best epoch when early stopping triggers; without it the
# weights that get saved below are the ones from `patience` epochs after the best one.
early_stopping=EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
history = model.fit(
    train_gen,
    validation_data=valid_gen,
    epochs=30,
    callbacks=[early_stopping])
# ".h5" is an extension Keras recognises for HDF5 files; the previous ".hd5" is not
# a recognised extension (older versions write a SavedModel directory with that
# name, newer ones reject the path).
model.save("EfficientNetB0.h5")
print("Saved model to disk")

In [None]:
from sklearn.metrics import classification_report

# Class probabilities for every test image; test_gen is not shuffled, so the rows
# line up with test_gen.labels.
Y_pred = model.predict(test_gen)
y_pred = np.argmax(Y_pred, axis=1)

# Passing labels + target_names prints the class names instead of bare indices and
# keeps one row per class even if a class is missing from the predictions.
print(classification_report(test_gen.labels, y_pred,
                            labels=list(range(len(classes))), target_names=classes))

In [None]:
!pip install lime

In [None]:
import lime
from lime import lime_image
# LIME image explainer; its explain_instance(...) is called in the following cells.
explainer = lime_image.LimeImageExplainer()

In [None]:
# `x_batch` / `y_batch` are used from here on but are not created anywhere in the
# visible notebook, so a fresh "Restart & Run All" stops with a NameError.
# Take the first test batch: indexing (instead of next()) leaves the generator's
# position untouched, and the images are preprocessed by `scalar` exactly like the
# ones the model was trained on.
# NOTE(review): presumably these came from a since-deleted cell - confirm that the
# first test batch is the intended sample set (it must hold at least 16 images).
x_batch, y_batch = test_gen[0]
# Explain the model's prediction for sample 15, keeping the 5 most likely classes.
explanation = explainer.explain_instance(x_batch[15], model.predict, top_labels=5, hide_color=0, num_samples=10000)

In [None]:
from skimage.segmentation import mark_boundaries
def _to_display(arr):
    """Min-max rescale an image array to [0, 1] so imshow renders it whatever its value range."""
    arr = np.asarray(arr, dtype='float32')
    lowest = arr.min()
    span = arr.max() - lowest
    return (arr - lowest) / span if span > 0 else np.zeros_like(arr)

# Panel 2: only the regions that support the top predicted class, rest hidden.
temp_1, mask_1 = explanation.get_image_and_mask(explanation.top_labels[0], positive_only=True, num_features=10000, hide_rest=True)
# Panel 3: supporting and opposing regions on top of the full image. The two calls
# used to be identical, which made panels 2 and 3 exact duplicates.
# NOTE(review): confirm this is the intended second view.
temp_2, mask_2 = explanation.get_image_and_mask(explanation.top_labels[0], positive_only=False, num_features=10000, hide_rest=False)

fig, (ax,ax1, ax2) = plt.subplots(1, 3, figsize=(15,15))
# The value range of x_batch is not fixed by this cell, so rescale for display
# instead of assuming 0..255 pixels.
ax.imshow(_to_display(x_batch[15]))
ax1.imshow(mark_boundaries(_to_display(temp_1), mask_1))
ax2.imshow(mark_boundaries(_to_display(temp_2), mask_2))

for panel in (ax, ax1, ax2):
    panel.axis('off')

plt.savefig('mask_default.png')

In [None]:
import lime
from lime import lime_image
# Fresh LIME explainer (rebinds `explainer`), then explain the prediction for
# sample 1 of x_batch, keeping the 5 most likely classes.
# NOTE(review): x_batch is not defined in this cell; it must already exist in the kernel.
explainer = lime_image.LimeImageExplainer()
explanation = explainer.explain_instance(x_batch[1], model.predict, top_labels=5, hide_color=0, num_samples=10000)


In [None]:
from skimage.segmentation import mark_boundaries
def _to_display(arr):
    """Min-max rescale an image array to [0, 1] so imshow renders it whatever its value range."""
    arr = np.asarray(arr, dtype='float32')
    lowest = arr.min()
    span = arr.max() - lowest
    return (arr - lowest) / span if span > 0 else np.zeros_like(arr)

# Panel 2: only the regions that support the top predicted class, rest hidden.
temp_1, mask_1 = explanation.get_image_and_mask(explanation.top_labels[0], positive_only=True, num_features=10000, hide_rest=True)
# Panel 3: supporting and opposing regions on top of the full image. The two calls
# used to be identical, which made panels 2 and 3 exact duplicates.
# NOTE(review): confirm this is the intended second view.
temp_2, mask_2 = explanation.get_image_and_mask(explanation.top_labels[0], positive_only=False, num_features=10000, hide_rest=False)

fig, (ax,ax1, ax2) = plt.subplots(1, 3, figsize=(15,15))
# The value range of x_batch is not fixed by this cell, so rescale for display
# instead of assuming 0..255 pixels.
ax.imshow(_to_display(x_batch[1]))
ax1.imshow(mark_boundaries(_to_display(temp_1), mask_1))
ax2.imshow(mark_boundaries(_to_display(temp_2), mask_2))

for panel in (ax, ax1, ax2):
    panel.axis('off')

# Separate file name: 'mask_default.png' is already written by the cell that plots
# sample 15 and would be overwritten here.
plt.savefig('mask_sample_1.png')

In [None]:
import shap
# Image masker built with the "blur(28,28)" setting and the shape of one sample image.
masker = shap.maskers.Image("blur(28,28)", x_batch[0].shape)
# SHAP explainer for `model`; this rebinds `explainer`, which held the LIME explainer before.
explainer = shap.Explainer(model, masker, output_names=classes)
# Bare expression: displays the explainer object as the cell output.
explainer

In [None]:
# Explain the first 8 images of x_batch; `outputs` selects the first 5 entries of
# the flipped argsort of the model outputs for each image.
shap_values = explainer(x_batch[0:8], outputs=shap.Explanation.argsort.flip[:5])
# Bare expression: displays the shape of the result as the cell output.
shap_values.shape

In [None]:
# Compare true and predicted classes for the first eight images of the batch.
first_eight = slice(0, 8)
actual_idx = np.argmax(y_batch[first_eight], axis=1)
print("Actual Labels    : {}".format(actual_idx))
probs = model.predict(x_batch[first_eight])
predicted_idx = probs.argmax(axis=1)
top_probability = probs.max(axis=1)
print("Predicted Labels : {}".format( predicted_idx))
print("Probabilities : {}".format(top_probability))

In [None]:
# Plot the SHAP values computed in the previous cell.
shap.image_plot(shap_values)