
Dinghow's Personal Blog

A Glance of AutoML Part 2: An Image Classification Example with NNI

10/22/2024

After a quick start with Neural Network Intelligence (NNI), I tried using NNI in my own deep learning project. Recently I have been working on prostate cancer detection; one of its steps is image classification to determine the cancer class, and I added NNI to help tune the hyper-parameters.

(Reference) mnist with nni: https://github.com/Microsoft/nni/tree/master/examples/trials/mnist

1. Data preprocessing

The image data comes from ProstateX; the dataset provides two classes: having cancer or not.

from __future__ import print_function
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ReduceLROnPlateau, CSVLogger, EarlyStopping

import resnet
from glob import glob
import csv
import cv2
import numpy as np

# nni and a logger are needed by the trial code in the later sections
import nni
import logging
logger = logging.getLogger('image_classify')

# Change to your own corresponding file path
ADC_PATH='CAM-Data/Prostatex-ADC/'
csv_file='CAM-Data/ProstateX-Findings-Train.csv'
true_files=[]
false_files=[]

# Collect file paths for positive and negative findings
with open(csv_file, 'r') as csvFile:
    reader = csv.reader(csvFile)
    for item in reader:
        # Skip the header row
        if reader.line_num == 1:
            continue
        pre_name = item[0] + '-Finding' + item[1]
        # The last column marks whether the finding is cancerous
        if item[-1] == 'TRUE':
            true_files.extend(glob(ADC_PATH + pre_name + '*_ADC0.bmp'))
        else:
            false_files.extend(glob(ADC_PATH + pre_name + '*_ADC0.bmp'))


# Generate the network input data

pixel_size=64
true_data=np.zeros((len(true_files),pixel_size,pixel_size,3))
false_data=np.zeros((len(false_files),pixel_size,pixel_size,3))

# Read each grayscale image once and copy it into all three channels,
# since the ResNet expects 3-channel input
for t in range(len(true_files)):
    img=cv2.resize(cv2.imread(true_files[t],0),(pixel_size,pixel_size))
    true_data[t,...,0]=img
    true_data[t,...,1]=img
    true_data[t,...,2]=img
for f in range(len(false_files)):
    img=cv2.resize(cv2.imread(false_files[f],0),(pixel_size,pixel_size))
    false_data[f,...,0]=img
    false_data[f,...,1]=img
    false_data[f,...,2]=img

# Stack positive and negative samples into one array
data=np.vstack((true_data,false_data))

# One-hot labels: [0,1] for cancer, [1,0] for no cancer
true_label=[[0,1]]
false_label=[[1,0]]
label=np.array(true_label*len(true_data)+false_label*len(false_data))

# Shuffle data and labels with the same permutation
def unison_shuffled_copies(a, b):
    assert len(a) == len(b)
    p = np.random.permutation(len(a))
    return a[p], b[p]

data,label=unison_shuffled_copies(data,label)

splitpoint = int(round(len(data) * 0.8))
(x_train, x_val) = (data[0:splitpoint], data[splitpoint:])
(y_train, y_val) = (label[0:splitpoint], label[splitpoint:])
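
Before moving on, it is worth checking the array shapes, since the sample counts depend on how many files matched the glob patterns. A minimal sanity check:

# Inputs should be (N, 64, 64, 3); labels one-hot with shape (N, 2)
print('x_train:', x_train.shape, 'y_train:', y_train.shape)
print('x_val:', x_val.shape, 'y_val:', y_val.shape)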

2. Configure ResNet with NNI

I use a ResNet-18 for this job; there are five hyper-parameters to configure:

  • learning_rate_patience
  • min_learning_rate
  • batch_size
  • min_delta (used to decide early stopping)
  • early_stopping_patience
def main(params):

    #lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1), cooldown=0, patience=5, min_lr=0.5e-6)
    lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1), cooldown=0, patience=params['learning_rate_patience'], min_lr= params['min_learning_rate'])
    #early_stopper = EarlyStopping(min_delta=0.001, patience=15)
    early_stopper = EarlyStopping(min_delta=params['min_delta'], patience=params['early_stopping_patience'])
    # csv_logger = CSVLogger('resnet18_cifar10.csv')

    batch_size = params['batch_size']
    nb_classes = 2
    nb_epoch = 50
    data_augmentation = False

    # input image dimensions
    img_rows, img_cols = 64, 64
    # The grayscale slices were copied into 3 channels above.
    img_channels = 3



    model = resnet.ResnetBuilder.build_resnet_18((img_channels, img_rows, img_cols), nb_classes)
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    model.summary()

    if not data_augmentation:
        print('Not using data augmentation.')
        hist = model.fit(x_train, y_train,
                  batch_size=batch_size,
                  epochs=nb_epoch,
                  validation_data=(x_val, y_val),
                  shuffle=True,
                  callbacks=[lr_reducer, early_stopper])
        nni.report_final_result(hist.history['val_acc'][-1])
    else:
        print('Using real-time data augmentation.')
        # This will do preprocessing and realtime data augmentation:
        datagen = ImageDataGenerator(
            featurewise_center=False,  # set input mean to 0 over the dataset
            samplewise_center=False,  # set each sample mean to 0
            featurewise_std_normalization=False,  # divide inputs by std of the dataset
            samplewise_std_normalization=False,  # divide each input by its std
            zca_whitening=False,  # apply ZCA whitening
            rotation_range=0,  # randomly rotate images in the range (degrees, 0 to 180)
            width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
            height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
            horizontal_flip=True,  # randomly flip images
            vertical_flip=False)  # randomly flip images

        # Compute quantities required for featurewise normalization
        # (std, mean, and principal components if ZCA whitening is applied).
        datagen.fit(x_train)

        # Fit the model on the batches generated by datagen.flow().
        hist = model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size),
                            steps_per_epoch=x_train.shape[0] // batch_size,
                            validation_data=(x_val, y_val),
                            epochs=nb_epoch, verbose=1, max_q_size=100,
                            callbacks=[lr_reducer, early_stopper])
        nni.report_final_result(hist.history['val_acc'][-1])

Remember to add nni.report_final_result(hist.history['val_acc'][-1]) to report the final metric. I haven't found a built-in way for Keras to report metrics to NNI in real time, so I don't report intermediate metrics here.
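
That said, it should be possible to report per-epoch metrics through a custom Keras callback that forwards the validation accuracy to nni.report_intermediate_result. A minimal sketch (untested here, and assuming the 'val_acc' log key used above):

from keras.callbacks import Callback

class SendMetrics(Callback):
    # Report each epoch's validation accuracy to NNI as an intermediate result
    def on_epoch_end(self, epoch, logs=None):
        logs = logs or {}
        if 'val_acc' in logs:
            nni.report_intermediate_result(logs['val_acc'])

To use it, add SendMetrics() to the callbacks list passed to model.fit.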

Use nni.get_next_parameter() to get a combination of hyper-parameters from NNI:

if __name__ == '__main__':
    try:
        # get parameters from tuner
        tuner_params = nni.get_next_parameter()
        main(tuner_params)
    except Exception as exception:
        logger.exception(exception)
        raise
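
As a side note, the official NNI examples often merge the tuner's parameters into a set of defaults, so the script can also run standalone outside an nnictl experiment. A sketch of that pattern, with hypothetical fallback values assumed from the search space defined in the next section:

def get_default_params():
    # Hypothetical fallback values, picked from the search space below
    return {
        'min_learning_rate': 0.5e-4,
        'learning_rate_patience': 10,
        'min_delta': 0.001,
        'early_stopping_patience': 10,
        'batch_size': 32
    }

if __name__ == '__main__':
    try:
        params = get_default_params()
        # Tuner values override the defaults; 'or {}' guards a standalone run
        params.update(nni.get_next_parameter() or {})
        main(params)
    except Exception as exception:
        logger.exception(exception)
        raise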

3. Configure NNI

3.1 Search space definition

Define the search space in a JSON file search_space.json; details of the search space specification can be found in the NNI documentation.

{
    "min_learning_rate":{"_type":"choice","_value":[0.5e-6, 0.5e-5, 0.5e-4, 0.5e-3, 0.5e-2]},
    "learning_rate_patience":{"_type":"choice","_value":[5, 10, 15, 20]},
    "min_delta":{"_type":"choice","_value":[0.0001, 0.001, 0.01]},
    "early_stopping_patience":{"_type":"choice","_value":[5, 10, 15, 20]},
    "batch_size":{"_type":"choice","_value":[16, 32, 64]}
}
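
Every hyper-parameter here uses the choice type, so each trial receives one sampled value per key; the dict handed to main() by nni.get_next_parameter() looks like this (values are illustrative):

# One possible sampled combination (illustrative values)
params = {
    'min_learning_rate': 0.5e-5,
    'learning_rate_patience': 5,
    'min_delta': 0.0001,
    'early_stopping_patience': 15,
    'batch_size': 32
}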

3.2 Config file

Define the config information in config.yml; this is the boot configuration for NNI.

authorName: Dinghow
experimentName: image_classify
trialConcurrency: 1
maxExecDuration: 1h
maxTrialNum: 10
#choice: local, remote, pai
trainingServicePlatform: local
searchSpacePath: search_space.json
#choice: true, false
useAnnotation: false
tuner:
  #choice: TPE, Random, Anneal, Evolution, BatchTuner
  #SMAC (SMAC should be installed through nnictl)
  builtinTunerName: TPE
  classArgs:
    #choice: maximize, minimize
    optimize_mode: maximize
trial:
  command: python CAM_GRAD.py
  codeDir: .
  gpuNum: 0

3.3 Run NNI

nnictl create --config config.yml
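
If the experiment starts successfully, nnictl prints the WebUI URL to the console; when you are done, shut the experiment down with nnictl stop.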

4. Training result

Open the WebUI URL to view the experiment.


I trained for just 50 epochs; the best metric is 0.742424, and two combinations of parameters reached it:

Combination 1:

  • min_learning_rate: 0.000005
  • learning_rate_patience: 5
  • min_delta: 0.0001
  • early_stopping_patience: 15
  • batch_size: 32

Combination 2:

  • min_learning_rate: 0.000005
  • learning_rate_patience: 10
  • min_delta: 0.0001
  • early_stopping_patience: 5
  • batch_size: 16

As a first try at AutoML, I used limited data and didn't train for enough epochs; I think data augmentation and more training data would lead to better performance.