How to build an AI app that classifies images of dogs according to their breed?

This is the Capstone Project of Udacity's Data Scientist Nanodegree.

Project Overview

Problem Statement

Metrics

(Image provided by Google's Machine Learning Crash Course)
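Assuming the evaluation metric here is classification accuracy (the fraction of images whose predicted breed matches the true label), here is a minimal sketch of how it can be computed for a batch of model outputs; the names are illustrative, not from the original notebook:

def accuracy(outputs, labels):
    # index of the highest score per image = predicted class
    preds = outputs.argmax(dim=1)
    # fraction of predictions that match the true labels
    return (preds == labels).float().mean().item()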

Data Exploration and Visualization

Datasets and Inputs

There are 13233 total human images.
There are 8351 total dog images.
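These counts come from simply globbing the image folders. A minimal sketch, assuming the standard lfw/ and dogImages/ directory layout used by the project:

import numpy as np
from glob import glob

# load filenames for human and dog images
human_files = np.array(glob("lfw/*/*"))
dog_files = np.array(glob("dogImages/*/*/*"))

print(f"There are {len(human_files)} total human images.")
print(f"There are {len(dog_files)} total dog images.")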

Data Visualization

Methodology

Data Preprocessing

# convert a BGR image (as loaded by OpenCV) to grayscale for the face detector
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# transform applied to a single image before feeding it to a pre-trained network
in_transform = transforms.Compose([
    transforms.Resize(255),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])])

# Define transforms for the training, testing and validation data
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomRotation(30),
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])]),
    'test': transforms.Compose([
        transforms.Resize(255),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])]),
    'valid': transforms.Compose([
        transforms.Resize(255),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])])}
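These transforms are then plugged into torchvision ImageFolder datasets and PyTorch DataLoaders. A minimal sketch, assuming a dogImages/ folder with train, valid and test subfolders (the exact paths and batch size are assumptions):

from torchvision import datasets
from torch.utils.data import DataLoader

# one ImageFolder dataset and one DataLoader per split
image_datasets = {split: datasets.ImageFolder(f"dogImages/{split}",
                                              transform=data_transforms[split])
                  for split in ['train', 'valid', 'test']}
loaders = {split: DataLoader(image_datasets[split], batch_size=32,
                             shuffle=(split == 'train'))
           for split in ['train', 'valid', 'test']}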

Implementation

How to detect humans in an image?

import cv2

# pre-trained Haar cascade face detector; this path assumes the file shipped
# with the opencv-python package (the original project loads an equivalent
# haarcascade_frontalface_alt.xml from its own haarcascades/ folder)
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_alt.xml')

def face_detector(img_path):
    """
    INPUT
        img_path - a string-valued file path to an image
    OUTPUT
        returns True if a face is detected in the image stored at img_path
    """
    img = cv2.imread(img_path)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray)
    return len(faces) > 0
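To get a feel for how well this works, the detector can be spot-checked on a sample of human and dog images. A small sketch, assuming human_files and dog_files are the file lists loaded earlier:

# count faces detected in the first 100 human / dog images
# (with a sample of 100, the count is also the percentage)
human_faces = sum(face_detector(f) for f in human_files[:100])
dog_faces = sum(face_detector(f) for f in dog_files[:100])
print(f"Faces detected in {human_faces}% of the first 100 human images")
print(f"Faces detected in {dog_faces}% of the first 100 dog images")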

How to detect dogs in an image?

import torch
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image

# pre-trained VGG-16 model with ImageNet weights
VGG16 = models.vgg16(pretrained=True)

# move the model to GPU if one is available
use_cuda = torch.cuda.is_available()
if use_cuda:
    VGG16 = VGG16.cuda()

def VGG16_predict(img_path):
    '''
    Use the pre-trained VGG-16 model to obtain the index corresponding to the
    predicted ImageNet class for the image at the specified path.

    INPUT:
        img_path - a string-valued file path to an image
    OUTPUT:
        Index corresponding to VGG-16 model's prediction
    '''
    ## Load and pre-process an image from the given img_path
    ## Return the *index* of the predicted class for that image

    # load the image
    image = Image.open(img_path)
    # transform the image
    in_transform = transforms.Compose([
        transforms.Resize(255),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])])

    # discard the transparent alpha channel (that's the :3) and add the batch dimension
    image = in_transform(image)[:3, :, :].unsqueeze(0)
    VGG16.eval()

    if use_cuda:
        image = image.cuda()

    output = VGG16(image)

    # index of the predicted class, as a plain Python int
    return output.argmax(dim=1).item()
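With the class index in hand, detecting a dog reduces to checking whether the prediction falls in the ImageNet dog range (indices 151, 'Chihuahua', through 268, 'Mexican hairless'). A short sketch of the dog_detector used later by the app:

def dog_detector(img_path):
    """Return True if a dog is detected in the image stored at img_path."""
    idx = VGG16_predict(img_path)
    # ImageNet categories 151-268 correspond to dog breeds
    return 151 <= idx <= 268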

How to create my own CNN to classify dog breeds?

Baseline Model Architecture

import torch
import torch.nn as nn
import torch.nn.functional as F
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

# number of dog breed classes in the dataset
num_classes = 133

class Net(nn.Module):
    """
    A class to define the CNN architecture of our model.
    """
    def __init__(self):
        """
        In the constructor we define three convolutional layers, a max pooling layer,
        two fully connected layers and a dropout layer with a drop probability of 0.3.
        """
        super(Net, self).__init__()
        ## Define layers of a CNN
        # input: 224*224*3
        self.conv1 = nn.Conv2d(3, 32, 3, stride=2, padding=1)
        # after conv1 + pool: 56*56*32
        self.conv2 = nn.Conv2d(32, 64, 3, stride=2, padding=1)
        # after conv2 + pool: 14*14*64
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
        # after conv3 + pool: 7*7*128

        # pool
        self.pool = nn.MaxPool2d(2, 2)

        # fully-connected
        self.fc1 = nn.Linear(7*7*128, 500)
        self.fc2 = nn.Linear(500, num_classes)

        # drop-out
        self.dropout = nn.Dropout(0.3)

    def forward(self, x):
        """
        A function that defines forward behavior. In the forward function we accept a Tensor
        of input data and must return a Tensor of output data, using the Modules defined in
        the constructor.
        INPUT
            x - a Tensor of input data
        OUTPUT
            x - a Tensor of output data
        """
        # Pass data through conv1
        # Use the rectified-linear activation function over x
        x = F.relu(self.conv1(x))
        # Run max pooling over x
        x = self.pool(x)
        # Pass data through conv2
        # Use the rectified-linear activation function over x
        x = F.relu(self.conv2(x))
        # Run max pooling over x
        x = self.pool(x)
        # Pass data through conv3
        # Use the rectified-linear activation function over x
        x = F.relu(self.conv3(x))
        # Run max pooling over x
        x = self.pool(x)

        # flatten the tensor
        x = x.view(-1, 7*7*128)

        # Pass data through dropout
        x = self.dropout(x)
        # Pass data through fc1 and apply relu
        x = F.relu(self.fc1(x))

        # Pass data through dropout
        x = self.dropout(x)
        # Pass data through fc2 (no activation: raw scores for the loss function)
        x = self.fc2(x)
        return x
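Before training this baseline, a loss function and an optimizer are needed. A minimal setup sketch, assuming cross-entropy loss and SGD; the learning rate below is illustrative, not necessarily the value used in the original notebook:

import torch.optim as optim

# instantiate the scratch model and move it to GPU if available
model_scratch = Net()
if use_cuda:
    model_scratch = model_scratch.cuda()

# cross-entropy loss expects the raw scores returned by forward()
criterion_scratch = nn.CrossEntropyLoss()
optimizer_scratch = optim.SGD(model_scratch.parameters(), lr=0.01)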

How did I get to my final CNN architecture?

Net(
(conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
(conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
(conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(fc1): Linear(in_features=6272, out_features=500, bias=True)
(fc2): Linear(in_features=500, out_features=133, bias=True)
(dropout): Dropout(p=0.3)
)

How well did my model perform?

Refinement

Let’s use Transfer Learning to create a CNN to classify dog breeds

import torchvision.models as models
import torch.nn as nn

model_transfer = models.densenet121(pretrained=True)

# freeze parameters so we won't backprop through them
for param in model_transfer.features.parameters():
    param.requires_grad = False

## Specify model architecture

# get the in_features from the original classifier
n_inputs = model_transfer.classifier.in_features
# replace the last layer with a new classifier for the 133 dog breeds
model_transfer.classifier = nn.Linear(n_inputs, num_classes)

# move the model to GPU if available (use_cuda was defined in the dog-detector step)
if use_cuda:
    model_transfer = model_transfer.cuda()
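The app below also relies on a predict_breed_transfer helper that maps an image to a breed name. A minimal sketch, assuming class_names is a list of the 133 breed names (e.g. derived from the training ImageFolder classes) and reusing the in_transform defined in the preprocessing step:

def predict_breed_transfer(img_path):
    """Return the breed name predicted by the transfer-learning model."""
    # load the image and apply the same preprocessing as at test time
    image = Image.open(img_path).convert('RGB')
    image = in_transform(image).unsqueeze(0)
    if use_cuda:
        image = image.cuda()

    model_transfer.eval()
    with torch.no_grad():
        output = model_transfer(image)

    # map the predicted index to the corresponding breed name
    return class_names[output.argmax(dim=1).item()]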

Results

Model Evaluation and Validation

Justification

Write the algorithm

from PIL import Image
import matplotlib.pyplot as plt

def run_app(img_path):
    """
    INPUT
        img_path - a string-valued file path to an image
    """
    ## handle cases for a human face, dog, and neither
    image = Image.open(img_path)
    plt.imshow(image)
    plt.show()

    if dog_detector(img_path):
        print(predict_breed_transfer(img_path))
    elif face_detector(img_path):
        print(f"You look like a {predict_breed_transfer(img_path)}")
    else:
        print("Error: neither a dog nor a human face was detected in this image.")

Let’s test it
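A quick way to exercise the app is to run it on a handful of images containing dogs, humans, and neither; the paths below are placeholders:

# try the app on a few sample images
for img_path in ["images/sample_dog.jpg",
                 "images/sample_human.jpg",
                 "images/sample_car.jpg"]:
    run_app(img_path)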

Conclusions

Reflection and Improvement
