Intro to Object Detection using FasterRCNN

Computer Vision

Kashish Mukheja


Sunday, 03 September 2023

Mission Statement :

Identify the cars in a dataset of ~16,000 images using computer-vision techniques.

Objectives :

  • To import, understand and preprocess the data so that it is ready for model building
  • Summarize the valuable insights from the data using EDA
  • Design a basic Deep Learning based car identification model
  • Importing Necessary libraries and packages

    import os # To use terminal commands inside python for working on files and  directories
    # Data transformation libraries
    import pandas as pd
    import numpy as np
    import copy
    # Visualization libraries
    import seaborn as sns
    import matplotlib.pyplot as plt
    # Scikit-learn packages
    from sklearn.preprocessing import LabelEncoder # For label encoding the class labels
    from sklearn.model_selection import ShuffleSplit
    ## Other libraries helpful for CV tasks
    import cv2 # computer vision related packages for reading and playing with Image data
    # Necessary packages for getting image editing capabilities
    from PIL import Image
    import PIL
    from prettytable import PrettyTable # To format the results with good formatting
    from zipfile import ZipFile # For extracting .zip files
    import pickle # For exporting and importing the model files
    # Necessary Pytorch libraries
    import torch
    import torch.nn as nn
    from torch.utils.data import Dataset,DataLoader
    from torch.utils.data import Subset
    import torchvision.transforms.functional as tf
    import torch.optim as optim
    import torchvision
    from torchvision.models.detection import fasterrcnn_resnet50_fpn
    from torchvision.models.detection import FasterRCNN_ResNet50_FPN_Weights
    from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
    %matplotlib inline
    # For plotting images inside the notebook
    from google.colab import drive
    Mounted at /content/drive

    Step 1: Import the data.

    Extracting the necessary zip files for the problem and if already extracted, skipping the file extraction

        with ZipFile('Car', 'r') as z:
        with ZipFile('', 'r') as z:

    Step 2: Map training and testing images to its classes.

  • Creating a dataframe to store image name, image details and image path for using while modelling
  • Using these details, we can merge with the annotations and the car name/make files to get more information about the images present in the train and test folders
    # Get all the image names into a df, then merge it with the images data on Image name + image_name
    # Creating the placeholders for storing the values while reading the images in train and test data
    # Iterating through train images folder to get image_path, image_name and car_name_make_details
    # Collect the path of every .jpg found under 'car Images/Train Images/<car folder>/'.
    # The membership test keeps the original guard: nothing is collected unless a
    # 'Train Images' entry exists inside 'car Images'.
    if 'Train Images' in os.listdir('car Images'):
        for car_folder in os.listdir(os.path.join('car Images','Train Images')):
            # Neglecting temporary folders created at runtime and taking only the required folders
            if car_folder in ('.DS_Store','.ipynb_checkpoints'):
                continue
            for car_image_name in os.listdir(os.path.join('car Images/Train Images',car_folder)):
                # splitext inspects the real extension; split('.')[1] raised IndexError on names
                # without a dot and compared the wrong piece for names containing several dots
                if os.path.splitext(car_image_name)[1]=='.jpg':
                    train_car_images_path_list.append('car Images/Train Images/'+car_folder+'/'+car_image_name)
    # Iterating through test images folder to get image_path, image_name and car_name_make_details
    # Collect the path of every .jpg found under 'car Images/Test Images/<car folder>/'.
    # The membership test keeps the original guard: nothing is collected unless a
    # 'Test Images' entry exists inside 'car Images'.
    if 'Test Images' in os.listdir('car Images'):
        for car_folder in os.listdir(os.path.join('car Images','Test Images')):
            # Neglecting temporary folders created at runtime and taking only the required folders
            if car_folder in ('.DS_Store','.ipynb_checkpoints'):
                continue
            for car_image_name in os.listdir(os.path.join('car Images/Test Images',car_folder)):
                # splitext inspects the real extension; split('.')[1] raised IndexError on names
                # without a dot and compared the wrong piece for names containing several dots
                if os.path.splitext(car_image_name)[1]=='.jpg':
                    test_car_images_path_list.append('car Images/Test Images/'+car_folder+'/'+car_image_name)
    # Storing all the extracted information into the train and test dataframes created earlier
    print('train_images_df shape:',train_images_df.shape)
    print('test_images_df shape:',test_images_df.shape)
    train_images_df shape: (8144, 3)
    test_images_df shape: (8041, 3)
  • Merging the above dataframe with car name and make file to get the class label details
  • These labels can be used as classes during the modelling
    car_name_make_df=pd.read_csv('Car+names+and+make.csv',header=None)
    # Give the first (unnamed) column of the name/make file a readable label
    column_labels = {0: 'car_name_make'}
    car_name_make_df.rename(columns=column_labels, inplace=True)
    # Image folder names use '-' where the name/make file uses '/', so swap the
    # separator to let the two sources match
    car_name_make_df['car_name_make'] = [
        name.replace('/', '-') for name in car_name_make_df['car_name_make']
    ]
    print('Car names and make file shape:', car_name_make_df.shape)
    # Merging with previous data frame on id='car_name_make' to get all details
    # about train and test dataframes into a single entity
    print('train_df shape:',train_df.shape)
    print('test_df shape:',test_df.shape)
    Car names and make file shape: (196, 2)
    train_df shape: (8144, 4)
    test_df shape: (8041, 4)

    Viewing sample output of the dataframe in both train and test data

    image_path image_name car_name_make image_class
    0 car Images/Train Images/Dodge Dakota Crew Cab ... 03132.jpg Dodge Dakota Crew Cab 2010 90
    1 car Images/Train Images/Dodge Dakota Crew Cab ... 03984.jpg Dodge Dakota Crew Cab 2010 90
    2 car Images/Train Images/Dodge Dakota Crew Cab ... 08090.jpg Dodge Dakota Crew Cab 2010 90
    3 car Images/Train Images/Dodge Dakota Crew Cab ... 02012.jpg Dodge Dakota Crew Cab 2010 90
    4 car Images/Train Images/Dodge Dakota Crew Cab ... 07660.jpg Dodge Dakota Crew Cab 2010 90
    image_path image_name car_name_make image_class
    0 car Images/Test Images/GMC Canyon Extended Cab... 02668.jpg GMC Canyon Extended Cab 2012 122
    1 car Images/Test Images/GMC Canyon Extended Cab... 01433.jpg GMC Canyon Extended Cab 2012 122

    Step 3: Map training and testing images to its annotations.

  • Merging the above dataframe with Annotations files in both train and test folders to get the co-ordinates and bounding box details
  • These details can be used when posing an object detection problem during modelling
  • Also, extracting some details like brand and model year information from the car_name_make to get better EDA insights on the train and test data
    # Reading the Annotations files for both the test and train folders to get image boundary and image_class details
    # Load the train annotation CSV and report how many rows/columns it has
    train_annot_df = pd.read_csv('Annotations/Train Annotations.csv')
    print('Train_annotations shape:', train_annot_df.shape)
    # Same for the test annotation CSV
    test_annot_df = pd.read_csv('Annotations/Test Annotation.csv')
    print('Test_annotations shape:', test_annot_df.shape)
    Train_annotations shape: (8144, 6)
    Test_annotations shape: (8041, 6)
    # Merging with the previous dataframe with id as image_name+Image Name
    # --- train ---
    # Attach the annotation columns: rows pair up where image_name equals the
    # annotation file's 'Image Name'
    train_df = train_df.merge(train_annot_df, left_on='image_name', right_on='Image Name')
    # The annotation copies of the name/class columns are repetitive after the merge
    train_df.drop(columns=['Image Name', 'Image class'], inplace=True)
    # EDA helpers taken from car_name_make: first token -> brand, last token -> model year
    train_df['car_brand'] = [name.split()[0] for name in train_df['car_name_make']]
    train_df['car_make_yr'] = [name.split()[-1] for name in train_df['car_name_make']]
    print('Final_train_data shape:', train_df.shape)

    # --- test (identical steps) ---
    test_df = test_df.merge(test_annot_df, left_on='image_name', right_on='Image Name')
    test_df.drop(columns=['Image Name', 'Image class'], inplace=True)
    test_df['car_brand'] = [name.split()[0] for name in test_df['car_name_make']]
    test_df['car_make_yr'] = [name.split()[-1] for name in test_df['car_name_make']]
    print('Final_test_data shape:', test_df.shape)
    Final_train_data shape: (8144, 10)
    Final_test_data shape: (8041, 10)

    Renaming the columns properly to get the correct vertices details for plotting the boundary box

    # Renaming the image coordinates columns from the annotations file for better intuition and understanding
    # Identified the co-ordinates with real images and then decided with these vertices details for the rectangle BB
    # The four coordinate columns of the annotations file get rectangle-vertex names;
    # one shared mapping is applied in place to both the train and the test frame
    bbox_column_names = {
        'Bounding Box coordinates': 'x1',
        'Unnamed: 2': 'y1',
        'Unnamed: 3': 'x2',
        'Unnamed: 4': 'y2',
    }
    for annotated_df in (train_df, test_df):
        annotated_df.rename(columns=bbox_column_names, inplace=True)

    Viewing sample output and columns present in the final dataframe for both train and test data

    This final data frame should have the following details: image_path, image_name, Annotations(BB-vertices details), image_class, car_name_make, car_brand & car_make_yr

    image_path image_name car_name_make image_class x1 y1 x2 y2 car_brand car_make_yr
    0 car Images/Train Images/Dodge Dakota Crew Cab ... 03132.jpg Dodge Dakota Crew Cab 2010 90 45 14 261 191 Dodge 2010
    1 car Images/Train Images/Dodge Dakota Crew Cab ... 03984.jpg Dodge Dakota Crew Cab 2010 90 8 19 289 180 Dodge 2010
    Index(['image_path', 'image_name', 'car_name_make', 'image_class', 'x1', 'y1',
           'x2', 'y2', 'car_brand', 'car_make_yr'],
    image_path image_name car_name_make image_class x1 y1 x2 y2 car_brand car_make_yr
    0 car Images/Test Images/GMC Canyon Extended Cab... 02668.jpg GMC Canyon Extended Cab 2012 122 13 66 477 228 GMC 2012
    1 car Images/Test Images/GMC Canyon Extended Cab... 01433.jpg GMC Canyon Extended Cab 2012 122 23 41 584 395 GMC 2012


    # Independent copies of the merged train/test frames
    train_dataset = train_df.copy()
    test_dataset = test_df.copy()
    # Google Drive locations of the train images, the test images and the project root
    train_dir = "/content/drive/MyDrive/GL_capstone_project/car Images/Train Images"
    test_dir = "/content/drive/MyDrive/GL_capstone_project/car Images/Test Images"
    image_dir = "/content/drive/MyDrive/GL_capstone_project/"
    # Use the GPU when CUDA is available, otherwise fall back to the CPU
    device="cuda" if torch.cuda.is_available() else "cpu"
    # Number of distinct image names per split; as a bare expression the value is not
    # printed (a notebook only echoes it when it is the last expression of a cell)
    train_df['image_name'].nunique(), test_df['image_name'].nunique()
    print(f'Unique image classes = {train_df.image_class.nunique()}')
    Unique image classes = 196

    Creating custom dataset

    1. Creating a new column called image_id by replacing the string from image_name
    2. Creating tensors as required by input to the model.
    3. We already have our bounding boxes as coordinates. So, we don’t need to perform any transformation there. However, the area for the bounding box is calculated based on \((x2-x1)*(y2-y1)\)
    4. We will use sklearn’s ShuffleSplit function here to split the images based on train and valid (80-20 split)
    # image_id is image_name with a trailing '.jpg' removed; same rule for both frames
    for frame in (train_df, test_df):
        frame['image_id'] = frame['image_name'].str.replace(r'\.jpg$', '', regex=True)
    """The input to the model is expected to be a list of tensors, each of shape [C, H, W],
       one for each image, and should be in 0-1 range. Different images can have different
       sizes. The behavior of the model changes depending on whether it is in training or
       evaluation mode."""
    class CarDataset(Dataset):
        def __init__(self,dataframe,image_dir):
        def __len__(self):
            return len(self.img_list)
        def __getitem__(self,idx):
            # converting bounding box from x0y0wh format to x0y0x1y1 format
            # boxes[:,2]=boxes[:,0]+boxes[:,2]
            # boxes[:,3]=boxes[:,1]+boxes[:,3]
            return img,target
    for train_idx,val_idx in ss.split(indexs):
        print(f"Train dataset length: {len(train_idx)}")
        print(f"Validation dataset length: {len(val_idx)}")
    Train dataset length: 6515
    Validation dataset length: 1629
    len(train_ds), len(val_ds)
    (6515, 1629)
    def show(img,boxes):
        # Draws one rectangle per entry of `boxes` onto `sample` with colour (220, 0, 0) and
        # thickness 3, using (box[0], box[1]) and (box[2], box[3]) as opposite corners.
        # NOTE(review): `sample` is not defined in the visible lines and `img` is unused here;
        # presumably `sample` is built from `img` in lines missing from this listing - confirm.
        for box in boxes:
            cv2.rectangle(sample,(box[0], box[1]),(box[2], box[3]),(220, 0, 0), 3)


    def collate_fn(batch):
        """Regroup a batch of samples field by field.

        ``batch`` is an iterable of samples (for example ``(img, target)`` pairs).
        The result is a tuple containing one tuple per field position; the items
        are only regrouped, never stacked. An empty batch yields ``()``.
        """
        fields = zip(*batch)
        return tuple(fields)
                        pin_memory=True if torch.cuda.is_available else False,
                      pin_memory=True if torch.cuda.is_available else False,


    # load a model pre-trained on COCO
    # replace the classifier with a new one, that has
    # num_classes which is user-defined
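    # 196 car classes (the "1 class (person) + background" wording came from the pedestrian tutorial).
    # NOTE(review): torchvision detection heads reserve label 0 for background, so num_classes
    # normally has to be (object classes + 1) - confirm 196 vs 197 against the labels in the targets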
    num_classes = 196
    # get number of input features for the classifier
    # replace the pre-trained head with a new one
    Downloading: "" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth
    100%|██████████| 160M/160M [00:02<00:00, 79.0MB/s]
    Training The Fine-Tuned Model

    def get_lr(optimizer):
        """Return the ``"lr"`` entry of the optimizer's first parameter group.

        Later groups are ignored. ``None`` is returned when
        ``optimizer.param_groups`` is empty.
        """
        group_rates = (group["lr"] for group in optimizer.param_groups)
        return next(group_rates, None)
    for epoch in range(epochs):
        #During training, the model expects both the input tensors, as well as a targets
        for imgs,targets in train_dl:
            imgs=[ for img in imgs]
            targets=[{ for (k,v) in d.items()} for d in targets]
            """The model returns a Dict[Tensor] during training, containing the classification
               and regression losses for both the RPN and the R-CNN."""
            losses=sum(loss for loss in loss_dict.values())
        with torch.no_grad():
            for imgs,targets in val_dl:
                imgs=[ for img in imgs]
                targets=[{ for (k,v) in d.items()} for d in targets]
                """The model returns a Dict[Tensor] during training, containing the classification
                   and regression losses for both the RPN and the R-CNN."""
                losses=sum(loss for loss in loss_dict.values())
        if current_lr!=get_lr(optimizer):
            print("Loading best Model weights")
        if validation_loss<best_validation_loss:
            print("Updating Best Model weights")
        print(f"Training Loss: {training_loss/train_len}")
        print(f"Validation_loss: {validation_loss/val_len}")
    Updating Best Model weights
    Training Loss: 0.5091633841704151
    Validation_loss: 0.4923232738008669
    Updating Best Model weights
    Training Loss: 0.34851808806999957
    Validation_loss: 0.33369734506891724
    Updating Best Model weights
    Training Loss: 0.2804957070411487
    Validation_loss: 0.29477986226863406
    Updating Best Model weights
    Training Loss: 0.2661970856332907
    Validation_loss: 0.285527507111915
    Training Loss: 0.26277425953473665
    Validation_loss: 0.28928613928909896
    # Plot the training and validation losses from loss_history against the epoch index.
    # The trailing ';' stops a notebook from echoing the object each call returns.
    sns.lineplot(x=range(epochs),y=loss_history["training_loss"],label="Train Losses");
    sns.lineplot(x=range(epochs),y=loss_history["validation_loss"],label="Validation Losses");
    plt.title("Training Validation Datasets Losses Plot");

    Model Save


    Next Steps:

    1. Inference on the model
    2. Performing Data Augmentation, hyperparameter-tuning, different backbone architectures to compare losses.
