# -*- coding: utf-8 -*-
# @Author: Weisen Pan

# Import the libraries needed for image loading and dataset handling.
from PIL import Image  # Used for opening and resizing images.
from cv2 import split  # OpenCV function; not used in this module and likely left over from earlier code.
from torch.utils.data import DataLoader, Dataset  # PyTorch utilities for datasets and batched loading.
import torch  # PyTorch library for tensor operations.


# Custom dataset class 'SkinData', which inherits from PyTorch's Dataset class.
class SkinData(Dataset):

    # Initialize the dataset with a DataFrame (df), an optional transformation
    # (apply_transformation), and a split factor (split_factor).
    def __init__(self, df, apply_transformation=None, split_factor=1):
        self.df = df  # DataFrame containing image paths and target labels.
        self.apply_transformation = apply_transformation  # Optional image transformations (e.g., resizing, normalization).
        self.split_factor = split_factor  # Number of augmented views to produce per image.
        self.test_same_view = False  # If True, every view is the same augmentation of the image.

    # Return the number of samples, which is the number of rows in the DataFrame.
    def __len__(self):
        return len(self.df)

    # Retrieve the image and corresponding label at a specific index.
    def __getitem__(self, index):
        Xs = []  # List that will hold the transformed views of the image.

        # Open the image at the 'path' given by the index in the DataFrame and resize it to 64x64.
        X = Image.open(self.df['path'][index]).resize((64, 64))

        # Read the target label from the 'target' column and convert it to a PyTorch tensor.
        y = torch.tensor(int(self.df['target'][index]))

        if self.test_same_view:
            # Apply the transformation once and repeat the same augmented view 'split_factor' times.
            if self.apply_transformation:
                aug = self.apply_transformation(X)
                Xs = [aug for _ in range(self.split_factor)]
        else:
            # Apply the transformation independently to obtain 'split_factor' different augmentations.
            if self.apply_transformation:
                Xs = [self.apply_transformation(X) for _ in range(self.split_factor)]

        # Concatenate the views along the first dimension (the channel dimension for CHW tensors)
        # and return them with the label. Note that a transformation returning a tensor is required
        # here; with apply_transformation=None, Xs stays empty and torch.cat raises an error.
        return torch.cat(Xs, dim=0), y
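

# ---------------------------------------------------------------------------
# Usage sketch (illustrative addition, not part of the original module): shows
# how SkinData could be paired with torchvision transforms and a DataLoader.
# The CSV file name ('metadata.csv'), its 'path'/'target' columns, and the
# chosen transforms are assumptions for illustration only.
if __name__ == "__main__":
    import pandas as pd
    from torchvision import transforms

    # Each transformed view is a 3x64x64 tensor, so split_factor=2 makes
    # __getitem__ return a 6x64x64 tensor per sample (torch.cat along dim 0).
    transform = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ])

    df = pd.read_csv("metadata.csv")  # Assumed to hold 'path' and 'target' columns.
    dataset = SkinData(df, apply_transformation=transform, split_factor=2)
    loader = DataLoader(dataset, batch_size=32, shuffle=True)

    images, labels = next(iter(loader))
    # For a full batch: images.shape == (32, 6, 64, 64), labels.shape == (32,)
    print(images.shape, labels.shape)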