69 lines
2.4 KiB
CSV
69 lines
2.4 KiB
CSV
import pandas as pd
|
|
import os
|
|
from glob import glob
|
|
from PIL import Image
|
|
import torch
|
|
from sklearn.model_selection import train_test_split
|
|
import pickle
|
|
from torch.utils.data import Dataset, DataLoader
|
|
from torch import nn
|
|
from torchvision import apply_transformations
|
|
|
|
# Read the skin_dataset metadata table from CSV and show its first rows
# as a quick sanity check of the columns that were loaded.
info_mapdata = pd.read_csv(
    filepath_or_buffer='dataset_hub/skin_dataset/data/skin_info_map.csv',
)
print(info_mapdata.head())
|
|
|
|
# Lookup table: short lesion abbreviation -> full, human-readable lesion name.
lesion_labels = {
    "nv": "Melanocytic nevi",
    "mel": "Melanoma",
    "bkl": "Benign keratosis-like lesions",
    "bcc": "Basal cell carcinoma",
    "akiec": "Actinic keratoses",
    "vasc": "Vascular lesions",
    "df": "Dermatofibroma",
}
|
|
|
|
# Index every .jpg found one directory level below the data folder by its
# file name without extension, so all image sub-folders end up in one dict.
# If two folders contain the same file name, the later glob hit wins.
_image_glob = os.path.join("dataset_hub/skin_dataset/data", '*', '*.jpg')
image_paths = {
    os.path.splitext(os.path.basename(jpg_path))[0]: jpg_path
    for jpg_path in glob(_image_glob)
}
|
|
|
|
# Attach three derived columns to the metadata table:
#   image_path - file path looked up by image_id (None when no file matched)
#   cell_type  - full lesion name looked up from the `dx` abbreviation
#   label      - integer class index derived from cell_type
info_mapdata['image_path'] = info_mapdata['image_id'].map(image_paths.get)
info_mapdata['cell_type'] = info_mapdata['dx'].map(lesion_labels.get)
# `Categorical.codes` is the integer code of each row's category; pandas
# assigns -1 to missing values (e.g. a `dx` that is not in lesion_labels).
info_mapdata['label'] = pd.Categorical(info_mapdata['cell_type']).codes

# Display the count of each cell type and of each encoded label
print(info_mapdata['cell_type'].value_counts())
print(info_mapdata['label'].value_counts())
|
|
|
|
# Custom Dataset class for PyTorch
class SkinDataset(Dataset):
    """Map-style dataset that yields ``(image, label)`` pairs from a DataFrame.

    Every row of the DataFrame describes one sample through two columns:
    ``image_path`` (path of the image file to open) and ``label`` (integer
    class index).
    """

    def __init__(self, dataframe, apply_transformation=None):
        """Keep a reference to the metadata table and the optional transform.

        Args:
            dataframe: pandas DataFrame with ``image_path`` and ``label``
                columns.
            apply_transformation: Optional callable applied to the resized
                PIL image before it is returned. ``None`` returns the
                resized image unchanged.
        """
        self.dataframe = dataframe
        self.apply_transformation = apply_transformation

    def __len__(self):
        """Return the number of samples, i.e. the number of DataFrame rows."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Load the sample stored at position ``idx``.

        Args:
            idx: Zero-based row position in the DataFrame.

        Returns:
            A tuple ``(img, label)``: the image resized to 64x64 (after the
            optional transform) and the label as a ``torch.long`` tensor.
        """
        # Look rows up by position, not by index label: positions and labels
        # only coincide when the DataFrame carries a fresh 0..n-1 index.
        image_path = self.dataframe['image_path'].iloc[idx]
        label_value = self.dataframe['label'].iloc[idx]

        # resize() returns a new, fully loaded image, so the underlying file
        # can be closed as soon as the with-block ends.
        with Image.open(image_path) as source:
            img = source.resize((64, 64))
        label = torch.tensor(label_value, dtype=torch.long)

        # Explicit None check so a transform object is never skipped just
        # because it happens to evaluate as falsy.
        if self.apply_transformation is not None:
            img = self.apply_transformation(img)

        return img, label
|
|
|
|
# Hold out 20% of the rows for testing (fixed seed for a reproducible split)
# and renumber each part from 0 so rows can be addressed by position.
train_data, test_data = (
    subset.reset_index(drop=True)
    for subset in train_test_split(info_mapdata, test_size=0.2, random_state=42)
)
|
|
|
|
# Persist both splits as pickle files in the current working directory,
# train first and then test.
for _pickle_name, _split in (
    ("skin_dataset_train.pkl", train_data),
    ("skin_dataset_test.pkl", test_data),
):
    with open(_pickle_name, "wb") as _pickle_file:
        pickle.dump(_split, _pickle_file)
|