# -*- coding: utf-8 -*-
# @Author: Weisen Pan

# Toolchain setup: source the site modules script (expected to provide the
# `module` command), then load each required module in the order listed.
source /etc/profile.d/modules.sh
toolchain_modules=(
    gcc/11.2.0          # GCC compiler 11.2.0
    openmpi/4.1.3       # OpenMPI 4.1.3
    cuda/11.5/11.5.2    # CUDA 11.5.2
    cudnn/8.3/8.3.3     # cuDNN 8.3.3
    nccl/2.11/2.11.4-1  # NCCL 2.11.4
    python/3.10/3.10.4  # Python 3.10.4
)
for toolchain_module in "${toolchain_modules[@]}"; do
    module load "${toolchain_module}"
done
# Drop the helper variables so the rest of the job sees the same shell state.
unset toolchain_modules toolchain_module

# Switch into the Python virtualenv named for PyTorch 1.11 + Horovod
source ~/venv/pytorch1.11+horovod/bin/activate

# Per-job log directory, named from JOB_NAME and JOB_ID (read from the
# environment; presumably set by the job scheduler). Any directory left over
# under the same name is wiped first so the records start clean.
LOG_PATH="/home/projadmin/Federated_Learning/project_EdgeFLite/records/${JOB_NAME}_${JOB_ID}"
# Expansions are quoted so a value containing whitespace or glob characters
# cannot make `rm -rf` operate on unintended paths; `--` ends option parsing.
rm -rf -- "${LOG_PATH}"    # Remove the existing log directory if it exists
mkdir -p -- "${LOG_PATH}"  # Recreate it empty

# Stage the dataset under ${SGE_LOCALDIR}/${JOB_ID}/ (both values are read
# from the environment) so training reads from that location.
DATA_STORAGE="${SGE_LOCALDIR}/${JOB_ID}/"
# The destination is quoted so whitespace in the path cannot split it into
# several arguments. The source path is relative to the directory the script
# is started from, so this must run before changing directory.
# NOTE(review): the source lives under cifar100/ while the training command in
# this script selects --dataset="cifar10" -- confirm this is the intended data.
cp -r -- ../summit2024/simpleFL/performance_test/cifar100/data "${DATA_STORAGE}"

# Enter the project directory that holds the training scripts. Abort the job
# if it is missing: continuing would launch the training command from the
# wrong directory.
cd EdgeFLite || {
    printf 'error: cannot change directory to EdgeFLite (from %s)\n' "${PWD}" >&2
    exit 1
}

# Launch the training script with the federated-learning configuration.
#
# The options are collected in an array rather than a backslash-continued
# command: a trailing comment after an option ends the line without a
# continuation, so in the continued form only the first option reached
# python and every following line was executed as its own (failing) command.
# An array allows one commented option per line and passes each as one word.
TRAIN_ARGS=(
    --is_fed=1                          # Enable federated learning mode
    --fixed_cluster=0                   # Allow dynamic cluster selection
    --split_factor=1                    # Split factor
    --num_clusters=20                   # Number of clusters
    --num_selected=20                   # Clusters selected each round
    --arch="wide_resnet16_8"            # Model architecture
    --dataset="cifar10"                 # Dataset name
    --num_classes=10                    # Number of classes
    --is_single_branch=0                # Multiple branches (not single branch)
    --is_amp=0                          # Automatic mixed precision off
    --num_rounds=300                    # Number of federated rounds
    --fed_epochs=1                      # Epochs per round
    --cifar10_non_iid="quantity_skew"   # Non-IID split: quantity skew
    --spid="FGKT_W168_20c_skew"         # Identifier string for this run
    --data="${DATA_STORAGE}"            # Local data path (quoted: one word)
)
python run_gkt.py "${TRAIN_ARGS[@]}"