# -*- coding: utf-8 -*-
# @Author: Weisen Pan

# Load necessary modules and dependencies
source /etc/profile.d/modules.sh
# Load GCC 11.2.0
module load gcc/11.2.0
# Load OpenMPI 4.1.3 for distributed computing
module load openmpi/4.1.3
# Load CUDA 11.5 (11.5.2) for GPU acceleration
module load cuda/11.5/11.5.2
# Load cuDNN 8.3 (8.3.3) for deep learning operations
module load cudnn/8.3/8.3.3
# Load NCCL 2.11 (2.11.4-1) for multi-GPU communication
module load nccl/2.11/2.11.4-1
# Load Python 3.10 (3.10.4)
module load python/3.10/3.10.4

# Activate the Python virtual environment for PyTorch 1.11 + Horovod
source ~/venv/pytorch1.11+horovod/bin/activate

# Configure the output log directory and clean up any existing records
OUTPUT_LOG_DIR="/home/projadmin/Federated_Learning/project_EdgeFLite/records/${JOB_NAME}_${JOB_ID}"
# Remove any previous log files for this job
rm -rf "${OUTPUT_LOG_DIR}"
# Create a fresh directory for storing logs
mkdir -p "${OUTPUT_LOG_DIR}"

# Stage the dataset on node-local storage for faster access during training
LOCAL_DATA_PATH="${SGE_LOCALDIR}/${JOB_ID}/"
# Copy the dataset files from the performance test directory to the local directory
cp -r ../summit2024/simpleFL/performance_test/cifar100/data "${LOCAL_DATA_PATH}"

# Switch to the working directory containing the EdgeFLite training scripts
cd EdgeFLite

# Run the federated learning training script with the specified settings:
#   --is_fed=1            enable federated learning
#   --fixed_cluster=0     disable fixed clusters
#   --split_factor=1      data split factor
#   --num_clusters=20     number of client clusters
#   --num_selected=20     number of clients selected per round
#   --arch                Wide ResNet 16-8 backbone
#   --dataset/--num_classes  CIFAR-10 with 10 classes
#   --is_single_branch=0  multi-branch training
#   --is_amp=0            disable automatic mixed precision (AMP)
#   --num_rounds=300      number of training rounds
#   --fed_epochs=1        federated learning epochs per round
#   --spid                session ID for this run
#   --data                path to the locally staged dataset
python run_gkt.py \
    --is_fed=1 \
    --fixed_cluster=0 \
    --split_factor=1 \
    --num_clusters=20 \
    --num_selected=20 \
    --arch="wide_resnet16_8" \
    --dataset="cifar10" \
    --num_classes=10 \
    --is_single_branch=0 \
    --is_amp=0 \
    --num_rounds=300 \
    --fed_epochs=1 \
    --spid="fedgkt_wrn168_split1_cifar10_20clients_20choose_300rounds" \
    --data="${LOCAL_DATA_PATH}"
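
# ---------------------------------------------------------------------------
# Example submission (a hedged sketch, not part of the original script):
# the script relies on SGE-provided variables (JOB_NAME, JOB_ID, SGE_LOCALDIR),
# so it is assumed to be submitted through an SGE-style scheduler such as
# qsub. The group name, walltime, and script filename below are placeholders,
# not values taken from this repository; adjust them to your cluster's policy.
#
#   qsub -g <your_group> -l h_rt=24:00:00 run_gkt_cifar10.sh
# ---------------------------------------------------------------------------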