#!/bin/bash
# -*- coding: utf-8 -*-
# @Author: Weisen Pan
#
# Batch job script: loads the toolchain modules, activates the Python virtual
# environment, stages the CIFAR-100 data into job-local storage, and launches
# run_gkt.py from the EdgeFLite directory.
#
# Environment variables read (not set here):
#   JOB_NAME, JOB_ID  - used to build the per-job log directory name
#   SGE_LOCALDIR      - base of the job-local storage used for the dataset
#
# NOTE(review): the relative paths below (../summit2024/..., EdgeFLite) are
# resolved against the directory the job starts in - confirm the submit
# directory matches.

# Make the `module` command available in this shell.
source /etc/profile.d/modules.sh

# Toolchain and runtime modules (versions pinned).
module load gcc/11.2.0
module load openmpi/4.1.3
module load cuda/11.5/11.5.2
module load cudnn/8.3/8.3.3
module load nccl/2.11/2.11.4-1
module load python/3.10/3.10.4

# Activate the virtual environment for PyTorch and Horovod.
source ~/venv/pytorch1.11+horovod/bin/activate

# Per-job log directory: remove any previous contents, then recreate it.
LOG_OUTPUT="/home/projadmin/Federated_Learning/project_EdgeFLite/records/${JOB_NAME}_${JOB_ID}"
# `:?` aborts instead of running `rm -rf` on an empty path.
rm -rf -- "${LOG_OUTPUT:?}"
mkdir -p -- "${LOG_OUTPUT}"

# Stage the dataset into job-local storage.
LOCAL_DATA_DIR="${SGE_LOCALDIR}/${JOB_ID}/"
cp -r -- ../summit2024/simpleFL/performance_test/cifar100/data "${LOCAL_DATA_DIR}"

# Run from the EdgeFLite project directory; stop if it is missing so python is
# not launched from the wrong place.
cd EdgeFLite || {
  printf 'error: cannot cd to EdgeFLite\n' >&2
  exit 1
}

# Arguments for run_gkt.py. They are collected in an array because a trailing
# "\ # comment" does NOT continue a line in the shell: the backslash escapes
# the following space and the comment then terminates the command. Inside an
# array literal, comments between elements are allowed.
run_args=(
  --is_fed=1                        # enable federated learning
  --fixed_cluster=0                 # disable fixed cluster settings
  --split_factor=1                  # split factor of 1
  --num_clusters=20                 # 20 clusters
  --num_selected=20                 # 20 clients selected per round
  --arch=resnet_model_110sl         # model architecture identifier
  --dataset=cifar100                # CIFAR-100 dataset
  --num_classes=100                 # 100 classes
  --is_single_branch=0              # single-branch mode off
  --is_amp=0                        # automatic mixed precision off
  --num_rounds=650                  # 650 communication rounds
  --fed_epochs=1                    # 1 federated epoch
  --cifar100_non_iid=quantity_skew  # non-IID partitioning: quantity skew
  --spid=FGKT_R110_20c_skew         # experiment ID
  --data="${LOCAL_DATA_DIR}"        # dataset path in job-local storage
)

python run_gkt.py "${run_args[@]}"