Weisen Pan a877aed45f AI-based CFN Traffic Control and Computer Force Scheduling
Change-Id: I16cd7730c1e0732253ac52f51010f6b813295aa7
2023-11-03 00:09:19 -07:00

132 lines
3.5 KiB
Go

package utils
// Author: Weisen Pan
// Date: 2023-10-24
import (
"fmt"
"log"
"github.com/sirupsen/logrus"
corev1 "k8s.io/api/core/v1"
api "k8s.io/kubernetes/knets_pkg/apis/core"
simontype "github.com/hkust-adsl/kubernetes-scheduler-simulator/knets_pkg/type"
"github.com/hkust-adsl/kubernetes-scheduler-simulator/knets_pkg/type/open-gpu-share/utils"
)
// Keys of the Requested and Allocatable maps carried by AllocAmount.
const (
	// ResourceMilliCpu keys CPU amounts (filled from Quantity.MilliValue).
	// NOTE(review): the value is "MilliCpuLeft" while the sibling keys carry no
	// "Left" suffix — confirm this is intentional before relying on it.
	ResourceMilliCpu = "MilliCpuLeft"
	// ResourceMemory keys memory amounts (filled from Quantity.Value).
	ResourceMemory = "Memory"
	// ResourceGpu keys whole-GPU counts.
	ResourceGpu = "Gpu"
	// ResourceMilliGpu keys GPU amounts expressed in milli-GPU units.
	ResourceMilliGpu = "MilliGpu"
)

// resourceList fixes the order in which resources appear in the allocation
// report.
var resourceList = []string{
	ResourceMilliCpu,
	ResourceMemory,
	ResourceGpu,
	ResourceMilliGpu,
}
// ResourceSummary is one row of an allocation report: a resource name together
// with the requested and allocatable totals recorded for it.
type ResourceSummary struct {
	name                   string
	requested, allocatable int64
}
// AllocAmount holds requested and allocatable resource totals, keyed by
// resource name, for the node (or aggregate) identified by NodeName.
type AllocAmount struct {
	NodeName    string
	Requested   map[string]int64
	Allocatable map[string]int64
}

// Add accumulates b's amounts into a, key by key.
//
// The receiver is a value, but its maps are shared with the caller, so the
// sums are visible through the caller's AllocAmount. For the same reason the
// receiver's maps must already be initialized whenever b has entries to
// merge; Add cannot allocate them on the caller's behalf.
//
// Behavior:
//   - b with no entries: nothing changes, nil is returned.
//   - a with no entries: a becomes a copy of b's entries.
//   - otherwise a and b must have the same number of Requested and of
//     Allocatable entries; a mismatch returns an error and leaves a untouched.
//
// Requested and Allocatable are merged independently, so an allocatable amount
// is never dropped just because its key is missing from Requested.
func (a AllocAmount) Add(b AllocAmount) error {
	if len(b.Requested) == 0 && len(b.Allocatable) == 0 {
		return nil
	}

	receiverEmpty := len(a.Requested) == 0 && len(a.Allocatable) == 0
	if !receiverEmpty &&
		(len(a.Requested) != len(b.Requested) || len(a.Allocatable) != len(b.Allocatable)) {
		return fmt.Errorf("len(a)(%d, %d) != len(b)(%d, %d)",
			len(a.Requested), len(a.Allocatable), len(b.Requested), len(b.Allocatable))
	}

	// Writing to a nil map panics; report it as an error before mutating
	// anything so a failed Add never leaves a half-merged receiver.
	if a.Requested == nil && len(b.Requested) > 0 {
		return fmt.Errorf("cannot add %q into %q: receiver Requested map is nil", b.NodeName, a.NodeName)
	}
	if a.Allocatable == nil && len(b.Allocatable) > 0 {
		return fmt.Errorf("cannot add %q into %q: receiver Allocatable map is nil", b.NodeName, a.NodeName)
	}

	// For an empty receiver "+=" on a missing key is a plain copy, so one
	// loop covers both the initial copy and the subsequent accumulation.
	for k, v := range b.Requested {
		a.Requested[k] += v
	}
	for k, v := range b.Allocatable {
		a.Allocatable[k] += v
	}
	return nil
}
// ReportNodeAllocationRate sums the per-node amounts in aamap into a single
// cluster-wide total, logs the requested/allocatable ratio of every resource in
// resourceList, and returns one ResourceSummary per resource in that order.
//
// A resource whose allocatable total is zero is reported with a ratio of 0.
// A node whose amounts cannot be merged (Add returns an error) is logged and
// left out of the totals.
func ReportNodeAllocationRate(aamap map[string]AllocAmount) []ResourceSummary {
	clusterAllocAmount := AllocAmount{
		NodeName:    "cluster",
		Requested:   make(map[string]int64),
		Allocatable: make(map[string]int64),
	}
	for nodeName, amount := range aamap {
		if err := clusterAllocAmount.Add(amount); err != nil {
			log.Printf("allocation report: cannot merge node %q: %v\n", nodeName, err)
		}
	}

	// Printf rather than Infof: the standard library logger has no Infof, and
	// logrus's Printf logs at info level, so these calls are valid whichever
	// of the two packages the name "log" is bound to in this file.
	log.Printf("Allocation Ratio:\n")
	rs := make([]ResourceSummary, 0, len(resourceList))
	for _, k := range resourceList {
		rval := clusterAllocAmount.Requested[k]
		aval := clusterAllocAmount.Allocatable[k]
		var ratio float64
		if aval != 0 {
			ratio = 100.0 * float64(rval) / float64(aval)
		}
		log.Printf(" %-8s: %4.1f%% (%d/%d)\n", k, ratio, rval, aval)
		rs = append(rs, ResourceSummary{name: k, requested: rval, allocatable: aval})
	}
	return rs
}
// GetNodeAllocMap returns one AllocAmount per entry of nodeStatus, keyed by
// node name.
//
// Allocatable amounts: CPU (milli-value) and memory come from the node's
// Status.Allocatable; the GPU count comes from utils.GetGpuCountOfNode and the
// milli-GPU amount is that count times utils.MILLI.
//
// Requested amounts: CPU and memory come from
// GetPodsTotalRequestsAndLimitsByNodeName over all pods of nodeStatus; GPU
// figures come from the node's GPU annotation, counting a device as requested
// when its used milli amount is positive. If the annotation cannot be read or
// is absent, both GPU figures stay 0.
//
// The returned error is always nil.
func GetNodeAllocMap(nodeStatus []simontype.NodeStatus) (map[string]AllocAmount, error) {
	pods := GetAllPodsPtrFromNodeStatus(nodeStatus)
	result := make(map[string]AllocAmount)

	for _, status := range nodeStatus {
		node := status.Node

		// Allocatable side.
		capacity := make(map[string]int64)
		for resName, qty := range node.Status.Allocatable {
			if key := resName.String(); key == api.ResourceCPU.String() {
				capacity[ResourceMilliCpu] = qty.MilliValue()
			} else if key == api.ResourceMemory.String() {
				capacity[ResourceMemory] = qty.Value()
			}
		}
		gpuCount := int64(utils.GetGpuCountOfNode(node))
		capacity[ResourceGpu] = gpuCount
		capacity[ResourceMilliGpu] = gpuCount * utils.MILLI

		// Requested side. The second result of the helper is not needed here.
		podReqs, _ := GetPodsTotalRequestsAndLimitsByNodeName(pods, node.Name)
		// Copy the map entries into locals before calling their methods.
		cpuReq := podReqs[corev1.ResourceCPU]
		memReq := podReqs[corev1.ResourceMemory]

		var gpusInUse, milliGpuInUse int64
		gpuInfo, err := GetGpuNodeInfoFromAnnotation(node)
		if err == nil && gpuInfo != nil {
			for _, dev := range gpuInfo.DevsBrief {
				if dev.GpuUsedMilli > 0 {
					gpusInUse++
				}
				milliGpuInUse += dev.GpuUsedMilli
			}
		}

		usage := map[string]int64{
			ResourceMilliCpu: cpuReq.MilliValue(),
			ResourceMemory:   memReq.Value(),
			ResourceMilliGpu: milliGpuInUse,
			ResourceGpu:      gpusInUse,
		}

		result[node.Name] = AllocAmount{
			NodeName:    node.Name,
			Requested:   usage,
			Allocatable: capacity,
		}
	}
	return result, nil
}