Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feature: Add podgroups statistics #3751

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ replace (
k8s.io/component-helpers => k8s.io/component-helpers v0.31.1
k8s.io/controller-manager => k8s.io/controller-manager v0.31.1
k8s.io/cri-api => k8s.io/cri-api v0.31.1
k8s.io/cri-client => k8s.io/cri-client v0.31.1
k8s.io/csi-translation-lib => k8s.io/csi-translation-lib v0.31.1
k8s.io/dynamic-resource-allocation => k8s.io/dynamic-resource-allocation v0.31.1
k8s.io/endpointslice => k8s.io/endpointslice v0.31.1
Expand Down
5 changes: 4 additions & 1 deletion installer/helm/chart/volcano/templates/controllers.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,11 @@ rules:
resources: ["secrets"]
verbs: ["get", "create", "delete", "update"]
- apiGroups: ["scheduling.incubator.k8s.io", "scheduling.volcano.sh"]
resources: ["podgroups", "queues", "queues/status"]
resources: ["podgroups", "queues"]
verbs: ["get", "list", "watch", "create", "delete", "update"]
- apiGroups: [ "scheduling.incubator.k8s.io", "scheduling.volcano.sh" ]
resources: [ "queues/status" ]
verbs: ["get", "watch", "patch"]
- apiGroups: ["flow.volcano.sh"]
resources: ["jobflows", "jobtemplates"]
verbs: ["get", "list", "watch", "create", "delete", "update"]
Expand Down
5 changes: 4 additions & 1 deletion installer/volcano-development.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4318,8 +4318,11 @@ rules:
resources: ["secrets"]
verbs: ["get", "create", "delete", "update"]
- apiGroups: ["scheduling.incubator.k8s.io", "scheduling.volcano.sh"]
resources: ["podgroups", "queues", "queues/status"]
resources: ["podgroups", "queues"]
verbs: ["get", "list", "watch", "create", "delete", "update"]
- apiGroups: [ "scheduling.incubator.k8s.io", "scheduling.volcano.sh" ]
resources: [ "queues/status" ]
verbs: ["get", "watch", "patch"]
- apiGroups: ["flow.volcano.sh"]
resources: ["jobflows", "jobtemplates"]
verbs: ["get", "list", "watch", "create", "delete", "update"]
Expand Down
30 changes: 23 additions & 7 deletions pkg/cli/queue/get.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,21 +63,37 @@ func GetQueue(ctx context.Context) error {
return err
}

PrintQueue(queue, os.Stdout)
// Although the featuregate called CustomResourceFieldSelectors is enabled by default after v1.31, there are still
// users using k8s versions lower than v1.31. Therefore we can only get all the podgroups from kube-apiserver
// and then filtering them.
pgList, err := queueClient.SchedulingV1beta1().PodGroups("").List(ctx, metav1.ListOptions{})
if err != nil {
return fmt.Errorf("failed to list podgroup for queue %s with err: %v", getQueueFlags.Name, err)
}

pgStats := &PodGroupStatistics{}
for _, pg := range pgList.Items {
if pg.Spec.Queue == getQueueFlags.Name {
pgStats.StatPodGroupCountsForQueue(&pg)
}
}

PrintQueue(queue, pgStats, os.Stdout)

return nil
}

// PrintQueue prints queue information.
func PrintQueue(queue *v1beta1.Queue, writer io.Writer) {
_, err := fmt.Fprintf(writer, "%-25s%-8s%-8s%-8s%-8s%-8s%-8s\n",
Name, Weight, State, Inqueue, Pending, Running, Unknown)
func PrintQueue(queue *v1beta1.Queue, pgStats *PodGroupStatistics, writer io.Writer) {
_, err := fmt.Fprintf(writer, "%-25s%-8s%-8s%-8s%-8s%-8s%-8s%-8s\n",
Name, Weight, State, Inqueue, Pending, Running, Unknown, Completed)
if err != nil {
fmt.Printf("Failed to print queue command result: %s.\n", err)
}
_, err = fmt.Fprintf(writer, "%-25s%-8d%-8s%-8d%-8d%-8d%-8d\n",
queue.Name, queue.Spec.Weight, queue.Status.State, queue.Status.Inqueue,
queue.Status.Pending, queue.Status.Running, queue.Status.Unknown)

_, err = fmt.Fprintf(writer, "%-25s%-8d%-8s%-8d%-8d%-8d%-8d%-8d\n",
queue.Name, queue.Spec.Weight, queue.Status.State, pgStats.Inqueue,
pgStats.Pending, pgStats.Running, pgStats.Unknown, pgStats.Completed)
if err != nil {
fmt.Printf("Failed to print queue command result: %s.\n", err)
}
Expand Down
36 changes: 29 additions & 7 deletions pkg/cli/queue/list.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ const (
// Inqueue status of queue
Inqueue string = "Inqueue"

// Completed status of the queue
Completed string = "Completed"

// State is state of queue
State string = "State"
)
Expand Down Expand Up @@ -81,22 +84,41 @@ func ListQueue(ctx context.Context) error {
fmt.Printf("No resources found\n")
return nil
}
PrintQueues(queues, os.Stdout)

// Although the featuregate called CustomResourceFieldSelectors is enabled by default after v1.31, there are still
// users using k8s versions lower than v1.31. Therefore we can only get all the podgroups from kube-apiserver
// and then filtering them.
pgList, err := jobClient.SchedulingV1beta1().PodGroups("").List(ctx, metav1.ListOptions{})
if err != nil {
return fmt.Errorf("failed to list podgroups with err: %v", err)
}

queueStats := make(map[string]*PodGroupStatistics, len(queues.Items))
for _, queue := range queues.Items {
queueStats[queue.Name] = &PodGroupStatistics{}
}

for _, pg := range pgList.Items {
queueStats[pg.Spec.Queue].StatPodGroupCountsForQueue(&pg)
}

PrintQueues(queues, queueStats, os.Stdout)

return nil
}

// PrintQueues prints queue information.
func PrintQueues(queues *v1beta1.QueueList, writer io.Writer) {
_, err := fmt.Fprintf(writer, "%-25s%-8s%-8s%-8s%-8s%-8s%-8s\n",
Name, Weight, State, Inqueue, Pending, Running, Unknown)
func PrintQueues(queues *v1beta1.QueueList, queueStats map[string]*PodGroupStatistics, writer io.Writer) {
_, err := fmt.Fprintf(writer, "%-25s%-8s%-8s%-8s%-8s%-8s%-8s%-8s\n",
Name, Weight, State, Inqueue, Pending, Running, Unknown, Completed)
if err != nil {
fmt.Printf("Failed to print queue command result: %s.\n", err)
}

for _, queue := range queues.Items {
_, err = fmt.Fprintf(writer, "%-25s%-8d%-8s%-8d%-8d%-8d%-8d\n",
queue.Name, queue.Spec.Weight, queue.Status.State, queue.Status.Inqueue,
queue.Status.Pending, queue.Status.Running, queue.Status.Unknown)
_, err = fmt.Fprintf(writer, "%-25s%-8d%-8s%-8d%-8d%-8d%-8d%-8d\n",
queue.Name, queue.Spec.Weight, queue.Status.State, queueStats[queue.Name].Inqueue, queueStats[queue.Name].Pending,
queueStats[queue.Name].Running, queueStats[queue.Name].Unknown, queueStats[queue.Name].Completed)
if err != nil {
fmt.Printf("Failed to print queue command result: %s.\n", err)
}
Expand Down
24 changes: 24 additions & 0 deletions pkg/cli/queue/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (

busv1alpha1 "volcano.sh/apis/pkg/apis/bus/v1alpha1"
"volcano.sh/apis/pkg/apis/helpers"
"volcano.sh/apis/pkg/apis/scheduling/v1beta1"
"volcano.sh/apis/pkg/client/clientset/versioned"
)

Expand Down Expand Up @@ -62,3 +63,26 @@ func createQueueCommand(ctx context.Context, config *rest.Config, action busv1al

return nil
}

type PodGroupStatistics struct {
Inqueue int
Pending int
Running int
Unknown int
Completed int
}

func (pgStats *PodGroupStatistics) StatPodGroupCountsForQueue(pg *v1beta1.PodGroup) {
switch pg.Status.Phase {
case v1beta1.PodGroupInqueue:
pgStats.Inqueue++
case v1beta1.PodGroupPending:
pgStats.Pending++
case v1beta1.PodGroupRunning:
pgStats.Running++
case v1beta1.PodGroupUnknown:
pgStats.Unknown++
case v1beta1.PodGroupCompleted:
pgStats.Completed++
}
}
84 changes: 84 additions & 0 deletions pkg/controllers/metrics/queue.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
package metrics

import (
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"

"volcano.sh/volcano/pkg/scheduler/metrics"
)

var (
queuePodGroupInqueue = promauto.NewGaugeVec(
prometheus.GaugeOpts{
Subsystem: metrics.VolcanoNamespace,
Name: "queue_pod_group_inqueue_count",
Help: "The number of Inqueue PodGroup in this queue",
}, []string{"queue_name"},
)

queuePodGroupPending = promauto.NewGaugeVec(
prometheus.GaugeOpts{
Subsystem: metrics.VolcanoNamespace,
Name: "queue_pod_group_pending_count",
Help: "The number of Pending PodGroup in this queue",
}, []string{"queue_name"},
)

queuePodGroupRunning = promauto.NewGaugeVec(
prometheus.GaugeOpts{
Subsystem: metrics.VolcanoNamespace,
Name: "queue_pod_group_running_count",
Help: "The number of Running PodGroup in this queue",
}, []string{"queue_name"},
)

queuePodGroupUnknown = promauto.NewGaugeVec(
prometheus.GaugeOpts{
Subsystem: metrics.VolcanoNamespace,
Name: "queue_pod_group_unknown_count",
Help: "The number of Unknown PodGroup in this queue",
}, []string{"queue_name"},
)

queuePodGroupCompleted = promauto.NewGaugeVec(
prometheus.GaugeOpts{
Subsystem: metrics.VolcanoNamespace,
Name: "queue_pod_group_completed_count",
Help: "The number of Completed PodGroup in this queue",
}, []string{"queue_name"},
)
)

// UpdateQueuePodGroupInqueueCount records the number of Inqueue PodGroup in this queue
func UpdateQueuePodGroupInqueueCount(queueName string, count int32) {
queuePodGroupInqueue.WithLabelValues(queueName).Set(float64(count))
}

// UpdateQueuePodGroupPendingCount records the number of Pending PodGroup in this queue
func UpdateQueuePodGroupPendingCount(queueName string, count int32) {
queuePodGroupPending.WithLabelValues(queueName).Set(float64(count))
}

// UpdateQueuePodGroupRunningCount records the number of Running PodGroup in this queue
func UpdateQueuePodGroupRunningCount(queueName string, count int32) {
queuePodGroupRunning.WithLabelValues(queueName).Set(float64(count))
}

// UpdateQueuePodGroupUnknownCount records the number of Unknown PodGroup in this queue
func UpdateQueuePodGroupUnknownCount(queueName string, count int32) {
queuePodGroupUnknown.WithLabelValues(queueName).Set(float64(count))
}

// UpdateQueuePodGroupCompletedCount records the number of Completed PodGroup in this queue
func UpdateQueuePodGroupCompletedCount(queueName string, count int32) {
queuePodGroupCompleted.WithLabelValues(queueName).Set(float64(count))
}

// DeleteQueueMetrics delete all metrics related to the queue
func DeleteQueueMetrics(queueName string) {
queuePodGroupInqueue.DeleteLabelValues(queueName)
queuePodGroupPending.DeleteLabelValues(queueName)
queuePodGroupRunning.DeleteLabelValues(queueName)
queuePodGroupUnknown.DeleteLabelValues(queueName)
queuePodGroupCompleted.DeleteLabelValues(queueName)
}
2 changes: 0 additions & 2 deletions pkg/controllers/queue/queue_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -160,8 +160,6 @@ func (c *queuecontroller) Initialize(opt *framework.ControllerOption) error {
}

queuestate.SyncQueue = c.syncQueue
queuestate.OpenQueue = c.openQueue
queuestate.CloseQueue = c.closeQueue

c.syncHandler = c.handleQueue
c.syncCommandHandler = c.handleCommand
Expand Down
Loading
Loading