diff --git a/pkg/controllers/job/job_controller_actions.go b/pkg/controllers/job/job_controller_actions.go index 0c7ffd8c14..42532697b6 100644 --- a/pkg/controllers/job/job_controller_actions.go +++ b/pkg/controllers/job/job_controller_actions.go @@ -20,6 +20,7 @@ import ( "context" "fmt" "reflect" + "strings" "sync" "sync/atomic" "time" @@ -286,13 +287,7 @@ func (cc *jobcontroller) syncJob(jobInfo *apis.JobInfo, updateStatus state.Updat if pg.Status.Phase != "" && pg.Status.Phase != scheduling.PodGroupPending { syncTask = true } - - for _, condition := range pg.Status.Conditions { - if condition.Type == scheduling.PodGroupUnschedulableType { - cc.recorder.Eventf(job, v1.EventTypeWarning, string(batch.PodGroupPending), - fmt.Sprintf("PodGroup %s:%s unschedule,reason: %s", job.Namespace, job.Name, condition.Message)) - } - } + cc.recordPodGroupEvent(job, pg) } var jobCondition batch.JobCondition @@ -845,6 +840,27 @@ func (cc *jobcontroller) initJobStatus(job *batch.Job) (*batch.Job, error) { return newJob, nil } +func (cc *jobcontroller) recordPodGroupEvent(job *batch.Job, podGroup *scheduling.PodGroup) { + var latestCondition *scheduling.PodGroupCondition + + // Get the latest condition by timestamp + for _, condition := range podGroup.Status.Conditions { + if condition.Status == v1.ConditionTrue { + if latestCondition == nil || + condition.LastTransitionTime.Time.After(latestCondition.LastTransitionTime.Time) { + latestCondition = &condition + } + } + } + + // If the latest condition is not scheduled, then a warning event is recorded + if latestCondition != nil && latestCondition.Type != scheduling.PodGroupScheduled { + cc.recorder.Eventf(job, v1.EventTypeWarning, string(batch.PodGroupPending), + fmt.Sprintf("PodGroup %s:%s %s, reason: %s", job.Namespace, job.Name, + strings.ToLower(string(latestCondition.Type)), latestCondition.Message)) + } +} + func classifyAndAddUpPodBaseOnPhase(pod *v1.Pod, pending, running, succeeded, failed, unknown *int32) { switch pod.Status.Phase { case v1.PodPending: diff --git a/pkg/controllers/job/job_controller_actions_test.go b/pkg/controllers/job/job_controller_actions_test.go index 711df38e12..34f2c4cf0d 100644 --- a/pkg/controllers/job/job_controller_actions_test.go +++ b/pkg/controllers/job/job_controller_actions_test.go @@ -20,11 +20,14 @@ import ( "context" "errors" "fmt" + "reflect" + "testing" + "time" + "github.com/agiledragon/gomonkey/v2" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "reflect" - "testing" + "k8s.io/client-go/tools/record" "volcano.sh/apis/pkg/apis/batch/v1alpha1" schedulingapi "volcano.sh/apis/pkg/apis/scheduling/v1beta1" @@ -664,3 +667,123 @@ func TestDeleteJobPod(t *testing.T) { }) } } + +func TestRecordPodGroupEvent(t *testing.T) { + job1 := &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "job1", + Namespace: "default", + ResourceVersion: "100", + UID: "e7f18111-1cec-11ea-b688-fa163ec79500", + }, + Spec: v1alpha1.JobSpec{ + Tasks: []v1alpha1.TaskSpec{}, + }, + Status: v1alpha1.JobStatus{ + State: v1alpha1.JobState{ + Phase: v1alpha1.Running, + }, + }, + } + + pg1 := &schedulingapi.PodGroup{ + ObjectMeta: metav1.ObjectMeta{ + Name: "job1-e7f18111-1cec-11ea-b688-fa163ec79500", + Namespace: "default", + }, + Spec: schedulingapi.PodGroupSpec{ + MinResources: &v1.ResourceList{}, + MinTaskMember: map[string]int32{}, + }, + Status: schedulingapi.PodGroupStatus{ + Phase: schedulingapi.PodGroupRunning, + Conditions: []schedulingapi.PodGroupCondition{}, + }, + } + testcases := []struct { + Name string + pgConditions []schedulingapi.PodGroupCondition + ExpectEvent string + }{ + { + Name: "pg with no conditions", + pgConditions: []schedulingapi.PodGroupCondition{}, + ExpectEvent: "", + }, + { + Name: "pg with Unschedulable condition", + pgConditions: []schedulingapi.PodGroupCondition{ + { + Type: schedulingapi.PodGroupUnschedulableType, + Status: v1.ConditionTrue, + Reason: "test reason", + Message: "test message", + LastTransitionTime: metav1.NewTime(time.Date(2024, time.July, 1, 15, 4, 5, 0, time.UTC)), + }, + }, + ExpectEvent: "Warning PodGroupPending PodGroup default:job1 unschedulable, reason: test message", + }, + { + Name: "pg with Unschedulable later than Scheduled", + pgConditions: []schedulingapi.PodGroupCondition{ + { + Type: schedulingapi.PodGroupUnschedulableType, + Status: v1.ConditionTrue, + Reason: "test reason", + Message: "test message", + LastTransitionTime: metav1.NewTime(time.Date(2024, time.July, 1, 15, 15, 5, 0, time.UTC)), + }, + { + Type: schedulingapi.PodGroupScheduled, + Status: v1.ConditionTrue, + Reason: "test reason", + Message: "test message", + LastTransitionTime: metav1.NewTime(time.Date(2024, time.July, 1, 15, 4, 5, 0, time.UTC)), + }, + }, + ExpectEvent: "Warning PodGroupPending PodGroup default:job1 unschedulable, reason: test message", + }, + { + Name: "pg with Scheduled later than Unschedulable ", + pgConditions: []schedulingapi.PodGroupCondition{ + { + Type: schedulingapi.PodGroupUnschedulableType, + Status: v1.ConditionTrue, + Reason: "test reason", + Message: "test message", + LastTransitionTime: metav1.NewTime(time.Date(2024, time.July, 1, 15, 15, 5, 0, time.UTC)), + }, + { + Type: schedulingapi.PodGroupScheduled, + Status: v1.ConditionTrue, + Reason: "test reason", + Message: "test message", + LastTransitionTime: metav1.NewTime(time.Date(2024, time.July, 1, 15, 20, 5, 0, time.UTC)), + }, + }, + ExpectEvent: "", + }, + } + for _, tt := range testcases { + t.Run(tt.Name, func(t *testing.T) { + fakeController := newFakeController() + fakeController.recorder = record.NewFakeRecorder(100) + pg1.Status.Conditions = tt.pgConditions + fakeController.recordPodGroupEvent(job1, pg1) + r := fakeController.recorder.(*record.FakeRecorder) + close(r.Events) + event := <-r.Events + + if len(tt.ExpectEvent) == 0 { + if len(event) != 0 { + t.Errorf("Testcase %s failed, expect no event, but got %s", tt.Name, event) + } + } else { + if tt.ExpectEvent != event { + t.Errorf("Testcase %s failed, expect event: %s, but got %s", tt.Name, tt.ExpectEvent, event) + } + } + }) + } + +}