Skip to content

Commit

Permalink
Merge pull request #3432 from lowang-bh/refactPreempt
Browse files Browse the repository at this point in the history
filter out those nodes which are UnschedulableAndUnresolvable when preempting
  • Loading branch information
volcano-sh-bot authored Jul 15, 2024
2 parents 7a4bc9a + c9a9f6d commit 277f19b
Show file tree
Hide file tree
Showing 11 changed files with 422 additions and 88 deletions.
7 changes: 4 additions & 3 deletions pkg/scheduler/actions/allocate/allocate.go
Original file line number Diff line number Diff line change
Expand Up @@ -292,18 +292,19 @@ func (alloc *Action) allocateResourcesForTasks(tasks *util.PriorityQueue, job *a

func (alloc *Action) predicate(task *api.TaskInfo, node *api.NodeInfo) ([]*api.Status, error) {
// Check for Resource Predicate
var statusSets api.StatusSets
if ok, resources := task.InitResreq.LessEqualWithResourcesName(node.FutureIdle(), api.Zero); !ok {
return nil, api.NewFitError(task, node, api.WrapInsufficientResourceReason(resources))
statusSets = append(statusSets, &api.Status{Code: api.Unschedulable, Reason: api.WrapInsufficientResourceReason(resources)})
return nil, api.NewFitErrWithStatus(task, node, statusSets...)
}
var statusSets util.StatusSets
statusSets, err := alloc.session.PredicateFn(task, node)
if err != nil {
return nil, api.NewFitError(task, node, err.Error())
}

if statusSets.ContainsUnschedulable() || statusSets.ContainsUnschedulableAndUnresolvable() ||
statusSets.ContainsErrorSkipOrWait() {
return nil, api.NewFitError(task, node, statusSets.Message())
return nil, api.NewFitErrWithStatus(task, node, statusSets...)
}
return nil, nil
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/scheduler/actions/backfill/backfill.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ func (backfill *Action) Execute(ssn *framework.Session) {
defer klog.V(5).Infof("Leaving Backfill ...")

predicateFunc := func(task *api.TaskInfo, node *api.NodeInfo) ([]*api.Status, error) {
var statusSets util.StatusSets
var statusSets api.StatusSets
statusSets, err := ssn.PredicateFn(task, node)
if err != nil {
return nil, err
Expand Down
9 changes: 5 additions & 4 deletions pkg/scheduler/actions/preempt/preempt.go
Original file line number Diff line number Diff line change
Expand Up @@ -208,23 +208,24 @@ func preempt(
predicateHelper util.PredicateHelper,
) (bool, error) {
assigned := false
allNodes := ssn.NodeList

if err := ssn.PrePredicateFn(preemptor); err != nil {
return false, fmt.Errorf("PrePredicate for task %s/%s failed for: %v", preemptor.Namespace, preemptor.Name, err)
}

predicateFn := func(task *api.TaskInfo, node *api.NodeInfo) ([]*api.Status, error) {
var statusSets util.StatusSets
var statusSets api.StatusSets
statusSets, _ = ssn.PredicateFn(task, node)

// When filtering candidate nodes, need to consider the node statusSets instead of the err information.
// refer to kube-scheduler preemption code: https://github.com/kubernetes/kubernetes/blob/9d87fa215d9e8020abdc17132d1252536cd752d2/pkg/scheduler/framework/preemption/preemption.go#L422
if statusSets.ContainsUnschedulableAndUnresolvable() || statusSets.ContainsErrorSkipOrWait() {
return nil, api.NewFitError(task, node, statusSets.Message())
return nil, api.NewFitErrWithStatus(task, node, statusSets...)
}
return nil, nil
}

// we should filter out those nodes that are UnschedulableAndUnresolvable status got in allocate action
allNodes := ssn.GetUnschedulableAndUnresolvableNodesForTask(preemptor)
predicateNodes, _ := predicateHelper.PredicateNodes(preemptor, allNodes, predicateFn, true)

nodeScores := util.PrioritizeNodes(preemptor, predicateNodes, ssn.BatchNodeOrderFn, ssn.NodeOrderMapFn, ssn.NodeOrderReduceFn)
Expand Down
6 changes: 4 additions & 2 deletions pkg/scheduler/actions/reclaim/reclaim.go
Original file line number Diff line number Diff line change
Expand Up @@ -125,8 +125,10 @@ func (ra *Action) Execute(ssn *framework.Session) {
}

assigned := false
for _, n := range ssn.Nodes {
var statusSets util.StatusSets
// we should filter out those nodes that are UnschedulableAndUnresolvable status got in allocate action
totalNodes := ssn.GetUnschedulableAndUnresolvableNodesForTask(task)
for _, n := range totalNodes {
var statusSets api.StatusSets
statusSets, _ = ssn.PredicateFn(task, n)

// When filtering candidate nodes, need to consider the node statusSets instead of the err information.
Expand Down
6 changes: 3 additions & 3 deletions pkg/scheduler/api/job_info_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -224,9 +224,9 @@ func TestTaskSchedulingReason(t *testing.T) {
nodefes: map[TaskID]*FitErrors{
TaskID(t6.UID): {
nodes: map[string]*FitError{
"node1": {Reasons: []string{NodePodNumberExceeded}},
"node2": {Reasons: []string{NodeResourceFitFailed}},
"node3": {Reasons: []string{NodeResourceFitFailed}},
"node1": {Status: []*Status{{Reason: NodePodNumberExceeded}}},
"node2": {Status: []*Status{{Reason: NodeResourceFitFailed}}},
"node3": {Status: []*Status{{Reason: NodeResourceFitFailed}}},
},
},
},
Expand Down
70 changes: 70 additions & 0 deletions pkg/scheduler/api/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ limitations under the License.
package api

import (
"strings"

k8sframework "k8s.io/kubernetes/pkg/scheduler/framework"
)

Expand Down Expand Up @@ -157,6 +159,74 @@ func (s Status) String() string {
return s.Reason
}

// StatusSets is a collection of predicate statuses reported for a node.
type StatusSets []*Status

// ContainsUnschedulable reports whether any non-nil status in the set
// carries the Unschedulable code. Nil entries are ignored.
func (s StatusSets) ContainsUnschedulable() bool {
	for _, st := range s {
		if st != nil && st.Code == Unschedulable {
			return true
		}
	}
	return false
}

// ContainsUnschedulableAndUnresolvable reports whether any non-nil status in
// the set carries the UnschedulableAndUnresolvable code. Nil entries are ignored.
func (s StatusSets) ContainsUnschedulableAndUnresolvable() bool {
	for _, st := range s {
		if st != nil && st.Code == UnschedulableAndUnresolvable {
			return true
		}
	}
	return false
}

// ContainsErrorSkipOrWait reports whether any non-nil status in the set
// carries the Error, Skip, or Wait code. Nil entries are ignored.
func (s StatusSets) ContainsErrorSkipOrWait() bool {
	for _, st := range s {
		if st == nil {
			continue
		}
		switch st.Code {
		case Error, Skip, Wait:
			return true
		}
	}
	return false
}

// Message returns a comma-joined string of all non-empty reasons in the set.
// A nil receiver yields the empty string. Nil entries are skipped, matching
// the nil guards in the Contains* methods (the original dereferenced
// status.Reason unconditionally and would panic on a nil element).
func (s StatusSets) Message() string {
	if s == nil {
		return ""
	}
	all := make([]string, 0, len(s))
	for _, status := range s {
		if status == nil || status.Reason == "" {
			continue
		}
		all = append(all, status.Reason)
	}
	return strings.Join(all, ",")
}

// Reasons returns the list of non-empty reasons recorded in the set.
// A nil receiver yields nil. Nil entries are skipped, matching the nil
// guards in the Contains* methods (the original dereferenced status.Reason
// unconditionally and would panic on a nil element).
func (s StatusSets) Reasons() []string {
	if s == nil {
		return nil
	}
	all := make([]string, 0, len(s))
	for _, status := range s {
		if status == nil || status.Reason == "" {
			continue
		}
		all = append(all, status.Reason)
	}
	return all
}

// ValidateExFn is the func declaration used to validate the result.
type ValidateExFn func(interface{}) *ValidateResult

Expand Down
48 changes: 42 additions & 6 deletions pkg/scheduler/api/unschedule_info.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ import (
"fmt"
"sort"
"strings"

"k8s.io/apimachinery/pkg/util/sets"
)

const (
Expand Down Expand Up @@ -60,13 +62,24 @@ func (f *FitErrors) SetNodeError(nodeName string, err error) {
default:
fe = &FitError{
NodeName: nodeName,
Reasons: []string{obj.Error()},
Status: []*Status{{Code: Error, Reason: obj.Error()}},
}
}

f.nodes[nodeName] = fe
}

// GetUnschedulableAndUnresolvableNodes returns the set of node names whose
// fit error contains an UnschedulableAndUnresolvable status, i.e. nodes for
// which preempting pods cannot help the task schedule.
func (f *FitErrors) GetUnschedulableAndUnresolvableNodes() map[string]sets.Empty {
	result := make(map[string]sets.Empty)
	for _, fitErr := range f.nodes {
		if fitErr.Status.ContainsUnschedulableAndUnresolvable() {
			result[fitErr.NodeName] = sets.Empty{}
		}
	}
	return result
}

// Error returns the final error message
func (f *FitErrors) Error() string {
if f.err == "" {
Expand All @@ -78,7 +91,7 @@ func (f *FitErrors) Error() string {

reasons := make(map[string]int)
for _, node := range f.nodes {
for _, reason := range node.Reasons {
for _, reason := range node.Reasons() {
reasons[reason]++
}
}
Expand All @@ -100,23 +113,46 @@ type FitError struct {
taskNamespace string
taskName string
NodeName string
Reasons []string
Status StatusSets
}

// NewFitError returns a FitError built from plain reason messages; each
// message is wrapped into a Status whose code defaults to Error.
// NOTE(review): the scraped diff left the removed `Reasons: message,`
// struct-literal line interleaved here; the Reasons field no longer exists on
// FitError, so the reconstructed post-change function below drops it.
func NewFitError(task *TaskInfo, node *NodeInfo, message ...string) *FitError {
	fe := &FitError{
		taskName:      task.Name,
		taskNamespace: task.Namespace,
		NodeName:      node.Name,
	}
	sts := make([]*Status, 0, len(message))
	for _, msg := range message {
		sts = append(sts, &Status{Reason: msg, Code: Error})
	}
	fe.Status = StatusSets(sts)
	return fe
}

// NewFitErrWithStatus builds a FitError carrying the given statuses, so both
// the failure codes and the reasons are preserved for the task/node pair.
func NewFitErrWithStatus(task *TaskInfo, node *NodeInfo, sts ...*Status) *FitError {
	return &FitError{
		taskName:      task.Name,
		taskNamespace: task.Namespace,
		NodeName:      node.Name,
		Status:        sts,
	}
}

// Reasons returns the textual reasons recorded in the fit error's statuses;
// a nil receiver yields an empty (non-nil) slice.
func (fe *FitError) Reasons() []string {
	if fe != nil {
		return fe.Status.Reasons()
	}
	return []string{}
}

// Error returns the aggregated failure message for this task/node pair,
// joining all recorded reasons. Receiver renamed from f to fe for
// consistency with the sibling Reasons method on the same type.
func (fe *FitError) Error() string {
	return fmt.Sprintf("task %s/%s on node %s fit failed: %s", fe.taskNamespace, fe.taskName, fe.NodeName, strings.Join(fe.Reasons(), ", "))
}

// WrapInsufficientResourceReason wrap insufficient resource reason.
Expand Down
129 changes: 129 additions & 0 deletions pkg/scheduler/api/unschedule_info_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
/*
Copyright 2024 The Volcano Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package api

import (
"fmt"
"testing"

"github.com/stretchr/testify/assert"
"k8s.io/apimachinery/pkg/util/sets"
)

const (
affinityRulesNotMatch = "node(s) didn't match pod affinity rules"
existingAntiAffinityNotMatch = "node(s) didn't satisfy existing pods anti-affinity rules"
nodeAffinity = "node(s) didn't match Pod's node affinity/selector"
)

// TestFitError verifies that FitError construction via NewFitError and
// NewFitErrWithStatus yields the expected statuses, reasons and error string.
func TestFitError(t *testing.T) {
	tests := []struct {
		task   *TaskInfo
		node   *NodeInfo
		status []*Status
		// the wanted reason from fitError
		reason []string
		// the wanted fitError
		wantErr *FitError
		// string of fitError
		errStr string
	}{
		{
			task:   &TaskInfo{Name: "pod1", Namespace: "ns1"},
			node:   &NodeInfo{Name: "node1"},
			reason: []string{affinityRulesNotMatch, nodeAffinity},
			wantErr: &FitError{
				NodeName: "node1", taskNamespace: "ns1", taskName: "pod1",
				Status: []*Status{{Reason: affinityRulesNotMatch, Code: Error}, {Reason: nodeAffinity, Code: Error}},
			},
			errStr: "task ns1/pod1 on node node1 fit failed: " + affinityRulesNotMatch + ", " + nodeAffinity,
		},
		{
			task:   &TaskInfo{Name: "pod2", Namespace: "ns2"},
			node:   &NodeInfo{Name: "node2"},
			status: []*Status{{Reason: nodeAffinity, Code: UnschedulableAndUnresolvable}, {Reason: existingAntiAffinityNotMatch, Code: Error}},
			reason: []string{nodeAffinity, existingAntiAffinityNotMatch},
			wantErr: &FitError{
				NodeName: "node2", taskNamespace: "ns2", taskName: "pod2",
				Status: []*Status{{Reason: nodeAffinity, Code: UnschedulableAndUnresolvable}, {Reason: existingAntiAffinityNotMatch, Code: Error}},
			},
			errStr: "task ns2/pod2 on node node2 fit failed: " + nodeAffinity + ", " + existingAntiAffinityNotMatch,
		},
	}

	for _, test := range tests {
		// declare got per iteration: the original declared it once outside the
		// loop, so a case providing neither status nor reason would silently
		// reuse the previous iteration's result and mask a test-table mistake
		var got *FitError
		if len(test.status) != 0 {
			got = NewFitErrWithStatus(test.task, test.node, test.status...)
		} else if len(test.reason) != 0 {
			got = NewFitError(test.task, test.node, test.reason...)
		}

		assert.Equal(t, test.wantErr, got)
		assert.Equal(t, test.reason, got.Reasons())
		assert.Equal(t, test.errStr, got.Error())
	}
}

// TestFitErrors verifies the aggregated error string produced by FitErrors
// and the extraction of nodes with UnschedulableAndUnresolvable statuses.
func TestFitErrors(t *testing.T) {
	tests := []struct {
		node   string
		fitStr string
		err    error
		fiterr *FitError
		want   string // expected error string
		// nodes that are not helpful for preempting, which has a code of UnschedulableAndUnresolvable
		filterNodes map[string]sets.Empty
	}{
		{
			want:        "0/0 nodes are unavailable", // base fit err string is empty, set as the default
			filterNodes: map[string]sets.Empty{},
		},
		{
			node:   "node1",
			fitStr: "fit failed",
			// use an explicit "%s" verb: fmt.Errorf with a non-constant format
			// string fails go vet's printf check and would misbehave if the
			// constant ever contained a % character
			err:  fmt.Errorf("%s", NodePodNumberExceeded),
			want: "fit failed: 1 node(s) pod number exceeded.",
			// no node has UnschedulableAndUnresolvable
			filterNodes: map[string]sets.Empty{},
		},
		{
			node:   "node1",
			fitStr: "NodeResourceFitFailed",
			err:    fmt.Errorf("%s", NodePodNumberExceeded),
			fiterr: &FitError{
				taskNamespace: "ns1", taskName: "task1", NodeName: "node2",
				Status: []*Status{{Reason: nodeAffinity, Code: UnschedulableAndUnresolvable}},
			},
			want: "NodeResourceFitFailed: 1 node(s) didn't match Pod's node affinity/selector, 1 node(s) pod number exceeded.",
			// only node2 has UnschedulableAndUnresolvable
			filterNodes: map[string]sets.Empty{"node2": {}},
		},
	}
	for _, test := range tests {
		fitErrs := NewFitErrors()
		fitErrs.SetError(test.fitStr)
		if test.err != nil {
			fitErrs.SetNodeError(test.node, test.err)
		}
		if test.fiterr != nil {
			fitErrs.SetNodeError(test.fiterr.NodeName, test.fiterr)
		}
		got := fitErrs.Error()
		assert.Equal(t, test.want, got)
		assert.Equal(t, test.filterNodes, fitErrs.GetUnschedulableAndUnresolvableNodes())
	}
}
Loading

0 comments on commit 277f19b

Please sign in to comment.