Add runtimeClassName as optional field in resource profile (#253)
Useful for k3s...
Also sync `manifests/` via `make manifests`
nstogner authored Sep 27, 2024
1 parent ebedde6 commit 3aaf99d
Showing 7 changed files with 74 additions and 14 deletions.
1 change: 1 addition & 0 deletions docs/how-to/configure-resource-profiles.md
@@ -36,6 +36,7 @@ resourceProfiles:
custom.com/gpu: "1"
cpu: "3"
memory: "12Gi"
runtimeClassName: "my-custom-runtime-class"
```

If you need to run custom model server images on your resource profile, make sure to also add those in the `modelServers` section:
13 changes: 7 additions & 6 deletions internal/config/system.go
@@ -130,12 +130,13 @@ func (d *Duration) UnmarshalJSON(b []byte) error {
}

type ResourceProfile struct {
ImageName string `json:"imageName"`
Requests corev1.ResourceList `json:"requests,omitempty"`
Limits corev1.ResourceList `json:"limits,omitempty"`
NodeSelector map[string]string `json:"nodeSelector,omitempty"`
Affinity *corev1.Affinity `json:"affinity,omitempty"`
Tolerations []corev1.Toleration `json:"tolerations,omitempty"`
RuntimeClassName *string `json:"runtimeClassName,omitempty"`
}

type MessageStream struct {
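For context, a minimal sketch (using a stripped-down stand-in for the struct above, not the project's actual config loader) of why the new field is a `*string` with `omitempty`: when `runtimeClassName` is absent from the config, the pointer stays nil and the Pod later keeps the cluster's default runtime handler.

```go
package main

import (
	"encoding/json"
	"fmt"
)

// profile is a stripped-down stand-in for ResourceProfile, kept only to
// illustrate the pointer semantics of the new field.
type profile struct {
	ImageName        string  `json:"imageName"`
	RuntimeClassName *string `json:"runtimeClassName,omitempty"`
}

func main() {
	var with, without profile
	_ = json.Unmarshal([]byte(`{"imageName":"img","runtimeClassName":"my-custom-runtime-class"}`), &with)
	_ = json.Unmarshal([]byte(`{"imageName":"img"}`), &without)

	fmt.Println(*with.RuntimeClassName)          // my-custom-runtime-class
	fmt.Println(without.RuntimeClassName == nil) // true: nothing to set on the Pod
}
```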
18 changes: 11 additions & 7 deletions internal/modelcontroller/model_controller.go
@@ -202,10 +202,11 @@ func (r *ModelReconciler) vLLMPodForModel(m *kubeaiv1.Model, profile ModelConfig
},
Spec: corev1.PodSpec{
NodeSelector: profile.NodeSelector,
- SecurityContext: r.ModelServerPods.ModelPodSecurityContext,
- ServiceAccountName: r.ModelServerPods.ModelServiceAccountName,
Affinity: profile.Affinity,
Tolerations: profile.Tolerations,
+ RuntimeClassName: profile.RuntimeClassName,
+ ServiceAccountName: r.ModelServerPods.ModelServiceAccountName,
+ SecurityContext: r.ModelServerPods.ModelPodSecurityContext,
Containers: []corev1.Container{
{
Name: "server",
@@ -355,10 +356,11 @@ func (r *ModelReconciler) oLlamaPodForModel(m *kubeaiv1.Model, profile ModelConf
},
Spec: corev1.PodSpec{
NodeSelector: profile.NodeSelector,
- SecurityContext: r.ModelServerPods.ModelPodSecurityContext,
- ServiceAccountName: r.ModelServerPods.ModelServiceAccountName,
Affinity: profile.Affinity,
Tolerations: profile.Tolerations,
+ RuntimeClassName: profile.RuntimeClassName,
+ ServiceAccountName: r.ModelServerPods.ModelServiceAccountName,
+ SecurityContext: r.ModelServerPods.ModelPodSecurityContext,
Containers: []corev1.Container{
{
Name: "server",
@@ -503,10 +505,11 @@ func (r *ModelReconciler) fasterWhisperPodForModel(m *kubeaiv1.Model, profile Mo
},
Spec: corev1.PodSpec{
NodeSelector: profile.NodeSelector,
- SecurityContext: r.ModelServerPods.ModelPodSecurityContext,
- ServiceAccountName: r.ModelServerPods.ModelServiceAccountName,
Affinity: profile.Affinity,
Tolerations: profile.Tolerations,
+ RuntimeClassName: profile.RuntimeClassName,
+ ServiceAccountName: r.ModelServerPods.ModelServiceAccountName,
+ SecurityContext: r.ModelServerPods.ModelPodSecurityContext,
Containers: []corev1.Container{
{
Name: "server",
@@ -656,10 +659,11 @@ func (r *ModelReconciler) infinityPodForModel(m *kubeaiv1.Model, profile ModelCo
Annotations: ann,
},
Spec: corev1.PodSpec{
- ServiceAccountName: r.ModelServerPods.ModelServiceAccountName,
NodeSelector: profile.NodeSelector,
Affinity: profile.Affinity,
Tolerations: profile.Tolerations,
+ RuntimeClassName: profile.RuntimeClassName,
+ ServiceAccountName: r.ModelServerPods.ModelServiceAccountName,
SecurityContext: r.ModelServerPods.ModelPodSecurityContext,
Containers: []corev1.Container{
{
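The new `RuntimeClassName` on the PodSpec only takes effect if a RuntimeClass with that name exists in the cluster. As a hedged sketch of the k3s scenario mentioned in the commit message — the `nvidia` name and handler below are assumptions about a typical k3s + NVIDIA container runtime setup, not something this commit creates:

```go
// Package example sketches the cluster-side object a resource profile's
// runtimeClassName must refer to.
package example

import (
	nodev1 "k8s.io/api/node/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// nvidiaRuntimeClass is a hypothetical RuntimeClass for a k3s node whose
// containerd is configured with an "nvidia" runtime handler; a profile
// would reference it with runtimeClassName: "nvidia".
var nvidiaRuntimeClass = nodev1.RuntimeClass{
	ObjectMeta: metav1.ObjectMeta{Name: "nvidia"},
	Handler:    "nvidia",
}
```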
13 changes: 13 additions & 0 deletions manifests/models/gemma-2b-it-tpu.yaml
@@ -0,0 +1,13 @@
# Source: models/templates/models.yaml
apiVersion: kubeai.org/v1
kind: Model
metadata:
name: gemma-2b-it-tpu
spec:
features: [TextGeneration]
owner: google
url: hf://google/gemma-2b-it
engine: VLLM
args:
- --disable-log-requests
resourceProfile: google-tpu-v5e-1x1:1
18 changes: 18 additions & 0 deletions manifests/models/llama-3.1-8b-instruct-tpu.yaml
@@ -0,0 +1,18 @@
# Source: models/templates/models.yaml
apiVersion: kubeai.org/v1
kind: Model
metadata:
name: llama-3.1-8b-instruct-tpu
spec:
features: [TextGeneration]
owner: meta-llama
url: hf://meta-llama/Meta-Llama-3.1-8B-Instruct
engine: VLLM
args:
- --disable-log-requests
- --swap-space=8
- --tensor-parallel-size=4
- --num-scheduler-steps=4
- --max-model-len=8192
- --distributed-executor-backend=ray
resourceProfile: google-tpu-v5e-2x2:4
23 changes: 22 additions & 1 deletion test/integration/main_test.go
@@ -15,7 +15,10 @@ import (
"gocloud.dev/pubsub"
_ "gocloud.dev/pubsub/mempubsub"
corev1 "k8s.io/api/core/v1"
nodev1 "k8s.io/api/node/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/utils/ptr"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/envtest"
@@ -33,7 +36,13 @@ var (
testNS = "default"
// testHTTPClient is a client with a long timeout for use in tests
// where requests may be held for long periods of time on purpose.
testHTTPClient = &http.Client{Timeout: 5 * time.Minute}
cpuRuntimeClass = nodev1.RuntimeClass{
ObjectMeta: metav1.ObjectMeta{
Name: cpuRuntimeClassName,
},
Handler: "my-cpu-runtime-handler",
}
)

// Messenger //
@@ -55,6 +64,7 @@ const (
resourceProfileNvidiaGPU = "nvidia-gpu-l4"
testVLLMDefualtImage = "default-vllm-image:v1.2.3"
testVLLMCPUImage = "cpu-vllm-image:v1.2.3"
cpuRuntimeClassName = "my-cpu-runtime-class"
)

// sysCfg returns the System configuration for testing.
@@ -103,6 +113,7 @@ func sysCfg() config.System {
Effect: corev1.TaintEffectNoSchedule,
},
},
RuntimeClassName: ptr.To(cpuRuntimeClassName),
Affinity: &corev1.Affinity{
NodeAffinity: &corev1.NodeAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: &corev1.NodeSelector{
@@ -159,6 +170,9 @@ func TestMain(m *testing.M) {
testK8sClient, err = client.New(k8sCfg, client.Options{Scheme: manager.Scheme})
requireNoError(err)

err = installCommonResources()
requireNoError(err)

// Setup messenger requests.
testRequestsTopic, err = pubsub.OpenTopic(testCtx, memRequestsURL)
requireNoError(err)
@@ -208,3 +222,10 @@ func requireNoError(err error) {
log.Fatal(err)
}
}

func installCommonResources() error {
if err := testK8sClient.Create(testCtx, &cpuRuntimeClass); err != nil {
return err
}
return nil
}
2 changes: 2 additions & 0 deletions test/integration/model_profiles_test.go
@@ -8,6 +8,7 @@ import (
"github.com/stretchr/testify/require"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/utils/ptr"
"sigs.k8s.io/controller-runtime/pkg/client"
)

@@ -44,6 +45,7 @@ func TestModelProfiles(t *testing.T) {
// The Pod should have a single container named "server".
container := mustFindPodContainerByName(t, pod, "server")
assert.Equal(t, expectedResources, container.Resources)
assert.Equal(t, ptr.To(cpuRuntimeClassName), pod.Spec.RuntimeClassName)
assert.Contains(t, pod.Spec.Tolerations, sysCfg().ResourceProfiles[resourceProfileCPU].Tolerations[0])
assert.Equal(t, sysCfg().ResourceProfiles[resourceProfileCPU].Affinity, pod.Spec.Affinity)
assert.Equal(t, sysCfg().ResourceProfiles[resourceProfileCPU].NodeSelector, pod.Spec.NodeSelector)
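The assertion on `pod.Spec.RuntimeClassName` compares two `*string` values. A small illustrative note on the `k8s.io/utils/ptr` helper used here: `ptr.To` returns a pointer to a copy of its argument, and testify's `assert.Equal` compares the pointed-to values, so two distinct pointers to equal strings pass the check.

```go
package main

import (
	"fmt"

	"k8s.io/utils/ptr"
)

func main() {
	want := ptr.To("my-cpu-runtime-class")
	got := ptr.To("my-cpu-runtime-class")
	// Distinct pointers, equal contents: this is what the test's
	// assert.Equal relies on (it uses reflect.DeepEqual under the hood).
	fmt.Println(want == got, *want == *got) // false true
}
```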
