diff --git a/docs/diagrams/multitenancy-labels.excalidraw.png b/docs/diagrams/multitenancy-labels.excalidraw.png index 85fcb755..a0d6fdab 100644 Binary files a/docs/diagrams/multitenancy-labels.excalidraw.png and b/docs/diagrams/multitenancy-labels.excalidraw.png differ diff --git a/docs/how-to/architect-for-multitenancy.md b/docs/how-to/architect-for-multitenancy.md index f3d4d49b..db8459ba 100644 --- a/docs/how-to/architect-for-multitenancy.md +++ b/docs/how-to/architect-for-multitenancy.md @@ -1,6 +1,6 @@ # Architect for Multitenancy -KubeAI can support multitenancy by filtering the models that it serves via Kubernetes [label selectors](https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#label-selectors). These label selectors can be specified on all OpenAI-compatible endpoints through the `X-Selector` HTTP header and will match on labels specified on the `kind: Model` objects. The pattern is similar to using a `WHERE` clause in a SQL query. +KubeAI can support multitenancy by filtering the models that it serves via Kubernetes [label selectors](https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#label-selectors). These label selectors can be specified on all OpenAI-compatible endpoints through the `X-Label-Selector` HTTP header and will match on labels specified on the `kind: Model` objects. The pattern is similar to using a `WHERE` clause in a SQL query. Example Models: @@ -27,16 +27,18 @@ Example HTTP requests: ```bash # List of models will be filtered. curl http://$KUBEAI_ENDPOINT/openai/v1/models \ - -H "X-Selector: tenancy in (org-abc, public)" + -H "X-Label-Selector: tenancy in (org-abc, public)" # When running inference, if the label selector does not match # a 404 will be returned. 
curl http://$KUBEAI_ENDPOINT/openai/v1/completions \ -H "Content-Type: application/json" \ - -H "X-Selector: tenancy in (org-abc, public)" \ + -H "X-Label-Selector: tenancy in (org-abc, public)" \ -d '{"prompt": "Hi", "model": "llama-3.2"}' ``` Example architecture: -![Multitenancy](../diagrams/multitenancy-labels.excalidraw.png) \ No newline at end of file +![Multitenancy](../diagrams/multitenancy-labels.excalidraw.png) + +NOTE: Multiple `X-Label-Selector` headers can be specified in the same HTTP request and will be treated as a logical `AND`. diff --git a/internal/modelproxy/request.go b/internal/modelproxy/request.go index d985946b..6183c89c 100644 --- a/internal/modelproxy/request.go +++ b/internal/modelproxy/request.go @@ -45,7 +45,7 @@ func newProxyRequest(r *http.Request) *proxyRequest { // attempts to unmarshal the request body as JSON and extract the // .model field. func (pr *proxyRequest) parse() error { - pr.selectors = pr.r.Header.Values("X-Selector") + pr.selectors = pr.r.Header.Values("X-Label-Selector") // Try to get the model from the header first if headerModel := pr.r.Header.Get("X-Model"); headerModel != "" { diff --git a/internal/openaiserver/models.go b/internal/openaiserver/models.go index 22f44e0f..d7b333b3 100644 --- a/internal/openaiserver/models.go +++ b/internal/openaiserver/models.go @@ -24,7 +24,7 @@ func (h *Handler) getModels(w http.ResponseWriter, r *http.Request) { } var listOpts []client.ListOption - headerSelectors := r.Header.Values("X-Selector") + headerSelectors := r.Header.Values("X-Label-Selector") for _, sel := range headerSelectors { parsedSel, err := labels.Parse(sel) if err != nil { diff --git a/proposals/diagrams/auth-with-label-selector.excalidraw.png b/proposals/diagrams/auth-with-label-selector.excalidraw.png index 85fcb755..40ba7cbd 100644 Binary files a/proposals/diagrams/auth-with-label-selector.excalidraw.png and b/proposals/diagrams/auth-with-label-selector.excalidraw.png differ diff --git a/proposals/auth.md 
b/proposals/multitenancy.md similarity index 94% rename from proposals/auth.md rename to proposals/multitenancy.md index 7502605b..8e6cd518 100644 --- a/proposals/auth.md +++ b/proposals/multitenancy.md @@ -1,4 +1,4 @@ -# Auth +# Multitenancy The goal of this proposal is to allow KubeAI to be used in a multitenancy environment where some users only have access to some models. @@ -46,10 +46,10 @@ In this implementation, label selectors are used to filter models. The decision ```bash curl http://localhost:8000/openai/v1/completions \ - -H "X-Selector: key1=value1" + -H "X-Label-Selector: key1=value1" curl http://localhost:8000/openai/v1/models \ - -H "X-Selector: key1=value1" + -H "X-Label-Selector: key1=value1" ``` Models just need to have the labels set. diff --git a/test/integration/selector_test.go b/test/integration/selector_test.go index aa46e874..1210d380 100644 --- a/test/integration/selector_test.go +++ b/test/integration/selector_test.go @@ -93,6 +93,8 @@ func TestSelector(t *testing.T) { expCode: http.StatusOK, }, { + // `AND` logic should be used. + // This is important because if `OR` logic were used it would open up a possible vulnerability: if the headers that an end-user specified were proxied with `OR` logic it would allow users to circumvent any proxy-enforced selectors. 
name: "model exists 2/2 labels match separate headers", modelName: m0.Name, selectorHeaders: []string{ diff --git a/test/integration/utils_test.go b/test/integration/utils_test.go index 422950cc..03f2a5b7 100644 --- a/test/integration/utils_test.go +++ b/test/integration/utils_test.go @@ -150,7 +150,7 @@ func sendOpenAIInferenceRequest(t *testing.T, modelName string, selectorHeaders require.NoError(t, err, msg) for _, selector := range selectorHeaders { t.Logf("Using selector: %s", selector) - req.Header.Add("X-Selector", selector) + req.Header.Add("X-Label-Selector", selector) } res, err := testHTTPClient.Do(req) @@ -174,7 +174,7 @@ func sendOpenAIListModelsRequest(t *testing.T, selectorHeaders []string, expCode req, err := http.NewRequest(http.MethodGet, "http://localhost:8000/openai/v1/models", nil) require.NoError(t, err, msg) for _, selector := range selectorHeaders { - req.Header.Add("X-Selector", selector) + req.Header.Add("X-Label-Selector", selector) } res, err := testHTTPClient.Do(req)