diff --git a/.github/actions/argocd-update/action.yml b/.github/actions/argocd-update/action.yml index 28fe75faf..d3a51609f 100644 --- a/.github/actions/argocd-update/action.yml +++ b/.github/actions/argocd-update/action.yml @@ -62,6 +62,7 @@ runs: yq -i '(.spec.source.helm.parameters.[] | select(.name == "pod-init.image.tag")).value = "${{ inputs.version }}"' ${{ inputs.clone_into }}/${{ inputs.subdirectory }}/tembo-operator.yaml yq -i '(.spec.source.helm.parameters.[] | select(.name == "pod-init.image.tag")).value = "${{ inputs.version }}"' ${{ inputs.clone_into }}/${{ inputs.subdirectory }}/tembo-operator-euc1.yaml yq -i '(.spec.source.helm.parameters.[] | select(.name == "inferenceGateway.image.tag")).value = "${{ inputs.version }}"' ${{ inputs.clone_into }}/${{ inputs.subdirectory }}/tembo-ai.yaml + yq -i '(.spec.source.helm.parameters.[] | select(.name == "inferenceService.default.image.tag")).value = "${{ inputs.version }}"' ${{ inputs.clone_into }}/${{ inputs.subdirectory }}/tembo-ai.yaml yq -i '(.spec.source.helm.parameters.[] | select(.name == "controller.image.tag")).value = "${{ inputs.version }}"' ${{ inputs.clone_into }}/${{ inputs.subdirectory }}/tembo-operator-gke-usc1.yaml yq -i '.spec.source.targetRevision= "${{ inputs.version }}"' ${{ inputs.clone_into }}/${{ inputs.subdirectory }}/tembo-ai.yaml yq -i '(.spec.source.helm.parameters.[] | select(.name == "image.tag")).value = "${{ inputs.version }}"' ${{ inputs.clone_into }}/${{ inputs.subdirectory }}/tembo-ai-events-reporter.yaml @@ -103,6 +104,7 @@ runs: yq -i '(.spec.source.helm.parameters.[] | select(.name == "pod-init.image.tag")).value = "${{ inputs.version }}"' ${{ inputs.clone_into }}/${{ inputs.subdirectory }}/tembo-operator-usw2.yaml yq -i '(.spec.source.helm.parameters.[] | select(.name == "pod-init.image.tag")).value = "${{ inputs.version }}"' ${{ inputs.clone_into }}/${{ inputs.subdirectory }}/tembo-operator-gke-usc1.yaml yq -i '(.spec.source.helm.parameters.[] | select(.name == 
"inferenceGateway.image.tag")).value = "${{ inputs.version }}"' ${{ inputs.clone_into }}/${{ inputs.subdirectory }}/tembo-ai.yaml + yq -i '(.spec.source.helm.parameters.[] | select(.name == "inferenceService.default.image.tag")).value = "${{ inputs.version }}"' ${{ inputs.clone_into }}/${{ inputs.subdirectory }}/tembo-ai.yaml yq -i '.spec.source.targetRevision= "${{ inputs.version }}"' ${{ inputs.clone_into }}/${{ inputs.subdirectory }}/tembo-ai.yaml - name: Git commit and push to remote diff --git a/.github/actions/build-and-push-to-quay/action.yml b/.github/actions/build-and-push-to-quay/action.yml index 394338981..a860e2442 100644 --- a/.github/actions/build-and-push-to-quay/action.yml +++ b/.github/actions/build-and-push-to-quay/action.yml @@ -1,5 +1,5 @@ -name: 'Build and push to Quay' -description: 'Builds a container image and pushes it to our Quay organization' +name: "Build and push to Quay" +description: "Builds a container image and pushes it to our Quay organization" inputs: image_name: description: 'The name of the image, not including the registry or the tag, for example "postgres"' @@ -13,9 +13,9 @@ inputs: required: false default: "quay.io/tembo" docker_directory: - description: 'The relative path to a directory in which there is a Dockerfile' + description: "The relative path to a directory in which there is a Dockerfile" required: false - default: '.' + default: "." quay_user: required: true description: "Quay 'robot user' user name" @@ -29,11 +29,11 @@ inputs: required: true description: "Quay 'robot user' access token for Tembo org" publish_calver: - description: 'Should we tag with calendar versioning?' + description: "Should we tag with calendar versioning?" 
required: false default: false calver_suffix: - description: 'Optional suffix to the calendar version' + description: "Optional suffix to the calendar version" required: false default: "" publish_latest: @@ -55,11 +55,13 @@ runs: - name: Install TOML parser shell: bash run: | - set -xe - wget https://github.com/freshautomations/stoml/releases/download/v0.7.1/stoml_linux_amd64 - mv stoml_linux_amd64 stoml - chmod +x stoml - sudo mv stoml /usr/local/bin/ + set -xe + sudo apt-get update + sudo apt-get install -y wget + wget https://github.com/freshautomations/stoml/releases/download/v0.7.1/stoml_linux_amd64 + mv stoml_linux_amd64 stoml + chmod +x stoml + sudo mv stoml /usr/local/bin/ - name: Create whitespace-separated tags list shell: bash id: tags diff --git a/.github/actions/pgx-init/action.yml b/.github/actions/pgx-init/action.yml index a5644af61..a4e8de2c9 100644 --- a/.github/actions/pgx-init/action.yml +++ b/.github/actions/pgx-init/action.yml @@ -1,46 +1,48 @@ -name: 'pgrx initialization' -description: 'Initialize PGRX if it is a dependency, otherwise do nothing.' +name: "pgrx initialization" +description: "Initialize PGRX if it is a dependency, otherwise do nothing." 
inputs: working-directory: - description: 'The directory in which there is a pgrx extension project' + description: "The directory in which there is a pgrx extension project" required: true outputs: {} runs: using: "composite" steps: - - name: Install TOML parser - shell: bash - run: | - set -xe - wget https://github.com/freshautomations/stoml/releases/download/v0.7.1/stoml_linux_amd64 &> /dev/null - mv stoml_linux_amd64 stoml - chmod +x stoml - sudo mv stoml /usr/local/bin/ - - name: setup pgrx - shell: bash - id: pgrx_install - working-directory: ${{ inputs.working-directory }} - run: | - pgrx_version=$(stoml Cargo.toml dependencies.pgrx) - if [ -z "${pgrx_version}" ]; then - echo "pgrx is not a dependency: skipping" - echo "skip=true" >> $GITHUB_OUTPUT - else - cargo install --version ${pgrx_version} cargo-pgrx - echo "skip=false" >> $GITHUB_OUTPUT - fi - - name: pgrx init - shell: bash - if: steps.pgrx_install.outputs.skip == 'false' - working-directory: ${{ inputs.working-directory }} - run: | - set -x - pg_version=$(stoml Cargo.toml features.default) - # pgrx init can take a long time, and it re-compiles postgres even when there - # is a cached version. So, we can just check for the directory and - cat /home/runner/.pgrx/config.toml || true - if find /home/runner/.pgrx | grep $(awk -F "=" '/${pg_version}/ {print $2}' /home/runner/.pgrx/config.toml | tr -d '"'); then - echo "Already found pgrx is initialized. Skipping 'cargo pgrx init' command." 
- else - cargo pgrx init --${pg_version} download || true - fi + - name: Install TOML parser + shell: bash + run: | + set -xe + sudo apt-get update + sudo apt-get install -y wget + wget https://github.com/freshautomations/stoml/releases/download/v0.7.1/stoml_linux_amd64 &> /dev/null + mv stoml_linux_amd64 stoml + chmod +x stoml + sudo mv stoml /usr/local/bin/ + - name: setup pgrx + shell: bash + id: pgrx_install + working-directory: ${{ inputs.working-directory }} + run: | + pgrx_version=$(stoml Cargo.toml dependencies.pgrx) + if [ -z "${pgrx_version}" ]; then + echo "pgrx is not a dependency: skipping" + echo "skip=true" >> $GITHUB_OUTPUT + else + cargo install --version ${pgrx_version} cargo-pgrx + echo "skip=false" >> $GITHUB_OUTPUT + fi + - name: pgrx init + shell: bash + if: steps.pgrx_install.outputs.skip == 'false' + working-directory: ${{ inputs.working-directory }} + run: | + set -x + pg_version=$(stoml Cargo.toml features.default) + # pgrx init can take a long time, and it re-compiles postgres even when there + # is a cached version. So, we can just check for the directory and + cat /home/runner/.pgrx/config.toml || true + if find /home/runner/.pgrx | grep $(awk -F "=" '/${pg_version}/ {print $2}' /home/runner/.pgrx/config.toml | tr -d '"'); then + echo "Already found pgrx is initialized. Skipping 'cargo pgrx init' command." + else + cargo pgrx init --${pg_version} download || true + fi diff --git a/.github/actions/publish-crate/action.yml b/.github/actions/publish-crate/action.yml index 45377a30b..a540ec5c0 100644 --- a/.github/actions/publish-crate/action.yml +++ b/.github/actions/publish-crate/action.yml @@ -1,5 +1,5 @@ -name: 'Publish to crates.io' -description: 'Publish cratest to crates.io and some other crates.io-related actions, like checking if a version is already published.' +name: "Publish to crates.io" +description: "Publish cratest to crates.io and some other crates.io-related actions, like checking if a version is already published." 
inputs: working-directory: required: false @@ -30,11 +30,13 @@ runs: - name: Install TOML parser shell: bash run: | - set -xe - wget https://github.com/freshautomations/stoml/releases/download/v0.7.1/stoml_linux_amd64 &> /dev/null - mv stoml_linux_amd64 stoml - chmod +x stoml - sudo mv stoml /usr/local/bin/ + set -xe + sudo apt-get update + sudo apt-get install wget -y + wget https://github.com/freshautomations/stoml/releases/download/v0.7.1/stoml_linux_amd64 &> /dev/null + mv stoml_linux_amd64 stoml + chmod +x stoml + sudo mv stoml /usr/local/bin/ - name: Publish shell: bash working-directory: ${{ inputs.working-directory }} diff --git a/.github/workflows/cargo-test.yaml b/.github/workflows/cargo-test.yaml index 4e075d28e..85ba34c99 100644 --- a/.github/workflows/cargo-test.yaml +++ b/.github/workflows/cargo-test.yaml @@ -4,11 +4,11 @@ on: push: branches: ["main"] paths-ignore: - - 'tembo-py/**' + - "tembo-py/**" pull_request: branches: ["main"] paths-ignore: - - 'tembo-py/**' + - "tembo-py/**" jobs: find_directories: @@ -25,7 +25,7 @@ jobs: with: contains_the_file: Cargo.toml changed_relative_to_ref: origin/${{ github.base_ref || 'not-a-branch' }} - ignore_dirs: ".coredb examples tembo-cli/temboclient tembo-cli/tembodataclient" + ignore_dirs: ".coredb examples tembo-cli/temboclient tembo-cli/tembodataclient inference-gateway" lint: name: Run linters @@ -88,7 +88,7 @@ jobs: run: | set -xe sudo apt-get update - sudo apt-get install -y pkg-config libssl-dev lsb-release + sudo apt-get install -y pkg-config libssl-dev lsb-release wget sudo sh -c 'echo "deb http://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" > /etc/apt/sources.list.d/pgdg.list' wget -qO- https://www.postgresql.org/media/keys/ACCC4CF8.asc | sudo tee /etc/apt/trusted.gpg.d/pgdg.asc &>/dev/null sudo apt-get update && sudo apt-get install -y postgresql-client @@ -103,7 +103,7 @@ jobs: export PROMETHEUS_URL=https://prometheus-data-1.use1.dev.plat.cdb-svc.com cd ${{ matrix.path }} && cargo 
test env: - ORG_ID: org_2YW4TYIMI1LeOqJTXIyvkHOHCUo + ORG_ID: org_2YW4TYIMI1LeOqJTXIyvkHOHCUo ACCESS_TOKEN: ${{ secrets.TEMBO_TOKEN_TEST_ORG_DEV }} TEMBO_HOST: ${{ secrets.TEMBO_HOST }} TEMBO_DATA_HOST: ${{ secrets.TEMBO_DATA_HOST }} diff --git a/.github/workflows/tembo_ai.yaml b/.github/workflows/tembo_ai.yaml index a3ac7aa93..83c63b21d 100644 --- a/.github/workflows/tembo_ai.yaml +++ b/.github/workflows/tembo_ai.yaml @@ -28,6 +28,7 @@ on: jobs: tests: name: Run tests + runs-on: - self-hosted - dind diff --git a/CODEOWNERS b/CODEOWNERS index 560c2f254..5347769e4 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -1,9 +1,10 @@ -.github/** @nhudson @ianstanton @shahadarsh +.github/** @nhudson @ianstanton @shahadarsh @vrmiguel charts/** @nhudson @ianstanton @shahadarsh -tembo-operator/** @nhudson @ianstanton @ChuckHend -conductor/** @nhudson @ianstanton @ChuckHend +dataplane-webserver/** @nhudson @ianstanton @vrmiguel +tembo-operator/** @nhudson @ianstanton @ChuckHend @vrmiguel +conductor/** @nhudson @ianstanton @ChuckHend @vrmiguel tembo-pod-init/** @nhudson @ianstanton tembo-cli/** @shahadarsh @vrmiguel @DarrenBaldwin07 @joshuajerin tembo-py/** @chuckhend tembo-stacks/** @chuckhend @jasonmp85 -inference-gateway/** @chuckhend @jasonmp85 +inference-gateway/** @chuckhend @jasonmp85 \ No newline at end of file diff --git a/charts/tembo-ai/Chart.yaml b/charts/tembo-ai/Chart.yaml index 43b8509dd..225f19f3f 100644 --- a/charts/tembo-ai/Chart.yaml +++ b/charts/tembo-ai/Chart.yaml @@ -15,7 +15,7 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.2.0 +version: 0.3.1 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. 
Versions are not expected to diff --git a/charts/tembo-ai/templates/_helpers.tpl b/charts/tembo-ai/templates/_helpers.tpl index 743f33b18..91d47c765 100644 --- a/charts/tembo-ai/templates/_helpers.tpl +++ b/charts/tembo-ai/templates/_helpers.tpl @@ -5,6 +5,13 @@ Expand the name of the chart. {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} {{- end }} +{{/* +Define the namespace to use across the Helm chart +*/}} +{{- define "tembo-ai.namespace" -}} +{{- default .Release.Namespace }} +{{- end -}} + {{/* Create a default fully qualified app name. We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). diff --git a/charts/tembo-ai/templates/inference-service/_helpers.tpl b/charts/tembo-ai/templates/inference-service/_helpers.tpl index ea705d962..01e0ee702 100644 --- a/charts/tembo-ai/templates/inference-service/_helpers.tpl +++ b/charts/tembo-ai/templates/inference-service/_helpers.tpl @@ -2,7 +2,6 @@ Inference service specific labels */}} {{- define "tembo-ai.inferenceService.labels" -}} -app.kubernetes.io/component: inference-service {{ include "tembo-ai.labels" . }} {{- end }} @@ -20,3 +19,23 @@ Create the name of the inference-service service account to use {{- define "tembo-ai.inferenceService.serviceAccountName" -}} {{- include "tembo-ai.fullname" . }}-service {{- end }} + +{{/* +Deep merge the inference-service default configs and the service-specific configs +*/}} +{{- define "tembo-ai.inferenceService.deepMerge" -}} +{{- $result := deepCopy (index . 0) -}} +{{- range $key, $value := index . 
1 -}} + {{- if kindIs "map" $value -}} + {{- if hasKey $result $key -}} + {{- $newValue := fromYaml (include "tembo-ai.inferenceService.deepMerge" (list (get $result $key) $value)) -}} + {{- $_ := set $result $key $newValue -}} + {{- else -}} + {{- $_ := set $result $key $value -}} + {{- end -}} + {{- else -}} + {{- $_ := set $result $key $value -}} + {{- end -}} +{{- end -}} +{{- $result | toYaml -}} +{{- end -}} diff --git a/charts/tembo-ai/templates/inference-service/external-secret.yaml b/charts/tembo-ai/templates/inference-service/external-secret.yaml index 06d62dc61..66b91b6f0 100644 --- a/charts/tembo-ai/templates/inference-service/external-secret.yaml +++ b/charts/tembo-ai/templates/inference-service/external-secret.yaml @@ -1,21 +1,29 @@ -{{- if .Values.inferenceService.externalSecrets.secretName -}} +{{- if .Values.inferenceService.services }} + {{- $defaults := .Values.inferenceService.defaults }} + {{- range $serviceName, $serviceConfig := .Values.inferenceService.services }} + {{- $mergedConfig := fromYaml (include "tembo-ai.inferenceService.deepMerge" (list $defaults $serviceConfig)) }} + {{- if and (default false $mergedConfig.enabled) $mergedConfig.externalSecrets.secretName }} +--- apiVersion: external-secrets.io/v1beta1 kind: ExternalSecret metadata: - name: {{ include "tembo-ai.fullname" . }}-service - namespace: {{ .Release.Namespace }} + name: {{ include "tembo-ai.fullname" $ }}-{{ $serviceName }} + namespace: {{ include "tembo-ai.namespace" $ }} labels: - {{- include "tembo-ai.inferenceService.labels" . 
| nindent 4 }} + {{- include "tembo-ai.inferenceService.labels" $ | nindent 4 }} + app.kubernetes.io/component: {{ $serviceName }} spec: - refreshInterval: {{ .Values.inferenceService.externalSecrets.refreshInterval }} + refreshInterval: {{ $mergedConfig.externalSecrets.refreshInterval }} secretStoreRef: - name: {{ .Values.inferenceService.externalSecrets.parameterStore.name }} - kind: {{ .Values.inferenceService.externalSecrets.parameterStore.kind }} + name: {{ $mergedConfig.externalSecrets.parameterStore.name }} + kind: {{ $mergedConfig.externalSecrets.parameterStore.kind }} target: creationPolicy: 'Owner' - name: {{ .Values.inferenceService.externalSecrets.secretName }} + name: {{ $mergedConfig.externalSecrets.secretName }} dataFrom: - find: name: - regexp: {{ .Values.inferenceService.externalSecrets.secretRegex }} + regexp: {{ $mergedConfig.externalSecrets.secretRegex }} + {{- end }} + {{- end }} {{- end }} diff --git a/charts/tembo-ai/templates/inference-service/pod-monitor.yaml b/charts/tembo-ai/templates/inference-service/pod-monitor.yaml index b4401054c..7a22e8f23 100644 --- a/charts/tembo-ai/templates/inference-service/pod-monitor.yaml +++ b/charts/tembo-ai/templates/inference-service/pod-monitor.yaml @@ -1,19 +1,34 @@ -{{- if .Values.inferenceService.podMonitor.enabled -}} +{{- if .Values.inferenceService.services }} + {{- $defaults := .Values.inferenceService.defaults }} + {{- $releaseName := default "release-name" .Release.Name }} + {{- range $serviceName, $serviceConfig := .Values.inferenceService.services }} + {{- $mergedConfig := fromYaml (include "tembo-ai.inferenceService.deepMerge" (list $defaults $serviceConfig)) }} + {{- if and (default false $mergedConfig.enabled) (default false $mergedConfig.podMonitor.enabled) }} +--- apiVersion: monitoring.coreos.com/v1 kind: PodMonitor metadata: - name: {{ include "tembo-ai.fullname" . 
}}-service - namespace: {{ .Release.Namespace }} + name: {{ include "tembo-ai.fullname" $ }}-{{ $serviceName }} + namespace: {{ include "tembo-ai.namespace" $ }} labels: - {{- include "tembo-ai.inferenceService.labels" . | nindent 4 }} + {{- include "tembo-ai.inferenceService.labels" $ | nindent 4 }} + app.kubernetes.io/component: {{ $serviceName }} spec: podMetricsEndpoints: - - path: {{ .Values.inferenceService.podMonitor.path }} - port: {{ .Values.inferenceService.podMonitor.portName }} + - port: {{ $mergedConfig.podMonitor.portName }} + path: {{ $mergedConfig.podMonitor.path }} + {{- with $mergedConfig.podMonitor.interval }} + interval: {{ . }} + {{- end }} + {{- with $mergedConfig.podMonitor.scrapeTimeout }} + scrapeTimeout: {{ . }} + {{- end }} namespaceSelector: matchNames: - - {{ .Release.Namespace }} + - {{ include "tembo-ai.namespace" $ }} selector: matchLabels: - {{- include "tembo-ai.inferenceService.selectorLabels" . | nindent 6 }} + {{- include "tembo-ai.inferenceService.selectorLabels" $ | nindent 6 }} + {{- end }} + {{- end }} {{- end }} diff --git a/charts/tembo-ai/templates/inference-service/service.yaml b/charts/tembo-ai/templates/inference-service/service.yaml index db0a95414..303a054cf 100644 --- a/charts/tembo-ai/templates/inference-service/service.yaml +++ b/charts/tembo-ai/templates/inference-service/service.yaml @@ -1,16 +1,28 @@ +{{- if .Values.inferenceService.services }} + {{- $defaults := .Values.inferenceService.defaults }} + {{- range $serviceName, $serviceConfig := .Values.inferenceService.services }} + {{- $mergedConfig := fromYaml (include "tembo-ai.inferenceService.deepMerge" (list $defaults $serviceConfig)) }} + {{- if and (default false $mergedConfig.enabled) (default true $mergedConfig.service.enabled) }} +--- apiVersion: v1 kind: Service metadata: - name: {{ include "tembo-ai.fullname" . 
}}-service + name: {{ include "tembo-ai.fullname" $ }}-{{ $serviceName }} + namespace: {{ include "tembo-ai.namespace" $ }} labels: - {{- include "tembo-ai.inferenceService.labels" . | nindent 4 }} + {{- include "tembo-ai.inferenceService.labels" $ | nindent 4 }} + app.kubernetes.io/component: {{ $serviceName }} spec: clusterIP: None - type: {{ .Values.inferenceService.service.type }} + type: {{ $mergedConfig.service.type | default "ClusterIP" }} ports: - - port: {{ .Values.inferenceService.service.port }} + - port: {{ $mergedConfig.service.port }} targetPort: http protocol: TCP name: http selector: - {{- include "tembo-ai.inferenceService.selectorLabels" . | nindent 4 }} + {{- include "tembo-ai.inferenceService.selectorLabels" $ | nindent 4 }} + app.kubernetes.io/component: {{ $serviceName }} + {{- end }} + {{- end }} +{{- end }} diff --git a/charts/tembo-ai/templates/inference-service/serviceaccount.yaml b/charts/tembo-ai/templates/inference-service/serviceaccount.yaml index 76af4ec9f..e559119e1 100644 --- a/charts/tembo-ai/templates/inference-service/serviceaccount.yaml +++ b/charts/tembo-ai/templates/inference-service/serviceaccount.yaml @@ -1,10 +1,31 @@ +{{- if .Values.inferenceService.services }} + {{- $defaults := .Values.inferenceService.defaults }} + {{- range $serviceName, $serviceConfig := .Values.inferenceService.services }} + {{- $mergedConfig := fromYaml (include "tembo-ai.inferenceService.deepMerge" (list $defaults $serviceConfig)) }} + {{- if and (default false $mergedConfig.enabled) (default true $mergedConfig.serviceAccount.create) }} +--- apiVersion: v1 kind: ServiceAccount metadata: - name: {{ include "tembo-ai.inferenceService.serviceAccountName" . }} + name: {{ include "tembo-ai.inferenceService.serviceAccountName" $ }}-{{ $serviceName }} + namespace: {{ include "tembo-ai.namespace" $ }} labels: - {{- include "tembo-ai.inferenceService.labels" . 
| nindent 4 }} - {{- with .Values.inferenceService.serviceAccount.annotations }} + {{- include "tembo-ai.inferenceService.labels" $ | nindent 4 }} + app.kubernetes.io/component: {{ $serviceName }} + {{- with $mergedConfig.serviceAccount.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} + {{- with $mergedConfig.serviceAccount.annotations }} annotations: {{- toYaml . | nindent 4 }} {{- end }} +{{- if $mergedConfig.serviceAccount.imagePullSecrets }} +imagePullSecrets: + {{- toYaml $mergedConfig.serviceAccount.imagePullSecrets | nindent 2 }} +{{- end }} +{{- if hasKey $mergedConfig.serviceAccount "automountServiceAccountToken" }} +automountServiceAccountToken: {{ $mergedConfig.serviceAccount.automountServiceAccountToken }} +{{- end }} + {{- end }} + {{- end }} +{{- end }} diff --git a/charts/tembo-ai/templates/inference-service/statefulset.yaml b/charts/tembo-ai/templates/inference-service/statefulset.yaml index 019960348..69efe42e4 100644 --- a/charts/tembo-ai/templates/inference-service/statefulset.yaml +++ b/charts/tembo-ai/templates/inference-service/statefulset.yaml @@ -1,112 +1,126 @@ +{{- if .Values.inferenceService.services }} + {{- $defaults := .Values.inferenceService.defaults }} + {{- range $serviceName, $serviceConfig := .Values.inferenceService.services }} + {{- $mergedConfig := fromYaml (include "tembo-ai.inferenceService.deepMerge" (list $defaults $serviceConfig)) }} + {{- if (default false $mergedConfig.enabled) }} +--- apiVersion: apps/v1 kind: StatefulSet metadata: - name: {{ include "tembo-ai.fullname" . }}-inference-service + name: {{ include "tembo-ai.fullname" $ }}-{{ $serviceName }} + namespace: {{ include "tembo-ai.namespace" $ }} labels: - {{- include "tembo-ai.inferenceService.labels" . 
| nindent 4 }} + {{- include "tembo-ai.inferenceService.labels" $ | nindent 4 }} + app.kubernetes.io/component: {{ $serviceName }} spec: - replicas: {{ .Values.inferenceService.replicaCount }} + replicas: {{ $mergedConfig.replicaCount }} selector: matchLabels: - {{- include "tembo-ai.inferenceService.selectorLabels" . | nindent 6 }} - serviceName: {{ include "tembo-ai.fullname" . }}-inference-service + {{- include "tembo-ai.inferenceService.selectorLabels" $ | nindent 6 }} + app.kubernetes.io/component: {{ $serviceName }} + serviceName: {{ include "tembo-ai.fullname" $ }}-{{ $serviceName }}-inference-service template: metadata: - {{- with .Values.inferenceService.podAnnotations }} + {{- with $mergedConfig.podAnnotations }} annotations: {{- toYaml . | nindent 8 }} {{- end }} labels: - {{- include "tembo-ai.inferenceService.selectorLabels" . | nindent 8 }} + {{- include "tembo-ai.inferenceService.selectorLabels" $ | nindent 8 }} + app.kubernetes.io/component: {{ $serviceName }} spec: - {{- with .Values.inferenceService.imagePullSecrets }} + {{- with $mergedConfig.imagePullSecrets }} imagePullSecrets: {{- toYaml . | nindent 8 }} {{- end }} - serviceAccountName: {{ include "tembo-ai.inferenceService.serviceAccountName" . 
}} + serviceAccountName: {{ include "tembo-ai.inferenceService.serviceAccountName" $ }}-{{ $serviceName }} securityContext: - {{- toYaml .Values.inferenceService.podSecurityContext | nindent 8 }} + {{- toYaml $mergedConfig.podSecurityContext | nindent 8 }} containers: - name: inference-service securityContext: - {{- toYaml .Values.inferenceService.securityContext | nindent 12 }} - image: "{{ .Values.inferenceService.image.repository }}:{{ .Values.inferenceService.image.tag | default .Chart.AppVersion }}" - imagePullPolicy: {{ .Values.inferenceService.image.pullPolicy }} + {{- toYaml $mergedConfig.securityContext | nindent 12 }} + image: "{{ $mergedConfig.image.repository }}:{{ $mergedConfig.image.tag }}" + imagePullPolicy: {{ $mergedConfig.image.pullPolicy }} ports: - name: http - containerPort: {{ .Values.inferenceService.service.port }} + containerPort: {{ $mergedConfig.service.port }} protocol: TCP - {{- if and .Values.inferenceService.podMonitor.enabled (ne .Values.inferenceService.podMonitor.port "http") }} - - name: {{ .Values.inferenceService.podMonitor.portName }} - containerPort: {{ .Values.inferenceService.podMonitor.containerPort }} + {{- if and $mergedConfig.podMonitor.enabled (ne $mergedConfig.podMonitor.portName "http") }} + - name: {{ $mergedConfig.podMonitor.portName }} + containerPort: {{ $mergedConfig.podMonitor.containerPort }} protocol: TCP {{- end }} - {{- if .Values.inferenceService.livenessProbe.enabled }} + {{- if $mergedConfig.livenessProbe.enabled }} livenessProbe: httpGet: - path: {{ .Values.inferenceService.livenessProbe.path }} - port: {{ .Values.inferenceService.livenessProbe.port }} + path: {{ $mergedConfig.livenessProbe.path }} + port: {{ $mergedConfig.livenessProbe.port }} {{- end }} - {{- if .Values.inferenceService.readinessProbe.enabled }} + {{- if $mergedConfig.readinessProbe.enabled }} readinessProbe: httpGet: - path: {{ .Values.inferenceService.readinessProbe.path }} - port: {{ .Values.inferenceService.readinessProbe.port }} + 
path: {{ $mergedConfig.readinessProbe.path }} + port: {{ $mergedConfig.readinessProbe.port }} {{- end }} - {{- if .Values.inferenceService.startupProbe.enabled }} + {{- if $mergedConfig.startupProbe.enabled }} startupProbe: httpGet: - path: {{ .Values.inferenceService.startupProbe.path }} - port: {{ .Values.inferenceService.startupProbe.port }} - failureThreshold: {{ .Values.inferenceService.startupProbe.failureThreshold }} - periodSeconds: {{ .Values.inferenceService.startupProbe.periodSeconds }} + path: {{ $mergedConfig.startupProbe.path }} + port: {{ $mergedConfig.startupProbe.port }} + failureThreshold: {{ $mergedConfig.startupProbe.failureThreshold }} + periodSeconds: {{ $mergedConfig.startupProbe.periodSeconds }} {{- end }} resources: - {{- toYaml .Values.inferenceService.resources | nindent 12 }} - {{- with .Values.inferenceService.args }} + {{- toYaml $mergedConfig.resources | nindent 12 }} + {{- with $mergedConfig.args }} args: - {{- toYaml . | nindent 10 }} + {{- toYaml . | nindent 12 }} {{- end }} - {{- with .Values.inferenceService.command }} + {{- with $mergedConfig.command }} command: - {{- toYaml . | nindent 10 }} + {{- toYaml . | nindent 12 }} {{- end }} - {{- with .Values.inferenceService.env }} + {{- with $mergedConfig.env }} env: - {{- toYaml . | nindent 10 }} + {{- toYaml . | nindent 12 }} {{- end }} - {{- if .Values.inferenceService.persistence.enabled }} + {{- if $mergedConfig.persistence.enabled }} volumeMounts: - name: models - mountPath: {{ .Values.inferenceService.persistence.mountPath }} + mountPath: {{ $mergedConfig.persistence.mountPath }} {{- end }} - {{- with .Values.inferenceService.nodeSelector }} + {{- with $mergedConfig.nodeSelector }} nodeSelector: {{- toYaml . | nindent 8 }} {{- end }} - {{- with .Values.inferenceService.affinity }} + {{- with $mergedConfig.affinity }} affinity: {{- toYaml . 
| nindent 8 }} {{- end }} - {{- with .Values.inferenceService.tolerations }} + {{- with $mergedConfig.tolerations }} tolerations: {{- toYaml . | nindent 8 }} {{- end }} + {{- if $mergedConfig.persistence.enabled }} volumeClaimTemplates: - {{- if .Values.inferenceService.persistence.enabled }} - apiVersion: v1 kind: PersistentVolumeClaim metadata: name: models labels: - {{- include "tembo-ai.inferenceService.labels" . | nindent 8 }} + {{- include "tembo-ai.inferenceService.labels" $ | nindent 10 }} + app.kubernetes.io/component: {{ $serviceName }} spec: accessModes: - - {{ .Values.inferenceService.persistence.accessMode }} + - {{ $mergedConfig.persistence.accessMode }} resources: requests: - storage: {{ .Values.inferenceService.persistence.size }} - {{- if .Values.inferenceService.persistence.storageClass }} - storageClassName: {{ .Values.inferenceService.persistence.storageClass }} + storage: {{ $mergedConfig.persistence.size }} + {{- if $mergedConfig.persistence.storageClass }} + storageClassName: {{ $mergedConfig.persistence.storageClass }} {{- end }} + {{- end }} {{- end }} + {{- end }} +{{- end }} diff --git a/charts/tembo-ai/values.yaml b/charts/tembo-ai/values.yaml index 9d715757e..2dd852dd5 100644 --- a/charts/tembo-ai/values.yaml +++ b/charts/tembo-ai/values.yaml @@ -87,80 +87,87 @@ inferenceGateway: podSecurityContext: {} inferenceService: - image: - repository: quay.io/tembo/inference - pullPolicy: IfNotPresent - tag: latest - resources: - requests: - cpu: "4" - memory: "16Gi" - nvidia.com/gpu: "1" - limits: - cpu: "8" - memory: "16Gi" - nvidia.com/gpu: "1" - livenessProbe: - enabled: true - path: /health - port: http - readinessProbe: - enabled: true - path: /health - port: http - startupProbe: - enabled: true - path: /health - port: http - failureThreshold: 30 - periodSeconds: 10 - replicaCount: 1 - externalSecrets: - refreshInterval: "5m" - parameterStore: - name: "secret-store-parameter-store" - kind: ClusterSecretStore - secretName: ~ - secretRegex: 
~ - podMonitor: - enabled: false - path: /metrics - # Sometimes applications serve metrics on a different port, - # which makes it easier to prevent metrics from accidentally - # being publicly available. - portName: metrics - containerPort: 8081 - serviceAccount: - create: true - annotations: {} - service: - port: 8000 - args: [] - command: [] - env: [] - securityContext: {} - # # The most practical security settings are - # # dropping all linux capabilities and - # # running as non-root. - # capabilities: - # drop: - # - ALL - # runAsNonRoot: true - # # Read only file system is better if the application - # # can tolerate it. - # # readOnlyRootFilesystem: true - nodeSelector: {} - tolerations: - - key: "tembo.io/gpu" - operator: "Equal" - value: "true" - effect: "NoSchedule" - affinity: {} - podAnnotations: {} - podSecurityContext: {} - persistence: - enabled: true - size: 100Gi - storageClass: "" - accessMode: ReadWriteOnce - mountPath: /root/.cache/ + defaults: + image: + repository: quay.io/tembo/inference + pullPolicy: IfNotPresent + tag: latest + resources: + requests: + cpu: "4" + memory: "16Gi" + nvidia.com/gpu: "1" + limits: + cpu: "8" + memory: "16Gi" + nvidia.com/gpu: "1" + livenessProbe: + enabled: true + path: /health + port: http + readinessProbe: + enabled: true + path: /health + port: http + startupProbe: + enabled: true + path: /health + port: http + failureThreshold: 30 + periodSeconds: 10 + replicaCount: 1 + externalSecrets: + refreshInterval: "5m" + parameterStore: + name: "secret-store-parameter-store" + kind: ClusterSecretStore + secretName: ~ + secretRegex: ~ + podMonitor: + enabled: false + path: /metrics + # Sometimes applications serve metrics on a different port, + # which makes it easier to prevent metrics from accidentally + # being publicly available. 
+ portName: metrics + containerPort: 8081 + serviceAccount: + create: true + annotations: {} + automountServiceAccountToken: false + service: + enabled: true + port: 8000 + args: [] + command: [] + env: [] + securityContext: {} + # # The most practical security settings are + # # dropping all linux capabilities and + # # running as non-root. + # capabilities: + # drop: + # - ALL + # runAsNonRoot: true + # # Read only file system is better if the application + # # can tolerate it. + # # readOnlyRootFilesystem: true + nodeSelector: {} + tolerations: + - key: "tembo.io/gpu" + operator: "Equal" + value: "true" + effect: "NoSchedule" + affinity: {} + podAnnotations: {} + podSecurityContext: {} + persistence: + enabled: true + size: 100Gi + storageClass: "" + accessMode: ReadWriteOnce + mountPath: /root/.cache/ + # Define individual inference services here + services: {} + # service1: + # enabled: true diff --git a/charts/tembo-operator/templates/crd.yaml b/charts/tembo-operator/templates/crd.yaml index 11e3081f8..ef55e284d 100644 --- a/charts/tembo-operator/templates/crd.yaml +++ b/charts/tembo-operator/templates/crd.yaml @@ -2246,7 +2246,7 @@ spec: description: |- Configure the load balancer to be public or private. - **Default**: false. + **Default**: true. 
type: boolean serviceType: default: LoadBalancer diff --git a/conductor/src/main.rs b/conductor/src/main.rs index 031b4023b..b4af1ed5b 100644 --- a/conductor/src/main.rs +++ b/conductor/src/main.rs @@ -85,6 +85,10 @@ async fn run(metrics: CustomMetrics) -> Result<(), ConductorError> { .unwrap_or_else(|_| "".to_owned()) .parse() .expect("error parsing GCP_PROJECT_NUMBER"); + let is_loadbalancer_public: bool = env::var("IS_LOADBALANCER_PUBLIC") + .unwrap_or_else(|_| "true".to_owned()) + .parse() + .expect("error parsing IS_LOADBALANCER_PUBLIC"); // Error and exit if CF_TEMPLATE_BUCKET is not set when IS_CLOUD_FORMATION is enabled if is_cloud_formation && cf_template_bucket.is_empty() { @@ -279,6 +283,7 @@ async fn run(metrics: CustomMetrics) -> Result<(), ConductorError> { &mut coredb_spec, is_cloud_formation, &client, + is_loadbalancer_public, ) .await { @@ -801,6 +806,7 @@ async fn init_cloud_perms( coredb_spec: &mut CoreDBSpec, is_cloud_formation: bool, _client: &Client, + is_loadbalancer_public: bool, ) -> Result<(), ConductorError> { if !is_cloud_formation { return Ok(()); @@ -860,6 +866,12 @@ async fn init_cloud_perms( coredb_spec.backup = backup; coredb_spec.serviceAccountTemplate = service_account_template; + if is_loadbalancer_public { + if let Some(ref mut dedicated_networking) = coredb_spec.dedicated_networking { + dedicated_networking.public = true; + } + } + Ok(()) } diff --git a/inference-gateway/.sqlx/query-be21197914e8a3778e818a1fca0080e9e700ba084f945fe20fda7b28e5a8f6af.json b/inference-gateway/.sqlx/query-be21197914e8a3778e818a1fca0080e9e700ba084f945fe20fda7b28e5a8f6af.json new file mode 100644 index 000000000..3985b5d0a --- /dev/null +++ b/inference-gateway/.sqlx/query-be21197914e8a3778e818a1fca0080e9e700ba084f945fe20fda7b28e5a8f6af.json @@ -0,0 +1,14 @@ +{ + "db_name": "PostgreSQL", + "query": "UPDATE billing.reporter_watermark\n SET last_reported_at = $1", + "describe": { + "columns": [], + "parameters": { + "Left": [ + "Timestamptz" + ] + }, + 
"nullable": [] + }, + "hash": "be21197914e8a3778e818a1fca0080e9e700ba084f945fe20fda7b28e5a8f6af" +} diff --git a/inference-gateway/Makefile b/inference-gateway/Makefile index 9bb793f51..2570797fd 100644 --- a/inference-gateway/Makefile +++ b/inference-gateway/Makefile @@ -1,5 +1,5 @@ DATABASE_URL:=postgresql://postgres:postgres@localhost:5432/postgres -LLM_SERVICE_HOST_PORT=http://localhost:8000 +MODEL_SERVICE_PORT_MAP=facebook/opt-125m=http://localhost:8000 RUST_LOG=debug SQLX_OFFLINE:=true ORG_AUTH_ENABLED:=false @@ -18,7 +18,7 @@ check: cargo sqlx prepare --check run: - LLM_SERVICE_HOST_PORT=${LLM_SERVICE_HOST_PORT} RUST_LOG=${RUST_LOG} ORG_AUTH_ENABLED=${ORG_AUTH_ENABLED} cargo run + MODEL_SERVICE_PORT_MAP=${MODEL_SERVICE_PORT_MAP} RUST_LOG=${RUST_LOG} ORG_AUTH_ENABLED=${ORG_AUTH_ENABLED} cargo run run-migrations: sqlx migrate run --database-url ${DATABASE_URL} @@ -30,9 +30,9 @@ run-mock-server: docker compose up -d mock-server unit-test: - cargo test + cargo test -- --test-threads=1 integration-test: run-mock-server - RUST_LOG=${RUST_LOG} LLM_SERVICE_HOST_PORT=${LLM_SERVICE_HOST_PORT} cargo test ${TEST_NAME} -- --ignored --nocapture + RUST_LOG=${RUST_LOG} MODEL_SERVICE_PORT_MAP=${MODEL_SERVICE_PORT_MAP} cargo test ${TEST_NAME} -- --ignored --nocapture --test-threads=1 test-all: unit-test integration-test \ No newline at end of file diff --git a/inference-gateway/docker-compose.yml b/inference-gateway/docker-compose.yml index 2e5c5d44f..3e3c54387 100644 --- a/inference-gateway/docker-compose.yml +++ b/inference-gateway/docker-compose.yml @@ -15,7 +15,7 @@ services: environment: - RUST_LOG=info - DATABASE_URL=postgresql://postgres:postgres@postgres:5432/postgres - - LLM_SERVICE_HOST_PORT=${LLM_SERVICE_HOST_PORT} + - MODEL_SERVICE_PORT_MAP=${MODEL_SERVICE_PORT_MAP} ports: - 8080:8080 vllm: diff --git a/inference-gateway/docker/inference/Dockerfile b/inference-gateway/docker/inference/Dockerfile index 082c15da9..8559cdd50 100644 --- 
a/inference-gateway/docker/inference/Dockerfile +++ b/inference-gateway/docker/inference/Dockerfile @@ -1,3 +1,3 @@ -FROM vllm/vllm-openai:v0.6.2 +FROM vllm/vllm-openai:v0.6.3 ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"] diff --git a/inference-gateway/migrations/20241010050949_add-watermark-default-value.sql b/inference-gateway/migrations/20241010050949_add-watermark-default-value.sql new file mode 100644 index 000000000..f7c8bac50 --- /dev/null +++ b/inference-gateway/migrations/20241010050949_add-watermark-default-value.sql @@ -0,0 +1,11 @@ +ALTER TABLE billing.reporter_watermark +ALTER COLUMN last_reported_at SET DATA TYPE TIMESTAMP WITH TIME ZONE; + +ALTER TABLE billing.reporter_watermark +ALTER COLUMN last_reported_at SET DEFAULT '1970-01-01'; + +ALTER TABLE billing.reporter_watermark +ALTER COLUMN last_reported_at SET NOT NULL; + +INSERT INTO billing.reporter_watermark (last_reported_at) +VALUES (DEFAULT); \ No newline at end of file diff --git a/inference-gateway/src/config.rs b/inference-gateway/src/config.rs index 623308448..2148a50c8 100644 --- a/inference-gateway/src/config.rs +++ b/inference-gateway/src/config.rs @@ -1,13 +1,15 @@ +use std::collections::HashMap; use std::env; use url::Url; +use crate::errors::PlatformError; + #[derive(Clone, Debug)] pub struct Config { - /// service and port of the inference service - /// Must be an OpenAI compatible interface - pub llm_service_host_port: Url, - /// Postgres connection string to the timeseries databse which logs token usage + pub model_rewrites: HashMap<String, String>, + pub model_service_map: HashMap<String, Url>, + /// Postgres connection string to the timeseries database which logs token usage pub pg_conn_str: String, /// Postgres connection string for the Control Plane queue pub billing_queue_conn_str: String, @@ -26,7 +28,8 @@ pub struct Config { impl Config { pub async fn new() -> Self { Self { - llm_service_host_port: parse_llm_service(), + model_rewrites: parse_model_rewrite(), + model_service_map:
parse_model_service_port_map(), pg_conn_str: from_env_default( "DATABASE_URL", "postgresql://postgres:postgres@0.0.0.0:5432/postgres", @@ -62,7 +65,190 @@ fn from_env_default(key: &str, default: &str) -> String { env::var(key).unwrap_or_else(|_| default.to_owned()) } -fn parse_llm_service() -> Url { - let value = from_env_default("LLM_SERVICE_HOST_PORT", "http://vllm:8000"); - Url::parse(&value).unwrap_or_else(|_| panic!("malformed LLM_SERVICE_HOST_PORT: {value}")) +/// MODEL_SERVICE_PORT_MAP -- a comma-separated list of model names and the host:port they are served at +/// <model-name>=<service>:<port>,<model-name>=<service>:<port> +/// e.g. meta-llama/Meta-Llama-3-8B-Instruct=llama-3-8b-instruct:8000,meta-llama/Llama-3.1-8B-Instruct=llama-3-1-8b-instruct:8000, +/// Must be an OpenAI compatible interface +fn parse_model_service_port_map() -> HashMap<String, Url> { + let model_mappings_values = from_env_default( + "MODEL_SERVICE_PORT_MAP", + "facebook/opt-125m=http://vllm:8000", + ); + + // Initialize an empty HashMap to store model-service-port mappings + let mut model_map: HashMap<String, Url> = HashMap::new(); + + // Split the environment variable value by comma to get individual mappings + for mapping in model_mappings_values.split(',') { + // Split each mapping into <model-name>=<service>:<port> + if let Some((model_name, service_port)) = mapping.split_once('=') { + let svc_port_url = Url::parse(service_port) + .unwrap_or_else(|_| panic!("malformed service: {service_port}")); + model_map.insert(model_name.to_string(), svc_port_url); + } + } + model_map +} + +fn parse_model_rewrite() -> HashMap<String, String> { + let mut map = HashMap::new(); + + if let Ok(env_var) = env::var("MODEL_REWRITES") { + for pair in env_var.split(',') { + if let Some((key, value)) = pair.split_once(':') { + map.insert(key.to_string(), value.to_string()); + } + } + } + + map +} + +#[derive(Debug)] +pub struct MappedRequest { + // the mapped model name + pub model: String, + // url to the correct service for the model + pub base_url: Url, + // request body with updated model name + pub body:
serde_json::Value, +} + +pub fn rewrite_model_request( + mut body: serde_json::Value, + config: &Config, +) -> Result { + // map the model, if there is a mapping for it + let target_model = if let Some(model) = body.get("model") { + let requested_model = model.as_str().ok_or_else(|| { + PlatformError::InvalidQuery("empty value in `model` parameter".to_string()) + })?; + + if let Some(rewritten_model) = config.model_rewrites.get(requested_model) { + body["model"] = serde_json::Value::String(rewritten_model.clone()); + rewritten_model + } else { + requested_model + } + } else { + Err(PlatformError::InvalidQuery( + "missing `model` parameter in request body".to_string(), + ))? + }; + + let base_url = config + .model_service_map + .get(target_model) + .ok_or_else(|| PlatformError::InvalidQuery(format!("model {} not found", target_model)))? + .clone(); + + Ok(MappedRequest { + model: target_model.to_string(), + base_url, + body, + }) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::env; + + #[tokio::test] + async fn test_rewrite() { + env::set_var("MODEL_REWRITES", "cat:dog,old:young"); + env::set_var( + "MODEL_SERVICE_PORT_MAP", + "dog=http://dog:8000/,young=http://young:8000/", + ); + + let cfg = Config::new().await; + let body = serde_json::json!({ + "model": "cat", + "key": "value" + }); + + let rewritten = rewrite_model_request(body.clone(), &cfg).unwrap(); + assert_eq!(rewritten.model, "dog"); + assert_eq!(rewritten.base_url.to_string(), "http://dog:8000/"); + assert_eq!(rewritten.body.get("key").unwrap(), "value"); + + let body = serde_json::json!({ + "model": "old", + "key": "value2" + }); + + let rewritten = rewrite_model_request(body.clone(), &cfg).unwrap(); + assert_eq!(rewritten.model, "young"); + assert_eq!(rewritten.base_url.to_string(), "http://young:8000/"); + assert_eq!(rewritten.body.get("key").unwrap(), "value2"); + } + + #[test] + fn test_valid_env_var() { + env::set_var("MODEL_REWRITES", "cat:dog,old:young"); + let result = 
parse_model_rewrite(); + + let mut expected = HashMap::new(); + expected.insert("cat".to_string(), "dog".to_string()); + expected.insert("old".to_string(), "young".to_string()); + + assert_eq!(result, expected); + } + + #[test] + fn test_empty_env_var() { + env::set_var("MODEL_REWRITES", ""); + let result = parse_model_rewrite(); + assert!(result.is_empty()); + } + + #[test] + fn test_invalid_format() { + env::set_var("MODEL_REWRITES", "cat:dog,invalidpair,old:young"); + let result = parse_model_rewrite(); + + let mut expected = HashMap::new(); + expected.insert("cat".to_string(), "dog".to_string()); + expected.insert("old".to_string(), "young".to_string()); + + assert_eq!(result, expected); + } + + #[test] + fn test_default_values() { + env::remove_var("MODEL_SERVICE_PORT_MAP"); + + let result = parse_model_service_port_map(); + let mut expected = HashMap::new(); + expected.insert( + "facebook/opt-125m".to_string(), + Url::parse("http://vllm:8000").unwrap(), + ); + assert_eq!(result, expected); + } + + #[test] + fn test_custom_mapping() { + env::set_var("MODEL_SERVICE_PORT_MAP", "meta-llama/Meta-Llama-3-8B-Instruct=http://tembo-ai-dev-llama-3-8b-instruct.svc.cluster.local:8000"); + + let result = parse_model_service_port_map(); + let mut expected = HashMap::new(); + expected.insert( + "meta-llama/Meta-Llama-3-8B-Instruct".to_string(), + Url::parse("http://tembo-ai-dev-llama-3-8b-instruct.svc.cluster.local:8000").unwrap(), + ); + + assert_eq!(result, expected); + } + + #[test] + #[should_panic(expected = "malformed service: http://vllm:invalid_port")] + fn test_malformed_url() { + env::set_var( + "MODEL_SERVICE_PORT_MAP", + "facebook/opt-125m=http://vllm:invalid_port", + ); + parse_model_service_port_map(); + } } diff --git a/inference-gateway/src/events_reporter.rs b/inference-gateway/src/events_reporter.rs index cdd4da243..045bb42b4 100644 --- a/inference-gateway/src/events_reporter.rs +++ b/inference-gateway/src/events_reporter.rs @@ -91,6 +91,21 @@ async fn 
get_reporter_watermark(conn: &PgPool) -> Result, + now: DateTime, +) -> Result<(), sqlx::Error> { + sqlx::query!( + "UPDATE billing.reporter_watermark + SET last_reported_at = $1", + now + ) + .execute(inference_pool) + .await + .map_err(Into::into) + .map(|_| ()) +} + fn start_of_the_hour(datetime: DateTime) -> DateTime { // Safe unwrap since, according to chrono docs, Utc will never have double mappings Utc.with_ymd_and_hms( @@ -149,6 +164,9 @@ pub async fn run_events_reporter(pg_conn: String, billing_queue_conn: String) -> for (start_time, end_time) in chunks { enqueue_event(&inference_pool, &queue, BILLING_QUEUE, start_time, end_time).await?; } + + // Save new reporter watermark + save_reporter_watermark(&inference_pool, now).await?; } } diff --git a/inference-gateway/src/routes/forward.rs b/inference-gateway/src/routes/forward.rs index 23873de4f..690050418 100644 --- a/inference-gateway/src/routes/forward.rs +++ b/inference-gateway/src/routes/forward.rs @@ -7,6 +7,7 @@ use std::sync::Arc; use tokio::sync::RwLock; use crate::authorization; +use crate::config::rewrite_model_request; use crate::errors::{AuthError, PlatformError}; pub async fn forward_request( @@ -45,13 +46,19 @@ pub async fn forward_request( return Ok(HttpResponse::BadRequest().body("Embedding generation is not yet supported")); } - let mut new_url = config.llm_service_host_port.clone(); + let rewrite_request = rewrite_model_request(body.clone(), &config)?; + + let mut new_url = rewrite_request.base_url; new_url.set_path(path); new_url.set_query(req.uri().query()); // log request duration let start = std::time::Instant::now(); - let resp = client.post(new_url).json(&body).send().await?; + let resp = client + .post(new_url) + .json(&rewrite_request.body) + .send() + .await?; let duration = start.elapsed().as_millis() as i32; if resp.status().is_success() { let llm_resp = resp.json::().await?; diff --git a/inference-gateway/tests/integration_test.rs b/inference-gateway/tests/integration_test.rs 
index faba3f397..b7191e593 100644 --- a/inference-gateway/tests/integration_test.rs +++ b/inference-gateway/tests/integration_test.rs @@ -136,3 +136,59 @@ async fn test_authorization() { println!("{:?}", resp); assert!(resp.status().is_success()); } + +#[ignore] +#[actix_web::test] +async fn test_unavailable_model() { + let app = common::get_test_app(false).await; + + let mut rng = rand::thread_rng(); + let rnd = rng.gen_range(0..100000); + let instance = format!("MY-TEST-INSTANCE-{}", rnd); + let model = "random/not-a-real-model"; + let payload = serde_json::json!({ + "model": model, + "messages": [{"role": "user", "content": "the quick brown fox..."}] + }); + let req = test::TestRequest::post() + .uri("/v1/chat/completions") + .insert_header(("X-TEMBO-ORG", "MY-TEST-ORG")) + .insert_header(("X-TEMBO-INSTANCE", instance.clone())) + .insert_header((header::CONTENT_TYPE, "application/json")) + .set_payload(payload.to_string()) + .to_request(); + + let resp = test::call_service(&app, req).await; + assert!(resp.status().is_client_error()); +} + +#[ignore] +#[actix_web::test] +async fn test_model_rewrite() { + let model = "facebook/davinci"; + std::env::set_var("MODEL_REWRITES", format!("{model}:facebook/opt-125m")); + + let app = common::get_test_app(false).await; + + let mut rng = rand::thread_rng(); + let rnd = rng.gen_range(0..100000); + let instance = format!("MY-TEST-INSTANCE-{}", rnd); + let payload = serde_json::json!({ + "model": model, + "messages": [{"role": "user", "content": "the quick brown fox..."}] + }); + let req = test::TestRequest::post() + .uri("/v1/chat/completions") + .insert_header(("X-TEMBO-ORG", "MY-TEST-ORG")) + .insert_header(("X-TEMBO-INSTANCE", instance.clone())) + .insert_header((header::CONTENT_TYPE, "application/json")) + .set_payload(payload.to_string()) + .to_request(); + + let resp = test::call_service(&app, req).await; + assert!(resp.status().is_success()); + + let body: serde_json::Value = test::read_body_json(resp).await; + let 
return_model = body.get("model").unwrap().as_str().unwrap(); + assert_eq!(return_model, "facebook/opt-125m"); +} diff --git a/tembo-cli/Cargo.lock b/tembo-cli/Cargo.lock index ee6200100..b76e049c9 100644 --- a/tembo-cli/Cargo.lock +++ b/tembo-cli/Cargo.lock @@ -4342,7 +4342,7 @@ dependencies = [ [[package]] name = "tembo-cli" -version = "0.20.7" +version = "0.20.8" dependencies = [ "actix-cors", "actix-service", diff --git a/tembo-cli/Cargo.toml b/tembo-cli/Cargo.toml index 83b3a256b..27aabbe05 100644 --- a/tembo-cli/Cargo.toml +++ b/tembo-cli/Cargo.toml @@ -1,7 +1,7 @@ workspace = { members = ["temboclient", "tembodataclient"] } [package] name = "tembo-cli" -version = "0.20.7" +version = "0.20.8" edition = "2021" authors = ["Tembo.io"] description = "The CLI for Tembo" diff --git a/tembo-operator/Cargo.lock b/tembo-operator/Cargo.lock index 587d546e4..576ca6dd5 100644 --- a/tembo-operator/Cargo.lock +++ b/tembo-operator/Cargo.lock @@ -503,7 +503,7 @@ dependencies = [ [[package]] name = "controller" -version = "0.50.1" +version = "0.50.2" dependencies = [ "actix-web", "anyhow", diff --git a/tembo-operator/Cargo.toml b/tembo-operator/Cargo.toml index dfc964b84..55c2a291c 100644 --- a/tembo-operator/Cargo.toml +++ b/tembo-operator/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "controller" description = "Tembo Operator for Postgres" -version = "0.50.1" +version = "0.50.2" edition = "2021" default-run = "controller" license = "Apache-2.0" diff --git a/tembo-operator/src/apis/coredb_types.rs b/tembo-operator/src/apis/coredb_types.rs index 3445c40c2..12d87bda0 100644 --- a/tembo-operator/src/apis/coredb_types.rs +++ b/tembo-operator/src/apis/coredb_types.rs @@ -506,7 +506,7 @@ pub struct DedicatedNetworking { /// Configure the load balancer to be public or private. /// - /// **Default**: false. + /// **Default**: true. 
#[serde(default)] pub public: bool, diff --git a/tembo-operator/src/dedicated_networking.rs b/tembo-operator/src/dedicated_networking.rs index ad6653ac1..143ac1e63 100644 --- a/tembo-operator/src/dedicated_networking.rs +++ b/tembo-operator/src/dedicated_networking.rs @@ -399,12 +399,6 @@ async fn reconcile_dedicated_networking_service( "cnpg.io/cluster".to_string(), serde_json::Value::String(cdb_name.to_string()), ); - if is_public { - labels.insert( - "public".to_string(), - serde_json::Value::String("true".to_string()), - ); - } let mut service_spec = serde_json::Map::new(); service_spec.insert( @@ -427,8 +421,23 @@ async fn reconcile_dedicated_networking_service( service_spec.insert("type".to_string(), json!(service_type)); let ip_allow_list = cdb.spec.ip_allow_list.clone().unwrap_or_else(|| vec![]); - if service_type == "LoadBalancer" && !ip_allow_list.is_empty() { - service_spec.insert("loadBalancerSourceRanges".to_string(), json!(ip_allow_list)); + // Allow ip_allow_list to allow all entries are in CIDR notation + let ip_allow_list_cidr: Vec = ip_allow_list + .iter() + .map(|ip| { + if ip.contains('/') { + ip.clone() + } else { + format!("{}/32", ip) + } + }) + .collect(); + + if service_type == "LoadBalancer" && !ip_allow_list_cidr.is_empty() { + service_spec.insert( + "loadBalancerSourceRanges".to_string(), + json!(ip_allow_list_cidr), + ); } let service = json!({ diff --git a/tembo-operator/tests/integration_tests.rs b/tembo-operator/tests/integration_tests.rs index 14e7381ad..53f00a9d0 100644 --- a/tembo-operator/tests/integration_tests.rs +++ b/tembo-operator/tests/integration_tests.rs @@ -2153,16 +2153,6 @@ mod test { service.spec.as_ref().unwrap().type_, Some("LoadBalancer".to_string()) ); - assert_eq!( - service - .metadata - .labels - .as_ref() - .expect("Labels should be present") - .get("public") - .expect("Public label should be present"), - "true" - ); let annotations = service .metadata @@ -2226,16 +2216,6 @@ mod test { 
service.spec.as_ref().unwrap().type_, Some("LoadBalancer".to_string()) ); - assert_eq!( - service - .metadata - .labels - .as_ref() - .expect("Labels should be present") - .get("public") - .expect("Public label should be present"), - "true" - ); let annotations = service .metadata diff --git a/tembo-stacks/Cargo.lock b/tembo-stacks/Cargo.lock index 78186524d..18a03604d 100644 --- a/tembo-stacks/Cargo.lock +++ b/tembo-stacks/Cargo.lock @@ -2471,7 +2471,7 @@ dependencies = [ [[package]] name = "tembo-stacks" -version = "0.17.0" +version = "0.17.1" dependencies = [ "anyhow", "clap", diff --git a/tembo-stacks/Cargo.toml b/tembo-stacks/Cargo.toml index 4824513c9..3bd0d6eb7 100644 --- a/tembo-stacks/Cargo.toml +++ b/tembo-stacks/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tembo-stacks" description = "Tembo Stacks for Postgres" -version = "0.17.0" +version = "0.17.1" authors = ["tembo.io"] edition = "2021" license = "Apache-2.0" diff --git a/tembo-stacks/src/apps/embeddings.yaml b/tembo-stacks/src/apps/embeddings.yaml index 401f6dd18..71cb4e1ee 100644 --- a/tembo-stacks/src/apps/embeddings.yaml +++ b/tembo-stacks/src/apps/embeddings.yaml @@ -1,6 +1,6 @@ name: !embeddings appServices: - - image: 387894460527.dkr.ecr.us-east-1.amazonaws.com/tembo-io/vector-serve:6397964 + - image: 387894460527.dkr.ecr.us-east-1.amazonaws.com/tembo-io/vector-serve:0e8078d name: embeddings metrics: path: /metrics diff --git a/tembo-stacks/src/stacks/specs/rag.yaml b/tembo-stacks/src/stacks/specs/rag.yaml index 86891795f..7ab85824f 100644 --- a/tembo-stacks/src/stacks/specs/rag.yaml +++ b/tembo-stacks/src/stacks/specs/rag.yaml @@ -30,7 +30,7 @@ appServices: volumes: - emptyDir: {} name: empty-dir - - image: 387894460527.dkr.ecr.us-east-1.amazonaws.com/tembo-io/vector-serve:6397964 + - image: 387894460527.dkr.ecr.us-east-1.amazonaws.com/tembo-io/vector-serve:0e8078d name: embeddings metrics: path: /metrics diff --git a/tembo-stacks/src/stacks/specs/vectordb.yaml 
b/tembo-stacks/src/stacks/specs/vectordb.yaml index c66cbef48..aea7b21b9 100644 --- a/tembo-stacks/src/stacks/specs/vectordb.yaml +++ b/tembo-stacks/src/stacks/specs/vectordb.yaml @@ -8,7 +8,7 @@ images: 16: "standard-cnpg:16-5120dd1" stack_version: 0.1.0 appServices: - - image: 387894460527.dkr.ecr.us-east-1.amazonaws.com/tembo-io/vector-serve:6397964 + - image: 387894460527.dkr.ecr.us-east-1.amazonaws.com/tembo-io/vector-serve:0e8078d name: embeddings metrics: path: /metrics