diff --git a/.github/actions/argocd-update/action.yml b/.github/actions/argocd-update/action.yml index 28fe75faf..d3a51609f 100644 --- a/.github/actions/argocd-update/action.yml +++ b/.github/actions/argocd-update/action.yml @@ -62,6 +62,7 @@ runs: yq -i '(.spec.source.helm.parameters.[] | select(.name == "pod-init.image.tag")).value = "${{ inputs.version }}"' ${{ inputs.clone_into }}/${{ inputs.subdirectory }}/tembo-operator.yaml yq -i '(.spec.source.helm.parameters.[] | select(.name == "pod-init.image.tag")).value = "${{ inputs.version }}"' ${{ inputs.clone_into }}/${{ inputs.subdirectory }}/tembo-operator-euc1.yaml yq -i '(.spec.source.helm.parameters.[] | select(.name == "inferenceGateway.image.tag")).value = "${{ inputs.version }}"' ${{ inputs.clone_into }}/${{ inputs.subdirectory }}/tembo-ai.yaml + yq -i '(.spec.source.helm.parameters.[] | select(.name == "inferenceService.default.image.tag")).value = "${{ inputs.version }}"' ${{ inputs.clone_into }}/${{ inputs.subdirectory }}/tembo-ai.yaml yq -i '(.spec.source.helm.parameters.[] | select(.name == "controller.image.tag")).value = "${{ inputs.version }}"' ${{ inputs.clone_into }}/${{ inputs.subdirectory }}/tembo-operator-gke-usc1.yaml yq -i '.spec.source.targetRevision= "${{ inputs.version }}"' ${{ inputs.clone_into }}/${{ inputs.subdirectory }}/tembo-ai.yaml yq -i '(.spec.source.helm.parameters.[] | select(.name == "image.tag")).value = "${{ inputs.version }}"' ${{ inputs.clone_into }}/${{ inputs.subdirectory }}/tembo-ai-events-reporter.yaml @@ -103,6 +104,7 @@ runs: yq -i '(.spec.source.helm.parameters.[] | select(.name == "pod-init.image.tag")).value = "${{ inputs.version }}"' ${{ inputs.clone_into }}/${{ inputs.subdirectory }}/tembo-operator-usw2.yaml yq -i '(.spec.source.helm.parameters.[] | select(.name == "pod-init.image.tag")).value = "${{ inputs.version }}"' ${{ inputs.clone_into }}/${{ inputs.subdirectory }}/tembo-operator-gke-usc1.yaml yq -i '(.spec.source.helm.parameters.[] | select(.name == 
"inferenceGateway.image.tag")).value = "${{ inputs.version }}"' ${{ inputs.clone_into }}/${{ inputs.subdirectory }}/tembo-ai.yaml + yq -i '(.spec.source.helm.parameters.[] | select(.name == "inferenceService.default.image.tag")).value = "${{ inputs.version }}"' ${{ inputs.clone_into }}/${{ inputs.subdirectory }}/tembo-ai.yaml yq -i '.spec.source.targetRevision= "${{ inputs.version }}"' ${{ inputs.clone_into }}/${{ inputs.subdirectory }}/tembo-ai.yaml - name: Git commit and push to remote diff --git a/.github/actions/build-and-push-to-quay/action.yml b/.github/actions/build-and-push-to-quay/action.yml index 394338981..a860e2442 100644 --- a/.github/actions/build-and-push-to-quay/action.yml +++ b/.github/actions/build-and-push-to-quay/action.yml @@ -1,5 +1,5 @@ -name: 'Build and push to Quay' -description: 'Builds a container image and pushes it to our Quay organization' +name: "Build and push to Quay" +description: "Builds a container image and pushes it to our Quay organization" inputs: image_name: description: 'The name of the image, not including the registry or the tag, for example "postgres"' @@ -13,9 +13,9 @@ inputs: required: false default: "quay.io/tembo" docker_directory: - description: 'The relative path to a directory in which there is a Dockerfile' + description: "The relative path to a directory in which there is a Dockerfile" required: false - default: '.' + default: "." quay_user: required: true description: "Quay 'robot user' user name" @@ -29,11 +29,11 @@ inputs: required: true description: "Quay 'robot user' access token for Tembo org" publish_calver: - description: 'Should we tag with calendar versioning?' + description: "Should we tag with calendar versioning?" 
required: false default: false calver_suffix: - description: 'Optional suffix to the calendar version' + description: "Optional suffix to the calendar version" required: false default: "" publish_latest: @@ -55,11 +55,13 @@ runs: - name: Install TOML parser shell: bash run: | - set -xe - wget https://github.com/freshautomations/stoml/releases/download/v0.7.1/stoml_linux_amd64 - mv stoml_linux_amd64 stoml - chmod +x stoml - sudo mv stoml /usr/local/bin/ + set -xe + sudo apt-get update + sudo apt-get install -y wget + wget https://github.com/freshautomations/stoml/releases/download/v0.7.1/stoml_linux_amd64 + mv stoml_linux_amd64 stoml + chmod +x stoml + sudo mv stoml /usr/local/bin/ - name: Create whitespace-separated tags list shell: bash id: tags diff --git a/.github/actions/pgx-init/action.yml b/.github/actions/pgx-init/action.yml index a5644af61..a4e8de2c9 100644 --- a/.github/actions/pgx-init/action.yml +++ b/.github/actions/pgx-init/action.yml @@ -1,46 +1,48 @@ -name: 'pgrx initialization' -description: 'Initialize PGRX if it is a dependency, otherwise do nothing.' +name: "pgrx initialization" +description: "Initialize PGRX if it is a dependency, otherwise do nothing." 
inputs: working-directory: - description: 'The directory in which there is a pgrx extension project' + description: "The directory in which there is a pgrx extension project" required: true outputs: {} runs: using: "composite" steps: - - name: Install TOML parser - shell: bash - run: | - set -xe - wget https://github.com/freshautomations/stoml/releases/download/v0.7.1/stoml_linux_amd64 &> /dev/null - mv stoml_linux_amd64 stoml - chmod +x stoml - sudo mv stoml /usr/local/bin/ - - name: setup pgrx - shell: bash - id: pgrx_install - working-directory: ${{ inputs.working-directory }} - run: | - pgrx_version=$(stoml Cargo.toml dependencies.pgrx) - if [ -z "${pgrx_version}" ]; then - echo "pgrx is not a dependency: skipping" - echo "skip=true" >> $GITHUB_OUTPUT - else - cargo install --version ${pgrx_version} cargo-pgrx - echo "skip=false" >> $GITHUB_OUTPUT - fi - - name: pgrx init - shell: bash - if: steps.pgrx_install.outputs.skip == 'false' - working-directory: ${{ inputs.working-directory }} - run: | - set -x - pg_version=$(stoml Cargo.toml features.default) - # pgrx init can take a long time, and it re-compiles postgres even when there - # is a cached version. So, we can just check for the directory and - cat /home/runner/.pgrx/config.toml || true - if find /home/runner/.pgrx | grep $(awk -F "=" '/${pg_version}/ {print $2}' /home/runner/.pgrx/config.toml | tr -d '"'); then - echo "Already found pgrx is initialized. Skipping 'cargo pgrx init' command." 
- else - cargo pgrx init --${pg_version} download || true - fi + - name: Install TOML parser + shell: bash + run: | + set -xe + sudo apt-get update + sudo apt-get install -y wget + wget https://github.com/freshautomations/stoml/releases/download/v0.7.1/stoml_linux_amd64 &> /dev/null + mv stoml_linux_amd64 stoml + chmod +x stoml + sudo mv stoml /usr/local/bin/ + - name: setup pgrx + shell: bash + id: pgrx_install + working-directory: ${{ inputs.working-directory }} + run: | + pgrx_version=$(stoml Cargo.toml dependencies.pgrx) + if [ -z "${pgrx_version}" ]; then + echo "pgrx is not a dependency: skipping" + echo "skip=true" >> $GITHUB_OUTPUT + else + cargo install --version ${pgrx_version} cargo-pgrx + echo "skip=false" >> $GITHUB_OUTPUT + fi + - name: pgrx init + shell: bash + if: steps.pgrx_install.outputs.skip == 'false' + working-directory: ${{ inputs.working-directory }} + run: | + set -x + pg_version=$(stoml Cargo.toml features.default) + # pgrx init can take a long time, and it re-compiles postgres even when there + # is a cached version. So, we can just check for the directory and + cat /home/runner/.pgrx/config.toml || true + if find /home/runner/.pgrx | grep $(awk -F "=" '/${pg_version}/ {print $2}' /home/runner/.pgrx/config.toml | tr -d '"'); then + echo "Already found pgrx is initialized. Skipping 'cargo pgrx init' command." + else + cargo pgrx init --${pg_version} download || true + fi diff --git a/.github/actions/publish-crate/action.yml b/.github/actions/publish-crate/action.yml index 45377a30b..a540ec5c0 100644 --- a/.github/actions/publish-crate/action.yml +++ b/.github/actions/publish-crate/action.yml @@ -1,5 +1,5 @@ -name: 'Publish to crates.io' -description: 'Publish cratest to crates.io and some other crates.io-related actions, like checking if a version is already published.' +name: "Publish to crates.io" +description: "Publish cratest to crates.io and some other crates.io-related actions, like checking if a version is already published." 
inputs: working-directory: required: false @@ -30,11 +30,13 @@ runs: - name: Install TOML parser shell: bash run: | - set -xe - wget https://github.com/freshautomations/stoml/releases/download/v0.7.1/stoml_linux_amd64 &> /dev/null - mv stoml_linux_amd64 stoml - chmod +x stoml - sudo mv stoml /usr/local/bin/ + set -xe + sudo apt-get update + sudo apt-get install wget -y + wget https://github.com/freshautomations/stoml/releases/download/v0.7.1/stoml_linux_amd64 &> /dev/null + mv stoml_linux_amd64 stoml + chmod +x stoml + sudo mv stoml /usr/local/bin/ - name: Publish shell: bash working-directory: ${{ inputs.working-directory }} diff --git a/.github/workflows/cargo-test.yaml b/.github/workflows/cargo-test.yaml index 4e075d28e..85ba34c99 100644 --- a/.github/workflows/cargo-test.yaml +++ b/.github/workflows/cargo-test.yaml @@ -4,11 +4,11 @@ on: push: branches: ["main"] paths-ignore: - - 'tembo-py/**' + - "tembo-py/**" pull_request: branches: ["main"] paths-ignore: - - 'tembo-py/**' + - "tembo-py/**" jobs: find_directories: @@ -25,7 +25,7 @@ jobs: with: contains_the_file: Cargo.toml changed_relative_to_ref: origin/${{ github.base_ref || 'not-a-branch' }} - ignore_dirs: ".coredb examples tembo-cli/temboclient tembo-cli/tembodataclient" + ignore_dirs: ".coredb examples tembo-cli/temboclient tembo-cli/tembodataclient inference-gateway" lint: name: Run linters @@ -88,7 +88,7 @@ jobs: run: | set -xe sudo apt-get update - sudo apt-get install -y pkg-config libssl-dev lsb-release + sudo apt-get install -y pkg-config libssl-dev lsb-release wget sudo sh -c 'echo "deb http://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" > /etc/apt/sources.list.d/pgdg.list' wget -qO- https://www.postgresql.org/media/keys/ACCC4CF8.asc | sudo tee /etc/apt/trusted.gpg.d/pgdg.asc &>/dev/null sudo apt-get update && sudo apt-get install -y postgresql-client @@ -103,7 +103,7 @@ jobs: export PROMETHEUS_URL=https://prometheus-data-1.use1.dev.plat.cdb-svc.com cd ${{ matrix.path }} && cargo 
test env: - ORG_ID: org_2YW4TYIMI1LeOqJTXIyvkHOHCUo + ORG_ID: org_2YW4TYIMI1LeOqJTXIyvkHOHCUo ACCESS_TOKEN: ${{ secrets.TEMBO_TOKEN_TEST_ORG_DEV }} TEMBO_HOST: ${{ secrets.TEMBO_HOST }} TEMBO_DATA_HOST: ${{ secrets.TEMBO_DATA_HOST }} diff --git a/.github/workflows/tembo_ai.yaml b/.github/workflows/tembo_ai.yaml index a3ac7aa93..83c63b21d 100644 --- a/.github/workflows/tembo_ai.yaml +++ b/.github/workflows/tembo_ai.yaml @@ -28,6 +28,7 @@ on: jobs: tests: name: Run tests + runs-on: - self-hosted - dind diff --git a/CODEOWNERS b/CODEOWNERS index 560c2f254..5347769e4 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -1,9 +1,10 @@ -.github/** @nhudson @ianstanton @shahadarsh +.github/** @nhudson @ianstanton @shahadarsh @vrmiguel charts/** @nhudson @ianstanton @shahadarsh -tembo-operator/** @nhudson @ianstanton @ChuckHend -conductor/** @nhudson @ianstanton @ChuckHend +dataplane-webserver/** @nhudson @ianstanton @vrmiguel +tembo-operator/** @nhudson @ianstanton @ChuckHend @vrmiguel +conductor/** @nhudson @ianstanton @ChuckHend @vrmiguel tembo-pod-init/** @nhudson @ianstanton tembo-cli/** @shahadarsh @vrmiguel @DarrenBaldwin07 @joshuajerin tembo-py/** @chuckhend tembo-stacks/** @chuckhend @jasonmp85 -inference-gateway/** @chuckhend @jasonmp85 +inference-gateway/** @chuckhend @jasonmp85 \ No newline at end of file diff --git a/charts/tembo-ai/Chart.yaml b/charts/tembo-ai/Chart.yaml index 43b8509dd..225f19f3f 100644 --- a/charts/tembo-ai/Chart.yaml +++ b/charts/tembo-ai/Chart.yaml @@ -15,7 +15,7 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.2.0 +version: 0.3.1 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. 
Versions are not expected to diff --git a/charts/tembo-ai/templates/_helpers.tpl b/charts/tembo-ai/templates/_helpers.tpl index 743f33b18..91d47c765 100644 --- a/charts/tembo-ai/templates/_helpers.tpl +++ b/charts/tembo-ai/templates/_helpers.tpl @@ -5,6 +5,13 @@ Expand the name of the chart. {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} {{- end }} +{{/* +Define the namespace to use across the Helm chart +*/}} +{{- define "tembo-ai.namespace" -}} +{{- default .Release.Namespace }} +{{- end -}} + {{/* Create a default fully qualified app name. We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). diff --git a/charts/tembo-ai/templates/inference-service/_helpers.tpl b/charts/tembo-ai/templates/inference-service/_helpers.tpl index ea705d962..01e0ee702 100644 --- a/charts/tembo-ai/templates/inference-service/_helpers.tpl +++ b/charts/tembo-ai/templates/inference-service/_helpers.tpl @@ -2,7 +2,6 @@ Inference service specific labels */}} {{- define "tembo-ai.inferenceService.labels" -}} -app.kubernetes.io/component: inference-service {{ include "tembo-ai.labels" . }} {{- end }} @@ -20,3 +19,23 @@ Create the name of the inference-service service account to use {{- define "tembo-ai.inferenceService.serviceAccountName" -}} {{- include "tembo-ai.fullname" . }}-service {{- end }} + +{{/* +Deep merge the inference-service default configs and the service-specific configs +*/}} +{{- define "tembo-ai.inferenceService.deepMerge" -}} +{{- $result := deepCopy (index . 0) -}} +{{- range $key, $value := index . 
1 -}} + {{- if kindIs "map" $value -}} + {{- if hasKey $result $key -}} + {{- $newValue := fromYaml (include "tembo-ai.inferenceService.deepMerge" (list (get $result $key) $value)) -}} + {{- $_ := set $result $key $newValue -}} + {{- else -}} + {{- $_ := set $result $key $value -}} + {{- end -}} + {{- else -}} + {{- $_ := set $result $key $value -}} + {{- end -}} +{{- end -}} +{{- $result | toYaml -}} +{{- end -}} diff --git a/charts/tembo-ai/templates/inference-service/external-secret.yaml b/charts/tembo-ai/templates/inference-service/external-secret.yaml index 06d62dc61..66b91b6f0 100644 --- a/charts/tembo-ai/templates/inference-service/external-secret.yaml +++ b/charts/tembo-ai/templates/inference-service/external-secret.yaml @@ -1,21 +1,29 @@ -{{- if .Values.inferenceService.externalSecrets.secretName -}} +{{- if .Values.inferenceService.services }} + {{- $defaults := .Values.inferenceService.defaults }} + {{- range $serviceName, $serviceConfig := .Values.inferenceService.services }} + {{- $mergedConfig := fromYaml (include "tembo-ai.inferenceService.deepMerge" (list $defaults $serviceConfig)) }} + {{- if and (default false $mergedConfig.enabled) $mergedConfig.externalSecrets.secretName }} +--- apiVersion: external-secrets.io/v1beta1 kind: ExternalSecret metadata: - name: {{ include "tembo-ai.fullname" . }}-service - namespace: {{ .Release.Namespace }} + name: {{ include "tembo-ai.fullname" $ }}-{{ $serviceName }} + namespace: {{ include "tembo-ai.namespace" $ }} labels: - {{- include "tembo-ai.inferenceService.labels" . 
| nindent 4 }} + {{- include "tembo-ai.inferenceService.labels" $ | nindent 4 }} + app.kubernetes.io/component: {{ $serviceName }} spec: - refreshInterval: {{ .Values.inferenceService.externalSecrets.refreshInterval }} + refreshInterval: {{ $mergedConfig.externalSecrets.refreshInterval }} secretStoreRef: - name: {{ .Values.inferenceService.externalSecrets.parameterStore.name }} - kind: {{ .Values.inferenceService.externalSecrets.parameterStore.kind }} + name: {{ $mergedConfig.externalSecrets.parameterStore.name }} + kind: {{ $mergedConfig.externalSecrets.parameterStore.kind }} target: creationPolicy: 'Owner' - name: {{ .Values.inferenceService.externalSecrets.secretName }} + name: {{ $mergedConfig.externalSecrets.secretName }} dataFrom: - find: name: - regexp: {{ .Values.inferenceService.externalSecrets.secretRegex }} + regexp: {{ $mergedConfig.externalSecrets.secretRegex }} + {{- end }} + {{- end }} {{- end }} diff --git a/charts/tembo-ai/templates/inference-service/pod-monitor.yaml b/charts/tembo-ai/templates/inference-service/pod-monitor.yaml index b4401054c..7a22e8f23 100644 --- a/charts/tembo-ai/templates/inference-service/pod-monitor.yaml +++ b/charts/tembo-ai/templates/inference-service/pod-monitor.yaml @@ -1,19 +1,34 @@ -{{- if .Values.inferenceService.podMonitor.enabled -}} +{{- if .Values.inferenceService.services }} + {{- $defaults := .Values.inferenceService.defaults }} + {{- $releaseName := default "release-name" .Release.Name }} + {{- range $serviceName, $serviceConfig := .Values.inferenceService.services }} + {{- $mergedConfig := fromYaml (include "tembo-ai.inferenceService.deepMerge" (list $defaults $serviceConfig)) }} + {{- if and (default false $mergedConfig.enabled) (default false $mergedConfig.podMonitor.enabled) }} +--- apiVersion: monitoring.coreos.com/v1 kind: PodMonitor metadata: - name: {{ include "tembo-ai.fullname" . 
}}-service - namespace: {{ .Release.Namespace }} + name: {{ include "tembo-ai.fullname" $ }}-{{ $serviceName }} + namespace: {{ include "tembo-ai.namespace" $ }} labels: - {{- include "tembo-ai.inferenceService.labels" . | nindent 4 }} + {{- include "tembo-ai.inferenceService.labels" $ | nindent 4 }} + app.kubernetes.io/component: {{ $serviceName }} spec: podMetricsEndpoints: - - path: {{ .Values.inferenceService.podMonitor.path }} - port: {{ .Values.inferenceService.podMonitor.portName }} + - port: {{ $mergedConfig.podMonitor.portName }} + path: {{ $mergedConfig.podMonitor.path }} + {{- with $mergedConfig.podMonitor.interval }} + interval: {{ . }} + {{- end }} + {{- with $mergedConfig.podMonitor.scrapeTimeout }} + scrapeTimeout: {{ . }} + {{- end }} namespaceSelector: matchNames: - - {{ .Release.Namespace }} + - {{ include "tembo-ai.namespace" $ }} selector: matchLabels: - {{- include "tembo-ai.inferenceService.selectorLabels" . | nindent 6 }} + {{- include "tembo-ai.inferenceService.selectorLabels" $ | nindent 6 }} + {{- end }} + {{- end }} {{- end }} diff --git a/charts/tembo-ai/templates/inference-service/service.yaml b/charts/tembo-ai/templates/inference-service/service.yaml index db0a95414..303a054cf 100644 --- a/charts/tembo-ai/templates/inference-service/service.yaml +++ b/charts/tembo-ai/templates/inference-service/service.yaml @@ -1,16 +1,28 @@ +{{- if .Values.inferenceService.services }} + {{- $defaults := .Values.inferenceService.defaults }} + {{- range $serviceName, $serviceConfig := .Values.inferenceService.services }} + {{- $mergedConfig := fromYaml (include "tembo-ai.inferenceService.deepMerge" (list $defaults $serviceConfig)) }} + {{- if and (default false $mergedConfig.enabled) (default true $mergedConfig.service.enabled) }} +--- apiVersion: v1 kind: Service metadata: - name: {{ include "tembo-ai.fullname" . 
}}-service + name: {{ include "tembo-ai.fullname" $ }}-{{ $serviceName }} + namespace: {{ include "tembo-ai.namespace" $ }} labels: - {{- include "tembo-ai.inferenceService.labels" . | nindent 4 }} + {{- include "tembo-ai.inferenceService.labels" $ | nindent 4 }} + app.kubernetes.io/component: {{ $serviceName }} spec: clusterIP: None - type: {{ .Values.inferenceService.service.type }} + type: {{ $mergedConfig.service.type | default "ClusterIP" }} ports: - - port: {{ .Values.inferenceService.service.port }} + - port: {{ $mergedConfig.service.port }} targetPort: http protocol: TCP name: http selector: - {{- include "tembo-ai.inferenceService.selectorLabels" . | nindent 4 }} + {{- include "tembo-ai.inferenceService.selectorLabels" $ | nindent 4 }} + app.kubernetes.io/component: {{ $serviceName }} + {{- end }} + {{- end }} +{{- end }} diff --git a/charts/tembo-ai/templates/inference-service/serviceaccount.yaml b/charts/tembo-ai/templates/inference-service/serviceaccount.yaml index 76af4ec9f..e559119e1 100644 --- a/charts/tembo-ai/templates/inference-service/serviceaccount.yaml +++ b/charts/tembo-ai/templates/inference-service/serviceaccount.yaml @@ -1,10 +1,31 @@ +{{- if .Values.inferenceService.services }} + {{- $defaults := .Values.inferenceService.defaults }} + {{- range $serviceName, $serviceConfig := .Values.inferenceService.services }} + {{- $mergedConfig := fromYaml (include "tembo-ai.inferenceService.deepMerge" (list $defaults $serviceConfig)) }} + {{- if and (default false $mergedConfig.enabled) (default true $mergedConfig.serviceAccount.create) }} +--- apiVersion: v1 kind: ServiceAccount metadata: - name: {{ include "tembo-ai.inferenceService.serviceAccountName" . }} + name: {{ include "tembo-ai.inferenceService.serviceAccountName" $ }}-{{ $serviceName }} + namespace: {{ include "tembo-ai.namespace" $ }} labels: - {{- include "tembo-ai.inferenceService.labels" . 
| nindent 4 }} - {{- with .Values.inferenceService.serviceAccount.annotations }} + {{- include "tembo-ai.inferenceService.labels" $ | nindent 4 }} + app.kubernetes.io/component: {{ $serviceName }} + {{- with $mergedConfig.serviceAccount.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} + {{- with $mergedConfig.serviceAccount.annotations }} annotations: {{- toYaml . | nindent 4 }} {{- end }} +{{- if $mergedConfig.serviceAccount.imagePullSecrets }} +imagePullSecrets: + {{- toYaml $mergedConfig.serviceAccount.imagePullSecrets | nindent 2 }} +{{- end }} +{{- if hasKey $mergedConfig.serviceAccount "automountServiceAccountToken" }} +automountServiceAccountToken: {{ $mergedConfig.serviceAccount.automountServiceAccountToken }} +{{- end }} + {{- end }} + {{- end }} +{{- end }} diff --git a/charts/tembo-ai/templates/inference-service/statefulset.yaml b/charts/tembo-ai/templates/inference-service/statefulset.yaml index 019960348..69efe42e4 100644 --- a/charts/tembo-ai/templates/inference-service/statefulset.yaml +++ b/charts/tembo-ai/templates/inference-service/statefulset.yaml @@ -1,112 +1,126 @@ +{{- if .Values.inferenceService.services }} + {{- $defaults := .Values.inferenceService.defaults }} + {{- range $serviceName, $serviceConfig := .Values.inferenceService.services }} + {{- $mergedConfig := fromYaml (include "tembo-ai.inferenceService.deepMerge" (list $defaults $serviceConfig)) }} + {{- if (default false $mergedConfig.enabled) }} +--- apiVersion: apps/v1 kind: StatefulSet metadata: - name: {{ include "tembo-ai.fullname" . }}-inference-service + name: {{ include "tembo-ai.fullname" $ }}-{{ $serviceName }} + namespace: {{ include "tembo-ai.namespace" $ }} labels: - {{- include "tembo-ai.inferenceService.labels" . 
| nindent 4 }} + {{- include "tembo-ai.inferenceService.labels" $ | nindent 4 }} + app.kubernetes.io/component: {{ $serviceName }} spec: - replicas: {{ .Values.inferenceService.replicaCount }} + replicas: {{ $mergedConfig.replicaCount }} selector: matchLabels: - {{- include "tembo-ai.inferenceService.selectorLabels" . | nindent 6 }} - serviceName: {{ include "tembo-ai.fullname" . }}-inference-service + {{- include "tembo-ai.inferenceService.selectorLabels" $ | nindent 6 }} + app.kubernetes.io/component: {{ $serviceName }} + serviceName: {{ include "tembo-ai.fullname" $ }}-{{ $serviceName }}-inference-service template: metadata: - {{- with .Values.inferenceService.podAnnotations }} + {{- with $mergedConfig.podAnnotations }} annotations: {{- toYaml . | nindent 8 }} {{- end }} labels: - {{- include "tembo-ai.inferenceService.selectorLabels" . | nindent 8 }} + {{- include "tembo-ai.inferenceService.selectorLabels" $ | nindent 8 }} + app.kubernetes.io/component: {{ $serviceName }} spec: - {{- with .Values.inferenceService.imagePullSecrets }} + {{- with $mergedConfig.imagePullSecrets }} imagePullSecrets: {{- toYaml . | nindent 8 }} {{- end }} - serviceAccountName: {{ include "tembo-ai.inferenceService.serviceAccountName" . 
}} + serviceAccountName: {{ include "tembo-ai.inferenceService.serviceAccountName" $ }}-{{ $serviceName }} securityContext: - {{- toYaml .Values.inferenceService.podSecurityContext | nindent 8 }} + {{- toYaml $mergedConfig.podSecurityContext | nindent 8 }} containers: - name: inference-service securityContext: - {{- toYaml .Values.inferenceService.securityContext | nindent 12 }} - image: "{{ .Values.inferenceService.image.repository }}:{{ .Values.inferenceService.image.tag | default .Chart.AppVersion }}" - imagePullPolicy: {{ .Values.inferenceService.image.pullPolicy }} + {{- toYaml $mergedConfig.securityContext | nindent 12 }} + image: "{{ $mergedConfig.image.repository }}:{{ $mergedConfig.image.tag }}" + imagePullPolicy: {{ $mergedConfig.image.pullPolicy }} ports: - name: http - containerPort: {{ .Values.inferenceService.service.port }} + containerPort: {{ $mergedConfig.service.port }} protocol: TCP - {{- if and .Values.inferenceService.podMonitor.enabled (ne .Values.inferenceService.podMonitor.port "http") }} - - name: {{ .Values.inferenceService.podMonitor.portName }} - containerPort: {{ .Values.inferenceService.podMonitor.containerPort }} + {{- if and $mergedConfig.podMonitor.enabled (ne $mergedConfig.podMonitor.portName "http") }} + - name: {{ $mergedConfig.podMonitor.portName }} + containerPort: {{ $mergedConfig.podMonitor.containerPort }} protocol: TCP {{- end }} - {{- if .Values.inferenceService.livenessProbe.enabled }} + {{- if $mergedConfig.livenessProbe.enabled }} livenessProbe: httpGet: - path: {{ .Values.inferenceService.livenessProbe.path }} - port: {{ .Values.inferenceService.livenessProbe.port }} + path: {{ $mergedConfig.livenessProbe.path }} + port: {{ $mergedConfig.livenessProbe.port }} {{- end }} - {{- if .Values.inferenceService.readinessProbe.enabled }} + {{- if $mergedConfig.readinessProbe.enabled }} readinessProbe: httpGet: - path: {{ .Values.inferenceService.readinessProbe.path }} - port: {{ .Values.inferenceService.readinessProbe.port }} + 
path: {{ $mergedConfig.readinessProbe.path }} + port: {{ $mergedConfig.readinessProbe.port }} {{- end }} - {{- if .Values.inferenceService.startupProbe.enabled }} + {{- if $mergedConfig.startupProbe.enabled }} startupProbe: httpGet: - path: {{ .Values.inferenceService.startupProbe.path }} - port: {{ .Values.inferenceService.startupProbe.port }} - failureThreshold: {{ .Values.inferenceService.startupProbe.failureThreshold }} - periodSeconds: {{ .Values.inferenceService.startupProbe.periodSeconds }} + path: {{ $mergedConfig.startupProbe.path }} + port: {{ $mergedConfig.startupProbe.port }} + failureThreshold: {{ $mergedConfig.startupProbe.failureThreshold }} + periodSeconds: {{ $mergedConfig.startupProbe.periodSeconds }} {{- end }} resources: - {{- toYaml .Values.inferenceService.resources | nindent 12 }} - {{- with .Values.inferenceService.args }} + {{- toYaml $mergedConfig.resources | nindent 12 }} + {{- with $mergedConfig.args }} args: - {{- toYaml . | nindent 10 }} + {{- toYaml . | nindent 12 }} {{- end }} - {{- with .Values.inferenceService.command }} + {{- with $mergedConfig.command }} command: - {{- toYaml . | nindent 10 }} + {{- toYaml . | nindent 12 }} {{- end }} - {{- with .Values.inferenceService.env }} + {{- with $mergedConfig.env }} env: - {{- toYaml . | nindent 10 }} + {{- toYaml . | nindent 12 }} {{- end }} - {{- if .Values.inferenceService.persistence.enabled }} + {{- if $mergedConfig.persistence.enabled }} volumeMounts: - name: models - mountPath: {{ .Values.inferenceService.persistence.mountPath }} + mountPath: {{ $mergedConfig.persistence.mountPath }} {{- end }} - {{- with .Values.inferenceService.nodeSelector }} + {{- with $mergedConfig.nodeSelector }} nodeSelector: {{- toYaml . | nindent 8 }} {{- end }} - {{- with .Values.inferenceService.affinity }} + {{- with $mergedConfig.affinity }} affinity: {{- toYaml . 
| nindent 8 }} {{- end }} - {{- with .Values.inferenceService.tolerations }} + {{- with $mergedConfig.tolerations }} tolerations: {{- toYaml . | nindent 8 }} {{- end }} + {{- if $mergedConfig.persistence.enabled }} volumeClaimTemplates: - {{- if .Values.inferenceService.persistence.enabled }} - apiVersion: v1 kind: PersistentVolumeClaim metadata: name: models labels: - {{- include "tembo-ai.inferenceService.labels" . | nindent 8 }} + {{- include "tembo-ai.inferenceService.labels" $ | nindent 10 }} + app.kubernetes.io/component: {{ $serviceName }} spec: accessModes: - - {{ .Values.inferenceService.persistence.accessMode }} + - {{ $mergedConfig.persistence.accessMode }} resources: requests: - storage: {{ .Values.inferenceService.persistence.size }} - {{- if .Values.inferenceService.persistence.storageClass }} - storageClassName: {{ .Values.inferenceService.persistence.storageClass }} + storage: {{ $mergedConfig.persistence.size }} + {{- if $mergedConfig.persistence.storageClass }} + storageClassName: {{ $mergedConfig.persistence.storageClass }} {{- end }} + {{- end }} {{- end }} + {{- end }} +{{- end }} diff --git a/charts/tembo-ai/values.yaml b/charts/tembo-ai/values.yaml index 9d715757e..2dd852dd5 100644 --- a/charts/tembo-ai/values.yaml +++ b/charts/tembo-ai/values.yaml @@ -87,80 +87,87 @@ inferenceGateway: podSecurityContext: {} inferenceService: - image: - repository: quay.io/tembo/inference - pullPolicy: IfNotPresent - tag: latest - resources: - requests: - cpu: "4" - memory: "16Gi" - nvidia.com/gpu: "1" - limits: - cpu: "8" - memory: "16Gi" - nvidia.com/gpu: "1" - livenessProbe: - enabled: true - path: /health - port: http - readinessProbe: - enabled: true - path: /health - port: http - startupProbe: - enabled: true - path: /health - port: http - failureThreshold: 30 - periodSeconds: 10 - replicaCount: 1 - externalSecrets: - refreshInterval: "5m" - parameterStore: - name: "secret-store-parameter-store" - kind: ClusterSecretStore - secretName: ~ - secretRegex: 
~ - podMonitor: - enabled: false - path: /metrics - # Sometimes applications serve metrics on a different port, - # which makes it easier to prevent metrics from accidentally - # being publicly available. - portName: metrics - containerPort: 8081 - serviceAccount: - create: true - annotations: {} - service: - port: 8000 - args: [] - command: [] - env: [] - securityContext: {} - # # The most practical security settings are - # # dropping all linux capabilities and - # # running as non-root. - # capabilities: - # drop: - # - ALL - # runAsNonRoot: true - # # Read only file system is better if the application - # # can tolerate it. - # # readOnlyRootFilesystem: true - nodeSelector: {} - tolerations: - - key: "tembo.io/gpu" - operator: "Equal" - value: "true" - effect: "NoSchedule" - affinity: {} - podAnnotations: {} - podSecurityContext: {} - persistence: - enabled: true - size: 100Gi - storageClass: "" - accessMode: ReadWriteOnce - mountPath: /root/.cache/ + defaults: + image: + repository: quay.io/tembo/inference + pullPolicy: IfNotPresent + tag: latest + resources: + requests: + cpu: "4" + memory: "16Gi" + nvidia.com/gpu: "1" + limits: + cpu: "8" + memory: "16Gi" + nvidia.com/gpu: "1" + livenessProbe: + enabled: true + path: /health + port: http + readinessProbe: + enabled: true + path: /health + port: http + startupProbe: + enabled: true + path: /health + port: http + failureThreshold: 30 + periodSeconds: 10 + replicaCount: 1 + externalSecrets: + refreshInterval: "5m" + parameterStore: + name: "secret-store-parameter-store" + kind: ClusterSecretStore + secretName: ~ + secretRegex: ~ + podMonitor: + enabled: false + path: /metrics + # Sometimes applications serve metrics on a different port, + # which makes it easier to prevent metrics from accidentally + # being publicly available. 
+ portName: metrics + containerPort: 8081 + serviceAccount: + create: true + annotations: {} + automountServiceAccountToken: false + service: + enabled: true + port: 8000 + args: [] + command: [] + env: [] + securityContext: {} + # # The most practical security settings are + # # dropping all linux capabilities and + # # running as non-root. + # capabilities: + # drop: + # - ALL + # runAsNonRoot: true + # # Read only file system is better if the application + # # can tolerate it. + # # readOnlyRootFilesystem: true + nodeSelector: {} + tolerations: + - key: "tembo.io/gpu" + operator: "Equal" + value: "true" + effect: "NoSchedule" + affinity: {} + podAnnotations: {} + podSecurityContext: {} + persistence: + enabled: true + size: 100Gi + storageClass: "" + accessMode: ReadWriteOnce + mountPath: /root/.cache/ + # Define individual inference services here + services: {} + # service1: + # enabled: true diff --git a/charts/tembo-operator/templates/crd.yaml b/charts/tembo-operator/templates/crd.yaml index 11e3081f8..ef55e284d 100644 --- a/charts/tembo-operator/templates/crd.yaml +++ b/charts/tembo-operator/templates/crd.yaml @@ -2246,7 +2246,7 @@ spec: description: |- Configure the load balancer to be public or private. - **Default**: false. + **Default**: true. 
type: boolean serviceType: default: LoadBalancer diff --git a/conductor/src/main.rs b/conductor/src/main.rs index 031b4023b..b4af1ed5b 100644 --- a/conductor/src/main.rs +++ b/conductor/src/main.rs @@ -85,6 +85,10 @@ async fn run(metrics: CustomMetrics) -> Result<(), ConductorError> { .unwrap_or_else(|_| "".to_owned()) .parse() .expect("error parsing GCP_PROJECT_NUMBER"); + let is_loadbalancer_public: bool = env::var("IS_LOADBALANCER_PUBLIC") + .unwrap_or_else(|_| "true".to_owned()) + .parse() + .expect("error parsing IS_LOADBALANCER_PUBLIC"); // Error and exit if CF_TEMPLATE_BUCKET is not set when IS_CLOUD_FORMATION is enabled if is_cloud_formation && cf_template_bucket.is_empty() { @@ -279,6 +283,7 @@ async fn run(metrics: CustomMetrics) -> Result<(), ConductorError> { &mut coredb_spec, is_cloud_formation, &client, + is_loadbalancer_public, ) .await { @@ -801,6 +806,7 @@ async fn init_cloud_perms( coredb_spec: &mut CoreDBSpec, is_cloud_formation: bool, _client: &Client, + is_loadbalancer_public: bool, ) -> Result<(), ConductorError> { if !is_cloud_formation { return Ok(()); @@ -860,6 +866,12 @@ async fn init_cloud_perms( coredb_spec.backup = backup; coredb_spec.serviceAccountTemplate = service_account_template; + if is_loadbalancer_public { + if let Some(ref mut dedicated_networking) = coredb_spec.dedicated_networking { + dedicated_networking.public = true; + } + } + Ok(()) } diff --git a/inference-gateway/.sqlx/query-be21197914e8a3778e818a1fca0080e9e700ba084f945fe20fda7b28e5a8f6af.json b/inference-gateway/.sqlx/query-be21197914e8a3778e818a1fca0080e9e700ba084f945fe20fda7b28e5a8f6af.json new file mode 100644 index 000000000..3985b5d0a --- /dev/null +++ b/inference-gateway/.sqlx/query-be21197914e8a3778e818a1fca0080e9e700ba084f945fe20fda7b28e5a8f6af.json @@ -0,0 +1,14 @@ +{ + "db_name": "PostgreSQL", + "query": "UPDATE billing.reporter_watermark\n SET last_reported_at = $1", + "describe": { + "columns": [], + "parameters": { + "Left": [ + "Timestamptz" + ] + }, + 
"nullable": [] + }, + "hash": "be21197914e8a3778e818a1fca0080e9e700ba084f945fe20fda7b28e5a8f6af" +} diff --git a/inference-gateway/Makefile b/inference-gateway/Makefile index 9bb793f51..2570797fd 100644 --- a/inference-gateway/Makefile +++ b/inference-gateway/Makefile @@ -1,5 +1,5 @@ DATABASE_URL:=postgresql://postgres:postgres@localhost:5432/postgres -LLM_SERVICE_HOST_PORT=http://localhost:8000 +MODEL_SERVICE_PORT_MAP=facebook/opt-125m=http://localhost:8000 RUST_LOG=debug SQLX_OFFLINE:=true ORG_AUTH_ENABLED:=false @@ -18,7 +18,7 @@ check: cargo sqlx prepare --check run: - LLM_SERVICE_HOST_PORT=${LLM_SERVICE_HOST_PORT} RUST_LOG=${RUST_LOG} ORG_AUTH_ENABLED=${ORG_AUTH_ENABLED} cargo run + MODEL_SERVICE_PORT_MAP=${MODEL_SERVICE_PORT_MAP} RUST_LOG=${RUST_LOG} ORG_AUTH_ENABLED=${ORG_AUTH_ENABLED} cargo run run-migrations: sqlx migrate run --database-url ${DATABASE_URL} @@ -30,9 +30,9 @@ run-mock-server: docker compose up -d mock-server unit-test: - cargo test + cargo test -- --test-threads=1 integration-test: run-mock-server - RUST_LOG=${RUST_LOG} LLM_SERVICE_HOST_PORT=${LLM_SERVICE_HOST_PORT} cargo test ${TEST_NAME} -- --ignored --nocapture + RUST_LOG=${RUST_LOG} MODEL_SERVICE_PORT_MAP=${MODEL_SERVICE_PORT_MAP} cargo test ${TEST_NAME} -- --ignored --nocapture --test-threads=1 test-all: unit-test integration-test \ No newline at end of file diff --git a/inference-gateway/docker-compose.yml b/inference-gateway/docker-compose.yml index 2e5c5d44f..3e3c54387 100644 --- a/inference-gateway/docker-compose.yml +++ b/inference-gateway/docker-compose.yml @@ -15,7 +15,7 @@ services: environment: - RUST_LOG=info - DATABASE_URL=postgresql://postgres:postgres@postgres:5432/postgres - - LLM_SERVICE_HOST_PORT=${LLM_SERVICE_HOST_PORT} + - MODEL_SERVICE_PORT_MAP=${MODEL_SERVICE_PORT_MAP} ports: - 8080:8080 vllm: diff --git a/inference-gateway/docker/inference/Dockerfile b/inference-gateway/docker/inference/Dockerfile index 082c15da9..8559cdd50 100644 --- 
a/inference-gateway/docker/inference/Dockerfile +++ b/inference-gateway/docker/inference/Dockerfile @@ -1,3 +1,3 @@ -FROM vllm/vllm-openai:v0.6.2 +FROM vllm/vllm-openai:v0.6.3 ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"] diff --git a/inference-gateway/migrations/20241010050949_add-watermark-default-value.sql b/inference-gateway/migrations/20241010050949_add-watermark-default-value.sql new file mode 100644 index 000000000..f7c8bac50 --- /dev/null +++ b/inference-gateway/migrations/20241010050949_add-watermark-default-value.sql @@ -0,0 +1,11 @@ +ALTER TABLE billing.reporter_watermark +ALTER COLUMN last_reported_at SET DATA TYPE TIMESTAMP WITH TIME ZONE; + +ALTER TABLE billing.reporter_watermark +ALTER COLUMN last_reported_at SET DEFAULT '1970-01-01'; + +ALTER TABLE billing.reporter_watermark +ALTER COLUMN last_reported_at SET NOT NULL; + +INSERT INTO billing.reporter_watermark (last_reported_at) +VALUES (DEFAULT); \ No newline at end of file diff --git a/inference-gateway/src/config.rs b/inference-gateway/src/config.rs index 623308448..2148a50c8 100644 --- a/inference-gateway/src/config.rs +++ b/inference-gateway/src/config.rs @@ -1,13 +1,15 @@ +use std::collections::HashMap; use std::env; use url::Url; +use crate::errors::PlatformError; + #[derive(Clone, Debug)] pub struct Config { - /// service and port of the inference service - /// Must be an OpenAI compatible interface - pub llm_service_host_port: Url, - /// Postgres connection string to the timeseries databse which logs token usage + pub model_rewrites: HashMap<String, String>, + pub model_service_map: HashMap<String, Url>, + /// Postgres connection string to the timeseries database which logs token usage pub pg_conn_str: String, /// Postgres connection string for the Control Plane queue pub billing_queue_conn_str: String, @@ -26,7 +28,8 @@ pub struct Config { impl Config { pub async fn new() -> Self { Self { - llm_service_host_port: parse_llm_service(), + model_rewrites: parse_model_rewrite(), + model_service_map:
parse_model_service_port_map(), pg_conn_str: from_env_default( "DATABASE_URL", "postgresql://postgres:postgres@0.0.0.0:5432/postgres", @@ -62,7 +65,190 @@ fn from_env_default(key: &str, default: &str) -> String { env::var(key).unwrap_or_else(|_| default.to_owned()) } -fn parse_llm_service() -> Url { - let value = from_env_default("LLM_SERVICE_HOST_PORT", "http://vllm:8000"); - Url::parse(&value).unwrap_or_else(|_| panic!("malformed LLM_SERVICE_HOST_PORT: {value}")) +/// MODEL_SERVICE_PORT_MAP -- a comma-separated list of model names and the host:port they are served at +/// <model-name>=<service>:<port>,<model-name>=<service>:<port> +/// e.g. meta-llama/Meta-Llama-3-8B-Instruct=llama-3-8b-instruct:8000,meta-llama/Llama-3.1-8B-Instruct=llama-3-1-8b-instruct:8000, +/// Must be an OpenAI compatible interface +fn parse_model_service_port_map() -> HashMap<String, Url> { + let model_mappings_values = from_env_default( + "MODEL_SERVICE_PORT_MAP", + "facebook/opt-125m=http://vllm:8000", + ); + + // Initialize an empty HashMap to store model-service-port mappings + let mut model_map: HashMap<String, Url> = HashMap::new(); + + // Split the environment variable value by comma to get individual mappings + for mapping in model_mappings_values.split(',') { + // Split each mapping into <model-name>=<service>:<port> + if let Some((model_name, service_port)) = mapping.split_once('=') { + let svc_port_url = Url::parse(service_port) + .unwrap_or_else(|_| panic!("malformed service: {service_port}")); + model_map.insert(model_name.to_string(), svc_port_url); + } + } + model_map +} + +fn parse_model_rewrite() -> HashMap<String, String> { + let mut map = HashMap::new(); + + if let Ok(env_var) = env::var("MODEL_REWRITES") { + for pair in env_var.split(',') { + if let Some((key, value)) = pair.split_once(':') { + map.insert(key.to_string(), value.to_string()); + } + } + } + + map +} + +#[derive(Debug)] +pub struct MappedRequest { + // the mapped model name + pub model: String, + // url to the correct service for the model + pub base_url: Url, + // request body with updated model name + pub body:
serde_json::Value, +} + +pub fn rewrite_model_request( + mut body: serde_json::Value, + config: &Config, +) -> Result { + // map the model, if there is a mapping for it + let target_model = if let Some(model) = body.get("model") { + let requested_model = model.as_str().ok_or_else(|| { + PlatformError::InvalidQuery("empty value in `model` parameter".to_string()) + })?; + + if let Some(rewritten_model) = config.model_rewrites.get(requested_model) { + body["model"] = serde_json::Value::String(rewritten_model.clone()); + rewritten_model + } else { + requested_model + } + } else { + Err(PlatformError::InvalidQuery( + "missing `model` parameter in request body".to_string(), + ))? + }; + + let base_url = config + .model_service_map + .get(target_model) + .ok_or_else(|| PlatformError::InvalidQuery(format!("model {} not found", target_model)))? + .clone(); + + Ok(MappedRequest { + model: target_model.to_string(), + base_url, + body, + }) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::env; + + #[tokio::test] + async fn test_rewrite() { + env::set_var("MODEL_REWRITES", "cat:dog,old:young"); + env::set_var( + "MODEL_SERVICE_PORT_MAP", + "dog=http://dog:8000/,young=http://young:8000/", + ); + + let cfg = Config::new().await; + let body = serde_json::json!({ + "model": "cat", + "key": "value" + }); + + let rewritten = rewrite_model_request(body.clone(), &cfg).unwrap(); + assert_eq!(rewritten.model, "dog"); + assert_eq!(rewritten.base_url.to_string(), "http://dog:8000/"); + assert_eq!(rewritten.body.get("key").unwrap(), "value"); + + let body = serde_json::json!({ + "model": "old", + "key": "value2" + }); + + let rewritten = rewrite_model_request(body.clone(), &cfg).unwrap(); + assert_eq!(rewritten.model, "young"); + assert_eq!(rewritten.base_url.to_string(), "http://young:8000/"); + assert_eq!(rewritten.body.get("key").unwrap(), "value2"); + } + + #[test] + fn test_valid_env_var() { + env::set_var("MODEL_REWRITES", "cat:dog,old:young"); + let result = 
parse_model_rewrite(); + + let mut expected = HashMap::new(); + expected.insert("cat".to_string(), "dog".to_string()); + expected.insert("old".to_string(), "young".to_string()); + + assert_eq!(result, expected); + } + + #[test] + fn test_empty_env_var() { + env::set_var("MODEL_REWRITES", ""); + let result = parse_model_rewrite(); + assert!(result.is_empty()); + } + + #[test] + fn test_invalid_format() { + env::set_var("MODEL_REWRITES", "cat:dog,invalidpair,old:young"); + let result = parse_model_rewrite(); + + let mut expected = HashMap::new(); + expected.insert("cat".to_string(), "dog".to_string()); + expected.insert("old".to_string(), "young".to_string()); + + assert_eq!(result, expected); + } + + #[test] + fn test_default_values() { + env::remove_var("MODEL_SERVICE_PORT_MAP"); + + let result = parse_model_service_port_map(); + let mut expected = HashMap::new(); + expected.insert( + "facebook/opt-125m".to_string(), + Url::parse("http://vllm:8000").unwrap(), + ); + assert_eq!(result, expected); + } + + #[test] + fn test_custom_mapping() { + env::set_var("MODEL_SERVICE_PORT_MAP", "meta-llama/Meta-Llama-3-8B-Instruct=http://tembo-ai-dev-llama-3-8b-instruct.svc.cluster.local:8000"); + + let result = parse_model_service_port_map(); + let mut expected = HashMap::new(); + expected.insert( + "meta-llama/Meta-Llama-3-8B-Instruct".to_string(), + Url::parse("http://tembo-ai-dev-llama-3-8b-instruct.svc.cluster.local:8000").unwrap(), + ); + + assert_eq!(result, expected); + } + + #[test] + #[should_panic(expected = "malformed service: http://vllm:invalid_port")] + fn test_malformed_url() { + env::set_var( + "MODEL_SERVICE_PORT_MAP", + "facebook/opt-125m=http://vllm:invalid_port", + ); + parse_model_service_port_map(); + } } diff --git a/inference-gateway/src/events_reporter.rs b/inference-gateway/src/events_reporter.rs index cdd4da243..045bb42b4 100644 --- a/inference-gateway/src/events_reporter.rs +++ b/inference-gateway/src/events_reporter.rs @@ -91,6 +91,21 @@ async fn 
get_reporter_watermark(conn: &PgPool) -> Result, + now: DateTime, +) -> Result<(), sqlx::Error> { + sqlx::query!( + "UPDATE billing.reporter_watermark + SET last_reported_at = $1", + now + ) + .execute(inference_pool) + .await + .map_err(Into::into) + .map(|_| ()) +} + fn start_of_the_hour(datetime: DateTime) -> DateTime { // Safe unwrap since, according to chrono docs, Utc will never have double mappings Utc.with_ymd_and_hms( @@ -149,6 +164,9 @@ pub async fn run_events_reporter(pg_conn: String, billing_queue_conn: String) -> for (start_time, end_time) in chunks { enqueue_event(&inference_pool, &queue, BILLING_QUEUE, start_time, end_time).await?; } + + // Save new reporter watermark + save_reporter_watermark(&inference_pool, now).await?; } } diff --git a/inference-gateway/src/routes/forward.rs b/inference-gateway/src/routes/forward.rs index 23873de4f..690050418 100644 --- a/inference-gateway/src/routes/forward.rs +++ b/inference-gateway/src/routes/forward.rs @@ -7,6 +7,7 @@ use std::sync::Arc; use tokio::sync::RwLock; use crate::authorization; +use crate::config::rewrite_model_request; use crate::errors::{AuthError, PlatformError}; pub async fn forward_request( @@ -45,13 +46,19 @@ pub async fn forward_request( return Ok(HttpResponse::BadRequest().body("Embedding generation is not yet supported")); } - let mut new_url = config.llm_service_host_port.clone(); + let rewrite_request = rewrite_model_request(body.clone(), &config)?; + + let mut new_url = rewrite_request.base_url; new_url.set_path(path); new_url.set_query(req.uri().query()); // log request duration let start = std::time::Instant::now(); - let resp = client.post(new_url).json(&body).send().await?; + let resp = client + .post(new_url) + .json(&rewrite_request.body) + .send() + .await?; let duration = start.elapsed().as_millis() as i32; if resp.status().is_success() { let llm_resp = resp.json::().await?; diff --git a/inference-gateway/tests/integration_test.rs b/inference-gateway/tests/integration_test.rs 
index faba3f397..b7191e593 100644 --- a/inference-gateway/tests/integration_test.rs +++ b/inference-gateway/tests/integration_test.rs @@ -136,3 +136,59 @@ async fn test_authorization() { println!("{:?}", resp); assert!(resp.status().is_success()); } + +#[ignore] +#[actix_web::test] +async fn test_unavailable_model() { + let app = common::get_test_app(false).await; + + let mut rng = rand::thread_rng(); + let rnd = rng.gen_range(0..100000); + let instance = format!("MY-TEST-INSTANCE-{}", rnd); + let model = "random/not-a-real-model"; + let payload = serde_json::json!({ + "model": model, + "messages": [{"role": "user", "content": "the quick brown fox..."}] + }); + let req = test::TestRequest::post() + .uri("/v1/chat/completions") + .insert_header(("X-TEMBO-ORG", "MY-TEST-ORG")) + .insert_header(("X-TEMBO-INSTANCE", instance.clone())) + .insert_header((header::CONTENT_TYPE, "application/json")) + .set_payload(payload.to_string()) + .to_request(); + + let resp = test::call_service(&app, req).await; + assert!(resp.status().is_client_error()); +} + +#[ignore] +#[actix_web::test] +async fn test_model_rewrite() { + let model = "facebook/davinci"; + std::env::set_var("MODEL_REWRITES", format!("{model}:facebook/opt-125m")); + + let app = common::get_test_app(false).await; + + let mut rng = rand::thread_rng(); + let rnd = rng.gen_range(0..100000); + let instance = format!("MY-TEST-INSTANCE-{}", rnd); + let payload = serde_json::json!({ + "model": model, + "messages": [{"role": "user", "content": "the quick brown fox..."}] + }); + let req = test::TestRequest::post() + .uri("/v1/chat/completions") + .insert_header(("X-TEMBO-ORG", "MY-TEST-ORG")) + .insert_header(("X-TEMBO-INSTANCE", instance.clone())) + .insert_header((header::CONTENT_TYPE, "application/json")) + .set_payload(payload.to_string()) + .to_request(); + + let resp = test::call_service(&app, req).await; + assert!(resp.status().is_success()); + + let body: serde_json::Value = test::read_body_json(resp).await; + let 
return_model = body.get("model").unwrap().as_str().unwrap(); + assert_eq!(return_model, "facebook/opt-125m"); +} diff --git a/tembo-cli/Cargo.lock b/tembo-cli/Cargo.lock index ee6200100..b76e049c9 100644 --- a/tembo-cli/Cargo.lock +++ b/tembo-cli/Cargo.lock @@ -4342,7 +4342,7 @@ dependencies = [ [[package]] name = "tembo-cli" -version = "0.20.7" +version = "0.20.8" dependencies = [ "actix-cors", "actix-service", diff --git a/tembo-cli/Cargo.toml b/tembo-cli/Cargo.toml index 83b3a256b..27aabbe05 100644 --- a/tembo-cli/Cargo.toml +++ b/tembo-cli/Cargo.toml @@ -1,7 +1,7 @@ workspace = { members = ["temboclient", "tembodataclient"] } [package] name = "tembo-cli" -version = "0.20.7" +version = "0.20.8" edition = "2021" authors = ["Tembo.io"] description = "The CLI for Tembo" diff --git a/tembo-operator/Cargo.lock b/tembo-operator/Cargo.lock index 587d546e4..576ca6dd5 100644 --- a/tembo-operator/Cargo.lock +++ b/tembo-operator/Cargo.lock @@ -503,7 +503,7 @@ dependencies = [ [[package]] name = "controller" -version = "0.50.1" +version = "0.50.2" dependencies = [ "actix-web", "anyhow", diff --git a/tembo-operator/Cargo.toml b/tembo-operator/Cargo.toml index dfc964b84..55c2a291c 100644 --- a/tembo-operator/Cargo.toml +++ b/tembo-operator/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "controller" description = "Tembo Operator for Postgres" -version = "0.50.1" +version = "0.50.2" edition = "2021" default-run = "controller" license = "Apache-2.0" diff --git a/tembo-operator/src/apis/coredb_types.rs b/tembo-operator/src/apis/coredb_types.rs index 3445c40c2..12d87bda0 100644 --- a/tembo-operator/src/apis/coredb_types.rs +++ b/tembo-operator/src/apis/coredb_types.rs @@ -506,7 +506,7 @@ pub struct DedicatedNetworking { /// Configure the load balancer to be public or private. /// - /// **Default**: false. + /// **Default**: true. 
#[serde(default)] pub public: bool, diff --git a/tembo-operator/src/dedicated_networking.rs b/tembo-operator/src/dedicated_networking.rs index ad6653ac1..143ac1e63 100644 --- a/tembo-operator/src/dedicated_networking.rs +++ b/tembo-operator/src/dedicated_networking.rs @@ -399,12 +399,6 @@ async fn reconcile_dedicated_networking_service( "cnpg.io/cluster".to_string(), serde_json::Value::String(cdb_name.to_string()), ); - if is_public { - labels.insert( - "public".to_string(), - serde_json::Value::String("true".to_string()), - ); - } let mut service_spec = serde_json::Map::new(); service_spec.insert( @@ -427,8 +421,23 @@ async fn reconcile_dedicated_networking_service( service_spec.insert("type".to_string(), json!(service_type)); let ip_allow_list = cdb.spec.ip_allow_list.clone().unwrap_or_else(|| vec![]); - if service_type == "LoadBalancer" && !ip_allow_list.is_empty() { - service_spec.insert("loadBalancerSourceRanges".to_string(), json!(ip_allow_list)); + // Allow ip_allow_list to allow all entries are in CIDR notation + let ip_allow_list_cidr: Vec = ip_allow_list + .iter() + .map(|ip| { + if ip.contains('/') { + ip.clone() + } else { + format!("{}/32", ip) + } + }) + .collect(); + + if service_type == "LoadBalancer" && !ip_allow_list_cidr.is_empty() { + service_spec.insert( + "loadBalancerSourceRanges".to_string(), + json!(ip_allow_list_cidr), + ); } let service = json!({ diff --git a/tembo-operator/tests/integration_tests.rs b/tembo-operator/tests/integration_tests.rs index 14e7381ad..53f00a9d0 100644 --- a/tembo-operator/tests/integration_tests.rs +++ b/tembo-operator/tests/integration_tests.rs @@ -2153,16 +2153,6 @@ mod test { service.spec.as_ref().unwrap().type_, Some("LoadBalancer".to_string()) ); - assert_eq!( - service - .metadata - .labels - .as_ref() - .expect("Labels should be present") - .get("public") - .expect("Public label should be present"), - "true" - ); let annotations = service .metadata @@ -2226,16 +2216,6 @@ mod test { 
service.spec.as_ref().unwrap().type_, Some("LoadBalancer".to_string()) ); - assert_eq!( - service - .metadata - .labels - .as_ref() - .expect("Labels should be present") - .get("public") - .expect("Public label should be present"), - "true" - ); let annotations = service .metadata diff --git a/tembo-stacks/Cargo.lock b/tembo-stacks/Cargo.lock index 78186524d..18a03604d 100644 --- a/tembo-stacks/Cargo.lock +++ b/tembo-stacks/Cargo.lock @@ -2471,7 +2471,7 @@ dependencies = [ [[package]] name = "tembo-stacks" -version = "0.17.0" +version = "0.17.1" dependencies = [ "anyhow", "clap", diff --git a/tembo-stacks/Cargo.toml b/tembo-stacks/Cargo.toml index 4824513c9..3bd0d6eb7 100644 --- a/tembo-stacks/Cargo.toml +++ b/tembo-stacks/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tembo-stacks" description = "Tembo Stacks for Postgres" -version = "0.17.0" +version = "0.17.1" authors = ["tembo.io"] edition = "2021" license = "Apache-2.0" diff --git a/tembo-stacks/src/apps/embeddings.yaml b/tembo-stacks/src/apps/embeddings.yaml index 401f6dd18..71cb4e1ee 100644 --- a/tembo-stacks/src/apps/embeddings.yaml +++ b/tembo-stacks/src/apps/embeddings.yaml @@ -1,6 +1,6 @@ name: !embeddings appServices: - - image: 387894460527.dkr.ecr.us-east-1.amazonaws.com/tembo-io/vector-serve:6397964 + - image: 387894460527.dkr.ecr.us-east-1.amazonaws.com/tembo-io/vector-serve:0e8078d name: embeddings metrics: path: /metrics diff --git a/tembo-stacks/src/stacks/specs/rag.yaml b/tembo-stacks/src/stacks/specs/rag.yaml index 86891795f..7ab85824f 100644 --- a/tembo-stacks/src/stacks/specs/rag.yaml +++ b/tembo-stacks/src/stacks/specs/rag.yaml @@ -30,7 +30,7 @@ appServices: volumes: - emptyDir: {} name: empty-dir - - image: 387894460527.dkr.ecr.us-east-1.amazonaws.com/tembo-io/vector-serve:6397964 + - image: 387894460527.dkr.ecr.us-east-1.amazonaws.com/tembo-io/vector-serve:0e8078d name: embeddings metrics: path: /metrics diff --git a/tembo-stacks/src/stacks/specs/vectordb.yaml 
b/tembo-stacks/src/stacks/specs/vectordb.yaml index c66cbef48..aea7b21b9 100644 --- a/tembo-stacks/src/stacks/specs/vectordb.yaml +++ b/tembo-stacks/src/stacks/specs/vectordb.yaml @@ -8,7 +8,7 @@ images: 16: "standard-cnpg:16-5120dd1" stack_version: 0.1.0 appServices: - - image: 387894460527.dkr.ecr.us-east-1.amazonaws.com/tembo-io/vector-serve:6397964 + - image: 387894460527.dkr.ecr.us-east-1.amazonaws.com/tembo-io/vector-serve:0e8078d name: embeddings metrics: path: /metrics