tembo ai model source (#123)
* makes Tembo a viable model source target
* allows users to configure the OpenAI base URL
* changes project defaults from pg15 to pg16
ChuckHend authored Jun 6, 2024
1 parent 6397964 commit 1012741
Showing 20 changed files with 332 additions and 114 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/build-ollama-serve.yml
@@ -24,7 +24,7 @@ defaults:
run:
shell: bash
working-directory: ./ollama-serve/

jobs:
build_and_push:
name: Build and push images
@@ -43,7 +43,7 @@ jobs:
echo "SHORT_SHA=$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3

- name: Set up QEMU
uses: docker/setup-qemu-action@v3
- name: Set up Docker Buildx
6 changes: 3 additions & 3 deletions .github/workflows/docs.yml
@@ -4,8 +4,8 @@ on:
branches:
- main
paths:
-      - 'docs/**'
-      - '.github/workflows/docs.yml'
+      - "docs/**"
+      - ".github/workflows/docs.yml"
jobs:
deploy:
runs-on: ubuntu-latest
@@ -19,5 +19,5 @@ jobs:

- name: deploy docs
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: poetry run mkdocs gh-deploy --force
50 changes: 23 additions & 27 deletions .github/workflows/extension_ci.yml
@@ -10,13 +10,13 @@ on:
branches:
- main
paths-ignore:
-      - 'README.md'
+      - "README.md"
- docs/**
push:
branches:
- main
paths-ignore:
-      - 'README.md'
+      - "README.md"
- docs/**

release:
@@ -39,12 +39,12 @@ jobs:
- name: Install stoml and pg-trunk
shell: bash
run: |
set -xe
wget https://github.com/freshautomations/stoml/releases/download/v0.7.1/stoml_linux_amd64 &> /dev/null
mv stoml_linux_amd64 stoml
chmod +x stoml
sudo mv stoml /usr/local/bin/
cargo install pg-trunk
- name: Cache binaries
uses: actions/cache@v2
@@ -120,11 +120,9 @@
${{ runner.os }}-bins-
- name: setup-tests
run: |
-        pgrx15_config=$(/usr/local/bin/stoml ~/.pgrx/config.toml configs.pg15)
-        ~/.cargo/bin/trunk install pgvector --pg-config ${pgrx15_config}
-        ~/.cargo/bin/trunk install pgmq --pg-config ${pgrx15_config}
-        ~/.cargo/bin/trunk install pg_cron --pg-config ${pgrx15_config}
-        echo "shared_preload_libraries = 'pg_cron, vectorize'" >> ~/.pgrx/data-15/postgresql.conf
+        make trunk-dependencies
+        make setup.urls
+        make setup.shared_preload_libraries
rm -rf ./target/pgrx-test-data-* || true
- name: unit-test
run: |
@@ -133,8 +131,6 @@
env:
HF_API_KEY: ${{ secrets.HF_API_KEY }}
run: |
-        pgrx15_config=$(/usr/local/bin/stoml ~/.pgrx/config.toml configs.pg15)
-        pg_version=$(/usr/local/bin/stoml Cargo.toml features.default)
echo "\q" | make run
make test-integration
@@ -160,12 +156,12 @@
- name: Install stoml and pg-trunk
shell: bash
run: |
set -xe
wget https://github.com/freshautomations/stoml/releases/download/v0.7.1/stoml_linux_amd64 &> /dev/null
mv stoml_linux_amd64 stoml
chmod +x stoml
sudo mv stoml /usr/local/bin/
cargo install pg-trunk
- name: trunk build
working-directory: ./extension
# trunk does not support path dependencies in Cargo.toml that are not within the current working directory
@@ -184,7 +180,7 @@
needs:
- publish
env:
TARGET_PLATFORMS: linux/amd64,linux/arm64
runs-on:
- self-hosted
- dind
@@ -202,11 +198,11 @@
- name: Install stoml and pg-trunk
shell: bash
run: |
set -xe
wget https://github.com/freshautomations/stoml/releases/download/v0.7.1/stoml_linux_amd64 &> /dev/null
mv stoml_linux_amd64 stoml
chmod +x stoml
sudo mv stoml /usr/local/bin/
- name: Set version strings
id: versions
run: |
4 changes: 2 additions & 2 deletions core/src/transformers/openai.rs
@@ -6,7 +6,7 @@ use crate::types::{JobParams, VectorizeMeta};
// max token length is 8192
// however, depending on content of text, token count can be higher than
pub const MAX_TOKEN_LEN: usize = 8192;
-pub const OPENAI_EMBEDDING_URL: &str = "https://api.openai.com/v1/embeddings";
+pub const OPENAI_BASE_URL: &str = "https://api.openai.com/v1";

pub fn prepare_openai_request(
vect_meta: VectorizeMeta,
@@ -30,7 +30,7 @@ pub fn prepare_openai_request(
},
};
Ok(EmbeddingRequest {
-        url: OPENAI_EMBEDDING_URL.to_owned(),
+        url: format!("{OPENAI_BASE_URL}/embeddings"),
payload,
api_key: Some(apikey),
})
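The change above replaces the hard-coded embeddings URL with a base-URL constant, which is what makes the endpoint overridable for OpenAI-compatible servers. A minimal sketch of the composition, assuming an environment-variable override (the `VECTORIZE_OPENAI_BASE_URL` name is illustrative, not from this commit; the extension wires the override through its own configuration):

```rust
// Sketch: build the embeddings endpoint from an overridable base URL.
// VECTORIZE_OPENAI_BASE_URL is a hypothetical variable name for illustration.
const OPENAI_BASE_URL: &str = "https://api.openai.com/v1";

fn embeddings_url() -> String {
    let base = std::env::var("VECTORIZE_OPENAI_BASE_URL")
        .unwrap_or_else(|_| OPENAI_BASE_URL.to_string());
    // trim any trailing slash so both "http://host/v1" and "http://host/v1/" work
    format!("{}/embeddings", base.trim_end_matches('/'))
}
```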
39 changes: 31 additions & 8 deletions core/src/types.rs
@@ -164,7 +164,10 @@ pub enum ModelError {
impl Model {
pub fn new(input: &str) -> Result<Self, ModelError> {
let mut parts: Vec<&str> = input.split('/').collect();
-        let missing_source = parts.len() != 2;
+        let missing_source = parts.len() < 2;
+        if parts.len() > 3 {
+            return Err(ModelError::InvalidFormat(input.to_string()));
+        }
if missing_source && parts[0] == "text-embedding-ada-002" {
// for backwards compatibility, prepend "openai" to text-embedding-ada-2
parts.insert(0, "openai");
@@ -178,10 +181,22 @@
.parse::<ModelSource>()
.map_err(|_| ModelError::InvalidSource(parts[0].to_string()))?;

+        let name = if source == ModelSource::Tembo {
+            // removes the leading /tembo from the model name
+            parts.remove(0);
+            // all others remain the same
+            parts.join("/")
+        } else {
+            parts
+                .last()
+                .expect("expected non-empty model name")
+                .to_string()
+        };

Ok(Self {
source,
-            fullname: format!("{}/{}", parts[0], parts[1]),
-            name: parts[1].to_string(),
+            fullname: parts.join("/"),
+            name,
})
}
}
@@ -199,6 +214,7 @@ pub enum ModelSource {
OpenAI,
SentenceTransformers,
Ollama,
+    Tembo,
}

impl FromStr for ModelSource {
@@ -209,6 +225,7 @@
"ollama" => Ok(ModelSource::Ollama),
"openai" => Ok(ModelSource::OpenAI),
"sentence-transformers" => Ok(ModelSource::SentenceTransformers),
"tembo" => Ok(ModelSource::Tembo),
_ => Ok(ModelSource::SentenceTransformers),
}
}
@@ -220,6 +237,7 @@ impl Display for ModelSource {
ModelSource::Ollama => write!(f, "ollama"),
ModelSource::OpenAI => write!(f, "openai"),
ModelSource::SentenceTransformers => write!(f, "sentence-transformers"),
+            ModelSource::Tembo => write!(f, "tembo"),
}
}
}
@@ -230,6 +248,7 @@ impl From<String> for ModelSource {
"ollama" => ModelSource::Ollama,
"openai" => ModelSource::OpenAI,
"sentence-transformers" => ModelSource::SentenceTransformers,
"tembo" => ModelSource::Tembo,
// other cases are assumed to be private sentence-transformer compatible model
// and can be hot-loaded
_ => ModelSource::SentenceTransformers,
@@ -242,10 +261,19 @@
mod model_tests {
use super::*;

+    #[test]
+    fn test_tembo_parsing() {
+        let model = Model::new("tembo/meta-llama/Meta-Llama-3-8B-Instruct").unwrap();
+        assert_eq!(model.source, ModelSource::Tembo);
+        assert_eq!(model.fullname, "meta-llama/Meta-Llama-3-8B-Instruct");
+        assert_eq!(model.name, "meta-llama/Meta-Llama-3-8B-Instruct");
+    }

#[test]
fn test_ollama_parsing() {
let model = Model::new("ollama/wizardlm2:7b").unwrap();
assert_eq!(model.source, ModelSource::Ollama);
assert_eq!(model.fullname, "ollama/wizardlm2:7b");
assert_eq!(model.name, "wizardlm2:7b");
}

@@ -295,11 +323,6 @@ mod model_tests {
assert!(Model::new("openaimodel-name").is_err());
}

-    #[test]
-    fn test_invalid_format_extra_slash() {
-        assert!(Model::new("openai/model/name").is_err());
-    }

#[test]
fn test_backwards_compatibility() {
let model = Model::new("text-embedding-ada-002").unwrap();
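The new parsing rules are easiest to read as examples: a Tembo model drops the source prefix from `fullname` (Tembo appears to take Hugging Face-style `org/model` identifiers), every other source keeps it, and anything with more than three segments is rejected. A quick sketch, assuming the `Model` and `ModelSource` types from the diff above are in scope:

```rust
// Expected behavior under the new rules (Model/ModelSource from this diff).
let ollama = Model::new("ollama/wizardlm2:7b").unwrap();
assert_eq!(ollama.fullname, "ollama/wizardlm2:7b"); // source prefix kept

let tembo = Model::new("tembo/meta-llama/Meta-Llama-3-8B-Instruct").unwrap();
assert_eq!(tembo.fullname, "meta-llama/Meta-Llama-3-8B-Instruct"); // "tembo/" stripped

assert!(Model::new("a/b/c/d").is_err()); // four or more segments now fail fast
```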
6 changes: 4 additions & 2 deletions core/src/worker/base.rs
@@ -79,7 +79,7 @@ impl Config {
}

/// source a variable from environment - use default if not exists
-fn from_env_default(key: &str, default: &str) -> String {
+pub fn from_env_default(key: &str, default: &str) -> String {
env::var(key).unwrap_or_else(|_| default.to_owned())
}

@@ -98,7 +98,9 @@ async fn execute_job(
&msg.message.inputs,
cfg.openai_api_key.clone(),
)?,
-        ModelSource::Ollama => Err(anyhow::anyhow!("Ollama transformer not implemented yet"))?,
+        ModelSource::Ollama | ModelSource::Tembo => Err(anyhow::anyhow!(
+            "Ollama/Tembo transformer not implemented yet"
+        ))?,
ModelSource::SentenceTransformers => generic::prepare_generic_embedding_request(
job_meta.clone(),
&msg.message.inputs,
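Making `from_env_default` public lets other modules source a setting with an environment override and a fallback in one line. A usage sketch (the key name here is illustrative, not from this commit):

```rust
// Read an optional override from the environment, falling back to the default.
let base_url = from_env_default("OPENAI_BASE_URL", "https://api.openai.com/v1");
```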
4 changes: 2 additions & 2 deletions extension/Cargo.toml
@@ -1,14 +1,14 @@
[package]
name = "vectorize"
version = "0.15.1"
version = "0.16.0"
edition = "2021"
publish = false

[lib]
crate-type = ["cdylib"]

[features]
default = ["pg15"]
default = ["pg16"]
pg14 = ["pgrx/pg14", "pgrx-tests/pg14"]
pg15 = ["pgrx/pg15", "pgrx-tests/pg15"]
pg16 = ["pgrx/pg16", "pgrx-tests/pg16"]
18 changes: 13 additions & 5 deletions extension/Makefile
@@ -1,10 +1,10 @@
SQLX_OFFLINE:=true
-DATABASE_URL:=postgres://${USER}:${USER}@localhost:28815/postgres
DISTNAME = $(shell grep -m 1 '^name' Trunk.toml | sed -e 's/[^"]*"\([^"]*\)",\{0,1\}/\1/')
DISTVERSION = $(shell grep -m 1 '^version' Trunk.toml | sed -e 's/[^"]*"\([^"]*\)",\{0,1\}/\1/')
-PG_VERSION:=15
+PG_VERSION:=16
+DATABASE_URL:=postgres://${USER}:${USER}@localhost:288${PG_VERSION}/postgres
+PGRX_PG_CONFIG =$(shell cargo pgrx info pg-config pg${PG_VERSION})
-UPGRADE_FROM_VER:=0.9.0
+UPGRADE_FROM_VER:=0.16.0
BRANCH:=$(git rev-parse --abbrev-ref HEAD)
RUST_LOG:=debug

@@ -40,11 +40,17 @@ setup.shared_preload_libraries:
setup.urls:
echo "vectorize.embedding_service_url = 'http://localhost:3000/v1/embeddings'" >> ~/.pgrx/data-${PG_VERSION}/postgresql.conf
echo "vectorize.ollama_service_url = 'http://localhost:3001'" >> ~/.pgrx/data-${PG_VERSION}/postgresql.conf
setup: setup.dependencies setup.shared_preload_libraries
setup: setup.dependencies setup.shared_preload_libraries setup.urls

cat-logs:
cat ~/.pgrx/${PG_VERSION}.log

+# install dependencies from trunk (limited OS/arch)
+trunk-dependencies:
+	trunk install pgvector --pg-config ${PGRX_PG_CONFIG}
+	trunk install pgmq --pg-config ${PGRX_PG_CONFIG}
+	trunk install pg_cron --pg-config ${PGRX_PG_CONFIG}

install-pg_cron:
git clone https://github.com/citusdata/pg_cron.git && \
cd pg_cron && \
@@ -65,7 +71,9 @@ install-pgvector:
install-pgmq:
git clone https://github.com/tembo-io/pgmq.git && \
cd pgmq && \
-	cargo pgrx install --pg-config=${PGRX_PG_CONFIG} && \
+	PG_CONFIG=${PGRX_PG_CONFIG} make clean && \
+	PG_CONFIG=${PGRX_PG_CONFIG} make && \
+	PG_CONFIG=${PGRX_PG_CONFIG} make install && \
cd .. && rm -rf pgmq

test-integration:
2 changes: 1 addition & 1 deletion extension/Trunk.toml
@@ -6,7 +6,7 @@ description = "The simplest way to orchestrate vector search on Postgres."
homepage = "https://github.com/tembo-io/pg_vectorize"
documentation = "https://github.com/tembo-io/pg_vectorize"
categories = ["orchestration", "machine_learning"]
version = "0.15.1"
version = "0.16.0"

[build]
postgres_version = "15"
28 changes: 28 additions & 0 deletions extension/sql/vectorize--0.15.1--0.16.0.sql
@@ -0,0 +1,28 @@
-- src/api.rs:158
-- vectorize::api::generate
CREATE FUNCTION vectorize."generate"(
"input" TEXT, /* &str */
"model" TEXT DEFAULT 'openai/gpt-3.5-turbo', /* alloc::string::String */
"api_key" TEXT DEFAULT NULL /* core::option::Option<alloc::string::String> */
) RETURNS TEXT /* core::result::Result<alloc::string::String, anyhow::Error> */
LANGUAGE c /* Rust */
AS 'MODULE_PATHNAME', 'generate_wrapper';

-- src/api.rs:168
-- vectorize::api::env_interpolate_guc
CREATE FUNCTION vectorize."env_interpolate_guc"(
"guc_name" TEXT /* &str */
) RETURNS TEXT /* core::result::Result<alloc::string::String, anyhow::Error> */
STRICT
LANGUAGE c /* Rust */
AS 'MODULE_PATHNAME', 'env_interpolate_guc_wrapper';

-- src/api.rs:79
-- vectorize::api::encode
CREATE FUNCTION vectorize."encode"(
"input" TEXT, /* &str */
"model" TEXT DEFAULT 'openai/text-embedding-ada-002', /* alloc::string::String */
"api_key" TEXT DEFAULT NULL /* core::option::Option<alloc::string::String> */
) RETURNS double precision[] /* core::result::Result<alloc::vec::Vec<f64>, anyhow::Error> */
LANGUAGE c /* Rust */
AS 'MODULE_PATHNAME', 'encode_wrapper';