From 8216e89e27c56be8fa67d7489c68e6976544fe4f Mon Sep 17 00:00:00 2001 From: Adam Hendel Date: Fri, 23 Aug 2024 09:16:21 -0500 Subject: [PATCH 1/2] change to tembo as default source --- core/src/transformers/providers/ollama.rs | 11 ++- docs/api/rag.md | 2 +- docs/api/search.md | 3 +- docs/api/utilities.md | 6 +- extension/sql/vectorize--0.17.1--0.18.0.sql | 89 +++++++++++++++++---- extension/src/api.rs | 15 ++-- extension/src/transformers/generic.rs | 17 ---- extension/src/transformers/http_handler.rs | 38 +++++---- 8 files changed, 117 insertions(+), 64 deletions(-) diff --git a/core/src/transformers/providers/ollama.rs b/core/src/transformers/providers/ollama.rs index 349508b..2d12713 100644 --- a/core/src/transformers/providers/ollama.rs +++ b/core/src/transformers/providers/ollama.rs @@ -56,11 +56,14 @@ impl EmbeddingProvider for OllamaProvider { } async fn model_dim(&self, model_name: &str) -> Result { - let dim = match model_name { - "llama2" => 5192, - _ => 1536, + // determine embedding dim by generating an embedding and getting length of array + let req = GenericEmbeddingRequest { + input: vec!["hello world".to_string()], + model: model_name.to_string(), }; - Ok(dim) + let embedding = self.generate_embedding(&req).await?; + let dim = embedding.embeddings[0].len(); + Ok(dim as u32) } } diff --git a/docs/api/rag.md b/docs/api/rag.md index 92e9a67..63231b8 100644 --- a/docs/api/rag.md +++ b/docs/api/rag.md @@ -15,7 +15,7 @@ vectorize.init_rag( "unique_record_id" TEXT, "column" TEXT, "schema" TEXT DEFAULT 'public', - "transformer" TEXT DEFAULT 'openai/text-embedding-ada-002', + "transformer" TEXT DEFAULT 'meta-llama/Meta-Llama-3-8B-Instruct', "index_dist_type" vectorize.IndexDist DEFAULT 'pgv_hnsw_cosine', "table_method" vectorize.TableMethod DEFAULT 'append' ) RETURNS TEXT diff --git a/docs/api/search.md b/docs/api/search.md index 757d93e..59c8a54 100644 --- a/docs/api/search.md +++ b/docs/api/search.md @@ -12,10 +12,9 @@ vectorize."table"( "columns" TEXT[], 
"job_name" TEXT, "primary_key" TEXT, - "args" json DEFAULT '{}', "schema" TEXT DEFAULT 'public', "update_col" TEXT DEFAULT 'last_updated_at', - "transformer" TEXT DEFAULT 'openai/text-embedding-ada-002', + "transformer" TEXT DEFAULT 'meta-llama/Meta-Llama-3-8B-Instruct', "index_dist_type" vectorize.IndexDist DEFAULT 'pgv_hnsw_cosine', "table_method" vectorize.TableMethod DEFAULT 'join', "schedule" TEXT DEFAULT '* * * * *' diff --git a/docs/api/utilities.md b/docs/api/utilities.md index d650fed..51ceb20 100644 --- a/docs/api/utilities.md +++ b/docs/api/utilities.md @@ -7,9 +7,9 @@ Transforms a block of text to embeddings using the specified transformer. Requires the `vector-serve` container to be set via `vectorize.embedding_service_url`, or an OpenAI key to be set if using OpenAI embedding models. ```sql -vectorize."transform_embeddings"( +vectorize."encode"( "input" TEXT, - "model_name" TEXT DEFAULT 'openai/text-embedding-ada-002', + "model_name" TEXT DEFAULT 'sentence-transformers/all-MiniLM-L6-v2', "api_key" TEXT DEFAULT NULL ) RETURNS double precision[] ``` @@ -25,7 +25,7 @@ vectorize."transform_embeddings"( ### Example ```sql -select vectorize.transform_embeddings( +select vectorize.encode( input => 'the quick brown fox jumped over the lazy dogs', model_name => 'sentence-transformers/multi-qa-MiniLM-L6-dot-v1' ); diff --git a/extension/sql/vectorize--0.17.1--0.18.0.sql b/extension/sql/vectorize--0.17.1--0.18.0.sql index 2e96aa5..b20268b 100644 --- a/extension/sql/vectorize--0.17.1--0.18.0.sql +++ b/extension/sql/vectorize--0.17.1--0.18.0.sql @@ -1,20 +1,81 @@ +DROP FUNCTION vectorize."transform_embeddings"; +CREATE FUNCTION vectorize."transform_embeddings"( + "input" TEXT, /* &str */ + "model_name" TEXT DEFAULT 'sentence-transformers/all-MiniLM-L6-v2', /* alloc::string::String */ + "api_key" TEXT DEFAULT NULL /* core::option::Option */ +) RETURNS double precision[] /* core::result::Result, anyhow::Error> */ +LANGUAGE c /* Rust */ +AS 'MODULE_PATHNAME', 
'transform_embeddings_wrapper'; + + +DROP FUNCTION vectorize."rag"; +CREATE FUNCTION vectorize."rag"( + "agent_name" TEXT, /* &str */ + "query" TEXT, /* &str */ + "chat_model" TEXT DEFAULT 'tembo/meta-llama/Meta-Llama-3-8B-Instruct', /* alloc::string::String */ + "task" TEXT DEFAULT 'question_answer', /* alloc::string::String */ + "api_key" TEXT DEFAULT NULL, /* core::option::Option */ + "num_context" INT DEFAULT 2, /* i32 */ + "force_trim" bool DEFAULT false /* bool */ +) RETURNS TABLE ( + "chat_results" jsonb /* pgrx::datum::json::JsonB */ +) +LANGUAGE c /* Rust */ +AS 'MODULE_PATHNAME', 'rag_wrapper'; + + +DROP FUNCTION vectorize."generate"; +CREATE FUNCTION vectorize."generate"( + "input" TEXT, /* &str */ + "model" TEXT DEFAULT 'tembo/meta-llama/Meta-Llama-3-8B-Instruct', /* alloc::string::String */ + "api_key" TEXT DEFAULT NULL /* core::option::Option */ +) RETURNS TEXT /* core::result::Result */ +LANGUAGE c /* Rust */ +AS 'MODULE_PATHNAME', 'generate_wrapper'; + + +DROP FUNCTION vectorize."encode"; +CREATE FUNCTION vectorize."encode"( + "input" TEXT, /* &str */ + "model" TEXT DEFAULT 'sentence-transformers/all-MiniLM-L6-v2', /* alloc::string::String */ + "api_key" TEXT DEFAULT NULL /* core::option::Option */ +) RETURNS double precision[] /* core::result::Result, anyhow::Error> */ +LANGUAGE c /* Rust */ +AS 'MODULE_PATHNAME', 'encode_wrapper'; + --- src/api.rs:14 --- vectorize::api::table DROP FUNCTION vectorize."table"; CREATE FUNCTION vectorize."table"( - "table" TEXT, /* &str */ - "columns" TEXT[], /* alloc::vec::Vec */ - "job_name" TEXT, /* &str */ - "primary_key" TEXT, /* &str */ - "schema" TEXT DEFAULT 'public', /* &str */ - "update_col" TEXT DEFAULT 'last_updated_at', /* alloc::string::String */ - "index_dist_type" vectorize.IndexDist DEFAULT 'pgv_hnsw_cosine', /* vectorize::types::IndexDist */ - "transformer" TEXT DEFAULT 'openai/text-embedding-ada-002', /* &str */ - "search_alg" vectorize.SimilarityAlg DEFAULT 'pgv_cosine_similarity', /* 
vectorize::types::SimilarityAlg */ - "table_method" vectorize.TableMethod DEFAULT 'join', /* vectorize::types::TableMethod */ - "schedule" TEXT DEFAULT '* * * * *' /* &str */ + "table" TEXT, /* &str */ + "columns" TEXT[], /* alloc::vec::Vec */ + "job_name" TEXT, /* &str */ + "primary_key" TEXT, /* &str */ + "schema" TEXT DEFAULT 'public', /* &str */ + "update_col" TEXT DEFAULT 'last_updated_at', /* alloc::string::String */ + "index_dist_type" vectorize.IndexDist DEFAULT 'pgv_hnsw_cosine', /* vectorize::types::IndexDist */ + "transformer" TEXT DEFAULT 'sentence-transformers/all-MiniLM-L6-v2', /* &str */ + "search_alg" vectorize.SimilarityAlg DEFAULT 'pgv_cosine_similarity', /* vectorize::types::SimilarityAlg */ + "table_method" vectorize.TableMethod DEFAULT 'join', /* vectorize::types::TableMethod */ + "schedule" TEXT DEFAULT '* * * * *' /* &str */ +) RETURNS TEXT /* core::result::Result */ +STRICT +LANGUAGE c /* Rust */ +AS 'MODULE_PATHNAME', 'table_wrapper'; + + +DROP FUNCTION vectorize."init_rag"; +CREATE FUNCTION vectorize."init_rag"( + "agent_name" TEXT, /* &str */ + "table_name" TEXT, /* &str */ + "unique_record_id" TEXT, /* &str */ + "column" TEXT, /* &str */ + "schema" TEXT DEFAULT 'public', /* &str */ + "index_dist_type" vectorize.IndexDist DEFAULT 'pgv_hnsw_cosine', /* vectorize::types::IndexDist */ + "transformer" TEXT DEFAULT 'sentence-transformers/all-MiniLM-L6-v2', /* &str */ + "search_alg" vectorize.SimilarityAlg DEFAULT 'pgv_cosine_similarity', /* vectorize::types::SimilarityAlg */ + "table_method" vectorize.TableMethod DEFAULT 'join', /* vectorize::types::TableMethod */ + "schedule" TEXT DEFAULT '* * * * *' /* &str */ ) RETURNS TEXT /* core::result::Result */ STRICT LANGUAGE c /* Rust */ -AS 'MODULE_PATHNAME', 'table_wrapper'; \ No newline at end of file +AS 'MODULE_PATHNAME', 'init_rag_wrapper'; \ No newline at end of file diff --git a/extension/src/api.rs b/extension/src/api.rs index d7575a8..700cc6b 100644 --- a/extension/src/api.rs +++ 
b/extension/src/api.rs @@ -20,7 +20,7 @@ fn table( schema: default!(&str, "'public'"), update_col: default!(String, "'last_updated_at'"), index_dist_type: default!(types::IndexDist, "'pgv_hnsw_cosine'"), - transformer: default!(&str, "'openai/text-embedding-ada-002'"), + transformer: default!(&str, "'sentence-transformers/all-MiniLM-L6-v2'"), // search_alg is now deprecated search_alg: default!(types::SimilarityAlg, "'pgv_cosine_similarity'"), table_method: default!(types::TableMethod, "'join'"), @@ -67,7 +67,7 @@ fn search( #[pg_extern] fn transform_embeddings( input: &str, - model_name: default!(String, "'openai/text-embedding-ada-002'"), + model_name: default!(String, "'sentence-transformers/all-MiniLM-L6-v2'"), api_key: default!(Option, "NULL"), ) -> Result> { let model = Model::new(&model_name)?; @@ -77,7 +77,7 @@ fn transform_embeddings( #[pg_extern] fn encode( input: &str, - model: default!(String, "'openai/text-embedding-ada-002'"), + model: default!(String, "'sentence-transformers/all-MiniLM-L6-v2'"), api_key: default!(Option, "NULL"), ) -> Result> { let model = Model::new(&model)?; @@ -95,7 +95,7 @@ fn init_rag( schema: default!(&str, "'public'"), index_dist_type: default!(types::IndexDist, "'pgv_hnsw_cosine'"), // transformer model to use in vector-search - transformer: default!(&str, "'openai/text-embedding-ada-002'"), + transformer: default!(&str, "'sentence-transformers/all-MiniLM-L6-v2'"), // similarity algorithm to use in vector-search // search_alg is now deprecated search_alg: default!(types::SimilarityAlg, "'pgv_cosine_similarity'"), @@ -126,10 +126,7 @@ fn init_rag( fn rag( agent_name: &str, query: &str, - // chat models: currently only supports gpt 3.5 and 4 - // https://platform.openai.com/docs/models/gpt-3-5-turbo - // https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo - chat_model: default!(String, "'openai/gpt-3.5-turbo'"), + chat_model: default!(String, "'tembo/meta-llama/Meta-Llama-3-8B-Instruct'"), // points to the type of 
prompt template to use task: default!(String, "'question_answer'"), api_key: default!(Option, "NULL"), @@ -155,7 +152,7 @@ fn rag( #[pg_extern] fn generate( input: &str, - model: default!(String, "'openai/gpt-3.5-turbo'"), + model: default!(String, "'tembo/meta-llama/Meta-Llama-3-8B-Instruct'"), api_key: default!(Option, "NULL"), ) -> Result { let model = Model::new(&model)?; diff --git a/extension/src/transformers/generic.rs b/extension/src/transformers/generic.rs index bf3cf28..2ed0b46 100644 --- a/extension/src/transformers/generic.rs +++ b/extension/src/transformers/generic.rs @@ -1,24 +1,7 @@ use anyhow::Result; -use crate::guc; use vectorize_core::transformers::generic::{find_placeholders, interpolate}; -pub fn get_env_interpolated_guc(requested: guc::VectorizeGuc) -> Result { - if let Some(url) = guc::get_guc(requested.clone()) { - env_interpolate_string(&url) - } else { - match requested { - guc::VectorizeGuc::EmbeddingServiceUrl => { - Err(anyhow::anyhow!("vectorize.embedding_service_url not set")) - } - guc::VectorizeGuc::OpenAIServiceUrl => { - Err(anyhow::anyhow!("vectorize.openai_service_url not set")) - } - _ => Err(anyhow::anyhow!("GUC not found")), - } - } -} - /// Interpolates environment variables into a string /// if env var is missing, the placeholder is left as a raw string pub fn env_interpolate_string(input: &str) -> Result { diff --git a/extension/src/transformers/http_handler.rs b/extension/src/transformers/http_handler.rs index 93ab2f4..bcfdb32 100644 --- a/extension/src/transformers/http_handler.rs +++ b/extension/src/transformers/http_handler.rs @@ -1,29 +1,35 @@ -use super::generic::get_env_interpolated_guc; use crate::guc; -use anyhow::Result; +use anyhow::{Context, Result}; use pgrx::prelude::*; -use vectorize_core::transformers::providers::vector_serve::VectorServeProvider; -use vectorize_core::transformers::providers::EmbeddingProvider; +use vectorize_core::transformers::providers::get_provider; use 
vectorize_core::transformers::types::TransformerMetadata; +use vectorize_core::types::Model; #[pg_extern] pub fn mod_info(model_name: &str, api_key: default!(Option, "NULL")) -> pgrx::JsonB { - let meta = sync_get_model_info(model_name, api_key).unwrap(); + let transformer_model = Model::new(model_name) + .context("Invalid model name") + .unwrap(); + let mut guc_configs = guc::get_guc_configs(&transformer_model.source); + if let Some(key) = api_key { + guc_configs.api_key = Some(key); + } + let meta = sync_get_model_info(&transformer_model, &guc_configs).unwrap(); pgrx::JsonB(serde_json::to_value(meta).unwrap()) } pub fn sync_get_model_info( - model_name: &str, - api_key: Option, + model: &Model, + guc_configs: &guc::ModelGucConfig, ) -> Result { let runtime = tokio::runtime::Builder::new_current_thread() .enable_io() .enable_time() .build() .unwrap_or_else(|e| error!("failed to initialize tokio runtime: {}", e)); - let meta = match runtime.block_on(async { get_model_info(model_name, api_key).await }) { + let meta = match runtime.block_on(async { get_model_info(model, guc_configs).await }) { Ok(e) => e, Err(e) => { error!("error getting model info: {}", e); @@ -33,14 +39,18 @@ pub fn sync_get_model_info( } pub async fn get_model_info( - model_name: &str, - api_key: Option, + model: &Model, + guc_configs: &guc::ModelGucConfig, ) -> Result { - let svc_url = get_env_interpolated_guc(guc::VectorizeGuc::EmbeddingServiceUrl)?; - let provider = VectorServeProvider::new(Some(svc_url.clone()), api_key); - let dim = provider.model_dim(model_name).await?; + let provider = get_provider( + &model.source, + guc_configs.api_key.clone(), + guc_configs.service_url.clone(), + guc_configs.virtual_key.clone(), + )?; + let dim = provider.model_dim(&model.api_name()).await?; Ok(TransformerMetadata { - model: model_name.to_string(), + model: model.api_name(), max_seq_len: 0, embedding_dimension: dim as i32, }) From 273491576ac5322e4794e293f1ab61b69ee381ea Mon Sep 17 00:00:00 2001 From: 
Adam Hendel Date: Fri, 23 Aug 2024 09:26:30 -0500 Subject: [PATCH 2/2] fix docs --- docs/api/rag.md | 2 +- docs/api/search.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/api/rag.md b/docs/api/rag.md index 63231b8..ba9416a 100644 --- a/docs/api/rag.md +++ b/docs/api/rag.md @@ -15,7 +15,7 @@ vectorize.init_rag( "unique_record_id" TEXT, "column" TEXT, "schema" TEXT DEFAULT 'public', - "transformer" TEXT DEFAULT 'meta-llama/Meta-Llama-3-8B-Instruct', + "transformer" TEXT DEFAULT 'sentence-transformers/all-MiniLM-L6-v2', "index_dist_type" vectorize.IndexDist DEFAULT 'pgv_hnsw_cosine', "table_method" vectorize.TableMethod DEFAULT 'append' ) RETURNS TEXT diff --git a/docs/api/search.md b/docs/api/search.md index 59c8a54..60c73be 100644 --- a/docs/api/search.md +++ b/docs/api/search.md @@ -14,7 +14,7 @@ vectorize."table"( "primary_key" TEXT, "schema" TEXT DEFAULT 'public', "update_col" TEXT DEFAULT 'last_updated_at', - "transformer" TEXT DEFAULT 'meta-llama/Meta-Llama-3-8B-Instruct', + "transformer" TEXT DEFAULT 'sentence-transformers/all-MiniLM-L6-v2', "index_dist_type" vectorize.IndexDist DEFAULT 'pgv_hnsw_cosine', "table_method" vectorize.TableMethod DEFAULT 'join', "schedule" TEXT DEFAULT '* * * * *'