Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

change to tembo as default source #135

Merged
merged 2 commits into the base branch from the source branch (branch names lost in page capture)
Aug 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions core/src/transformers/providers/ollama.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,14 @@ impl EmbeddingProvider for OllamaProvider {
}

async fn model_dim(&self, model_name: &str) -> Result<u32, VectorizeError> {
let dim = match model_name {
"llama2" => 5192,
_ => 1536,
// determine embedding dim by generating an embedding and getting length of array
let req = GenericEmbeddingRequest {
input: vec!["hello world".to_string()],
model: model_name.to_string(),
};
Ok(dim)
let embedding = self.generate_embedding(&req).await?;
let dim = embedding.embeddings[0].len();
Ok(dim as u32)
}
}

Expand Down
2 changes: 1 addition & 1 deletion docs/api/rag.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ vectorize.init_rag(
"unique_record_id" TEXT,
"column" TEXT,
"schema" TEXT DEFAULT 'public',
"transformer" TEXT DEFAULT 'openai/text-embedding-ada-002',
"transformer" TEXT DEFAULT 'tembo/meta-llama/Meta-Llama-3-8B-Instruct',
"index_dist_type" vectorize.IndexDist DEFAULT 'pgv_hnsw_cosine',
"table_method" vectorize.TableMethod DEFAULT 'append'
) RETURNS TEXT
Expand Down
3 changes: 1 addition & 2 deletions docs/api/search.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,9 @@ vectorize."table"(
"columns" TEXT[],
"job_name" TEXT,
"primary_key" TEXT,
"args" json DEFAULT '{}',
"schema" TEXT DEFAULT 'public',
"update_col" TEXT DEFAULT 'last_updated_at',
"transformer" TEXT DEFAULT 'openai/text-embedding-ada-002',
"transformer" TEXT DEFAULT 'sentence-transformers/all-MiniLM-L6-v2',
"index_dist_type" vectorize.IndexDist DEFAULT 'pgv_hnsw_cosine',
"table_method" vectorize.TableMethod DEFAULT 'join',
"schedule" TEXT DEFAULT '* * * * *'
Expand Down
6 changes: 3 additions & 3 deletions docs/api/utilities.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@ Transforms a block of text to embeddings using the specified transformer.
Requires the `vector-serve` container to be set via `vectorize.embedding_service_url`, or an OpenAI key to be set if using OpenAI embedding models.

```sql
vectorize."transform_embeddings"(
vectorize."encode"(
"input" TEXT,
"model_name" TEXT DEFAULT 'openai/text-embedding-ada-002',
"model_name" TEXT DEFAULT 'sentence-transformers/all-MiniLM-L6-v2',
"api_key" TEXT DEFAULT NULL
) RETURNS double precision[]
```
Expand All @@ -25,7 +25,7 @@ vectorize."transform_embeddings"(
### Example

```sql
select vectorize.transform_embeddings(
select vectorize.encode(
input => 'the quick brown fox jumped over the lazy dogs',
model_name => 'sentence-transformers/multi-qa-MiniLM-L6-dot-v1'
);
Expand Down
89 changes: 75 additions & 14 deletions extension/sql/vectorize--0.17.1--0.18.0.sql
Original file line number Diff line number Diff line change
@@ -1,20 +1,81 @@
DROP FUNCTION vectorize."transform_embeddings";
CREATE FUNCTION vectorize."transform_embeddings"(
"input" TEXT, /* &str */
"model_name" TEXT DEFAULT 'sentence-transformers/all-MiniLM-L6-v2', /* alloc::string::String */
"api_key" TEXT DEFAULT NULL /* core::option::Option<alloc::string::String> */
) RETURNS double precision[] /* core::result::Result<alloc::vec::Vec<f64>, anyhow::Error> */
LANGUAGE c /* Rust */
AS 'MODULE_PATHNAME', 'transform_embeddings_wrapper';


DROP FUNCTION vectorize."rag";
CREATE FUNCTION vectorize."rag"(
"agent_name" TEXT, /* &str */
"query" TEXT, /* &str */
"chat_model" TEXT DEFAULT 'tembo/meta-llama/Meta-Llama-3-8B-Instruct', /* alloc::string::String */
"task" TEXT DEFAULT 'question_answer', /* alloc::string::String */
"api_key" TEXT DEFAULT NULL, /* core::option::Option<alloc::string::String> */
"num_context" INT DEFAULT 2, /* i32 */
"force_trim" bool DEFAULT false /* bool */
) RETURNS TABLE (
"chat_results" jsonb /* pgrx::datum::json::JsonB */
)
LANGUAGE c /* Rust */
AS 'MODULE_PATHNAME', 'rag_wrapper';


DROP FUNCTION vectorize."generate";
CREATE FUNCTION vectorize."generate"(
"input" TEXT, /* &str */
"model" TEXT DEFAULT 'tembo/meta-llama/Meta-Llama-3-8B-Instruct', /* alloc::string::String */
"api_key" TEXT DEFAULT NULL /* core::option::Option<alloc::string::String> */
) RETURNS TEXT /* core::result::Result<alloc::string::String, anyhow::Error> */
LANGUAGE c /* Rust */
AS 'MODULE_PATHNAME', 'generate_wrapper';


DROP FUNCTION vectorize."encode";
CREATE FUNCTION vectorize."encode"(
"input" TEXT, /* &str */
"model" TEXT DEFAULT 'sentence-transformers/all-MiniLM-L6-v2', /* alloc::string::String */
"api_key" TEXT DEFAULT NULL /* core::option::Option<alloc::string::String> */
) RETURNS double precision[] /* core::result::Result<alloc::vec::Vec<f64>, anyhow::Error> */
LANGUAGE c /* Rust */
AS 'MODULE_PATHNAME', 'encode_wrapper';


-- src/api.rs:14
-- vectorize::api::table
DROP FUNCTION vectorize."table";
CREATE FUNCTION vectorize."table"(
"table" TEXT, /* &str */
"columns" TEXT[], /* alloc::vec::Vec<alloc::string::String> */
"job_name" TEXT, /* &str */
"primary_key" TEXT, /* &str */
"schema" TEXT DEFAULT 'public', /* &str */
"update_col" TEXT DEFAULT 'last_updated_at', /* alloc::string::String */
"index_dist_type" vectorize.IndexDist DEFAULT 'pgv_hnsw_cosine', /* vectorize::types::IndexDist */
"transformer" TEXT DEFAULT 'openai/text-embedding-ada-002', /* &str */
"search_alg" vectorize.SimilarityAlg DEFAULT 'pgv_cosine_similarity', /* vectorize::types::SimilarityAlg */
"table_method" vectorize.TableMethod DEFAULT 'join', /* vectorize::types::TableMethod */
"schedule" TEXT DEFAULT '* * * * *' /* &str */
"table" TEXT, /* &str */
"columns" TEXT[], /* alloc::vec::Vec<alloc::string::String> */
"job_name" TEXT, /* &str */
"primary_key" TEXT, /* &str */
"schema" TEXT DEFAULT 'public', /* &str */
"update_col" TEXT DEFAULT 'last_updated_at', /* alloc::string::String */
"index_dist_type" vectorize.IndexDist DEFAULT 'pgv_hnsw_cosine', /* vectorize::types::IndexDist */
"transformer" TEXT DEFAULT 'sentence-transformers/all-MiniLM-L6-v2', /* &str */
"search_alg" vectorize.SimilarityAlg DEFAULT 'pgv_cosine_similarity', /* vectorize::types::SimilarityAlg */
"table_method" vectorize.TableMethod DEFAULT 'join', /* vectorize::types::TableMethod */
"schedule" TEXT DEFAULT '* * * * *' /* &str */
) RETURNS TEXT /* core::result::Result<alloc::string::String, anyhow::Error> */
STRICT
LANGUAGE c /* Rust */
AS 'MODULE_PATHNAME', 'table_wrapper';


DROP FUNCTION vectorize."init_rag";
CREATE FUNCTION vectorize."init_rag"(
"agent_name" TEXT, /* &str */
"table_name" TEXT, /* &str */
"unique_record_id" TEXT, /* &str */
"column" TEXT, /* &str */
"schema" TEXT DEFAULT 'public', /* &str */
"index_dist_type" vectorize.IndexDist DEFAULT 'pgv_hnsw_cosine', /* vectorize::types::IndexDist */
"transformer" TEXT DEFAULT 'sentence-transformers/all-MiniLM-L6-v2', /* &str */
"search_alg" vectorize.SimilarityAlg DEFAULT 'pgv_cosine_similarity', /* vectorize::types::SimilarityAlg */
"table_method" vectorize.TableMethod DEFAULT 'join', /* vectorize::types::TableMethod */
"schedule" TEXT DEFAULT '* * * * *' /* &str */
) RETURNS TEXT /* core::result::Result<alloc::string::String, anyhow::Error> */
STRICT
LANGUAGE c /* Rust */
AS 'MODULE_PATHNAME', 'table_wrapper';
AS 'MODULE_PATHNAME', 'init_rag_wrapper';
15 changes: 6 additions & 9 deletions extension/src/api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ fn table(
schema: default!(&str, "'public'"),
update_col: default!(String, "'last_updated_at'"),
index_dist_type: default!(types::IndexDist, "'pgv_hnsw_cosine'"),
transformer: default!(&str, "'openai/text-embedding-ada-002'"),
transformer: default!(&str, "'sentence-transformers/all-MiniLM-L6-v2'"),
// search_alg is now deprecated
search_alg: default!(types::SimilarityAlg, "'pgv_cosine_similarity'"),
table_method: default!(types::TableMethod, "'join'"),
Expand Down Expand Up @@ -67,7 +67,7 @@ fn search(
#[pg_extern]
fn transform_embeddings(
input: &str,
model_name: default!(String, "'openai/text-embedding-ada-002'"),
model_name: default!(String, "'sentence-transformers/all-MiniLM-L6-v2'"),
api_key: default!(Option<String>, "NULL"),
) -> Result<Vec<f64>> {
let model = Model::new(&model_name)?;
Expand All @@ -77,7 +77,7 @@ fn transform_embeddings(
#[pg_extern]
fn encode(
input: &str,
model: default!(String, "'openai/text-embedding-ada-002'"),
model: default!(String, "'sentence-transformers/all-MiniLM-L6-v2'"),
api_key: default!(Option<String>, "NULL"),
) -> Result<Vec<f64>> {
let model = Model::new(&model)?;
Expand All @@ -95,7 +95,7 @@ fn init_rag(
schema: default!(&str, "'public'"),
index_dist_type: default!(types::IndexDist, "'pgv_hnsw_cosine'"),
// transformer model to use in vector-search
transformer: default!(&str, "'openai/text-embedding-ada-002'"),
transformer: default!(&str, "'sentence-transformers/all-MiniLM-L6-v2'"),
// similarity algorithm to use in vector-search
// search_alg is now deprecated
search_alg: default!(types::SimilarityAlg, "'pgv_cosine_similarity'"),
Expand Down Expand Up @@ -126,10 +126,7 @@ fn init_rag(
fn rag(
agent_name: &str,
query: &str,
// chat models: currently only supports gpt 3.5 and 4
// https://platform.openai.com/docs/models/gpt-3-5-turbo
// https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo
chat_model: default!(String, "'openai/gpt-3.5-turbo'"),
chat_model: default!(String, "'tembo/meta-llama/Meta-Llama-3-8B-Instruct'"),
// points to the type of prompt template to use
task: default!(String, "'question_answer'"),
api_key: default!(Option<String>, "NULL"),
Expand All @@ -155,7 +152,7 @@ fn rag(
#[pg_extern]
fn generate(
input: &str,
model: default!(String, "'openai/gpt-3.5-turbo'"),
model: default!(String, "'tembo/meta-llama/Meta-Llama-3-8B-Instruct'"),
api_key: default!(Option<String>, "NULL"),
) -> Result<String> {
let model = Model::new(&model)?;
Expand Down
17 changes: 0 additions & 17 deletions extension/src/transformers/generic.rs
Original file line number Diff line number Diff line change
@@ -1,24 +1,7 @@
use anyhow::Result;

use crate::guc;
use vectorize_core::transformers::generic::{find_placeholders, interpolate};

pub fn get_env_interpolated_guc(requested: guc::VectorizeGuc) -> Result<String> {
if let Some(url) = guc::get_guc(requested.clone()) {
env_interpolate_string(&url)
} else {
match requested {
guc::VectorizeGuc::EmbeddingServiceUrl => {
Err(anyhow::anyhow!("vectorize.embedding_service_url not set"))
}
guc::VectorizeGuc::OpenAIServiceUrl => {
Err(anyhow::anyhow!("vectorize.openai_service_url not set"))
}
_ => Err(anyhow::anyhow!("GUC not found")),
}
}
}

/// Interpolates environment variables into a string
/// if env var is missing, the placeholder is left as a raw string
pub fn env_interpolate_string(input: &str) -> Result<String> {
Expand Down
38 changes: 24 additions & 14 deletions extension/src/transformers/http_handler.rs
Original file line number Diff line number Diff line change
@@ -1,29 +1,35 @@
use super::generic::get_env_interpolated_guc;
use crate::guc;
use anyhow::Result;
use anyhow::{Context, Result};

use pgrx::prelude::*;

use vectorize_core::transformers::providers::vector_serve::VectorServeProvider;
use vectorize_core::transformers::providers::EmbeddingProvider;
use vectorize_core::transformers::providers::get_provider;
use vectorize_core::transformers::types::TransformerMetadata;
use vectorize_core::types::Model;

#[pg_extern]
pub fn mod_info(model_name: &str, api_key: default!(Option<String>, "NULL")) -> pgrx::JsonB {
let meta = sync_get_model_info(model_name, api_key).unwrap();
let transformer_model = Model::new(model_name)
.context("Invalid model name")
.unwrap();
let mut guc_configs = guc::get_guc_configs(&transformer_model.source);
if let Some(key) = api_key {
guc_configs.api_key = Some(key);
}
let meta = sync_get_model_info(&transformer_model, &guc_configs).unwrap();
pgrx::JsonB(serde_json::to_value(meta).unwrap())
}

pub fn sync_get_model_info(
model_name: &str,
api_key: Option<String>,
model: &Model,
guc_configs: &guc::ModelGucConfig,
) -> Result<TransformerMetadata> {
let runtime = tokio::runtime::Builder::new_current_thread()
.enable_io()
.enable_time()
.build()
.unwrap_or_else(|e| error!("failed to initialize tokio runtime: {}", e));
let meta = match runtime.block_on(async { get_model_info(model_name, api_key).await }) {
let meta = match runtime.block_on(async { get_model_info(model, guc_configs).await }) {
Ok(e) => e,
Err(e) => {
error!("error getting model info: {}", e);
Expand All @@ -33,14 +39,18 @@ pub fn sync_get_model_info(
}

pub async fn get_model_info(
model_name: &str,
api_key: Option<String>,
model: &Model,
guc_configs: &guc::ModelGucConfig,
) -> Result<TransformerMetadata> {
let svc_url = get_env_interpolated_guc(guc::VectorizeGuc::EmbeddingServiceUrl)?;
let provider = VectorServeProvider::new(Some(svc_url.clone()), api_key);
let dim = provider.model_dim(model_name).await?;
let provider = get_provider(
&model.source,
guc_configs.api_key.clone(),
guc_configs.service_url.clone(),
guc_configs.virtual_key.clone(),
)?;
let dim = provider.model_dim(&model.api_name()).await?;
Ok(TransformerMetadata {
model: model_name.to_string(),
model: model.api_name(),
max_seq_len: 0,
embedding_dimension: dim as i32,
})
Expand Down
Loading