tembo ai model source (#123)
* makes Tembo a viable model source target
* allows users to configure the OpenAI base URL
* changes project defaults from pg15 to pg16
ChuckHend authored Jun 6, 2024
1 parent 6397964 commit 1012741
Showing 20 changed files with 332 additions and 114 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/build-ollama-serve.yml
@@ -24,7 +24,7 @@ defaults:
run:
shell: bash
working-directory: ./ollama-serve/

jobs:
build_and_push:
name: Build and push images
@@ -43,7 +43,7 @@ jobs:
echo "SHORT_SHA=$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3

- name: Set up QEMU
uses: docker/setup-qemu-action@v3
- name: Set up Docker Buildx
6 changes: 3 additions & 3 deletions .github/workflows/docs.yml
@@ -4,8 +4,8 @@ on:
branches:
- main
paths:
-      - 'docs/**'
-      - '.github/workflows/docs.yml'
+      - "docs/**"
+      - ".github/workflows/docs.yml"
jobs:
deploy:
runs-on: ubuntu-latest
@@ -19,5 +19,5 @@ jobs:

- name: deploy docs
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: poetry run mkdocs gh-deploy --force
50 changes: 23 additions & 27 deletions .github/workflows/extension_ci.yml
@@ -10,13 +10,13 @@ on:
branches:
- main
paths-ignore:
-      - 'README.md'
+      - "README.md"
- docs/**
push:
branches:
- main
paths-ignore:
-      - 'README.md'
+      - "README.md"
- docs/**

release:
@@ -39,12 +39,12 @@ jobs:
- name: Install stoml and pg-trunk
shell: bash
run: |
set -xe
wget https://github.com/freshautomations/stoml/releases/download/v0.7.1/stoml_linux_amd64 &> /dev/null
mv stoml_linux_amd64 stoml
chmod +x stoml
sudo mv stoml /usr/local/bin/
cargo install pg-trunk
- name: Cache binaries
uses: actions/cache@v2
@@ -120,11 +120,9 @@
${{ runner.os }}-bins-
- name: setup-tests
run: |
-        pgrx15_config=$(/usr/local/bin/stoml ~/.pgrx/config.toml configs.pg15)
-        ~/.cargo/bin/trunk install pgvector --pg-config ${pgrx15_config}
-        ~/.cargo/bin/trunk install pgmq --pg-config ${pgrx15_config}
-        ~/.cargo/bin/trunk install pg_cron --pg-config ${pgrx15_config}
-        echo "shared_preload_libraries = 'pg_cron, vectorize'" >> ~/.pgrx/data-15/postgresql.conf
+        make trunk-dependencies
+        make setup.urls
+        make setup.shared_preload_libraries
rm -rf ./target/pgrx-test-data-* || true
- name: unit-test
run: |
@@ -133,8 +131,6 @@
env:
HF_API_KEY: ${{ secrets.HF_API_KEY }}
run: |
-        pgrx15_config=$(/usr/local/bin/stoml ~/.pgrx/config.toml configs.pg15)
-        pg_version=$(/usr/local/bin/stoml Cargo.toml features.default)
echo "\q" | make run
make test-integration
@@ -160,12 +156,12 @@
- name: Install stoml and pg-trunk
shell: bash
run: |
set -xe
wget https://github.com/freshautomations/stoml/releases/download/v0.7.1/stoml_linux_amd64 &> /dev/null
mv stoml_linux_amd64 stoml
chmod +x stoml
sudo mv stoml /usr/local/bin/
cargo install pg-trunk
- name: trunk build
working-directory: ./extension
# trunk does not support path dependencies in Cargo.toml that are not within the current working directory
@@ -184,7 +180,7 @@
needs:
- publish
env:
TARGET_PLATFORMS: linux/amd64,linux/arm64
runs-on:
- self-hosted
- dind
@@ -202,11 +198,11 @@
- name: Install stoml and pg-trunk
shell: bash
run: |
set -xe
wget https://github.com/freshautomations/stoml/releases/download/v0.7.1/stoml_linux_amd64 &> /dev/null
mv stoml_linux_amd64 stoml
chmod +x stoml
sudo mv stoml /usr/local/bin/
- name: Set version strings
id: versions
run: |
4 changes: 2 additions & 2 deletions core/src/transformers/openai.rs
@@ -6,7 +6,7 @@ use crate::types::{JobParams, VectorizeMeta};
// max token length is 8192
// however, depending on content of text, token count can be higher than
pub const MAX_TOKEN_LEN: usize = 8192;
-pub const OPENAI_EMBEDDING_URL: &str = "https://api.openai.com/v1/embeddings";
+pub const OPENAI_BASE_URL: &str = "https://api.openai.com/v1";

pub fn prepare_openai_request(
vect_meta: VectorizeMeta,
@@ -30,7 +30,7 @@ pub fn prepare_openai_request(
},
};
Ok(EmbeddingRequest {
-        url: OPENAI_EMBEDDING_URL.to_owned(),
+        url: format!("{OPENAI_BASE_URL}/embeddings"),
payload,
api_key: Some(apikey),
})
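The change above replaces the hard-coded embeddings URL with a base-URL constant, which is what makes the endpoint overridable for OpenAI-compatible servers. A minimal sketch of the composition, assuming an environment-variable override (the `VECTORIZE_OPENAI_BASE_URL` name is illustrative, not from this commit; the extension wires the override through its own configuration):

```rust
// Sketch: build the embeddings endpoint from an overridable base URL.
// VECTORIZE_OPENAI_BASE_URL is a hypothetical variable name for illustration.
const OPENAI_BASE_URL: &str = "https://api.openai.com/v1";

fn embeddings_url() -> String {
    let base = std::env::var("VECTORIZE_OPENAI_BASE_URL")
        .unwrap_or_else(|_| OPENAI_BASE_URL.to_string());
    // trim any trailing slash so both "http://host/v1" and "http://host/v1/" work
    format!("{}/embeddings", base.trim_end_matches('/'))
}
```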
39 changes: 31 additions & 8 deletions core/src/types.rs
@@ -164,7 +164,10 @@ pub enum ModelError {
impl Model {
pub fn new(input: &str) -> Result<Self, ModelError> {
let mut parts: Vec<&str> = input.split('/').collect();
-        let missing_source = parts.len() != 2;
+        let missing_source = parts.len() < 2;
+        if parts.len() > 3 {
+            return Err(ModelError::InvalidFormat(input.to_string()));
+        }
if missing_source && parts[0] == "text-embedding-ada-002" {
// for backwards compatibility, prepend "openai" to text-embedding-ada-2
parts.insert(0, "openai");
@@ -178,10 +181,22 @@
.parse::<ModelSource>()
.map_err(|_| ModelError::InvalidSource(parts[0].to_string()))?;

+        let name = if source == ModelSource::Tembo {
+            // removes the leading /tembo from the model name
+            parts.remove(0);
+            // all others remain the same
+            parts.join("/")
+        } else {
+            parts
+                .last()
+                .expect("expected non-empty model name")
+                .to_string()
+        };

Ok(Self {
source,
-            fullname: format!("{}/{}", parts[0], parts[1]),
-            name: parts[1].to_string(),
+            fullname: parts.join("/"),
+            name,
})
}
}
@@ -199,6 +214,7 @@ pub enum ModelSource {
OpenAI,
SentenceTransformers,
Ollama,
+    Tembo,
}

impl FromStr for ModelSource {
@@ -209,6 +225,7 @@
"ollama" => Ok(ModelSource::Ollama),
"openai" => Ok(ModelSource::OpenAI),
"sentence-transformers" => Ok(ModelSource::SentenceTransformers),
"tembo" => Ok(ModelSource::Tembo),
_ => Ok(ModelSource::SentenceTransformers),
}
}
@@ -220,6 +237,7 @@ impl Display for ModelSource {
ModelSource::Ollama => write!(f, "ollama"),
ModelSource::OpenAI => write!(f, "openai"),
ModelSource::SentenceTransformers => write!(f, "sentence-transformers"),
+            ModelSource::Tembo => write!(f, "tembo"),
}
}
}
@@ -230,6 +248,7 @@ impl From<String> for ModelSource {
"ollama" => ModelSource::Ollama,
"openai" => ModelSource::OpenAI,
"sentence-transformers" => ModelSource::SentenceTransformers,
"tembo" => ModelSource::Tembo,
// other cases are assumed to be private sentence-transformer compatible model
// and can be hot-loaded
_ => ModelSource::SentenceTransformers,
@@ -242,10 +261,19 @@
mod model_tests {
use super::*;

+    #[test]
+    fn test_tembo_parsing() {
+        let model = Model::new("tembo/meta-llama/Meta-Llama-3-8B-Instruct").unwrap();
+        assert_eq!(model.source, ModelSource::Tembo);
+        assert_eq!(model.fullname, "meta-llama/Meta-Llama-3-8B-Instruct");
+        assert_eq!(model.name, "meta-llama/Meta-Llama-3-8B-Instruct");
+    }

#[test]
fn test_ollama_parsing() {
let model = Model::new("ollama/wizardlm2:7b").unwrap();
assert_eq!(model.source, ModelSource::Ollama);
assert_eq!(model.fullname, "ollama/wizardlm2:7b");
assert_eq!(model.name, "wizardlm2:7b");
}

@@ -295,11 +323,6 @@ mod model_tests {
assert!(Model::new("openaimodel-name").is_err());
}

-    #[test]
-    fn test_invalid_format_extra_slash() {
-        assert!(Model::new("openai/model/name").is_err());
-    }

#[test]
fn test_backwards_compatibility() {
let model = Model::new("text-embedding-ada-002").unwrap();
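The new parsing rules are easiest to read as examples: a Tembo model drops the source prefix from `fullname` (Tembo appears to take Hugging Face-style `org/model` identifiers), every other source keeps it, and anything with more than three segments is rejected. A quick sketch, assuming the `Model` and `ModelSource` types from the diff above are in scope:

```rust
// Expected behavior under the new rules (Model/ModelSource from this diff).
let ollama = Model::new("ollama/wizardlm2:7b").unwrap();
assert_eq!(ollama.fullname, "ollama/wizardlm2:7b"); // source prefix kept

let tembo = Model::new("tembo/meta-llama/Meta-Llama-3-8B-Instruct").unwrap();
assert_eq!(tembo.fullname, "meta-llama/Meta-Llama-3-8B-Instruct"); // "tembo/" stripped

assert!(Model::new("a/b/c/d").is_err()); // four or more segments now fail fast
```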
6 changes: 4 additions & 2 deletions core/src/worker/base.rs
@@ -79,7 +79,7 @@ impl Config {
}

/// source a variable from environment - use default if not exists
-fn from_env_default(key: &str, default: &str) -> String {
+pub fn from_env_default(key: &str, default: &str) -> String {
env::var(key).unwrap_or_else(|_| default.to_owned())
}

@@ -98,7 +98,9 @@ async fn execute_job(
&msg.message.inputs,
cfg.openai_api_key.clone(),
)?,
-        ModelSource::Ollama => Err(anyhow::anyhow!("Ollama transformer not implemented yet"))?,
+        ModelSource::Ollama | ModelSource::Tembo => Err(anyhow::anyhow!(
+            "Ollama/Tembo transformer not implemented yet"
+        ))?,
ModelSource::SentenceTransformers => generic::prepare_generic_embedding_request(
job_meta.clone(),
&msg.message.inputs,
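Making `from_env_default` public lets other modules source a setting with an environment override and a fallback in one line. A usage sketch (the key name here is illustrative, not from this commit):

```rust
// Read an optional override from the environment, falling back to the default.
let base_url = from_env_default("OPENAI_BASE_URL", "https://api.openai.com/v1");
```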
4 changes: 2 additions & 2 deletions extension/Cargo.toml
@@ -1,14 +1,14 @@
[package]
name = "vectorize"
version = "0.15.1"
version = "0.16.0"
edition = "2021"
publish = false

[lib]
crate-type = ["cdylib"]

[features]
default = ["pg15"]
default = ["pg16"]
pg14 = ["pgrx/pg14", "pgrx-tests/pg14"]
pg15 = ["pgrx/pg15", "pgrx-tests/pg15"]
pg16 = ["pgrx/pg16", "pgrx-tests/pg16"]
18 changes: 13 additions & 5 deletions extension/Makefile
@@ -1,10 +1,10 @@
SQLX_OFFLINE:=true
-DATABASE_URL:=postgres://${USER}:${USER}@localhost:28815/postgres
DISTNAME = $(shell grep -m 1 '^name' Trunk.toml | sed -e 's/[^"]*"\([^"]*\)",\{0,1\}/\1/')
DISTVERSION = $(shell grep -m 1 '^version' Trunk.toml | sed -e 's/[^"]*"\([^"]*\)",\{0,1\}/\1/')
-PG_VERSION:=15
+PG_VERSION:=16
+DATABASE_URL:=postgres://${USER}:${USER}@localhost:288${PG_VERSION}/postgres
+PGRX_PG_CONFIG =$(shell cargo pgrx info pg-config pg${PG_VERSION})
-UPGRADE_FROM_VER:=0.9.0
+UPGRADE_FROM_VER:=0.16.0
BRANCH:=$(git rev-parse --abbrev-ref HEAD)
RUST_LOG:=debug

@@ -40,11 +40,17 @@ setup.shared_preload_libraries:
setup.urls:
echo "vectorize.embedding_service_url = 'http://localhost:3000/v1/embeddings'" >> ~/.pgrx/data-${PG_VERSION}/postgresql.conf
echo "vectorize.ollama_service_url = 'http://localhost:3001'" >> ~/.pgrx/data-${PG_VERSION}/postgresql.conf
setup: setup.dependencies setup.shared_preload_libraries
setup: setup.dependencies setup.shared_preload_libraries setup.urls

cat-logs:
cat ~/.pgrx/${PG_VERSION}.log

+# install dependencies from trunk (limited OS/arch)
+trunk-dependencies:
+	trunk install pgvector --pg-config ${PGRX_PG_CONFIG}
+	trunk install pgmq --pg-config ${PGRX_PG_CONFIG}
+	trunk install pg_cron --pg-config ${PGRX_PG_CONFIG}

install-pg_cron:
git clone https://github.com/citusdata/pg_cron.git && \
cd pg_cron && \
@@ -65,7 +71,9 @@ install-pgvector:
install-pgmq:
git clone https://github.com/tembo-io/pgmq.git && \
cd pgmq && \
-	cargo pgrx install --pg-config=${PGRX_PG_CONFIG} && \
+	PG_CONFIG=${PGRX_PG_CONFIG} make clean && \
+	PG_CONFIG=${PGRX_PG_CONFIG} make && \
+	PG_CONFIG=${PGRX_PG_CONFIG} make install && \
cd .. && rm -rf pgmq

test-integration:
2 changes: 1 addition & 1 deletion extension/Trunk.toml
@@ -6,7 +6,7 @@ description = "The simplest way to orchestrate vector search on Postgres."
homepage = "https://github.com/tembo-io/pg_vectorize"
documentation = "https://github.com/tembo-io/pg_vectorize"
categories = ["orchestration", "machine_learning"]
version = "0.15.1"
version = "0.16.0"

[build]
postgres_version = "15"
28 changes: 28 additions & 0 deletions extension/sql/vectorize--0.15.1--0.16.0.sql
@@ -0,0 +1,28 @@
-- src/api.rs:158
-- vectorize::api::generate
CREATE FUNCTION vectorize."generate"(
"input" TEXT, /* &str */
"model" TEXT DEFAULT 'openai/gpt-3.5-turbo', /* alloc::string::String */
"api_key" TEXT DEFAULT NULL /* core::option::Option<alloc::string::String> */
) RETURNS TEXT /* core::result::Result<alloc::string::String, anyhow::Error> */
LANGUAGE c /* Rust */
AS 'MODULE_PATHNAME', 'generate_wrapper';

-- src/api.rs:168
-- vectorize::api::env_interpolate_guc
CREATE FUNCTION vectorize."env_interpolate_guc"(
"guc_name" TEXT /* &str */
) RETURNS TEXT /* core::result::Result<alloc::string::String, anyhow::Error> */
STRICT
LANGUAGE c /* Rust */
AS 'MODULE_PATHNAME', 'env_interpolate_guc_wrapper';

-- src/api.rs:79
-- vectorize::api::encode
CREATE FUNCTION vectorize."encode"(
"input" TEXT, /* &str */
"model" TEXT DEFAULT 'openai/text-embedding-ada-002', /* alloc::string::String */
"api_key" TEXT DEFAULT NULL /* core::option::Option<alloc::string::String> */
) RETURNS double precision[] /* core::result::Result<alloc::vec::Vec<f64>, anyhow::Error> */
LANGUAGE c /* Rust */
AS 'MODULE_PATHNAME', 'encode_wrapper';