Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

upgrade test #54

Merged
merged 39 commits into from
Feb 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
b45ed67
add test tools
ChuckHend Feb 16, 2024
56d811d
change ver
ChuckHend Feb 16, 2024
bc91283
upgrade
ChuckHend Feb 16, 2024
761ddcd
add clean
ChuckHend Feb 16, 2024
c6d9617
add upgrade
ChuckHend Feb 16, 2024
0bb08a8
share preloads
ChuckHend Feb 16, 2024
3f184bc
change ver
ChuckHend Feb 16, 2024
3bb5b0c
move make
ChuckHend Feb 16, 2024
4525987
change name
ChuckHend Feb 16, 2024
1a08172
debug setup
ChuckHend Feb 16, 2024
9e59cc5
cat Makefile
ChuckHend Feb 16, 2024
5ec02b0
add phony
ChuckHend Feb 16, 2024
57dcc4b
checkout
ChuckHend Feb 16, 2024
c35b496
fix order
ChuckHend Feb 16, 2024
5fa281a
remove compiler flags
ChuckHend Feb 16, 2024
0dacfa6
no cmd
ChuckHend Feb 16, 2024
d93cdca
move branch
ChuckHend Feb 16, 2024
5020125
fix cmd
ChuckHend Feb 16, 2024
19f1e12
fix formula
ChuckHend Feb 17, 2024
cbdc40d
run embedding container in upgrade ci
ChuckHend Feb 17, 2024
5c280f7
debug existing ci
ChuckHend Feb 17, 2024
19148fe
pull tag
ChuckHend Feb 17, 2024
43ae397
use git directly
ChuckHend Feb 17, 2024
f8569a7
Merge branch 'ci/upgradeTest' of github.com:tembo-io/pg_vectorize int…
ChuckHend Feb 17, 2024
4d68d57
test from 0.10
ChuckHend Feb 17, 2024
961a216
fix make command
ChuckHend Feb 17, 2024
6857eac
debug logs
ChuckHend Feb 17, 2024
84a28f9
fix makefile
ChuckHend Feb 17, 2024
3bf7c54
update
ChuckHend Feb 17, 2024
a811fcc
Update Makefile
ChuckHend Feb 17, 2024
50a85b2
extra tab
ChuckHend Feb 17, 2024
121001e
re-checkout
ChuckHend Feb 17, 2024
052138b
phony
ChuckHend Feb 17, 2024
501fa11
test from main
ChuckHend Feb 17, 2024
20f2b98
retries on tests
ChuckHend Feb 17, 2024
14b50c1
Merge branch 'ci/upgradeTest' of github.com:tembo-io/pg_vectorize int…
ChuckHend Feb 17, 2024
2778f54
unused import
ChuckHend Feb 17, 2024
49d12aa
update makefile
ChuckHend Feb 17, 2024
dc97565
update ci
ChuckHend Feb 17, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 72 additions & 0 deletions .github/workflows/extension_upgrade.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
name: Extension Upgrade

defaults:
  run:
    shell: bash
    working-directory: ./

on:
  pull_request:
    branches:
      - main
  push:
    branches:
      - main
  release:
    types:
      - created

jobs:
  test:
    name: Upgrade Test
    runs-on: ubuntu-22.04
    services:
      # embedding service the integration tests call
      vector-serve:
        image: quay.io/tembo/vector-serve:latest
        ports:
          - 3000:3000
    steps:
      - name: Checkout repository content
        uses: actions/checkout@v2

      - name: Install Rust stable toolchain
        uses: actions-rs/toolchain@v1
        with:
          toolchain: stable
      - uses: Swatinem/rust-cache@v2
        with:
          prefix-key: "extension-upgrade-test"
          workspaces: |
            vectorize
          # Additional directories to cache
          cache-directories: |
            /home/runner/.pgrx
      - name: Get current version
        id: current-version
        # record the ref we started on so later steps can return to it
        # after checking out main
        run: echo "CI_BRANCH=$(git name-rev --name-only HEAD)" >> $GITHUB_OUTPUT
      - uses: ./.github/actions/pgx-init
        with:
          working-directory: ./
      - name: Install sys dependencies
        run: |
          sudo apt-get update && sudo apt-get install -y postgresql-server-dev-14
          make setup
      # fix: step name had a stray closing paren — "(main))"
      - name: Test previous version (main)
        run: |
          git fetch --tags
          git checkout main
          echo "\q" | make run
          cargo test -- --ignored --test-threads=1
      - name: Test branch's version
        env:
          CI_BRANCH: ${{ steps.current-version.outputs.CI_BRANCH }}
        run: |
          git checkout $CI_BRANCH
          make test-update
      - name: Debugging information
        if: always()
        env:
          CI_BRANCH: ${{ steps.current-version.outputs.CI_BRANCH }}
        run: |
          git checkout $CI_BRANCH
          make cat-logs
49 changes: 48 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@ SQLX_OFFLINE:=true
DATABASE_URL:=postgres://${USER}:${USER}@localhost:28815/postgres
DISTNAME = $(shell grep -m 1 '^name' Trunk.toml | sed -e 's/[^"]*"\([^"]*\)",\{0,1\}/\1/')
DISTVERSION = $(shell grep -m 1 '^version' Trunk.toml | sed -e 's/[^"]*"\([^"]*\)",\{0,1\}/\1/')
# Target Postgres major version for the pgrx-managed instance.
PG_VERSION:=15
# pg_config of the pgrx-managed Postgres; recursively expanded (`=`) so
# `cargo pgrx` is only invoked when a recipe actually references it.
PGRX_PG_CONFIG =$(shell cargo pgrx info pg-config pg${PG_VERSION})
# Release tag that the upgrade test starts from (see test-version).
UPGRADE_FROM_VER:=0.9.0
# fix: was `$(git rev-parse --abbrev-ref HEAD)`, which expands an (empty)
# make variable literally named "git rev-parse --abbrev-ref HEAD" — a
# $(shell ...) call is required to actually run git.
BRANCH:=$(shell git rev-parse --abbrev-ref HEAD)

# fix: phony list now matches the real target names
# (install-pgvector, not install-pg_vector; test-update, not test-branch/test-upgrade)
.PHONY: install-pg_cron install-pgvector install-pgmq run setup test-integration test-unit test-version test-update cat-logs

sqlx-cache:
cargo sqlx prepare
Expand All @@ -12,7 +18,7 @@ format:

# ensure the DATABASE_URL is not used, since pgrx will stop postgres during compile
run:
SQLX_OFFLINE=true DATABASE_URL=${DATABASE_URL} cargo pgrx run pg15 postgres
SQLX_OFFLINE=true DATABASE_URL=${DATABASE_URL} cargo pgrx run pg${PG_VERSION} postgres

META.json: META.json.in Trunk.toml
@sed "s/@CARGO_VERSION@/$(DISTVERSION)/g" META.json.in > META.json
Expand All @@ -27,9 +33,50 @@ pgxn-zip: $(DISTNAME)-$(DISTVERSION).zip
clean:
@rm -rf META.json $(DISTNAME)-$(DISTVERSION).zip

# Build/install all extension dependencies, then register the shared
# preload libraries in the pgrx data dir's postgresql.conf.
# Idempotent: the preload line is only appended if not already present,
# so re-running `make setup` no longer duplicates it.
setup: install-pg_cron install-pgvector install-pgmq
	grep -qF "shared_preload_libraries = 'pg_cron, vectorize'" ~/.pgrx/data-${PG_VERSION}/postgresql.conf || \
		echo "shared_preload_libraries = 'pg_cron, vectorize'" >> ~/.pgrx/data-${PG_VERSION}/postgresql.conf

# Dump the pgrx-managed Postgres server log for the configured version
# (used by CI's always-run "Debugging information" step).
cat-logs:
	cat ~/.pgrx/${PG_VERSION}.log

# Clone, build, and install pg_cron against the pgrx-managed Postgres,
# then remove the clone. -Werror is stripped from pg_cron's Makefile so
# warnings from newer compilers don't fail the build.
# NOTE(review): a leftover pg_cron/ dir from a failed run will make the
# clone fail — re-run after `rm -rf pg_cron` if that happens.
install-pg_cron:
	git clone https://github.com/citusdata/pg_cron.git && \
	cd pg_cron && \
	sed -i.bak 's/-Werror//g' Makefile && \
	PG_CONFIG=${PGRX_PG_CONFIG} make clean && \
	PG_CONFIG=${PGRX_PG_CONFIG} make && \
	PG_CONFIG=${PGRX_PG_CONFIG} make install && \
	cd .. && rm -rf pg_cron

# Clone pgvector pinned at v0.6.0, build/install it against the
# pgrx-managed Postgres, then remove the clone.
install-pgvector:
	git clone --branch v0.6.0 https://github.com/pgvector/pgvector.git && \
	cd pgvector && \
	PG_CONFIG=${PGRX_PG_CONFIG} make clean && \
	PG_CONFIG=${PGRX_PG_CONFIG} make && \
	PG_CONFIG=${PGRX_PG_CONFIG} make install && \
	cd .. && rm -rf pgvector

# Clone and install pgmq (a pgrx extension, so installed via cargo pgrx
# rather than PGXS), then remove the clone.
install-pgmq:
	git clone https://github.com/tembo-io/pgmq.git && \
	cd pgmq && \
	cargo pgrx install --pg-config=${PGRX_PG_CONFIG} && \
	cd .. && rm -rf pgmq

# Start the pgrx-managed Postgres (the piped \q exits the psql shell
# immediately, leaving the server running), then run the #[ignore]d
# integration tests serially to avoid cross-test interference.
test-integration:
	echo "\q" | make run
	cargo test -- --ignored --test-threads=1

# Run the pgrx in-database unit tests.
test-unit:
	cargo pgrx test

# Integration-test the tagged release v${UPGRADE_FROM_VER}: check out the
# tag, start the server, drop any pre-existing vectorize extension so the
# tests install that version fresh, then run the ignored tests serially.
test-version:
	git fetch --tags
	git checkout tags/v${UPGRADE_FROM_VER}
	echo "\q" | make run
	psql ${DATABASE_URL} -c "DROP EXTENSION IF EXISTS vectorize"
	cargo test -- --ignored --test-threads=1

# Upgrade the already-installed vectorize extension in place
# (ALTER EXTENSION ... UPDATE), then re-run the integration suite
# against the upgraded extension.
test-update:
	echo "\q" | make run
	psql ${DATABASE_URL} -c "ALTER EXTENSION vectorize UPDATE"
	make test-integration
18 changes: 18 additions & 0 deletions src/util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -169,3 +169,21 @@ pub fn get_pg_options(cfg: Config) -> Result<PgConnectOptions> {
}
}
}

/// Returns `true` once `CREATE EXTENSION vectorize` has completed: both the
/// `vectorize` schema and the pgmq job-queue table
/// (`pgmq.q_vectorize_jobs`) must exist.
///
/// fix: the original `.expect("failed")` aborted the whole process on any
/// query error (e.g. a transient connection failure) even though this is
/// called from a polling loop. A failed readiness probe now just reports
/// "not ready" so the caller retries on the next tick.
pub async fn ready(conn: &Pool<Postgres>) -> bool {
    sqlx::query_scalar(
        "SELECT EXISTS (
            SELECT 1
            FROM pg_tables
            WHERE schemaname = 'vectorize'
        ) AND EXISTS (
            SELECT 1
            FROM pg_tables
            WHERE schemaname = 'pgmq'
            AND tablename = 'q_vectorize_jobs'
        ) AS both_exist;",
    )
    .fetch_one(conn)
    .await
    .unwrap_or(false)
}
12 changes: 8 additions & 4 deletions src/workers/pg_bgw.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::guc::init_guc;
use crate::init::VECTORIZE_QUEUE;
use crate::util::get_pg_conn;
use crate::util::{get_pg_conn, ready};
use anyhow::Result;
use pgrx::bgworkers::*;
use pgrx::*;
Expand Down Expand Up @@ -40,9 +40,13 @@ pub extern "C" fn background_worker_main(_arg: pg_sys::Datum) {
log!("Starting BG Workers {}", BackgroundWorker::get_name(),);

while BackgroundWorker::wait_latch(Some(Duration::from_millis(10))) {
if BackgroundWorker::sighup_received() {
// on SIGHUP, you might want to reload configurations and env vars
}
runtime.block_on(async {
while !ready(&conn).await {
log!("pg-vectorize: waiting for CREATE EXTENSION vectorize");
tokio::time::sleep(Duration::from_secs(5)).await;
}
});

let _worker_ran: Result<()> = runtime.block_on(async {
// continue to poll without pauses
let start = Instant::now();
Expand Down
83 changes: 18 additions & 65 deletions tests/integration_tests.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
use rand::Rng;
mod util;
use sqlx::FromRow;

use rand::Rng;
use util::common;

// Integration tests are ignored by default
Expand Down Expand Up @@ -87,22 +85,10 @@ async fn test_realtime_job() {
.await
.expect("failed to init job");

// embedding should be updated after few seconds
tokio::time::sleep(tokio::time::Duration::from_secs(3)).await;

let result = sqlx::query(&format!(
"SELECT vectorize.search(
job_name => '{job_name}',
query => 'mobile devices',
return_columns => ARRAY['product_name', 'product_id'],
num_results => 3
);"
))
.execute(&conn)
.await
.expect("failed to select from test_table");
// 3 rows returned
assert_eq!(result.rows_affected(), 3);
let search_results = common::search_with_retry(&conn, "mobile devices", &job_name, 10, 2)
.await
.expect("failed to exec search");
assert_eq!(search_results.len(), 3);

let random_product_id = rng.gen_range(0..100000);

Expand All @@ -117,42 +103,20 @@ async fn test_realtime_job() {
.await
.expect("failed to insert into test_table");

// embedding should be updated after few seconds
tokio::time::sleep(tokio::time::Duration::from_secs(3)).await;

#[allow(dead_code)]
#[derive(FromRow, Debug, serde::Deserialize)]
struct SearchResult {
product_id: i32,
product_name: String,
similarity_score: f64,
}

#[allow(dead_code)]
#[derive(FromRow, Debug)]
struct SearchJSON {
search_results: serde_json::Value,
}

let result = sqlx::query_as::<_, SearchJSON>(&format!(
"SELECT search_results from vectorize.search(
job_name => '{job_name}',
query => 'car testing devices',
return_columns => ARRAY['product_id','product_name'],
num_results => 3
) as search_results;"
))
.fetch_all(&conn)
.await
.expect("failed to execute search");
// index will need to rebuild
tokio::time::sleep(tokio::time::Duration::from_secs(5 as u64)).await;
let search_results = common::search_with_retry(&conn, "car testing devices", &job_name, 10, 2)
.await
.expect("failed to exec search");

let mut found_it = false;
for row in result {
println!("row: {:?}", row);
let row: SearchResult = serde_json::from_value(row.search_results).unwrap();
for row in search_results {
let row: common::SearchResult = serde_json::from_value(row.search_results).unwrap();
if row.product_id == random_product_id {
assert_eq!(row.product_name, "car tester");
found_it = true;
} else {
println!("row: {:?}", row);
}
}
assert!(found_it);
Expand Down Expand Up @@ -184,20 +148,9 @@ async fn test_rag() {
.await
.expect("failed to init job");

// embedding should be updated after few seconds
tokio::time::sleep(tokio::time::Duration::from_secs(3)).await;

// must be able to conduct vector search on agent tables
let result = sqlx::query(&format!(
"SELECT vectorize.search(
job_name => '{agent_name}',
query => 'car testing devices',
return_columns => ARRAY['description'],
num_results => 3
);"
))
.execute(&conn)
.await
.expect("failed to select from test_table");
assert_eq!(result.rows_affected(), 3);
let search_results = common::search_with_retry(&conn, "mobile devices", &agent_name, 10, 2)
.await
.expect("failed to exec search");
assert_eq!(search_results.len(), 3);
}
47 changes: 46 additions & 1 deletion tests/util.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,25 @@
pub mod common {
use anyhow::Result;
use log::LevelFilter;
use sqlx::postgres::{PgConnectOptions, PgPoolOptions};
use sqlx::ConnectOptions;
use sqlx::{ConnectOptions, FromRow};
use sqlx::{Pool, Postgres, Row};
use url::{ParseError, Url};

/// One row of a `vectorize.search` result, deserialized from the JSON
/// payload (see [`SearchJSON`] usage in the integration tests) via
/// `serde_json::from_value`.
#[allow(dead_code)]
#[derive(FromRow, Debug, serde::Deserialize)]
pub struct SearchResult {
    pub product_id: i32,
    pub product_name: String,
    pub similarity_score: f64,
}

/// Raw row wrapper for `vectorize.search` output: holds the untyped JSON
/// `search_results` value as returned by Postgres, to be decoded into a
/// typed struct by the caller.
#[allow(dead_code)]
#[derive(FromRow, Debug)]
pub struct SearchJSON {
    pub search_results: serde_json::Value,
}

pub async fn connect(url: &str) -> Pool<Postgres> {
let options = conn_options(url).expect("failed to parse url");
println!("URL: {}", url);
Expand Down Expand Up @@ -101,4 +116,34 @@ pub mod common {
.expect("failed to init embedding svc url");
}
}

/// Run `vectorize.search` for `job_name`/`query`, retrying until the
/// expected 3 rows come back (embeddings are computed asynchronously, so
/// early attempts may see fewer rows).
///
/// Makes up to `retries` attempts, sleeping `delay_seconds` between them.
/// fix: no longer sleeps after the final failed attempt — the original
/// wasted one full delay before returning the timeout error. The query
/// string is also built once instead of per iteration.
pub async fn search_with_retry(
    conn: &Pool<Postgres>,
    query: &str,
    job_name: &str,
    retries: usize,
    delay_seconds: usize,
) -> Result<Vec<SearchJSON>> {
    // loop-invariant: build the SQL once
    let sql = format!(
        "SELECT * from vectorize.search(
        job_name => '{job_name}',
        query => '{query}',
        return_columns => ARRAY['product_id', 'product_name'],
        num_results => 3
    ) as search_results;"
    );
    let mut results: Vec<SearchJSON> = vec![];
    for i in 0..retries {
        results = sqlx::query_as::<_, SearchJSON>(&sql)
            .fetch_all(conn)
            .await?;
        if results.len() == 3 {
            return Ok(results);
        }
        println!("retrying search query: {}/{}", i + 1, retries);
        // skip the sleep on the last attempt; we are about to give up
        if i + 1 < retries {
            tokio::time::sleep(tokio::time::Duration::from_secs(delay_seconds as u64)).await;
        }
    }
    println!("results: {:?}", results);
    Err(anyhow::anyhow!("timed out waiting for search query"))
}
}
Loading