-
Notifications
You must be signed in to change notification settings - Fork 31
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix: Rename "table" to "table_name" and switch to REGCLASS for better schema handling #163
base: main
Are you sure you want to change the base?
Changes from all commits
3f5c377
09d16ef
b86f178
86455d0
912fd5c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
DROP function vectorize."table"; | ||
|
||
-- vectorize::api::table | ||
-- Defines vectorize."table" with its first argument named "table_name" and typed REGCLASS. | ||
-- REGCLASS input accepts a relation name (optionally schema-qualified) and resolves it to that relation's OID. | ||
-- STRICT: PostgreSQL returns NULL without invoking the function when any argument is NULL. | ||
-- LANGUAGE c / AS: the implementation is the 'table_wrapper' symbol in the extension library (MODULE_PATHNAME). | ||
CREATE FUNCTION vectorize."table"( | ||
"table_name" REGCLASS, /* PgOid*/ | ||
"columns" TEXT[], /* alloc::vec::Vec<alloc::string::String> */ | ||
"job_name" TEXT, /* alloc::string::String */ | ||
"primary_key" TEXT, /* alloc::string::String */ | ||
"args" json DEFAULT '{}', /* pgrx::datum::json::Json */ | ||
"update_col" TEXT DEFAULT 'last_updated_at', /* alloc::string::String */ | ||
"transformer" vectorize.Transformer DEFAULT 'openai', /* vectorize::types::Transformer */ | ||
"search_alg" vectorize.SimilarityAlg DEFAULT 'pgv_cosine_similarity', /* vectorize::types::SimilarityAlg */ | ||
"table_method" vectorize.TableMethod DEFAULT 'append', /* vectorize::types::TableMethod */ | ||
"schedule" TEXT DEFAULT '* * * * *' /* alloc::string::String */ | ||
) RETURNS TEXT /* core::result::Result<alloc::string::String, anyhow::Error> */ | ||
STRICT | ||
LANGUAGE c /* Rust */ | ||
AS 'MODULE_PATHNAME', 'table_wrapper'; |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -239,7 +239,7 @@ fn append_embedding_column(job_name: &str, schema: &str, table: &str, col_type: | |
) | ||
} | ||
|
||
pub fn get_column_datatype(schema: &str, table: &str, column: &str) -> Result<String> { | ||
pub fn get_column_datatype(table: &str, column: &str) -> Result<String> { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think here we still need the |
||
Spi::get_one_with_args( | ||
" | ||
SELECT data_type | ||
|
@@ -250,23 +250,20 @@ pub fn get_column_datatype(schema: &str, table: &str, column: &str) -> Result<St | |
AND column_name = $3 | ||
", | ||
vec![ | ||
(PgBuiltInOids::TEXTOID.oid(), schema.into_datum()), | ||
(PgBuiltInOids::TEXTOID.oid(), table.into_datum()), | ||
(PgBuiltInOids::TEXTOID.oid(), column.into_datum()), | ||
], | ||
) | ||
.map_err(|_| { | ||
anyhow!( | ||
"One of schema:`{}`, table:`{}`, column:`{}` does not exist.", | ||
schema, | ||
"One of table:`{}`, column:`{}` does not exist.", | ||
table, | ||
column | ||
) | ||
})? | ||
.ok_or_else(|| { | ||
anyhow!( | ||
"An unknown error occurred while fetching the data type for column `{}` in `{}.{}`.", | ||
schema, | ||
"An unknown error occurred while fetching the data type for column `{}` in `{}`.", | ||
table, | ||
column | ||
) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,7 +4,7 @@ use crate::init; | |
use crate::job::{create_event_trigger, create_trigger_handler, initalize_table_job}; | ||
use crate::transformers::openai; | ||
use crate::transformers::transform; | ||
use crate::util; | ||
use crate::util::*; | ||
|
||
use anyhow::{Context, Result}; | ||
use pgrx::prelude::*; | ||
|
@@ -15,8 +15,7 @@ use vectorize_core::types::{self, Model, ModelSource, TableMethod, VectorizeMeta | |
#[allow(clippy::too_many_arguments)] | ||
pub fn init_table( | ||
job_name: &str, | ||
schema: &str, | ||
table: &str, | ||
table_name: PgOid, | ||
columns: Vec<String>, | ||
primary_key: &str, | ||
update_col: Option<String>, | ||
|
@@ -28,14 +27,16 @@ pub fn init_table( | |
// cron-like for a cron based update model, or 'realtime' for a trigger-based | ||
schedule: &str, | ||
) -> Result<String> { | ||
let table_name_str = pg_oid_to_table_name(table_name); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I am hoping that we can determine the schema from the OID too, so maybe we have a |
||
|
||
// validate table method | ||
// realtime is only compatible with the join method | ||
if schedule == "realtime" && table_method != TableMethod::join { | ||
error!("realtime schedule is only compatible with the join table method"); | ||
} | ||
|
||
// get prim key type | ||
let pkey_type = init::get_column_datatype(schema, table, primary_key)?; | ||
let pkey_type = init::get_column_datatype(table_name, primary_key)?; | ||
init::init_pgmq()?; | ||
|
||
let guc_configs = get_guc_configs(&transformer.source); | ||
|
@@ -101,8 +102,7 @@ pub fn init_table( | |
}; | ||
|
||
let valid_params = types::JobParams { | ||
schema: schema.to_string(), | ||
table: table.to_string(), | ||
table: table_name_str.clone(), | ||
columns: columns.clone(), | ||
update_time_col: update_col, | ||
table_method: table_method.clone(), | ||
|
@@ -167,8 +167,8 @@ pub fn init_table( | |
// setup triggers | ||
// create the trigger if not exists | ||
let trigger_handler = create_trigger_handler(job_name, &columns, primary_key); | ||
let insert_trigger = create_event_trigger(job_name, schema, table, "INSERT"); | ||
let update_trigger = create_event_trigger(job_name, schema, table, "UPDATE"); | ||
let insert_trigger = create_event_trigger(job_name, table_name_str.clone(), "INSERT"); | ||
let update_trigger = create_event_trigger(job_name, table_name_str.clone(), "UPDATE"); | ||
let _: Result<_, spi::Error> = Spi::connect(|mut c| { | ||
let _r = c.update(&trigger_handler, None, None)?; | ||
let _r = c.update(&insert_trigger, None, None)?; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Remove pgrx from `./core/Cargo.toml`. Core is kept separate so that we can have a library with no dependency on pgrx or on anything Postgres server-side.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
for PgOid we need it if we support
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The only place the `PgOid` type would be used is on the outermost function, `vectorize.table()`: in the signature, and then to determine `table` and `schema`, right? Everything else could refer to either `schema.table` as a single string, or `schema` and `table` as separate strings; that would be OK too, I think.