From 66633e701fad7e57def25e1ac69d712ecfb30ba0 Mon Sep 17 00:00:00 2001 From: Ambati Sahithi <162500856+asr2003@users.noreply.github.com> Date: Thu, 17 Oct 2024 18:25:04 +0530 Subject: [PATCH 1/8] docs: add filtering and partial index examples to vector search documentation --- docs/api/search.md | 37 +++++++++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/docs/api/search.md b/docs/api/search.md index 60c73be..db3fabb 100644 --- a/docs/api/search.md +++ b/docs/api/search.md @@ -76,8 +76,7 @@ select vectorize.table( ## Search a table -Search a table initialized with `vectorize.table`. The search results are sorted in descending order according to similarity. - The `query` is transformed to embeddings using the same `transformer` configured during `vectorize.table`. +Search a table initialized with `vectorize.table`. The search results are sorted in descending order according to similarity. You can use the `where_sql` parameter to apply additional filtering to the search results based on SQL conditions. The `query` is transformed to embeddings using the same `transformer` configured during `vectorize.table`. ```sql vectorize."search"( @@ -86,6 +85,7 @@ vectorize."search"( "api_key" TEXT DEFAULT NULL, "return_columns" TEXT[] DEFAULT ARRAY['*']::text[], "num_results" INT DEFAULT 10 + "where_sql" TEXT DEFAULT NULL ) RETURNS TABLE ( "search_results" jsonb ) @@ -100,6 +100,7 @@ vectorize."search"( | api_key | text | API key for the specified chat model. If OpenAI, this value overrides the config `vectorize.openai_key` | | return_columns | text[] | The columns to return in the search results. Defaults to all columns. | | num_results | int | The number of results to return. Sorted in descending order according to similarity. Defaults to 10. | +| where_sql | text | An optional SQL condition to filter the search results. This condition is applied after the similarity search. 
| ### Example @@ -122,3 +123,35 @@ SELECT * FROM vectorize.search( {"product_id": 4, "product_name": "Bluetooth Speaker", "similarity_score": 0.8250355616233103} (3 rows) ``` + +## Filtering Search Results + +The `where_sql` parameter allows to apply SQL-based filtering after performing the vector similarity search. This feature is useful when you want to narrow down the search results based on certain conditions such as `product category` or `price`. + +### Example + +```sql +SELECT * FROM vectorize.search( + job_name => 'product_search', + query => 'mobile electronic devices', + return_columns => ARRAY['product_id', 'product_name'], + num_results => 3, + where_sql => 'product_category = ''electronics'' AND price > 100' +); +``` + +In the above example, the results are filtered where the `product_category` is `electronics` and the `price` is greater than 100. + +## Optimizing Searches with Partial Indices + +For improving performance when using filters, you can create partial indices. This will speed up the execution of queries with frequent conditions in the `where_sql` parameter. + +### Example + +```sql +CREATE INDEX idx_product_price ON products (product_name) WHERE price > 100; +``` + +This index optimizes queries that search for products where the `price` is greater than 100. + +By combining the `where_sql` filtering feature with partial indices, you can efficiently narrow down search results and improve query performance. From e4dae989585c61057301560ae76c87b4aa186a43 Mon Sep 17 00:00:00 2001 From: Ambati Sahithi <162500856+asr2003@users.noreply.github.com> Date: Thu, 17 Oct 2024 18:29:21 +0530 Subject: [PATCH 2/8] docs: reorder lines --- docs/api/search.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/docs/api/search.md b/docs/api/search.md index db3fabb..23d3bcb 100644 --- a/docs/api/search.md +++ b/docs/api/search.md @@ -76,7 +76,12 @@ select vectorize.table( ## Search a table -Search a table initialized with `vectorize.table`. 
The search results are sorted in descending order according to similarity. You can use the `where_sql` parameter to apply additional filtering to the search results based on SQL conditions. The `query` is transformed to embeddings using the same `transformer` configured during `vectorize.table`. +Search a table initialized with `vectorize.table`. The search results are sorted in descending order according to similarity. + + +The `query` is transformed to embeddings using the same `transformer` configured during `vectorize.table`. + +The `where_sql` parameter is used to apply additional filtering to the search results based on SQL conditions. ```sql vectorize."search"( From 78cf58c8d41f624dd90a0c1df1a6e8e3136aa8d4 Mon Sep 17 00:00:00 2001 From: Ambati Sahithi <162500856+asr2003@users.noreply.github.com> Date: Thu, 17 Oct 2024 18:57:14 +0530 Subject: [PATCH 3/8] format --- docs/api/search.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/api/search.md b/docs/api/search.md index 23d3bcb..04d9eba 100644 --- a/docs/api/search.md +++ b/docs/api/search.md @@ -81,7 +81,7 @@ Search a table initialized with `vectorize.table`. The search results are sorted The `query` is transformed to embeddings using the same `transformer` configured during `vectorize.table`. -The `where_sql` parameter is used to apply additional filtering to the search results based on SQL conditions. +The `where` parameter is used to apply additional filtering to the search results based on SQL conditions. ```sql vectorize."search"( @@ -90,7 +90,7 @@ vectorize."search"( "api_key" TEXT DEFAULT NULL, "return_columns" TEXT[] DEFAULT ARRAY['*']::text[], "num_results" INT DEFAULT 10 - "where_sql" TEXT DEFAULT NULL + "where" TEXT DEFAULT NULL ) RETURNS TABLE ( "search_results" jsonb ) @@ -105,7 +105,7 @@ vectorize."search"( | api_key | text | API key for the specified chat model. 
If OpenAI, this value overrides the config `vectorize.openai_key` | | return_columns | text[] | The columns to return in the search results. Defaults to all columns. | | num_results | int | The number of results to return. Sorted in descending order according to similarity. Defaults to 10. | -| where_sql | text | An optional SQL condition to filter the search results. This condition is applied after the similarity search. | +| where | text | An optional SQL condition to filter the search results. This condition is applied after the similarity search. | ### Example @@ -131,7 +131,7 @@ SELECT * FROM vectorize.search( ## Filtering Search Results -The `where_sql` parameter allows to apply SQL-based filtering after performing the vector similarity search. This feature is useful when you want to narrow down the search results based on certain conditions such as `product category` or `price`. +The `where` parameter allows to apply SQL-based filtering after performing the vector similarity search. This feature is useful when you want to narrow down the search results based on certain conditions such as `product category` or `price`. ### Example @@ -141,7 +141,7 @@ SELECT * FROM vectorize.search( query => 'mobile electronic devices', return_columns => ARRAY['product_id', 'product_name'], num_results => 3, - where_sql => 'product_category = ''electronics'' AND price > 100' + where => 'product_category = ''electronics'' AND price > 100' ); ``` @@ -149,7 +149,7 @@ In the above example, the results are filtered where the `product_category` is ` ## Optimizing Searches with Partial Indices -For improving performance when using filters, you can create partial indices. This will speed up the execution of queries with frequent conditions in the `where_sql` parameter. +For improving performance when using filters, you can create partial indices. This will speed up the execution of queries with frequent conditions in the `where` parameter. 
### Example @@ -159,4 +159,4 @@ CREATE INDEX idx_product_price ON products (product_name) WHERE price > 100; This index optimizes queries that search for products where the `price` is greater than 100. -By combining the `where_sql` filtering feature with partial indices, you can efficiently narrow down search results and improve query performance. +By combining the `where` filtering feature with partial indices, you can efficiently narrow down search results and improve query performance. From 83222ed63989670bbd8cd6557ba6bbf98d71a283 Mon Sep 17 00:00:00 2001 From: Ambati Sahithi <162500856+asr2003@users.noreply.github.com> Date: Thu, 17 Oct 2024 18:57:56 +0530 Subject: [PATCH 4/8] remove extra white spaces --- docs/api/search.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/api/search.md b/docs/api/search.md index 04d9eba..c5d3d3d 100644 --- a/docs/api/search.md +++ b/docs/api/search.md @@ -78,7 +78,6 @@ select vectorize.table( Search a table initialized with `vectorize.table`. The search results are sorted in descending order according to similarity. - The `query` is transformed to embeddings using the same `transformer` configured during `vectorize.table`. The `where` parameter is used to apply additional filtering to the search results based on SQL conditions. From 5a980aea39ff97ded0487f324f61342f6430ca31 Mon Sep 17 00:00:00 2001 From: Ambati Sahithi <162500856+asr2003@users.noreply.github.com> Date: Fri, 18 Oct 2024 22:21:59 +0530 Subject: [PATCH 5/8] docs: add note for partial indices --- docs/api/search.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/api/search.md b/docs/api/search.md index c5d3d3d..d7f1e30 100644 --- a/docs/api/search.md +++ b/docs/api/search.md @@ -158,4 +158,6 @@ CREATE INDEX idx_product_price ON products (product_name) WHERE price > 100; This index optimizes queries that search for products where the `price` is greater than 100. 
+> **Note:** Partial indices improve performance by only indexing rows that meet the specified condition. This reduces the amount of data the database needs to scan, making queries with the same filter more efficient since only relevant rows are included in the index. + By combining the `where` filtering feature with partial indices, you can efficiently narrow down search results and improve query performance. From 5945a331400e4dbbcfc4e64c4d23f8f8ac7b2d78 Mon Sep 17 00:00:00 2001 From: asr2003 <162500856+asr2003@users.noreply.github.com> Date: Sun, 20 Oct 2024 06:37:27 +0530 Subject: [PATCH 6/8] Apply suggestions from code review Co-authored-by: Adam Hendel --- docs/api/search.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/api/search.md b/docs/api/search.md index d7f1e30..f9fb35c 100644 --- a/docs/api/search.md +++ b/docs/api/search.md @@ -148,7 +148,7 @@ In the above example, the results are filtered where the `product_category` is ` ## Optimizing Searches with Partial Indices -For improving performance when using filters, you can create partial indices. This will speed up the execution of queries with frequent conditions in the `where` parameter. +For improving performance when using filters, you can create partial indices. This will speed up the execution of queries with frequent conditions in the `where_sql` parameter. ### Example @@ -160,4 +160,4 @@ This index optimizes queries that search for products where the `price` is great > **Note:** Partial indices improve performance by only indexing rows that meet the specified condition. This reduces the amount of data the database needs to scan, making queries with the same filter more efficient since only relevant rows are included in the index. -By combining the `where` filtering feature with partial indices, you can efficiently narrow down search results and improve query performance. 
+By combining the `where_sql` filtering feature with partial indices, you can efficiently narrow down search results and improve query performance. From 6b007236b569e179740216d416630f585d2e6a18 Mon Sep 17 00:00:00 2001 From: asr2003 <162500856+asr2003@users.noreply.github.com> Date: Sun, 20 Oct 2024 06:44:23 +0530 Subject: [PATCH 7/8] where -> where_sql --- docs/api/search.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/api/search.md b/docs/api/search.md index f9fb35c..2a4abcb 100644 --- a/docs/api/search.md +++ b/docs/api/search.md @@ -80,7 +80,7 @@ Search a table initialized with `vectorize.table`. The search results are sorted The `query` is transformed to embeddings using the same `transformer` configured during `vectorize.table`. -The `where` parameter is used to apply additional filtering to the search results based on SQL conditions. +The `where_sql` parameter is used to apply additional filtering to the search results based on SQL conditions. ```sql vectorize."search"( @@ -89,7 +89,7 @@ vectorize."search"( "api_key" TEXT DEFAULT NULL, "return_columns" TEXT[] DEFAULT ARRAY['*']::text[], - "num_results" INT DEFAULT 10 - "where" TEXT DEFAULT NULL + "num_results" INT DEFAULT 10, + "where_sql" TEXT DEFAULT NULL ) RETURNS TABLE ( "search_results" jsonb ) @@ -104,7 +104,7 @@ vectorize."search"( | api_key | text | API key for the specified chat model. If OpenAI, this value overrides the config `vectorize.openai_key` | | return_columns | text[] | The columns to return in the search results. Defaults to all columns. | | num_results | int | The number of results to return. Sorted in descending order according to similarity. Defaults to 10. | -| where | text | An optional SQL condition to filter the search results. This condition is applied after the similarity search. | +| where_sql | text | An optional SQL condition to filter the search results. This condition is applied after the similarity search. 
| ### Example @@ -130,7 +130,7 @@ SELECT * FROM vectorize.search( ## Filtering Search Results -The `where` parameter allows to apply SQL-based filtering after performing the vector similarity search. This feature is useful when you want to narrow down the search results based on certain conditions such as `product category` or `price`. +The `where_sql` parameter allows you to apply SQL-based filtering after performing the vector similarity search. This feature is useful when you want to narrow down the search results based on certain conditions such as `product_category` or `price`. ### Example @@ -140,7 +140,7 @@ SELECT * FROM vectorize.search( query => 'mobile electronic devices', return_columns => ARRAY['product_id', 'product_name'], num_results => 3, - where => 'product_category = ''electronics'' AND price > 100' + where_sql => 'product_category = ''electronics'' AND price > 100' ); ``` From 8b90f0d320b26379f19dfa08a2f434eaf339c5ab Mon Sep 17 00:00:00 2001 From: asr2003 <162500856+asr2003@users.noreply.github.com> Date: Sun, 20 Oct 2024 06:55:55 +0530 Subject: [PATCH 8/8] update columns --- extension/sql/example.sql | 84 ++++++++++++++++++++------------------- 1 file changed, 43 insertions(+), 41 deletions(-) diff --git a/extension/sql/example.sql b/extension/sql/example.sql index ec1823c..c4fb4d3 100644 --- a/extension/sql/example.sql +++ b/extension/sql/example.sql @@ -2,48 +2,50 @@ CREATE TABLE example_products ( product_id SERIAL PRIMARY KEY, product_name TEXT NOT NULL, description TEXT, + product_category TEXT NOT NULL, + price DECIMAL(10, 2) NOT NULL, last_updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP ); -INSERT INTO example_products(product_name, description, last_updated_at) VALUES -('Pencil', 'Utensil used for writing and often works best on paper', NOW()), -('Laptop Stand', 'Elevated platform for laptops, enhancing ergonomics', NOW()), -('Desk Lamp', 'Illumination device for workspaces, often adjustable', NOW()), -('Bluetooth Speaker', 
'Portable audio device with wireless connectivity', NOW()), -('Water Bottle', 'Reusable container for liquids, often insulated', NOW()), -('Backpack', 'Storage solution for carrying personal items on one’s back', NOW()), -('Wireless Mouse', 'Pointing device without the need for a physical connection', NOW()), -('Plant Pot', 'Container for holding plants, often with drainage', NOW()), -('Sunglasses', 'Protective eyewear to shield eyes from UV rays', NOW()), -('Notebook', 'Bound sheets of paper for note-taking or sketching', NOW()), -('Stylus Pen', 'Tool for touchscreen devices, mimics finger touch', NOW()), -('Travel Mug', 'Insulated container for beverages on-the-go', NOW()), -('Phone Charger', 'Device to replenish the battery of mobile phones', NOW()), -('Yoga Mat', 'Cushioned surface for practicing yoga or exercise', NOW()), -('Wall Clock', 'Time-telling device meant to hang on walls', NOW()), -('Keychain', 'Small device for holding keys together', NOW()), -('Desk Organizer', 'Tool for sorting and storing desk items', NOW()), -('Earbuds', 'Small headphones that fit directly inside the ear', NOW()), -('Calendar', 'Physical representation of days and months, often used for scheduling', NOW()), -('Umbrella', 'Protective gear against rain or intense sun', NOW()), -('Hand Sanitizer', 'Liquid or gel used to decrease infectious agents on hands', NOW()), -('Sketchbook', 'Paper-filled book used for drawing or painting', NOW()), -('Flash Drive', 'Portable storage device for digital files', NOW()), -('Tablet Holder', 'Stand or grip for holding tablets or e-readers', NOW()), -('Shampoo', 'Hair care product designed to cleanse the scalp and hair', NOW()), -('Wristwatch', 'Time-telling device worn around the wrist', NOW()), -('Basketball', 'Spherical sporting equipment used in basketball games', NOW()), -('Guitar Picks', 'Small flat tool used to strum or pick a guitar', NOW()), -('Thermal Flask', 'Insulated bottle for keeping beverages hot or cold', NOW()), -('Slippers', 'Soft 
and light footwear intended for indoor use', NOW()), -('Easel', 'Upright support for artists to display or work on canvases', NOW()), -('Bicycle Helmet', 'Protective headgear for cyclists', NOW()), -('Candle Holder', 'Accessory to safely hold candles when they burn', NOW()), -('Cutting Board', 'Durable board on which to place materials for cutting', NOW()), -('Gardening Gloves', 'Handwear for protection during gardening tasks', NOW()), -('Alarm Clock', 'Time-telling device with a feature to sound at a specified time', NOW()), -('Spatula', 'Flat tool used in cooking for flipping or spreading', NOW()), -('Jigsaw Puzzle', 'Picture printed on cardboard or wood and cut into pieces to be reassembled', NOW()), -('Hammock', 'Sling made of fabric or netting, suspended between two points for relaxation', NOW()), -('Luggage Tag', 'Accessory attached to luggage for identification purposes', NOW()) +INSERT INTO example_products(product_name, description, product_category, price, last_updated_at) VALUES +('Pencil', 'Utensil used for writing and often works best on paper', 'stationery', 1.50, NOW()), +('Laptop Stand', 'Elevated platform for laptops, enhancing ergonomics', 'electronics', 35.99, NOW()), +('Desk Lamp', 'Illumination device for workspaces, often adjustable', 'furniture', 22.50, NOW()), +('Bluetooth Speaker', 'Portable audio device with wireless connectivity', 'electronics', 99.99, NOW()), +('Water Bottle', 'Reusable container for liquids, often insulated', 'kitchenware', 15.00, NOW()), +('Backpack', 'Storage solution for carrying personal items on one’s back', 'accessories', 45.00, NOW()), +('Wireless Mouse', 'Pointing device without the need for a physical connection', 'electronics', 25.00, NOW()), +('Plant Pot', 'Container for holding plants, often with drainage', 'garden', 12.00, NOW()), +('Sunglasses', 'Protective eyewear to shield eyes from UV rays', 'accessories', 50.00, NOW()), +('Notebook', 'Bound sheets of paper for note-taking or sketching', 'stationery', 
3.99, NOW()), +('Stylus Pen', 'Tool for touchscreen devices, mimics finger touch', 'electronics', 18.50, NOW()), +('Travel Mug', 'Insulated container for beverages on-the-go', 'kitchenware', 10.99, NOW()), +('Phone Charger', 'Device to replenish the battery of mobile phones', 'electronics', 20.00, NOW()), +('Yoga Mat', 'Cushioned surface for practicing yoga or exercise', 'sports', 30.00, NOW()), +('Wall Clock', 'Time-telling device meant to hang on walls', 'furniture', 15.50, NOW()), +('Keychain', 'Small device for holding keys together', 'accessories', 5.00, NOW()), +('Desk Organizer', 'Tool for sorting and storing desk items', 'furniture', 12.50, NOW()), +('Earbuds', 'Small headphones that fit directly inside the ear', 'electronics', 49.99, NOW()), +('Calendar', 'Physical representation of days and months, often used for scheduling', 'stationery', 10.00, NOW()), +('Umbrella', 'Protective gear against rain or intense sun', 'accessories', 8.99, NOW()), +('Hand Sanitizer', 'Liquid or gel used to decrease infectious agents on hands', 'personal care', 2.50, NOW()), +('Sketchbook', 'Paper-filled book used for drawing or painting', 'stationery', 6.99, NOW()), +('Flash Drive', 'Portable storage device for digital files', 'electronics', 12.00, NOW()), +('Tablet Holder', 'Stand or grip for holding tablets or e-readers', 'electronics', 22.99, NOW()), +('Shampoo', 'Hair care product designed to cleanse the scalp and hair', 'personal care', 7.50, NOW()), +('Wristwatch', 'Time-telling device worn around the wrist', 'accessories', 120.00, NOW()), +('Basketball', 'Spherical sporting equipment used in basketball games', 'sports', 20.00, NOW()), +('Guitar Picks', 'Small flat tool used to strum or pick a guitar', 'music', 5.00, NOW()), +('Thermal Flask', 'Insulated bottle for keeping beverages hot or cold', 'kitchenware', 18.99, NOW()), +('Slippers', 'Soft and light footwear intended for indoor use', 'footwear', 10.00, NOW()), +('Easel', 'Upright support for artists to display or 
work on canvases', 'art supplies', 45.00, NOW()), +('Bicycle Helmet', 'Protective headgear for cyclists', 'sports', 35.00, NOW()), +('Candle Holder', 'Accessory to safely hold candles when they burn', 'home decor', 15.00, NOW()), +('Cutting Board', 'Durable board on which to place materials for cutting', 'kitchenware', 10.50, NOW()), +('Gardening Gloves', 'Handwear for protection during gardening tasks', 'garden', 8.00, NOW()), +('Alarm Clock', 'Time-telling device with a feature to sound at a specified time', 'electronics', 25.00, NOW()), +('Spatula', 'Flat tool used in cooking for flipping or spreading', 'kitchenware', 3.99, NOW()), +('Jigsaw Puzzle', 'Picture printed on cardboard or wood and cut into pieces to be reassembled', 'toys', 12.99, NOW()), +('Hammock', 'Sling made of fabric or netting, suspended between two points for relaxation', 'outdoor', 40.00, NOW()), +('Luggage Tag', 'Accessory attached to luggage for identification purposes', 'travel', 7.50, NOW()) ;