From c0426b3ebd397f34b7edc991c60ea11e8947792e Mon Sep 17 00:00:00 2001
From: Adam Hendel
Date: Tue, 23 Apr 2024 13:30:20 -0500
Subject: [PATCH] doc update (#105)

* add doc for self-hosted models
* docs
* diff model
* update model
* update sig on docs
* update formatting
* update table
* add docs ci
* path
* update compose
* add path
* fix install cmd
* add token
* only main

---------

Co-authored-by: Adam Hendel
---
 .github/workflows/docs.yml             |  23 +++
 Makefile                               |   5 +
 README.md                              |  39 ++++-
 docker-compose.yml                     |   8 +
 docs/api/index.md                      |   1 -
 docs/api/rag.md                        |  34 ++--
 docs/api/search.md                     |  32 ++--
 docs/api/utilities.md                  |  10 +-
 docs/examples/openai_embeddings.md     |  16 +-
 docs/examples/sentence_transformers.md |  16 +-
 docs/models/index.md                   | 225 +++++++++++++++++++++++++
 extension/Makefile                     |  11 +-
 mkdocs.yml                             |   1 +
 13 files changed, 352 insertions(+), 69 deletions(-)
 create mode 100644 .github/workflows/docs.yml
 create mode 100644 Makefile
 create mode 100644 docs/models/index.md

diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
new file mode 100644
index 0000000..d89f155
--- /dev/null
+++ b/.github/workflows/docs.yml
@@ -0,0 +1,23 @@
+name: Docs CI Workflow
+on:
+  push:
+    branches:
+      - main
+    paths:
+      - 'docs/**'
+      - '.github/workflows/docs.yml'
+jobs:
+  deploy:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v2
+        with:
+          python-version: 3.11
+      - run: curl -sSL https://install.python-poetry.org | POETRY_VERSION=1.7.1 python3 -
+      - run: poetry install
+
+      - name: deploy docs
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: poetry run mkdocs gh-deploy --force
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..25bb346
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,5 @@
+.PHONY: docs
+
+docs:
+	poetry install --no-directory --no-root
+	poetry run mkdocs serve
diff --git a/README.md b/README.md
index 9ec0955..694a1b8 100644
--- a/README.md
+++ b/README.md
@@ -125,12 +125,12 @@ Create a job to vectorize the products table. We'll specify the tables primary k
 
 ```sql
 SELECT vectorize.table(
-    job_name => 'product_search_hf', 
-    "table" => 'products', 
+    job_name => 'product_search_hf',
+    "table" => 'products',
     primary_key => 'product_id',
-    columns => ARRAY['product_name', 'description'], 
+    columns => ARRAY['product_name', 'description'],
     transformer => 'sentence-transformers/multi-qa-MiniLM-L6-dot-v1',
-    schedule => 'realtime' 
+    schedule => 'realtime'
 );
 ```
@@ -140,10 +140,10 @@ Then search,
 
 ```sql
 SELECT * FROM vectorize.search(
-    job_name => 'product_search_hf', 
-    query => 'accessories for mobile devices', 
-    return_columns => ARRAY['product_id', 'product_name'], 
-    num_results => 3 
+    job_name => 'product_search_hf',
+    query => 'accessories for mobile devices',
+    return_columns => ARRAY['product_id', 'product_name'],
+    num_results => 3
 );
 ```
@@ -184,6 +184,9 @@
 ALTER TABLE products
 ADD COLUMN context TEXT GENERATED ALWAYS AS (product_name || ': ' || description) STORED;
 ```
 
+Initialize the RAG project.
+ We'll use the `sentence-transformers/all-MiniLM-L12-v2` model to generate embeddings for our source documents.
+
 ```sql
 SELECT vectorize.init_rag(
     agent_name => 'product_chat',
@@ -194,10 +197,13 @@ SELECT vectorize.init_rag(
 );
 ```
 
+Now we can ask questions of the `products` table and get responses from the `product_chat` agent using the `openai/gpt-3.5-turbo` generative model.
+
 ```sql
 SELECT vectorize.rag(
     agent_name => 'product_chat',
-    query => 'What is a pencil?'
+    query => 'What is a pencil?',
+    chat_model => 'openai/gpt-3.5-turbo'
 ) -> 'chat_response';
 ```
@@ -205,6 +211,21 @@ SELECT vectorize.rag(
 "A pencil is an item that is commonly used for writing and is known to be most effective on paper."
 ```
 
+And to use a locally hosted Ollama service, change the `chat_model` parameter:
+
+```sql
+SELECT vectorize.rag(
+    agent_name => 'product_chat',
+    query => 'What is a pencil?',
+    chat_model => 'ollama/wizardlm2:7b'
+) -> 'chat_response';
+```
+
+```text
+" A pencil is a writing instrument that consists of a solid or gelignola wood core, known as the \"lead,\" encased in a cylindrical piece of breakable material (traditionally wood or plastic), which serves as the body of the pencil. The tip of the body is tapered to a point for writing, and it can mark paper with the imprint of the lead. When used on a sheet of paper, the combination of the pencil's lead and the paper creates a visible mark that is distinct from unmarked areas of the paper. Pencils are particularly well-suited for writing on paper, as they allow for precise control over the marks made."
+```
+
 :bulb: Note that the `-> 'chat_response'` addition selects for that field of the JSON object output. Removing it will show the full JSON object, including information on which documents were included in the contextual prompt.
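+
+For example, dropping the filter returns the complete object (the exact set of additional keys may vary by pg_vectorize version):
+
+```sql
+SELECT vectorize.rag(
+    agent_name => 'product_chat',
+    query => 'What is a pencil?',
+    chat_model => 'openai/gpt-3.5-turbo'
+);
+```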
 
 ## Updating Embeddings
diff --git a/docker-compose.yml b/docker-compose.yml
index 90e7b56..280d9f0 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -17,3 +17,11 @@ services:
       - 3001:3001
     environment:
       - OLLAMA_HOST=0.0.0.0:3001
+    # deploy:
+    #   replicas: 1
+    #   resources:
+    #     reservations:
+    #       devices:
+    #         - driver: nvidia
+    #           count: 1
+    #           capabilities: [gpu]
diff --git a/docs/api/index.md b/docs/api/index.md
index c85c807..d3b7f8c 100644
--- a/docs/api/index.md
+++ b/docs/api/index.md
@@ -1,4 +1,3 @@
 # PG Vectorize API Overview
 
 pg vectorize provides tools for two closely related tasks: vector search and retrieval augmented generation (RAG), and there are APIs dedicated to both of these tasks. Vector search is an important component of RAG, and the RAG APIs depend on the vector search APIs. It could be helpful to think of the vector search APIs as lower level than RAG. However, relative to Postgres's APIs, both of these vectorize APIs are very high level.
-
diff --git a/docs/api/rag.md b/docs/api/rag.md
index bcae563..92e9a67 100644
--- a/docs/api/rag.md
+++ b/docs/api/rag.md
@@ -15,8 +15,8 @@ vectorize.init_rag(
     "unique_record_id" TEXT,
     "column" TEXT,
     "schema" TEXT DEFAULT 'public',
-    "transformer" TEXT DEFAULT 'text-embedding-ada-002',
-    "search_alg" vectorize.SimilarityAlg DEFAULT 'pgv_cosine_similarity',
+    "transformer" TEXT DEFAULT 'openai/text-embedding-ada-002',
+    "index_dist_type" vectorize.IndexDist DEFAULT 'pgv_hnsw_cosine',
     "table_method" vectorize.TableMethod DEFAULT 'append'
 ) RETURNS TEXT
@@ -31,18 +31,18 @@ vectorize.init_rag(
 | column | text | The name of the column that contains the content that is used for context for RAG. |
 | schema | text | The name of the schema where the table is located. Defaults to 'public'. |
 | transformer | text | The name of the transformer to use for the embeddings. Defaults to 'text-embedding-ada-002'. |
-| search_alg | SimilarityAlg | The name of the search algorithm to use. Defaults to 'pgv_cosine_similarity'. |
+| index_dist_type | IndexDist | The name of the index type to build. Defaults to 'pgv_hnsw_cosine'. |
 | table_method | TableMethod | The method to use for the table. Defaults to 'append', which adds a column to the existing table. |
 
 Example:
 
 ```sql
 select vectorize.init_rag(
-    agent_name => 'tembo_chat', 
-    table_name => 'tembo_docs', 
-    unique_record_id => 'document_name', 
-    "column" => 'content', 
-    transformer => 'sentence-transformers/all-MiniLM-L12-v2' 
+    agent_name => 'tembo_chat',
+    table_name => 'tembo_docs',
+    unique_record_id => 'document_name',
+    "column" => 'content',
+    transformer => 'sentence-transformers/all-MiniLM-L12-v2'
 );
 ```
@@ -56,7 +56,7 @@ select vectorize.init_rag(
 vectorize."rag"(
     "agent_name" TEXT,
     "query" TEXT,
-    "chat_model" TEXT DEFAULT 'gpt-3.5-turbo',
+    "chat_model" TEXT DEFAULT 'openai/gpt-3.5-turbo',
     "task" TEXT DEFAULT 'question_answer',
     "api_key" TEXT DEFAULT NULL,
     "num_context" INT DEFAULT 2,
@@ -81,10 +81,10 @@ vectorize."rag"(
 
 ```sql
 select vectorize.rag(
-    agent_name => 'tembo_support', 
-    query => 'what are the major features from the tembo kubernetes operator?', 
-    chat_model => 'gpt-3.5-turbo', 
-    force_trim => 'true' 
+    agent_name => 'tembo_support',
+    query => 'what are the major features from the tembo kubernetes operator?',
+    chat_model => 'openai/gpt-3.5-turbo',
+    force_trim => 'true'
 );
 ```
@@ -112,10 +112,10 @@ Filter the results to just the `chat_response`:
 
 ```sql
 select vectorize.rag(
-    agent_name => 'tembo_support', 
-    query => 'what are the major features from the tembo kubernetes operator?', 
-    chat_model => 'gpt-3.5-turbo', 
-    force_trim => 'true' 
+    agent_name => 'tembo_support',
+    query => 'what are the major features from the tembo kubernetes operator?',
+    chat_model => 'openai/gpt-3.5-turbo',
+    force_trim => 'true'
 ) -> 'chat_response';
 ```
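+
+Other documented parameters can be tuned in the same call. A sketch that also raises `num_context` above its default of 2 (the values shown are illustrative):
+
+```sql
+select vectorize.rag(
+    agent_name => 'tembo_support',
+    query => 'what are the major features from the tembo kubernetes operator?',
+    chat_model => 'openai/gpt-3.5-turbo',
+    num_context => 4,
+    force_trim => 'true'
+) -> 'chat_response';
+```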
diff --git a/docs/api/search.md b/docs/api/search.md
index 28f1a6b..757d93e 100644
--- a/docs/api/search.md
+++ b/docs/api/search.md
@@ -15,8 +15,8 @@ vectorize."table"(
     "args" json DEFAULT '{}',
     "schema" TEXT DEFAULT 'public',
     "update_col" TEXT DEFAULT 'last_updated_at',
-    "transformer" TEXT DEFAULT 'text-embedding-ada-002',
-    "search_alg" vectorize.SimilarityAlg DEFAULT 'pgv_cosine_similarity',
+    "transformer" TEXT DEFAULT 'openai/text-embedding-ada-002',
+    "index_dist_type" vectorize.IndexDist DEFAULT 'pgv_hnsw_cosine',
     "table_method" vectorize.TableMethod DEFAULT 'join',
     "schedule" TEXT DEFAULT '* * * * *'
 ) RETURNS TEXT
@@ -32,7 +32,7 @@ vectorize."table"(
 | schema | text | The name of the schema where the table is located. Defaults to 'public'. |
 | update_col | text | Column specifying the last time the record was updated. Required for cron-like schedule. Defaults to `last_updated_at`. |
 | transformer | text | The name of the transformer to use for the embeddings. Defaults to 'text-embedding-ada-002'. |
-| search_alg | SimilarityAlg | The name of the search algorithm to use. Defaults to 'pgv_cosine_similarity'. |
+| index_dist_type | IndexDist | The name of the index type to build. Defaults to 'pgv_hnsw_cosine'. |
 | table_method | TableMethod | `join` to store embeddings in a new table in the vectorize schema. `append` to create columns for embeddings on the source table. Defaults to `join`. |
 | schedule | text | Accepts a cron-like input for cron-based updates, or `realtime` to set up a trigger. |
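+
+For example, to refresh embeddings at the top of every hour instead of in realtime, pass a cron expression. A minimal sketch; it assumes the `products` table has the default `last_updated_at` column named by `update_col`, and the job name is illustrative:
+
+```sql
+select vectorize.table(
+    job_name => 'product_search_hourly',
+    "table" => 'products',
+    primary_key => 'product_id',
+    columns => ARRAY['product_name', 'description'],
+    transformer => 'sentence-transformers/all-MiniLM-L12-v2',
+    schedule => '0 * * * *'
+);
+```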
 
 Pass the API key into the function call via `args`.
 
 ```sql
 select vectorize.table(
-    job_name => 'product_search', 
-    "table" => 'products', 
+    job_name => 'product_search',
+    "table" => 'products',
     primary_key => 'product_id',
-    columns => ARRAY['product_name', 'description'], 
-    transformer => 'text-embedding-ada-002', 
-    args => '{"api_key": "my-openai-key"}' 
+    columns => ARRAY['product_name', 'description'],
+    transformer => 'openai/text-embedding-ada-002',
+    args => '{"api_key": "my-openai-key"}'
 );
 ```
 
 Then call `vectorize.table()` without providing the API key.
 
 ```sql
 select vectorize.table(
-    job_name => 'product_search', 
-    "table" => 'products', 
+    job_name => 'product_search',
+    "table" => 'products',
     primary_key => 'product_id',
-    columns => ARRAY['product_name', 'description'], 
-    transformer => 'text-embedding-ada-002' 
+    columns => ARRAY['product_name', 'description'],
+    transformer => 'openai/text-embedding-ada-002'
 );
 ```
@@ -106,10 +106,10 @@ vectorize."search"(
 
 ```sql
 SELECT * FROM vectorize.search(
-    job_name => 'product_search', 
-    query => 'mobile electronic devices', 
-    return_columns => ARRAY['product_id', 'product_name'], 
-    num_results => 3 
+    job_name => 'product_search',
+    query => 'mobile electronic devices',
+    return_columns => ARRAY['product_id', 'product_name'],
+    num_results => 3
 );
 ```
diff --git a/docs/api/utilities.md b/docs/api/utilities.md
index 49149df..d650fed 100644
--- a/docs/api/utilities.md
+++ b/docs/api/utilities.md
@@ -4,12 +4,12 @@
 
 Transforms a block of text to embeddings using the specified transformer.
 
 Requires the `vector-serve` container URL to be set via `vectorize.embedding_service_url`, or an OpenAI API key to be set if using OpenAI embedding models.
 
 ```sql
 vectorize."transform_embeddings"(
     "input" TEXT,
-    "model_name" TEXT DEFAULT 'text-embedding-ada-002',
+    "model_name" TEXT DEFAULT 'openai/text-embedding-ada-002',
     "api_key" TEXT DEFAULT NULL
 ) RETURNS double precision[]
 ```
 
 ```sql
 select vectorize.transform_embeddings(
-    input => 'the quick brown fox jumped over the lazy dogs', 
-    model_name => 'sentence-transformers/multi-qa-MiniLM-L6-dot-v1' 
+    input => 'the quick brown fox jumped over the lazy dogs',
+    model_name => 'sentence-transformers/multi-qa-MiniLM-L6-dot-v1'
 );
 
 {-0.2556323707103729,-0.3213586211204529 ..., -0.0951206386089325}
 ```
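+
+The returned array can also drive ad-hoc similarity queries. A minimal sketch, assuming a table with a pgvector `embedding` column of matching dimension (the table and column names here are illustrative) and pgvector's array-to-vector cast:
+
+```sql
+select product_id
+from my_products
+order by embedding <=> vectorize.transform_embeddings(
+    input => 'mobile electronic devices',
+    model_name => 'sentence-transformers/multi-qa-MiniLM-L6-dot-v1'
+)::vector
+limit 3;
+```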
 
 Configure `vectorize` to run on a database other than the default `postgres`.
 
-Note that when making this change, it's also required to update `pg_cron` such that its corresponding background workers also connect to the appropriate database. 
+Note that when making this change, you must also update `pg_cron` so that its background workers connect to the same database.
 
 ### Example
diff --git a/docs/examples/openai_embeddings.md b/docs/examples/openai_embeddings.md
index f3afdda..8681cde 100644
--- a/docs/examples/openai_embeddings.md
+++ b/docs/examples/openai_embeddings.md
@@ -22,11 +22,11 @@ Then create the job.
 
 ```sql
 SELECT vectorize.table(
-    job_name => 'product_search_openai', 
-    "table" => 'products', 
+    job_name => 'product_search_openai',
+    "table" => 'products',
     primary_key => 'product_id',
-    columns => ARRAY['product_name', 'description'], 
-    transformer => 'text-embedding-ada-002' 
+    columns => ARRAY['product_name', 'description'],
+    transformer => 'openai/text-embedding-ada-002'
 );
 ```
 
 To search the table, use the `vectorize.search` function.
 
 ```sql
 SELECT * FROM vectorize.search(
-    job_name => 'product_search_openai', 
-    query => 'accessories for mobile devices', 
-    return_columns => ARRAY['product_id', 'product_name'], 
-    num_results => 3 
+    job_name => 'product_search_openai',
+    query => 'accessories for mobile devices',
+    return_columns => ARRAY['product_id', 'product_name'],
+    num_results => 3
 );
 ```
diff --git a/docs/examples/sentence_transformers.md b/docs/examples/sentence_transformers.md
index 8f7b6cd..fee8733 100644
--- a/docs/examples/sentence_transformers.md
+++ b/docs/examples/sentence_transformers.md
@@ -53,12 +53,12 @@ Create a job to vectorize the products table. We'll specify the tables primary k
 
 ```sql
 SELECT vectorize.table(
-    job_name => 'product_search_hf', 
-    "table" => 'products', 
+    job_name => 'product_search_hf',
+    "table" => 'products',
     primary_key => 'product_id',
-    columns => ARRAY['product_name', 'description'], 
+    columns => ARRAY['product_name', 'description'],
     transformer => 'sentence-transformers/multi-qa-MiniLM-L6-dot-v1',
-    scheduler => 'realtime' 
+    schedule => 'realtime'
 );
 ```
@@ -68,10 +68,10 @@ Then search,
 
 ```sql
 SELECT * FROM vectorize.search(
-    job_name => 'product_search_hf', 
-    query => 'accessories for mobile devices', 
-    return_columns => ARRAY['product_id', 'product_name'], 
-    num_results => 3 
+    job_name => 'product_search_hf',
+    query => 'accessories for mobile devices',
+    return_columns => ARRAY['product_id', 'product_name'],
+    num_results => 3
 );
 
 search_results
diff --git a/docs/models/index.md b/docs/models/index.md
new file mode 100644
index 0000000..a8f8301
--- /dev/null
+++ b/docs/models/index.md
@@ -0,0 +1,225 @@
+# Supported Transformers and Generative Models
+
+pg_vectorize provides hooks into two types of models: `text-to-embedding` transformer models and `text-generation` models.
+ Whether a model is a text-to-embedding transformer or a text-generation model, it is always referenced from SQL using the following syntax:
+
+`${provider}/${model-name}`
+
+A few illustrative examples:
+
+- `openai/text-embedding-ada-002` is one of OpenAI's earliest [embedding](https://platform.openai.com/docs/models/embeddings) models.
+- `openai/gpt-3.5-turbo-instruct` is a [text generation](https://platform.openai.com/docs/models/gpt-3-5-turbo) model from OpenAI.
+- `ollama/wizardlm2:7b` is a language model hosted in [Ollama](https://ollama.com/library/wizardlm2:7b) and developed by Microsoft AI.
+- `sentence-transformers/all-MiniLM-L12-v2` is a text-to-embedding model from [SentenceTransformers](https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2).
+
+## Text-to-Embedding Models
+
+pg_vectorize provides hooks into the following text-to-embedding models:
+
+- OpenAI (public API)
+- SentenceTransformers (self-hosted)
+
+The transformer model to use is specified via a parameter on the various functions in this project.
+ For example, the `sentence-transformers` provider has a model named `all-MiniLM-L12-v2`,
+ so its model name is `sentence-transformers/all-MiniLM-L12-v2`.
+ To use OpenAI's `text-embedding-ada-002`, the model name is `openai/text-embedding-ada-002`.
+
+### SentenceTransformers
+
+[SentenceTransformers](https://sbert.net/) is a Python library for computing text embeddings.
+ pg_vectorize provides a container image that serves the SentenceTransformers library behind a REST API.
+ The container image ships with `sentence-transformers/all-MiniLM-L12-v2` pre-cached.
+ Models that are not pre-cached will be downloaded on first use and cached for subsequent use.
+
+When calling the model server from Postgres, the URL to the model server must first be set in the `vectorize.embedding_service_url` configuration parameter.
+ Assuming the model server is running on the same host as Postgres, you would set the following:
+
+```sql
+ALTER SYSTEM SET vectorize.embedding_service_url TO 'http://localhost:3000/v1/embeddings';
+SELECT pg_reload_conf();
+```
+
+#### Running the model server
+
+You can run this model server locally by executing:
+
+```bash
+docker compose up vector-serve -d
+```
+
+Then call it with simple curl commands:
+
+#### Calling with curl
+
+```bash
+curl -X POST http://localhost:3000/v1/embeddings \
+    -H 'Content-Type: application/json' \
+    -d '{"input": ["solar powered mobile electronics accessories without screens"],
+        "model": "sentence-transformers/all-MiniLM-L12-v2"}'
+```
+
+```plaintext
+{
+  "data": [
+    {
+      "embedding": [
+        -0.07903402298688889,
+        0.028912536799907684,
+        -0.018827738240361214,
+        -0.013423092663288116,
+        -0.06503172218799591,
+        ....384 total elements
+      ],
+      "index": 0
+    }
+  ],
+  "model": "all-MiniLM-L12-v2"
+}
+```
+
+We can change the model name to any of the models supported by SentenceTransformers, and it will be downloaded on the fly:
+
+```bash
+curl -X POST http://localhost:3000/v1/embeddings \
+    -H 'Content-Type: application/json' \
+    -d '{"input": ["solar powered mobile electronics accessories without screens"],
+        "model": "sentence-transformers/sentence-t5-base"}'
+```
+
+```plaintext
+{
+  "data": [
+    {
+      "embedding": [
+        -0.07903402298688889,
+        0.028912536799907684,
+        -0.018827738240361214,
+        -0.013423092663288116,
+        -0.06503172218799591,
+        ....768 total elements
+      ],
+      "index": 0
+    }
+  ],
+  "model": "sentence-transformers/sentence-t5-base"
+}
+```
+
+#### Calling with SQL
+
+We can also call the model server from SQL using the `vectorize.transform_embeddings` function.
+ The same model-name rules apply.
+
+```sql
+select vectorize.transform_embeddings(
+    input => 'the quick brown fox jumped over the lazy dogs',
+    model_name => 'sentence-transformers/multi-qa-MiniLM-L6-dot-v1'
+);
+```
+
+```plaintext
+{-0.2556323707103729,-0.3213586211204529 ..., -0.0951206386089325}
+```
+
+### OpenAI
+
+OpenAI embedding models are hosted on OpenAI's public API.
+ You need your own API key, which can be set with:
+
+```sql
+ALTER SYSTEM SET vectorize.openai_key TO '<your-api-key>';
+
+SELECT pg_reload_conf();
+```
+
+To call `text-embedding-ada-002` from OpenAI:
+
+```sql
+select vectorize.transform_embeddings(
+    input => 'the quick brown fox jumped over the lazy dogs',
+    model_name => 'openai/text-embedding-ada-002'
+);
+```
+
+To call `text-embedding-3-large`:
+
+```sql
+select vectorize.transform_embeddings(
+    input => 'the quick brown fox jumped over the lazy dogs',
+    model_name => 'openai/text-embedding-3-large'
+);
+```
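+
+If you prefer not to set the key globally, it can also be passed per call via the `api_key` parameter of `transform_embeddings` (the key value below is a placeholder):
+
+```sql
+select vectorize.transform_embeddings(
+    input => 'the quick brown fox jumped over the lazy dogs',
+    model_name => 'openai/text-embedding-ada-002',
+    api_key => '<your-api-key>'
+);
+```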
+
+## Text Generation Models
+
+pg_vectorize provides hooks into the following text generation models:
+
+- OpenAI (public API)
+- Ollama (self-hosted)
+
+### Ollama Generative Models
+
+To run the self-hosted Ollama models, you must first start the model server:
+
+```bash
+docker compose up ollama-serve -d
+```
+
+This starts an Ollama server pre-loaded with the `wizardlm2:7b` model.
+
+#### Calling with `curl`
+
+Once the Ollama server is running, you can call it directly with `curl`:
+
+```bash
+curl http://localhost:3001/api/generate -d '{
+  "model": "wizardlm2:7b",
+  "prompt": "What is Postgres?"
+}'
+```
+
+#### Calling with SQL
+
+First set the URL to the Ollama server:
+
+```sql
+ALTER SYSTEM SET vectorize.ollama_service_url TO 'http://localhost:3001';
+SELECT pg_reload_conf();
+```
+
+The text-generation models are available as part of the [RAG](../api/rag.md) API.
+ To call the models provided by the self-hosted Ollama container,
+ pass the model name into the `chat_model` parameter.
+
+```sql
+SELECT vectorize.rag(
+    agent_name => 'product_chat',
+    query => 'What is a pencil?',
+    chat_model => 'ollama/wizardlm2:7b'
+);
+```
+
+#### Loading new Ollama models
+
+While the Ollama server comes preloaded with `wizardlm2:7b`, we can load any model supported by Ollama by calling the `/api/pull` endpoint.
+ The service is compatible with all models available in the [Ollama library](https://ollama.com/library).
+
+To pull Llama 3:
+
+```bash
+curl http://localhost:3001/api/pull -d '{
+  "name": "llama3"
+}'
+```
+
+Then use that model in your RAG application:
+
+```sql
+SELECT vectorize.rag(
+    agent_name => 'product_chat',
+    query => 'What is a pencil?',
+    chat_model => 'ollama/llama3'
+);
+```
diff --git a/extension/Makefile b/extension/Makefile
index 1336419..3706247 100644
--- a/extension/Makefile
+++ b/extension/Makefile
@@ -34,8 +34,13 @@ pgxn-zip: $(DISTNAME)-$(DISTVERSION).zip
 clean:
 	@rm -rf META.json $(DISTNAME)-$(DISTVERSION).zip
 
-setup: install-pg_cron install-pgvector install-pgmq
+setup.dependencies: install-pg_cron install-pgvector install-pgmq
+
+setup.shared_preload_libraries:
 	echo "shared_preload_libraries = 'pg_cron, vectorize'" >> ~/.pgrx/data-${PG_VERSION}/postgresql.conf
+
+setup.urls:
+	echo "vectorize.embedding_service_url = 'http://localhost:3000/v1/embeddings'" >> ~/.pgrx/data-${PG_VERSION}/postgresql.conf
+	echo "vectorize.ollama_service_url = 'http://localhost:3001'" >> ~/.pgrx/data-${PG_VERSION}/postgresql.conf
+
+setup: setup.dependencies setup.shared_preload_libraries
 
 cat-logs:
 	cat ~/.pgrx/${PG_VERSION}.log
@@ -81,7 +86,3 @@ test-update:
 	echo "\q" | make run
 	psql ${DATABASE_URL} -c "ALTER EXTENSION vectorize UPDATE"
 	make test-integration
-
-docs:
-	poetry install --no-directory --no-root
-	poetry run mkdocs serve
diff --git a/mkdocs.yml b/mkdocs.yml
index 58ee404..6b161cc 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -24,6 +24,7 @@ nav:
   - 'examples/sentence_transformers.md'
   - 'examples/openai_embeddings.md'
   - 'examples/scheduling.md'
+  - Model Providers: 'models/index.md'
 markdown_extensions:
   - toc:
       permalink: true