tembo-io · jasonmp85 · Sep 26, 2024 · Jun 18, 2024
@@ -2,7 +2,7 @@
   "name": "timeseries",
   "abstract": "Convenience API for Tembo time series stack",
   "description": "Convenience API for Tembo time series stack",
-  "version": "0.1.5",
+  "version": "0.1.6",
   "maintainer": [
     "Jason Petersen <jason@tembo.io>"
   ],
@@ -12,7 +12,7 @@
       "abstract": "Convenience API for Tembo time series stack",
       "file": "sql/timeseries.sql",
       "docfile": "doc/timeseries.md",
-      "version": "0.1.5"
+      "version": "0.1.6"
     }
   },
   "prereqs": {
@@ -23,6 +23,7 @@
       "recommends": {
         "columnar": "1.1.2",
         "pg_cron": "1.6.2",
+        "pg_ivm": "1.8",
         "pg_partman": "4.5.1"
       }
     }

@@ -5,7 +5,7 @@ EXTVERSION   = $(shell grep -m 1 '[[:space:]]\{6\}"version":' META.json | \
 DISTVERSION  = $(shell grep -m 1 '[[:space:]]\{3\}"version":' META.json | \
                sed -e 's/[[:space:]]*"version":[[:space:]]*"\([^"]*\)",\{0,1\}/\1/')
 
-EXTVERSIONS = 0.1.5
+EXTVERSIONS = 0.1.6
 
 DATA 		 = $(wildcard sql/*--*.sql)
 DATA_built   = $(foreach v,$(EXTVERSIONS),sql/$(EXTENSION)--$(v).sql)
@@ -30,5 +30,5 @@ latest-changes.md: Changes
 
 # generate each version's file installation file by concatenating
 # previous upgrade scripts
-sql/$(EXTENSION)--0.1.5.sql: sql/$(EXTENSION).sql
+sql/$(EXTENSION)--0.1.6.sql: sql/$(EXTENSION).sql
 	cat $^ > $@
@@ -6,7 +6,7 @@ description = "Open source timeseries extension for Postgres."
 homepage = "https://github.com/tembo-io/pg_timeseries"
 documentation = "https://github.com/tembo-io/pg_timeseries"
 categories = ["analytics"]
-version = "0.1.5"
+version = "0.1.6"
 
 [build]
 postgres_version = "15"

@@ -4,7 +4,7 @@ In this guide, you will become familiar with the functions and features of the t
 
 ## Preparing your database
 
-You'll need a PostgreSQL instance running timeseries `0.1.5` or later. An easy way to have one set up for you is to deploy one from Tembo Cloud [here](https://cloud.tembo.io). The free tier will perform well enough for the data set we'll be using.
+You'll need a PostgreSQL instance running timeseries `0.1.6` or later. An easy way to have one set up for you is to deploy one from Tembo Cloud [here](https://cloud.tembo.io). The free tier will perform well enough for the data set we'll be using.
 
 Once that's up and running, you'll need a client machine with `psql` (to connect to your database) and [the Divvy dataset](https://tembo-demo-bucket.s3.amazonaws.com/202004--202402-divvy-tripdata-slim.csv), which will total about 50MiB of CSV after decompression.
 

@@ -132,6 +132,30 @@ Clears the compression policy for a table (so its data will never be dropped).
 
 `interval`, the previous policy for this table, or `NULL` if none was set.
 
+## Incremental Views
+
+These functions deal with the creation and maintenance of incremental views, or more verbosely, _incrementally maintainable materialized views_ (IMMVs). The underlying functionality is provided the the `pg_ivm` ("PostgreSQL Incremental View Maintenance") project, which handles applying changes toward any partial aggregates and groups which are affected by incoming `INSERT`, `UPDATE`, `DELETE`, etc. (DML) statements.
+
+Within `pg_timeseries`, it is expected that these functions be applied against existing time-series enhanced tables.
+
+### `make_view_incremental`
+
+This function takes an existing view—typically used to calculate aggregates across a time-series data set—and makes it into an incremental view: one whose aggregates are stored in a materialized fashion, but which is kept up-to-date as every individual modification to the underlying table is processed.
+
+After this function exits, the same view will exist, but will now point at an _incrementally maintainable materialized view_. This view maintains its correctness through a series of statement triggers on the time-series table, meaning that for maximum performance, some degree of row batching should be used for ingest.
+
+#### Arguments
+
+  * `target_view_id` (`regclass`), **required** — a view aggregating a single existing time-series table
+
+#### Considerations
+
+Views which reference more than one table are presently forbidden, though this restriction may be lifted at a later date.
+
+#### Returns
+
+`void`. If any problems are encountered, an error will be raised.
+
 ## Analytics Functions
 
 These functions are not related to the maintenance of time-series tables, but do sometimes rely on the related metadata to function. They are intended to make time-series queries easier to read and maintain.

@@ -613,3 +613,80 @@ CREATE AGGREGATE locf(anyelement) (
   SFUNC = locf_agg,
   STYPE = anyelement
 );
+
+-- A wrapper around the incremental view functionality provided by pg_ivm. This
+-- function accepts a pointer to an existing view (presumably one that queries
+-- a time-series table) and rewrites that view to point at an underlying IMMV
+-- instead. The purpose of this is to avoid exposing users to implementation-
+-- specific columns used to track partial aggregate state within IMMV tables:
+-- these columns may confuse users or interfere with software which expects
+-- all visible columns to be meaningful to an application (say, dashboards).
+--
+-- For the record, IMMV means "incrementally maintainable materialized view",
+-- though at the moment these objects are actually plain tables. They are
+-- created by calling functions from the pg_ivm project ("PostgreSQL incre-
+-- mental view maintenance"). After creation, these views automatically keep
+-- aggregates up-to-date with the latest data written to any referenced tables.
+-- This is an improvement over plain views (which reaggregate on every scan)
+-- and materialized views (which require periodic calls to REFRESH).
+--
+-- Within this wrapper, the provided view's SQL is extracted, then a new IMMV
+-- is created using the extracted query. Because this IMMV will have at least
+-- one (sometimes two or more) internal column per aggregate (beginning with
+-- the prefix '__ivm_'), it is desirable to allow the user to query a wrapper
+-- view instead. So the original view's query is redefined to point at the
+-- IMMV table, but restricted solely to the visible columns of the original
+-- query. Big picture: the user can create an aggregating view, check it
+-- quickly (with a LIMIT, for instance), then make it incremental once they
+-- have verified it's producing the aggregate rows they want.
+CREATE OR REPLACE FUNCTION public.make_view_incremental(target_view_id regclass)
+ RETURNS void
+ LANGUAGE plpgsql
+AS $function$
+DECLARE
+  orig_view_sql text;
+  immv_name text;
+  columns_sql text;
+  old_client_msg text;
+  old_log_msg text;
+BEGIN
+  -- check that target_view_id is actually a view
+  -- check that target_view_id mentions only one table
+
+  -- get the original SQL definition of the view to be upgraded
+  SELECT pg_get_viewdef(target_view_id) INTO orig_view_sql;
+
+  -- build the name we'll use for the new immv
+  SELECT format('%I.%I', n.nspname, c.relname || '_immv')
+    INTO immv_name
+    FROM pg_class c
+    LEFT JOIN pg_namespace n
+      ON n.oid = c.relnamespace
+    WHERE c.oid=target_view_id;
+
+  -- pg_ivm is chatty, even at NOTICE, so turn it down
+  SELECT current_setting('client_min_messages') INTO old_client_msg;
+  SELECT current_setting('log_min_messages') INTO old_log_msg;
+  SET LOCAL client_min_messages TO warning;
+  SET LOCAL log_min_messages TO warning;
+
+  -- create an immv with the original view's SQL
+  PERFORM create_immv(immv_name, orig_view_sql);
+
+  -- restore previous client/log levels
+  EXECUTE format('SET LOCAL client_min_messages TO %s', old_client_msg);
+  EXECUTE format('SET LOCAL log_min_messages TO %s', old_log_msg);
+
+  -- build a list of visible columns for the view
+  SELECT string_agg(quote_ident(attname), ', ' ORDER BY attnum ASC)
+    INTO columns_sql
+    FROM pg_attribute
+    WHERE attnum > 0 AND
+          NOT attisdropped AND
+          attrelid=target_view_id;
+
+  -- modify the existing view to point at the immv, hiding internal columns
+  EXECUTE format('CREATE OR REPLACE VIEW %I AS SELECT %s FROM %s',
+                 target_view_id, columns_sql, immv_name);
+END;
+$function$;
@@ -238,3 +238,68 @@ ORDER BY 3;
              2 | 1.9000000000000001 | Wed Nov 04 15:59:00 2020 PST
 (9 rows)
 
+CREATE VIEW events_5m AS
+  SELECT
+    user_id,
+    date_bin('5 minutes',
+             event_time,
+             TIMESTAMPTZ '1970-01-01') AS event_time,
+    max(value),
+    min(value)
+    FROM events
+    GROUP BY 1, 2;
+CREATE VIEW events_totals AS
+  SELECT
+    user_id,
+    sum(value),
+    count(user_id)
+  FROM events
+  GROUP BY 1;
+SELECT make_view_incremental('events_5m');
+ make_view_incremental 
+-----------------------
+
+(1 row)
+
+SELECT make_view_incremental('events_totals');
+ make_view_incremental 
+-----------------------
+
+(1 row)
+
+SELECT * FROM events_5m ORDER BY 1, 2;
+ user_id |          event_time          | max | min 
+---------+------------------------------+-----+-----
+       1 | Wed Nov 04 15:50:00 2020 PST | 1.2 | 1.1
+       1 | Wed Nov 04 15:55:00 2020 PST | 1.6 | 1.3
+       2 | Wed Nov 04 15:50:00 2020 PST | 1.8 | 1.7
+       2 | Wed Nov 04 15:55:00 2020 PST | 2.2 | 1.9
+(4 rows)
+
+SELECT * FROM events_totals ORDER BY 1;
+ user_id | sum  | count 
+---------+------+-------
+       1 |  8.1 |     6
+       2 | 11.7 |     6
+(2 rows)
+
+INSERT INTO events VALUES (3, 1, '2020-11-04 15:51:02.226999-08', 1.1);
+DELETE FROM events WHERE event_id = 12;
+SELECT * FROM events_5m ORDER BY 1, 2;
+ user_id |          event_time          | max | min 
+---------+------------------------------+-----+-----
+       1 | Wed Nov 04 15:50:00 2020 PST | 1.2 | 1.1
+       1 | Wed Nov 04 15:55:00 2020 PST | 1.6 | 1.3
+       2 | Wed Nov 04 15:50:00 2020 PST | 1.8 | 1.7
+       2 | Wed Nov 04 15:55:00 2020 PST | 2.1 | 1.9
+       3 | Wed Nov 04 15:50:00 2020 PST | 1.1 | 1.1
+(5 rows)
+
+SELECT * FROM events_totals ORDER BY 1;
+ user_id | sum | count 
+---------+-----+-------
+       1 | 8.1 |     6
+       2 | 9.5 |     5
+       3 | 1.1 |     1
+(3 rows)
+
@@ -99,3 +99,34 @@ FROM date_bin_table(NULL::events, '1 minute',
                     '(2020-11-04 15:50:00-08, 2020-11-04 16:00:00-08)')
 GROUP BY 3
 ORDER BY 3;
+
+CREATE VIEW events_5m AS
+  SELECT
+    user_id,
+    date_bin('5 minutes',
+             event_time,
+             TIMESTAMPTZ '1970-01-01') AS event_time,
+    max(value),
+    min(value)
+    FROM events
+    GROUP BY 1, 2;
+
+CREATE VIEW events_totals AS
+  SELECT
+    user_id,
+    sum(value),
+    count(user_id)
+  FROM events
+  GROUP BY 1;
+
+SELECT make_view_incremental('events_5m');
+SELECT make_view_incremental('events_totals');
+
+SELECT * FROM events_5m ORDER BY 1, 2;
+SELECT * FROM events_totals ORDER BY 1;
+
+INSERT INTO events VALUES (3, 1, '2020-11-04 15:51:02.226999-08', 1.1);
+DELETE FROM events WHERE event_id = 12;
+
+SELECT * FROM events_5m ORDER BY 1, 2;
+SELECT * FROM events_totals ORDER BY 1;
@@ -1,5 +1,5 @@
 # time series extension
 comment = 'Convenience API for Tembo time series stack'
-default_version = '0.1.5'
+default_version = '0.1.6'
 module_pathname = '$libdir/timeseries'
-requires = 'columnar, pg_cron, pg_partman'
+requires = 'columnar, pg_cron, pg_ivm, pg_partman'