Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use assignment instead of select into to set variables in last_agg and first_agg #22

Merged
merged 1 commit into from
May 29, 2024

Conversation

brianpursley
Copy link
Contributor

I don't know if there is a specific reason to use select into versus assignment to set old_rank, but changing it to use assignment cuts execution time in half for me: 3.8s --> 1.8s in the test below, and similar relative improvements on larger tests I ran.


CREATE TABLE sensor_readings (
    timestamp TIMESTAMPTZ NOT NULL,
    location_id INT NOT NULL,
    temperature DECIMAL(5, 2) NOT NULL,
    humidity DECIMAL(5, 2) NOT NULL,
    voc DECIMAL(7, 2) NOT NULL
) PARTITION BY RANGE (timestamp);
CREATE INDEX idx_sensor_readings_timestamp ON sensor_readings (timestamp);
CLUSTER sensor_readings USING idx_sensor_readings_timestamp;
CREATE INDEX idx_sensor_readings_location_id ON sensor_readings (location_id);
SELECT enable_ts_table('sensor_readings');

DO
$$
DECLARE
    start_date TIMESTAMPTZ := '2024-01-01 00:00:00+00';
    end_date TIMESTAMPTZ := '2024-05-01 00:00:00+00';
    c TIMESTAMPTZ;
    location_id INT;
    temperature DECIMAL(5, 2);
    humidity DECIMAL(5, 2);
    voc DECIMAL(7, 2);
BEGIN
    c := start_date;
    WHILE c < end_date LOOP
        FOR location_id IN 1..10 LOOP
            IF random() > 0.1 THEN
                temperature := 20 + (random() * 15);
                humidity := 30 + (random() * 70);
                voc := 100 + (random() * 900);
                INSERT INTO sensor_readings (timestamp, location_id, temperature, humidity, voc)
                VALUES (c, location_id, temperature, humidity, voc);
            END IF;
        END LOOP;
        c := c + INTERVAL '5 minute';
    END LOOP;
END;
$$;

select count(*) from sensor_readings;

/*
 count  
--------
 313733
(1 row)
*/

select location_id, last(temperature, timestamp) from sensor_readings group by location_id order by location_id;

/*
 location_id | last  
-------------+-------
           1 | 20.48
           2 | 25.03
           3 | 26.39
           4 | 33.50
           5 | 31.57
           6 | 20.93
           7 | 34.23
           8 | 22.32
           9 | 32.09
          10 | 27.67
(10 rows)
*/

explain analyze select location_id, last(temperature, timestamp) from sensor_readings group by location_id order by location_id;

/*
                                                                             QUERY PLAN                                                                              
---------------------------------------------------------------------------------------------------------------------------------------------------------------------
 Sort  (cost=89193.02..89193.52 rows=200 width=36) (actual time=3841.259..3841.266 rows=10 loops=1)
   Sort Key: sensor_readings.location_id
   Sort Method: quicksort  Memory: 25kB
   ->  HashAggregate  (cost=89133.38..89185.38 rows=200 width=36) (actual time=3841.230..3841.256 rows=10 loops=1)
         Group Key: sensor_readings.location_id
         Batches: 1  Memory Usage: 40kB
         ->  Append  (cost=0.00..7552.39 rows=323093 width=18) (actual time=0.007..130.770 rows=313733 loops=1)
               ->  Seq Scan on sensor_readings_p20240423 sensor_readings_1  (cost=0.00..333.33 rows=18133 width=18) (actual time=0.007..2.595 rows=18133 loops=1)
               ->  Seq Scan on sensor_readings_p20240430 sensor_readings_2  (cost=0.00..47.94 rows=2594 width=18) (actual time=0.009..0.323 rows=2594 loops=1)
               ->  Seq Scan on sensor_readings_p20240507 sensor_readings_3  (cost=0.00..20.40 rows=1040 width=24) (actual time=0.003..0.003 rows=0 loops=1)
               ->  Seq Scan on sensor_readings_p20240514 sensor_readings_4  (cost=0.00..20.40 rows=1040 width=24) (actual time=0.002..0.002 rows=0 loops=1)
               ->  Seq Scan on sensor_readings_p20240521 sensor_readings_5  (cost=0.00..20.40 rows=1040 width=24) (actual time=0.003..0.003 rows=0 loops=1)
               ->  Seq Scan on sensor_readings_p20240528 sensor_readings_6  (cost=0.00..20.40 rows=1040 width=24) (actual time=0.002..0.003 rows=0 loops=1)
               ->  Seq Scan on sensor_readings_p20240604 sensor_readings_7  (cost=0.00..20.40 rows=1040 width=24) (actual time=0.002..0.002 rows=0 loops=1)
               ->  Seq Scan on sensor_readings_p20240611 sensor_readings_8  (cost=0.00..20.40 rows=1040 width=24) (actual time=0.002..0.002 rows=0 loops=1)
               ->  Seq Scan on sensor_readings_p20240618 sensor_readings_9  (cost=0.00..20.40 rows=1040 width=24) (actual time=0.002..0.002 rows=0 loops=1)
               ->  Seq Scan on sensor_readings_p20240625 sensor_readings_10  (cost=0.00..20.40 rows=1040 width=24) (actual time=0.002..0.002 rows=0 loops=1)
               ->  Seq Scan on sensor_readings_p20240702 sensor_readings_11  (cost=0.00..20.40 rows=1040 width=24) (actual time=0.002..0.002 rows=0 loops=1)
               ->  Seq Scan on sensor_readings_default sensor_readings_12  (cost=0.00..5372.06 rows=293006 width=18) (actual time=0.004..36.851 rows=293006 loops=1)
 Planning Time: 0.258 ms
 Execution Time: 3841.316 ms
(21 rows)
*/

CREATE OR REPLACE FUNCTION public.last_agg(state text[], value anyelement, rank anycompatible)
 RETURNS text[]
 LANGUAGE plpgsql
AS $function$
DECLARE
  new_state text[] := state;
  old_rank rank%TYPE;
BEGIN
  IF rank IS NULL THEN
    RETURN new_state;
  END IF;

  old_rank := state[2];
  IF (state IS NULL) OR
     (rank > old_rank) THEN
    new_state := ARRAY[value::text, rank::text];
  END IF;

  RETURN new_state;
END;
$function$;

select location_id, last(temperature, timestamp) from sensor_readings group by location_id order by location_id;

/*
 location_id | last  
-------------+-------
           1 | 20.48
           2 | 25.03
           3 | 26.39
           4 | 33.50
           5 | 31.57
           6 | 20.93
           7 | 34.23
           8 | 22.32
           9 | 32.09
          10 | 27.67
(10 rows)
*/

explain analyze select location_id, last(temperature, timestamp) from sensor_readings group by location_id order by location_id;

/*
                                                                              QUERY PLAN                                                                              
----------------------------------------------------------------------------------------------------------------------------------------------------------------------
 Sort  (cost=89193.02..89193.52 rows=200 width=36) (actual time=1822.059..1822.067 rows=10 loops=1)
   Sort Key: sensor_readings.location_id
   Sort Method: quicksort  Memory: 25kB
   ->  HashAggregate  (cost=89133.38..89185.38 rows=200 width=36) (actual time=1822.028..1822.058 rows=10 loops=1)
         Group Key: sensor_readings.location_id
         Batches: 1  Memory Usage: 40kB
         ->  Append  (cost=0.00..7552.39 rows=323093 width=18) (actual time=0.007..353.218 rows=313733 loops=1)
               ->  Seq Scan on sensor_readings_p20240423 sensor_readings_1  (cost=0.00..333.33 rows=18133 width=18) (actual time=0.007..2.070 rows=18133 loops=1)
               ->  Seq Scan on sensor_readings_p20240430 sensor_readings_2  (cost=0.00..47.94 rows=2594 width=18) (actual time=0.011..0.318 rows=2594 loops=1)
               ->  Seq Scan on sensor_readings_p20240507 sensor_readings_3  (cost=0.00..20.40 rows=1040 width=24) (actual time=0.003..0.003 rows=0 loops=1)
               ->  Seq Scan on sensor_readings_p20240514 sensor_readings_4  (cost=0.00..20.40 rows=1040 width=24) (actual time=0.002..0.002 rows=0 loops=1)
               ->  Seq Scan on sensor_readings_p20240521 sensor_readings_5  (cost=0.00..20.40 rows=1040 width=24) (actual time=0.003..0.003 rows=0 loops=1)
               ->  Seq Scan on sensor_readings_p20240528 sensor_readings_6  (cost=0.00..20.40 rows=1040 width=24) (actual time=0.003..0.003 rows=0 loops=1)
               ->  Seq Scan on sensor_readings_p20240604 sensor_readings_7  (cost=0.00..20.40 rows=1040 width=24) (actual time=0.002..0.003 rows=0 loops=1)
               ->  Seq Scan on sensor_readings_p20240611 sensor_readings_8  (cost=0.00..20.40 rows=1040 width=24) (actual time=0.002..0.002 rows=0 loops=1)
               ->  Seq Scan on sensor_readings_p20240618 sensor_readings_9  (cost=0.00..20.40 rows=1040 width=24) (actual time=0.002..0.002 rows=0 loops=1)
               ->  Seq Scan on sensor_readings_p20240625 sensor_readings_10  (cost=0.00..20.40 rows=1040 width=24) (actual time=0.002..0.002 rows=0 loops=1)
               ->  Seq Scan on sensor_readings_p20240702 sensor_readings_11  (cost=0.00..20.40 rows=1040 width=24) (actual time=0.002..0.002 rows=0 loops=1)
               ->  Seq Scan on sensor_readings_default sensor_readings_12  (cost=0.00..5372.06 rows=293006 width=18) (actual time=0.005..262.178 rows=293006 loops=1)
 Planning Time: 0.250 ms
 Execution Time: 1822.119 ms
(21 rows)
*/

@ChuckHend ChuckHend merged commit e5a59ec into tembo-io:main May 29, 2024
13 of 14 checks passed
@ChuckHend
Copy link
Member

Thank you @brianpursley

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

Successfully merging this pull request may close these issues.

2 participants