Hello,
I am having trouble with a monthly Continuous Aggregate on top of a daily one. I create the cagg with the WITH NO DATA
option. At that point, when querying the cagg it has the correct data. But as soon as I manually refresh the cagg with refresh_continuous_aggregate
it has wrong data and one time bucket is duplicated (once with the correct/expected data and once with the wrong data).
I am using timescaledb v2.10.2 and postgresql version 14 (running the timescale/timescaledb-ha:pg14-latest docker image)
Here is what I have done:
CREATE TABLE object_scans (
id uuid NOT NULL,
scanned_at timestamp(0) NOT NULL,
product_version_id uuid NULL,
campaign_version_id uuid NOT NULL,
created_at timestamp(0) NULL,
updated_at timestamp(0) NULL,
);
SELECT create_hypertable('object_scans', 'scanned_at', chunk_time_interval => INTERVAL '1 day');
/* Inserting some test data with copy to and copy from with the following query
SELECT
gen_random_uuid() as id,
scanned_at,
'99c15bea-39d0-454e-8c02-58076a6ddd2f' as product_version_id,
'99c15bea-3763-40cf-891a-7ffbde74e38d' as campaign_version_id,
scanned_at as created_at,
scanned_at as updated_at
FROM generate_series('2023-01-01 00:00:00', '2023-11-16 13:00:00', INTERVAL '31 minutes') as scanned_at;
*/
-- Creating the daily cagg
CREATE MATERIALIZED VIEW scans_daily
WITH (timescaledb.continuous) AS
SELECT "object_scans"."campaign_version_id" as "campaign_version_id",
"object_scans"."product_version_id" AS "product_version_id",
time_bucket(INTERVAL '1 day', "scanned_at") AS "time_bucket",
COUNT(DISTINCT "object_scans"."id") AS "scans"
FROM "object_scans"
GROUP BY "campaign_version_id", "product_version_id", "time_bucket"
WITH NO DATA;
CALL refresh_continuous_aggregate('scans_daily', NULL, localtimestamp - INTERVAL '2 days');
-- This now has correct data
SELECT time_bucket, scans FROM scans_daily
WHERE time_bucket >= '2023-10-01';
Select results
time_bucket |scans|
-----------------------+-----+
2023-10-01 00:00:00.000| 46|
2023-10-02 00:00:00.000| 47|
2023-10-03 00:00:00.000| 46|
2023-10-04 00:00:00.000| 47|
2023-10-05 00:00:00.000| 46|
2023-10-06 00:00:00.000| 46|
2023-10-07 00:00:00.000| 47|
2023-10-08 00:00:00.000| 46|
2023-10-09 00:00:00.000| 47|
2023-10-10 00:00:00.000| 46|
2023-10-11 00:00:00.000| 47|
2023-10-12 00:00:00.000| 46|
2023-10-13 00:00:00.000| 47|
2023-10-14 00:00:00.000| 46|
2023-10-15 00:00:00.000| 47|
2023-10-16 00:00:00.000| 46|
2023-10-17 00:00:00.000| 46|
2023-10-18 00:00:00.000| 47|
2023-10-19 00:00:00.000| 46|
2023-10-20 00:00:00.000| 47|
2023-10-21 00:00:00.000| 46|
2023-10-22 00:00:00.000| 47|
2023-10-23 00:00:00.000| 46|
2023-10-24 00:00:00.000| 47|
2023-10-25 00:00:00.000| 46|
2023-10-26 00:00:00.000| 47|
2023-10-27 00:00:00.000| 46|
2023-10-28 00:00:00.000| 46|
2023-10-29 00:00:00.000| 47|
2023-10-30 00:00:00.000| 46|
2023-10-31 00:00:00.000| 47|
2023-11-01 00:00:00.000| 46|
2023-11-02 00:00:00.000| 47|
2023-11-03 00:00:00.000| 46|
2023-11-04 00:00:00.000| 47|
2023-11-05 00:00:00.000| 46|
2023-11-06 00:00:00.000| 46|
2023-11-07 00:00:00.000| 47|
2023-11-08 00:00:00.000| 46|
2023-11-09 00:00:00.000| 47|
2023-11-10 00:00:00.000| 46|
2023-11-11 00:00:00.000| 47|
2023-11-12 00:00:00.000| 46|
2023-11-13 00:00:00.000| 47|
2023-11-14 00:00:00.000| 46|
2023-11-15 00:00:00.000| 47|
2023-11-16 00:00:00.000| 36|
-- Create monthly cagg
CREATE MATERIALIZED VIEW scans_monthly
WITH (timescaledb.continuous) AS
SELECT "scans_daily"."campaign_version_id" as "campaign_version_id",
"scans_daily"."product_version_id" AS "product_version_id",
time_bucket(INTERVAL '1 month', "scans_daily"."time_bucket") AS "time_bucket",
SUM("scans_daily"."scans") AS "scans"
FROM "scans_daily_by_product"
GROUP BY "scans_daily"."campaign_version_id", "scans_daily"."product_version_id", time_bucket(INTERVAL '1 month', "scans_daily"."time_bucket")
WITH NO DATA;
-- Querying the monthly cagg now gives the correct scans amount
SELECT time_bucket, scans from scans_monthly;
Select results
time_bucket |scans|
-----------------------+-----+
2023-01-01 00:00:00.000| 1440|
2023-02-01 00:00:00.000| 1301|
2023-03-01 00:00:00.000| 1440|
2023-04-01 00:00:00.000| 1394|
2023-05-01 00:00:00.000| 1440|
2023-06-01 00:00:00.000| 1393|
2023-07-01 00:00:00.000| 1440|
2023-08-01 00:00:00.000| 1440|
2023-09-01 00:00:00.000| 1394|
2023-10-01 00:00:00.000| 1440|
2023-11-01 00:00:00.000| 733|
Now when refreshing the monthly cagg manually, strange things start to happen.
-- refresh cagg
CALL refresh_continuous_aggregate('scans_monthly', NULL, localtimestamp - INTERVAL '1 day');
-- Querying the monthly cagg now gives the incorrect values and duplicated time bucket
SELECT time_bucket, scans from scans_monthly;
Select results
time_bucket |scans|
-----------------------+-----+
2023-01-01 00:00:00.000| 1440|
2023-02-01 00:00:00.000| 1301|
2023-03-01 00:00:00.000| 1440|
2023-04-01 00:00:00.000| 1394|
2023-05-01 00:00:00.000| 1440|
2023-06-01 00:00:00.000| 1393|
2023-07-01 00:00:00.000| 1440|
2023-08-01 00:00:00.000| 1440|
2023-09-01 00:00:00.000| 1394|
2023-10-01 00:00:00.000| 1440|
2023-10-01 00:00:00.000| 140|
2023-11-01 00:00:00.000| 733|
Notice how the october time bucket appears twice.
2023-10-01 00:00:00.000| 1440|
2023-10-01 00:00:00.000| 140|
I am not sure what is going on or what I might have configured/created wrongly here.