Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/backend/distributed/sql/citus--13.0-1--13.1-1.sql
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ DROP VIEW IF EXISTS pg_catalog.citus_lock_waits;
#include "udfs/citus_stat_counters/13.1-1.sql"
#include "udfs/citus_stat_counters_reset/13.1-1.sql"
#include "udfs/citus_nodes/13.1-1.sql"
#include "udfs/citus_column_stats/13.1-1.sql"
#include "udfs/citus_stat_user_tables/13.1-1.sql"

-- Since shard_name/13.1-1.sql first drops the function and then creates it, we first
-- need to drop citus_shards view since that view depends on this function. And immediately
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ DROP VIEW pg_catalog.citus_stat_counters;
DROP FUNCTION pg_catalog.citus_stat_counters(oid);
DROP FUNCTION pg_catalog.citus_stat_counters_reset(oid);
DROP VIEW IF EXISTS pg_catalog.citus_nodes;
DROP FUNCTION IF EXISTS pg_catalog.citus_column_stats;
DROP FUNCTION IF EXISTS pg_catalog.citus_stat_user_tables;

-- Definition of shard_name() prior to this release doesn't have a separate SQL file
-- because it's quite an old UDF that its prior definition(s) was(were) squashed into
Expand Down
61 changes: 61 additions & 0 deletions src/backend/distributed/sql/udfs/citus_column_stats/13.1-1.sql

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

61 changes: 61 additions & 0 deletions src/backend/distributed/sql/udfs/citus_column_stats/latest.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
CREATE OR REPLACE FUNCTION pg_catalog.citus_column_stats(
qualified_table_name text)
RETURNS TABLE (
attname text,
null_frac float4,
most_common_vals text[],
most_common_freqs float4[]
)
AS $func$
BEGIN
IF NOT EXISTS (SELECT * FROM pg_dist_partition WHERE logicalrelid = qualified_table_name::regclass) THEN
RAISE EXCEPTION 'Not a Citus table';
ELSE
RETURN QUERY

WITH most_common_vals_json AS (
SELECT * FROM run_command_on_shards(qualified_table_name,
$$ SELECT json_agg(row_to_json(shard_stats)) FROM (
SELECT attname, s.null_frac, most_common_vals, most_common_freqs, c.reltuples AS reltuples
FROM pg_stats s RIGHT JOIN pg_class c ON (s.tablename = c.relname)
WHERE c.relname = '%s') shard_stats $$ )),

table_reltuples_json AS (
SELECT distinct(shardid),
(json_array_elements(result::json)->>'reltuples')::bigint AS shard_reltuples
FROM most_common_vals_json),

table_reltuples AS (
SELECT sum(shard_reltuples) AS table_reltuples FROM table_reltuples_json),

most_common_vals AS (
SELECT shardid,
(json_array_elements(result::json)->>'attname')::text AS attname,
(json_array_elements(result::json)->>'null_frac')::float4 AS null_frac,
json_array_elements_text((json_array_elements(result::json)->>'most_common_vals')::json)::text AS common_val,
json_array_elements_text((json_array_elements(result::json)->>'most_common_freqs')::json)::float4 AS common_freq,
(json_array_elements(result::json)->>'reltuples')::bigint AS shard_reltuples
FROM most_common_vals_json),

common_val_occurrence AS (
SELECT m.attname, common_val,
sum(common_freq * shard_reltuples)::bigint AS occurrence,
any_value(m.null_frac * shard_reltuples)::bigint AS null_occurrences
FROM most_common_vals m
GROUP BY m.attname, common_val
ORDER BY m.attname, occurrence DESC, common_val)

SELECT c.attname,
any_value((null_occurrences/t.table_reltuples)::float4) AS null_frac,
ARRAY_agg(common_val) AS most_common_vals,
ARRAY_agg((occurrence/t.table_reltuples)::float4) AS most_common_freqs
FROM common_val_occurrence c, table_reltuples t
GROUP BY c.attname;

END IF;
END;
$func$ LANGUAGE plpgsql;

COMMENT ON FUNCTION pg_catalog.citus_column_stats(
qualified_table_name text)
IS 'provides some pg_stats for columns of input Citus table';
55 changes: 55 additions & 0 deletions src/backend/distributed/sql/udfs/citus_stat_user_tables/13.1-1.sql

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

55 changes: 55 additions & 0 deletions src/backend/distributed/sql/udfs/citus_stat_user_tables/latest.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
CREATE OR REPLACE FUNCTION pg_catalog.citus_stat_user_tables()
RETURNS TABLE (
relname regclass,
n_tup_ins bigint,
n_tup_upd bigint,
n_tup_del bigint,
n_tup_hot_upd bigint,
n_tup_newpage_upd bigint,
n_live_tup bigint,
n_dead_tup bigint
)
AS $func$
BEGIN
RETURN QUERY

WITH pg_dist_stats_double_json AS (
SELECT ( SELECT json_agg(row_to_json(f)) FROM ( SELECT result FROM
run_command_on_shards(logicalrelid, $$ SELECT json_agg(row_to_json(d))
FROM ( SELECT '$$ || logicalrelid || $$' AS dist_table,
s.relname, s.n_tup_ins, s.n_tup_upd, s.n_tup_del,
s.n_tup_hot_upd, s.n_tup_newpage_upd, s.n_live_tup, s.n_dead_tup
FROM pg_stat_user_tables s
JOIN pg_class c ON s.relname = c.relname
WHERE c.oid = '%s'::regclass::oid) d $$)) f)
FROM pg_dist_partition),

pg_dist_stats_single_json AS (
SELECT (json_array_elements(json_agg)->>'result') AS result
FROM pg_dist_stats_double_json),

pg_dist_stats_regular AS (
SELECT (json_array_elements(result::json)->>'dist_table')::regclass AS relname,
(json_array_elements(result::json)->>'relname')::name AS shardname,
(json_array_elements(result::json)->>'n_tup_ins')::bigint AS n_tup_ins,
(json_array_elements(result::json)->>'n_tup_upd')::bigint AS n_tup_upd,
(json_array_elements(result::json)->>'n_tup_del')::bigint AS n_tup_del,
(json_array_elements(result::json)->>'n_tup_hot_upd')::bigint AS n_tup_hot_upd,
(json_array_elements(result::json)->>'n_tup_newpage_upd')::bigint AS n_tup_newpage_upd,
(json_array_elements(result::json)->>'n_live_tup')::bigint AS n_live_tup,
(json_array_elements(result::json)->>'n_dead_tup')::bigint AS n_dead_tup
FROM pg_dist_stats_single_json
WHERE result != '')

SELECT s.relname, sum(s.n_tup_ins)::bigint AS n_tup_ins, sum(s.n_tup_upd)::bigint AS n_tup_upd,
sum(s.n_tup_del)::bigint AS n_tup_del, sum(s.n_tup_hot_upd)::bigint AS n_tup_hot_upd,
sum(s.n_tup_newpage_upd)::bigint AS n_tup_newpage_upd,
sum(s.n_live_tup)::bigint AS n_live_tup, sum(s.n_dead_tup)::bigint AS n_dead_tup
FROM pg_dist_stats_regular s
GROUP BY 1 ORDER BY 1;

END;
$func$ LANGUAGE plpgsql;

COMMENT ON FUNCTION pg_catalog.citus_stat_user_tables()
IS 'provides some pg_stat_user_tables entries for Citus tables';
131 changes: 131 additions & 0 deletions src/test/regress/expected/citus_aggregated_stats.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
SET citus.shard_count = 2;
CREATE USER user1;
GRANT ALL ON SCHEMA public TO user1;
SET SESSION AUTHORIZATION user1;
CREATE TABLE current_check (currentid int, payload text, rlsuser text);
GRANT ALL ON current_check TO PUBLIC;
INSERT INTO current_check VALUES
(1, 'abc', 'user1'),
(3, 'cde', 'user1'),
(4, 'def', 'user1'),
(4, 'def', 'user1'),
(3, 'cde', 'user2'),
(5, NULL, NULL);
ALTER TABLE current_check ENABLE ROW LEVEL SECURITY;
SET row_security TO ON;
ANALYZE current_check;
SELECT attname, null_frac, most_common_vals, most_common_freqs FROM pg_stats
WHERE tablename = 'current_check'
ORDER BY 1;
attname | null_frac | most_common_vals | most_common_freqs
---------------------------------------------------------------------
currentid | 0 | {3,4} | {0.333333,0.333333}
payload | 0.166667 | {cde,def} | {0.333333,0.333333}
rlsuser | 0.166667 | {user1} | {0.666667}
(3 rows)

SELECT * FROM citus_column_stats('current_check');
ERROR: Not a Citus table
CONTEXT: PL/pgSQL function citus_column_stats(text) line XX at RAISE
CREATE TABLE dist_current_check (currentid int, payload text, rlsuser text);
SELECT create_distributed_table('dist_current_check', 'currentid');
create_distributed_table
---------------------------------------------------------------------

(1 row)

INSERT INTO dist_current_check VALUES
(1, 'abc', 'user1'),
(3, 'cde', 'user1'),
(4, 'def', 'user1'),
(4, 'def', 'user1'),
(3, 'cde', 'user2'),
(5, NULL, NULL);
ANALYZE dist_current_check;
SELECT attname, null_frac, most_common_vals, most_common_freqs FROM pg_stats
WHERE tablename = 'dist_current_check'
ORDER BY 1;
attname | null_frac | most_common_vals | most_common_freqs
---------------------------------------------------------------------
(0 rows)

SELECT * FROM citus_column_stats('dist_current_check');
attname | null_frac | most_common_vals | most_common_freqs
---------------------------------------------------------------------
currentid | 0 | {3,4} | {0.333333,0.333333}
payload | 0.166667 | {cde,def} | {0.333333,0.333333}
rlsuser | 0.166667 | {user1} | {0.666667}
(3 rows)

DROP TABLE current_check;
DROP TABLE dist_current_check;
RESET SESSION AUTHORIZATION;
RESET row_security;
-- compare pg_stat_user_tables with citus_stat_user_tables
CREATE TABLE trunc_stats_test(id serial);
CREATE TABLE trunc_stats_dist_test(id serial);
SELECT create_distributed_table('trunc_stats_dist_test', 'id');
create_distributed_table
---------------------------------------------------------------------

(1 row)

-- rollback a savepoint: this should count 4 inserts and have 2
-- live tuples after commit (and 2 dead ones due to aborted subxact)
BEGIN;
INSERT INTO trunc_stats_test DEFAULT VALUES;
INSERT INTO trunc_stats_test DEFAULT VALUES;
INSERT INTO trunc_stats_dist_test DEFAULT VALUES;
INSERT INTO trunc_stats_dist_test DEFAULT VALUES;
SAVEPOINT p1;
INSERT INTO trunc_stats_test DEFAULT VALUES;
INSERT INTO trunc_stats_test DEFAULT VALUES;
TRUNCATE trunc_stats_test;
INSERT INTO trunc_stats_test DEFAULT VALUES;
INSERT INTO trunc_stats_dist_test DEFAULT VALUES;
INSERT INTO trunc_stats_dist_test DEFAULT VALUES;
TRUNCATE trunc_stats_dist_test;
INSERT INTO trunc_stats_dist_test DEFAULT VALUES;
ROLLBACK TO SAVEPOINT p1;
COMMIT;
\c - - - :worker_1_port
SELECT pg_stat_force_next_flush();
pg_stat_force_next_flush
---------------------------------------------------------------------

(1 row)

\c - - - :worker_2_port
SELECT pg_stat_force_next_flush();
pg_stat_force_next_flush
---------------------------------------------------------------------

(1 row)

\c - - - :master_port
SELECT pg_stat_force_next_flush();
pg_stat_force_next_flush
---------------------------------------------------------------------

(1 row)

SELECT relname, n_tup_ins, n_live_tup, n_dead_tup
FROM pg_stat_user_tables
WHERE relname like 'trunc_stats%';
relname | n_tup_ins | n_live_tup | n_dead_tup
---------------------------------------------------------------------
trunc_stats_test | 4 | 2 | 2
trunc_stats_dist_test | 0 | 0 | 0
(2 rows)

SELECT relname, n_tup_ins, n_live_tup, n_dead_tup
FROM citus_stat_user_tables();
relname | n_tup_ins | n_live_tup | n_dead_tup
---------------------------------------------------------------------
trunc_stats_dist_test | 4 | 2 | 2
(1 row)

REVOKE ALL ON SCHEMA public FROM user1;
DROP USER user1;
DROP TABLE trunc_stats_test;
DROP TABLE trunc_stats_dist_test;
Loading
Loading