Re: Allow pageinspect's bt_page_stats function to return a set of rows instead of a single row

Поиск
Список
Период
Сортировка
От Tom Lane
Тема Re: Allow pageinspect's bt_page_stats function to return a set of rows instead of a single row
Дата
Msg-id 1742183.1663005504@sss.pgh.pa.us
обсуждение исходный текст
Ответ на Re: Allow pageinspect's bt_page_stats function to return a set of rows instead of a single row  (Hamid Akhtar <hamid.akhtar@gmail.com>)
Ответы Re: Allow pageinspect's bt_page_stats function to return a set of rows instead of a single row  (Tom Lane <tgl@sss.pgh.pa.us>)
Список pgsql-hackers
Hamid Akhtar <hamid.akhtar@gmail.com> writes:
> Attached is the rebased version of the patch
> (pageinspect_btree_multipagestats_03.patch) for the master branch.

I looked through this and cleaned up the code a little (attached).
There are still some issues before it could be considered committable:

1. Where's the documentation update?

2. As this stands, there's no nice way to ask for "all the pages".
If you specify a page count that's even one too large, you get an
error.  I think there's room for an easier-to-use way to do that.
We could say that the thing just silently stops at the last page,
so that you just need to write a large page count.  Or maybe it'd
be better to define a zero or negative page count as "all the rest",
while still insisting that a positive count refer to real pages.

3. I think it's highly likely that the new test case is not portable.
In particular a machine with MAXALIGN 4 would be likely to put a
different number of tuples per page, or do the page split differently
so that the page with fewer index tuples isn't page 3.  Unfortunately
I don't seem to have a working setup like that right at the moment
to verify; but I'd counsel trying this inside a VM or something to
see if it's actually likely to survive on the buildfarm.  I'm not
sure, but making the indexed column be int8 instead of int4 might
reduce the risks here.

            regards, tom lane

diff --git a/contrib/pageinspect/Makefile b/contrib/pageinspect/Makefile
index 5c0736564a..069683caf6 100644
--- a/contrib/pageinspect/Makefile
+++ b/contrib/pageinspect/Makefile
@@ -13,7 +13,9 @@ OBJS = \
     rawpage.o

 EXTENSION = pageinspect
-DATA =  pageinspect--1.9--1.10.sql pageinspect--1.8--1.9.sql \
+DATA = \
+    pageinspect--1.10--1.11.sql \
+    pageinspect--1.9--1.10.sql pageinspect--1.8--1.9.sql \
     pageinspect--1.7--1.8.sql pageinspect--1.6--1.7.sql \
     pageinspect--1.5.sql pageinspect--1.5--1.6.sql \
     pageinspect--1.4--1.5.sql pageinspect--1.3--1.4.sql \
diff --git a/contrib/pageinspect/btreefuncs.c b/contrib/pageinspect/btreefuncs.c
index 9375d55e14..f52a6ac87e 100644
--- a/contrib/pageinspect/btreefuncs.c
+++ b/contrib/pageinspect/btreefuncs.c
@@ -46,6 +46,7 @@ PG_FUNCTION_INFO_V1(bt_page_items);
 PG_FUNCTION_INFO_V1(bt_page_items_bytea);
 PG_FUNCTION_INFO_V1(bt_page_stats_1_9);
 PG_FUNCTION_INFO_V1(bt_page_stats);
+PG_FUNCTION_INFO_V1(bt_multi_page_stats);

 #define IS_INDEX(r) ((r)->rd_rel->relkind == RELKIND_INDEX)
 #define IS_BTREE(r) ((r)->rd_rel->relam == BTREE_AM_OID)
@@ -80,6 +81,28 @@ typedef struct BTPageStat
     BTCycleId    btpo_cycleid;
 } BTPageStat;

+/*
+ * cross-call data structure for SRF for page stats
+ */
+typedef struct ua_page_stats
+{
+    Oid            relid;
+    int64        blkno;
+    int64        blk_count;
+} ua_page_stats;
+
+/*
+ * cross-call data structure for SRF for page items
+ */
+typedef struct ua_page_items
+{
+    Page        page;
+    OffsetNumber offset;
+    bool        leafpage;
+    bool        rightmost;
+    TupleDesc    tupd;
+} ua_page_items;
+

 /* -------------------------------------------------
  * GetBTPageStatistics()
@@ -177,34 +200,15 @@ GetBTPageStatistics(BlockNumber blkno, Buffer buffer, BTPageStat *stat)
 }

 /* -----------------------------------------------
- * bt_page_stats()
+ * bt_index_block_validate()
  *
- * Usage: SELECT * FROM bt_page_stats('t1_pkey', 1);
+ * Validate index type is btree and block number
+ * is within a valid block number range.
  * -----------------------------------------------
  */
-static Datum
-bt_page_stats_internal(PG_FUNCTION_ARGS, enum pageinspect_version ext_version)
+static void
+bt_index_block_validate(Relation rel, int64 blkno)
 {
-    text       *relname = PG_GETARG_TEXT_PP(0);
-    int64        blkno = (ext_version == PAGEINSPECT_V1_8 ? PG_GETARG_UINT32(1) : PG_GETARG_INT64(1));
-    Buffer        buffer;
-    Relation    rel;
-    RangeVar   *relrv;
-    Datum        result;
-    HeapTuple    tuple;
-    TupleDesc    tupleDesc;
-    int            j;
-    char       *values[11];
-    BTPageStat    stat;
-
-    if (!superuser())
-        ereport(ERROR,
-                (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
-                 errmsg("must be superuser to use pageinspect functions")));
-
-    relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
-    rel = relation_openrv(relrv, AccessShareLock);
-
     if (!IS_INDEX(rel) || !IS_BTREE(rel))
         ereport(ERROR,
                 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
@@ -232,6 +236,39 @@ bt_page_stats_internal(PG_FUNCTION_ARGS, enum pageinspect_version ext_version)
                  errmsg("block 0 is a meta page")));

     CHECK_RELATION_BLOCK_RANGE(rel, blkno);
+}
+
+/* -----------------------------------------------
+ * bt_page_stats()
+ *
+ * Usage: SELECT * FROM bt_page_stats('t1_pkey', 1);
+ * Arguments are index relation name and block number
+ * -----------------------------------------------
+ */
+static Datum
+bt_page_stats_internal(PG_FUNCTION_ARGS, enum pageinspect_version ext_version)
+{
+    text       *relname = PG_GETARG_TEXT_PP(0);
+    int64        blkno = (ext_version == PAGEINSPECT_V1_8 ? PG_GETARG_UINT32(1) : PG_GETARG_INT64(1));
+    Buffer        buffer;
+    Relation    rel;
+    RangeVar   *relrv;
+    Datum        result;
+    HeapTuple    tuple;
+    TupleDesc    tupleDesc;
+    int            j;
+    char       *values[11];
+    BTPageStat    stat;
+
+    if (!superuser())
+        ereport(ERROR,
+                (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+                 errmsg("must be superuser to use pageinspect functions")));
+
+    relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
+    rel = relation_openrv(relrv, AccessShareLock);
+
+    bt_index_block_validate(rel, blkno);

     buffer = ReadBuffer(rel, blkno);
     LockBuffer(buffer, BUFFER_LOCK_SHARE);
@@ -284,17 +321,134 @@ bt_page_stats(PG_FUNCTION_ARGS)
 }


-/*
- * cross-call data structure for SRF
+/* -----------------------------------------------
+ * bt_multi_page_stats()
+ *
+ * Usage: SELECT * FROM bt_page_stats('t1_pkey', 1, 2);
+ * Arguments are index relation name, first block number, number of blocks
+ * -----------------------------------------------
  */
-struct user_args
+Datum
+bt_multi_page_stats(PG_FUNCTION_ARGS)
 {
-    Page        page;
-    OffsetNumber offset;
-    bool        leafpage;
-    bool        rightmost;
-    TupleDesc    tupd;
-};
+    Relation    rel;
+    ua_page_stats *uargs;
+    FuncCallContext *fctx;
+    MemoryContext mctx;
+
+    if (!superuser())
+        ereport(ERROR,
+                (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+                 errmsg("must be superuser to use pageinspect functions")));
+
+    if (SRF_IS_FIRSTCALL())
+    {
+        text       *relname = PG_GETARG_TEXT_PP(0);
+        int64        blkno = PG_GETARG_INT64(1);
+        int64        blk_count = PG_GETARG_INT64(2);
+        RangeVar   *relrv;
+
+        fctx = SRF_FIRSTCALL_INIT();
+
+        relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
+        rel = relation_openrv(relrv, AccessShareLock);
+
+        /* Check that rel is a valid btree index and 1st block number is OK */
+        bt_index_block_validate(rel, blkno);
+
+        /* Save arguments for reuse */
+        mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);
+
+        uargs = palloc(sizeof(ua_page_stats));
+
+        uargs->relid = RelationGetRelid(rel);
+        uargs->blkno = blkno;
+        uargs->blk_count = blk_count;
+
+        fctx->user_fctx = uargs;
+
+        MemoryContextSwitchTo(mctx);
+
+        /*
+         * To avoid possibly leaking a relcache reference if the SRF isn't run
+         * to completion, we close and re-open the index rel each time
+         * through, using the index's OID for re-opens to ensure we get the
+         * same rel.  Keep the AccessShareLock though, to ensure it doesn't go
+         * away underneath us.
+         */
+        relation_close(rel, NoLock);
+    }
+
+    fctx = SRF_PERCALL_SETUP();
+    uargs = fctx->user_fctx;
+
+    /* We need to fetch next block statistics */
+    if (uargs->blk_count > 0)
+    {
+        Buffer        buffer;
+        Datum        result;
+        HeapTuple    tuple;
+        int            j;
+        char       *values[11];
+        BTPageStat    stat;
+        TupleDesc    tupleDesc;
+
+        /* We should have lock already */
+        rel = relation_open(uargs->relid, NoLock);
+
+        /*
+         * Check that the next block number is valid --- we could have stepped
+         * off the end, or index could have gotten shorter.
+         */
+        CHECK_RELATION_BLOCK_RANGE(rel, uargs->blkno);
+
+        buffer = ReadBuffer(rel, uargs->blkno);
+        LockBuffer(buffer, BUFFER_LOCK_SHARE);
+
+        /* keep compiler quiet */
+        stat.btpo_prev = stat.btpo_next = InvalidBlockNumber;
+        stat.btpo_flags = stat.free_size = stat.avg_item_size = 0;
+
+        GetBTPageStatistics(uargs->blkno, buffer, &stat);
+
+        UnlockReleaseBuffer(buffer);
+        relation_close(rel, NoLock);
+
+        /* Build a tuple descriptor for our result type */
+        if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
+            elog(ERROR, "return type must be a row type");
+
+        j = 0;
+        values[j++] = psprintf("%u", stat.blkno);
+        values[j++] = psprintf("%c", stat.type);
+        values[j++] = psprintf("%u", stat.live_items);
+        values[j++] = psprintf("%u", stat.dead_items);
+        values[j++] = psprintf("%u", stat.avg_item_size);
+        values[j++] = psprintf("%u", stat.page_size);
+        values[j++] = psprintf("%u", stat.free_size);
+        values[j++] = psprintf("%u", stat.btpo_prev);
+        values[j++] = psprintf("%u", stat.btpo_next);
+        values[j++] = psprintf("%u", stat.btpo_level);
+        values[j++] = psprintf("%d", stat.btpo_flags);
+
+        /* Construct tuple to be returned */
+        tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc),
+                                       values);
+
+        result = HeapTupleGetDatum(tuple);
+
+        /*
+         * Move to the next block number and decrement the number of blocks
+         * still to be fetched
+         */
+        uargs->blkno++;
+        uargs->blk_count--;
+
+        SRF_RETURN_NEXT(fctx, result);
+    }
+
+    SRF_RETURN_DONE(fctx);
+}

 /*-------------------------------------------------------
  * bt_page_print_tuples()
@@ -303,7 +457,7 @@ struct user_args
  * ------------------------------------------------------
  */
 static Datum
-bt_page_print_tuples(struct user_args *uargs)
+bt_page_print_tuples(ua_page_items *uargs)
 {
     Page        page = uargs->page;
     OffsetNumber offset = uargs->offset;
@@ -453,7 +607,7 @@ bt_page_items_internal(PG_FUNCTION_ARGS, enum pageinspect_version ext_version)
     Datum        result;
     FuncCallContext *fctx;
     MemoryContext mctx;
-    struct user_args *uargs;
+    ua_page_items *uargs;

     if (!superuser())
         ereport(ERROR,
@@ -473,33 +627,7 @@ bt_page_items_internal(PG_FUNCTION_ARGS, enum pageinspect_version ext_version)
         relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
         rel = relation_openrv(relrv, AccessShareLock);

-        if (!IS_INDEX(rel) || !IS_BTREE(rel))
-            ereport(ERROR,
-                    (errcode(ERRCODE_WRONG_OBJECT_TYPE),
-                     errmsg("\"%s\" is not a %s index",
-                            RelationGetRelationName(rel), "btree")));
-
-        /*
-         * Reject attempts to read non-local temporary relations; we would be
-         * likely to get wrong data since we have no visibility into the
-         * owning session's local buffers.
-         */
-        if (RELATION_IS_OTHER_TEMP(rel))
-            ereport(ERROR,
-                    (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-                     errmsg("cannot access temporary tables of other sessions")));
-
-        if (blkno < 0 || blkno > MaxBlockNumber)
-            ereport(ERROR,
-                    (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-                     errmsg("invalid block number")));
-
-        if (blkno == 0)
-            ereport(ERROR,
-                    (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-                     errmsg("block 0 is a meta page")));
-
-        CHECK_RELATION_BLOCK_RANGE(rel, blkno);
+        bt_index_block_validate(rel, blkno);

         buffer = ReadBuffer(rel, blkno);
         LockBuffer(buffer, BUFFER_LOCK_SHARE);
@@ -511,7 +639,7 @@ bt_page_items_internal(PG_FUNCTION_ARGS, enum pageinspect_version ext_version)
          */
         mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);

-        uargs = palloc(sizeof(struct user_args));
+        uargs = palloc(sizeof(ua_page_items));

         uargs->page = palloc(BLCKSZ);
         memcpy(uargs->page, BufferGetPage(buffer), BLCKSZ);
@@ -587,7 +715,7 @@ bt_page_items_bytea(PG_FUNCTION_ARGS)
     bytea       *raw_page = PG_GETARG_BYTEA_P(0);
     Datum        result;
     FuncCallContext *fctx;
-    struct user_args *uargs;
+    ua_page_items *uargs;

     if (!superuser())
         ereport(ERROR,
@@ -603,7 +731,7 @@ bt_page_items_bytea(PG_FUNCTION_ARGS)
         fctx = SRF_FIRSTCALL_INIT();
         mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);

-        uargs = palloc(sizeof(struct user_args));
+        uargs = palloc(sizeof(ua_page_items));

         uargs->page = get_page_from_raw(raw_page);

diff --git a/contrib/pageinspect/expected/btree.out b/contrib/pageinspect/expected/btree.out
index 035a81a759..de747f1751 100644
--- a/contrib/pageinspect/expected/btree.out
+++ b/contrib/pageinspect/expected/btree.out
@@ -34,6 +34,118 @@ btpo_flags    | 3

 SELECT * FROM bt_page_stats('test1_a_idx', 2);
 ERROR:  block number out of range
+-- bt_multi_page_stats() function returns a set of records of page statistics.
+CREATE TABLE test2 AS (SELECT generate_series(1, 5000) AS col1);
+CREATE INDEX test2_col1_idx ON test2(col1);
+SELECT * FROM bt_multi_page_stats('test2_col1_idx', 0, 1);
+ERROR:  block 0 is a meta page
+SELECT * FROM bt_multi_page_stats('test2_col1_idx', 1, -1);
+(0 rows)
+
+SELECT * FROM bt_multi_page_stats('test2_col1_idx', 1, 0);
+(0 rows)
+
+SELECT * FROM bt_multi_page_stats('test2_col1_idx', 1, 2);
+-[ RECORD 1 ]-+-----
+blkno         | 1
+type          | l
+live_items    | 367
+dead_items    | 0
+avg_item_size | 16
+page_size     | 8192
+free_size     | 808
+btpo_prev     | 0
+btpo_next     | 2
+btpo_level    | 0
+btpo_flags    | 1
+-[ RECORD 2 ]-+-----
+blkno         | 2
+type          | l
+live_items    | 367
+dead_items    | 0
+avg_item_size | 16
+page_size     | 8192
+free_size     | 808
+btpo_prev     | 1
+btpo_next     | 4
+btpo_level    | 0
+btpo_flags    | 1
+
+SELECT * FROM bt_multi_page_stats('test2_col1_idx', 2, 6);
+-[ RECORD 1 ]-+-----
+blkno         | 2
+type          | l
+live_items    | 367
+dead_items    | 0
+avg_item_size | 16
+page_size     | 8192
+free_size     | 808
+btpo_prev     | 1
+btpo_next     | 4
+btpo_level    | 0
+btpo_flags    | 1
+-[ RECORD 2 ]-+-----
+blkno         | 3
+type          | r
+live_items    | 14
+dead_items    | 0
+avg_item_size | 15
+page_size     | 8192
+free_size     | 7876
+btpo_prev     | 0
+btpo_next     | 0
+btpo_level    | 1
+btpo_flags    | 2
+-[ RECORD 3 ]-+-----
+blkno         | 4
+type          | l
+live_items    | 367
+dead_items    | 0
+avg_item_size | 16
+page_size     | 8192
+free_size     | 808
+btpo_prev     | 2
+btpo_next     | 5
+btpo_level    | 0
+btpo_flags    | 1
+-[ RECORD 4 ]-+-----
+blkno         | 5
+type          | l
+live_items    | 367
+dead_items    | 0
+avg_item_size | 16
+page_size     | 8192
+free_size     | 808
+btpo_prev     | 4
+btpo_next     | 6
+btpo_level    | 0
+btpo_flags    | 1
+-[ RECORD 5 ]-+-----
+blkno         | 6
+type          | l
+live_items    | 367
+dead_items    | 0
+avg_item_size | 16
+page_size     | 8192
+free_size     | 808
+btpo_prev     | 5
+btpo_next     | 7
+btpo_level    | 0
+btpo_flags    | 1
+-[ RECORD 6 ]-+-----
+blkno         | 7
+type          | l
+live_items    | 367
+dead_items    | 0
+avg_item_size | 16
+page_size     | 8192
+free_size     | 808
+btpo_prev     | 6
+btpo_next     | 8
+btpo_level    | 0
+btpo_flags    | 1
+
+DROP TABLE test2;
 SELECT * FROM bt_page_items('test1_a_idx', -1);
 ERROR:  invalid block number
 SELECT * FROM bt_page_items('test1_a_idx', 0);
diff --git a/contrib/pageinspect/pageinspect--1.10--1.11.sql b/contrib/pageinspect/pageinspect--1.10--1.11.sql
new file mode 100644
index 0000000000..976d029de9
--- /dev/null
+++ b/contrib/pageinspect/pageinspect--1.10--1.11.sql
@@ -0,0 +1,23 @@
+/* contrib/pageinspect/pageinspect--1.10--1.11.sql */
+
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION pageinspect UPDATE TO '1.11'" to load this file. \quit
+
+--
+-- bt_multi_page_stats()
+--
+CREATE FUNCTION bt_multi_page_stats(IN relname text, IN blkno int8, IN blk_count int8,
+    OUT blkno int8,
+    OUT type "char",
+    OUT live_items int4,
+    OUT dead_items int4,
+    OUT avg_item_size int4,
+    OUT page_size int4,
+    OUT free_size int4,
+    OUT btpo_prev int8,
+    OUT btpo_next int8,
+    OUT btpo_level int8,
+    OUT btpo_flags int4)
+RETURNS SETOF record
+AS 'MODULE_PATHNAME', 'bt_multi_page_stats'
+LANGUAGE C STRICT PARALLEL SAFE;
diff --git a/contrib/pageinspect/pageinspect.control b/contrib/pageinspect/pageinspect.control
index 7cdf37913d..f277413dd8 100644
--- a/contrib/pageinspect/pageinspect.control
+++ b/contrib/pageinspect/pageinspect.control
@@ -1,5 +1,5 @@
 # pageinspect extension
 comment = 'inspect the contents of database pages at a low level'
-default_version = '1.10'
+default_version = '1.11'
 module_pathname = '$libdir/pageinspect'
 relocatable = true
diff --git a/contrib/pageinspect/sql/btree.sql b/contrib/pageinspect/sql/btree.sql
index 1f554f0f67..9a959142cd 100644
--- a/contrib/pageinspect/sql/btree.sql
+++ b/contrib/pageinspect/sql/btree.sql
@@ -11,6 +11,16 @@ SELECT * FROM bt_page_stats('test1_a_idx', 0);
 SELECT * FROM bt_page_stats('test1_a_idx', 1);
 SELECT * FROM bt_page_stats('test1_a_idx', 2);

+-- bt_multi_page_stats() function returns a set of records of page statistics.
+CREATE TABLE test2 AS (SELECT generate_series(1, 5000) AS col1);
+CREATE INDEX test2_col1_idx ON test2(col1);
+SELECT * FROM bt_multi_page_stats('test2_col1_idx', 0, 1);
+SELECT * FROM bt_multi_page_stats('test2_col1_idx', 1, -1);
+SELECT * FROM bt_multi_page_stats('test2_col1_idx', 1, 0);
+SELECT * FROM bt_multi_page_stats('test2_col1_idx', 1, 2);
+SELECT * FROM bt_multi_page_stats('test2_col1_idx', 2, 6);
+DROP TABLE test2;
+
 SELECT * FROM bt_page_items('test1_a_idx', -1);
 SELECT * FROM bt_page_items('test1_a_idx', 0);
 SELECT * FROM bt_page_items('test1_a_idx', 1);

В списке pgsql-hackers по дате отправления:

Предыдущее
От: Andrey Borodin
Дата:
Сообщение: Re: pg_stat_statements locking
Следующее
От: Peter Eisentraut
Дата:
Сообщение: Re: Convert *GetDatum() and DatumGet*() macros to inline functions