Обсуждение: Using system tables directly takes many hours, using temp tables with no indexes takes a few seconds for geometry_columns view.

Поиск
Список
Период
Сортировка
Hi

We are running
postgres server 12.6 (Ubuntu 12.6-0ubuntu0.20.04.1))
POSTGIS="3.1.1 aaf4c79" [EXTENSION] PGSQL="120" GEOS="3.9.0-CAPI-1.16.2" SFCGAL="1.3.7" PROJ="7.2.1" GDAL="GDAL 3.2.1, released 2020/12/29" LIBXML="2.9.10" LIBJSON="0.13.1" LIBPROTOBUF="1.3.3" WAGYU="0.5.0 (Internal)" TOPOLOGY RASTER

The problem is that it takes more than 10 hours (duration: 36885527.039) to browse tables geometry from qgis https://explain.depesz.com/s/MxAN#bquery with high load on the server.
We have at least 45 jobs running and around 70% CPU load on the server.

Then I started to check views/tables involved and found that the view geometry_columns seems to be using a very long time
'explain analyze select * from geometry_columns' have been waiting for more than 2 hours now, will paste the result to https://explain.depesz.com when done.

While waiting I created temp table for the system tables involved in view geometry_columns like this

create temp table pg_attribute_temp as select attcollation,attrelid,attname,atttypid,attstattarget,attlen,attnum,attndims,attcacheoff,atttypmod,attbyval,attstorage,attalign,attnotnull,atthasdef,atthasmissing,attidentity,attgenerated,attisdropped,attislocal,attinhcount,attacl,attoptions,attfdwoptions from pg_attribute;
create temp table pg_namespace_temp as select * from pg_namespace;
create temp table pg_type_temp as select * from pg_type;
create temp table pg_constraint_temp as select * from pg_constraint;

SELECT 1702623
Time: 42552.899 ms (00:42.553)
SELECT 841
Time: 132.595 ms
SELECT 245239
Time: 3378.395 ms (00:03.378)
SELECT 9575
Time: 205.036 ms

That did not take very long time.

Then created geometry_columns_temp_no_rules using those new temp tables.

explain analyze select * from geometry_columns_temp_no_rules

And that takes less than 6 seconds with no indexes. Here is the explain from https://explain.depesz.com/s/yBSd

Why is temp tables with no indexes much faster system tables with indexes ?

(I do not think it's related to not having rules I tested to crated a view using system tables with but with no rules and that hanged for more that 15 minuttes an dthen I gave up)

Here is the view def that I used.

CREATE VIEW geometry_columns_temp_no_rules AS
SELECT current_database()::character varying(256) AS f_table_catalog,
    n.nspname AS f_table_schema,
    c.relname AS f_table_name,
    a.attname AS f_geometry_column,
    COALESCE(postgis_typmod_dims(a.atttypmod), sn.ndims, 2) AS coord_dimension,
    COALESCE(NULLIF(postgis_typmod_srid(a.atttypmod), 0), sr.srid, 0) AS srid,
    replace(replace(COALESCE(NULLIF(upper(postgis_typmod_type(a.atttypmod)), 'GEOMETRY'::text), st.type, 'GEOMETRY'::text), 'ZM'::text, ''::text), 'Z'::text, ''::text)::character varying(30) AS type
   FROM pg_class c
     JOIN pg_attribute_temp a ON a.attrelid = c.oid AND NOT a.attisdropped
     JOIN pg_namespace_temp n ON c.relnamespace = n.oid
     JOIN pg_type_temp t ON a.atttypid = t.oid
     LEFT JOIN ( SELECT s.connamespace,
            s.conrelid,
            s.conkey,
            replace(split_part(s.consrc, ''''::text, 2), ')'::text, ''::text) AS type
           FROM ( SELECT pg_constraint_temp.connamespace,
                    pg_constraint_temp.conrelid,
                    pg_constraint_temp.conkey,
                    pg_get_constraintdef(pg_constraint_temp.oid) AS consrc
                   FROM pg_constraint_temp) s
          WHERE s.consrc ~~* '%geometrytype(% = %'::text) st ON st.connamespace = n.oid AND st.conrelid = c.oid AND (a.attnum = ANY (st.conkey))
     LEFT JOIN ( SELECT s.connamespace,
            s.conrelid,
            s.conkey,
            replace(split_part(s.consrc, ' = '::text, 2), ')'::text, ''::text)::integer AS ndims
           FROM ( SELECT pg_constraint_temp.connamespace,
                    pg_constraint_temp.conrelid,
                    pg_constraint_temp.conkey,
                    pg_get_constraintdef(pg_constraint_temp.oid) AS consrc
                   FROM pg_constraint_temp) s
          WHERE s.consrc ~~* '%ndims(% = %'::text) sn ON sn.connamespace = n.oid AND sn.conrelid = c.oid AND (a.attnum = ANY (sn.conkey))
     LEFT JOIN ( SELECT s.connamespace,
            s.conrelid,
            s.conkey,
            replace(replace(split_part(s.consrc, ' = '::text, 2), ')'::text, ''::text), '('::text, ''::text)::integer AS srid
           FROM ( SELECT pg_constraint_temp.connamespace,
                    pg_constraint_temp.conrelid,
                    pg_constraint_temp.conkey,
                    pg_get_constraintdef(pg_constraint_temp.oid) AS consrc
                   FROM pg_constraint_temp) s
          WHERE s.consrc ~~* '%srid(% = %'::text) sr ON sr.connamespace = n.oid AND sr.conrelid = c.oid AND (a.attnum = ANY (sr.conkey))
  WHERE (c.relkind = ANY (ARRAY['r'::"char", 'v'::"char", 'm'::"char", 'f'::"char", 'p'::"char"])) AND NOT c.relname = 'raster_columns'::name AND t.typname = 'geometry'::name AND NOT pg_is_other_temp_schema(c.relnamespace) AND has_table_privilege(c.oid, 'SELECT'::text);
;    

Thanks.

Lars
>Hi
>
>We are running
>postgres server 12.6 (Ubuntu 12.6-0ubuntu0.20.04.1))
>POSTGIS="3.1.1 aaf4c79" [EXTENSION] PGSQL="120" GEOS="3.9.0-CAPI-1.16.2" SFCGAL="1.3.7" PROJ="7.2.1" GDAL="GDAL 3.2.1, released 2020/12/29" LIBXML="2.9.10" LIBJSON="0.13.1" LIBPROTOBUF="1.3.3" WAGYU="0.5.0 (Internal)" TOPOLOGY RASTER
>
>The problem is that it takes more than 10 hours (duration: 36885527.039) to browse tables geometry from qgis https://explain.depesz.com/s/MxAN#bquery with high load on the server.
>We have at least 45 jobs running and around 70% CPU load on the server.
>
>Then I started to check views/tables involved and found that the view geometry_columns seems to be using a very long time
>'explain analyze select * from geometry_columns' have been waiting for more than 2 hours now, will paste the result to https://explain.depesz.com when done.
>
>While waiting I created temp table for the system tables involved in view geometry_columns like this
>
>create temp table pg_attribute_temp as select attcollation,attrelid,attname,atttypid,attstattarget,attlen,attnum,attndims,attcacheoff,atttypmod,attbyval,attstorage,attalign,attnotnull,atthasdef,atthasmissing,attidentity,attgenerated,attisdropped,attislocal,attinhcount,attacl,attoptions,attfdwoptions from pg_attribute;
>create temp table pg_namespace_temp as select * from pg_namespace;
>create temp table pg_type_temp as select * from pg_type;
>create temp table pg_constraint_temp as select * from pg_constraint;
>
>SELECT 1702623
>Time: 42552.899 ms (00:42.553)
>SELECT 841
>Time: 132.595 ms
>SELECT 245239
>Time: 3378.395 ms (00:03.378)
>SELECT 9575
>Time: 205.036 ms
>
>That did not take very long time.
>
>Then created geometry_columns_temp_no_rules using those new temp tables.
>
>explain analyze select * from geometry_columns_temp_no_rules
>
>And that takes less than 6 seconds with no indexes. Here is the explain from https://explain.depesz.com/s/yBSd
>
>Why is temp tables with no indexes much faster system tables with indexes ?
>
>(I do not think it's related to not having rules I tested to crated a view using system tables with but with no rules and that hanged for more that 15 minuttes an dthen I gave up)
>
>Here is the view def that I used.
>
>CREATE VIEW geometry_columns_temp_no_rules AS
>SELECT current_database()::character varying(256) AS f_table_catalog,
>    n.nspname AS f_table_schema,
>    c.relname AS f_table_name,
>    a.attname AS f_geometry_column,
>    COALESCE(postgis_typmod_dims(a.atttypmod), sn.ndims, 2) AS coord_dimension,
>    COALESCE(NULLIF(postgis_typmod_srid(a.atttypmod), 0), sr.srid, 0) AS srid,
>    replace(replace(COALESCE(NULLIF(upper(postgis_typmod_type(a.atttypmod)), 'GEOMETRY'::text), st.type, 'GEOMETRY'::text), 'ZM'::text, ''::text), 'Z'::text, ''::text)::character varying(30) AS type
>   FROM pg_class c
>     JOIN pg_attribute_temp a ON a.attrelid = c.oid AND NOT a.attisdropped
>     JOIN pg_namespace_temp n ON c.relnamespace = n.oid
>     JOIN pg_type_temp t ON a.atttypid = t.oid
>     LEFT JOIN ( SELECT s.connamespace,
>            s.conrelid,
>            s.conkey,
>            replace(split_part(s.consrc, ''''::text, 2), ')'::text, ''::text) AS type
>           FROM ( SELECT pg_constraint_temp.connamespace,
>                    pg_constraint_temp.conrelid,
>                    pg_constraint_temp.conkey,
>                    pg_get_constraintdef(pg_constraint_temp.oid) AS consrc
>                   FROM pg_constraint_temp) s
>          WHERE s.consrc ~~* '%geometrytype(% = %'::text) st ON st.connamespace = n.oid AND st.conrelid = c.oid AND (a.attnum = ANY (st.conkey))
>     LEFT JOIN ( SELECT s.connamespace,
>            s.conrelid,
>            s.conkey,
>            replace(split_part(s.consrc, ' = '::text, 2), ')'::text, ''::text)::integer AS ndims
>           FROM ( SELECT pg_constraint_temp.connamespace,
>                    pg_constraint_temp.conrelid,
>                    pg_constraint_temp.conkey,
>                    pg_get_constraintdef(pg_constraint_temp.oid) AS consrc
>                   FROM pg_constraint_temp) s
>          WHERE s.consrc ~~* '%ndims(% = %'::text) sn ON sn.connamespace = n.oid AND sn.conrelid = c.oid AND (a.attnum = ANY (sn.conkey))
>     LEFT JOIN ( SELECT s.connamespace,
>            s.conrelid,
>            s.conkey,
>            replace(replace(split_part(s.consrc, ' = '::text, 2), ')'::text, ''::text), '('::text, ''::text)::integer AS srid
>           FROM ( SELECT pg_constraint_temp.connamespace,
>                    pg_constraint_temp.conrelid,
>                    pg_constraint_temp.conkey,
>                    pg_get_constraintdef(pg_constraint_temp.oid) AS consrc
>                   FROM pg_constraint_temp) s
>          WHERE s.consrc ~~* '%srid(% = %'::text) sr ON sr.connamespace = n.oid AND sr.conrelid = c.oid AND (a.attnum = ANY (sr.conkey))
>  WHERE (c.relkind = ANY (ARRAY['r'::"char", 'v'::"char", 'm'::"char", 'f'::"char", 'p'::"char"])) AND NOT c.relname = 'raster_columns'::name AND t.typname = 'geometry'::name AND NOT pg_is_other_temp_schema(c.relnamespace) AND has_table_privilege(c.oid, 'SELECT'::text);
>;    
>
>Thanks.
>
>Lars
Hi

I did another test.

Here ( https://explain.depesz.com/s/H7f9 ) I use pg_attribute_temp and we use around 6 seconds

But in this query (https://explain.depesz.com/s/Op7i) I use pg_attribute system table directly and execution time is around 50 seconds

The explain analyze is still running on select * from geometry_columns.

Thanks

Lars
On Wed, Mar 23, 2022 at 09:44:09AM +0000, Lars Aksel Opsahl wrote:
> Why is temp tables with no indexes much faster system tables with indexes ?

I think the "temp table" way is accidentally faster due to having no
statistics, not because it has no indexes.  If you run ANALYZE, you may hit the
same issue (or, maybe you just need to VACUUM ANALYZE your system catalogs).

-- 
Justin






>From: Justin Pryzby <pryzby@telsasoft.com>Sent: Wednesday, March 23, 2022 2:19 PMTo: Lars Aksel Opsahl <Lars.Opsahl@nibio.no>Cc: pgsql-performance@lists.postgresql.org <pgsql-performance@lists.postgresql.org>Subject: Re: Using system tables directly takes many hours, using temp tables with no indexes takes a few seconds for geometry_columns view.
>
>On Wed, Mar 23, 2022 at 09:44:09AM +0000, Lars Aksel Opsahl wrote:
>> Why is temp tables with no indexes much faster system tables with indexes ?
>
>I think the "temp table" way is accidentally faster due to having no
>statistics, not because it has no indexes.  If you run ANALYZE, you may hit the
>same issue (or, maybe you just need to VACUUM ANALYZE your system catalogs).

Hi

I had tested this in the morning and it did not work  (VACUUM ANALYZE pg_class; VACUUM ANALYZE pg_attribute; VACUUM ANALYZE pg_namespace; VACUUM ANALYZE raster_columns; VACUUM ANALYZE pg_type; )

But now it seemed to work maybe one time, the 50 secs query (https://explain.depesz.com/s/Op7iwas down to 6 secs, but just to be sure I rerun the query one more time and we where where back to execution time of 50 seconds.

It seems like stats may be valid for just some few seconds  before having to run analyze again and that takes a long time.

The 45 jobs running on the server are creating a lot temp tables and maybe some unlogged tables

We can not run run analyze in every job because this may be many hundred thounsed jobs that we need to run.

Does this mean that we can not use temp tables in this extent and in stead use https://www.postgresql.org/docs/12/queries-with.html ?
But the problem with "with" is that we can not create indexes.

Or is a option to exclude temp tables geometry_columns in effective way , but that will probably cause problems if we create temp table in jobs where we use postgis.so that not a good solution either,

Thanks

Lars

From: Justin Pryzby <pryzby@telsasoft.com>
Sent: Wednesday, March 23, 2022 2:19 PM

>On Wed, Mar 23, 2022 at 09:44:09AM +0000, Lars Aksel Opsahl wrote:
>> Why is temp tables with no indexes much faster system tables with indexes ?
>
>I think the "temp table" way is accidentally faster due to having no
>statistics, not because it has no indexes.  If you run ANALYZE, you may hit the
>same issue (or, maybe you just need to VACUUM ANALYZE your system catalogs).

Hi

Sorry I misread your mail you are totally right.

Before I do vacuum we have these execution Time: 9422.964 ms (00:09.423)

The vacuum as you suggested
VACUUM ANALYZE pg_attribute_temp;
VACUUM ANALYZE pg_namespace_temp;
VACUUM ANALYZE pg_type_temp;
VACUUM ANALYZE pg_constraint_temp;

I can wait for 10 minutes and it just hangs, yes so we have the same problem as suggested.

The original query "select * from geometry_columns" finally finished after almost 9 hours .

The plan is here https://explain.depesz.com/s/jGXf

I did some more testing and if remove LEFT JOIN to pg_constraint in runs in less than a minute and return  75219 rows.

WITH geo_column_list AS (SELECT
current_database()::character varying(256) AS f_table_catalog,
    n.nspname AS f_table_schema,
    n.oid AS n_oid,
    c.relname AS f_table_name,
    c.oid AS c_oid,
    a.attname AS f_geometry_column,
    a.attnum AS a_attnum
    --COALESCE(postgis_typmod_dims(a.atttypmod), sn.ndims, 2) AS coord_dimension,
    --COALESCE(NULLIF(postgis_typmod_srid(a.atttypmod), 0), sr.srid, 0) AS srid,
    --replace(replace(COALESCE(NULLIF(upper(postgis_typmod_type(a.atttypmod)), 'GEOMETRY'::text), st.type, 'GEOMETRY'::text), 'ZM'::text, ''::text), 'Z'::text, ''::text)::character varying(30) AS type
   FROM pg_class c
     JOIN pg_attribute a ON a.attrelid = c.oid AND NOT a.attisdropped
     JOIN pg_namespace n ON c.relnamespace = n.oid
     JOIN pg_type t ON a.atttypid = t.oid
  WHERE (c.relkind = ANY (ARRAY['r'::"char", 'v'::"char", 'm'::"char", 'f'::"char", 'p'::"char"]))
  AND NOT c.relname = 'raster_columns'::name AND t.typname = 'geometry'::name
  AND NOT pg_is_other_temp_schema(c.relnamespace)
  AND has_table_privilege(c.oid, 'SELECT'::text)
)
SELECT * FROM geo_column_list;

But if I try this with LEFT JOIN it hangs for hours it seems like.

WITH geo_column_list AS (SELECT
current_database()::character varying(256) AS f_table_catalog,
    n.nspname AS f_table_schema,
    n.oid AS n_oid,
    c.relname AS f_table_name,
    c.oid AS c_oid,
    a.attname AS f_geometry_column,
    a.attnum AS a_attnum,
    a.atttypmod
    --COALESCE(NULLIF(postgis_typmod_srid(a.atttypmod), 0), sr.srid, 0) AS srid,
    --replace(replace(COALESCE(NULLIF(upper(postgis_typmod_type(a.atttypmod)), 'GEOMETRY'::text), st.type, 'GEOMETRY'::text), 'ZM'::text, ''::text), 'Z'::text, ''::text)::character varying(30) AS type
   FROM pg_class c
     JOIN pg_attribute a ON a.attrelid = c.oid AND NOT a.attisdropped
     JOIN pg_namespace n ON c.relnamespace = n.oid
     JOIN pg_type t ON a.atttypid = t.oid
  WHERE (c.relkind = ANY (ARRAY['r'::"char", 'v'::"char", 'm'::"char", 'f'::"char", 'p'::"char"]))
  AND NOT c.relname = 'raster_columns'::name AND t.typname = 'geometry'::name
  AND NOT pg_is_other_temp_schema(c.relnamespace)
  AND has_table_privilege(c.oid, 'SELECT'::text)
),
pg_constraint_list AS (SELECT pg_constraint.connamespace,
                    pg_constraint.conrelid,
                    pg_constraint.conkey,
                    pg_get_constraintdef(pg_constraint.oid) AS consrc
                   FROM pg_constraint, geo_column_list
WHERE connamespace = n_oid AND conrelid = c_oid AND (a_attnum = ANY (conkey))
)
,
geo_column_list_full AS (SELECT * FROM geo_column_list
     LEFT JOIN ( SELECT s.connamespace,
            s.conrelid,
            s.conkey,
            replace(split_part(s.consrc, ''''::text, 2), ')'::text, ''::text) AS type
           FROM pg_constraint_list s
          WHERE s.consrc ~~* '%geometrytype(% = %'::text) st ON TRUE
     LEFT JOIN ( SELECT s.connamespace,
            s.conrelid,
            s.conkey,
            replace(split_part(s.consrc, ' = '::text, 2), ')'::text, ''::text)::integer AS ndims
           FROM pg_constraint_list s
          WHERE s.consrc ~~* '%ndims(% = %'::text) sn ON TRUE
     LEFT JOIN ( SELECT s.connamespace,
            s.conrelid,
            s.conkey,
            replace(replace(split_part(s.consrc, ' = '::text, 2), ')'::text, ''::text), '('::text, ''::text)::integer AS srid
           FROM pg_constraint_list s
          WHERE s.consrc ~~* '%srid(% = %'::text) sr ON TRUE
)
SELECT *,
    COALESCE(postgis_typmod_dims(atttypmod), ndims, 2) AS coord_dimension
FROM geo_column_list_full;

but if I try this it return 648 rows in less than second

WITH geo_column_list AS (SELECT
current_database()::character varying(256) AS f_table_catalog,
    n.nspname AS f_table_schema,
    n.oid AS n_oid,
    c.relname AS f_table_name,
    c.oid AS c_oid,
    a.attname AS f_geometry_column,
    a.attnum AS a_attnum,
    a.atttypmod
    --COALESCE(NULLIF(postgis_typmod_srid(a.atttypmod), 0), sr.srid, 0) AS srid,
    --replace(replace(COALESCE(NULLIF(upper(postgis_typmod_type(a.atttypmod)), 'GEOMETRY'::text), st.type, 'GEOMETRY'::text), 'ZM'::text, ''::text), 'Z'::text, ''::text)::character varying(30) AS type
   FROM pg_class c
     JOIN pg_attribute a ON a.attrelid = c.oid AND NOT a.attisdropped
     JOIN pg_namespace n ON c.relnamespace = n.oid
     JOIN pg_type t ON a.atttypid = t.oid
  WHERE (c.relkind = ANY (ARRAY['r'::"char", 'v'::"char", 'm'::"char", 'f'::"char", 'p'::"char"]))
  AND NOT c.relname = 'raster_columns'::name AND t.typname = 'geometry'::name
  AND NOT pg_is_other_temp_schema(c.relnamespace)
  AND has_table_privilege(c.oid, 'SELECT'::text)
),
pg_constraint_list AS (SELECT pg_constraint.connamespace,
                    pg_constraint.conrelid,
                    pg_constraint.conkey,
                    pg_get_constraintdef(pg_constraint.oid) AS consrc
                   FROM pg_constraint, geo_column_list
WHERE connamespace = n_oid AND conrelid = c_oid AND (a_attnum = ANY (conkey))
)
SELECT *
FROM pg_constraint_list;

Thanks.

Lars