Обсуждение: BUG #17334: Assert failed inside computeDistance() on gist index scanning

Поиск
Список
Период
Сортировка

BUG #17334: Assert failed inside computeDistance() on gist index scanning

От
PG Bug reporting form
Дата:
The following bug has been logged on the website:

Bug reference:      17334
Logged by:          Alexander Lakhin
Email address:      exclusion@gmail.com
PostgreSQL version: 14.1
Operating system:   Ubuntu 20.04
Description:

When executing the following queries:
CREATE TABLE point_tbl(f1 point);
CREATE INDEX gpointind ON point_tbl USING gist(f1);
INSERT INTO point_tbl SELECT ('0,0') FROM generate_series(1, 185);
INSERT INTO point_tbl VALUES ('0,NaN');
SET enable_seqscan=off; SELECT f1 <-> point '(0,0)' AS dist FROM point_tbl
ORDER BY dist;

I get a failed assertion with the backtrace:
Core was generated by `postgres: law regression [local] SELECT
                        '.
Program terminated with signal SIGABRT, Aborted.
#0  __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:50
50      ../sysdeps/unix/sysv/linux/raise.c: No such file or directory.
(gdb) bt
#0  __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:50
#1  0x00007ff58e5ff859 in __GI_abort () at abort.c:79
#2  0x000055fa508acaaf in ExceptionalCondition (conditionName=0x55fa509407ec
"box->low.y <= box->high.y", 
    errorType=0x55fa5094066a "FailedAssertion", fileName=0x55fa509406fa
"gistproc.c", lineNumber=1240) at assert.c:69
#3  0x000055fa501be673 in computeDistance (isLeaf=false, box=0x7ff5837ff428,
point=0x55fa5122ccf8) at gistproc.c:1240
#4  0x000055fa501bf265 in gist_point_distance (fcinfo=0x7ffdb02e05d0) at
gistproc.c:1466
#5  0x000055fa508b85b4 in FunctionCall5Coll (flinfo=0x55fa5123baa0,
collation=0, arg1=140727559259904, 
    arg2=94533591420152, arg3=15, arg4=600, arg5=140727559259823) at
fmgr.c:1241
#6  0x000055fa501ba284 in gistindex_keytest (scan=0x55fa5123b978,
tuple=0x7ff5837ff420, page=0x7ff5837fd480 "", 
    offset=2, recheck_p=0x7ffdb02e0779, recheck_distances_p=0x7ffdb02e077a)
at gistget.c:286
#7  0x000055fa501ba94c in gistScanPage (scan=0x55fa5123b978,
pageItem=0x7ffdb02e0830, myDistances=0x0, tbm=0x0, 
    ntids=0x0) at gistget.c:438
#8  0x000055fa501bb0f6 in gistgettuple (scan=0x55fa5123b978,
dir=ForwardScanDirection) at gistget.c:639
#9  0x000055fa502190c5 in index_getnext_tid (scan=0x55fa5123b978,
direction=ForwardScanDirection) at indexam.c:533
#10 0x000055fa504a6a85 in IndexOnlyNext (node=0x55fa5123a378) at
nodeIndexonlyscan.c:121
#11 0x000055fa5047b72a in ExecScanFetch (node=0x55fa5123a378,
accessMtd=0x55fa504a64d3 <IndexOnlyNext>, 
    recheckMtd=0x55fa504a6b31 <IndexOnlyRecheck>) at execScan.c:133
#12 0x000055fa5047b7cf in ExecScan (node=0x55fa5123a378,
accessMtd=0x55fa504a64d3 <IndexOnlyNext>, 
    recheckMtd=0x55fa504a6b31 <IndexOnlyRecheck>) at execScan.c:199
#13 0x000055fa504a6bfd in ExecIndexOnlyScan (pstate=0x55fa5123a378) at
nodeIndexonlyscan.c:317
#14 0x000055fa50477458 in ExecProcNodeFirst (node=0x55fa5123a378) at
execProcnode.c:463
#15 0x000055fa5046af44 in ExecProcNode (node=0x55fa5123a378) at
../../../src/include/executor/executor.h:257
#16 0x000055fa5046db62 in ExecutePlan (estate=0x55fa5123a140,
planstate=0x55fa5123a378, use_parallel_mode=false, 
    operation=CMD_SELECT, sendTuples=true, numberTuples=0,
direction=ForwardScanDirection, dest=0x55fa5122d308, 
    execute_once=true) at execMain.c:1551
#17 0x000055fa5046b67b in standard_ExecutorRun (queryDesc=0x55fa511f8ae0,
direction=ForwardScanDirection, count=0, 
    execute_once=true) at execMain.c:361
#18 0x000055fa5046b466 in ExecutorRun (queryDesc=0x55fa511f8ae0,
direction=ForwardScanDirection, count=0, 
    execute_once=true) at execMain.c:305
#19 0x000055fa506ee7c0 in PortalRunSelect (portal=0x55fa511c12c0,
forward=true, count=0, dest=0x55fa5122d308)
    at pquery.c:921
#20 0x000055fa506ee3de in PortalRun (portal=0x55fa511c12c0,
count=9223372036854775807, isTopLevel=true, run_once=true, 
    dest=0x55fa5122d308, altdest=0x55fa5122d308, qc=0x7ffdb02e0d70) at
pquery.c:765
#21 0x000055fa506e7255 in exec_simple_query (
    query_string=0x55fa5114e220 "SELECT f1 <-> point '(0,0)' AS dist FROM
point_tbl ORDER BY dist;") at postgres.c:1215
#22 0x000055fa506ec1ab in PostgresMain (dbname=0x55fa51179b88 "regression",
username=0x55fa51179b68 "law")
    at postgres.c:4498
#23 0x000055fa5060cd49 in BackendRun (port=0x55fa5116f640) at
postmaster.c:4594
#24 0x000055fa5060c5d0 in BackendStartup (port=0x55fa5116f640) at
postmaster.c:4322
#25 0x000055fa506083b8 in ServerLoop () at postmaster.c:1802
#26 0x000055fa50607b15 in PostmasterMain (argc=3, argv=0x55fa511484e0) at
postmaster.c:1474
#27 0x000055fa504f4d81 in main (argc=3, argv=0x55fa511484e0) at main.c:198

With a server compiled without asserts I get:
ERROR:  inconsistent point values

Reproduced on REL_10_STABLE..HEAD.


Re: BUG #17334: Assert failed inside computeDistance() on gist index scanning

От
Tomas Vondra
Дата:
On 12/12/21 17:00, PG Bug reporting form wrote:
> The following bug has been logged on the website:
> 
> Bug reference:      17334
> Logged by:          Alexander Lakhin
> Email address:      exclusion@gmail.com
> PostgreSQL version: 14.1
> Operating system:   Ubuntu 20.04
> Description:
> 
> When executing the following queries:
> CREATE TABLE point_tbl(f1 point);
> CREATE INDEX gpointind ON point_tbl USING gist(f1);
> INSERT INTO point_tbl SELECT ('0,0') FROM generate_series(1, 185);
> INSERT INTO point_tbl VALUES ('0,NaN');
> SET enable_seqscan=off; SELECT f1 <-> point '(0,0)' AS dist FROM point_tbl
> ORDER BY dist;
> 

FWIW I can reproduce this on master too. The failure happens because of 
NaN value in the index:

#3  0x00000000004d3e72 in computeDistance (isLeaf=false, 
box=0x760f5ce13b28, point=0x1f99608) at gistproc.c:1240
1240            Assert(box->low.y <= box->high.y);
(gdb) p box->high
$1 = {x = 0, y = nan(0x8000000000000)}

and indeed, pageinspect says this:

test=# select * from gist_page_items(get_raw_page('gpointind', 2), 
'gpointind'::regclass);
  itemoffset |  ctid   | itemlen | dead |      keys
------------+---------+---------+------+----------------
           1 | (0,94)  |      40 | f    | (f1)=((0,0))
           2 | (0,95)  |      40 | f    | (f1)=((0,0))
...
          92 | (0,185) |      40 | f    | (f1)=((0,0))
          93 | (1,1)   |      40 | f    | (f1)=((0,NaN))
(93 rows)

I'm not sure if the issue is in allowing the NaN to be added to the 
index, or not handling it correctly during the index scan.

It's interesting btree_gist does not have issues (for NaN in float8 
columns). It seems not to store NaN in the index,  It seems to replace 
them with tiny values, at least according to pageinspect.


regards

-- 
Tomas Vondra
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company



Re: BUG #17334: Assert failed inside computeDistance() on gist index scanning

От
Tom Lane
Дата:
Tomas Vondra <tomas.vondra@enterprisedb.com> writes:
> FWIW I can reproduce this on master too. The failure happens because of
> NaN value in the index:

Man, what a pain those are.

> I'm not sure if the issue is in allowing the NaN to be added to the
> index, or not handling it correctly during the index scan.

Surely omitting the entry from the index would lead to incorrect
answers.  Without any index, we get

regression=# CREATE TABLE point_tbl(f1 point);
CREATE TABLE
regression=# INSERT INTO point_tbl SELECT ('0,0') FROM generate_series(1, 2);
INSERT 0 2
regression=# INSERT INTO point_tbl VALUES ('0,NaN');
INSERT 0 1
regression=# SELECT f1, f1 <-> point '(0,0)' AS dist FROM point_tbl ORDER BY dist;
   f1    | dist
---------+------
 (0,0)   |    0
 (0,0)   |    0
 (0,NaN) |  NaN
(3 rows)

You can argue about where the NaN distance should sort, but
not about whether the row should appear at all.

> It's interesting btree_gist does not have issues (for NaN in float8
> columns). It seems not to store NaN in the index,  It seems to replace
> them with tiny values, at least according to pageinspect.

Yipes, that's even worse, if true.

            regards, tom lane



Re: BUG #17334: Assert failed inside computeDistance() on gist index scanning

От
Tomas Vondra
Дата:
On 12/13/21 00:37, Tom Lane wrote:
> Tomas Vondra <tomas.vondra@enterprisedb.com> writes:
>> FWIW I can reproduce this on master too. The failure happens because of
>> NaN value in the index:
> 
> Man, what a pain those are.
> 
>> I'm not sure if the issue is in allowing the NaN to be added to the
>> index, or not handling it correctly during the index scan.
> 
> Surely omitting the entry from the index would lead to incorrect
> answers.  Without any index, we get
> 
> regression=# CREATE TABLE point_tbl(f1 point);
> CREATE TABLE
> regression=# INSERT INTO point_tbl SELECT ('0,0') FROM generate_series(1, 2);
> INSERT 0 2
> regression=# INSERT INTO point_tbl VALUES ('0,NaN');
> INSERT 0 1
> regression=# SELECT f1, f1 <-> point '(0,0)' AS dist FROM point_tbl ORDER BY dist;
>     f1    | dist
> ---------+------
>   (0,0)   |    0
>   (0,0)   |    0
>   (0,NaN) |  NaN
> (3 rows)
> 
> You can argue about where the NaN distance should sort, but
> not about whether the row should appear at all.
> 
>> It's interesting btree_gist does not have issues (for NaN in float8
>> columns). It seems not to store NaN in the index,  It seems to replace
>> them with tiny values, at least according to pageinspect.
> 
> Yipes, that's even worse, if true. >

Yeah. I haven't looked at the code, but this is what I see:

   test=# create extension btree_gist ;
   CREATE EXTENSION
   test=# CREATE TABLE t(f1 double precision);
   CREATE TABLE
   test=# INSERT INTO t VALUES ('NaN'::float8);
   INSERT 0 1
   test=# CREATE INDEX idx ON t USING gist(f1);
   CREATE INDEX
   test=# select * from gist_page_items(get_raw_page('idx', 0), 
'idx'::regclass);
    itemoffset | ctid  | itemlen | dead |         keys
   ------------+-------+---------+------+-----------------------
             1 | (0,1) |      24 | f    | (f1)=(1.9187051e-316)
   (1 row)


test=# set enable_seqscan = on;
SET
test=# select * from t where f1 = 'NaN'::float8;
  f1
-----
  NaN
(1 row)


test=# set enable_seqscan = off;
SET
test=# select * from t where f1 = 'NaN'::float8;
  f1
----
(0 rows)


So yeah, that seems like an index corruption. Moreover:

test=# INSERT INTO t VALUES (0::float8);
test=# INSERT INTO t VALUES ('NaN'::float8);
test=# INSERT INTO t VALUES (0::float8);

test=# select * from t order by f1 <-> 0;
  f1
-----
  NaN
    0
    0
  NaN
(4 rows)


regards

-- 
Tomas Vondra
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company



Re: BUG #17334: Assert failed inside computeDistance() on gist index scanning

От
Kyotaro Horiguchi
Дата:
At Mon, 13 Dec 2021 03:32:18 +0100, Tomas Vondra <tomas.vondra@enterprisedb.com> wrote in 
> On 12/13/21 00:37, Tom Lane wrote:
> > Tomas Vondra <tomas.vondra@enterprisedb.com> writes:
> >> FWIW I can reproduce this on master too. The failure happens because
> >> of
> >> NaN value in the index:
> > Man, what a pain those are.
> > 
> >> I'm not sure if the issue is in allowing the NaN to be added to the
> >> index, or not handling it correctly during the index scan.
> > Surely omitting the entry from the index would lead to incorrect
> > answers.  Without any index, we get
> > regression=# CREATE TABLE point_tbl(f1 point);
> > CREATE TABLE
> > regression=# INSERT INTO point_tbl SELECT ('0,0') FROM
> > generate_series(1, 2);
> > INSERT 0 2
> > regression=# INSERT INTO point_tbl VALUES ('0,NaN');
> > INSERT 0 1
> > regression=# SELECT f1, f1 <-> point '(0,0)' AS dist FROM point_tbl
> > ORDER BY dist;
> >     f1    | dist
> > ---------+------
> >   (0,0)   |    0
> >   (0,0)   |    0
> >   (0,NaN) |  NaN
> > (3 rows)
> > You can argue about where the NaN distance should sort, but
> > not about whether the row should appear at all.

We difined NaN as larger value than any non-NaN and that seems a
not-bad compromise.  Anyway there seems to be no other way than
individually fixing every code path that use float types X( For
example, the attached "fixes" the case of the example only. However,
that is a kind of nerve-wrecking..

> >> It's interesting btree_gist does not have issues (for NaN in float8
> >> columns). It seems not to store NaN in the index,  It seems to replace
> >> them with tiny values, at least according to pageinspect.
> > Yipes, that's even worse, if true. >
> 
> Yeah. I haven't looked at the code, but this is what I see:
> 
>   test=# create extension btree_gist ;
>   CREATE EXTENSION
>   test=# CREATE TABLE t(f1 double precision);
>   CREATE TABLE
>   test=# INSERT INTO t VALUES ('NaN'::float8);
>   INSERT 0 1
>   test=# CREATE INDEX idx ON t USING gist(f1);
>   CREATE INDEX
>   test=# select * from gist_page_items(get_raw_page('idx', 0),
>   'idx'::regclass);
>    itemoffset | ctid  | itemlen | dead |         keys
>   ------------+-------+---------+------+-----------------------
>             1 | (0,1) |      24 | f    | (f1)=(1.9187051e-316)
>   (1 row)
> 
> 
> test=# set enable_seqscan = on;
> SET
> test=# select * from t where f1 = 'NaN'::float8;
>  f1
> -----
>  NaN
> (1 row)
> 
> 
> test=# set enable_seqscan = off;
> SET
> test=# select * from t where f1 = 'NaN'::float8;
>  f1
> ----
> (0 rows)
> 
> 
> So yeah, that seems like an index corruption. Moreover:
> 
> test=# INSERT INTO t VALUES (0::float8);
> test=# INSERT INTO t VALUES ('NaN'::float8);
> test=# INSERT INTO t VALUES (0::float8);
> 
> test=# select * from t order by f1 <-> 0;
>  f1
> -----
>  NaN
>    0
>    0
>  NaN
> (4 rows)

Too bad \^_^/

regards.

-- 
Kyotaro Horiguchi
NTT Open Source Software Center
diff --git a/src/backend/access/gist/gistproc.c b/src/backend/access/gist/gistproc.c
index d474612b77..19a7d5ef2b 100644
--- a/src/backend/access/gist/gistproc.c
+++ b/src/backend/access/gist/gistproc.c
@@ -1237,11 +1237,15 @@ computeDistance(bool isLeaf, BOX *box, Point *point)
     else if (point->x <= box->high.x && point->x >= box->low.x)
     {
         /* point is over or below box */
-        Assert(box->low.y <= box->high.y);
+        Assert(box->low.y <= box->high.y
+               || isnan(box->low.y) || isnan(box->high.y));
+
         if (point->y > box->high.y)
             result = float8_mi(point->y, box->high.y);
         else if (point->y < box->low.y)
             result = float8_mi(box->low.y, point->y);
+        else if (isnan(point->y) || isnan(box->high.y) || isnan(box->low.y))
+            result = get_float8_nan();
         else
             elog(ERROR, "inconsistent point values");
     }