Here is a script that reproduces the issue conveniently.
This is targetted for pg11, might also work on pg10 (uses declarative
partitioning as a convenient way to spread traffic over many tables).
What it does is:
- spin up a new server in ./data1
- populate some data
- start a base backup into ./data2 (but no copying files yet)
- generate a bunch of activity
- actually copy the files for the backup
- finish the backup
- spin up the recovery server in ./data2
(there's some dead code in the script for testing torn-page cases, not
used at present and can be ignored)
Looking at the ./postgres2.log file created for the server in ./data2,
what I see is a ton of this:
LOG: redo starts at 0/15000028
WARNING: xlog min recovery request 0/41420F58 is past current point 0/151D1248
CONTEXT: writing block 2 of relation base/12719/16591_fsm
WAL redo at 0/151D01F0 for Heap/HOT_UPDATE: off 76 xmax 772 ; new off 79 xmax 0
WARNING: xlog min recovery request 0/458157D8 is past current point 0/15392030
CONTEXT: writing block 0 of relation base/12719/16387_vm
WAL redo at 0/15391FD0 for Heap/INSERT: off 14
WARNING: xlog min recovery request 0/42B23FA0 is past current point 0/154A8408
CONTEXT: writing block 2 of relation base/12719/16459_fsm
WAL redo at 0/154A7350 for Heap/HOT_UPDATE: off 43 xmax 776 ; new off 79 xmax 0
WARNING: xlog min recovery request 0/41406A60 is past current point 0/155CD2D0
CONTEXT: writing block 2 of relation base/12719/16393_fsm
WAL redo at 0/155CC230 for Heap/HOT_UPDATE: off 27 xmax 778 ; new off 79 xmax 0
[repeat some dozens of times]
The noise is worst with wal_log_hints=on (as given in the script);
turning that off doesn't solve the issue, there are still warnings given
for _vm pages.
--
Andrew (irc:RhodiumToad)
#!/bin/sh
: ${PSQL:=${BINDIR:+$BINDIR/}psql}
: ${PG_CTL:=${BINDIR:+$BINDIR/}pg_ctl}
: ${PORT1:=65432}
: ${PORT2:=$(( $PORT1 + 1 ))}
DATADIR=${1:-./data1}
DATADIR2=${2:-./data2}
echo "starting: $0 $*" >&2
errfail(){ echo "$*" >&2; exit 1; }
diag(){ echo "$*" >&2; }
find_with_exclusions(){
find . \( -name 'pg_xlog' \
-o -name 'pg_wal' \
-o -name 'pgsql_tmp' \
-o -name 'pg_dynshmem' \
-o -name 'pg_notify' \
-o -name 'pg_serial' \
-o -name 'pg_snapshots' \
-o -name 'pg_replslot' \
-o -name 'pg_stat_tmp' \
-o -name 'pg_subtrans' \
-o -name '*.pid' \
\) -prune \
-o \
-type f \
! -name '*.pid' \
! -name 'pg_internal.init' \
! -name 'postmaster.opts' \
! -name '*.core' \
! -name 'pgsql_tmp*' \
${1+"$@"} \
-print0
}
make_dirtree(){
(cd "$PGDATA" &&
find . -type d \
! -name '.' \
! -name 'pgsql_tmp' \
-print0 \
-name 'pg_replslot' -prune
) | (cd "$DESTDIR" && xargs -0 mkdir -m 'go=')
}
copy_data(){
(cd "$PGDATA" &&
find_with_exclusions ) |
xargs -0 -n1 sh -c 'cp -p -- "$PGDATA/$1" "$DESTDIR/$1"' arg0
}
copy_with_tearing(){
(cd "$PGDATA" && find_with_exclusions ! -name 'pg_control' ) |
xargs -0 -n1 perl -MFcntl=SEEK_SET -e '
my $fn = shift;
open(my $i, "<:raw", $ENV{PGDATA}."/".$fn) or die "$!";
open(my $o, "+<:raw", $ENV{DESTDIR}."/".$fn) or die "open: $!";
my ($p,$d) = (0,'');
while (read($i,$d,8192) == 8192) {
sysseek($o, $p, SEEK_SET) or die "seek: $!";
syswrite($o, $d, 128) or die "write: $!";
$p += 8192;
}
close($o) or die "close: $!";
'
}
case "$1" in
-docopy1)
diag "PGDATA = $PGDATA" "DESTDIR = $DESTDIR"
diag "making dirtree"
make_dirtree
# if testing torn pages, do copy_data here
diag "done (-docopy1)."
exit 0
;;
-docopy2)
diag "PGDATA = $PGDATA" "DESTDIR = $DESTDIR"
diag "starting copy"
# if testing torn pages, do copy_with_tearing here
copy_data
diag "done (-docopy)."
exit 0
;;
-finish)
printf "%s" "$LABELFILE" >"$DESTDIR/backup_label"
[ -z "$SPCMAPFILE" ] || errfail "tablespace map is not empty"
echo "done (-finish)." >&2
exit 0
;;
esac
script=$0
[ "${script#*\'}" != "${script}" ] && errfail "path has single-quote character"
qscript="'$script'"
DESTDIR=$DATADIR2
export DESTDIR
[ -d "$DESTDIR" ] || mkdir -m 'go=' -- "$DESTDIR" || errfail "no destination dir $DESTDIR"
${PG_CTL} initdb -D "$DATADIR" -o '-E UTF8 --no-locale'
${PG_CTL} start -D "$DATADIR" -l postgres1.log \
-o "-c port=$PORT1" \
-o "-c listen_addresses=''" \
-o "-c wal_log_hints=on" \
-o "-c checkpoint_timeout=30s"
${PSQL} -Xq -p "$PORT1" -d postgres -f - <<'EOF'
\echo Creating tables
create table dat (id integer, val text) partition by range (id);
select format('create table %I
partition of dat for values from (%L) to (%L)
with (fillfactor=50);',
'dat'||i, i*100000, (i+1)*100000)
from generate_series(0,49) i \gexec
\echo populating tables
insert into dat select i, 'value '||i from generate_series(1,4999999,2) i;
\echo creating indexes
create index on dat (id);
create function randid() returns integer language sql volatile
as $$ select floor(random()^4 * 5000000)::integer $$;
\echo vacuuming
vacuum analyze;
\echo checkpoint
checkpoint;
\echo done
EOF
${PSQL} -X -v script="$qscript" -p "$PORT1" -d postgres -f - <<'EOF'
select pg_create_physical_replication_slot('mytest', true);
select current_setting('data_directory') as ddir \gset
\setenv PGDATA :ddir
select oid from pg_database where datname=current_database() \gset
\setenv DBDIR :oid
select pg_start_backup('mytest', true, false);
\set cmd '\\! ' :script
:cmd -docopy1
\echo Generating WAL traffic, this will take about 3 mins
do $$
declare et timestamptz := clock_timestamp() + interval '1 minute';
begin
loop
update dat set val = case when random() > 0.1 then val || repeat('foo',20) else '' end
where id in (select randid() from generate_series(1,200));
commit;
insert into dat select randid(), 'some value'
from generate_series(1,200);
delete from dat
where id in (select (select randid()) + i
from generate_series(0,99) i);
commit;
perform count(*)
from (select floor(random()*5000000)::integer as i) s,
dat
where id >= i and id < i+100000;
commit;
exit when clock_timestamp() >= et;
end loop;
end;
$$;
\echo 2 mins remaining
\g
\echo 1 min remaining
\g
\echo traffic generation done
:cmd -docopy2
select * from pg_stop_backup(false) \gset
\setenv LABELFILE :labelfile
\setenv SPCMAPFILE :spcmapfile
:cmd -finish
EOF
diag "Starting server for recovery, this will take several minutes"
cat >$DESTDIR/recovery.conf <<EOF
primary_conninfo = 'port=$PORT1'
standby_mode=on
recovery_target='immediate'
primary_slot_name='mytest'
EOF
${PG_CTL} start -D "$DESTDIR" -l postgres2.log -w -t 300 \
-o "-c port=$PORT2" \
-o "-c listen_addresses=''" \
-o "-c shared_buffers=512kB" \
-o "-c checkpoint_timeout=30s"
diag "done. See postgres2.log for output."
exit 0