Hi,
I got a few reports from users that the postmaster disappears for an unknown
reason. Inspecting the postmaster log, I found that the postmaster exited at:
if (select(nSockets, &rmask, &wmask, (fd_set *) NULL, (struct timeval *) NULL) < 0)
{
    if (errno == EINTR)
        continue;
    fprintf(stderr, "%s: ServerLoop: select failed: %s\n", progname,
            strerror(errno));
    return STATUS_ERROR;    <-- here
}
In this case errno was set to ECHILD, which makes the postmaster
exit. This could happen if SIGCHLD is raised between the select() call and
the next if (errno == ...) statement. One solution would be to
ignore ECHILD as well as EINTR. Patches for this are included below. If
there are no objections, I will commit them to both the stable and current
trees.
*** postgresql-6.5.1/src/backend/postmaster/postmaster.c~ Thu Jul 8 02:17:48 1999
--- postgresql-6.5.1/src/backend/postmaster/postmaster.c Thu Sep 9 10:14:30 1999
***************
*** 709,719 **** if (select(nSockets, &rmask, &wmask, (fd_set *) NULL, (struct timeval *)
NULL)< 0) {
! if (errno == EINTR) continue;
! fprintf(stderr, "%s: ServerLoop: select failed: %s\n", progname, strerror(errno));
! return STATUS_ERROR; } /*
--- 709,729 ---- if (select(nSockets, &rmask, &wmask, (fd_set *) NULL, (struct timeval *)
NULL)< 0) {
! switch(errno) {
! case EINTR: continue;
! break;
! case ECHILD:
! fprintf(stderr, "%s: ServerLoop: ignoring ECHILD\n",
! progname);
! continue;
! break;
! default:
! fprintf(stderr, "%s: ServerLoop: select failed: %s\n", progname,
strerror(errno));
! return STATUS_ERROR;
! break;
! } } /*