Re: [COMMITTERS] pgsql: On Windows, when a file is deleted and another process still has

Поиск
Список
Период
Сортировка
От Heikki Linnakangas
Тема Re: [COMMITTERS] pgsql: On Windows, when a file is deleted and another process still has
Дата
Msg-id 4AAA0DFD.70500@enterprisedb.com
обсуждение исходный текст
Ответы Re: Re: [COMMITTERS] pgsql: On Windows, when a file is deleted and another process still has  (Tom Lane <tgl@sss.pgh.pa.us>)
Re: Re: [COMMITTERS] pgsql: On Windows, when a file is deleted and another process still has  (Magnus Hagander <magnus@hagander.net>)
Список pgsql-hackers
(moving to pgsql-hackers)

Tom Lane wrote:
> Heikki Linnakangas <heikki.linnakangas@enterprisedb.com> writes:
>> A completely different approach would be to treat any failure on all
>> platforms as non-fatal. We shouldn't really cut the checkpoint short if
>> recycling a WAL file fails, whatever the reason. That seems like a more
>> robust approach than trying to guess which error codes are OK to ignore.
>
> I could live with that, as long as it gets logged.

Here's a patch implementing that, and changing pgrename() to check for
ERROR_SHARING_VIOLATION and ERROR_LOCK_VIOLATION like pgwin32_open()
does, instead of ERROR_ACCESS_DENIED.

I wonder if we should reduce the timeout in pgrename(). It's 30 s at the
moment, but apparently it hasn't been working correctly, failing
immediately instead if the file is locked. And no-one has complained
about that. But if we sleep in InstallXLogFileSegment(), we're holding
ControlFileLock, which can force other backends to wait, and that might
cause more harm than just failing outright. Something like 5 seconds
might be more appropriate, giving anti-virus and similar software some
time to give up the lock, but not too much to cause long delays. 5
seconds should be enough for anti-virus or backup software to process a
file under normal circumstances.

OTOH, pgwin32_open() uses 30 s, with the same potential for lockups, and
no-one has complained about that either. The bottom line is that if
another program keeps a file locked for any extended period of time,
you're going to have trouble one way or another.

--
  Heikki Linnakangas
  EnterpriseDB   http://www.enterprisedb.com
*** a/src/backend/access/transam/xlog.c
--- b/src/backend/access/transam/xlog.c
***************
*** 2262,2273 **** XLogFileInit(uint32 log, uint32 seg,
                                  *use_existent, &max_advance,
                                  use_lock))
      {
!         /* No need for any more future segments... */
          unlink(tmppath);
      }

-     elog(DEBUG2, "done creating and filling new WAL file");
-
      /* Set flag to tell caller there was no existent file */
      *use_existent = false;

--- 2262,2275 ----
                                  *use_existent, &max_advance,
                                  use_lock))
      {
!         /*
!          * No need for any more future segments, or InstallXLogFileSegment()
!          * failed to rename the file into place. If the rename failed, opening
!          * the file below will fail.
!          */
          unlink(tmppath);
      }

      /* Set flag to tell caller there was no existent file */
      *use_existent = false;

***************
*** 2280,2285 **** XLogFileInit(uint32 log, uint32 seg,
--- 2282,2289 ----
             errmsg("could not open file \"%s\" (log file %u, segment %u): %m",
                    path, log, seg)));

+     elog(DEBUG2, "done creating and filling new WAL file");
+
      return fd;
  }

***************
*** 2409,2418 **** XLogFileCopy(uint32 log, uint32 seg,
   * place.  This should be TRUE except during bootstrap log creation.  The
   * caller must *not* hold the lock at call.
   *
!  * Returns TRUE if file installed, FALSE if not installed because of
!  * exceeding max_advance limit.  On Windows, we also return FALSE if we
!  * can't rename the file into place because someone's got it open.
!  * (Any other kind of failure causes ereport().)
   */
  static bool
  InstallXLogFileSegment(uint32 *log, uint32 *seg, char *tmppath,
--- 2413,2421 ----
   * place.  This should be TRUE except during bootstrap log creation.  The
   * caller must *not* hold the lock at call.
   *
!  * Returns TRUE if the file was installed successfully. FALSE indicates that
!  * max_advance limit was exceeded, or an error occurred while renaming the
!  * file into place.
   */
  static bool
  InstallXLogFileSegment(uint32 *log, uint32 *seg, char *tmppath,
***************
*** 2460,2490 **** InstallXLogFileSegment(uint32 *log, uint32 *seg, char *tmppath,
       */
  #if HAVE_WORKING_LINK
      if (link(tmppath, path) < 0)
!         ereport(ERROR,
                  (errcode_for_file_access(),
                   errmsg("could not link file \"%s\" to \"%s\" (initialization of log file %u, segment %u): %m",
                          tmppath, path, *log, *seg)));
      unlink(tmppath);
  #else
      if (rename(tmppath, path) < 0)
      {
! #ifdef WIN32
! #if !defined(__CYGWIN__)
!         if (GetLastError() == ERROR_ACCESS_DENIED)
! #else
!         if (errno == EACCES)
! #endif
!         {
!             if (use_lock)
!                 LWLockRelease(ControlFileLock);
!             return false;
!         }
! #endif   /* WIN32 */
!
!         ereport(ERROR,
                  (errcode_for_file_access(),
                   errmsg("could not rename file \"%s\" to \"%s\" (initialization of log file %u, segment %u): %m",
                          tmppath, path, *log, *seg)));
      }
  #endif

--- 2463,2488 ----
       */
  #if HAVE_WORKING_LINK
      if (link(tmppath, path) < 0)
!     {
!         if (use_lock)
!             LWLockRelease(ControlFileLock);
!         ereport(LOG,
                  (errcode_for_file_access(),
                   errmsg("could not link file \"%s\" to \"%s\" (initialization of log file %u, segment %u): %m",
                          tmppath, path, *log, *seg)));
+         return false;
+     }
      unlink(tmppath);
  #else
      if (rename(tmppath, path) < 0)
      {
!         if (use_lock)
!             LWLockRelease(ControlFileLock);
!         ereport(LOG,
                  (errcode_for_file_access(),
                   errmsg("could not rename file \"%s\" to \"%s\" (initialization of log file %u, segment %u): %m",
                          tmppath, path, *log, *seg)));
+         return false;
      }
  #endif

***************
*** 3128,3146 **** RemoveOldXlogFiles(uint32 log, uint32 seg, XLogRecPtr endptr)
                       */
                      snprintf(newpath, MAXPGPATH, "%s.deleted", path);
                      if (rename(path, newpath) != 0)
!                         ereport(ERROR,
                                  (errcode_for_file_access(),
!                                  errmsg("could not rename old transaction log file \"%s\"",
                                          path)));
                      rc = unlink(newpath);
  #else
                      rc = unlink(path);
  #endif
                      if (rc != 0)
!                         ereport(ERROR,
                                  (errcode_for_file_access(),
                                   errmsg("could not remove old transaction log file \"%s\": %m",
                                          path)));
                      CheckpointStats.ckpt_segs_removed++;
                  }

--- 3126,3150 ----
                       */
                      snprintf(newpath, MAXPGPATH, "%s.deleted", path);
                      if (rename(path, newpath) != 0)
!                     {
!                         ereport(LOG,
                                  (errcode_for_file_access(),
!                                  errmsg("could not rename old transaction log file \"%s\": %m",
                                          path)));
+                         continue;
+                     }
                      rc = unlink(newpath);
  #else
                      rc = unlink(path);
  #endif
                      if (rc != 0)
!                     {
!                         ereport(LOG,
                                  (errcode_for_file_access(),
                                   errmsg("could not remove old transaction log file \"%s\": %m",
                                          path)));
+                         continue;
+                     }
                      CheckpointStats.ckpt_segs_removed++;
                  }

*** a/src/port/dirmod.c
--- b/src/port/dirmod.c
***************
*** 129,139 **** pgrename(const char *from, const char *to)
  #endif
      {
  #if defined(WIN32) && !defined(__CYGWIN__)
!         if (GetLastError() != ERROR_ACCESS_DENIED)
  #else
          if (errno != EACCES)
  #endif
-             /* set errno? */
              return -1;
          if (++loops > 300)        /* time out after 30 sec */
              return -1;
--- 129,143 ----
  #endif
      {
  #if defined(WIN32) && !defined(__CYGWIN__)
!         DWORD        err = GetLastError();
!
!         _dosmaperr(err);
!
!         if (err != ERROR_SHARING_VIOLATION &&
!             err != ERROR_LOCK_VIOLATION)
  #else
          if (errno != EACCES)
  #endif
              return -1;
          if (++loops > 300)        /* time out after 30 sec */
              return -1;

В списке pgsql-hackers по дате отправления:

Предыдущее
От: Dimitri Fontaine
Дата:
Сообщение: Re: Ragged CSV import
Следующее
От: Itagaki Takahiro
Дата:
Сообщение: Why does LOG have higher priority than ERROR and WARNING?