Re: [PATCH STABLE] cleanup: always `seek(0, io.SEEK_END)` after open in append mode before tell()

2019-06-21 Thread Yuya Nishihara
On Thu, 20 Jun 2019 14:28:58 -0400, Augie Fackler wrote:
> # HG changeset patch
> # User Augie Fackler 
> # Date 1561049708 14400
> #  Thu Jun 20 12:55:08 2019 -0400
> # Branch stable
> # Node ID e99fa717419b71a2493fd7211cab5a0de9c86c7c
> # Parent  b6387a65851d4421d5580b1a4db4c55366a94ec8
> cleanup: always `seek(0, io.SEEK_END)` after open in append mode before tell()
> 
> Consider the program:
> 
> #include 
> 
> int main() {
>   FILE *f = fopen("narf", "w");
>   fprintf(f, "narf\n");
>   fclose(f);
> 
>   f = fopen("narf", "a");
>   printf("%ld\n", ftell(f));
>   fprintf(f, "troz\n");
>   printf("%ld\n", ftell(f));
> 
>   return 0;
> }
> 
> on macOS, FreeBSD, and Linux with glibc, this program prints
> 
> 5
> 10
> 
> but on musl libc (Alpine Linux and probably others) this prints
> 
> 0
> 10
> 
> By my reading of
> https://pubs.opengroup.org/onlinepubs/009695399/functions/fopen.html
> this is technically correct, specifically:
> 
> > Opening a file with append mode (a as the first character in the
> > mode argument) shall cause all subsequent writes to the file to be
> > forced to the then current end-of-file, regardless of intervening
> > calls to fseek().
> 
> in other words, the file position doesn't really matter in append-mode
> files, and we can't depend on it being at all meaningful unless we
> perform a seek() before tell() after open(..., 'a'). Experimentally
> after a .write() we can do a .tell() and it'll always be reasonable,
> but I'm unclear from reading the specification if that's a smart thing
> to rely on.
> 
> I audited the callsites matching the regular expression
> `(open|vfs)\([^,]+, ['"]a` manually. It's possible I missed something
> if I overlooked some other idiom for opening files.
> 
> This is a simple fix for the stable branch. We'll do a more
> comprehensive fix on default.
> 
> diff --git a/mercurial/branchmap.py b/mercurial/branchmap.py
> --- a/mercurial/branchmap.py
> +++ b/mercurial/branchmap.py
> @@ -7,6 +7,7 @@
>  
>  from __future__ import absolute_import
>  
> +import io
>  import struct
>  
>  from .node import (
> @@ -613,6 +614,7 @@ class revbranchcache(object):
>  wlock = repo.wlock(wait=False)
>  if self._rbcnamescount != 0:
>  f = repo.cachevfs.open(_rbcnames, 'ab')
> +f.seek(0, io.SEEK_END)

Maybe we can do that in posix.posixfile()? IIRC, we had the same issue on
Windows.

I understand that the POSIX doesn't guarantee ftell() result of appending
files, but Python 3, which reimplements the whole I/O stack, appears to handle
such cases. So maybe we can expect that f.tell() should just work in future
Python.

https://github.com/python/cpython/blob/v3.7.3/Modules/_io/fileio.c#L474
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


[PATCH STABLE] cleanup: always `seek(0, io.SEEK_END)` after open in append mode before tell()

2019-06-20 Thread Augie Fackler
# HG changeset patch
# User Augie Fackler 
# Date 1561049708 14400
#  Thu Jun 20 12:55:08 2019 -0400
# Branch stable
# Node ID e99fa717419b71a2493fd7211cab5a0de9c86c7c
# Parent  b6387a65851d4421d5580b1a4db4c55366a94ec8
cleanup: always `seek(0, io.SEEK_END)` after open in append mode before tell()

Consider the program:

#include 

int main() {
  FILE *f = fopen("narf", "w");
  fprintf(f, "narf\n");
  fclose(f);

  f = fopen("narf", "a");
  printf("%ld\n", ftell(f));
  fprintf(f, "troz\n");
  printf("%ld\n", ftell(f));

  return 0;
}

on macOS, FreeBSD, and Linux with glibc, this program prints

5
10

but on musl libc (Alpine Linux and probably others) this prints

0
10

By my reading of
https://pubs.opengroup.org/onlinepubs/009695399/functions/fopen.html
this is technically correct, specifically:

> Opening a file with append mode (a as the first character in the
> mode argument) shall cause all subsequent writes to the file to be
> forced to the then current end-of-file, regardless of intervening
> calls to fseek().

in other words, the file position doesn't really matter in append-mode
files, and we can't depend on it being at all meaningful unless we
perform a seek() before tell() after open(..., 'a'). Experimentally
after a .write() we can do a .tell() and it'll always be reasonable,
but I'm unclear from reading the specification if that's a smart thing
to rely on.

I audited the callsites matching the regular expression
`(open|vfs)\([^,]+, ['"]a` manually. It's possible I missed something
if I overlooked some other idiom for opening files.

This is a simple fix for the stable branch. We'll do a more
comprehensive fix on default.

diff --git a/mercurial/branchmap.py b/mercurial/branchmap.py
--- a/mercurial/branchmap.py
+++ b/mercurial/branchmap.py
@@ -7,6 +7,7 @@
 
 from __future__ import absolute_import
 
+import io
 import struct
 
 from .node import (
@@ -613,6 +614,7 @@ class revbranchcache(object):
 wlock = repo.wlock(wait=False)
 if self._rbcnamescount != 0:
 f = repo.cachevfs.open(_rbcnames, 'ab')
+f.seek(0, io.SEEK_END)
 if f.tell() == self._rbcsnameslen:
 f.write('\0')
 else:
@@ -638,6 +640,7 @@ class revbranchcache(object):
 revs = min(len(repo.changelog),
len(self._rbcrevs) // _rbcrecsize)
 f = repo.cachevfs.open(_rbcrevs, 'ab')
+f.seek(0, io.SEEK_END)
 if f.tell() != start:
 repo.ui.debug("truncating cache/%s to %d\n"
   % (_rbcrevs, start))
diff --git a/mercurial/obsolete.py b/mercurial/obsolete.py
--- a/mercurial/obsolete.py
+++ b/mercurial/obsolete.py
@@ -71,6 +71,7 @@ from __future__ import absolute_import
 
 import errno
 import hashlib
+import io
 import struct
 
 from .i18n import _
@@ -634,6 +635,7 @@ class obsstore(object):
 new.append(m)
 if new:
 f = self.svfs('obsstore', 'ab')
+f.seek(0, io.SEEK_END)
 try:
 offset = f.tell()
 transaction.add('obsstore', offset)
diff --git a/mercurial/tags.py b/mercurial/tags.py
--- a/mercurial/tags.py
+++ b/mercurial/tags.py
@@ -13,6 +13,7 @@
 from __future__ import absolute_import
 
 import errno
+import io
 
 from .node import (
 bin,
@@ -582,6 +583,7 @@ def _tag(repo, names, node, message, loc
 fp = repo.vfs('localtags', 'r+')
 except IOError:
 fp = repo.vfs('localtags', 'a')
+fp.seek(0, io.SEEK_END)
 else:
 prevtags = fp.read()
 
@@ -597,6 +599,7 @@ def _tag(repo, names, node, message, loc
 if e.errno != errno.ENOENT:
 raise
 fp = repo.wvfs('.hgtags', 'ab')
+fp.seek(0, io.SEEK_END)
 else:
 prevtags = fp.read()
 
@@ -767,6 +770,7 @@ class hgtagsfnodescache(object):
 
 try:
 f = repo.cachevfs.open(_fnodescachefile, 'ab')
+f.seek(0, io.SEEK_END)
 try:
 # if the file has been truncated
 actualoffset = f.tell()
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel