Re: [PATCH v3 1/2] use HOST_NAME_MAX to size buffers for gethostname(2)

2017-04-20 Thread Torsten Bögershausen



I think I meant to write "big pidfiles" there.

With xsize_t() gc would die when seeing a pidfile whose size doesn't fit into
size_t.  The version I sent just ignores such files.  However, it would choke
on slightly smaller files that happen to not fit into memory.  And no
reasonable pidfile can be big enough to trigger any of that, so dying on
conversion error wouldn't really be a problem.  Is that what you meant?  It
makes sense, in any case.


In short: Yes.



Thanks,
René


Re: [PATCH v3 1/2] use HOST_NAME_MAX to size buffers for gethostname(2)

2017-04-20 Thread René Scharfe

Am 20.04.2017 um 20:37 schrieb Torsten Bögershausen:

On 2017-04-19 22:02, René Scharfe wrote:

Am 19.04.2017 um 21:09 schrieb Torsten Bögershausen:

On 2017-04-19 19:28, René Scharfe wrote:
[]
One or two minor comments inline

diff --git a/builtin/gc.c b/builtin/gc.c
index 2daede7820..4c1c01e87d 100644
--- a/builtin/gc.c
+++ b/builtin/gc.c
@@ -228,21 +228,99 @@ static int need_to_gc(void)
   return 1;
   }
   +struct pidfile {
+struct strbuf buf;
+char *hostname;
+};
+
+#define PIDFILE_INIT { STRBUF_INIT }
+
+static void pidfile_release(struct pidfile *pf)
+{
+pf->hostname = NULL;
+strbuf_release(&pf->buf);
+}
+
+static int pidfile_read(struct pidfile *pf, const char *path,
+unsigned int max_age_seconds)
+{
+int fd;
+struct stat st;
+ssize_t len;
+char *space;
+int rc = -1;
+
+fd = open(path, O_RDONLY);
+if (fd < 0)
+return rc;
+
+if (fstat(fd, &st))
+goto out;
+if (time(NULL) - st.st_mtime > max_age_seconds)
+goto out;
+if (st.st_size > (size_t)st.st_size)


Minor: we need xsize_t here ?
if (st.st_size > xsize_t(st.st_size))


No, xsize_t() would do the same check and die on overflow, and pidfile_read() is
supposed to handle big pids gracefully.

This about the file size, isn't it ?
And here xsize_t should be save to use and good practise.


I think I meant to write "big pidfiles" there.

With xsize_t() gc would die when seeing a pidfile whose size doesn't fit 
into size_t.  The version I sent just ignores such files.  However, it 
would choke on slightly smaller files that happen to not fit into 
memory.  And no reasonable pidfile can be big enough to trigger any of 
that, so dying on conversion error wouldn't really be a problem.  Is 
that what you meant?  It makes sense, in any case.


Thanks,
René


Re: [PATCH v3 1/2] use HOST_NAME_MAX to size buffers for gethostname(2)

2017-04-20 Thread Torsten Bögershausen
On 2017-04-19 22:02, René Scharfe wrote:
> Am 19.04.2017 um 21:09 schrieb Torsten Bögershausen:
>> On 2017-04-19 19:28, René Scharfe wrote:
>> []
>> One or two minor comments inline
>>> diff --git a/builtin/gc.c b/builtin/gc.c
>>> index 2daede7820..4c1c01e87d 100644
>>> --- a/builtin/gc.c
>>> +++ b/builtin/gc.c
>>> @@ -228,21 +228,99 @@ static int need_to_gc(void)
>>>   return 1;
>>>   }
>>>   +struct pidfile {
>>> +struct strbuf buf;
>>> +char *hostname;
>>> +};
>>> +
>>> +#define PIDFILE_INIT { STRBUF_INIT }
>>> +
>>> +static void pidfile_release(struct pidfile *pf)
>>> +{
>>> +pf->hostname = NULL;
>>> +strbuf_release(&pf->buf);
>>> +}
>>> +
>>> +static int pidfile_read(struct pidfile *pf, const char *path,
>>> +unsigned int max_age_seconds)
>>> +{
>>> +int fd;
>>> +struct stat st;
>>> +ssize_t len;
>>> +char *space;
>>> +int rc = -1;
>>> +
>>> +fd = open(path, O_RDONLY);
>>> +if (fd < 0)
>>> +return rc;
>>> +
>>> +if (fstat(fd, &st))
>>> +goto out;
>>> +if (time(NULL) - st.st_mtime > max_age_seconds)
>>> +goto out;
>>> +if (st.st_size > (size_t)st.st_size)
>>
>> Minor: we need xsize_t here ?
>> if (st.st_size > xsize_t(st.st_size))
> 
> No, xsize_t() would do the same check and die on overflow, and pidfile_read() 
> is
> supposed to handle big pids gracefully.
This about the file size, isn't it ?
And here xsize_t should be save to use and good practise.






Re: [PATCH v3 1/2] use HOST_NAME_MAX to size buffers for gethostname(2)

2017-04-19 Thread René Scharfe

Am 19.04.2017 um 21:09 schrieb Torsten Bögershausen:

On 2017-04-19 19:28, René Scharfe wrote:
[]
One or two minor comments inline

diff --git a/builtin/gc.c b/builtin/gc.c
index 2daede7820..4c1c01e87d 100644
--- a/builtin/gc.c
+++ b/builtin/gc.c
@@ -228,21 +228,99 @@ static int need_to_gc(void)
return 1;
  }
  
+struct pidfile {

+   struct strbuf buf;
+   char *hostname;
+};
+
+#define PIDFILE_INIT { STRBUF_INIT }
+
+static void pidfile_release(struct pidfile *pf)
+{
+   pf->hostname = NULL;
+   strbuf_release(&pf->buf);
+}
+
+static int pidfile_read(struct pidfile *pf, const char *path,
+   unsigned int max_age_seconds)
+{
+   int fd;
+   struct stat st;
+   ssize_t len;
+   char *space;
+   int rc = -1;
+
+   fd = open(path, O_RDONLY);
+   if (fd < 0)
+   return rc;
+
+   if (fstat(fd, &st))
+   goto out;
+   if (time(NULL) - st.st_mtime > max_age_seconds)
+   goto out;
+   if (st.st_size > (size_t)st.st_size)


Minor: we need xsize_t here ?
if (st.st_size > xsize_t(st.st_size))


No, xsize_t() would do the same check and die on overflow, and 
pidfile_read() is supposed to handle big pids gracefully.





+   goto out;
+
+   len = strbuf_read(&pf->buf, fd, st.st_size);
+   if (len < 0)
+   goto out;
+
+   space = strchr(pf->buf.buf, ' ');
+   if (!space) {
+   pidfile_release(pf);
+   goto out;
+   }
+   pf->hostname = space + 1;
+   *space = '\0';
+
+   rc = 0;
+out:
+   close(fd);
+   return rc;
+}
+
+static int parse_pid(const char *value, pid_t *ret)
+{
+   if (value && *value) {
+   char *end;
+   intmax_t val;
+
+   errno = 0;
+   val = strtoimax(value, &end, 0);
+   if (errno == ERANGE)
+   return 0;
+   if (*end)
+   return 0;
+   if (labs(val) > maximum_signed_value_of_type(pid_t)) {
+   errno = ERANGE;
+   return 0;
+   }
+   *ret = val;
+   return 1;
+   }
+   errno = EINVAL;
+   return 0;
+}
+
+static int pidfile_process_exists(const struct pidfile *pf)
+{
+   pid_t pid;
+   return parse_pid(pf->buf.buf, &pid) &&
+   (!kill(pid, 0) || errno == EPERM);
+}
+
  /* return NULL on success, else hostname running the gc */
-static const char *lock_repo_for_gc(int force, pid_t* ret_pid)
+static int lock_repo_for_gc(int force, struct pidfile *pf)
  {
static struct lock_file lock;
char my_host[128];


Huh ?
should this be increased, may be in another path ?


It should, but not in this patch.

René


Re: [PATCH v3 1/2] use HOST_NAME_MAX to size buffers for gethostname(2)

2017-04-19 Thread Torsten Bögershausen
On 2017-04-19 19:28, René Scharfe wrote:
[]
One or two minor comments inline
> diff --git a/builtin/gc.c b/builtin/gc.c
> index 2daede7820..4c1c01e87d 100644
> --- a/builtin/gc.c
> +++ b/builtin/gc.c
> @@ -228,21 +228,99 @@ static int need_to_gc(void)
>   return 1;
>  }
>  
> +struct pidfile {
> + struct strbuf buf;
> + char *hostname;
> +};
> +
> +#define PIDFILE_INIT { STRBUF_INIT }
> +
> +static void pidfile_release(struct pidfile *pf)
> +{
> + pf->hostname = NULL;
> + strbuf_release(&pf->buf);
> +}
> +
> +static int pidfile_read(struct pidfile *pf, const char *path,
> + unsigned int max_age_seconds)
> +{
> + int fd;
> + struct stat st;
> + ssize_t len;
> + char *space;
> + int rc = -1;
> +
> + fd = open(path, O_RDONLY);
> + if (fd < 0)
> + return rc;
> +
> + if (fstat(fd, &st))
> + goto out;
> + if (time(NULL) - st.st_mtime > max_age_seconds)
> + goto out;
> + if (st.st_size > (size_t)st.st_size)

Minor: we need xsize_t here ?
if (st.st_size > xsize_t(st.st_size))

> + goto out;
> +
> + len = strbuf_read(&pf->buf, fd, st.st_size);
> + if (len < 0)
> + goto out;
> +
> + space = strchr(pf->buf.buf, ' ');
> + if (!space) {
> + pidfile_release(pf);
> + goto out;
> + }
> + pf->hostname = space + 1;
> + *space = '\0';
> +
> + rc = 0;
> +out:
> + close(fd);
> + return rc;
> +}
> +
> +static int parse_pid(const char *value, pid_t *ret)
> +{
> + if (value && *value) {
> + char *end;
> + intmax_t val;
> +
> + errno = 0;
> + val = strtoimax(value, &end, 0);
> + if (errno == ERANGE)
> + return 0;
> + if (*end)
> + return 0;
> + if (labs(val) > maximum_signed_value_of_type(pid_t)) {
> + errno = ERANGE;
> + return 0;
> + }
> + *ret = val;
> + return 1;
> + }
> + errno = EINVAL;
> + return 0;
> +}
> +
> +static int pidfile_process_exists(const struct pidfile *pf)
> +{
> + pid_t pid;
> + return parse_pid(pf->buf.buf, &pid) &&
> + (!kill(pid, 0) || errno == EPERM);
> +}
> +
>  /* return NULL on success, else hostname running the gc */
> -static const char *lock_repo_for_gc(int force, pid_t* ret_pid)
> +static int lock_repo_for_gc(int force, struct pidfile *pf)
>  {
>   static struct lock_file lock;
>   char my_host[128];

Huh ?
should this be increased, may be in another path ?




RE: [PATCH v3 1/2] use HOST_NAME_MAX to size buffers for gethostname(2)

2017-04-19 Thread David Turner
> I had another look at this last night and cooked up the following patch.  
> Might
> have gone overboard with it..
> 
> -- >8 --
> Subject: [PATCH] gc: support arbitrary hostnames and pids in 
> lock_repo_for_gc()
> 
> git gc writes its pid and hostname into a pidfile to prevent concurrent 
> garbage
> collection.  Repositories may be shared between systems with different limits
> for host name length and different pid ranges.  Use a strbuf to store the file
> contents to allow for arbitrarily long hostnames and pids to be shown to the
> user on early abort.

This is pretty paranoid, but maybe the remote host has a longer pid_t than we 
do, so we should be using intmax_t when reading the pid, and only check its 
size  before passing it to kill?

(Personally, I think this whole patch is kind of overkill, but some folks 
probably
think the same about my original patches, so I'm happy to live and let live).


Re: [PATCH v3 1/2] use HOST_NAME_MAX to size buffers for gethostname(2)

2017-04-19 Thread René Scharfe
Am 19.04.2017 um 03:28 schrieb Jonathan Nieder:
> David Turner wrote:
>> @@ -274,7 +278,7 @@ static const char *lock_repo_for_gc(int force, pid_t* 
>> ret_pid)
>>   * running.
>>   */
>>  time(NULL) - st.st_mtime <= 12 * 3600 &&
>> -fscanf(fp, "%"SCNuMAX" %127c", &pid, locking_host) == 2 
>> &&
>> +fscanf(fp, scan_fmt, &pid, locking_host) == 2 &&
> 
> I hoped this could be simplified since HOST_NAME_MAX is a numeric literal,
> using the double-expansion trick:
> 
> #define STR_(s) # s
> #define STR(s) STR_(s)
> 
>   fscanf(fp, "%" SCNuMAX " %" STR(HOST_NAME_MAX) "c",
>  &pid, locking_host);
> 
> Unfortunately, I don't think there's anything stopping a platform from
> defining
> 
>   #define HOST_NAME_MAX 0x100
> 
> which would break that.
> 
> So this run-time calculation appears to be necessary.

I had another look at this last night and cooked up the following
patch.  Might have gone overboard with it..

-- >8 --
Subject: [PATCH] gc: support arbitrary hostnames and pids in lock_repo_for_gc()

git gc writes its pid and hostname into a pidfile to prevent concurrent
garbage collection.  Repositories may be shared between systems with
different limits for host name length and different pid ranges.  Use a
strbuf to store the file contents to allow for arbitrarily long
hostnames and pids to be shown to the user on early abort.

Signed-off-by: Rene Scharfe 
---
 builtin/gc.c | 151 +++
 1 file changed, 111 insertions(+), 40 deletions(-)

diff --git a/builtin/gc.c b/builtin/gc.c
index 2daede7820..4c1c01e87d 100644
--- a/builtin/gc.c
+++ b/builtin/gc.c
@@ -228,21 +228,99 @@ static int need_to_gc(void)
return 1;
 }
 
+struct pidfile {
+   struct strbuf buf;
+   char *hostname;
+};
+
+#define PIDFILE_INIT { STRBUF_INIT }
+
+static void pidfile_release(struct pidfile *pf)
+{
+   pf->hostname = NULL;
+   strbuf_release(&pf->buf);
+}
+
+static int pidfile_read(struct pidfile *pf, const char *path,
+   unsigned int max_age_seconds)
+{
+   int fd;
+   struct stat st;
+   ssize_t len;
+   char *space;
+   int rc = -1;
+
+   fd = open(path, O_RDONLY);
+   if (fd < 0)
+   return rc;
+
+   if (fstat(fd, &st))
+   goto out;
+   if (time(NULL) - st.st_mtime > max_age_seconds)
+   goto out;
+   if (st.st_size > (size_t)st.st_size)
+   goto out;
+
+   len = strbuf_read(&pf->buf, fd, st.st_size);
+   if (len < 0)
+   goto out;
+
+   space = strchr(pf->buf.buf, ' ');
+   if (!space) {
+   pidfile_release(pf);
+   goto out;
+   }
+   pf->hostname = space + 1;
+   *space = '\0';
+
+   rc = 0;
+out:
+   close(fd);
+   return rc;
+}
+
+static int parse_pid(const char *value, pid_t *ret)
+{
+   if (value && *value) {
+   char *end;
+   intmax_t val;
+
+   errno = 0;
+   val = strtoimax(value, &end, 0);
+   if (errno == ERANGE)
+   return 0;
+   if (*end)
+   return 0;
+   if (labs(val) > maximum_signed_value_of_type(pid_t)) {
+   errno = ERANGE;
+   return 0;
+   }
+   *ret = val;
+   return 1;
+   }
+   errno = EINVAL;
+   return 0;
+}
+
+static int pidfile_process_exists(const struct pidfile *pf)
+{
+   pid_t pid;
+   return parse_pid(pf->buf.buf, &pid) &&
+   (!kill(pid, 0) || errno == EPERM);
+}
+
 /* return NULL on success, else hostname running the gc */
-static const char *lock_repo_for_gc(int force, pid_t* ret_pid)
+static int lock_repo_for_gc(int force, struct pidfile *pf)
 {
static struct lock_file lock;
char my_host[128];
struct strbuf sb = STRBUF_INIT;
-   struct stat st;
-   uintmax_t pid;
-   FILE *fp;
int fd;
char *pidfile_path;
 
if (is_tempfile_active(&pidfile))
/* already locked */
-   return NULL;
+   return 0;
 
if (gethostname(my_host, sizeof(my_host)))
xsnprintf(my_host, sizeof(my_host), "unknown");
@@ -251,34 +329,27 @@ static const char *lock_repo_for_gc(int force, pid_t* 
ret_pid)
fd = hold_lock_file_for_update(&lock, pidfile_path,
   LOCK_DIE_ON_ERROR);
if (!force) {
-   static char locking_host[128];
-   int should_exit;
-   fp = fopen(pidfile_path, "r");
-   memset(locking_host, 0, sizeof(locking_host));
-   should_exit =
-   fp != NULL &&
-   !fstat(fileno(fp), &st) &&
-   /*
-

Re: [PATCH v3 1/2] use HOST_NAME_MAX to size buffers for gethostname(2)

2017-04-19 Thread René Scharfe
Am 19.04.2017 um 03:28 schrieb Jonathan Nieder:
>> From: René Scharfe 
>>
>> POSIX limits the length of host names to HOST_NAME_MAX.  Export the
>> fallback definition from daemon.c and use this constant to make all
>> buffers used with gethostname(2) big enough for any possible result
>> and a terminating NUL.
> 
> Since some platforms do not define HOST_NAME_MAX and we provide a
> fallback, this is not actually big enough for any possible result.
> For example, the Hurd allows arbitrarily long hostnames.

Interesting.  No limits, eh?  They suggest to allocate memory
dynamically [1].  Perhaps we should import their xgethostname() (which
grows a buffer as needed), or implement a strbuf_add_hostname()?

René


https://www.gnu.org/software/hurd/hurd/porting/guidelines.html#MAXHOSTNAMELEN_tt_


Re: [PATCH v3 1/2] use HOST_NAME_MAX to size buffers for gethostname(2)

2017-04-18 Thread Junio C Hamano
Jonathan Nieder  writes:

>> @@ -274,7 +278,7 @@ static const char *lock_repo_for_gc(int force, pid_t* 
>> ret_pid)
>>   * running.
>>   */
>>  time(NULL) - st.st_mtime <= 12 * 3600 &&
>> -fscanf(fp, "%"SCNuMAX" %127c", &pid, locking_host) == 2 
>> &&
>> +fscanf(fp, scan_fmt, &pid, locking_host) == 2 &&
>
> I hoped this could be simplified since HOST_NAME_MAX is a numeric literal,
> using the double-expansion trick:
>
> #define STR_(s) # s
> #define STR(s) STR_(s)
>
>   fscanf(fp, "%" SCNuMAX " %" STR(HOST_NAME_MAX) "c",
>  &pid, locking_host);
>
> Unfortunately, I don't think there's anything stopping a platform from
> defining
>
>   #define HOST_NAME_MAX 0x100
>
> which would break that.

Yes, that was exactly why I went to the xstrfmt() route when I sent
mine yesterday ;-).

> So this run-time calculation appears to be necessary.
>
> Reviewed-by: Jonathan Nieder 

Thanks.  


Re: [PATCH v3 1/2] use HOST_NAME_MAX to size buffers for gethostname(2)

2017-04-18 Thread Jonathan Nieder
Hi,

David Turner wrote:

> From: René Scharfe 
>
> POSIX limits the length of host names to HOST_NAME_MAX.  Export the
> fallback definition from daemon.c and use this constant to make all
> buffers used with gethostname(2) big enough for any possible result
> and a terminating NUL.

Since some platforms do not define HOST_NAME_MAX and we provide a
fallback, this is not actually big enough for any possible result.
For example, the Hurd allows arbitrarily long hostnames.

Nevertheless this patch seems like the right thing to do.

> Inspired-by: David Turner 
> Signed-off-by: Rene Scharfe 
> Signed-off-by: David Turner 
> ---
>  builtin/gc.c   | 10 +++---
>  builtin/receive-pack.c |  2 +-
>  daemon.c   |  4 
>  fetch-pack.c   |  2 +-
>  git-compat-util.h  |  4 
>  ident.c|  2 +-
>  6 files changed, 14 insertions(+), 10 deletions(-)

Thanks for picking this up.

[...]
> +++ b/builtin/gc.c
[...]
> @@ -257,8 +257,12 @@ static const char *lock_repo_for_gc(int force, pid_t* 
> ret_pid)
>   fd = hold_lock_file_for_update(&lock, pidfile_path,
>  LOCK_DIE_ON_ERROR);
>   if (!force) {
> - static char locking_host[128];
> + static char locking_host[HOST_NAME_MAX + 1];
> + static char *scan_fmt;
>   int should_exit;
> +
> + if (!scan_fmt)
> + scan_fmt = xstrfmt("%s %%%dc", "%"SCNuMAX, 
> HOST_NAME_MAX);
>   fp = fopen(pidfile_path, "r");
>   memset(locking_host, 0, sizeof(locking_host));
>   should_exit =
> @@ -274,7 +278,7 @@ static const char *lock_repo_for_gc(int force, pid_t* 
> ret_pid)
>* running.
>*/
>   time(NULL) - st.st_mtime <= 12 * 3600 &&
> - fscanf(fp, "%"SCNuMAX" %127c", &pid, locking_host) == 2 
> &&
> + fscanf(fp, scan_fmt, &pid, locking_host) == 2 &&

I hoped this could be simplified since HOST_NAME_MAX is a numeric literal,
using the double-expansion trick:

#define STR_(s) # s
#define STR(s) STR_(s)

fscanf(fp, "%" SCNuMAX " %" STR(HOST_NAME_MAX) "c",
   &pid, locking_host);

Unfortunately, I don't think there's anything stopping a platform from
defining

#define HOST_NAME_MAX 0x100

which would break that.

So this run-time calculation appears to be necessary.

Reviewed-by: Jonathan Nieder 

Thanks.