Re: [PATCH 7/7] determine_author_info: stop leaking name/email

2014-06-23 Thread Eric Sunshine
On Wed, Jun 18, 2014 at 4:36 PM, Jeff King p...@peff.net wrote:
 When we get the author name and email either from an
 existing commit or from the --author option, we create a
 copy of the strings. We cannot just free() these copies,
 since the same pointers may also be pointing to getenv()
 storage which we do not own.

 Instead, let's treat these the same way as we do the date
 buffer: keep a strbuf to be released, and point the bare
 pointers at the strbuf.

 Signed-off-by: Jeff King p...@peff.net
 ---
  builtin/commit.c | 20 +---
  1 file changed, 13 insertions(+), 7 deletions(-)

 diff --git a/builtin/commit.c b/builtin/commit.c
 index 62abee0..72beb7f 100644
 --- a/builtin/commit.c
 +++ b/builtin/commit.c
 @@ -546,16 +546,20 @@ static void strbuf_add_pair(struct strbuf *buf, const 
 struct pointer_pair *p)
 strbuf_add(buf, p-begin, p-end - p-begin);
  }

 -static char *xmemdupz_pair(const struct pointer_pair *p)
 +static char *set_pair(struct strbuf *buf, struct pointer_pair *p)
  {
 -   return xmemdupz(p-begin, p-end - p-begin);
 +   strbuf_reset(buf);
 +   strbuf_add_pair(buf, p);
 +   return buf-buf;
  }

  static void determine_author_info(struct strbuf *author_ident)
  {
 char *name, *email, *date;
 struct ident_split author;
 -   struct strbuf date_buf = STRBUF_INIT;
 +   struct strbuf name_buf = STRBUF_INIT,
 + mail_buf = STRBUF_INIT,

nit: The associated 'char *' variable is named email, so perhaps
s/mail_buf/email_buf/g

 + date_buf = STRBUF_INIT;

 name = getenv(GIT_AUTHOR_NAME);
 email = getenv(GIT_AUTHOR_EMAIL);
 @@ -572,8 +576,8 @@ static void determine_author_info(struct strbuf 
 *author_ident)
 if (split_ident_line(ident, a, len)  0)
 die(_(commit '%s' has malformed author line), 
 author_message);

 -   name = xmemdupz_pair(ident.name);
 -   email = xmemdupz_pair(ident.mail);
 +   name = set_pair(name_buf, ident.name);
 +   email = set_pair(mail_buf, ident.mail);
 if (ident.date.begin) {
 strbuf_reset(date_buf);
 strbuf_addch(date_buf, '@');
 @@ -589,8 +593,8 @@ static void determine_author_info(struct strbuf 
 *author_ident)

 if (split_ident_line(ident, force_author, 
 strlen(force_author))  0)
 die(_(malformed --author parameter));
 -   name = xmemdupz_pair(ident.name);
 -   email = xmemdupz_pair(ident.mail);
 +   name = set_pair(name_buf, ident.name);
 +   email = set_pair(mail_buf, ident.mail);

Does the code become too convoluted with these changes? You're now
maintaining three 'char *' variables in parallel with three strbuf
variables. Is it possible to drop the 'char *' variables and just pass
the .buf member of the strbufs to fmt_ident()?

Alternately, you also could solve the leaks by having an envdup() helper:

static char *envdup(const char *s)
{
const char *v = getenv(s);
return v ? xstrdup(v) : NULL;
}

...
name = envdup(GIT_AUTHOR_NAME);
email = envdup(GIT_AUTHOR_EMAIL);
...

And then just free() 'name' and 'email' normally.

 }

 if (force_date) {
 @@ -608,6 +612,8 @@ static void determine_author_info(struct strbuf 
 *author_ident)
 export_one(GIT_AUTHOR_DATE, author.date.begin, 
 author.tz.end, '@');
 }

 +   strbuf_release(name_buf);
 +   strbuf_release(mail_buf);
 strbuf_release(date_buf);
  }

 --
 2.0.0.566.gfe3e6b2
--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 7/7] determine_author_info: stop leaking name/email

2014-06-23 Thread Erik Faye-Lund
On Mon, Jun 23, 2014 at 11:28 AM, Eric Sunshine sunsh...@sunshineco.com wrote:
 On Wed, Jun 18, 2014 at 4:36 PM, Jeff King p...@peff.net wrote:
 When we get the author name and email either from an
 existing commit or from the --author option, we create a
 copy of the strings. We cannot just free() these copies,
 since the same pointers may also be pointing to getenv()
 storage which we do not own.

 Instead, let's treat these the same way as we do the date
 buffer: keep a strbuf to be released, and point the bare
 pointers at the strbuf.

 Signed-off-by: Jeff King p...@peff.net
 ---
  builtin/commit.c | 20 +---
  1 file changed, 13 insertions(+), 7 deletions(-)

 diff --git a/builtin/commit.c b/builtin/commit.c
 index 62abee0..72beb7f 100644
 --- a/builtin/commit.c
 +++ b/builtin/commit.c
 @@ -546,16 +546,20 @@ static void strbuf_add_pair(struct strbuf *buf, const 
 struct pointer_pair *p)
 strbuf_add(buf, p-begin, p-end - p-begin);
  }

 -static char *xmemdupz_pair(const struct pointer_pair *p)
 +static char *set_pair(struct strbuf *buf, struct pointer_pair *p)
  {
 -   return xmemdupz(p-begin, p-end - p-begin);
 +   strbuf_reset(buf);
 +   strbuf_add_pair(buf, p);
 +   return buf-buf;
  }

  static void determine_author_info(struct strbuf *author_ident)
  {
 char *name, *email, *date;
 struct ident_split author;
 -   struct strbuf date_buf = STRBUF_INIT;
 +   struct strbuf name_buf = STRBUF_INIT,
 + mail_buf = STRBUF_INIT,

 nit: The associated 'char *' variable is named email, so perhaps
 s/mail_buf/email_buf/g

 + date_buf = STRBUF_INIT;

 name = getenv(GIT_AUTHOR_NAME);
 email = getenv(GIT_AUTHOR_EMAIL);
 @@ -572,8 +576,8 @@ static void determine_author_info(struct strbuf 
 *author_ident)
 if (split_ident_line(ident, a, len)  0)
 die(_(commit '%s' has malformed author line), 
 author_message);

 -   name = xmemdupz_pair(ident.name);
 -   email = xmemdupz_pair(ident.mail);
 +   name = set_pair(name_buf, ident.name);
 +   email = set_pair(mail_buf, ident.mail);
 if (ident.date.begin) {
 strbuf_reset(date_buf);
 strbuf_addch(date_buf, '@');
 @@ -589,8 +593,8 @@ static void determine_author_info(struct strbuf 
 *author_ident)

 if (split_ident_line(ident, force_author, 
 strlen(force_author))  0)
 die(_(malformed --author parameter));
 -   name = xmemdupz_pair(ident.name);
 -   email = xmemdupz_pair(ident.mail);
 +   name = set_pair(name_buf, ident.name);
 +   email = set_pair(mail_buf, ident.mail);

 Does the code become too convoluted with these changes? You're now
 maintaining three 'char *' variables in parallel with three strbuf
 variables. Is it possible to drop the 'char *' variables and just pass
 the .buf member of the strbufs to fmt_ident()?

 Alternately, you also could solve the leaks by having an envdup() helper:

 static char *envdup(const char *s)
 {
 const char *v = getenv(s);
 return v ? xstrdup(v) : NULL;
 }

 ...
 name = envdup(GIT_AUTHOR_NAME);
 email = envdup(GIT_AUTHOR_EMAIL);
 ...

 And then just free() 'name' and 'email' normally.

This approach has the added benefit of fixing the case where getenv
uses a static buffer, like POSIX allows.
--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 7/7] determine_author_info: stop leaking name/email

2014-06-23 Thread Eric Sunshine
On Mon, Jun 23, 2014 at 5:33 AM, Erik Faye-Lund kusmab...@gmail.com wrote:
 On Mon, Jun 23, 2014 at 11:28 AM, Eric Sunshine sunsh...@sunshineco.com 
 wrote:
 On Wed, Jun 18, 2014 at 4:36 PM, Jeff King p...@peff.net wrote:
 When we get the author name and email either from an
 existing commit or from the --author option, we create a
 copy of the strings. We cannot just free() these copies,
 since the same pointers may also be pointing to getenv()
 storage which we do not own.

 Instead, let's treat these the same way as we do the date
 buffer: keep a strbuf to be released, and point the bare
 pointers at the strbuf.

 Signed-off-by: Jeff King p...@peff.net
 ---
  builtin/commit.c | 20 +---
  1 file changed, 13 insertions(+), 7 deletions(-)

 diff --git a/builtin/commit.c b/builtin/commit.c
 index 62abee0..72beb7f 100644
 --- a/builtin/commit.c
 +++ b/builtin/commit.c
 @@ -546,16 +546,20 @@ static void strbuf_add_pair(struct strbuf *buf, const 
 struct pointer_pair *p)
 strbuf_add(buf, p-begin, p-end - p-begin);
  }

 -static char *xmemdupz_pair(const struct pointer_pair *p)
 +static char *set_pair(struct strbuf *buf, struct pointer_pair *p)
  {
 -   return xmemdupz(p-begin, p-end - p-begin);
 +   strbuf_reset(buf);
 +   strbuf_add_pair(buf, p);
 +   return buf-buf;
  }

  static void determine_author_info(struct strbuf *author_ident)
  {
 char *name, *email, *date;
 struct ident_split author;
 -   struct strbuf date_buf = STRBUF_INIT;
 +   struct strbuf name_buf = STRBUF_INIT,
 + mail_buf = STRBUF_INIT,

 nit: The associated 'char *' variable is named email, so perhaps
 s/mail_buf/email_buf/g

 + date_buf = STRBUF_INIT;

 name = getenv(GIT_AUTHOR_NAME);
 email = getenv(GIT_AUTHOR_EMAIL);
 @@ -572,8 +576,8 @@ static void determine_author_info(struct strbuf 
 *author_ident)
 if (split_ident_line(ident, a, len)  0)
 die(_(commit '%s' has malformed author line), 
 author_message);

 -   name = xmemdupz_pair(ident.name);
 -   email = xmemdupz_pair(ident.mail);
 +   name = set_pair(name_buf, ident.name);
 +   email = set_pair(mail_buf, ident.mail);
 if (ident.date.begin) {
 strbuf_reset(date_buf);
 strbuf_addch(date_buf, '@');
 @@ -589,8 +593,8 @@ static void determine_author_info(struct strbuf 
 *author_ident)

 if (split_ident_line(ident, force_author, 
 strlen(force_author))  0)
 die(_(malformed --author parameter));
 -   name = xmemdupz_pair(ident.name);
 -   email = xmemdupz_pair(ident.mail);
 +   name = set_pair(name_buf, ident.name);
 +   email = set_pair(mail_buf, ident.mail);

 Does the code become too convoluted with these changes? You're now
 maintaining three 'char *' variables in parallel with three strbuf
 variables. Is it possible to drop the 'char *' variables and just pass
 the .buf member of the strbufs to fmt_ident()?

 Alternately, you also could solve the leaks by having an envdup() helper:

 static char *envdup(const char *s)
 {
 const char *v = getenv(s);
 return v ? xstrdup(v) : NULL;
 }

 ...
 name = envdup(GIT_AUTHOR_NAME);
 email = envdup(GIT_AUTHOR_EMAIL);
 ...

 And then just free() 'name' and 'email' normally.

 This approach has the added benefit of fixing the case where getenv
 uses a static buffer, like POSIX allows.

That reminds me that I was going to suggest a suitable variation of
envdup if Peff wanted to keep the strbufs:

static void strbuf_env(struct strbuf *s, const char *e)
{
const char *v = getenv(e);
if (v)
strbuf_add(s, v);
}

(Or add a generalized strbuf_add_gently() to strbuf.{h,c}, if it seems
likely to come up more often, which would allow
strbuf_add_gently(name_buf, getenv(GIT_AUTHOR_NAME)) -- with a
better name than gently, of course.)
--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 7/7] determine_author_info: stop leaking name/email

2014-06-23 Thread Jeff King
On Mon, Jun 23, 2014 at 05:28:14AM -0400, Eric Sunshine wrote:

   static void determine_author_info(struct strbuf *author_ident)
   {
  char *name, *email, *date;
  struct ident_split author;
  -   struct strbuf date_buf = STRBUF_INIT;
  +   struct strbuf name_buf = STRBUF_INIT,
  + mail_buf = STRBUF_INIT,
 
 nit: The associated 'char *' variable is named email, so perhaps
 s/mail_buf/email_buf/g

Yeah, you wouldn't believe the number of times I switched back and forth
while writing the patch. The variable is called mail in struct
ident_split, so you have a mismatch at some point unless we rename the
local pointer.

I avoided doing that to keep the diff smaller, but perhaps it's worth
doing.

  if (split_ident_line(ident, force_author, 
  strlen(force_author))  0)
  die(_(malformed --author parameter));
  -   name = xmemdupz_pair(ident.name);
  -   email = xmemdupz_pair(ident.mail);
  +   name = set_pair(name_buf, ident.name);
  +   email = set_pair(mail_buf, ident.mail);
 
 Does the code become too convoluted with these changes? You're now
 maintaining three 'char *' variables in parallel with three strbuf
 variables. Is it possible to drop the 'char *' variables and just pass
 the .buf member of the strbufs to fmt_ident()?

Yeah, I agree it is getting a bit complicated (I tried to introduce
helpers like set_pair to at least keep the per-variable work down to a
single line in each instance).

It unfortunately doesn't work to just pass the .buf of each strbuf. We
care about the distinction between the empty string and NULL here (the
latter will cause fmt_ident to use the default ident).

 Alternately, you also could solve the leaks by having an envdup() helper:
 
 static char *envdup(const char *s)
 {
 const char *v = getenv(s);
 return v ? xstrdup(v) : NULL;
 }
 
 ...
 name = envdup(GIT_AUTHOR_NAME);
 email = envdup(GIT_AUTHOR_EMAIL);
 ...
 
 And then just free() 'name' and 'email' normally.

Yeah, I also considered that. You end up having to free() before
re-assigning throughout the function, though that is not much worse than
having to strbuf_reset() before re-adding (the reset is hidden in
set_pair, but we could similarly abstract it).

Here's what that looks like (this converts date_buf away, too, to avoid
relying on getenv()'s static return value):

diff --git a/builtin/commit.c b/builtin/commit.c
index 62abee0..35045ca 100644
--- a/builtin/commit.c
+++ b/builtin/commit.c
@@ -551,15 +551,26 @@ static char *xmemdupz_pair(const struct pointer_pair *p)
return xmemdupz(p-begin, p-end - p-begin);
 }
 
+static void set_ident_var(char **buf, char *val)
+{
+   free(*buf);
+   *buf = val;
+}
+
+static char *envdup(const char *var)
+{
+   const char *val = getenv(var);
+   return val ? xstrdup(val) : NULL;
+}
+
 static void determine_author_info(struct strbuf *author_ident)
 {
char *name, *email, *date;
struct ident_split author;
-   struct strbuf date_buf = STRBUF_INIT;
 
-   name = getenv(GIT_AUTHOR_NAME);
-   email = getenv(GIT_AUTHOR_EMAIL);
-   date = getenv(GIT_AUTHOR_DATE);
+   name = envdup(GIT_AUTHOR_NAME);
+   email = envdup(GIT_AUTHOR_EMAIL);
+   date = envdup(GIT_AUTHOR_DATE);
 
if (author_message) {
struct ident_split ident;
@@ -572,15 +583,15 @@ static void determine_author_info(struct strbuf 
*author_ident)
if (split_ident_line(ident, a, len)  0)
die(_(commit '%s' has malformed author line), 
author_message);
 
-   name = xmemdupz_pair(ident.name);
-   email = xmemdupz_pair(ident.mail);
+   set_ident_var(name, xmemdupz_pair(ident.name));
+   set_ident_var(email, xmemdupz_pair(ident.mail));
if (ident.date.begin) {
-   strbuf_reset(date_buf);
+   struct strbuf date_buf = STRBUF_INIT;
strbuf_addch(date_buf, '@');
strbuf_add_pair(date_buf, ident.date);
strbuf_addch(date_buf, ' ');
strbuf_add_pair(date_buf, ident.tz);
-   date = date_buf.buf;
+   set_ident_var(date, strbuf_detach(date_buf, NULL));
}
}
 
@@ -589,15 +600,15 @@ static void determine_author_info(struct strbuf 
*author_ident)
 
if (split_ident_line(ident, force_author, 
strlen(force_author))  0)
die(_(malformed --author parameter));
-   name = xmemdupz_pair(ident.name);
-   email = xmemdupz_pair(ident.mail);
+   set_ident_var(name, xmemdupz_pair(ident.name));
+   set_ident_var(email, xmemdupz_pair(ident.mail));
}
 
if (force_date) {
-   strbuf_reset(date_buf);
+ 

Re: [PATCH 7/7] determine_author_info: stop leaking name/email

2014-06-23 Thread Jeff King
On Mon, Jun 23, 2014 at 11:33:56AM +0200, Erik Faye-Lund wrote:

 This approach has the added benefit of fixing the case where getenv
 uses a static buffer, like POSIX allows.

Good point. I knew we could invalidate the pointer if setenv() was called, but
I didn't know that another getenv() could (I don't know of any such
implementation offhand, but it is not too hard to handle it).

-Peff
--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html