Re: [PATCH v2 1/4] real_path: resolve symlinks by hand

2016-12-09 Thread Brandon Williams
On 12/09, Johannes Sixt wrote:
> Am 09.12.2016 um 00:58 schrieb Brandon Williams:
> >The current implementation of real_path uses chdir() in order to resolve
> >symlinks.  Unfortunately this isn't thread-safe as chdir() affects a
> >process as a whole and not just an individual thread.  Instead perform
> >the symlink resolution by hand so that the calls to chdir() can be
> >removed, making real_path one step closer to being reentrant.
> >
> >Signed-off-by: Brandon Williams 
> >---
> > abspath.c | 183 
> > +-
> > 1 file changed, 122 insertions(+), 61 deletions(-)
> >
> >diff --git a/abspath.c b/abspath.c
> >index 2825de8..92f2a29 100644
> >--- a/abspath.c
> >+++ b/abspath.c
> >@@ -11,8 +11,38 @@ int is_directory(const char *path)
> > return (!stat(path, ) && S_ISDIR(st.st_mode));
> > }
> >
> >+/* removes the last path component from 'path' except if 'path' is root */
> >+static void strip_last_component(struct strbuf *path)
> >+{
> >+if (path->len > offset_1st_component(path->buf)) {
> >+char *last_slash = find_last_dir_sep(path->buf);
> >+strbuf_setlen(path, last_slash - path->buf);
> >+}
> >+}
> 
> This implementation is not correct because when the input is "/foo",
> the result is "" when it should be "/". Also, can the input be a
> non-normalized path? When the input is "foo///bar", should the
> result be "foo" or would "foo//" be an acceptable result? I think it
> should be the former. find_last_dir_sep() returns the last of the
> three slashes, not the first one. Therefore, I've rewritten the
> function thusly:
> 
> static void strip_last_component(struct strbuf *path)
> {
>   size_t offset = offset_1st_component(path->buf);
>   size_t len = path->len;
>   while (offset < len && !is_dir_sep(path->buf[len - 1]))
>   len--;
>   while (offset < len && is_dir_sep(path->buf[len - 1]))
>   len--;
>   strbuf_setlen(path, len);
> }
> 

Thanks for that catch.  So your rewrite takes the offset of the 1st
component and ensures that we don't cut that part off.  It first strips
all non directory separators and then all directory separators.  This
way "/foobar" becomes "/foo" and as you pointed out "/foo" would
become "/".  The offset would also take care of windows drive letters
and the like.  Looks good.  Thanks!

> >+strbuf_addbuf(, );
> >+
> >+if (lstat(resolved.buf, )) {
> >+/* error out unless this was the last component */
> >+if (!(errno == ENOENT && !remaining.len)) {
> 
> Perhaps it was easier to _write_ the condition in this way, but I
> would have an easier time to _read_ it when it is
> 
>   if (errno != ENOENT || remaining.len) {
> 

Yes I did write it out weird, mostly because it made the most sense for
what I was trying to accomplish (add path components must exist, except
for the very last one).  I'm fine applying DeMorgan's since it looks a
little cleaner.

> >+
> >+if (is_absolute_path(symlink.buf)) {
> >+/* absolute symlink; set resolved to root */
> >+int offset = offset_1st_component(symlink.buf);
> >+strbuf_reset();
> >+strbuf_add(, symlink.buf, offset);
> >+strbuf_remove(, 0, offset);
> 
> Good. I would have expected some kind of "goto repeat;" because when
> we encounter a symlink to an absolute path, most, if not all, work
> done so far is obsoleted. But I haven't thought too deeply about
> this.

It made the most sense to just reuse the same looping condition that
I already had in place.  Resetting the resolved string to be the root
component of the absolute symlink made it easy to "throw away" all the
old work to allow us to start from scratch again.

-- 
Brandon Williams


Re: [PATCH v2 1/4] real_path: resolve symlinks by hand

2016-12-09 Thread Johannes Sixt

Am 09.12.2016 um 00:58 schrieb Brandon Williams:

The current implementation of real_path uses chdir() in order to resolve
symlinks.  Unfortunately this isn't thread-safe as chdir() affects a
process as a whole and not just an individual thread.  Instead perform
the symlink resolution by hand so that the calls to chdir() can be
removed, making real_path one step closer to being reentrant.

Signed-off-by: Brandon Williams 
---
 abspath.c | 183 +-
 1 file changed, 122 insertions(+), 61 deletions(-)

diff --git a/abspath.c b/abspath.c
index 2825de8..92f2a29 100644
--- a/abspath.c
+++ b/abspath.c
@@ -11,8 +11,38 @@ int is_directory(const char *path)
return (!stat(path, ) && S_ISDIR(st.st_mode));
 }

+/* removes the last path component from 'path' except if 'path' is root */
+static void strip_last_component(struct strbuf *path)
+{
+   if (path->len > offset_1st_component(path->buf)) {
+   char *last_slash = find_last_dir_sep(path->buf);
+   strbuf_setlen(path, last_slash - path->buf);
+   }
+}


This implementation is not correct because when the input is "/foo", the 
result is "" when it should be "/". Also, can the input be a 
non-normalized path? When the input is "foo///bar", should the result be 
"foo" or would "foo//" be an acceptable result? I think it should be the 
former. find_last_dir_sep() returns the last of the three slashes, not 
the first one. Therefore, I've rewritten the function thusly:


static void strip_last_component(struct strbuf *path)
{
size_t offset = offset_1st_component(path->buf);
size_t len = path->len;
while (offset < len && !is_dir_sep(path->buf[len - 1]))
len--;
while (offset < len && is_dir_sep(path->buf[len - 1]))
len--;
strbuf_setlen(path, len);
}


+
+/* get (and remove) the next component in 'remaining' and place it in 'next' */
+static void get_next_component(struct strbuf *next, struct strbuf *remaining)
+{
+   char *start = NULL;
+   char *end = NULL;
+
+   strbuf_reset(next);
+
+   /* look for the next component */
+   /* Skip sequences of multiple path-separators */
+   for (start = remaining->buf; is_dir_sep(*start); start++)
+   ; /* nothing */
+   /* Find end of the path component */
+   for (end = start; *end && !is_dir_sep(*end); end++)
+   ; /* nothing */
+
+   strbuf_add(next, start, end - start);
+   /* remove the component from 'remaining' */
+   strbuf_remove(remaining, 0, end - remaining->buf);
+}


Mental note: This function strips leading slashes and the first path 
component; post-condition: remaining is empty or has leading slashes.



+
 /* We allow "recursive" symbolic links. Only within reason, though. */
-#define MAXDEPTH 5
+#define MAXSYMLINKS 5

 /*
  * Return the real path (i.e., absolute path, with symlinks resolved
@@ -21,7 +51,6 @@ int is_directory(const char *path)
  * absolute_path().)  The return value is a pointer to a static
  * buffer.
  *
- * The input and all intermediate paths must be shorter than MAX_PATH.
  * The directory part of path (i.e., everything up to the last
  * dir_sep) must denote a valid, existing directory, but the last
  * component need not exist.  If die_on_error is set, then die with an
@@ -33,22 +62,16 @@ int is_directory(const char *path)
  */
 static const char *real_path_internal(const char *path, int die_on_error)
 {
-   static struct strbuf sb = STRBUF_INIT;
+   static struct strbuf resolved = STRBUF_INIT;
+   struct strbuf remaining = STRBUF_INIT;
+   struct strbuf next = STRBUF_INIT;
+   struct strbuf symlink = STRBUF_INIT;
char *retval = NULL;
-
-   /*
-* If we have to temporarily chdir(), store the original CWD
-* here so that we can chdir() back to it at the end of the
-* function:
-*/
-   struct strbuf cwd = STRBUF_INIT;
-
-   int depth = MAXDEPTH;
-   char *last_elem = NULL;
+   int num_symlinks = 0;
struct stat st;

/* We've already done it */
-   if (path == sb.buf)
+   if (path == resolved.buf)
return path;

if (!*path) {
@@ -58,74 +81,112 @@ static const char *real_path_internal(const char *path, 
int die_on_error)
goto error_out;
}

-   strbuf_reset();
-   strbuf_addstr(, path);
-
-   while (depth--) {
-   if (!is_directory(sb.buf)) {
-   char *last_slash = find_last_dir_sep(sb.buf);
-   if (last_slash) {
-   last_elem = xstrdup(last_slash + 1);
-   strbuf_setlen(, last_slash - sb.buf + 1);
-   } else {
-   last_elem = xmemdupz(sb.buf, sb.len);
-   strbuf_reset();
-   }
+ 

Re: [PATCH v2 1/4] real_path: resolve symlinks by hand

2016-12-08 Thread Jacob Keller
On Thu, Dec 8, 2016 at 3:58 PM, Brandon Williams  wrote:
> The current implementation of real_path uses chdir() in order to resolve
> symlinks.  Unfortunately this isn't thread-safe as chdir() affects a
> process as a whole and not just an individual thread.  Instead perform
> the symlink resolution by hand so that the calls to chdir() can be
> removed, making real_path one step closer to being reentrant.
>

Better description for this part of the change. I like the
improvement, as it clearly indicates what this particular patch is
about, and why, but doesn't over-state what we gain here.

The rest of this seems reasonable, though I'm not super familiar with
the code, so I didn't have any comments.

Thanks,
Jake


[PATCH v2 1/4] real_path: resolve symlinks by hand

2016-12-08 Thread Brandon Williams
The current implementation of real_path uses chdir() in order to resolve
symlinks.  Unfortunately this isn't thread-safe as chdir() affects a
process as a whole and not just an individual thread.  Instead perform
the symlink resolution by hand so that the calls to chdir() can be
removed, making real_path one step closer to being reentrant.

Signed-off-by: Brandon Williams 
---
 abspath.c | 183 +-
 1 file changed, 122 insertions(+), 61 deletions(-)

diff --git a/abspath.c b/abspath.c
index 2825de8..92f2a29 100644
--- a/abspath.c
+++ b/abspath.c
@@ -11,8 +11,38 @@ int is_directory(const char *path)
return (!stat(path, ) && S_ISDIR(st.st_mode));
 }
 
+/* removes the last path component from 'path' except if 'path' is root */
+static void strip_last_component(struct strbuf *path)
+{
+   if (path->len > offset_1st_component(path->buf)) {
+   char *last_slash = find_last_dir_sep(path->buf);
+   strbuf_setlen(path, last_slash - path->buf);
+   }
+}
+
+/* get (and remove) the next component in 'remaining' and place it in 'next' */
+static void get_next_component(struct strbuf *next, struct strbuf *remaining)
+{
+   char *start = NULL;
+   char *end = NULL;
+
+   strbuf_reset(next);
+
+   /* look for the next component */
+   /* Skip sequences of multiple path-separators */
+   for (start = remaining->buf; is_dir_sep(*start); start++)
+   ; /* nothing */
+   /* Find end of the path component */
+   for (end = start; *end && !is_dir_sep(*end); end++)
+   ; /* nothing */
+
+   strbuf_add(next, start, end - start);
+   /* remove the component from 'remaining' */
+   strbuf_remove(remaining, 0, end - remaining->buf);
+}
+
 /* We allow "recursive" symbolic links. Only within reason, though. */
-#define MAXDEPTH 5
+#define MAXSYMLINKS 5
 
 /*
  * Return the real path (i.e., absolute path, with symlinks resolved
@@ -21,7 +51,6 @@ int is_directory(const char *path)
  * absolute_path().)  The return value is a pointer to a static
  * buffer.
  *
- * The input and all intermediate paths must be shorter than MAX_PATH.
  * The directory part of path (i.e., everything up to the last
  * dir_sep) must denote a valid, existing directory, but the last
  * component need not exist.  If die_on_error is set, then die with an
@@ -33,22 +62,16 @@ int is_directory(const char *path)
  */
 static const char *real_path_internal(const char *path, int die_on_error)
 {
-   static struct strbuf sb = STRBUF_INIT;
+   static struct strbuf resolved = STRBUF_INIT;
+   struct strbuf remaining = STRBUF_INIT;
+   struct strbuf next = STRBUF_INIT;
+   struct strbuf symlink = STRBUF_INIT;
char *retval = NULL;
-
-   /*
-* If we have to temporarily chdir(), store the original CWD
-* here so that we can chdir() back to it at the end of the
-* function:
-*/
-   struct strbuf cwd = STRBUF_INIT;
-
-   int depth = MAXDEPTH;
-   char *last_elem = NULL;
+   int num_symlinks = 0;
struct stat st;
 
/* We've already done it */
-   if (path == sb.buf)
+   if (path == resolved.buf)
return path;
 
if (!*path) {
@@ -58,74 +81,112 @@ static const char *real_path_internal(const char *path, 
int die_on_error)
goto error_out;
}
 
-   strbuf_reset();
-   strbuf_addstr(, path);
-
-   while (depth--) {
-   if (!is_directory(sb.buf)) {
-   char *last_slash = find_last_dir_sep(sb.buf);
-   if (last_slash) {
-   last_elem = xstrdup(last_slash + 1);
-   strbuf_setlen(, last_slash - sb.buf + 1);
-   } else {
-   last_elem = xmemdupz(sb.buf, sb.len);
-   strbuf_reset();
-   }
+   strbuf_reset();
+
+   if (is_absolute_path(path)) {
+   /* absolute path; start with only root as being resolved */
+   int offset = offset_1st_component(path);
+   strbuf_add(, path, offset);
+   strbuf_addstr(, path + offset);
+   } else {
+   /* relative path; can use CWD as the initial resolved path */
+   if (strbuf_getcwd()) {
+   if (die_on_error)
+   die_errno("unable to get current working 
directory");
+   else
+   goto error_out;
+   }
+   strbuf_addstr(, path);
+   }
+
+   /* Iterate over the remaining path components */
+   while (remaining.len > 0) {
+   get_next_component(, );
+
+   if (next.len == 0) {
+   continue; /* empty component */
+   } else if (next.len == 1 &&