On Sun, 29 Jul 2012 09:50:13 -0400 Dave Reisner <[email protected]> wrote:
> On Sun, Jul 29, 2012 at 04:18:32PM +1000, Allan McRae wrote: > > From: Andrew Gregory <[email protected]> > > > > The _alpm_filelist_resolve function takes a filelist and creates > > a list with any symlinks in directory paths resolved. > > > > Signed-off-by: Allan McRae <[email protected]> > > --- > > lib/libalpm/filelist.c | 156 > > +++++++++++++++++++++++++++++++++++++++++++++++++ > > lib/libalpm/filelist.h | 2 + > > 2 files changed, 158 insertions(+) > > > > diff --git a/lib/libalpm/filelist.c b/lib/libalpm/filelist.c > > index 1928056..d32a3e5 100644 > > --- a/lib/libalpm/filelist.c > > +++ b/lib/libalpm/filelist.c > > @@ -17,10 +17,166 @@ > > * along with this program. If not, see <http://www.gnu.org/licenses/>. > > */ > > > > +#include <limits.h> > > #include <string.h> > > +#include <sys/stat.h> > > > > /* libalpm */ > > #include "filelist.h" > > +#include "util.h" > > + > > +/** Helper function for comparing strings when sorting */ > > +static int _strcmp(const void *s1, const void *s2) > > I'm worried about symbol clashes here, since technically '_' isn't our > namespace. Please pick a name similar to what we use elsewhere for > comparison callbacks, such as _alpm_str_cmp or _alpm_files_cmp. > > > +{ > > + return strcmp(*(char **)s1, *(char **)s2); > > +} > > + > > +/** > > + * @brief Resolves a symlink and its children. > > + * > > + * @attention Pre-condition: files must be sorted! > > + * > > + * @param files filelist to resolve > > + * @param i index in files to start processing > > + * @param path absolute path for the symlink being resolved > > + * @param root_len length of the root portion of path > > + * @param resolving is file \i in \files a symlink that needs to be > > resolved > > + * > > + * @return the index of the last file resolved > > + */ > > +size_t _alpm_filelist_resolve_link( > > + alpm_filelist_t *files, size_t i, char *path, size_t root_len, > > int resolving) > > +{ > > + struct stat sbuf; > > + const char *causal_dir; /* symlink being resolved */ > > + char *filename_r; /* resolved filename */ > > + size_t causal_dir_len = 0, causal_dir_r_len = 0; > > + > > + if(resolving) { > > + /* deal with the symlink being resolved */ > > + filename_r = malloc(PATH_MAX); > > This seems very prone to leakage. Why not just allocate it on the stack? > filename_r is only needed if a symlink is being resolved, so most of the time allocating memory for it would be a waste. It also helps prevent a stack overflow since this function is recursive. You are right about the leakage though, as evidenced a whole 5 lines below... I suppose it should be changed to the MALLOC macro though; I keep forgetting about those things. > > + causal_dir = files->files[i].name; > > + causal_dir_len = strlen(causal_dir); > > + if(realpath(path, filename_r) == NULL) { > > + files->resolved_path[i] = strdup(causal_dir); > > STRDUP macro? > > > + return i; > > + } > > + causal_dir_r_len = strlen(filename_r + root_len) + 1; > > + if(causal_dir_r_len >= PATH_MAX) { > > + files->resolved_path[i] = strdup(causal_dir); > > STRDUP... > > > + return i; > > + } > > + /* remove root_r from filename_r */ > > + memmove(filename_r, filename_r + root_len, causal_dir_r_len + > > 1); > > + strcpy(filename_r + causal_dir_r_len - 1, "/"); > > + files->resolved_path[i] = strdup(filename_r); > > STRDUP... I'm done pointing these out, but there's more. > > > + i++; > > + } > > + > > + for(; i < files->count && (!resolving || > > + strncmp(files->files[i].name, causal_dir, > > causal_dir_len) == 0); i++) { > > This seems really ugly. Can't the second half of the exit condition be > moved into the body of the loop? > > > + char *filename = files->files[i].name; > > + size_t f_len = strlen(filename); > > + > > + if(resolving) { > > + if(f_len + causal_dir_r_len - causal_dir_len > > > PATH_MAX) { > > + files->resolved_path[i] = strdup(filename); > > + continue; > > + } > > + > > + strcpy(filename_r + causal_dir_r_len, filename + > > causal_dir_len); > > Are you sure this never copies more than PATH_MAX bytes into filename_r? > There's more of these buffer overflow concerns that flat out aren't > handled. > Unless I got the math wrong, the if directly above ensures that very thing. All of the strcpy()s, with the notable exception of the one directly below, have similar checks. > > + > > + if(root_len + causal_dir_r_len + f_len - causal_dir_len > > > PATH_MAX) { > > + /* absolute path is too long */ > > + files->resolved_path[i] = strdup(filename_r); > > + continue; > > + } > > + } else { > > + filename_r = filename; > > + } > > + > > + /* deal with files and paths too long to resolve*/ > > + if(filename[f_len-1] != '/') { > > Style nit -- add space around the '-', and a space before the close of > the comment. I'm not really sure how this does what the comment says. > Probably because I erroneously moved the length check above when refactoring... > > + files->resolved_path[i] = strdup(filename_r); > > + continue; > > + } > > + > > + /* construct absolute path and stat() */ > > + strcpy(path + root_len, filename_r); > > + int exists = !_alpm_lstat(path, &sbuf); > > + > > + /* deal with symlinks */ > > + if(exists && S_ISLNK(sbuf.st_mode)) { > > + i = _alpm_filelist_resolve_link(files, i, path, > > root_len, 1); > > + continue; > > + } > > + > > + /* deal with normal directories */ > > + files->resolved_path[i] = strdup(filename_r); > > + > > + /* deal with children of non-existent directories to reduce > > lstat() calls */ > > + if (!exists) { > > + char *f; > > + i++; > > + while(i < files->count && strncmp(files->files[i].name, > > filename, f_len) == 0) { > > + f = files->files[i].name; > > + if(resolving && strlen(f + causal_dir_len) + > > causal_dir_r_len <= PATH_MAX) { > > + strcpy(filename_r + causal_dir_r_len, f > > + causal_dir_len); > > + files->resolved_path[i] = > > strdup(filename_r); > > + } else { > > + files->resolved_path[i] = strdup(f); > > + } > > + i++; > > + } > > + i--; > > + } > > + } > > + > > + if(resolving) { > > + free(filename_r); > > + } > > + > > + return i-1; > > +} > > + > > +/** > > + * @brief Takes a file list and resolves all directory paths according to > > filesystem > > + * > > + * @attention Pre-condition: files must be sorted! > > + * > > + * @note A symlink and directory at the same path in two difference > > packages > > + * causes a conflict so the filepath can not change as packages get > > installed > > + * > > + * @param handle the context handle > > + * @param files list of files to resolve > > + */ > > +void _alpm_filelist_resolve(alpm_handle_t *handle, alpm_filelist_t *files) > > +{ > > + char path[PATH_MAX]; > > + size_t root_len; > > + > > + if(!files || files->resolved_path) { > > + return; > > + } > > + > > + MALLOC(files->resolved_path, files->count * sizeof(char*), return); > > + memset(files->resolved_path, 0, files->count); > > Just use CALLOC instead of MALLOC+memset > > > + > > + /* not much point in going on if we can't even resolve root */ > > + if(realpath(handle->root, path) == NULL){ > > + return; > > + } > > + root_len = strlen(path) + 1; > > + if(root_len >= PATH_MAX) { > > + return; > > + } > > + strcpy(path + root_len - 1, "/"); > > Why not use direct assignment or memcpy here? > > > + > > + _alpm_filelist_resolve_link(files, 0, path, root_len, 0); > > + > > + qsort(files->resolved_path, files->count, sizeof(char *), _strcmp); > > +} > > + > > > > /* Returns the difference of the provided two lists of files. > > * Pre-condition: both lists are sorted! > > diff --git a/lib/libalpm/filelist.h b/lib/libalpm/filelist.h > > index 2d5cbc0..3152b9d 100644 > > --- a/lib/libalpm/filelist.h > > +++ b/lib/libalpm/filelist.h > > @@ -21,6 +21,8 @@ > > > > #include "alpm.h" > > > > +size_t _alpm_filelist_resolve_link(alpm_filelist_t *files, size_t i, char > > *path, size_t root_len, int resolving); > > Wrap at 80 columns, please. > > > +void _alpm_filelist_resolve(alpm_handle_t *handle, alpm_filelist_t *files); > > > > alpm_list_t *_alpm_filelist_difference(alpm_filelist_t *filesA, > > alpm_filelist_t *filesB); > > -- > > 1.7.11.3 > > > > >
