On 30 July 2014 20:27, <stef...@apache.org> wrote: > Author: stefan2 > Date: Wed Jul 30 16:27:44 2014 > New Revision: 1614703 > > URL: http://svn.apache.org/r1614703 > Log: > On the authzperf branch: Implement support for generic / complex > wildcard patterns like "/foo*bar*baz/". > > To do this efficiently requires normalized pattern paths, so we > add the normalization logic as described here: > https://wiki.apache.org/subversion/AuthzImprovements > Stefan,
Please find my comments inline. > * BRANCH-README > (TODO, DONE): The last pattern TODO is now DONE. > > * subversion/include/svn_string.h > (svn_stringbuf_replace_all): Declare another stringbuf API function. > > * subversion/libsvn_subr/string.c > (svn_stringbuf_replace_all): Implement it. > > * subversion/libsvn_repos/authz.c > (node_pattern_t): Add a container of all odd / complex patterns. > No specific ordering on this one. > (insert_path): Finally add the "catch-all-other" case. > (normalize_wildcards): New pattern path normalization code. > (process_path_rule): Ensure properly normalized patterns. > (finalize_tree): Cover yet another set of sub-nodes. > (match_to_next_wildcard, > match_pattern, > add_complex_matches): New generic pattern matching logic. > (lookup): Handle yet another class of patterns. > > Modified: > subversion/branches/authzperf/BRANCH-README > subversion/branches/authzperf/subversion/include/svn_string.h > subversion/branches/authzperf/subversion/libsvn_repos/authz.c > subversion/branches/authzperf/subversion/libsvn_subr/string.c > > Modified: subversion/branches/authzperf/BRANCH-README > URL: > http://svn.apache.org/viewvc/subversion/branches/authzperf/BRANCH-README?rev=1614703&r1=1614702&r2=1614703&view=diff > ============================================================================== > --- subversion/branches/authzperf/BRANCH-README (original) > +++ subversion/branches/authzperf/BRANCH-README Wed Jul 30 16:27:44 2014 > @@ -9,7 +9,6 @@ TODO: > > * implement an authz-specific constructor for the config parser > * implement precedence rules > -* add support for arbitrary patterns ("/foo*bar*baz/" etc.) > * implement value-expansion mode in the config parser > > DONE: > @@ -24,3 +23,4 @@ DONE: > * add support for variable length full-segment wildcards ("/**/") > * add support for prefix segment patterns ("/foo*/") > * add support for suffix segment patterns ("/*foo/") > +* add support for arbitrary patterns ("/foo*bar*baz/" etc.) > > Modified: subversion/branches/authzperf/subversion/include/svn_string.h > URL: > http://svn.apache.org/viewvc/subversion/branches/authzperf/subversion/include/svn_string.h?rev=1614703&r1=1614702&r2=1614703&view=diff > ============================================================================== > --- subversion/branches/authzperf/subversion/include/svn_string.h (original) > +++ subversion/branches/authzperf/subversion/include/svn_string.h Wed Jul 30 > 16:27:44 2014 > @@ -388,6 +388,16 @@ svn_stringbuf_replace(svn_stringbuf_t *s > const char *bytes, > apr_size_t new_count); > > +/** Replace all occurrences of @a to_find in @a str with @a replacement. > + * Return the number of replacements made. > + * > + * @since New in 1.10. > + */ > +apr_size_t > +svn_stringbuf_replace_all(svn_stringbuf_t *str, > + const char *to_find, > + const char *replacement); > + > /** Return a duplicate of @a original_string. */ > svn_stringbuf_t * > svn_stringbuf_dup(const svn_stringbuf_t *original_string, apr_pool_t *pool); > > Modified: subversion/branches/authzperf/subversion/libsvn_repos/authz.c > URL: > http://svn.apache.org/viewvc/subversion/branches/authzperf/subversion/libsvn_repos/authz.c?rev=1614703&r1=1614702&r2=1614703&view=diff > ============================================================================== > --- subversion/branches/authzperf/subversion/libsvn_repos/authz.c (original) > +++ subversion/branches/authzperf/subversion/libsvn_repos/authz.c Wed Jul 30 > 16:27:44 2014 > @@ -325,6 +325,10 @@ typedef struct node_pattern_t > * segment suffix. */ > apr_array_header_t *suffixes; > > + /* If not NULL, the segments of all nodes_t* in this array contain > + * wildcards and don't fit into any of the above categories. */ > + apr_array_header_t *complex; > + > /* This node itself is a "**" segment and must therefore itself be added > * to the matching node list for the next level. */ > svn_boolean_t repeat; > @@ -617,8 +621,10 @@ insert_path(node_t *node, > reversed, result_pool); > } > > - /* More cases to be added here later. > - * There will be no error condition to be checked for. */ > + /* General pattern */ > + else > + sub_node = ensure_node_in_array(&node->pattern_sub_nodes->complex, > + segment, result_pool); > } > else > { > @@ -641,6 +647,43 @@ insert_path(node_t *node, > result_pool, scratch_pool); > } > > +/* Normalize the wildcard pattern PATH in accordance to > + * https://wiki.apache.org/subversion/AuthzImprovements > + * and return the result. > + */ > +const char * > +normalize_wildcards(const char *path, > + apr_pool_t *result_pool) > +{ > + const char *pos; > + svn_stringbuf_t *buffer = svn_stringbuf_create(path, result_pool); > + > + /* Reduce sequences variable-length segment matches to single segment > + * matches with the other segment patterns reduced to "*". */ > + while (svn_stringbuf_replace_all(buffer, "/**/**/", "/*/**/")) > + ; > + > + /* Our tree traversal is more efficient if we put variable segment count > + * wildcards last. */ > + while (svn_stringbuf_replace_all(buffer, "/**/*/", "/**/*/")) > + ; > + > + /* Reduce trailing "**" a single "*". */ > + while ( buffer->len > 1 > + && buffer->data[buffer->len-1] == '*' > + && buffer->data[buffer->len-2] == '*') > + svn_stringbuf_remove(buffer, buffer->len-1, 1); > + > + /* Reduce "**" _inside_ a segment to a single "*". */ > + for (pos = strstr(buffer->data, "**"); pos; pos = strstr(pos, "**")) > + if ((pos > buffer->data && pos[-1] != '/') || (pos[2] != '/')) > + svn_stringbuf_remove(buffer, pos - buffer->data, 1); > + else > + ++pos; Quick quiz: I'm only one dev@ list who do not understand magic peace of code above? I'm fine if majority of Subversion devs do not consider such code as cryptic unreviewable magic. > + > + return buffer->data; > +} > + > /* Callback baton to be used with process_path_rule(). > */ > typedef struct process_path_rule_baton_t > @@ -704,6 +747,9 @@ process_path_rule(const char *name, > return TRUE; > > /* Process the path */ > + if (wildcards && strstr(path, "**")) > + path = normalize_wildcards(path, scratch_pool); > + > segments = svn_cstring_split(path, "/", FALSE, scratch_pool); > APR_ARRAY_PUSH(segments, const char *) = NULL; > > @@ -787,6 +833,8 @@ finalize_tree(node_t *parent, > scratch_pool); > finalize_subnode_array(node, access, node->pattern_sub_nodes->suffixes, > scratch_pool); > + finalize_subnode_array(node, access, node->pattern_sub_nodes->complex, > + scratch_pool); > } > > /* Add our min / max info to the parent's info. > @@ -985,6 +1033,120 @@ add_prefix_matches(lookup_state_t *state > } > } > > +/* Utility factored out from match_pattern. > + * > + * Compare S with PATTERN up to the first wildcard in PATTERN. The first > + * char must be a match already. If PATTERN does not contain a wildcard, > + * compare the full strings. > + * > + * If no mismatch was found, return the number of matching characters and > + * 0 otherwise. > + */ > +static apr_size_t > +match_to_next_wildcard(const char *s, > + const char *pattern) > +{ > + apr_size_t len; > + > + assert(pattern[0] == s[0]); > + for (len = 1; pattern[len] && pattern[len] != '*'; ++len) > + if (pattern[len] != s[len]) > + return 0; > + > + if (!pattern[len]) > + return s[len] == 0 ? len : 0; > + > + return len; > +} > + > +/* Return TRUE if string S matches wildcard PATTERN. The latter must not be > + * empty and must be normalized, i.e. not contain "**". > + */ > +static svn_boolean_t > +match_pattern(const char *s, > + const char *pattern) > +{ > + /* Matching a wildcard pattern is trival: > + * PATTERN can be considered a list of literal strings separated by '*'. > + * We simply have to find all sub-strings in that order, i.e. we can do > + * so greedily. Be careful to match prefix and suffix correctly. > + */ > + char pattern_char, s_char; > + > + /* The prefix part of PATTERN needs special treatment as we can't just > + * match any substring of S. */ > + if (pattern[0] != '*') You'are using inconsitent style to access first char in pattern: pattern[0] and *pattern. It makes code review harder. Why you're doing this way? > + { > + apr_size_t match_len; > + > + /* match_to_next_wildcard() assumes a that the first char matches. */ > + if (pattern[0] != s[0]) > + return FALSE; > + > + /* Match up to but not beyond the next wildcard. */ > + match_len = match_to_next_wildcard(s, pattern); > + if (match_len == 0) > + return FALSE; > + > + /* Continue at next wildcard or end-of-string. */ > + pattern += match_len; > + s += match_len; > + } > + > + /* Process all of PATTERN and match it against S char by char. */ > + while ((pattern_char = *pattern)) > + { > + apr_size_t match_len = 0; > + > + /* If PATTERN ended on a wildcard, S can be nothing but a match. */ > + pattern_char = *++pattern; You're assigning patern_char in while() condition and in while body. What is the point for such code? I'm also find expression like "pattern_char = *++pattern" very hard to follow. > + if (pattern_char == 0) > + return TRUE; > + > + /* Due to normalization, PATTERN_CHAR cannot be '*' because "**" is > + * prohibited. Find the next position in S that matches until the > + * next wildcard in PATTERN or its end. */ > + for (s_char = *s; s_char; s_char = *++s) > + if (pattern_char == s_char) > + { > + /* First char matches, what about the rest? If there is no > + * wildcard left in PATTERN (i.e. the suffix part), we only get > + * a non-zero result if S and PATTERN match completely. */ > + match_len = match_to_next_wildcard(s, pattern); > + > + /* Found a match? If so, greedily take it. */ > + if (match_len) > + break; > + } > + > + /* No match found -> mismatch and done. */ > + if (!match_len) > + return FALSE; > + > + /* Continue at next wildcard or end-of-string. */ > + pattern += match_len; > + s += match_len; > + } > + > + /* The pattern ended and S must either be fully matched now or is not a > + * match at all. */ > + return *s == 0; > +} > + > +static void > +add_complex_matches(lookup_state_t *state, > + const svn_stringbuf_t *segment, > + apr_array_header_t *patterns) > +{ > + int i; > + for (i = 0; i < patterns->nelts; ++i) > + { > + node_t *node = APR_ARRAY_IDX(patterns, i, node_t *); > + if (match_pattern(segment->data, node->segment.data)) > + add_next_node(state, node); > + } > +} > + > /* Extract the next segment from PATH and copy it into SEGMENT, whose current > * contents get overwritten. Empty paths ("") are supported and leading '/' > * segment separators will be interpreted as an empty segment (""). Non- > @@ -1129,6 +1291,10 @@ lookup(lookup_state_t *state, > add_prefix_matches(state, segment, > node->pattern_sub_nodes->prefixes); > > + if (node->pattern_sub_nodes->complex) > + add_complex_matches(state, segment, > + node->pattern_sub_nodes->complex); > + > /* Find all suffux pattern matches. > * This must be the last check as it destroys SEGMENT. */ > if (node->pattern_sub_nodes->suffixes) > > Modified: subversion/branches/authzperf/subversion/libsvn_subr/string.c > URL: > http://svn.apache.org/viewvc/subversion/branches/authzperf/subversion/libsvn_subr/string.c?rev=1614703&r1=1614702&r2=1614703&view=diff > ============================================================================== > --- subversion/branches/authzperf/subversion/libsvn_subr/string.c (original) > +++ subversion/branches/authzperf/subversion/libsvn_subr/string.c Wed Jul 30 > 16:27:44 2014 > @@ -710,6 +710,71 @@ svn_stringbuf_replace(svn_stringbuf_t *s > } > > > +apr_size_t > +svn_stringbuf_replace_all(svn_stringbuf_t *str, > + const char *to_find, > + const char *replacement) > +{ Please add tests for this function. Code doesn't seems trivial and does potential dangerous things like direct manipulation of stringbuf using memcpy. > + apr_size_t replacements = 0; > + > + apr_size_t current = 0; > + apr_size_t original_length = str->len; > + > + apr_size_t to_copy; > + apr_size_t to_find_len; > + apr_size_t replacement_len; > + apr_size_t new_length; > + > + /* Early exit. */ > + const char *pos = strstr(str->data, to_find); > + if (pos == NULL) > + return 0; > + > + to_find_len = strlen(to_find); > + replacement_len = strlen(replacement); > + > + /* We will store the new contents behind the NUL terminator of the current > + * data and track the total length in STR->LEN to make the reallocation > + * code preserve both bits. However, we need to keep the NUL between them > + * to make strstr stop at that boundary. */ Why you've implemented so complicated algorithm instread of calling existing svn_stringbuf_replace() for every match? You're still reallocating buffer multiple times and moving data arround, so I doubt that it significantly more performant that simple implementation based on svn_stringbuf_replace(). > + ++str->len; > + > + /* Find all occurrences of TO_FIND, copy the bits in between to the target, > + * separated by REPLACEMENT. */ > + for ( ; pos; pos = strstr(str->data + current, to_find), ++replacements) > + { > + apr_size_t to_copy = pos - str->data - current; > + svn_stringbuf_ensure(str, str->len + to_copy + replacement_len); > + > + if (to_copy) > + memcpy(str->data + str->len, str->data + current, to_copy); > + current += to_copy + to_find_len; > + > + str->len += to_copy; > + memcpy(str->data + str->len, replacement, replacement_len); > + str->len += replacement_len; > + } > + > + /* Copy remainder. */ > + to_copy = original_length - current; > + if (to_copy) > + { > + svn_stringbuf_ensure(str, str->len + to_copy); > + memcpy(str->data + str->len, str->data + current, to_copy); > + str->len += to_copy; > + } > + > + /* Move new contents to the start of the buffer and terminate it. */ > + new_length = str->len - original_length - 1; > + memmove(str->data, str->data + original_length + 1, new_length); > + str->len = new_length; > + str->data[new_length] = 0; > + > + /* Done. */ > + return replacements; > +} > + > + I'm also +1 Brane comment about using maintaner mode and do not commit code with compilation warnings. -- Ivan Zhakov CTO | VisualSVN | http://www.visualsvn.com