Author: stefan2 Date: Wed Jul 30 16:27:44 2014 New Revision: 1614703 URL: http://svn.apache.org/r1614703 Log: On the authzperf branch: Implement support for generic / complex wildcard patterns like "/foo*bar*baz/".
To do this efficiently requires normalized pattern paths, so we add the normalization logic as described here: https://wiki.apache.org/subversion/AuthzImprovements * BRANCH-README (TODO, DONE): The last pattern TODO is now DONE. * subversion/include/svn_string.h (svn_stringbuf_replace_all): Declare another stringbuf API function. * subversion/libsvn_subr/string.c (svn_stringbuf_replace_all): Implement it. * subversion/libsvn_repos/authz.c (node_pattern_t): Add a container of all odd / complex patterns. No specific ordering on this one. (insert_path): Finally add the "catch-all-other" case. (normalize_wildcards): New pattern path normalization code. (process_path_rule): Ensure properly normalized patterns. (finalize_tree): Cover yet another set of sub-nodes. (match_to_next_wildcard, match_pattern, add_complex_matches): New generic pattern matching logic. (lookup): Handle yet another class of patterns. Modified: subversion/branches/authzperf/BRANCH-README subversion/branches/authzperf/subversion/include/svn_string.h subversion/branches/authzperf/subversion/libsvn_repos/authz.c subversion/branches/authzperf/subversion/libsvn_subr/string.c Modified: subversion/branches/authzperf/BRANCH-README URL: http://svn.apache.org/viewvc/subversion/branches/authzperf/BRANCH-README?rev=1614703&r1=1614702&r2=1614703&view=diff ============================================================================== --- subversion/branches/authzperf/BRANCH-README (original) +++ subversion/branches/authzperf/BRANCH-README Wed Jul 30 16:27:44 2014 @@ -9,7 +9,6 @@ TODO: * implement an authz-specific constructor for the config parser * implement precedence rules -* add support for arbitrary patterns ("/foo*bar*baz/" etc.) * implement value-expansion mode in the config parser DONE: @@ -24,3 +23,4 @@ DONE: * add support for variable length full-segment wildcards ("/**/") * add support for prefix segment patterns ("/foo*/") * add support for suffix segment patterns ("/*foo/") +* add support for arbitrary patterns ("/foo*bar*baz/" etc.) Modified: subversion/branches/authzperf/subversion/include/svn_string.h URL: http://svn.apache.org/viewvc/subversion/branches/authzperf/subversion/include/svn_string.h?rev=1614703&r1=1614702&r2=1614703&view=diff ============================================================================== --- subversion/branches/authzperf/subversion/include/svn_string.h (original) +++ subversion/branches/authzperf/subversion/include/svn_string.h Wed Jul 30 16:27:44 2014 @@ -388,6 +388,16 @@ svn_stringbuf_replace(svn_stringbuf_t *s const char *bytes, apr_size_t new_count); +/** Replace all occurrences of @a to_find in @a str with @a replacement. + * Return the number of replacements made. + * + * @since New in 1.10. + */ +apr_size_t +svn_stringbuf_replace_all(svn_stringbuf_t *str, + const char *to_find, + const char *replacement); + /** Return a duplicate of @a original_string. */ svn_stringbuf_t * svn_stringbuf_dup(const svn_stringbuf_t *original_string, apr_pool_t *pool); Modified: subversion/branches/authzperf/subversion/libsvn_repos/authz.c URL: http://svn.apache.org/viewvc/subversion/branches/authzperf/subversion/libsvn_repos/authz.c?rev=1614703&r1=1614702&r2=1614703&view=diff ============================================================================== --- subversion/branches/authzperf/subversion/libsvn_repos/authz.c (original) +++ subversion/branches/authzperf/subversion/libsvn_repos/authz.c Wed Jul 30 16:27:44 2014 @@ -325,6 +325,10 @@ typedef struct node_pattern_t * segment suffix. */ apr_array_header_t *suffixes; + /* If not NULL, the segments of all nodes_t* in this array contain + * wildcards and don't fit into any of the above categories. */ + apr_array_header_t *complex; + /* This node itself is a "**" segment and must therefore itself be added * to the matching node list for the next level. */ svn_boolean_t repeat; @@ -617,8 +621,10 @@ insert_path(node_t *node, reversed, result_pool); } - /* More cases to be added here later. - * There will be no error condition to be checked for. */ + /* General pattern */ + else + sub_node = ensure_node_in_array(&node->pattern_sub_nodes->complex, + segment, result_pool); } else { @@ -641,6 +647,43 @@ insert_path(node_t *node, result_pool, scratch_pool); } +/* Normalize the wildcard pattern PATH in accordance to + * https://wiki.apache.org/subversion/AuthzImprovements + * and return the result. + */ +const char * +normalize_wildcards(const char *path, + apr_pool_t *result_pool) +{ + const char *pos; + svn_stringbuf_t *buffer = svn_stringbuf_create(path, result_pool); + + /* Reduce sequences variable-length segment matches to single segment + * matches with the other segment patterns reduced to "*". */ + while (svn_stringbuf_replace_all(buffer, "/**/**/", "/*/**/")) + ; + + /* Our tree traversal is more efficient if we put variable segment count + * wildcards last. */ + while (svn_stringbuf_replace_all(buffer, "/**/*/", "/**/*/")) + ; + + /* Reduce trailing "**" a single "*". */ + while ( buffer->len > 1 + && buffer->data[buffer->len-1] == '*' + && buffer->data[buffer->len-2] == '*') + svn_stringbuf_remove(buffer, buffer->len-1, 1); + + /* Reduce "**" _inside_ a segment to a single "*". */ + for (pos = strstr(buffer->data, "**"); pos; pos = strstr(pos, "**")) + if ((pos > buffer->data && pos[-1] != '/') || (pos[2] != '/')) + svn_stringbuf_remove(buffer, pos - buffer->data, 1); + else + ++pos; + + return buffer->data; +} + /* Callback baton to be used with process_path_rule(). */ typedef struct process_path_rule_baton_t @@ -704,6 +747,9 @@ process_path_rule(const char *name, return TRUE; /* Process the path */ + if (wildcards && strstr(path, "**")) + path = normalize_wildcards(path, scratch_pool); + segments = svn_cstring_split(path, "/", FALSE, scratch_pool); APR_ARRAY_PUSH(segments, const char *) = NULL; @@ -787,6 +833,8 @@ finalize_tree(node_t *parent, scratch_pool); finalize_subnode_array(node, access, node->pattern_sub_nodes->suffixes, scratch_pool); + finalize_subnode_array(node, access, node->pattern_sub_nodes->complex, + scratch_pool); } /* Add our min / max info to the parent's info. @@ -985,6 +1033,120 @@ add_prefix_matches(lookup_state_t *state } } +/* Utility factored out from match_pattern. + * + * Compare S with PATTERN up to the first wildcard in PATTERN. The first + * char must be a match already. If PATTERN does not contain a wildcard, + * compare the full strings. + * + * If no mismatch was found, return the number of matching characters and + * 0 otherwise. + */ +static apr_size_t +match_to_next_wildcard(const char *s, + const char *pattern) +{ + apr_size_t len; + + assert(pattern[0] == s[0]); + for (len = 1; pattern[len] && pattern[len] != '*'; ++len) + if (pattern[len] != s[len]) + return 0; + + if (!pattern[len]) + return s[len] == 0 ? len : 0; + + return len; +} + +/* Return TRUE if string S matches wildcard PATTERN. The latter must not be + * empty and must be normalized, i.e. not contain "**". + */ +static svn_boolean_t +match_pattern(const char *s, + const char *pattern) +{ + /* Matching a wildcard pattern is trival: + * PATTERN can be considered a list of literal strings separated by '*'. + * We simply have to find all sub-strings in that order, i.e. we can do + * so greedily. Be careful to match prefix and suffix correctly. + */ + char pattern_char, s_char; + + /* The prefix part of PATTERN needs special treatment as we can't just + * match any substring of S. */ + if (pattern[0] != '*') + { + apr_size_t match_len; + + /* match_to_next_wildcard() assumes a that the first char matches. */ + if (pattern[0] != s[0]) + return FALSE; + + /* Match up to but not beyond the next wildcard. */ + match_len = match_to_next_wildcard(s, pattern); + if (match_len == 0) + return FALSE; + + /* Continue at next wildcard or end-of-string. */ + pattern += match_len; + s += match_len; + } + + /* Process all of PATTERN and match it against S char by char. */ + while ((pattern_char = *pattern)) + { + apr_size_t match_len = 0; + + /* If PATTERN ended on a wildcard, S can be nothing but a match. */ + pattern_char = *++pattern; + if (pattern_char == 0) + return TRUE; + + /* Due to normalization, PATTERN_CHAR cannot be '*' because "**" is + * prohibited. Find the next position in S that matches until the + * next wildcard in PATTERN or its end. */ + for (s_char = *s; s_char; s_char = *++s) + if (pattern_char == s_char) + { + /* First char matches, what about the rest? If there is no + * wildcard left in PATTERN (i.e. the suffix part), we only get + * a non-zero result if S and PATTERN match completely. */ + match_len = match_to_next_wildcard(s, pattern); + + /* Found a match? If so, greedily take it. */ + if (match_len) + break; + } + + /* No match found -> mismatch and done. */ + if (!match_len) + return FALSE; + + /* Continue at next wildcard or end-of-string. */ + pattern += match_len; + s += match_len; + } + + /* The pattern ended and S must either be fully matched now or is not a + * match at all. */ + return *s == 0; +} + +static void +add_complex_matches(lookup_state_t *state, + const svn_stringbuf_t *segment, + apr_array_header_t *patterns) +{ + int i; + for (i = 0; i < patterns->nelts; ++i) + { + node_t *node = APR_ARRAY_IDX(patterns, i, node_t *); + if (match_pattern(segment->data, node->segment.data)) + add_next_node(state, node); + } +} + /* Extract the next segment from PATH and copy it into SEGMENT, whose current * contents get overwritten. Empty paths ("") are supported and leading '/' * segment separators will be interpreted as an empty segment (""). Non- @@ -1129,6 +1291,10 @@ lookup(lookup_state_t *state, add_prefix_matches(state, segment, node->pattern_sub_nodes->prefixes); + if (node->pattern_sub_nodes->complex) + add_complex_matches(state, segment, + node->pattern_sub_nodes->complex); + /* Find all suffux pattern matches. * This must be the last check as it destroys SEGMENT. */ if (node->pattern_sub_nodes->suffixes) Modified: subversion/branches/authzperf/subversion/libsvn_subr/string.c URL: http://svn.apache.org/viewvc/subversion/branches/authzperf/subversion/libsvn_subr/string.c?rev=1614703&r1=1614702&r2=1614703&view=diff ============================================================================== --- subversion/branches/authzperf/subversion/libsvn_subr/string.c (original) +++ subversion/branches/authzperf/subversion/libsvn_subr/string.c Wed Jul 30 16:27:44 2014 @@ -710,6 +710,71 @@ svn_stringbuf_replace(svn_stringbuf_t *s } +apr_size_t +svn_stringbuf_replace_all(svn_stringbuf_t *str, + const char *to_find, + const char *replacement) +{ + apr_size_t replacements = 0; + + apr_size_t current = 0; + apr_size_t original_length = str->len; + + apr_size_t to_copy; + apr_size_t to_find_len; + apr_size_t replacement_len; + apr_size_t new_length; + + /* Early exit. */ + const char *pos = strstr(str->data, to_find); + if (pos == NULL) + return 0; + + to_find_len = strlen(to_find); + replacement_len = strlen(replacement); + + /* We will store the new contents behind the NUL terminator of the current + * data and track the total length in STR->LEN to make the reallocation + * code preserve both bits. However, we need to keep the NUL between them + * to make strstr stop at that boundary. */ + ++str->len; + + /* Find all occurrences of TO_FIND, copy the bits in between to the target, + * separated by REPLACEMENT. */ + for ( ; pos; pos = strstr(str->data + current, to_find), ++replacements) + { + apr_size_t to_copy = pos - str->data - current; + svn_stringbuf_ensure(str, str->len + to_copy + replacement_len); + + if (to_copy) + memcpy(str->data + str->len, str->data + current, to_copy); + current += to_copy + to_find_len; + + str->len += to_copy; + memcpy(str->data + str->len, replacement, replacement_len); + str->len += replacement_len; + } + + /* Copy remainder. */ + to_copy = original_length - current; + if (to_copy) + { + svn_stringbuf_ensure(str, str->len + to_copy); + memcpy(str->data + str->len, str->data + current, to_copy); + str->len += to_copy; + } + + /* Move new contents to the start of the buffer and terminate it. */ + new_length = str->len - original_length - 1; + memmove(str->data, str->data + original_length + 1, new_length); + str->len = new_length; + str->data[new_length] = 0; + + /* Done. */ + return replacements; +} + + svn_stringbuf_t * svn_stringbuf_dup(const svn_stringbuf_t *original_string, apr_pool_t *pool) {
