Author: stefan2
Date: Wed Jul 30 16:27:44 2014
New Revision: 1614703

URL: http://svn.apache.org/r1614703
Log:
On the authzperf branch:  Implement support for generic / complex
wildcard patterns like "/foo*bar*baz/".

To do this efficiently requires normalized pattern paths, so we
add the normalization logic as described here:
https://wiki.apache.org/subversion/AuthzImprovements

* BRANCH-README
  (TODO, DONE): The last pattern TODO is now DONE.

* subversion/include/svn_string.h
  (svn_stringbuf_replace_all): Declare another stringbuf API function.

* subversion/libsvn_subr/string.c
  (svn_stringbuf_replace_all): Implement it.

* subversion/libsvn_repos/authz.c
  (node_pattern_t): Add a container of all odd / complex patterns.
                    No specific ordering on this one.
  (insert_path): Finally add the "catch-all-other" case.
  (normalize_wildcards): New pattern path normalization code.
  (process_path_rule): Ensure properly normalized patterns.
  (finalize_tree): Cover yet another set of sub-nodes.
  (match_to_next_wildcard,
   match_pattern,
   add_complex_matches): New generic pattern matching logic.
  (lookup): Handle yet another class of patterns.

Modified:
    subversion/branches/authzperf/BRANCH-README
    subversion/branches/authzperf/subversion/include/svn_string.h
    subversion/branches/authzperf/subversion/libsvn_repos/authz.c
    subversion/branches/authzperf/subversion/libsvn_subr/string.c

Modified: subversion/branches/authzperf/BRANCH-README
URL: 
http://svn.apache.org/viewvc/subversion/branches/authzperf/BRANCH-README?rev=1614703&r1=1614702&r2=1614703&view=diff
==============================================================================
--- subversion/branches/authzperf/BRANCH-README (original)
+++ subversion/branches/authzperf/BRANCH-README Wed Jul 30 16:27:44 2014
@@ -9,7 +9,6 @@ TODO:
 
 * implement an authz-specific constructor for the config parser
 * implement precedence rules
-* add support for arbitrary patterns ("/foo*bar*baz/" etc.)
 * implement value-expansion mode in the config parser
 
 DONE:
@@ -24,3 +23,4 @@ DONE:
 * add support for variable length full-segment wildcards ("/**/")
 * add support for prefix segment patterns ("/foo*/")
 * add support for suffix segment patterns ("/*foo/")
+* add support for arbitrary patterns ("/foo*bar*baz/" etc.)

Modified: subversion/branches/authzperf/subversion/include/svn_string.h
URL: 
http://svn.apache.org/viewvc/subversion/branches/authzperf/subversion/include/svn_string.h?rev=1614703&r1=1614702&r2=1614703&view=diff
==============================================================================
--- subversion/branches/authzperf/subversion/include/svn_string.h (original)
+++ subversion/branches/authzperf/subversion/include/svn_string.h Wed Jul 30 
16:27:44 2014
@@ -388,6 +388,16 @@ svn_stringbuf_replace(svn_stringbuf_t *s
                       const char *bytes,
                       apr_size_t new_count);
 
+/** Replace all occurrences of @a to_find in @a str with @a replacement.
+ * Return the number of replacements made.
+ *
+ * @since New in 1.10.
+ */
+apr_size_t
+svn_stringbuf_replace_all(svn_stringbuf_t *str,
+                          const char *to_find,
+                          const char *replacement);
+
 /** Return a duplicate of @a original_string. */
 svn_stringbuf_t *
 svn_stringbuf_dup(const svn_stringbuf_t *original_string, apr_pool_t *pool);

Modified: subversion/branches/authzperf/subversion/libsvn_repos/authz.c
URL: 
http://svn.apache.org/viewvc/subversion/branches/authzperf/subversion/libsvn_repos/authz.c?rev=1614703&r1=1614702&r2=1614703&view=diff
==============================================================================
--- subversion/branches/authzperf/subversion/libsvn_repos/authz.c (original)
+++ subversion/branches/authzperf/subversion/libsvn_repos/authz.c Wed Jul 30 
16:27:44 2014
@@ -325,6 +325,10 @@ typedef struct node_pattern_t
    * segment suffix. */
   apr_array_header_t *suffixes;
 
+  /* If not NULL, the segments of all nodes_t* in this array contain
+   * wildcards and don't fit into any of the above categories. */
+  apr_array_header_t *complex;
+
   /* This node itself is a "**" segment and must therefore itself be added
    * to the matching node list for the next level. */
   svn_boolean_t repeat;
@@ -617,8 +621,10 @@ insert_path(node_t *node,
                                           reversed, result_pool);
         }
 
-      /* More cases to be added here later.
-       * There will be no error condition to be checked for. */
+      /* General pattern */
+      else
+        sub_node = ensure_node_in_array(&node->pattern_sub_nodes->complex,
+                                        segment, result_pool);
     }
   else
     {
@@ -641,6 +647,43 @@ insert_path(node_t *node,
               result_pool, scratch_pool);
 }
 
+/* Normalize the wildcard pattern PATH in accordance to
+ * https://wiki.apache.org/subversion/AuthzImprovements
+ * and return the result.
+ */
+const char *
+normalize_wildcards(const char *path,
+                    apr_pool_t *result_pool)
+{
+  const char *pos;
+  svn_stringbuf_t *buffer = svn_stringbuf_create(path, result_pool);
+
+  /* Reduce sequences variable-length segment matches to single segment
+   * matches with the other segment patterns reduced to "*". */
+  while (svn_stringbuf_replace_all(buffer, "/**/**/", "/*/**/"))
+    ;
+
+  /* Our tree traversal is more efficient if we put variable segment count
+   * wildcards last. */
+  while (svn_stringbuf_replace_all(buffer, "/**/*/", "/**/*/"))
+    ;
+
+  /* Reduce trailing "**" a single "*". */
+  while (   buffer->len > 1
+      && buffer->data[buffer->len-1] == '*'
+      && buffer->data[buffer->len-2] == '*')
+    svn_stringbuf_remove(buffer, buffer->len-1, 1);
+
+  /* Reduce "**" _inside_ a segment to a single "*". */
+  for (pos = strstr(buffer->data, "**"); pos; pos = strstr(pos, "**"))
+    if ((pos > buffer->data && pos[-1] != '/') || (pos[2] != '/'))
+      svn_stringbuf_remove(buffer, pos - buffer->data, 1);
+    else
+      ++pos;
+
+  return buffer->data;
+}
+
 /* Callback baton to be used with  process_path_rule().
  */
 typedef struct process_path_rule_baton_t
@@ -704,6 +747,9 @@ process_path_rule(const char *name,
     return TRUE;
 
   /* Process the path */
+  if (wildcards && strstr(path, "**"))
+    path = normalize_wildcards(path, scratch_pool);
+
   segments = svn_cstring_split(path, "/", FALSE, scratch_pool);
   APR_ARRAY_PUSH(segments, const char *) = NULL;
 
@@ -787,6 +833,8 @@ finalize_tree(node_t *parent,
                              scratch_pool);
       finalize_subnode_array(node, access, node->pattern_sub_nodes->suffixes,
                              scratch_pool);
+      finalize_subnode_array(node, access, node->pattern_sub_nodes->complex,
+                             scratch_pool);
     }
 
   /* Add our min / max info to the parent's info.
@@ -985,6 +1033,120 @@ add_prefix_matches(lookup_state_t *state
     }
 }
 
+/* Utility factored out from match_pattern.
+ *
+ * Compare S with PATTERN up to the first wildcard in PATTERN.  The first
+ * char must be a match already.  If PATTERN does not contain a wildcard,
+ * compare the full strings.
+ *
+ * If no mismatch was found, return the number of matching characters and
+ * 0 otherwise.
+ */
+static apr_size_t
+match_to_next_wildcard(const char *s,
+                       const char *pattern)
+{
+  apr_size_t len;
+
+  assert(pattern[0] == s[0]);
+  for (len = 1; pattern[len] && pattern[len] != '*'; ++len)
+    if (pattern[len] != s[len])
+      return 0;
+
+  if (!pattern[len])
+    return s[len] == 0 ? len : 0;
+
+  return len;
+}
+
+/* Return TRUE if string S matches wildcard PATTERN.  The latter must not be
+ * empty and must be normalized, i.e. not contain "**".
+ */
+static svn_boolean_t
+match_pattern(const char *s,
+              const char *pattern)
+{
+  /* Matching a wildcard pattern is trival:
+   * PATTERN can be considered a list of literal strings separated by '*'.
+   * We simply have to find all sub-strings in that order, i.e. we can do
+   * so greedily.  Be careful to match prefix and suffix correctly.
+   */
+  char pattern_char, s_char;
+
+  /* The prefix part of PATTERN needs special treatment as we can't just
+   * match any substring of S. */
+  if (pattern[0] != '*')
+    {
+      apr_size_t match_len;
+
+      /* match_to_next_wildcard() assumes a that the first char matches. */
+      if (pattern[0] != s[0])
+        return FALSE;
+
+      /* Match up to but not beyond the next wildcard. */
+      match_len = match_to_next_wildcard(s, pattern);
+      if (match_len == 0)
+        return FALSE;
+
+      /* Continue at next wildcard or end-of-string. */
+      pattern += match_len;
+      s += match_len;
+    }
+
+  /* Process all of PATTERN and match it against S char by char. */
+  while ((pattern_char = *pattern))
+    {
+      apr_size_t match_len = 0;
+
+      /* If PATTERN ended on a wildcard, S can be nothing but a match. */
+      pattern_char = *++pattern;
+      if (pattern_char == 0)
+        return TRUE;
+
+      /* Due to normalization, PATTERN_CHAR cannot be '*' because "**" is
+       * prohibited.  Find the next position in S that matches until the
+       * next wildcard in PATTERN or its end. */
+      for (s_char = *s; s_char; s_char = *++s)
+        if (pattern_char == s_char)
+          {
+            /* First char matches, what about the rest?  If there is no
+             * wildcard left in PATTERN (i.e. the suffix part), we only get
+             * a non-zero result if S and PATTERN match completely. */
+            match_len = match_to_next_wildcard(s, pattern);
+
+            /* Found a match?  If so, greedily take it. */
+            if (match_len)
+              break;
+          }
+
+      /* No match found -> mismatch and done. */
+      if (!match_len)
+        return FALSE;
+
+      /* Continue at next wildcard or end-of-string. */
+      pattern += match_len;
+      s += match_len;
+    }
+
+  /* The pattern ended and S must either be fully matched now or is not a
+   * match at all. */
+  return *s == 0;
+}
+
+static void
+add_complex_matches(lookup_state_t *state,
+                    const svn_stringbuf_t *segment,
+                    apr_array_header_t *patterns)
+{
+  int i;
+  for (i = 0; i < patterns->nelts; ++i)
+    {
+      node_t *node = APR_ARRAY_IDX(patterns, i, node_t *);
+      if (match_pattern(segment->data, node->segment.data))
+        add_next_node(state, node);
+    }
+}
+
 /* Extract the next segment from PATH and copy it into SEGMENT, whose current
  * contents get overwritten.  Empty paths ("") are supported and leading '/'
  * segment separators will be interpreted as an empty segment ("").  Non-
@@ -1129,6 +1291,10 @@ lookup(lookup_state_t *state,
                 add_prefix_matches(state, segment,
                                    node->pattern_sub_nodes->prefixes);
 
+              if (node->pattern_sub_nodes->complex)
+                add_complex_matches(state, segment,
+                                    node->pattern_sub_nodes->complex);
+
               /* Find all suffux pattern matches.
                * This must be the last check as it destroys SEGMENT. */
               if (node->pattern_sub_nodes->suffixes)

Modified: subversion/branches/authzperf/subversion/libsvn_subr/string.c
URL: 
http://svn.apache.org/viewvc/subversion/branches/authzperf/subversion/libsvn_subr/string.c?rev=1614703&r1=1614702&r2=1614703&view=diff
==============================================================================
--- subversion/branches/authzperf/subversion/libsvn_subr/string.c (original)
+++ subversion/branches/authzperf/subversion/libsvn_subr/string.c Wed Jul 30 
16:27:44 2014
@@ -710,6 +710,71 @@ svn_stringbuf_replace(svn_stringbuf_t *s
 }
 
 
+apr_size_t
+svn_stringbuf_replace_all(svn_stringbuf_t *str,
+                          const char *to_find,
+                          const char *replacement)
+{
+  apr_size_t replacements = 0;
+
+  apr_size_t current = 0;
+  apr_size_t original_length = str->len;
+
+  apr_size_t to_copy;
+  apr_size_t to_find_len;
+  apr_size_t replacement_len;
+  apr_size_t new_length;
+
+  /* Early exit. */
+  const char *pos = strstr(str->data, to_find);
+  if (pos == NULL)
+    return 0;
+
+  to_find_len = strlen(to_find);
+  replacement_len = strlen(replacement);
+
+  /* We will store the new contents behind the NUL terminator of the current
+   * data and track the total length in STR->LEN to make the reallocation
+   * code preserve both bits.  However, we need to keep the NUL between them
+   * to make strstr stop at that boundary. */
+  ++str->len;
+
+  /* Find all occurrences of TO_FIND, copy the bits in between to the target,
+   * separated by REPLACEMENT. */
+  for ( ; pos; pos = strstr(str->data + current, to_find), ++replacements)
+    {
+      apr_size_t to_copy = pos - str->data - current;
+      svn_stringbuf_ensure(str, str->len + to_copy + replacement_len);
+
+      if (to_copy)
+        memcpy(str->data + str->len, str->data + current, to_copy);
+      current += to_copy + to_find_len;
+
+      str->len += to_copy;
+      memcpy(str->data + str->len, replacement, replacement_len);
+      str->len += replacement_len;
+    }
+
+  /* Copy remainder. */
+  to_copy = original_length - current;
+  if (to_copy)
+    {
+      svn_stringbuf_ensure(str, str->len + to_copy);
+      memcpy(str->data + str->len, str->data + current, to_copy);
+      str->len += to_copy;
+    }
+
+  /* Move new contents to the start of the buffer and terminate it. */
+  new_length = str->len - original_length - 1;
+  memmove(str->data, str->data + original_length + 1, new_length);
+  str->len = new_length;
+  str->data[new_length] = 0;
+
+  /* Done. */
+  return replacements;
+}
+
+
 svn_stringbuf_t *
 svn_stringbuf_dup(const svn_stringbuf_t *original_string, apr_pool_t *pool)
 {


Reply via email to