Author: stefan2
Date: Sat Sep  2 15:12:03 2017
New Revision: 1807056

URL: http://svn.apache.org/viewvc?rev=1807056&view=rev
Log:
Make the 'svn ls --search' implementation actually case- and accent-
insensitive.  The option handling of '--search' already assumes it
but the implementation did not follow up.

That's easily done by applying the same generic UTF8-based normalization
to the path segments that 'svn' applies to the patterns.  To reduce code
duplication, move the normalization and matching function to libsvn_subr.
And because there isn't really a better place to put it, add it to
utf8proc.c.

* subversion/include/private/svn_utf_private.h
  (svn_utf__fuzzy_glob_match): Declare new private API.

* subversion/libsvn_subr/utf8proc.c
  (svn_utf__fuzzy_glob_match): Implement the function, code mostly taken
                               from list.c.

* subversion/libsvn_client/list.c
  (match_patterns): Forward to the new matching function.
  (get_dir_contents): Add SCRATCH_BUFFER as a pass-through argument.
  (list_internal): Provide the SCRATCH_BUFFER.

* subversion/libsvn_repos/list.c
  (matches_any,
   do_list,
   svn_repos_list): Mirror the changes from the client side above.

Modified:
    subversion/trunk/subversion/include/private/svn_utf_private.h
    subversion/trunk/subversion/libsvn_client/list.c
    subversion/trunk/subversion/libsvn_repos/list.c
    subversion/trunk/subversion/libsvn_subr/utf8proc.c

Modified: subversion/trunk/subversion/include/private/svn_utf_private.h
URL: 
http://svn.apache.org/viewvc/subversion/trunk/subversion/include/private/svn_utf_private.h?rev=1807056&r1=1807055&r2=1807056&view=diff
==============================================================================
--- subversion/trunk/subversion/include/private/svn_utf_private.h (original)
+++ subversion/trunk/subversion/include/private/svn_utf_private.h Sat Sep  2 
15:12:03 2017
@@ -172,6 +172,18 @@ svn_utf__xfrm(const char **result,
               svn_boolean_t accent_insensitive,
               svn_membuf_t *buf);
 
+/* Return TRUE if STR matches any of the const char * glob patterns in
+ * PATTERNS.  BUF is the buffer handed to svn_utf__xfrm for normalization.
+ *
+ * STR will internally be normalized to lower-case and accents removed
+ * using svn_utf__xfrm.  To get a match, the PATTERNS must have been
+ * normalized accordingly before calling this function.
+ */
+svn_boolean_t
+svn_utf__fuzzy_glob_match(const char *str,
+                          const apr_array_header_t *patterns,
+                          svn_membuf_t *buf);
+
 /* Check if STRING is a valid, NFC-normalized UTF-8 string.  Note that
  * a FALSE return value may indicate that STRING is not valid UTF-8 at
  * all.

Modified: subversion/trunk/subversion/libsvn_client/list.c
URL: 
http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_client/list.c?rev=1807056&r1=1807055&r2=1807056&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_client/list.c (original)
+++ subversion/trunk/subversion/libsvn_client/list.c Sat Sep  2 15:12:03 2017
@@ -37,6 +37,7 @@
 #include "private/svn_fspath.h"
 #include "private/svn_ra_private.h"
 #include "private/svn_sorts_private.h"
+#include "private/svn_utf_private.h"
 #include "private/svn_wc_private.h"
 #include "svn_private_config.h"
 
@@ -69,23 +70,16 @@ list_internal(const char *path_or_url,
               apr_pool_t *pool);
 
 /* Return TRUE if S matches any of the const char * in PATTERNS.
- * Note that any S will match if PATTERNS is empty. */
+ * Note that any S will match if PATTERNS is NULL.
+ * Use SCRATCH_BUFFER for temporary string contents. */
 static svn_boolean_t
 match_patterns(const char *s,
-               const apr_array_header_t *patterns)
+               const apr_array_header_t *patterns,
+               svn_membuf_t *scratch_buffer)
 {
-  int i;
-  if (!patterns)
-    return TRUE;
-
-  for (i = 0; i < patterns->nelts; ++i)
-    {
-      const char *pattern = APR_ARRAY_IDX(patterns, i, const char *);
-      if (apr_fnmatch(pattern, s, APR_FNM_PERIOD) == APR_SUCCESS)
-        return TRUE;
-    }
-
-  return FALSE;
+  return patterns
+       ? svn_utf__fuzzy_glob_match(s, patterns, scratch_buffer)
+       : TRUE;
 }
 
 /* Get the directory entries of DIR at REV (relative to the root of
@@ -113,6 +107,8 @@ match_patterns(const char *s,
 
    EXTERNAL_PARENT_URL and EXTERNAL_TARGET are set when external items
    are listed, otherwise both are set to NULL by the caller.
+
+   Use SCRATCH_BUFFER for temporary string contents.
 */
 static svn_error_t *
 get_dir_contents(apr_uint32_t dirent_fields,
@@ -129,6 +125,7 @@ get_dir_contents(apr_uint32_t dirent_fie
                  const char *external_target,
                  svn_client_list_func2_t list_func,
                  void *baton,
+                 svn_membuf_t *scratch_buffer,
                  apr_pool_t *result_pool,
                  apr_pool_t *scratch_pool)
 {
@@ -203,7 +200,7 @@ get_dir_contents(apr_uint32_t dirent_fie
       if (the_ent->kind == svn_node_file
           || depth == svn_depth_immediates
           || depth == svn_depth_infinity)
-        if (match_patterns(item->key, patterns))
+        if (match_patterns(item->key, patterns, scratch_buffer))
           SVN_ERR(list_func(baton, path, the_ent, lock, fs_path,
                             external_parent_url, external_target, iterpool));
 
@@ -214,7 +211,7 @@ get_dir_contents(apr_uint32_t dirent_fie
                                  locks, fs_path, patterns, depth, ctx,
                                  externals, external_parent_url,
                                  external_target, list_func, baton,
-                                 result_pool, iterpool));
+                                 scratch_buffer, result_pool, iterpool));
     }
 
   svn_pool_destroy(iterpool);
@@ -329,6 +326,7 @@ list_internal(const char *path_or_url,
   svn_error_t *err;
   apr_hash_t *locks;
   apr_hash_t *externals;
+  svn_membuf_t scratch_buffer;
 
   if (include_externals)
     externals = apr_hash_make(pool);
@@ -391,8 +389,13 @@ list_internal(const char *path_or_url,
                              _("URL '%s' non-existent in revision %ld"),
                              loc->url, loc->rev);
 
+  /* We need a scratch buffer for temporary string data.
+   * Create one with a reasonable initial size. */
+  svn_membuf__create(&scratch_buffer, 256, pool);
+
   /* Report the dirent for the target. */
-  if (match_patterns(svn_dirent_dirname(fs_path, pool), patterns))
+  if (match_patterns(svn_dirent_dirname(fs_path, pool), patterns,
+                     &scratch_buffer))
     SVN_ERR(list_func(baton, "", dirent, locks
                       ? (svn_hash_gets(locks, fs_path))
                       : NULL, fs_path, external_parent_url,
@@ -405,7 +408,7 @@ list_internal(const char *path_or_url,
     SVN_ERR(get_dir_contents(dirent_fields, "", loc->rev, ra_session, locks,
                              fs_path, patterns, depth, ctx, externals,
                              external_parent_url, external_target, list_func,
-                             baton, pool, pool));
+                             baton, &scratch_buffer, pool, pool));
 
   /* We handle externals after listing entries under path_or_url, so that
      handling external items (and any errors therefrom) doesn't delay

Modified: subversion/trunk/subversion/libsvn_repos/list.c
URL: 
http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_repos/list.c?rev=1807056&r1=1807055&r2=1807056&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_repos/list.c (original)
+++ subversion/trunk/subversion/libsvn_repos/list.c Sat Sep  2 15:12:03 2017
@@ -30,6 +30,7 @@
 
 #include "private/svn_repos_private.h"
 #include "private/svn_sorts_private.h"
+#include "private/svn_utf_private.h"
 #include "svn_private_config.h" /* for SVN_TEMPLATE_ROOT_DIR */
 
 #include "repos.h"
@@ -91,23 +92,16 @@ svn_repos_stat(svn_dirent_t **dirent,
 }
 
 /* Return TRUE of DIRNAME matches any of the const char * in PATTERNS.
- * Note that any DIRNAME will match if PATTERNS is empty. */
+ * Note that any DIRNAME will match if PATTERNS is NULL.
+ * Use SCRATCH_BUFFER for temporary string contents. */
 static svn_boolean_t
 matches_any(const char *dirname,
-            const apr_array_header_t *patterns)
+            const apr_array_header_t *patterns,
+            svn_membuf_t *scratch_buffer)
 {
-  int i;
-  if (!patterns)
-    return TRUE;
-
-  for (i = 0; i < patterns->nelts; ++i)
-    {
-      const char *pattern = APR_ARRAY_IDX(patterns, i, const char *);
-      if (apr_fnmatch(pattern, dirname, APR_FNM_PERIOD) == APR_SUCCESS)
-        return TRUE;
-    }
-
-  return FALSE;
+  return patterns
+       ? svn_utf__fuzzy_glob_match(dirname, patterns, scratch_buffer)
+       : TRUE;
 }
 
 /* Utility to prevent code duplication.
@@ -166,6 +160,8 @@ compare_filtered_dirent(const void *lhs,
  *
  * However, DEPTH is not svn_depth_empty and PATH has already been reported.
  * Therefore, we can call this recursively.
+ *
+ * Uses SCRATCH_BUFFER for temporary string contents.
  */
 static svn_error_t *
 do_list(svn_fs_root_t *root,
@@ -179,6 +175,7 @@ do_list(svn_fs_root_t *root,
         void *receiver_baton,
         svn_cancel_func_t cancel_func,
         void *cancel_baton,
+        svn_membuf_t *scratch_buffer,
         apr_pool_t *scratch_pool)
 {
   apr_hash_t *entries;
@@ -210,7 +207,8 @@ do_list(svn_fs_root_t *root,
         continue;
 
       /* We can skip files that don't match any of the search patterns. */
-      filtered.is_match = matches_any(filtered.dirent->name, patterns);
+      filtered.is_match = matches_any(filtered.dirent->name, patterns,
+                                      scratch_buffer);
       if (!filtered.is_match && filtered.dirent->kind == svn_node_file)
         continue;
 
@@ -258,7 +256,7 @@ do_list(svn_fs_root_t *root,
         SVN_ERR(do_list(root, sub_path, patterns, svn_depth_infinity,
                         path_info_only, authz_read_func, authz_read_baton,
                         receiver, receiver_baton, cancel_func,
-                        cancel_baton, iterpool));
+                        cancel_baton, scratch_buffer, iterpool));
     }
 
   svn_pool_destroy(iterpool);
@@ -280,6 +278,8 @@ svn_repos_list(svn_fs_root_t *root,
                void *cancel_baton,
                apr_pool_t *scratch_pool)
 {
+  svn_membuf_t scratch_buffer;
+
   /* Parameter check. */
   svn_node_kind_t kind;
   if (depth < svn_depth_empty)
@@ -317,8 +317,13 @@ svn_repos_list(svn_fs_root_t *root,
   if (patterns && patterns->nelts == 0)
     return SVN_NO_ERROR;
 
+  /* We need a scratch buffer for temporary string data.
+   * Create one with a reasonable initial size. */
+  svn_membuf__create(&scratch_buffer, 256, scratch_pool);
+
   /* Actually report PATH, if it passes the filters. */
-  if (matches_any(svn_dirent_dirname(path, scratch_pool), patterns))
+  if (matches_any(svn_dirent_dirname(path, scratch_pool), patterns,
+                  &scratch_buffer))
     SVN_ERR(report_dirent(root, path, kind, path_info_only,
                           receiver, receiver_baton, scratch_pool));
 
@@ -327,7 +332,7 @@ svn_repos_list(svn_fs_root_t *root,
     SVN_ERR(do_list(root, path, patterns, depth,
                     path_info_only, authz_read_func, authz_read_baton,
                     receiver, receiver_baton, cancel_func, cancel_baton,
-                    scratch_pool));
+                    &scratch_buffer, scratch_pool));
 
   return SVN_NO_ERROR;
 }

Modified: subversion/trunk/subversion/libsvn_subr/utf8proc.c
URL: 
http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_subr/utf8proc.c?rev=1807056&r1=1807055&r2=1807056&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_subr/utf8proc.c (original)
+++ subversion/trunk/subversion/libsvn_subr/utf8proc.c Sat Sep  2 15:12:03 2017
@@ -240,6 +240,38 @@ svn_utf__xfrm(const char **result,
   return SVN_NO_ERROR;
 }
 
+svn_boolean_t
+svn_utf__fuzzy_glob_match(const char *str,
+                          const apr_array_header_t *patterns,
+                          svn_membuf_t *buf)
+{
+  const char *normalized;
+  svn_error_t *err;
+  int i;
+
+  /* Try to normalize case and accents in STR, handing BUF to svn_utf__xfrm.
+   *
+   * If that fails, discard the error and continue with the original STR.
+   * There is still a fair chance that it matches a "*.ext" pattern despite
+   * being "broken" UTF8. */
+  err = svn_utf__xfrm(&normalized, str, strlen(str), TRUE, TRUE, buf);
+  if (err)
+    {
+      svn_error_clear(err);
+      normalized = str;
+    }
+
+  /* Return TRUE as soon as any one of the PATTERNS matches. */
+  for (i = 0; i < patterns->nelts; ++i)
+    {
+      const char *pattern = APR_ARRAY_IDX(patterns, i, const char *);
+      if (apr_fnmatch(pattern, normalized, 0) == APR_SUCCESS)
+        return TRUE;
+    }
+
+  return FALSE;
+}
+
 /* Decode a single UCS-4 code point to UTF-8, appending the result to BUFFER.
  * Assume BUFFER is already filled to *LENGTH and return the new size there.
  * This function does *not* nul-terminate the stringbuf!


Reply via email to