Author: stefan2
Date: Sat Sep 2 15:12:03 2017
New Revision: 1807056
URL: http://svn.apache.org/viewvc?rev=1807056&view=rev
Log:
Make the 'svn ls --search' implementation actually case- and accent-
insensitive. The option handling of '--search' already assumes this,
but the implementation did not follow suit.
That's easily done by applying the same generic UTF8-based normalization
to the path segments that 'svn' applies to the patterns. To reduce code
duplication, move the normalization and matching function to libsvn_subr.
And because there isn't really a better place to put it, add it to
utf8proc.c.
* subversion/include/private/svn_utf_private.h
(svn_utf__fuzzy_glob_match): Declare new private API.
* subversion/libsvn_subr/utf8proc.c
(svn_utf__fuzzy_glob_match): Implement the function, code mostly taken
from list.c.
* subversion/libsvn_client/list.c
(match_patterns): Forward to the new matching function.
(get_dir_contents): Add SCRATCH_BUFFER as a pass-through argument.
(list_internal): Provide the SCRATCH_BUFFER.
* subversion/libsvn_repos/list.c
(matches_any,
do_list,
svn_repos_list): Mirror the changes from the client side above.
Modified:
subversion/trunk/subversion/include/private/svn_utf_private.h
subversion/trunk/subversion/libsvn_client/list.c
subversion/trunk/subversion/libsvn_repos/list.c
subversion/trunk/subversion/libsvn_subr/utf8proc.c
Modified: subversion/trunk/subversion/include/private/svn_utf_private.h
URL:
http://svn.apache.org/viewvc/subversion/trunk/subversion/include/private/svn_utf_private.h?rev=1807056&r1=1807055&r2=1807056&view=diff
==============================================================================
--- subversion/trunk/subversion/include/private/svn_utf_private.h (original)
+++ subversion/trunk/subversion/include/private/svn_utf_private.h Sat Sep 2
15:12:03 2017
@@ -172,6 +172,18 @@ svn_utf__xfrm(const char **result,
svn_boolean_t accent_insensitive,
svn_membuf_t *buf);
+/* Return TRUE if STR matches any of the const char * glob patterns in
+ * PATTERNS. BUF is handed to svn_utf__xfrm as its working buffer.
+ *
+ * STR will internally be normalized to lower-case and accents removed
+ * using svn_utf__xfrm. To get a match, the PATTERNS must have been
+ * normalized accordingly before calling this function.
+ */
+svn_boolean_t
+svn_utf__fuzzy_glob_match(const char *str,
+ const apr_array_header_t *patterns,
+ svn_membuf_t *buf);
+
/* Check if STRING is a valid, NFC-normalized UTF-8 string. Note that
* a FALSE return value may indicate that STRING is not valid UTF-8 at
* all.
Modified: subversion/trunk/subversion/libsvn_client/list.c
URL:
http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_client/list.c?rev=1807056&r1=1807055&r2=1807056&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_client/list.c (original)
+++ subversion/trunk/subversion/libsvn_client/list.c Sat Sep 2 15:12:03 2017
@@ -37,6 +37,7 @@
#include "private/svn_fspath.h"
#include "private/svn_ra_private.h"
#include "private/svn_sorts_private.h"
+#include "private/svn_utf_private.h"
#include "private/svn_wc_private.h"
#include "svn_private_config.h"
@@ -69,23 +70,16 @@ list_internal(const char *path_or_url,
apr_pool_t *pool);
/* Return TRUE if S matches any of the const char * in PATTERNS.
- * Note that any S will match if PATTERNS is empty. */
+ * Note that any S will match if PATTERNS is NULL.
+ * Use SCRATCH_BUFFER for temporary string contents. */
static svn_boolean_t
match_patterns(const char *s,
- const apr_array_header_t *patterns)
+ const apr_array_header_t *patterns,
+ svn_membuf_t *scratch_buffer)
{
- int i;
- if (!patterns)
- return TRUE;
-
- for (i = 0; i < patterns->nelts; ++i)
- {
- const char *pattern = APR_ARRAY_IDX(patterns, i, const char *);
- if (apr_fnmatch(pattern, s, APR_FNM_PERIOD) == APR_SUCCESS)
- return TRUE;
- }
-
- return FALSE;
+ return patterns
+ ? svn_utf__fuzzy_glob_match(s, patterns, scratch_buffer)
+ : TRUE;
}
/* Get the directory entries of DIR at REV (relative to the root of
@@ -113,6 +107,8 @@ match_patterns(const char *s,
EXTERNAL_PARENT_URL and EXTERNAL_TARGET are set when external items
are listed, otherwise both are set to NULL by the caller.
+
+ Use SCRATCH_BUFFER for temporary string contents.
*/
static svn_error_t *
get_dir_contents(apr_uint32_t dirent_fields,
@@ -129,6 +125,7 @@ get_dir_contents(apr_uint32_t dirent_fie
const char *external_target,
svn_client_list_func2_t list_func,
void *baton,
+ svn_membuf_t *scratch_buffer,
apr_pool_t *result_pool,
apr_pool_t *scratch_pool)
{
@@ -203,7 +200,7 @@ get_dir_contents(apr_uint32_t dirent_fie
if (the_ent->kind == svn_node_file
|| depth == svn_depth_immediates
|| depth == svn_depth_infinity)
- if (match_patterns(item->key, patterns))
+ if (match_patterns(item->key, patterns, scratch_buffer))
SVN_ERR(list_func(baton, path, the_ent, lock, fs_path,
external_parent_url, external_target, iterpool));
@@ -214,7 +211,7 @@ get_dir_contents(apr_uint32_t dirent_fie
locks, fs_path, patterns, depth, ctx,
externals, external_parent_url,
external_target, list_func, baton,
- result_pool, iterpool));
+ scratch_buffer, result_pool, iterpool));
}
svn_pool_destroy(iterpool);
@@ -329,6 +326,7 @@ list_internal(const char *path_or_url,
svn_error_t *err;
apr_hash_t *locks;
apr_hash_t *externals;
+ svn_membuf_t scratch_buffer;
if (include_externals)
externals = apr_hash_make(pool);
@@ -391,8 +389,13 @@ list_internal(const char *path_or_url,
_("URL '%s' non-existent in revision %ld"),
loc->url, loc->rev);
+ /* We need a scratch buffer for temporary string data.
+ * Create one with a reasonable initial size. */
+ svn_membuf__create(&scratch_buffer, 256, pool);
+
/* Report the dirent for the target. */
- if (match_patterns(svn_dirent_dirname(fs_path, pool), patterns))
+ if (match_patterns(svn_dirent_dirname(fs_path, pool), patterns,
+ &scratch_buffer))
SVN_ERR(list_func(baton, "", dirent, locks
? (svn_hash_gets(locks, fs_path))
: NULL, fs_path, external_parent_url,
@@ -405,7 +408,7 @@ list_internal(const char *path_or_url,
SVN_ERR(get_dir_contents(dirent_fields, "", loc->rev, ra_session, locks,
fs_path, patterns, depth, ctx, externals,
external_parent_url, external_target, list_func,
- baton, pool, pool));
+ baton, &scratch_buffer, pool, pool));
/* We handle externals after listing entries under path_or_url, so that
handling external items (and any errors therefrom) doesn't delay
Modified: subversion/trunk/subversion/libsvn_repos/list.c
URL:
http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_repos/list.c?rev=1807056&r1=1807055&r2=1807056&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_repos/list.c (original)
+++ subversion/trunk/subversion/libsvn_repos/list.c Sat Sep 2 15:12:03 2017
@@ -30,6 +30,7 @@
#include "private/svn_repos_private.h"
#include "private/svn_sorts_private.h"
+#include "private/svn_utf_private.h"
#include "svn_private_config.h" /* for SVN_TEMPLATE_ROOT_DIR */
#include "repos.h"
@@ -91,23 +92,16 @@ svn_repos_stat(svn_dirent_t **dirent,
}
/* Return TRUE of DIRNAME matches any of the const char * in PATTERNS.
- * Note that any DIRNAME will match if PATTERNS is empty. */
+ * Note that any DIRNAME will match if PATTERNS is NULL.
+ * Use SCRATCH_BUFFER for temporary string contents. */
static svn_boolean_t
matches_any(const char *dirname,
- const apr_array_header_t *patterns)
+ const apr_array_header_t *patterns,
+ svn_membuf_t *scratch_buffer)
{
- int i;
- if (!patterns)
- return TRUE;
-
- for (i = 0; i < patterns->nelts; ++i)
- {
- const char *pattern = APR_ARRAY_IDX(patterns, i, const char *);
- if (apr_fnmatch(pattern, dirname, APR_FNM_PERIOD) == APR_SUCCESS)
- return TRUE;
- }
-
- return FALSE;
+ return patterns
+ ? svn_utf__fuzzy_glob_match(dirname, patterns, scratch_buffer)
+ : TRUE;
}
/* Utility to prevent code duplication.
@@ -166,6 +160,8 @@ compare_filtered_dirent(const void *lhs,
*
* However, DEPTH is not svn_depth_empty and PATH has already been reported.
* Therefore, we can call this recursively.
+ *
+ * Uses SCRATCH_BUFFER for temporary string contents.
*/
static svn_error_t *
do_list(svn_fs_root_t *root,
@@ -179,6 +175,7 @@ do_list(svn_fs_root_t *root,
void *receiver_baton,
svn_cancel_func_t cancel_func,
void *cancel_baton,
+ svn_membuf_t *scratch_buffer,
apr_pool_t *scratch_pool)
{
apr_hash_t *entries;
@@ -210,7 +207,8 @@ do_list(svn_fs_root_t *root,
continue;
/* We can skip files that don't match any of the search patterns. */
- filtered.is_match = matches_any(filtered.dirent->name, patterns);
+ filtered.is_match = matches_any(filtered.dirent->name, patterns,
+ scratch_buffer);
if (!filtered.is_match && filtered.dirent->kind == svn_node_file)
continue;
@@ -258,7 +256,7 @@ do_list(svn_fs_root_t *root,
SVN_ERR(do_list(root, sub_path, patterns, svn_depth_infinity,
path_info_only, authz_read_func, authz_read_baton,
receiver, receiver_baton, cancel_func,
- cancel_baton, iterpool));
+ cancel_baton, scratch_buffer, iterpool));
}
svn_pool_destroy(iterpool);
@@ -280,6 +278,8 @@ svn_repos_list(svn_fs_root_t *root,
void *cancel_baton,
apr_pool_t *scratch_pool)
{
+ svn_membuf_t scratch_buffer;
+
/* Parameter check. */
svn_node_kind_t kind;
if (depth < svn_depth_empty)
@@ -317,8 +317,13 @@ svn_repos_list(svn_fs_root_t *root,
if (patterns && patterns->nelts == 0)
return SVN_NO_ERROR;
+ /* We need a scratch buffer for temporary string data.
+ * Create one with a reasonable initial size. */
+ svn_membuf__create(&scratch_buffer, 256, scratch_pool);
+
/* Actually report PATH, if it passes the filters. */
- if (matches_any(svn_dirent_dirname(path, scratch_pool), patterns))
+ if (matches_any(svn_dirent_dirname(path, scratch_pool), patterns,
+ &scratch_buffer))
SVN_ERR(report_dirent(root, path, kind, path_info_only,
receiver, receiver_baton, scratch_pool));
@@ -327,7 +332,7 @@ svn_repos_list(svn_fs_root_t *root,
SVN_ERR(do_list(root, path, patterns, depth,
path_info_only, authz_read_func, authz_read_baton,
receiver, receiver_baton, cancel_func, cancel_baton,
- scratch_pool));
+ &scratch_buffer, scratch_pool));
return SVN_NO_ERROR;
}
Modified: subversion/trunk/subversion/libsvn_subr/utf8proc.c
URL:
http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_subr/utf8proc.c?rev=1807056&r1=1807055&r2=1807056&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_subr/utf8proc.c (original)
+++ subversion/trunk/subversion/libsvn_subr/utf8proc.c Sat Sep 2 15:12:03 2017
@@ -240,6 +240,38 @@ svn_utf__xfrm(const char **result,
return SVN_NO_ERROR;
}
+svn_boolean_t
+svn_utf__fuzzy_glob_match(const char *str,
+ const apr_array_header_t *patterns,
+ svn_membuf_t *buf)
+{
+ const char *normalized;
+ svn_error_t *err;
+ int i;
+
+ /* Try to normalize case and accents in STR. If that fails for some
+ * reason, clear the error and continue with the original STR: there
+ * is still a fair chance that it matches a "*.ext" pattern despite
+ * being "broken" UTF8. */
+ err = svn_utf__xfrm(&normalized, str, strlen(str), TRUE, TRUE, buf);
+ if (err)
+ {
+ svn_error_clear(err);
+ normalized = str;
+ }
+
+ /* Return TRUE as soon as any one pattern matches. NOTE(review): the
+ * list.c loops this replaces used APR_FNM_PERIOD, not 0 -- confirm. */
+ for (i = 0; i < patterns->nelts; ++i)
+ {
+ const char *pattern = APR_ARRAY_IDX(patterns, i, const char *);
+ if (apr_fnmatch(pattern, normalized, 0) == APR_SUCCESS)
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
/* Decode a single UCS-4 code point to UTF-8, appending the result to BUFFER.
* Assume BUFFER is already filled to *LENGTH and return the new size there.
* This function does *not* nul-terminate the stringbuf!