Author: stsp
Date: Wed Feb  1 16:58:50 2012
New Revision: 1239203

URL: http://svn.apache.org/viewvc?rev=1239203&view=rev
Log:
For issue #2487, "mod_dav_svn and locales fail to play nicely together",
add a new configuration option which allows Subversion libraries called
from mod_dav_svn to process UTF-8 strings, instead of just ASCII strings.

Because httpd is not locale-aware, we cannot assume that a native character
set other than ASCII is present in the environment that mod_dav_svn runs in.
UTF-8 is backward compatible with ASCII, and all paths within Subversion
repositories are already encoded in UTF-8. Thus, we can safely allow UTF-8
without running into character set conversion issues in code called from
mod_dav_svn.

Basic testing shows that httpd is prepared to tolerate non-ASCII characters,
even though it doesn't seem to interpret them. E.g. it logs such characters
as hex codes preceded by a backslash.

The new configuration option is called SVNUseUTF8 and can be set to
"On" or "Off" (the default is "Off"). It is a global option that takes
effect for all repository locations defined in the configuration file.

When the option is "On", files that contain UTF-8 characters in their
names can be processed by the pre-lock hook, hooks are able to write
UTF-8 characters to stdout and stderr, and filesystem paths to Subversion
repositories may contain UTF-8 characters.

* subversion/mod_dav_svn/mod_dav_svn.c
  (server_conf_t): Add use_utf8 boolean.
  (init): Initialise the native character set of Subversion's UTF-8
   conversion routines to either ASCII (default) or UTF-8.
  (SVNUseUTF8_cmd): New option command processor.
  (cmds): Add new "SVNUseUTF8" option.

* subversion/include/svn_utf.h
  (svn_utf_initialize2): Declare. Includes the ability to force the native
   character set to UTF-8, regardless of locale.
  (svn_utf_initialize): Deprecate.

* subversion/libsvn_subr/deprecated.c
  (svn_utf_initialize): Re-implement as wrapper around svn_utf_initialize2().

* subversion/libsvn_subr/cmdline.c
  (svn_cmdline_init): Update function call to svn_utf_initialize2(), keeping
   the native character set based on the current locale.

* subversion/libsvn_subr/utf.c
  (assume_native_charset_is_utf8): New global boolean variable. Defaults
   to FALSE, and may be set to TRUE by svn_utf_initialize2() (which, like
   its predecessor, is documented to not be thread-safe).
  (get_ntou_xlate_handle_node, get_uton_xlate_handle_node): If the native
   character set has been forced to UTF-8, use SVN_APR_UTF8_CHARSET instead
   of SVN_APR_LOCALE_CHARSET.

Modified:
    subversion/trunk/subversion/include/svn_utf.h
    subversion/trunk/subversion/libsvn_subr/cmdline.c
    subversion/trunk/subversion/libsvn_subr/deprecated.c
    subversion/trunk/subversion/libsvn_subr/utf.c
    subversion/trunk/subversion/mod_dav_svn/mod_dav_svn.c

Modified: subversion/trunk/subversion/include/svn_utf.h
URL: 
http://svn.apache.org/viewvc/subversion/trunk/subversion/include/svn_utf.h?rev=1239203&r1=1239202&r2=1239203&view=diff
==============================================================================
--- subversion/trunk/subversion/include/svn_utf.h (original)
+++ subversion/trunk/subversion/include/svn_utf.h Wed Feb  1 16:58:50 2012
@@ -49,12 +49,28 @@ extern "C" {
  * Initialize the UTF-8 encoding/decoding routines.
  * Allocate cached translation handles in a subpool of @a pool.
  *
+ * If @a assume_native_utf8 is TRUE, the native character set is
+ * assumed to be UTF-8, i.e. conversion is a no-op. This is useful
+ * in contexts where the native character set is ASCII but UTF-8
+ * should be used regardless (e.g. for mod_dav_svn which runs within
+ * httpd and always uses the "C" locale).
+ *
  * @note It is optional to call this function, but if it is used, no other
  * svn function may be in use in other threads during the call of this
  * function or when @a pool is cleared or destroyed.
  * Initializing the UTF-8 routines will improve performance.
  *
- * @since New in 1.1.
+ * @since New in 1.8.
+ */
+void
+svn_utf_initialize2(apr_pool_t *pool,
+                    svn_boolean_t assume_native_utf8);
+
+/**
+ * Like svn_utf_initialize but without the ability to force the
+ * native encoding to UTF-8.
+ *
+ * @deprecated Provided for backward compatibility with the 1.7 API.
  */
 void
 svn_utf_initialize(apr_pool_t *pool);

Modified: subversion/trunk/subversion/libsvn_subr/cmdline.c
URL: 
http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_subr/cmdline.c?rev=1239203&r1=1239202&r2=1239203&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_subr/cmdline.c (original)
+++ subversion/trunk/subversion/libsvn_subr/cmdline.c Wed Feb  1 16:58:50 2012
@@ -226,7 +226,7 @@ svn_cmdline_init(const char *progname, F
   /* Create a pool for use by the UTF-8 routines.  It will be cleaned
      up by APR at exit time. */
   pool = svn_pool_create(NULL);
-  svn_utf_initialize(pool);
+  svn_utf_initialize2(pool, FALSE);
 
   if ((err = svn_nls_init()))
     {

Modified: subversion/trunk/subversion/libsvn_subr/deprecated.c
URL: 
http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_subr/deprecated.c?rev=1239203&r1=1239202&r2=1239203&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_subr/deprecated.c (original)
+++ subversion/trunk/subversion/libsvn_subr/deprecated.c Wed Feb  1 16:58:50 
2012
@@ -39,6 +39,7 @@
 #include "svn_pools.h"
 #include "svn_dso.h"
 #include "svn_mergeinfo.h"
+#include "svn_utf.h"
 #include "svn_xml.h"
 
 #include "opt.h"
@@ -1173,3 +1174,9 @@ svn_xml_make_header(svn_stringbuf_t **st
 {
   svn_xml_make_header2(str, NULL, pool);
 }
+
+void
+svn_utf_initialize(apr_pool_t *pool)
+{
+  svn_utf_initialize2(pool, FALSE);
+}

Modified: subversion/trunk/subversion/libsvn_subr/utf.c
URL: 
http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_subr/utf.c?rev=1239203&r1=1239202&r2=1239203&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_subr/utf.c (original)
+++ subversion/trunk/subversion/libsvn_subr/utf.c Wed Feb  1 16:58:50 2012
@@ -23,6 +23,7 @@
 
 
 
+#include <stdlib.h>
 #include <string.h>
 #include <assert.h>
 
@@ -55,6 +56,7 @@ static const char *SVN_UTF_UTON_XLATE_HA
 static const char *SVN_APR_UTF8_CHARSET = "UTF-8";
 
 static svn_mutex__t *xlate_handle_mutex = NULL;
+static svn_boolean_t assume_native_charset_is_utf8 = FALSE;
 
 /* The xlate handle cache is a global hash table with linked lists of xlate
  * handles.  In multi-threaded environments, a thread "borrows" an xlate
@@ -118,7 +120,8 @@ xlate_handle_node_cleanup(void *arg)
 }
 
 void
-svn_utf_initialize(apr_pool_t *pool)
+svn_utf_initialize2(apr_pool_t *pool,
+                    svn_boolean_t assume_native_utf8)
 {
   if (!xlate_handle_hash)
     {
@@ -141,6 +144,9 @@ svn_utf_initialize(apr_pool_t *pool)
       apr_pool_cleanup_register(subpool, NULL, xlate_cleanup,
                                 apr_pool_cleanup_null);
     }
+
+    if (!assume_native_charset_is_utf8)
+      assume_native_charset_is_utf8 = assume_native_utf8;
 }
 
 /* Return a unique string key based on TOPAGE and FROMPAGE.  TOPAGE and
@@ -442,7 +448,9 @@ static svn_error_t *
 get_ntou_xlate_handle_node(xlate_handle_node_t **ret, apr_pool_t *pool)
 {
   return get_xlate_handle_node(ret, SVN_APR_UTF8_CHARSET,
-                               SVN_APR_LOCALE_CHARSET,
+                               assume_native_charset_is_utf8
+                                 ? SVN_APR_UTF8_CHARSET
+                                 : SVN_APR_LOCALE_CHARSET,
                                SVN_UTF_NTOU_XLATE_HANDLE, pool);
 }
 
@@ -455,7 +463,10 @@ get_ntou_xlate_handle_node(xlate_handle_
 static svn_error_t *
 get_uton_xlate_handle_node(xlate_handle_node_t **ret, apr_pool_t *pool)
 {
-  return get_xlate_handle_node(ret, SVN_APR_LOCALE_CHARSET,
+  return get_xlate_handle_node(ret,
+                               assume_native_charset_is_utf8
+                                 ? SVN_APR_UTF8_CHARSET
+                                 : SVN_APR_LOCALE_CHARSET,
                                SVN_APR_UTF8_CHARSET,
                                SVN_UTF_UTON_XLATE_HANDLE, pool);
 }

Modified: subversion/trunk/subversion/mod_dav_svn/mod_dav_svn.c
URL: 
http://svn.apache.org/viewvc/subversion/trunk/subversion/mod_dav_svn/mod_dav_svn.c?rev=1239203&r1=1239202&r2=1239203&view=diff
==============================================================================
--- subversion/trunk/subversion/mod_dav_svn/mod_dav_svn.c (original)
+++ subversion/trunk/subversion/mod_dav_svn/mod_dav_svn.c Wed Feb  1 16:58:50 
2012
@@ -22,6 +22,8 @@
  * ====================================================================
  */
 
+#include <stdlib.h>
+
 #include <apr_strings.h>
 
 #include <httpd.h>
@@ -55,6 +57,7 @@
 /* per-server configuration */
 typedef struct server_conf_t {
   const char *special_uri;
+  svn_boolean_t use_utf8;
 } server_conf_t;
 
 
@@ -111,6 +114,8 @@ static int
 init(apr_pool_t *p, apr_pool_t *plog, apr_pool_t *ptemp, server_rec *s)
 {
   svn_error_t *serr;
+  server_conf_t *conf;
+
   ap_add_version_component(p, "SVN/" SVN_VER_NUMBER);
 
   serr = svn_fs_initialize(p);
@@ -123,7 +128,8 @@ init(apr_pool_t *p, apr_pool_t *plog, ap
     }
 
   /* This returns void, so we can't check for error. */
-  svn_utf_initialize(p);
+  conf = ap_get_module_config(s->module_config, &dav_svn_module);
+  svn_utf_initialize2(p, conf->use_utf8);
 
   return OK;
 }
@@ -510,6 +516,18 @@ SVNCompressionLevel_cmd(cmd_parms *cmd, 
   return NULL;
 }
 
+static const char *
+SVNUseUTF8_cmd(cmd_parms *cmd, void *config, int arg)
+{
+  server_conf_t *conf;
+
+  conf = ap_get_module_config(cmd->server->module_config,
+                              &dav_svn_module);
+  conf->use_utf8 = arg;
+
+  return NULL;
+}
+
 
 /** Accessor functions for the module's configuration state **/
 
@@ -1021,6 +1039,11 @@ static const command_rec cmds[] =
                 "content over the network (0 for no compression, 9 for "
                 "maximum, 5 is default)."),
 
+  /* per server */
+  AP_INIT_FLAG("SVNUseUTF8",
+               SVNUseUTF8_cmd, NULL,
+               RSRC_CONF,
+               "use UTF-8 as native character encoding (default is ASCII)."),
   { NULL }
 };
 


Reply via email to