Following Daniel Shahaf's suggestion of splitting the "allow svnsync
to translate non-UTF-8 log messages to UTF-8" work into two patches, I
have revised my changes, prepared one of the two patches, and have
attached the one patch and suggested log message to this e-mail.
Adds a public API function, svn_subst_translate_string2(), an extension of
svn_subst_translate_string(), that has two, additional output parameters for
determining whether re-encoding and/or line ending translation were performed.
As discussed at:
http://thread.gmane.org/gmane.comp.version-control.subversion.user/100020
http://thread.gmane.org/gmane.comp.version-control.subversion.devel/122518
http://thread.gmane.org/gmane.comp.version-control.subversion.devel/122550
* subversion/include/svn_subst.h
(svn_subst_translate_string2): New function.
(svn_subst_translate_string): Deprecated in favor of
svn_subst_translate_string2().
* subversion/libsvn_subr/subst.c
(translate_newline): Added the new parameter EOL_TRANSLATED, the value at
which is set to TRUE if translate_newline() wrote out the line ending in
NEWLINE_BUF with a different line ending in EOL_STR.
(translation_baton): Added the TRANSLATED_EOL field.
(create_translation_baton): Added a new parameter TRANSLATED_EOL that is
passed to the resulting translation_baton.
(translate_chunk): Modified the three calls to translate_newline() to pass
the TRANSLATED_EOL pointer from the translation baton.
(stream_translated): New static function. Its implementation is the old
implementation of svn_subst_stream_translated(), but accepting another
parameter, TRANSLATED_EOL, that is passed to the in/out translation batons
that it creates.
(svn_subst_stream_translated): Now a wrapper for stream_translated().
(translate_cstring2): New static function. Its implementation is the old
implementation of svn_subst_translate_cstring2(), but modified to accept
another parameter, TRANSLATED_EOL, that is passed to stream_translated().
(svn_subst_translate_cstring2): Now a wrapper for translate_cstring2().
(svn_subst_translate_string2): New function. The task of recording whether
it translated a line ending is delegated to translate_cstring2().
Index: subversion/include/svn_subst.h
===================================================================
--- subversion/include/svn_subst.h (revision 1003341)
+++ subversion/include/svn_subst.h (working copy)
@@ -588,16 +588,41 @@ svn_subst_stream_detranslated(svn_stream_t **strea
/* EOL conversion and character encodings */
-/** Translate the data in @a value (assumed to be in encoded in charset
+/** @deprecated Provided for backward compatibility with the 1.6 API. Callers
+ * should use svn_subst_translate_string2().
+ *
+ * Translate the data in @a value (assumed to be encoded in charset
* @a encoding) to UTF8 and LF line-endings. If @a encoding is @c NULL,
* then assume that @a value is in the system-default language encoding.
* Return the translated data in @a *new_value, allocated in @a pool.
*/
+SVN_DEPRECATED
svn_error_t *svn_subst_translate_string(svn_string_t **new_value,
const svn_string_t *value,
const char *encoding,
apr_pool_t *pool);
+/** Translate the data in @a value (assumed to be encoded in charset
+ * @a encoding) to UTF8 and LF line-endings. If @a encoding is @c NULL,
+ * then assume that @a value is in the system-default character encoding.
+ * If @a translated_to_utf8 is not @c NULL, then
+ * @code *translated_to_utf8 @endcode is set to @c TRUE if at least one
+ * character of @a value in the source charset was translated to UTF-8; to
+ * @c FALSE otherwise. If @a translated_line_endings is not @c NULL, then
+ * @code *translated_line_endings @endcode is set to @c TRUE if at least one
+ * line ending was changed to LF; to @c FALSE otherwise. Return the translated
+ * data in @a *new_value, allocated in @a pool.
+ *
+ * @since New in 1.7
+ */
+svn_error_t *
+svn_subst_translate_string2(svn_string_t **new_value,
+ svn_boolean_t *translated_to_utf8,
+ svn_boolean_t *translated_line_endings,
+ const svn_string_t *value,
+ const char *encoding,
+ apr_pool_t *pool);
+
/** Translate the data in @a value from UTF8 and LF line-endings into
* native locale and native line-endings, or to the output locale if
* @a for_output is TRUE. Return the translated data in @a
Index: subversion/libsvn_subr/subst.c
===================================================================
--- subversion/libsvn_subr/subst.c (revision 1003341)
+++ subversion/libsvn_subr/subst.c (working copy)
@@ -628,7 +628,8 @@ translate_newline(const char *eol_str,
char *newline_buf,
apr_size_t newline_len,
svn_stream_t *dst,
- svn_boolean_t repair)
+ svn_boolean_t repair,
+ svn_boolean_t *eol_translated)
{
/* If this is the first newline we've seen, cache it
future comparisons, else compare it with our cache to
@@ -650,7 +651,13 @@ translate_newline(const char *eol_str,
*src_format_len = newline_len;
}
/* Translate the newline */
- return translate_write(dst, eol_str, eol_str_len);
+ svn_error_t *err = translate_write(dst, eol_str, eol_str_len);
+ if (eol_translated) {
+ if (newline_len != eol_str_len ||
+ strncmp(newline_buf, eol_str, newline_len))
+ *eol_translated = TRUE;
+ }
+ return err;
}
@@ -765,6 +772,7 @@ svn_subst_keywords_differ2(apr_hash_t *a,
struct translation_baton
{
const char *eol_str;
+ svn_boolean_t *translated_eol;
svn_boolean_t repair;
apr_hash_t *keywords;
svn_boolean_t expand;
@@ -805,6 +813,7 @@ struct translation_baton
*/
static struct translation_baton *
create_translation_baton(const char *eol_str,
+ svn_boolean_t *translated_eol,
svn_boolean_t repair,
apr_hash_t *keywords,
svn_boolean_t expand,
@@ -818,6 +827,7 @@ create_translation_baton(const char *eol_str,
b->eol_str = eol_str;
b->eol_str_len = eol_str ? strlen(eol_str) : 0;
+ b->translated_eol = translated_eol;
b->repair = repair;
b->keywords = keywords;
b->expand = expand;
@@ -883,7 +893,8 @@ translate_chunk(svn_stream_t *dst,
SVN_ERR(translate_newline(b->eol_str, b->eol_str_len,
b->src_format,
&b->src_format_len, b->newline_buf,
- b->newline_off, dst, b->repair));
+ b->newline_off, dst, b->repair,
+ b->translated_eol));
b->newline_off = 0;
}
@@ -976,7 +987,8 @@ translate_chunk(svn_stream_t *dst,
b->src_format,
&b->src_format_len,
b->newline_buf,
- b->newline_off, dst, b->repair));
+ b->newline_off, dst, b->repair,
+ b->translated_eol));
b->newline_off = 0;
break;
@@ -992,7 +1004,7 @@ translate_chunk(svn_stream_t *dst,
SVN_ERR(translate_newline(b->eol_str, b->eol_str_len,
b->src_format, &b->src_format_len,
b->newline_buf, b->newline_off,
- dst, b->repair));
+ dst, b->repair, b->translated_eol));
b->newline_off = 0;
}
@@ -1256,13 +1268,14 @@ svn_subst_read_specialfile(svn_stream_t **stream,
}
-svn_stream_t *
-svn_subst_stream_translated(svn_stream_t *stream,
- const char *eol_str,
- svn_boolean_t repair,
- apr_hash_t *keywords,
- svn_boolean_t expand,
- apr_pool_t *result_pool)
+static svn_stream_t *
+stream_translated(svn_stream_t *stream,
+ const char *eol_str,
+ svn_boolean_t *translated_eol,
+ svn_boolean_t repair,
+ apr_hash_t *keywords,
+ svn_boolean_t expand,
+ apr_pool_t *result_pool)
{
struct translated_stream_baton *baton
= apr_palloc(result_pool, sizeof(*baton));
@@ -1304,9 +1317,11 @@ svn_subst_read_specialfile(svn_stream_t **stream,
/* Setup the baton fields */
baton->stream = stream;
baton->in_baton
- = create_translation_baton(eol_str, repair, keywords, expand, result_pool);
+ = create_translation_baton(eol_str, translated_eol, repair, keywords,
+ expand, result_pool);
baton->out_baton
- = create_translation_baton(eol_str, repair, keywords, expand, result_pool);
+ = create_translation_baton(eol_str, translated_eol, repair, keywords,
+ expand, result_pool);
baton->written = FALSE;
baton->readbuf = svn_stringbuf_create("", result_pool);
baton->readbuf_off = 0;
@@ -1323,15 +1338,28 @@ svn_subst_read_specialfile(svn_stream_t **stream,
return s;
}
+svn_stream_t *
+svn_subst_stream_translated(svn_stream_t *stream,
+ const char *eol_str,
+ svn_boolean_t repair,
+ apr_hash_t *keywords,
+ svn_boolean_t expand,
+ apr_pool_t *result_pool)
+{
+ return stream_translated(stream, eol_str, NULL, repair, keywords, expand,
+ result_pool);
+}
-svn_error_t *
-svn_subst_translate_cstring2(const char *src,
- const char **dst,
- const char *eol_str,
- svn_boolean_t repair,
- apr_hash_t *keywords,
- svn_boolean_t expand,
- apr_pool_t *pool)
+
+static svn_error_t *
+translate_cstring2(const char **dst,
+ svn_boolean_t *translated_eol,
+ const char *src,
+ const char *eol_str,
+ svn_boolean_t repair,
+ apr_hash_t *keywords,
+ svn_boolean_t expand,
+ apr_pool_t *pool)
{
svn_stringbuf_t *dst_stringbuf;
svn_stream_t *dst_stream;
@@ -1348,9 +1376,12 @@ svn_subst_read_specialfile(svn_stream_t **stream,
dst_stringbuf = svn_stringbuf_create("", pool);
dst_stream = svn_stream_from_stringbuf(dst_stringbuf, pool);
+ if (translated_eol)
+ *translated_eol = FALSE;
+
/* Another wrapper to translate the content. */
- dst_stream = svn_subst_stream_translated(dst_stream, eol_str, repair,
- keywords, expand, pool);
+ dst_stream = stream_translated(dst_stream, eol_str, translated_eol, repair,
+ keywords, expand, pool);
/* Jam the text into the destination stream (to translate it). */
SVN_ERR(svn_stream_write(dst_stream, src, &len));
@@ -1362,6 +1393,19 @@ svn_subst_read_specialfile(svn_stream_t **stream,
return SVN_NO_ERROR;
}
+svn_error_t *
+svn_subst_translate_cstring2(const char *src,
+ const char **dst,
+ const char *eol_str,
+ svn_boolean_t repair,
+ apr_hash_t *keywords,
+ svn_boolean_t expand,
+ apr_pool_t *pool)
+{
+ return translate_cstring2(dst, NULL, src, eol_str, repair, keywords, expand,
+ pool);
+}
+
/* Given a special file at SRC, generate a textual representation of
it in a normal file at DST. Perform all allocations in POOL. */
/* ### this should be folded into svn_subst_copy_and_translate3 */
@@ -1679,6 +1723,19 @@ svn_subst_translate_string(svn_string_t **new_valu
const char *encoding,
apr_pool_t *pool)
{
+ return svn_subst_translate_string2(new_value, NULL, NULL, value, encoding,
+ pool);
+}
+
+
+svn_error_t *
+svn_subst_translate_string2(svn_string_t **new_value,
+ svn_boolean_t *translated_to_utf8,
+ svn_boolean_t *translated_line_endings,
+ const svn_string_t *value,
+ const char *encoding,
+ apr_pool_t *pool)
+{
const char *val_utf8;
const char *val_utf8_lf;
apr_pool_t *scratch_pool = svn_pool_create(pool);
@@ -1699,14 +1756,20 @@ svn_subst_translate_string(svn_string_t **new_valu
SVN_ERR(svn_utf_cstring_to_utf8(&val_utf8, value->data, scratch_pool));
}
- SVN_ERR(svn_subst_translate_cstring2(val_utf8,
- &val_utf8_lf,
- "\n", /* translate to LF */
- FALSE, /* no repair */
- NULL, /* no keywords */
- FALSE, /* no expansion */
- scratch_pool));
+ if (translated_to_utf8)
+ *translated_to_utf8 = strcmp(value->data, val_utf8)
+ ? /* not equal */ TRUE
+ : FALSE;
+ SVN_ERR(translate_cstring2(&val_utf8_lf,
+ translated_line_endings,
+ val_utf8,
+ "\n", /* translate to LF */
+ FALSE, /* no repair */
+ NULL, /* no keywords */
+ FALSE, /* no expansion */
+ scratch_pool));
+
*new_value = svn_string_create(val_utf8_lf, pool);
svn_pool_destroy(scratch_pool);
return SVN_NO_ERROR;