Re: [Spice-devel] [PATCH spice-gtk 3/4] util: add unix2dos and dos2unix

2013-08-24 Thread Hans de Goede

Hi,

On 08/23/2013 10:25 PM, Marc-André Lureau wrote:

Convert line endings from/to LF/CRLF, in utf8.
---
  gtk/spice-util-priv.h |   2 +
  gtk/spice-util.c  | 122 ++
  2 files changed, 124 insertions(+)

diff --git a/gtk/spice-util-priv.h b/gtk/spice-util-priv.h
index ee5a42d..cc559dc 100644
--- a/gtk/spice-util-priv.h
+++ b/gtk/spice-util-priv.h
@@ -29,6 +29,8 @@ gboolean spice_strv_contains(const GStrv strv, const gchar 
*str);
  gchar* spice_uuid_to_string(const guint8 uuid[16]);
  const gchar* spice_yes_no(gboolean value);
  guint16 spice_make_scancode(guint scancode, gboolean release);
+gchar* spice_unix2dos(const gchar *str, gssize len, GError **error);
+gchar* spice_dos2unix(const gchar *str, gssize len, GError **error);

  #if GLIB_CHECK_VERSION(2,32,0)
  #define STATIC_MUTEXGMutex
diff --git a/gtk/spice-util.c b/gtk/spice-util.c
index 774a145..be10edc 100644
--- a/gtk/spice-util.c
+++ b/gtk/spice-util.c
@@ -19,6 +19,7 @@
  #ifdef HAVE_CONFIG_H
  # include config.h
  #endif
+
  #include stdlib.h
  #include string.h
  #include glib-object.h
@@ -245,3 +246,124 @@ guint16 spice_make_scancode(guint scancode, gboolean 
release)

  g_return_val_if_reached(0);
  }
+
+typedef enum {
+NEWLINE_TYPE_LF,
+NEWLINE_TYPE_CR_LF
+} NewlineType;
+
+static gssize get_line(const gchar *str, gsize len,
+   NewlineType type, gsize *nl_len,
+   GError **error)
+{
+const gchar *p = str;
+gsize nl = 0;
+
+if (type == NEWLINE_TYPE_CR_LF) {
+while ((p - str)  len) {
+p = g_utf8_strchr(p, len, '\r');
+if (!p)
+break;
+p = g_utf8_next_char(p);
+if (g_utf8_get_char(p) == '\n') {
+len = (p - str) - 1;
+nl = 2;
+break;
+}
+}
+} else {
+p = g_utf8_strchr(str, len, '\n');
+if (p) {
+len = p - str;
+nl = 1;
+}
+}


This looks way more complicated than it needs to be. In UTF-8, the bytes
0x00 - 0x7f are only valid as single-byte sequences; multi-byte
encoded characters will never contain 0x00 - 0x7f. UTF-8 was designed
this way so that existing string parsing code for non-multi-byte
encodings, which may look for example for ' " = or LF characters, does
not break when parsing strings with multi-byte characters in them.

TL;DR: LF and CR will never be part of a multi-byte character, so
you can simply do: strstr(str, "\r\n") to find the CRLF.



+
+if (!g_utf8_validate(str, len, NULL)) {
+g_set_error_literal(error, G_CONVERT_ERROR,
+G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
+Invalid byte sequence in conversion input);
+return -1;
+}


And once you simply treat this as a regular C-string without worrying
about multi-byte encodings you can also drop this.


+
+*nl_len = nl;
+return len;
+}
+
+
+static gchar* spice_convert_newlines(const gchar *str, gssize len,
+ NewlineType from,
+ NewlineType to,
+ GError **error)
+{
+GError *err = NULL;
+gssize length;
+gsize nl;
+GString *output;
+gboolean free_segment = FALSE;
+gint i;
+
+g_return_val_if_fail(str != NULL, NULL);
+g_return_val_if_fail(len = -1, NULL);
+g_return_val_if_fail(error == NULL || *error == NULL, NULL);
+/* only 2 supported combinations */
+g_return_val_if_fail((from == NEWLINE_TYPE_LF 
+  to == NEWLINE_TYPE_CR_LF) ||
+ (from == NEWLINE_TYPE_CR_LF 
+  to == NEWLINE_TYPE_LF), NULL);
+
+if (len == -1)
+len = strlen(str);
+/* sometime we get \0 terminated strings, skip that, or it fails
+   to utf8 validate line with \0 end */
+else if (str[len] == 0)
+len -= 1;
+
+/* allocate worst case, if it's small enough, we don't care much,
+ * if it's big, malloc will put us in mmap'd region, and we can
+ * over allocate.
+ */
+output = g_string_sized_new(len * 2 + 1);
+
+for (i = 0; i  len; i += length + nl) {
+length = get_line(str + i, len - i, from, nl, error);
+if (length  0)
+break;
+
+g_string_append_len(output, str + i, length);
+
+if (nl) {
+/* let's not double \r if it's already in the line */
+if (to == NEWLINE_TYPE_CR_LF 
+output-str[output-len - 1] != '\r')
+g_string_append_c(output, '\r');
+
+g_string_append_c(output, '\n');
+}
+}
+
+if (err) {
+g_propagate_error(error, err);
+free_segment = TRUE;
+}
+
+return g_string_free(output, free_segment);
+}
+
+G_GNUC_INTERNAL
+gchar* spice_dos2unix(const gchar *str, gssize len, GError **error)
+{
+return 

Re: [Spice-devel] [PATCH spice-gtk 3/4] util: add unix2dos and dos2unix

2013-08-24 Thread Marc-André Lureau
On Sat, Aug 24, 2013 at 12:20 PM, Hans de Goede hdego...@redhat.com wrote:
 Hi,


 On 08/23/2013 10:25 PM, Marc-André Lureau wrote:

 Convert line endings from/to LF/CRLF, in utf8.
 ---
   gtk/spice-util-priv.h |   2 +
   gtk/spice-util.c  | 122
 ++
   2 files changed, 124 insertions(+)

 diff --git a/gtk/spice-util-priv.h b/gtk/spice-util-priv.h
 index ee5a42d..cc559dc 100644
 --- a/gtk/spice-util-priv.h
 +++ b/gtk/spice-util-priv.h
 @@ -29,6 +29,8 @@ gboolean spice_strv_contains(const GStrv strv, const
 gchar *str);
   gchar* spice_uuid_to_string(const guint8 uuid[16]);
   const gchar* spice_yes_no(gboolean value);
   guint16 spice_make_scancode(guint scancode, gboolean release);
 +gchar* spice_unix2dos(const gchar *str, gssize len, GError **error);
 +gchar* spice_dos2unix(const gchar *str, gssize len, GError **error);

   #if GLIB_CHECK_VERSION(2,32,0)
   #define STATIC_MUTEXGMutex
 diff --git a/gtk/spice-util.c b/gtk/spice-util.c
 index 774a145..be10edc 100644
 --- a/gtk/spice-util.c
 +++ b/gtk/spice-util.c
 @@ -19,6 +19,7 @@
   #ifdef HAVE_CONFIG_H
   # include config.h
   #endif
 +
   #include stdlib.h
   #include string.h
   #include glib-object.h
 @@ -245,3 +246,124 @@ guint16 spice_make_scancode(guint scancode, gboolean
 release)

   g_return_val_if_reached(0);
   }
 +
 +typedef enum {
 +NEWLINE_TYPE_LF,
 +NEWLINE_TYPE_CR_LF
 +} NewlineType;
 +
 +static gssize get_line(const gchar *str, gsize len,
 +   NewlineType type, gsize *nl_len,
 +   GError **error)
 +{
 +const gchar *p = str;
 +gsize nl = 0;
 +
 +if (type == NEWLINE_TYPE_CR_LF) {
 +while ((p - str)  len) {
 +p = g_utf8_strchr(p, len, '\r');
 +if (!p)
 +break;
 +p = g_utf8_next_char(p);
 +if (g_utf8_get_char(p) == '\n') {
 +len = (p - str) - 1;
 +nl = 2;
 +break;
 +}
 +}
 +} else {
 +p = g_utf8_strchr(str, len, '\n');
 +if (p) {
 +len = p - str;
 +nl = 1;
 +}
 +}


 This looks way more complicated then it needs to be, in UTF-8
 0x00 - 0x7f only are valid as a single-byte sequence. multi-byte
 encoded characters will never contain 0x00 - 0x7f. UTF-8 was designed
 this way, is so that existing string parsing code for non multi-byte
 encodings, which make look for example for '  = or LF characters does
 not break when parsing strings with multi-byte characters in there.

 TL;DR: LF and CR will never be part of a multi byte character, so
 you can simple do: strstr(str, \r\n) to find the CRLF.

g_utf8_strchr is implemented using a regular strstr. Speed shouldn't
be different here. I prefer to use utf8 functions on utf8 strings.



 +
 +if (!g_utf8_validate(str, len, NULL)) {
 +g_set_error_literal(error, G_CONVERT_ERROR,
 +G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
 +Invalid byte sequence in conversion input);
 +return -1;
 +}


 And once you simply treat this as a regular C-string without worrying
 about multi-byte encodings you can also drop this.

Actually, during implementation, I have encountered/produced invalid
utf8 that will break later on in gtk+, so I prefer to validate the
production.


 +
 +*nl_len = nl;
 +return len;
 +}
 +
 +
 +static gchar* spice_convert_newlines(const gchar *str, gssize len,
 + NewlineType from,
 + NewlineType to,
 + GError **error)
 +{
 +GError *err = NULL;
 +gssize length;
 +gsize nl;
 +GString *output;
 +gboolean free_segment = FALSE;
 +gint i;
 +
 +g_return_val_if_fail(str != NULL, NULL);
 +g_return_val_if_fail(len = -1, NULL);
 +g_return_val_if_fail(error == NULL || *error == NULL, NULL);
 +/* only 2 supported combinations */
 +g_return_val_if_fail((from == NEWLINE_TYPE_LF 
 +  to == NEWLINE_TYPE_CR_LF) ||
 + (from == NEWLINE_TYPE_CR_LF 
 +  to == NEWLINE_TYPE_LF), NULL);
 +
 +if (len == -1)
 +len = strlen(str);
 +/* sometime we get \0 terminated strings, skip that, or it fails
 +   to utf8 validate line with \0 end */
 +else if (str[len] == 0)
 +len -= 1;
 +
 +/* allocate worst case, if it's small enough, we don't care much,
 + * if it's big, malloc will put us in mmap'd region, and we can
 + * over allocate.
 + */
 +output = g_string_sized_new(len * 2 + 1);
 +
 +for (i = 0; i  len; i += length + nl) {
 +length = get_line(str + i, len - i, from, nl, error);
 +if (length  0)
 +break;
 +
 +g_string_append_len(output, str + i, length);
 +
 +if (nl) {
 +/* let's not double \r if it's 

Re: [Spice-devel] [PATCH spice-gtk 3/4] util: add unix2dos and dos2unix

2013-08-24 Thread Hans de Goede

Hi,

On 08/24/2013 02:17 PM, Marc-André Lureau wrote:

On Sat, Aug 24, 2013 at 12:20 PM, Hans de Goede hdego...@redhat.com wrote:

Hi,


On 08/23/2013 10:25 PM, Marc-André Lureau wrote:


Convert line endings from/to LF/CRLF, in utf8.
---
   gtk/spice-util-priv.h |   2 +
   gtk/spice-util.c  | 122
++
   2 files changed, 124 insertions(+)

diff --git a/gtk/spice-util-priv.h b/gtk/spice-util-priv.h
index ee5a42d..cc559dc 100644
--- a/gtk/spice-util-priv.h
+++ b/gtk/spice-util-priv.h
@@ -29,6 +29,8 @@ gboolean spice_strv_contains(const GStrv strv, const
gchar *str);
   gchar* spice_uuid_to_string(const guint8 uuid[16]);
   const gchar* spice_yes_no(gboolean value);
   guint16 spice_make_scancode(guint scancode, gboolean release);
+gchar* spice_unix2dos(const gchar *str, gssize len, GError **error);
+gchar* spice_dos2unix(const gchar *str, gssize len, GError **error);

   #if GLIB_CHECK_VERSION(2,32,0)
   #define STATIC_MUTEXGMutex
diff --git a/gtk/spice-util.c b/gtk/spice-util.c
index 774a145..be10edc 100644
--- a/gtk/spice-util.c
+++ b/gtk/spice-util.c
@@ -19,6 +19,7 @@
   #ifdef HAVE_CONFIG_H
   # include config.h
   #endif
+
   #include stdlib.h
   #include string.h
   #include glib-object.h
@@ -245,3 +246,124 @@ guint16 spice_make_scancode(guint scancode, gboolean
release)

   g_return_val_if_reached(0);
   }
+
+typedef enum {
+NEWLINE_TYPE_LF,
+NEWLINE_TYPE_CR_LF
+} NewlineType;
+
+static gssize get_line(const gchar *str, gsize len,
+   NewlineType type, gsize *nl_len,
+   GError **error)
+{
+const gchar *p = str;
+gsize nl = 0;
+
+if (type == NEWLINE_TYPE_CR_LF) {
+while ((p - str)  len) {
+p = g_utf8_strchr(p, len, '\r');
+if (!p)
+break;
+p = g_utf8_next_char(p);
+if (g_utf8_get_char(p) == '\n') {
+len = (p - str) - 1;
+nl = 2;
+break;
+}
+}
+} else {
+p = g_utf8_strchr(str, len, '\n');
+if (p) {
+len = p - str;
+nl = 1;
+}
+}



This looks way more complicated then it needs to be, in UTF-8
0x00 - 0x7f only are valid as a single-byte sequence. multi-byte
encoded characters will never contain 0x00 - 0x7f. UTF-8 was designed
this way, is so that existing string parsing code for non multi-byte
encodings, which make look for example for '  = or LF characters does
not break when parsing strings with multi-byte characters in there.

TL;DR: LF and CR will never be part of a multi byte character, so
you can simple do: strstr(str, \r\n) to find the CRLF.


g_utf8_strchr is implemented using a regular strstr. Speed shouldn't
be different here. I prefer to use utf8 functions on utf8 strings.


Right, but I'm suggesting using strstr instead of strchr replacing:

 +while ((p - str)  len) {
 +p = g_utf8_strchr(p, len, '\r');
 +if (!p)
 +break;
 +p = g_utf8_next_char(p);
 +if (g_utf8_get_char(p) == '\n') {
 +len = (p - str) - 1;
 +nl = 2;
 +break;
 +}
 +}

with a single strstr(str, "\r\n") call, which is one heck of
a lot more readable. The above loop gives me a headache, and
I'm quite sure you won't be able to remember what exactly
it is doing a couple of months from now either, whereas
a simple strstr(str, "\r\n") is quite obvious. I really believe
we should take the KISS approach here.

I see that there is no g_utf8_strstr, likely simply because there
would be no difference between a g_utf8_strstr and regular strstr.


+
+if (!g_utf8_validate(str, len, NULL)) {
+g_set_error_literal(error, G_CONVERT_ERROR,
+G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
+Invalid byte sequence in conversion input);
+return -1;
+}



And once you simply treat this as a regular C-string without worrying
about multi-byte encodings you can also drop this.


Actually, during implementation, I have encountered/produced invalid
utf8 that will break later on in gtk+, so I prefer to validate the
production.


Ok.

Regards,

Hans
___
Spice-devel mailing list
Spice-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/spice-devel


Re: [Spice-devel] [PATCH spice-gtk 3/4] util: add unix2dos and dos2unix

2013-08-24 Thread Hans de Goede

Hi,

On 08/24/2013 02:17 PM, Marc-André Lureau wrote:

snip


+
+if (!g_utf8_validate(str, len, NULL)) {
+g_set_error_literal(error, G_CONVERT_ERROR,
+G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
+Invalid byte sequence in conversion input);
+return -1;
+}



And once you simply treat this as a regular C-string without worrying
about multi-byte encodings you can also drop this.


Actually, during implementation, I have encountered/produced invalid
utf8 that will break later on in gtk+, so I prefer to validate the
production.


Thinking more about this, if we want to do utf-8 validation, it should not
be done here, but rather in gtk/channel-main.c, since this code only gets
called in certain guest-line-end + direction cases, and if we want to do
utf-8 validation we should always do it.

Regards,

Hans
___
Spice-devel mailing list
Spice-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/spice-devel


Re: [Spice-devel] [PATCH spice-gtk 3/4] util: add unix2dos and dos2unix

2013-08-24 Thread Marc-André Lureau


- Mensaje original -
 Hi,
 
 On 08/24/2013 02:17 PM, Marc-André Lureau wrote:
 
 snip
 
  +
  +if (!g_utf8_validate(str, len, NULL)) {
  +g_set_error_literal(error, G_CONVERT_ERROR,
  +G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
  +Invalid byte sequence in conversion
  input);
  +return -1;
  +}
 
 
  And once you simply treat this as a regular C-string without worrying
  about multi-byte encodings you can also drop this.
 
  Actually, during implementation, I have encountered/produced invalid
  utf8 that will break later on in gtk+, so I prefer to validate the
  production.
 
 Thinking more about this, if we want to do utf-8 validation, it should not
 be done here, but rather in gtk/channel-main.c, since this code only gets
 called in certain guest-line-end + direction cases, and if we want to do
 utf-8 validation we should always do it.

Perhaps, although the difference is that here we do parse/modify the string,
so it's important to check we don't produce garbage.

gtk/channel-main.c is a pass-through, validation is left to the
other end at this point.
___
Spice-devel mailing list
Spice-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/spice-devel


Re: [Spice-devel] [PATCH spice-gtk 3/4] util: add unix2dos and dos2unix

2013-08-24 Thread Hans de Goede

Hi,

On 08/24/2013 02:32 PM, Marc-André Lureau wrote:



- Mensaje original -

Hi,

On 08/24/2013 02:17 PM, Marc-André Lureau wrote:

snip


+
+if (!g_utf8_validate(str, len, NULL)) {
+g_set_error_literal(error, G_CONVERT_ERROR,
+G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
+Invalid byte sequence in conversion
input);
+return -1;
+}



And once you simply treat this as a regular C-string without worrying
about multi-byte encodings you can also drop this.


Actually, during implementation, I have encountered/produced invalid
utf8 that will break later on in gtk+, so I prefer to validate the
production.


Thinking more about this, if we want to do utf-8 validation, it should not
be done here, but rather in gtk/channel-main.c, since this code only gets
called in certain guest-line-end + direction cases, and if we want to do
utf-8 validation we should always do it.


Perhaps, although the difference is that here we do parse/modify the string,
so it's important to check we don't produce garbage.


Right, but since garbage in = garbage out, you're not only checking that
the conversion code did not foo-bar, you're also validating the original input,
at which point it makes sense to me to always do that even when not doing
conversion.

Regards,

Hans
___
Spice-devel mailing list
Spice-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/spice-devel


Re: [Spice-devel] [PATCH spice-gtk 3/4] util: add unix2dos and dos2unix

2013-08-24 Thread Marc-André Lureau


- Mensaje original -
 Hi,
 
 On 08/24/2013 02:32 PM, Marc-André Lureau wrote:
 
 
  - Mensaje original -
  Hi,
 
  On 08/24/2013 02:17 PM, Marc-André Lureau wrote:
 
  snip
 
  +
  +if (!g_utf8_validate(str, len, NULL)) {
  +g_set_error_literal(error, G_CONVERT_ERROR,
  +G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
  +Invalid byte sequence in conversion
  input);
  +return -1;
  +}
 
 
  And once you simply treat this as a regular C-string without worrying
  about multi-byte encodings you can also drop this.
 
  Actually, during implementation, I have encountered/produced invalid
  utf8 that will break later on in gtk+, so I prefer to validate the
  production.
 
  Thinking more about this, if we want to do utf-8 validation, it should not
  be done here, but rather in gtk/channel-main.c, since this code only gets
  called in certain guest-line-end + direction cases, and if we want to do
  utf-8 validation we should always do it.
 
  Perhaps, although the difference is that here we do parse/modify the
  string,
  so it's important to check we don't produce garbage.
 
 Right, but since garbage in = garbage out, you're not only checking that
 the conversion code did not foo-bar, you're also validating the original
 input,
 at which point it makes sense to me to always do that even when not doing
 conversion.

In one case, it's a pass-through, the caller and the destination are 
responsible for validation.

But here, we do parse and modify, so it's necessary to validate.

I am not strictly against validating utf8 all the time, but I don't think it 
belongs to the messenger.
___
Spice-devel mailing list
Spice-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/spice-devel


Re: [Spice-devel] [PATCH spice-gtk 3/4] util: add unix2dos and dos2unix

2013-08-24 Thread Marc-André Lureau


- Mensaje original -
 Hi,
 
 On 08/24/2013 02:56 PM, Marc-André Lureau wrote:
 
 
  - Mensaje original -
  Hi,
 
  On 08/24/2013 02:32 PM, Marc-André Lureau wrote:
 
 
  - Mensaje original -
  Hi,
 
  On 08/24/2013 02:17 PM, Marc-André Lureau wrote:
 
  snip
 
  +
  +if (!g_utf8_validate(str, len, NULL)) {
  +g_set_error_literal(error, G_CONVERT_ERROR,
  +G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
  +Invalid byte sequence in conversion
  input);
  +return -1;
  +}
 
 
  And once you simply treat this as a regular C-string without worrying
  about multi-byte encodings you can also drop this.
 
  Actually, during implementation, I have encountered/produced invalid
  utf8 that will break later on in gtk+, so I prefer to validate the
  production.
 
  Thinking more about this, if we want to do utf-8 validation, it should
  not
  be done here, but rather in gtk/channel-main.c, since this code only
  gets
  called in certain guest-line-end + direction cases, and if we want to do
  utf-8 validation we should always do it.
 
  Perhaps, although the difference is that here we do parse/modify the
  string,
  so it's important to check we don't produce garbage.
 
  Right, but since garbage in = garbage out, you're not only checking that
  the conversion code did not foo-bar, you're also validating the original
  input,
  at which point it makes sense to me to always do that even when not doing
  conversion.
 
  In one case, it's a pass-through, the caller and the destination are
  responsible for validation.
 
  But here, we do parse and modify, so it's necessary to validate.
 
  I am not stricly against validating all the time utf8, but I don't think it
  belongs to the messenger.
 
 I agree that validation is best left up to the receiver, but in that case we
 should simply
 never verify, as I suggested in the first place. line-ending conversion only
 inserts / removes
 single-byte characters, and since these can never be part of a multi-byte
 character in UTF-8,
 we cannot make the input any more (or less) broken than it was.
 
 I really think we are doing ourselves a disservice by validating only when
 doing line-ending
 conversion, since we will then likely get difficult to debug bugs, where we
 get non valid utf-8
 in, and end up rejecting it only in some cases (while most receivers will
 likely accept it and
 make the best out of it). Following the "receiver should validate (and decide
 whether to outright
 reject, or simply insert some ? chars or some such)" reasoning to its logical
 conclusion,
 we should simply never validate.

ok, let's remove it then
___
Spice-devel mailing list
Spice-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/spice-devel


[Spice-devel] [PATCH spice-gtk 3/4] util: add unix2dos and dos2unix

2013-08-23 Thread Marc-André Lureau
Convert line endings from/to LF/CRLF, in utf8.
---
 gtk/spice-util-priv.h |   2 +
 gtk/spice-util.c  | 122 ++
 2 files changed, 124 insertions(+)

diff --git a/gtk/spice-util-priv.h b/gtk/spice-util-priv.h
index ee5a42d..cc559dc 100644
--- a/gtk/spice-util-priv.h
+++ b/gtk/spice-util-priv.h
@@ -29,6 +29,8 @@ gboolean spice_strv_contains(const GStrv strv, const gchar 
*str);
 gchar* spice_uuid_to_string(const guint8 uuid[16]);
 const gchar* spice_yes_no(gboolean value);
 guint16 spice_make_scancode(guint scancode, gboolean release);
+gchar* spice_unix2dos(const gchar *str, gssize len, GError **error);
+gchar* spice_dos2unix(const gchar *str, gssize len, GError **error);
 
 #if GLIB_CHECK_VERSION(2,32,0)
 #define STATIC_MUTEXGMutex
diff --git a/gtk/spice-util.c b/gtk/spice-util.c
index 774a145..be10edc 100644
--- a/gtk/spice-util.c
+++ b/gtk/spice-util.c
@@ -19,6 +19,7 @@
 #ifdef HAVE_CONFIG_H
 # include config.h
 #endif
+
 #include stdlib.h
 #include string.h
 #include glib-object.h
@@ -245,3 +246,124 @@ guint16 spice_make_scancode(guint scancode, gboolean 
release)
 
 g_return_val_if_reached(0);
 }
+
+typedef enum {
+NEWLINE_TYPE_LF,
+NEWLINE_TYPE_CR_LF
+} NewlineType;
+
+static gssize get_line(const gchar *str, gsize len,
+   NewlineType type, gsize *nl_len,
+   GError **error)
+{
+const gchar *p = str;
+gsize nl = 0;
+
+if (type == NEWLINE_TYPE_CR_LF) {
+while ((p - str)  len) {
+p = g_utf8_strchr(p, len, '\r');
+if (!p)
+break;
+p = g_utf8_next_char(p);
+if (g_utf8_get_char(p) == '\n') {
+len = (p - str) - 1;
+nl = 2;
+break;
+}
+}
+} else {
+p = g_utf8_strchr(str, len, '\n');
+if (p) {
+len = p - str;
+nl = 1;
+}
+}
+
+if (!g_utf8_validate(str, len, NULL)) {
+g_set_error_literal(error, G_CONVERT_ERROR,
+G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
+Invalid byte sequence in conversion input);
+return -1;
+}
+
+*nl_len = nl;
+return len;
+}
+
+
+static gchar* spice_convert_newlines(const gchar *str, gssize len,
+ NewlineType from,
+ NewlineType to,
+ GError **error)
+{
+GError *err = NULL;
+gssize length;
+gsize nl;
+GString *output;
+gboolean free_segment = FALSE;
+gint i;
+
+g_return_val_if_fail(str != NULL, NULL);
+g_return_val_if_fail(len = -1, NULL);
+g_return_val_if_fail(error == NULL || *error == NULL, NULL);
+/* only 2 supported combinations */
+g_return_val_if_fail((from == NEWLINE_TYPE_LF 
+  to == NEWLINE_TYPE_CR_LF) ||
+ (from == NEWLINE_TYPE_CR_LF 
+  to == NEWLINE_TYPE_LF), NULL);
+
+if (len == -1)
+len = strlen(str);
+/* sometime we get \0 terminated strings, skip that, or it fails
+   to utf8 validate line with \0 end */
+else if (str[len] == 0)
+len -= 1;
+
+/* allocate worst case, if it's small enough, we don't care much,
+ * if it's big, malloc will put us in mmap'd region, and we can
+ * over allocate.
+ */
+output = g_string_sized_new(len * 2 + 1);
+
+for (i = 0; i  len; i += length + nl) {
+length = get_line(str + i, len - i, from, nl, error);
+if (length  0)
+break;
+
+g_string_append_len(output, str + i, length);
+
+if (nl) {
+/* let's not double \r if it's already in the line */
+if (to == NEWLINE_TYPE_CR_LF 
+output-str[output-len - 1] != '\r')
+g_string_append_c(output, '\r');
+
+g_string_append_c(output, '\n');
+}
+}
+
+if (err) {
+g_propagate_error(error, err);
+free_segment = TRUE;
+}
+
+return g_string_free(output, free_segment);
+}
+
+G_GNUC_INTERNAL
+gchar* spice_dos2unix(const gchar *str, gssize len, GError **error)
+{
+return spice_convert_newlines(str, len,
+  NEWLINE_TYPE_CR_LF,
+  NEWLINE_TYPE_LF,
+  error);
+}
+
+G_GNUC_INTERNAL
+gchar* spice_unix2dos(const gchar *str, gssize len, GError **error)
+{
+return spice_convert_newlines(str, len,
+  NEWLINE_TYPE_LF,
+  NEWLINE_TYPE_CR_LF,
+  error);
+}
-- 
1.8.3.rc1.49.g8d97506

___
Spice-devel mailing list
Spice-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/spice-devel


Re: [Spice-devel] [PATCH spice-gtk 3/4] util: add unix2dos and dos2unix

2013-08-23 Thread Marc-André Lureau
On Fri, Aug 23, 2013 at 10:25 PM, Marc-André Lureau
marcandre.lur...@gmail.com wrote:
else if (str[len] == 0)
 +len -= 1;
 +


I added a len > 0 condition here.

-- 
Marc-André Lureau
___
Spice-devel mailing list
Spice-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/spice-devel