Revision: 43427
          
http://projects.blender.org/scm/viewvc.php?view=rev&root=bf-blender&revision=43427
Author:   lockal
Date:     2012-01-16 16:23:25 +0000 (Mon, 16 Jan 2012)
Log Message:
-----------
patch [#29859] UTF-8 support for text editor.
This also fixes cursor movement at the beginning of a line and adds a do_versions
block for converting text files with the old extended ASCII encoding into UTF-8.

Modified Paths:
--------------
    trunk/blender/source/blender/blenkernel/BKE_text.h
    trunk/blender/source/blender/blenkernel/intern/text.c
    trunk/blender/source/blender/blenloader/intern/readfile.c
    trunk/blender/source/blender/editors/space_text/text_draw.c
    trunk/blender/source/blender/editors/space_text/text_intern.h
    trunk/blender/source/blender/editors/space_text/text_ops.c

Modified: trunk/blender/source/blender/blenkernel/BKE_text.h
===================================================================
--- trunk/blender/source/blender/blenkernel/BKE_text.h  2012-01-16 16:16:55 UTC 
(rev 43426)
+++ trunk/blender/source/blender/blenkernel/BKE_text.h  2012-01-16 16:23:25 UTC 
(rev 43427)
@@ -46,6 +46,7 @@
 void                   txt_set_undostate       (int u);
 int                    txt_get_undostate       (void);
 struct Text*   add_empty_text  (const char *name);
+int             txt_extended_ascii_as_utf8(char **str);
 int                reopen_text         (struct Text *text);
 struct Text*   add_text                (const char *file, const char 
*relpath); 
 struct Text*   copy_text               (struct Text *ta);
@@ -59,6 +60,8 @@
 int            txt_find_string         (struct Text *text, const char 
*findstr, int wrap, int match_case);
 int            txt_has_sel                     (struct Text *text);
 int            txt_get_span            (struct TextLine *from, struct TextLine 
*to);
+int     txt_utf8_offset_to_index(char *str, int offset);
+int     txt_utf8_index_to_offset(char *str, int index);
 void   txt_move_up                     (struct Text *text, short sel);
 void   txt_move_down           (struct Text *text, short sel);
 void   txt_move_left           (struct Text *text, short sel);
@@ -86,9 +89,9 @@
 void   txt_split_curline       (struct Text *text);
 void   txt_backspace_char      (struct Text *text);
 void   txt_backspace_word      (struct Text *text);
-int            txt_add_char            (struct Text *text, char add);
-int            txt_add_raw_char        (struct Text *text, char add);
-int            txt_replace_char        (struct Text *text, char add);
+int            txt_add_char            (struct Text *text, unsigned int add);
+int            txt_add_raw_char        (struct Text *text, unsigned int add);
+int            txt_replace_char        (struct Text *text, unsigned int add);
 void   txt_export_to_object(struct Text *text);
 void   txt_export_to_objects(struct Text *text);
 void   txt_unindent            (struct Text *text);
@@ -127,34 +130,48 @@
 #define UNDO_SLEFT             005
 #define UNDO_SRIGHT            006
 #define UNDO_SUP               007
-#define UNDO_SDOWN             021
+#define UNDO_SDOWN             010
 
 /* Complex movement (opcode is followed
  * by 4 character line ID + a 2 character
  * position ID and opcode (repeat)) */
-#define UNDO_CTO               022
-#define UNDO_STO               023
+#define UNDO_CTO               011
+#define UNDO_STO               012
 
-/* Complex editing (opcode is followed
- * by 1 character ID and opcode (repeat)) */
-#define UNDO_INSERT            024
-#define UNDO_BS                        025
-#define UNDO_DEL               026
+/* Complex editing */
+/* 1 - opcode is followed by 1 byte for ascii character and opcode (repeat) */
+/* 2 - opcode is followed by 2 bytes for utf-8 character and opcode (repeat) */
+/* 3 - opcode is followed by 3 bytes for utf-8 character and opcode (repeat) */
+/* 4 - opcode is followed by 4 bytes for unicode character and opcode (repeat) */
+#define UNDO_INSERT_1   013
+#define UNDO_INSERT_2   014
+#define UNDO_INSERT_3   015
+#define UNDO_INSERT_4   016
 
+#define UNDO_BS_1       017
+#define UNDO_BS_2       020
+#define UNDO_BS_3       021
+#define UNDO_BS_4       022
+
+#define UNDO_DEL_1      023
+#define UNDO_DEL_2      024
+#define UNDO_DEL_3      025
+#define UNDO_DEL_4      026
+
 /* Text block (opcode is followed
  * by 4 character length ID + the text
  * block itself + the 4 character length
  * ID (repeat) and opcode (repeat)) */
-#define UNDO_DBLOCK            027 /* Delete block */
-#define UNDO_IBLOCK            030 /* Insert block */
+#define UNDO_DBLOCK        027 /* Delete block */
+#define UNDO_IBLOCK        030 /* Insert block */
 
 /* Misc */
-#define UNDO_SWAP              031     /* Swap cursors */
+#define UNDO_SWAP       031    /* Swap cursors */
 
-#define UNDO_INDENT            032
-#define UNDO_UNINDENT          033
-#define UNDO_COMMENT           034
-#define UNDO_UNCOMMENT         035
+#define UNDO_INDENT     032
+#define UNDO_UNINDENT   033
+#define UNDO_COMMENT    034
+#define UNDO_UNCOMMENT  035
 
 /* Marker flags */
 #define TMARK_TEMP             0x01    /* Remove on non-editing events, don't 
save */

Modified: trunk/blender/source/blender/blenkernel/intern/text.c
===================================================================
--- trunk/blender/source/blender/blenkernel/intern/text.c       2012-01-16 
16:16:55 UTC (rev 43426)
+++ trunk/blender/source/blender/blenkernel/intern/text.c       2012-01-16 
16:23:25 UTC (rev 43427)
@@ -33,6 +33,8 @@
 #include <string.h> /* strstr */
 #include <sys/types.h>
 #include <sys/stat.h>
+#include <wchar.h>
+#include <wctype.h>
 
 #include "MEM_guardedalloc.h"
 
@@ -215,8 +217,48 @@
        return ta;
 }
 
+/* this function replaces extended ascii characters */
+/* with valid utf-8 sequences */
+int txt_extended_ascii_as_utf8(char **str)
+{
+       int bad_char, added= 0, i= 0;
+       int length = strlen(*str);
+
+       while ((*str)[i]) {
+               if((bad_char= BLI_utf8_invalid_byte(*str+i, length)) == -1)
+                   break;
+
+               added++;
+               i+= bad_char + 1;
+       }
+       
+       if (added != 0) {
+               char *newstr = MEM_mallocN(length+added+1, "text_line");
+               int mi = 0;
+               i= 0;
+               
+               while ((*str)[i]) {
+                       if((bad_char= BLI_utf8_invalid_byte((*str)+i, length)) 
== -1) {
+                               memcpy(newstr+mi, (*str)+i, length - i + 1);
+                               break;
+                       }
+                       
+                       memcpy(newstr+mi, (*str)+i, bad_char);
+
+                       BLI_str_utf8_from_unicode((*str)[i+bad_char], 
newstr+mi+bad_char);
+                       i+= bad_char+1;
+                       mi+= bad_char+2;
+               }
+               newstr[length+added] = '\0';
+               MEM_freeN(*str);
+               *str = newstr;
+       }
+       
+       return added;
+}
+
 // this function removes any control characters from
-// a textline
+// a textline and fixes invalid utf-8 sequences
 
 static void cleanup_textline(TextLine * tl)
 {
@@ -229,6 +271,7 @@
                        i--;
                }
        }
+       tl->len+= txt_extended_ascii_as_utf8(&tl->line);
 }
 
 int reopen_text(Text *text)
@@ -689,16 +732,10 @@
 }
 
 /* 0:whitespace, 1:punct, 2:alphanumeric */
-static short txt_char_type (char ch)
+static short txt_char_type(unsigned int ch)
 {
-       if (ch <= ' ') return 0; /* 32 */
-       if (ch <= '/') return 1; /* 47 */
-       if (ch <= '9') return 2; /* 57 */
-       if (ch <= '@') return 1; /* 64 */
-       if (ch <= 'Z') return 2; /* 90 */
-       if (ch == '_') return 2; /* 95, dont delimit '_' */
-       if (ch <= '`') return 1; /* 96 */
-       if (ch <= 'z') return 2; /* 122 */
+       if (iswspace(ch)) return 0;
+       if (iswalpha(ch) || iswdigit(ch)) return 2;
        return 1;
 }
 
@@ -731,10 +768,43 @@
        }
 }
 
-/****************************/
+/*****************************/
 /* Cursor movement functions */
-/****************************/
+/*****************************/
 
+int txt_utf8_offset_to_index(char *str, int offset)
+{
+       int index= 0, pos= 0;
+       while (pos != offset) {
+               pos += BLI_str_utf8_size(str + pos);
+               index++;
+       }
+       return index;
+}
+
+int txt_utf8_index_to_offset(char *str, int index)
+{
+       int offset= 0, pos= 0;
+       while (pos != index) {
+               offset += BLI_str_utf8_size(str + offset);
+               pos++;
+       }
+       return offset;
+}
+
+/* returns the real number of characters in string */
+/* not the same as BLI_strlen_utf8, which returns length for wide characters */
+static int txt_utf8_len(const char *src)
+{
+       int len;
+
+       for (len=0; *src; len++) {
+               src += BLI_str_utf8_size(src);
+       }
+
+       return len;
+}
+
 void txt_move_up(Text *text, short sel)
 {
        TextLine **linep;
@@ -747,13 +817,13 @@
        old= *charp;
 
        if((*linep)->prev) {
+               int index = txt_utf8_offset_to_index((*linep)->line, *charp);
                *linep= (*linep)->prev;
-               if (*charp > (*linep)->len) {
-                       *charp= (*linep)->len;
-                       if(!undoing) txt_undo_add_toop(text, 
sel?UNDO_STO:UNDO_CTO, txt_get_span(text->lines.first, (*linep)->next), old, 
txt_get_span(text->lines.first, *linep), (unsigned short) *charp);
-               } else {
-                       if(!undoing) txt_undo_add_op(text, 
sel?UNDO_SUP:UNDO_CUP);
-               }
+               if (index > txt_utf8_len((*linep)->line)) *charp= (*linep)->len;
+               else *charp= txt_utf8_index_to_offset((*linep)->line, index);
+               
+               if(!undoing)
+                       txt_undo_add_op(text, sel?UNDO_SUP:UNDO_CUP);
        } else {
                txt_move_bol(text, sel);
        }
@@ -773,12 +843,13 @@
        old= *charp;
 
        if((*linep)->next) {
+               int index = txt_utf8_offset_to_index((*linep)->line, *charp);
                *linep= (*linep)->next;
-               if (*charp > (*linep)->len) {
-                       *charp= (*linep)->len;
-                       if(!undoing) txt_undo_add_toop(text, 
sel?UNDO_STO:UNDO_CTO, txt_get_span(text->lines.first, (*linep)->prev), old, 
txt_get_span(text->lines.first, *linep), (unsigned short)*charp);
-               } else
-                       if(!undoing) txt_undo_add_op(text, 
sel?UNDO_SDOWN:UNDO_CDOWN);  
+               if (index > txt_utf8_len((*linep)->line)) *charp= (*linep)->len;
+               else *charp= txt_utf8_index_to_offset((*linep)->line, index);
+               
+               if(!undoing)
+                       txt_undo_add_op(text, sel?UNDO_SDOWN:UNDO_CDOWN);
        } else {
                txt_move_eol(text, sel);
        }
@@ -790,7 +861,7 @@
 {
        TextLine **linep;
        int *charp, oundoing= undoing;
-       int tabsize = 1, i=0;
+       int tabsize= 0, i= 0;
        
        if (!text) return;
        if(sel) txt_curs_sel(text, &linep, &charp);
@@ -799,32 +870,36 @@
 
        undoing= 1;
 
-       // do nice left only if there are only spaces
-       // TXT_TABSIZE hardcoded in DNA_text_types.h
-       if (text->flags & TXT_TABSTOSPACES) {
-               tabsize = TXT_TABSIZE;
-
-               if (*charp < tabsize)
-                       tabsize = *charp;
-               else {
-                       for (i=0;i<(*charp);i++)
+       if (*charp== 0) {
+               if ((*linep)->prev) {
+                       txt_move_up(text, sel);
+                       *charp= (*linep)->len;
+               }
+       }
+       else {
+               // do nice left only if there are only spaces
+               // TXT_TABSIZE hardcoded in DNA_text_types.h
+               if (text->flags & TXT_TABSTOSPACES) {
+                       tabsize= (*charp < TXT_TABSIZE) ? *charp : TXT_TABSIZE;
+                       
+                       for (i=0; i<(*charp); i++)
                                if ((*linep)->line[i] != ' ') {
-                                       tabsize = 1;
+                                       tabsize= 0;
                                        break;
                                }
+                       
                        // if in the middle of the space-tab
-                       if ((*charp) % tabsize != 0)
-                                       tabsize = ((*charp) % tabsize);
+                       if (tabsize && (*charp) % TXT_TABSIZE != 0)
+                               tabsize= ((*charp) % TXT_TABSIZE);
                }
-       }
-
-       if (*charp== 0) {
-               if ((*linep)->prev) {
-                       txt_move_up(text, sel);
-                       *charp= (*linep)->len;
+               
+               if (tabsize)
+                       (*charp)-= tabsize;
+               else {
+                       const char *prev= BLI_str_prev_char_utf8((*linep)->line 
+ *charp);
+                       *charp= prev - (*linep)->line;
                }
        }
-       else (*charp)-= tabsize;
 
        undoing= oundoing;
        if(!undoing) txt_undo_add_op(text, sel?UNDO_SLEFT:UNDO_CLEFT);
@@ -835,8 +910,7 @@
 void txt_move_right(Text *text, short sel) 
 {
        TextLine **linep;
-       int *charp, oundoing= undoing;
-       int tabsize=1, i=0;
+       int *charp, oundoing= undoing, do_tab= 0, i;
        
        if (!text) return;
        if(sel) txt_curs_sel(text, &linep, &charp);
@@ -845,32 +919,33 @@
 
        undoing= 1;
 
-       // do nice right only if there are only spaces
-       // spaces hardcoded in DNA_text_types.h
-       if (text->flags & TXT_TABSTOSPACES) {
-               tabsize = TXT_TABSIZE;
-
-               if ((*charp) + tabsize > (*linep)->len)
-                       tabsize = 1;
-               else {
-                       for (i=0;i<(*charp) + tabsize - ((*charp) % 
tabsize);i++)
-                               if ((*linep)->line[i] != ' ') {
-                                       tabsize = 1;
-                                       break;
-                               }

@@ Diff output truncated at 10240 characters. @@
_______________________________________________
Bf-blender-cvs mailing list
[email protected]
http://lists.blender.org/mailman/listinfo/bf-blender-cvs

Reply via email to