DO NOT REPLY TO THIS MESSAGE. INSTEAD, POST ANY RESPONSES TO THE LINK BELOW.
[STR Pending]
Link: http://www.fltk.org/str.php?L2348
Version: 1.3-current
Link: http://www.fltk.org/str.php?L2348
Version: 1.3-current
Index: FL/Fl_Text_Buffer.H
===================================================================
--- FL/Fl_Text_Buffer.H (revision 7966)
+++ FL/Fl_Text_Buffer.H (working copy)
@@ -300,23 +300,24 @@
non-zero on error (strerror() contains reason). 1 indicates open
for read failed (no data loaded). 2 indicates error occurred
while reading data (data was partially loaded).
+ File can be UTF-8 or CP1252-encoded.
+ If the input file is not UTF-8-encoded, the Fl_Text_Buffer widget will
+ contain UTF-8-recoded data. The message
+ Fl_Text_Buffer::file_encoding_warning_message will warn the user about this.
*/
int insertfile(const char *file, int pos, int buflen = 128*1024);
/**
- Appends the named file to the end of the buffer. Returns 0 on
- success, non-zero on error (strerror() contains reason). 1 indicates
- open for read failed (no data loaded). 2 indicates error occurred
- while reading data (data was partially loaded).
+ Appends the named file to the end of the buffer. See also insertfile().
*/
int appendfile(const char *file, int buflen = 128*1024)
{ return insertfile(file, length(), buflen); }
/**
- Loads a text file into the buffer
+ Loads a text file into the buffer, replacing the current contents.
+ The was_modified flag is reset before the file is read, so afterwards it
+ is set only if the input had to be recoded. See also insertfile().
*/
int loadfile(const char *file, int buflen = 128*1024)
- { select(0, length()); remove_selection(); return appendfile(file, buflen); }
+ { select(0, length()); remove_selection(); was_modified=0; return appendfile(file, buflen); }
/**
Writes the specified portions of the file to a file. Returns 0 on success,
non-zero
@@ -669,6 +670,11 @@
*/
int utf8_align(int) const;
+ /**
+ \brief true iff the text has been modified since initial loading
+
+ Initialised to 0 together with the other buffer members and reset to 0
+ by loadfile(). It is set when the registered modify callbacks are
+ invoked, and only if at least one callback is registered
+ (mNModifyProcs > 0). insertfile() restores the value it had on entry,
+ except that it also sets the flag when the file had to be recoded.
+ NOTE(review): a buffer without modify callbacks never gets the flag
+ set by an edit - confirm that this is intended.
+ */
+ int was_modified;
+
protected:
/**
@@ -763,6 +769,11 @@
int mPreferredGapSize; /**< the default allocation for the text gap is 1024
bytes and should only be increased if frequent
and large changes in buffer size are expected */
+
+ /** This message is displayed using the fl_alert() function when a file
+ which was not UTF-8 encoded is input.
+ */
+ static const char* file_encoding_warning_message;
};
#endif
Index: src/Fl_Text_Buffer.cxx
===================================================================
--- src/Fl_Text_Buffer.cxx (revision 7979)
+++ src/Fl_Text_Buffer.cxx (working copy)
@@ -32,6 +32,7 @@
#include <ctype.h>
#include <FL/Fl.H>
#include <FL/Fl_Text_Buffer.H>
+#include <FL/fl_ask.H>
/*
@@ -128,6 +129,7 @@
mPredeleteCbArgs = NULL;
mCursorPosHint = 0;
mCanUndo = 1;
+ was_modified = 0;
}
@@ -1307,6 +1309,9 @@
int nInserted, int nRestyled,
const char *deletedText) const {
IS_UTF8_ALIGNED2(this, pos)
+ if (mNModifyProcs > 0) {
+ ((Fl_Text_Buffer*)this)->was_modified = true;
+ }
for (int i = 0; i < mNModifyProcs; i++)
(*mModifyProcs[i]) (pos, nInserted, nDeleted, nRestyled,
deletedText, mCbArgs[i]);
@@ -1514,29 +1519,152 @@
}
+#ifdef EIGHT_BIT_ENCODING // shows how to process any 8-bit encoding
+
+// Maps one CP1252-encoded byte to its UCS (Unicode) code point.
+unsigned cp1252toucs(char c)
+{
+  // Unicode values for the Microsoft CP1252 bytes 0x80..0x9f,
+  // indexed by (byte - 0x80).
+  static unsigned high_block[32] = {
+    0x20ac, 0x0081, 0x201a, 0x0192, 0x201e, 0x2026, 0x2020, 0x2021,
+    0x02c6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008d, 0x017d, 0x008f,
+    0x0090, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
+    0x02dc, 0x2122, 0x0161, 0x203a, 0x0153, 0x009d, 0x017e, 0x0178
+  };
+  // Work on the unsigned value so bytes >= 0x80 are not seen as negative.
+  unsigned char byte = c;
+  if (byte >= 0x80 && byte < 0xa0)
+    return high_block[byte - 0x80];
+  // Every byte outside 0x80..0x9f is returned unchanged.
+  return byte;
+}
+
+
+// Filter that produces, from an input stream fed by reading from fp,
+// a UTF-8-encoded output stream written in buffer.
+// Input can be any 8-bit encoding.
+// Output is true UTF-8.
+// p_trf points to a function that transforms a byte into UCS.
+// Returns the number of bytes written to buffer; 0 means no more data.
+static int eightbit_input_filter(char *buffer,   // result buffer to fill with UTF-8 text
+                                 int buflen,     // capacity of buffer in bytes
+                                 char *line,     // raw input read-ahead buffer
+                                 int sline,      // size of line in bytes
+                                 char* &endline, // in/out: one past the last unread byte in line
+                                 unsigned (*p_trf)(char),
+                                 FILE *fp)
+{
+  char *p, *q, multibyte[5];
+  int lq, r;
+  // Resume with the bytes carried over from the previous call. endline must
+  // NOT be reset here: every return path below moves the unread bytes to
+  // the start of line and leaves endline just past them, so resetting it
+  // would silently drop input each time the output buffer fills up.
+  // The caller sets endline = line once before the first call.
+  p = line;
+  q = buffer;
+  while (q < buffer + buflen) {
+    if (p >= endline) {
+      // read-ahead buffer exhausted: refill it from the file
+      r = fread(line, 1, sline, fp);
+      endline = line + r;
+      if (r == 0) return q - buffer;
+      p = line;
+    }
+    lq = fl_utf8encode( p_trf(*p), multibyte );
+    if (q + lq > buffer + buflen) {
+      // encoded character does not fit: keep the unread input for next call
+      memmove(line, p, endline - p);
+      endline -= (p - line);
+      return q - buffer;
+    }
+    memcpy(q, multibyte, lq);
+    q += lq;
+    p++;
+  }
+  // output buffer is full: keep the unread input for the next call
+  memmove(line, p, endline - p);
+  endline -= (p - line);
+  return q - buffer;
+}
+#endif // EIGHT_BIT_ENCODING
+
/*
+ filter that produces, from an input stream fed by reading from fp,
+ a UTF-8-encoded output stream written in buffer.
+ Input can be UTF-8. If it is not, it is decoded with CP1252.
+ Output is true UTF-8.
+ *input_was_changed is set to true if the input was not strict UTF-8 so output
+ differs from input.
+ */
+static int utf8_input_filter(char *buffer,   // result buffer to fill with UTF-8 text
+                             int buflen,     // capacity of buffer in bytes
+                             char *line,     // raw input read-ahead buffer
+                             int sline,      // size of line in bytes
+                             char* &endline, // in/out: one past the last unread byte in line
+                             FILE *fp,       // stream to read from
+                             int *input_was_changed) // set to true if output differs from input
+{
+  // Returns the number of bytes written to buffer; 0 means no more data.
+  char *p, *q, multibyte[5];
+  int l, lp, lq, r;
+  unsigned u;
+  // Resume with the bytes carried over from the previous call. endline must
+  // NOT be reset here: every return path below moves the unread bytes to
+  // the start of line and leaves endline just past them, so resetting it
+  // would silently drop input each time the output buffer fills up.
+  // The caller sets endline = line once before the first call.
+  p = line;
+  q = buffer;
+  while (q < buffer + buflen) {
+    if (p >= endline) {
+      // read-ahead buffer exhausted: refill it from the file
+      r = fread(line, 1, sline, fp);
+      endline = line + r;
+      if (r == 0) return q - buffer;
+      p = line;
+    }
+    l = fl_utf8len1(*p);
+    if (p + l > endline) {
+      // the sequence announced by this byte runs past the data read so far:
+      // move the remainder to the front and top the buffer up from the file
+      memmove(line, p, endline - p);
+      endline -= (p - line);
+      r = fread(endline, 1, sline - (endline - line), fp);
+      endline += r;
+      p = line;
+      // still too short: stop here, the leftover bytes stay in line
+      if (endline - line < l) break;
+    }
+    while ( l > 0) {
+      // decode then re-encode; any length mismatch means the input was not
+      // strict UTF-8 and the output differs from it
+      u = fl_utf8decode(p, p+l, &lp);
+      lq = fl_utf8encode(u, multibyte);
+      if (lp != l || lq != l) *input_was_changed = true;
+      if (q + lq > buffer + buflen) {
+        // encoded character does not fit: keep the unread input for next call
+        memmove(line, p, endline - p);
+        endline -= (p - line);
+        return q - buffer;
+      }
+      memcpy(q, multibyte, lq);
+      q += lq;
+      p += lp;
+      l -= lp;
+    }
+  }
+  // output buffer is full (or input ended mid-sequence): keep the unread
+  // input at the start of line for the next call
+  memmove(line, p, endline - p);
+  endline -= (p - line);
+  return q - buffer;
+}
+
+// Text that insertfile() shows with fl_alert() after a successful read
+// whose input filter reported that the input had to be recoded.
+const char *Fl_Text_Buffer::file_encoding_warning_message =
+"Displayed text contains the UTF-8 recoding\n"
+"of the input file which was not UTF-8 encoded.\n"
+"Some changes may have occurred.";
+
+/*
Insert text from a file.
- Unicode safe. Input must be correct UTF-8!
+ Unicode safe. Input must be correct UTF-8 or is interpreted as CP1252.
+ Output is correct UTF-8.
*/
-int Fl_Text_Buffer::insertfile(const char *file, int pos, int /*buflen*/) {
+int Fl_Text_Buffer::insertfile(const char *file, int pos, int buflen)
+{
FILE *fp;
if (!(fp = fl_fopen(file, "r")))
return 1;
- fseek(fp, 0, SEEK_END);
- size_t filesize = ftell(fp);
- fseek(fp, 0, SEEK_SET);
- if (!filesize) return 0;
- char *buffer = new char[filesize+1];
- // Note: If we read Windows text files in text mode, then Windows
- // strips the <CR>'s from the text. Hence, rsize < filesize !
- size_t rsize = fread(buffer, 1, filesize, fp);
- if (rsize > 0) {
- buffer[rsize] = (char) 0;
+  // The file is converted and inserted in chunks of at most buflen bytes.
+  char *buffer = new char[buflen + 1];
+  char *endline, line[100];
+  int l;
+  int input_was_changed = false;
+  // insert() below may set was_modified; remember the value to restore it.
+  int saved_was_modified = was_modified;
+  // endline carries unread raw bytes between filter calls; start empty.
+  endline = line;
+  while (true) {
+#ifdef EIGHT_BIT_ENCODING
+    // example of 8-bit encoding: CP1252
+    l = eightbit_input_filter(buffer, buflen, line, sizeof(line), endline,
+                              cp1252toucs, fp);
+#else
+    l = utf8_input_filter(buffer, buflen, line, sizeof(line), endline,
+                          fp, &input_was_changed);
+#endif
+    if (l == 0) break;
+    buffer[l] = 0;
insert(pos, buffer);
- }
+    pos += l;
+  }
int e = ferror(fp) ? 2 : 0;
fclose(fp);
delete[]buffer;
+  // Pass the message as an argument, never as the format string:
+  // file_encoding_warning_message is a reassignable static pointer, and a
+  // '%' in a replacement text would otherwise be read as a conversion.
+  if (input_was_changed && !e) fl_alert("%s", file_encoding_warning_message);
+  // Loading a file does not count as a modification unless it was recoded.
+  was_modified = saved_was_modified || input_was_changed;
return e;
}
Index: test/editor.cxx
===================================================================
--- test/editor.cxx (revision 7966)
+++ test/editor.cxx (working copy)
@@ -487,11 +487,12 @@
void load_file(const char *newfile, int ipos) {
loading = 1;
int insert = (ipos != -1);
- changed = insert;
+ //changed = insert;
if (!insert) strcpy(filename, "");
int r;
if (!insert) r = textbuf->loadfile(newfile);
else r = textbuf->insertfile(newfile, ipos);
+ changed = textbuf->was_modified;
if (r)
fl_alert("Error reading from file \'%s\':\n%s.", newfile, strerror(errno));
else
_______________________________________________
fltk-bugs mailing list
[email protected]
http://lists.easysw.com/mailman/listinfo/fltk-bugs