[git:edid-decode/master] edid-decode: translate cp437 and ISO 8859-1 to UTF-8

Hans Verkuil Sat, 10 Aug 2024 01:49:13 -0700

This is an automatically generated email to let you know that the following patch 
was queued:


Subject: edid-decode: translate cp437 and ISO 8859-1 to UTF-8
Author:  Hans Verkuil <hverkuil-ci...@xs4all.nl>
Date:    Sat Aug 10 10:46:53 2024 +0200

If the new --utf8 option is used, then the various embedded strings
in EDID are printed as UTF-8 by converting the cp437 (base block)
or ISO 8859-1 (DisplayID block) characters to UTF-8.

Signed-off-by: Hans Verkuil <hverkuil-ci...@xs4all.nl>

 edid-decode.cpp           |  5 +++
 edid-decode.h             |  4 ++-
 parse-base-block.cpp      | 80 ++++++++++++++++++++++++++++++++++++++++-------
 parse-displayid-block.cpp |  2 +-
 4 files changed, 77 insertions(+), 14 deletions(-)

---

diff --git a/edid-decode.cpp b/edid-decode.cpp
index 863364415ff2..ff8f4a7fe48c 100644
--- a/edid-decode.cpp
+++ b/edid-decode.cpp
@@ -54,6 +54,7 @@ enum Option {
        OptPhysicalAddress = 'P',
        OptSkipHexDump = 's',
        OptShortTimings = 'S',
+       OptUTF8 = 'u',
        OptV4L2Timings = 'V',
        OptXModeLineTimings = 'X',
        OptSkipSHA = 128,
@@ -91,6 +92,7 @@ static struct option long_options[] = {
        { "skip-sha", no_argument, 0, OptSkipSHA },
        { "hide-serial-numbers", no_argument, 0, OptHideSerialNumbers },
        { "replace-unique-ids", no_argument, 0, OptReplaceUniqueIDs },
+       { "utf8", no_argument, 0, OptUTF8 },
        { "version", no_argument, 0, OptVersion },
        { "check-inline", no_argument, 0, OptCheckInline },
        { "check", no_argument, 0, OptCheck },
@@ -151,6 +153,7 @@ static void usage(void)
               "  --skip-sha            Skip the SHA report.\n"
               "  --hide-serial-numbers Hide serial numbers with '...'.\n"
               "  --replace-unique-ids  Replace unique IDs (serial numbers, 
dates, Container IDs) with fixed values.\n"
+              "  -u, --utf8            Convert strings in EDIDs to UTF-8.\n"
               "  --version             Show the edid-decode version (SHA).\n"
               "  --diagonal <inches>   Set the display's diagonal in inches.\n"
               "  --std <byte1>,<byte2> Show the standard timing represented by 
these two bytes.\n"
@@ -1475,6 +1478,8 @@ int edid_state::parse_edid()
        hide_serial_numbers = options[OptHideSerialNumbers];
        replace_unique_ids = options[OptReplaceUniqueIDs];
 
+       to_utf8 = options[OptUTF8];
+
        preparse_base_block(edid);
        if (replace_unique_ids)
                replace_checksum(edid, EDID_PAGE_SIZE);
diff --git a/edid-decode.h b/edid-decode.h
index ca6867db1289..bda212d6f934 100644
--- a/edid-decode.h
+++ b/edid-decode.h
@@ -608,7 +608,9 @@ const struct timings *find_hdmi_vic_id(unsigned char 
hdmi_vic);
 const struct timings *cta_close_match_to_vic(const timings &t, unsigned &vic);
 bool cta_matches_vic(const timings &t, unsigned &vic);
 unsigned char hdmi_vic_to_vic(unsigned char hdmi_vic);
-char *extract_string(const unsigned char *x, unsigned len);
+
+extern bool to_utf8;
+char *extract_string(const unsigned char *x, unsigned len, bool is_cp437);
 
 #define oneoui(c,k,n) const unsigned kOUI_##k = __LINE__<<12;
 #include "oui.h"
diff --git a/parse-base-block.cpp b/parse-base-block.cpp
index e97dad42539b..a2f0e7408ef8 100644
--- a/parse-base-block.cpp
+++ b/parse-base-block.cpp
@@ -489,11 +489,55 @@ void edid_state::detailed_cvt_descriptor(const char 
*prefix, const unsigned char
        }
 }
 
+// Base Block uses Code Page 437, unprintable characters are represented by ▯
+static const char *cp437[256] = {
+"â¯", "âº", "â»", "â¥", "â¦", "â£", "â ", "â¢", "â", "â", "â", 
"â", "â", "âª", "â«", "â¼",
+"âº", "â", "â", "â¼", "Â¶", "Â§", "â¬", "â¨", "â", "â", "â", 
"â", "â", "â", "â²", "â¼",
+" ", "!", "\"", "#", "$", "%", "&", "'", "(", ")", "*", "+", ",", "-", ".", 
"/",
+"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", ":", ";", "<", "=", ">", "?",
+"@", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O",
+"P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "[", "\\", "]", "^", 
"_",
+"`", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o",
+"p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "{", "|", "}", "~", 
"â",
+"Ã", "Ã¼", "Ã©", "Ã¢", "Ã¤", "Ã ", "Ã¥", "Ã§", "Ãª", "Ã«", "Ã¨", "Ã¯", "Ã®", 
"Ã¬", "Ã", "Ã",
+"Ã", "Ã¦", "Ã", "Ã´", "Ã¶", "Ã²", "Ã»", "Ã¹", "Ã¿", "Ã", "Ã", "Â¢", "Â£", 
"Â¥", "â§", "Æ",
+"Ã¡", "Ã", "Ã³", "Ãº", "Ã±", "Ã", "Âª", "Âº", "Â¿", "â", "Â¬", "Â½", "Â¼", 
"Â¡", "Â«", "Â»",
+"â", "â", "â", "â", "â¤", "â¡", "â¢", "â", "â", "â£", "â", 
"â", "â", "â", "â", "â",
+"â", "â´", "â¬", "â", "â", "â¼", "â", "â", "â", "â", "â©", 
"â¦", "â ", "â", "â¬", "â§",
+"â¨", "â¤", "â¥", "â", "â", "â", "â", "â«", "âª", "â", "â", 
"â", "â", "â", "â", "â",
+"Î±", "Ã", "Î", "Ï", "Î£", "Ï", "Âµ", "Ï", "Î¦", "Î", "Î©", "Î´", "â", 
"Ï", "Îµ", "â©",
+"â¡", "Â±", "â¥", "â¤", "â ", "â¡", "Ã·", "â", "Â°", "â", "Â·", 
"â", "â¿", "Â²", "â ", "â¯"
+};
+
+// DisplayID uses ISO 8859-1, unprintable chararcters are represented by ▯
+static const char *ascii[256] = {
+"â¯", "â¯", "â¯", "â¯", "â¯", "â¯", "â¯", "â¯", "â¯", "â¯", "â¯", 
"â¯", "â¯", "â¯", "â¯", "â¯",
+"â¯", "â¯", "â¯", "â¯", "â¯", "â¯", "â¯", "â¯", "â¯", "â¯", "â¯", 
"â¯", "â¯", "â¯", "â¯", "â¯",
+" ", "!", "\"", "#", "$", "%", "&", "'", "(", ")", "*", "+", ",", "-", ".", 
"/",
+"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", ":", ";", "<", "=", ">", "?",
+"@", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O",
+"P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "[", "\\", "]", "^", 
"_",
+"`", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o",
+"p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "{", "|", "}", "~", 
"â¯",
+"â¯", "â¯", "â¯", "â¯", "â¯", "â¯", "â¯", "â¯", "â¯", "â¯", "â¯", 
"â¯", "â¯", "â¯", "â¯", "â¯",
+"â¯", "â¯", "â¯", "â¯", "â¯", "â¯", "â¯", "â¯", "â¯", "â¯", "â¯", 
"â¯", "â¯", "â¯", "â¯", "â¯",
+"â¯", "Â¡", "Â¢", "Â£", "Â¤", "Â¥", "Â¦", "Â§", "Â¨", "Â©", "Âª", "Â«", "Â¬", 
"â¯", "Â®", "Â¯",
+"Â°", "Â±", "Â²", "Â³", "Â´", "Âµ", "Â¶", "Â·", "Â¸", "Â¹", "Âº", "Â»", "Â¼", 
"Â½", "Â¾", "Â¿",
+"Ã", "Ã", "Ã", "Ã", "Ã", "Ã", "Ã", "Ã", "Ã", "Ã", "Ã", "Ã", "Ã", 
"Ã", "Ã", "Ã",
+"Ã", "Ã", "Ã", "Ã", "Ã", "Ã", "Ã", "Ã", "Ã", "Ã", "Ã", "Ã", "Ã", 
"Ã", "Ã", "Ã",
+"Ã ", "Ã¡", "Ã¢", "Ã£", "Ã¤", "Ã¥", "Ã¦", "Ã§", "Ã¨", "Ã©", "Ãª", "Ã«", "Ã¬", 
"Ã", "Ã®", "Ã¯",
+"Ã°", "Ã±", "Ã²", "Ã³", "Ã´", "Ãµ", "Ã¶", "Ã·", "Ã¸", "Ã¹", "Ãº", "Ã»", "Ã¼", 
"Ã½", "Ã¾", "Ã¿"
+};
+
+bool to_utf8 = false;
+
 /* extract a string from a detailed subblock, checking for termination */
-char *extract_string(const unsigned char *x, unsigned len)
+char *extract_string(const unsigned char *x, unsigned len, bool is_cp437)
 {
-       static char s[EDID_PAGE_SIZE];
+       static char s[1025];
+       const char **conv = is_cp437 ? cp437 : ascii;
        bool seen_newline = false;
+       bool added_space = false;
        unsigned i;
 
        memset(s, 0, sizeof(s));
@@ -512,8 +556,9 @@ char *extract_string(const unsigned char *x, unsigned len)
                        seen_newline = true;
                        if (!i)
                                fail("Empty string.\n");
-                       else if (s[i - 1] == 0x20)
+                       else if (added_space)
                                fail("One or more trailing spaces before 
newline.\n");
+                       added_space = false;
                } else if (!x[i]) {
                        // While incorrect, a \0 is often used to end the string
                        fail("NUL byte at position %u.\n", i);
@@ -524,15 +569,26 @@ char *extract_string(const unsigned char *x, unsigned len)
                        fail("0xff byte at position %u.\n", i);
                        return s;
                } else if (!non_ascii) {
-                       s[i] = x[i];
+                       added_space = x[i] == ' ';
+                       if (to_utf8)
+                               strcat(s, conv[x[i]]);
+                       else
+                               s[i] = x[i];
                } else {
-                       warn("Non-ASCII character 0x%02x at position %u, can 
cause problems.\n",
-                            x[i], i);
-                       s[i] = '.';
+                       if (to_utf8) {
+                               warn("Non-ASCII character 0x%02x (%s) at 
position %u, can cause problems.\n",
+                                    x[i], conv[x[i]], i);
+                               strcat(s, conv[x[i]]);
+                       } else {
+                               warn("Non-ASCII character 0x%02x at position 
%u, can cause problems.\n",
+                                    x[i], i);
+                               s[i] = '.';
+                       }
+                       added_space = false;
                }
        }
        /* Does the string end with a space? */
-       if (!seen_newline && s[len - 1] == 0x20)
+       if (!seen_newline && added_space)
                fail("No newline, but one or more trailing spaces.\n");
 
        return s;
@@ -1134,7 +1190,7 @@ bool edid_state::preparse_detailed_block(unsigned char *x)
                break;
        case 0xff:
                data_block = "Display Product Serial Number";
-               serial_strings.push_back(extract_string(x + 5, 13));
+               serial_strings.push_back(extract_string(x + 5, 13, true));
                data_block.clear();
                if (replace_unique_ids) {
                        // Replace with 123456
@@ -1281,7 +1337,7 @@ void edid_state::detailed_block(const unsigned char *x)
        case 0xfc:
                data_block = "Display Product Name";
                base.has_name_descriptor = 1;
-               printf("    %s: '%s'\n", data_block.c_str(), extract_string(x + 
5, 13));
+               printf("    %s: '%s'\n", data_block.c_str(), extract_string(x + 
5, 13, true));
                return;
        case 0xfd:
                detailed_display_range_limits(x);
@@ -1290,7 +1346,7 @@ void edid_state::detailed_block(const unsigned char *x)
                if (!base.has_spwg || base.detailed_block_cnt < 3) {
                        data_block = "Alphanumeric Data String";
                        printf("    %s: '%s'\n", data_block.c_str(),
-                              extract_string(x + 5, 13));
+                              extract_string(x + 5, 13, true));
                        return;
                }
                if (base.detailed_block_cnt == 3) {
@@ -1303,7 +1359,7 @@ void edid_state::detailed_block(const unsigned char *x)
                                fail("Invalid PC Maker P/N length.\n");
                        printf("      SPWG PC Maker P/N: '%s'\n", buf);
                        printf("      SPWG LCD Supplier EEDID Revision: 
%hhu\n", x[10]);
-                       printf("      SPWG Manufacturer P/N: '%s'\n", 
extract_string(x + 11, 7));
+                       printf("      SPWG Manufacturer P/N: '%s'\n", 
extract_string(x + 11, 7, true));
                } else {
                        data_block = "SPWG Descriptor #4";
                        printf("    %s:\n", data_block.c_str());
diff --git a/parse-displayid-block.cpp b/parse-displayid-block.cpp
index f4c8fc047737..7e8da41d7f5a 100644
--- a/parse-displayid-block.cpp
+++ b/parse-displayid-block.cpp
@@ -563,7 +563,7 @@ void edid_state::parse_displayid_string(const unsigned char 
*x)
 {
        check_displayid_datablock_revision(x[1]);
        if (check_displayid_datablock_length(x))
-               printf("    Text: '%s'\n", extract_string(x + 3, x[2]));
+               printf("    Text: '%s'\n", extract_string(x + 3, x[2], true));
 }
 
 // tag 0x0c

[git:edid-decode/master] edid-decode: translate cp437 and ISO 8859-1 to UTF-8

Reply via email to