Please add support for the charset attribute of the HTML5 meta element. http://www.w3.org/TR/html5/semantics.html#the-meta-element
Because Lynx does not recognize the charset attribute on the meta element, the following site, which contains <meta charset="UTF-8">, is not displayed correctly: http://vim-jp.org (The information page shown by the '=' key reports "Charset: euc-jp"; the "Display character set" option is set to "Japanese (EUC-JP)".) Screen captures: Lynx 2.8.8dev.9: http://www1.interq.or.jp/~deton/lynx/meta-charset-NG.png Lynx 2.8.8dev.9 with patch: http://www1.interq.or.jp/~deton/lynx/meta-charset-OK.png Lynx.trace log: SGML: Unknown attribute charset for tag META SGML: Attribute value UTF-8 ***ignored SGML: Start <META> LYHandleMETA: HTTP-EQUIV="(null)" NAME="(null)" CONTENT="(null)" Here is a patch that adds support for the HTML5 meta charset attribute. diff -urp ../lynx2-8-8.orig/WWW/Library/Implementation/hdr_HTMLDTD.h ./WWW/Library/Implementation/hdr_HTMLDTD.h --- ../lynx2-8-8.orig/WWW/Library/Implementation/hdr_HTMLDTD.h 2011-06-13 09:18:54.000000000 +0900 +++ ./WWW/Library/Implementation/hdr_HTMLDTD.h 2011-09-27 20:53:44.000000000 +0900 @@ -670,11 +670,12 @@ extern "C" { #define HTML_MATH_TITLE 7 #define HTML_MATH_ATTRIBUTES 8 -#define HTML_META_CONTENT 0 -#define HTML_META_HTTP_EQUIV 1 -#define HTML_META_NAME 2 -#define HTML_META_SCHEME 3 -#define HTML_META_ATTRIBUTES 4 +#define HTML_META_CHARSET 0 +#define HTML_META_CONTENT 1 +#define HTML_META_HTTP_EQUIV 2 +#define HTML_META_NAME 3 +#define HTML_META_SCHEME 4 +#define HTML_META_ATTRIBUTES 5 #define HTML_NEXTID_N 0 #define HTML_NEXTID_ATTRIBUTES 1 diff -urp ../lynx2-8-8.orig/WWW/Library/Implementation/src0_HTMLDTD.h ./WWW/Library/Implementation/src0_HTMLDTD.h --- ../lynx2-8-8.orig/WWW/Library/Implementation/src0_HTMLDTD.h 2011-06-13 09:18:54.000000000 +0900 +++ ./WWW/Library/Implementation/src0_HTMLDTD.h 2011-09-27 20:53:44.000000000 +0900 @@ -847,6 +847,7 @@ static const AttrType MATH_attr_type[] = }; static const attr META_attr_list[] = { + { "CHARSET" T(N) }, { "CONTENT" T(N) }, { "HTTP-EQUIV" T(N) }, { "NAME" T(N) }, @@ -1794,6 +1795,7 @@ static const attr MATH_attr[] 
= { }; static const attr META_attr[] = { /* META attributes */ + { "CHARSET" T(N) }, { "CONTENT" T(N) }, { "HTTP-EQUIV" T(N) }, { "NAME" T(N) }, diff -urp ../lynx2-8-8.orig/WWW/Library/Implementation/src0_HTMLDTD.txt ./WWW/Library/Implementation/src0_HTMLDTD.txt --- ../lynx2-8-8.orig/WWW/Library/Implementation/src0_HTMLDTD.txt 2011-06-13 09:18:54.000000000 +0900 +++ ./WWW/Library/Implementation/src0_HTMLDTD.txt 2011-09-27 20:10:29.000000000 +0900 @@ -336,11 +336,12 @@ 0:0:BOX 1:0:CLEAR 40:META - 4 attributes: - 0:0:CONTENT - 1:0:HTTP-EQUIV - 2:0:NAME - 3:0:SCHEME + 5 attributes: + 0:0:CHARSET + 1:0:CONTENT + 2:0:HTTP-EQUIV + 3:0:NAME + 4:0:SCHEME 41:NEXTID 1 attributes: 0:0:N @@ -2509,11 +2510,12 @@ flags: 75:META justify - 4 attributes: - 0:0:CONTENT - 1:0:HTTP-EQUIV - 2:0:NAME - 3:0:SCHEME + 5 attributes: + 0:0:CHARSET + 1:0:CONTENT + 2:0:HTTP-EQUIV + 3:0:NAME + 4:0:SCHEME 1 attr_types META contents: SGML_EMPTY diff -urp ../lynx2-8-8.orig/WWW/Library/Implementation/src1_HTMLDTD.h ./WWW/Library/Implementation/src1_HTMLDTD.h --- ../lynx2-8-8.orig/WWW/Library/Implementation/src1_HTMLDTD.h 2011-06-13 09:18:54.000000000 +0900 +++ ./WWW/Library/Implementation/src1_HTMLDTD.h 2011-09-27 20:53:44.000000000 +0900 @@ -847,6 +847,7 @@ static const AttrType MATH_attr_type[] = }; static const attr META_attr_list[] = { + { "CHARSET" T(N) }, { "CONTENT" T(N) }, { "HTTP-EQUIV" T(N) }, { "NAME" T(N) }, @@ -1794,6 +1795,7 @@ static const attr MATH_attr[] = { }; static const attr META_attr[] = { /* META attributes */ + { "CHARSET" T(N) }, { "CONTENT" T(N) }, { "HTTP-EQUIV" T(N) }, { "NAME" T(N) }, diff -urp ../lynx2-8-8.orig/WWW/Library/Implementation/src1_HTMLDTD.txt ./WWW/Library/Implementation/src1_HTMLDTD.txt --- ../lynx2-8-8.orig/WWW/Library/Implementation/src1_HTMLDTD.txt 2011-06-13 09:18:54.000000000 +0900 +++ ./WWW/Library/Implementation/src1_HTMLDTD.txt 2011-09-27 20:11:33.000000000 +0900 @@ -336,11 +336,12 @@ 0:0:BOX 1:0:CLEAR 40:META - 4 attributes: - 0:0:CONTENT - 
1:0:HTTP-EQUIV - 2:0:NAME - 3:0:SCHEME + 5 attributes: + 0:0:CHARSET + 1:0:CONTENT + 2:0:HTTP-EQUIV + 3:0:NAME + 4:0:SCHEME 41:NEXTID 1 attributes: 0:0:N @@ -2509,11 +2510,12 @@ flags: 75:META justify - 4 attributes: - 0:0:CONTENT - 1:0:HTTP-EQUIV - 2:0:NAME - 3:0:SCHEME + 5 attributes: + 0:0:CHARSET + 1:0:CONTENT + 2:0:HTTP-EQUIV + 3:0:NAME + 4:0:SCHEME 1 attr_types META contents: SGML_EMPTY diff -urp ../lynx2-8-8.orig/src/LYCharUtils.c ./src/LYCharUtils.c --- ../lynx2-8-8.orig/src/LYCharUtils.c 2011-06-13 09:18:54.000000000 +0900 +++ ./src/LYCharUtils.c 2011-09-29 07:21:32.000000000 +0900 @@ -2029,7 +2029,7 @@ void LYHandleMETA(HTStructured * me, con const char **value, char **include GCC_UNUSED) { - char *http_equiv = NULL, *name = NULL, *content = NULL; + char *http_equiv = NULL, *name = NULL, *content = NULL, *charset = NULL; char *href = NULL, *id_string = NULL, *temp = NULL; char *cp, *cp0, *cp1 = NULL; int url_type = 0; @@ -2079,141 +2079,49 @@ void LYHandleMETA(HTStructured * me, con FREE(content); } } + if (present[HTML_META_CHARSET] && + non_empty(value[HTML_META_CHARSET])) { + StrAllocCopy(charset, value[HTML_META_CHARSET]); + convert_to_spaces(charset, TRUE); + LYUCTranslateHTMLString(&charset, me->tag_charset, me->tag_charset, + NO, NO, YES, st_other); + if (*charset == '\0') { + FREE(charset); + } + } CTRACE((tfp, - "LYHandleMETA: HTTP-EQUIV=\"%s\" NAME=\"%s\" CONTENT=\"%s\"\n", + "LYHandleMETA: HTTP-EQUIV=\"%s\" NAME=\"%s\" CONTENT=\"%s\" CHARSET=\"%s\"\n", NONNULL(http_equiv), NONNULL(name), - NONNULL(content))); + NONNULL(content), + NONNULL(charset))); /* - * Make sure we have META name/value pairs to handle. - FM + * Check for a text/html Content-Type with a charset directive, if we + * didn't already set the charset via a server's header. - AAC & FM */ - if (!(http_equiv || name) || !content) - goto free_META_copies; - - /* - * Check for a no-cache Pragma - * or Cache-Control directive. 
- FM - */ - if (!strcasecomp(NonNull(http_equiv), "Pragma") || - !strcasecomp(NonNull(http_equiv), "Cache-Control")) { - LYUCTranslateHTMLString(&content, me->tag_charset, me->tag_charset, - NO, NO, YES, st_other); - if (!strcasecomp(content, "no-cache")) { - me->node_anchor->no_cache = TRUE; - HText_setNoCache(me->text); - } - - /* - * If we didn't get a Cache-Control MIME header, and the META has one, - * convert to lowercase, store it in the anchor element, and if we - * haven't yet set no_cache, check whether we should. - FM - */ - if ((!me->node_anchor->cache_control) && - !strcasecomp(NonNull(http_equiv), "Cache-Control")) { - LYLowerCase(content); - StrAllocCopy(me->node_anchor->cache_control, content); - if (me->node_anchor->no_cache == FALSE) { - cp0 = content; - while ((cp = strstr(cp0, "no-cache")) != NULL) { - cp += 8; - while (*cp != '\0' && WHITE(*cp)) - cp++; - if (*cp == '\0' || *cp == ';') { - me->node_anchor->no_cache = TRUE; - HText_setNoCache(me->text); - break; - } - cp0 = cp; - } - if (me->node_anchor->no_cache == TRUE) - goto free_META_copies; - cp0 = content; - while ((cp = strstr(cp0, "max-age")) != NULL) { - cp += 7; - while (*cp != '\0' && WHITE(*cp)) - cp++; - if (*cp == '=') { - cp++; - while (*cp != '\0' && WHITE(*cp)) - cp++; - if (isdigit(UCH(*cp))) { - cp0 = cp; - while (isdigit(UCH(*cp))) - cp++; - if (*cp0 == '0' && cp == (cp0 + 1)) { - me->node_anchor->no_cache = TRUE; - HText_setNoCache(me->text); - break; - } - } - } - cp0 = cp; - } - } - } - - /* - * Check for an Expires directive. - FM - */ - } else if (!strcasecomp(NonNull(http_equiv), "Expires")) { - /* - * If we didn't get an Expires MIME header, store it in the anchor - * element, and if we haven't yet set no_cache, check whether we - * should. Note that we don't accept a Date header via META tags, - * because it's likely to be untrustworthy, but do check for a Date - * header from a server when making the comparison. 
- FM - */ - LYUCTranslateHTMLString(&content, me->tag_charset, me->tag_charset, - NO, NO, YES, st_other); - StrAllocCopy(me->node_anchor->expires, content); - if (me->node_anchor->no_cache == FALSE) { - if (!strcmp(content, "0")) { - /* - * The value is zero, which we treat as an absolute no-cache - * directive. - FM - */ - me->node_anchor->no_cache = TRUE; - HText_setNoCache(me->text); - } else if (me->node_anchor->date != NULL) { - /* - * We have a Date header, so check if the value is less than or - * equal to that. - FM - */ - if (LYmktime(content, TRUE) <= - LYmktime(me->node_anchor->date, TRUE)) { - me->node_anchor->no_cache = TRUE; - HText_setNoCache(me->text); - } - } else if (LYmktime(content, FALSE) == 0) { - /* - * We don't have a Date header, and the value is in past for - * us. - FM - */ - me->node_anchor->no_cache = TRUE; - HText_setNoCache(me->text); - } - } - - /* - * Check for a text/html Content-Type with a charset directive, if we - * didn't already set the charset via a server's header. 
- AAC & FM - */ - } else if (isEmpty(me->node_anchor->charset) && - !strcasecomp(NonNull(http_equiv), "Content-Type")) { + if (isEmpty(me->node_anchor->charset) && + (charset || + !strcasecomp(NonNull(http_equiv), "Content-Type") && content)) { LYUCcharset *p_in = NULL; LYUCcharset *p_out = NULL; - LYUCTranslateHTMLString(&content, me->tag_charset, me->tag_charset, - NO, NO, YES, st_other); - LYLowerCase(content); + if (charset) { + LYLowerCase(charset); + } else { + LYUCTranslateHTMLString(&content, me->tag_charset, me->tag_charset, + NO, NO, YES, st_other); + LYLowerCase(content); + } - if ((cp1 = strstr(content, "charset")) != NULL) { + if ((cp1 = charset) != NULL || + (cp1 = strstr(content, "charset")) != NULL) { BOOL chartrans_ok = NO; char *cp3 = NULL, *cp4; int chndl; - cp1 += 7; + if (!charset) + cp1 += 7; while (*cp1 == ' ' || *cp1 == '=' || *cp1 == '"') cp1++; @@ -2378,6 +2286,117 @@ void LYHandleMETA(HTStructured * me, con * Set the kcode element based on the charset. - FM */ HText_setKcode(me->text, me->node_anchor->charset, p_in); + } + + /* + * Make sure we have META name/value pairs to handle. - FM + */ + if (!(http_equiv || name) || !content) + goto free_META_copies; + + /* + * Check for a no-cache Pragma + * or Cache-Control directive. - FM + */ + if (!strcasecomp(NonNull(http_equiv), "Pragma") || + !strcasecomp(NonNull(http_equiv), "Cache-Control")) { + LYUCTranslateHTMLString(&content, me->tag_charset, me->tag_charset, + NO, NO, YES, st_other); + if (!strcasecomp(content, "no-cache")) { + me->node_anchor->no_cache = TRUE; + HText_setNoCache(me->text); + } + + /* + * If we didn't get a Cache-Control MIME header, and the META has one, + * convert to lowercase, store it in the anchor element, and if we + * haven't yet set no_cache, check whether we should. 
- FM + */ + if ((!me->node_anchor->cache_control) && + !strcasecomp(NonNull(http_equiv), "Cache-Control")) { + LYLowerCase(content); + StrAllocCopy(me->node_anchor->cache_control, content); + if (me->node_anchor->no_cache == FALSE) { + cp0 = content; + while ((cp = strstr(cp0, "no-cache")) != NULL) { + cp += 8; + while (*cp != '\0' && WHITE(*cp)) + cp++; + if (*cp == '\0' || *cp == ';') { + me->node_anchor->no_cache = TRUE; + HText_setNoCache(me->text); + break; + } + cp0 = cp; + } + if (me->node_anchor->no_cache == TRUE) + goto free_META_copies; + cp0 = content; + while ((cp = strstr(cp0, "max-age")) != NULL) { + cp += 7; + while (*cp != '\0' && WHITE(*cp)) + cp++; + if (*cp == '=') { + cp++; + while (*cp != '\0' && WHITE(*cp)) + cp++; + if (isdigit(UCH(*cp))) { + cp0 = cp; + while (isdigit(UCH(*cp))) + cp++; + if (*cp0 == '0' && cp == (cp0 + 1)) { + me->node_anchor->no_cache = TRUE; + HText_setNoCache(me->text); + break; + } + } + } + cp0 = cp; + } + } + } + + /* + * Check for an Expires directive. - FM + */ + } else if (!strcasecomp(NonNull(http_equiv), "Expires")) { + /* + * If we didn't get an Expires MIME header, store it in the anchor + * element, and if we haven't yet set no_cache, check whether we + * should. Note that we don't accept a Date header via META tags, + * because it's likely to be untrustworthy, but do check for a Date + * header from a server when making the comparison. - FM + */ + LYUCTranslateHTMLString(&content, me->tag_charset, me->tag_charset, + NO, NO, YES, st_other); + StrAllocCopy(me->node_anchor->expires, content); + if (me->node_anchor->no_cache == FALSE) { + if (!strcmp(content, "0")) { + /* + * The value is zero, which we treat as an absolute no-cache + * directive. - FM + */ + me->node_anchor->no_cache = TRUE; + HText_setNoCache(me->text); + } else if (me->node_anchor->date != NULL) { + /* + * We have a Date header, so check if the value is less than or + * equal to that. 
- FM + */ + if (LYmktime(content, TRUE) <= + LYmktime(me->node_anchor->date, TRUE)) { + me->node_anchor->no_cache = TRUE; + HText_setNoCache(me->text); + } + } else if (LYmktime(content, FALSE) == 0) { + /* + * We don't have a Date header, and the value is in past for + * us. - FM + */ + me->node_anchor->no_cache = TRUE; + HText_setNoCache(me->text); + } + } /* * Check for a Refresh directive. - FM @@ -2566,6 +2585,7 @@ void LYHandleMETA(HTStructured * me, con FREE(http_equiv); FREE(name); FREE(content); + FREE(charset); } /* _______________________________________________ Lynx-dev mailing list [email protected] https://lists.nongnu.org/mailman/listinfo/lynx-dev
