Re: [elinks-dev] [0.12 PATCH] HTML: Rewrite parsing of meta refresh

2011-04-27 Thread Kalle Olavi Niemitalo
Kalle Olavi Niemitalo  writes:

> All this is consistent with Debian Iceweasel 3.5.16.

Less consistent than I thought... my patch lost support for
content="42, http://example.org/", which Iceweasel does support.
I'll add more test cases and perhaps post a revised patch later.


pgpHZiy9ySdgY.pgp
Description: PGP signature
___
elinks-dev mailing list
[email protected]
http://linuxfromscratch.org/mailman/listinfo/elinks-dev


[elinks-dev] [0.12 PATCH] HTML: Rewrite parsing of meta refresh

2011-04-27 Thread Kalle Olavi Niemitalo
The URL in <meta http-equiv="refresh" content="...">
can now freely contain spaces and semicolons.  There cannot be other
parameters between the delay and the URL.  If the URL is not quoted,
then it spans to the end of the attribute, except not to trailing
spaces.  If the URL is quoted, then it ends at the first closing
quotation mark.  All this is consistent with Debian Iceweasel 3.5.16.
---
 src/document/html/Makefile   |4 +-
 src/document/html/parse-meta-refresh.c   |   97 
 src/document/html/parse-meta-refresh.h   |   21 +++
 src/document/html/parser.c   |  170 +++---
 src/document/html/test/Makefile  |9 +
 src/document/html/test/parse-meta-refresh-test.c |  174 ++
 src/document/html/test/test-parse-meta-refresh   |3 +
 7 files changed, 325 insertions(+), 153 deletions(-)
 create mode 100644 src/document/html/parse-meta-refresh.c
 create mode 100644 src/document/html/parse-meta-refresh.h
 create mode 100644 src/document/html/test/Makefile
 create mode 100644 src/document/html/test/parse-meta-refresh-test.c
 create mode 100755 src/document/html/test/test-parse-meta-refresh

diff --git a/src/document/html/Makefile b/src/document/html/Makefile
index 5f7510b..91e7e08 100644
--- a/src/document/html/Makefile
+++ b/src/document/html/Makefile
@@ -1,7 +1,7 @@
 top_builddir=../../..
 include $(top_builddir)/Makefile.config
 
-SUBDIRS = parser
-OBJS   = frames.o parser.o renderer.o tables.o
+SUBDIRS = parser test
+OBJS   = frames.o parse-meta-refresh.o parser.o renderer.o tables.o
 
 include $(top_srcdir)/Makefile.lib
diff --git a/src/document/html/parse-meta-refresh.c b/src/document/html/parse-meta-refresh.c
new file mode 100644
index 000..b26135a
--- /dev/null
+++ b/src/document/html/parse-meta-refresh.c
@@ -0,0 +1,97 @@
+/* Parse <meta http-equiv="refresh" content="..."> */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include 
+#include 
+
+#include "elinks.h"
+
+#include "document/html/parse-meta-refresh.h"
+#include "osdep/ascii.h"
+#include "util/string.h"
+
+/* True if c is a space or ASCII_TAB, the only characters this file
+ * treats as white space.  The argument is evaluated twice, so it must
+ * not have side effects.  */
+#define LWS(c) ((c) == ' ' || (c) == ASCII_TAB)
+
+/* Parse the content attribute of a meta refresh element.
+ *
+ * The attribute is read as: optional white space, an optional delay
+ * (an optional sign followed by digits and '.'), any run of white
+ * space, ';' and ',' acting as separators, an optional "URL=" prefix
+ * (case-insensitive, white space allowed around the '='), and finally
+ * the URL itself.
+ *
+ * content:   NUL-terminated attribute value to parse.
+ * delay_out: receives the integer part of the delay; set to 0 if the
+ *            attribute has no delay or if parsing fails early.
+ * url_out:   receives a copy of the URL made with memacpy(), or NULL
+ *            if the attribute has no URL or if parsing fails.
+ *            NOTE(review): presumably the caller owns and must free
+ *            this copy -- confirm against memacpy() in util/string.h.
+ *
+ * Returns 0 on success (with or without a URL), or -1 if the
+ * attribute is blank, the delay is negative and nonzero, or the URL
+ * could not be copied.  */
+int
+html_parse_meta_refresh(const unsigned char *content,
+			unsigned long *delay_out,
+			unsigned char **url_out)
+{
+	const unsigned char *end_url = NULL;
+	const unsigned char *scan = content;
+	int negative = 0;
+	const unsigned char *lookahead;
+
+	*url_out = NULL;
+	*delay_out = 0;
+
+	while (LWS(*scan))
+		++scan;
+
+	/* An empty or all-blank attribute is not a refresh request.  */
+	if (!*scan)
+		return -1;
+
+	/* Is there something that looks vaguely like a number?
+	 * The sign is consumed only if a number really follows;
+	 * otherwise scan stays put and the text is taken as the URL.  */
+	lookahead = scan;
+	if (*lookahead == '-') {
+		negative = 1;
+		++lookahead;
+	} else if (*lookahead == '+') {
+		++lookahead;
+	}
+	if (isdigit(*lookahead) || *lookahead == '.') {
+		/* strtoul() stops at the first '.', so any fraction
+		 * is dropped and ".5" counts as a delay of 0.  */
+		unsigned long delay = strtoul(lookahead, NULL, 10);
+
+		/* "-0" is tolerated; any other negative delay is not.  */
+		if (negative && delay != 0)
+			return -1;
+		*delay_out = delay;
+
+		while (isdigit(*lookahead) || *lookahead == '.')
+			++lookahead;
+		scan = lookahead;
+	}
+
+	/* Skip the separator between the delay and the URL.  Both ';'
+	 * and ',' are accepted, so that content="42, http://example.org/"
+	 * yields the URL without a leading comma.  */
+	while (LWS(*scan) || *scan == ';' || *scan == ',')
+		++scan;
+
+	/* Skip "URL=" if any.  With at least one equals sign,
+	 * and optional spaces.  If no '=' follows "URL", nothing is
+	 * skipped and the word is treated as the start of the URL.  */
+	if ((scan[0] == 'U' || scan[0] == 'u')
+	    && (scan[1] == 'R' || scan[1] == 'r')
+	    && (scan[2] == 'L' || scan[2] == 'l')) {
+		lookahead = scan + 3;
+
+		while (LWS(*lookahead))
+			++lookahead;
+		if (*lookahead == '=') {
+			while (LWS(*lookahead) || *lookahead == '=')
+				++lookahead;
+			scan = lookahead;
+		}
+	}
+
+	if (*scan == '"' || *scan == '\'') {
+		/* Quoted URL: ends at the matching quotation mark, or
+		 * at the end of the attribute if the quote is never
+		 * closed.  */
+		unsigned char quote = *scan++;
+
+		end_url = strchr(scan, quote);
+		if (end_url == NULL)
+			end_url = strchr(scan, '\0');
+	} else {
+		/* Unquoted URL: spans to the end of the attribute,
+		 * minus trailing white space.  */
+		end_url = strchr(scan, '\0');
+		while (scan < end_url && LWS(end_url[-1]))
+			--end_url;
+	}
+
+	/* No URL at all is still a valid refresh; *url_out stays NULL.  */
+	if (end_url == scan)
+		return 0;
+
+	*url_out = memacpy(scan, end_url - scan);
+	if (*url_out)
+		return 0;
+	else
+		return -1;
+}
diff --git a/src/document/html/parse-meta-refresh.h b/src/document/html/parse-meta-refresh.h
new file mode 100644
index 000..d81409d
--- /dev/null
+++ b/src/document/html/parse-meta-refresh.h
@@ -0,0 +1,21 @@
+#ifndef EL__DOCUMENT_HTML_PARSE_META_REFRESH_H
+#define EL__DOCUMENT_HTML_PARSE_META_REFRESH_H
+
+/** Parses a \<meta http-equiv="refresh" content="..."> element.
+ *
+ * @param[in] content
+ *   The value of the content attribute, with entities already expanded.
+ * @param[out] delay
+ *   How many seconds to wait before