Well, that took a while but I have the contents of text nodes now showing if
you are at db6.
- I bring in the tidybuffio.h so that I can make a TidyBuffer
- I bring in a TidyBuffer because the tidyNodeGetText routine puts its output
in one.
- Unlike tidyDoc, it seems as though in order to free a TidyBuffer, you must
test for null.
Otherwise the program seg faults, based on a thread I was reading.
I have a check at the end of the routine to decide whether it is safe
to call tidyBufFree.
I test the .size. I'm not sure if this is correct - does anyone know? It
wouldn't let me compare the TidyBuffer object with null.
- The mechanics of the node traversal, I brought in from the Tidy example as
well as other snippets of work.
So I introduced two routines from the sample code: dumpBody and dumpNode.
I placed them just after encodeTags.
- Why do they hardcode those first several cases when they are switching on the
node name?
I assume it is something to do with the laws of the W3C spec?
Like, are these branches that terminate, so you don't have to worry about
additional levels?
Anyway, I left them alone.
Over to you! I'm sure this will raise some fertile issues in what to do from
here.
I hope there will not be \r introduced into this attachment. If there is, the
email client is ruled out as a culprit, and I'll worry about other causes.
thanks
Kevin
diff -Naur 1/edbrowse-master/src/html.c 2/edbrowse-master/src/html.c
--- 1/edbrowse-master/src/html.c 2015-08-27 14:18:35.000000000 -0700
+++ 2/edbrowse-master/src/html.c 2015-08-28 17:59:09.092626328 -0700
@@ -5,7 +5,7 @@
#include "eb.h"
#include "tidy.h"
-
+#include "tidybuffio.h"
#define handlerPresent(obj, name) (has_property(obj, name) == EJ_PROP_FUNCTION)
static TidyDoc tdoc;
@@ -1695,6 +1695,10 @@
showTidyMessages = false;
tidySetCharEncoding(tdoc, (cons_utf8 ? "utf8" : "latin1"));
tidyParseString(tdoc, html);
+ if (debugLevel >= 5) {
+ tidyCleanAndRepair(tdoc);
+ dumpBody(tdoc);
+ }
ns = initString(&ns_l);
preamble = initString(&preamble_l);
@@ -2641,6 +2645,88 @@
return ns;
} /* encodeTags */
+void dumpBody(TidyDoc tdoc)
+{
+/* Debug only (called at db5 or above): dump the tree below tdoc's body node */
+ dumpNode(tidyGetBody(tdoc), 0);
+}
+
+void dumpNode(TidyNode tnod, int indent)
+{
+/* Debug only (db5+): print every child of tnod - its type or tag name, then */
+/* its attributes - and recurse with indent + 4; the depth shown is indent / 4. */
+/* At debugLevel >= 6 the content of Text nodes is printed as well. */
+	TidyNode child;
+	TidyBuffer tnv = { 0 };	/* text-node value */
+	for (child = tidyGetChild(tnod); child; child = tidyGetNext(child)) {
+		ctmbstr name;
+		tidyBufClear(&tnv);
+		switch (tidyNodeGetType(child)) {
+		case TidyNode_Root:
+			name = "Root";
+			break;
+		case TidyNode_DocType:
+			name = "DOCTYPE";
+			break;
+		case TidyNode_Comment:
+			name = "Comment";
+			break;
+		case TidyNode_ProcIns:
+			name = "Processing Instruction";
+			break;
+		case TidyNode_Text:
+			name = "Text";
+			break;
+		case TidyNode_CDATA:
+			name = "CDATA";
+			break;
+		case TidyNode_Section:
+			name = "XML Section";
+			break;
+		case TidyNode_Asp:
+			name = "ASP";
+			break;
+		case TidyNode_Jste:
+			name = "JSTE";
+			break;
+		case TidyNode_Php:
+			name = "PHP";
+			break;
+		case TidyNode_XmlDecl:
+			name = "XML Declaration";
+			break;
+		case TidyNode_Start:
+		case TidyNode_End:
+		case TidyNode_StartEnd:
+		default:
+			name = tidyNodeGetName(child);
+			break;
+		}
+		assert(name != NULL);
+		printf("Node(%d): %s\n", (indent / 4), name);
+		if (debugLevel >= 6 && tidyNodeGetType(child) == TidyNode_Text
+		    && tidyNodeGetText(tdoc, child, &tnv) && tnv.bp != NULL) {
+/* no trailing newline because it appears that there already is one */
+			printf("Text: %s", (const char *)tnv.bp);
+		}
+
+/* Attributes; a value may be NULL (none given), which %s must never receive. */
+		TidyAttr tattr = tidyAttrFirst(child);
+		while (tattr != NULL) {
+			ctmbstr aval = tidyAttrValue(tattr);
+			printf("Attribute: %s = %s\n", tidyAttrName(tattr),
+			       aval ? aval : "(null)");
+			tattr = tidyAttrNext(tattr);
+		}
+		dumpNode(child, indent + 4);
+	}
+/* tidyBufClear resets .size but keeps the allocation, so .size cannot tell us */
+/* whether there is memory to release; the data pointer can. */
+	if (tnv.bp != NULL) {
+		tidyBufFree(&tnv);
+	}
+}
+
void preFormatCheck(int tagno, bool * pretag, bool * slash)
{
const struct htmlTag *t;
_______________________________________________
Edbrowse-dev mailing list
[email protected]
http://lists.the-brannons.com/mailman/listinfo/edbrowse-dev