Well, that took a while but I have the contents of text nodes now showing if 
you are at db6.

- I bring in the tidybuffio.h so that I can make a TidyBuffer
- I bring in a TidyBuffer because the tidyNodeGetText routine puts its output 
in one.
- Unlike tidyDoc, it seems as though in order to free a TidyBuffer, you must 
test for null.
Otherwise the program seg faults, based on a thread I was reading.  
I have a check at the end of the routine to decide whether it is safe to call 
tidyBufFree.
I test the .size.  I'm not sure if this is correct - does anyone know?  It 
wouldn't let me compare the TidyBuffer object with null.
- I brought in the mechanics of the node traversal from the Tidy example, as 
well as from other snippets of work.  
So I introduced two routines from the sample code: dumpBody and dumpNode.  
I placed them just after encodeTags.
- Why do they hardcode those first several cases when they are switching on the 
node type to pick the node name?
I assume it is something to do with the laws of the W3C spec?
Like, are these branches that terminate, so you don't have to worry about 
additional levels?
Anyway, I left them alone.

Over to you!  I'm sure this will raise some fertile issues in what to do from 
here.

I hope there will not be \r introduced into this attachment.  If there is, the 
email client is ruled out as a culprit, and I'll worry about other causes.
thanks
Kevin
diff -Naur 1/edbrowse-master/src/html.c 2/edbrowse-master/src/html.c
--- 1/edbrowse-master/src/html.c        2015-08-27 14:18:35.000000000 -0700
+++ 2/edbrowse-master/src/html.c        2015-08-28 17:59:09.092626328 -0700
@@ -5,7 +5,7 @@
 
 #include "eb.h"
 #include "tidy.h"
-
+#include "tidybuffio.h"
 #define handlerPresent(obj, name) (has_property(obj, name) == EJ_PROP_FUNCTION)
 
 static TidyDoc tdoc;
@@ -1695,6 +1695,10 @@
                showTidyMessages = false;
        tidySetCharEncoding(tdoc, (cons_utf8 ? "utf8" : "latin1"));
        tidyParseString(tdoc, html);
+       if (debugLevel >= 5) {
+               tidyCleanAndRepair(tdoc);
+               dumpBody(tdoc);
+       }
 
        ns = initString(&ns_l);
        preamble = initString(&preamble_l);
@@ -2641,6 +2645,88 @@
        return ns;
 }                              /* encodeTags */
 
+void dumpBody(TidyDoc tdoc)
+{
+/* Debug aid, reached only at db5 or above: dump the nodes under tdoc's body */
+       dumpNode(tidyGetBody(tdoc), 0);
+}
+
+void dumpNode(TidyNode tnod, int indent)
+{
+/* Debugging aid, reached only at db5 or above: for each descendant of tnod */
+/* print its name and attributes and, at db6, the text of each text node. */
+/* indent grows by 4 per level; text is fetched via the file-scope tdoc. */
+       TidyNode child;
+       TidyAttr tattr;
+       TidyBuffer tnv = { 0 }; /* text-node value */
+       for (child = tidyGetChild(tnod); child; child = tidyGetNext(child)) {
+               ctmbstr name;
+               tidyBufClear(&tnv);
+               switch (tidyNodeGetType(child)) {
+               case TidyNode_Root:
+                       name = "Root";
+                       break;
+               case TidyNode_DocType:
+                       name = "DOCTYPE";
+                       break;
+               case TidyNode_Comment:
+                       name = "Comment";
+                       break;
+               case TidyNode_ProcIns:
+                       name = "Processing Instruction";
+                       break;
+               case TidyNode_Text:
+                       name = "Text";
+                       break;
+               case TidyNode_CDATA:
+                       name = "CDATA";
+                       break;
+               case TidyNode_Section:
+                       name = "XML Section";
+                       break;
+               case TidyNode_Asp:
+                       name = "ASP";
+                       break;
+               case TidyNode_Jste:
+                       name = "JSTE";
+                       break;
+               case TidyNode_Php:
+                       name = "PHP";
+                       break;
+               case TidyNode_XmlDecl:
+                       name = "XML Declaration";
+                       break;
+               case TidyNode_Start:
+               case TidyNode_End:
+               case TidyNode_StartEnd:
+               default:
+                       name = tidyNodeGetName(child);
+                       break;
+               }
+/* a node may lack a name; never hand NULL to printf's %s */
+               printf("Node(%d): %s\n", (indent / 4),
+                      name ? name : "(unnamed)");
+/* go by node type, not by the label: an element could be named "Text" */
+               if (debugLevel >= 6 && tidyNodeGetType(child) == TidyNode_Text
+                   && tidyNodeGetText(tdoc, child, &tnv) && tnv.bp != NULL) {
+/* no trailing newline because it appears that there already is one */
+                       printf("Text: %s", (const char *)tnv.bp);
+               }
+/* print every attribute; one without a value is shown as (null) */
+               for (tattr = tidyAttrFirst(child); tattr != NULL;
+                    tattr = tidyAttrNext(tattr)) {
+                       ctmbstr value = tidyAttrValue(tattr);
+                       printf("Attribute: %s = %s\n", tidyAttrName(tattr),
+                              value ? value : "(null)");
+               }
+               dumpNode(child, indent + 4);
+       }
+/* tidyBufClear resets size to 0 but keeps the memory, so test bp instead */
+       if (tnv.bp != NULL) {
+               tidyBufFree(&tnv);
+       }
+}
+
 void preFormatCheck(int tagno, bool * pretag, bool * slash)
 {
        const struct htmlTag *t;
_______________________________________________
Edbrowse-dev mailing list
[email protected]
http://lists.the-brannons.com/mailman/listinfo/edbrowse-dev

Reply via email to