Hi all,

In the spirit of continuing patches, here's a patch against htdig-3.0.8b2 that
I wrote at the request of Brian Kariger. It defines a new config file option,
"use_meta_description", which is false by default. When it is set to true, the
HTML parser checks for <META NAME="description"> tags and, if one exists and
its content isn't empty, uses that content (truncated to max_head_length) as
the excerpt instead of the text collected from the document body.

Comments, questions, and bug reports should be directed to me,
-Geoff Hutchison
Williams Students Online
http://wso.williams.edu/

*** htcommon/defaults.cc.orig   Tue Jan  6 13:18:12 1998
--- htcommon/defaults.cc        Sat Mar 21 10:33:47 1998
***************
*** 112,117 ****
--- 112,118 ----
      {"title_factor",                  "100"},
      {"url_list",                      "${database_base}.urls"},
      {"use_star_image",                        "true"},
+     {"use_meta_description",            "false"},
      {"valid_punctuation",             ".-_/!#$%^&*'"},
      {"version",                               HTDIG_VERSION},
      {"word_db",                               "${database_base}.words.gdbm"},
*** htdig/HTML.h.orig   Sat Mar 21 13:31:49 1998
--- htdig/HTML.h        Sat Mar 21 10:44:22 1998
***************
*** 45,50 ****
--- 45,51 ----
      int                       in_ref;
      int                       in_heading;
      int                       doindex;
+     int                       dohead;
      int                       minimumWordLength;
      URL                       *base;

*** htdig/HTML.cc.orig  Sat Mar 21 21:12:00 1998
--- htdig/HTML.cc       Sat Mar 21 20:41:50 1998
***************
*** 66,71 ****
--- 66,72 ----
      in_heading = 0;
      base = 0;
      doindex = 1;
+     dohead = 1;
      minimumWordLength = config.Value("minimum_word_length", 3);
  }

***************
*** 103,108 ****
--- 104,110 ----
      start = position;
      title = 0;
      head = 0;
+     dohead = 1;
      doindex = 1;
      in_heading = 0;
      in_title = 0;
***************
*** 231,237 ****
                //
                // Append the word to the head (excerpt)
                //
!               head << word;
            }

            if (word.length() >= minimumWordLength && doindex)
--- 233,240 ----
                //
                // Append the word to the head (excerpt)
                //
!               if (dohead)
!                 head << word;
            }

            if (word.length() >= minimumWordLength && doindex)
***************
*** 260,266 ****
                    //
                    if (!in_space)
                    {
!                       if (head.length() < max_head_length)
                        {
                            head << ' ';
                        }
--- 263,269 ----
                    //
                    if (!in_space)
                    {
!                       if (head.length() < max_head_length && dohead)
                        {
                            head << ' ';
                        }
***************
*** 280,286 ****
                    //
                    // Not whitespace
                    //
!                   if (head.length() < max_head_length)
                    {
                        head << *position;
                    }
--- 283,289 ----
                    //
                    // Not whitespace
                    //
!                   if (head.length() < max_head_length && dohead)
                    {
                        head << *position;
                    }
***************
*** 503,509 ****
        }

        case 19:        // "li"
!           if (doindex && head.length() < max_head_length)
                head << "* ";
            break;

--- 506,512 ----
        }

        case 19:        // "li"
!           if (doindex && head.length() < max_head_length && dohead)
                head << "* ";
            break;

***************
*** 588,593 ****
--- 591,608 ----
                {
                    doindex = 0;
                }
+               else if (mystrcasecmp(cache, "description") == 0
+                        && config.Boolean("use_meta_description")
+                        && strlen(conf["content"]) != 0)
+                 {
+                   head = conf["content"];
+                   if (head.length() > max_head_length)
+                     head = head.sub(0, max_head_length);
+                   if (debug > 0)
+                     cout << "META Description: " << conf["content"] << endl;
+                   retriever.got_head(head);
+                   dohead = 0;
+                 }
            }
            else if (conf["name"] &&
                     mystrcasecmp(conf["name"], "htdig-noindex") == 0)


----------------------------------------------------------------------
To unsubscribe from the htdig mailing list, send a message to
[EMAIL PROTECTED] containing the single word "unsubscribe" in
the body of the message.

Reply via email to