Hi all, In the spirit of continuing patches, here's a patch against htdig-3.0.8b2 that I wrote at the request of Brian Kariger. It defines a new config file option, "use_meta_description", which is false by default. Setting it to true makes htdig check for <META NAME="description"> tags and, if one exists with non-empty content, use that content as the document excerpt. Comments, questions and bug reports should be directed to me, -Geoff Hutchison Williams Students Online http://wso.williams.edu/ *** htcommon/defaults.cc.orig Tue Jan 6 13:18:12 1998 --- htcommon/defaults.cc Sat Mar 21 10:33:47 1998 *************** *** 112,117 **** --- 112,118 ---- {"title_factor", "100"}, {"url_list", "${database_base}.urls"}, {"use_star_image", "true"}, + {"use_meta_description", "false"}, {"valid_punctuation", ".-_/!#$%^&*'"}, {"version", HTDIG_VERSION}, {"word_db", "${database_base}.words.gdbm"}, *** htdig/HTML.h.orig Sat Mar 21 13:31:49 1998 --- htdig/HTML.h Sat Mar 21 10:44:22 1998 *************** *** 45,50 **** --- 45,51 ---- int in_ref; int in_heading; int doindex; + int dohead; int minimumWordLength; URL *base; *** htdig/HTML.cc.orig Sat Mar 21 21:12:00 1998 --- htdig/HTML.cc Sat Mar 21 20:41:50 1998 *************** *** 66,71 **** --- 66,72 ---- in_heading = 0; base = 0; doindex = 1; + dohead = 1; minimumWordLength = config.Value("minimum_word_length", 3); } *************** *** 103,108 **** --- 104,110 ---- start = position; title = 0; head = 0; + dohead = 1; doindex = 1; in_heading = 0; in_title = 0; *************** *** 231,237 **** // // Append the word to the head (excerpt) // ! head << word; } if (word.length() >= minimumWordLength && doindex) --- 233,240 ---- // // Append the word to the head (excerpt) // ! if (dohead) ! head << word; } if (word.length() >= minimumWordLength && doindex) *************** *** 260,266 **** // if (!in_space) { ! if (head.length() < max_head_length) { head << ' '; } --- 263,269 ---- // if (!in_space) { ! 
if (head.length() < max_head_length && dohead) { head << ' '; } *************** *** 280,286 **** // // Not whitespace // ! if (head.length() < max_head_length) { head << *position; } --- 283,289 ---- // // Not whitespace // ! if (head.length() < max_head_length && dohead) { head << *position; } *************** *** 503,509 **** } case 19: // "li" ! if (doindex && head.length() < max_head_length) head << "* "; break; --- 506,512 ---- } case 19: // "li" ! if (doindex && head.length() < max_head_length && dohead) head << "* "; break; *************** *** 588,593 **** --- 591,608 ---- { doindex = 0; } + else if (mystrcasecmp(cache, "description") == 0 + && config.Boolean("use_meta_description") + && strlen(conf["content"]) != 0) + { + head = conf["content"]; + if (head.length() > max_head_length) + head = head.sub(0, max_head_length); + if (debug > 0) + cout << "META Description: " << conf["content"] << endl; + retriever.got_head(head); + dohead = 0; + } } else if (conf["name"] && mystrcasecmp(conf["name"], "htdig-noindex") == 0) ---------------------------------------------------------------------- To unsubscribe from the htdig mailing list, send a message to [EMAIL PROTECTED] containing the single word "unsubscribe" in the body of the message.
