On Fri, May 10, 2002 at 06:19:11PM -0400, Geoff Hutchison wrote:
> On Fri, 10 May 2002, Lachlan Andrew wrote:
> > KDE help used to use ht://Dig to provide a search capability.
> > They changed the format of their files from HTML to docbook (XML).
> > For some reason, ht://Dig refuses to call the parser that one of
> > the KDE developers wrote. The response was that it was not a bug,
> > but a calculated feature, because ht://Dig didn't know that no server
> > parsing was necessary.
>
> For 3.2, the best approach is to either:
> a) Index using file:// URLs, which should use the appropriate mime.types
> file: <http://www.htdig.org/dev/htdig-3.2/attrs.html#mime_types>
> b) Code the RetrieveLocal method to produce temporary file:// URLs that
> are retrieved using the htnet/HtFile methods. (which again should use the
> appropriate mime.types file)
I don't understand why there needs to be a temporary file:// URL. I've attached a patch (against the latest beta, b3) in which RetrieveLocal explicitly calls the method from HtFile which checks the MIME type. Please let me know if this patch is unsuitable, and if so how I can fix it. If it is OK, I'll go ahead and implement bad_local_ext etc. Regards, Lachlan *** htdig/Document.cc Wed Jun 12 22:48:25 2002 --- htdig/Document.cc.lha Wed Jun 12 22:46:02 2002 *************** *** 494,507 **** char *ext = strrchr((char*)*filename, '.'); if (ext == NULL) return Transport::Document_not_local; ! if ((mystrcasecmp(ext, ".html") == 0) || (mystrcasecmp(ext, ".htm") == 0)) ! contentType = "text/html"; ! else if ((mystrcasecmp(ext, ".txt") == 0) || (mystrcasecmp(ext, ".asc") == 0)) ! contentType = "text/plain"; ! else if ((mystrcasecmp(ext, ".pdf") == 0)) ! contentType = "application/pdf"; ! else if ((mystrcasecmp(ext, ".ps") == 0) || (mystrcasecmp(ext, ".eps") == 0)) ! contentType = "application/postscript"; else return Transport::Document_not_local; --- 494,502 ---- char *ext = strrchr((char*)*filename, '.'); if (ext == NULL) return Transport::Document_not_local; ! const String *type = HtFile::Ext2Mime (ext + 1); ! if (type != NULL) ! contentType = *type; else return Transport::Document_not_local; *** htnet/HtFile.cc Wed Jun 12 22:48:50 2002 --- htnet/HtFile.cc.lha Wed Jun 12 22:46:15 2002 *************** *** 77,92 **** } ! /////// ! // Manages the requesting process ! /////// ! ! HtFile::DocStatus HtFile::Request() { static Dictionary *mime_map = 0; if (!mime_map) { ifstream in(config["mime_types"].get()); if (in) { --- 77,92 ---- } ! // Return mime type indicated by extension ext (which is assumed not ! // to contain the '.'), or NULL if ext is not a know mime type. ! 
const String *HtFile::Ext2Mime (const char *ext) { static Dictionary *mime_map = 0; if (!mime_map) { + if (debug > 2) + cout << "MIME types: " << config ["mime_types"].get() << endl; ifstream in(config["mime_types"].get()); if (in) { *************** *** 104,114 **** --- 104,138 ---- String mime_type = split_line[0]; // Fill map with values. for (int i = 1; i < split_line.Count(); i++) + { + if (debug > 3) + cout << "MIME: " << split_line[i] + << "\t-> " << mime_type << endl; mime_map->Add(split_line[i], new String(mime_type)); + } } } } + if (debug > 4) + cout << "Checking extension: " << ext << endl; + if (mime_map) // is this 'if' needed? + { + const String *mime_type = (String *)mime_map->Find(ext); + if (mime_type) + return mime_type; + else + return NULL; + } + else + return NULL; + } + + /////// + // Manages the requesting process + /////// + HtFile::DocStatus HtFile::Request() + { // Reset the response _response.Reset(); *************** *** 169,184 **** if (ext == NULL) return Transport::Document_not_local; ! if (mime_map) ! { ! String *mime_type = (String *)mime_map->Find(ext + 1); ! if (mime_type) ! _response._content_type = *mime_type; ! else ! return Transport::Document_not_local; ! } else { if ((mystrcasecmp(ext, ".html") == 0) || (mystrcasecmp(ext, ".htm") == 0)) _response._content_type = "text/html"; else if (mystrcasecmp(ext, ".txt") == 0) --- 193,205 ---- if (ext == NULL) return Transport::Document_not_local; ! const String *mime_type = Ext2Mime (ext + 1); ! if (mime_type) ! // if (bad_local_ext (ext)) return Transport::Document_not_local; else ! 
_response._content_type = *mime_type; else { + if (debug > 2) cout << "Extension " << ext+1 << " not found\n"; if ((mystrcasecmp(ext, ".html") == 0) || (mystrcasecmp(ext, ".htm") == 0)) _response._content_type = "text/html"; else if (mystrcasecmp(ext, ".txt") == 0) *** htnet/HtFile.h Wed Jun 12 22:48:57 2002 --- htnet/HtFile.h.lha Wed Jun 12 22:46:19 2002 *************** *** 63,68 **** --- 63,72 ---- // manages a Transport request (method inherited from Transport class) virtual DocStatus Request (); + + // Determine Mime type of file + // (Does it belong here??) + static const String *Ext2Mime (const char *); /////// // Interface for resource retrieving *** htsearch/Display.cc Wed Jun 12 22:49:28 2002 --- htsearch/Display.cc.lha Wed Jun 12 22:46:32 2002 *************** *** 35,40 **** --- 35,41 ---- #include <ctype.h> #include <syslog.h> #include <locale.h> + #include <float.h> // for DBL_MAX on Mandrake 8.2, gcc 2.96 #include <math.h> #if !defined(DBL_MAX) && defined(MAXFLOAT) *** installdir/mime.types Wed Jun 12 22:49:45 2002 --- installdir/mime.types.lha Wed Jun 12 22:46:44 2002 *************** *** 264,269 **** --- 264,270 ---- text/vnd.latex-z text/x-setext etx text/xml xml + text/docbook docbook video/mpeg mpeg mpg mpe video/quicktime qt mov video/vnd.motorola.video -- Lachlan Andrew [EMAIL PROTECTED] Phone: +613 8344-3816 Fax: +613 8344-6678 Department of Electrical and Electronic Engineering CRICOS Provider Code University of Melbourne, Victoria, 3010 AUSTRALIA 00116K
*** htdig/Document.cc Wed Jun 12 22:48:25 2002 --- htdig/Document.cc.lha Wed Jun 12 22:46:02 2002 *************** *** 494,507 **** char *ext = strrchr((char*)*filename, '.'); if (ext == NULL) return Transport::Document_not_local; ! if ((mystrcasecmp(ext, ".html") == 0) || (mystrcasecmp(ext, ".htm") == 0)) ! contentType = "text/html"; ! else if ((mystrcasecmp(ext, ".txt") == 0) || (mystrcasecmp(ext, ".asc") == 0)) ! contentType = "text/plain"; ! else if ((mystrcasecmp(ext, ".pdf") == 0)) ! contentType = "application/pdf"; ! else if ((mystrcasecmp(ext, ".ps") == 0) || (mystrcasecmp(ext, ".eps") == 0)) ! contentType = "application/postscript"; else return Transport::Document_not_local; --- 494,502 ---- char *ext = strrchr((char*)*filename, '.'); if (ext == NULL) return Transport::Document_not_local; ! const String *type = HtFile::Ext2Mime (ext + 1); ! if (type != NULL) ! contentType = *type; else return Transport::Document_not_local; *** htnet/HtFile.cc Wed Jun 12 22:48:50 2002 --- htnet/HtFile.cc.lha Wed Jun 12 22:46:15 2002 *************** *** 77,92 **** } ! /////// ! // Manages the requesting process ! /////// ! ! HtFile::DocStatus HtFile::Request() { static Dictionary *mime_map = 0; if (!mime_map) { ifstream in(config["mime_types"].get()); if (in) { --- 77,92 ---- } ! // Return mime type indicated by extension ext (which is assumed not ! // to contain the '.'), or NULL if ext is not a know mime type. ! const String *HtFile::Ext2Mime (const char *ext) { static Dictionary *mime_map = 0; if (!mime_map) { + if (debug > 2) + cout << "MIME types: " << config ["mime_types"].get() << endl; ifstream in(config["mime_types"].get()); if (in) { *************** *** 104,114 **** --- 104,138 ---- String mime_type = split_line[0]; // Fill map with values. 
for (int i = 1; i < split_line.Count(); i++) + { + if (debug > 3) + cout << "MIME: " << split_line[i] + << "\t-> " << mime_type << endl; mime_map->Add(split_line[i], new String(mime_type)); + } } } } + if (debug > 4) + cout << "Checking extension: " << ext << endl; + if (mime_map) // is this 'if' needed? + { + const String *mime_type = (String *)mime_map->Find(ext); + if (mime_type) + return mime_type; + else + return NULL; + } + else + return NULL; + } + + /////// + // Manages the requesting process + /////// + HtFile::DocStatus HtFile::Request() + { // Reset the response _response.Reset(); *************** *** 169,184 **** if (ext == NULL) return Transport::Document_not_local; ! if (mime_map) ! { ! String *mime_type = (String *)mime_map->Find(ext + 1); ! if (mime_type) ! _response._content_type = *mime_type; ! else ! return Transport::Document_not_local; ! } else { if ((mystrcasecmp(ext, ".html") == 0) || (mystrcasecmp(ext, ".htm") == 0)) _response._content_type = "text/html"; else if (mystrcasecmp(ext, ".txt") == 0) --- 193,205 ---- if (ext == NULL) return Transport::Document_not_local; ! const String *mime_type = Ext2Mime (ext + 1); ! if (mime_type) ! // if (bad_local_ext (ext)) return Transport::Document_not_local; else ! _response._content_type = *mime_type; else { + if (debug > 2) cout << "Extension " << ext+1 << " not found\n"; if ((mystrcasecmp(ext, ".html") == 0) || (mystrcasecmp(ext, ".htm") == 0)) _response._content_type = "text/html"; else if (mystrcasecmp(ext, ".txt") == 0) *** htnet/HtFile.h Wed Jun 12 22:48:57 2002 --- htnet/HtFile.h.lha Wed Jun 12 22:46:19 2002 *************** *** 63,68 **** --- 63,72 ---- // manages a Transport request (method inherited from Transport class) virtual DocStatus Request (); + + // Determine Mime type of file + // (Does it belong here??) 
+ static const String *Ext2Mime (const char *); /////// // Interface for resource retrieving *** htsearch/Display.cc Wed Jun 12 22:49:28 2002 --- htsearch/Display.cc.lha Wed Jun 12 22:46:32 2002 *************** *** 35,40 **** --- 35,41 ---- #include <ctype.h> #include <syslog.h> #include <locale.h> + #include <float.h> // for DBL_MAX on Mandrake 8.2, gcc 2.96 #include <math.h> #if !defined(DBL_MAX) && defined(MAXFLOAT) *** installdir/mime.types Wed Jun 12 22:49:45 2002 --- installdir/mime.types.lha Wed Jun 12 22:46:44 2002 *************** *** 264,269 **** --- 264,270 ---- text/vnd.latex-z text/x-setext etx text/xml xml + text/docbook docbook video/mpeg mpeg mpg mpe video/quicktime qt mov video/vnd.motorola.video