On Fri, May 10, 2002 at 06:19:11PM -0400, Geoff Hutchison wrote:
> On Fri, 10 May 2002, Lachlan Andrew wrote:
> > KDE help used to use  ht://Dig  to provide a search capability.
> > They changed the format of their files from HTML to  docbook  (XML).
> > For some reason,  ht://Dig  refuses to call the parser that one of
> > the KDE developers wrote.  The response was that it was not a bug,
> > but a calculated feature, because  ht://Dig  didn't know that no server
> > parsing was necessary.
> 
> For 3.2, the best approach is to either:
> a) Index using file:// URLs, which should use the appropriate mime.types
> file: <http://www.htdig.org/dev/htdig-3.2/attrs.html#mime_types>
> b) Code the RetrieveLocal method to produce temporary file:// URLs that
> are retrieved using the htnet/HtFile methods. (which again should use the
> appropriate mime.types file)

I don't understand why there needs to be a temporary  file://  URL.
I've attached a patch (against the latest beta, b3) in which
RetrieveLocal  explicitly calls the method from  HtFile  which checks
the MIME type.

Please let me know if this patch is unsuitable, and if so how I can
fix it.  If it is OK, I'll go ahead and implement  bad_local_ext  etc.

Regards,
Lachlan


*** htdig/Document.cc   Wed Jun 12 22:48:25 2002
--- htdig/Document.cc.lha       Wed Jun 12 22:46:02 2002
***************
*** 494,507 ****
      char *ext = strrchr((char*)*filename, '.');
      if (ext == NULL)
        return Transport::Document_not_local;
!     if ((mystrcasecmp(ext, ".html") == 0) || (mystrcasecmp(ext, ".htm") == 0))
!         contentType = "text/html";
!     else if ((mystrcasecmp(ext, ".txt") == 0) || (mystrcasecmp(ext, ".asc") == 0))
!         contentType = "text/plain";
!     else if ((mystrcasecmp(ext, ".pdf") == 0))
!         contentType = "application/pdf";
!     else if ((mystrcasecmp(ext, ".ps") == 0) || (mystrcasecmp(ext, ".eps") == 0))
!         contentType = "application/postscript";
      else 
        return Transport::Document_not_local;
  
--- 494,502 ----
      char *ext = strrchr((char*)*filename, '.');
      if (ext == NULL)
        return Transport::Document_not_local;
!     const String *type = HtFile::Ext2Mime (ext + 1);
!     if (type != NULL)
!       contentType = *type;
      else 
        return Transport::Document_not_local;
  
*** htnet/HtFile.cc     Wed Jun 12 22:48:50 2002
--- htnet/HtFile.cc.lha Wed Jun 12 22:46:15 2002
***************
*** 77,92 ****
  }
  
  
! ///////
!    //    Manages the requesting process
! ///////
! 
! HtFile::DocStatus HtFile::Request()
  {
     static Dictionary *mime_map = 0;
  
     if (!mime_map)
       {
         ifstream in(config["mime_types"].get());
         if (in)
           {
--- 77,92 ----
  }
  
  
! // Return mime type indicated by extension  ext  (which is assumed not
! // to contain the '.'), or  NULL  if  ext  is not a known mime type.
! const String *HtFile::Ext2Mime (const char *ext)
  {
     static Dictionary *mime_map = 0;
  
     if (!mime_map)
       {
+       if (debug > 2)
+           cout << "MIME types: " << config ["mime_types"].get() << endl;
         ifstream in(config["mime_types"].get());
         if (in)
           {
***************
*** 104,114 ****
--- 104,138 ----
                 String mime_type = split_line[0];
                 // Fill map with values.
                 for (int i = 1; i < split_line.Count(); i++)
+              {
+                if (debug > 3)
+                  cout << "MIME: " << split_line[i]
+                       << "\t-> " << mime_type << endl;
                   mime_map->Add(split_line[i], new String(mime_type));
+              }
               }
           }
       }
+    if (debug > 4)
+      cout << "Checking extension:  " << ext << endl;
+    if (mime_map)      // is this 'if' needed?
+      {
+        const String *mime_type = (String *)mime_map->Find(ext);
+        if (mime_type)
+          return mime_type;
+        else
+          return NULL;
+      }
+    else
+      return NULL;
+ }
+ 
+ ///////
+    //    Manages the requesting process
+ ///////
  
+ HtFile::DocStatus HtFile::Request()
+ {
     // Reset the response
     _response.Reset();
     
***************
*** 169,184 ****
     if (ext == NULL)
       return Transport::Document_not_local;
  
!    if (mime_map)
!      {
!        String *mime_type = (String *)mime_map->Find(ext + 1);
!        if (mime_type)
!          _response._content_type = *mime_type;
!        else
!          return Transport::Document_not_local;
!      }
     else
       {
         if ((mystrcasecmp(ext, ".html") == 0) || (mystrcasecmp(ext, ".htm") == 0))
           _response._content_type = "text/html";
         else if (mystrcasecmp(ext, ".txt") == 0)
--- 193,205 ----
     if (ext == NULL)
       return Transport::Document_not_local;
  
!    const String *mime_type = Ext2Mime (ext + 1);
!    if (mime_type)
! //     if (bad_local_ext (ext)) return Transport::Document_not_local; else
!      _response._content_type = *mime_type;
     else
       {
+        if (debug > 2) cout << "Extension  " << ext+1 << "  not found\n";
         if ((mystrcasecmp(ext, ".html") == 0) || (mystrcasecmp(ext, ".htm") == 0))
           _response._content_type = "text/html";
         else if (mystrcasecmp(ext, ".txt") == 0)
*** htnet/HtFile.h      Wed Jun 12 22:48:57 2002
--- htnet/HtFile.h.lha  Wed Jun 12 22:46:19 2002
***************
*** 63,68 ****
--- 63,72 ----
  
     // manages a Transport request (method inherited from Transport class)
     virtual DocStatus Request ();
+ 
+    // Determine Mime type of file
+    // (Does it belong here??)
+    static const String *Ext2Mime (const char *);
     
   ///////
      //    Interface for resource retrieving
*** htsearch/Display.cc Wed Jun 12 22:49:28 2002
--- htsearch/Display.cc.lha     Wed Jun 12 22:46:32 2002
***************
*** 35,40 ****
--- 35,41 ----
  #include <ctype.h>
  #include <syslog.h>
  #include <locale.h>
+ #include <float.h>            // for DBL_MAX  on Mandrake 8.2, gcc 2.96
  #include <math.h>
  
  #if !defined(DBL_MAX) && defined(MAXFLOAT)
*** installdir/mime.types       Wed Jun 12 22:49:45 2002
--- installdir/mime.types.lha   Wed Jun 12 22:46:44 2002
***************
*** 264,269 ****
--- 264,270 ----
  text/vnd.latex-z
  text/x-setext                 etx
  text/xml                      xml
+ text/docbook                  docbook
  video/mpeg                    mpeg mpg mpe
  video/quicktime                       qt mov
  video/vnd.motorola.video

-- 
Lachlan Andrew  [EMAIL PROTECTED]  Phone: +613 8344-3816 Fax: +613 8344-6678
Department of Electrical and Electronic Engineering        CRICOS Provider Code
University of Melbourne, Victoria, 3010    AUSTRALIA            00116K
*** htdig/Document.cc   Wed Jun 12 22:48:25 2002
--- htdig/Document.cc.lha       Wed Jun 12 22:46:02 2002
***************
*** 494,507 ****
      char *ext = strrchr((char*)*filename, '.');
      if (ext == NULL)
        return Transport::Document_not_local;
!     if ((mystrcasecmp(ext, ".html") == 0) || (mystrcasecmp(ext, ".htm") == 0))
!         contentType = "text/html";
!     else if ((mystrcasecmp(ext, ".txt") == 0) || (mystrcasecmp(ext, ".asc") == 0))
!         contentType = "text/plain";
!     else if ((mystrcasecmp(ext, ".pdf") == 0))
!         contentType = "application/pdf";
!     else if ((mystrcasecmp(ext, ".ps") == 0) || (mystrcasecmp(ext, ".eps") == 0))
!         contentType = "application/postscript";
      else 
        return Transport::Document_not_local;
  
--- 494,502 ----
      char *ext = strrchr((char*)*filename, '.');
      if (ext == NULL)
        return Transport::Document_not_local;
!     const String *type = HtFile::Ext2Mime (ext + 1);
!     if (type != NULL)
!       contentType = *type;
      else 
        return Transport::Document_not_local;
  
*** htnet/HtFile.cc     Wed Jun 12 22:48:50 2002
--- htnet/HtFile.cc.lha Wed Jun 12 22:46:15 2002
***************
*** 77,92 ****
  }
  
  
! ///////
!    //    Manages the requesting process
! ///////
! 
! HtFile::DocStatus HtFile::Request()
  {
     static Dictionary *mime_map = 0;
  
     if (!mime_map)
       {
         ifstream in(config["mime_types"].get());
         if (in)
           {
--- 77,92 ----
  }
  
  
! // Return mime type indicated by extension  ext  (which is assumed not
! // to contain the '.'), or  NULL  if  ext  is not a known mime type.
! const String *HtFile::Ext2Mime (const char *ext)
  {
     static Dictionary *mime_map = 0;
  
     if (!mime_map)
       {
+       if (debug > 2)
+           cout << "MIME types: " << config ["mime_types"].get() << endl;
         ifstream in(config["mime_types"].get());
         if (in)
           {
***************
*** 104,114 ****
--- 104,138 ----
                 String mime_type = split_line[0];
                 // Fill map with values.
                 for (int i = 1; i < split_line.Count(); i++)
+              {
+                if (debug > 3)
+                  cout << "MIME: " << split_line[i]
+                       << "\t-> " << mime_type << endl;
                   mime_map->Add(split_line[i], new String(mime_type));
+              }
               }
           }
       }
+    if (debug > 4)
+      cout << "Checking extension:  " << ext << endl;
+    if (mime_map)      // is this 'if' needed?
+      {
+        const String *mime_type = (String *)mime_map->Find(ext);
+        if (mime_type)
+          return mime_type;
+        else
+          return NULL;
+      }
+    else
+      return NULL;
+ }
+ 
+ ///////
+    //    Manages the requesting process
+ ///////
  
+ HtFile::DocStatus HtFile::Request()
+ {
     // Reset the response
     _response.Reset();
     
***************
*** 169,184 ****
     if (ext == NULL)
       return Transport::Document_not_local;
  
!    if (mime_map)
!      {
!        String *mime_type = (String *)mime_map->Find(ext + 1);
!        if (mime_type)
!          _response._content_type = *mime_type;
!        else
!          return Transport::Document_not_local;
!      }
     else
       {
         if ((mystrcasecmp(ext, ".html") == 0) || (mystrcasecmp(ext, ".htm") == 0))
           _response._content_type = "text/html";
         else if (mystrcasecmp(ext, ".txt") == 0)
--- 193,205 ----
     if (ext == NULL)
       return Transport::Document_not_local;
  
!    const String *mime_type = Ext2Mime (ext + 1);
!    if (mime_type)
! //     if (bad_local_ext (ext)) return Transport::Document_not_local; else
!      _response._content_type = *mime_type;
     else
       {
+        if (debug > 2) cout << "Extension  " << ext+1 << "  not found\n";
         if ((mystrcasecmp(ext, ".html") == 0) || (mystrcasecmp(ext, ".htm") == 0))
           _response._content_type = "text/html";
         else if (mystrcasecmp(ext, ".txt") == 0)
*** htnet/HtFile.h      Wed Jun 12 22:48:57 2002
--- htnet/HtFile.h.lha  Wed Jun 12 22:46:19 2002
***************
*** 63,68 ****
--- 63,72 ----
  
     // manages a Transport request (method inherited from Transport class)
     virtual DocStatus Request ();
+ 
+    // Determine Mime type of file
+    // (Does it belong here??)
+    static const String *Ext2Mime (const char *);
     
   ///////
      //    Interface for resource retrieving
*** htsearch/Display.cc Wed Jun 12 22:49:28 2002
--- htsearch/Display.cc.lha     Wed Jun 12 22:46:32 2002
***************
*** 35,40 ****
--- 35,41 ----
  #include <ctype.h>
  #include <syslog.h>
  #include <locale.h>
+ #include <float.h>            // for DBL_MAX  on Mandrake 8.2, gcc 2.96
  #include <math.h>
  
  #if !defined(DBL_MAX) && defined(MAXFLOAT)
*** installdir/mime.types       Wed Jun 12 22:49:45 2002
--- installdir/mime.types.lha   Wed Jun 12 22:46:44 2002
***************
*** 264,269 ****
--- 264,270 ----
  text/vnd.latex-z
  text/x-setext                 etx
  text/xml                      xml
+ text/docbook                  docbook
  video/mpeg                    mpeg mpg mpe
  video/quicktime                       qt mov
  video/vnd.motorola.video

Reply via email to