Hi,

We patched htdig 3.1.4 to support OpenSSL. Now you can index both HTTP and
HTTPS servers.

Have fun.

SAP IT BSS Internet Services
diff -u --recursive htdig-3.1.4.org/CONFIG htdig-3.1.4/CONFIG
--- htdig-3.1.4.org/CONFIG      Fri Dec 10 01:29:30 1999
+++ htdig-3.1.4/CONFIG  Wed Jan 19 14:39:56 2000
@@ -8,7 +8,7 @@
 # These variables are set by configure
 #
 # This specifies the root of the directory tree to be used by ht://Dig
-prefix=                 /opt/www/htdig
+prefix=                 /opt/htdig
 
 # This specifies the root of the directory tree to be used for programs
 # installed by ht://Dig
diff -u --recursive htdig-3.1.4.org/Makefile.config.in htdig-3.1.4/Makefile.config.in
--- htdig-3.1.4.org/Makefile.config.in  Fri Dec 10 01:28:21 1999
+++ htdig-3.1.4/Makefile.config.in      Wed Jan 19 14:39:35 2000
@@ -24,13 +24,13 @@
 SENDMAIL=      @SENDMAIL@
 
 DEFINES=       -DDEFAULT_CONFIG_FILE=\"$(DEFAULT_CONFIG_FILE)\"
-LIBDIRS=       -L../htlib -L../htcommon -L../db/dist -L/usr/lib
+LIBDIRS=       -L../htlib -L../htcommon -L../db/dist -L/usr/lib -L/opt/ssl/lib
 INCS=          -I$(top_srcdir)/htlib -I$(top_srcdir)/htcommon \
-               -I../db/dist -I../include
+               -I../db/dist -I../include -I/opt/ssl/include
 HTLIBS=                ../htcommon/libcommon.a \
                ../htlib/libht.a \
                ../db/dist/libdb.a
-LIBS=          $(HTLIBS) @LIBS@
+LIBS=          $(HTLIBS) @LIBS@ -lssl -lcrypto
 
 DIST=          @PACKAGE@-@VERSION@
 DISTDIR=       $(top_srcdir)/../$(DIST)
diff -u --recursive htdig-3.1.4.org/htcommon/DocumentDB.cc htdig-3.1.4/htcommon/DocumentDB.cc
--- htdig-3.1.4.org/htcommon/DocumentDB.cc      Fri Dec 10 01:28:44 1999
+++ htdig-3.1.4/htcommon/DocumentDB.cc  Thu Jan 20 10:16:44 2000
@@ -217,7 +217,7 @@
     while ((key = dbf->Get_Next()))
     {
        dbf->Get(key, data);
-       if (strncmp(HtURLCodec::instance()->decode(key), "http:", 5) == 0)
+       if (strncmp(HtURLCodec::instance()->decode(key), "http:", 5) == 0 || strncmp(HtURLCodec::instance()->decode(key), "https:", 6) == 0)
        {
            ref = new DocumentRef;
            ref->Deserialize(data);
@@ -284,7 +284,7 @@
     while ((coded_key = dbf->Get_Next()))
     {
        String key = HtURLCodec::instance()->decode(coded_key);
-       if (mystrncasecmp(key, "http:", 5) == 0)
+       if (mystrncasecmp(key, "http:", 5) == 0 || mystrncasecmp(key, "https:", 6) == 0)
        {
            DocumentRef *ref = (*this)[key];
            if (ref)
diff -u --recursive htdig-3.1.4.org/htcommon/defaults.cc htdig-3.1.4/htcommon/defaults.cc
--- htdig-3.1.4.org/htcommon/defaults.cc        Fri Dec 10 01:28:44 1999
+++ htdig-3.1.4/htcommon/defaults.cc    Thu Jan 20 10:16:37 2000
@@ -37,7 +37,7 @@
     {"bad_querystr",                    ""},
     {"bad_word_list",                  "${common_dir}/bad_words"},
     {"case_sensitive",                  "true"},
-    {"common_url_parts",                "http:// http://www. ftp:// ftp://ftp. /pub/ .html .htm .gif .jpg .jpeg /index.html /index.htm .com/ .com mailto:"},
+    {"common_url_parts",                "https:// https://www. http:// http://www. ftp:// ftp://ftp. /pub/ .html .htm .gif .jpg .jpeg /index.html /index.htm .com/ .com mailto:"},
     {"create_image_list",              "false"},
     {"create_url_list",                        "false"},
     {"compression_level",               "0"},
diff -u --recursive htdig-3.1.4.org/htdig/Document.cc htdig-3.1.4/htdig/Document.cc
--- htdig-3.1.4.org/htdig/Document.cc   Fri Dec 10 01:28:44 1999
+++ htdig-3.1.4/htdig/Document.cc       Fri Jan 21 10:33:46 2000
@@ -220,6 +220,7 @@
          tm.tm_year += 1900;
        tm.tm_yday = 0; // clear these to prevent problems in strftime()
        tm.tm_wday = 0;
+       tm.tm_isdst = -1;
        
        if (debug > 2)
          {
@@ -328,7 +329,7 @@
            return Document_no_host;
        }
     }
-       
+    c.assign_ssl(strcmp(url->service(), "https") == 0);
     if (c.connect(1) == NOTOK)
     {
        if (debug)
diff -u --recursive htdig-3.1.4.org/htdig/Images.cc htdig-3.1.4/htdig/Images.cc
--- htdig-3.1.4.org/htdig/Images.cc     Fri Dec 10 01:28:44 1999
+++ htdig-3.1.4/htdig/Images.cc Thu Jan 20 10:15:16 2000
@@ -61,7 +61,7 @@
 {
        String  u = url;
        URL             Url(url);
-       if (strcmp(Url.service(), "http") != 0)
+       if (strcmp(Url.service(), "http") != 0 && strcmp(Url.service(), "https") != 0)
                return 0;
 
        u.lowercase();
@@ -81,6 +81,7 @@
                        return 0;
                if (c.assign_server(Url.host()) == NOTOK)
                        return 0;
+               c.assign_ssl(strcmp(Url.service(), "https") == 0);
 
                if (c.connect(1) == NOTOK)
                {
diff -u --recursive htdig-3.1.4.org/htdig/Retriever.cc htdig-3.1.4/htdig/Retriever.cc
--- htdig-3.1.4.org/htdig/Retriever.cc  Fri Dec 10 01:28:44 1999
+++ htdig-3.1.4/htdig/Retriever.cc      Thu Jan 20 16:09:36 2000
@@ -117,8 +117,7 @@
 //   from == 2 add url from db.log 
 //   from == 3 urls in db.docs and there was a db.log 
 //
-void
-Retriever::Initial(char *list, int from)
+void Retriever::Initial(char *list, int from)
 {
     //
     // Split the list of urls up into individual urls.
@@ -137,10 +136,10 @@
            cout << "\t" << from << ":" << (int) log << ":" << url;
        if (!server)
        {
-           String robotsURL = "http://";
-           robotsURL << u.host() << "/robots.txt";
+           String robotsURL = u.service();
+           robotsURL << "://" << u.host() << "/robots.txt";
            String *localRobotsFile = GetLocal(robotsURL.get());
-           server = new Server(u.host(), u.port(), localRobotsFile);
+           server = new Server(u.host(), u.port(), strcmp(u.service(), "https") == 0, localRobotsFile);
            servers.Add(u.signature(), server);
            delete localRobotsFile;
        }
@@ -668,10 +667,10 @@
     // Currently, we only deal with HTTP URLs.  Gopher and ftp will
     // come later...  ***FIX***
     //
-    if (strstr(u, "/../") || strncmp(u, "http://", 7) != 0)
+    if (strstr(u, "/../") || (strncmp(u, "http://", 7) != 0 && strncmp(u, "https://", 8) != 0))
       {
        if (debug > 2)
-         cout << endl <<"   Rejected: Not an http or relative link!";
+         cout << endl <<"   Rejected: Not an http, https or relative link!";
        return FALSE;
       }
 
@@ -1172,10 +1171,10 @@
                    //
                    // Hadn't seen this server, yet.  Register it
                    //
-                   String robotsURL = "http://";
-                   robotsURL << url.host() << "/robots.txt";
+                   String robotsURL = url.service();
+                   robotsURL << "://" << url.host() << "/robots.txt";
                    String *localRobotsFile = GetLocal(robotsURL.get());
-                   server = new Server(url.host(), url.port(), localRobotsFile);
+                   server = new Server(url.host(), url.port(), strcmp(url.service(), "https") == 0, localRobotsFile);
                    servers.Add(url.signature(), server);
                    delete localRobotsFile;
                }
@@ -1305,10 +1304,10 @@
                    //
                    // Hadn't seen this server, yet.  Register it
                    //
-                   String robotsURL = "http://";
-                   robotsURL << url.host() << "/robots.txt";
+                   String robotsURL = url.service();
+                   robotsURL << "://" << url.host() << "/robots.txt";
                    String *localRobotsFile = GetLocal(robotsURL.get());
-                   server = new Server(url.host(), url.port(), localRobotsFile);
+                   server = new Server(url.host(), url.port(), strcmp(url.service(), "https") == 0, localRobotsFile);
                    servers.Add(url.signature(), server);
                    delete localRobotsFile;
                }
diff -u --recursive htdig-3.1.4.org/htdig/Server.cc htdig-3.1.4/htdig/Server.cc
--- htdig-3.1.4.org/htdig/Server.cc     Fri Dec 10 01:28:44 1999
+++ htdig-3.1.4/htdig/Server.cc Thu Jan 20 10:14:55 2000
@@ -20,9 +20,9 @@
 
 
 //*****************************************************************************
-// Server::Server(char *host, int port, String *local_robots_file)
+// Server::Server(char *host, int port, int ssl, String *local_robots_file)
 //
-Server::Server(char *host, int port, String *local_robots_file)
+Server::Server(char *host, int port, int ssl, String *local_robots_file)
 {
     if (debug > 0)
        cout << endl << "New server: " << host << ", " << port << endl;
@@ -40,7 +40,8 @@
     //
     // Attempt to get a robots.txt file from the specified server
     //
-    String     url = "http://";
+    String     url;
+    url = ssl ? "https://" : "http://";
     url << host << ':' << port << "/robots.txt";
     Document   doc(url, 0);
 
diff -u --recursive htdig-3.1.4.org/htdig/Server.h htdig-3.1.4/htdig/Server.h
--- htdig-3.1.4.org/htdig/Server.h      Fri Dec 10 01:28:44 1999
+++ htdig-3.1.4/htdig/Server.h  Thu Jan 20 10:14:56 2000
@@ -25,7 +25,7 @@
        //
        // Construction/Destruction
        //
-       Server(char *host, int port, String *local_robots_file = NULL);
+       Server(char *host, int port, int ssl, String *local_robots_file = NULL);
        ~Server();
 
        //
diff -u --recursive htdig-3.1.4.org/htlib/Connection.cc htdig-3.1.4/htlib/Connection.cc
--- htdig-3.1.4.org/htlib/Connection.cc Fri Dec 10 01:28:46 1999
+++ htdig-3.1.4/htlib/Connection.cc     Thu Jan 20 18:23:34 2000
@@ -39,6 +39,10 @@
     int rresvport(int *);
 }
 
+SSL_CTX *Connection::ctx = NULL;
+SSL_METHOD *Connection::meth = NULL;
+
+
 List   all_connections;
 
 Connection::Connection()
@@ -49,8 +53,26 @@
     server_name = 0;
     all_connections.Add(this);
     timeout_value = 0;
+    ssl = NULL;
+    m_ssl_on = 0;
+    initSSL();
 }
 
+void Connection::initSSL()
+{
+    if (ctx == NULL)
+    {
+       SSLeay_add_ssl_algorithms();
+       meth = SSLv2_client_method();
+       SSL_load_error_strings();
+       ctx = SSL_CTX_new(meth);
+       if (ctx == NULL)
+       {
+           printf("ctx NULL\n");
+           exit(1);
+       }
+    }
+}
 
 //*************************************************************************
 // Connection::Connection(int socket)
@@ -72,6 +94,9 @@
     server_name = 0;
     all_connections.Add(this);
     timeout_value = 0;
+    ssl = NULL;
+    m_ssl_on = 0;
+    initSSL();
 }
 
 
@@ -94,15 +119,15 @@
 {
     if (priv)
     {
-       int     aport = IPPORT_RESERVED - 1;
+               int     aport = IPPORT_RESERVED - 1;
 
-       sock = rresvport(&aport);
+               sock = rresvport(&aport);
     }
     else
-       sock = socket(AF_INET, SOCK_STREAM, 0);
+               sock = socket(AF_INET, SOCK_STREAM, 0);
 
     if (sock == NOTOK)
-       return NOTOK;
+               return NOTOK;
 
     int        on = 1;
     setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, (char *) &on, sizeof(on));
@@ -149,6 +174,8 @@
     if (sock >= 0)
     {
        int ret = ::close(sock);
+       SSL_free(ssl);
+       ssl = NULL;
        sock = -1;
        return ret;
     }
@@ -224,6 +251,16 @@
 
 
 //*****************************************************************************
+// int Connection::assign_ssl(int ssl_on)
+//
+int Connection::assign_ssl(int ssl_on)
+{
+    m_ssl_on = ssl_on;
+    return OK;
+}
+
+
+//*****************************************************************************
 // int Connection::connect(int allow_EINTR)
 //
 int Connection::connect(int allow_EINTR)
@@ -244,8 +281,24 @@
        
     if (status == 0 || errno == EALREADY || errno == EISCONN)
     {
-       connected = 1;
-       return OK;
+       if (m_ssl_on)
+       {
+           ssl = SSL_new(ctx);
+           if (ssl != NULL)
+           {
+               SSL_set_fd(ssl, sock);
+               if (SSL_connect(ssl) != -1)
+               {
+                   connected = 1;
+                   return OK;
+               }
+           }
+       }
+       else
+       {
+           connected = 1;
+           return OK;
+       }
     }
 #if 0
     if (status == ECONNREFUSED)
@@ -373,26 +426,32 @@
     need_io_stop = 0;
     do
     {
-      errno = 0;
+       errno = 0;
+
+       if (timeout_value > 0)
+       {
+           fd_set fds;
+           FD_ZERO(&fds);
+           FD_SET(sock, &fds);
+
+           timeval tv;
+           tv.tv_sec = timeout_value;
+           tv.tv_usec = 0;
+
+           int selected = ::select(sock+1, &fds, 0, 0, &tv);
+           if (selected <= 0)
+               need_io_stop++;
+       }
 
-      if (timeout_value > 0) {
-          fd_set fds;
-          FD_ZERO(&fds);
-          FD_SET(sock, &fds);
-
-          timeval tv;
-          tv.tv_sec = timeout_value;
-          tv.tv_usec = 0;
-
-          int selected = ::select(sock+1, &fds, 0, 0, &tv);
-          if (selected <= 0)
-              need_io_stop++;
-      }
-
-      if (!need_io_stop)
-          count = ::read(sock, buffer, maxlength);
-      else
-          count = -1;         // Input timed out
+       if (!need_io_stop)
+       {
+           if (ssl != NULL)
+               count = SSL_read(ssl, buffer, maxlength);
+           else
+               count = ::read(sock, buffer, maxlength);
+       }
+       else
+       count = -1;         // Input timed out
     }
     while (count < 0 && errno == EINTR && !need_io_stop);
     need_io_stop = 0;
@@ -410,7 +469,10 @@
 
     do
     {
-       count = ::write(sock, buffer, maxlength);
+       if (ssl != NULL)
+           count = SSL_write(ssl, buffer, maxlength);
+       else
+           count = ::write(sock, buffer, maxlength);
     }
     while (count < 0 && errno == EINTR && !need_io_stop);
     need_io_stop = 0;
diff -u --recursive htdig-3.1.4.org/htlib/Connection.h htdig-3.1.4/htlib/Connection.h
--- htdig-3.1.4.org/htlib/Connection.h  Fri Dec 10 01:28:46 1999
+++ htdig-3.1.4/htlib/Connection.h      Thu Jan 20 10:16:09 2000
@@ -36,6 +36,14 @@
 #include <netinet/in.h>
 #include <netdb.h>
 
+#include <openssl/rsa.h>
+#include <openssl/crypto.h>
+#include <openssl/x509.h>
+#include <openssl/pem.h>
+#include <openssl/ssl.h>
+#include <openssl/err.h>
+
+
 class String;
 
 class Connection : public io
@@ -45,6 +53,7 @@
     Connection();
     Connection(int socket);
     ~Connection();
+    void initSSL();
 
     // (De)initialization
     int                                open(int priv = 0);
@@ -64,6 +73,9 @@
     int                                assign_server(unsigned int addr = INADDR_ANY);
     char                               *get_server()           {return server_name;}
 
+       // SSL stuff
+       int                             assign_ssl(int ssl_on);
+
     // Connection establishment
     int                                connect(int allow_EINTR = 0);
     Connection                 *accept(int priv = 0);
@@ -90,6 +102,10 @@
 
 private:
     int                                sock;
+       int                             m_ssl_on;
+    SSL *ssl;
+    static SSL_CTX *ctx;
+    static SSL_METHOD *meth;
     struct sockaddr_in         server;
     int                                connected;
     char                               *peer;
diff -u --recursive htdig-3.1.4.org/htlib/URL.cc htdig-3.1.4/htlib/URL.cc
--- htdig-3.1.4.org/htlib/URL.cc        Fri Dec 10 01:28:47 1999
+++ htdig-3.1.4/htlib/URL.cc    Fri Jan 21 13:55:00 2000
@@ -130,8 +130,9 @@
     while (isalpha(*p))
        p++;
     int        hasService = (*p == ':');
-    if (hasService && ((strncmp(ref, "http://", 7) == 0) ||
-                      (strncmp(ref, "http:", 5) != 0)))
+    if (hasService && (
+           ((strncmp(ref, "http://", 7) == 0) || (strncmp(ref, "http:", 5) != 0)) ||
+               ((strncmp(ref, "https://", 8) == 0) || (strncmp(ref, "https:", 6) != 0))))
     {
        //
        // No need to look at the parent url since this is a complete url...
@@ -216,7 +217,7 @@
     _url << ":";
     if (_host.length())
        _url << "//" << _host;
-    if (_port != 80 && strcmp(_service, "http") == 0)
+    if (_port != 80 && (strcmp(_service, "http") == 0 || strcmp(_service, "https") == 0))
        _url << ':' << _port;
     _url << _path;
 }
@@ -464,7 +465,7 @@
     if (_service.length() == 0 || _normal)
        return;
 
-    if (strcmp(_service, "http") != 0)
+    if (strcmp(_service, "http") != 0 && strcmp(_service, "https") != 0)
        return;
 
     removeIndex(_path);
@@ -521,7 +522,7 @@
     _url << ":";
     if (_host.length())
        _url << "//" << _host;
-    if (_port != 80 && strcmp(_service, "http") == 0)
+    if (_port != 80 && (strcmp(_service, "http") == 0 || strcmp(_service, "https") == 0))
        _url << ':' << _port;
     _url << _path;
     _normal = 1;
diff -u --recursive htdig-3.1.4.org/htlib/URL.h htdig-3.1.4/htlib/URL.h
--- htdig-3.1.4.org/htlib/URL.h Fri Dec 10 01:28:47 1999
+++ htdig-3.1.4/htlib/URL.h     Thu Jan 20 15:58:47 2000
@@ -40,7 +40,7 @@
     void               host(char *h)           {_host = h;}
     int                        port()                  {return _port;}
     void               port(int p)             {_port = p;}
-    char               *service()              {return _service;}
+    char               *service()              {return _service.get();}
     void               service(char *s)        {_service = s;}
     char               *path()                 {return _path;}
     void               path(char *p);

------------------------------------
To unsubscribe from the htdig mailing list, send a message to
[EMAIL PROTECTED]
You will receive a message to confirm this.

Reply via email to