Hi,
We patched htdig 3.1.4 to support OpenSSL. Now you can index https servers
as well as http servers.
Have fun.
SAP IT BSS Internet Services
diff -u --recursive htdig-3.1.4.org/CONFIG htdig-3.1.4/CONFIG
--- htdig-3.1.4.org/CONFIG Fri Dec 10 01:29:30 1999
+++ htdig-3.1.4/CONFIG Wed Jan 19 14:39:56 2000
@@ -8,7 +8,7 @@
# These variables are set by configure
#
# This specifies the root of the directory tree to be used by ht://Dig
-prefix= /opt/www/htdig
+prefix= /opt/htdig
# This specifies the root of the directory tree to be used for programs
# installed by ht://Dig
diff -u --recursive htdig-3.1.4.org/Makefile.config.in htdig-3.1.4/Makefile.config.in
--- htdig-3.1.4.org/Makefile.config.in Fri Dec 10 01:28:21 1999
+++ htdig-3.1.4/Makefile.config.in Wed Jan 19 14:39:35 2000
@@ -24,13 +24,13 @@
SENDMAIL= @SENDMAIL@
DEFINES= -DDEFAULT_CONFIG_FILE=\"$(DEFAULT_CONFIG_FILE)\"
-LIBDIRS= -L../htlib -L../htcommon -L../db/dist -L/usr/lib
+LIBDIRS= -L../htlib -L../htcommon -L../db/dist -L/usr/lib -L/opt/ssl/lib
INCS= -I$(top_srcdir)/htlib -I$(top_srcdir)/htcommon \
- -I../db/dist -I../include
+ -I../db/dist -I../include -I/opt/ssl/include
HTLIBS= ../htcommon/libcommon.a \
../htlib/libht.a \
../db/dist/libdb.a
-LIBS= $(HTLIBS) @LIBS@
+LIBS= $(HTLIBS) @LIBS@ -lssl -lcrypto
DIST= @PACKAGE@-@VERSION@
DISTDIR= $(top_srcdir)/../$(DIST)
diff -u --recursive htdig-3.1.4.org/htcommon/DocumentDB.cc
htdig-3.1.4/htcommon/DocumentDB.cc
--- htdig-3.1.4.org/htcommon/DocumentDB.cc Fri Dec 10 01:28:44 1999
+++ htdig-3.1.4/htcommon/DocumentDB.cc Thu Jan 20 10:16:44 2000
@@ -217,7 +217,7 @@
while ((key = dbf->Get_Next()))
{
dbf->Get(key, data);
- if (strncmp(HtURLCodec::instance()->decode(key), "http:", 5) == 0)
+ if (strncmp(HtURLCodec::instance()->decode(key), "http:", 5) == 0 ||
+strncmp(HtURLCodec::instance()->decode(key), "https:", 6) == 0)
{
ref = new DocumentRef;
ref->Deserialize(data);
@@ -284,7 +284,7 @@
while ((coded_key = dbf->Get_Next()))
{
String key = HtURLCodec::instance()->decode(coded_key);
- if (mystrncasecmp(key, "http:", 5) == 0)
+ if (mystrncasecmp(key, "http:", 5) == 0 || mystrncasecmp(key, "https:", 6) ==
+0)
{
DocumentRef *ref = (*this)[key];
if (ref)
diff -u --recursive htdig-3.1.4.org/htcommon/defaults.cc
htdig-3.1.4/htcommon/defaults.cc
--- htdig-3.1.4.org/htcommon/defaults.cc Fri Dec 10 01:28:44 1999
+++ htdig-3.1.4/htcommon/defaults.cc Thu Jan 20 10:16:37 2000
@@ -37,7 +37,7 @@
{"bad_querystr", ""},
{"bad_word_list", "${common_dir}/bad_words"},
{"case_sensitive", "true"},
- {"common_url_parts", "http:// http://www. ftp:// ftp://ftp. /pub/
.html .htm .gif .jpg .jpeg /index.html /index.htm .com/ .com mailto:"},
+ {"common_url_parts", "https:// https://www. http:// http://www.
+ftp:// ftp://ftp. /pub/ .html .htm .gif .jpg .jpeg /index.html /index.htm .com/ .com
+mailto:"},
{"create_image_list", "false"},
{"create_url_list", "false"},
{"compression_level", "0"},
diff -u --recursive htdig-3.1.4.org/htdig/Document.cc htdig-3.1.4/htdig/Document.cc
--- htdig-3.1.4.org/htdig/Document.cc Fri Dec 10 01:28:44 1999
+++ htdig-3.1.4/htdig/Document.cc Fri Jan 21 10:33:46 2000
@@ -220,6 +220,7 @@
tm.tm_year += 1900;
tm.tm_yday = 0; // clear these to prevent problems in strftime()
tm.tm_wday = 0;
+ tm.tm_isdst = -1;
if (debug > 2)
{
@@ -328,7 +329,7 @@
return Document_no_host;
}
}
-
+ c.assign_ssl(strcmp(url->service(), "https") == 0);
if (c.connect(1) == NOTOK)
{
if (debug)
diff -u --recursive htdig-3.1.4.org/htdig/Images.cc htdig-3.1.4/htdig/Images.cc
--- htdig-3.1.4.org/htdig/Images.cc Fri Dec 10 01:28:44 1999
+++ htdig-3.1.4/htdig/Images.cc Thu Jan 20 10:15:16 2000
@@ -61,7 +61,7 @@
{
String u = url;
URL Url(url);
- if (strcmp(Url.service(), "http") != 0)
+ if (strcmp(Url.service(), "http") != 0 && strcmp(Url.service(), "https") != 0)
return 0;
u.lowercase();
@@ -81,6 +81,7 @@
return 0;
if (c.assign_server(Url.host()) == NOTOK)
return 0;
+ c.assign_ssl(strcmp(Url.service(), "https") == 0);
if (c.connect(1) == NOTOK)
{
diff -u --recursive htdig-3.1.4.org/htdig/Retriever.cc htdig-3.1.4/htdig/Retriever.cc
--- htdig-3.1.4.org/htdig/Retriever.cc Fri Dec 10 01:28:44 1999
+++ htdig-3.1.4/htdig/Retriever.cc Thu Jan 20 16:09:36 2000
@@ -117,8 +117,7 @@
// from == 2 add url from db.log
// from == 3 urls in db.docs and there was a db.log
//
-void
-Retriever::Initial(char *list, int from)
+void Retriever::Initial(char *list, int from)
{
//
// Split the list of urls up into individual urls.
@@ -137,10 +136,10 @@
cout << "\t" << from << ":" << (int) log << ":" << url;
if (!server)
{
- String robotsURL = "http://";
- robotsURL << u.host() << "/robots.txt";
+ String robotsURL = u.service();
+ robotsURL << "://" << u.host() << "/robots.txt";
String *localRobotsFile = GetLocal(robotsURL.get());
- server = new Server(u.host(), u.port(), localRobotsFile);
+ server = new Server(u.host(), u.port(), strcmp(u.service(), "https") == 0,
+localRobotsFile);
servers.Add(u.signature(), server);
delete localRobotsFile;
}
@@ -668,10 +667,10 @@
// Currently, we only deal with HTTP URLs. Gopher and ftp will
// come later... ***FIX***
//
- if (strstr(u, "/../") || strncmp(u, "http://", 7) != 0)
+ if (strstr(u, "/../") || (strncmp(u, "http://", 7) != 0 && strncmp(u, "https://",
+8) != 0))
{
if (debug > 2)
- cout << endl <<" Rejected: Not an http or relative link!";
+ cout << endl <<" Rejected: Not an http, https or relative link!";
return FALSE;
}
@@ -1172,10 +1171,10 @@
//
// Hadn't seen this server, yet. Register it
//
- String robotsURL = "http://";
- robotsURL << url.host() << "/robots.txt";
+ String robotsURL = url.service();
+ robotsURL << "://" << url.host() << "/robots.txt";
String *localRobotsFile = GetLocal(robotsURL.get());
- server = new Server(url.host(), url.port(), localRobotsFile);
+ server = new Server(url.host(), url.port(), strcmp(url.service(),
+"https") == 0, localRobotsFile);
servers.Add(url.signature(), server);
delete localRobotsFile;
}
@@ -1305,10 +1304,10 @@
//
// Hadn't seen this server, yet. Register it
//
- String robotsURL = "http://";
- robotsURL << url.host() << "/robots.txt";
+ String robotsURL = url.service();
+ robotsURL << "://" << url.host() << "/robots.txt";
String *localRobotsFile = GetLocal(robotsURL.get());
- server = new Server(url.host(), url.port(), localRobotsFile);
+ server = new Server(url.host(), url.port(), strcmp(url.service(),
+"https") == 0, localRobotsFile);
servers.Add(url.signature(), server);
delete localRobotsFile;
}
diff -u --recursive htdig-3.1.4.org/htdig/Server.cc htdig-3.1.4/htdig/Server.cc
--- htdig-3.1.4.org/htdig/Server.cc Fri Dec 10 01:28:44 1999
+++ htdig-3.1.4/htdig/Server.cc Thu Jan 20 10:14:55 2000
@@ -20,9 +20,9 @@
//*****************************************************************************
-// Server::Server(char *host, int port, String *local_robots_file)
+// Server::Server(char *host, int port, int ssl, String *local_robots_file)
//
-Server::Server(char *host, int port, String *local_robots_file)
+Server::Server(char *host, int port, int ssl, String *local_robots_file)
{
if (debug > 0)
cout << endl << "New server: " << host << ", " << port << endl;
@@ -40,7 +40,8 @@
//
// Attempt to get a robots.txt file from the specified server
//
- String url = "http://";
+ String url;
+ url = ssl ? "https://" : "http://";
url << host << ':' << port << "/robots.txt";
Document doc(url, 0);
diff -u --recursive htdig-3.1.4.org/htdig/Server.h htdig-3.1.4/htdig/Server.h
--- htdig-3.1.4.org/htdig/Server.h Fri Dec 10 01:28:44 1999
+++ htdig-3.1.4/htdig/Server.h Thu Jan 20 10:14:56 2000
@@ -25,7 +25,7 @@
//
// Construction/Destruction
//
- Server(char *host, int port, String *local_robots_file = NULL);
+ Server(char *host, int port, int ssl, String *local_robots_file = NULL);
~Server();
//
diff -u --recursive htdig-3.1.4.org/htlib/Connection.cc htdig-3.1.4/htlib/Connection.cc
--- htdig-3.1.4.org/htlib/Connection.cc Fri Dec 10 01:28:46 1999
+++ htdig-3.1.4/htlib/Connection.cc Thu Jan 20 18:23:34 2000
@@ -39,6 +39,10 @@
int rresvport(int *);
}
+SSL_CTX *Connection::ctx = NULL;
+SSL_METHOD *Connection::meth = NULL;
+
+
List all_connections;
Connection::Connection()
@@ -49,8 +53,26 @@
server_name = 0;
all_connections.Add(this);
timeout_value = 0;
+ ssl = NULL;
+ m_ssl_on = 0;
+ initSSL();
}
+void Connection::initSSL() // One-time setup of the static SSL context used by every Connection.
+{
+ if (ctx == NULL) // the static ctx doubles as the "already initialised" flag
+ {
+ SSLeay_add_ssl_algorithms();
+ meth = SSLv23_client_method(); // negotiate the best common SSL/TLS version instead of SSLv2 only
+ SSL_load_error_strings();
+ ctx = SSL_CTX_new(meth);
+ if (ctx == NULL)
+ {
+ printf("ctx NULL\n"); // no context could be created: report it and terminate the process
+ exit(1);
+ }
+ }
+}
//*************************************************************************
// Connection::Connection(int socket)
@@ -72,6 +94,9 @@
server_name = 0;
all_connections.Add(this);
timeout_value = 0;
+ ssl = NULL;
+ m_ssl_on = 0;
+ initSSL();
}
@@ -94,15 +119,15 @@
{
if (priv)
{
- int aport = IPPORT_RESERVED - 1;
+ int aport = IPPORT_RESERVED - 1;
- sock = rresvport(&aport);
+ sock = rresvport(&aport);
}
else
- sock = socket(AF_INET, SOCK_STREAM, 0);
+ sock = socket(AF_INET, SOCK_STREAM, 0);
if (sock == NOTOK)
- return NOTOK;
+ return NOTOK;
int on = 1;
setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, (char *) &on, sizeof(on));
@@ -149,6 +174,8 @@
if (sock >= 0)
{
int ret = ::close(sock);
+ SSL_free(ssl);
+ ssl = NULL;
sock = -1;
return ret;
}
@@ -224,6 +251,16 @@
//*****************************************************************************
+// int Connection::assign_ssl(int ssl_on)
+// Stores the flag that connect() checks to decide whether to perform an SSL handshake.
+int Connection::assign_ssl(int ssl_on)
+{
+ m_ssl_on = ssl_on; // nonzero requests SSL; the callers in this patch pass the result of an "https" service test
+ return OK; // always succeeds
+}
+
+
+//*****************************************************************************
// int Connection::connect(int allow_EINTR)
//
int Connection::connect(int allow_EINTR)
@@ -244,8 +281,24 @@
if (status == 0 || errno == EALREADY || errno == EISCONN)
{
- connected = 1;
- return OK;
+ if (m_ssl_on)
+ {
+ ssl = SSL_new(ctx);
+ if (ssl != NULL)
+ {
+ SSL_set_fd(ssl, sock);
+ if (SSL_connect(ssl) != -1)
+ {
+ connected = 1;
+ return OK;
+ }
+ }
+ }
+ else
+ {
+ connected = 1;
+ return OK;
+ }
}
#if 0
if (status == ECONNREFUSED)
@@ -373,26 +426,32 @@
need_io_stop = 0;
do
{
- errno = 0;
+ errno = 0;
+
+ if (timeout_value > 0)
+ {
+ fd_set fds;
+ FD_ZERO(&fds);
+ FD_SET(sock, &fds);
+
+ timeval tv;
+ tv.tv_sec = timeout_value;
+ tv.tv_usec = 0;
+
+ int selected = ::select(sock+1, &fds, 0, 0, &tv);
+ if (selected <= 0)
+ need_io_stop++;
+ }
- if (timeout_value > 0) {
- fd_set fds;
- FD_ZERO(&fds);
- FD_SET(sock, &fds);
-
- timeval tv;
- tv.tv_sec = timeout_value;
- tv.tv_usec = 0;
-
- int selected = ::select(sock+1, &fds, 0, 0, &tv);
- if (selected <= 0)
- need_io_stop++;
- }
-
- if (!need_io_stop)
- count = ::read(sock, buffer, maxlength);
- else
- count = -1; // Input timed out
+ if (!need_io_stop)
+ {
+ if (ssl != NULL)
+ count = SSL_read(ssl, buffer, maxlength);
+ else
+ count = ::read(sock, buffer, maxlength);
+ }
+ else
+ count = -1; // Input timed out
}
while (count < 0 && errno == EINTR && !need_io_stop);
need_io_stop = 0;
@@ -410,7 +469,10 @@
do
{
- count = ::write(sock, buffer, maxlength);
+ if (ssl != NULL)
+ count = SSL_write(ssl, buffer, maxlength);
+ else
+ count = ::write(sock, buffer, maxlength);
}
while (count < 0 && errno == EINTR && !need_io_stop);
need_io_stop = 0;
diff -u --recursive htdig-3.1.4.org/htlib/Connection.h htdig-3.1.4/htlib/Connection.h
--- htdig-3.1.4.org/htlib/Connection.h Fri Dec 10 01:28:46 1999
+++ htdig-3.1.4/htlib/Connection.h Thu Jan 20 10:16:09 2000
@@ -36,6 +36,14 @@
#include <netinet/in.h>
#include <netdb.h>
+#include <openssl/rsa.h>
+#include <openssl/crypto.h>
+#include <openssl/x509.h>
+#include <openssl/pem.h>
+#include <openssl/ssl.h>
+#include <openssl/err.h>
+
+
class String;
class Connection : public io
@@ -45,6 +53,7 @@
Connection();
Connection(int socket);
~Connection();
+ void initSSL();
// (De)initialization
int open(int priv = 0);
@@ -64,6 +73,9 @@
int assign_server(unsigned int addr = INADDR_ANY);
char *get_server() {return server_name;}
+ // SSL stuff
+ int assign_ssl(int ssl_on);
+
// Connection establishment
int connect(int allow_EINTR = 0);
Connection *accept(int priv = 0);
@@ -90,6 +102,10 @@
private:
int sock;
+ int m_ssl_on;
+ SSL *ssl;
+ static SSL_CTX *ctx;
+ static SSL_METHOD *meth;
struct sockaddr_in server;
int connected;
char *peer;
diff -u --recursive htdig-3.1.4.org/htlib/URL.cc htdig-3.1.4/htlib/URL.cc
--- htdig-3.1.4.org/htlib/URL.cc Fri Dec 10 01:28:47 1999
+++ htdig-3.1.4/htlib/URL.cc Fri Jan 21 13:55:00 2000
@@ -130,8 +130,9 @@
while (isalpha(*p))
p++;
int hasService = (*p == ':');
- if (hasService && ((strncmp(ref, "http://", 7) == 0) ||
- (strncmp(ref, "http:", 5) != 0)))
+ if (hasService && (
+ ((strncmp(ref, "http://", 7) == 0) || (strncmp(ref, "http:", 5) != 0)) ||
+ ((strncmp(ref, "https://", 8) == 0) || (strncmp(ref, "https:", 6) !=
+0))))
{
//
// No need to look at the parent url since this is a complete url...
@@ -216,7 +217,7 @@
_url << ":";
if (_host.length())
_url << "//" << _host;
- if (_port != 80 && strcmp(_service, "http") == 0)
+ if (_port != 80 && (strcmp(_service, "http") == 0 || strcmp(_service, "https") ==
+0))
_url << ':' << _port;
_url << _path;
}
@@ -464,7 +465,7 @@
if (_service.length() == 0 || _normal)
return;
- if (strcmp(_service, "http") != 0)
+ if (strcmp(_service, "http") != 0 && strcmp(_service, "https") != 0)
return;
removeIndex(_path);
@@ -521,7 +522,7 @@
_url << ":";
if (_host.length())
_url << "//" << _host;
- if (_port != 80 && strcmp(_service, "http") == 0)
+ if (_port != 80 && (strcmp(_service, "http") == 0 || strcmp(_service, "https") ==
+0))
_url << ':' << _port;
_url << _path;
_normal = 1;
diff -u --recursive htdig-3.1.4.org/htlib/URL.h htdig-3.1.4/htlib/URL.h
--- htdig-3.1.4.org/htlib/URL.h Fri Dec 10 01:28:47 1999
+++ htdig-3.1.4/htlib/URL.h Thu Jan 20 15:58:47 2000
@@ -40,7 +40,7 @@
void host(char *h) {_host = h;}
int port() {return _port;}
void port(int p) {_port = p;}
- char *service() {return _service;}
+ char *service() {return _service.get();}
void service(char *s) {_service = s;}
char *path() {return _path;}
void path(char *p);
------------------------------------
To unsubscribe from the htdig mailing list, send a message to
[EMAIL PROTECTED]
You will receive a message to confirm this.