Hello Samuel and the Hurd team,
this is the second patch in the series that fixes the httpfs
translator.
This patch fixes the open_connection function by removing the separate
HEAD request: a single GET request is now sent, and its response is used
both to find the end of the headers and to read the body.
Sincerely,
Gianluca
Index: httpfs/http.c
===================================================================
--- httpfs.orig/http.c
+++ httpfs/http.c
@@ -60,128 +60,78 @@ error_t lookup_host (char *url, struct h
/* open a connection with the remote web server */
error_t open_connection(struct netnode *node, int *fd,off_t *head_len)
{
- /* HTTP GET command returns head and body so we have to prune the
- * head. *HEAD_LEN indicates the header length, for pruning upto that */
-
error_t err;
struct hostent *hptr;
struct sockaddr_in dest;
- ssize_t written;
- size_t towrite;
char buffer[4096];
- ssize_t bytes_read;
- char *token,*mesg;
- int code;
- char delimiters0[] = " ";
- char delimiters1[] = "\n";
+ ssize_t nread;
const char *relative_path = get_safe_path (node->conn_req);
- bzero(&dest,sizeof(dest));
+ /* Setup sockaddr */
+ bzero (&dest, sizeof (dest));
dest.sin_family = AF_INET;
dest.sin_port = htons (port);
- if ( !strcmp(ip_addr,"0.0.0.0") )
- {
- /* connection is not through a proxy server
- * find IP addr. of remote server */
- err = lookup_host (dir_tok, &hptr);
- if (err)
- {
- fprintf(stderr,"Could not find IP addr %s\n",node->url);
- return err;
- }
- dest.sin_addr = *(struct in_addr *)hptr->h_addr;
- }
- else
- {
- /* connection is through the proxy server
- * need not find IP of remote server
- * find IP of the proxy server */
- if ( inet_aton(ip_addr,&dest.sin_addr) == 0 )
- {
- fprintf(stderr,"Invalid IP for proxy\n");
- return -1;
- }
- }
+ err = lookup_host (dir_tok, &hptr);
+ if (err) return err;
+ dest.sin_addr = *(struct in_addr *)hptr->h_addr;
if (debug_flag)
- fprintf (stderr, "trying to open %s:%d%s\n", dir_tok,
- port, relative_path);
+ fprintf (stderr, "[debug] fetching http://%s%s\n", dir_tok, relative_path);
*fd = socket (AF_INET, SOCK_STREAM, 0);
- if (*fd == -1)
- {
- fprintf(stderr,"Socket creation error\n");
- return errno;
- }
-
- err = connect (*fd, (struct sockaddr *)&dest, sizeof (dest));
- if (err == -1)
- {
- fprintf(stderr,"Cannot connect to remote host\n");
- return errno;
- }
-
- /* Send a HEAD request find header length */
- sprintf(buffer,"HEAD %s HTTP/1.0\n\n", relative_path);
- towrite = strlen (buffer);
- written = TEMP_FAILURE_RETRY (write (*fd, buffer, towrite));
- if ( written == -1 || written < towrite )
- {
- fprintf(stderr,"Could not send an HTTP request to host\n");
- return errno;
- }
-
- bytes_read = read(*fd,buffer,sizeof(buffer));
- if ( bytes_read < 0 )
- {
- fprintf(stderr,"Error with HEAD read\n");
- return errno;
- }
-
- *head_len = bytes_read;
- token = strtok(buffer,delimiters0);
- token = strtok(NULL,delimiters0);
- sscanf(token,"%d",&code);
- token = strtok(NULL,delimiters1);
- mesg = strdup(token);
- if ( code != 200 )
- {
- /* page does not exist */
- fprintf(stderr,"Error Page not Accesible\n");
- fprintf(stderr,"%d %s\n",code,mesg);
- return EBADF;
- }
-
- close(*fd);
-
- /* Send the GET request for the url */
- *fd = socket (AF_INET, SOCK_STREAM, 0);
- if (*fd == -1)
- {
- fprintf(stderr,"Socket creation error\n");
- return errno;
- }
-
- err = connect (*fd, (struct sockaddr *)&dest, sizeof (dest));
- if (err == -1)
- {
- fprintf(stderr,"Cannot connect to remote host\n");
- return errno;
- }
-
- towrite = strlen (node->comm_buf);
-
- /* guard against EINTR failures */
- written = TEMP_FAILURE_RETRY (write (*fd, node->comm_buf, towrite));
- written += TEMP_FAILURE_RETRY (write (*fd, "\n\n", 2));
- if (written == -1 || written < (towrite+2))
- {
- fprintf(stderr,"Could not send GET request to remote host\n");
- return errno;
- }
- return 0;
+ if (*fd == -1) return errno;
+
+ if (connect(*fd, (struct sockaddr *)&dest, sizeof(dest)) == -1) {
+ close(*fd);
+ return errno;
+ }
+
+ /* HTTP/1.1 request correctly formatted */
+ /* Lines end with \r\n, as required by the HTTP RFC */
+ int req_len = snprintf(buffer, sizeof(buffer),
+ "GET %s HTTP/1.1\r\n"
+ "Host: %s\r\n"
+ "User-Agent: GNU-Hurd-httpfs/0.1\r\n"
+ "Connection: close\r\n\r\n",
+ relative_path, dir_tok);
+
+ if (write(*fd, buffer, req_len) <= 0) {
+ close(*fd);
+ return EIO;
+ }
+
+ /* We only read a portion of the response for finding the headers */
+ nread = recv(*fd, buffer, sizeof(buffer) - 1, MSG_PEEK);
+ if (nread <= 0) {
+ close(*fd);
+ return EIO;
+ }
+ buffer[nread] = '\0';
+
+ /* Verify if the response is valid */
+ if (!strstr(buffer, " 200") && !strstr(buffer, " 301") && !strstr(buffer, " 302")) {
+ fprintf(stderr, "httpfs: Server returned error or not found for %s\n", relative_path);
+ close(*fd);
+ return ENOENT;
+ }
+
+ /* Look for the end of the headers */
+ char *header_end = strstr(buffer, "\r\n\r\n");
+ if (header_end) {
+ *head_len = (header_end - buffer) + 4;
+
+ /* Consume headers from socket so that the next read() starts with the body */
+ char discard_buf[8192];
+ read(*fd, discard_buf, *head_len);
+ } else {
+ /* If it cannot find the end of the header in 4KB of buffer, there is a problem */
+ close(*fd);
+ return EPROTO;
+ }
+
+ return 0;
}
/* fetch a directory node from the web server