Hello Samuel and the Hurd team,

This is the second patch in the series that aims to fix
the httpfs translator.

This patch fixes the open_connection function by removing the separate
HEAD request: a single GET request is now sent, and its response provides
both the headers and the body.

Sincerely

Gianluca
Index: httpfs/http.c
===================================================================
--- httpfs.orig/http.c
+++ httpfs/http.c
@@ -60,128 +60,109 @@ error_t lookup_host (char *url, struct h
 /* open a connection with the remote web server */
 error_t open_connection(struct netnode *node, int *fd,off_t *head_len) 
 {
-	/* HTTP GET command returns head and body so we have to prune the
-	 * head. *HEAD_LEN indicates the header length, for pruning upto that */
-	
+	/* Send a single HTTP/1.1 GET for NODE and leave *FD positioned at the
+	 * first byte of the response body.  *HEAD_LEN is set to the length of
+	 * the response headers, which are consumed here.  Returns 0 on
+	 * success, otherwise an error code; once the socket exists, every
+	 * failure path closes it before returning. */
 	error_t err;
 	struct hostent *hptr;
 	struct sockaddr_in dest;
-	ssize_t written;
-	size_t towrite;
 	char buffer[4096];
-	ssize_t bytes_read;
-	char *token,*mesg;
-	int code;
-	char delimiters0[] = " ";
-	char delimiters1[] = "\n";
+	ssize_t nbytes;
+	size_t sent = 0;
+	int req_len;
+	int status = 0;
+	char *header_end;
 
 	const char *relative_path = get_safe_path (node->conn_req);
 
-	bzero(&dest,sizeof(dest));
+	/* Setup sockaddr */
+	bzero (&dest, sizeof (dest));
 	dest.sin_family = AF_INET;
 	dest.sin_port = htons (port);
 
-	if ( !strcmp(ip_addr,"0.0.0.0") )
-	{
-		/* connection is not through a proxy server
-		 * find IP addr. of remote server */
-		err = lookup_host (dir_tok, &hptr);
-		if (err)
-		{
-			fprintf(stderr,"Could not find IP addr %s\n",node->url);
-			return err;
-		}
-		dest.sin_addr = *(struct in_addr *)hptr->h_addr;
-	}
-	else
-	{
-		/* connection is through the proxy server
-		 * need not find IP of remote server
-	         * find IP of the proxy server */
-		if ( inet_aton(ip_addr,&dest.sin_addr) == 0 )
-		{
-			fprintf(stderr,"Invalid IP for proxy\n");
-			return -1;
-		}
-	}
+	err = lookup_host (dir_tok, &hptr);
+	if (err)
+		return err;
+	dest.sin_addr = *(struct in_addr *)hptr->h_addr;
 
 	if (debug_flag)
-		fprintf (stderr, "trying to open %s:%d%s\n", dir_tok,
-				port, relative_path);
+		fprintf (stderr, "[debug] fetching http://%s%s\n", dir_tok, relative_path);
 
 	*fd = socket (AF_INET, SOCK_STREAM, 0);
-	if (*fd == -1)
-	{
-		fprintf(stderr,"Socket creation error\n");
-		return errno;
-	}
-
-	err = connect (*fd, (struct sockaddr *)&dest, sizeof (dest));
-	if (err == -1)
-	{
-		fprintf(stderr,"Cannot connect to remote host\n");
-		return errno;
-	}
-
-	/* Send a HEAD request find header length */
-	sprintf(buffer,"HEAD %s HTTP/1.0\n\n", relative_path);
-	towrite = strlen (buffer);
-	written = TEMP_FAILURE_RETRY (write (*fd, buffer, towrite));
-	if ( written == -1 || written < towrite )
-	{
-		fprintf(stderr,"Could not send an HTTP request to host\n");
-		return errno;
-	}
-	
-	bytes_read = read(*fd,buffer,sizeof(buffer));
-	if ( bytes_read < 0 ) 
-	{
-		fprintf(stderr,"Error with HEAD read\n");
-		return errno;
-	}
-
-	*head_len = bytes_read;
-	token = strtok(buffer,delimiters0);
-	token = strtok(NULL,delimiters0);
-	sscanf(token,"%d",&code);
-	token = strtok(NULL,delimiters1);
-	mesg = strdup(token);
-	if ( code != 200 ) 
-	{
-		/* page does not exist */
-		fprintf(stderr,"Error Page not Accesible\n");
-		fprintf(stderr,"%d %s\n",code,mesg);
-		return EBADF;
-	}
-	
-	close(*fd);
-	
-	/* Send the GET request for the url */
-	*fd = socket (AF_INET, SOCK_STREAM, 0);
-	if (*fd == -1)
-	{
-		fprintf(stderr,"Socket creation error\n");
-		return errno;
-	}
-
-	err = connect (*fd, (struct sockaddr *)&dest, sizeof (dest));
-	if (err == -1)
-	{
-		fprintf(stderr,"Cannot connect to remote host\n");
-		return errno;
-	}
-
-	towrite = strlen (node->comm_buf);
-
-	/* guard against EINTR failures */
-	written = TEMP_FAILURE_RETRY (write (*fd, node->comm_buf, towrite));
-	written += TEMP_FAILURE_RETRY (write (*fd, "\n\n",  2));
-	if (written == -1 || written < (towrite+2)) 
-	{
-		fprintf(stderr,"Could not send GET request to remote host\n");
-		return errno;
-	}
-	return 0;
+	if (*fd == -1)
+		return errno;
+
+	if (connect (*fd, (struct sockaddr *)&dest, sizeof (dest)) == -1)
+	{
+		/* Save errno first: close() may overwrite it */
+		err = errno;
+		goto fail;
+	}
+
+	/* HTTP/1.1 request; header lines end in CRLF as the RFC requires */
+	req_len = snprintf (buffer, sizeof (buffer),
+			"GET %s HTTP/1.1\r\n"
+			"Host: %s\r\n"
+			"User-Agent: GNU-Hurd-httpfs/0.1\r\n"
+			"Connection: close\r\n\r\n",
+			relative_path, dir_tok);
+	if (req_len < 0 || (size_t) req_len >= sizeof (buffer))
+	{
+		/* The request did not fit in BUFFER: never send it truncated */
+		err = ENAMETOOLONG;
+		goto fail;
+	}
+
+	/* write() may be short or interrupted: loop until all is sent */
+	err = EIO;
+	while (sent < (size_t) req_len)
+	{
+		nbytes = write (*fd, buffer + sent, req_len - sent);
+		if (nbytes > 0)
+			sent += nbytes;
+		else if (nbytes == 0 || errno != EINTR)
+			goto fail;
+	}
+
+	/* Peek at the start of the response to locate the end of the headers */
+	nbytes = recv (*fd, buffer, sizeof (buffer) - 1, MSG_PEEK);
+	if (nbytes <= 0)
+		goto fail;
+	buffer[nbytes] = '\0';
+
+	/* Check the code in the status line only, so that " 200" appearing
+	 * in a later header or in the body cannot mask an error response */
+	if (sscanf (buffer, "HTTP/%*d.%*d %d", &status) != 1
+	    || (status != 200 && status != 301 && status != 302))
+	{
+		fprintf (stderr, "httpfs: Server returned error or not found for %s\n", relative_path);
+		err = ENOENT;
+		goto fail;
+	}
+
+	/* The end of the headers must lie within the peeked data.
+	 * NOTE(review): a single peek can return before all of the headers
+	 * have arrived; such a response is rejected here with EPROTO. */
+	header_end = strstr (buffer, "\r\n\r\n");
+	if (!header_end)
+	{
+		err = EPROTO;
+		goto fail;
+	}
+	*head_len = (header_end - buffer) + 4;
+
+	/* Consume the headers so that the next read() starts with the body.
+	 * These bytes were just peeked, so they are already queued. */
+	if (read (*fd, buffer, *head_len) != *head_len)
+		goto fail;
+
+	return 0;
+
+fail:
+	close (*fd);
+	return err;
 }
 
 /* fetch a directory node from the web server

Reply via email to