Gianluca Cannata, le mar. 13 janv. 2026 16:10:04 +0100, a ecrit:
> This patch fixes the open_connection function by removing the HEAD
> request and using only a GET request to parse the headers and the body
> of the HTTP response.
It indeed looks bogus to send HEAD then GET, we cannot be sure that the
GET will have the same header size as the HEAD.
> Index: httpfs/http.c
> ===================================================================
> --- httpfs.orig/http.c
> +++ httpfs/http.c
> @@ -60,128 +60,78 @@ error_t lookup_host (char *url, struct h
> /* open a connection with the remote web server */
> error_t open_connection(struct netnode *node, int *fd,off_t *head_len)
> {
> - /* HTTP GET command returns head and body so we have to prune the
> - * head. *HEAD_LEN indicates the header length, for pruning upto that */
Why removing this? This is still true.
> error_t err;
> struct hostent *hptr;
> struct sockaddr_in dest;
> - ssize_t written;
> - size_t towrite;
> char buffer[4096];
> - ssize_t bytes_read;
> - char *token,*mesg;
> - int code;
> - char delimiters0[] = " ";
> - char delimiters1[] = "\n";
> + ssize_t nread;
>
> const char *relative_path = get_safe_path (node->conn_req);
>
> - bzero(&dest,sizeof(dest));
> + /* Setup sockaddr */
> + bzero (&dest, sizeof (dest));
Please avoid unrelated formatting changes, that makes reviewing harder.
> dest.sin_family = AF_INET;
> dest.sin_port = htons (port);
>
> - if ( !strcmp(ip_addr,"0.0.0.0") )
> - {
> - /* connection is not through a proxy server
> - * find IP addr. of remote server */
> - err = lookup_host (dir_tok, &hptr);
> - if (err)
> - {
> - fprintf(stderr,"Could not find IP addr %s\n",node->url);
> - return err;
> - }
> - dest.sin_addr = *(struct in_addr *)hptr->h_addr;
> - }
> - else
> - {
> - /* connection is through the proxy server
> - * need not find IP of remote server
> - * find IP of the proxy server */
> - if ( inet_aton(ip_addr,&dest.sin_addr) == 0 )
> - {
> - fprintf(stderr,"Invalid IP for proxy\n");
> - return -1;
> - }
Why dropping this part? This is dropping proxy support.
> - }
> + err = lookup_host (dir_tok, &hptr);
> + if (err) return err;
> + dest.sin_addr = *(struct in_addr *)hptr->h_addr;
>
> if (debug_flag)
> - fprintf (stderr, "trying to open %s:%d%s\n", dir_tok,
> - port, relative_path);
> + fprintf (stderr, "[debug] fetching http://%s%s\n", dir_tok,
> relative_path);
We do want to see the port, it may not be 80.
>
> *fd = socket (AF_INET, SOCK_STREAM, 0);
> - if (*fd == -1)
> - {
> - fprintf(stderr,"Socket creation error\n");
> - return errno;
> - }
> -
> - err = connect (*fd, (struct sockaddr *)&dest, sizeof (dest));
> - if (err == -1)
> - {
> - fprintf(stderr,"Cannot connect to remote host\n");
> - return errno;
> - }
> -
> - /* Send a HEAD request find header length */
> - sprintf(buffer,"HEAD %s HTTP/1.0\n\n", relative_path);
> - towrite = strlen (buffer);
> - written = TEMP_FAILURE_RETRY (write (*fd, buffer, towrite));
> - if ( written == -1 || written < towrite )
> - {
> - fprintf(stderr,"Could not send an HTTP request to host\n");
> - return errno;
> - }
> -
> - bytes_read = read(*fd,buffer,sizeof(buffer));
> - if ( bytes_read < 0 )
> - {
> - fprintf(stderr,"Error with HEAD read\n");
> - return errno;
> - }
> -
> - *head_len = bytes_read;
> - token = strtok(buffer,delimiters0);
> - token = strtok(NULL,delimiters0);
> - sscanf(token,"%d",&code);
> - token = strtok(NULL,delimiters1);
> - mesg = strdup(token);
> - if ( code != 200 )
> - {
> - /* page does not exist */
> - fprintf(stderr,"Error Page not Accesible\n");
> - fprintf(stderr,"%d %s\n",code,mesg);
> - return EBADF;
> - }
> -
> - close(*fd);
> -
> - /* Send the GET request for the url */
> - *fd = socket (AF_INET, SOCK_STREAM, 0);
> - if (*fd == -1)
> - {
> - fprintf(stderr,"Socket creation error\n");
> - return errno;
> - }
> -
> - err = connect (*fd, (struct sockaddr *)&dest, sizeof (dest));
> - if (err == -1)
> - {
> - fprintf(stderr,"Cannot connect to remote host\n");
> - return errno;
> - }
> -
> - towrite = strlen (node->comm_buf);
> -
> - /* guard against EINTR failures */
> - written = TEMP_FAILURE_RETRY (write (*fd, node->comm_buf, towrite));
> - written += TEMP_FAILURE_RETRY (write (*fd, "\n\n", 2));
> - if (written == -1 || written < (towrite+2))
> - {
> - fprintf(stderr,"Could not send GET request to remote host\n");
> - return errno;
> - }
> - return 0;
> + if (*fd == -1) return errno;
Keep the error message, it's precious.
> +
> + if (connect(*fd, (struct sockaddr *)&dest, sizeof(dest)) == -1) {
> + close(*fd);
Also keep the error message.
> + return errno;
> + }
> +
> + /* HTTP/1.1 request correctly formatted */
> + /* We use \r\n as per requested by RFC standard */
> + int req_len = snprintf(buffer, sizeof(buffer),
> + "GET %s HTTP/1.1\r\n"
Why inventing the GET string? It's already available in node->comm_buf.
> + "Host: %s\r\n"
> + "User-Agent: GNU-Hurd-httpfs/0.1\r\n"
> + "Connection: close\r\n\r\n",
> + relative_path, dir_tok);
> +
> + if (write(*fd, buffer, req_len) <= 0) {
You want a loop around the write, because you can have partial writes.
> + close(*fd);
> + return EIO;
> + }
> +
> + /* We only read a portion of the response for finding the headers */
> + nread = recv(*fd, buffer, sizeof(buffer) - 1, MSG_PEEK);
> + if (nread <= 0) {
We want an error message.
> + close(*fd);
> + return EIO;
> + }
> + buffer[nread] = '\0';
> +
> + /* Verify if the response is valid */
> + if (!strstr(buffer, " 200") && !strstr(buffer, " 301") &&
> !strstr(buffer, " 302")) {
This is not actually reading only the first line of the server response,
but all of the server response. If some field happens to contain " 200",
we'll erroneously think it's good.
> + fprintf(stderr, "httpfs: Server returned error or not found for
> %s\n", relative_path);
> + close(*fd);
> + return ENOENT;
> + }
> +
> + /* We lookup for the end of the header */
> + char *header_end = strstr(buffer, "\r\n\r\n");
Some servers might also be returning "\n\n"
> + if (header_end) {
> + *head_len = (header_end - buffer) + 4;
> +
> + /* Consume headers from socket so that the next read() starts with
> the body */
> + char discard_buf[8192];
You can use *head_len rather than a hardcoded value.
> + read(*fd, discard_buf, *head_len);
> + } else {
> + /* If it cannot find the end of the header in 4KB of buffer, there
> is a problem */
We want an error message.
> + close(*fd);
> + return EPROTO;
> + }
> +
> + return 0;
> }
>
> /* fetch a directory node from the web server
Samuel