Gianluca Cannata, le mar. 13 janv. 2026 16:10:04 +0100, a ecrit:
> This patch fixes the open_connection function by removing the HEAD
> request and using only a GET request to parse the headers and the body
> of the HTTP response.

It indeed looks bogus to send HEAD then GET, we cannot be sure that the
GET will have the same header size as the HEAD.

> Index: httpfs/http.c
> ===================================================================
> --- httpfs.orig/http.c
> +++ httpfs/http.c
> @@ -60,128 +60,78 @@ error_t lookup_host (char *url, struct h
>  /* open a connection with the remote web server */
>  error_t open_connection(struct netnode *node, int *fd,off_t *head_len) 
>  {
> -     /* HTTP GET command returns head and body so we have to prune the
> -      * head. *HEAD_LEN indicates the header length, for pruning upto that */

Why removing this? This is still true.

>       error_t err;
>       struct hostent *hptr;
>       struct sockaddr_in dest;
> -     ssize_t written;
> -     size_t towrite;
>       char buffer[4096];
> -     ssize_t bytes_read;
> -     char *token,*mesg;
> -     int code;
> -     char delimiters0[] = " ";
> -     char delimiters1[] = "\n";
> +     ssize_t nread;
>  
>       const char *relative_path = get_safe_path (node->conn_req);
>  
> -     bzero(&dest,sizeof(dest));
> +     /* Setup sockaddr */
> +     bzero (&dest, sizeof (dest));

Please avoid unrelated formatting changes, that makes reviewing harder.

>       dest.sin_family = AF_INET;
>       dest.sin_port = htons (port);
>  
> -     if ( !strcmp(ip_addr,"0.0.0.0") )
> -     {
> -             /* connection is not through a proxy server
> -              * find IP addr. of remote server */
> -             err = lookup_host (dir_tok, &hptr);
> -             if (err)
> -             {
> -                     fprintf(stderr,"Could not find IP addr %s\n",node->url);
> -                     return err;
> -             }
> -             dest.sin_addr = *(struct in_addr *)hptr->h_addr;
> -     }
> -     else
> -     {
> -             /* connection is through the proxy server
> -              * need not find IP of remote server
> -              * find IP of the proxy server */
> -             if ( inet_aton(ip_addr,&dest.sin_addr) == 0 )
> -             {
> -                     fprintf(stderr,"Invalid IP for proxy\n");
> -                     return -1;
> -             }

Why dropping this part? This is dropping proxy support.

> -     }
> +     err = lookup_host (dir_tok, &hptr);
> +     if (err) return err;
> +     dest.sin_addr = *(struct in_addr *)hptr->h_addr;
>  
>       if (debug_flag)
> -             fprintf (stderr, "trying to open %s:%d%s\n", dir_tok,
> -                             port, relative_path);
> +             fprintf (stderr, "[debug] fetching http://%s%s\n";, dir_tok, 
> relative_path);

We do want to see the port, it may not be 80.

>  
>       *fd = socket (AF_INET, SOCK_STREAM, 0);
> -     if (*fd == -1)
> -     {
> -             fprintf(stderr,"Socket creation error\n");
> -             return errno;
> -     }
> -
> -     err = connect (*fd, (struct sockaddr *)&dest, sizeof (dest));
> -     if (err == -1)
> -     {
> -             fprintf(stderr,"Cannot connect to remote host\n");
> -             return errno;
> -     }
> -
> -     /* Send a HEAD request find header length */
> -     sprintf(buffer,"HEAD %s HTTP/1.0\n\n", relative_path);
> -     towrite = strlen (buffer);
> -     written = TEMP_FAILURE_RETRY (write (*fd, buffer, towrite));
> -     if ( written == -1 || written < towrite )
> -     {
> -             fprintf(stderr,"Could not send an HTTP request to host\n");
> -             return errno;
> -     }
> -     
> -     bytes_read = read(*fd,buffer,sizeof(buffer));
> -     if ( bytes_read < 0 ) 
> -     {
> -             fprintf(stderr,"Error with HEAD read\n");
> -             return errno;
> -     }
> -
> -     *head_len = bytes_read;
> -     token = strtok(buffer,delimiters0);
> -     token = strtok(NULL,delimiters0);
> -     sscanf(token,"%d",&code);
> -     token = strtok(NULL,delimiters1);
> -     mesg = strdup(token);
> -     if ( code != 200 ) 
> -     {
> -             /* page does not exist */
> -             fprintf(stderr,"Error Page not Accesible\n");
> -             fprintf(stderr,"%d %s\n",code,mesg);
> -             return EBADF;
> -     }
> -     
> -     close(*fd);
> -     
> -     /* Send the GET request for the url */
> -     *fd = socket (AF_INET, SOCK_STREAM, 0);
> -     if (*fd == -1)
> -     {
> -             fprintf(stderr,"Socket creation error\n");
> -             return errno;
> -     }
> -
> -     err = connect (*fd, (struct sockaddr *)&dest, sizeof (dest));
> -     if (err == -1)
> -     {
> -             fprintf(stderr,"Cannot connect to remote host\n");
> -             return errno;
> -     }
> -
> -     towrite = strlen (node->comm_buf);
> -
> -     /* guard against EINTR failures */
> -     written = TEMP_FAILURE_RETRY (write (*fd, node->comm_buf, towrite));
> -     written += TEMP_FAILURE_RETRY (write (*fd, "\n\n",  2));
> -     if (written == -1 || written < (towrite+2)) 
> -     {
> -             fprintf(stderr,"Could not send GET request to remote host\n");
> -             return errno;
> -     }
> -     return 0;
> +     if (*fd == -1) return errno;

Keep the error message, it's precious.

> +
> +     if (connect(*fd, (struct sockaddr *)&dest, sizeof(dest)) == -1) {
> +             close(*fd);

Also keep the error message.

> +             return errno;
> +     }
> +
> +     /* HTTP/1.1 request correctly formatted */
> +    /* We use \r\n as per requested by RFC standard */
> +    int req_len = snprintf(buffer, sizeof(buffer),
> +             "GET %s HTTP/1.1\r\n"

Why inventing the GET string? It's already available in node->comm_buf.

> +             "Host: %s\r\n"
> +             "User-Agent: GNU-Hurd-httpfs/0.1\r\n"
> +             "Connection: close\r\n\r\n",
> +             relative_path, dir_tok);
> +
> +    if (write(*fd, buffer, req_len) <= 0) {

You want a loop around the write, because you can have partial writes.

> +        close(*fd);
> +        return EIO;
> +    }
> +
> +    /* We only read a portion of the response for finding the headers */
> +    nread = recv(*fd, buffer, sizeof(buffer) - 1, MSG_PEEK); 
> +    if (nread <= 0) {

We want an error message.

> +        close(*fd);
> +        return EIO;
> +    }
> +    buffer[nread] = '\0';
> +
> +    /* Verify if the response is valid */
> +    if (!strstr(buffer, " 200") && !strstr(buffer, " 301") && 
> !strstr(buffer, " 302")) {

This is not actually reading only the first line of the server response,
but all of the server response. If some field happens to contain " 200",
we'll erroneously think it's good.

> +        fprintf(stderr, "httpfs: Server returned error or not found for 
> %s\n", relative_path);
> +        close(*fd);
> +        return ENOENT;
> +    }
> +
> +    /* We lookup for the end of the header */
> +    char *header_end = strstr(buffer, "\r\n\r\n");

Some servers might also be returning "\n\n"

> +    if (header_end) {
> +        *head_len = (header_end - buffer) + 4;
> +        
> +        /* Consume headers from socket so that the next read() starts with 
> the body */
> +        char discard_buf[8192];

You can use *head_len rather than a hardcoded value.

> +        read(*fd, discard_buf, *head_len);
> +    } else {
> +        /* If it cannot find the end of the header in 4KB of buffer, there 
> is a problem */

We want an error message.

> +        close(*fd);
> +        return EPROTO;
> +    }
> +
> +    return 0;
>  }
>  
>  /* fetch a directory node from the web server

Samuel

Reply via email to