My 2c below,
On 25/01/2026 04:18, Gianluca Cannata wrote:
Hi Samuel,
This is the first patch that strives to handle HTTP status codes other
than 200 OK.
Only 4xx and 5xx are actually handled in their own way.
Let me know what you think.
Sincerely,
Gianluca
>
Index: httpfs/http.c
===================================================================
--- httpfs.orig/http.c
+++ httpfs/http.c
@@ -100,6 +100,37 @@ error_t lookup_host (const char *host, s
return 0;
}
+/* read the first line from the socket and return an error_t error */
+static error_t translate_http_status (int fd, ssize_t *nread)
+{
+ char buf[32];
When using magic numbers, it is best to document what the value means.
In this case the buffer needs to store at least the first bytes of the
protocol response, which are "HTTP/1.N SSS ".
Also, something to consider. In modern HTTP the "reason phrase" is
optional. As are the headers. Which makes the minimum valid HTTP/1.x
response only 13 bytes.
This function may read up to 31 bytes (sizeof (buf) - 1).
If more than the status line is read in by this function, any following
data will be lost.
+
+ *nread = read (fd, buf, sizeof (buf) - 1);
+ if (*nread < 12) return EIO;
+ buf[*nread] = '\0';
+
+ if (strncmp (buf, "HTTP/", 5) != 0)
+ return EPROTO;
With modern standard changing to non-ASCII the HTTP/1 response syntax is
exactly this:
* the 7-octet string "HTTP/1."
* Then a DIGIT ('0'-'9'),
* Then an SP (0x20),
* Then three DIGIT ('0'-'9'),
* Then one of: SP (0x20), or LF (0x0A), or CRLF (0x0D0A)
Anything not matching that exact pattern is invalid HTTP and should
produce EPROTO.
+
+ int status = atoi (buf + 9);
+
+ if (debug_flag)
+ fprintf (stderr, "HTTP Status: %d\n", status);
+
The HTTP specification, RFC 9110:
"a client MUST understand the class of any status code, as indicated
by the first digit, and treat an unrecognized status code as being
equivalent to the x00 status code of that class."
I think it might be better for comprehension to have an if statement
checking which range the status is part of, with a switch inside those
for the specially handled code.
eg.
if (status < 100 || 600 <= status)
return EPROTO;
else if (status < 200)
return EIO; // Informational Responses not supported yet
else if (status < 300)
return 0;
else if (status < 400)
{
switch (status)
{
case 301:
case 302:
case 303:
case 307:
case 308:
return EIO; // Redirection is not supported yet
default:
return EINVAL;
}
}
else if (status < 500)
...
+ switch (status)
+ {
+ case 200: return 0;
The 205 and 210 need some consideration. They should either map to the
same as 200, or possibly ENOENT depending on how the error produced by
this function is intended to be used.
+ case 301:
+ case 302: return EAGAIN;
The new 303, 307 and 308 should also have the same handling.
However, AFAIK these statuses do not actually mean the same thing that
EAGAIN does. In their case a *different* URL should be tried on the
followup, not the same one repeated.
In HTTP the "try again later" meaning is given by the 429 and 503 statuses.
Otherwise the "Retry-After" header has to be looked for - and this can
appear in any 300-599 status code.
+ case 401:
+ case 403: return EACCES;
The 402, 405, 407, 421, 451, 503, and 511 are all about wrong or broken
access and should also map to EACCES.
+ case 404: return ENOENT;
+ case 410: return ENOENT;
The 406, 412, 415, 428 mean the specific resource representation being
requested does not exist and thus ENOENT.
Also:
Statuses 501, 505, and 510 are about the server not accepting what the
client has sent - thus map to EINVAL.
Statuses 306, 417, and 422 all mean transport issues - thus map to EIO.
Status 426 and 428 should map to EPROTO.
+ default:
+ return (status >= 500) ? EIO : EINVAL;
If the status parsed is <100 or >= 600, or 418, that should produce
EPROTO while this translator is still sending HTTP/1.0.
FYI: Handling 1xx properly is one of the things needed to use "1.1"
version labels.
+ }
+}
+
/* open a connection with the remote web server */
error_t open_connection(struct netnode *node, int *fd,off_t *head_len)
{
@@ -115,10 +146,6 @@ error_t open_connection(struct netnode *
size_t towrite;
char buffer[4096];
ssize_t bytes_read;
- char *token,*mesg;
- int code;
- char delimiters0[] = " ";
- char delimiters1[] = "\n";
/* 1. Target selection.
* If ip_addr (proxy global variable) is set, we use it.
@@ -165,27 +192,24 @@ error_t open_connection(struct netnode *
fprintf(stderr,"Could not send an HTTP request to host\n");
return errno;
}
+
+ /* Check HTTP status code and handle other than 200 OK only */
+ if ((err = translate_http_status (*fd, &bytes_read)) != 0)
+ {
+ close (*fd);
+ return err;
+ }
- bytes_read = read(*fd,buffer,sizeof(buffer));
- if ( bytes_read < 0 )
+ int n = read(*fd,buffer,sizeof(buffer));
+ if ( n < 0 )
{
- fprintf(stderr,"Error with HEAD read\n");
+ perror ("Failed to read HEAD response");
+ close (*fd);
return errno;
}
+ buffer[n] = '\0';
- *head_len = bytes_read;
- token = strtok(buffer,delimiters0);
- token = strtok(NULL,delimiters0);
- sscanf(token,"%d",&code);
- token = strtok(NULL,delimiters1);
- mesg = strdup(token);
- if ( code != 200 )
- {
- /* page does not exist */
- fprintf(stderr,"Error Page not Accesible\n");
- fprintf(stderr,"%d %s\n",code,mesg);
- return EBADF;
- }
+ *head_len = bytes_read + n;
close(*fd);