Hi Samuel,
the previous patch was malformed because I had not used the quilt
command correctly, although I thought I had.
This new patch fixes the previous one.
Gianluca
Il giorno mar 13 gen 2026 alle ore 14:54 Gianluca Cannata
<[email protected]> ha scritto:
>
> Hello Samuel and the bug-hurd team,
>
> As requested, I am submitting my changes in separate, logical patches.
>
> This is the first patch of the series. In this patch, I have
> refactored the initial URL parsing logic to use the argz library.
>
> Instead of simple string storage, the base URL is now stored as an argz
> vector.
>
> This change provides a more idiomatic Hurd implementation and creates
> the necessary infrastructure for the upcoming patches, which will
> improve directory traversal and path normalization by iterating over
> these argz components.
>
> The code compiles cleanly and prepares the translator for the
> subsequent path-handling fixes.
>
> Best regards,
>
> Gianluca
Index: httpfs/extract.c
===================================================================
--- httpfs.orig/extract.c
+++ httpfs/extract.c
@@ -24,6 +24,7 @@
#include<netdb.h>
#include<stdlib.h>
#include<unistd.h>
+#include<argz.h>
#include"httpfs.h"
@@ -117,7 +118,7 @@ void extract(char *_string,char *parent)
strcpy(temp,strchr(temp,'/'));
temp++;
}
- if ( no_of_slashes_here < no_of_slashes )
+ if ( no_of_slashes_here < argz_count (dir_tok, dir_tok_len) )
{
/* not going to support top level directory
* from a given directory
@@ -137,7 +138,12 @@ void extract(char *_string,char *parent)
}
token = strdupa(string);
- for ( i=0 ; i<no_of_slashes-1 ; i++ )
+
+ int base_depth = argz_count (dir_tok, dir_tok_len);
+ char *current_dir_tok_part = dir_tok;
+
+
+ for ( i=0 ; i<(base_depth-1) ; i++ )
{
/* extract file and directory from the string
* like /file/temp/a.html
@@ -145,11 +151,19 @@ void extract(char *_string,char *parent)
* extract temp fill it as a directory under file/
* extract a.html fill it as a file under temp/ */
- temp = strdupa(token);
- if ( strcmp(dir_tok[i],strtok(temp,"/")) )
- return;
- strcpy(token,strchr(token,'/'));
- token++;
+ temp = strdupa(token);
+ char *current_token = strtok (temp, "/");
+
+ if (current_dir_tok_part && current_token)
+ if (strcmp (current_dir_tok_part, current_token) != 0)
+ return;
+
+ char *next_slash = strchr (token, '/');
+ if (next_slash)
+ token = next_slash + 1;
+
+ current_dir_tok_part = argz_next (dir_tok, dir_tok_len, current_dir_tok_part);
+
}
parent = strdupa("tmp");
string = strdupa(token);
Index: httpfs/http.c
===================================================================
--- httpfs.orig/http.c
+++ httpfs/http.c
@@ -28,12 +28,24 @@
#include <string.h>
#include <malloc.h>
#include <stddef.h>
+#include <argz.h>
#include "httpfs.h"
#include <hurd/hurd_types.h>
#include <hurd/netfs.h>
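+/* Return the path part of REQ: NULL or empty gives "/"; with a "://" prefix, the text from the first '/' after it (or "/" if none); otherwise REQ itself if it starts with '/', else "/". */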
+static const char *get_safe_path(const char *req) {
+ if (!req || strlen(req) == 0) return "/";
+ const char *p = strstr(req, "://");
+ if (p) {
+ p = strchr(p + 3, '/');
+ return p ? p : "/";
+ }
+ return (req[0] == '/') ? req : "/";
+}
+
/* do a DNS lookup for NAME and store result in *ENT */
error_t lookup_host (char *url, struct hostent **ent)
{
@@ -63,6 +75,8 @@ error_t open_connection(struct netnode *
char delimiters0[] = " ";
char delimiters1[] = "\n";
+ const char *relative_path = get_safe_path (node->conn_req);
+
bzero(&dest,sizeof(dest));
dest.sin_family = AF_INET;
dest.sin_port = htons (port);
@@ -71,7 +85,7 @@ error_t open_connection(struct netnode *
{
/* connection is not through a proxy server
* find IP addr. of remote server */
- err = lookup_host (node->url, &hptr);
+ err = lookup_host (dir_tok, &hptr);
if (err)
{
fprintf(stderr,"Could not find IP addr %s\n",node->url);
@@ -92,8 +106,8 @@ error_t open_connection(struct netnode *
}
if (debug_flag)
- fprintf (stderr, "trying to open %s:%d/%s\n", node->url,
- port, node->conn_req);
+ fprintf (stderr, "trying to open %s:%d%s\n", dir_tok,
+ port, relative_path);
*fd = socket (AF_INET, SOCK_STREAM, 0);
if (*fd == -1)
@@ -110,7 +124,7 @@ error_t open_connection(struct netnode *
}
/* Send a HEAD request find header length */
- sprintf(buffer,"HEAD %s HTTP/1.0\n\n",node->conn_req);
+ sprintf(buffer,"HEAD %s HTTP/1.0\n\n", relative_path);
towrite = strlen (buffer);
written = TEMP_FAILURE_RETRY (write (*fd, buffer, towrite));
if ( written == -1 || written < towrite )
@@ -177,19 +191,17 @@ error_t fill_dirnode (struct netnode *di
error_t err = 0;
struct node *nd, **prevp;
struct files *go;
- char *comm_buf,*url,*conn_req,*f_name,*temp,*temp1;
if (debug_flag)
fprintf (stderr, "filling out dir %s\n", dir->file_name);
if ( dir->type == HTTP_DIR_NOT_FILLED ) {
- /* it is an unfilled directory so send a GET request for that
- * directory and parse the incoming HTML stream to get the file
- * and directories within that
- * and Fill the intermediate data-structure *file */
err = parse(dir);
- if ( err )
- return err;
+ if ( err ) {
+ fprintf (stderr, "httpfs: Parse error for %s\n", dir->file_name);
+ dir->type = HTTP_DIR;
+ return 0;
+ }
dir->type = HTTP_DIR;
}
@@ -200,103 +212,51 @@ error_t fill_dirnode (struct netnode *di
for(go=list_of_entries;go!=NULL;go=go->next)
{
- /* *file linked list contains all the file info obtained from
- * parsing the <a href="..">
- * select the ones belonging to this particular directory
- * and fill its node */
-
- if(strcmp(dir->file_name,go->parent)==0)
- {
- /* got a file in this directory
- * directory under consideration is dir->file_name
- * so have to fetch all files whose parent is
- * dir->file_name, i.e. dir->file_name==go->parent */
-
- if ( go->f_type == HTTP_URL )
- {
- /* its an url
- * url is shown as regular file
- * its name is altered by changing / to .
- * www.gnu.org/gpl.html will be changed to
- * www.gnu.org.gpl.html */
- char *slash;
- conn_req=(char *)malloc((strlen(go->f_name)+8)*sizeof(char));
- slash = strchr(go->f_name, '/');
- if (slash)
- url = strndup(go->f_name, slash - go->f_name);
+ /* Skip the host entry itself to avoid infinite recursion */
+ if (strcmp (go->f_name, dir_tok) == 0) continue;
+
+ if (strcmp (dir->file_name, go->parent) == 0) {
+ char *f_name = strdup (go->f_name);
+ char *url = strdup (dir_tok);
+ char *conn_req;
+
+ /* Build conn_req (the complete path for the server) */
+ if (go->f_type == HTTP_URL) {
+ asprintf (&conn_req, "http://%s", go->f_name);
+ } else {
+ /* Concatenate the parent's path with the name of the file */
+ const char *p_path = dir->conn_req;
+ int p_len = strlen (p_path);
+
+ if (p_path[p_len - 1] == '/')
+ asprintf (&conn_req, "%s%s", p_path, go->f_name);
else
- url = strdup(go->f_name);
- f_name = strdup(go->f_name);
- int i;
- for (i = 0; f_name[i] != '\0'; i++)
- if (f_name[i] == '/')
- f_name[i] = '.';
-
- sprintf(conn_req,"%s%s","http://",go->f_name);
+ asprintf (&conn_req, "%s/%s", p_path, go->f_name);
}
- else
- {
- /* its not an url */
- f_name = strdup(go->f_name);
- url=strdup(dir->url);
- if ( go != list_of_entries )
- {
- size_t conn_req_size = strlen(dir->conn_req) + strlen(go->f_name) + 1;
- if( go->f_type==HTTP_DIR || go->f_type==HTTP_DIR_NOT_FILLED )
- conn_req_size++; /* We'll need to add a trailing slash later. */
- conn_req=(char *)malloc(conn_req_size*sizeof(char));
- sprintf(conn_req,"%s%s",dir->conn_req,go->f_name);
- }
- else
- {
- if ( dir_tok[no_of_slashes] == NULL )
- {
- /* the file corresponding to base url
- * user has given a file explicitly in
- * the url */
- size_t conn_req_size = strlen(dir->conn_req) + strlen(go->f_name) + 1;
- if( go->f_type==HTTP_DIR || go->f_type==HTTP_DIR_NOT_FILLED )
- conn_req_size++; /* We'll need to add a trailing slash later. */
- conn_req=(char *)malloc(conn_req_size*sizeof(char));
- sprintf(conn_req,"%s%s",dir->conn_req,go->f_name);
- }
- else
- {
- /* the file corresponding to base url
- * user has not given a file explicitly
- * the url so its the index.html */
- size_t conn_req_size = strlen(dir->conn_req) + 1;
- if( go->f_type==HTTP_DIR || go->f_type==HTTP_DIR_NOT_FILLED )
- conn_req_size++; /* We'll need to add a trailing slash later. */
- conn_req=(char *)malloc(conn_req_size*sizeof(char));
- sprintf(conn_req,"%s",dir->conn_req);
- }
+
+ /* If it is a directory, ensure it ends with a slash */
+ if (go->f_type == HTTP_DIR || go->f_type == HTTP_DIR_NOT_FILLED) {
+ if (conn_req[strlen (conn_req) - 1] != '/') {
+ char *tmp;
+ asprintf (&tmp, "%s/", conn_req);
+ free (conn_req);
+ conn_req = tmp;
}
- if( go->f_type==HTTP_DIR || go->f_type==HTTP_DIR_NOT_FILLED )
- /* the filled file is directory so it has to end
- * with a / */
- strcat(conn_req,"/");
}
- comm_buf=(char *)malloc((strlen(conn_req)+20)*sizeof(char));
- sprintf(comm_buf,"GET %s HTTP/1.0",conn_req);
- nd = httpfs_make_node (go->f_type,url,conn_req,comm_buf,f_name);
- if (!nd)
- {
- err = ENOMEM;
- return err;
- }
- free(comm_buf);
- free(conn_req);
- free(f_name);
- *prevp = nd;
- nd->prevp = prevp;
- prevp = &nd->next;
- dir->num_ents++;
- if (dir->noents)
+ nd = httpfs_make_node (go->f_type, url, conn_req, "", f_name);
+ if (nd) {
+ *prevp = nd;
+ nd->prevp = prevp;
+ prevp = &nd->next;
+ dir->num_ents++;
dir->noents = FALSE;
+ }
+
+ free (conn_req); free (url); free (f_name);
}
}
- return err;
+
+ return 0;
}
Index: httpfs/httpfs.c
===================================================================
--- httpfs.orig/httpfs.c
+++ httpfs/httpfs.c
@@ -24,6 +24,7 @@
#include <errno.h>
#include <error.h>
#include <argp.h>
+#include <argz.h>
#include <hurd/netfs.h>
@@ -36,7 +37,8 @@ int mode;
int no_of_slashes = 0;
char *url, *conn_req;
char *ip_addr;
-char *dir_tok[25];
+char *dir_tok = NULL;
+size_t dir_tok_len = 0;
struct files *list_of_entries = NULL, *this_entry;
struct httpfs *httpfs; /* filesystem global pointer */
@@ -50,9 +52,12 @@ main (int argc, char **argv)
{
error_t err;
mach_port_t bootstrap;
- char *temp_url, *temp, *run;
+ char *temp_url, *clean_url;
+ char *host_name = NULL;
char type;
char *comm_buf; /* XXX: Is an http request limited to 200 bytes? */
+
+ /* Defaults */
port = 80;
debug_flag = 0;
mode = 1; /* means directory */
@@ -70,77 +75,60 @@ main (int argc, char **argv)
if (err)
error (1, 0, "Map time error.");
- if (strchr (url, '/') == NULL)
- error (1, 0, "Url must have a /, e.g., www.gnu.org/");
+ extern char *url;
- conn_req = (char *) malloc ((strlen (url) + 7) * sizeof (char));
- if (! conn_req)
- error (1, errno, "Cannot malloc conn_req.");
-
- temp_url = strdup (url);
- if (! temp_url)
- error (1, errno, "Cannot duplicate url.");
-
- if (!strncmp (temp_url, "http://", 7))
- /* go ahead of http:// if given in url */
- temp_url = temp_url + 7;
-
- if (strchr (temp_url, '/') == NULL)
- error (1, 0, "Url must have a /, e.g., www.gnu.org/");
-
- /* XXX: strtok is not reentrant. This will have to be fixed */
- temp = strdup (temp_url);
- url = strtok (temp, "/");
-
- /* Find the directories given in URL */
- temp = strdup (temp_url);
- no_of_slashes++;
- strcpy (temp, strchr (temp, '/'));
- temp++;
- while (strchr (temp, '/') != NULL)
- {
- /* go to the end of url */
- run = strdup (temp);
- dir_tok[no_of_slashes - 1] = strtok (run, "/");
- strcpy (temp, strchr (temp, '/'));
- temp++;
- no_of_slashes++;
- }
- if (strlen (temp))
- {
- /* user has input a specific html file in the url */
- dir_tok[no_of_slashes - 1] = strdup (temp);
- dir_tok[no_of_slashes] = NULL;
- }
- else
- {
- /* user has input just an url no file names specifed
- * assume the base url request is to index.html */
- dir_tok[no_of_slashes - 1] = strdup ("index.html");
- dir_tok[no_of_slashes] = strdup ("index.html");
- }
+ if (url == NULL || strlen (url) == 0)
+ error (1, 0, "URL must not be empty.");
+
+ /* Remove http:// if present */
+ clean_url = url;
+ if (strncasecmp (clean_url, "http://", 7) == 0)
+ clean_url = clean_url + 7;
+
+ /* Directory hierarchy creation with argz. This replaces strtok. */
+ /* It breaks the string 'clean_url' every time it finds a '/' character. */
+ if (argz_create_sep (clean_url, '/', &dir_tok, &dir_tok_len) != 0)
+ error (1, errno, "Cannot create directory hierarchy from parsing the URL.");
+
+ /* Extracts hostname */
+ host_name = dir_tok;
+
+ if (!host_name)
+ error (1, 0, "Invalid URL: No hostname found.");
+
+ /* Build the complete URL for the GET request by iterating the argz vector */
+ /* e.g. conn_req: "http://host/path/file" */
+ size_t total_len = 7 + 1; /* "http://" + null terminator */
+ char *entry = NULL;
+ /* Calculate the required total length first */
+ while ((entry = argz_next (dir_tok, dir_tok_len, entry)))
+ total_len = total_len + strlen (entry) + 1; /* +1 is for the slash */
+
+ conn_req = (char *) malloc (total_len);
+ if (!conn_req)
+ error (1, errno, "Cannot allocate connection request string.");
+
+ /* Build our string */
strcpy (conn_req, "http://");
- if (temp_url[strlen (temp_url) - 1] == '/')
- {
- strcat (conn_req, temp_url);
- err = asprintf (&comm_buf, "GET %s HTTP/1.0", conn_req);
- }
- else
- {
- while (strchr (temp_url, '/') != NULL)
- {
- temp = strdup (temp_url);
- strcat (conn_req, strtok (temp, "/"));
- strcat (conn_req, "/");
- strcpy (temp_url, strchr (temp_url, '/'));
- temp_url++;
- }
- err = asprintf (&comm_buf, "GET %s%s HTTP/1.0", conn_req, temp_url);
- }
- if (err < 0) /* check the return value of asprintf */
- error (1, errno, "Cannot allocate comm_buf.");
+ entry = NULL;
+ int first = 1;
+ while ((entry = argz_next (dir_tok, dir_tok_len, entry))) {
+ strcat (conn_req, entry);
+ /* Add the slash if not the last element, or if it is the host */
+ if (entry < (dir_tok + dir_tok_len - strlen (entry) - 1) || first)
+ strcat (conn_req, "/");
+
+ first = 0;
+ }
+
+ /* Creation of GET request buffer */
+ /* TODO: For modern HTTP, we should add a "Host: %s\r\n" header here */
+ if (asprintf (&comm_buf, "GET %s HTTP/1.0\r\n\r\n", conn_req) < 0)
+ error (1, errno, "Cannot allocate command request string.");
+
+ /* Initialize the filesystem */
httpfs = (struct httpfs *) malloc (sizeof (struct httpfs));
if (! httpfs)
error (1, errno, "Cannot allocate httpfs.");
@@ -149,26 +137,30 @@ main (int argc, char **argv)
httpfs->uid = getuid ();
httpfs->gid = getgid ();
httpfs->next_inode = 0;
+
if (mode)
type = HTTP_DIR;
else
type = HTTP_FILE;
+ /* Create root node */
/* XXX: why is tmp hardcoded? */
httpfs->root = httpfs_make_node (type, url, conn_req, comm_buf, "tmp");
+
netfs_init ();
- /* translator set to a directory */
+
+ /* If a directory, populate its contents. */
if (mode)
{
- /* fill the directory node with files
- * call parser for that
+ /* fill the directory node with files
+ * call parser for that
* only the current directory is filled
* subdirectories within them are indicated by type
* HTTP_DIR_UNFILLED, and are filled as on demand when an
* ls request comes for them */
err = parse (httpfs->root->nn);
if (err)
- error (1, err, "Error in Parsing.");
+ error (1, err, "Error in Parsing.");
}
if (debug_flag)
@@ -176,10 +168,15 @@ main (int argc, char **argv)
netfs_root_node = httpfs->root;
netfs_startup (bootstrap, 0);
+
for (;;)
netfs_server_loop ();
/* NOT REACHED */
+ free (conn_req);
+ free (dir_tok);
+ free (comm_buf);
free (httpfs);
+
return 0;
}
Index: httpfs/parsehtml.c
===================================================================
--- httpfs.orig/parsehtml.c
+++ httpfs/parsehtml.c
@@ -27,6 +27,7 @@
#include <libxml/HTMLparser.h>
#include <libxml/HTMLtree.h>
#include <libxml/SAX.h>
+#include <argz.h>
#include "httpfs.h"
@@ -110,12 +111,18 @@ error_t parse(struct netnode *node)
if ( debug_flag )
fprintf(stderr,"In the HTML parser for parsing %s\n",parent);
+ char *last_component = dir_tok;
+ char *entry_ptr = NULL;
+
+ while ((entry_ptr = argz_next (dir_tok, dir_tok_len, entry_ptr)))
+ last_component = entry_ptr;
+
/* Create a file for base url */
if ( list_of_entries == NULL )
{
/* The node of the url entered */
list_of_entries = (struct files *)malloc(sizeof(struct files));
- list_of_entries->f_name = strdup(dir_tok[no_of_slashes-1]);
+ list_of_entries->f_name = strdup(last_component);
list_of_entries->parent = strdup("tmp");
list_of_entries->f_size = 0;//content_len;
list_of_entries->f_type = HTTP_FILE;