Hi Samuel,

The previous patch was malformed because I had not used the quilt
command correctly, even though I thought I had.

This new patch fixes the previous one.

Gianluca

On Tue, Jan 13, 2026 at 14:54 Gianluca Cannata
<[email protected]> wrote:
>
> Hello Samuel and the bug-hurd team,
>
> As requested, I am submitting my changes in separate, logical patches.
>
> This is the first patch of the series. In this patch, I have
> refactored the initial URL parsing logic to use the argz library.
>
> Instead of simple string storage, the base URL is now stored as an argz 
> vector.
>
> This change provides a more idiomatic Hurd implementation and creates
> the necessary infrastructure for the upcoming patches, which will
> improve directory traversal and path normalization by iterating over
> these argz components.
>
> The code compiles cleanly and prepares the translator for the
> subsequent path-handling fixes.
>
> Best regards,
>
> Gianluca
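
To make the argz approach described above easier to review, here is a
small standalone sketch of how the URL splitting behaves. The sample
URL and the toy main() are only illustrative; the argz calls are the
ones the patch relies on:

#include <argz.h>
#include <stdio.h>
#include <stdlib.h>

int
main (void)
{
  const char *clean_url = "www.gnu.org/software/hurd/index.html";
  char *dir_tok = NULL;
  size_t dir_tok_len = 0;

  /* Split at every '/': "www.gnu.org\0software\0hurd\0index.html" */
  if (argz_create_sep (clean_url, '/', &dir_tok, &dir_tok_len) != 0)
    {
      perror ("argz_create_sep");
      return 1;
    }

  printf ("components: %zu\n", argz_count (dir_tok, dir_tok_len));

  /* Walk the vector; the first component is the hostname. */
  char *entry = NULL;
  while ((entry = argz_next (dir_tok, dir_tok_len, entry)))
    printf ("  %s\n", entry);

  free (dir_tok);
  return 0;
}

In the patch the resulting vector lives in the globals dir_tok and
dir_tok_len, and it is walked with argz_next wherever the old code
indexed dir_tok[i].
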
Index: httpfs/extract.c
===================================================================
--- httpfs.orig/extract.c
+++ httpfs/extract.c
@@ -24,6 +24,7 @@
 #include<netdb.h>
 #include<stdlib.h>
 #include<unistd.h>
+#include<argz.h>
 
 #include"httpfs.h"
 
@@ -117,7 +118,7 @@ void extract(char *_string,char *parent)
 			strcpy(temp,strchr(temp,'/'));
 			temp++;
 		}
-		if ( no_of_slashes_here < no_of_slashes ) 
+		if ( no_of_slashes_here < argz_count (dir_tok, dir_tok_len) ) 
 		{
 			/* not going to support top level directory
 			 * from a given directory
@@ -137,7 +138,12 @@ void extract(char *_string,char *parent)
 		}
 
 		token = strdupa(string);
-		for ( i=0 ; i<no_of_slashes-1 ; i++ ) 
+
+		int base_depth = argz_count (dir_tok, dir_tok_len);
+		char *current_dir_tok_part = dir_tok;
+
+
+		for ( i=0 ; i<(base_depth-1) ; i++ ) 
 		{
 			/* extract file and directory from the string
 			 * like /file/temp/a.html
@@ -145,11 +151,19 @@ void extract(char *_string,char *parent)
 			 * extract temp fill it as a directory under file/
 			 * extract a.html fill it as a file under temp/ */
 			
-			temp = strdupa(token); 
-			if ( strcmp(dir_tok[i],strtok(temp,"/")) )
-				return;
-			strcpy(token,strchr(token,'/'));
-			token++;
+			temp = strdupa(token);
+			char *current_token = strtok (temp, "/");
+
+			if (current_dir_tok_part && current_token)
+				if (strcmp (current_dir_tok_part, current_token) != 0)
+					return;
+
+			char *next_slash = strchr (token, '/');
+			if (next_slash)
+				token = next_slash + 1;
+
+			current_dir_tok_part = argz_next (dir_tok, dir_tok_len, current_dir_tok_part);
+
 		}
 		parent = strdupa("tmp");
 		string = strdupa(token);
Index: httpfs/http.c
===================================================================
--- httpfs.orig/http.c
+++ httpfs/http.c
@@ -28,12 +28,24 @@
 #include <string.h>
 #include <malloc.h>
 #include <stddef.h>
+#include <argz.h>
 
 #include "httpfs.h"
 
 #include <hurd/hurd_types.h>
 #include <hurd/netfs.h>
 
+/* Extract the path component of a request string; return "/" when no path is present. */
+static const char *get_safe_path(const char *req) {
+    if (!req || strlen(req) == 0) return "/";
+    const char *p = strstr(req, "://");
+    if (p) {
+        p = strchr(p + 3, '/');
+        return p ? p : "/";
+    }
+    return (req[0] == '/') ? req : "/";
+}
+
 /* do a DNS lookup for NAME and store result in *ENT */
 error_t lookup_host (char *url, struct hostent **ent) 
 {
@@ -63,6 +75,8 @@ error_t open_connection(struct netnode *
 	char delimiters0[] = " ";
 	char delimiters1[] = "\n";
 
+	const char *relative_path = get_safe_path (node->conn_req);
+
 	bzero(&dest,sizeof(dest));
 	dest.sin_family = AF_INET;
 	dest.sin_port = htons (port);
@@ -71,7 +85,7 @@ error_t open_connection(struct netnode *
 	{
 		/* connection is not through a proxy server
 		 * find IP addr. of remote server */
-		err = lookup_host (node->url, &hptr);
+		err = lookup_host (dir_tok, &hptr);
 		if (err)
 		{
 			fprintf(stderr,"Could not find IP addr %s\n",node->url);
@@ -92,8 +106,8 @@ error_t open_connection(struct netnode *
 	}
 
 	if (debug_flag)
-		fprintf (stderr, "trying to open %s:%d/%s\n", node->url,
-				port, node->conn_req);
+		fprintf (stderr, "trying to open %s:%d%s\n", dir_tok,
+				port, relative_path);
 
 	*fd = socket (AF_INET, SOCK_STREAM, 0);
 	if (*fd == -1)
@@ -110,7 +124,7 @@ error_t open_connection(struct netnode *
 	}
 
 	/* Send a HEAD request find header length */
-	sprintf(buffer,"HEAD %s HTTP/1.0\n\n",node->conn_req);
+	sprintf(buffer,"HEAD %s HTTP/1.0\n\n", relative_path);
 	towrite = strlen (buffer);
 	written = TEMP_FAILURE_RETRY (write (*fd, buffer, towrite));
 	if ( written == -1 || written < towrite )
@@ -177,19 +191,17 @@ error_t fill_dirnode (struct netnode *di
 	error_t err = 0;
 	struct node *nd, **prevp;
 	struct files *go;
-	char *comm_buf,*url,*conn_req,*f_name,*temp,*temp1;
 
 	if (debug_flag)
 		fprintf (stderr, "filling out dir %s\n", dir->file_name);
 	
 	if ( dir->type == HTTP_DIR_NOT_FILLED ) {
-		/* it is an unfilled directory so send a GET request for that
-		 * directory and parse the incoming HTML stream to get the file 
-		 * and directories within that
-		 * and Fill the intermediate data-structure *file */
 		err = parse(dir);
-		if ( err )
-			return err;
+		if ( err ) {
+			fprintf (stderr, "httpfs: Parse error for %s\n", dir->file_name);
+			dir->type = HTTP_DIR;
+			return 0;
+		}
 		dir->type = HTTP_DIR;
 	}
 
@@ -200,103 +212,51 @@ error_t fill_dirnode (struct netnode *di
 	
 	for(go=list_of_entries;go!=NULL;go=go->next)
 	{
-		/* *file linked list contains all the file info obtained from
-		 * parsing the <a href="..">
-		 * select the ones belonging to this particular directory
-		 * and fill its node */
-		
-		if(strcmp(dir->file_name,go->parent)==0)
-		{
-			/* got a file in this directory 
-			 * directory under consideration is dir->file_name
-			 * so have to fetch all files whose parent is
-			 * dir->file_name, i.e. dir->file_name==go->parent */
-			
-			if ( go->f_type == HTTP_URL ) 
-			{
-				/* its an url 
-				 * url is shown as regular file 
-				 * its name is altered by changing / to .
-				 * www.gnu.org/gpl.html will be changed to
-				 * www.gnu.org.gpl.html */
-				char *slash;
-				conn_req=(char *)malloc((strlen(go->f_name)+8)*sizeof(char));
-				slash = strchr(go->f_name, '/');
-				if (slash)
-					url = strndup(go->f_name, slash - go->f_name);
+		/* Skip the host entry itself to avoid infinite recursion */
+		if (strcmp (go->f_name, dir_tok) == 0) continue;
+
+		if (strcmp (dir->file_name, go->parent) == 0) {
+			char *f_name = strdup (go->f_name);
+			char *url = strdup (dir_tok);
+			char *conn_req;
+
+			/* Build conn_req (the complete URL path sent to the server) */
+			if (go->f_type == HTTP_URL) {
+				asprintf (&conn_req, "http://%s", go->f_name);
+			} else {
+				/* Concatenate the parent's path with the file name */
+				const char *p_path = dir->conn_req;
+				int p_len = strlen (p_path);
+
+				if (p_path[p_len - 1] == '/')
+					asprintf (&conn_req, "%s%s", p_path, go->f_name);
 				else
-					url = strdup(go->f_name);
-				f_name = strdup(go->f_name);
-				int i;
-				for (i = 0; f_name[i] != '\0'; i++)
-					if (f_name[i] == '/')
-						f_name[i] = '.';
-				
-				sprintf(conn_req,"%s%s","http://",go->f_name);
+					asprintf (&conn_req, "%s/%s", p_path, go->f_name);
 			}
-			else 
-			{	
-				/* its not an url */
-				f_name = strdup(go->f_name);
-				url=strdup(dir->url);
-				if ( go != list_of_entries )
-				{
-					size_t conn_req_size = strlen(dir->conn_req) + strlen(go->f_name) + 1;
-					if( go->f_type==HTTP_DIR || go->f_type==HTTP_DIR_NOT_FILLED )
-						conn_req_size++; /* We'll need to add a trailing slash later. */
-					conn_req=(char *)malloc(conn_req_size*sizeof(char));
-					sprintf(conn_req,"%s%s",dir->conn_req,go->f_name);
-				}
-				else
-				{
-					if ( dir_tok[no_of_slashes] == NULL ) 
-					{
-						/* the file corresponding to base url
-						 * user has given a file explicitly in
-						 * the url */
-						size_t conn_req_size = strlen(dir->conn_req) + strlen(go->f_name) + 1;
-						if( go->f_type==HTTP_DIR || go->f_type==HTTP_DIR_NOT_FILLED )
-							conn_req_size++; /* We'll need to add a trailing slash later. */
-						conn_req=(char *)malloc(conn_req_size*sizeof(char));
-						sprintf(conn_req,"%s%s",dir->conn_req,go->f_name);
-					}
-					else 
-					{
-						/* the file corresponding to base url
-						 * user has not given a file explicitly 
-						 * the url so its the index.html */
-						size_t conn_req_size = strlen(dir->conn_req) + 1;
-						if( go->f_type==HTTP_DIR || go->f_type==HTTP_DIR_NOT_FILLED )
-							conn_req_size++; /* We'll need to add a trailing slash later. */
-						conn_req=(char *)malloc(conn_req_size*sizeof(char));
-						sprintf(conn_req,"%s",dir->conn_req);
-					}
+
+			/* If it is a directory, make sure it ends with a slash */
+			if (go->f_type == HTTP_DIR || go->f_type == HTTP_DIR_NOT_FILLED) {
+				if (conn_req[strlen (conn_req) - 1] != '/') {
+					char *tmp;
+					asprintf (&tmp, "%s/", conn_req);
+					free (conn_req);
+					conn_req = tmp;
 				}
-				if( go->f_type==HTTP_DIR || go->f_type==HTTP_DIR_NOT_FILLED ) 
-					/* the filled file is directory so it has to end
-					 * with a / */
-					strcat(conn_req,"/");
 			}
-			comm_buf=(char *)malloc((strlen(conn_req)+20)*sizeof(char));
-			sprintf(comm_buf,"GET %s HTTP/1.0",conn_req);
 
-			nd = httpfs_make_node (go->f_type,url,conn_req,comm_buf,f_name);
-			if (!nd)
-			{
-				err = ENOMEM;
-				return err;
-			}
-			free(comm_buf);
-			free(conn_req);
-			free(f_name);
-			*prevp = nd;
-			nd->prevp = prevp;
-			prevp = &nd->next;
-			dir->num_ents++;
-			if (dir->noents)
+			nd = httpfs_make_node (go->f_type, url, conn_req, "", f_name);
+			if (nd) {
+				*prevp = nd;
+				nd->prevp = prevp;
+				prevp = &nd->next;
+				dir->num_ents++;
 				dir->noents = FALSE;
+			}
+
+			free (conn_req); free (url); free (f_name);
 		}
 	}
-	return err;
+
+	return 0;
 }
 
Index: httpfs/httpfs.c
===================================================================
--- httpfs.orig/httpfs.c
+++ httpfs/httpfs.c
@@ -24,6 +24,7 @@
 #include <errno.h>
 #include <error.h>
 #include <argp.h>
+#include <argz.h>
 
 #include <hurd/netfs.h>
 
@@ -36,7 +37,8 @@ int mode;
 int no_of_slashes = 0;
 char *url, *conn_req;
 char *ip_addr;
-char *dir_tok[25];
+char *dir_tok = NULL;
+size_t dir_tok_len = 0;
 struct files *list_of_entries = NULL, *this_entry;
 
 struct httpfs *httpfs;		/* filesystem global pointer */
@@ -50,9 +52,12 @@ main (int argc, char **argv)
 {
   error_t err;
   mach_port_t bootstrap;
-  char *temp_url, *temp, *run;
+  char *clean_url;
+  char *host_name = NULL;
   char type;
   char *comm_buf; /* XXX: Is an http request limited to 200 bytes? */
+
+  /* Defaults */
   port = 80;
   debug_flag = 0;
   mode = 1;			/* means directory */
@@ -70,77 +75,60 @@ main (int argc, char **argv)
   if (err)
     error (1, 0, "Map time error.");
 
-  if (strchr (url, '/') == NULL)
-    error (1, 0, "Url must have a /, e.g., www.gnu.org/");
+  /* The base URL given on the command line is already in the global 'url'. */
 
-  conn_req = (char *) malloc ((strlen (url) + 7) * sizeof (char));
-  if (! conn_req)
-    error (1, errno, "Cannot malloc conn_req.");
-
-  temp_url = strdup (url);
-  if (! temp_url)
-    error (1, errno, "Cannot duplicate url.");
-
-  if (!strncmp (temp_url, "http://", 7))
-    /* go ahead of http:// if given in url */
-    temp_url = temp_url + 7;
-
-  if (strchr (temp_url, '/') == NULL)
-    error (1, 0, "Url must have a /, e.g., www.gnu.org/");
-
-  /* XXX: strtok is not reentrant.  This will have to be fixed */
-  temp = strdup (temp_url);
-  url = strtok (temp, "/");
-
-  /* Find the directories given in URL */
-  temp = strdup (temp_url);
-  no_of_slashes++;
-  strcpy (temp, strchr (temp, '/'));
-  temp++;
-  while (strchr (temp, '/') != NULL)
-    {
-      /* go to the end of url */
-      run = strdup (temp);
-      dir_tok[no_of_slashes - 1] = strtok (run, "/");
-      strcpy (temp, strchr (temp, '/'));
-      temp++;
-      no_of_slashes++;
-    }
-  if (strlen (temp))
-    {
-      /* user has input a specific html file in the url */
-      dir_tok[no_of_slashes - 1] = strdup (temp);
-      dir_tok[no_of_slashes] = NULL;
-    }
-  else
-    {
-      /* user has input just an url no file names specifed 
-       * assume the base url request is to index.html */
-      dir_tok[no_of_slashes - 1] = strdup ("index.html");
-      dir_tok[no_of_slashes] = strdup ("index.html");
-    }
+  if (url == NULL || strlen (url) == 0)
+	  error (1, 0, "URL must not be empty.");
+
+  /* Remove http:// if present */
+  clean_url = url;
+  if (strncasecmp (clean_url, "http://", 7) == 0)
+	  clean_url = clean_url + 7;
+
+  /* Break the URL into its components with argz; this replaces strtok. */
+  /* argz_create_sep splits 'clean_url' at every '/' character. */
+  if (argz_create_sep (clean_url, '/', &dir_tok, &dir_tok_len) != 0)
+    error (1, errno, "Cannot create directory hierarchy from parsing the URL.");
+
+  /* The first argz component is the hostname. */
+  host_name = dir_tok;
+
+  if (!host_name)
+	error (1, 0, "Invalid URL: No hostname found.");
+
+  /* Build the complete URL for the GET request by iterating the argz vector */
+  /* e.g. conn_req: "http://host/path/file" */
+  size_t total_len = 7 + 1; /* "http://" + null terminator */
+  char *entry = NULL;
 
+  /* Calculate the required total length first */
+  while ((entry = argz_next (dir_tok, dir_tok_len, entry)))
+	  total_len = total_len + strlen (entry) + 1; /* +1 is for the slash */
+
+  conn_req = (char *) malloc (total_len);
+  if (!conn_req)
+	  error (1, errno, "Cannot allocate connection request string.");
+
+  /* Build our string */
   strcpy (conn_req, "http://");
-  if (temp_url[strlen (temp_url) - 1] == '/')
-    {
-      strcat (conn_req, temp_url);
-      err = asprintf (&comm_buf, "GET %s HTTP/1.0", conn_req);
-    }
-  else
-    {
-      while (strchr (temp_url, '/') != NULL)
-	{
-	  temp = strdup (temp_url);
-	  strcat (conn_req, strtok (temp, "/"));
-	  strcat (conn_req, "/");
-	  strcpy (temp_url, strchr (temp_url, '/'));
-	  temp_url++;
-	}
-      err = asprintf (&comm_buf, "GET %s%s HTTP/1.0", conn_req, temp_url);
-    }
-  if (err < 0)  /* check the return value of asprintf */
-    error (1, errno, "Cannot allocate comm_buf.");
 
+  entry = NULL;
+  int first = 1;
+  while ((entry = argz_next (dir_tok, dir_tok_len, entry))) {
+	strcat (conn_req, entry);
+	/* Append a '/' unless this is the last component; the host always gets one */
+	if (entry < (dir_tok + dir_tok_len - strlen (entry) - 1) || first)
+		strcat (conn_req, "/");
+
+	first = 0;
+  }
+
+  /* Creation of GET request buffer */
+  /* TODO: For modern HTTP, we should also send a "Host: %s\r\n" header here */
+  if (asprintf (&comm_buf, "GET %s HTTP/1.0\r\n\r\n", conn_req) < 0)
+    error (1, errno, "Cannot allocate command request string.");
+
+  /* Initialize the filesystem */
   httpfs = (struct httpfs *) malloc (sizeof (struct httpfs));
   if (! httpfs)
     error (1, errno, "Cannot allocate httpfs.");
@@ -149,26 +137,30 @@ main (int argc, char **argv)
   httpfs->uid = getuid ();
   httpfs->gid = getgid ();
   httpfs->next_inode = 0;
+
   if (mode)
     type = HTTP_DIR;
   else
     type = HTTP_FILE;
 
+  /* Create root node */
   /* XXX: why is tmp hardcoded? */
   httpfs->root = httpfs_make_node (type, url, conn_req, comm_buf, "tmp");
+
   netfs_init ();
-  /* translator set to a directory */
+
+  /* If it is a directory, populate its contents. */
   if (mode)
     {
-      /* fill the directory node with files 
-       * call parser for that 
+      /* fill the directory node with files
+       * call parser for that
        * only the current directory is filled
        * subdirectories within them are indicated by type
        * HTTP_DIR_UNFILLED, and are filled as on demand when an
        * ls request comes for them */
       err = parse (httpfs->root->nn);
       if (err)
-	error (1, err, "Error in Parsing.");
+        error (1, err, "Error in Parsing.");
     }
 
   if (debug_flag)
@@ -176,10 +168,15 @@ main (int argc, char **argv)
 
   netfs_root_node = httpfs->root;
   netfs_startup (bootstrap, 0);
+
   for (;;)
     netfs_server_loop ();
 
   /* NOT REACHED */
+  free (conn_req);
+  free (dir_tok);
+  free (comm_buf);
   free (httpfs);
+
   return 0;
 }
Index: httpfs/parsehtml.c
===================================================================
--- httpfs.orig/parsehtml.c
+++ httpfs/parsehtml.c
@@ -27,6 +27,7 @@
 #include <libxml/HTMLparser.h>
 #include <libxml/HTMLtree.h>
 #include <libxml/SAX.h>
+#include <argz.h>
 
 #include "httpfs.h"
 
@@ -110,12 +111,18 @@ error_t parse(struct netnode *node)
 	if ( debug_flag )
 		fprintf(stderr,"In the HTML parser for parsing %s\n",parent);
 	
+	char *last_component = dir_tok;
+	char *entry_ptr = NULL;
+
+	while ((entry_ptr = argz_next (dir_tok, dir_tok_len, entry_ptr)))
+		last_component = entry_ptr;
+
 	/* Create a file for base url */
 	if ( list_of_entries == NULL )
 	{
 		/* The node of the url entered */
 		list_of_entries = (struct files *)malloc(sizeof(struct files));
-		list_of_entries->f_name = strdup(dir_tok[no_of_slashes-1]);
+		list_of_entries->f_name = strdup(last_component);
 		list_of_entries->parent = strdup("tmp");
 		list_of_entries->f_size = 0;//content_len;
 		list_of_entries->f_type = HTTP_FILE;
