The attached patch is a cleanup of the recurse_scan_intern() function in src/core/share.c. I was looking at the code, and noticed redundant calls to stat(), and by the time I got through eliminating them the structure of the code had changed noticeably.

The new version is driven by a single while() loop. Directories are added to a list for later processing, while files are fully processed immediately (no more intermediate list).

Patch was made against SVN r12353

Summary of changes:

src/core/share.c:
 Modified recurse_scan_intern()
 Added shared_file_valid_extension()
src/core/share/h:
 Added shared_file_valid_extension() prototype
src/lib/file.h
 Added S_ISLNK macro (this seemed like the correct file for this)

Testing shows that the new code is slightly faster than the original (about 2% faster for 7659 files processed).

I have NOT replaced the call to "request_sha1()" - I just added a large comment where it should be added. This change need to be synchronized with the changes to "src/core/huge.c"

Lloyd Bryant

diff -ru -X diff.exclude /usr/local/src/base.gtkg/gtk-gnutella/src/core/share.c src/core/share.c
--- /usr/local/src/base.gtkg/gtk-gnutella/src/core/share.c	2006-11-18 01:17:55.000000000 -0700
+++ src/core/share.c	2006-11-18 16:45:11.000000000 -0700
@@ -932,15 +932,15 @@
 static void
 recurse_scan_intern(const gchar * const base_dir, const gchar * const dir)
 {
-	GSList *exts = NULL;
 	DIR *directory;			/* Dir stream used by opendir, readdir etc.. */
 	struct dirent *dir_entry;
 	GSList *files = NULL;
 	GSList *directories = NULL;
 	GSList *sl;
-	guint i;
-	const gchar *entry_end;
 	gchar *dir_name;
+	tm_t base, current, elapsed;
+
+	tm_now_exact(&base); 
 
 	g_return_if_fail('\0' != dir[0]);
 	g_return_if_fail(is_absolute_path(base_dir));
@@ -951,145 +951,156 @@
 		return;
 	}
 
-	while ((dir_entry = readdir(directory))) {
-		struct stat sb;
-		gchar *full;
-
-		if (dir_entry->d_name[0] == '.')	/* Hidden file, or "." or ".." */
-			continue;
-
-		full = make_pathname(dir, dir_entry->d_name);
-		if (stat(full, &sb)) {
-			g_warning("can't stat %s: %s", full, g_strerror(errno));
-			break;
-		}
-		if (S_ISDIR(sb.st_mode)) {
-			if (!scan_ignore_symlink_dirs || !is_symlink(full)) {
-				directories = g_slist_prepend(directories, full);
-				full = NULL;
-			}
-		} else if (S_ISREG(sb.st_mode)) {
-			if (!scan_ignore_symlink_regfiles || !is_symlink(full)) {
-				files = g_slist_prepend(files, full);
-				full = NULL;
-			}
-		}
-		G_FREE_NULL(full);
-	}
-
+	/* Get relative path if required */
 	if (search_results_expose_relative_paths) {
 		dir_name = get_relative_path(base_dir, dir);
 	} else {
 		dir_name = NULL;
 	}
+	
+	/* Process the entries in this directory */
+	while ((dir_entry = readdir(directory))) {
+		struct stat file_stat;
+		gchar *fullpath;
+		gboolean is_slink = FALSE;
+		gchar *name;
+		struct shared_file *found = NULL;
 
-	for (i = 0, sl = files; sl; i++, sl = g_slist_next(sl)) {
-		const gchar *full, *name;
-
-		full = sl->data;
-
-		/*
-		 * In the "tmp" directory, don't share files that have a trailer.
-		 * It's probably a file being downloaded, and which is not complete yet.
-		 * This check is necessary in case they choose to share their
-		 * downloading directory...
-		 */
 
-		name = strrchr(full, G_DIR_SEPARATOR);
-		g_assert(name && G_DIR_SEPARATOR == name[0]);
-		name++;						/* Start of file name */
+		if (dir_entry->d_name[0] == '.')	/* Hidden file, or "." or ".." */
+			continue;
 
-		entry_end = strchr(name, '\0');
+		fullpath = make_pathname(dir, dir_entry->d_name);
 
-		for (exts = extensions; exts; exts = exts->next) {
-			struct extension *e = exts->data;
-			const gchar *start = entry_end - (e->len + 1);	/* +1 for "." */
+		/* First, lstat the file */
+		if (lstat(fullpath, &file_stat)) {
+			g_warning("can't lstat %s: %s", fullpath, g_strerror(errno));
+			G_FREE_NULL(fullpath);
+			continue;
+		} 
 
-			/*
-			 * Look for the trailing chars (we're matching an extension).
-			 * Matching is case-insensitive, and the extension opener is ".".
-			 *
-			 * An extension "--all--" matches all files, even if they
-			 * don't have any extension. [Patch from Zygo Blaxell].
-			 */
+		/* If it's a symlink, call stat to get info on the linked file */
+		if (S_ISLNK(file_stat.st_mode)) {
+			is_slink = TRUE;
+			if (stat(fullpath, &file_stat)) {
+				g_warning("Broken symlink %s: %s", fullpath, g_strerror(errno));
+				G_FREE_NULL(fullpath);
+				continue;
+			}
+		}
+		
+		/*
+		 * For symlinks, we check whether we are supposed to process symlinks
+		 * for that type of entry, then either proceed or skip the entry. 
+		 */
+		if (S_ISDIR(file_stat.st_mode)) {
+			if (!is_slink || !scan_ignore_symlink_dirs) {
 
-			if (
-				0 == ascii_strcasecmp("--all--", e->str) ||	/* All files */
-				(start >= name && *start == '.' &&
-					0 == ascii_strcasecmp(start + 1, e->str))
-			) {
-				struct shared_file *found = NULL;
-				struct stat file_stat;
+				/* If a directory, add to list for later processing */
+				directories = g_slist_prepend(directories, fullpath);
+				fullpath = NULL;
+			}
+		} else if (S_ISREG(file_stat.st_mode)) {
+			if (!is_slink || !scan_ignore_symlink_regfiles) {
 
-				if (share_debug > 5)
-					g_message("recurse_scan: full=\"%s\"", full);
+				/*
+				 * It's a regular file - validate it, and add it to 
+				 * shared_files
+				 */
+				name = strrchr(fullpath, G_DIR_SEPARATOR); /* find last '/' */
+				g_assert(name && G_DIR_SEPARATOR == name[0]);
+				name++;  /* Increment to start of filename */
 
-				if (contains_control_chars(full)) {
+				/* Some sanity checks on the file */ 
+				if (contains_control_chars(fullpath)) {
 					g_warning("Not sharing filename with control characters: "
-						"\"%s\"", full);
-					break;
+						"\"%s\"", fullpath);
+					G_FREE_NULL(fullpath);
+					continue;
 				}
 
-				if (stat(full, &file_stat) == -1) {
-					g_warning("can't stat %s: %s", full, g_strerror(errno));
-					break;
+				if (too_big_for_gnutella(file_stat.st_size)) {
+					g_warning("File is too big to be shared: \"%s\"", fullpath);
+					G_FREE_NULL(fullpath);
+					continue;
 				}
 
 				if (0 == file_stat.st_size) {
 					if (share_debug > 5)
-						g_warning("Not sharing empty file: \"%s\"", full);
-					break;
+						g_warning("Not sharing empty file: \"%s\"", fullpath);
+					G_FREE_NULL(fullpath);
+					continue;
 				}
 
-				if (!S_ISREG(file_stat.st_mode)) {
-					g_warning("Not sharing non-regular file: \"%s\"", full);
-					break;
+				if (!shared_file_valid_extension(name)) {
+					G_FREE_NULL(fullpath);
+					continue;
 				}
 
-				if (too_big_for_gnutella(file_stat.st_size)) {
-					g_warning("File is too big to be shared: \"%s\"", full);
-					break;
-				}
+				if (share_debug > 5)
+					g_message("recurse_scan: full=\"%s\"", fullpath);
 
+				/* Checks complete - add the file */
 				found = shared_file_alloc();
-				found->file_path = atom_str_get(full);
+				found->file_path = atom_str_get(fullpath);
 				found->relative_path = dir_name ? atom_str_get(dir_name) : NULL;
 				found->file_size = file_stat.st_size;
 				found->mtime = file_stat.st_mtime;
-				found->content_type =
-					share_mime_type(SHARE_M_APPLICATION_BINARY);
+				found->content_type = share_mime_type(
+										SHARE_M_APPLICATION_BINARY); 
+
 
 				if (shared_file_set_names(found, name)) {
 					shared_file_free(&found);
-					break;
-				}
-
-				if (!sha1_is_cached(found) && file_info_has_trailer(full)) {
-					/*
-	 		 	 	 * It's probably a file being downloaded, and which is
-					 * not complete yet. This check is necessary in case
-					 * they choose to share their downloading directory...
-	 		  	 	 */
+					continue; 
+				}	
 
-					g_warning("will not share partial file \"%s\"", full);
-					shared_file_free(&found);
-					break;
+				if (!sha1_is_cached(found)) {
+					if (file_info_has_trailer(found->file_path)) { 
+						/*
+ 						 * It's probably a file being downloaded, and which is
+						 * not complete yet. This check is necessary in case
+						 * they choose to share their downloading directory...
+	  	 				 */
+
+						g_warning("will not share partial file \"%s\"", 
+									fullpath);
+						shared_file_free(&found);
+						continue;  /* while ((dir_entry... */ 
+					}
 				}
+				/* XXX
+				 * An else clause here would be if the file WAS found in the
+				 * sha1_cache.  Good place to set the SHA1 in "found".
+				 */
 
 				files_scanned++;
 				bytes_scanned += found->file_size; 
 				st_insert_item(search_table, found->name_canonic, found);
-				shared_files = g_slist_prepend(shared_files,
-										shared_file_ref(found));
-				break;			/* for loop */
+				shared_files = g_slist_prepend(shared_files, 
+												shared_file_ref(found));
+				G_FREE_NULL(fullpath);
 			}
 		}
-		G_FREE_NULL(sl->data);
 
-		if (!(i & 0x3f)) {
+		/*
+		 * gcu_gtk_main_flush() processes all pending GUI events.
+		 *
+		 * I'm setting it to trigger anytime the elapsed exceeds 50ms.
+		 * This should keep the GUI responsive, even if we hit a 
+		 * directory with a large number of files to process.
+		 */
+		tm_now_exact(&current);
+		tm_elapsed(&elapsed, &current, &base);
+		if (50 <= tm2ms(&elapsed)) {
+			memcpy(&base, &current, sizeof(tm_t));
 			gcu_gtk_main_flush();
 		}
 	}
+
+	/* Execute this at least once per directory processed */
+	gcu_gtk_main_flush();
+
 	gcu_gui_update_files_scanned();	/* Interim view */
 
 	atom_str_free_null(&dir_name);
@@ -2678,4 +2689,43 @@
 	return files_scanned;
 }
 
+/**
+ * Verify that a file extension is valid for sharing
+ *
+ * @param filename  The name of the file to check (without path).
+ */
+gboolean
+shared_file_valid_extension(gchar *filename)
+{
+		GSList *exts;
+		gchar *entry_end;
+		struct extension *e;
+		gchar *start;
+
+		entry_end = strchr(filename, '\0');
+
+		/* 
+	 	 * Match the file extension (if any) against the extensions list.
+		 * All valid extensions start with '.'.  Matching is case-insensitive
+		 */
+		for (exts = extensions; exts; exts = exts->next) {
+			e = exts->data;
+			start = entry_end - (e->len + 1);  /* +1 for "." */
+
+			/*
+			 * An extension "--all--" matches all files, even those that don't
+			 * have any extension. [Patch from Zygo Blaxell].
+			 */
+			if (0 == ascii_strcasecmp("--all--", e->str) ||
+				(start >= filename && *start == '.' &&
+				 0 == ascii_strcasecmp(start + 1, e->str)))
+			{
+				return TRUE;
+			}
+
+		}
+
+		return FALSE;
+}
+
 /* vi: set ts=4 sw=4 cindent: */
diff -ru -X diff.exclude /usr/local/src/base.gtkg/gtk-gnutella/src/core/share.h src/core/share.h
--- /usr/local/src/base.gtkg/gtk-gnutella/src/core/share.h	2006-11-18 01:17:55.000000000 -0700
+++ src/core/share.h	2006-11-18 16:22:00.000000000 -0700
@@ -130,6 +130,7 @@
 fileinfo_t *shared_file_fileinfo(const shared_file_t *sf);
 const gchar *shared_file_content_type(const shared_file_t *sf);
 void shared_file_from_fileinfo(fileinfo_t *fi);
+gboolean shared_file_valid_extension(gchar *filename);
 
 const gchar *map_muid_to_query_string(const gchar muid[GUID_RAW_SIZE]);
 
diff -ru -X diff.exclude /usr/local/src/base.gtkg/gtk-gnutella/src/lib/file.h src/lib/file.h
--- /usr/local/src/base.gtkg/gtk-gnutella/src/lib/file.h	2006-11-18 01:17:42.000000000 -0700
+++ src/lib/file.h	2006-11-18 15:47:49.000000000 -0700
@@ -70,5 +70,14 @@
 FILE *file_fopen(const gchar *path, const gchar *mode);
 FILE *file_fopen_missing(const gchar *path, const gchar *mode);
 
+/*
+ * S_ISLNK is a standard macro on most *nix variants, but the man page notes
+ * that it is not POSIX.  This means we can't rely on it being available 
+ * everywhere.
+ */
+#ifndef S_ISLNK
+#define S_ISLNK(mode) ((mode & S_IFMT) == S_IFLNK)
+#endif
+
 #endif /* _file_ */
 /* vi: set ts=4 sw=4 cindent: */

-------------------------------------------------------------------------
Take Surveys. Earn Cash. Influence the Future of IT
Join SourceForge.net's Techsay panel and you'll get the chance to share your
opinions on IT & business topics through brief surveys - and earn cash
http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV
_______________________________________________
Gtk-gnutella-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/gtk-gnutella-devel

Reply via email to