Module Name:    src
Committed By:   manu
Date:           Mon Sep 20 07:00:22 UTC 2010

Modified Files:
        src/lib/libperfuse: ops.c perfuse.c perfuse_priv.h subr.c

Log Message:
- performance improvement for read, readdir and write. Now that we use
SOCK_DGRAM, we can send many pages at once without hitting any bug

- when creating a file, it is opened for FUSE, but not for the kernel.
If the kernel does not do a subsequent open, we have a leak. We fight
against this by trying to close such files that the kernel has left
unopened for some time.

- some code refactoring to make message exchange debug easier (more to come)


To generate a diff of this commit:
cvs rdiff -u -r1.15 -r1.16 src/lib/libperfuse/ops.c
cvs rdiff -u -r1.6 -r1.7 src/lib/libperfuse/perfuse.c
cvs rdiff -u -r1.10 -r1.11 src/lib/libperfuse/perfuse_priv.h
cvs rdiff -u -r1.4 -r1.5 src/lib/libperfuse/subr.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/lib/libperfuse/ops.c
diff -u src/lib/libperfuse/ops.c:1.15 src/lib/libperfuse/ops.c:1.16
--- src/lib/libperfuse/ops.c:1.15	Wed Sep 15 01:51:43 2010
+++ src/lib/libperfuse/ops.c	Mon Sep 20 07:00:21 2010
@@ -1,4 +1,4 @@
-/*  $NetBSD: ops.c,v 1.15 2010/09/15 01:51:43 manu Exp $ */
+/*  $NetBSD: ops.c,v 1.16 2010/09/20 07:00:21 manu Exp $ */
 
 /*-
  *  Copyright (c) 2010 Emmanuel Dreyfus. All rights reserved.
@@ -43,7 +43,8 @@
 
 extern int perfuse_diagflags;
 
-static int node_close_common(struct puffs_usermount *, puffs_cookie_t, int);
+static int xchg_msg(struct puffs_usermount *, puffs_cookie_t, 
+    perfuse_msg_t *, size_t, enum perfuse_xchg_pb_reply); 
 static int no_access(puffs_cookie_t, const struct puffs_cred *, mode_t);
 static void fuse_attr_to_vap(struct perfuse_state *,
     struct vattr *, struct fuse_attr *);
@@ -94,8 +95,8 @@
 #define IFTOVT(mode) (iftovt_tab[((mode) & S_IFMT) >> 12])
 #define VTTOIF(indx) (vttoif_tab[(int)(indx)])
 
-static int
-node_close_common(pu, opc, mode)
+int
+perfuse_node_close_common(pu, opc, mode)
 	struct puffs_usermount *pu;
 	puffs_cookie_t opc;
 	int mode;
@@ -148,7 +149,8 @@
 			 __func__, (void *)opc, pnd->pnd_ino, fri->fh);
 #endif
 
-	if ((error = XCHG_MSG(ps, pu, pm, NO_PAYLOAD_REPLY_LEN)) != 0)
+	if ((error = xchg_msg(pu, opc, pm,
+			      NO_PAYLOAD_REPLY_LEN, wait_reply)) != 0)
 		goto out;
 
 	ps->ps_destroy_msg(pm);
@@ -163,6 +165,30 @@
 	return error;
 }
 
+/* ARGSUSED1 */
+static int
+xchg_msg(pu, opc, pm, len, wait)
+	struct puffs_usermount *pu;
+	puffs_cookie_t opc;
+	perfuse_msg_t *pm;
+	size_t len;
+     	enum perfuse_xchg_pb_reply wait;
+{
+	struct perfuse_state *ps;
+	int error;
+
+	ps = puffs_getspecific(pu);
+
+#ifdef PERFUSE_DEBUG
+	if ((perfuse_diagflags & PDF_FUSE) && (opc != 0))
+		DPRINTF("file = \"%s\"\n", 
+			(char *)PNPATH((struct puffs_node *)opc));
+#endif
+	error = ps->ps_xchg_msg(pu, pm, len, wait);
+
+	return error;
+}
+
 static int
 no_access(opc, pcr, mode)
 	puffs_cookie_t opc;
@@ -285,7 +311,7 @@
 	pm = ps->ps_new_msg(pu, opc, FUSE_LOOKUP, len, NULL);
 	(void)strlcpy(_GET_INPAYLOAD(ps, pm, char *), path, len);
 
-	if ((error = XCHG_MSG(ps, pu, pm, sizeof(*feo))) != 0)
+	if ((error = xchg_msg(pu, opc, pm, sizeof(*feo), wait_reply)) != 0)
 		goto out;
 
 	feo = GET_OUTPAYLOAD(ps, pm, fuse_entry_out);
@@ -327,7 +353,7 @@
 
 	ps =  puffs_getspecific(pu);
 
-	if ((error = XCHG_MSG(ps, pu, pm, sizeof(*feo))) != 0)
+	if ((error = xchg_msg(pu, opc, pm, sizeof(*feo), wait_reply)) != 0)
 		goto out;
 
 	feo = GET_OUTPAYLOAD(ps, pm, fuse_entry_out);
@@ -357,7 +383,8 @@
 	/*
 	 * A fuse_attr_out is returned, but we ignore it.
 	 */
-	error = XCHG_MSG(ps, pu, pm, sizeof(struct fuse_attr_out));
+	error = xchg_msg(pu, (puffs_cookie_t)pn, 
+			 pm, sizeof(struct fuse_attr_out), wait_reply);
 
 	/*
 	 * The parent directory needs a sync
@@ -604,9 +631,6 @@
 	ps = perfuse_getspecific(pu);
 #endif
 
-	/*
-	 * XXX Add a lock he day we go multithreaded
-	 */
 	pnd = PERFUSE_NODE_DATA(opc);
 	pcq.pcq_type = type;
 	pcq.pcq_cc = puffs_cc_getcc(pu);
@@ -644,9 +668,6 @@
 	struct perfuse_node_data *pnd;
 	int dequeued;
 
-	/*
-	 * XXX Add a lock he day we go multithreaded
-	 */
 	pnd = PERFUSE_NODE_DATA(opc);
 	dequeued = 0;
 	TAILQ_FOREACH(pcq, &pnd->pnd_pcq, pcq_next) {
@@ -684,7 +705,7 @@
 	int error;
 
 	ps = puffs_getspecific(pu);
-
+	
         if (puffs_mount(pu, ps->ps_target, ps->ps_mountflags, ps->ps_root) != 0)
                 DERR(EX_OSERR, "puffs_mount failed");
 
@@ -702,7 +723,7 @@
 	fii->max_readahead = 32 * PAGE_SIZE; 
 	fii->flags = (FUSE_ASYNC_READ|FUSE_POSIX_LOCKS|FUSE_ATOMIC_O_TRUNC);
 
-	if ((error = XCHG_MSG(ps, pu, pm, sizeof(*fio))) != 0)
+	if ((error = xchg_msg(pu, 0, pm, sizeof(*fio), wait_reply)) != 0)
 		DERRX(EX_SOFTWARE, "init message exchange failed (%d)", error);
 
 	fio = GET_OUTPAYLOAD(ps, pm, fuse_init_out);
@@ -729,7 +750,7 @@
 	opc = (puffs_cookie_t)puffs_getroot(pu);
 	pm = ps->ps_new_msg(pu, opc, FUSE_DESTROY, 0, NULL);
 
-	if ((error = XCHG_MSG(ps, pu, pm, UNSPEC_REPLY_LEN)) != 0) {
+	if ((error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply)) != 0){
 		DWARN("unmount %s", ps->ps_target);
 		if (!(flags & MNT_FORCE))
 			goto out;
@@ -759,7 +780,7 @@
 	opc = (puffs_cookie_t)puffs_getroot(pu);
 	pm = ps->ps_new_msg(pu, opc, FUSE_STATFS, 0, NULL);
 
-	if ((error = XCHG_MSG(ps, pu, pm, sizeof(*fso))) != 0)
+	if ((error = xchg_msg(pu, opc, pm, sizeof(*fso), wait_reply)) != 0)
 		goto out;
 
 	fso = GET_OUTPAYLOAD(ps, pm, fuse_statfs_out);
@@ -964,10 +985,17 @@
 
 		opc = (puffs_cookie_t)pn;
 
-		error = perfuse_node_open(pu, opc, FREAD|FWRITE, pcn->pcn_cred);
+		error = perfuse_node_open(pu, opc, FWRITE, pcn->pcn_cred);
 		if (error != 0)	
 			return error;
 
+		/*
+		 * This node has been open in the filesystem,
+		 * but not by the kernel. We will have to close
+		 * it on our own to avoid a leak
+		 */
+		PERFUSE_NODE_DATA(opc)->pnd_flags |= PND_OPENFS;
+
 		return 0;
 	}
 
@@ -989,7 +1017,7 @@
 	(void)strlcpy((char*)(void *)(fci + 1), name, namelen);
 
 	len = sizeof(*feo) + sizeof(*foo);
-	if ((error = XCHG_MSG(ps, pu, pm, len)) != 0)
+	if ((error = xchg_msg(pu, opc, pm, len, wait_reply)) != 0)
 		goto out;
 
 	feo = GET_OUTPAYLOAD(ps, pm, fuse_entry_out);
@@ -1002,7 +1030,7 @@
 	 * so that we can reuse it later
 	 */
 	pn = perfuse_new_pn(pu, opc);
-	perfuse_new_fh((puffs_cookie_t)pn, foo->fh, FWRITE);
+	perfuse_new_fh(pu, (puffs_cookie_t)pn, foo->fh, FWRITE);
 	PERFUSE_NODE_DATA(pn)->pnd_ino = feo->nodeid;
 
 #ifdef PERFUSE_DEBUG
@@ -1020,6 +1048,14 @@
 	 * The parent directory needs a sync
 	 */
 	PERFUSE_NODE_DATA(opc)->pnd_flags |= PND_DIRTY;
+
+	/*
+	 * This node has been open in the filesystem,
+	 * but not by the kernel. We will have to close
+	 * it on our own to avoid a leak
+	 */
+	PERFUSE_NODE_DATA(pn)->pnd_flags |= PND_OPENFS;
+
 out: 
 	ps->ps_destroy_msg(pm);
 
@@ -1113,6 +1149,14 @@
 
 	pn = (struct puffs_node *)opc;
 	if (puffs_pn_getvap(pn)->va_type == VDIR) {
+		/*
+		 * We may open removed files, but it seems much more 
+		 * troublesome to open removed directories. glusterfs says 
+		 * "OPENDIR (null) (fuse_loc_fill() failed)"
+		 */
+		if (pnd->pnd_flags & PND_REMOVED)
+			return ENOENT;
+
 		op = FUSE_OPENDIR;
 		pmode = PUFFS_VREAD|PUFFS_VEXEC;
 	} else {
@@ -1143,10 +1187,20 @@
 	 * Do not open twice, and do not reopen for reading
 	 * if we already have write handle.
 	 */
-	if ((mode & FREAD) && (pnd->pnd_flags & PND_RFH))
-		return 0;
-	if ((mode & FWRITE) && (pnd->pnd_flags & PND_WFH))
+	if (((mode & FREAD) && (pnd->pnd_flags & PND_RFH)) ||
+	    ((mode & FWRITE) && (pnd->pnd_flags & PND_WFH))) {
+		/*
+		 * If the file was created, it was open for
+		 * the filesystem but not for the kernel. This
+		 * is not the case anymore, therefore we cleanup
+		 * the flag to avoid an unwanted cleanup close
+		 * after PERFUSE_OPENFS_TIMEOUT.
+		 */
+		pnd->pnd_flags &= ~PND_OPENFS;
+
 		return 0;
+	}
+	
 
 	/*
 	 * Convert PUFFS mode to FUSE mode: convert FREAD/FWRITE
@@ -1160,16 +1214,16 @@
 	foi->flags = fmode;
 	foi->unused = 0;
 
-	if ((error = XCHG_MSG(ps, pu, pm, sizeof(*foo))) != 0)
+	if ((error = xchg_msg(pu, opc, pm, sizeof(*foo), wait_reply)) != 0)
 		goto out;
 
 	foo = GET_OUTPAYLOAD(ps, pm, fuse_open_out);
-	
+
 	/*
 	 * Save the file handle in node private data 
 	 * so that we can reuse it later
 	 */
-	perfuse_new_fh((puffs_cookie_t)pn, foo->fh, mode);
+	perfuse_new_fh(pu, (puffs_cookie_t)pn, foo->fh, mode);
 
 #ifdef PERFUSE_DEBUG
 	if (perfuse_diagflags & PDF_FH)
@@ -1180,6 +1234,7 @@
 			pnd->pnd_ino, mode & FREAD ? "r" : "",
 			mode & FWRITE ? "w" : "", foo->fh);
 #endif
+
 out:
 	ps->ps_destroy_msg(pm);
 
@@ -1210,7 +1265,7 @@
 	 * therefore postpone the close operation at reclaim time.
 	 */
 	if (puffs_pn_getvap(pn)->va_type != VREG)
-		return node_close_common(pu, opc, flags);
+		return perfuse_node_close_common(pu, opc, flags);
 
 	return 0;
 }
@@ -1243,7 +1298,7 @@
 		fai = GET_INPAYLOAD(ps, pm, fuse_access_in);
 		fai->mask = mode;
 
-		error = XCHG_MSG(ps, pu, pm, NO_PAYLOAD_REPLY_LEN);
+		error = xchg_msg(pu, opc, pm, NO_PAYLOAD_REPLY_LEN, wait_reply);
 		ps->ps_destroy_msg(pm);
 	}
 
@@ -1266,7 +1321,8 @@
 				"fh = 0x%"PRIx64"\n", __func__, (void *)opc,
 				PERFUSE_NODE_DATA(opc)->pnd_ino, fgi->fh);
 #endif
-		if ((error = XCHG_MSG(ps, pu, pm, sizeof(*fao))) != 0) {
+		if ((error = xchg_msg(pu, opc, pm, 
+				      sizeof(*fao), wait_reply)) != 0) {
 			ps->ps_destroy_msg(pm);
 			goto out;
 		}
@@ -1318,7 +1374,7 @@
 	fgi->dummy = 0;
 	fgi->fh = 0;
 
-	if ((error = XCHG_MSG(ps, pu, pm, sizeof(*fao))) != 0)
+	if ((error = xchg_msg(pu, opc, pm, sizeof(*fao), wait_reply)) != 0)
 		goto out;
 
 	fao = GET_OUTPAYLOAD(ps, pm, fuse_attr_out);
@@ -1479,7 +1535,7 @@
 	/*
 	 * A fuse_attr_out is returned, but we ignore it.
 	 */
-	error = XCHG_MSG(ps, pu, pm, sizeof(struct fuse_attr_out));
+	error = xchg_msg(pu, opc, pm, sizeof(struct fuse_attr_out), wait_reply);
 
 	ps->ps_destroy_msg(pm);
 
@@ -1514,7 +1570,7 @@
 			__func__, (void *)opc,	
 			PERFUSE_NODE_DATA(opc)->pnd_ino, fpi->fh);
 #endif
-	if ((error = XCHG_MSG(ps, pu, pm, sizeof(*fpo))) != 0)
+	if ((error = xchg_msg(pu, opc, pm, sizeof(*fpo), wait_reply)) != 0)
 		goto out;
 
 	fpo = GET_OUTPAYLOAD(ps, pm, fuse_poll_out);
@@ -1610,7 +1666,8 @@
 			PERFUSE_NODE_DATA(opc)->pnd_ino, ffi->fh);
 #endif
 
-	if ((error = XCHG_MSG(ps, pu, pm, NO_PAYLOAD_REPLY_LEN)) != 0)
+	if ((error = xchg_msg(pu, opc, pm, 
+			      NO_PAYLOAD_REPLY_LEN, wait_reply)) != 0)
 		goto out;	
 
 	/*
@@ -1637,7 +1694,7 @@
 		ps->ps_destroy_msg(pm);
 
 	if (open_self) 
-		(void)node_close_common(pu, opc, FWRITE);
+		(void)perfuse_node_close_common(pu, opc, FWRITE);
 
 	return error;
 }
@@ -1700,7 +1757,7 @@
 	path = _GET_INPAYLOAD(ps, pm, char *);
 	(void)strlcpy(path, name, len);
 
-	if ((error = XCHG_MSG(ps, pu, pm, UNSPEC_REPLY_LEN)) != 0)
+	if ((error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply)) != 0)
 		goto out;
 
 	if (puffs_inval_namecache_dir(pu, opc) != 0)
@@ -1756,7 +1813,7 @@
 	fli->oldnodeid = PERFUSE_NODE_DATA(pn)->pnd_ino;
 	(void)strlcpy((char *)(void *)(fli + 1), name, len - sizeof(*fli));
 
-	error = XCHG_MSG(ps, pu, pm, UNSPEC_REPLY_LEN);
+	error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply);
 
 	ps->ps_destroy_msg(pm);
 
@@ -1808,7 +1865,7 @@
 	np += oldname_len;
 	(void)strlcpy(np, newname, newname_len);
 
-	if ((error = XCHG_MSG(ps, pu, pm, UNSPEC_REPLY_LEN)) != 0)
+	if ((error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply)) != 0)
 		goto out;
 
 	/*
@@ -1898,7 +1955,7 @@
 	path = _GET_INPAYLOAD(ps, pm, char *);
 	(void)strlcpy(path, name, len);
 
-	if ((error = XCHG_MSG(ps, pu, pm, UNSPEC_REPLY_LEN)) != 0)
+	if ((error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply)) != 0)
 		goto out;
 
 	if (puffs_inval_namecache_dir(pu, opc) != 0)
@@ -1982,6 +2039,7 @@
 	int error;
 	int open_self;
 	uint64_t fd_offset;
+	size_t fd_maxlen;
 	
 	pm = NULL;
 	error = 0;
@@ -2033,6 +2091,7 @@
 	pnd->pnd_all_fd = NULL;
 	pnd->pnd_all_fd_len = 0;
 	fd_offset = 0;
+	fd_maxlen = ps->ps_max_readahead - sizeof(*foh);
 	
 	do {
 		size_t fd_len;
@@ -2042,24 +2101,17 @@
 
 		/*
 		 * read_flags, lock_owner and flags are unused in libfuse
-		 * 
-		 * XXX if fri->size is too big (bigger than PAGE_SIZE?), 			 * we get strange bugs. ktrace shows 16 bytes or garbage
-		 * at the end of sent frames, but perfused does not receive
-		 * that data. The data length is hoverver the same, which 
-		 * cause perfused to use the last 16 bytes of the frame
-		 * as the frame header of the next frame.
-		 *
-		 * This may be a kernel bug.
 		 */
 		fri = GET_INPAYLOAD(ps, pm, fuse_read_in);
 		fri->fh = fh;
 		fri->offset = fd_offset;
-		fri->size = PAGE_SIZE - sizeof(struct fuse_out_header);
+		fri->size = fd_maxlen;
 		fri->read_flags = 0;
 		fri->lock_owner = 0;
 		fri->flags = 0;
 
-		if ((error = XCHG_MSG(ps, pu, pm, UNSPEC_REPLY_LEN)) != 0)
+		if ((error = xchg_msg(pu, opc, pm, 	
+				      UNSPEC_REPLY_LEN, wait_reply)) != 0)
 			goto out;
 		
 		/* 
@@ -2071,8 +2123,7 @@
 		foh_len = foh->len;
 
 		/*
-		 * It seems that the only way to discover the end
-		 * of the buffer is to get an empty read
+		 * Empty read: we reached the end of the buffer.
 		 */
 		if (foh_len == sizeof(*foh))
 			break;
@@ -2103,7 +2154,15 @@
 
 		ps->ps_destroy_msg(pm);
 		pm = NULL;
-	} while (1 /* CONSTCOND */);
+
+		/*
+		 * If the buffer was not completely filled, 
+		 * that is, if there is room for the biggest 
+		 * struct dirent possible, then we are done:
+		 * no need to issue another READDIR to see
+		 * an empty reply.
+		 */
+	} while (foh_len >= fd_maxlen - (sizeof(*fd) + MAXPATHLEN));
 
 	if (fuse_to_dirent(pu, opc, pnd->pnd_all_fd, pnd->pnd_all_fd_len) == -1)
 		error = EIO;
@@ -2170,7 +2229,7 @@
 
 	pm = ps->ps_new_msg(pu, opc, FUSE_READLINK, 0, pcr);
 
-	if ((error = XCHG_MSG(ps, pu, pm, UNSPEC_REPLY_LEN)) != 0)
+	if ((error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply)) != 0)
 		goto out;
 
 	foh = GET_OUTHDR(ps, pm);
@@ -2243,7 +2302,7 @@
 
 		/*
 		 * Make sure all operation are finished
-		 * There can be an ongoing write, or queued operations
+		 * There can be an ongoing write or
 		 */
 		while (pnd->pnd_flags & PND_INWRITE) {
 			requeue_request(pu, opc, PCQ_AFTERWRITE);
@@ -2267,10 +2326,10 @@
 		 * Close open files
 		 */
 		if (pnd->pnd_flags & PND_WFH)
-			(void)node_close_common(pu, opc, FWRITE);
+			(void)perfuse_node_close_common(pu, opc, FWRITE);
 
 		if (pnd->pnd_flags & PND_RFH)
-			(void)node_close_common(pu, opc, FREAD);
+			(void)perfuse_node_close_common(pu, opc, FREAD);
 
 		/*
 		 * And send the FORGET message
@@ -2281,13 +2340,14 @@
 		ffi->nlookup = pnd->pnd_nlookup;
 
 		/*
-		 * No reply is expected, pm is freed in XCHG_MSG
+		 * No reply is expected, pm is freed in xchg_msg
 		 */
-		(void)XCHG_MSG_NOREPLY(ps, pu, pm, UNSPEC_REPLY_LEN);
+		(void)xchg_msg(pu, (puffs_cookie_t)pn, 
+			       pm, UNSPEC_REPLY_LEN, no_reply);
 
 		parent_pn = pnd->pnd_parent;
 
-		perfuse_destroy_pn(pn);
+		perfuse_destroy_pn(pu, pn);
 		puffs_pn_put(pn);
 
 		pn = parent_pn;
@@ -2370,7 +2430,7 @@
 			PERFUSE_NODE_DATA(opc)->pnd_ino, fli->fh);
 #endif
 
-	if ((error = XCHG_MSG(ps, pu, pm, sizeof(*flo))) != 0)
+	if ((error = xchg_msg(pu, opc, pm, sizeof(*flo), wait_reply)) != 0)
 		goto out;
 
 	flo = GET_OUTPAYLOAD(ps, pm, fuse_lk_out);
@@ -2416,7 +2476,6 @@
 	struct fuse_read_in *fri;
 	struct fuse_out_header *foh;
 	size_t readen;
-	size_t requested;
 	int error;
 	
 	ps = puffs_getspecific(pu);
@@ -2426,27 +2485,19 @@
 	if (puffs_pn_getvap((struct puffs_node *)opc)->va_type == VDIR) 
 		return EBADF;
 
-	requested = *resid;
-	if ((ps->ps_readahead + requested) > ps->ps_max_readahead) {
-		if (perfuse_diagflags & PDF_REQUEUE)
-			DPRINTF("readahead = %zd\n", ps->ps_readahead);
-		requeue_request(pu, opc, PCQ_READ);
-	}
-	ps->ps_readahead += requested;
-			
 	do {
+		size_t max_read;
+
+		max_read = ps->ps_max_readahead - sizeof(*foh);
 		/*
 		 * flags may be set to FUSE_READ_LOCKOWNER 
 		 * if lock_owner is provided.
-		 *
-		 * XXX See comment about fri->size in perfuse_node_readdir
-		 * We encounter the same bug here.
 		 */
 		pm = ps->ps_new_msg(pu, opc, FUSE_READ, sizeof(*fri), pcr);
 		fri = GET_INPAYLOAD(ps, pm, fuse_read_in);
 		fri->fh = perfuse_get_fh(opc, FREAD);
 		fri->offset = offset;
-		fri->size = (uint32_t)MIN(*resid, PAGE_SIZE - sizeof(*foh));
+		fri->size = (uint32_t)MIN(*resid, max_read);
 		fri->read_flags = 0; /* XXX Unused by libfuse? */
 		fri->lock_owner = pnd->pnd_lock_owner;
 		fri->flags = 0;
@@ -2457,7 +2508,7 @@
 		DPRINTF("%s: opc = %p, ino = %"PRId64", fh = 0x%"PRIx64"\n",
 			__func__, (void *)opc, pnd->pnd_ino, fri->fh);
 #endif
-		error = XCHG_MSG(ps, pu, pm, UNSPEC_REPLY_LEN);
+		error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply);
 
 		if (error  != 0)
 			goto out;
@@ -2465,6 +2516,12 @@
 		foh = GET_OUTHDR(ps, pm);
 		readen = foh->len - sizeof(*foh);
 
+#ifdef PERFUSE_DEBUG
+		if (readen > *resid)
+			DERRX(EX_SOFTWARE, "%s: Unexpected big read %zd",
+			      __func__, readen);
+#endif
+
 		(void)memcpy(buf,  _GET_OUTPAYLOAD(ps, pm, char *), readen);
 
 		buf += readen;
@@ -2484,10 +2541,6 @@
 	if (pm != NULL)
 		ps->ps_destroy_msg(pm);
 
-	ps->ps_readahead -= requested;
-
-	(void)dequeue_requests(ps, opc, PCQ_READ, 1);
-
 	return error;
 }
 
@@ -2509,7 +2562,6 @@
 	size_t data_len;
 	size_t payload_len;
 	size_t written;
-	size_t requested;
 	int error;
 	
 	ps = puffs_getspecific(pu);
@@ -2520,27 +2572,28 @@
 	if (puffs_pn_getvap((struct puffs_node *)opc)->va_type == VDIR) 
 		return EBADF;
 
+	/*
+	 * We need to queue write requests in order to avoid
+	 * dequeueing PCQ_AFTERWRITE when there are pending writes.
+	 */
 	while (pnd->pnd_flags & PND_INWRITE)
 		requeue_request(pu, opc, PCQ_WRITE);
 	pnd->pnd_flags |= PND_INWRITE;
 
-
-	requested = *resid;
-	if ((ps->ps_write + requested) > ps->ps_max_write) {
-		if (perfuse_diagflags & PDF_REQUEUE)
-			DPRINTF("write = %zd\n", ps->ps_write);
-		requeue_request(pu, opc, PCQ_WRITE);
-	}
-	ps->ps_write += requested;
-			
 	do {
+		size_t max_write;
 		/*
-		 * It seems libfuse does not expects big chunks, so 
-		 * send it page per page. The writepage feature is
-		 * probably there to minmize data movement.
-		 * XXX use ps->ps_maxwrite?
+		 * There is a writepage flag when data
+		 * is PAGE_SIZE-aligned. Use it for
+		 * everything but the data after the last
+		 * page boundary.
 		 */
-		data_len = MIN(*resid, PAGE_SIZE);
+		max_write = ps->ps_max_write - sizeof(*fwi); 
+
+		data_len = MIN(*resid, max_write);
+		if (data_len > PAGE_SIZE)
+			data_len = data_len & ~(PAGE_SIZE - 1);
+
 		payload_len = data_len + sizeof(*fwi);
 
 		/*
@@ -2565,11 +2618,17 @@
 		DPRINTF("%s: opc = %p, ino = %"PRId64", fh = 0x%"PRIx64"\n",
 			__func__, (void *)opc, pnd->pnd_ino, fwi->fh);
 #endif
-		if ((error = XCHG_MSG(ps, pu, pm, sizeof(*fwo))) != 0)
+		if ((error = xchg_msg(pu, opc, pm, 
+				      sizeof(*fwo), wait_reply)) != 0)
 			goto out;
 
 		fwo = GET_OUTPAYLOAD(ps, pm, fuse_write_out);
 		written = fwo->size;
+#ifdef PERFUSE_DEBUG
+		if (written > *resid)
+			DERRX(EX_SOFTWARE, "%s: Unexpected big write %zd",
+			      __func__, written);
+#endif
 		*resid -= written;
 		offset += written;
 		buf += written;
@@ -2605,9 +2664,6 @@
 	if (pm != NULL)
 		ps->ps_destroy_msg(pm);
 
-	ps->ps_write -= requested;
-
-
 	/*
 	 * If there are no more queued write, we can resume
 	 * an operation awaiting write completion.

Index: src/lib/libperfuse/perfuse.c
diff -u src/lib/libperfuse/perfuse.c:1.6 src/lib/libperfuse/perfuse.c:1.7
--- src/lib/libperfuse/perfuse.c:1.6	Wed Sep 15 01:51:43 2010
+++ src/lib/libperfuse/perfuse.c	Mon Sep 20 07:00:21 2010
@@ -1,4 +1,4 @@
-/*  $NetBSD: perfuse.c,v 1.6 2010/09/15 01:51:43 manu Exp $ */
+/*  $NetBSD: perfuse.c,v 1.7 2010/09/20 07:00:21 manu Exp $ */
 
 /*-
  *  Copyright (c) 2010 Emmanuel Dreyfus. All rights reserved.
@@ -58,6 +58,7 @@
 	(void)memset(ps, 0, sizeof(*ps));
 	ps->ps_max_write = UINT_MAX;
 	ps->ps_max_readahead = UINT_MAX;
+	TAILQ_INIT(&ps->ps_pnd);
 	
 	return ps;
 }
@@ -220,7 +221,7 @@
 	struct perfuse_mount_out *pmo;
 #if (PERFUSE_SOCKTYPE == SOCK_DGRAM)
 	struct sockaddr_storage ss;
-	struct sockaddr_un sun;
+	struct sockaddr_un *sun;
 	struct sockaddr *sa;
 	socklen_t sa_len;
 #endif
@@ -246,22 +247,22 @@
 	sock_len = 0;
 #if (PERFUSE_SOCKTYPE == SOCK_DGRAM)
 	sa = (struct sockaddr *)(void *)&ss;
+	sun = (struct sockaddr_un *)(void *)&ss;
 	sa_len = sizeof(ss);
 	if ((getpeername(s, sa, &sa_len) == 0) &&
 	    (sa->sa_family = AF_LOCAL) &&
-	    (strcmp(((struct sockaddr_un *)sa)->sun_path, _PATH_FUSE) == 0)) {
+	    (strcmp(sun->sun_path, _PATH_FUSE) == 0)) {
 
-		sa = (struct sockaddr *)(void *)&sun;
-		sun.sun_len = sizeof(sun);
-		sun.sun_family = AF_LOCAL;
-		(void)sprintf(sun.sun_path, "%s/%s-%d",
+		sun->sun_len = sizeof(*sun);
+		sun->sun_family = AF_LOCAL;
+		(void)sprintf(sun->sun_path, "%s/%s-%d",
 			      _PATH_TMP, getprogname(), getpid());
 		
-		if (bind(s, sa, sa->sa_len) != 0)
+		if (bind(s, sa, (socklen_t)sa->sa_len) != 0)
 			DERR(EX_OSERR, "%s:%d bind to \"%s\" failed",
-			     __func__, __LINE__, sun.sun_path);
+			     __func__, __LINE__, sun->sun_path);
 
-		sock_len = strlen(sun.sun_path) + 1;
+		sock_len = strlen(sun->sun_path) + 1;
 	}
 #endif /* PERFUSE_SOCKTYPE */
 		
@@ -317,7 +318,7 @@
 	}
 
 	if (sock_len != 0) {
-		(void)strcpy(cp, sun.sun_path);
+		(void)strcpy(cp, sun->sun_path);
 		cp += pmo->pmo_sock_len;
 	}
 

Index: src/lib/libperfuse/perfuse_priv.h
diff -u src/lib/libperfuse/perfuse_priv.h:1.10 src/lib/libperfuse/perfuse_priv.h:1.11
--- src/lib/libperfuse/perfuse_priv.h:1.10	Wed Sep 15 01:51:43 2010
+++ src/lib/libperfuse/perfuse_priv.h	Mon Sep 20 07:00:22 2010
@@ -1,4 +1,4 @@
-/*  $NetBSD: perfuse_priv.h,v 1.10 2010/09/15 01:51:43 manu Exp $ */
+/*  $NetBSD: perfuse_priv.h,v 1.11 2010/09/20 07:00:22 manu Exp $ */
 
 /*-
  *  Copyright (c) 2010 Emmanuel Dreyfus. All rights reserved.
@@ -37,6 +37,17 @@
 #include "perfuse_if.h"
 #include "fuse.h"
 
+/* 
+ * When a file is created, it is open for the filesystem, but not
+ * for the kernel. We keep the file open to avoid re-open it, but
+ * once we open PERFUSE_OPENFS_MAXFILES files, we start closing
+ * on our own any file that has not been open for PERFUSE_OPENFS_TIMEOUT
+ * seconds. This is to avoid file leaks and getting "Too many open 
+ * files in system"
+ */
+#define PERFUSE_OPENFS_TIMEOUT 3
+#define PERFUSE_OPENFS_MAXFILES 32
+
 struct perfuse_state {
 	void *ps_private;	/* Private field for libperfuse user */
 	struct puffs_usermount *ps_pu;
@@ -58,8 +69,6 @@
 	char *ps_filesystemtype;
 	int ps_mountflags;
 	uint64_t ps_unique;
-	size_t ps_readahead;
-	size_t ps_write;
 	perfuse_new_msg_fn ps_new_msg;
 	perfuse_xchg_msg_fn ps_xchg_msg;
 	perfuse_destroy_msg_fn ps_destroy_msg;
@@ -67,10 +76,13 @@
 	perfuse_get_inpayload_fn ps_get_inpayload;
 	perfuse_get_outhdr_fn ps_get_outhdr;
 	perfuse_get_outpayload_fn ps_get_outpayload;
+	TAILQ_HEAD(, perfuse_node_data) ps_pnd;
+	int ps_pnd_count;
 };
 
 
-enum perfuse_qtype { PCQ_READDIR, PCQ_READ, PCQ_WRITE, PCQ_AFTERWRITE };
+enum perfuse_qtype { 	PCQ_READDIR, PCQ_READ, PCQ_WRITE, PCQ_AFTERWRITE };
+
 #ifdef PERFUSE_DEBUG
 extern const char *perfuse_qtypestr[];
 #endif
@@ -81,7 +93,6 @@
 	TAILQ_ENTRY(perfuse_cc_queue) pcq_next;
 };
 
-
 struct perfuse_node_data {
 	uint64_t pnd_rfh;
 	uint64_t pnd_wfh;
@@ -95,18 +106,22 @@
 	size_t pnd_all_fd_len;
 	TAILQ_HEAD(,perfuse_cc_queue) pnd_pcq;	/* queued requests */
 	int pnd_flags;
-#define PND_RECLAIMED		0x01	/* reclaim pending */
-#define PND_INREADDIR		0x02	/* readdir in progress */
-#define PND_DIRTY		0x04	/* There is some data to sync */
-#define PND_RFH			0x08	/* Read FH allocated */
-#define PND_WFH			0x10	/* Write FH allocated */
-#define PND_REMOVED		0x20	/* Node was removed */
-#define PND_INWRITE		0x40	/* write in progress */
+#define PND_RECLAIMED		0x001	/* reclaim pending */
+#define PND_INREADDIR		0x002	/* readdir in progress */
+#define PND_DIRTY		0x004	/* There is some data to sync */
+#define PND_RFH			0x008	/* Read FH allocated */
+#define PND_WFH			0x010	/* Write FH allocated */
+#define PND_REMOVED		0x020	/* Node was removed */
+#define PND_INWRITE		0x040	/* write in progress */
+#define PND_OPENFS		0x080	/* Open by fs but not by kernel */
 
 #define PND_OPEN		(PND_RFH|PND_WFH)	/* File is open */
 #define PND_BUSY		(PND_INREADDIR|PND_INWRITE)
 	puffs_cookie_t pnd_parent;
 	int pnd_childcount;
+	time_t pnd_timestamp;
+	TAILQ_ENTRY(perfuse_node_data) pnd_next;
+	puffs_cookie_t pnd_pn;
 };
 
 #define PERFUSE_NODE_DATA(opc)	\
@@ -125,19 +140,16 @@
 	(struct type *)(void *)ps->ps_get_outpayload(pm)
 #define _GET_OUTPAYLOAD(ps, pm, type) (type)ps->ps_get_outpayload(pm)
 
-#define XCHG_MSG(ps, pu, opc, len) ps->ps_xchg_msg(pu, opc, len, wait_reply)
-#define XCHG_MSG_NOREPLY(ps, pu, opc, len) \
-    ps->ps_xchg_msg(pu, opc, len, no_reply)
-
 __BEGIN_DECLS
 
 struct puffs_node *perfuse_new_pn(struct puffs_usermount *, 
     struct puffs_node *);
-void perfuse_destroy_pn(struct puffs_node *);
-void perfuse_new_fh(puffs_cookie_t, uint64_t, int);
+void perfuse_destroy_pn(struct puffs_usermount *, struct puffs_node *);
+void perfuse_new_fh(struct puffs_usermount *, puffs_cookie_t, uint64_t, int);
 void perfuse_destroy_fh(puffs_cookie_t, uint64_t);
 uint64_t perfuse_get_fh(puffs_cookie_t, int);
 uint64_t perfuse_next_unique(struct puffs_usermount *);
+int perfuse_node_close_common(struct puffs_usermount *, puffs_cookie_t, int);
 
 char *perfuse_fs_mount(int, ssize_t);
 

Index: src/lib/libperfuse/subr.c
diff -u src/lib/libperfuse/subr.c:1.4 src/lib/libperfuse/subr.c:1.5
--- src/lib/libperfuse/subr.c:1.4	Fri Sep  3 07:15:18 2010
+++ src/lib/libperfuse/subr.c	Mon Sep 20 07:00:22 2010
@@ -1,4 +1,4 @@
-/*  $NetBSD: subr.c,v 1.4 2010/09/03 07:15:18 manu Exp $ */
+/*  $NetBSD: subr.c,v 1.5 2010/09/20 07:00:22 manu Exp $ */
 
 /*-
  *  Copyright (c) 2010 Emmanuel Dreyfus. All rights reserved.
@@ -41,9 +41,12 @@
 	struct puffs_usermount *pu;
 	struct puffs_node *parent;
 {
+	struct perfuse_state *ps;
 	struct puffs_node *pn;
 	struct perfuse_node_data *pnd;
 
+	ps = puffs_getspecific(pu);
+
 	if ((pnd = malloc(sizeof(*pnd))) == NULL)
 		DERR(EX_OSERR, "malloc failed");
 
@@ -56,8 +59,13 @@
 	pnd->pnd_ino = PERFUSE_UNKNOWN_INO;
 	pnd->pnd_nlookup = 1;
 	pnd->pnd_parent = parent;
+	pnd->pnd_timestamp = time(NULL);
+	pnd->pnd_pn = (puffs_cookie_t)pn;
 	TAILQ_INIT(&pnd->pnd_pcq);
 
+	TAILQ_INSERT_TAIL(&ps->ps_pnd, pnd, pnd_next);
+	ps->ps_pnd_count++;
+
 	if (parent != NULL)
 		PERFUSE_NODE_DATA(parent)->pnd_childcount++;
 
@@ -65,11 +73,19 @@
 }
 
 void
-perfuse_destroy_pn(pn)
+perfuse_destroy_pn(pu, pn)
+	struct puffs_usermount *pu;
 	struct puffs_node *pn;
 {
+	struct perfuse_state *ps;
 	struct perfuse_node_data *pnd;
 
+	ps = puffs_getspecific(pu);
+	pnd = PERFUSE_NODE_DATA(pn);
+
+	TAILQ_REMOVE(&ps->ps_pnd, pnd, pnd_next);
+	ps->ps_pnd_count--;
+
 	if ((pnd = puffs_pn_getpriv(pn)) != NULL) {
 		if (pnd->pnd_parent != NULL)
 			PERFUSE_NODE_DATA(pnd->pnd_parent)->pnd_childcount--;
@@ -97,13 +113,40 @@
 
 
 void
-perfuse_new_fh(opc, fh, mode)
+perfuse_new_fh(pu, opc, fh, mode)
+	struct puffs_usermount *pu;
 	puffs_cookie_t opc;
 	uint64_t fh;
 	int mode;
 {
+	struct perfuse_state *ps;
 	struct perfuse_node_data *pnd;
 
+	ps = puffs_getspecific(pu);
+
+	/*
+	 * Nodes file with PND_OPENFS are open by the filesystem but
+	 * not by the kernel, because of a CREATE operation. If
+	 * the kernel never opens them, we have a leak to fix. 
+	 * If we have enough open files, we start closing the
+	 * one that had been open for too long.
+	 */
+	if (ps->ps_pnd_count > PERFUSE_OPENFS_MAXFILES) {
+		time_t now;
+
+		now = time(NULL);
+
+		TAILQ_FOREACH(pnd, &ps->ps_pnd, pnd_next) {
+			if ((pnd->pnd_ino == FUSE_ROOT_ID) ||
+			    !(pnd->pnd_flags & PND_OPENFS) ||
+			    (now < pnd->pnd_timestamp + PERFUSE_OPENFS_TIMEOUT))
+				continue;
+
+		pnd->pnd_flags &= ~PND_OPENFS;
+			perfuse_node_close_common(pu, pnd->pnd_pn, FWRITE);
+		}
+	}
+
 	pnd = PERFUSE_NODE_DATA(opc);
 
 	if (mode & FWRITE) {

Reply via email to