Module Name:    src
Committed By:   manu
Date:           Fri Sep  3 07:15:18 UTC 2010

Modified Files:
        src/lib/libperfuse: ops.c perfuse_priv.h subr.c

Log Message:
- Postpone file close at reclaim time, since NetBSD sends fsync and
setattr(mtime, ctime) after close, while FUSE expects the file
to be open for these operations

- remove unused argument to node_mk_common()

- remove requeued requests when they are executed, not when they
are tagged for schedule

- try to make filehandle management simpler, by keeping track of only
one read and one write filehandle (the latter being really read/write).

- when CREATE is not available, we use the MKNOD/OPEN path. Fix a
bug here where we opened the parent directory instead of the node:
add the missing lookup of the mknod'ed node.

- lookup the file we just created: glusterfs does not really see it
otherwise.

- open file when doing setattr(mtime, ctime) on non open files, as
some filesystems seem to require it.

- Do not flush pagecache for removed nodes

- Keep track of read/write operations in progress, and at reclaim
time, make sure they are over before closing and forgetting the file.


To generate a diff of this commit:
cvs rdiff -u -r1.6 -r1.7 src/lib/libperfuse/ops.c
cvs rdiff -u -r1.4 -r1.5 src/lib/libperfuse/perfuse_priv.h
cvs rdiff -u -r1.3 -r1.4 src/lib/libperfuse/subr.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/lib/libperfuse/ops.c
diff -u src/lib/libperfuse/ops.c:1.6 src/lib/libperfuse/ops.c:1.7
--- src/lib/libperfuse/ops.c:1.6	Thu Sep  2 08:58:06 2010
+++ src/lib/libperfuse/ops.c	Fri Sep  3 07:15:18 2010
@@ -1,4 +1,4 @@
-/*  $NetBSD: ops.c,v 1.6 2010/09/02 08:58:06 manu Exp $ */
+/*  $NetBSD: ops.c,v 1.7 2010/09/03 07:15:18 manu Exp $ */
 
 /*-
  *  Copyright (c) 2010 Emmanuel Dreyfus. All rights reserved.
@@ -41,6 +41,7 @@
 #include "perfuse_priv.h"
 #include "fuse.h"
 
+static int node_close_common(struct puffs_usermount *, puffs_cookie_t, int);
 static int no_access(puffs_cookie_t, const struct puffs_cred *, mode_t);
 static void fuse_attr_to_vap(struct perfuse_state *,
     struct vattr *, struct fuse_attr *);
@@ -74,6 +75,7 @@
  */
 #define F_WAIT		0x010
 #define F_FLOCK		0x020
+#define OFLAGS(fflags)  ((fflags) - 1)
 
 /* 
  * Borrowed from src/sys/kern/vfs_subr.c and src/sys/sys/vnode.h 
@@ -90,6 +92,74 @@
 #define IFTOVT(mode) (iftovt_tab[((mode) & S_IFMT) >> 12])
 #define VTTOIF(indx) (vttoif_tab[(int)(indx)])
 
+static int
+node_close_common(pu, opc, mode)
+	struct puffs_usermount *pu;
+	puffs_cookie_t opc;
+	int mode;
+{
+	struct perfuse_state *ps;
+	perfuse_msg_t *pm;
+	int op;
+	uint64_t fh;
+	struct fuse_release_in *fri;
+	struct perfuse_node_data *pnd;
+	struct puffs_node *pn;
+	int error;
+
+	ps = puffs_getspecific(pu);
+	pn = (struct puffs_node *)opc;
+	pnd = PERFUSE_NODE_DATA(pn);
+
+	if (puffs_pn_getvap(pn)->va_type == VDIR) {
+		op = FUSE_RELEASEDIR;
+		mode = FREAD;
+	} else {
+		op = FUSE_RELEASE;
+	}
+
+	/*
+	 * Destroy the filehandle before sending the 
+	 * request to the FUSE filesystem, otherwise 
+	 * we may get a second close() while we wait
+	 * for the reply, and we would end up closing
+	 * the same fh twice instead of closng both.
+	 */
+	fh = perfuse_get_fh(opc, mode);
+	perfuse_destroy_fh(pn, fh);
+
+	/*
+	 * release_flags may be set to FUSE_RELEASE_FLUSH
+	 * to flush locks. lock_owner must be set in that case
+	 */
+	pm = ps->ps_new_msg(pu, opc, op, sizeof(*fri), NULL);
+	fri = GET_INPAYLOAD(ps, pm, fuse_release_in);
+	fri->fh = fh;
+	fri->flags = 0;
+	fri->release_flags = 0;
+	fri->lock_owner = pnd->pnd_lock_owner;
+	fri->flags = (fri->lock_owner != 0) ? FUSE_RELEASE_FLUSH : 0;
+
+#ifdef PERFUSE_DEBUG
+	if (perfuse_diagflags & PDF_FH)
+		DPRINTF("%s: opc = %p, ino = %"PRId64", fh = 0x%"PRIx64"\n",
+			 __func__, (void *)opc, pnd->pnd_ino, fri->fh);
+#endif
+
+	if ((error = XCHG_MSG(ps, pu, pm, NO_PAYLOAD_REPLY_LEN)) != 0)
+		goto out;
+
+	ps->ps_destroy_msg(pm);
+
+	error = 0;
+
+out:
+	if (error != 0)
+		DERRX(EX_SOFTWARE, "%s: freed fh = 0x%"PRIx64" but filesystem "
+		      "returned error = %d", __func__, fh, error);
+
+	return error;
+}
 
 static int
 no_access(opc, pcr, mode)
@@ -802,6 +872,19 @@
 	int error;
 	
 	/*
+	 * Special case for ..
+	 */
+	if (PCNISDOTDOT(pcn)) {
+		pn = PERFUSE_NODE_DATA(opc)->pnd_parent;
+		PERFUSE_NODE_DATA(pn)->pnd_flags &= ~PND_RECLAIMED;
+		
+		puffs_newinfo_setcookie(pni, pn);
+		puffs_newinfo_setvtype(pni, VDIR);
+
+		return 0;
+	}
+
+	/*
 	 * XXX This is borrowed from librefuse, 
 	 * and __UNCONST is said to be fixed.
 	 */
@@ -866,6 +949,12 @@
 		if (error != 0)
 			return error;
 
+		error = node_lookup_common(pu, opc, (char*)PCNPATH(pcn), &pn);
+		if (error != 0)	
+			return error;
+
+		opc = (puffs_cookie_t)pn;
+
 		error = perfuse_node_open(pu, opc, FREAD|FWRITE, pcn->pcn_cred);
 		if (error != 0)	
 			return error;
@@ -898,12 +987,26 @@
 	 * so that we can reuse it later
 	 */
 	pn = perfuse_new_pn(pu, opc);
-	perfuse_new_fh((puffs_cookie_t)pn, foo->fh);
+	perfuse_new_fh((puffs_cookie_t)pn, foo->fh, FWRITE);
 	PERFUSE_NODE_DATA(pn)->pnd_ino = feo->nodeid;
 
+#ifdef PERFUSE_DEBUG
+	if (perfuse_diagflags & PDF_FH)
+		DPRINTF("%s: opc = %p, file = \"%s\", "
+			"ino = %"PRId64", rfh = 0x%"PRIx64"\n",
+			__func__, (void *)pn, (char *)PCNPATH(pcn),
+			feo->nodeid, foo->fh);
+#endif
+
 	fuse_attr_to_vap(ps, &pn->pn_va, &feo->attr);
 	puffs_newinfo_setcookie(pni, pn);
 
+	/*
+	 * It seems we need to do this so that glusterfs gets fully
+	 * aware that the file was created. If we do not do it, we 
+	 * get "SETATTR (null) (fuse_loc_fill() failed)"
+	 */
+	(void)node_lookup_common(pu, opc, (char*)PCNPATH(pcn), NULL);
 out: 
 	ps->ps_destroy_msg(pm);
 
@@ -979,8 +1082,10 @@
 	const struct puffs_cred *pcr;
 {
 	struct perfuse_state *ps;
+	struct perfuse_node_data *pnd;
 	perfuse_msg_t *pm;
 	mode_t pmode;
+	mode_t fmode;
 	int op;
 	struct fuse_open_in *foi;
 	struct fuse_open_out *foo;
@@ -988,6 +1093,7 @@
 	int error;
 	
 	ps = puffs_getspecific(pu);
+	pnd = PERFUSE_NODE_DATA(opc);
 
 	pn = (struct puffs_node *)opc;
 	if (puffs_pn_getvap(pn)->va_type == VDIR) {
@@ -995,8 +1101,8 @@
 		pmode = PUFFS_VREAD|PUFFS_VEXEC;
 	} else {
 		op = FUSE_OPEN;
-		if (mode & (O_RDWR|O_WRONLY))
-			pmode = PUFFS_VWRITE;
+		if (mode & FWRITE)
+			pmode = PUFFS_VWRITE|PUFFS_VREAD;
 		else
 			pmode = PUFFS_VREAD;
 	}
@@ -1006,8 +1112,7 @@
 	 * Opening a file requires R-- for reading, -W- for writing
 	 * In both cases, --X is required on the parent.
 	 */
-	if (no_access((puffs_cookie_t)PERFUSE_NODE_DATA(opc)->pnd_parent,
-	    pcr, PUFFS_VEXEC))
+	if (no_access((puffs_cookie_t)pnd->pnd_parent, pcr, PUFFS_VEXEC))
 		return EACCES;
 
 	if (no_access(opc, pcr, pmode))
@@ -1017,23 +1122,30 @@
 	 * libfuse docs say O_CREAT should not be set.
 	 */
 	mode &= ~O_CREAT;
-		
+
+	/*
+	 * Do not open twice, and do not reopen for reading
+	 * if we already have write handle.
+	 * Directories are always open with read access only, 
+	 * whatever flags we get.
+	 */
+	if (op == FUSE_OPENDIR)
+		mode = (mode & ~(FREAD|FWRITE)) | FREAD;
+	if ((mode & FREAD) && (pnd->pnd_flags & PND_RFH))
+		return 0;
+	if ((mode & FWRITE) && (pnd->pnd_flags & PND_WFH))
+		return 0;
+
+	/*
+	 * Convert PUFFS mode to FUSE mode: convert FREAD/FWRITE
+	 * to O_RDONLY/O_WRONLY while perserving the other options.
+	 */
+	fmode = mode & ~(FREAD|FWRITE);
+	fmode |= (mode & FWRITE) ? O_RDWR : O_RDONLY;
+
 	pm = ps->ps_new_msg(pu, opc, op, sizeof(*foi), pcr);
 	foi = GET_INPAYLOAD(ps, pm, fuse_open_in);
-	foi->flags = mode & ~O_ACCMODE; 
-	switch (mode & (FREAD|FWRITE)) {
-	case FREAD|FWRITE:
-		foi->flags |= O_RDWR;
-		break;
-	case FREAD:
-		foi->flags |= O_RDONLY;
-		break;
-	case FWRITE:
-		foi->flags |= O_WRONLY;
-		break;
-	default:
-		break;
-	}
+	foi->flags = fmode;
 	foi->unused = 0;
 
 	if ((error = XCHG_MSG(ps, pu, pm, sizeof(*foo))) != 0)
@@ -1045,15 +1157,16 @@
 	 * Save the file handle in node private data 
 	 * so that we can reuse it later
 	 */
-	perfuse_new_fh((puffs_cookie_t)pn, foo->fh);
+	perfuse_new_fh((puffs_cookie_t)pn, foo->fh, mode);
 
 #ifdef PERFUSE_DEBUG
 	if (perfuse_diagflags & PDF_FH)
 		DPRINTF("%s: opc = %p, file = \"%s\", "
-			"ino = %"PRId64", fh = 0x%"PRIx64"\n",
+			"ino = %"PRId64", %s%sfh = 0x%"PRIx64"\n",
 			__func__, (void *)opc, 
 			(char *)PNPATH((struct puffs_node *)opc),
-			PERFUSE_NODE_DATA(opc)->pnd_ino, foo->fh);
+			pnd->pnd_ino, mode & FREAD ? "r" : "",
+			mode & FWRITE ? "w" : "", foo->fh);
 #endif
 out:
 	ps->ps_destroy_msg(pm);
@@ -1061,7 +1174,7 @@
 	return error;
 }
 
-/* ARGSUSED2 */
+/* ARGSUSED0 */
 int
 perfuse_node_close(pu, opc, flags, pcr)
 	struct puffs_usermount *pu;
@@ -1069,95 +1182,34 @@
 	int flags;
 	const struct puffs_cred *pcr;
 {
-	struct perfuse_state *ps;
-	perfuse_msg_t *pm;
-	int op;
-	uint64_t fh;
-	struct perfuse_node_data *pnd;
-	struct fuse_release_in *fri;
 	struct puffs_node *pn;
-	int error;
-	
-	ps = puffs_getspecific(pu);
-	pn = (struct puffs_node *)opc;
-	pnd = PERFUSE_NODE_DATA(pn);
+	struct perfuse_node_data *pnd;
 
-	if (puffs_pn_getvap(pn)->va_type == VDIR)
-		op = FUSE_RELEASEDIR;
-	else
-		op = FUSE_RELEASE;
+	pn = (struct puffs_node *)opc;
+	pnd = PERFUSE_NODE_DATA(opc);
 
 	if (!(pnd->pnd_flags & PND_OPEN))
 		return EBADF;
 
-	fh = perfuse_get_fh(opc);
-
 	/*
-	 * Sync before close for files
-	 */
-	if ((op == FUSE_RELEASE) && (pnd->pnd_flags & PND_DIRTY)) {
-#ifdef PERFUSE_DEBUG
-		if (perfuse_diagflags & PDF_SYNC)
-			DPRINTF("%s: SYNC opc = %p, file = \"%s\"\n", 
-				__func__, (void*)opc, (char *)PNPATH(pn));
-#endif
-		if ((error = perfuse_node_fsync(pu, opc, pcr, 0, 0, 0)) != 0)
-			return error;
-
-		pnd->pnd_flags &= ~PND_DIRTY;
-
-#ifdef PERFUSE_DEBUG
-		if (perfuse_diagflags & PDF_SYNC)
-			DPRINTF("%s: CLEAR opc = %p, file = \"%s\"\n", 
-				__func__, (void*)opc, (char *)PNPATH((pn)));
-#endif
-	}
-
-	/*
-	 * Destroy the filehandle before sending the 
-	 * request to the FUSE filesystem, otherwise 
-	 * we may get a second close() while we wait
-	 * for the reply, and we would end up closing
-	 * the same fh twice instead of closng both.
-	 */
-	perfuse_destroy_fh(pn, fh);
-
-#ifdef PERFUSE_DEBUG
-	if (perfuse_diagflags & PDF_FH)
-		DPRINTF("%s: opc = %p, ino = %"PRId64", fh = 0x%"PRIx64"\n",
-			__func__, (void *)opc, pnd->pnd_ino, fh);
-#endif
+	 * Make sure all operation are finished
+	 * There can be an ongoing write, or queued operations
+	 * XXX perhaps deadlock. Use requeue_request
+	 */
+	while ((pnd->pnd_flags & PND_BUSY) ||
+	       !TAILQ_EMPTY(&pnd->pnd_pcq))
+		puffs_cc_yield(puffs_cc_getcc(pu));
 
-	/*
-	 * release_flags may be set to FUSE_RELEASE_FLUSH
-	 * to flush locks. lock_owner must be set in that case
+	/* 
+	 * The NetBSD kernel will send sync and setattr(mtime, ctime)
+	 * afer a close on a regular file. Some FUSE filesystem will 
+	 * assume theses operations are performed on open files. We 
+	 * therefore postpone the close operation at reclaim time.
 	 */
-	pm = ps->ps_new_msg(pu, opc, op, sizeof(*fri), NULL);
-	fri = GET_INPAYLOAD(ps, pm, fuse_release_in);
-	fri->fh = fh;
-	fri->flags = 0;
-	fri->release_flags = 0;
-	fri->lock_owner = PERFUSE_NODE_DATA(pn)->pnd_lock_owner;
-	fri->flags = (fri->lock_owner != 0) ? FUSE_RELEASE_FLUSH : 0;
-
-#ifdef PERFUSE_DEBUG
-	if (perfuse_diagflags & PDF_FH)
-		DPRINTF("%s: opc = %p, ino = %"PRId64", fh = 0x%"PRIx64"\n",
-			 __func__, (void *)opc, pnd->pnd_ino, fri->fh);
-#endif
-
-	if ((error = XCHG_MSG(ps, pu, pm, NO_PAYLOAD_REPLY_LEN)) != 0)
-		goto out;
-
-out:
-	if (error != 0)
-		DWARNX("%s: freed fh = 0x%"PRIx64" but filesystem "
-		       "returned error = %d",
-		       __func__, fh, error);
+	if (puffs_pn_getvap(pn)->va_type != VREG)
+		return node_close_common(pu, opc, flags);
 
-	ps->ps_destroy_msg(pm);
-
-	return error;
+	return 0;
 }
 
 int
@@ -1200,7 +1252,7 @@
 		fgi = GET_INPAYLOAD(ps, pm, fuse_getattr_in);
 		fgi->getattr_flags = 0; 
 		fgi->dummy = 0;
-		fgi->fh = perfuse_get_fh(opc);
+		fgi->fh = perfuse_get_fh(opc, FREAD);
 
 #ifdef PERFUSE_DEBUG
 		if (perfuse_diagflags & PDF_FH)
@@ -1288,15 +1340,20 @@
 	perfuse_msg_t *pm;
 	uint64_t fh;
 	struct perfuse_state *ps;
+	struct perfuse_node_data *pnd;
 	struct fuse_setattr_in *fsi;
 	int error;
+	int open_self;
 	struct vattr *old_va;
 
+	open_self = 0;
+	ps = puffs_getspecific(pu);
+	pnd = PERFUSE_NODE_DATA(opc);
+
 	/*
 	 * setattr requires --X on the parent directory
 	 */
-	if (no_access((puffs_cookie_t)PERFUSE_NODE_DATA(opc)->pnd_parent,
-	    pcr, PUFFS_VEXEC))
+	if (no_access((puffs_cookie_t)pnd->pnd_parent, pcr, PUFFS_VEXEC))
 		return EACCES;
 
 	old_va = puffs_pn_getvap((struct puffs_node *)opc);
@@ -1331,21 +1388,38 @@
 	 */
 	if ((vap->va_mode != (mode_t)PUFFS_VNOVAL) &&
 	    (puffs_access_chmod(old_va->va_uid, old_va->va_gid,
-				 old_va->va_type, vap->va_mode, pcr)) != 0)
+				old_va->va_type, vap->va_mode, pcr)) != 0)
 		return EACCES;
 	
+	/*
+	 * setattr(mtime, ctime) require an open file,
+	 * at least for glusterfs.
+	 */
+	if (((vap->va_atime.tv_sec != (time_t)PUFFS_VNOVAL) ||
+	     (vap->va_mtime.tv_sec != (time_t)PUFFS_VNOVAL)) &&
+	    !(pnd->pnd_flags & PND_WFH)) {
+		if ((error = perfuse_node_open(pu, opc, FWRITE, pcr)) != 0)
+			return error;
+		open_self = 1;
+	}
+	/*
+	 * It seems troublesome to resize a file while
+	 * a write is just beeing done. Wait for
+	 * it to finish.
+	 */
+	if (vap->va_size != (u_quad_t)PUFFS_VNOVAL)
+		while (pnd->pnd_flags & PND_INWRITE)
+			requeue_request(pu, opc, PCQ_AFTERWRITE);
 
-	ps = puffs_getspecific(pu);
 
 	pm = ps->ps_new_msg(pu, opc, FUSE_SETATTR, sizeof(*fsi), pcr);
 	fsi = GET_INPAYLOAD(ps, pm, fuse_setattr_in);
 	fsi->valid = 0;
 
-	if (PERFUSE_NODE_DATA(opc)->pnd_flags & PND_OPEN) {
-		fh = perfuse_get_fh(opc);
+	if (pnd->pnd_flags & PND_WFH) {
+		fh = perfuse_get_fh(opc, FWRITE);
 		fsi->fh = fh;
-		if (fh != FUSE_UNKNOWN_FH)
-			fsi->valid |= FUSE_FATTR_FH;
+		fsi->valid |= FUSE_FATTR_FH;
 	}
 
 	if (vap->va_size != (u_quad_t)PUFFS_VNOVAL) {
@@ -1380,8 +1454,8 @@
 		fsi->valid |= FUSE_FATTR_GID;
 	}
 
-	if (PERFUSE_NODE_DATA(opc)->pnd_lock_owner != 0) {
-		fsi->lock_owner = PERFUSE_NODE_DATA(opc)->pnd_lock_owner;
+	if (pnd->pnd_lock_owner != 0) {
+		fsi->lock_owner = pnd->pnd_lock_owner;
 		fsi->valid |= FUSE_FATTR_LOCKOWNER;
 	}
 
@@ -1392,6 +1466,9 @@
 
 	ps->ps_destroy_msg(pm);
 
+	if (open_self)
+		(void)perfuse_node_close(pu, opc, FWRITE, pcr);
+
 	return error;
 }
 
@@ -1413,7 +1490,7 @@
  	 */
 	pm = ps->ps_new_msg(pu, opc, FUSE_POLL, sizeof(*fpi), NULL);
 	fpi = GET_INPAYLOAD(ps, pm, fuse_poll_in);
-	fpi->fh = perfuse_get_fh(opc);
+	fpi->fh = perfuse_get_fh(opc, FREAD);
 	fpi->kh = 0;
 	fpi->flags = 0;
 
@@ -1479,7 +1556,7 @@
 
 	/*
 	 * Do not sync if there are no change to sync
-	 * XXX remove that testif we implement mmap
+	 * XXX remove that test if we implement mmap
 	 */
 	pnd = PERFUSE_NODE_DATA(opc);
 #ifdef PERFUSE_DEBUG
@@ -1498,12 +1575,12 @@
 	 * "FSYNC() ERR => -1 (Invalid argument)"
 	 */
 	if (!(pnd->pnd_flags & PND_OPEN)) {
-		if ((error = perfuse_node_open(pu, opc, FREAD, pcr)) != 0)
+		if ((error = perfuse_node_open(pu, opc, FWRITE, pcr)) != 0)
 			goto out;
 		open_self = 1;
 	}
 
-	fh = perfuse_get_fh(opc);
+	fh = perfuse_get_fh(opc, FWRITE);
 	
 	/*
 	 * If fsync_flags  is set, meta data should not be flushed.
@@ -1543,8 +1620,8 @@
 	if (pm != NULL)
 		ps->ps_destroy_msg(pm);
 
-	if (open_self)
-		(void)perfuse_node_close(pu, opc, 0, pcr);
+	if (open_self) 
+		(void)node_close_common(pu, opc, FWRITE);
 
 	return error;
 }
@@ -1576,26 +1653,28 @@
 	const struct puffs_cn *pcn;
 {
 	struct perfuse_state *ps;
-	perfuse_msg_t *pm;
 	struct puffs_node *pn;
+	struct perfuse_node_data *pnd;
+	perfuse_msg_t *pm;
 	char *path;
 	const char *name;
 	size_t len;
 	int error;
 	
+	pnd = PERFUSE_NODE_DATA(opc);
+
 	/*
 	 * remove requires -WX on the parent directory 
 	 * no right required on the object.
 	 */
-	if (no_access((puffs_cookie_t)PERFUSE_NODE_DATA(opc)->pnd_parent,
+	if (no_access((puffs_cookie_t)pnd->pnd_parent,
 	    pcn->pcn_cred, PUFFS_VWRITE|PUFFS_VEXEC))
 		return EACCES;
 
-	ps = puffs_getspecific(pu);
-
 	if (targ == NULL)
 		DERRX(EX_SOFTWARE, "%s: targ is NULL", __func__);
 
+	ps = puffs_getspecific(pu);
 	pn = (struct puffs_node *)targ;
 	name = basename_r((char *)PNPATH(pn));
 	len = strlen(name) + 1;
@@ -1610,9 +1689,6 @@
 	if (puffs_inval_namecache_dir(pu, opc) != 0)
 		DERR(EX_OSERR, "puffs_inval_namecache_dir failed");
 
-	if (puffs_inval_pagecache_node(pu, (puffs_cookie_t)pn) != 0)
-		DERR(EX_OSERR, "puffs_inval_namecache_node failed");
-
 	puffs_setback(puffs_cc_getcc(pu), PUFFS_SETBACK_NOREF_N2);
 
 	/*
@@ -1767,6 +1843,7 @@
 	const struct puffs_cn *pcn;
 {
 	struct perfuse_state *ps;
+	struct perfuse_node_data *pnd;
 	perfuse_msg_t *pm;
 	struct puffs_node *pn;
 	char *path;
@@ -1774,11 +1851,13 @@
 	size_t len;
 	int error;
 	
+	pnd = PERFUSE_NODE_DATA(opc);
+
 	/*
 	 * remove requires -WX on the parent directory 
 	 * no right required on the object.
 	 */
-	if (no_access((puffs_cookie_t)PERFUSE_NODE_DATA(opc)->pnd_parent,
+	if (no_access((puffs_cookie_t)pnd->pnd_parent,
 	    pcn->pcn_cred, PUFFS_VWRITE|PUFFS_VEXEC))
 		return EACCES;
 
@@ -1797,9 +1876,6 @@
 	if (puffs_inval_namecache_dir(pu, opc) != 0)
 		DERR(EX_OSERR, "puffs_inval_namecache_dir failed");
 
-	if (puffs_inval_pagecache_node(pu, (puffs_cookie_t)pn) != 0)
-		DERR(EX_OSERR, "puffs_inval_namecache_node failed");
-
 	puffs_setback(puffs_cc_getcc(pu), PUFFS_SETBACK_NOREF_N2);
 
 out:
@@ -1905,17 +1981,17 @@
 	 * It seems NetBSD can call readdir without open first
 	 * libfuse will crash if it is done that way, hence open first.
 	 */
-	if (!(PERFUSE_NODE_DATA(opc)->pnd_flags & PND_OPEN)) {
+	if (!(pnd->pnd_flags & PND_OPEN)) {
 		if ((error = perfuse_node_open(pu, opc, FREAD, pcr)) != 0)
 			goto out;
 		open_self = 1;
 	}
 
-	fh = perfuse_get_fh(opc);
+	fh = perfuse_get_fh(opc, FREAD);
 
 #ifdef PERFUSE_DEBUG
 	if (perfuse_diagflags & PDF_FH)
-		DPRINTF("%s: opc = %p, ino = %"PRId64", fh = 0x%"PRIx64"\n",
+		DPRINTF("%s: opc = %p, ino = %"PRId64", rfh = 0x%"PRIx64"\n",
 			__func__, (void *)opc,
 			PERFUSE_NODE_DATA(opc)->pnd_ino, fh);
 #endif
@@ -2013,7 +2089,7 @@
 	 * errors are ignored.
 	 */
 	if (open_self)
-		(void)perfuse_node_close(pu, opc, 0, pcr);
+		(void)perfuse_node_close(pu, opc, FWRITE, pcr);
 
 	if (error == 0)
 		error = readdir_buffered(ps, opc, dent, readoff,
@@ -2092,12 +2168,6 @@
 	pnd = PERFUSE_NODE_DATA(opc);
 
 	/*
-	 * Make sure open files are properly closed when reclaimed.
-	 */
-	while (pnd->pnd_flags & PND_OPEN)
-		(void)perfuse_node_close(pu, opc, 0, NULL);
-		
-	/*
 	 * Never forget the root.
 	 */
 	if (pnd->pnd_ino == FUSE_ROOT_ID)
@@ -2121,15 +2191,17 @@
 #ifdef PERFUSE_DEBUG
 	if (perfuse_diagflags & PDF_RECLAIM)
 		DPRINTF("%s (nodeid %"PRId64") is %sreclaimed, "
-			"has childcount %d, %sopen\n", 
+			"has childcount %d %s%s%s, pending ops:%s%s%s%s\n", 
 		        (char *)PNPATH(pn), pnd->pnd_ino,
 		        pnd->pnd_flags & PND_RECLAIMED ? "" : "not ",
 		        pnd->pnd_childcount,
-			pnd->pnd_flags & PND_OPEN ? "" : "not ");
-
-	if (pnd->pnd_flags & PND_OPEN)
-		DWARNX("%s: (nodeid %"PRId64") %s is still open",
-		       __func__, pnd->pnd_ino, (char *)PNPATH(pn));
+			pnd->pnd_flags & PND_OPEN ? "open " : "not open",
+			pnd->pnd_flags & PND_RFH ? "r" : "",
+			pnd->pnd_flags & PND_WFH ? "w" : "",
+			pnd->pnd_flags & PND_INREADDIR ? " readdir" : "",
+			pnd->pnd_flags & PND_INREAD ? " read" : "",
+			pnd->pnd_flags & PND_INWRITE ? " write" : "",
+			pnd->pnd_flags & PND_BUSY ? "" : " none");
 #endif
 
 		if (!(pnd->pnd_flags & PND_RECLAIMED) ||
@@ -2137,12 +2209,38 @@
 			return 0;
 
 		/*
-		 * If the file is still open, close all file handles
-		 * XXX no pcr arguement to send.
+		 * Make sure all operation are finished
+		 * There can be an ongoing write, or queued operations
 		 */
-		while(pnd->pnd_flags & PND_OPEN)
-			(void)perfuse_node_close(pu, opc, 0, NULL);
+		while (pnd->pnd_flags & PND_INWRITE) {
+			requeue_request(pu, opc, PCQ_AFTERWRITE);
+
+			/*
+			 * It may have been cancelled in the meantime
+			 */
+			if (!(pnd->pnd_flags & PND_RECLAIMED))
+				return 0;
+		}
+
+#ifdef PERFUSE_DEBUG
+		if ((pnd->pnd_flags & PND_BUSY) ||
+		       !TAILQ_EMPTY(&pnd->pnd_pcq))
+			DERRX(EX_SOFTWARE, "%s: opc = %p: ongoing operations",
+			      __func__, (void *)opc);
+#endif
 
+		/*
+		 * Close open files
+		 */
+		if (pnd->pnd_flags & PND_WFH)
+			(void)node_close_common(pu, opc, FREAD);
+
+		if (pnd->pnd_flags & PND_RFH)
+			(void)node_close_common(pu, opc, FWRITE);
+
+		/*
+		 * And send the FORGET message
+		 */
 		pm = ps->ps_new_msg(pu, (puffs_cookie_t)pn, FUSE_FORGET, 
 			      sizeof(*ffi), NULL);
 		ffi = GET_INPAYLOAD(ps, pm, fuse_forget_in);
@@ -2223,7 +2321,7 @@
 			
 	pm = ps->ps_new_msg(pu, opc, fop, sizeof(*fli), NULL);
 	fli = GET_INPAYLOAD(ps, pm, fuse_lk_in);
-	fli->fh = perfuse_get_fh(opc);
+	fli->fh = perfuse_get_fh(opc, FWRITE);
 	fli->owner = fl->l_pid;
 	fli->lk.start = fl->l_start;
 	fli->lk.end = fl->l_start + fl->l_len;
@@ -2279,6 +2377,7 @@
 	int ioflag;
 {
 	struct perfuse_state *ps;
+	struct perfuse_node_data *pnd;
 	perfuse_msg_t *pm;
 	struct fuse_read_in *fri;
 	struct fuse_out_header *foh;
@@ -2287,8 +2386,14 @@
 	int error;
 	
 	ps = puffs_getspecific(pu);
+	pnd = PERFUSE_NODE_DATA(opc);
 	pm = NULL;
 
+	if (puffs_pn_getvap((struct puffs_node *)opc)->va_type == VDIR) 
+		return EBADF;
+
+	pnd->pnd_flags |= PND_INREAD;
+
 	requested = *resid;
 	if ((ps->ps_readahead + requested) > ps->ps_max_readahead) {
 		if (perfuse_diagflags & PDF_REQUEUE)
@@ -2307,19 +2412,18 @@
 		 */
 		pm = ps->ps_new_msg(pu, opc, FUSE_READ, sizeof(*fri), pcr);
 		fri = GET_INPAYLOAD(ps, pm, fuse_read_in);
-		fri->fh = perfuse_get_fh(opc);
+		fri->fh = perfuse_get_fh(opc, FREAD);
 		fri->offset = offset;
 		fri->size = (uint32_t)MIN(*resid, PAGE_SIZE - sizeof(*foh));
 		fri->read_flags = 0; /* XXX Unused by libfuse? */
-		fri->lock_owner = PERFUSE_NODE_DATA(opc)->pnd_lock_owner;
+		fri->lock_owner = pnd->pnd_lock_owner;
 		fri->flags = 0;
 		fri->flags |= (fri->lock_owner != 0) ? FUSE_READ_LOCKOWNER : 0;
 
 #ifdef PERFUSE_DEBUG
 	if (perfuse_diagflags & PDF_FH)
 		DPRINTF("%s: opc = %p, ino = %"PRId64", fh = 0x%"PRIx64"\n",
-			__func__, (void *)opc,
-			PERFUSE_NODE_DATA(opc)->pnd_ino, fri->fh);
+			__func__, (void *)opc, pnd->pnd_ino, fri->fh);
 #endif
 		error = XCHG_MSG(ps, pu, pm, UNSPEC_REPLY_LEN);
 
@@ -2351,6 +2455,8 @@
 	ps->ps_readahead -= requested;
 	dequeue_requests(ps, opc, PCQ_READ, 1);
 
+	pnd->pnd_flags &= ~PND_INREAD;
+
 	return error;
 }
 
@@ -2365,6 +2471,7 @@
 	int ioflag;
 {
 	struct perfuse_state *ps;
+	struct perfuse_node_data *pnd;
 	perfuse_msg_t *pm;
 	struct fuse_write_in *fwi;
 	struct fuse_write_out *fwo;
@@ -2375,9 +2482,16 @@
 	int error;
 	
 	ps = puffs_getspecific(pu);
+	pnd = PERFUSE_NODE_DATA(opc);
 	pm = NULL;
 	written = 0;
 
+	if (puffs_pn_getvap((struct puffs_node *)opc)->va_type == VDIR) 
+		return EBADF;
+
+DPRINTF("%s ENTER\n", __func__);
+	pnd->pnd_flags |= PND_INWRITE;
+
 	requested = *resid;
 	if ((ps->ps_write + requested) > ps->ps_max_write) {
 		if (perfuse_diagflags & PDF_REQUEUE)
@@ -2403,11 +2517,11 @@
 		 */
 		pm = ps->ps_new_msg(pu, opc, FUSE_WRITE, payload_len, pcr);
 		fwi = GET_INPAYLOAD(ps, pm, fuse_write_in);
-		fwi->fh = perfuse_get_fh(opc);
+		fwi->fh = perfuse_get_fh(opc, FWRITE);
 		fwi->offset = offset;
 		fwi->size = (uint32_t)data_len;
 		fwi->write_flags = (fwi->size % PAGE_SIZE) ? 0 : 1;
-		fwi->lock_owner = PERFUSE_NODE_DATA(opc)->pnd_lock_owner;
+		fwi->lock_owner = pnd->pnd_lock_owner;
 		fwi->flags = 0;
 		fwi->flags |= (fwi->lock_owner != 0) ? FUSE_WRITE_LOCKOWNER : 0;
 		fwi->flags |= (ioflag & IO_DIRECT) ? 0 : FUSE_WRITE_CACHE; 
@@ -2416,8 +2530,7 @@
 #ifdef PERFUSE_DEBUG
 	if (perfuse_diagflags & PDF_FH)
 		DPRINTF("%s: opc = %p, ino = %"PRId64", fh = 0x%"PRIx64"\n",
-			__func__, (void *)opc,
-			PERFUSE_NODE_DATA(opc)->pnd_ino, fwi->fh);
+			__func__, (void *)opc, pnd->pnd_ino, fwi->fh);
 #endif
 		if ((error = XCHG_MSG(ps, pu, pm, sizeof(*fwo))) != 0)
 			goto out;
@@ -2447,7 +2560,7 @@
 	/*
 	 * Remember to sync the file
 	 */
-	PERFUSE_NODE_DATA(opc)->pnd_flags |= PND_DIRTY;
+	pnd->pnd_flags |= PND_DIRTY;
 
 #ifdef PERFUSE_DEBUG
 	if (perfuse_diagflags & PDF_SYNC)
@@ -2462,6 +2575,13 @@
 	ps->ps_write -= requested;
 	dequeue_requests(ps, opc, PCQ_WRITE, 1);
 
+	pnd->pnd_flags &= ~PND_INWRITE;
+
+	/*
+	 * Dequeue operation that were waiting for write to complete
+	 */ 
+	dequeue_requests(ps, opc, PCQ_AFTERWRITE, DEQUEUE_ALL);
+
 	return error;
 }
 

Index: src/lib/libperfuse/perfuse_priv.h
diff -u src/lib/libperfuse/perfuse_priv.h:1.4 src/lib/libperfuse/perfuse_priv.h:1.5
--- src/lib/libperfuse/perfuse_priv.h:1.4	Wed Sep  1 14:57:24 2010
+++ src/lib/libperfuse/perfuse_priv.h	Fri Sep  3 07:15:18 2010
@@ -1,4 +1,4 @@
-/*  $NetBSD: perfuse_priv.h,v 1.4 2010/09/01 14:57:24 manu Exp $ */
+/*  $NetBSD: perfuse_priv.h,v 1.5 2010/09/03 07:15:18 manu Exp $ */
 
 /*-
  *  Copyright (c) 2010 Emmanuel Dreyfus. All rights reserved.
@@ -71,13 +71,7 @@
 };
 
 
-struct perfuse_file_handle {
-	uint64_t pfh_fh;
-	TAILQ_ENTRY(perfuse_file_handle) pfh_entries;
-};
-
-
-enum perfuse_qtype { PCQ_READDIR, PCQ_READ, PCQ_WRITE };
+enum perfuse_qtype { PCQ_READDIR, PCQ_READ, PCQ_WRITE, PCQ_AFTERWRITE };
 
 struct perfuse_cc_queue {
 	enum perfuse_qtype pcq_type;
@@ -87,7 +81,8 @@
 
 
 struct perfuse_node_data {
-	TAILQ_HEAD(,perfuse_file_handle) pnd_fh;
+	uint64_t pnd_rfh;
+	uint64_t pnd_wfh;
 	uint64_t pnd_ino;			/* inode */
 	uint64_t pnd_nlookup;			/* vnode refcount */
 	uint64_t pnd_offset;			/* seek state */
@@ -98,10 +93,16 @@
 	size_t pnd_all_fd_len;
 	TAILQ_HEAD(,perfuse_cc_queue) pnd_pcq;	/* queued requests */
 	int pnd_flags;
-#define PND_RECLAIMED		0x1	/* reclaim pending */
-#define PND_INREADDIR		0x2	/* readdir in progress */
-#define PND_OPEN		0x4	/* At least one fh is allocated */
-#define PND_DIRTY		0x8	/* There is some data to sync */
+#define PND_RECLAIMED		0x01	/* reclaim pending */
+#define PND_INREADDIR		0x02	/* readdir in progress */
+#define PND_DIRTY		0x04	/* There is some data to sync */
+#define PND_RFH			0x08	/* Read FH allocated */
+#define PND_WFH			0x10	/* Write FH allocated */
+#define PND_INREAD		0x20	/* read in progress */
+#define PND_INWRITE		0x40	/* write in progress */
+
+#define PND_OPEN		(PND_RFH|PND_WFH)	/* File is open */
+#define PND_BUSY		(PND_INREADDIR|PND_INREAD|PND_INWRITE)
 	puffs_cookie_t pnd_parent;
 	int pnd_childcount;
 };
@@ -131,9 +132,9 @@
 struct puffs_node *perfuse_new_pn(struct puffs_usermount *, 
     struct puffs_node *);
 void perfuse_destroy_pn(struct puffs_node *);
-void perfuse_new_fh(puffs_cookie_t, uint64_t);
+void perfuse_new_fh(puffs_cookie_t, uint64_t, int);
 void perfuse_destroy_fh(puffs_cookie_t, uint64_t);
-uint64_t perfuse_get_fh(puffs_cookie_t);
+uint64_t perfuse_get_fh(puffs_cookie_t, int);
 uint64_t perfuse_next_unique(struct puffs_usermount *);
 
 char *perfuse_fs_mount(int, ssize_t);

Index: src/lib/libperfuse/subr.c
diff -u src/lib/libperfuse/subr.c:1.3 src/lib/libperfuse/subr.c:1.4
--- src/lib/libperfuse/subr.c:1.3	Wed Sep  1 14:57:24 2010
+++ src/lib/libperfuse/subr.c	Fri Sep  3 07:15:18 2010
@@ -1,4 +1,4 @@
-/*  $NetBSD: subr.c,v 1.3 2010/09/01 14:57:24 manu Exp $ */
+/*  $NetBSD: subr.c,v 1.4 2010/09/03 07:15:18 manu Exp $ */
 
 /*-
  *  Copyright (c) 2010 Emmanuel Dreyfus. All rights reserved.
@@ -51,7 +51,8 @@
 		DERR(EX_SOFTWARE, "puffs_pn_new failed");
 
 	(void)memset(pnd, 0, sizeof(*pnd));
-	TAILQ_INIT(&pnd->pnd_fh);
+	pnd->pnd_rfh = FUSE_UNKNOWN_FH;
+	pnd->pnd_wfh = FUSE_UNKNOWN_FH;
 	pnd->pnd_ino = PERFUSE_UNKNOWN_INO;
 	pnd->pnd_nlookup = 1;
 	pnd->pnd_parent = parent;
@@ -79,8 +80,8 @@
 		if (pnd->pnd_all_fd != NULL)
 			free(pnd->pnd_all_fd);
 #ifdef PERFUSE_DEBUG
-		if (!TAILQ_EMPTY(&pnd->pnd_fh))
-			DERRX(EX_SOFTWARE, "%s: non empty pnd_fh", __func__);
+		if (pnd->pnd_flags & PND_OPEN)
+			DERRX(EX_SOFTWARE, "%s: file open", __func__);
 
 		if (!TAILQ_EMPTY(&pnd->pnd_pcq))
 			DERRX(EX_SOFTWARE, "%s: non empty pnd_pcq", __func__);
@@ -96,25 +97,30 @@
 
 
 void
-perfuse_new_fh(opc, fh)
+perfuse_new_fh(opc, fh, mode)
 	puffs_cookie_t opc;
 	uint64_t fh;
+	int mode;
 {
 	struct perfuse_node_data *pnd;
-	struct perfuse_file_handle *pfh;
-
-	if (fh == FUSE_UNKNOWN_FH)
-		return;
 
 	pnd = PERFUSE_NODE_DATA(opc);
-	pnd->pnd_flags |= PND_OPEN;
-
-	if ((pfh = malloc(sizeof(*pfh))) == NULL)
-		DERR(EX_OSERR, "malloc failed");
-
-	pfh->pfh_fh = fh;
 
-	TAILQ_INSERT_TAIL(&pnd->pnd_fh, pfh, pfh_entries);
+	if (mode & FWRITE) {
+		if (pnd->pnd_flags & PND_WFH)
+			DERRX(EX_SOFTWARE, "%s: opc = %p, write fh already set",
+			      __func__, (void *)opc);	
+		pnd->pnd_wfh = fh;
+		pnd->pnd_flags |= PND_WFH;
+	} 
+
+	if (mode & FREAD) {
+		if (pnd->pnd_flags & PND_RFH)
+			DERRX(EX_SOFTWARE, "%s: opc = %p, read fh already set",
+			      __func__, (void *)opc);	
+		pnd->pnd_rfh = fh;
+		pnd->pnd_flags |= PND_RFH;
+	}
 
 	return;
 }
@@ -125,42 +131,51 @@
 	uint64_t fh; 
 {
 	struct perfuse_node_data *pnd;
-	struct perfuse_file_handle *pfh;
 
 	pnd = PERFUSE_NODE_DATA(opc);
 
-	TAILQ_FOREACH(pfh, &pnd->pnd_fh, pfh_entries) {
-		if (pfh->pfh_fh == fh) {
-			TAILQ_REMOVE(&pnd->pnd_fh, pfh, pfh_entries);
-			free(pfh);
-			break;
-		}
+	if (fh == pnd->pnd_rfh) {
+		if (!(pnd->pnd_flags & PND_RFH) && (fh != FUSE_UNKNOWN_FH))
+			DERRX(EX_SOFTWARE, 
+			      "%s: opc = %p, unset rfh = %"PRIx64"",
+			      __func__, (void *)opc, fh);	
+		pnd->pnd_rfh = FUSE_UNKNOWN_FH;
+		pnd->pnd_flags &= ~PND_RFH;
 	}
 
-	if (TAILQ_EMPTY(&pnd->pnd_fh))
-		pnd->pnd_flags &= ~PND_OPEN;
+	if (fh == pnd->pnd_wfh) {
+		if (!(pnd->pnd_flags & PND_WFH) && (fh != FUSE_UNKNOWN_FH))
+			DERRX(EX_SOFTWARE,
+			      "%s: opc = %p, unset wfh = %"PRIx64"",
+			      __func__, (void *)opc, fh);	
+		pnd->pnd_wfh = FUSE_UNKNOWN_FH;
+		pnd->pnd_flags &= ~PND_WFH;
+	} 
 
-	if (pfh == NULL)
-		DERRX(EX_SOFTWARE, 
-		      "%s: unexistant fh = %"PRId64" (double close?)",
-		      __func__, fh);
-	
 	return;
 }
 
 uint64_t
-perfuse_get_fh(opc)
+perfuse_get_fh(opc, mode)
 	puffs_cookie_t opc;
+	int mode;
 {
 	struct perfuse_node_data *pnd;
-	struct perfuse_file_handle *pfh;
-	uint64_t fh = FUSE_UNKNOWN_FH;
 
 	pnd = PERFUSE_NODE_DATA(opc);
 
-	if ((pfh = TAILQ_FIRST(&pnd->pnd_fh)) != NULL)
-		fh = pfh->pfh_fh;;
+	if (mode & FWRITE) 
+		if (pnd->pnd_flags & PND_WFH)
+			return pnd->pnd_wfh;
+
+	if (mode & FREAD) {
+		if (pnd->pnd_flags & PND_RFH)
+			return pnd->pnd_rfh;
+
+		if (pnd->pnd_flags & PND_WFH)
+			return pnd->pnd_wfh;
+	}
 
-	return fh;
+	return FUSE_UNKNOWN_FH;
 }
 

Reply via email to