Module Name: src Committed By: manu Date: Mon Sep 20 07:00:22 UTC 2010
Modified Files: src/lib/libperfuse: ops.c perfuse.c perfuse_priv.h subr.c Log Message: - performance improvement for read, readdir and write. Now that we use SOCK_DGRAM, we can send many pages at once without hitting any bug. - when creating a file, it is opened for FUSE, but not for the kernel. If the kernel does not do a subsequent open, we have a leak. We fight against this by trying to close such files that the kernel has left unopened for some time. - some code refactoring to make message exchange easier to debug (more to come) To generate a diff of this commit: cvs rdiff -u -r1.15 -r1.16 src/lib/libperfuse/ops.c cvs rdiff -u -r1.6 -r1.7 src/lib/libperfuse/perfuse.c cvs rdiff -u -r1.10 -r1.11 src/lib/libperfuse/perfuse_priv.h cvs rdiff -u -r1.4 -r1.5 src/lib/libperfuse/subr.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/lib/libperfuse/ops.c diff -u src/lib/libperfuse/ops.c:1.15 src/lib/libperfuse/ops.c:1.16 --- src/lib/libperfuse/ops.c:1.15 Wed Sep 15 01:51:43 2010 +++ src/lib/libperfuse/ops.c Mon Sep 20 07:00:21 2010 @@ -1,4 +1,4 @@ -/* $NetBSD: ops.c,v 1.15 2010/09/15 01:51:43 manu Exp $ */ +/* $NetBSD: ops.c,v 1.16 2010/09/20 07:00:21 manu Exp $ */ /*- * Copyright (c) 2010 Emmanuel Dreyfus. All rights reserved. @@ -43,7 +43,8 @@ extern int perfuse_diagflags; -static int node_close_common(struct puffs_usermount *, puffs_cookie_t, int); +static int xchg_msg(struct puffs_usermount *, puffs_cookie_t, + perfuse_msg_t *, size_t, enum perfuse_xchg_pb_reply); static int no_access(puffs_cookie_t, const struct puffs_cred *, mode_t); static void fuse_attr_to_vap(struct perfuse_state *, struct vattr *, struct fuse_attr *); @@ -94,8 +95,8 @@ #define IFTOVT(mode) (iftovt_tab[((mode) & S_IFMT) >> 12]) #define VTTOIF(indx) (vttoif_tab[(int)(indx)]) -static int -node_close_common(pu, opc, mode) +int +perfuse_node_close_common(pu, opc, mode) struct puffs_usermount *pu; puffs_cookie_t opc; int mode; @@ -148,7 +149,8 @@ __func__, (void *)opc, pnd->pnd_ino, fri->fh); #endif - if ((error = XCHG_MSG(ps, pu, pm, NO_PAYLOAD_REPLY_LEN)) != 0) + if ((error = xchg_msg(pu, opc, pm, + NO_PAYLOAD_REPLY_LEN, wait_reply)) != 0) goto out; ps->ps_destroy_msg(pm); @@ -163,6 +165,30 @@ return error; } +/* ARGSUSED1 */ +static int +xchg_msg(pu, opc, pm, len, wait) + struct puffs_usermount *pu; + puffs_cookie_t opc; + perfuse_msg_t *pm; + size_t len; + enum perfuse_xchg_pb_reply wait; +{ + struct perfuse_state *ps; + int error; + + ps = puffs_getspecific(pu); + +#ifdef PERFUSE_DEBUG + if ((perfuse_diagflags & PDF_FUSE) && (opc != 0)) + DPRINTF("file = \"%s\"\n", + (char *)PNPATH((struct puffs_node *)opc)); +#endif + error = ps->ps_xchg_msg(pu, pm, len, wait); + + return error; +} + static int no_access(opc, pcr, mode) puffs_cookie_t opc; @@ -285,7 +311,7 @@ pm = ps->ps_new_msg(pu, opc, 
FUSE_LOOKUP, len, NULL); (void)strlcpy(_GET_INPAYLOAD(ps, pm, char *), path, len); - if ((error = XCHG_MSG(ps, pu, pm, sizeof(*feo))) != 0) + if ((error = xchg_msg(pu, opc, pm, sizeof(*feo), wait_reply)) != 0) goto out; feo = GET_OUTPAYLOAD(ps, pm, fuse_entry_out); @@ -327,7 +353,7 @@ ps = puffs_getspecific(pu); - if ((error = XCHG_MSG(ps, pu, pm, sizeof(*feo))) != 0) + if ((error = xchg_msg(pu, opc, pm, sizeof(*feo), wait_reply)) != 0) goto out; feo = GET_OUTPAYLOAD(ps, pm, fuse_entry_out); @@ -357,7 +383,8 @@ /* * A fuse_attr_out is returned, but we ignore it. */ - error = XCHG_MSG(ps, pu, pm, sizeof(struct fuse_attr_out)); + error = xchg_msg(pu, (puffs_cookie_t)pn, + pm, sizeof(struct fuse_attr_out), wait_reply); /* * The parent directory needs a sync @@ -604,9 +631,6 @@ ps = perfuse_getspecific(pu); #endif - /* - * XXX Add a lock he day we go multithreaded - */ pnd = PERFUSE_NODE_DATA(opc); pcq.pcq_type = type; pcq.pcq_cc = puffs_cc_getcc(pu); @@ -644,9 +668,6 @@ struct perfuse_node_data *pnd; int dequeued; - /* - * XXX Add a lock he day we go multithreaded - */ pnd = PERFUSE_NODE_DATA(opc); dequeued = 0; TAILQ_FOREACH(pcq, &pnd->pnd_pcq, pcq_next) { @@ -684,7 +705,7 @@ int error; ps = puffs_getspecific(pu); - + if (puffs_mount(pu, ps->ps_target, ps->ps_mountflags, ps->ps_root) != 0) DERR(EX_OSERR, "puffs_mount failed"); @@ -702,7 +723,7 @@ fii->max_readahead = 32 * PAGE_SIZE; fii->flags = (FUSE_ASYNC_READ|FUSE_POSIX_LOCKS|FUSE_ATOMIC_O_TRUNC); - if ((error = XCHG_MSG(ps, pu, pm, sizeof(*fio))) != 0) + if ((error = xchg_msg(pu, 0, pm, sizeof(*fio), wait_reply)) != 0) DERRX(EX_SOFTWARE, "init message exchange failed (%d)", error); fio = GET_OUTPAYLOAD(ps, pm, fuse_init_out); @@ -729,7 +750,7 @@ opc = (puffs_cookie_t)puffs_getroot(pu); pm = ps->ps_new_msg(pu, opc, FUSE_DESTROY, 0, NULL); - if ((error = XCHG_MSG(ps, pu, pm, UNSPEC_REPLY_LEN)) != 0) { + if ((error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply)) != 0){ DWARN("unmount %s", ps->ps_target); if 
(!(flags & MNT_FORCE)) goto out; @@ -759,7 +780,7 @@ opc = (puffs_cookie_t)puffs_getroot(pu); pm = ps->ps_new_msg(pu, opc, FUSE_STATFS, 0, NULL); - if ((error = XCHG_MSG(ps, pu, pm, sizeof(*fso))) != 0) + if ((error = xchg_msg(pu, opc, pm, sizeof(*fso), wait_reply)) != 0) goto out; fso = GET_OUTPAYLOAD(ps, pm, fuse_statfs_out); @@ -964,10 +985,17 @@ opc = (puffs_cookie_t)pn; - error = perfuse_node_open(pu, opc, FREAD|FWRITE, pcn->pcn_cred); + error = perfuse_node_open(pu, opc, FWRITE, pcn->pcn_cred); if (error != 0) return error; + /* + * This node has been open in the filesystem, + * but not by the kernel. We will have to close + * it on our own to avoid a leak + */ + PERFUSE_NODE_DATA(opc)->pnd_flags |= PND_OPENFS; + return 0; } @@ -989,7 +1017,7 @@ (void)strlcpy((char*)(void *)(fci + 1), name, namelen); len = sizeof(*feo) + sizeof(*foo); - if ((error = XCHG_MSG(ps, pu, pm, len)) != 0) + if ((error = xchg_msg(pu, opc, pm, len, wait_reply)) != 0) goto out; feo = GET_OUTPAYLOAD(ps, pm, fuse_entry_out); @@ -1002,7 +1030,7 @@ * so that we can reuse it later */ pn = perfuse_new_pn(pu, opc); - perfuse_new_fh((puffs_cookie_t)pn, foo->fh, FWRITE); + perfuse_new_fh(pu, (puffs_cookie_t)pn, foo->fh, FWRITE); PERFUSE_NODE_DATA(pn)->pnd_ino = feo->nodeid; #ifdef PERFUSE_DEBUG @@ -1020,6 +1048,14 @@ * The parent directory needs a sync */ PERFUSE_NODE_DATA(opc)->pnd_flags |= PND_DIRTY; + + /* + * This node has been open in the filesystem, + * but not by the kernel. We will have to close + * it on our own to avoid a leak + */ + PERFUSE_NODE_DATA(pn)->pnd_flags |= PND_OPENFS; + out: ps->ps_destroy_msg(pm); @@ -1113,6 +1149,14 @@ pn = (struct puffs_node *)opc; if (puffs_pn_getvap(pn)->va_type == VDIR) { + /* + * We may open removed files, but it seems much more + * troublesome to open removed directories. 
glusterfs says + * "OPENDIR (null) (fuse_loc_fill() failed)" + */ + if (pnd->pnd_flags & PND_REMOVED) + return ENOENT; + op = FUSE_OPENDIR; pmode = PUFFS_VREAD|PUFFS_VEXEC; } else { @@ -1143,10 +1187,20 @@ * Do not open twice, and do not reopen for reading * if we already have write handle. */ - if ((mode & FREAD) && (pnd->pnd_flags & PND_RFH)) - return 0; - if ((mode & FWRITE) && (pnd->pnd_flags & PND_WFH)) + if (((mode & FREAD) && (pnd->pnd_flags & PND_RFH)) || + ((mode & FWRITE) && (pnd->pnd_flags & PND_WFH))) { + /* + * If the file was created, it was open for + * the filesystem but not for the kernel. This + * is not the case anymore, therefore we cleanup + * the flag to avoid an unwanted cleanup close + * after PERFUSE_OPENFS_TIMEOUT. + */ + pnd->pnd_flags &= ~PND_OPENFS; + return 0; + } + /* * Convert PUFFS mode to FUSE mode: convert FREAD/FWRITE @@ -1160,16 +1214,16 @@ foi->flags = fmode; foi->unused = 0; - if ((error = XCHG_MSG(ps, pu, pm, sizeof(*foo))) != 0) + if ((error = xchg_msg(pu, opc, pm, sizeof(*foo), wait_reply)) != 0) goto out; foo = GET_OUTPAYLOAD(ps, pm, fuse_open_out); - + /* * Save the file handle in node private data * so that we can reuse it later */ - perfuse_new_fh((puffs_cookie_t)pn, foo->fh, mode); + perfuse_new_fh(pu, (puffs_cookie_t)pn, foo->fh, mode); #ifdef PERFUSE_DEBUG if (perfuse_diagflags & PDF_FH) @@ -1180,6 +1234,7 @@ pnd->pnd_ino, mode & FREAD ? "r" : "", mode & FWRITE ? "w" : "", foo->fh); #endif + out: ps->ps_destroy_msg(pm); @@ -1210,7 +1265,7 @@ * therefore postpone the close operation at reclaim time. 
*/ if (puffs_pn_getvap(pn)->va_type != VREG) - return node_close_common(pu, opc, flags); + return perfuse_node_close_common(pu, opc, flags); return 0; } @@ -1243,7 +1298,7 @@ fai = GET_INPAYLOAD(ps, pm, fuse_access_in); fai->mask = mode; - error = XCHG_MSG(ps, pu, pm, NO_PAYLOAD_REPLY_LEN); + error = xchg_msg(pu, opc, pm, NO_PAYLOAD_REPLY_LEN, wait_reply); ps->ps_destroy_msg(pm); } @@ -1266,7 +1321,8 @@ "fh = 0x%"PRIx64"\n", __func__, (void *)opc, PERFUSE_NODE_DATA(opc)->pnd_ino, fgi->fh); #endif - if ((error = XCHG_MSG(ps, pu, pm, sizeof(*fao))) != 0) { + if ((error = xchg_msg(pu, opc, pm, + sizeof(*fao), wait_reply)) != 0) { ps->ps_destroy_msg(pm); goto out; } @@ -1318,7 +1374,7 @@ fgi->dummy = 0; fgi->fh = 0; - if ((error = XCHG_MSG(ps, pu, pm, sizeof(*fao))) != 0) + if ((error = xchg_msg(pu, opc, pm, sizeof(*fao), wait_reply)) != 0) goto out; fao = GET_OUTPAYLOAD(ps, pm, fuse_attr_out); @@ -1479,7 +1535,7 @@ /* * A fuse_attr_out is returned, but we ignore it. */ - error = XCHG_MSG(ps, pu, pm, sizeof(struct fuse_attr_out)); + error = xchg_msg(pu, opc, pm, sizeof(struct fuse_attr_out), wait_reply); ps->ps_destroy_msg(pm); @@ -1514,7 +1570,7 @@ __func__, (void *)opc, PERFUSE_NODE_DATA(opc)->pnd_ino, fpi->fh); #endif - if ((error = XCHG_MSG(ps, pu, pm, sizeof(*fpo))) != 0) + if ((error = xchg_msg(pu, opc, pm, sizeof(*fpo), wait_reply)) != 0) goto out; fpo = GET_OUTPAYLOAD(ps, pm, fuse_poll_out); @@ -1610,7 +1666,8 @@ PERFUSE_NODE_DATA(opc)->pnd_ino, ffi->fh); #endif - if ((error = XCHG_MSG(ps, pu, pm, NO_PAYLOAD_REPLY_LEN)) != 0) + if ((error = xchg_msg(pu, opc, pm, + NO_PAYLOAD_REPLY_LEN, wait_reply)) != 0) goto out; /* @@ -1637,7 +1694,7 @@ ps->ps_destroy_msg(pm); if (open_self) - (void)node_close_common(pu, opc, FWRITE); + (void)perfuse_node_close_common(pu, opc, FWRITE); return error; } @@ -1700,7 +1757,7 @@ path = _GET_INPAYLOAD(ps, pm, char *); (void)strlcpy(path, name, len); - if ((error = XCHG_MSG(ps, pu, pm, UNSPEC_REPLY_LEN)) != 0) + if ((error = 
xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply)) != 0) goto out; if (puffs_inval_namecache_dir(pu, opc) != 0) @@ -1756,7 +1813,7 @@ fli->oldnodeid = PERFUSE_NODE_DATA(pn)->pnd_ino; (void)strlcpy((char *)(void *)(fli + 1), name, len - sizeof(*fli)); - error = XCHG_MSG(ps, pu, pm, UNSPEC_REPLY_LEN); + error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply); ps->ps_destroy_msg(pm); @@ -1808,7 +1865,7 @@ np += oldname_len; (void)strlcpy(np, newname, newname_len); - if ((error = XCHG_MSG(ps, pu, pm, UNSPEC_REPLY_LEN)) != 0) + if ((error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply)) != 0) goto out; /* @@ -1898,7 +1955,7 @@ path = _GET_INPAYLOAD(ps, pm, char *); (void)strlcpy(path, name, len); - if ((error = XCHG_MSG(ps, pu, pm, UNSPEC_REPLY_LEN)) != 0) + if ((error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply)) != 0) goto out; if (puffs_inval_namecache_dir(pu, opc) != 0) @@ -1982,6 +2039,7 @@ int error; int open_self; uint64_t fd_offset; + size_t fd_maxlen; pm = NULL; error = 0; @@ -2033,6 +2091,7 @@ pnd->pnd_all_fd = NULL; pnd->pnd_all_fd_len = 0; fd_offset = 0; + fd_maxlen = ps->ps_max_readahead - sizeof(*foh); do { size_t fd_len; @@ -2042,24 +2101,17 @@ /* * read_flags, lock_owner and flags are unused in libfuse - * - * XXX if fri->size is too big (bigger than PAGE_SIZE?), * we get strange bugs. ktrace shows 16 bytes or garbage - * at the end of sent frames, but perfused does not receive - * that data. The data length is hoverver the same, which - * cause perfused to use the last 16 bytes of the frame - * as the frame header of the next frame. - * - * This may be a kernel bug. 
*/ fri = GET_INPAYLOAD(ps, pm, fuse_read_in); fri->fh = fh; fri->offset = fd_offset; - fri->size = PAGE_SIZE - sizeof(struct fuse_out_header); + fri->size = fd_maxlen; fri->read_flags = 0; fri->lock_owner = 0; fri->flags = 0; - if ((error = XCHG_MSG(ps, pu, pm, UNSPEC_REPLY_LEN)) != 0) + if ((error = xchg_msg(pu, opc, pm, + UNSPEC_REPLY_LEN, wait_reply)) != 0) goto out; /* @@ -2071,8 +2123,7 @@ foh_len = foh->len; /* - * It seems that the only way to discover the end - * of the buffer is to get an empty read + * Empty read: we reached the end of the buffer. */ if (foh_len == sizeof(*foh)) break; @@ -2103,7 +2154,15 @@ ps->ps_destroy_msg(pm); pm = NULL; - } while (1 /* CONSTCOND */); + + /* + * If the buffer was not completely filled, + * that is, if there is room for the biggest + * struct dirent possible, then we are done: + * no need to issue another READDIR to see + * an empty reply. + */ + } while (foh_len >= fd_maxlen - (sizeof(*fd) + MAXPATHLEN)); if (fuse_to_dirent(pu, opc, pnd->pnd_all_fd, pnd->pnd_all_fd_len) == -1) error = EIO; @@ -2170,7 +2229,7 @@ pm = ps->ps_new_msg(pu, opc, FUSE_READLINK, 0, pcr); - if ((error = XCHG_MSG(ps, pu, pm, UNSPEC_REPLY_LEN)) != 0) + if ((error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply)) != 0) goto out; foh = GET_OUTHDR(ps, pm); @@ -2243,7 +2302,7 @@ /* * Make sure all operation are finished - * There can be an ongoing write, or queued operations + * There can be an ongoing write or */ while (pnd->pnd_flags & PND_INWRITE) { requeue_request(pu, opc, PCQ_AFTERWRITE); @@ -2267,10 +2326,10 @@ * Close open files */ if (pnd->pnd_flags & PND_WFH) - (void)node_close_common(pu, opc, FWRITE); + (void)perfuse_node_close_common(pu, opc, FWRITE); if (pnd->pnd_flags & PND_RFH) - (void)node_close_common(pu, opc, FREAD); + (void)perfuse_node_close_common(pu, opc, FREAD); /* * And send the FORGET message @@ -2281,13 +2340,14 @@ ffi->nlookup = pnd->pnd_nlookup; /* - * No reply is expected, pm is freed in XCHG_MSG + * No reply is 
expected, pm is freed in xchg_msg */ - (void)XCHG_MSG_NOREPLY(ps, pu, pm, UNSPEC_REPLY_LEN); + (void)xchg_msg(pu, (puffs_cookie_t)pn, + pm, UNSPEC_REPLY_LEN, no_reply); parent_pn = pnd->pnd_parent; - perfuse_destroy_pn(pn); + perfuse_destroy_pn(pu, pn); puffs_pn_put(pn); pn = parent_pn; @@ -2370,7 +2430,7 @@ PERFUSE_NODE_DATA(opc)->pnd_ino, fli->fh); #endif - if ((error = XCHG_MSG(ps, pu, pm, sizeof(*flo))) != 0) + if ((error = xchg_msg(pu, opc, pm, sizeof(*flo), wait_reply)) != 0) goto out; flo = GET_OUTPAYLOAD(ps, pm, fuse_lk_out); @@ -2416,7 +2476,6 @@ struct fuse_read_in *fri; struct fuse_out_header *foh; size_t readen; - size_t requested; int error; ps = puffs_getspecific(pu); @@ -2426,27 +2485,19 @@ if (puffs_pn_getvap((struct puffs_node *)opc)->va_type == VDIR) return EBADF; - requested = *resid; - if ((ps->ps_readahead + requested) > ps->ps_max_readahead) { - if (perfuse_diagflags & PDF_REQUEUE) - DPRINTF("readahead = %zd\n", ps->ps_readahead); - requeue_request(pu, opc, PCQ_READ); - } - ps->ps_readahead += requested; - do { + size_t max_read; + + max_read = ps->ps_max_readahead - sizeof(*foh); /* * flags may be set to FUSE_READ_LOCKOWNER * if lock_owner is provided. - * - * XXX See comment about fri->size in perfuse_node_readdir - * We encounter the same bug here. */ pm = ps->ps_new_msg(pu, opc, FUSE_READ, sizeof(*fri), pcr); fri = GET_INPAYLOAD(ps, pm, fuse_read_in); fri->fh = perfuse_get_fh(opc, FREAD); fri->offset = offset; - fri->size = (uint32_t)MIN(*resid, PAGE_SIZE - sizeof(*foh)); + fri->size = (uint32_t)MIN(*resid, max_read); fri->read_flags = 0; /* XXX Unused by libfuse? 
*/ fri->lock_owner = pnd->pnd_lock_owner; fri->flags = 0; @@ -2457,7 +2508,7 @@ DPRINTF("%s: opc = %p, ino = %"PRId64", fh = 0x%"PRIx64"\n", __func__, (void *)opc, pnd->pnd_ino, fri->fh); #endif - error = XCHG_MSG(ps, pu, pm, UNSPEC_REPLY_LEN); + error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply); if (error != 0) goto out; @@ -2465,6 +2516,12 @@ foh = GET_OUTHDR(ps, pm); readen = foh->len - sizeof(*foh); +#ifdef PERFUSE_DEBUG + if (readen > *resid) + DERRX(EX_SOFTWARE, "%s: Unexpected big read %zd", + __func__, readen); +#endif + (void)memcpy(buf, _GET_OUTPAYLOAD(ps, pm, char *), readen); buf += readen; @@ -2484,10 +2541,6 @@ if (pm != NULL) ps->ps_destroy_msg(pm); - ps->ps_readahead -= requested; - - (void)dequeue_requests(ps, opc, PCQ_READ, 1); - return error; } @@ -2509,7 +2562,6 @@ size_t data_len; size_t payload_len; size_t written; - size_t requested; int error; ps = puffs_getspecific(pu); @@ -2520,27 +2572,28 @@ if (puffs_pn_getvap((struct puffs_node *)opc)->va_type == VDIR) return EBADF; + /* + * We need to queue write requests in order to avoid + * dequeueing PCQ_AFTERWRITE when there are pending writes. + */ while (pnd->pnd_flags & PND_INWRITE) requeue_request(pu, opc, PCQ_WRITE); pnd->pnd_flags |= PND_INWRITE; - - requested = *resid; - if ((ps->ps_write + requested) > ps->ps_max_write) { - if (perfuse_diagflags & PDF_REQUEUE) - DPRINTF("write = %zd\n", ps->ps_write); - requeue_request(pu, opc, PCQ_WRITE); - } - ps->ps_write += requested; - do { + size_t max_write; /* - * It seems libfuse does not expects big chunks, so - * send it page per page. The writepage feature is - * probably there to minmize data movement. - * XXX use ps->ps_maxwrite? + * There is a writepage flag when data + * is PAGE_SIZE-aligned. Use it for + * everything but the data after the last + * page boundary. 
*/ - data_len = MIN(*resid, PAGE_SIZE); + max_write = ps->ps_max_write - sizeof(*fwi); + + data_len = MIN(*resid, max_write); + if (data_len > PAGE_SIZE) + data_len = data_len & ~(PAGE_SIZE - 1); + payload_len = data_len + sizeof(*fwi); /* @@ -2565,11 +2618,17 @@ DPRINTF("%s: opc = %p, ino = %"PRId64", fh = 0x%"PRIx64"\n", __func__, (void *)opc, pnd->pnd_ino, fwi->fh); #endif - if ((error = XCHG_MSG(ps, pu, pm, sizeof(*fwo))) != 0) + if ((error = xchg_msg(pu, opc, pm, + sizeof(*fwo), wait_reply)) != 0) goto out; fwo = GET_OUTPAYLOAD(ps, pm, fuse_write_out); written = fwo->size; +#ifdef PERFUSE_DEBUG + if (written > *resid) + DERRX(EX_SOFTWARE, "%s: Unexpected big write %zd", + __func__, written); +#endif *resid -= written; offset += written; buf += written; @@ -2605,9 +2664,6 @@ if (pm != NULL) ps->ps_destroy_msg(pm); - ps->ps_write -= requested; - - /* * If there are no more queued write, we can resume * an operation awaiting write completion. Index: src/lib/libperfuse/perfuse.c diff -u src/lib/libperfuse/perfuse.c:1.6 src/lib/libperfuse/perfuse.c:1.7 --- src/lib/libperfuse/perfuse.c:1.6 Wed Sep 15 01:51:43 2010 +++ src/lib/libperfuse/perfuse.c Mon Sep 20 07:00:21 2010 @@ -1,4 +1,4 @@ -/* $NetBSD: perfuse.c,v 1.6 2010/09/15 01:51:43 manu Exp $ */ +/* $NetBSD: perfuse.c,v 1.7 2010/09/20 07:00:21 manu Exp $ */ /*- * Copyright (c) 2010 Emmanuel Dreyfus. All rights reserved. 
@@ -58,6 +58,7 @@ (void)memset(ps, 0, sizeof(*ps)); ps->ps_max_write = UINT_MAX; ps->ps_max_readahead = UINT_MAX; + TAILQ_INIT(&ps->ps_pnd); return ps; } @@ -220,7 +221,7 @@ struct perfuse_mount_out *pmo; #if (PERFUSE_SOCKTYPE == SOCK_DGRAM) struct sockaddr_storage ss; - struct sockaddr_un sun; + struct sockaddr_un *sun; struct sockaddr *sa; socklen_t sa_len; #endif @@ -246,22 +247,22 @@ sock_len = 0; #if (PERFUSE_SOCKTYPE == SOCK_DGRAM) sa = (struct sockaddr *)(void *)&ss; + sun = (struct sockaddr_un *)(void *)&ss; sa_len = sizeof(ss); if ((getpeername(s, sa, &sa_len) == 0) && (sa->sa_family = AF_LOCAL) && - (strcmp(((struct sockaddr_un *)sa)->sun_path, _PATH_FUSE) == 0)) { + (strcmp(sun->sun_path, _PATH_FUSE) == 0)) { - sa = (struct sockaddr *)(void *)&sun; - sun.sun_len = sizeof(sun); - sun.sun_family = AF_LOCAL; - (void)sprintf(sun.sun_path, "%s/%s-%d", + sun->sun_len = sizeof(*sun); + sun->sun_family = AF_LOCAL; + (void)sprintf(sun->sun_path, "%s/%s-%d", _PATH_TMP, getprogname(), getpid()); - if (bind(s, sa, sa->sa_len) != 0) + if (bind(s, sa, (socklen_t)sa->sa_len) != 0) DERR(EX_OSERR, "%s:%d bind to \"%s\" failed", - __func__, __LINE__, sun.sun_path); + __func__, __LINE__, sun->sun_path); - sock_len = strlen(sun.sun_path) + 1; + sock_len = strlen(sun->sun_path) + 1; } #endif /* PERFUSE_SOCKTYPE */ @@ -317,7 +318,7 @@ } if (sock_len != 0) { - (void)strcpy(cp, sun.sun_path); + (void)strcpy(cp, sun->sun_path); cp += pmo->pmo_sock_len; } Index: src/lib/libperfuse/perfuse_priv.h diff -u src/lib/libperfuse/perfuse_priv.h:1.10 src/lib/libperfuse/perfuse_priv.h:1.11 --- src/lib/libperfuse/perfuse_priv.h:1.10 Wed Sep 15 01:51:43 2010 +++ src/lib/libperfuse/perfuse_priv.h Mon Sep 20 07:00:22 2010 @@ -1,4 +1,4 @@ -/* $NetBSD: perfuse_priv.h,v 1.10 2010/09/15 01:51:43 manu Exp $ */ +/* $NetBSD: perfuse_priv.h,v 1.11 2010/09/20 07:00:22 manu Exp $ */ /*- * Copyright (c) 2010 Emmanuel Dreyfus. All rights reserved. 
@@ -37,6 +37,17 @@ #include "perfuse_if.h" #include "fuse.h" +/* + * When a file is created, it is open for the filesystem, but not + * for the kernel. We keep the file open to avoid re-open it, but + * once we open PERFUSE_OPENFS_MAXFILES files, we start closing + * on our own any file that has not been open for PERFUSE_OPENFS_TIMEOUT + * seconds. This is to avoid file leaks and getting "Too many open + * files in system" + */ +#define PERFUSE_OPENFS_TIMEOUT 3 +#define PERFUSE_OPENFS_MAXFILES 32 + struct perfuse_state { void *ps_private; /* Private field for libperfuse user */ struct puffs_usermount *ps_pu; @@ -58,8 +69,6 @@ char *ps_filesystemtype; int ps_mountflags; uint64_t ps_unique; - size_t ps_readahead; - size_t ps_write; perfuse_new_msg_fn ps_new_msg; perfuse_xchg_msg_fn ps_xchg_msg; perfuse_destroy_msg_fn ps_destroy_msg; @@ -67,10 +76,13 @@ perfuse_get_inpayload_fn ps_get_inpayload; perfuse_get_outhdr_fn ps_get_outhdr; perfuse_get_outpayload_fn ps_get_outpayload; + TAILQ_HEAD(, perfuse_node_data) ps_pnd; + int ps_pnd_count; }; -enum perfuse_qtype { PCQ_READDIR, PCQ_READ, PCQ_WRITE, PCQ_AFTERWRITE }; +enum perfuse_qtype { PCQ_READDIR, PCQ_READ, PCQ_WRITE, PCQ_AFTERWRITE }; + #ifdef PERFUSE_DEBUG extern const char *perfuse_qtypestr[]; #endif @@ -81,7 +93,6 @@ TAILQ_ENTRY(perfuse_cc_queue) pcq_next; }; - struct perfuse_node_data { uint64_t pnd_rfh; uint64_t pnd_wfh; @@ -95,18 +106,22 @@ size_t pnd_all_fd_len; TAILQ_HEAD(,perfuse_cc_queue) pnd_pcq; /* queued requests */ int pnd_flags; -#define PND_RECLAIMED 0x01 /* reclaim pending */ -#define PND_INREADDIR 0x02 /* readdir in progress */ -#define PND_DIRTY 0x04 /* There is some data to sync */ -#define PND_RFH 0x08 /* Read FH allocated */ -#define PND_WFH 0x10 /* Write FH allocated */ -#define PND_REMOVED 0x20 /* Node was removed */ -#define PND_INWRITE 0x40 /* write in progress */ +#define PND_RECLAIMED 0x001 /* reclaim pending */ +#define PND_INREADDIR 0x002 /* readdir in progress */ +#define PND_DIRTY 0x004 
/* There is some data to sync */ +#define PND_RFH 0x008 /* Read FH allocated */ +#define PND_WFH 0x010 /* Write FH allocated */ +#define PND_REMOVED 0x020 /* Node was removed */ +#define PND_INWRITE 0x040 /* write in progress */ +#define PND_OPENFS 0x080 /* Open by fs but not by kernel */ #define PND_OPEN (PND_RFH|PND_WFH) /* File is open */ #define PND_BUSY (PND_INREADDIR|PND_INWRITE) puffs_cookie_t pnd_parent; int pnd_childcount; + time_t pnd_timestamp; + TAILQ_ENTRY(perfuse_node_data) pnd_next; + puffs_cookie_t pnd_pn; }; #define PERFUSE_NODE_DATA(opc) \ @@ -125,19 +140,16 @@ (struct type *)(void *)ps->ps_get_outpayload(pm) #define _GET_OUTPAYLOAD(ps, pm, type) (type)ps->ps_get_outpayload(pm) -#define XCHG_MSG(ps, pu, opc, len) ps->ps_xchg_msg(pu, opc, len, wait_reply) -#define XCHG_MSG_NOREPLY(ps, pu, opc, len) \ - ps->ps_xchg_msg(pu, opc, len, no_reply) - __BEGIN_DECLS struct puffs_node *perfuse_new_pn(struct puffs_usermount *, struct puffs_node *); -void perfuse_destroy_pn(struct puffs_node *); -void perfuse_new_fh(puffs_cookie_t, uint64_t, int); +void perfuse_destroy_pn(struct puffs_usermount *, struct puffs_node *); +void perfuse_new_fh(struct puffs_usermount *, puffs_cookie_t, uint64_t, int); void perfuse_destroy_fh(puffs_cookie_t, uint64_t); uint64_t perfuse_get_fh(puffs_cookie_t, int); uint64_t perfuse_next_unique(struct puffs_usermount *); +int perfuse_node_close_common(struct puffs_usermount *, puffs_cookie_t, int); char *perfuse_fs_mount(int, ssize_t); Index: src/lib/libperfuse/subr.c diff -u src/lib/libperfuse/subr.c:1.4 src/lib/libperfuse/subr.c:1.5 --- src/lib/libperfuse/subr.c:1.4 Fri Sep 3 07:15:18 2010 +++ src/lib/libperfuse/subr.c Mon Sep 20 07:00:22 2010 @@ -1,4 +1,4 @@ -/* $NetBSD: subr.c,v 1.4 2010/09/03 07:15:18 manu Exp $ */ +/* $NetBSD: subr.c,v 1.5 2010/09/20 07:00:22 manu Exp $ */ /*- * Copyright (c) 2010 Emmanuel Dreyfus. All rights reserved. 
@@ -41,9 +41,12 @@ struct puffs_usermount *pu; struct puffs_node *parent; { + struct perfuse_state *ps; struct puffs_node *pn; struct perfuse_node_data *pnd; + ps = puffs_getspecific(pu); + if ((pnd = malloc(sizeof(*pnd))) == NULL) DERR(EX_OSERR, "malloc failed"); @@ -56,8 +59,13 @@ pnd->pnd_ino = PERFUSE_UNKNOWN_INO; pnd->pnd_nlookup = 1; pnd->pnd_parent = parent; + pnd->pnd_timestamp = time(NULL); + pnd->pnd_pn = (puffs_cookie_t)pn; TAILQ_INIT(&pnd->pnd_pcq); + TAILQ_INSERT_TAIL(&ps->ps_pnd, pnd, pnd_next); + ps->ps_pnd_count++; + if (parent != NULL) PERFUSE_NODE_DATA(parent)->pnd_childcount++; @@ -65,11 +73,19 @@ } void -perfuse_destroy_pn(pn) +perfuse_destroy_pn(pu, pn) + struct puffs_usermount *pu; struct puffs_node *pn; { + struct perfuse_state *ps; struct perfuse_node_data *pnd; + ps = puffs_getspecific(pu); + pnd = PERFUSE_NODE_DATA(pn); + + TAILQ_REMOVE(&ps->ps_pnd, pnd, pnd_next); + ps->ps_pnd_count--; + if ((pnd = puffs_pn_getpriv(pn)) != NULL) { if (pnd->pnd_parent != NULL) PERFUSE_NODE_DATA(pnd->pnd_parent)->pnd_childcount--; @@ -97,13 +113,40 @@ void -perfuse_new_fh(opc, fh, mode) +perfuse_new_fh(pu, opc, fh, mode) + struct puffs_usermount *pu; puffs_cookie_t opc; uint64_t fh; int mode; { + struct perfuse_state *ps; struct perfuse_node_data *pnd; + ps = puffs_getspecific(pu); + + /* + * Nodes file with PND_OPENFS are open by the filesystem but + * not by the kernel, because of a CREATE operation. If + * the kernel never opens them, we have a leak to fix. + * If we have enough open files, we start closing the + * one that had been open for too long. 
+ */ + if (ps->ps_pnd_count > PERFUSE_OPENFS_MAXFILES) { + time_t now; + + now = time(NULL); + + TAILQ_FOREACH(pnd, &ps->ps_pnd, pnd_next) { + if ((pnd->pnd_ino == FUSE_ROOT_ID) || + !(pnd->pnd_flags & PND_OPENFS) || + (now < pnd->pnd_timestamp + PERFUSE_OPENFS_TIMEOUT)) + continue; + + pnd->pnd_flags &= ~PND_OPENFS; + perfuse_node_close_common(pu, pnd->pnd_pn, FWRITE); + } + } + pnd = PERFUSE_NODE_DATA(opc); if (mode & FWRITE) {