Author: gleb
Date: Sun Jan  6 22:15:44 2013
New Revision: 245115
URL: http://svnweb.freebsd.org/changeset/base/245115

Log:
  tmpfs: Replace directory entry linked list with RB-Tree.
  
  Use file name hash as a tree key, handle duplicate keys.  Both VOP_LOOKUP
  and VOP_READDIR operations utilize same tree for search.  Directory
  entry offset (cookie) is either file name hash or incremental id in case
  of hash collisions (duplicate-cookies).  Keep sorted per directory list
  of duplicate-cookie entries to facilitate cookie number allocation.
  
  Don't fail if previous VOP_READDIR() offset is no longer valid, start
  with next dirent instead.  Other file system handle it similarly.
  
  Workaround race prone tn_readdir_last[pn] fields update.
  
  Add tmpfs_dir_destroy() to free all dirents.
  
  Set NFS cookies in tmpfs_dir_getdents(). Return EJUSTRETURN from
  tmpfs_dir_getdents() instead of hard coded -1.
  
  Mark directory traversal routines static as they are no longer
  used outside of tmpfs_subr.c

Modified:
  head/sys/fs/tmpfs/tmpfs.h
  head/sys/fs/tmpfs/tmpfs_subr.c
  head/sys/fs/tmpfs/tmpfs_vfsops.c
  head/sys/fs/tmpfs/tmpfs_vnops.c

Modified: head/sys/fs/tmpfs/tmpfs.h
==============================================================================
--- head/sys/fs/tmpfs/tmpfs.h   Sun Jan  6 21:56:58 2013        (r245114)
+++ head/sys/fs/tmpfs/tmpfs.h   Sun Jan  6 22:15:44 2013        (r245115)
@@ -49,6 +49,7 @@
 /* --------------------------------------------------------------------- */
 #include <sys/malloc.h>
 #include <sys/systm.h>
+#include <sys/tree.h>
 #include <sys/vmmeter.h>
 #include <vm/swap_pager.h>
 
@@ -60,104 +61,81 @@ MALLOC_DECLARE(M_TMPFSNAME);
 /*
  * Internal representation of a tmpfs directory entry.
  */
+
+LIST_HEAD(tmpfs_dir_duphead, tmpfs_dirent);
+
 struct tmpfs_dirent {
-       TAILQ_ENTRY(tmpfs_dirent)       td_entries;
+       /*
+        * Depending on td_cookie flag entry can be of 3 types:
+        * - regular -- no hash collisions, stored in RB-Tree
+        * - duphead -- synthetic linked list head for dup entries
+        * - dup -- stored in linked list instead of RB-Tree
+        */
+       union {
+               /* regular and duphead entry types */
+               RB_ENTRY(tmpfs_dirent)          td_entries;
 
-       /* Length of the name stored in this directory entry.  This avoids
-        * the need to recalculate it every time the name is used. */
-       uint16_t                        td_namelen;
-
-       /* The name of the entry, allocated from a string pool.  This
-       * string is not required to be zero-terminated; therefore, the
-       * td_namelen field must always be used when accessing its value. */
-       char *                          td_name;
+               /* dup entry type */
+               struct {
+                       LIST_ENTRY(tmpfs_dirent) entries;
+                       LIST_ENTRY(tmpfs_dirent) index_entries;
+               } td_dup;
+       } uh;
+
+       uint32_t                        td_cookie;
+       uint32_t                        td_hash;
+       u_int                           td_namelen;
 
        /* Pointer to the node this entry refers to.  In case this field
         * is NULL, the node is a whiteout. */
        struct tmpfs_node *             td_node;
+
+       union {
+               /*
+                * The name of the entry, allocated from a string pool.  This
+                * string is not required to be zero-terminated.
+                */
+               char *                  td_name;        /* regular, dup */
+               struct tmpfs_dir_duphead td_duphead;    /* duphead */
+       } ud;
 };
 
-/* A directory in tmpfs holds a sorted list of directory entries, which in
+/* A directory in tmpfs holds a list of directory entries, which in
  * turn point to other files (which can be directories themselves).
  *
- * In tmpfs, this list is managed by a tail queue, whose head is defined by
+ * In tmpfs, this list is managed by a RB-Tree, whose head is defined by
  * the struct tmpfs_dir type.
  *
- * It is imporant to notice that directories do not have entries for . and
+ * It is important to notice that directories do not have entries for . and
  * .. as other file systems do.  These can be generated when requested
  * based on information available by other means, such as the pointer to
  * the node itself in the former case or the pointer to the parent directory
  * in the latter case.  This is done to simplify tmpfs's code and, more
  * importantly, to remove redundancy. */
-TAILQ_HEAD(tmpfs_dir, tmpfs_dirent);
+RB_HEAD(tmpfs_dir, tmpfs_dirent);
 
 /* Each entry in a directory has a cookie that identifies it.  Cookies
  * supersede offsets within directories because, given how tmpfs stores
- * directories in memory, there is no such thing as an offset.  (Emulating
- * a real offset could be very difficult.)
- * 
+ * directories in memory, there is no such thing as an offset.
+ *
  * The '.', '..' and the end of directory markers have fixed cookies which
  * cannot collide with the cookies generated by other entries.  The cookies
- * fot the other entries are generated based on the memory address on which
- * stores their information is stored.
- *
- * Ideally, using the entry's memory pointer as the cookie would be enough
- * to represent it and it wouldn't cause collisions in any system.
- * Unfortunately, this results in "offsets" with very large values which
- * later raise problems in the Linux compatibility layer (and maybe in other
- * places) as described in PR kern/32034.  Hence we need to workaround this
- * with a rather ugly hack.
- *
- * Linux 32-bit binaries, unless built with _FILE_OFFSET_BITS=64, have off_t
- * set to 'long', which is a 32-bit *signed* long integer.  Regardless of
- * the macro value, GLIBC (2.3 at least) always uses the getdents64
- * system call (when calling readdir) which internally returns off64_t
- * offsets.  In order to make 32-bit binaries work, *GLIBC* converts the
- * 64-bit values returned by the kernel to 32-bit ones and aborts with
- * EOVERFLOW if the conversion results in values that won't fit in 32-bit
- * integers (which it assumes is because the directory is extremely large).
- * This wouldn't cause problems if we were dealing with unsigned integers,
- * but as we have signed integers, this check fails due to sign expansion.
+ * for the other entries are generated based on the file name hash value or
+ * unique number in case of name hash collision.
  *
- * For example, consider that the kernel returns the 0xc1234567 cookie to
- * userspace in a off64_t integer.  Later on, GLIBC casts this value to
- * off_t (remember, signed) with code similar to:
- *     system call returns the offset in kernel_value;
- *     off_t casted_value = kernel_value;
- *     if (sizeof(off_t) != sizeof(off64_t) &&
- *         kernel_value != casted_value)
- *             error!
- * In this case, casted_value still has 0xc1234567, but when it is compared
- * for equality against kernel_value, it is promoted to a 64-bit integer and
- * becomes 0xffffffffc1234567, which is different than 0x00000000c1234567.
- * Then, GLIBC assumes this is because the directory is very large.
- *
- * Given that all the above happens in user-space, we have no control over
- * it; therefore we must workaround the issue here.  We do this by
- * truncating the pointer value to a 32-bit integer and hope that there
- * won't be collisions.  In fact, this will not cause any problems in
- * 32-bit platforms but some might arise in 64-bit machines (I'm not sure
- * if they can happen at all in practice).
- *
- * XXX A nicer solution shall be attempted. */
-#ifdef _KERNEL
-#define        TMPFS_DIRCOOKIE_DOT     0
-#define        TMPFS_DIRCOOKIE_DOTDOT  1
-#define        TMPFS_DIRCOOKIE_EOF     2
-static __inline
-off_t
-tmpfs_dircookie(struct tmpfs_dirent *de)
-{
-       off_t cookie;
-
-       cookie = ((off_t)(uintptr_t)de >> 1) & 0x7FFFFFFF;
-       MPASS(cookie != TMPFS_DIRCOOKIE_DOT);
-       MPASS(cookie != TMPFS_DIRCOOKIE_DOTDOT);
-       MPASS(cookie != TMPFS_DIRCOOKIE_EOF);
+ * To preserve compatibility cookies are limited to 31 bits.
+ */
 
-       return cookie;
-}
-#endif
+#define        TMPFS_DIRCOOKIE_DOT             0
+#define        TMPFS_DIRCOOKIE_DOTDOT          1
+#define        TMPFS_DIRCOOKIE_EOF             2
+#define        TMPFS_DIRCOOKIE_MASK            ((off_t)0x3fffffffU)
+#define        TMPFS_DIRCOOKIE_MIN             ((off_t)0x00000004U)
+#define        TMPFS_DIRCOOKIE_DUP             ((off_t)0x40000000U)
+#define        TMPFS_DIRCOOKIE_DUPHEAD         ((off_t)0x80000000U)
+#define        TMPFS_DIRCOOKIE_DUP_MIN         TMPFS_DIRCOOKIE_DUP
+#define        TMPFS_DIRCOOKIE_DUP_MAX         \
+       (TMPFS_DIRCOOKIE_DUP | TMPFS_DIRCOOKIE_MASK)
 
 /* --------------------------------------------------------------------- */
 
@@ -243,29 +221,31 @@ struct tmpfs_node {
                dev_t                   tn_rdev;
 
                /* Valid when tn_type == VDIR. */
-               struct tn_dir{
+               struct tn_dir {
                        /* Pointer to the parent directory.  The root
                         * directory has a pointer to itself in this field;
                         * this property identifies the root node. */
                        struct tmpfs_node *     tn_parent;
 
-                       /* Head of a tail-queue that links the contents of
-                        * the directory together.  See above for a
-                        * description of its contents. */
+                       /* Head of a tree that links the contents of
+                        * the directory together. */
                        struct tmpfs_dir        tn_dirhead;
 
+                       /* Head of a list the contains fake directory entries
+                        * heads, i.e. entries with TMPFS_DIRCOOKIE_DUPHEAD
+                        * flag. */
+                       struct tmpfs_dir_duphead tn_dupindex;
+
                        /* Number and pointer of the first directory entry
                         * returned by the readdir operation if it were
                         * called again to continue reading data from the
                         * same directory as before.  This is used to speed
                         * up reads of long directories, assuming that no
                         * more than one read is in progress at a given time.
-                        * Otherwise, these values are discarded and a linear
-                        * scan is performed from the beginning up to the
-                        * point where readdir starts returning values. */
+                        * Otherwise, these values are discarded. */
                        off_t                   tn_readdir_lastn;
                        struct tmpfs_dirent *   tn_readdir_lastp;
-               }tn_dir;
+               } tn_dir;
 
                /* Valid when tn_type == VLNK. */
                /* The link's target, allocated from a string pool. */
@@ -419,9 +399,9 @@ int tmpfs_alloc_node(struct tmpfs_mount 
            char *, dev_t, struct tmpfs_node **);
 void   tmpfs_free_node(struct tmpfs_mount *, struct tmpfs_node *);
 int    tmpfs_alloc_dirent(struct tmpfs_mount *, struct tmpfs_node *,
-           const char *, uint16_t, struct tmpfs_dirent **);
-void   tmpfs_free_dirent(struct tmpfs_mount *, struct tmpfs_dirent *,
-           boolean_t);
+           const char *, u_int, struct tmpfs_dirent **);
+void   tmpfs_free_dirent(struct tmpfs_mount *, struct tmpfs_dirent *);
+void   tmpfs_dirent_init(struct tmpfs_dirent *, const char *, u_int);
 int    tmpfs_alloc_vp(struct mount *, struct tmpfs_node *, int,
            struct vnode **);
 void   tmpfs_free_vp(struct vnode *);
@@ -429,13 +409,12 @@ int       tmpfs_alloc_file(struct vnode *, str
            struct componentname *, char *);
 void   tmpfs_dir_attach(struct vnode *, struct tmpfs_dirent *);
 void   tmpfs_dir_detach(struct vnode *, struct tmpfs_dirent *);
+void   tmpfs_dir_destroy(struct tmpfs_mount *, struct tmpfs_node *);
 struct tmpfs_dirent *  tmpfs_dir_lookup(struct tmpfs_node *node,
                            struct tmpfs_node *f,
                            struct componentname *cnp);
-int    tmpfs_dir_getdotdent(struct tmpfs_node *, struct uio *);
-int    tmpfs_dir_getdotdotdent(struct tmpfs_node *, struct uio *);
-struct tmpfs_dirent *  tmpfs_dir_lookupbycookie(struct tmpfs_node *, off_t);
-int    tmpfs_dir_getdents(struct tmpfs_node *, struct uio *, off_t *);
+int    tmpfs_dir_getdents(struct tmpfs_node *, struct uio *, int,
+           u_long *, int *);
 int    tmpfs_dir_whiteout_add(struct vnode *, struct componentname *);
 void   tmpfs_dir_whiteout_remove(struct vnode *, struct componentname *);
 int    tmpfs_reg_resize(struct vnode *, off_t, boolean_t);
@@ -467,8 +446,8 @@ int tmpfs_truncate(struct vnode *, off_t
  * with a length of 'len'.
  */
 #define TMPFS_DIRENT_MATCHES(de, name, len) \
-    (de->td_namelen == (uint16_t)len && \
-    bcmp((de)->td_name, (name), (de)->td_namelen) == 0)
+    (de->td_namelen == len && \
+    bcmp((de)->ud.td_name, (name), (de)->td_namelen) == 0)
 
 /* --------------------------------------------------------------------- */
 
@@ -476,11 +455,10 @@ int       tmpfs_truncate(struct vnode *, off_t
  * Ensures that the node pointed by 'node' is a directory and that its
  * contents are consistent with respect to directories.
  */
-#define TMPFS_VALIDATE_DIR(node) \
-    MPASS((node)->tn_type == VDIR); \
-    MPASS((node)->tn_size % sizeof(struct tmpfs_dirent) == 0); \
-    MPASS((node)->tn_dir.tn_readdir_lastp == NULL || \
-       tmpfs_dircookie((node)->tn_dir.tn_readdir_lastp) == 
(node)->tn_dir.tn_readdir_lastn);
+#define TMPFS_VALIDATE_DIR(node) do { \
+       MPASS((node)->tn_type == VDIR); \
+       MPASS((node)->tn_size % sizeof(struct tmpfs_dirent) == 0); \
+} while (0)
 
 /* --------------------------------------------------------------------- */
 

Modified: head/sys/fs/tmpfs/tmpfs_subr.c
==============================================================================
--- head/sys/fs/tmpfs/tmpfs_subr.c      Sun Jan  6 21:56:58 2013        
(r245114)
+++ head/sys/fs/tmpfs/tmpfs_subr.c      Sun Jan  6 22:15:44 2013        
(r245115)
@@ -37,6 +37,7 @@
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
+#include <sys/fnv_hash.h>
 #include <sys/namei.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
@@ -58,6 +59,11 @@ __FBSDID("$FreeBSD$");
 #include <fs/tmpfs/tmpfs_fifoops.h>
 #include <fs/tmpfs/tmpfs_vnops.h>
 
+struct tmpfs_dir_cursor {
+       struct tmpfs_dirent     *tdc_current;
+       struct tmpfs_dirent     *tdc_tree;
+};
+
 SYSCTL_NODE(_vfs, OID_AUTO, tmpfs, CTLFLAG_RW, 0, "tmpfs file system");
 
 static long tmpfs_pages_reserved = TMPFS_PAGES_MINRESERVED;
@@ -87,6 +93,10 @@ SYSCTL_PROC(_vfs_tmpfs, OID_AUTO, memory
     &tmpfs_pages_reserved, 0, sysctl_mem_reserved, "L",
     "Amount of available memory and swap below which tmpfs growth stops");
 
+static __inline int tmpfs_dirtree_cmp(struct tmpfs_dirent *a,
+    struct tmpfs_dirent *b);
+RB_PROTOTYPE_STATIC(tmpfs_dir, tmpfs_dirent, uh.td_entries, tmpfs_dirtree_cmp);
+
 size_t
 tmpfs_mem_avail(void)
 {
@@ -188,7 +198,8 @@ tmpfs_alloc_node(struct tmpfs_mount *tmp
                break;
 
        case VDIR:
-               TAILQ_INIT(&nnode->tn_dir.tn_dirhead);
+               RB_INIT(&nnode->tn_dir.tn_dirhead);
+               LIST_INIT(&nnode->tn_dir.tn_dupindex);
                MPASS(parent != nnode);
                MPASS(IMPLIES(parent == NULL, tmp->tm_root == NULL));
                nnode->tn_dir.tn_parent = (parent == NULL) ? nnode : parent;
@@ -309,6 +320,49 @@ tmpfs_free_node(struct tmpfs_mount *tmp,
 
 /* --------------------------------------------------------------------- */
 
+static __inline uint32_t
+tmpfs_dirent_hash(const char *name, u_int len)
+{
+       uint32_t hash;
+
+       hash = fnv_32_buf(name, len, FNV1_32_INIT + len) & TMPFS_DIRCOOKIE_MASK;
+#ifdef TMPFS_DEBUG_DIRCOOKIE_DUP
+       hash &= 0xf;
+#endif
+       if (hash < TMPFS_DIRCOOKIE_MIN)
+               hash += TMPFS_DIRCOOKIE_MIN;
+
+       return (hash);
+}
+
+static __inline off_t
+tmpfs_dirent_cookie(struct tmpfs_dirent *de)
+{
+       MPASS(de->td_cookie >= TMPFS_DIRCOOKIE_MIN);
+
+       return (de->td_cookie);
+}
+
+static __inline boolean_t
+tmpfs_dirent_dup(struct tmpfs_dirent *de)
+{
+       return ((de->td_cookie & TMPFS_DIRCOOKIE_DUP) != 0);
+}
+
+static __inline boolean_t
+tmpfs_dirent_duphead(struct tmpfs_dirent *de)
+{
+       return ((de->td_cookie & TMPFS_DIRCOOKIE_DUPHEAD) != 0);
+}
+
+void
+tmpfs_dirent_init(struct tmpfs_dirent *de, const char *name, u_int namelen)
+{
+       de->td_hash = de->td_cookie = tmpfs_dirent_hash(name, namelen);
+       memcpy(de->ud.td_name, name, namelen);
+       de->td_namelen = namelen;
+}
+
 /*
  * Allocates a new directory entry for the node node with a name of name.
  * The new directory entry is returned in *de.
@@ -320,17 +374,17 @@ tmpfs_free_node(struct tmpfs_mount *tmp,
  */
 int
 tmpfs_alloc_dirent(struct tmpfs_mount *tmp, struct tmpfs_node *node,
-    const char *name, uint16_t len, struct tmpfs_dirent **de)
+    const char *name, u_int len, struct tmpfs_dirent **de)
 {
        struct tmpfs_dirent *nde;
 
-       nde = (struct tmpfs_dirent *)uma_zalloc(
-                                       tmp->tm_dirent_pool, M_WAITOK);
-       nde->td_name = malloc(len, M_TMPFSNAME, M_WAITOK);
-       nde->td_namelen = len;
-       memcpy(nde->td_name, name, len);
-
+       nde = uma_zalloc(tmp->tm_dirent_pool, M_WAITOK);
        nde->td_node = node;
+       if (name != NULL) {
+               nde->ud.td_name = malloc(len, M_TMPFSNAME, M_WAITOK);
+               tmpfs_dirent_init(nde, name, len);
+       } else
+               nde->td_namelen = 0;
        if (node != NULL)
                node->tn_links++;
 
@@ -351,20 +405,17 @@ tmpfs_alloc_dirent(struct tmpfs_mount *t
  * directory entry, as it may already have been released from the outside.
  */
 void
-tmpfs_free_dirent(struct tmpfs_mount *tmp, struct tmpfs_dirent *de,
-    boolean_t node_exists)
+tmpfs_free_dirent(struct tmpfs_mount *tmp, struct tmpfs_dirent *de)
 {
-       if (node_exists) {
-               struct tmpfs_node *node;
+       struct tmpfs_node *node;
 
-               node = de->td_node;
-               if (node != NULL) {
-                       MPASS(node->tn_links > 0);
-                       node->tn_links--;
-               }
+       node = de->td_node;
+       if (node != NULL) {
+               MPASS(node->tn_links > 0);
+               node->tn_links--;
        }
-
-       free(de->td_name, M_TMPFSNAME);
+       if (!tmpfs_dirent_duphead(de) && de->ud.td_name != NULL)
+               free(de->ud.td_name, M_TMPFSNAME);
        uma_zfree(tmp->tm_dirent_pool, de);
 }
 
@@ -586,7 +637,7 @@ tmpfs_alloc_file(struct vnode *dvp, stru
        /* Allocate a vnode for the new file. */
        error = tmpfs_alloc_vp(dvp->v_mount, node, LK_EXCLUSIVE, vpp);
        if (error != 0) {
-               tmpfs_free_dirent(tmp, de, TRUE);
+               tmpfs_free_dirent(tmp, de);
                tmpfs_free_node(tmp, node);
                goto out;
        }
@@ -605,6 +656,215 @@ out:
 
 /* --------------------------------------------------------------------- */
 
+static struct tmpfs_dirent *
+tmpfs_dir_first(struct tmpfs_node *dnode, struct tmpfs_dir_cursor *dc)
+{
+       struct tmpfs_dirent *de;
+
+       de = RB_MIN(tmpfs_dir, &dnode->tn_dir.tn_dirhead);
+       dc->tdc_tree = de;
+       if (de != NULL && tmpfs_dirent_duphead(de))
+               de = LIST_FIRST(&de->ud.td_duphead);
+       dc->tdc_current = de;
+
+       return (dc->tdc_current);
+}
+
+static struct tmpfs_dirent *
+tmpfs_dir_next(struct tmpfs_node *dnode, struct tmpfs_dir_cursor *dc)
+{
+       struct tmpfs_dirent *de;
+
+       MPASS(dc->tdc_tree != NULL);
+       if (tmpfs_dirent_dup(dc->tdc_current)) {
+               dc->tdc_current = LIST_NEXT(dc->tdc_current, uh.td_dup.entries);
+               if (dc->tdc_current != NULL)
+                       return (dc->tdc_current);
+       }
+       dc->tdc_tree = dc->tdc_current = RB_NEXT(tmpfs_dir,
+           &dnode->tn_dir.tn_dirhead, dc->tdc_tree);
+       if ((de = dc->tdc_current) != NULL && tmpfs_dirent_duphead(de)) {
+               dc->tdc_current = LIST_FIRST(&de->ud.td_duphead);
+               MPASS(dc->tdc_current != NULL);
+       }
+
+       return (dc->tdc_current);
+}
+
+/* Lookup directory entry in RB-Tree. Function may return duphead entry. */
+static struct tmpfs_dirent *
+tmpfs_dir_xlookup_hash(struct tmpfs_node *dnode, uint32_t hash)
+{
+       struct tmpfs_dirent *de, dekey;
+
+       dekey.td_hash = hash;
+       de = RB_FIND(tmpfs_dir, &dnode->tn_dir.tn_dirhead, &dekey);
+       return (de);
+}
+
+/* Lookup directory entry by cookie, initialize directory cursor accordingly. 
*/
+static struct tmpfs_dirent *
+tmpfs_dir_lookup_cookie(struct tmpfs_node *node, off_t cookie,
+    struct tmpfs_dir_cursor *dc)
+{
+       struct tmpfs_dir *dirhead = &node->tn_dir.tn_dirhead;
+       struct tmpfs_dirent *de, dekey;
+
+       MPASS(cookie >= TMPFS_DIRCOOKIE_MIN);
+
+       if (cookie == node->tn_dir.tn_readdir_lastn &&
+           (de = node->tn_dir.tn_readdir_lastp) != NULL) {
+               /* Protect against possible race, tn_readdir_last[pn]
+                * may be updated with only shared vnode lock held. */
+               if (cookie == tmpfs_dirent_cookie(de))
+                       goto out;
+       }
+
+       if ((cookie & TMPFS_DIRCOOKIE_DUP) != 0) {
+               LIST_FOREACH(de, &node->tn_dir.tn_dupindex,
+                   uh.td_dup.index_entries) {
+                       MPASS(tmpfs_dirent_dup(de));
+                       if (de->td_cookie == cookie)
+                               goto out;
+                       /* dupindex list is sorted. */
+                       if (de->td_cookie < cookie) {
+                               de = NULL;
+                               goto out;
+                       }
+               }
+               MPASS(de == NULL);
+               goto out;
+       }
+
+       MPASS((cookie & TMPFS_DIRCOOKIE_MASK) == cookie);
+       dekey.td_hash = cookie;
+       /* Recover if direntry for cookie was removed */
+       de = RB_NFIND(tmpfs_dir, dirhead, &dekey);
+       dc->tdc_tree = de;
+       dc->tdc_current = de;
+       if (de != NULL && tmpfs_dirent_duphead(de)) {
+               dc->tdc_current = LIST_FIRST(&de->ud.td_duphead);
+               MPASS(dc->tdc_current != NULL);
+       }
+       return (dc->tdc_current);
+
+out:
+       dc->tdc_tree = de;
+       dc->tdc_current = de;
+       if (de != NULL && tmpfs_dirent_dup(de))
+               dc->tdc_tree = tmpfs_dir_xlookup_hash(node,
+                   de->td_hash);
+       return (dc->tdc_current);
+}
+
+/*
+ * Looks for a directory entry in the directory represented by node.
+ * 'cnp' describes the name of the entry to look for.  Note that the .
+ * and .. components are not allowed as they do not physically exist
+ * within directories.
+ *
+ * Returns a pointer to the entry when found, otherwise NULL.
+ */
+struct tmpfs_dirent *
+tmpfs_dir_lookup(struct tmpfs_node *node, struct tmpfs_node *f,
+    struct componentname *cnp)
+{
+       struct tmpfs_dir_duphead *duphead;
+       struct tmpfs_dirent *de;
+       uint32_t hash;
+
+       MPASS(IMPLIES(cnp->cn_namelen == 1, cnp->cn_nameptr[0] != '.'));
+       MPASS(IMPLIES(cnp->cn_namelen == 2, !(cnp->cn_nameptr[0] == '.' &&
+           cnp->cn_nameptr[1] == '.')));
+       TMPFS_VALIDATE_DIR(node);
+
+       hash = tmpfs_dirent_hash(cnp->cn_nameptr, cnp->cn_namelen);
+       de = tmpfs_dir_xlookup_hash(node, hash);
+       if (de != NULL && tmpfs_dirent_duphead(de)) {
+               duphead = &de->ud.td_duphead;
+               LIST_FOREACH(de, duphead, uh.td_dup.entries) {
+                       if (TMPFS_DIRENT_MATCHES(de, cnp->cn_nameptr,
+                           cnp->cn_namelen))
+                               break;
+               }
+       } else if (de != NULL) {
+               if (!TMPFS_DIRENT_MATCHES(de, cnp->cn_nameptr,
+                   cnp->cn_namelen))
+                       de = NULL;
+       }
+       if (de != NULL && f != NULL && de->td_node != f)
+               de = NULL;
+
+       return (de);
+}
+
+/*
+ * Attach duplicate-cookie directory entry nde to dnode and insert to dupindex
+ * list, allocate new cookie value.
+ */
+static void
+tmpfs_dir_attach_dup(struct tmpfs_node *dnode,
+    struct tmpfs_dir_duphead *duphead, struct tmpfs_dirent *nde)
+{
+       struct tmpfs_dir_duphead *dupindex;
+       struct tmpfs_dirent *de, *pde;
+
+       dupindex = &dnode->tn_dir.tn_dupindex;
+       de = LIST_FIRST(dupindex);
+       if (de == NULL || de->td_cookie < TMPFS_DIRCOOKIE_DUP_MAX) {
+               if (de == NULL)
+                       nde->td_cookie = TMPFS_DIRCOOKIE_DUP_MIN;
+               else
+                       nde->td_cookie = de->td_cookie + 1;
+               MPASS(tmpfs_dirent_dup(nde));
+               LIST_INSERT_HEAD(dupindex, nde, uh.td_dup.index_entries);
+               LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries);
+               return;
+       }
+
+       /*
+        * Cookie numbers are near exhaustion. Scan dupindex list for unused
+        * numbers. dupindex list is sorted in descending order. Keep it so
+        * after inserting nde.
+        */
+       while (1) {
+               pde = de;
+               de = LIST_NEXT(de, uh.td_dup.index_entries);
+               if (de == NULL && pde->td_cookie != TMPFS_DIRCOOKIE_DUP_MIN) {
+                       /*
+                        * Last element of the index doesn't have minimal cookie
+                        * value, use it.
+                        */
+                       nde->td_cookie = TMPFS_DIRCOOKIE_DUP_MIN;
+                       LIST_INSERT_AFTER(pde, nde, uh.td_dup.index_entries);
+                       LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries);
+                       return;
+               } else if (de == NULL) {
+                       /*
+                        * We are so lucky have 2^30 hash duplicates in single
+                        * directory :) Return largest possible cookie value.
+                        * It should be fine except possible issues with
+                        * VOP_READDIR restart.
+                        */
+                       nde->td_cookie = TMPFS_DIRCOOKIE_DUP_MAX;
+                       LIST_INSERT_HEAD(dupindex, nde,
+                           uh.td_dup.index_entries);
+                       LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries);
+                       return;
+               }
+               if (de->td_cookie + 1 == pde->td_cookie ||
+                   de->td_cookie >= TMPFS_DIRCOOKIE_DUP_MAX)
+                       continue;       /* No hole or invalid cookie. */
+               nde->td_cookie = de->td_cookie + 1;
+               MPASS(tmpfs_dirent_dup(nde));
+               MPASS(pde->td_cookie > nde->td_cookie);
+               MPASS(nde->td_cookie > de->td_cookie);
+               LIST_INSERT_BEFORE(de, nde, uh.td_dup.index_entries);
+               LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries);
+               return;
+       };
+}
+
 /*
  * Attaches the directory entry de to the directory represented by vp.
  * Note that this does not change the link count of the node pointed by
@@ -614,10 +874,38 @@ void
 tmpfs_dir_attach(struct vnode *vp, struct tmpfs_dirent *de)
 {
        struct tmpfs_node *dnode;
+       struct tmpfs_dirent *xde, *nde;
 
        ASSERT_VOP_ELOCKED(vp, __func__);
+       MPASS(de->td_namelen > 0);
+       MPASS(de->td_hash >= TMPFS_DIRCOOKIE_MIN);
+       MPASS(de->td_cookie == de->td_hash);
+
        dnode = VP_TO_TMPFS_DIR(vp);
-       TAILQ_INSERT_TAIL(&dnode->tn_dir.tn_dirhead, de, td_entries);
+       dnode->tn_dir.tn_readdir_lastn = 0;
+       dnode->tn_dir.tn_readdir_lastp = NULL;
+
+       MPASS(!tmpfs_dirent_dup(de));
+       xde = RB_INSERT(tmpfs_dir, &dnode->tn_dir.tn_dirhead, de);
+       if (xde != NULL && tmpfs_dirent_duphead(xde))
+               tmpfs_dir_attach_dup(dnode, &xde->ud.td_duphead, de);
+       else if (xde != NULL) {
+               /*
+                * Allocate new duphead. Swap xde with duphead to avoid
+                * adding/removing elements with the same hash.
+                */
+               MPASS(!tmpfs_dirent_dup(xde));
+               tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount), NULL, NULL, 0,
+                   &nde);
+               /* *nde = *xde; XXX gcc 4.2.1 may generate invalid code. */
+               memcpy(nde, xde, sizeof(*xde));
+               xde->td_cookie |= TMPFS_DIRCOOKIE_DUPHEAD;
+               LIST_INIT(&xde->ud.td_duphead);
+               xde->td_namelen = 0;
+               xde->td_node = NULL;
+               tmpfs_dir_attach_dup(dnode, &xde->ud.td_duphead, nde);
+               tmpfs_dir_attach_dup(dnode, &xde->ud.td_duphead, de);
+       }
        dnode->tn_size += sizeof(struct tmpfs_dirent);
        dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \
            TMPFS_NODE_MODIFIED;
@@ -633,58 +921,61 @@ tmpfs_dir_attach(struct vnode *vp, struc
 void
 tmpfs_dir_detach(struct vnode *vp, struct tmpfs_dirent *de)
 {
+       struct tmpfs_mount *tmp;
+       struct tmpfs_dir *head;
        struct tmpfs_node *dnode;
+       struct tmpfs_dirent *xde;
 
        ASSERT_VOP_ELOCKED(vp, __func__);
-       dnode = VP_TO_TMPFS_DIR(vp);
 
-       if (dnode->tn_dir.tn_readdir_lastp == de) {
-               dnode->tn_dir.tn_readdir_lastn = 0;
-               dnode->tn_dir.tn_readdir_lastp = NULL;
-       }
+       dnode = VP_TO_TMPFS_DIR(vp);
+       head = &dnode->tn_dir.tn_dirhead;
+       dnode->tn_dir.tn_readdir_lastn = 0;
+       dnode->tn_dir.tn_readdir_lastp = NULL;
+
+       if (tmpfs_dirent_dup(de)) {
+               /* Remove duphead if de was last entry. */
+               if (LIST_NEXT(de, uh.td_dup.entries) == NULL) {
+                       xde = tmpfs_dir_xlookup_hash(dnode, de->td_hash);
+                       MPASS(tmpfs_dirent_duphead(xde));
+               } else
+                       xde = NULL;
+               LIST_REMOVE(de, uh.td_dup.entries);
+               LIST_REMOVE(de, uh.td_dup.index_entries);
+               if (xde != NULL) {
+                       if (LIST_EMPTY(&xde->ud.td_duphead)) {
+                               RB_REMOVE(tmpfs_dir, head, xde);
+                               tmp = VFS_TO_TMPFS(vp->v_mount);
+                               MPASS(xde->td_node == NULL);
+                               tmpfs_free_dirent(tmp, xde);
+                       }
+               }
+       } else
+               RB_REMOVE(tmpfs_dir, head, de);
 
-       TAILQ_REMOVE(&dnode->tn_dir.tn_dirhead, de, td_entries);
        dnode->tn_size -= sizeof(struct tmpfs_dirent);
        dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \
            TMPFS_NODE_MODIFIED;
 }
 
-/* --------------------------------------------------------------------- */
-
-/*
- * Looks for a directory entry in the directory represented by node.
- * 'cnp' describes the name of the entry to look for.  Note that the .
- * and .. components are not allowed as they do not physically exist
- * within directories.
- *
- * Returns a pointer to the entry when found, otherwise NULL.
- */
-struct tmpfs_dirent *
-tmpfs_dir_lookup(struct tmpfs_node *node, struct tmpfs_node *f,
-    struct componentname *cnp)
+void
+tmpfs_dir_destroy(struct tmpfs_mount *tmp, struct tmpfs_node *dnode)
 {
-       boolean_t found;
-       struct tmpfs_dirent *de;
-
-       MPASS(IMPLIES(cnp->cn_namelen == 1, cnp->cn_nameptr[0] != '.'));
-       MPASS(IMPLIES(cnp->cn_namelen == 2, !(cnp->cn_nameptr[0] == '.' &&
-           cnp->cn_nameptr[1] == '.')));
-       TMPFS_VALIDATE_DIR(node);
+       struct tmpfs_dirent *de, *dde, *nde;
 
-       found = 0;
-       TAILQ_FOREACH(de, &node->tn_dir.tn_dirhead, td_entries) {
-               if (f != NULL && de->td_node != f)
-                   continue;
-               MPASS(cnp->cn_namelen < 0xffff);
-               if (de->td_namelen == (uint16_t)cnp->cn_namelen &&
-                   bcmp(de->td_name, cnp->cn_nameptr, de->td_namelen) == 0) {
-                       found = 1;
-                       break;
+       RB_FOREACH_SAFE(de, tmpfs_dir, &dnode->tn_dir.tn_dirhead, nde) {
+               RB_REMOVE(tmpfs_dir, &dnode->tn_dir.tn_dirhead, de);
+               /* Node may already be destroyed. */
+               de->td_node = NULL;
+               if (tmpfs_dirent_duphead(de)) {
+                       while ((dde = LIST_FIRST(&de->ud.td_duphead)) != NULL) {
+                               LIST_REMOVE(dde, uh.td_dup.entries);
+                               dde->td_node = NULL;
+                               tmpfs_free_dirent(tmp, dde);
+                       }
                }
+               tmpfs_free_dirent(tmp, de);
        }
-       node->tn_status |= TMPFS_NODE_ACCESSED;
-
-       return found ? de : NULL;
 }
 
 /* --------------------------------------------------------------------- */
@@ -696,7 +987,7 @@ tmpfs_dir_lookup(struct tmpfs_node *node
  * hold the directory entry or an appropriate error code if another
  * error happens.
  */
-int
+static int
 tmpfs_dir_getdotdent(struct tmpfs_node *node, struct uio *uio)
 {
        int error;
@@ -713,12 +1004,9 @@ tmpfs_dir_getdotdent(struct tmpfs_node *
        dent.d_reclen = GENERIC_DIRSIZ(&dent);
 
        if (dent.d_reclen > uio->uio_resid)
-               error = -1;
-       else {
+               error = EJUSTRETURN;
+       else
                error = uiomove(&dent, dent.d_reclen, uio);
-               if (error == 0)
-                       uio->uio_offset = TMPFS_DIRCOOKIE_DOTDOT;
-       }
 
        node->tn_status |= TMPFS_NODE_ACCESSED;
 
@@ -734,7 +1022,7 @@ tmpfs_dir_getdotdent(struct tmpfs_node *
  * hold the directory entry or an appropriate error code if another
  * error happens.
  */
-int
+static int
 tmpfs_dir_getdotdotdent(struct tmpfs_node *node, struct uio *uio)
 {
        int error;
@@ -763,19 +1051,9 @@ tmpfs_dir_getdotdotdent(struct tmpfs_nod
        dent.d_reclen = GENERIC_DIRSIZ(&dent);
 
        if (dent.d_reclen > uio->uio_resid)
-               error = -1;
-       else {
+               error = EJUSTRETURN;
+       else
                error = uiomove(&dent, dent.d_reclen, uio);
-               if (error == 0) {
-                       struct tmpfs_dirent *de;
-
-                       de = TAILQ_FIRST(&node->tn_dir.tn_dirhead);
-                       if (de == NULL)
-                               uio->uio_offset = TMPFS_DIRCOOKIE_EOF;
-                       else
-                               uio->uio_offset = tmpfs_dircookie(de);
-               }
-       }
 
        node->tn_status |= TMPFS_NODE_ACCESSED;
 
@@ -785,30 +1063,6 @@ tmpfs_dir_getdotdotdent(struct tmpfs_nod
 /* --------------------------------------------------------------------- */
 
 /*
- * Lookup a directory entry by its associated cookie.
- */
-struct tmpfs_dirent *
-tmpfs_dir_lookupbycookie(struct tmpfs_node *node, off_t cookie)
-{
-       struct tmpfs_dirent *de;
-
-       if (cookie == node->tn_dir.tn_readdir_lastn &&
-           node->tn_dir.tn_readdir_lastp != NULL) {
-               return node->tn_dir.tn_readdir_lastp;
-       }
-
-       TAILQ_FOREACH(de, &node->tn_dir.tn_dirhead, td_entries) {
-               if (tmpfs_dircookie(de) == cookie) {
-                       break;
-               }
-       }
-
-       return de;
-}
-
-/* --------------------------------------------------------------------- */
-
-/*
  * Helper function for tmpfs_readdir.  Returns as much directory entries
  * as can fit in the uio space.  The read starts at uio->uio_offset.
  * The function returns 0 on success, -1 if there was not enough space
@@ -816,27 +1070,47 @@ tmpfs_dir_lookupbycookie(struct tmpfs_no
  * error code if another error happens.
  */
 int
-tmpfs_dir_getdents(struct tmpfs_node *node, struct uio *uio, off_t *cntp)
+tmpfs_dir_getdents(struct tmpfs_node *node, struct uio *uio, int cnt,
+    u_long *cookies, int *ncookies)
 {
-       int error;
-       off_t startcookie;
+       struct tmpfs_dir_cursor dc;
        struct tmpfs_dirent *de;
+       off_t off;
+       int error;
 
        TMPFS_VALIDATE_DIR(node);
 
-       /* Locate the first directory entry we have to return.  We have cached
-        * the last readdir in the node, so use those values if appropriate.
-        * Otherwise do a linear scan to find the requested entry. */
-       startcookie = uio->uio_offset;
-       MPASS(startcookie != TMPFS_DIRCOOKIE_DOT);
-       MPASS(startcookie != TMPFS_DIRCOOKIE_DOTDOT);
-       if (startcookie == TMPFS_DIRCOOKIE_EOF) {
-               return 0;
-       } else {
-               de = tmpfs_dir_lookupbycookie(node, startcookie);
-       }
-       if (de == NULL) {
-               return EINVAL;
+       off = 0;
+       switch (uio->uio_offset) {
+       case TMPFS_DIRCOOKIE_DOT:
+               error = tmpfs_dir_getdotdent(node, uio);
+               if (error != 0)
+                       return (error);
+               uio->uio_offset = TMPFS_DIRCOOKIE_DOTDOT;
+               if (cnt != 0)
+                       cookies[(*ncookies)++] = off = uio->uio_offset;
+       case TMPFS_DIRCOOKIE_DOTDOT:
+               error = tmpfs_dir_getdotdotdent(node, uio);
+               if (error != 0)
+                       return (error);
+               de = tmpfs_dir_first(node, &dc);
+               if (de == NULL)
+                       uio->uio_offset = TMPFS_DIRCOOKIE_EOF;
+               else
+                       uio->uio_offset = tmpfs_dirent_cookie(de);
+               if (cnt != 0)
+                       cookies[(*ncookies)++] = off = uio->uio_offset;
+               if (de == NULL)
+                       return (0);
+               break;
+       case TMPFS_DIRCOOKIE_EOF:
+               return (0);
+       default:
+               de = tmpfs_dir_lookup_cookie(node, uio->uio_offset, &dc);
+               if (de == NULL)
+                       return (EINVAL);
+               if (cnt != 0)
+                       off = tmpfs_dirent_cookie(de);
        }
 
        /* Read as much entries as possible; i.e., until we reach the end of
@@ -887,14 +1161,14 @@ tmpfs_dir_getdents(struct tmpfs_node *no
                }
                d.d_namlen = de->td_namelen;
                MPASS(de->td_namelen < sizeof(d.d_name));
-               (void)memcpy(d.d_name, de->td_name, de->td_namelen);
+               (void)memcpy(d.d_name, de->ud.td_name, de->td_namelen);
                d.d_name[de->td_namelen] = '\0';
                d.d_reclen = GENERIC_DIRSIZ(&d);
 
                /* Stop reading if the directory entry we are treating is
                 * bigger than the amount of data that can be returned. */
                if (d.d_reclen > uio->uio_resid) {
-                       error = -1;
+                       error = EJUSTRETURN;
                        break;
                }
 
@@ -902,21 +1176,30 @@ tmpfs_dir_getdents(struct tmpfs_node *no
                 * advance pointers. */
                error = uiomove(&d, d.d_reclen, uio);
                if (error == 0) {
-                       (*cntp)++;
-                       de = TAILQ_NEXT(de, td_entries);
+                       de = tmpfs_dir_next(node, &dc);
+                       if (cnt != 0) {
+                               if (de == NULL)
+                                       off = TMPFS_DIRCOOKIE_EOF;
+                               else
+                                       off = tmpfs_dirent_cookie(de);
+                               MPASS(*ncookies < cnt);
+                               cookies[(*ncookies)++] = off;
+                       }
                }
        } while (error == 0 && uio->uio_resid > 0 && de != NULL);
 
        /* Update the offset and cache. */
-       if (de == NULL) {
-               uio->uio_offset = TMPFS_DIRCOOKIE_EOF;
-               node->tn_dir.tn_readdir_lastn = 0;
-               node->tn_dir.tn_readdir_lastp = NULL;
-       } else {
-               node->tn_dir.tn_readdir_lastn = uio->uio_offset = 
tmpfs_dircookie(de);
-               node->tn_dir.tn_readdir_lastp = de;
+       if (cnt == 0) {
+               if (de == NULL)
+                       off = TMPFS_DIRCOOKIE_EOF;
+               else
+                       off = tmpfs_dirent_cookie(de);
        }
 
+       uio->uio_offset = off;
+       node->tn_dir.tn_readdir_lastn = off;
+       node->tn_dir.tn_readdir_lastp = de;
+
        node->tn_status |= TMPFS_NODE_ACCESSED;
        return error;
 }
@@ -943,7 +1226,7 @@ tmpfs_dir_whiteout_remove(struct vnode *
        de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(dvp), NULL, cnp);
        MPASS(de != NULL && de->td_node == NULL);
        tmpfs_dir_detach(dvp, de);
-       tmpfs_free_dirent(VFS_TO_TMPFS(dvp->v_mount), de, TRUE);
+       tmpfs_free_dirent(VFS_TO_TMPFS(dvp->v_mount), de);
 }
 
 /* --------------------------------------------------------------------- */
@@ -1436,3 +1719,15 @@ out:
 
        return error;
 }
+
+static __inline int
+tmpfs_dirtree_cmp(struct tmpfs_dirent *a, struct tmpfs_dirent *b)
+{
+       if (a->td_hash > b->td_hash)

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
_______________________________________________
svn-src-all@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to