Module Name:    src
Committed By:   dholland
Date:           Sun Aug  7 02:31:03 UTC 2016

Modified Files:
        src/sys/ufs/lfs: lfs_balloc.c

Log Message:
comments


To generate a diff of this commit:
cvs rdiff -u -r1.89 -r1.90 src/sys/ufs/lfs/lfs_balloc.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/ufs/lfs/lfs_balloc.c
diff -u src/sys/ufs/lfs/lfs_balloc.c:1.89 src/sys/ufs/lfs/lfs_balloc.c:1.90
--- src/sys/ufs/lfs/lfs_balloc.c:1.89	Sun Aug  7 00:25:22 2016
+++ src/sys/ufs/lfs/lfs_balloc.c	Sun Aug  7 02:31:03 2016
@@ -1,4 +1,4 @@
-/*	$NetBSD: lfs_balloc.c,v 1.89 2016/08/07 00:25:22 dholland Exp $	*/
+/*	$NetBSD: lfs_balloc.c,v 1.90 2016/08/07 02:31:03 dholland Exp $	*/
 
 /*-
  * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
@@ -60,7 +60,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: lfs_balloc.c,v 1.89 2016/08/07 00:25:22 dholland Exp $");
+__KERNEL_RCSID(0, "$NetBSD: lfs_balloc.c,v 1.90 2016/08/07 02:31:03 dholland Exp $");
 
 #if defined(_KERNEL_OPT)
 #include "opt_quota.h"
@@ -97,17 +97,23 @@ static int lfs_fragextend(struct vnode *
 u_int64_t locked_fakequeue_count;
 
 /*
- * Allocate a block, and to inode and filesystem block accounting for it
- * and for any indirect blocks the may need to be created in order for
- * this block to be created.
- *
- * Blocks which have never been accounted for (i.e., which "do not exist")
- * have disk address 0, which is translated by ulfs_bmap to the special value
- * UNASSIGNED == -1, as in the historical ULFS.
- *
- * Blocks which have been accounted for but which have not yet been written
- * to disk are given the new special disk address UNWRITTEN == -2, so that
- * they can be differentiated from completely new blocks.
+ * Allocate a block, and do inode and filesystem block accounting for
+ * it and for any indirect blocks that may need to be created in order
+ * to handle this block.
+ *
+ * Blocks which have never been accounted for (i.e., which "do not
+ * exist") have disk address 0, which is translated by ulfs_bmap to
+ * the special value UNASSIGNED == -1, as in historical FFS-related
+ * code.
+ *
+ * Blocks which have been accounted for but which have not yet been
+ * written to disk are given the new special disk address UNWRITTEN ==
+ * -2, so that they can be differentiated from completely new blocks.
+ *
+ * Note: it seems that bpp is passed as NULL for blocks that are file
+ * pages that will be handled by UVM and not the buffer cache.
+ *
+ * XXX: locking?
  */
 /* VOP_BWRITE ULFS_NIADDR+2 times */
 int
@@ -126,12 +132,27 @@ lfs_balloc(struct vnode *vp, off_t start
 
 	ip = VTOI(vp);
 	fs = ip->i_lfs;
+
+	/* Declare to humans that we might have the seglock here */
+	ASSERT_MAYBE_SEGLOCK(fs);
+
+
+	/* offset within block */
 	offset = lfs_blkoff(fs, startoffset);
+
+	/* This is usually but not always exactly the block size */
 	KASSERT(iosize <= lfs_sb_getbsize(fs));
+
+	/* block number (within file) */
 	lbn = lfs_lblkno(fs, startoffset);
+
+	/*
+	 * This checks for whether pending stuff needs to be flushed
+	 * out and potentially waits. It's been disabled since UBC
+	 * support was added to LFS in 2003. -- dholland 20160806
+	 */
 	/* (void)lfs_check(vp, lbn, 0); */
 
-	ASSERT_MAYBE_SEGLOCK(fs);
 
 	/*
 	 * Three cases: it's a block beyond the end of file, it's a block in
@@ -152,19 +173,29 @@ lfs_balloc(struct vnode *vp, off_t start
 	if (bpp)
 		*bpp = NULL;
 
-	/* Check for block beyond end of file and fragment extension needed. */
+	/* Last block number in file */
 	lastblock = lfs_lblkno(fs, ip->i_size);
+
 	if (lastblock < ULFS_NDADDR && lastblock < lbn) {
+		/*
+		 * The file is small enough to have fragments, and we're
+		 * allocating past EOF.
+		 *
+		 * If the last block was a fragment we need to rewrite it
+		 * as a full block.
+		 */
 		osize = lfs_blksize(fs, ip, lastblock);
 		if (osize < lfs_sb_getbsize(fs) && osize > 0) {
 			if ((error = lfs_fragextend(vp, osize, lfs_sb_getbsize(fs),
 						    lastblock,
 						    (bpp ? &bp : NULL), cred)))
 				return (error);
+			/* Update the file size with what we just did (only) */
 			ip->i_size = (lastblock + 1) * lfs_sb_getbsize(fs);
 			lfs_dino_setsize(fs, ip->i_din, ip->i_size);
 			uvm_vnp_setsize(vp, ip->i_size);
 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
+			/* if we got a buffer for this, write it out now */
 			if (bpp)
 				(void) VOP_BWRITE(bp->b_vp, bp);
 		}
@@ -192,12 +223,24 @@ lfs_balloc(struct vnode *vp, off_t start
 				if (flags & B_CLRBUF)
 					clrbuf(bp);
 			}
+
+			/*
+			 * Update the effective block count (this count
+			 * includes blocks that don't have an on-disk
+			 * presence or location yet)
+			 */
 			ip->i_lfs_effnblks += frags;
+
+			/* account for the space we're taking */
 			mutex_enter(&lfs_lock);
 			lfs_sb_subbfree(fs, frags);
 			mutex_exit(&lfs_lock);
+
+			/* update the inode */
 			lfs_dino_setdb(fs, ip->i_din, lbn, UNWRITTEN);
 		} else {
+			/* extending a block that already has fragments */
+
 			if (nsize <= osize) {
 				/* No need to extend */
 				if (bpp && (error = bread(vp, lbn, osize,
@@ -216,6 +259,10 @@ lfs_balloc(struct vnode *vp, off_t start
 		return 0;
 	}
 
+	/*
+	 * Look up what's already here.
+	 */
+
 	error = ulfs_bmaparray(vp, lbn, &daddr, &indirs[0], &num, NULL, NULL);
 	if (error)
 		return (error);
@@ -227,54 +274,99 @@ lfs_balloc(struct vnode *vp, off_t start
 	 * we start assigning blocks.
 	 */
 	frags = fs->um_seqinc;
-	bcount = 0;
+	bcount = 0; /* number of frags we need */
 	if (daddr == UNASSIGNED) {
+		/* no block yet, going to need a whole block */
 		bcount = frags;
 	}
 	for (i = 1; i < num; ++i) {
 		if (!indirs[i].in_exists) {
+			/* need an indirect block at this level */
 			bcount += frags;
 		}
 	}
 	if (ISSPACE(fs, bcount, cred)) {
+		/* update the superblock's free block count */
 		mutex_enter(&lfs_lock);
 		lfs_sb_subbfree(fs, bcount);
 		mutex_exit(&lfs_lock);
+		/* update the file's effective block count */
 		ip->i_lfs_effnblks += bcount;
 	} else {
+		/* whoops, no can do */
 		return ENOSPC;
 	}
 
 	if (daddr == UNASSIGNED) {
+		/*
+		 * There is nothing here yet.
+		 */
+
+		/*
+		 * If there's no indirect block in the inode, change it
+		 * to UNWRITTEN to indicate that it exists but doesn't
+		 * have an on-disk address yet.
+		 *
+		 * (Question: where's the block data initialized?)
+		 */
 		if (num > 0 && lfs_dino_getib(fs, ip->i_din, indirs[0].in_off) == 0) {
 			lfs_dino_setib(fs, ip->i_din, indirs[0].in_off, UNWRITTEN);
 		}
 
 		/*
-		 * Create new indirect blocks if necessary
+		 * If we need more layers of indirect blocks, create what
+		 * we need.
 		 */
 		if (num > 1) {
+			/*
+			 * The outermost indirect block address is the one
+			 * in the inode, so fetch that.
+			 */
 			idaddr = lfs_dino_getib(fs, ip->i_din, indirs[0].in_off);
+			/*
+			 * For each layer of indirection...
+			 */
 			for (i = 1; i < num; ++i) {
+				/*
+				 * Get a buffer for the indirect block data.
+				 *
+				 * (XXX: the logic here seems twisted. What's
+				 * wrong with testing in_exists first and then
+				 * doing either bread or getblk to get a
+				 * buffer?)
+				 */
 				ibp = getblk(vp, indirs[i].in_lbn,
 				    lfs_sb_getbsize(fs), 0,0);
 				if (!indirs[i].in_exists) {
+					/*
+					 * There isn't actually a block here,
+					 * so clear the buffer data and mark
+					 * the address of the block as
+					 * UNWRITTEN.
+					 */
 					clrbuf(ibp);
 					ibp->b_blkno = UNWRITTEN;
 				} else if (!(ibp->b_oflags & (BO_DELWRI | BO_DONE))) {
+					/*
+					 * Otherwise read it in.
+					 */
 					ibp->b_blkno = LFS_FSBTODB(fs, idaddr);
 					ibp->b_flags |= B_READ;
 					VOP_STRATEGY(vp, ibp);
 					biowait(ibp);
 				}
+
 				/*
-				 * This block exists, but the next one may not.
-				 * If that is the case mark it UNWRITTEN to keep
+				 * Now this indirect block exists, but
+				 * the next one down may not yet. If
+				 * so, set it to UNWRITTEN. This keeps
 				 * the accounting straight.
 				 */
 				if (lfs_iblock_get(fs, ibp->b_data, indirs[i].in_off) == 0)
 					lfs_iblock_set(fs, ibp->b_data, indirs[i].in_off,
 						UNWRITTEN);
+
+				/* get the block for the next iteration */
 				idaddr = lfs_iblock_get(fs, ibp->b_data, indirs[i].in_off);
 #ifdef DEBUG
 				if (vp == fs->lfs_ivnode) {
@@ -283,6 +375,17 @@ lfs_balloc(struct vnode *vp, off_t start
 						ibp->b_flags, curproc->p_pid);
 				}
 #endif
+				/*
+				 * Write out the updated indirect block. Note
+				 * that this writes it out even if we didn't
+				 * modify it - ultimately because the final
+				 * block didn't exist we'll need to write a
+				 * new version of all the blocks that lead to
+				 * it. Hopefully all that gets in before any
+				 * actual disk I/O so we don't end up writing
+				 * any of them twice... this is currently not
+				 * very clear.
+				 */
 				if ((error = VOP_BWRITE(ibp->b_vp, ibp)))
 					return error;
 			}
@@ -307,7 +410,7 @@ lfs_balloc(struct vnode *vp, off_t start
 	 * in which case we need to do accounting.
 	 *
 	 * We can tell a truly new block because ulfs_bmaparray will say
-	 * it is UNASSIGNED.  Once we allocate it we will assign it the
+	 * it is UNASSIGNED. Once we allocate it we will assign it the
 	 * disk address UNWRITTEN.
 	 */
 	if (daddr == UNASSIGNED) {
@@ -321,12 +424,26 @@ lfs_balloc(struct vnode *vp, off_t start
 
 		switch (num) {
 		    case 0:
+			/* direct block - update the inode */
 			lfs_dino_setdb(fs, ip->i_din, lbn, UNWRITTEN);
 			break;
 		    case 1:
+			/*
+			 * using a single indirect block - update the inode
+			 *
+			 * XXX: is this right? We already set this block
+			 * pointer above. I think we want to be writing *in*
+			 * the single indirect block and this case shouldn't
+			 * exist. (just case 0 and default)
+			 *    -- dholland 20160806
+			 */
 			lfs_dino_setib(fs, ip->i_din, indirs[0].in_off, UNWRITTEN);
 			break;
 		    default:
+			/*
+			 * using multiple indirect blocks - update the
+			 * innermost one
+			 */
 			idp = &indirs[num - 1];
 			if (bread(vp, idp->in_lbn, lfs_sb_getbsize(fs),
 				  B_MODIFY, &ibp))
@@ -365,6 +482,11 @@ lfs_balloc(struct vnode *vp, off_t start
 	return (0);
 }
 
+/*
+ * Extend a file that uses fragments with more fragments.
+ *
+ * XXX: locking?
+ */
 /* VOP_BWRITE 1 time */
 static int
 lfs_fragextend(struct vnode *vp, int osize, int nsize, daddr_t lbn,
@@ -374,21 +496,34 @@ lfs_fragextend(struct vnode *vp, int osi
 	struct lfs *fs;
 	long frags;
 	int error;
-	extern long locked_queue_bytes;
 	size_t obufsize;
 
+	/* XXX move this to a header file */
+	/* (XXX: except it's not clear what purpose it serves) */
+	extern long locked_queue_bytes;
+
+	/*
+	 * XXX: is there some reason we know more about the seglock
+	 * state here than at the top of lfs_balloc?
+	 */
+	ASSERT_NO_SEGLOCK(fs);
+
 	ip = VTOI(vp);
 	fs = ip->i_lfs;
+
+	/* number of frags we're adding */
 	frags = (long)lfs_numfrags(fs, nsize - osize);
-	error = 0;
 
-	ASSERT_NO_SEGLOCK(fs);
+	error = 0;
 
 	/*
 	 * Get the seglock so we don't enlarge blocks while a segment
 	 * is being written.  If we're called with bpp==NULL, though,
 	 * we are only pretending to change a buffer, so we don't have to
 	 * lock.
+	 *
+	 * XXX: the above comment is lying, as fs->lfs_fraglock is not
+	 * the segment lock.
 	 */
     top:
 	if (bpp) {
@@ -396,6 +531,7 @@ lfs_fragextend(struct vnode *vp, int osi
 		LFS_DEBUG_COUNTLOCKED("frag");
 	}
 
+	/* check if we actually have enough frags available */
 	if (!ISSPACE(fs, frags, cred)) {
 		error = ENOSPC;
 		goto out;
@@ -438,10 +574,13 @@ lfs_fragextend(struct vnode *vp, int osi
 		lfs_sb_subavail(fs, frags);
 	}
 
+	/* decrease the free block count in the superblock */
 	mutex_enter(&lfs_lock);
 	lfs_sb_subbfree(fs, frags);
 	mutex_exit(&lfs_lock);
+	/* increase the file's effective block count */
 	ip->i_lfs_effnblks += frags;
+	/* mark the inode dirty */
 	ip->i_flag |= IN_CHANGE | IN_UPDATE;
 
 	if (bpp) {
@@ -456,6 +595,7 @@ lfs_fragextend(struct vnode *vp, int osi
 			mutex_exit(&lfs_lock);
 		}
 
+		/* zero the new space */
 		memset((char *)((*bpp)->b_data) + osize, 0, (u_int)(nsize - osize));
 	}
 

Reply via email to