This patch adds support for the INCOMPAT_64BIT feature on ext4 filesystems. This feature is enabled by default on new filesystems on Ubuntu and probably other distros.
From 101769076e44208c2fac45aceddd4a46c33eaefb Mon Sep 17 00:00:00 2001 From: Vladimir Serbinenko <phco...@gmail.com> Date: Mon, 21 Aug 2023 20:55:35 +0200 Subject: [PATCH 1/2] ext2fs: Support INCOMPAT_64BIT feature
---
 sys/ufs/ext2fs/ext2fs.h        | 27 ++++++++----
 sys/ufs/ext2fs/ext2fs_alloc.c  | 75 ++++++++++++++++++++-------------
 sys/ufs/ext2fs/ext2fs_vfsops.c | 94 +++++++++++++++++++++++++++++++++++++-----
 3 files changed, 148 insertions(+), 48 deletions(-)

diff --git a/sys/ufs/ext2fs/ext2fs.h b/sys/ufs/ext2fs/ext2fs.h
index 320192b03a8..ae814b76798 100644
--- a/sys/ufs/ext2fs/ext2fs.h
+++ b/sys/ufs/ext2fs/ext2fs.h
@@ -252,6 +252,7 @@ struct m_ext2fs {
 	int32_t	e2fs_ngdb;	/* number of group descriptor blocks */
 	int32_t	e2fs_ipb;	/* number of inodes per block */
 	int32_t	e2fs_itpg;	/* number of inode table blocks per group */
+	u_int8_t e2fs_group_desc_shift; /* binary log group desc size */
 	struct	ext2_gd *e2fs_gd; /* group descriptors (data not byteswapped) */
 };
 
@@ -370,7 +371,8 @@ struct m_ext2fs {
 					 | EXT2F_ROCOMPAT_GDT_CSUM)
 #define EXT2F_INCOMPAT_SUPP		(EXT2F_INCOMPAT_FTYPE \
 					 | EXT2F_INCOMPAT_EXTENTS \
-					 | EXT2F_INCOMPAT_FLEX_BG)
+					 | EXT2F_INCOMPAT_FLEX_BG \
+					 | EXT2F_INCOMPAT_64BIT)
 
 /*
  * Feature set definitions
@@ -432,10 +434,14 @@ struct ext2_gd {
 	uint16_t ext2bgd_itable_unused_lo;	/* Low unused inode offset */
 	uint16_t ext2bgd_checksum;		/* Group desc checksum */
 
-	/*
-	 * XXX disk32 Further fields only exist if 64BIT feature is on
-	 * and superblock desc_size > 32, not supported for now.
-	 */
+	u_int32_t ext2bgd_b_bitmap_hi;	/* blocks bitmap block (high bits) */
+	u_int32_t ext2bgd_i_bitmap_hi;	/* inodes bitmap block (high bits) */
+	u_int32_t ext2bgd_i_tables_hi;	/* inodes table block (high bits) */
+	u_int16_t ext2bgd_nbfree_hi;	/* number of free blocks (high bits) */
+	u_int16_t ext2bgd_nifree_hi;	/* number of free inodes (high bits) */
+	u_int16_t ext2bgd_ndirs_hi;	/* number of directories (high bits) */
+	u_int16_t reserved_hi;
+	u_int32_t reserved2_hi[3];
 };
 
 #define E2FS_BG_INODE_UNINIT	0x0001	/* Inode bitmap not used/initialized */
@@ -492,15 +498,18 @@ void e2fs_sb_bswap(struct ext2fs *, struct ext2fs *);
 #	define e2fs_sbsave(old, new) e2fs_sb_bswap((old), (new))
 #endif
 
-/* Group descriptors are not byte swapped */
-#define e2fs_cgload(old, new, size) memcpy((new), (old), (size))
-#define e2fs_cgsave(old, new, size) memcpy((new), (old), (size))
+void e2fs_cgload(const char *ondisk, struct ext2_gd *inmemory,
+		 int cg_size, int shift_cg_entry_size);
+void e2fs_cgsave(const struct ext2_gd *inmemory, char *ondisk,
+		 int cg_size, int shift_cg_entry_size);
 
 /*
  * Turn file system block numbers into disk block addresses.
  * This maps file system blocks to device size blocks.
  */
 #define EXT2_FSBTODB(fs, b)	((b) << (fs)->e2fs_fsbtodb)
+#define EXT2_FSBTODB64(fs, b, b_hi)	(((((u_int64_t)(b_hi)) << 32) | (b)) << (fs)->e2fs_fsbtodb)
+#define EXT2_FSBTODB64OFF(fs, b, b_hi, off)	((((((u_int64_t)(b_hi)) << 32) | (b)) + (off)) << (fs)->e2fs_fsbtodb)
 #define EXT2_DBTOFSB(fs, b)	((b) >> (fs)->e2fs_fsbtodb)
 
 /*
@@ -512,6 +521,8 @@ void e2fs_sb_bswap(struct ext2fs *, struct ext2fs *);
 #define	ino_to_cg(fs, x)	(((x) - 1) / (fs)->e2fs.e2fs_ipg)
 #define	ino_to_fsba(fs, x)						\
 	(fs2h32((fs)->e2fs_gd[ino_to_cg((fs), (x))].ext2bgd_i_tables) +	\
+	 (((u_int64_t)fs2h32((fs)->e2fs_gd[ino_to_cg((fs), (x))].ext2bgd_i_tables_hi)) \
+	  << 32) +	\
 	(((x) - 1) % (fs)->e2fs.e2fs_ipg) / (fs)->e2fs_ipb)
 #define	ino_to_fsbo(fs, x)	(((x) - 1) % (fs)->e2fs_ipb)
 
diff --git a/sys/ufs/ext2fs/ext2fs_alloc.c b/sys/ufs/ext2fs/ext2fs_alloc.c
index a130242833a..b78ba827641 100644
--- a/sys/ufs/ext2fs/ext2fs_alloc.c
+++ b/sys/ufs/ext2fs/ext2fs_alloc.c
@@ -91,7 +91,7 @@ static u_long ext2fs_hashalloc(struct inode *, int, long, int,
 static daddr_t	ext2fs_nodealloccg(struct inode *, int, daddr_t, int);
 static daddr_t	ext2fs_mapsearch(struct m_ext2fs *, char *, daddr_t);
 static __inline void	ext2fs_cg_update(struct m_ext2fs *, int, struct ext2_gd *, int, int, int, daddr_t);
-static uint16_t	ext2fs_cg_get_csum(struct m_ext2fs *, int, struct ext2_gd *);
+static uint16_t	ext2fs_cg_get_csum(struct m_ext2fs *, int, struct ext2_gd *, size_t);
 static void	ext2fs_init_bb(struct m_ext2fs *, int, struct ext2_gd *, char *);
 
 /*
@@ -212,13 +212,19 @@ ext2fs_dirpref(struct m_ext2fs *fs)
 	avgifree = fs->e2fs.e2fs_ficount / fs->e2fs_ncg;
 	maxspace = 0;
 	mincg = -1;
-	for (cg = 0; cg < fs->e2fs_ncg; cg++)
-		if (fs2h16(fs->e2fs_gd[cg].ext2bgd_nifree) >= avgifree) {
-			if (mincg == -1 || fs2h16(fs->e2fs_gd[cg].ext2bgd_nbfree) > maxspace) {
+	for (cg = 0; cg < fs->e2fs_ncg; cg++) {
+		u_int32_t nifree = ((u_int32_t)fs2h16(fs->e2fs_gd[cg].ext2bgd_nifree_hi) << 16)
+		    | fs2h16(fs->e2fs_gd[cg].ext2bgd_nifree);
+		if (nifree >= avgifree) {
+			u_int32_t nbfree
+			    = ((u_int32_t)fs2h16(fs->e2fs_gd[cg].ext2bgd_nbfree_hi) << 16)
+			    | fs2h16(fs->e2fs_gd[cg].ext2bgd_nbfree);
+			if (mincg == -1 || nbfree > maxspace) {
 				mincg = cg;
-				maxspace = fs2h16(fs->e2fs_gd[cg].ext2bgd_nbfree);
+				maxspace = nbfree;
 			}
 		}
+	}
 	return mincg;
 }
 
@@ -333,14 +339,14 @@ ext2fs_alloccg(struct inode *ip, int cg, daddr_t bpref, int size)
 	struct m_ext2fs *fs;
 	char *bbp;
 	struct buf *bp;
-	/* XXX ondisk32 */
 	int error, bno, start, end, loc;
 
 	fs = ip->i_e2fs;
-	if (fs->e2fs_gd[cg].ext2bgd_nbfree == 0)
+	if (fs->e2fs_gd[cg].ext2bgd_nbfree == 0 && fs->e2fs_gd[cg].ext2bgd_nbfree_hi == 0)
 		return 0;
-	error = bread(ip->i_devvp, EXT2_FSBTODB(fs,
-		fs2h32(fs->e2fs_gd[cg].ext2bgd_b_bitmap)),
+	error = bread(ip->i_devvp, EXT2_FSBTODB64(fs,
+		fs2h32(fs->e2fs_gd[cg].ext2bgd_b_bitmap),
+		fs2h32(fs->e2fs_gd[cg].ext2bgd_b_bitmap_hi)),
 		(int)fs->e2fs_bsize, B_MODIFY, &bp);
 	if (error) {
 		return 0;
@@ -437,10 +443,11 @@ ext2fs_nodealloccg(struct inode *ip, int cg, daddr_t ipref, int mode)
 	if (ipref == -1)
 		ipref = 0;
 	fs = ip->i_e2fs;
-	if (fs->e2fs_gd[cg].ext2bgd_nifree == 0)
+	if (fs->e2fs_gd[cg].ext2bgd_nifree == 0 && fs->e2fs_gd[cg].ext2bgd_nifree_hi == 0)
 		return 0;
-	error = bread(ip->i_devvp, EXT2_FSBTODB(fs,
-		fs2h32(fs->e2fs_gd[cg].ext2bgd_i_bitmap)),
+	error = bread(ip->i_devvp, EXT2_FSBTODB64(fs,
+		fs2h32(fs->e2fs_gd[cg].ext2bgd_i_bitmap),
+		fs2h32(fs->e2fs_gd[cg].ext2bgd_i_bitmap_hi)),
 		(int)fs->e2fs_bsize, B_MODIFY, &bp);
 	if (error) {
 		return 0;
@@ -519,7 +526,7 @@ ext2fs_blkfree(struct inode *ip, daddr_t bno)
 		return;
 	}
 	error = bread(ip->i_devvp,
-		EXT2_FSBTODB(fs, fs2h32(fs->e2fs_gd[cg].ext2bgd_b_bitmap)),
+		EXT2_FSBTODB64(fs, fs2h32(fs->e2fs_gd[cg].ext2bgd_b_bitmap), fs2h32(fs->e2fs_gd[cg].ext2bgd_b_bitmap_hi)),
 		(int)fs->e2fs_bsize, B_MODIFY, &bp);
 	if (error) {
 		return;
@@ -566,7 +573,7 @@ ext2fs_vfree(struct vnode *pvp, ino_t ino, int mode)
 	KASSERT(!E2FS_HAS_GD_CSUM(fs) || (fs->e2fs_gd[cg].ext2bgd_flags & h2fs16(E2FS_BG_INODE_UNINIT)) == 0);
 
 	error = bread(pip->i_devvp,
-		EXT2_FSBTODB(fs, fs2h32(fs->e2fs_gd[cg].ext2bgd_i_bitmap)),
+		EXT2_FSBTODB64(fs, fs2h32(fs->e2fs_gd[cg].ext2bgd_i_bitmap), fs2h32(fs->e2fs_gd[cg].ext2bgd_i_bitmap_hi)),
 		(int)fs->e2fs_bsize, B_MODIFY, &bp);
 	if (error) {
 		return 0;
@@ -647,9 +654,11 @@ ext2fs_fserr(struct m_ext2fs *fs, u_int uid, const char *cp)
 static __inline void
 ext2fs_cg_update(struct m_ext2fs *fs, int cg, struct ext2_gd *gd, int nbfree, int nifree, int ndirs, daddr_t ioff)
 {
-	/* XXX disk32 */
 	if (nifree) {
-		gd->ext2bgd_nifree = h2fs16(fs2h16(gd->ext2bgd_nifree) + nifree);
+		u_int32_t ext2bgd_nifree = fs2h16(gd->ext2bgd_nifree) | ((u_int32_t)fs2h16(gd->ext2bgd_nifree_hi) << 16);
+		ext2bgd_nifree += nifree;
+		gd->ext2bgd_nifree = h2fs16(ext2bgd_nifree);
+		gd->ext2bgd_nifree_hi = h2fs16(ext2bgd_nifree >> 16);
 		/*
 		 * If we allocated inode on bigger offset than what was
 		 * ever used before, bump the itable_unused count. This
@@ -662,18 +671,27 @@ ext2fs_cg_update(struct m_ext2fs *fs, int cg, struct ext2_gd *gd, int nbfree, in
 			gd->ext2bgd_itable_unused_lo = h2fs16(fs->e2fs.e2fs_ipg - (ioff + 1));
 		}
 
-		KASSERT(!E2FS_HAS_GD_CSUM(fs) || gd->ext2bgd_itable_unused_lo <= gd->ext2bgd_nifree);
+		KASSERT(!E2FS_HAS_GD_CSUM(fs) || fs2h16(gd->ext2bgd_itable_unused_lo) <= ext2bgd_nifree);
 	}
 
 
-	if (nbfree)
-		gd->ext2bgd_nbfree = h2fs16(fs2h16(gd->ext2bgd_nbfree) + nbfree);
+	if (nbfree) {
+		u_int32_t ext2bgd_nbfree = fs2h16(gd->ext2bgd_nbfree) | ((u_int32_t)fs2h16(gd->ext2bgd_nbfree_hi) << 16);
+		ext2bgd_nbfree += nbfree;
+		gd->ext2bgd_nbfree = h2fs16(ext2bgd_nbfree);
+		gd->ext2bgd_nbfree_hi = h2fs16(ext2bgd_nbfree >> 16);
+
+	}
 
-	if (ndirs)
-		gd->ext2bgd_ndirs = h2fs16(fs2h16(gd->ext2bgd_ndirs) + ndirs);
+	if (ndirs) {
+		u_int32_t ext2bgd_ndirs = fs2h16(gd->ext2bgd_ndirs) | ((u_int32_t)fs2h16(gd->ext2bgd_ndirs_hi) << 16);
+		ext2bgd_ndirs += ndirs;
+		gd->ext2bgd_ndirs = h2fs16(ext2bgd_ndirs);
+		gd->ext2bgd_ndirs_hi = h2fs16(ext2bgd_ndirs >> 16);
+	}
 
 	if (E2FS_HAS_GD_CSUM(fs))
-		gd->ext2bgd_checksum = ext2fs_cg_get_csum(fs, cg, gd);
+		gd->ext2bgd_checksum = ext2fs_cg_get_csum(fs, cg, gd, 1 << fs->e2fs_group_desc_shift);
 }
 
 /*
@@ -681,7 +699,7 @@ ext2fs_cg_update(struct m_ext2fs *fs, int cg, struct ext2_gd *gd, int nbfree, in
  * Returned as LE (disk encoding).
  */
 static uint16_t
-ext2fs_cg_get_csum(struct m_ext2fs *fs, int cg, struct ext2_gd *gd)
+ext2fs_cg_get_csum(struct m_ext2fs *fs, int cg, struct ext2_gd *gd, size_t cgsize)
 {
 	uint16_t crc;
 	uint32_t cg_bswapped = h2fs32((uint32_t)cg);
@@ -695,7 +713,7 @@ ext2fs_cg_get_csum(struct m_ext2fs *fs, int cg, struct ext2_gd *gd)
 	crc = crc16(~0, (uint8_t *)fs->e2fs.e2fs_uuid, sizeof(fs->e2fs.e2fs_uuid));
 	crc = crc16(crc, (uint8_t *)&cg_bswapped, sizeof(cg_bswapped));
 	crc = crc16(crc, (uint8_t *)gd, off);
-	/* XXX ondisk32 */
+	crc = crc16(crc, (uint8_t *)gd + off + 2, cgsize - (off + 2));
 	return h2fs16(crc);
 }
 
@@ -723,7 +741,6 @@ ext2fs_init_bb(struct m_ext2fs *fs, int cg, struct ext2_gd *gd, char *bbp)
 int
 ext2fs_cg_verify_and_initialize(struct vnode *devvp, struct m_ext2fs *fs, int ronly)
 {
-	/* XXX disk32 */
 	struct ext2_gd *gd;
 	ino_t ioff;
 	size_t boff;
@@ -737,7 +754,7 @@ ext2fs_cg_verify_and_initialize(struct vnode *devvp, struct m_ext2fs *fs, int ro
 		gd = &fs->e2fs_gd[cg];
 
 		/* Verify checksum */
-		if (gd->ext2bgd_checksum != ext2fs_cg_get_csum(fs, cg, gd)) {
+		if (gd->ext2bgd_checksum != ext2fs_cg_get_csum(fs, cg, gd, 1 << fs->e2fs_group_desc_shift)) {
 			printf("ext2fs_cg_verify_and_initialize: group %d invalid csum\n", cg);
 			return EINVAL;
 		}
@@ -761,7 +778,7 @@ ext2fs_cg_verify_and_initialize(struct vnode *devvp, struct m_ext2fs *fs, int ro
 			if (boff) {
 				/* partial wipe, must read old data */
 				error = bread(devvp,
-					EXT2_FSBTODB(fs, fs2h32(gd->ext2bgd_i_tables) + i),
+					EXT2_FSBTODB64OFF(fs, fs2h32(gd->ext2bgd_i_tables), fs2h32(gd->ext2bgd_i_tables_hi), i),
 					(int)fs->e2fs_bsize, B_MODIFY, &bp);
 				if (error) {
 					printf("ext2fs_cg_verify_and_initialize: can't read itable block");
@@ -775,7 +792,7 @@ ext2fs_cg_verify_and_initialize(struct vnode *devvp, struct m_ext2fs *fs, int ro
 				 * assumes nothing else is changing the data.
 				 */
 				bp = getblk(devvp,
-					EXT2_FSBTODB(fs, fs2h32(gd->ext2bgd_i_tables) + i),
+					EXT2_FSBTODB64OFF(fs, fs2h32(gd->ext2bgd_i_tables), fs2h32(gd->ext2bgd_i_tables_hi), i),
 					(int)fs->e2fs_bsize, 0, 0);
 				clrbuf(bp);
 			}
@@ -784,7 +801,7 @@ ext2fs_cg_verify_and_initialize(struct vnode *devvp, struct m_ext2fs *fs, int ro
 		}
 
 		gd->ext2bgd_flags |= h2fs16(E2FS_BG_INODE_ZEROED);
-		gd->ext2bgd_checksum = ext2fs_cg_get_csum(fs, cg, gd);
+		gd->ext2bgd_checksum = ext2fs_cg_get_csum(fs, cg, gd, 1 << fs->e2fs_group_desc_shift);
 		fs->e2fs_fmod = 1;
 	}
 
diff --git a/sys/ufs/ext2fs/ext2fs_vfsops.c b/sys/ufs/ext2fs/ext2fs_vfsops.c
index c7fb6a2346b..21e851e5fdb 100644
--- a/sys/ufs/ext2fs/ext2fs_vfsops.c
+++ b/sys/ufs/ext2fs/ext2fs_vfsops.c
@@ -597,9 +597,9 @@ ext2fs_reload(struct mount *mp, kauth_cred_t cred, struct lwp *l)
 		if (error) {
 			return error;
 		}
-		e2fs_cgload((struct ext2_gd *)bp->b_data,
-		    &fs->e2fs_gd[i * fs->e2fs_bsize / sizeof(struct ext2_gd)],
-		    fs->e2fs_bsize);
+		e2fs_cgload(bp->b_data,
+		    &fs->e2fs_gd[i * (fs->e2fs_bsize >> fs->e2fs_group_desc_shift)],
+		    fs->e2fs_bsize, fs->e2fs_group_desc_shift);
 		brelse(bp, 0);
 	}
 
@@ -705,7 +705,8 @@ ext2fs_mountfs(struct vnode *devvp, struct mount *mp)
 	}
 
 	/* XXX: should be added in ext2fs_sbfill()? */
-	m_fs->e2fs_gd = kmem_alloc(m_fs->e2fs_ngdb * m_fs->e2fs_bsize, KM_SLEEP);
+	m_fs->e2fs_gd = kmem_alloc(m_fs->e2fs_ngdb * (m_fs->e2fs_bsize >> m_fs->e2fs_group_desc_shift)
+	    * sizeof(struct ext2_gd), KM_SLEEP);
 	for (i = 0; i < m_fs->e2fs_ngdb; i++) {
 		error = bread(devvp,
 		    EXT2_FSBTODB(m_fs, m_fs->e2fs.e2fs_first_dblock +
@@ -716,10 +717,9 @@ ext2fs_mountfs(struct vnode *devvp, struct mount *mp)
 			    m_fs->e2fs_ngdb * m_fs->e2fs_bsize);
 			goto out;
 		}
-		e2fs_cgload((struct ext2_gd *)bp->b_data,
-		    &m_fs->e2fs_gd[
-			i * m_fs->e2fs_bsize / sizeof(struct ext2_gd)],
-		    m_fs->e2fs_bsize);
+		e2fs_cgload(bp->b_data,
+		    &m_fs->e2fs_gd[i * (m_fs->e2fs_bsize >> m_fs->e2fs_group_desc_shift)],
+		    m_fs->e2fs_bsize, m_fs->e2fs_group_desc_shift);
 		brelse(bp, 0);
 		bp = NULL;
 	}
@@ -1277,7 +1277,7 @@ ext2fs_cgupdate(struct ufsmount *mp, int waitfor)
 		    1 /* superblock */ + i), fs->e2fs_bsize, 0, 0);
 		e2fs_cgsave(&fs->e2fs_gd[
-		    i * fs->e2fs_bsize / sizeof(struct ext2_gd)],
-		    (struct ext2_gd *)bp->b_data, fs->e2fs_bsize);
+		    i * (fs->e2fs_bsize >> fs->e2fs_group_desc_shift)],
+		    bp->b_data, fs->e2fs_bsize, fs->e2fs_group_desc_shift);
 		if (waitfor == MNT_WAIT)
 			error = bwrite(bp);
 		else
@@ -1348,7 +1348,17 @@ ext2fs_sbfill(struct m_ext2fs *m_fs, int ronly)
 	m_fs->e2fs_qbmask = m_fs->e2fs_bsize - 1;
 	m_fs->e2fs_bmask = ~m_fs->e2fs_qbmask;
 
-	if ((u32 = m_fs->e2fs_bsize / sizeof(struct ext2_gd)) == 0) {
+	if (!(fs->e2fs_features_incompat & EXT2F_INCOMPAT_64BIT) ||
+	    (fs->e2fs_rev == E2FS_REV0))
+		m_fs->e2fs_group_desc_shift = 5;
+	else {
+		for (m_fs->e2fs_group_desc_shift = 0;
+		     (1 << m_fs->e2fs_group_desc_shift)
+			 < fs->e3fs_desc_size;
+		     m_fs->e2fs_group_desc_shift++);
+	}
+
+	if ((u32 = (m_fs->e2fs_bsize >> m_fs->e2fs_group_desc_shift)) == 0) {
 		/* Unlikely to happen */
 		printf("ext2fs: invalid block size\n");
 		return EINVAL;
@@ -1401,3 +1411,65 @@ ext2fs_sbfill(struct m_ext2fs *m_fs, int ronly)
 
 	return 0;
 }
+
+/*
+ * Copy one block of on-disk group descriptors into the in-memory array.
+ * On-disk entries are (1 << shift_cg_entry_size) bytes apart; in-memory
+ * entries are always sizeof(struct ext2_gd).  cg_size is the number of
+ * on-disk bytes to convert.  For 32-byte descriptors the missing high
+ * fields are zeroed; bytes beyond struct ext2_gd are not copied.
+ */
+void e2fs_cgload(const char *ondisk, struct ext2_gd *inmemory,
+		 int cg_size, int shift_cg_entry_size)
+{
+	const char *iptr = ondisk;
+	struct ext2_gd *optr = inmemory;
+	if (shift_cg_entry_size > 6) {
+		int i;
+		for (i=0; i < (cg_size >> shift_cg_entry_size); i++, optr++,
+			 iptr += (1 << shift_cg_entry_size)) {
+			memcpy(optr, iptr, sizeof(struct ext2_gd));
+		}
+	} else if (shift_cg_entry_size == 6) {
+		memcpy(inmemory, ondisk, cg_size);
+	} else {
+		int i;
+		for (i=0; i < (cg_size >> shift_cg_entry_size); i++, optr++,
+			 iptr += (1 << shift_cg_entry_size)) {
+			memcpy(optr, iptr, 32);
+			/* byte offset into *optr, not 32 structures ahead */
+			memset((char *)optr + 32, 0, sizeof(struct ext2_gd) - 32);
+		}
+	}
+}
+
+/*
+ * Copy in-memory group descriptors out to one on-disk block of cg_size
+ * bytes, using the same entry stride as e2fs_cgload().  Only the first
+ * 32 bytes are written for 32-byte descriptors; on-disk bytes beyond
+ * struct ext2_gd are zero-filled for larger descriptors.
+ */
+void e2fs_cgsave(const struct ext2_gd *inmemory, char *ondisk,
+		 int cg_size, int shift_cg_entry_size)
+{
+	const struct ext2_gd *iptr = inmemory;
+	char *optr = ondisk;
+	if (shift_cg_entry_size > 6) {
+		int i;
+		for (i=0; i < (cg_size >> shift_cg_entry_size); i++, iptr++,
+			 optr += (1 << shift_cg_entry_size)) {
+			memcpy(optr, iptr, sizeof(struct ext2_gd));
+			memset(optr + sizeof(struct ext2_gd), 0,
+			       (1 << shift_cg_entry_size)
+			       - sizeof(struct ext2_gd));
+		}
+	} else if (shift_cg_entry_size == 6) {
+		memcpy(ondisk, inmemory, cg_size);
+	} else {
+		int i;
+		for (i=0; i < (cg_size >> shift_cg_entry_size); i++, iptr++,
+			 optr += (1 << shift_cg_entry_size)) {
+			memcpy(optr, iptr, 32);
+		}
+	}
+}
-- 
2.39.2