Module Name: src Committed By: riastradh Date: Fri Apr 21 18:25:49 UTC 2023
Modified Files: src/sys/dev/dkwedge: dk.c Log Message: dk(4): Prevent races in access to struct dkwedge_softc::sc_size. Rules: 1. Only ever increases, never decreases. (Decreases require removing and readding the wedge.) 2. Increases are serialized by dk_openlock. 3. Reads can happen unlocked in any context where the softc is valid. Access is gathered into dkwedge_size* subroutines -- don't touch sc_size outside these. For now, we use rwlock(9) to keep the reasoning simple. This should be done with atomics on 64-bit platforms and a seqlock on 32-bit platforms to avoid contention. However, we can do that in a later change. To generate a diff of this commit: cvs rdiff -u -r1.134 -r1.135 src/sys/dev/dkwedge/dk.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/dev/dkwedge/dk.c diff -u src/sys/dev/dkwedge/dk.c:1.134 src/sys/dev/dkwedge/dk.c:1.135 --- src/sys/dev/dkwedge/dk.c:1.134 Fri Apr 21 18:25:30 2023 +++ src/sys/dev/dkwedge/dk.c Fri Apr 21 18:25:49 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: dk.c,v 1.134 2023/04/21 18:25:30 riastradh Exp $ */ +/* $NetBSD: dk.c,v 1.135 2023/04/21 18:25:49 riastradh Exp $ */ /*- * Copyright (c) 2004, 2005, 2006, 2007 The NetBSD Foundation, Inc. @@ -30,7 +30,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: dk.c,v 1.134 2023/04/21 18:25:30 riastradh Exp $"); +__KERNEL_RCSID(0, "$NetBSD: dk.c,v 1.135 2023/04/21 18:25:49 riastradh Exp $"); #ifdef _KERNEL_OPT #include "opt_dkwedge.h" @@ -79,6 +79,7 @@ struct dkwedge_softc { struct disk *sc_parent; /* parent disk */ daddr_t sc_offset; /* LBA offset of wedge in parent */ + krwlock_t sc_sizelock; uint64_t sc_size; /* size of wedge in blocks */ char sc_ptype[32]; /* partition type */ dev_t sc_pdev; /* cached parent's dev_t */ @@ -289,6 +290,47 @@ out: rw_exit(&dkwedges_lock); } static void +dkwedge_size_init(struct dkwedge_softc *sc, uint64_t size) +{ + + rw_init(&sc->sc_sizelock); + sc->sc_size = size; +} + +static void +dkwedge_size_fini(struct dkwedge_softc *sc) +{ + + rw_destroy(&sc->sc_sizelock); +} + +static uint64_t +dkwedge_size(struct dkwedge_softc *sc) +{ + uint64_t size; + + rw_enter(&sc->sc_sizelock, RW_READER); + size = sc->sc_size; + rw_exit(&sc->sc_sizelock); + + return size; +} + +static void +dkwedge_size_increase(struct dkwedge_softc *sc, uint64_t size) +{ + + KASSERT(mutex_owned(&sc->sc_dk.dk_openlock)); + + rw_enter(&sc->sc_sizelock, RW_WRITER); + KASSERTMSG(size >= sc->sc_size, + "decreasing dkwedge size from %"PRIu64" to %"PRIu64, + sc->sc_size, size); + sc->sc_size = size; + rw_exit(&sc->sc_sizelock); +} + +static void dk_set_geometry(struct dkwedge_softc *sc, struct disk *pdk) { struct disk *dk = &sc->sc_dk; @@ -296,7 +338,7 @@ dk_set_geometry(struct dkwedge_softc *sc memset(dg, 0, sizeof(*dg)); - dg->dg_secperunit = sc->sc_size; + dg->dg_secperunit = dkwedge_size(sc); dg->dg_secsize = DEV_BSIZE << pdk->dk_blkshift; /* fake numbers, 1 cylinder is 1 MB with default sector size */ @@ -351,11 +393,11 @@ dkwedge_add(struct dkwedge_info *dkw) break; if (strcmp(lsc->sc_ptype, dkw->dkw_ptype) != 0) break; - if (lsc->sc_size > dkw->dkw_size) + if (dkwedge_size(lsc) > dkw->dkw_size) break; sc = lsc; - sc->sc_size = dkw->dkw_size; + dkwedge_size_increase(sc, dkw->dkw_size); dk_set_geometry(sc, pdk); break; @@ -370,7 +412,7 @@ dkwedge_add(struct dkwedge_info *dkw) sc->sc_parent = pdk; sc->sc_pdev = pdev; sc->sc_offset = dkw->dkw_offset; - sc->sc_size = dkw->dkw_size; + dkwedge_size_init(sc, dkw->dkw_size); memcpy(sc->sc_wname, dkw->dkw_wname, sizeof(sc->sc_wname)); sc->sc_wname[sizeof(sc->sc_wname) - 1] = '\0'; @@ -396,8 +438,11 @@ dkwedge_add(struct dkwedge_info *dkw) else { /* Check for wedge overlap. */ LIST_FOREACH(lsc, &pdk->dk_wedges, sc_plink) { - daddr_t lastblk = sc->sc_offset + sc->sc_size - 1; - daddr_t llastblk = lsc->sc_offset + lsc->sc_size - 1; + /* XXX arithmetic overflow */ + uint64_t size = dkwedge_size(sc); + uint64_t lsize = dkwedge_size(lsc); + daddr_t lastblk = sc->sc_offset + size - 1; + daddr_t llastblk = lsc->sc_offset + lsize - 1; if (sc->sc_offset >= lsc->sc_offset && sc->sc_offset <= llastblk) { @@ -412,7 +457,7 @@ dkwedge_add(struct dkwedge_info *dkw) } if (lsc != NULL) { if (sc->sc_offset == lsc->sc_offset && - sc->sc_size == lsc->sc_size && + dkwedge_size(sc) == dkwedge_size(lsc) && strcmp(sc->sc_wname, lsc->sc_wname) == 0) error = EEXIST; else @@ -427,6 +472,7 @@ dkwedge_add(struct dkwedge_info *dkw) cv_destroy(&sc->sc_dkdrn); mutex_destroy(&sc->sc_iolock); bufq_free(sc->sc_bufq); + dkwedge_size_fini(sc); free(sc, M_DKWEDGE); return error; } @@ -484,6 +530,7 @@ dkwedge_add(struct dkwedge_info *dkw) cv_destroy(&sc->sc_dkdrn); mutex_destroy(&sc->sc_iolock); bufq_free(sc->sc_bufq); + dkwedge_size_fini(sc); free(sc, M_DKWEDGE); return error; } @@ -512,6 +559,7 @@ dkwedge_add(struct dkwedge_info *dkw) cv_destroy(&sc->sc_dkdrn); mutex_destroy(&sc->sc_iolock); bufq_free(sc->sc_bufq); + dkwedge_size_fini(sc); free(sc, M_DKWEDGE); return ENOMEM; } @@ -534,7 +582,7 @@ announce: "%s at %s: \"%s\", %"PRIu64" blocks at %"PRId64", type: %s\n", device_xname(sc->sc_dev), pdk->dk_name, sc->sc_wname, /* XXX Unicode */ - sc->sc_size, sc->sc_offset, + dkwedge_size(sc), sc->sc_offset, sc->sc_ptype[0] == '\0' ? "<unknown>" : sc->sc_ptype); /* Return the devname to the caller. */ @@ -706,6 +754,7 @@ dkwedge_detach(device_t self, int flags) mutex_destroy(&sc->sc_iolock); cv_destroy(&sc->sc_dkdrn); + dkwedge_size_fini(sc); free(sc, M_DKWEDGE); @@ -797,7 +846,7 @@ dkwedge_list(struct disk *pdk, struct dk strlcpy(dkw.dkw_parent, sc->sc_parent->dk_name, sizeof(dkw.dkw_parent)); dkw.dkw_offset = sc->sc_offset; - dkw.dkw_size = sc->sc_size; + dkw.dkw_size = dkwedge_size(sc); strlcpy(dkw.dkw_ptype, sc->sc_ptype, sizeof(dkw.dkw_ptype)); error = uiomove(&dkw, sizeof(dkw), &uio); @@ -1341,7 +1390,7 @@ dkstrategy(struct buf *bp) goto done; p_offset = sc->sc_offset << sc->sc_parent->dk_blkshift; - p_size = sc->sc_size << sc->sc_parent->dk_blkshift; + p_size = dkwedge_size(sc) << sc->sc_parent->dk_blkshift; /* Make sure it's in-range. */ if (bounds_check_with_mediasize(bp, DEV_BSIZE, p_size) <= 0) @@ -1596,7 +1645,7 @@ dkioctl(dev_t dev, u_long cmd, void *dat strlcpy(dkw->dkw_parent, sc->sc_parent->dk_name, sizeof(dkw->dkw_parent)); dkw->dkw_offset = sc->sc_offset; - dkw->dkw_size = sc->sc_size; + dkw->dkw_size = dkwedge_size(sc); strlcpy(dkw->dkw_ptype, sc->sc_ptype, sizeof(dkw->dkw_ptype)); break; @@ -1634,6 +1683,7 @@ static int dkdiscard(dev_t dev, off_t pos, off_t len) { struct dkwedge_softc *sc = dkwedge_lookup(dev); + uint64_t size = dkwedge_size(sc); unsigned shift; off_t offset, maxlen; int error; @@ -1645,14 +1695,15 @@ dkdiscard(dev_t dev, off_t pos, off_t le if (sc->sc_parent->dk_rawvp == NULL) return ENXIO; + /* XXX check bounds on size/offset up front */ shift = (sc->sc_parent->dk_blkshift + DEV_BSHIFT); - KASSERT(__type_fit(off_t, sc->sc_size)); + KASSERT(__type_fit(off_t, size)); KASSERT(__type_fit(off_t, sc->sc_offset)); KASSERT(0 <= sc->sc_offset); - KASSERT(sc->sc_size <= (__type_max(off_t) >> shift)); - KASSERT(sc->sc_offset <= ((__type_max(off_t) >> shift) - sc->sc_size)); + KASSERT(size <= (__type_max(off_t) >> shift)); + KASSERT(sc->sc_offset <= ((__type_max(off_t) >> shift) - size)); offset = ((off_t)sc->sc_offset << shift); - maxlen = ((off_t)sc->sc_size << shift); + maxlen = ((off_t)size << shift); if (len > maxlen) return EINVAL; @@ -1691,7 +1742,7 @@ dksize(dev_t dev) /* Our content type is static, no need to open the device. */ - p_size = sc->sc_size << sc->sc_parent->dk_blkshift; + p_size = dkwedge_size(sc) << sc->sc_parent->dk_blkshift; if (strcmp(sc->sc_ptype, DKW_PTYPE_SWAP) == 0) { /* Saturate if we are larger than INT_MAX. */ if (p_size > INT_MAX) @@ -1741,7 +1792,7 @@ dkdump(dev_t dev, daddr_t blkno, void *v } p_offset = sc->sc_offset << sc->sc_parent->dk_blkshift; - p_size = sc->sc_size << sc->sc_parent->dk_blkshift; + p_size = dkwedge_size(sc) << sc->sc_parent->dk_blkshift; if (blkno < 0 || blkno + size/DEV_BSIZE > p_size) { printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > " @@ -1784,7 +1835,7 @@ dkwedge_find_partition(device_t parent, continue; if (strcmp(sc->sc_parent->dk_name, device_xname(parent)) == 0 && sc->sc_offset == startblk && - sc->sc_size == nblks) { + dkwedge_size(sc) == nblks) { if (wedge) { printf("WARNING: double match for boot wedge " "(%s, %s)\n",