Script 'mail_helper' called by obssrc Hello community, here is the log from the commit of package drbd for openSUSE:Factory checked in at 2021-06-19 23:04:14 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/drbd (Old) and /work/SRC/openSUSE:Factory/.drbd.new.2625 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "drbd" Sat Jun 19 23:04:14 2021 rev:93 rq:900786 version:9.0.29~1+git.cc622880 Changes: -------- --- /work/SRC/openSUSE:Factory/drbd/drbd.changes 2021-05-07 16:46:08.004215053 +0200 +++ /work/SRC/openSUSE:Factory/.drbd.new.2625/drbd.changes 2021-06-19 23:04:57.691849105 +0200 @@ -1,0 +2,21 @@ +Fri Jun 11 15:28:19 UTC 2021 - nick wang <[email protected]> + +- Correct the real version 9.0.29~1 +- Disable _servicedata to avoid verbose commits log + +------------------------------------------------------------------- +Fri Jun 04 13:27:15 UTC 2021 - [email protected] + +- Update to version 9.1.2 + * drbd: fix termination of verify with stop sector + * drbd: Fix locking for the drbd_devices idr + * drbd: use DEFINE_MUTEX instead of mutex_init() + * drbd: remove device_to_minor() + * drbd: fix race condition resetting resync_next_bit + * build: fix make; make clean; make; with pre-packaged compat.patch + * compat: fix compat implementation of CRYPTO_TFM_NEED_KEY + * drbd: remove non-existent argument from kerneldoc + * drbd: kernel-doc and related fixes from upstream + * drbd: Avoid comma separated statements + +------------------------------------------------------------------- Old: ---- drbd-9.0.29~0+git.9a7bc817.tar.bz2 New: ---- _servicedata drbd-9.0.29~1+git.cc622880.tar.bz2 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ drbd.spec ++++++ --- /var/tmp/diff_new_pack.mOqrRV/_old 2021-06-19 23:04:58.239849951 +0200 +++ /var/tmp/diff_new_pack.mOqrRV/_new 2021-06-19 23:04:58.243849957 +0200 @@ -24,7 +24,7 @@ %endif %endif Name: drbd -Version: 9.0.29~0+git.9a7bc817 +Version: 9.0.29~1+git.cc622880 Release: 0 Summary: Linux driver for the "Distributed Replicated Block Device" License: GPL-2.0-or-later ++++++ _service ++++++ --- /var/tmp/diff_new_pack.mOqrRV/_old 2021-06-19 23:04:58.275850006 +0200 +++ /var/tmp/diff_new_pack.mOqrRV/_new 2021-06-19 23:04:58.279850012 +0200 @@ -3,16 +3,9 @@ <param 
name="url">[email protected]:LINBIT/drbd-9.0.git</param> <param name="scm">git</param> <param name="filename">drbd</param> -<!-- - To update to a new release, change "revision" to the desired - git commit hash and bump "version" if necessary - - This will download branch first instead of tag. - <param name="revision">drbd-9.0</param> - <param name="version">9.0.29~0</param> ---> - <param name="versionformat">9.0.29~0+git.%h</param> + <param name="versionformat">9.0.29~1+git.%h</param> <param name="revision">drbd-9.0</param> + <param name="changesgenerate">disable</param> </service> <service name="recompress" mode="disabled"> ++++++ _servicedata ++++++ <servicedata> <service name="tar_scm"> <param name="url">[email protected]:LINBIT/drbd-9.0.git</param> <param name="changesrevision">cc6228800d630a19f2bf37af41bab566011286c0</param></service></servicedata> ++++++ drbd-9.0.29~0+git.9a7bc817.tar.bz2 -> drbd-9.0.29~1+git.cc622880.tar.bz2 ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/drbd-9.0.29~0+git.9a7bc817/drbd/Makefile new/drbd-9.0.29~1+git.cc622880/drbd/Makefile --- old/drbd-9.0.29~0+git.9a7bc817/drbd/Makefile 2021-05-06 09:44:10.000000000 +0200 +++ new/drbd-9.0.29~1+git.cc622880/drbd/Makefile 2021-06-09 11:03:53.000000000 +0200 @@ -153,6 +153,7 @@ rm -f dummy-for-compat-h.c rm -f drbd-kernel-compat/*.[oas] drbd-kernel-compat/.*.cmd rm -f drbd-kernel-compat/compat.patch drbd-kernel-compat/.compat.cocci + rm -f .timestamps_fixed rm -rf .compat_test.* .cache.mk distclean: clean diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/drbd-9.0.29~0+git.9a7bc817/drbd/drbd-kernel-compat/cocci/crypto_tfm_need_key__no_present.cocci new/drbd-9.0.29~1+git.cc622880/drbd/drbd-kernel-compat/cocci/crypto_tfm_need_key__no_present.cocci --- old/drbd-9.0.29~0+git.9a7bc817/drbd/drbd-kernel-compat/cocci/crypto_tfm_need_key__no_present.cocci 2021-05-06 09:44:10.000000000 +0200 +++ 
new/drbd-9.0.29~1+git.cc622880/drbd/drbd-kernel-compat/cocci/crypto_tfm_need_key__no_present.cocci 2021-06-09 11:03:53.000000000 +0200 @@ -2,9 +2,9 @@ identifier h; @@ - return h && (crypto_shash_get_flags(h) & CRYPTO_TFM_NEED_KEY); -+ if (h) { -+ /* HACK: try to set a dummy key. if it succeeds, that's bad: we only want algorithms that don't support keys */ -+ u8 dummy_key[] = {'a'}; -+ return crypto_shash_setkey(h, dummy_key, 1) != -ENOSYS; -+ } ++ /* ++ * On kernels before 4.15, there is no way to check whether or not an algorithm ++ * requires a key. Allow all algorithms, possibly leading to BUGs if they are ++ * used later. ++ */ + return false; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/drbd-9.0.29~0+git.9a7bc817/drbd/drbd_actlog.c new/drbd-9.0.29~1+git.cc622880/drbd/drbd_actlog.c --- old/drbd-9.0.29~0+git.9a7bc817/drbd/drbd_actlog.c 2021-05-06 09:44:10.000000000 +0200 +++ new/drbd-9.0.29~1+git.cc622880/drbd/drbd_actlog.c 2021-06-09 11:03:53.000000000 +0200 @@ -91,7 +91,7 @@ device->md_io.done = 0; device->md_io.error = -ENODEV; - bio = bio_alloc_drbd(GFP_NOIO); + bio = bio_alloc_bioset(GFP_NOIO, 1, &drbd_md_io_bio_set); bio_set_dev(bio, bdev->md_bdev); bio->bi_iter.bi_sector = sector; err = -EIO; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/drbd-9.0.29~0+git.9a7bc817/drbd/drbd_bitmap.c new/drbd-9.0.29~1+git.cc622880/drbd/drbd_bitmap.c --- old/drbd-9.0.29~0+git.9a7bc817/drbd/drbd_bitmap.c 2021-05-06 09:44:10.000000000 +0200 +++ new/drbd-9.0.29~1+git.cc622880/drbd/drbd_bitmap.c 2021-06-09 11:03:53.000000000 +0200 @@ -1123,7 +1123,7 @@ static void bm_page_io_async(struct drbd_bm_aio_ctx *ctx, int page_nr) __must_hold(local) { - struct bio *bio = bio_alloc_drbd(GFP_NOIO); + struct bio *bio = bio_alloc_bioset(GFP_NOIO, 1, &drbd_md_io_bio_set); struct drbd_device *device = ctx->device; struct drbd_bitmap *b = device->bitmap; struct page *page; diff -urN '--exclude=CVS' 
'--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/drbd-9.0.29~0+git.9a7bc817/drbd/drbd_int.h new/drbd-9.0.29~1+git.cc622880/drbd/drbd_int.h --- old/drbd-9.0.29~0+git.9a7bc817/drbd/drbd_int.h 2021-05-06 09:44:10.000000000 +0200 +++ new/drbd-9.0.29~1+git.cc622880/drbd/drbd_int.h 2021-06-09 11:03:53.000000000 +0200 @@ -138,7 +138,7 @@ (typecheck(struct drbd_device*, x) && \ ((x) ? (((x)->magic ^ DRBD_MAGIC) == (long)(x)) : 0)) -extern struct idr drbd_devices; /* RCU, updates: genl_lock() */ +extern struct idr drbd_devices; /* RCU, updates: drbd_devices_lock */ extern struct list_head drbd_resources; /* RCU, updates: resources_mutex */ extern struct mutex resources_mutex; @@ -530,7 +530,6 @@ DESTROY_DISK, /* tell worker to close backing devices and destroy related structures. */ MD_SYNC, /* tell worker to call drbd_md_sync() */ MAKE_NEW_CUR_UUID, /* tell worker to ping peers and eventually write new current uuid */ - MAKE_RESYNC_REQUEST, /* tell worker to send resync requests */ STABLE_RESYNC, /* One peer_device finished the resync stable! 
*/ READ_BALANCE_RR, @@ -1132,6 +1131,7 @@ bool resync_susp_peer[2]; bool resync_susp_dependency[2]; bool resync_susp_other_c[2]; + bool resync_active[2]; enum drbd_repl_state negotiation_result; /* To find disk state after attach */ unsigned int send_cnt; unsigned int recv_cnt; @@ -1151,6 +1151,7 @@ enum drbd_repl_state start_resync_side; enum drbd_repl_state last_repl_state; /* What we received from the peer */ struct timer_list start_resync_timer; + struct drbd_work resync_work; struct timer_list resync_timer; struct drbd_work propagate_uuids_work; @@ -1490,11 +1491,6 @@ peer_device; \ peer_device = __drbd_next_peer_device_ref(&m, peer_device, device)) -static inline unsigned int device_to_minor(struct drbd_device *device) -{ - return device->minor; -} - /* * function declarations *************************/ @@ -1822,8 +1818,6 @@ /* We also need to make sure we get a bio * when we need it for housekeeping purposes */ extern struct bio_set drbd_md_io_bio_set; -/* to allocate from that set */ -extern struct bio *bio_alloc_drbd(gfp_t gfp_mask); /* And a bio_set for cloning */ extern struct bio_set drbd_io_bio_set; @@ -1950,6 +1944,7 @@ extern int w_e_end_csum_rs_req(struct drbd_work *, int); extern int w_e_end_ov_reply(struct drbd_work *, int); extern int w_e_end_ov_req(struct drbd_work *, int); +extern int w_resync_timer(struct drbd_work *, int); extern int w_send_dblock(struct drbd_work *, int); extern int w_send_read_req(struct drbd_work *, int); extern int w_e_reissue(struct drbd_work *, int); @@ -2506,27 +2501,38 @@ return atomic_sub_return(n, &peer_device->unacked_cnt); } +static inline bool repl_is_sync_target(enum drbd_repl_state repl_state) +{ + return repl_state == L_SYNC_TARGET || repl_state == L_PAUSED_SYNC_T; +} + +static inline bool repl_is_sync_source(enum drbd_repl_state repl_state) +{ + return repl_state == L_SYNC_SOURCE || repl_state == L_PAUSED_SYNC_S; +} + +static inline bool repl_is_sync(enum drbd_repl_state repl_state) +{ + return 
repl_is_sync_source(repl_state) || + repl_is_sync_target(repl_state); +} + static inline bool is_sync_target_state(struct drbd_peer_device *peer_device, enum which_state which) { - enum drbd_repl_state repl_state = peer_device->repl_state[which]; - - return repl_state == L_SYNC_TARGET || repl_state == L_PAUSED_SYNC_T; + return repl_is_sync_target(peer_device->repl_state[which]); } static inline bool is_sync_source_state(struct drbd_peer_device *peer_device, enum which_state which) { - enum drbd_repl_state repl_state = peer_device->repl_state[which]; - - return repl_state == L_SYNC_SOURCE || repl_state == L_PAUSED_SYNC_S; + return repl_is_sync_source(peer_device->repl_state[which]); } static inline bool is_sync_state(struct drbd_peer_device *peer_device, enum which_state which) { - return is_sync_source_state(peer_device, which) || - is_sync_target_state(peer_device, which); + return repl_is_sync(peer_device->repl_state[which]); } static inline bool is_verify_state(struct drbd_peer_device *peer_device, diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/drbd-9.0.29~0+git.9a7bc817/drbd/drbd_interval.c new/drbd-9.0.29~1+git.cc622880/drbd/drbd_interval.c --- old/drbd-9.0.29~0+git.9a7bc817/drbd/drbd_interval.c 2021-05-06 09:44:10.000000000 +0200 +++ new/drbd-9.0.29~1+git.cc622880/drbd/drbd_interval.c 2021-06-09 11:03:53.000000000 +0200 @@ -1,7 +1,7 @@ #include "drbd_interval.h" #include "drbd_wrappers.h" -/** +/* * interval_end - return end of @node */ static inline @@ -11,7 +11,7 @@ return this->end; } -/** +/* * update_interval_end - recompute end of @node * * The end of an interval is the highest (start + (size >> 9)) value of this @@ -38,7 +38,7 @@ this->end = end; } -/** +/* * drbd_insert_interval - insert a new interval into a tree */ bool @@ -73,6 +73,7 @@ /** * drbd_contains_interval - check if a tree contains a given interval + * @root: red black tree root * @sector: start sector of @interval * @interval: may be an 
invalid pointer * @@ -105,7 +106,7 @@ return false; } -/** +/* * drbd_remove_interval - remove an interval from a tree */ void @@ -124,6 +125,7 @@ /** * drbd_find_overlap - search for an interval overlapping with [sector, sector + size) + * @root: red black tree root * @sector: start sector * @size: size, aligned to 512 bytes * diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/drbd-9.0.29~0+git.9a7bc817/drbd/drbd_main.c new/drbd-9.0.29~1+git.cc622880/drbd/drbd_main.c --- old/drbd-9.0.29~0+git.9a7bc817/drbd/drbd_main.c 2021-05-06 09:44:10.000000000 +0200 +++ new/drbd-9.0.29~1+git.cc622880/drbd/drbd_main.c 2021-06-09 11:03:53.000000000 +0200 @@ -134,6 +134,8 @@ */ struct idr drbd_devices; struct list_head drbd_resources; +DEFINE_SPINLOCK(drbd_devices_lock); +DEFINE_MUTEX(resources_mutex); struct kmem_cache *drbd_request_cache; struct kmem_cache *drbd_ee_cache; /* peer requests */ @@ -152,14 +154,6 @@ .release = drbd_release, }; -struct bio *bio_alloc_drbd(gfp_t gfp_mask) -{ - if (!bioset_initialized(&drbd_md_io_bio_set)) - return bio_alloc(gfp_mask, 1); - - return bio_alloc_bioset(gfp_mask, 1, &drbd_md_io_bio_set); -} - #ifdef __CHECKER__ /* When checking with sparse, and this is an inline function, sparse will give tons of false positives. When this is a real functions sparse works. @@ -748,7 +742,7 @@ } #ifdef CONFIG_SMP -/** +/* * drbd_calc_cpu_mask() - Generate CPU masks, spread over all CPUs * * Forces all threads of a resource onto the same CPU. This is beneficial for @@ -786,7 +780,6 @@ /** * drbd_thread_current_set_cpu() - modifies the cpu mask of the _current_ thread - * @device: DRBD device. 
* @thi: drbd_thread object * * call in the "main loop" of _all_ threads, no need for any mutex, current won't die @@ -846,7 +839,7 @@ return device_stable; } -/** +/* * drbd_header_size - size of a packet header * * The header size is a multiple of 8, so any payload following the header is @@ -1915,7 +1908,7 @@ return len; } -/** +/* * send_bitmap_rle_or_plain * * Return 0 when done, 1 when another iteration is needed, and a negative error @@ -3617,6 +3610,9 @@ } timer_setup(&peer_device->start_resync_timer, start_resync_timer_fn, 0); + + INIT_LIST_HEAD(&peer_device->resync_work.list); + peer_device->resync_work.cb = w_resync_timer; timer_setup(&peer_device->resync_timer, resync_timer_fn, 0); INIT_LIST_HEAD(&peer_device->propagate_uuids_work.list); @@ -3775,7 +3771,9 @@ locked = true; spin_lock_irq(&resource->req_lock); + spin_lock(&drbd_devices_lock); id = idr_alloc(&drbd_devices, device, minor, minor + 1, GFP_NOWAIT); + spin_unlock(&drbd_devices_lock); if (id < 0) { if (id == -ENOSPC) err = ERR_MINOR_OR_VOLUME_EXISTS; @@ -3849,7 +3847,9 @@ kref_debug_put(&device->kref_debug, 1); out_idr_remove_minor: + spin_lock(&drbd_devices_lock); idr_remove(&drbd_devices, minor); + spin_unlock(&drbd_devices_lock); kref_debug_put(&device->kref_debug, 1); out_no_minor_idr: if (locked) @@ -3897,7 +3897,9 @@ idr_remove(&connection->peer_devices, device->vnr); } idr_remove(&resource->devices, device->vnr); - idr_remove(&drbd_devices, device_to_minor(device)); + spin_lock(&drbd_devices_lock); + idr_remove(&drbd_devices, device->minor); + spin_unlock(&drbd_devices_lock); spin_unlock_irq(&resource->req_lock); for_each_peer_device(peer_device, device) @@ -4021,7 +4023,6 @@ drbd_proc = NULL; /* play safe for drbd_cleanup */ idr_init(&drbd_devices); - mutex_init(&resources_mutex); INIT_LIST_HEAD(&drbd_resources); err = drbd_genl_register(); @@ -5556,7 +5557,7 @@ unsigned int ret = ( (drbd_fault_devs == 0 || - ((1 << device_to_minor(device)) & drbd_fault_devs) != 0) && + ((1 << 
device->minor) & drbd_fault_devs) != 0) && (((_drbd_fault_random(&rrs) % 100) + 1) <= drbd_fault_rate)); if (ret) { diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/drbd-9.0.29~0+git.9a7bc817/drbd/drbd_nl.c new/drbd-9.0.29~1+git.cc622880/drbd/drbd_nl.c --- old/drbd-9.0.29~0+git.9a7bc817/drbd/drbd_nl.c 2021-05-06 09:44:10.000000000 +0200 +++ new/drbd-9.0.29~1+git.cc622880/drbd/drbd_nl.c 2021-06-09 11:03:53.000000000 +0200 @@ -608,7 +608,7 @@ env_print(&env, "TERM=linux"); env_print(&env, "PATH=/sbin:/usr/sbin:/bin:/usr/bin"); if (device) { - env_print(&env, "DRBD_MINOR=%u", device_to_minor(device)); + env_print(&env, "DRBD_MINOR=%u", device->minor); env_print(&env, "DRBD_VOLUME=%u", device->vnr); if (get_ldev(device)) { struct disk_conf *disk_conf = @@ -1340,12 +1340,13 @@ mutex_lock(&adm_ctx.resource->adm_mutex); if (info->genlhdr->cmd == DRBD_ADM_PRIMARY) { - retcode = drbd_set_role(adm_ctx.resource, R_PRIMARY, parms.assume_uptodate, - adm_ctx.reply_skb); + retcode = (enum drbd_ret_code)drbd_set_role(adm_ctx.resource, + R_PRIMARY, parms.assume_uptodate, adm_ctx.reply_skb); if (retcode >= SS_SUCCESS) set_bit(EXPLICIT_PRIMARY, &adm_ctx.resource->flags); } else { - retcode = drbd_set_role(adm_ctx.resource, R_SECONDARY, false, adm_ctx.reply_skb); + retcode = (enum drbd_ret_code)drbd_set_role(adm_ctx.resource, + R_SECONDARY, false, adm_ctx.reply_skb); if (retcode >= SS_SUCCESS) clear_bit(EXPLICIT_PRIMARY, &adm_ctx.resource->flags); else @@ -1536,7 +1537,7 @@ ppsize(ppb, size>>1), (unsigned long long)size>>1); } -/** +/* * drbd_determine_dev_size() - Sets the right device size obeying all constraints * @device: DRBD device. * @@ -1871,7 +1872,7 @@ return size; } -/** +/* * drbd_check_al_size() - Ensures that the AL is of the right size * @device: DRBD device. 
* @@ -3168,7 +3169,7 @@ rv = stable_state_change(resource, change_disk_state(device, D_ATTACHING, CS_VERBOSE | CS_SERIALIZE, NULL)); - retcode = rv; /* FIXME: Type mismatch. */ + retcode = (enum drbd_ret_code)rv; if (rv >= SS_SUCCESS) update_resource_dagtag(resource, nbc); drbd_resume_io(device); @@ -3669,7 +3670,6 @@ * alloc_shash() - Allocate a keyed or unkeyed shash algorithm * @tfm: Destination crypto_shash * @tfm_name: Which algorithm to use - * @err_alg: The error code to return on allocation failure * @type: The functionality that the hash is used for * @must_unkeyed: If set, a check is included which ensures that the algorithm * does not require a key @@ -4763,7 +4763,7 @@ mutex_unlock(&connection->resource->conf_update); } if (rv < SS_SUCCESS) - retcode = rv; /* FIXME: Type mismatch. */ + retcode = (enum drbd_ret_code)rv; else retcode = NO_ERROR; mutex_unlock(&adm_ctx.resource->adm_mutex); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/drbd-9.0.29~0+git.9a7bc817/drbd/drbd_receiver.c new/drbd-9.0.29~1+git.cc622880/drbd/drbd_receiver.c --- old/drbd-9.0.29~0+git.9a7bc817/drbd/drbd_receiver.c 2021-05-06 09:44:10.000000000 +0200 +++ new/drbd-9.0.29~1+git.cc622880/drbd/drbd_receiver.c 2021-06-09 11:03:53.000000000 +0200 @@ -313,8 +313,10 @@ { struct page *tmp; int i = 1; - while ((tmp = page_chain_next(page))) - ++i, page = tmp; + while ((tmp = page_chain_next(page))) { + ++i; + page = tmp; + } if (len) *len = i; return page; @@ -394,7 +396,9 @@ /* In case resync runs faster than anticipated, run the resync_work early */ if (rs_sect_in >= peer_device->rs_in_flight) - drbd_device_post_work(peer_device->device, MAKE_RESYNC_REQUEST); + drbd_queue_work_if_unqueued( + &peer_device->connection->sender_work, + &peer_device->resync_work); } static void reclaim_finished_net_peer_reqs(struct drbd_connection *connection, @@ -430,7 +434,7 @@ /** * drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled) - 
* @device: DRBD device. + * @transport: DRBD transport. * @number: number of pages requested * @gfp_mask: how to allocate and whether to loop until we succeed * @@ -1532,9 +1536,8 @@ return wo; } -/** +/* * drbd_bump_write_ordering() - Fall back to an other write ordering method - * @resource: DRBD resource. * @wo: Write ordering method to try. */ void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backing_dev *bdev, @@ -3629,7 +3632,7 @@ return err; } -/** +/* * drbd_asb_recover_0p - Recover after split-brain with no remaining primaries */ static enum sync_strategy drbd_asb_recover_0p(struct drbd_peer_device *peer_device) __must_hold(local) @@ -3713,7 +3716,7 @@ return rv; } -/** +/* * drbd_asb_recover_1p - Recover after split-brain with one remaining primary */ static enum sync_strategy drbd_asb_recover_1p(struct drbd_peer_device *peer_device) __must_hold(local) @@ -3773,7 +3776,7 @@ return rv; } -/** +/* * drbd_asb_recover_2p - Recover after split-brain with two remaining primaries */ static enum sync_strategy drbd_asb_recover_2p(struct drbd_peer_device *peer_device) __must_hold(local) @@ -7527,7 +7530,7 @@ return 0; } -/** +/* * receive_bitmap_plain * * Return 0 when done, 1 when another iteration is needed, and a negative error @@ -7580,7 +7583,7 @@ return (p->encoding >> 4) & 0x7; } -/** +/* * recv_bm_rle_bits * * Return 0 when done, 1 when another iteration is needed, and a negative error @@ -7649,7 +7652,7 @@ return (s != c->bm_bits); } -/** +/* * decode_bitmap_c * * Return 0 when done, 1 when another iteration is needed, and a negative error @@ -8334,13 +8337,6 @@ * again via drbd_try_clear_on_disk_bm(). */ drbd_rs_cancel_all(peer_device); - if (get_ldev(device)) { - /* Avoid holding a different resync because this one looks like it is - * still active. 
*/ - drbd_rs_controller_reset(peer_device); - put_ldev(device); - } - peer_device->uuids_received = false; if (!drbd_suspended(device)) { diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/drbd-9.0.29~0+git.9a7bc817/drbd/drbd_req.c new/drbd-9.0.29~1+git.cc622880/drbd/drbd_req.c --- old/drbd-9.0.29~0+git.9a7bc817/drbd/drbd_req.c 2021-05-06 09:44:10.000000000 +0200 +++ new/drbd-9.0.29~1+git.cc622880/drbd/drbd_req.c 2021-06-09 11:03:53.000000000 +0200 @@ -1506,18 +1506,6 @@ wake_up(&device->al_wait); } -static void req_make_private_bio(struct drbd_request *req, struct bio *bio_src) -{ - struct bio *bio; - bio = bio_clone_fast(bio_src, GFP_NOIO, &drbd_io_bio_set); - - req->private_bio = bio; - - bio->bi_private = req; - bio->bi_end_io = drbd_request_endio; - bio->bi_next = NULL; -} - static void drbd_req_in_actlog(struct drbd_request *req) { req->local_rq_state |= RQ_IN_ACT_LOG; @@ -1556,8 +1544,11 @@ /* Update disk stats */ req->start_jif = bio_start_io_acct(req->master_bio); - if (get_ldev(device)) - req_make_private_bio(req, bio); + if (get_ldev(device)) { + req->private_bio = bio_clone_fast(bio, GFP_NOIO, &drbd_io_bio_set); + req->private_bio->bi_private = req; + req->private_bio->bi_end_io = drbd_request_endio; + } ktime_get_accounting_assign(req->start_kt, start_kt); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/drbd-9.0.29~0+git.9a7bc817/drbd/drbd_sender.c new/drbd-9.0.29~1+git.cc622880/drbd/drbd_sender.c --- old/drbd-9.0.29~0+git.9a7bc817/drbd/drbd_sender.c 2021-05-06 09:44:10.000000000 +0200 +++ new/drbd-9.0.29~1+git.cc622880/drbd/drbd_sender.c 2021-06-09 11:03:53.000000000 +0200 @@ -31,8 +31,8 @@ void drbd_panic_after_delayed_completion_of_aborted_request(struct drbd_device *device); -static int make_ov_request(struct drbd_peer_device *, unsigned int sect_in); -static int make_resync_request(struct drbd_peer_device *, unsigned int sect_in); +static int 
make_ov_request(struct drbd_peer_device *, int); +static int make_resync_request(struct drbd_peer_device *, int); static bool should_send_barrier(struct drbd_connection *, unsigned int epoch); static void maybe_send_barrier(struct drbd_connection *, unsigned int); static unsigned long get_work_bits(const unsigned long mask, unsigned long *flags); @@ -50,8 +50,6 @@ * */ -struct mutex resources_mutex; - /* used for synchronous meta data and bitmap IO * submitted by drbd_md_sync_page_io() */ @@ -449,67 +447,34 @@ return -EAGAIN; } -static void drbd_device_resync_request(struct drbd_device *device) +int w_resync_timer(struct drbd_work *w, int cancel) { - struct drbd_peer_device *peer_device; - struct drbd_peer_device *peer_device_active = NULL; - struct drbd_peer_device *peer_device_target = NULL; - unsigned int sect_in_target = 0; /* Number of sectors that came in since the last turn for peer to which we are target. */ - bool other_peer_active; - - rcu_read_lock(); - for_each_peer_device_rcu(peer_device, device) { - unsigned int sect_in; /* Number of sectors that came in since the last turn */ - - sect_in = atomic_xchg(&peer_device->rs_sect_in, 0); - peer_device->rs_in_flight -= sect_in; - - if (peer_device->repl_state[NOW] == L_VERIFY_S || peer_device->repl_state[NOW] == L_SYNC_TARGET) { - if (peer_device_target && drbd_ratelimit()) - drbd_warn(device, "%s to peer %d while %s to %d\n", - drbd_repl_str(peer_device_target->repl_state[NOW]), - peer_device_target->connection->peer_node_id, - drbd_repl_str(peer_device->repl_state[NOW]), - peer_device->connection->peer_node_id); - peer_device_target = peer_device; - sect_in_target = sect_in; - } - - if (peer_device->connection->cstate[NOW] == C_CONNECTED && peer_device->rs_in_flight > 0) { - if (peer_device_active && drbd_ratelimit()) - drbd_warn(device, "resync requests in-flight with peer %d and peer %d\n", - peer_device_active->connection->peer_node_id, - peer_device->connection->peer_node_id); - peer_device_active = 
peer_device; - } - } - - other_peer_active = peer_device_active && peer_device_target != peer_device_active; - if (!peer_device_target || /* Nothing to do. */ - other_peer_active || /* Wait for activity to drain before making requests to other peer. */ - test_bit(SYNC_TARGET_TO_BEHIND, &peer_device_target->flags)) { - rcu_read_unlock(); - return; - } - - kref_get(&peer_device_target->connection->kref); - rcu_read_unlock(); + struct drbd_peer_device *peer_device = + container_of(w, struct drbd_peer_device, resync_work); - mutex_lock(&peer_device_target->resync_next_bit_mutex); - switch (peer_device_target->repl_state[NOW]) { + switch (peer_device->repl_state[NOW]) { case L_VERIFY_S: - make_ov_request(peer_device_target, sect_in_target); + mutex_lock(&peer_device->resync_next_bit_mutex); + make_ov_request(peer_device, cancel); + mutex_unlock(&peer_device->resync_next_bit_mutex); break; case L_SYNC_TARGET: - make_resync_request(peer_device_target, sect_in_target); + mutex_lock(&peer_device->resync_next_bit_mutex); + make_resync_request(peer_device, cancel); + mutex_unlock(&peer_device->resync_next_bit_mutex); break; default: + if (atomic_read(&peer_device->rs_sect_in) >= peer_device->rs_in_flight) { + struct drbd_resource *resource = peer_device->device->resource; + unsigned long irq_flags; + begin_state_change(resource, &irq_flags, 0); + peer_device->resync_active[NEW] = false; + end_state_change(resource, &irq_flags); + } break; } - mutex_unlock(&peer_device_target->resync_next_bit_mutex); - kref_put(&peer_device_target->connection->kref, drbd_destroy_connection); - return; + return 0; } int w_send_uuids(struct drbd_work *w, int cancel) @@ -530,13 +495,9 @@ { struct drbd_peer_device *peer_device = from_timer(peer_device, t, resync_timer); - if (test_bit(SYNC_TARGET_TO_BEHIND, &peer_device->flags)) - return; - - /* Post work for the device regardless of the peer_device to which this - * timer is attached. 
This may result in some extra runs of the resync - * work, but that is harmless. */ - drbd_device_post_work(peer_device->device, MAKE_RESYNC_REQUEST); + drbd_queue_work_if_unqueued( + &peer_device->connection->sender_work, + &peer_device->resync_work); } static void fifo_set(struct fifo_buffer *fb, int value) @@ -663,12 +624,16 @@ return req_sect; } -static int drbd_rs_number_requests(struct drbd_peer_device *peer_device, unsigned int sect_in) +static int drbd_rs_number_requests(struct drbd_peer_device *peer_device) { struct net_conf *nc; ktime_t duration, now; + unsigned int sect_in; /* Number of sectors that came in since the last turn */ int number, mxb; + sect_in = atomic_xchg(&peer_device->rs_sect_in, 0); + peer_device->rs_in_flight -= sect_in; + now = ktime_get(); duration = ktime_sub(now, peer_device->rs_last_mk_req_kt); peer_device->rs_last_mk_req_kt = now; @@ -737,7 +702,7 @@ return delay; } -static int make_resync_request(struct drbd_peer_device *peer_device, unsigned int sect_in) +static int make_resync_request(struct drbd_peer_device *peer_device, int cancel) { struct drbd_device *device = peer_device->device; struct drbd_transport *transport = &peer_device->connection->transport; @@ -750,6 +715,9 @@ int i; int discard_granularity = 0; + if (unlikely(cancel)) + return 0; + if (peer_device->rs_total == 0) { /* empty resync? */ drbd_resync_finished(peer_device, D_MASK); @@ -781,7 +749,7 @@ } max_bio_size = queue_max_hw_sectors(device->rq_queue) << 9; - number = drbd_rs_number_requests(peer_device, sect_in); + number = drbd_rs_number_requests(peer_device); /* don't let rs_sectors_came_in() re-schedule us "early" * just because the first reply came "fast", ... 
*/ peer_device->rs_in_flight += number * BM_SECT_PER_BIT; @@ -920,7 +888,9 @@ /* and in case that raced with the receiver, reschedule ourselves right now */ if (i > 0 && atomic_read(&peer_device->rs_sect_in) >= peer_device->rs_in_flight) { - drbd_device_post_work(device, MAKE_RESYNC_REQUEST); + drbd_queue_work_if_unqueued( + &peer_device->connection->sender_work, + &peer_device->resync_work); } else { mod_timer(&peer_device->resync_timer, jiffies + drbd_resync_delay(peer_device)); } @@ -928,7 +898,7 @@ return 0; } -static int make_ov_request(struct drbd_peer_device *peer_device, unsigned int sect_in) +static int make_ov_request(struct drbd_peer_device *peer_device, int cancel) { struct drbd_device *device = peer_device->device; int number, i, size; @@ -936,7 +906,10 @@ const sector_t capacity = get_capacity(device->vdisk); bool stop_sector_reached = false; - number = drbd_rs_number_requests(peer_device, sect_in); + if (unlikely(cancel)) + return 1; + + number = drbd_rs_number_requests(peer_device); sector = peer_device->ov_position; /* don't let rs_sectors_came_in() re-schedule us "early" @@ -949,7 +922,7 @@ /* We check for "finished" only in the reply path: * w_e_end_ov_reply(). * We need to send at least one request out. */ - stop_sector_reached = i > 0 + stop_sector_reached = sector > peer_device->ov_start_sector && verify_can_do_stop_sector(peer_device) && sector >= peer_device->ov_stop_sector; if (stop_sector_reached) @@ -978,7 +951,9 @@ /* ... 
and in case that raced with the receiver, * reschedule ourselves right now */ if (i > 0 && atomic_read(&peer_device->rs_sect_in) >= peer_device->rs_in_flight) - drbd_device_post_work(device, MAKE_RESYNC_REQUEST); + drbd_queue_work_if_unqueued( + &peer_device->connection->sender_work, + &peer_device->resync_work); if (i == 0) mod_timer(&peer_device->resync_timer, jiffies + RS_MAKE_REQS_INTV); return 1; @@ -1175,14 +1150,10 @@ int verify_done = 0; bool aborted = false; - if (repl_state[NOW] == L_SYNC_SOURCE || repl_state[NOW] == L_PAUSED_SYNC_S || - repl_state[NOW] == L_SYNC_TARGET || repl_state[NOW] == L_PAUSED_SYNC_T) { + + if (repl_state[NOW] == L_SYNC_SOURCE || repl_state[NOW] == L_PAUSED_SYNC_S) { /* Make sure all queued w_update_peers()/consider_sending_peers_in_sync() - * executed before killing the resync_lru with drbd_rs_del_all(). - * - * Also make sure w_after_state_change has run and sent notifications - * for the new state before potentially calling a usermode helper - * corresponding to the new sync target state. */ + executed before killing the resync_lru with drbd_rs_del_all() */ if (current == device->resource->worker.task) goto queue_on_sender_workq; else @@ -1240,6 +1211,7 @@ of application IO), and against connectivity loss just before we arrive here. */ if (peer_device->repl_state[NOW] <= L_ESTABLISHED) goto out_unlock; + peer_device->resync_active[NEW] = false; __change_repl_state(peer_device, L_ESTABLISHED); aborted = device->disk_state[NOW] == D_OUTDATED && new_peer_disk_state == D_INCONSISTENT; @@ -2005,9 +1977,6 @@ plan->total = 0; fifo_set(plan, 0); rcu_read_unlock(); - - /* Clearing rs_in_flight may release some other resync. 
*/ - drbd_device_post_work(peer_device->device, MAKE_RESYNC_REQUEST); } void start_resync_timer_fn(struct timer_list *t) @@ -2082,18 +2051,6 @@ clear_bit(AHEAD_TO_SYNC_SOURCE, &peer_device->flags); } -static bool use_checksum_based_resync(struct drbd_connection *connection, struct drbd_device *device) -{ - bool csums_after_crash_only; - rcu_read_lock(); - csums_after_crash_only = rcu_dereference(connection->transport.net_conf)->csums_after_crash_only; - rcu_read_unlock(); - return connection->agreed_pro_version >= 89 && /* supported? */ - connection->csums_tfm && /* configured? */ - (csums_after_crash_only == false /* use for each resync? */ - || test_bit(CRASHED_PRIMARY, &device->flags)); /* or only after Primary crash? */ -} - /** * drbd_start_resync() - Start the resync process * @side: Either L_SYNC_SOURCE or L_SYNC_TARGET @@ -2203,85 +2160,10 @@ if (repl_state < L_ESTABLISHED) r = SS_UNKNOWN_ERROR; - if (r == SS_SUCCESS) { - if (side == L_SYNC_TARGET) - drbd_set_exposed_data_uuid(device, peer_device->current_uuid); - + if (r == SS_SUCCESS) drbd_pause_after(device); - /* Forget potentially stale cached per resync extent bit-counts. - * Open coded drbd_rs_cancel_all(device), we already have IRQs - * disabled, and know the disk state is ok. 
*/ - spin_lock(&device->al_lock); - lc_reset(peer_device->resync_lru); - peer_device->resync_locked = 0; - peer_device->resync_wenr = LC_FREE; - spin_unlock(&device->al_lock); - } unlock_all_resources(); - - if (r == SS_SUCCESS) { - drbd_info(peer_device, "Began resync as %s (will sync %lu KB [%lu bits set]).\n", - drbd_repl_str(repl_state), - (unsigned long) peer_device->rs_total << (BM_BLOCK_SHIFT-10), - (unsigned long) peer_device->rs_total); - if (side == L_SYNC_TARGET) { - peer_device->resync_next_bit = 0; - peer_device->use_csums = use_checksum_based_resync(connection, device); - } else { - peer_device->use_csums = false; - } - - if ((side == L_SYNC_TARGET || side == L_PAUSED_SYNC_T) && - !(peer_device->uuid_flags & UUID_FLAG_STABLE) && - !drbd_stable_sync_source_present(peer_device, NOW)) - set_bit(UNSTABLE_RESYNC, &peer_device->flags); - - /* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid - * with w_send_oos, or the sync target will get confused as to - * how much bits to resync. We cannot do that always, because for an - * empty resync and protocol < 95, we need to do it here, as we call - * drbd_resync_finished from here in that case. - * We drbd_gen_and_send_sync_uuid here for protocol < 96, - * and from after_state_ch otherwise. */ - if (side == L_SYNC_SOURCE && connection->agreed_pro_version < 96) - drbd_gen_and_send_sync_uuid(peer_device); - - if (connection->agreed_pro_version < 95 && peer_device->rs_total == 0) { - /* This still has a race (about when exactly the peers - * detect connection loss) that can lead to a full sync - * on next handshake. In 8.3.9 we fixed this with explicit - * resync-finished notifications, but the fix - * introduces a protocol change. 
Sleeping for some - * time longer than the ping interval + timeout on the - * SyncSource, to give the SyncTarget the chance to - * detect connection loss, then waiting for a ping - * response (implicit in drbd_resync_finished) reduces - * the race considerably, but does not solve it. */ - if (side == L_SYNC_SOURCE) { - struct net_conf *nc; - int timeo; - - rcu_read_lock(); - nc = rcu_dereference(connection->transport.net_conf); - timeo = nc->ping_int * HZ + nc->ping_timeo * HZ / 9; - rcu_read_unlock(); - schedule_timeout_interruptible(timeo); - } - drbd_resync_finished(peer_device, D_MASK); - } - - /* ns.conn may already be != peer_device->repl_state[NOW], - * we may have been paused in between, or become paused until - * the timer triggers. - * No matter, that is handled in resync_timer_fn() */ - if (repl_state == L_SYNC_TARGET) { - drbd_uuid_resync_starting(peer_device); - drbd_device_post_work(peer_device->device, MAKE_RESYNC_REQUEST); - } - - drbd_md_sync_if_dirty(device); - } put_ldev(device); out: up(&device->resource->state_sem); @@ -2518,8 +2400,6 @@ drbd_ldev_destroy(device); if (test_bit(MAKE_NEW_CUR_UUID, &todo)) make_new_current_uuid(device); - if (test_bit(MAKE_RESYNC_REQUEST, &todo)) - drbd_device_resync_request(device); } static void do_peer_device_work(struct drbd_peer_device *peer_device, const unsigned long todo) @@ -2541,7 +2421,6 @@ |(1UL << DESTROY_DISK) \ |(1UL << MD_SYNC) \ |(1UL << MAKE_NEW_CUR_UUID)\ - |(1UL << MAKE_RESYNC_REQUEST)\ ) #define DRBD_PEER_DEVICE_WORK_MASK \ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/drbd-9.0.29~0+git.9a7bc817/drbd/drbd_state.c new/drbd-9.0.29~1+git.cc622880/drbd/drbd_state.c --- old/drbd-9.0.29~0+git.9a7bc817/drbd/drbd_state.c 2021-05-06 09:44:10.000000000 +0200 +++ new/drbd-9.0.29~1+git.cc622880/drbd/drbd_state.c 2021-06-09 11:03:53.000000000 +0200 @@ -360,6 +360,9 @@ memcpy(peer_device_state_change->resync_susp_other_c, peer_device->resync_susp_other_c, 
sizeof(peer_device->resync_susp_other_c)); + memcpy(peer_device_state_change->resync_active, + peer_device->resync_active, + sizeof(peer_device->resync_active)); peer_device_state_change++; } device_state_change++; @@ -441,6 +444,7 @@ OLD_TO_NEW(p->resync_susp_peer); OLD_TO_NEW(p->resync_susp_dependency); OLD_TO_NEW(p->resync_susp_other_c); + OLD_TO_NEW(p->resync_active); } #undef OLD_TO_NEW @@ -523,6 +527,8 @@ peer_device->resync_susp_dependency[NEW] || peer_device->resync_susp_other_c[OLD] != peer_device->resync_susp_other_c[NEW] || + peer_device->resync_active[OLD] != + peer_device->resync_active[NEW] || peer_device->uuid_flags & UUID_FLAG_GOT_STABLE) return true; } @@ -564,6 +570,8 @@ peer_device->resync_susp_dependency[NOW]; peer_device->resync_susp_other_c[NEW] = peer_device->resync_susp_other_c[NOW]; + peer_device->resync_active[NEW] = + peer_device->resync_active[NOW]; } } } @@ -772,6 +780,8 @@ peer_device->resync_susp_dependency[NEW]; peer_device->resync_susp_other_c[NOW] = peer_device->resync_susp_other_c[NEW]; + peer_device->resync_active[NOW] = + peer_device->resync_active[NEW]; } device->cached_state_unstable = !state_is_stable(device); device->cached_err_io = @@ -1028,61 +1038,6 @@ resync_susp_comb_dep(peer_device, which); } -static void set_resync_susp_other_c(struct drbd_peer_device *peer_device, bool val, bool start) -{ - struct drbd_device *device = peer_device->device; - struct drbd_peer_device *p; - enum drbd_repl_state r; - - /* When the resync_susp_other_connection flag gets cleared, make sure it gets - cleared first on all connections where we are L_PAUSED_SYNC_T. Clear it on - one L_PAUSED_SYNC_T at a time. Only if we have no connection that is - L_PAUSED_SYNC_T clear it on all L_PAUSED_SYNC_S connections at once. 
*/ - - if (val) { - for_each_peer_device(p, device) { - if (p == peer_device) - continue; - - r = p->repl_state[NEW]; - p->resync_susp_other_c[NEW] = true; - - if (start && p->disk_state[NEW] >= D_INCONSISTENT && r == L_ESTABLISHED) - p->repl_state[NEW] = L_PAUSED_SYNC_T; - - if (r == L_SYNC_SOURCE) - p->repl_state[NEW] = L_PAUSED_SYNC_S; - else if (r == L_SYNC_TARGET) - p->repl_state[NEW] = L_PAUSED_SYNC_T; - } - } else { - for_each_peer_device(p, device) { - if (p == peer_device) - continue; - - r = p->repl_state[NEW]; - if (r == L_PAUSED_SYNC_S) - continue; - - p->resync_susp_other_c[NEW] = false; - if (r == L_PAUSED_SYNC_T && !resync_suspended(p, NEW)) { - p->repl_state[NEW] = L_SYNC_TARGET; - return; - } - } - - for_each_peer_device(p, device) { - if (p == peer_device) - continue; - - p->resync_susp_other_c[NEW] = false; - - if (p->repl_state[NEW] == L_PAUSED_SYNC_S && !resync_suspended(p, NEW)) - p->repl_state[NEW] = L_SYNC_SOURCE; - } - } -} - static int scnprintf_resync_suspend_flags(char *buffer, size_t size, struct drbd_peer_device *peer_device, enum which_state which) @@ -1734,6 +1689,8 @@ * several "soft" transitions to get the resource back to normal. To allow * those, rather than checking if the desired new state is valid, we can only * check if the desired new state is "at least as good" as the current state. 
+ * + * @resource: DRBD resource */ static enum drbd_state_rv is_valid_soft_transition(struct drbd_resource *resource) { @@ -1822,31 +1779,88 @@ return false; } -static void select_best_resync_source(struct drbd_peer_device *candidate_pd) +static void drbd_start_other_targets_paused(struct drbd_peer_device *peer_device) { - struct drbd_device *device = candidate_pd->device; - struct drbd_peer_device *current_pd; - long diff_w, candidate_w, current_w; + struct drbd_device *device = peer_device->device; + struct drbd_peer_device *p; - for_each_peer_device_rcu(current_pd, device) { - if (current_pd == candidate_pd) + for_each_peer_device(p, device) { + if (p == peer_device) continue; - if (current_pd->repl_state[NEW] == L_SYNC_TARGET) - goto found_pd; + + if (p->disk_state[NEW] >= D_INCONSISTENT && p->repl_state[NEW] == L_ESTABLISHED) + p->repl_state[NEW] = L_PAUSED_SYNC_T; } - return; +} + +static bool drbd_is_sync_target_candidate(struct drbd_peer_device *peer_device) +{ + if (!repl_is_sync_target(peer_device->repl_state[NEW])) + return false; + + if (peer_device->resync_susp_dependency[NEW] || + peer_device->resync_susp_peer[NEW] || + peer_device->resync_susp_user[NEW]) + return false; + + if (peer_device->disk_state[NEW] < D_OUTDATED) + return false; + + return true; + +} + +static void drbd_select_sync_target(struct drbd_device *device) +{ + struct drbd_peer_device *peer_device; + struct drbd_peer_device *target_current = NULL; + struct drbd_peer_device *target_active = NULL; + struct drbd_peer_device *target_desired = NULL; + + /* Find current and active resync peers. */ + for_each_peer_device_rcu(peer_device, device) { + if (peer_device->repl_state[OLD] == L_SYNC_TARGET && drbd_is_sync_target_candidate(peer_device)) + target_current = peer_device; + + if (peer_device->resync_active[NEW]) + target_active = peer_device; + } + + /* Choose desired resync peer. 
*/ + for_each_peer_device_rcu(peer_device, device) { + if (!drbd_is_sync_target_candidate(peer_device)) + continue; + + if (target_desired && drbd_bm_total_weight(peer_device) > drbd_bm_total_weight(target_desired)) + continue; -found_pd: - candidate_w = drbd_bm_total_weight(candidate_pd); - current_w = drbd_bm_total_weight(current_pd); - diff_w = candidate_w - current_w; - if (diff_w < -256L) { - /* Only switch resync source if it is at least 1MByte storage - (256 bits) less to resync than from the previous sync source */ - candidate_pd->repl_state[NEW] = L_SYNC_TARGET; - candidate_pd->resync_susp_other_c[NEW] = false; - current_pd->repl_state[NEW] = L_PAUSED_SYNC_T; - current_pd->resync_susp_other_c[NEW] = true; + target_desired = peer_device; + } + + /* Keep current resync target if the alternative has less than 1MiB + * storage (256 bits) less to resync. */ + if (target_current && target_desired && + drbd_bm_total_weight(target_current) < drbd_bm_total_weight(target_desired) + 256UL) + target_desired = target_current; + + /* Do not activate/unpause a resync if some other is still active. */ + if (target_desired && target_active && target_desired != target_active) + target_desired = NULL; + + /* Activate resync (if not already active). */ + if (target_desired) + target_desired->resync_active[NEW] = true; + + /* Make sure that the targets are correctly paused/unpaused. */ + for_each_peer_device_rcu(peer_device, device) { + enum drbd_repl_state *repl_state = peer_device->repl_state; + + peer_device->resync_susp_other_c[NEW] = target_desired && peer_device != target_desired; + + if (!repl_is_sync_target(repl_state[NEW])) + continue; + + peer_device->repl_state[NEW] = peer_device == target_desired ? 
L_SYNC_TARGET : L_PAUSED_SYNC_T; } } @@ -1954,10 +1968,6 @@ enum drbd_disk_state *peer_disk_state = peer_device->disk_state; struct drbd_connection *connection = peer_device->connection; enum drbd_conn_state *cstate = connection->cstate; - enum drbd_disk_state min_disk_state, max_disk_state; - enum drbd_disk_state min_peer_disk_state, max_peer_disk_state; - enum drbd_role *peer_role = connection->peer_role; - bool uuids_match; if (repl_state[NEW] < L_ESTABLISHED) { peer_device->resync_susp_peer[NEW] = false; @@ -1965,8 +1975,10 @@ peer_disk_state[NEW] < D_INCONSISTENT) peer_disk_state[NEW] = D_UNKNOWN; } - if (repl_state[OLD] >= L_ESTABLISHED && repl_state[NEW] < L_ESTABLISHED) + if (repl_state[OLD] >= L_ESTABLISHED && repl_state[NEW] < L_ESTABLISHED) { lost_connection = true; + peer_device->resync_active[NEW] = false; + } /* Clear the aftr_isp when becoming unconfigured */ if (cstate[NEW] == C_STANDALONE && @@ -1980,6 +1992,7 @@ peer_disk_state[NEW] <= D_FAILED)) { repl_state[NEW] = L_ESTABLISHED; clear_bit(RECONCILIATION_RESYNC, &peer_device->flags); + peer_device->resync_active[NEW] = false; } /* D_CONSISTENT vanish when we get connected (pre 9.0) */ @@ -1999,38 +2012,35 @@ (role[OLD] != R_PRIMARY || peer_disk_state[OLD] != D_UNKNOWN)) connection->susp_fen[NEW] = true; + } + + drbd_select_sync_target(device); + + for_each_peer_device_rcu(peer_device, device) { + enum drbd_repl_state *repl_state = peer_device->repl_state; + enum drbd_disk_state *peer_disk_state = peer_device->disk_state; + struct drbd_connection *connection = peer_device->connection; + enum drbd_disk_state min_disk_state, max_disk_state; + enum drbd_disk_state min_peer_disk_state, max_peer_disk_state; + enum drbd_role *peer_role = connection->peer_role; + bool uuids_match; /* Pause a SyncSource until it finishes resync as target on other connections */ if (repl_state[OLD] != L_SYNC_SOURCE && repl_state[NEW] == L_SYNC_SOURCE && is_sync_target_other_c(peer_device)) 
peer_device->resync_susp_other_c[NEW] = true; - if (peer_device->resync_susp_other_c[NEW] && - repl_state[NEW] == L_SYNC_TARGET) - select_best_resync_source(peer_device); - if (resync_suspended(peer_device, NEW)) { if (repl_state[NEW] == L_SYNC_SOURCE) repl_state[NEW] = L_PAUSED_SYNC_S; - if (repl_state[NEW] == L_SYNC_TARGET) - repl_state[NEW] = L_PAUSED_SYNC_T; } else { if (repl_state[NEW] == L_PAUSED_SYNC_S) repl_state[NEW] = L_SYNC_SOURCE; - if (repl_state[NEW] == L_PAUSED_SYNC_T) - repl_state[NEW] = L_SYNC_TARGET; } - /* This needs to be after the previous block, since we should not set - the bit if we are paused ourselves */ - if (repl_state[OLD] != L_SYNC_TARGET && repl_state[NEW] == L_SYNC_TARGET) - set_resync_susp_other_c(peer_device, true, false); - if (repl_state[OLD] == L_SYNC_TARGET && repl_state[NEW] != L_SYNC_TARGET) - set_resync_susp_other_c(peer_device, false, false); - /* Implication of the repl state on other peer's repl state */ if (repl_state[OLD] != L_STARTING_SYNC_T && repl_state[NEW] == L_STARTING_SYNC_T) - set_resync_susp_other_c(peer_device, true, true); + drbd_start_other_targets_paused(peer_device); /* Implications of the repl state on the disk states */ min_disk_state = D_DISKLESS; @@ -2282,6 +2292,7 @@ unsigned long tw = drbd_bm_total_weight(peer_device); unsigned long now = jiffies; + peer_device->resync_next_bit = 0; peer_device->rs_failed = 0; peer_device->rs_paused = 0; peer_device->rs_same_csum = 0; @@ -3299,6 +3310,87 @@ } } +static bool use_checksum_based_resync(struct drbd_connection *connection, struct drbd_device *device) +{ + bool csums_after_crash_only; + rcu_read_lock(); + csums_after_crash_only = rcu_dereference(connection->transport.net_conf)->csums_after_crash_only; + rcu_read_unlock(); + return connection->agreed_pro_version >= 89 && /* supported? */ + connection->csums_tfm && /* configured? */ + (csums_after_crash_only == false /* use for each resync? 
*/ + || test_bit(CRASHED_PRIMARY, &device->flags)); /* or only after Primary crash? */ +} + +static void drbd_run_resync(struct drbd_peer_device *peer_device, enum drbd_repl_state repl_state) +{ + struct drbd_device *device = peer_device->device; + struct drbd_connection *connection = peer_device->connection; + enum drbd_repl_state side = repl_is_sync_target(repl_state) ? L_SYNC_TARGET : L_SYNC_SOURCE; + + drbd_info(peer_device, "Began resync as %s (will sync %lu KB [%lu bits set]).\n", + drbd_repl_str(repl_state), + (unsigned long) peer_device->rs_total << (BM_BLOCK_SHIFT-10), + (unsigned long) peer_device->rs_total); + + if (side == L_SYNC_TARGET) + drbd_set_exposed_data_uuid(device, peer_device->current_uuid); + + /* Forget potentially stale cached per resync extent bit-counts. */ + drbd_rs_cancel_all(peer_device); + + peer_device->use_csums = side == L_SYNC_TARGET ? + use_checksum_based_resync(connection, device) : false; + + if (side == L_SYNC_TARGET && + !(peer_device->uuid_flags & UUID_FLAG_STABLE) && + !drbd_stable_sync_source_present(peer_device, NOW)) + set_bit(UNSTABLE_RESYNC, &peer_device->flags); + + /* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid + * with w_send_oos, or the sync target will get confused as to + * how much bits to resync. We cannot do that always, because for an + * empty resync and protocol < 95, we need to do it here, as we call + * drbd_resync_finished from here in that case. + * We drbd_gen_and_send_sync_uuid here for protocol < 96, + * and from after_state_ch otherwise. */ + if (side == L_SYNC_SOURCE && connection->agreed_pro_version < 96) + drbd_gen_and_send_sync_uuid(peer_device); + + if (connection->agreed_pro_version < 95 && peer_device->rs_total == 0) { + /* This still has a race (about when exactly the peers + * detect connection loss) that can lead to a full sync + * on next handshake. In 8.3.9 we fixed this with explicit + * resync-finished notifications, but the fix + * introduces a protocol change. 
Sleeping for some + * time longer than the ping interval + timeout on the + * SyncSource, to give the SyncTarget the chance to + * detect connection loss, then waiting for a ping + * response (implicit in drbd_resync_finished) reduces + * the race considerably, but does not solve it. */ + if (side == L_SYNC_SOURCE) { + struct net_conf *nc; + int timeo; + + rcu_read_lock(); + nc = rcu_dereference(connection->transport.net_conf); + timeo = nc->ping_int * HZ + nc->ping_timeo * HZ / 9; + rcu_read_unlock(); + schedule_timeout_interruptible(timeo); + } + drbd_resync_finished(peer_device, D_MASK); + } + + /* ns.conn may already be != peer_device->repl_state[NOW], + * we may have been paused in between, or become paused until + * the timer triggers. + * No matter, that is handled in resync_timer_fn() */ + if (repl_state == L_SYNC_TARGET) + drbd_uuid_resync_starting(peer_device); + + drbd_md_sync_if_dirty(device); +} + /* * Perform after state change actions that may sleep. @@ -3490,10 +3582,6 @@ (resync_susp_user[OLD] != resync_susp_user[NEW]))) send_state = true; - /* Resync continues, start making requests. 
*/ - if (repl_state[OLD] == L_PAUSED_SYNC_T && repl_state[NEW] == L_SYNC_TARGET) - drbd_device_post_work(device, MAKE_RESYNC_REQUEST); - /* finished resync, tell sync source */ if ((repl_state[OLD] == L_SYNC_TARGET || repl_state[OLD] == L_PAUSED_SYNC_T) && repl_state[NEW] == L_ESTABLISHED) @@ -3634,12 +3722,21 @@ drbd_uuid_new_current(device, false); } - if (repl_state[OLD] != L_VERIFY_S && repl_state[NEW] == L_VERIFY_S && get_ldev(device)) { + if (repl_state[OLD] != L_VERIFY_S && repl_state[NEW] == L_VERIFY_S) { drbd_info(peer_device, "Starting Online Verify from sector %llu\n", (unsigned long long)peer_device->ov_position); - drbd_device_post_work(peer_device->device, MAKE_RESYNC_REQUEST); - put_ldev(device); + drbd_queue_work_if_unqueued( + &peer_device->connection->sender_work, + &peer_device->resync_work); } + + if (!repl_is_sync(repl_state[OLD]) && repl_is_sync(repl_state[NEW])) + drbd_run_resync(peer_device, repl_state[NEW]); + + if (!peer_device_state_change->resync_active[OLD] && peer_device_state_change->resync_active[NEW]) + drbd_queue_work_if_unqueued( + &peer_device->connection->sender_work, + &peer_device->resync_work); } if (((role[OLD] == R_PRIMARY && role[NEW] == R_SECONDARY) || some_peer_demoted) && @@ -5370,7 +5467,7 @@ .peer_device = peer_device }; - if (new_repl_state == L_WF_BITMAP_S) + if (new_repl_state == L_WF_BITMAP_S || new_repl_state == L_VERIFY_S) repl_context.context.change_local_state_last = true; return change_cluster_wide_state(do_change_repl_state, &repl_context.context); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/drbd-9.0.29~0+git.9a7bc817/drbd/drbd_state_change.h new/drbd-9.0.29~1+git.cc622880/drbd/drbd_state_change.h --- old/drbd-9.0.29~0+git.9a7bc817/drbd/drbd_state_change.h 2021-05-06 09:44:10.000000000 +0200 +++ new/drbd-9.0.29~1+git.cc622880/drbd/drbd_state_change.h 2021-06-09 11:03:53.000000000 +0200 @@ -39,6 +39,7 @@ bool resync_susp_peer[2]; bool resync_susp_dependency[2]; 
bool resync_susp_other_c[2]; + bool resync_active[2]; }; struct drbd_state_change_object_count { ++++++ drbd_git_revision ++++++ --- /var/tmp/diff_new_pack.mOqrRV/_old 2021-06-19 23:04:58.579850475 +0200 +++ /var/tmp/diff_new_pack.mOqrRV/_new 2021-06-19 23:04:58.579850475 +0200 @@ -1 +1 @@ -GIT-hash: 9114a0383f72b87610cd9ee282676cf94213da5b +GIT-hash: cc6228800d630a19f2bf37af41bab566011286c0
