Quick experiment: in src/vppinfra/bihash_template.h:clib_bihash_search_inline_2_with_hash(), replace this:
if (PREDICT_FALSE (b->lock)) { volatile BVT (clib_bihash_bucket) * bv = b; while (bv->lock) CLIB_PAUSE (); } With: BV(clib_bihash_lock_bucket(b)); and make sure to BV(clib_bihash_unlock_bucket(b)); just prior to return 0 and return -1 in that function. Please let me know what happens. Thanks... Dave -----Original Message----- From: vpp-dev@lists.fd.io <vpp-dev@lists.fd.io> On Behalf Of Hao Tian Sent: Tuesday, March 14, 2023 4:13 AM To: vpp-dev@lists.fd.io Subject: [vpp-dev] Race condition between bihash deletion and searching - misuse or bug? Hi all, I found that bihash might return zero'ed value (0xff's) if deletion and searching were performed in parallel on different threads. This is reproducible by only ~100 lines of code, from 21.06 all the way up to git master, and on multiple machines from Coffee Lake to Tiger Lake. The code (a plugin) and test command is shown below. Given how easy it is to reproduce the problem, I'm not sure whether this is a bug or something missing on my end. Any advice is welcomed. Thanks! ======== bihash_race.c ======== #include <vnet/plugin/plugin.h> #include <vpp/app/version.h> #include <vppinfra/bihash_8_8.h> static volatile int br_run; static clib_bihash_8_8_t br_bihash_8_8; static int test_run_cnt[2]; static void bihash_race_set (u64 key) { clib_bihash_kv_8_8_t kv = { .key = key, .value = 1 }; clib_bihash_add_del_8_8 (&br_bihash_8_8, &kv, BIHASH_ADD); } static void bihash_race_del (u64 key) { clib_bihash_kv_8_8_t kv = { .key = key, .value = 1 }; clib_bihash_add_del_8_8 (&br_bihash_8_8, &kv, BIHASH_DEL); } static void bihash_race_get (u64 key) { clib_bihash_kv_8_8_t kv = { .key = key, .value = 1 }; clib_bihash_kv_8_8_t value; if (!clib_bihash_search_8_8 (&br_bihash_8_8, &kv, &value)) { if (value.value != 1) { clib_warning ("suspicious value: 0x%lx\n", value.value); } } } static clib_error_t * bihash_race_init (vlib_main_t *vm) { clib_bihash_init_8_8 (&br_bihash_8_8, "bihash_race hash", 4096, 4096 * 256); return 0; } VLIB_NODE_FN (br_test_node) (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) { u64 k = 0x01010101UL; if (!br_run || vm->thread_index == 0) { return 0; } // perform bihash set and delete for 50000 times each in thread 1 if (vm->thread_index == 1 && test_run_cnt[0] < 100000) { if (test_run_cnt[1] & 1) bihash_race_del (k); else bihash_race_set (k); test_run_cnt[0]++; return 0; } // perform bihash lookup for 100000 times in thread 2 if (vm->thread_index == 2 && test_run_cnt[1] < 100000) { bihash_race_get (k); test_run_cnt[1]++; return 0; } return 0; } void bihash_race_test_reset () { memset (test_run_cnt, 0, sizeof (test_run_cnt)); clib_warning ("========== test reset =========="); } static clib_error_t * bihash_race_test_command_fn (vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd) { u8 state = ~0; while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { if (unformat (input, "on")) state = 1; else if (unformat (input, "off")) state = 0; else return clib_error_return (0, "invalid input"); } if (state) { br_run = 1; } else { br_run = 0; bihash_race_test_reset (); vlib_cli_output (vm, "========== test reset =========="); } return 0; } VLIB_CLI_COMMAND (bihash_race_test_command, static) = { .path = "set bihash-race-test", .short_help = "set bihash-race-test <on>|<off>", .function = bihash_race_test_command_fn, }; VLIB_REGISTER_NODE (br_test_node) = { .name = "bihash-race-test", .type = VLIB_NODE_TYPE_INPUT, .state = VLIB_NODE_STATE_POLLING, }; VLIB_INIT_FUNCTION (bihash_race_init); VLIB_PLUGIN_REGISTER () = { .version = VPP_BUILD_VER, .description = "bihash race test", }; ========= CMakeLists.txt ========= add_vpp_plugin(bihash_race SOURCES bihash_race.c ) ========= startup.conf ========= unix { nodaemon full-coredump cli-listen /run/vpp/cli.sock gid 1000 interactive } cpu { workers 3 main-core 1 } dpdk { no-pci } ========= test command ========= terminal1 # vpp -c startup.conf terminal2 # while true; do vppctl set bihash-race-test on; sleep 1; vppctl set bihash-race-test off; done VPP will spit out a lot of "suspicious value: 0xffffffffffffffff" in terminal1, while the code above never saves such value into bihash - the value comes from the !is_add branch in clib_bihash_add_del_inline_with_hash. Changing the memset value (and clib_bihash_is_free_* obviously) in this branch will lead to this new value being returned. Regards, Hao Tian
-=-=-=-=-=-=-=-=-=-=-=- Links: You receive all messages sent to this group. View/Reply Online (#22703): https://lists.fd.io/g/vpp-dev/message/22703 Mute This Topic: https://lists.fd.io/mt/97599770/21656 Group Owner: vpp-dev+ow...@lists.fd.io Unsubscribe: https://lists.fd.io/g/vpp-dev/leave/1480452/21656/631435203/xyzzy [arch...@mail-archive.com] -=-=-=-=-=-=-=-=-=-=-=-