Sorry for the number of emails I'm sending, but I noticed something that's probably important. I'm also appending a gdb log from tracing through the function (trying to answer why it's doing cluster mode stuff at all).

While tracing through, I noticed that *before* the write-bitmap loop, mdadm -E considers the superblock valid. That agrees with what I saw from strace, I suppose. At first glance, it figures out how much to write by calling this function:

/*
 * calc_bitmap_size - number of bytes covering the bitmap superblock plus
 * the bitmap bits, rounded up to a multiple of `boundary`.
 *
 * @bms:      bitmap superblock; sync_size and chunksize are read from it
 *            and converted from little-endian.
 * @boundary: rounding granularity, in bytes, handed to ROUND_UP.
 *
 * One bit is counted per (chunksize >> 9) units of sync_size (presumably
 * chunksize is in bytes and sync_size in 512-byte sectors -- TODO confirm).
 *
 * NOTE(review): a chunksize below 512 makes the divisor zero; the integer
 * division discards any remainder; and the 64-bit total is narrowed to
 * unsigned int on return.
 */
static unsigned int calc_bitmap_size(bitmap_super_t *bms, unsigned int boundary)
{
        unsigned long long bits, bytes;

        /* Number of bitmap bits: sync_size divided by (chunksize >> 9). */
        bits = __le64_to_cpu(bms->sync_size) / 
(__le32_to_cpu(bms->chunksize)>>9);
        /* Bits to whole bytes, rounding up. */
        bytes = (bits+7) >> 3;
        /* Add room for the bitmap superblock structure itself. */
        bytes += sizeof(bitmap_super_t);
        /* Round the total up to `boundary` (via ROUND_UP). */
        bytes = ROUND_UP(bytes, boundary);

        return bytes;
}

That code looked familiar, and I figured out where—it's also in 95a05b37e8eb2bc0803b1a0298fce6adc60eff16, the commit that I originally found to have broken it. That commit changes this code: the ROUND_UP line went from rounding to 512 to rounding to 4096 (and from the gdb trace, boundary==4096).

I tested changing that line to "bytes = ROUND_UP(bytes, 512);", and it works: mdadm adds the new disk to the array and produces no warnings or errors.
Starting program: /var/tmp/mdadm/mdadm/mdadm -a /dev/md/pv0 /dev/sdc3

Breakpoint 1, write_bitmap1 (st=0x6b0780, fd=5, update=NodeNumUpdate) at 
super1.c:2351
2351            struct mdp_superblock_1 *sb = st->sb;
st = 0x6b0780
fd = 5
update = NodeNumUpdate
$1 = (struct supertype *) 0x6b0780
$2 = {ss = 0x69c060 <super1>, minor_version = 0, max_devs = 1920, 
container_devnm = '\000' <repeats 31 times>, sb = 0x6c7000, 
  info = 0x6c6450, other = 0x0, devsize = 0, data_offset = 0, ignore_hw_compat 
= 0, updates = 0x0, update_tail = 0x0, arrays = 0x0, 
  sock = 0, devnm = "md127", '\000' <repeats 26 times>, devcnt = 0, retry_soon 
= 0, nodes = 0, cluster_name = 0x0, devs = 0x0}
#0  write_bitmap1 (st=0x6b0780, fd=5, update=NodeNumUpdate) at super1.c:2351
        sb = 0x6c8000
        bms = 0x8e6492c800000400
        rv = 0
        buf = 0x15250a2b
        towrite = 1953005968
        n = 0
        len = 0
        afd = {fd = 243328694, blk_sz = 5}
        i = 7106560
        total_bm_space = 2199023255557
        bm_space_per_node = 7110656
#1  0x000000000044530c in write_init_super1 (st=0x6b0780) at super1.c:1851
        sb = 0x6c7000
        refst = 0x6c6490
        rv = 0
        bm_space = 264
        di = 0x6c6450
        dsize = 1953005985
        array_size = 1953005568
        sb_offset = 1953005968
        data_offset = 0
#2  0x00000000004169d0 in Manage_add (fd=3, tfd=4, dv=0x6b0040, tst=0x6b0780, 
array=0x7fffffffda40, force=0, verbose=0, 
    devname=0x7fffffffe4b7 "/dev/md/pv0", update=0x0, rdev=2083, 
array_size=1953005568, raid_slot=-1) at Manage.c:971
        dfd = 5
        ldsize = 999939064320
        dev_st = 0x6c6390
        j = 8
        disc = {number = 8, major = 8, minor = 35, raid_disk = -1, state = 0}
#3  0x00000000004183f5 in Manage_subdevs (devname=0x7fffffffe4b7 "/dev/md/pv0", 
fd=3, devlist=0x6b0040, verbose=0, test=0, update=0x0, 
    force=0) at Manage.c:1617
        rdev = 2083
        rv = 0
        mj = -142377600
        mn = 32767
        array = {major_version = 1, minor_version = 0, patch_version = 3, ctime 
= 1276712708, level = 10, size = 976502784, nr_disks = 4, 
          raid_disks = 4, md_minor = 127, not_persistent = 0, utime = 
1474393877, state = 256, active_disks = 4, working_disks = 4, 
          failed_disks = 0, spare_disks = 0, layout = 513, chunk_size = 524288}
        array_size = 1953005568
        dv = 0x6b0040
        tfd = 4
        tst = 0x6b0780
        subarray = 0x0
        sysfd = -1
        count = 0
        info = {array = {major_version = -9784, minor_version = 32767, 
patch_version = -136434289, ctime = 32767, level = 2, size = 0, 
            nr_disks = -134254776, raid_disks = 32767, md_minor = 1, 
not_persistent = 0, utime = 0, state = 0, active_disks = 1, 
            working_disks = 0, failed_disks = -134225560, spare_disks = 32767, 
layout = -7824, chunk_size = 32767}, disk = {
            number = -10032, major = 32767, minor = -117177849, raid_disk = 0, 
state = 0}, events = 140737354130624, uuid = {-9968, 32767, 
            0, 1}, 
          name = 
"\000\331\377\377\377\177\000\000\354\222s\360\000\000\000\000\223\024@\000\000\000\000\000\377\377\377\377\000\000\000\000@",
 data_offset = 140737346016776, new_data_offset = 140737354099120, 
component_size = 140737488345760, custom_array_size = 140737351942788, 
          reshape_active = 1, reshape_progress = 140737354129344, 
recovery_blocked = 0, journal_device_required = 0, 
          journal_clean = -136478512, space_before = 140737351876824, 
space_after = 140737351876808, {resync_start = 140737349770912, 
            recovery_start = 140737349770912}, bitmap_offset = 140737488345760, 
safe_mode_delay = 0, new_level = 6905808, delta_disks = 0, 
          new_layout = 4206336, new_chunk = 0, errors = -7872, cache_size = 0, 
mismatch_cnt = 0, 
          text_version = 
"\000\000\000\000`\340\377\377\377\177\000\000\326w\336\367\377\177\000\000\001",
 '\000' <repeats 23 times>, "\b\026\204\367\377\177", container_member = -9504, 
container_enough = 32767, sys_name = "md127", '\000' <repeats 26 times>, 
          devs = 0xff000000000000, next = 0x0, recovery_fd = -16777216, 
state_fd = -65536, prev_state = 0, curr_state = 0, next_state = 0, 
          sysfs_array_state = "\000\000\377\377", '\000' <repeats 15 times>}
        devinfo = {array = {major_version = -142323768, minor_version = 32767, 
patch_version = 0, ctime = 0, level = -2147483646, size = 0, 
            nr_disks = 4706142, raid_disks = 0, md_minor = 4, not_persistent = 
0, utime = 4272203, state = 0, active_disks = -10000, 
            working_disks = 32767, failed_disks = -136412540, spare_disks = 
32767, layout = -142323768, chunk_size = 32767}, disk = {
            number = -134225984, major = 32767, minor = -9856, raid_disk = 
32767, state = -10144}, events = 140737488345192, uuid = {
            -10145, 32767, -136395088, 32767}, 
          name = 
"p\330\377\377\377\177\000\000\310d\377\367\377\177\000\000\225W\275\367\002\000\000\000`\330\377\377\377\177\000\000t",
 
          data_offset = 140737488345183, new_data_offset = 1627, component_size 
= 140737354099120, custom_array_size = 140737345977728, 
          reshape_active = -142323768, reshape_progress = 140737351919787, 
recovery_blocked = 1627, journal_device_required = 0, 
          journal_clean = -142323768, space_before = 140737354099120, 
space_after = 140737488345144, {resync_start = 140737488345140, 
            recovery_start = 140737488345140}, bitmap_offset = 140737351918145, 
safe_mode_delay = 7, new_level = 4199571, delta_disks = 0, 
          new_layout = 4196120, new_chunk = 0, errors = -10184, cache_size = 
4034106092, mismatch_cnt = 63032907, 
          text_version = 
"\000\000\000\000,\000\000\000\000\000\000\000\020\331\377\377\377\177\000\000\310O\204\367\377\177\000\000\200}\203\367\377\177\000\000\064\330\377\377\377\177\000\000\000\331\377\377\377\177",
 container_member = -134254856, container_enough = 32767, 
          sys_name = 
"\004\000\000\000\000\000\000\000ibcm\000\000\000\000o.4\000\377\177\000\000\376\377\377\377\000\000\000",
 devs = 0x0, 
          next = 0x7fffffffd998, recovery_fd = -134224704, state_fd = 32767, 
prev_state = -9824, curr_state = 32767, 
          next_state = -134254776, sysfs_array_state = 
"\377\177\000\000\000\000\000\000\000\000\000\000h\341\377\367\377\177\000"}
        frozen = 1
        busy = 0
        raid_slot = -1
#4  0x0000000000406948 in main (argc=4, argv=0x7fffffffe148) at mdadm.c:1368
        mode = 4
        opt = -1
        option_index = -1
        rv = 0
        i = 0
        array_size = 0
        data_offset = 1
        ident = {devname = 0x7fffffffdff8 "\340C\204", <incomplete sequence 
\367>, uuid_set = 0, uuid = {32767, 2, 0, -134254776}, 
          name = "\000\177\000\000\001", '\000' <repeats 15 times>, 
"\001\000\000\000\000\000\000\000h\341\377\367\377", 
          super_minor = 65534, devices = 0x0, level = 65534, raid_disks = 
65534, spare_disks = 0, st = 0x0, autof = 0, spare_group = 0x0, 
          bitmap_file = 0x0, bitmap_fd = -1, container = 0x0, member = 0x0, 
next = 0x7ffff7ffe168, {assembled = -142326816}}
        configfile = 0x0
        devmode = 97
        bitmap_fd = -1
        devlist = 0x6b0010
        devlistend = 0x6b0060
        dv = 0x6b0040
        devs_found = 2
        symlinks = 0x0
        grow_continue = 0
        c = {readonly = 0, runstop = 0, verbose = 0, brief = 0, force = 0, 
homehost = 0x7fffffffdcd0 "Zia", require_homehost = 1, 
          prefer = 0x0, export = 0, test = 0, subarray = 0x0, update = 0x0, 
scan = 0, SparcAdjust = 0, autof = 0, delay = 0, 
          freeze_reshape = 0, backup_file = 0x0, invalid_backup = 0, action = 
0x0, nodes = 0, homecluster = 0x0}
        s = {raiddisks = 0, sparedisks = 0, journaldisks = 0, level = 65534, 
layout = 65534, layout_str = 0x0, chunk = 0, 
          bitmap_chunk = 65534, bitmap_file = 0x0, assume_clean = 0, 
write_behind = 0, size = 0}
        sys_hostname = 
"Zia\000\377\177\000\000\360\303\373\367\377\177\000\000\000\000\000\000\000\000\000\000\330\331\377\367\377\177\000\000\340\336\377\377\377\177\000\000\217-\336\367\377\177\000\000\002\000\000\000\000\000\000\000\360\303\373\367\377\177\000\000\001",
 '\000' <repeats 15 times>, 
"\001\000\000\000\000\000\000\000\330\331\377\367\377\177\000\000\000\000 
\271\377\377\377\377\000\000\342\004\275\357\377\377`\\i", '\000' <repeats 13 
times>, 
"\300\344\377\367\377\177\000\000\220\335\377\377\377\177\000\000\000\000\200\271\001\000\000\000\200\335\377\377\377\177\000\000\307\016\340=\000\000\000\000t
 
\336\367\377\177\000\000\377\377\377\377\000\000\000\000D\b\000\000\000\000\000\000\260i\377\367\377\177\000\000"...
        mailaddr = 0x0
        program = 0x0
        increments = 20
        daemonise = 0
        pidfile = 0x0
        oneshot = 0
        spare_sharing = 1
        ss = 0x0
        writemostly = 0
        shortopt = 0x6965a0 <short_bitmap_options> 
"-ABCDEFGIQhVXYWZ:vqb:c:i:l:p:m:n:x:u:c:d:z:U:N:sarfRSow1tye:"
        dosyslog = 0
        rebuild_map = 0
        remove_path = 0x0
        udev_filename = 0x0
        dump_directory = 0x0
        print_help = 0
        outf = 0x0
        mdfd = 3
2352            bitmap_super_t *bms = 
(bitmap_super_t*)(((char*)sb)+MAX_SB_SIZE);
2353            int rv = 0;
$3 = {magic = 1836345698, version = 4, uuid = 
"\310@\320\336\006&׃?\033(\334\305\354d\232", events = 1124486, events_cleared 
= 1124486, 
  sync_size = 3906011136, state = 0, chunksize = 2097152, daemon_sleep = 5, 
write_behind = 0, sectors_reserved = 0, nodes = 0, 
  cluster_name = '\000' <repeats 63 times>, pad = '\000' <repeats 119 times>}
$4 = (void *) 0x6c7000
2357            unsigned int i = 0;
2360            switch (update) {
2373                    if (st->minor_version != 2 && bms->version == 
BITMAP_MAJOR_CLUSTERED) {
2378                    if (bms->version == BITMAP_MAJOR_CLUSTERED) {
2394                            if (st->nodes)
No symbol "BITMAP_MAJOR_CLUSTERED" in current context.
$5 = 4
2396                            break;
2419            init_afd(&afd, fd);
2421            locate_bitmap1(st, fd, 0);
$6 = {fd = 5, blk_sz = 512}
2423            if (posix_memalign(&buf, 4096, 4096))
$7 = (struct supertype *) 0x6b0780
$8 = {ss = 0x69c060 <super1>, minor_version = 0, max_devs = 1920, 
container_devnm = '\000' <repeats 31 times>, sb = 0x6c7000, 
  info = 0x6c6450, other = 0x0, devsize = 0, data_offset = 0, ignore_hw_compat 
= 0, updates = 0x0, update_tail = 0x0, arrays = 0x0, 
  sock = 0, devnm = "md127", '\000' <repeats 26 times>, devcnt = 0, retry_soon 
= 0, nodes = 0, cluster_name = 0x0, devs = 0x0}
2430                    if (i)
2433                            memset(buf, 0xff, 4096);
2434                    memcpy(buf, (char *)bms, sizeof(bitmap_super_t));
2436                    towrite = calc_bitmap_size(bms, 4096);
2437                    while (towrite > 0) {
$9 = 122880
2438                            n = towrite;
2439                            if (n > 4096)
2440                                    n = 4096;
2441                            n = awrite(&afd, buf, n);
2442                            if (n > 0)
2443                                    towrite -= n;
2446                            if (i)
2449                                    memset(buf, 0xff, 4096);
2437                    while (towrite > 0) {
2438                            n = towrite;
2439                            if (n > 4096)
2440                                    n = 4096;
2441                            n = awrite(&afd, buf, n);
2442                            if (n > 0)
2443                                    towrite -= n;
2446                            if (i)
2449                                    memset(buf, 0xff, 4096);
2437                    while (towrite > 0) {
2438                            n = towrite;
2439                            if (n > 4096)
2440                                    n = 4096;
2441                            n = awrite(&afd, buf, n);
2442                            if (n > 0)
2443                                    towrite -= n;
2446                            if (i)
2449                                    memset(buf, 0xff, 4096);
2437                    while (towrite > 0) {
2438                            n = towrite;
2439                            if (n > 4096)
$10 = 110592
Continue program being debugged, after signal or breakpoint.
Usage: continue [N]
If proceeding from breakpoint, a number N may be used as an argument,
which means to set the ignore count of that breakpoint to N - 1 (so that
the breakpoint won't break until the Nth time it is reached).

If non-stop mode is enabled, continue only the current thread,
otherwise all the threads in the program are continued.  To 
continue all stopped threads in non-stop mode, use the -a option.
Specifying -a and an ignore count simultaneously is an error.
Execute until the program reaches a source line greater than the current
or a specified location (same args as break command) within the current frame.
write_bitmap1 (st=0x6b0780, fd=5, update=NodeNumUpdate) at super1.c:2451
2451                    fsync(fd);
Continuing.
[Inferior 1 (process 23866) exited with code 01]
Breakpoint 2 at 0x440d25: file super1.c, line 165.
Starting program: /var/tmp/mdadm/mdadm/mdadm -a /dev/md/pv0 /dev/sdc3

Breakpoint 1, write_bitmap1 (st=0x6b0780, fd=5, update=NodeNumUpdate) at 
super1.c:2351
2351            struct mdp_superblock_1 *sb = st->sb;
Continuing.

Breakpoint 2, calc_bitmap_size (bms=0x6c8000, boundary=4096) at super1.c:165
165             bits = __le64_to_cpu(bms->sync_size) / 
(__le32_to_cpu(bms->chunksize)>>9);
bms = 0x6c8000
boundary = 4096
$11 = {magic = 1836345698, version = 4, uuid = 
"\310@\320\336\006&׃?\033(\334\305\354d\232", events = 1124486, events_cleared 
= 1124486, 
  sync_size = 3906011136, state = 0, chunksize = 2097152, daemon_sleep = 5, 
write_behind = 0, sectors_reserved = 0, nodes = 0, 
  cluster_name = '\000' <repeats 63 times>, pad = '\000' <repeats 119 times>}
166             bytes = (bits+7) >> 3;
167             bytes += sizeof(bitmap_super_t);
168             bytes = ROUND_UP(bytes, boundary);
$12 = 119458
170             return bytes;
$13 = 122880
Continuing.
[Inferior 1 (process 25040) exited with code 01]
quit

Reply via email to