We are working on it and have made good progress.
You can see the diff in the attachment.

On Mon, Sep 6, 2010 at 7:42 PM, martin f krafft <[email protected]> wrote:

> also sprach martin f krafft <[email protected]> [2010.09.06.1935 +0200]:
> > 2. Your solution works, and it's exactly what the (useless)
> >    /usr/bin/hostid should do. I am a little sceptical about using
> >    a non-standard approach for this.
>
> This seems better:
>
> 06 19:40 < jcristau> madduck: echo $(($RANDOM % 28)) >
> /var/lib/mdadm/mdadmday on first install
>
> And I think it could even be exported with debconf (priority low).
>
> Want to send a patch? ;)
>
> --
>  .''`.   martin f. krafft <[email protected]>      Related projects:
> : :'  :  proud Debian developer               http://debiansystem.info
> `. `'`   
> http://people.debian.org/~madduck
> http://vcs-pkg.org
>  `-  Debian - when you have better things to do than fixing systems
>
> -----BEGIN PGP SIGNATURE-----
> Version: GnuPG v1.4.10 (GNU/Linux)
>
> iEYEAREDAAYFAkyFKAQACgkQIgvIgzMMSnVCdwCfa5wh8TiHsW1T8gmDx3PTTS3g
> kdMAoIB53DPeRmcTRU1PiR0xKDTHqGk/
> =LAnm
> -----END PGP SIGNATURE-----
>
> _______________________________________________
> pkg-mdadm-devel mailing list
> [email protected]
> http://lists.alioth.debian.org/mailman/listinfo/pkg-mdadm-devel
>
--- checkarray.orig2	2010-08-29 23:38:13.345157837 +0200
+++ checkarray	2010-09-07 00:16:15.892806704 +0200
@@ -8,6 +8,7 @@
 set -eu
 
 PROGNAME=${0##*/}
+CANCEL_FILE=/var/lib/mdadm/cancelled
 
 about()
 {
@@ -53,6 +54,68 @@
   echo "You can also control the status of a check with /proc/mdstat ."
 }
 
+# setup_array MODE ACTION SYNC_MIN SYNC_MAX ARRAY: write the check window (and optionally ACTION) under /sys/block/ARRAY/md/
+# MODE "b" writes ACTION to sync_action before sync_min/sync_max, MODE "a" writes it after them;
+# any other MODE (by convention "n") leaves sync_action alone. All expansions are quoted so no argument is word-split or globbed.
+setup_array() {
+  [ "$1" = b ] && echo "$2" > "/sys/block/$5/md/sync_action"
+  echo "$3" > "/sys/block/$5/md/sync_min"
+  echo "$4" > "/sys/block/$5/md/sync_max"
+  [ "$1" = a ] && echo "$2" > "/sys/block/$5/md/sync_action"
+  return 0
+}
+
+# set the array to idle and restore sync_{min,max} to 0 and max; $1 is $array (a function, so it does not depend on alias expansion)
+cleanup_array() { setup_array b idle 0 max "$@"; }
+
+# start an array check; $1 is sync_min, $2 sync_max, $3 $array (a function, so it does not depend on alias expansion)
+start_check() { setup_array a check "$@"; }
+
+# wait_completed ARRAY NEXT_LAST_SECTOR SAVE_FILE: must be called when the check is split, because when sync_max is not
+# 'max' the check just pauses, waiting for sync_max to be raised, and sync_action is not restored to idle.
+# Polls sync_completed every 3 seconds, stores the sector to resume from in SAVE_FILE, then resets ARRAY with cleanup_array.
+wait_completed() {
+  sleep 3
+  # the loop condition stops on 'none': when the window ends at the array size, sync_action goes back
+  # to 'idle' by itself (presumably a cancel ends up here too, to be confirmed);
+  # the inner test stops once the first field of sync_completed has reached $2
+  while [ "$(cat "/sys/block/$1/md/sync_completed")" != "none" ] ; do
+    # remembered so that a cancelled check can later resume from this sector
+    last_checked=$(cut -d ' ' -f1 "/sys/block/$1/md/sync_completed")
+    # compare only after the 'none' test above, otherwise -ne might complain that its operand is not a number
+    if [ "$last_checked" -ne "$2" ] ; then
+      sleep 3
+      # inotifywait might be used here instead of sleep, with something like
+      # inotifywait -q -e modify /sys/block/$1/md/sync_completed | :
+      # NOTE(review): the author added "| :" believing inotifywait exits with 1 on success; untested, verify against its man page
+    else
+      break
+    fi
+  done
+  # on cancel save the last sector seen, but only if it is a number: it is unset (fatal under set -u) when the loop never ran
+  if [ -e "$CANCEL_FILE.$1" ] ; then
+    case "${last_checked:-}" in ''|*[!0-9]*) ;; *) echo "$last_checked" > "$3" ;; esac
+    rm -rf "$CANCEL_FILE.$1"
+  else
+    echo "$2" > "$3"
+  fi
+  cleanup_array "$1"
+  [ "$quiet" -lt 1 ] && echo "$PROGNAME: I: check for array $1 terminated" >&2
+  return 0
+}
+
+# get_size ARRAY CHUNK_SIZE: start a throw-away check limited to sectors 0..CHUNK_SIZE and print the third field of
+# sync_completed as the array size (not equal to the array size reported in mdstat and by mdadm -D), then reset the array.
+# Prints the size and returns 0; prints nothing and returns 1 when no number could be read.
+# TODO please find or implement in md-mod a better way to get this
+get_size() {
+  local a_size
+  start_check 0 "$2" "$1"
+  a_size=$(cut -s -d ' ' -f3 "/sys/block/$1/md/sync_completed") || a_size=  # -s: a line with no space (e.g. "none") yields nothing
+  cleanup_array "$1"
+  case "$a_size" in ''|*[!0-9]*) return 1 ;; *) echo "$a_size" ;; esac  # never hand a non-number to the caller's arithmetic
+}
+
 SHORTOPTS=achVqQsxilf
 LONGOPTS=all,cron,help,version,quiet,real-quiet,status,cancel,idle,slow,fast,realtime
 
@@ -165,7 +228,9 @@
 
   case "$action" in
     idle)
-      echo $action > $SYNC_ACTION_CTL
+      cleanup_array $array
+      # we must remember that the check was cancelled, so that wait_completed can tell
+      touch $CANCEL_FILE.$array || :  # TODO how to handle this error ?
       [ $quiet -lt 1 ] && echo "$PROGNAME: I: cancel request queued for array $array." >&2
       ;;
 
@@ -174,11 +239,48 @@
         [ $quiet -lt 2 ] && echo "$PROGNAME: W: array $array not idle, skipping..." >&2
         continue
       fi
+      # we don't need this anymore
+      [ -e $CANCEL_FILE.$array ] && rm -rf $CANCEL_FILE.$array
+
+      # let's split the check
+      # this is the file where we save/read the last checked sector from the last run
+      save_file=/var/lib/mdadm/last_sector.$array
+      # how many times in a month we want to do the check?
+      CHECK_SPLIT=28 #TODO remove from this file and put it in /etc/default/mdadm, it must be between 1 and 28, modify cron too
+      [ $CHECK_SPLIT -gt 28 ] && CHECK_SPLIT=28
+      [ $CHECK_SPLIT -lt 1 ] && CHECK_SPLIT=1
+      chunk_size=$(cat /sys/block/$array/md/chunk_size)
+      # not all RAID levels have a chunk_size (for example RAID 1); if so, set it to 1
+      [ $chunk_size -lt 1 ] && chunk_size=1
+      # get the array size
+      #asize=$(mdadm -D /dev/$array | awk '/Array Size : / { print $4 }')
+      asize=$(get_size $array $chunk_size 2>/dev/null)
+      [ $? -ne 0 ] && exit 1
+      # now calculate how many sectors should be checked in this run (asize / CHECK_SPLIT)
+      check_size=$(($asize/$CHECK_SPLIT))
+      # $save_file must have +rw perms
+      if [ -r $save_file -a -w $save_file ] ; then
+        last_sector=$(cat $save_file)
+      elif [ ! $(echo 0 > "$save_file") ] ; then
+        #this is likely the first run, so we begin from zero
+        last_sector=0
+      else
+        # TODO print error in syslog too with logger ?
+        echo "$PROGNAME: E: $save_file has wrong perms or it cannot be created" >&2
+        continue
+      fi
+      [ $last_sector -lt 0 -o $last_sector -ge $asize -o -z "$last_sector" ] && last_sector=0
+      # be pedantic, check that last_sector is a chunk_size multiple
+      last_sector=$(($last_sector-$last_sector%$chunk_size))
+      next_last_sector=$(($last_sector+$check_size))
+      # we must round it to a multiple of the chunk_size
+      next_last_sector=$(($next_last_sector+$chunk_size-$next_last_sector%$chunk_size))
+      [ $next_last_sector -gt $asize ] && next_last_sector=$asize
 
       # queue request for the array. The kernel will make sure that these requests
       # are properly queued so as to not kill one of the array.
-      echo $action > $SYNC_ACTION_CTL
-      [ $quiet -lt 1 ] && echo "$PROGNAME: I: check queued for array $array." >&2
+      start_check $last_sector $next_last_sector $array
+      [ $quiet -lt 1 ] && echo "$PROGNAME: I: check queued for array $array, from sector $last_sector to $next_last_sector." >&2
 
       case "$ionice" in
         idle) arg='-c3';;
@@ -199,6 +301,7 @@
         fi
         sleep 1
       done
+      wait_completed $array $next_last_sector $save_file &
       ;;
   esac

Reply via email to