On 2020-04-18 11:07, Erik Rijkers wrote:
Hi Erik,

While setting up the cascading replication I have hit one issue on
base code[1].  After fixing that I have got one crash with streaming
on patch.  I am not sure whether you are facing any of these 2 issues
or any other issue.  If your issue is not any of these then please
share the callstack and steps to reproduce.

I figured out a few things about this. Attached is a bash script
test.sh, to reproduce:

And the attached file, test.sh.  (sorry)

There is a variable  CRASH_IT  that determines whether the whole thing
will fail (with a segmentation fault) or not.  As attached it has
CRASH_IT=0 and does not crash.  When you change that to CRASH_IT=1,
then it will crash.  It turns out that this just depends on a short
wait state (3 seconds, on my machine) between setting up the
replication, and the running of pgbench.  It's possible that on very
fast machines maybe it does not occur; we've had such differences
between hardware before. This is an i5-3330S.

It deletes files so look it over before you run it.  It may also
depend on some of my local set-up but I guess that should be easily
fixed.

Can you let me know if you can reproduce the problem with this?

thanks,

Erik Rijkers




[1]
https://www.postgresql.org/message-id/CAFiTN-u64S5bUiPL1q5kwpHNd0hRnf1OE-bzxNiOs5zo84i51w%40mail.gmail.com


--
Regards,
Dilip Kumar
EnterpriseDB: http://www.enterprisedb.com
#!/bin/bash
# Configuration for a multi-instance logical-replication test.
# WARNING: the script removes the instance directories it creates under
# $TMP_DIR when it finishes -- read it before running it.

# Drop connection defaults inherited from the caller's environment.
unset PGSERVICE PGSERVICEFILE PGDATA PGPORT PGDATABASE
# PGPASSFILE must be set and have the appropriate entries

# Show the PG* variables that are still set.
env | grep -E '^PG'

PROJECT=large_logical
# PROJECT=HEAD
NUM_INSTANCES=3

# Locations of the installation under test and of the scratch area.
BIN_DIR="$HOME/pg_stuff/pg_installations/pgsql.$PROJECT/bin"
POSTGRES="$BIN_DIR/postgres"
INITDB="$BIN_DIR/initdb"
TMP_DIR="$HOME/tmp/$PROJECT"
devel_file="$TMP_DIR/.devel"   # password file handed to initdb --pwfile

BASE_PORT=6015  #   ports 6015, 6016, 6017
# BASE_PORT=6515
port1=$(( BASE_PORT + 0 ))
port2=$(( port1 + 1 ))
port3=$(( port1 + 2 ))

scale=1
dbname=postgres
pubname=pub1
subname=sub1

# -p also creates missing parents (e.g. $HOME/tmp) and is a no-op when the
# directory already exists, so no separate existence test is needed.
mkdir -p -- "$TMP_DIR"
printf '%s\n' 's3kr1t' > "$devel_file"

# Server settings passed on the postgres command line.
max_wal_senders=10                    # publication side
max_replication_slots=10              # publication side and subscription side
max_worker_processes=12               # subscription side
max_logical_replication_workers=10    # subscription side
max_sync_workers_per_subscription=4   # subscription side
# Initialise and start NUM_INSTANCES servers on consecutive ports beginning
# at BASE_PORT.  Instance n lives in $TMP_DIR/pgsql.instance<n>; its server
# log goes to that directory and the initdb output to ./initdb.<port>.log.
for n in $(seq 1 "$NUM_INSTANCES"); do
  port=$(( BASE_PORT + n - 1 ))
  server_dir="$TMP_DIR/pgsql.instance${n}"
  data_dir="$server_dir/data"

  "$INITDB" --pgdata="$data_dir" --encoding=UTF8 --auth=scram-sha-256 \
    --pwfile="$devel_file" &> "initdb.$port.log"
  rc=$?
  if [[ $rc -ne 0 ]]; then
    # Print the saved status: at this point $? is the result of the [[ ]]
    # test above (always 0 here), not the exit status of initdb.
    echo "-- initdb $rc"
  fi

  # Start the server in the background from a subshell so the loop carries
  # on with the next instance without waiting for it.
  ( "$POSTGRES" -D "$data_dir" -p "$port" \
    --wal_level=logical \
    --max_replication_slots="$max_replication_slots" \
    --max_worker_processes="$max_worker_processes" \
    --max_logical_replication_workers="$max_logical_replication_workers" \
    --max_wal_senders="$max_wal_senders" \
    --max_sync_workers_per_subscription="$max_sync_workers_per_subscription" \
    --logging_collector=on \
    --log_directory="${server_dir}" \
    --log_filename="logfile.${port}" \
    --log_replication_commands=on \
    --autovacuum=off  & )
#   --logical_work_mem=128MB & )
#   pg_isready -d $dbname --timeout=60 -p $port
done

#sleep $NUM_INSTANCES
#sleep $NUM_INSTANCES

#pg_isready -d $dbname -qp 6015 --timeout=60
#pg_isready -d $dbname -qp 6016 --timeout=60
# Number of downstream instances, i.e. one fewer than the instance count.
num_loop=$(( NUM_INSTANCES - 1 ))

# Create and populate the pgbench tables on the first instance only.
"$BIN_DIR/pgbench" --port="$BASE_PORT" --quiet --initialize --scale="$scale" "$dbname"

# Add a serial primary key "hid" to pgbench_history; the checksum queries
# further down order that table by it.
echo "alter table pgbench_history add column hid serial primary key" \
  | "$BIN_DIR/psql" -d "$dbname" -p "$BASE_PORT" -X
#pg_isready -d $dbname -qp 6015 --timeout=60
#pg_isready -d $dbname -qp 6016 --timeout=60

# Copy the definitions of the four pgbench tables (no row data) from the
# first instance to every other instance.  pg_dump and pg_restore are taken
# from $BIN_DIR, like pgbench and psql above, so the client tools always
# belong to the installation under test rather than to whatever is in PATH.
for n in $(seq 1 "$num_loop"); do
  target_port=$(( BASE_PORT + n ))
  "$BIN_DIR/pg_dump" -Fc -p "$BASE_PORT" \
    --exclude-table-data=pgbench_history  --exclude-table-data=pgbench_accounts \
    --exclude-table-data=pgbench_branches --exclude-table-data=pgbench_tellers \
    -tpgbench_history -tpgbench_accounts \
    -tpgbench_branches -tpgbench_tellers \
    "$dbname" \
    | "$BIN_DIR/pg_restore" -1 -p "$target_port" -d "$dbname"
done

# Build the replication chain: for hop n, the instance on port
# BASE_PORT+n-1 publishes all tables and the instance on port BASE_PORT+n
# subscribes to it.  The subscription is created disabled and enabled in a
# separate statement.  psql is taken from $BIN_DIR, matching the psql call
# used for the schema change earlier in the script.
for n in $(seq 1 "$num_loop"); do
  pubport=$(( BASE_PORT + n - 1 ))
  subport=$(( BASE_PORT + n ))
  # application_name shown for this hop, e.g. casc:6016<6015
  appname="casc:${subport}<${pubport}"

  echo "create publication  $pubname for all tables" \
    | "$BIN_DIR/psql" -d "$dbname" -p "$pubport" -X

  echo "create subscription $subname
        connection 'port=${pubport} dbname=${dbname} application_name=${appname}'
        publication $pubname with (enabled=false, slot_name=${subname}_${subport});" \
    | "$BIN_DIR/psql" -d "$dbname" -p "$subport" -X

  echo "alter subscription $subname enable; " \
    | "$BIN_DIR/psql" -d "$dbname" -p "$subport" -X
done

# Scenario switch:
#   0 - pause for 3 seconds before the load (no crash)
#   1 - start the load right away (crash)
CRASH_IT=0  # 0: no crash;  1: crash

if (( CRASH_IT != 1 )); then
  # This pause between replication setup and the load avoids the crash.
  printf '%s\n' "-- sleep 3"
  sleep 3
fi

# The load: 32 pgbench clients for 3 seconds against the first instance.
bench=( "$BIN_DIR/pgbench" -c 32 -T 3 -p "$BASE_PORT" "$dbname" )

printf '%s\n' "------------------------ (doing a pgbench run for 3 seconds)"
printf '%s\n' "-- ${bench[*]} -- scale $scale"
"${bench[@]}"
printf '%s\n' "------------------------ (and back from pgbench run)"

# Reference snapshot of the first instance (port $port1), taken once after
# the pgbench run:
#   c_?1   row count of each pgbench table
#   md?1   first 7 hex digits of the md5 of each table's ordered contents
#   md5_1  first 7 hex digits of the md5 over the four values above
# psql is taken from $BIN_DIR, matching the psql call used for the schema
# change earlier in the script.
psql1=( "$BIN_DIR/psql" -d "$dbname" -qtAX -p "$port1" )

c_a1=$( echo "select count(*) from pgbench_accounts" | "${psql1[@]}" )
c_b1=$( echo "select count(*) from pgbench_branches" | "${psql1[@]}" )
c_t1=$( echo "select count(*) from pgbench_tellers " | "${psql1[@]}" )
c_h1=$( echo "select count(*) from pgbench_history " | "${psql1[@]}" )

# NOTE: the history query lists hid twice; it is kept exactly like the
# queries run against the other instances so the checksums stay comparable.
mda1=$( echo "select aid,bid,abalance,filler            from pgbench_accounts order by aid" \
  | "${psql1[@]}" | md5sum | cut -b 1-7 )
mdb1=$( echo "select bid,bbalance,filler                from pgbench_branches order by bid" \
  | "${psql1[@]}" | md5sum | cut -b 1-7 )
mdt1=$( echo "select tid,bid,tbalance,filler            from pgbench_tellers  order by tid" \
  | "${psql1[@]}" | md5sum | cut -b 1-7 )
mdh1=$( echo "select hid,bid,aid,delta,mtime,filler,hid from pgbench_history  order by hid" \
  | "${psql1[@]}" | md5sum | cut -b 1-7 )
md5_1=$( echo "$mda1  $mdb1  $mdt1  $mdh1" | md5sum | cut -b 1-7 )

# Counts are quoted and default to 0 so that an empty result (failed query)
# cannot shift the remaining values into the wrong columns.
printf "a,b,t,h  $port1    %6d  %6d  %6d  %6d   $mda1  $mdb1  $mdt1  $mdh1     $md5_1\n" \
  "${c_a1:-0}" "${c_b1:-0}" "${c_t1:-0}" "${c_h1:-0}"

# Server version string (first 70 characters) of each instance.
ver1=$( echo "select substring(version(),1,70)" | "$BIN_DIR/psql" -d "$dbname" -qtAXp "$port1" )
ver2=$( echo "select substring(version(),1,70)" | "$BIN_DIR/psql" -d "$dbname" -qtAXp "$port2" )
if [[ "$NUM_INSTANCES" -gt 2 ]]; then
  ver3=$( echo "select substring(version(),1,70)" | "$BIN_DIR/psql" -d "$dbname" -qtAXp "$port3" )
fi
echo
# Print the first 7 hex digits of the md5 of a query's output.
#   $1 - port of the instance to query
#   $2 - SQL text
query_md5() {
  echo "$2" | "$BIN_DIR/psql" -d "$dbname" -qtAX -p "$1" | md5sum | cut -b 1-7
}

# Print the number of rows in a table.
#   $1 - port of the instance to query
#   $2 - table name
row_count() {
  echo "select count(*) from $2" | "$BIN_DIR/psql" -d "$dbname" -qtAX -p "$1"
}

# Print one status line (no column shifting when a count is empty).
#   $1 port   $2-$5 row counts (accounts, branches, tellers, history)
#   $6-$9 per-table md5 prefixes   $10 combined md5 prefix   $11 version string
print_row() {
  printf 'a,b,t,h  %s    %7d %6d %6d %6d    %s %s %s %s   %s   ' \
    "$1" "${2:-0}" "${3:-0}" "${4:-0}" "${5:-0}" "$6" "$7" "$8" "$9" "${10}"
  echo "${11}"
}

# Checksum queries.  NOTE: the history query lists hid twice; it is kept
# exactly like the query used for the instance-1 snapshot so that the
# checksums stay comparable.
sql_a="select aid,bid,abalance,filler            from pgbench_accounts order by aid"
sql_b="select bid,bbalance,filler                from pgbench_branches order by bid"
sql_t="select tid,bid,tbalance,filler            from pgbench_tellers  order by tid"
sql_h="select hid,bid,aid,delta,mtime,filler,hid from pgbench_history  order by hid"

# Poll the downstream instances once per second until the combined checksum
# of the last one equals that of instance 1 (md5_1, computed earlier).
rc=0
while [[ $rc -eq 0 ]]; do
# echo "select column_name from information_schema.columns where table_schema = 'public' and table_name = 'pgbench_accounts' order by ordinal_position"|psql -d$dbname -qtAXp$port2
  mda2=$(query_md5 "$port2" "$sql_a")
  mdb2=$(query_md5 "$port2" "$sql_b")
  mdt2=$(query_md5 "$port2" "$sql_t")
  mdh2=$(query_md5 "$port2" "$sql_h")
  c_a2=$(row_count "$port2" pgbench_accounts)
  c_b2=$(row_count "$port2" pgbench_branches)
  c_t2=$(row_count "$port2" pgbench_tellers)
  c_h2=$(row_count "$port2" pgbench_history)
  md5_2=$(echo "$mda2  $mdb2  $mdt2  $mdh2" | md5sum | cut -b 1-7)

  if [[ "$NUM_INSTANCES" -gt 2 ]]; then
    mda3=$(query_md5 "$port3" "$sql_a")
    mdb3=$(query_md5 "$port3" "$sql_b")
    mdt3=$(query_md5 "$port3" "$sql_t")
    mdh3=$(query_md5 "$port3" "$sql_h")
    c_a3=$(row_count "$port3" pgbench_accounts)
    c_b3=$(row_count "$port3" pgbench_branches)
    c_t3=$(row_count "$port3" pgbench_tellers)
    c_h3=$(row_count "$port3" pgbench_history)
    md5_3=$(echo "$mda3  $mdb3  $mdt3  $mdh3" | md5sum | cut -b 1-7)
  fi

  echo "-- $POSTGRES"
  print_row "$port1" "$c_a1" "$c_b1" "$c_t1" "$c_h1" "$mda1" "$mdb1" "$mdt1" "$mdh1" "$md5_1" "$ver1"
  print_row "$port2" "$c_a2" "$c_b2" "$c_t2" "$c_h2" "$mda2" "$mdb2" "$mdt2" "$mdh2" "$md5_2" "$ver2"
  if [[ "$NUM_INSTANCES" -gt 2 ]]; then
    print_row "$port3" "$c_a3" "$c_b3" "$c_t3" "$c_h3" "$mda3" "$mdb3" "$mdt3" "$mdh3" "$md5_3" "$ver3"
  fi

  if [[ "$NUM_INSTANCES" -eq 2 ]]; then
    if [[ "$md5_1" == "$md5_2" ]]; then echo "OK - done."; break; fi
  elif [[ "$NUM_INSTANCES" -eq 3 ]]; then
    if [[ "$md5_1" == "$md5_3" ]]; then echo "OK - done."; break; fi
  else
    # Only instances 2 and 3 are checked above; with any other count no
    # branch could ever break out, so stop instead of polling forever.
    echo "-- cannot verify: NUM_INSTANCES=${NUM_INSTANCES} (only 2 or 3 are supported)" >&2
    break
  fi

  sleep 1
  rc=$?   # a failed/interrupted sleep ends the loop
done

# Tear-down: for every instance, show the lines of its server log that
# contain "fault" (case-insensitive), stop the server, and delete the
# instance directory.

# Set to 0 to keep the instance directories for inspection.
remove_instance_dirs=1

for n in $(seq 1 "$NUM_INSTANCES"); do
  # ${TMP_DIR:?} aborts the script when TMP_DIR is empty or unset, so the
  # rm -rf below can never be pointed at a path directly under "/".
  inst_dir="${TMP_DIR:?TMP_DIR must be set}/pgsql.instance${n}"
  data_dir="$inst_dir/data"

  echo "-- grep -Ei fault $TMP_DIR/pgsql.instance${n}/logf*"
  grep -Ei fault "$inst_dir"/logf*

  sleep 2
  "$BIN_DIR/pg_ctl" stop -w -D "$data_dir"

  # The directory is removed even when the stop failed, so that a later
  # run can initialise the same location again.
  if [[ $remove_instance_dirs -eq 1 ]]; then
    echo "-- removing the lot:  rm -rf $inst_dir"
    rm -rf -- "$inst_dir"
  fi
done


Reply via email to