Hi all,

background:

   1. lustre(2.15.5) + corosync(3.1.5) + pacemaker(2.1.0-8.el8) +
   pcs(0.10.8)
   2. there are 11 nodes in total, divided into 3 groups. If a node fails
   within a group, the resources can only be taken over by nodes within that
   group.
   3. Each node has 2 MDTs and 16 OSTs.

Issues:

   1. The resource configuration time progressively increases.  the second
   mdt-0  cost  only   8s,the last ost-175 cost  1min:37s
   2. The total time taken for the configuration is approximately 2 hours
   and 31 minutes. Is there a way to improve it?


attachment:
create bash: pcs_create.sh
create log: pcs_create.log
#!/bin/bash


source /opt/storage/lustre/conf/install-pcs.conf              #请根据实际情况调整相关内容
input_file="/opt/storage/lustre/conf/lustre-nvme.info"                       # 定义输入文件

echo "pcs create begin ($(date))"

# 将 mgs_nodes 转换为数组
IFS=',' read -ra mgs_array <<< "$mgs_nodes"

# 使用 IFS 和 read 命令将 host_groups 按分号拆分为数组
IFS=';' read -ra host_groups_array <<< "$host_groups"

# 获取总的组个数
total_groups=${#host_groups_array[@]}

total_hosts=0
for group in "${host_groups_array[@]}"; do
  IFS=',' read -ra nodes <<< "$group"
  total_hosts=$((total_hosts + ${#nodes[@]}))
done

all_nodes=()
# 遍历每个主机组并将主机添加到all_nodes数组中
for group in "${host_groups_array[@]}"; do
  # 将每个组按逗号分割为单独的主机
  IFS=',' read -ra nodes <<< "$group"
  all_nodes+=("${nodes[@]}")
done



# 逐行读取文件
while read -r line; do
    # 跳过标题行和空行
    if [[ "$line" =~ ^(lustre_name|service|$) ]]; then
        continue
    fi
    
    # 使用awk提取字段
    role=$(echo "$line" | awk '{print $1}')
    vol_name=$(echo "$line" | awk '{print $3}')
    dev_name=$(echo "$line" | awk '{print $4}')

    # 从vol_name提取最后的数字
    last_number=$(echo "$vol_name" | grep -o -E '[0-9]+$')

    # 根据角色生成目录名称和pcs命令
    case "$role" in
        MGS)
            echo mgs
            directory="/lustre/mgs"
            pcs resource create mgs "ocf:heartbeat:Filesystem" device="$dev_name" directory="$directory" fstype="lustre" --disabled
            pcs resource update mgs op start timeout=300s stop timeout=300s monitor timeout=300s
            pcs resource update mgs meta migration-threshold=0

            first_node=${mgs_array[0]}
            pcs constraint location mgs prefers $first_node=2000
            for node in "${all_nodes[@]}"; do
              if [[ ! " ${mgs_array[*]} " =~ " $node " ]]; then
                pcs constraint location mgs avoids $node
              fi
            done

            pcs resource enable mgs
            ;;
        MDS)
            echo mdt-$last_number
	    echo "1 ($(date))"
            directory="/lustre/mdt-$last_number"
            pcs resource create mdt-$last_number "ocf:heartbeat:Filesystem" device="$dev_name" directory="$directory" fstype="lustre" --disabled
            pcs resource update mdt-$last_number op start timeout=300s stop timeout=300s monitor timeout=300s
            pcs resource update mdt-$last_number meta migration-threshold=0
            pcs constraint order start mgs then mdt-$last_number kind=Mandatory symmetrical=false > /dev/null
            echo "2 ($(date))"
            if [[ $first_node_have_mdt == "false" ]]; then
               host_number=$(( last_number % (total_hosts-1) ))
               host_number=$((host_number+1))
            else
               host_number=$(( last_number % total_hosts ))
            fi
            host_node="${all_nodes[$host_number]}"

            #避免在第一个主机上运行
            if [[ $first_node_have_mdt == "false" ]]; then
               group="${host_groups_array[0]}"
               IFS=',' read -ra nodes <<< "$group"
               if [[ " ${nodes[*]} " == *" $host_node "* ]]; then
                 first_node=${all_nodes[0]}
                 pcs constraint location mdt-$last_number avoids $first_node
               fi
            fi
	    echo "3 ($(date))"

            # 配置位置限制
            pcs constraint location mdt-$last_number prefers $host_node=600
            for i in "${!host_groups_array[@]}"; do
              group="${host_groups_array[$i]}"
              IFS=',' read -ra nodes <<< "$group"
              if [[ " ${nodes[*]} " != *" $host_node "* ]]; then
                for node in "${nodes[@]}"; do
                   pcs constraint location mdt-$last_number avoids $node
                done
              fi
            done
	    echo "4 ($(date))"
            pcs resource enable mdt-$last_number
	    echo "5 ($(date))"
            ;;
        OSS)
            echo ost-$last_number
	    echo "1 ($(date))"
            directory="/lustre/ost-$last_number"
            pcs resource create ost-$last_number "ocf:heartbeat:Filesystem" device="$dev_name" directory="$directory" fstype="lustre" --disabled
            pcs resource update ost-$last_number op start timeout=300s stop timeout=300s monitor timeout=300s
            pcs resource update ost-$last_number meta migration-threshold=0
            pcs constraint order start mgs then ost-$last_number kind=Mandatory symmetrical=false > /dev/null

            host_number=$(( last_number % total_hosts ))  # 计算资源应分配的主机序号
            host_node="${all_nodes[$host_number]}"
            
	    echo "2 ($(date))"
            # 配置位置限制
            pcs constraint location ost-$last_number prefers $host_node=600
            for i in "${!host_groups_array[@]}"; do
              group="${host_groups_array[$i]}"
              IFS=',' read -ra nodes <<< "$group"
              if [[ " ${nodes[*]} " != *" $host_node "* ]]; then
                for node in "${nodes[@]}"; do
                   pcs constraint location ost-$last_number avoids $node
                done
              fi
            done
	    echo "3 ($(date))"
            pcs resource enable ost-$last_number
	    echo "4 ($(date))"
            ;;
        *)
            echo "未知角色:$role"
            ;;
    esac

done < "$input_file"

echo "pcs create end ($(date))"

Attachment: pcs_create.log
Description: Binary data

_______________________________________________
Manage your subscription:
https://lists.clusterlabs.org/mailman/listinfo/users

ClusterLabs home: https://www.clusterlabs.org/

Reply via email to