Hi all, background:
1. lustre(2.15.5) + corosync(3.1.5) + pacemaker(2.1.0-8.el8) + pcs(0.10.8) 2. there are 11 nodes in total, divided into 3 groups. If a node fails within a group, the resources can only be taken over by nodes within that group. 3. Each node has 2 MDTs and 16 OSTs. Issues: 1. The resource configuration time progressively increases. the second mdt-0 cost only 8s,the last ost-175 cost 1min:37s 2. The total time taken for the configuration is approximately 2 hours and 31 minutes. Is there a way to improve it? attachment: create bash: pcs_create.sh create log: pcs_create.log
#!/bin/bash source /opt/storage/lustre/conf/install-pcs.conf #请根据实际情况调整相关内容 input_file="/opt/storage/lustre/conf/lustre-nvme.info" # 定义输入文件 echo "pcs create begin ($(date))" # 将 mgs_nodes 转换为数组 IFS=',' read -ra mgs_array <<< "$mgs_nodes" # 使用 IFS 和 read 命令将 host_groups 按分号拆分为数组 IFS=';' read -ra host_groups_array <<< "$host_groups" # 获取总的组个数 total_groups=${#host_groups_array[@]} total_hosts=0 for group in "${host_groups_array[@]}"; do IFS=',' read -ra nodes <<< "$group" total_hosts=$((total_hosts + ${#nodes[@]})) done all_nodes=() # 遍历每个主机组并将主机添加到all_nodes数组中 for group in "${host_groups_array[@]}"; do # 将每个组按逗号分割为单独的主机 IFS=',' read -ra nodes <<< "$group" all_nodes+=("${nodes[@]}") done # 逐行读取文件 while read -r line; do # 跳过标题行和空行 if [[ "$line" =~ ^(lustre_name|service|$) ]]; then continue fi # 使用awk提取字段 role=$(echo "$line" | awk '{print $1}') vol_name=$(echo "$line" | awk '{print $3}') dev_name=$(echo "$line" | awk '{print $4}') # 从vol_name提取最后的数字 last_number=$(echo "$vol_name" | grep -o -E '[0-9]+$') # 根据角色生成目录名称和pcs命令 case "$role" in MGS) echo mgs directory="/lustre/mgs" pcs resource create mgs "ocf:heartbeat:Filesystem" device="$dev_name" directory="$directory" fstype="lustre" --disabled pcs resource update mgs op start timeout=300s stop timeout=300s monitor timeout=300s pcs resource update mgs meta migration-threshold=0 first_node=${mgs_array[0]} pcs constraint location mgs prefers $first_node=2000 for node in "${all_nodes[@]}"; do if [[ ! " ${mgs_array[*]} " =~ " $node " ]]; then pcs constraint location mgs avoids $node fi done pcs resource enable mgs ;; MDS) echo mdt-$last_number echo "1 ($(date))" directory="/lustre/mdt-$last_number" pcs resource create mdt-$last_number "ocf:heartbeat:Filesystem" device="$dev_name" directory="$directory" fstype="lustre" --disabled pcs resource update mdt-$last_number op start timeout=300s stop timeout=300s monitor timeout=300s pcs resource update mdt-$last_number meta migration-threshold=0 pcs constraint order start mgs then mdt-$last_number kind=Mandatory symmetrical=false > /dev/null echo "2 ($(date))" if [[ $first_node_have_mdt == "false" ]]; then host_number=$(( last_number % (total_hosts-1) )) host_number=$((host_number+1)) else host_number=$(( last_number % total_hosts )) fi host_node="${all_nodes[$host_number]}" #避免在第一个主机上运行 if [[ $first_node_have_mdt == "false" ]]; then group="${host_groups_array[0]}" IFS=',' read -ra nodes <<< "$group" if [[ " ${nodes[*]} " == *" $host_node "* ]]; then first_node=${all_nodes[0]} pcs constraint location mdt-$last_number avoids $first_node fi fi echo "3 ($(date))" # 配置位置限制 pcs constraint location mdt-$last_number prefers $host_node=600 for i in "${!host_groups_array[@]}"; do group="${host_groups_array[$i]}" IFS=',' read -ra nodes <<< "$group" if [[ " ${nodes[*]} " != *" $host_node "* ]]; then for node in "${nodes[@]}"; do pcs constraint location mdt-$last_number avoids $node done fi done echo "4 ($(date))" pcs resource enable mdt-$last_number echo "5 ($(date))" ;; OSS) echo ost-$last_number echo "1 ($(date))" directory="/lustre/ost-$last_number" pcs resource create ost-$last_number "ocf:heartbeat:Filesystem" device="$dev_name" directory="$directory" fstype="lustre" --disabled pcs resource update ost-$last_number op start timeout=300s stop timeout=300s monitor timeout=300s pcs resource update ost-$last_number meta migration-threshold=0 pcs constraint order start mgs then ost-$last_number kind=Mandatory symmetrical=false > /dev/null host_number=$(( last_number % total_hosts )) # 计算资源应分配的主机序号 host_node="${all_nodes[$host_number]}" echo "2 ($(date))" # 配置位置限制 pcs constraint location ost-$last_number prefers $host_node=600 for i in "${!host_groups_array[@]}"; do group="${host_groups_array[$i]}" IFS=',' read -ra nodes <<< "$group" if [[ " ${nodes[*]} " != *" $host_node "* ]]; then for node in "${nodes[@]}"; do pcs constraint location ost-$last_number avoids $node done fi done echo "3 ($(date))" pcs resource enable ost-$last_number echo "4 ($(date))" ;; *) echo "未知角色:$role" ;; esac done < "$input_file" echo "pcs create end ($(date))"
pcs_create.log
Description: Binary data
_______________________________________________ Manage your subscription: https://lists.clusterlabs.org/mailman/listinfo/users ClusterLabs home: https://www.clusterlabs.org/