On August 12, 2025 6:46 pm, Max R. Carrara wrote:
> Introduce a new helper command pve-osd-lvm-enable-autoactivation,
> which gracefully tries to enable autoactivation for all logical
> volumes used by Ceph OSDs while also activating any LVs that aren't
> active yet. Afterwards, the helper attempts to bring all OSDs online.

I think this is probably overkill - this only affects a specific
non-standard setup, the breakage is really obvious, and the fix is easy:
either run lvchange on all those LVs, or recreate the OSDs after the fix
for OSD creation is rolled out.

I.e., the fallout from some edge cases not being handled correctly in
the 200-line helper script here is probably worse than the few setups
that run into the original issue, which we can easily help along
manually.

> 
> Fixes: #6652
> Signed-off-by: Max R. Carrara <m.carr...@proxmox.com>
> ---
>  bin/Makefile                          |   3 +-
>  bin/pve-osd-lvm-enable-autoactivation | 195 ++++++++++++++++++++++++++
>  debian/postinst                       |  16 +++
>  3 files changed, 213 insertions(+), 1 deletion(-)
>  create mode 100644 bin/pve-osd-lvm-enable-autoactivation
> 
> diff --git a/bin/Makefile b/bin/Makefile
> index 777e6759..0a0df34d 100644
> --- a/bin/Makefile
> +++ b/bin/Makefile
> @@ -32,7 +32,8 @@ HELPERS =                   \
>       pve-startall-delay      \
>       pve-init-ceph-crash     \
>       pve-firewall-commit     \
> -     pve-sdn-commit
> +     pve-sdn-commit          \
> +     pve-osd-lvm-enable-autoactivation
>  
>  MIGRATIONS =                 \
>       pve-lvm-disable-autoactivation          \
> diff --git a/bin/pve-osd-lvm-enable-autoactivation 
> b/bin/pve-osd-lvm-enable-autoactivation
> new file mode 100644
> index 00000000..acdc91e8
> --- /dev/null
> +++ b/bin/pve-osd-lvm-enable-autoactivation
> @@ -0,0 +1,195 @@
> +#!/usr/bin/perl
> +
> +use v5.36;
> +
> +use JSON qw(decode_json);
> +
> +use PVE::Tools;
> +
> +my sub ceph_volume_lvm_osd_info : prototype() () {
> +    my $cmd = [
> +        "/usr/sbin/ceph-volume", "lvm", "list", "--format", "json",
> +    ];
> +
> +    my $stdout = '';
> +    my $outfunc = sub($line) {
> +        $stdout .= "$line\n";
> +    };
> +
> +    PVE::Tools::run_command($cmd, timeout => 10, outfunc => $outfunc);
> +    my $osd_info = decode_json($stdout);
> +
> +    return $osd_info;
> +}
> +
> +my sub lvs_from_osd_info : prototype($) ($osd_info) {
> +    my @lvs_for_osds = ();
> +
> +    for my $osd (keys $osd_info->%*) {
> +        my $osd_lvs = $osd_info->{$osd};
> +
> +        for my $osd_lv ($osd_lvs->@*) {
> +            my ($lv_name, $vg_name) = $osd_lv->@{qw(lv_name vg_name)};
> +            push(@lvs_for_osds, "$vg_name/$lv_name");
> +        }
> +    }
> +
> +    return \@lvs_for_osds;
> +}
> +
> +my sub lvs : prototype() () {
> +    my $cmd = [
> +        "/usr/sbin/lvs",
> +        "--noheadings",
> +        "--separator",
> +        ":",
> +        "--options",
> +        "lv_name,vg_name,autoactivation,active",
> +    ];
> +
> +    my $all_lvs = {};
> +
> +    my $outfunc = sub($line) {
> +        $line = PVE::Tools::trim($line);
> +
> +        my ($lv_name, $vg_name, $autoactivation, $active) = split(':', 
> $line, -1);
> +
> +        return undef if ($lv_name eq '' || $vg_name eq '');
> +
> +        $all_lvs->{"$vg_name/$lv_name"} = {
> +            autoactivation => $autoactivation,
> +            active => $active,
> +        };
> +    };
> +
> +    PVE::Tools::run_command(
> +        $cmd,
> +        timeout => 10,
> +        outfunc => $outfunc,
> +    );
> +
> +    return $all_lvs;
> +}
> +
> +my sub main : prototype() () {
> +    my $osd_info = ceph_volume_lvm_osd_info();
> +    my $all_lvs = lvs();
> +
> +    my @osd_lvs_no_autoactivation = ();
> +    my @osd_lvs_inactive = ();
> +
> +    for my $osd (keys $osd_info->%*) {
> +        for my $osd_lv ($osd_info->{$osd}->@*) {
> +            my ($lv_name, $vg_name) = $osd_lv->@{qw(lv_name vg_name)};
> +
> +            my $osd_lv = "$vg_name/$lv_name";
> +
> +            push(@osd_lvs_no_autoactivation, $osd_lv) if 
> !$all_lvs->{$osd_lv}->{autoactivation};
> +         push(@osd_lvs_inactive, $osd_lv) if !$all_lvs->{$osd_lv}->{active};
> +        }
> +    }
> +
> +    my $has_set_autoactivation_err = 0;
> +
> +    # Logical volumes are formatted as "vg_name/lv_name", which is necessary 
> for lvchange
> +    for my $lv (@osd_lvs_no_autoactivation) {
> +        print("Enabling autoactivation for OSD logical volume '$lv' ...\n");
> +
> +        eval {
> +            my $cmd = [
> +                '/usr/sbin/lvchange', '--setautoactivation', 'y', $lv,
> +            ];
> +
> +            PVE::Tools::run_command(
> +                $cmd,
> +                quiet => 1,
> +                timeout => 10,
> +            );
> +        };
> +        if (my $err = $@) {
> +            $has_set_autoactivation_err = 1;
> +
> +            warn("Error: Failed to enable autoactivation for OSD LV 
> '$lv'\n");
> +            warn("$@\n");
> +
> +            next;
> +        }
> +
> +    }
> +
> +    my $has_activation_err = 0;
> +
> +    # Activate any inactive OSD LVs so that ceph-volume can later bring up 
> any failed OSDs
> +    for my $lv (@osd_lvs_inactive) {
> +        print("Activating OSD logical volume '$lv' ...\n");
> +
> +        eval {
> +            my $cmd = [
> +                '/usr/sbin/lvchange', '--activate', 'y', $lv,
> +            ];
> +
> +            PVE::Tools::run_command(
> +                $cmd,
> +                quiet => 1,
> +                timeout => 10,
> +            );
> +        };
> +        if (my $err = $@) {
> +            $has_activation_err = 1;
> +
> +            warn("Error: Failed to activate OSD LV '$lv'\n");
> +            warn("$@\n");
> +
> +            next;
> +        }
> +    }
> +
> +    # ceph-volume requires all LVs used by OSDs to be active,
> +    # so exit in case there are any we couldn't activate
> +    if ($has_set_autoactivation_err || $has_activation_err) {
> +        if ($has_set_autoactivation_err) {
> +            warn("Couldn't enable autoactivation for all OSD LVs.\n");
> +        }
> +
> +        if ($has_activation_err) {
> +            warn("Couldn't activate all OSD LVs.\n");
> +        }
> +
> +        exit 1;
> +    }
> +
> +    print("Activating OSDs ...\n");
> +    eval {
> +        my $cmd = [
> +            "/usr/sbin/ceph-volume", "lvm", "activate", "--all",
> +        ];
> +
> +     # ceph-volume prints everything to stderr for whatever reason and 
> prefixes
> +     # what actually goes to stderr with " stderr:", so separate the output 
> here.
> +     # Lines starting with "Running command:" are removed because they're 
> overly
> +     # verbose.
> +     my $logfunc = sub ($line) {
> +         return if $line =~ m/^Running command:/;
> +
> +         if ($line =~ s/^\s*stderr:\s*//) {
> +             print STDERR "$line\n";
> +         } else {
> +             print STDOUT "$line\n";
> +         }
> +     };
> +
> +        PVE::Tools::run_command(
> +            $cmd,
> +         logfunc => $logfunc,
> +            timeout => 30,
> +        );
> +    };
> +    if (my $err = $@) {
> +        warn("Error while activating all Ceph LVM volumes: $@\n");
> +        exit 1;
> +    }
> +
> +    return undef;
> +}
> +
> +main();
> diff --git a/debian/postinst b/debian/postinst
> index b6e07fd9..f550e7bb 100755
> --- a/debian/postinst
> +++ b/debian/postinst
> @@ -133,6 +133,18 @@ migrate_apt_auth_conf() {
>      fi
>  }
>  
> +ceph_osd_lvm_enable_autoactivation() {
> +    if ! test -e /usr/sbin/ceph-volume; then
> +        return
> +    fi
> +
> +    if ! /usr/share/pve-manager/helpers/pve-osd-lvm-enable-autoactivation; 
> then
> +        printf "\nEnabling autoactivation for logical volumes used by Ceph 
> OSDs failed.";
> +        printf " Check the output above for errors and try to enable 
> autoactivation for OSD LVs";
> +        printf " manually by running 
> '/usr/share/pve-manager/helpers/pve-osd-lvm-enable-autoactivation'";
> +    fi
> +}
> +
>  # Copied from dh_installtmpfiles/13.24.2
>  if [ "$1" = "configure" ] || [ "$1" = "abort-upgrade" ] || [ "$1" = 
> "abort-deconfigure" ] || [ "$1" = "abort-remove" ] ; then
>         if [ -x "$(command -v systemd-tmpfiles)" ]; then
> @@ -246,6 +258,10 @@ case "$1" in
>          fi
>      fi
>  
> +    if test -n "$2" && dpkg --compare-versions "$2" 'lt' '9.0.5'; then
> +        ceph_osd_lvm_enable_autoactivation
> +    fi
> +
>      ;;
>  
>    abort-upgrade|abort-remove|abort-deconfigure)
> -- 
> 2.47.2
> 
> 
> 
> _______________________________________________
> pve-devel mailing list
> pve-devel@lists.proxmox.com
> https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel
> 
> 
> 


_______________________________________________
pve-devel mailing list
pve-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel

Reply via email to