This is an automated email from the ASF dual-hosted git repository.

HoustonPutman pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr-operator.git


The following commit(s) were added to refs/heads/main by this push:
     new e0d36ee  Support online resizing (expansion) of persistent data PVCs (#712)
e0d36ee is described below

commit e0d36ee7c1d5995a5617fdf0396effaf788149df
Author: Houston Putman <[email protected]>
AuthorDate: Mon Jun 8 11:17:01 2026 -0700

    Support online resizing (expansion) of persistent data PVCs (#712)
    
    Grow SolrCloud data PVCs in place via the storage request; the operator resizes the PVCs and rolls the pods.
    
    The integration (e2e) tests now use the `rawfile-localpv` provisioner that supports resizing.
---
 Makefile                               |   2 +-
 config/rbac/role.yaml                  |  18 ++++
 controllers/solr_cluster_ops_util.go   | 107 ++++++++++++++++++-
 controllers/solr_pvc_expansion_test.go |  94 +++++++++++++++++
 controllers/solrcloud_controller.go    | 184 ++++++++++++++++++++++++++++++---
 controllers/suite_test.go              |   5 +-
 controllers/util/solr_util.go          |  24 +++++
 docs/solr-cloud/solr-cloud-crd.md      |   9 +-
 docs/upgrade-notes.md                  |   2 +-
 helm/solr-operator/Chart.yaml          |   7 ++
 helm/solr-operator/templates/role.yaml |  18 ++++
 helm/solr/Chart.yaml                   |   9 +-
 main.go                                |   5 +-
 tests/e2e/solrcloud_storage_test.go    | 171 ++++++++++++++++++++++++++++++
 tests/e2e/suite_test.go                |  85 ++++++++++-----
 tests/scripts/manage_e2e_tests.sh      |  10 +-
 16 files changed, 693 insertions(+), 57 deletions(-)

diff --git a/Makefile b/Makefile
index c637225..c9a1044 100644
--- a/Makefile
+++ b/Makefile
@@ -43,7 +43,7 @@ KUSTOMIZE_VERSION=v4.5.2
 CONTROLLER_GEN_VERSION=v0.16.4
 GO_LICENSES_VERSION=v1.6.0
 GINKGO_VERSION = $(shell cat go.mod | grep 'github.com/onsi/ginkgo' | sed 
's/.*\(v.*\)$$/\1/g')
-KIND_VERSION=v0.23.0
+KIND_VERSION=v0.30.0
 YQ_VERSION=v4.33.3
 CONTROLLER_RUNTIME_VERSION = $(shell cat go.mod | grep 
'sigs.k8s.io/controller-runtime' | sed 's/.*\(v\(.*\)\.[^.]*\)$$/\2/g')
 # ENVTEST_K8S_VERSION refers to the version of kubebuilder assets to be 
downloaded by envtest binary.
diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml
index d8239bb..53b8d47 100644
--- a/config/rbac/role.yaml
+++ b/config/rbac/role.yaml
@@ -43,6 +43,16 @@ rules:
   - ""
   resources:
   - persistentvolumeclaims
+  verbs:
+  - delete
+  - get
+  - list
+  - patch
+  - update
+  - watch
+- apiGroups:
+  - ""
+  resources:
   - pods
   verbs:
   - delete
@@ -144,6 +154,14 @@ rules:
   - get
   - patch
   - update
+- apiGroups:
+  - storage.k8s.io
+  resources:
+  - storageclasses
+  verbs:
+  - get
+  - list
+  - watch
 - apiGroups:
   - zookeeper.pravega.io
   resources:
diff --git a/controllers/solr_cluster_ops_util.go b/controllers/solr_cluster_ops_util.go
index 916446b..deecd21 100644
--- a/controllers/solr_cluster_ops_util.go
+++ b/controllers/solr_cluster_ops_util.go
@@ -21,18 +21,20 @@ import (
        "context"
        "encoding/json"
        "errors"
+       "net/url"
+       "strconv"
+       "time"
+
        solrv1beta1 "github.com/apache/solr-operator/api/v1beta1"
        "github.com/apache/solr-operator/controllers/util"
        "github.com/apache/solr-operator/controllers/util/solr_api"
        "github.com/go-logr/logr"
        appsv1 "k8s.io/api/apps/v1"
        corev1 "k8s.io/api/core/v1"
+       "k8s.io/apimachinery/pkg/api/resource"
        metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
        "k8s.io/utils/pointer"
-       "net/url"
        "sigs.k8s.io/controller-runtime/pkg/client"
-       "strconv"
-       "time"
 )
 
 // SolrClusterOp contains metadata for cluster operations performed on 
SolrClouds.
@@ -53,6 +55,7 @@ const (
        ScaleUpLock         SolrClusterOperationType = "ScalingUp"
        UpdateLock          SolrClusterOperationType = "RollingUpdate"
        BalanceReplicasLock SolrClusterOperationType = "BalanceReplicas"
+       PvcExpansionLock    SolrClusterOperationType = "PVCExpansion"
 )
 
 // RollingUpdateMetadata contains metadata for rolling update cluster 
operations.
@@ -150,6 +153,101 @@ func retryNextQueuedClusterOpWithQueue(statefulSet *appsv1.StatefulSet, clusterO
        return hasOp, err
 }
 
+func determinePvcExpansionClusterOpLockIfNecessary(ctx context.Context, r 
*SolrCloudReconciler, instance *solrv1beta1.SolrCloud, statefulSet 
*appsv1.StatefulSet, logger logr.Logger) (clusterOp *SolrClusterOp, 
retryLaterDuration time.Duration, err error) {
+       if instance.Spec.StorageOptions.PersistentStorage == nil ||
+               
instance.Spec.StorageOptions.PersistentStorage.PersistentVolumeClaimTemplate.Spec.Resources.Requests.Storage()
 == nil {
+               return
+       }
+       newSize := 
instance.Spec.StorageOptions.PersistentStorage.PersistentVolumeClaimTemplate.Spec.Resources.Requests.Storage()
+       // If there is no old size to update, the StatefulSet can just be set 
to use the new PVC size without any issue.
+       // Only do a cluster operation if we are expanding from an existing 
size to a new size.
+       oldSizeStr, hasOldSize := 
statefulSet.Annotations[util.StorageMinimumSizeAnnotation]
+       if !hasOldSize || newSize.String() == oldSizeStr {
+               return
+       }
+       oldSize, e := resource.ParseQuantity(oldSizeStr)
+       if e != nil {
+               err = e
+               logger.Error(err, "Could not parse the existing minimum PVC 
size from the StatefulSet annotation", "annotation", 
util.StorageMinimumSizeAnnotation, "value", oldSizeStr)
+               if r.Recorder != nil {
+                       r.Recorder.Eventf(instance, corev1.EventTypeWarning, 
"PVCExpansionError",
+                               "Could not parse the existing minimum data PVC 
size %q recorded on the StatefulSet: %v", oldSizeStr, e)
+               }
+               return
+       }
+       // PVCs cannot be shrunk, so only proceed if the new size is strictly 
bigger than the recorded size.
+       if newSize.Cmp(oldSize) <= 0 {
+               logger.Info("Cannot shrink existing data PVCs; ignoring the 
decreased storage request", "currentSize", oldSize.String(), "requestedSize", 
newSize.String())
+               if r.Recorder != nil {
+                       r.Recorder.Eventf(instance, corev1.EventTypeWarning, 
"PVCExpansionForbidden",
+                               "Cannot shrink data PersistentVolumeClaims from 
%s to %s; PersistentVolumeClaims can only be expanded.", oldSize.String(), 
newSize.String())
+               }
+               return
+       }
+       // Pre-flight: make sure the storage class backing the data PVCs allows 
volume expansion. If it
+       // explicitly does not, there is no point acquiring a cluster operation 
lock that can never
+       // complete; surface it as an event instead.
+       if allowed, className, scErr := r.storageClassAllowsExpansion(ctx, 
instance, statefulSet.Spec.Selector.MatchLabels); scErr != nil {
+               // Could not determine; proceed best-effort and let the PVC 
patch surface any hard rejection.
+               logger.Error(scErr, "Could not verify whether the storage class 
allows volume expansion; proceeding with the expansion attempt")
+       } else if !allowed {
+               logger.Info("Storage class does not allow volume expansion; 
ignoring the increased storage request", "storageClass", className, 
"currentSize", oldSize.String(), "requestedSize", newSize.String())
+               if r.Recorder != nil {
+                       r.Recorder.Eventf(instance, corev1.EventTypeWarning, 
"PVCExpansionForbidden",
+                               "Storage class %q does not allow volume 
expansion (allowVolumeExpansion); cannot expand data PersistentVolumeClaims 
from %s to %s.", className, oldSize.String(), newSize.String())
+               }
+               return
+       }
+       clusterOp = &SolrClusterOp{
+               Operation: PvcExpansionLock,
+               Metadata:  newSize.String(),
+       }
+       return
+}
+
+// handlePvcExpansion handles the logic of a persistent volume claim expansion 
operation.
+func handlePvcExpansion(ctx context.Context, r *SolrCloudReconciler, instance 
*solrv1beta1.SolrCloud, statefulSet *appsv1.StatefulSet, clusterOp 
*SolrClusterOp, logger logr.Logger) (operationComplete bool, retryLaterDuration 
time.Duration, err error) {
+       var newSize resource.Quantity
+       newSize, err = resource.ParseQuantity(clusterOp.Metadata)
+       if err != nil {
+               logger.Error(err, "Could not convert PvcExpansion metadata to a 
resource.Quantity, as it represents the new size of PVCs", "metadata", 
clusterOp.Metadata)
+               return
+       }
+       var resizeInfeasible bool
+       operationComplete, resizeInfeasible, err = r.expandPVCs(ctx, instance, 
statefulSet.Spec.Selector.MatchLabels, newSize, logger)
+       if err == nil && operationComplete {
+               originalStatefulSet := statefulSet.DeepCopy()
+               statefulSet.Annotations[util.StorageMinimumSizeAnnotation] = 
newSize.String()
+               if statefulSet.Spec.Template.Annotations == nil {
+                       statefulSet.Spec.Template.Annotations = 
make(map[string]string, 1)
+               }
+               
statefulSet.Spec.Template.Annotations[util.StorageMinimumSizeAnnotation] = 
newSize.String()
+               if err = r.Patch(ctx, statefulSet, 
client.StrategicMergeFrom(originalStatefulSet)); err != nil {
+                       logger.Error(err, "Error while patching StatefulSet to 
set the new minimum PVC size after PVCs the completion of PVC resizing", 
"newSize", newSize)
+                       operationComplete = false
+               } else {
+                       logger.Info("All PersistentVolumeClaims have been 
expanded, now issuing a rolling restart", "statefulSet", statefulSet.Name)
+               }
+               // Return and wait for the StatefulSet to be updated which will 
call the reconcile to start the rolling restart
+               retryLaterDuration = 0
+       } else if err == nil {
+               if resizeInfeasible {
+                       // The storage backend has declared the requested size 
infeasible. There is nothing the
+                       // operator can do until the user lowers the requested 
size, so surface it as an event and
+                       // back off significantly instead of retrying tightly.
+                       if r.Recorder != nil {
+                               r.Recorder.Eventf(instance, 
corev1.EventTypeWarning, "PVCExpansionInfeasible",
+                                       "The storage backend reported that 
expanding the data PersistentVolumeClaims to %s is infeasible (e.g. it exceeds 
backend or quota limits). Reduce the requested storage size to a feasible value 
to recover.",
+                                       newSize.String())
+                       }
+                       retryLaterDuration = time.Minute
+               } else {
+                       retryLaterDuration = time.Second * 5
+               }
+       }
+       return
+}
+
 func determineScaleClusterOpLockIfNecessary(ctx context.Context, r 
*SolrCloudReconciler, instance *solrv1beta1.SolrCloud, statefulSet 
*appsv1.StatefulSet, scaleDownOpIsQueued bool, podList []corev1.Pod, 
blockReconciliationOfStatefulSet bool, logger logr.Logger) (clusterOp 
*SolrClusterOp, retryLaterDuration time.Duration, err error) {
        desiredPods := int(*instance.Spec.Replicas)
        configuredPods := int(*statefulSet.Spec.Replicas)
@@ -291,7 +389,8 @@ func cleanupManagedCloudScaleDown(ctx context.Context, r *SolrCloudReconciler, p
 // handleManagedCloudScaleUp does the logic of a managed and "locked" cloud 
scale up operation.
 // This will likely take many reconcile loops to complete, as it is moving 
replicas to the pods that have recently been scaled up.
 func handleManagedCloudScaleUp(ctx context.Context, r *SolrCloudReconciler, 
instance *solrv1beta1.SolrCloud, statefulSet *appsv1.StatefulSet, clusterOp 
*SolrClusterOp, podList []corev1.Pod, logger logr.Logger) (operationComplete 
bool, nextClusterOperation *SolrClusterOp, err error) {
-       desiredPods, err := strconv.Atoi(clusterOp.Metadata)
+       desiredPods := 0
+       desiredPods, err = strconv.Atoi(clusterOp.Metadata)
        if err != nil {
                logger.Error(err, "Could not convert ScaleUp metadata to int, 
as it represents the number of nodes to scale to", "metadata", 
clusterOp.Metadata)
                return
diff --git a/controllers/solr_pvc_expansion_test.go b/controllers/solr_pvc_expansion_test.go
new file mode 100644
index 0000000..9ac4ca2
--- /dev/null
+++ b/controllers/solr_pvc_expansion_test.go
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package controllers
+
+import (
+       "testing"
+
+       corev1 "k8s.io/api/core/v1"
+)
+
+// pvcWithCondition builds a PVC carrying a single resize condition.
+func pvcWithCondition(condType corev1.PersistentVolumeClaimConditionType, 
status corev1.ConditionStatus) *corev1.PersistentVolumeClaim {
+       return &corev1.PersistentVolumeClaim{
+               Status: corev1.PersistentVolumeClaimStatus{
+                       Conditions: 
[]corev1.PersistentVolumeClaimCondition{{Type: condType, Status: status}},
+               },
+       }
+}
+
+// pvcWithAllocatedStatus builds a PVC carrying a storage 
allocatedResourceStatus.
+func pvcWithAllocatedStatus(status corev1.ClaimResourceStatus) 
*corev1.PersistentVolumeClaim {
+       return &corev1.PersistentVolumeClaim{
+               Status: corev1.PersistentVolumeClaimStatus{
+                       AllocatedResourceStatuses: 
map[corev1.ResourceName]corev1.ClaimResourceStatus{
+                               corev1.ResourceStorage: status,
+                       },
+               },
+       }
+}
+
+// TestPvcControllerExpansionComplete verifies that the controller-side 
expansion is reported as
+// complete for the "offline" provisioner signals (FileSystemResizePending 
condition or a pending/
+// in-progress node resize status), so that the rolling restart is not gated 
on status.capacity.
+func TestPvcControllerExpansionComplete(t *testing.T) {
+       cases := []struct {
+               name string
+               pvc  *corev1.PersistentVolumeClaim
+               want bool
+       }{
+               {"empty pvc", &corev1.PersistentVolumeClaim{}, false},
+               {"filesystem resize pending (offline ready-to-restart)", 
pvcWithCondition(corev1.PersistentVolumeClaimFileSystemResizePending, 
corev1.ConditionTrue), true},
+               {"filesystem resize pending but condition false", 
pvcWithCondition(corev1.PersistentVolumeClaimFileSystemResizePending, 
corev1.ConditionFalse), false},
+               {"unrelated resizing condition", 
pvcWithCondition(corev1.PersistentVolumeClaimResizing, corev1.ConditionTrue), 
false},
+               {"node resize pending status", 
pvcWithAllocatedStatus(corev1.PersistentVolumeClaimNodeResizePending), true},
+               {"node resize in progress status", 
pvcWithAllocatedStatus(corev1.PersistentVolumeClaimNodeResizeInProgress), true},
+               {"controller resize in progress status", 
pvcWithAllocatedStatus(corev1.PersistentVolumeClaimControllerResizeInProgress), 
false},
+               {"controller resize infeasible status", 
pvcWithAllocatedStatus(corev1.PersistentVolumeClaimControllerResizeInfeasible), 
false},
+       }
+       for _, tc := range cases {
+               t.Run(tc.name, func(t *testing.T) {
+                       if got := pvcControllerExpansionComplete(tc.pvc); got 
!= tc.want {
+                               t.Errorf("pvcControllerExpansionComplete() = 
%v, want %v", got, tc.want)
+                       }
+               })
+       }
+}
+
+// TestPvcResizeInfeasible verifies that a backend-declared infeasible 
expansion is detected from the
+// allocatedResourceStatuses (best-effort; populated on Kubernetes >= 1.34).
+func TestPvcResizeInfeasible(t *testing.T) {
+       cases := []struct {
+               name string
+               pvc  *corev1.PersistentVolumeClaim
+               want bool
+       }{
+               {"empty pvc", &corev1.PersistentVolumeClaim{}, false},
+               {"controller resize infeasible", 
pvcWithAllocatedStatus(corev1.PersistentVolumeClaimControllerResizeInfeasible), 
true},
+               {"node resize infeasible", 
pvcWithAllocatedStatus(corev1.PersistentVolumeClaimNodeResizeInfeasible), true},
+               {"node resize pending is not infeasible", 
pvcWithAllocatedStatus(corev1.PersistentVolumeClaimNodeResizePending), false},
+               {"controller resize in progress is not infeasible", 
pvcWithAllocatedStatus(corev1.PersistentVolumeClaimControllerResizeInProgress), 
false},
+       }
+       for _, tc := range cases {
+               t.Run(tc.name, func(t *testing.T) {
+                       if got := pvcResizeInfeasible(tc.pvc); got != tc.want {
+                               t.Errorf("pvcResizeInfeasible() = %v, want %v", 
got, tc.want)
+                       }
+               })
+       }
+}
diff --git a/controllers/solrcloud_controller.go b/controllers/solrcloud_controller.go
index b18dbd1..e94ef7b 100644
--- a/controllers/solrcloud_controller.go
+++ b/controllers/solrcloud_controller.go
@@ -21,13 +21,15 @@ import (
        "context"
        "crypto/md5"
        "fmt"
-       policyv1 "k8s.io/api/policy/v1"
-       "k8s.io/apimachinery/pkg/runtime"
        "reflect"
        "sort"
        "strings"
        "time"
 
+       policyv1 "k8s.io/api/policy/v1"
+       "k8s.io/apimachinery/pkg/api/resource"
+       "k8s.io/apimachinery/pkg/runtime"
+
        solrv1beta1 "github.com/apache/solr-operator/api/v1beta1"
        "github.com/apache/solr-operator/controllers/util"
        "github.com/go-logr/logr"
@@ -35,11 +37,13 @@ import (
        appsv1 "k8s.io/api/apps/v1"
        corev1 "k8s.io/api/core/v1"
        netv1 "k8s.io/api/networking/v1"
+       storagev1 "k8s.io/api/storage/v1"
        "k8s.io/apimachinery/pkg/api/errors"
        metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
        "k8s.io/apimachinery/pkg/fields"
        "k8s.io/apimachinery/pkg/labels"
        "k8s.io/apimachinery/pkg/types"
+       "k8s.io/client-go/tools/record"
        ctrl "sigs.k8s.io/controller-runtime"
        "sigs.k8s.io/controller-runtime/pkg/builder"
        "sigs.k8s.io/controller-runtime/pkg/client"
@@ -53,7 +57,8 @@ import (
 // SolrCloudReconciler reconciles a SolrCloud object
 type SolrCloudReconciler struct {
        client.Client
-       Scheme *runtime.Scheme
+       Scheme   *runtime.Scheme
+       Recorder record.EventRecorder
 }
 
 var useZkCRD bool
@@ -72,7 +77,8 @@ func UseZkCRD(useCRD bool) {
 
//+kubebuilder:rbac:groups=networking.k8s.io,resources=ingresses/status,verbs=get
 
//+kubebuilder:rbac:groups="",resources=configmaps,verbs=get;list;watch;create;update;patch;delete
 //+kubebuilder:rbac:groups="",resources=configmaps/status,verbs=get
-//+kubebuilder:rbac:groups="",resources=persistentvolumeclaims,verbs=get;list;watch;delete
+//+kubebuilder:rbac:groups="",resources=persistentvolumeclaims,verbs=get;list;watch;update;patch;delete
+//+kubebuilder:rbac:groups=storage.k8s.io,resources=storageclasses,verbs=get;list;watch
 
//+kubebuilder:rbac:groups=policy,resources=poddisruptionbudgets,verbs=get;list;watch;create;update;patch;delete
 
//+kubebuilder:rbac:groups=zookeeper.pravega.io,resources=zookeeperclusters,verbs=get;list;watch;create;update;patch;delete
 
//+kubebuilder:rbac:groups=zookeeper.pravega.io,resources=zookeeperclusters/status,verbs=get
@@ -493,6 +499,11 @@ func (r *SolrCloudReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
                        operationComplete, nextClusterOperation, err = 
handleManagedCloudScaleUp(ctx, r, instance, statefulSet, clusterOp, podList, 
logger)
                case BalanceReplicasLock:
                        operationComplete, requestInProgress, 
retryLaterDuration, err = util.BalanceReplicasForCluster(ctx, instance, 
statefulSet, clusterOp.Metadata, clusterOp.Metadata, logger)
+               case PvcExpansionLock:
+                       operationComplete, retryLaterDuration, err = 
handlePvcExpansion(ctx, r, instance, statefulSet, clusterOp, logger)
+                       // PVC expansion (the controller-side volume resize) 
can take a long time on some provisioners,
+                       // so it should use the long requeue timeout rather 
than being preempted after a minute.
+                       shortTimeoutForRequeue = false
                default:
                        operationFound = false
                        // This shouldn't happen, but we don't want to be stuck 
if it does.
@@ -561,6 +572,15 @@ func (r *SolrCloudReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
                                clusterOp = nil
                        }
 
+                       if clusterOp == nil {
+                               clusterOp, retryLaterDuration, err = 
determinePvcExpansionClusterOpLockIfNecessary(ctx, r, instance, statefulSet, 
logger)
+                               // If the new clusterOperation is an update to 
a queued PVC expansion clusterOp, just change the operation that is already 
queued
+                               if queueIdx, opIsQueued := 
queuedRetryOps[PvcExpansionLock]; clusterOp != nil && opIsQueued {
+                                       clusterOpQueue[queueIdx] = *clusterOp
+                                       clusterOp = nil
+                               }
+                       }
+
                        // If a non-managed scale needs to take place, this 
method will update the StatefulSet without starting
                        // a "locked" cluster operation
                        if clusterOp == nil {
@@ -1018,6 +1038,144 @@ func (r *SolrCloudReconciler) reconcileZk(ctx context.Context, logger logr.Logge
        return nil
 }
 
+func (r *SolrCloudReconciler) expandPVCs(ctx context.Context, cloud 
*solrv1beta1.SolrCloud, pvcLabelSelector map[string]string, newSize 
resource.Quantity, logger logr.Logger) (expansionComplete bool, 
resizeInfeasible bool, err error) {
+       var pvcList corev1.PersistentVolumeClaimList
+       pvcList, err = r.getPVCList(ctx, cloud, pvcLabelSelector)
+       if err != nil {
+               return
+       }
+       expansionCompleteCount := 0
+       for _, pvcItem := range pvcList.Items {
+               if pvcExpansionComplete, pvcInfeasible, e := r.expandPVC(ctx, 
&pvcItem, newSize, logger); e != nil {
+                       err = e
+               } else {
+                       if pvcExpansionComplete {
+                               expansionCompleteCount += 1
+                       }
+                       if pvcInfeasible {
+                               resizeInfeasible = true
+                       }
+               }
+       }
+       // If all PVCs have completed their controller-side expansion, then we 
are done
+       expansionComplete = err == nil && expansionCompleteCount == 
len(pvcList.Items)
+       return
+}
+
+// expandPVC requests (and detects the completion of) the controller-side 
expansion of a single PVC.
+//
+// "Complete" here means the controller-side volume expansion has finished, so 
the cluster operation
+// can hand off to a rolling restart that will carry out any remaining 
node-side filesystem resize.
+// This intentionally does NOT wait for the filesystem resize itself, because 
some provisioners only
+// resize the filesystem "offline" (when the volume is remounted during the 
restart). Waiting for
+// status.capacity in that case would deadlock: capacity can't update until 
the pod restarts, but the
+// operator wouldn't restart until capacity updated.
+func (r *SolrCloudReconciler) expandPVC(ctx context.Context, pvc 
*corev1.PersistentVolumeClaim, newSize resource.Quantity, logger logr.Logger) 
(expansionComplete bool, resizeInfeasible bool, err error) {
+       // If the current capacity is >= the new size, then there is nothing to 
do, expansion is complete.
+       // Treat missing capacity as zero.
+       capacityQty, hasCapacity := pvc.Status.Capacity[corev1.ResourceStorage]
+       if !hasCapacity {
+               capacityQty = resource.Quantity{}
+       }
+       if capacityQty.Cmp(newSize) >= 0 || pvcControllerExpansionComplete(pvc) 
{
+               // Either the volume has already been fully expanded (online 
resize), or the controller-side
+               // expansion is done and only a node/filesystem resize remains 
(offline resize), which the
+               // subsequent rolling restart will complete on remount.
+               expansionComplete = true
+               return
+       }
+       // Surface (best-effort) a backend that has declared the requested size 
infeasible, so it can be
+       // reported instead of being silently retried forever. 
allocatedResourceStatuses is populated on
+       // Kubernetes clusters with the RecoverVolumeExpansionFailure feature 
(GA in 1.34); on older
+       // clusters this is simply never true and behavior is unchanged.
+       resizeInfeasible = pvcResizeInfeasible(pvc)
+
+       // Determine if the current request already matches the desired size.
+       requestQty, hasRequest := 
pvc.Spec.Resources.Requests[corev1.ResourceStorage]
+       sameRequest := hasRequest && requestQty.Equal(newSize)
+       if !sameRequest {
+               // Update the pvc if the capacity request is different.
+               // The newSize might be smaller than the current size, but this 
is supported as the last size might have been too
+               // big for the storage quota, so it was lowered.
+               // As long as the PVCs current capacity is lower than the new 
size, we are still good to update the PVC.
+               originalPvc := pvc.DeepCopy()
+               if pvc.Spec.Resources.Requests == nil {
+                       pvc.Spec.Resources.Requests = corev1.ResourceList{}
+               }
+               pvc.Spec.Resources.Requests[corev1.ResourceStorage] = newSize
+               if err = r.Patch(ctx, pvc, 
client.StrategicMergeFrom(originalPvc)); err != nil {
+                       logger.Error(err, "Error while expanding 
PersistentVolumeClaim size", "persistentVolumeClaim", pvc.Name, "size", newSize)
+               } else {
+                       logger.Info("Expanded PersistentVolumeClaim size", 
"persistentVolumeClaim", pvc.Name, "size", newSize)
+               }
+       }
+       return
+}
+
+// pvcControllerExpansionComplete reports whether the controller-side 
expansion of the PVC has
+// finished and only a node-side filesystem resize remains. This is the signal 
that it is safe (and,
+// for offline provisioners, necessary) to proceed to a rolling restart to 
apply the resize.
+//
+// It checks the FileSystemResizePending condition (available on all supported 
Kubernetes versions)
+// as the primary signal, and falls back to allocatedResourceStatuses 
(best-effort, populated on
+// clusters with RecoverVolumeExpansionFailure / Kubernetes >= 1.34).
+func pvcControllerExpansionComplete(pvc *corev1.PersistentVolumeClaim) bool {
+       for _, cond := range pvc.Status.Conditions {
+               if cond.Type == 
corev1.PersistentVolumeClaimFileSystemResizePending && cond.Status == 
corev1.ConditionTrue {
+                       return true
+               }
+       }
+       if status, hasStatus := 
pvc.Status.AllocatedResourceStatuses[corev1.ResourceStorage]; hasStatus {
+               if status == corev1.PersistentVolumeClaimNodeResizePending || 
status == corev1.PersistentVolumeClaimNodeResizeInProgress {
+                       return true
+               }
+       }
+       return false
+}
+
+// pvcResizeInfeasible reports (best-effort) whether the storage backend has 
declared the requested
+// expansion infeasible (e.g. the size exceeds backend/quota limits). This 
relies on
+// allocatedResourceStatuses, which is populated on Kubernetes clusters with 
the
+// RecoverVolumeExpansionFailure feature (GA in 1.34); on older clusters it is 
never true.
+func pvcResizeInfeasible(pvc *corev1.PersistentVolumeClaim) bool {
+       if status, hasStatus := 
pvc.Status.AllocatedResourceStatuses[corev1.ResourceStorage]; hasStatus {
+               return status == 
corev1.PersistentVolumeClaimControllerResizeInfeasible || status == 
corev1.PersistentVolumeClaimNodeResizeInfeasible
+       }
+       return false
+}
+
+// storageClassAllowsExpansion reports whether the storage class backing the 
SolrCloud's data PVCs
+// allows volume expansion. The storage class name is resolved from the actual 
provisioned PVCs
+// (whose StorageClassName is always populated, even when the SolrCloud relies 
on the cluster
+// default). When the class cannot be determined, this returns allowed=true so 
the expansion is still
+// attempted (the PVC patch itself will surface a hard rejection).
+func (r *SolrCloudReconciler) storageClassAllowsExpansion(ctx context.Context, 
cloud *solrv1beta1.SolrCloud, pvcLabelSelector map[string]string) (allowed 
bool, className string, err error) {
+       pvcList, err := r.getPVCList(ctx, cloud, pvcLabelSelector)
+       if err != nil {
+               return false, "", err
+       }
+       for i := range pvcList.Items {
+               if scn := pvcList.Items[i].Spec.StorageClassName; scn != nil && 
*scn != "" {
+                       className = *scn
+                       break
+               }
+       }
+       if className == "" {
+               // Could not determine the storage class; allow the attempt.
+               return true, "", nil
+       }
+       storageClass := &storagev1.StorageClass{}
+       if err = r.Get(ctx, types.NamespacedName{Name: className}, 
storageClass); err != nil {
+               if errors.IsNotFound(err) {
+                       // Could not find the storage class; allow the attempt 
and let the PVC patch surface any error.
+                       return true, className, nil
+               }
+               return false, className, err
+       }
+       allowed = storageClass.AllowVolumeExpansion != nil && 
*storageClass.AllowVolumeExpansion
+       return allowed, className, nil
+}
+
 // Logic derived from:
 // - https://book.kubebuilder.io/reference/using-finalizers.html
 // - 
https://github.com/pravega/zookeeper-operator/blob/v0.2.9/pkg/controller/zookeepercluster/zookeepercluster_controller.go#L629
@@ -1064,16 +1222,15 @@ func (r *SolrCloudReconciler) 
reconcileStorageFinalizer(ctx context.Context, clo
        return nil
 }
 
-func (r *SolrCloudReconciler) getPVCCount(ctx context.Context, cloud 
*solrv1beta1.SolrCloud, pvcLabelSelector map[string]string) (pvcCount int, err 
error) {
+// getPVCCount returns the number of PVCs that getPVCList finds for the given
+// SolrCloud and label selector, or -1 together with the error when listing fails.
+func (r *SolrCloudReconciler) getPVCCount(ctx context.Context, cloud 
*solrv1beta1.SolrCloud, pvcLabelSelector map[string]string) (int, error) {
        pvcList, err := r.getPVCList(ctx, cloud, pvcLabelSelector)
        if err != nil {
                return -1, err
        }
-       pvcCount = len(pvcList.Items)
-       return pvcCount, nil
+       return len(pvcList.Items), nil
 }
 
-func (r *SolrCloudReconciler) cleanupOrphanPVCs(ctx context.Context, cloud 
*solrv1beta1.SolrCloud, statefulSet *appsv1.StatefulSet, pvcLabelSelector 
map[string]string, logger logr.Logger) (err error) {
+func (r *SolrCloudReconciler) cleanupOrphanPVCs(ctx context.Context, cloud 
*solrv1beta1.SolrCloud, statefulSet *appsv1.StatefulSet, pvcLabelSelector 
map[string]string, logger logr.Logger) error {
        // this check should make sure we do not delete the PVCs before the STS 
has scaled down
        if cloud.Status.ReadyReplicas == cloud.Status.Replicas {
                pvcList, err := r.getPVCList(ctx, cloud, pvcLabelSelector)
@@ -1093,24 +1250,25 @@ func (r *SolrCloudReconciler) cleanupOrphanPVCs(ctx 
context.Context, cloud *solr
                                }
                        }
                }
+               return err
        }
        return nil
 }
 
-func (r *SolrCloudReconciler) getPVCList(ctx context.Context, cloud 
*solrv1beta1.SolrCloud, pvcLabelSelector map[string]string) (pvList 
corev1.PersistentVolumeClaimList, err error) {
+// getPVCList lists the PersistentVolumeClaims in the SolrCloud's namespace whose
+// labels match pvcLabelSelector. If the selector cannot be built, that error is
+// returned with an empty list and no List call is made; otherwise the result
+// and error of the List call are returned.
+func (r *SolrCloudReconciler) getPVCList(ctx context.Context, cloud 
*solrv1beta1.SolrCloud, pvcLabelSelector map[string]string) 
(corev1.PersistentVolumeClaimList, error) {
        selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{
                MatchLabels: pvcLabelSelector,
        })
+       if err != nil {
+               // Do not let the List call below overwrite a selector error.
+               return corev1.PersistentVolumeClaimList{}, err
+       }
-       pvclistOps := &client.ListOptions{
+       pvcListOps := &client.ListOptions{
                Namespace:     cloud.Namespace,
                LabelSelector: selector,
        }
        pvcList := &corev1.PersistentVolumeClaimList{}
-       err = r.Client.List(ctx, pvcList, pvclistOps)
+       err = r.Client.List(ctx, pvcList, pvcListOps)
        return *pvcList, err
 }
 
-func (r *SolrCloudReconciler) cleanUpAllPVCs(ctx context.Context, cloud 
*solrv1beta1.SolrCloud, pvcLabelSelector map[string]string, logger logr.Logger) 
(err error) {
+func (r *SolrCloudReconciler) cleanUpAllPVCs(ctx context.Context, cloud 
*solrv1beta1.SolrCloud, pvcLabelSelector map[string]string, logger logr.Logger) 
error {
        pvcList, err := r.getPVCList(ctx, cloud, pvcLabelSelector)
        if err != nil {
                return err
@@ -1118,7 +1276,7 @@ func (r *SolrCloudReconciler) cleanUpAllPVCs(ctx 
context.Context, cloud *solrv1b
        for _, pvcItem := range pvcList.Items {
                r.deletePVC(ctx, pvcItem, logger)
        }
-       return nil
+       return err
 }
 
 func (r *SolrCloudReconciler) deletePVC(ctx context.Context, pvcItem 
corev1.PersistentVolumeClaim, logger logr.Logger) {
diff --git a/controllers/suite_test.go b/controllers/suite_test.go
index 4d49ef5..7b89ee8 100644
--- a/controllers/suite_test.go
+++ b/controllers/suite_test.go
@@ -106,8 +106,9 @@ var _ = BeforeSuite(func(ctx context.Context) {
        // Start up Reconcilers
        By("starting the reconcilers")
        Expect((&SolrCloudReconciler{
-               Client: k8sManager.GetClient(),
-               Scheme: k8sManager.GetScheme(),
+               Client:   k8sManager.GetClient(),
+               Scheme:   k8sManager.GetScheme(),
+               Recorder: 
k8sManager.GetEventRecorderFor("solrcloud-controller"),
        }).SetupWithManager(k8sManager)).To(Succeed())
 
        Expect((&SolrPrometheusExporterReconciler{
diff --git a/controllers/util/solr_util.go b/controllers/util/solr_util.go
index 2cce36a..a3101c3 100644
--- a/controllers/util/solr_util.go
+++ b/controllers/util/solr_util.go
@@ -62,6 +62,7 @@ const (
        // These are to be saved on a statefulSet update
        ClusterOpsLockAnnotation       = "solr.apache.org/clusterOpsLock"
        ClusterOpsRetryQueueAnnotation = "solr.apache.org/clusterOpsRetryQueue"
+       StorageMinimumSizeAnnotation   = "solr.apache.org/storageMinimumSize"
 
        SolrIsNotStoppedReadinessCondition       = 
"solr.apache.org/isNotStopped"
        SolrReplicasNotEvictedReadinessCondition = 
"solr.apache.org/replicasNotEvicted"
@@ -217,6 +218,13 @@ func GenerateStatefulSet(solrCloud *solr.SolrCloud, 
solrCloudStatus *solr.SolrCl
                                Spec: pvc.Spec,
                        },
                }
+               if pvc.Spec.Resources.Requests.Storage() != nil {
+                       annotations[StorageMinimumSizeAnnotation] = 
pvc.Spec.Resources.Requests.Storage().String()
+                       if podAnnotations == nil {
+                               podAnnotations = make(map[string]string, 1)
+                       }
+                       podAnnotations[StorageMinimumSizeAnnotation] = 
pvc.Spec.Resources.Requests.Storage().String()
+               }
        } else {
                ephemeralVolume := corev1.Volume{
                        Name:         solrDataVolumeName,
@@ -687,6 +695,22 @@ func MaintainPreservedStatefulSetFields(expected, found 
*appsv1.StatefulSet) {
                        }
                        expected.Annotations[ClusterOpsRetryQueueAnnotation] = 
queue
                }
+               if storage, hasStorage := 
found.Annotations[StorageMinimumSizeAnnotation]; hasStorage {
+                       if expected.Annotations == nil {
+                               expected.Annotations = make(map[string]string, 
1)
+                       }
+                       expected.Annotations[StorageMinimumSizeAnnotation] = 
storage
+               }
+       }
+       if found.Spec.Template.Annotations != nil {
+               // Note: the Pod template storage annotation is used to start a 
rolling restart,
+               // it should always match the StatefulSet's storage annotation
+               if storage, hasStorage := 
found.Spec.Template.Annotations[StorageMinimumSizeAnnotation]; hasStorage {
+                       if expected.Spec.Template.Annotations == nil {
+                               expected.Spec.Template.Annotations = 
make(map[string]string, 1)
+                       }
+                       
expected.Spec.Template.Annotations[StorageMinimumSizeAnnotation] = storage
+               }
        }
 
        // Scaling (i.e. changing) the number of replicas in the SolrCloud 
statefulSet is handled during the clusterOps
diff --git a/docs/solr-cloud/solr-cloud-crd.md 
b/docs/solr-cloud/solr-cloud-crd.md
index 52027f0..0305188 100644
--- a/docs/solr-cloud/solr-cloud-crd.md
+++ b/docs/solr-cloud/solr-cloud-crd.md
@@ -61,8 +61,13 @@ These options can be found in `SolrCloud.spec.dataStorage`
   - **`pvcTemplate`** - The template of the PVC to use for the solr data PVCs. 
By default the name will be "data".
     Only the `pvcTemplate.spec` field is required, metadata is optional.
     
-    Note: This template cannot be changed unless the SolrCloud is deleted and 
recreated.
-    This is a [limitation of StatefulSets and PVCs in 
Kubernetes](https://github.com/kubernetes/enhancements/issues/661).
+    Note: Currently, [Kubernetes does not support PVC resizing (expanding) in 
StatefulSets](https://github.com/kubernetes/enhancements/issues/661).
+    However, the Solr Operator will manage the PVC expansion for users until 
this is supported by default in Kubernetes.
+    Therefore `pvcTemplate.spec.resources.requests` may be updated, but all 
other fields of `pvcTemplate.spec` should be considered immutable.
+
+    The storage size can only be increased (PVCs cannot be shrunk), and the 
backing [`StorageClass` must allow volume 
expansion](https://kubernetes.io/docs/concepts/storage/persistent-volumes/#expanding-persistent-volumes-claims)
 (`allowVolumeExpansion: true`).
+    When the size is increased, the operator resizes the data PVCs and then 
performs a rolling restart of the SolrCloud so the new capacity is picked up on 
each node.
+    If the storage class does not allow expansion, or the request would shrink 
the PVCs, the operator emits a warning event on the SolrCloud and leaves the 
storage unchanged.
 - **`ephemeral`**
 
   There are two types of ephemeral volumes that can be specified.
diff --git a/docs/upgrade-notes.md b/docs/upgrade-notes.md
index 267a80b..2a2eb17 100644
--- a/docs/upgrade-notes.md
+++ b/docs/upgrade-notes.md
@@ -134,7 +134,7 @@ _Note that the Helm chart version does not contain a `v` 
prefix, which the downl
 ### v0.8.0
 - **The minimum supported Solr version is now 8.11**
   If you are unable to use a newer version of Solr, please install the 
`v0.7.1` version of the Solr Operator.
-  However, it is strongly suggested to upgrade to newer versions of Solr that 
are actively supported.q
+  However, it is strongly suggested to upgrade to newer versions of Solr that 
are actively supported.
   See the [version compatibility matrix](#solr-versions) for more information.
 
 - **Kubernetes support is now limited to 1.22+.**  
diff --git a/helm/solr-operator/Chart.yaml b/helm/solr-operator/Chart.yaml
index 81138d0..a214d50 100644
--- a/helm/solr-operator/Chart.yaml
+++ b/helm/solr-operator/Chart.yaml
@@ -55,6 +55,13 @@ annotations:
   # Allowed syntax is described at: 
https://artifacthub.io/docs/topics/annotations/helm/#example
   # 'kind' accepts values: "added", "changed", "deprecated", "removed", 
"fixed" and "security"
   artifacthub.io/changes: |
+    - kind: added
+      description: The operator can now resize (expand) persistent data PVCs, 
which requires new RBAC permissions for persistentvolumeclaims (update/patch) 
and storageclasses (get/list/watch)
+      links:
+        - name: Github Issue
+          url: https://github.com/apache/solr-operator/issues/709
+        - name: Github PR
+          url: https://github.com/apache/solr-operator/pull/712
     - kind: changed
       description: A container PostStart Hook is no longer used to create the 
ZooKeeper ChRoot, instead the initContainer will manage this
       links:
diff --git a/helm/solr-operator/templates/role.yaml 
b/helm/solr-operator/templates/role.yaml
index 08c5fe4..6a267a0 100644
--- a/helm/solr-operator/templates/role.yaml
+++ b/helm/solr-operator/templates/role.yaml
@@ -47,6 +47,16 @@ rules:
   - ""
   resources:
   - persistentvolumeclaims
+  verbs:
+  - delete
+  - get
+  - list
+  - patch
+  - update
+  - watch
+- apiGroups:
+  - ""
+  resources:
   - pods
   verbs:
   - delete
@@ -148,6 +158,14 @@ rules:
   - get
   - patch
   - update
+- apiGroups:
+  - storage.k8s.io
+  resources:
+  - storageclasses
+  verbs:
+  - get
+  - list
+  - watch
 - apiGroups:
   - zookeeper.pravega.io
   resources:
diff --git a/helm/solr/Chart.yaml b/helm/solr/Chart.yaml
index 66e0251..2a59a34 100644
--- a/helm/solr/Chart.yaml
+++ b/helm/solr/Chart.yaml
@@ -42,15 +42,12 @@ annotations:
   # Allowed syntax is described at: 
https://artifacthub.io/docs/topics/annotations/helm/#example
   artifacthub.io/changes: |
     - kind: added
-      description: Addition 1
+      description: Allow resizing (expanding) of persistent data PVCs
       links:
         - name: Github Issue
-          url: https://github.com/issue-url
-    - kind: changed
-      description: Change 2
-      links:
+          url: https://github.com/apache/solr-operator/issues/709
         - name: Github PR
-          url: https://github.com/pr-url
+          url: https://github.com/apache/solr-operator/pull/712
   artifacthub.io/containsSecurityUpdates: "false"
   artifacthub.io/recommendations: |
     - url: https://artifacthub.io/packages/helm/apache-solr/solr-operator
diff --git a/main.go b/main.go
index c4aee80..d504995 100644
--- a/main.go
+++ b/main.go
@@ -199,8 +199,9 @@ func main() {
        }
 
        if err = (&controllers.SolrCloudReconciler{
-               Client: mgr.GetClient(),
-               Scheme: mgr.GetScheme(),
+               Client:   mgr.GetClient(),
+               Scheme:   mgr.GetScheme(),
+               Recorder: mgr.GetEventRecorderFor("solrcloud-controller"),
        }).SetupWithManager(mgr); err != nil {
                setupLog.Error(err, "unable to create controller", 
"controller", "SolrCloud")
                os.Exit(1)
diff --git a/tests/e2e/solrcloud_storage_test.go 
b/tests/e2e/solrcloud_storage_test.go
new file mode 100644
index 0000000..9c96d05
--- /dev/null
+++ b/tests/e2e/solrcloud_storage_test.go
@@ -0,0 +1,171 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package e2e
+
+import (
+       "context"
+       "time"
+
+       solrv1beta1 "github.com/apache/solr-operator/api/v1beta1"
+       "github.com/apache/solr-operator/controllers"
+       "github.com/apache/solr-operator/controllers/util"
+       . "github.com/onsi/ginkgo/v2"
+       . "github.com/onsi/gomega"
+       appsv1 "k8s.io/api/apps/v1"
+       corev1 "k8s.io/api/core/v1"
+       "k8s.io/apimachinery/pkg/api/resource"
+       "k8s.io/apimachinery/pkg/labels"
+       "sigs.k8s.io/controller-runtime/pkg/client"
+)
+
+var _ = Describe("E2E - SolrCloud - Storage", func() {
+       var (
+               solrCloud *solrv1beta1.SolrCloud
+
+               solrCollection1 = "e2e-1"
+
+               solrCollection2 = "e2e-2"
+       )
+
+       BeforeEach(func() {
+               solrCloud = generateBaseSolrCloud(2)
+       })
+
+       JustBeforeEach(func(ctx context.Context) {
+               By("creating the SolrCloud")
+               Expect(k8sClient.Create(ctx, solrCloud)).To(Succeed())
+
+               DeferCleanup(func(ctx context.Context) {
+                       cleanupTest(ctx, solrCloud)
+               })
+
+               By("Waiting for the SolrCloud to come up healthy")
+               solrCloud = expectSolrCloudToBeReady(ctx, solrCloud)
+
+               By("creating a first Solr Collection")
+               createAndQueryCollection(ctx, solrCloud, solrCollection1, 1, 2)
+
+               By("creating a second Solr Collection")
+               createAndQueryCollection(ctx, solrCloud, solrCollection2, 2, 1)
+       })
+
+       Context("Persistent Data - Expansion", func() {
+               BeforeEach(func() {
+                       solrCloud.Spec.StorageOptions = 
solrv1beta1.SolrDataStorageOptions{
+                               PersistentStorage: 
&solrv1beta1.SolrPersistentDataStorageOptions{
+                                       PersistentVolumeClaimTemplate: 
solrv1beta1.PersistentVolumeClaimTemplate{
+                                               Spec: 
corev1.PersistentVolumeClaimSpec{
+                                                       StorageClassName: 
new("rawfile-localpv"),
+                                                       Resources: 
corev1.VolumeResourceRequirements{
+                                                               Requests: 
map[corev1.ResourceName]resource.Quantity{
+                                                                       
corev1.ResourceStorage: resource.MustParse("1G"),
+                                                               },
+                                                       },
+                                               },
+                                       },
+                               },
+                       }
+               })
+
+               It("Fully Expands", func(ctx context.Context) {
+                       newStorageSize := resource.MustParse("1500M")
+                       patchedSolrCloud := solrCloud.DeepCopy()
+                       
patchedSolrCloud.Spec.StorageOptions.PersistentStorage.PersistentVolumeClaimTemplate.Spec.Resources.Requests[corev1.ResourceStorage]
 = newStorageSize
+                       By("increasing the storage request of the data PVCs")
+                       Expect(k8sClient.Patch(ctx, patchedSolrCloud, 
client.MergeFrom(solrCloud))).To(Succeed(), "Could not patch the SolrCloud to 
increase its data storage request")
+
+                       // The StatefulSet should first hold a PvcExpansion clusterOp lock
+                       expectStatefulSetWithChecksAndTimeout(ctx, solrCloud, 
solrCloud.StatefulSetName(), time.Second*5, time.Millisecond*50, func(g Gomega, 
found *appsv1.StatefulSet) {
+                               clusterOp, err := 
controllers.GetCurrentClusterOp(found)
+                               g.Expect(err).ToNot(HaveOccurred(), "Error 
occurred while finding clusterLock for SolrCloud")
+                               g.Expect(clusterOp).ToNot(BeNil(), "StatefulSet 
does not have a PvcExpansion lock.")
+                               
g.Expect(clusterOp.Operation).To(Equal(controllers.PvcExpansionLock), 
"StatefulSet does not have a PvcExpansion lock after starting managed update.")
+                       })
+
+                       By("waiting for the expansion's rolling restart to 
begin")
+                       solrCloud = expectSolrCloudWithChecksAndTimeout(ctx, 
solrCloud, time.Second*30, time.Millisecond*100, func(g Gomega, found 
*solrv1beta1.SolrCloud) {
+                               
g.Expect(found.Status.UpToDateNodes).To(BeZero(), "Cloud did not get to a state 
with zero up-to-date replicas when rolling restart began.")
+                               for _, nodeStatus := range 
found.Status.SolrNodes {
+                                       
g.Expect(nodeStatus.SpecUpToDate).To(BeFalse(), "Node not starting as 
out-of-date when rolling restart begins: %s", nodeStatus.Name)
+                               }
+                       })
+
+                       By("checking that the resize has been requested on all 
PVCs when the restart begins")
+                       internalLabels := map[string]string{
+                               util.SolrPVCTechnologyLabel: 
util.SolrCloudPVCTechnology,
+                               util.SolrPVCStorageLabel:    
util.SolrCloudPVCDataStorage,
+                               util.SolrPVCInstanceLabel:   solrCloud.Name,
+                       }
+                       pvcListOps := &client.ListOptions{
+                               Namespace:     solrCloud.Namespace,
+                               LabelSelector: 
labels.SelectorFromSet(internalLabels),
+                       }
+
+                       foundPVCs := &corev1.PersistentVolumeClaimList{}
+                       Expect(k8sClient.List(ctx, foundPVCs, 
pvcListOps)).To(Succeed(), "Could not fetch PVC list")
+                       
Expect(foundPVCs.Items).To(HaveLen(int(*solrCloud.Spec.Replicas)), "Did not 
find the same number of PVCs as Solr Pods")
+                       for _, pvc := range foundPVCs.Items {
+                               // The resize request (spec) is always set when 
the operator hands off to the rolling restart.
+                               // The node-side filesystem resize 
(status.capacity) may still be pending here, since some
+                               // provisioners only complete it when the 
volume is remounted during the restart below.
+                               
Expect(pvc.Spec.Resources.Requests).To(HaveKeyWithValue(corev1.ResourceStorage, 
newStorageSize), "The PVC %q does not have the new storage size in its resource 
requests", pvc.Name)
+                       }
+
+                       statefulSet := 
expectStatefulSetWithChecksAndTimeout(ctx, solrCloud, 
solrCloud.StatefulSetName(), 1, time.Millisecond, func(g Gomega, found 
*appsv1.StatefulSet) {
+                               clusterOp, err := 
controllers.GetCurrentClusterOp(found)
+                               g.Expect(err).ToNot(HaveOccurred(), "Error 
occurred while finding clusterLock for SolrCloud")
+                               g.Expect(clusterOp).ToNot(BeNil(), "StatefulSet 
does not have a RollingUpdate lock.")
+                               
g.Expect(clusterOp.Operation).To(Equal(controllers.UpdateLock), "StatefulSet 
does not have a RollingUpdate lock after starting managed update to increase 
the storage size.")
+                               // The lock metadata is the JSON-encoded 
RollingUpdateMetadata. PVC-backed clouds do not require replica migration.
+                               
g.Expect(clusterOp.Metadata).To(Equal(`{"requiresReplicaMigration":false}`), 
"StatefulSet should not require replica migration, since PVCs are being used.")
+                       })
+
+                       By("waiting for the rolling restart to complete")
+                       // Use the default (longer) timeout, since a managed 
rolling restart of multiple pods waits for
+                       // Solr replicas to recover between pod restarts and 
can take a while on a busy cluster.
+                       expectSolrCloudWithChecks(ctx, solrCloud, func(g 
Gomega, cloud *solrv1beta1.SolrCloud) {
+                               
g.Expect(cloud.Status.UpToDateNodes).To(BeEquivalentTo(*statefulSet.Spec.Replicas),
 "The Rolling Update never completed, not all replicas up to date")
+                               
g.Expect(cloud.Status.ReadyReplicas).To(BeEquivalentTo(*statefulSet.Spec.Replicas),
 "The Rolling Update never completed, not all replicas ready")
+                       })
+
+                       By("waiting for the cluster operation lock to be 
cleared")
+                       expectStatefulSetWithConsistentChecksAndDuration(ctx, 
solrCloud, solrCloud.StatefulSetName(), time.Second*2, func(g Gomega, found 
*appsv1.StatefulSet) {
+                               clusterOp, err := 
controllers.GetCurrentClusterOp(found)
+                               g.Expect(err).ToNot(HaveOccurred(), "Error 
occurred while finding clusterLock for SolrCloud")
+                               g.Expect(clusterOp).To(BeNil(), "StatefulSet 
should not have any cluster lock after finishing its rolling update.")
+                       })
+
+                       By("checking that all PVCs have been fully expanded 
(status.capacity) after the restart")
+                       // The node-side filesystem resize completes as the 
volumes are remounted during the rolling
+                       // restart, so the reported capacity is only guaranteed 
to reflect the new size once the
+                       // restart has finished. This holds for both online- 
and offline-resizing provisioners.
+                       Eventually(func(g Gomega) {
+                               updatedPVCs := 
&corev1.PersistentVolumeClaimList{}
+                               g.Expect(k8sClient.List(ctx, updatedPVCs, 
pvcListOps)).To(Succeed(), "Could not fetch PVC list")
+                               
g.Expect(updatedPVCs.Items).To(HaveLen(int(*solrCloud.Spec.Replicas)), "Did not 
find the same number of PVCs as Solr Pods")
+                               for _, pvc := range updatedPVCs.Items {
+                                       
g.Expect(pvc.Status.Capacity).To(HaveKeyWithValue(corev1.ResourceStorage, 
newStorageSize), "The PVC %q does not have the new storage size in its 
status.capacity", pvc.Name)
+                               }
+                       }).WithContext(ctx).WithTimeout(time.Second * 
90).WithPolling(time.Second).Should(Succeed())
+
+                       By("checking that the collections can be queried after 
the restart")
+                       queryCollection(ctx, solrCloud, solrCollection1, 0)
+                       queryCollection(ctx, solrCloud, solrCollection2, 0)
+               })
+       })
+})
diff --git a/tests/e2e/suite_test.go b/tests/e2e/suite_test.go
index 1ac10f8..b63d227 100644
--- a/tests/e2e/suite_test.go
+++ b/tests/e2e/suite_test.go
@@ -19,10 +19,17 @@ package e2e
 
 import (
        "bufio"
-       "bytes"
        "context"
        "encoding/json"
        "fmt"
+       "io"
+       "math/rand"
+       "os"
+       "path/filepath"
+       "strings"
+       "testing"
+       "time"
+
        solrv1beta1 "github.com/apache/solr-operator/api/v1beta1"
        "github.com/apache/solr-operator/version"
        certManagerApi "github.com/cert-manager/cert-manager/pkg/api"
@@ -31,7 +38,6 @@ import (
        zkApi "github.com/pravega/zookeeper-operator/api/v1beta1"
        "golang.org/x/text/cases"
        "golang.org/x/text/language"
-       "io"
        appsv1 "k8s.io/api/apps/v1"
        corev1 "k8s.io/api/core/v1"
        metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -40,16 +46,10 @@ import (
        "k8s.io/client-go/kubernetes"
        "k8s.io/client-go/kubernetes/scheme"
        "k8s.io/client-go/rest"
-       "math/rand"
-       "os"
-       "path/filepath"
        "sigs.k8s.io/controller-runtime/pkg/client"
        "sigs.k8s.io/controller-runtime/pkg/client/config"
        logf "sigs.k8s.io/controller-runtime/pkg/log"
        "sigs.k8s.io/controller-runtime/pkg/log/zap"
-       "strings"
-       "testing"
-       "time"
 
        . "github.com/onsi/ginkgo/v2"
        . "github.com/onsi/gomega"
@@ -229,7 +229,7 @@ var _ = JustAfterEach(func(ctx context.Context) {
                        getSolrOperatorPodName(ctx, 
solrOperatorReleaseNamespace),
                        solrOperatorReleaseNamespace,
                        &startTime,
-                       fmt.Sprintf("%q: %q", "namespace", testNamespace()),
+                       fmt.Sprintf("%q:%q", "namespace", testNamespace()),
                )
                // Always save the logs of the Solr Operator for the test
                writeAllSolrInfoToFiles(
@@ -313,11 +313,26 @@ func writeAllSolrInfoToFiles(ctx context.Context, 
directory string, namespace st
        for _, pod := range foundPods.Items {
                writeAllPodInfoToFiles(
                        ctx,
-                       directory+pod.Name,
+                       directory+pod.Name+".pod",
                        &pod,
                )
        }
 
+       listOps = &client.ListOptions{
+               Namespace:     namespace,
+               LabelSelector: labelSelector,
+       }
+
+       foundPVCs := &corev1.PersistentVolumeClaimList{}
+       Expect(k8sClient.List(ctx, foundPVCs, listOps)).To(Succeed(), "Could 
not fetch Solr PVCs")
+       Expect(foundPVCs).ToNot(BeNil(), "No Solr PVCs could be found")
+       for _, pvc := range foundPVCs.Items {
+               writeAllPvcInfoToFiles(
+                       directory+pvc.Name+".pvc",
+                       &pvc,
+               )
+       }
+
        foundStatefulSets := &appsv1.StatefulSetList{}
        Expect(k8sClient.List(ctx, foundStatefulSets, listOps)).To(Succeed(), 
"Could not fetch Solr statefulSets")
        Expect(foundStatefulSets).ToNot(BeNil(), "No Solr statefulSet could be 
found")
@@ -388,8 +403,8 @@ func writeSolrClusterStatusInfoToFile(ctx context.Context, 
baseFilename string,
 func writeAllStatefulSetInfoToFiles(baseFilename string, statefulSet 
*appsv1.StatefulSet) {
        // Write statefulSet to a file
        statusFile, err := os.Create(baseFilename + ".status.json")
-       defer statusFile.Close()
        Expect(err).ToNot(HaveOccurred(), "Could not open file to save 
statefulSet status: %s", baseFilename+".status.json")
+       defer statusFile.Close()
        jsonBytes, marshErr := json.MarshalIndent(statefulSet, "", "\t")
        Expect(marshErr).ToNot(HaveOccurred(), "Could not serialize statefulSet 
json")
        _, writeErr := statusFile.Write(jsonBytes)
@@ -397,8 +412,8 @@ func writeAllStatefulSetInfoToFiles(baseFilename string, 
statefulSet *appsv1.Sta
 
        // Write events for statefulSet to a file
        eventsFile, err := os.Create(baseFilename + ".events.json")
-       defer eventsFile.Close()
        Expect(err).ToNot(HaveOccurred(), "Could not open file to save 
statefulSet events: %s", baseFilename+".events.yaml")
+       defer eventsFile.Close()
 
        eventList, err := 
rawK8sClient.CoreV1().Events(statefulSet.Namespace).Search(scheme.Scheme, 
statefulSet)
        Expect(err).ToNot(HaveOccurred(), "Could not find events for 
statefulSet: %s", statefulSet.Name)
@@ -408,13 +423,39 @@ func writeAllStatefulSetInfoToFiles(baseFilename string, 
statefulSet *appsv1.Sta
        Expect(writeErr).ToNot(HaveOccurred(), "Could not write statefulSet 
events json to file")
 }
 
+// writeAllPvcInfoToFiles writes the following each to a separate file with 
the given base name & directory.
+//   - PVC Spec/Status
+//   - PVC Events
+func writeAllPvcInfoToFiles(baseFilename string, pvc 
*corev1.PersistentVolumeClaim) {
+       // Write PVC to a file
+       statusFile, err := os.Create(baseFilename + ".status.json")
+       Expect(err).ToNot(HaveOccurred(), "Could not open file to save PVC 
status: %s", baseFilename+".status.json")
+       defer statusFile.Close()
+       jsonBytes, marshErr := json.MarshalIndent(pvc, "", "\t")
+       Expect(marshErr).ToNot(HaveOccurred(), "Could not serialize PVC json")
+       _, writeErr := statusFile.Write(jsonBytes)
+       Expect(writeErr).ToNot(HaveOccurred(), "Could not write PVC json to 
file")
+
+       // Write events for PVC to a file
+       eventsFile, err := os.Create(baseFilename + ".events.json")
+       Expect(err).ToNot(HaveOccurred(), "Could not open file to save PVC 
events: %s", baseFilename+".events.json")
+       defer eventsFile.Close()
+
+       eventList, err := 
rawK8sClient.CoreV1().Events(pvc.Namespace).Search(scheme.Scheme, pvc)
+       Expect(err).ToNot(HaveOccurred(), "Could not find events for PVC: %s", 
pvc.Name)
+       jsonBytes, marshErr = json.MarshalIndent(eventList, "", "\t")
+       Expect(marshErr).ToNot(HaveOccurred(), "Could not serialize PVC events 
json")
+       _, writeErr = eventsFile.Write(jsonBytes)
+       Expect(writeErr).ToNot(HaveOccurred(), "Could not write PVC events json 
to file")
+}
+
 // writeAllServiceInfoToFiles writes the following each to a separate file 
with the given base name & directory.
 //   - Service
 func writeAllServiceInfoToFiles(baseFilename string, service *corev1.Service) {
        // Write service to a file
        statusFile, err := os.Create(baseFilename + ".json")
-       defer statusFile.Close()
        Expect(err).ToNot(HaveOccurred(), "Could not open file to save service 
status: %s", baseFilename+".json")
+       defer statusFile.Close()
        jsonBytes, marshErr := json.MarshalIndent(service, "", "\t")
        Expect(marshErr).ToNot(HaveOccurred(), "Could not serialize service 
json")
        _, writeErr := statusFile.Write(jsonBytes)
@@ -426,8 +467,8 @@ func writeAllServiceInfoToFiles(baseFilename string, 
service *corev1.Service) {
 func writeAllSecretInfoToFiles(baseFilename string, secret *corev1.Secret) {
        // Write service to a file
        statusFile, err := os.Create(baseFilename + ".json")
-       defer statusFile.Close()
        Expect(err).ToNot(HaveOccurred(), "Could not open file to save secret 
status: %s", baseFilename+".json")
+       defer statusFile.Close()
        jsonBytes, marshErr := json.MarshalIndent(secret, "", "\t")
        Expect(marshErr).ToNot(HaveOccurred(), "Could not serialize secret 
json")
        _, writeErr := statusFile.Write(jsonBytes)
@@ -441,8 +482,8 @@ func writeAllSecretInfoToFiles(baseFilename string, secret *corev1.Secret) {
 func writeAllPodInfoToFiles(ctx context.Context, baseFilename string, pod *corev1.Pod) {
        // Write pod to a file
        statusFile, err := os.Create(baseFilename + ".status.json")
-       defer statusFile.Close()
        Expect(err).ToNot(HaveOccurred(), "Could not open file to save pod status: %s", baseFilename+".status.json")
+       defer statusFile.Close()
        jsonBytes, marshErr := json.MarshalIndent(pod, "", "\t")
        Expect(marshErr).ToNot(HaveOccurred(), "Could not serialize pod json")
        _, writeErr := statusFile.Write(jsonBytes)
@@ -450,8 +491,8 @@ func writeAllPodInfoToFiles(ctx context.Context, baseFilename string, pod *corev
 
        // Write events for pod to a file
        eventsFile, err := os.Create(baseFilename + ".events.json")
-       defer eventsFile.Close()
        Expect(err).ToNot(HaveOccurred(), "Could not open file to save pod events: %s", baseFilename+".events.yaml")
+       defer eventsFile.Close()
 
        eventList, err := rawK8sClient.CoreV1().Events(pod.Namespace).Search(scheme.Scheme, pod)
        Expect(err).ToNot(HaveOccurred(), "Could not find events for pod: %s", pod.Name)
@@ -489,22 +530,18 @@ func writePodLogsToFile(ctx context.Context, filename string, podName string, po
        Expect(logsErr).ToNot(HaveOccurred(), "Could not open stream to fetch pod logs. namespace: %s, pod: %s", podNamespace, podName)
        defer podLogs.Close()
 
-       var logReader io.Reader
-       logReader = podLogs
-
        if filterLinesWithString != "" {
-               filteredWriter := bytes.NewBufferString("")
                scanner := bufio.NewScanner(podLogs)
                for scanner.Scan() {
                        line := scanner.Text()
                        if strings.Contains(line, filterLinesWithString) {
-                               io.WriteString(filteredWriter, line)
-                               io.WriteString(filteredWriter, "\n")
+                               _, err = io.WriteString(logFile, line)
+                               _, err = io.WriteString(logFile, "\n")
                        }
                }
-               logReader = filteredWriter
+       } else {
+               _, err = io.Copy(logFile, podLogs)
        }
 
-       _, err = io.Copy(logFile, logReader)
        Expect(err).ToNot(HaveOccurred(), "Could not write podLogs to file: %s", filename)
 }
diff --git a/tests/scripts/manage_e2e_tests.sh b/tests/scripts/manage_e2e_tests.sh
index 09a0c77..c9cfd7f 100755
--- a/tests/scripts/manage_e2e_tests.sh
+++ b/tests/scripts/manage_e2e_tests.sh
@@ -73,7 +73,7 @@ if [[ -z "${OPERATOR_IMAGE:-}" ]]; then
   echo "Specify a Docker image for the Solr Operator through -i, or through the OPERATOR_IMAGE env var" >&2 && exit 1
 fi
 if [[ -z "${KUBERNETES_VERSION:-}" ]]; then
-  KUBERNETES_VERSION="v1.26.6"
+  KUBERNETES_VERSION="v1.33.7"
 fi
 if [[ -z "${SOLR_IMAGE:-}" ]]; then
   SOLR_IMAGE="${SOLR_VERSION:-9.10.0}"
@@ -96,7 +96,8 @@ export RAW_GINKGO
 export REUSE_KIND_CLUSTER_IF_EXISTS="${REUSE_KIND_CLUSTER_IF_EXISTS:-true}" # This is used for all start_cluster calls
 export LEAVE_KIND_CLUSTER_ON_SUCCESS="${LEAVE_KIND_CLUSTER_ON_SUCCESS:-false}" # This is only used when using run_tests or run_with_cluster
 
-export CERT_MANAGER_VERSION=1.12.3
+export RAWFILE_LOCAL_PV_VERSION=0.13.1
+export CERT_MANAGER_VERSION=1.17.4
 export CERT_MANAGER_CSI_DRIVER_VERSION=0.5.0
 
 function add_image_to_kind_repo_if_local() {
@@ -190,6 +191,11 @@ function setup_cluster() {
   kubectl get configmap coredns -n kube-system -o yaml | sed 's/\(.*\)ttl 30\(.*\)/\1ttl 5\2/' | kubectl replace -n kube-system -f -
   echo ""
 
+  printf "Installing Rawfile LocalPV Provisioner\n"
+  helm repo add rawfile-localpv https://openebs.github.io/rawfile-localpv --force-update
+  helm upgrade -i -n openebs --create-namespace  rawfile-localpv rawfile-localpv/rawfile-localpv --version "${RAWFILE_LOCAL_PV_VERSION}" --set analytics.enabled=false
+  echo ""
+
   printf "Installing Cert Manager\n"
   helm repo add cert-manager https://charts.jetstack.io --force-update
   helm upgrade -i -n cert-manager --create-namespace  cert-manager cert-manager/cert-manager --version "${CERT_MANAGER_VERSION}" --set installCRDs=true

Reply via email to