diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 42639e9efcf1e0f9362f759ae63a31b8eeb0d581..16eb8edab4d9fdfc6e3672c56ef5c9f6962d0c0b 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -262,6 +262,8 @@ struct sve_vec_cost : simd_vec_cost
 			  unsigned int fadda_f64_cost,
 			  unsigned int gather_load_x32_cost,
 			  unsigned int gather_load_x64_cost,
+			  unsigned int gather_load_x32_init_cost,
+			  unsigned int gather_load_x64_init_cost,
 			  unsigned int scatter_store_elt_cost)
     : simd_vec_cost (base),
       clast_cost (clast_cost),
@@ -270,6 +272,8 @@ struct sve_vec_cost : simd_vec_cost
       fadda_f64_cost (fadda_f64_cost),
       gather_load_x32_cost (gather_load_x32_cost),
       gather_load_x64_cost (gather_load_x64_cost),
+      gather_load_x32_init_cost (gather_load_x32_init_cost),
+      gather_load_x64_init_cost (gather_load_x64_init_cost),
       scatter_store_elt_cost (scatter_store_elt_cost)
   {}
 
@@ -289,6 +293,12 @@ struct sve_vec_cost : simd_vec_cost
   const int gather_load_x32_cost;
   const int gather_load_x64_cost;
 
+  /* Additional loop initialization cost of using a gather load instruction.  The x32
+     value is for loads of 32-bit elements and the x64 value is for loads of
+     64-bit elements.  */
+  const int gather_load_x32_init_cost;
+  const int gather_load_x64_init_cost;
+
   /* The per-element cost of a scatter store.  */
   const int scatter_store_elt_cost;
 };
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index eafa377cb095f49408d8a926fb49ce13e2155ba2..da2feb54ddad9b39db92e0a9ec7c4e40cfa3e4e2 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -16227,6 +16227,10 @@ private:
      supported by Advanced SIMD and SVE2.  */
   bool m_has_avg = false;
 
+  /* Additional initialization costs for using gather or scatter operation in
+     the current loop.  */
+  unsigned int m_sve_gather_scatter_init_cost = 0;
+
   /* True if the vector body contains a store to a decl and if the
      function is known to have a vld1 from the same decl.
 
@@ -17291,6 +17295,20 @@ aarch64_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
 	stmt_cost = aarch64_detect_vector_stmt_subtype (m_vinfo, kind,
 							stmt_info, vectype,
 							where, stmt_cost);
+
+      /* Check if we've seen an SVE gather/scatter operation and which size.  */
+      if (kind == scalar_load
+	  && aarch64_sve_mode_p (TYPE_MODE (vectype))
+	  && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_GATHER_SCATTER)
+	{
+	  const sve_vec_cost *sve_costs = aarch64_tune_params.vec_costs->sve;
+	  if (GET_MODE_UNIT_BITSIZE (TYPE_MODE (vectype)) == 64)
+	    m_sve_gather_scatter_init_cost
+	      += sve_costs->gather_load_x64_init_cost;
+	  else
+	    m_sve_gather_scatter_init_cost
+	      += sve_costs->gather_load_x32_init_cost;
+	}
     }
 
   /* Do any SVE-specific adjustments to the cost.  */
@@ -17676,6 +17694,12 @@ aarch64_vector_costs::finish_cost (const vector_costs *uncast_scalar_costs)
       m_costs[vect_body] = adjust_body_cost (loop_vinfo, scalar_costs,
 					     m_costs[vect_body]);
       m_suggested_unroll_factor = determine_suggested_unroll_factor ();
+
+      /* For gather and scatters there's an additional overhead for the first
+	 iteration.  For low count loops they're not beneficial so model the
+	 overhead as loop prologue costs.  */
+      if (m_sve_gather_scatter_init_cost)
+	m_costs[vect_prologue] += m_sve_gather_scatter_init_cost;
     }
 
   /* Apply the heuristic described above m_stp_sequence_cost.  Prefer
diff --git a/gcc/config/aarch64/tuning_models/a64fx.h b/gcc/config/aarch64/tuning_models/a64fx.h
index 6091289d4c3c66f01d7e4dbf97a85c1f8c40bb0b..378a1b3889ee265859786c1ff6525fce2305b615 100644
--- a/gcc/config/aarch64/tuning_models/a64fx.h
+++ b/gcc/config/aarch64/tuning_models/a64fx.h
@@ -104,6 +104,8 @@ static const sve_vec_cost a64fx_sve_vector_cost =
   13, /* fadda_f64_cost  */
   64, /* gather_load_x32_cost  */
   32, /* gather_load_x64_cost  */
+  0, /* gather_load_x32_init_cost  */
+  0, /* gather_load_x64_init_cost  */
   1 /* scatter_store_elt_cost  */
 };
 
diff --git a/gcc/config/aarch64/tuning_models/cortexx925.h b/gcc/config/aarch64/tuning_models/cortexx925.h
index 6cae5b7de5ca7ffad8a0f683e1285039bb55d159..b509cae758419a415d9067ec751ef1e6528eb09a 100644
--- a/gcc/config/aarch64/tuning_models/cortexx925.h
+++ b/gcc/config/aarch64/tuning_models/cortexx925.h
@@ -135,6 +135,8 @@ static const sve_vec_cost cortexx925_sve_vector_cost =
      operation more than a 64-bit gather.  */
   14, /* gather_load_x32_cost  */
   12, /* gather_load_x64_cost  */
+  42, /* gather_load_x32_init_cost  */
+  24, /* gather_load_x64_init_cost  */
   1 /* scatter_store_elt_cost  */
 };
 
diff --git a/gcc/config/aarch64/tuning_models/generic.h b/gcc/config/aarch64/tuning_models/generic.h
index 2b1f68b3052117814161a32f426422736ad6462b..101969bdbb9ccf7eafbd9a1cd6e25f0b584fb261 100644
--- a/gcc/config/aarch64/tuning_models/generic.h
+++ b/gcc/config/aarch64/tuning_models/generic.h
@@ -105,6 +105,8 @@ static const sve_vec_cost generic_sve_vector_cost =
   2, /* fadda_f64_cost  */
   4, /* gather_load_x32_cost  */
   2, /* gather_load_x64_cost  */
+  12, /* gather_load_x32_init_cost  */
+  4, /* gather_load_x64_init_cost  */
   1 /* scatter_store_elt_cost  */
 };
 
diff --git a/gcc/config/aarch64/tuning_models/generic_armv8_a.h b/gcc/config/aarch64/tuning_models/generic_armv8_a.h
index b38b9a8c5cad7d12aa38afdb610a14a25e755010..b5088afe068aa4be7f9dd614cfdd2a51fa96e524 100644
--- a/gcc/config/aarch64/tuning_models/generic_armv8_a.h
+++ b/gcc/config/aarch64/tuning_models/generic_armv8_a.h
@@ -106,6 +106,8 @@ static const sve_vec_cost generic_armv8_a_sve_vector_cost =
   2, /* fadda_f64_cost  */
   4, /* gather_load_x32_cost  */
   2, /* gather_load_x64_cost  */
+  12, /* gather_load_x32_init_cost  */
+  4, /* gather_load_x64_init_cost  */
   1 /* scatter_store_elt_cost  */
 };
 
diff --git a/gcc/config/aarch64/tuning_models/generic_armv9_a.h b/gcc/config/aarch64/tuning_models/generic_armv9_a.h
index 7156dbe5787e831bc4343deb7d7b88e9823fc1bc..999985ed40f694f2681779d940bdb282f289b8e3 100644
--- a/gcc/config/aarch64/tuning_models/generic_armv9_a.h
+++ b/gcc/config/aarch64/tuning_models/generic_armv9_a.h
@@ -136,6 +136,8 @@ static const sve_vec_cost generic_armv9_a_sve_vector_cost =
      operation more than a 64-bit gather.  */
   14, /* gather_load_x32_cost  */
   12, /* gather_load_x64_cost  */
+  42, /* gather_load_x32_init_cost  */
+  24, /* gather_load_x64_init_cost  */
   3 /* scatter_store_elt_cost  */
 };
 
diff --git a/gcc/config/aarch64/tuning_models/neoverse512tvb.h b/gcc/config/aarch64/tuning_models/neoverse512tvb.h
index 825c6a64990b72cda3641737957dc94d75db1509..d2a0b647791de8fca6d7684849d2ab1e9104b045 100644
--- a/gcc/config/aarch64/tuning_models/neoverse512tvb.h
+++ b/gcc/config/aarch64/tuning_models/neoverse512tvb.h
@@ -79,6 +79,8 @@ static const sve_vec_cost neoverse512tvb_sve_vector_cost =
      operation more than a 64-bit gather.  */
   14, /* gather_load_x32_cost  */
   12, /* gather_load_x64_cost  */
+  42, /* gather_load_x32_init_cost  */
+  24, /* gather_load_x64_init_cost  */
   3 /* scatter_store_elt_cost  */
 };
 
diff --git a/gcc/config/aarch64/tuning_models/neoversen2.h b/gcc/config/aarch64/tuning_models/neoversen2.h
index d41e714aa045266ecae62a36ed02dfbfb7597c3a..1a5b66901b5c3fb78f87fee40236957139644585 100644
--- a/gcc/config/aarch64/tuning_models/neoversen2.h
+++ b/gcc/config/aarch64/tuning_models/neoversen2.h
@@ -135,6 +135,8 @@ static const sve_vec_cost neoversen2_sve_vector_cost =
      operation more than a 64-bit gather.  */
   14, /* gather_load_x32_cost  */
   12, /* gather_load_x64_cost  */
+  42, /* gather_load_x32_init_cost  */
+  24, /* gather_load_x64_init_cost  */
   3 /* scatter_store_elt_cost  */
 };
 
diff --git a/gcc/config/aarch64/tuning_models/neoversen3.h b/gcc/config/aarch64/tuning_models/neoversen3.h
index 36f770c0a14fc127c75a60cd37048d46c3b069c7..cfd5060e6b64a0433de41b03cde886da119d9a1c 100644
--- a/gcc/config/aarch64/tuning_models/neoversen3.h
+++ b/gcc/config/aarch64/tuning_models/neoversen3.h
@@ -135,6 +135,8 @@ static const sve_vec_cost neoversen3_sve_vector_cost =
      operation more than a 64-bit gather.  */
   14, /* gather_load_x32_cost  */
   12, /* gather_load_x64_cost  */
+  42, /* gather_load_x32_init_cost  */
+  24, /* gather_load_x64_init_cost  */
   1 /* scatter_store_elt_cost  */
 };
 
diff --git a/gcc/config/aarch64/tuning_models/neoversev1.h b/gcc/config/aarch64/tuning_models/neoversev1.h
index 0fc41ce6a41b3135fa06d2bda1f517fdf4f8dbcf..705ed025730f6683109a4796c6eefa55b437cec9 100644
--- a/gcc/config/aarch64/tuning_models/neoversev1.h
+++ b/gcc/config/aarch64/tuning_models/neoversev1.h
@@ -126,6 +126,8 @@ static const sve_vec_cost neoversev1_sve_vector_cost =
   8, /* fadda_f64_cost  */
   32, /* gather_load_x32_cost  */
   16, /* gather_load_x64_cost  */
+  96, /* gather_load_x32_init_cost  */
+  32, /* gather_load_x64_init_cost  */
   3 /* scatter_store_elt_cost  */
 };
 
diff --git a/gcc/config/aarch64/tuning_models/neoversev2.h b/gcc/config/aarch64/tuning_models/neoversev2.h
index c9c3019dd01a98bc20a76e8455fb59ff24a9ff6c..47908636b0f4c3eadd5848b590fd079c1c04aa10 100644
--- a/gcc/config/aarch64/tuning_models/neoversev2.h
+++ b/gcc/config/aarch64/tuning_models/neoversev2.h
@@ -135,6 +135,8 @@ static const sve_vec_cost neoversev2_sve_vector_cost =
      operation more than a 64-bit gather.  */
   14, /* gather_load_x32_cost  */
   12, /* gather_load_x64_cost  */
+  42, /* gather_load_x32_init_cost  */
+  24, /* gather_load_x64_init_cost  */
   3 /* scatter_store_elt_cost  */
 };
 
diff --git a/gcc/config/aarch64/tuning_models/neoversev3.h b/gcc/config/aarch64/tuning_models/neoversev3.h
index c602d067c7116cf6b081caeae8d36f9969e06d8d..c91e8c829532f9236de0102770e5c6b94e83da9a 100644
--- a/gcc/config/aarch64/tuning_models/neoversev3.h
+++ b/gcc/config/aarch64/tuning_models/neoversev3.h
@@ -135,6 +135,8 @@ static const sve_vec_cost neoversev3_sve_vector_cost =
      operation more than a 64-bit gather.  */
   14, /* gather_load_x32_cost  */
   12, /* gather_load_x64_cost  */
+  42, /* gather_load_x32_init_cost  */
+  24, /* gather_load_x64_init_cost  */
   1 /* scatter_store_elt_cost  */
 };
 
diff --git a/gcc/config/aarch64/tuning_models/neoversev3ae.h b/gcc/config/aarch64/tuning_models/neoversev3ae.h
index 96d7ccf03cd96056d09676d908c63a25e3da6765..61e439326eb6f983abf8574e657cfbb0c2f9bb33 100644
--- a/gcc/config/aarch64/tuning_models/neoversev3ae.h
+++ b/gcc/config/aarch64/tuning_models/neoversev3ae.h
@@ -135,6 +135,8 @@ static const sve_vec_cost neoversev3ae_sve_vector_cost =
      operation more than a 64-bit gather.  */
   14, /* gather_load_x32_cost  */
   12, /* gather_load_x64_cost  */
+  42, /* gather_load_x32_init_cost  */
+  24, /* gather_load_x64_init_cost  */
   1 /* scatter_store_elt_cost  */
 };
 
