hubcio commented on code in PR #2675:
URL: https://github.com/apache/iggy/pull/2675#discussion_r2793324877
##########
core/metadata/src/impls/metadata.rs:
##########
@@ -24,6 +26,106 @@ use journal::{Journal, JournalHandle};
use message_bus::MessageBus;
use tracing::{debug, warn};
+/// Trait for metadata snapshot implementations.
+///
+/// This is the interface that `MetadataHandle::Snapshot` must satisfy.
+/// It provides methods for creating, encoding, decoding, and restoring
snapshots.
+#[allow(unused)]
+pub trait Snapshot: Sized {
+ /// The error type for snapshot operations.
+ type Error: std::error::Error;
+
+ /// Create a snapshot from the current state of a state machine.
+ ///
+ /// # Arguments
+ /// * `stm` - The state machine to snapshot
+ /// * `sequence_number` - Monotonically increasing snapshot sequence number
+ fn create<T>(stm: &T, sequence_number: u64) -> Result<Self, Self::Error>
+ where
+ T: FillSnapshot;
+
+ /// Encode the snapshot to msgpack bytes.
+ fn encode(&self) -> Result<Vec<u8>, Self::Error>;
+
+ /// Decode a snapshot from msgpack bytes.
+ fn decode(bytes: &[u8]) -> Result<Self, Self::Error>;
+
+ /// Restore a state machine from this snapshot.
+ fn restore<T>(&self) -> Result<T, Self::Error>
+ where
+ T: RestoreSnapshot;
+
+ /// Get the snapshot sequence number.
+ fn sequence_number(&self) -> u64;
+
+ /// Get the timestamp when this snapshot was created.
+ fn created_at(&self) -> u64;
+}
Review Comment:
This trait belongs in the `snapshot` module, alongside
`Snapshotable`/`FillSnapshot`/`RestoreSnapshot`.
##########
core/metadata/src/stm/snapshot.rs:
##########
@@ -0,0 +1,260 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use serde::{Deserialize, Serialize, de::DeserializeOwned};
+use std::fmt;
+
+use crate::stm::consumer_group::ConsumerGroupsSnapshot;
+use crate::stm::stream::StreamsSnapshot;
+use crate::stm::user::UsersSnapshot;
+
+#[derive(Debug)]
+pub enum SnapshotError {
+ /// A required section is missing from the snapshot.
+ MissingSection(&'static str),
+ /// Serialization failed.
+ Serialize(rmp_serde::encode::Error),
+ /// Deserialization failed.
+ Deserialize(rmp_serde::decode::Error),
+ /// Slab ID mismatch during snapshot restore.
+ SlabIdMismatch {
+ section: &'static str,
+ expected: usize,
+ actual: usize,
+ },
+}
+
+impl fmt::Display for SnapshotError {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ match self {
+ SnapshotError::MissingSection(name) => {
+ write!(f, "missing snapshot section: {}", name)
+ }
+ SnapshotError::Serialize(e) => write!(f, "snapshot serialization
failed: {}", e),
+ SnapshotError::Deserialize(e) => write!(f, "snapshot
deserialization failed: {}", e),
+ SnapshotError::SlabIdMismatch {
+ section,
+ expected,
+ actual,
+ } => {
+ write!(
+ f,
+ "slab ID mismatch in section '{}': expected {}, got {}",
+ section, expected, actual
+ )
+ }
+ }
+ }
+}
+
+impl std::error::Error for SnapshotError {
+ fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
+ match self {
+ SnapshotError::Serialize(e) => Some(e),
+ SnapshotError::Deserialize(e) => Some(e),
+ _ => None,
+ }
+ }
+}
+
+/// The snapshot container for all metadata state machines.
+/// Each field corresponds to one state machine's serialized state.
+#[derive(Debug, Clone, Default, Serialize, Deserialize)]
+pub struct MetadataSnapshot {
+ /// Timestamp when the snapshot was created (microseconds since epoch).
+ pub created_at: u64,
+ /// Monotonically increasing snapshot sequence number.
+ pub sequence_number: u64,
+ /// Users state machine snapshot data.
+ pub users: Option<UsersSnapshot>,
+ /// Streams state machine snapshot data.
+ pub streams: Option<StreamsSnapshot>,
+ /// Consumer groups state machine snapshot data.
+ pub consumer_groups: Option<ConsumerGroupsSnapshot>,
+}
Review Comment:
I think we should add a `version: u32` field. What if someone adds a new
node to the cluster that uses a different snapshot format, because they want
to upgrade the cluster one node at a time? A version field opens up the
possibility of implementing migrators. I'm not saying the migrators should be
done now, but adding the version field now should be fine.
##########
core/metadata/src/stm/mux.rs:
##########
@@ -91,7 +92,58 @@ where
}
}
+/// Recursive case for variadic tuple pattern: (Head, Tail)
+/// Fills snapshot from head and tail, and restores both on restore.
+impl<S, Rest> FillSnapshot for variadic!(S, ...Rest)
+where
+ S: FillSnapshot,
+ Rest: FillSnapshot,
+{
+ fn fill_snapshot(&self, snapshot: &mut MetadataSnapshot) -> Result<(),
SnapshotError> {
+ self.0.fill_snapshot(snapshot)?;
+ self.1.fill_snapshot(snapshot)?;
+ Ok(())
+ }
+}
+
+impl<S, Rest> RestoreSnapshot for variadic!(S, ...Rest)
+where
+ S: RestoreSnapshot,
+ Rest: RestoreSnapshot,
+{
+ fn restore_snapshot(snapshot: &MetadataSnapshot) -> Result<Self,
SnapshotError> {
+ let head = S::restore_snapshot(snapshot)?;
+ let tail = Rest::restore_snapshot(snapshot)?;
+ Ok((head, tail))
+ }
+}
+
+impl<T> FillSnapshot for MuxStateMachine<T>
+where
+ T: StateMachine + FillSnapshot,
+{
+ fn fill_snapshot(&self, snapshot: &mut MetadataSnapshot) -> Result<(),
SnapshotError> {
+ self.inner.fill_snapshot(snapshot)
+ }
+}
+
+impl<T> RestoreSnapshot for MuxStateMachine<T>
+where
+ T: StateMachine + RestoreSnapshot,
+{
+ fn restore_snapshot(snapshot: &MetadataSnapshot) -> Result<Self,
SnapshotError> {
+ let inner = T::restore_snapshot(snapshot)?;
+ Ok(MuxStateMachine::new(inner))
+ }
+}
+
+#[allow(unused_imports)]
mod tests {
Review Comment:
All roundtrip tests use empty state machines. With the slab gap bug, a test
that creates items, deletes one, then snapshots+restores would have caught the
issue. Add at least one populated-with-deletions roundtrip.
##########
core/metadata/src/stm/stream.rs:
##########
@@ -466,3 +508,160 @@ impl StateHandler for DeletePartitions {
}
}
}
+
+/// Snapshot representation for the Streams state machine.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct StreamsSnapshot {
+ pub items: Vec<(usize, StreamSnapshot)>,
+}
+
+impl Snapshotable for Streams {
+ type Snapshot = StreamsSnapshot;
+
+ fn to_snapshot(&self) -> Self::Snapshot {
+ self.snapshot_read(|inner| {
+ let items: Vec<(usize, StreamSnapshot)> = inner
+ .items
+ .iter()
+ .map(|(stream_id, stream)| {
+ let (size_bytes, messages_count, segments_count) =
+ stream.stats.load_for_snapshot();
+ let topics: Vec<(usize, TopicSnapshot)> = stream
+ .topics
+ .iter()
+ .map(|(topic_id, topic)| {
+ let (t_size, t_msgs, t_segs) =
topic.stats.load_for_snapshot();
+ (
+ topic_id,
+ TopicSnapshot {
+ id: topic.id,
+ name: topic.name.to_string(),
+ created_at: topic.created_at,
+ replication_factor:
topic.replication_factor,
+ message_expiry: topic.message_expiry,
+ compression_algorithm:
topic.compression_algorithm,
+ max_topic_size: topic.max_topic_size,
+ stats: StatsSnapshot {
+ size_bytes: t_size,
+ messages_count: t_msgs,
+ segments_count: t_segs,
+ },
+ partitions: topic
+ .partitions
+ .iter()
+ .map(|p| PartitionSnapshot {
+ id: p.id,
+ created_at: p.created_at,
+ })
+ .collect(),
+ round_robin_counter: topic
+ .round_robin_counter
+ .load(Ordering::Relaxed),
+ },
+ )
+ })
+ .collect();
+ (
+ stream_id,
+ StreamSnapshot {
+ id: stream.id,
+ name: stream.name.to_string(),
+ created_at: stream.created_at,
+ stats: StatsSnapshot {
+ size_bytes,
+ messages_count,
+ segments_count,
+ },
+ topics,
+ },
+ )
+ })
+ .collect();
+ StreamsSnapshot { items }
+ })
+ }
+
+ fn from_snapshot(
+ snapshot: Self::Snapshot,
+ ) -> Result<Self, crate::stm::snapshot::SnapshotError> {
+ use crate::stm::snapshot::SnapshotError;
+
+ let mut items: Slab<Stream> = Slab::new();
+ let mut index: AHashMap<Arc<str>, usize> = AHashMap::new();
+
+ for (expected_id, stream_snap) in snapshot.items {
+ let stream_stats = Arc::new(StreamStats::default());
+ stream_stats.store_from_snapshot(
+ stream_snap.stats.size_bytes,
+ stream_snap.stats.messages_count,
+ stream_snap.stats.segments_count,
+ );
+
+ let mut topics: Slab<Topic> = Slab::new();
+ let mut topic_index: AHashMap<Arc<str>, usize> = AHashMap::new();
+
+ for (expected_topic_id, topic_snap) in stream_snap.topics {
+ let topic_stats =
Arc::new(TopicStats::new(stream_stats.clone()));
+ topic_stats.store_from_snapshot(
+ topic_snap.stats.size_bytes,
+ topic_snap.stats.messages_count,
+ topic_snap.stats.segments_count,
+ );
+ let topic_name: Arc<str> = Arc::from(topic_snap.name.as_str());
+ let topic = Topic {
+ id: topic_snap.id,
+ name: topic_name.clone(),
+ created_at: topic_snap.created_at,
+ replication_factor: topic_snap.replication_factor,
+ message_expiry: topic_snap.message_expiry,
+ compression_algorithm: topic_snap.compression_algorithm,
+ max_topic_size: topic_snap.max_topic_size,
+ stats: topic_stats,
+ partitions: topic_snap
+ .partitions
+ .into_iter()
+ .map(|p| Partition {
+ id: p.id,
+ created_at: p.created_at,
+ })
+ .collect(),
+ round_robin_counter:
Arc::new(AtomicUsize::new(topic_snap.round_robin_counter)),
+ };
+ let actual_topic_id = topics.insert(topic);
+ if actual_topic_id != expected_topic_id {
+ return Err(SnapshotError::SlabIdMismatch {
+ section: "streams.topics",
+ expected: expected_topic_id,
+ actual: actual_topic_id,
+ });
+ }
+ topic_index.insert(topic_name, actual_topic_id);
+ }
+
+ let stream_name: Arc<str> = Arc::from(stream_snap.name.as_str());
+ let stream = Stream {
+ id: stream_snap.id,
+ name: stream_name.clone(),
+ created_at: stream_snap.created_at,
+ stats: stream_stats,
+ topics,
+ topic_index,
+ };
+
+ let actual_id = items.insert(stream);
+ if actual_id != expected_id {
+ return Err(SnapshotError::SlabIdMismatch {
+ section: "streams",
+ expected: expected_id,
+ actual: actual_id,
+ });
+ }
+ index.insert(stream_name, actual_id);
+ }
+
+ let inner = StreamsInner { index, items };
+ Ok(inner.into())
+ }
+}
Review Comment:
`from_snapshot` inserts into a fresh Slab and asserts actual_id ==
expected_id. This breaks if any item was deleted before the snapshot was taken:
`Slab::insert` fills gaps, so restoring {0, 2} yields IDs {0, 1} →
`SlabIdMismatch`. The same bug is in `consumer_group.rs:320` and `user.rs:399`.
I fixed the same problem in `core/server/metadata`. The answer is
`Slab::from_iter` with (key, value) tuples. See `core/server/src/bootstrap.rs`.
##########
core/metadata/src/stm/mod.rs:
##########
@@ -87,6 +88,19 @@ where
read: Arc<ReadHandle<T>>,
}
+impl<T, C> LeftRight<T, C>
+where
+ T: Absorb<C>,
+{
+ pub fn read<F, R>(&self, f: F) -> R
+ where
+ F: FnOnce(&T) -> R,
+ {
+ let guard = self.read.enter().expect("read handle should be
accessible");
+ f(&*guard)
+ }
+}
Review Comment:
This is a duplicate of `LeftRight::with_state` from `left_right.rs:136`. The
same goes for `snapshot_read:195`, which wraps it. The `Snapshotable` impls
should use the existing `with_state` API instead of adding parallel methods.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]