This is an automated email from the ASF dual-hosted git repository.

jyothsnakonisa pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/cassandra-sidecar.git


The following commit(s) were added to refs/heads/trunk by this push:
     new 000a05ec CASSSIDECAR-419 : Add Docker compose setup for CDC (#330)
000a05ec is described below

commit 000a05ecc0610e1336a01811402593e2f977a629
Author: Jyothsna konisa <[email protected]>
AuthorDate: Tue May 26 15:36:26 2026 -0700

    CASSSIDECAR-419 : Add Docker compose setup for CDC (#330)
    
    Patch by Jyothsna Konisa; Reviewed by Josh McKenzie for CASSSIDECAR-419
---
 .gitignore                                      |   3 +
 CHANGES.txt                                     |   1 +
 build.gradle                                    |   2 +
 docker/cdc-demo/Dockerfile.sidecar              |  38 ++++
 docker/cdc-demo/README.md                       | 290 ++++++++++++++++++++++++
 docker/cdc-demo/conf/sidecar.yaml               |  78 +++++++
 docker/cdc-demo/docker-compose.yml              | 186 +++++++++++++++
 docker/cdc-demo/scripts/cassandra-entrypoint.sh |  49 ++++
 docker/cdc-demo/scripts/init-cdc-schema.sh      |  44 ++++
 docker/cdc-demo/scripts/seed-cdc-configs.sh     |  81 +++++++
 docker/cdc-demo/scripts/start.sh                | 147 ++++++++++++
 docker/cdc-demo/scripts/stop.sh                 |  52 +++++
 server/build.gradle                             |   3 +
 13 files changed, 974 insertions(+)

diff --git a/.gitignore b/.gitignore
index 8bdfcd61..dd88239f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -92,6 +92,9 @@ agents
 bin
 conf
 lib
+# Exception: docker demo config files are tracked
+!docker/cdc-demo/conf/
+!docker/cdc-demo/conf/**
 
 # Local gradle cache
 .gradle
diff --git a/CHANGES.txt b/CHANGES.txt
index 54ee2d2b..782c4104 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,5 +1,6 @@
 0.4.0
 -----
+ * Add Docker Compose setup for local CDC demo (Cassandra → Sidecar → Kafka) (CASSSIDECAR-419)
  * Scope all CDC dependencies exclusively to CdcModule (CASSSIDECAR-447)
  * Add ConfigurationProvider interfaces for pluggable overlay storage (CASSSIDECAR-424)
  * Refactor OperationalJob to have data separate from execution logic (CASSSIDECAR-460)
diff --git a/build.gradle b/build.gradle
index bf5ff20d..910a98e6 100644
--- a/build.gradle
+++ b/build.gradle
@@ -142,6 +142,8 @@ allprojects {
 
         // for dtest jar
         mavenLocal()
+        // Confluent Schema Registry artifacts (kafka-avro-serializer)
+        maven { url "https://packages.confluent.io/maven/"; }
     }
 
     checkstyle {
diff --git a/docker/cdc-demo/Dockerfile.sidecar b/docker/cdc-demo/Dockerfile.sidecar
new file mode 100644
index 00000000..3c5b86f2
--- /dev/null
+++ b/docker/cdc-demo/Dockerfile.sidecar
@@ -0,0 +1,38 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Single-stage build: copies the pre-built distribution assembled on the host
+# by start.sh (./gradlew installDist).  Keep Gradle off the critical path of
+# every docker build — the Gradle daemon on the host stays warm between runs.
+#
+# Usage (called automatically by start.sh):
+#   ./gradlew installDist -x test
+#   docker build -f docker/cdc-demo/Dockerfile.sidecar -t cassandra-sidecar:dev .
+
+FROM eclipse-temurin:11-jre-jammy
+
+WORKDIR /app
+
+# Install the host-built `./gradlew installDist` output as the application root.
+COPY build/install/apache-cassandra-sidecar/ /app/
+
+# Default demo configuration; docker-compose bind-mounts over this file at runtime.
+COPY docker/cdc-demo/conf/sidecar.yaml /app/conf/sidecar.yaml
+
+EXPOSE 9043
+
+ENTRYPOINT ["/app/bin/cassandra-sidecar"]
diff --git a/docker/cdc-demo/README.md b/docker/cdc-demo/README.md
new file mode 100644
index 00000000..ec356912
--- /dev/null
+++ b/docker/cdc-demo/README.md
@@ -0,0 +1,290 @@
+<!--
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+-->
+# CDC Demo — Docker Compose Setup
+
+End-to-end demo that boots Cassandra, Cassandra Sidecar, Kafka, and Confluent
+Schema Registry. Writes to a CDC-enabled Cassandra table are captured by the
+sidecar, serialized as Avro (with schemas registered in Schema Registry), and
+published to a Kafka topic.
+
+## Architecture
+
+```
+┌──────────────┐   cdc_raw/commitlog   ┌──────────────────┐
+│  Cassandra   │ ─────────────────────►│  Cassandra       │──► Kafka topic
+│  (port 9042) │   (shared volume)     │  Sidecar         │    (cdc-mutations)
+└──────────────┘                       │  (port 9043)     │
+                                       └──────────────────┘
+                                                │ KafkaAvroSerializer
+                                                ▼
+                                       ┌──────────────────┐
+                                       │ Schema Registry  │
+                                       │  (port 8081)     │
+                                       └──────────────────┘
+
+                                       ┌──────────────────┐
+                                       │   Kafka UI       │
+                                       │  (port 8080)     │
+                                       └──────────────────┘
+```
+
+**Services:**
+| Service | Image | Role |
+|---|---|---|
+| `kafka` | `confluentinc/cp-kafka:7.6.0` | KRaft broker (no ZooKeeper) |
+| `schema-registry` | `confluentinc/cp-schema-registry:7.6.0` | Avro schema store |
+| `cassandra` | `cassandra:5.0` | CDC-enabled Cassandra node |
+| `cassandra-init` | `cassandra:5.0` | One-shot: seeds sidecar schema + configs |
+| `sidecar` | `cassandra-sidecar:dev` | Reads commit logs, publishes to Kafka |
+| `kafka-ui` | `ghcr.io/kafbat/kafka-ui:v1.5.0` | Browse topics + decoded Avro messages |
+
+## Prerequisites
+
+| Tool | Version | Notes |
+|---|---|---|
+| Docker | 24+ | |
+| Docker Compose | v2 (plugin) | |
+| Java | 11 | Required on host for `./gradlew installDist` |
+| Gradle | via wrapper | No installation needed — `./gradlew` is self-contained |
+
+## Exposed ports
+
+| Port | Service |
+|---|---|
+| `9042` | Cassandra CQL |
+| `9043` | Cassandra Sidecar |
+| `8080` | Kafka UI |
+| `8081` | Confluent Schema Registry |
+
+## Serializer modes
+
+| Mode | `value.serializer` | Schema storage |
+|---|---|---|
+| `confluent` *(default)* | `KafkaAvroSerializer` | Confluent Schema Registry (port 8081) |
+| `bytearray` | `ByteArraySerializer` | None — raw Avro bytes, no schema registry lookup |
+
+## Quick Start
+
+### Step 1 — Start the stack
+
+From `docker/cdc-demo/`, run the start script. It builds the sidecar
+distribution on the host, packages it into a Docker image, and starts all
+services:
+
+```bash
+cd docker/cdc-demo
+./scripts/start.sh
+```
+
+The script handles everything in order:
+1. Stops any existing stack
+2. Runs `./gradlew installDist` on the host
+3. Builds the `cassandra-sidecar:dev` Docker image
+4. Starts all services and waits for CDC iterators to be ready
+
+**Common flags:**
+
+```bash
+./scripts/start.sh --clean        # wipe all data volumes before starting
+./scripts/start.sh --skip-build   # reuse existing cassandra-sidecar:dev image (skip steps 2-3)
+./scripts/start.sh --bytearray    # use ByteArraySerializer instead of Confluent Avro
+```
+
+> **`--skip-build`** is useful when you've only changed a config file or script
+> and don't need to recompile Java. Requires a `cassandra-sidecar:dev` image
+> from a prior run.
+
+### Step 2 — Wait for CDC to be ready
+
+`start.sh` automatically waits until the sidecar is up and CDC iterators have
+started, then prints a **Setup complete** banner with next steps.
+
+To follow progress in another terminal:
+
+```bash
+docker compose logs -f cassandra-init sidecar
+```
+
+### Step 3 — Write mutations to the CDC-enabled table
+
+```bash
+docker exec -it cdc-demo-cassandra-1 cqlsh -e "
+  INSERT INTO cdc_demo.events (id, msg, ts)
+  VALUES (uuid(), 'hello from CDC', toTimestamp(now()));
+"
+```
+
+### Step 4 — View messages in Kafka UI
+
+Open the topic in the Kafka UI:
+
+```
+http://localhost:8080/ui/clusters/local/all-topics/cdc-mutations/messages
+```
+
+**Confluent mode (default):** kafbat is pre-configured with the Schema Registry URL (`http://schema-registry:8081`).
+To see human-readable messages, set the serde dropdowns at the top of the
+Messages tab:
+
+| Field | Serde to select | Why |
+|---|---|---|
+| **Key Serde** | `String` | CDC keys are plain UTF-8 strings (`keyspace:table:pk`) |
+| **Value Serde** | `SchemaRegistry` | Values are Confluent Avro — kafbat fetches the schema by the embedded ID and renders the payload as JSON |
+
+Once set, each message value displays as a decoded JSON object matching the
+CDC-enabled table's schema, for example:
+
+```json
+{
+  "operationType": "INSERT",
+  "timestampMicros": 1746000000000000,
+  "sourceKeyspace": "cdc_demo",
+  "sourceTable": "events",
+  "isPartial": false,
+  "payload": {
+    "id": "550e8400-e29b-41d4-a716-446655440000",
+    "msg": "hello from CDC",
+    "ts": 1746000000000000
+  }
+}
+```
+
+**Bytearray mode:** values are raw Avro bytes with no schema registry lookup. Set
+**Value Serde** to `Bytes` to inspect the raw payload.
+
+### Step 5 — Inspect the registered Avro schema
+
+The sidecar auto-registers one Avro schema per CDC-enabled table on first publish.
+`KafkaAvroSerializer` uses the subject naming convention `{topic}-value`, so for
+the `cdc-mutations` topic the subject is `cdc-mutations-value`.
+
+Open the Kafka UI and navigate to the **Schema Registry** tab to browse the full
+Avro schema:
+
+```
+http://localhost:8080/ui/clusters/local/schemas/cdc-mutations-value
+```
+
+## Supported Cassandra Versions
+
+CDC is supported for **4.0, 4.1, 5.0, 5.1**. To use a different version:
+
+```bash
+CASSANDRA_VERSION=4.1 ./scripts/start.sh
+```
+
+The default is `5.0`. Note: the `cassandra:4.0` Docker image is `linux/amd64`
+only — on Apple Silicon it runs under Rosetta emulation and may be slow to
+start. Use `4.1` or later for ARM64 support.
+
+## Configuration
+
+`conf/sidecar.yaml` is volume-mounted into the sidecar container and can be
+edited without rebuilding the image. Restart the sidecar to pick up changes:
+
+```bash
+docker compose restart sidecar
+```
+
+CDC and Kafka properties are stored in Cassandra and seeded automatically on
+first boot by `scripts/seed-cdc-configs.sh`. To update them on a running cluster:
+
+```bash
+docker exec -it cdc-demo-cassandra-1 cqlsh -e "
+  UPDATE sidecar_internal.configs
+  SET config = config + {'micro_batch_delay_millis': '500'}
+  WHERE service = 'cdc';
+"
+```
+
+To switch serializer mode on a running cluster, delete the existing kafka config
+row and restart the stack:
+
+```bash
+docker exec -it cdc-demo-cassandra-1 cqlsh -e "DELETE FROM sidecar_internal.configs WHERE service = 'kafka';"
+./scripts/start.sh --bytearray --skip-build
+```
+
+## Persistence
+
+All data is stored in named Docker volumes and survives `docker compose down`.
+
+| Volume | Contents |
+|---|---|
+| `cassandra-varlib` | Cassandra data, commitlog, cdc_raw |
+| `kafka-data` | Topic partitions + consumer offsets |
+
+## Stopping
+
+```bash
+./scripts/stop.sh           # stop containers, keep volumes (data preserved)
+./scripts/stop.sh --clean   # stop containers AND delete all data volumes
+```
+
+## Troubleshooting
+
+**Sidecar keeps restarting**
+
+The sidecar waits for `cassandra-init` to complete. Check its logs:
+
+```bash
+docker compose logs cassandra-init
+```
+
+**CDC events not arriving in Kafka**
+
+1. Verify configs were seeded:
+   ```bash
+   docker exec cdc-demo-cassandra-1 cqlsh -e "SELECT * FROM sidecar_internal.configs;"
+   ```
+2. Check sidecar logs for `CDC iterators started successfully`
+3. Confirm CDC is enabled on the table:
+   ```bash
+   docker exec cdc-demo-cassandra-1 cqlsh -e "DESCRIBE TABLE cdc_demo.events;"
+   ```
+
+**Schema Registry connection errors (confluent mode)**
+
+Verify the registry is reachable and schemas are registered:
+
+```bash
+curl -s http://localhost:8081/subjects
+docker compose logs schema-registry
+```
+
+If `seed-cdc-configs.sh` ran before the schema registry was healthy, the kafka
+config row may be missing. Delete and re-run:
+
+```bash
+docker exec -it cdc-demo-cassandra-1 cqlsh -e "DELETE FROM sidecar_internal.configs WHERE service = 'kafka';"
+docker compose run --rm cassandra-init
+```
+
+**JMX connection refused**
+
+Remote JMX is enabled by the `LOCAL_JMX=no` env var on the Cassandra service,
+which causes the stock Cassandra Docker entrypoint to set
+`jmxremote.local.only=false`. The `JVM_EXTRA_OPTS` env var additionally sets
+`-Djava.rmi.server.hostname=cassandra` so RMI stubs advertise a hostname the sidecar can resolve.
+Verify the flags are active:
+
+```bash
+docker exec cdc-demo-cassandra-1 ps aux | grep jmxremote
+```
diff --git a/docker/cdc-demo/conf/sidecar.yaml b/docker/cdc-demo/conf/sidecar.yaml
new file mode 100644
index 00000000..3886eb7b
--- /dev/null
+++ b/docker/cdc-demo/conf/sidecar.yaml
@@ -0,0 +1,78 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+cassandra_instances:
+  - id: 1
+    host: cassandra
+    port: 9042
+    storage_dir: /var/lib/cassandra
+    cdc_dir: /var/lib/cassandra/cdc_raw  # same path cassandra-entrypoint.sh sets as cdc_raw_directory
+    commitlog_dir: /var/lib/cassandra/commitlog  # same path cassandra-entrypoint.sh sets as commitlog_directory
+    staging_dir: /var/lib/cassandra/sstable-staging
+    jmx_host: 127.0.0.1  # loopback: docker-compose.yml runs the sidecar in Cassandra's network namespace
+    jmx_port: 7199
+    jmx_ssl_enabled: false  # demo only — enable TLS in production
+
+sidecar:
+  host: 0.0.0.0
+  port: 9043  # published to the host through the cassandra service in docker-compose.yml
+  cdc:
+    enabled: true
+    segment_hardlink_cache_expiry: 5m
+    table_schema_refresh_time: 5s
+    config_refresh_time: 5s
+  worker_pools:
+    service:
+      name: "sidecar-worker-pool"
+      size: 20
+      max_execution_time: 1m
+    internal:
+      name: "sidecar-internal-worker-pool"
+      size: 20
+      max_execution_time: 15m
+  jmx:
+    max_retries: 10
+    retry_delay: 3s
+  # Required: sidecar creates sidecar_internal keyspace and configs/cdc_states tables
+  schema:
+    is_enabled: true
+    keyspace: sidecar_internal  # keyspace that init-cdc-schema.sh pre-creates and seed-cdc-configs.sh populates
+    replication_strategy: NetworkTopologyStrategy
+    replication_factor: 1
+    lease_schema_ttl: 5m
+  coordination:
+    cluster_lease_claim:
+      electorate_membership_strategy: MostReplicatedKeyspaceTokenZeroElectorateMembership
+      enabled: true
+      initial_delay: 1s
+      initial_delay_random_delta: 5s
+      execute_interval: 100s
+
+driver_parameters:
+  contact_points:
+    - "cassandra:9042"
+  num_connections: 6
+  local_dc: datacenter1  # same value as CASSANDRA_DC on the cassandra service in docker-compose.yml
+
+healthcheck:
+  initial_delay: 0ms
+  execute_interval: 30s
+
+cluster_topology_monitor:
+  enabled: true
+  initial_delay: 0s
+  execute_interval: 1000ms
diff --git a/docker/cdc-demo/docker-compose.yml b/docker/cdc-demo/docker-compose.yml
new file mode 100644
index 00000000..9f76f2b1
--- /dev/null
+++ b/docker/cdc-demo/docker-compose.yml
@@ -0,0 +1,186 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Cassandra CDC demo stack
+#
+# Startup order:
+#   kafka ──► schema-registry
+#   cassandra ──► cassandra-init  (seeds schema + configs, then exits)
+#                     └──► sidecar
+#   kafka-ui
+#
+# Wipe all data: docker compose down -v  (or: ./scripts/stop.sh --clean)
+
+services:
+
+  # ── Kafka (KRaft — no ZooKeeper) ────────────────────────────────────────────
+  kafka:
+    image: confluentinc/cp-kafka:7.6.0
+    networks:
+      - demo-network
+    environment:
+      KAFKA_NODE_ID: 1
+      KAFKA_PROCESS_ROLES: broker,controller
+      KAFKA_LISTENERS: PLAINTEXT://0.0.0.0:9092,CONTROLLER://0.0.0.0:9093
+      KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:9092  # in-network name only; this service publishes no host port
+      KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT,CONTROLLER:PLAINTEXT
+      KAFKA_INTER_BROKER_LISTENER_NAME: PLAINTEXT
+      KAFKA_CONTROLLER_LISTENER_NAMES: CONTROLLER
+      KAFKA_CONTROLLER_QUORUM_VOTERS: 1@kafka:9093
+      KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
+      KAFKA_AUTO_CREATE_TOPICS_ENABLE: "true"
+      # Must not change across restarts — required for volume persistence.
+      CLUSTER_ID: MkU3OEVBNTcwNTJENDM2Qk
+    volumes:
+      - kafka-data:/var/lib/kafka/data
+    healthcheck:
+      test: ["CMD", "kafka-topics", "--bootstrap-server", "kafka:9092", "--list"]
+      interval: 10s
+      timeout: 10s
+      retries: 20
+
+  # ── Confluent Schema Registry ────────────────────────────────────────────────
+  # Stores and serves Avro schemas; the sidecar's KafkaAvroSerializer registers
+  # table schemas here on first publish.  Exposed on host port 8081.
+  schema-registry:
+    image: confluentinc/cp-schema-registry:7.6.0
+    depends_on:
+      kafka:
+        condition: service_healthy
+    networks:
+      - demo-network
+    environment:
+      SCHEMA_REGISTRY_HOST_NAME: schema-registry
+      SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: kafka:9092
+      SCHEMA_REGISTRY_LISTENERS: http://0.0.0.0:8081
+    ports:
+      - "8081:8081"
+    healthcheck:
+      test: ["CMD-SHELL", "curl -sf http://localhost:8081/subjects || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 20
+
+  # ── Cassandra ───────────────────────────────────────────────────────────────
+  cassandra:
+    image: cassandra:${CASSANDRA_VERSION:-5.0}
+    # Patches CDC settings into cassandra.yaml, then delegates to the stock entrypoint.
+    entrypoint: ["/bin/bash", "/scripts/cassandra-entrypoint.sh"]
+    command: ["cassandra", "-f"]
+    networks:
+      - demo-network
+    environment:
+      CASSANDRA_CLUSTER_NAME: "CDC Demo Cluster"
+      CASSANDRA_DC: datacenter1
+      CASSANDRA_RACK: rack1
+      CASSANDRA_ENDPOINT_SNITCH: GossipingPropertyFileSnitch
+      # Required: lets SidecarLoadBalancingPolicy match this node by hostname.
+      CASSANDRA_BROADCAST_RPC_ADDRESS: cassandra
+      # Enables remote JMX so the sidecar can connect.
+      LOCAL_JMX: "no"
+      JVM_EXTRA_OPTS: "-Djava.rmi.server.hostname=cassandra -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false"
+      MAX_HEAP_SIZE: "512M"
+      HEAP_NEWSIZE: "128M"
+    volumes:
+      - ./scripts:/scripts:ro
+      # commitlog/ and cdc_raw/ must share a filesystem for CDC hard-links.
+      - cassandra-varlib:/var/lib/cassandra
+    ports:
+      - "9042:9042"
+      # 9043 is published here because the sidecar shares this container's network namespace.
+      - "9043:9043"
+    healthcheck:
+      test: ["CMD-SHELL", "cqlsh -e 'SELECT now() FROM system.local' 2>/dev/null || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 20
+      start_period: 60s
+
+  # ── Cassandra init (one-shot) ───────────────────────────────────────────────
+  # Seeds sidecar_internal schema and CDC/Kafka configs after Cassandra and
+  # Schema Registry are both healthy.
+  cassandra-init:
+    image: cassandra:${CASSANDRA_VERSION:-5.0}
+    depends_on:
+      cassandra:
+        condition: service_healthy
+      schema-registry:
+        condition: service_healthy
+    networks:
+      - demo-network
+    entrypoint: ["/bin/bash", "-c",
+      "bash /scripts/init-cdc-schema.sh && bash /scripts/seed-cdc-configs.sh"]
+    environment:
+      CASSANDRA_HOST: cassandra
+      KAFKA_BOOTSTRAP_SERVERS: kafka:9092
+      CDC_TOPIC: cdc-mutations
+      CDC_DATACENTER: datacenter1
+      # Set by start.sh: confluent | bytearray
+      SERIALIZER_MODE: ${SERIALIZER_MODE:-confluent}
+      SCHEMA_REGISTRY_URL: http://schema-registry:8081
+    volumes:
+      - ./scripts:/scripts:ro
+    restart: "no"
+
+  # ── Kafka UI ─────────────────────────────────────────────────────────────────
+  # Browse topics and Avro-decoded messages at http://localhost:8080
+  # kafbat/kafka-ui is the actively maintained fork of provectuslabs/kafka-ui.
+  kafka-ui:
+    image: ghcr.io/kafbat/kafka-ui:v1.5.0
+    depends_on:
+      kafka:
+        condition: service_healthy
+    networks:
+      - demo-network
+    environment:
+      KAFKA_CLUSTERS_0_NAME: local
+      KAFKA_CLUSTERS_0_BOOTSTRAPSERVERS: kafka:9092
+      KAFKA_CLUSTERS_0_SCHEMAREGISTRY: http://schema-registry:8081
+    ports:
+      - "8080:8080"
+
+  # ── Sidecar ─────────────────────────────────────────────────────────────────
+  # Shares Cassandra's network namespace — sidecar is reachable at cassandra:9043.
+  sidecar:
+    image: cassandra-sidecar:dev
+    build:
+      context: ../../
+      dockerfile: docker/cdc-demo/Dockerfile.sidecar
+    depends_on:
+      cassandra-init:
+        condition: service_completed_successfully
+    network_mode: "service:cassandra"
+    volumes:
+      # Overrides the baked-in config; edit without rebuilding the image.
+      - ./conf/sidecar.yaml:/app/conf/sidecar.yaml:ro
+      # Shared with Cassandra so the sidecar can read cdc_raw/ and commitlog/.
+      - cassandra-varlib:/var/lib/cassandra
+    healthcheck:
+      test: ["CMD-SHELL", "curl -sf http://localhost:9043/api/v1/__health || exit 1"]
+      interval: 10s
+      timeout: 5s
+      retries: 20
+      start_period: 30s
+
+volumes:
+  kafka-data:  # mounted by the kafka service at /var/lib/kafka/data
+  # commitlog/ and cdc_raw/ must share a filesystem for CDC hard-links.
+  cassandra-varlib:  # mounted at /var/lib/cassandra by both the cassandra and sidecar services
+
+networks:
+  demo-network:
+    driver: bridge
diff --git a/docker/cdc-demo/scripts/cassandra-entrypoint.sh b/docker/cdc-demo/scripts/cassandra-entrypoint.sh
new file mode 100644
index 00000000..b10dbf21
--- /dev/null
+++ b/docker/cdc-demo/scripts/cassandra-entrypoint.sh
@@ -0,0 +1,49 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Patches CDC settings into the stock cassandra.yaml,
+# then hands off to the original Docker entrypoint.
+set -euo pipefail  # abort on command failure, unset variables, and failed pipeline stages
+
+YAML="/etc/cassandra/cassandra.yaml"  # config file that patch_yaml edits in place
+
+patch_yaml() {  # $1 = top-level key, $2 = value: overwrite a live key, else a commented-out one, else append
+    local name="$1" setting="$2" entry="$1: $2"
+    if grep -q "^${name}:" "$YAML"; then
+        sed -i "s|^${name}:.*|${entry}|" "$YAML"
+    elif grep -q "^# *${name}:" "$YAML"; then
+        sed -i "s|^# *${name}:.*|${entry}|" "$YAML"
+    else
+        echo "${entry}" >> "$YAML"
+    fi
+}
+
+# commitlog and cdc_raw must share the same filesystem for CDC hard-links.
+patch_yaml "commitlog_directory"   "/var/lib/cassandra/commitlog"
+patch_yaml "cdc_enabled"           "true"
+patch_yaml "cdc_raw_directory"     "/var/lib/cassandra/cdc_raw"
+patch_yaml "cdc_on_repair_enabled" "false"
+
+# Cassandra 4.0 uses cdc_total_space_in_mb; 4.1+ uses cdc_total_space (with a unit).
+if grep -q "cdc_total_space_in_mb" "$YAML"; then
+    patch_yaml "cdc_total_space_in_mb" "4096"
+else
+    patch_yaml "cdc_total_space" "4096MiB"
+fi
+
+exec /usr/local/bin/docker-entrypoint.sh "$@"  # replace this shell with the image's entrypoint, forwarding all arguments
diff --git a/docker/cdc-demo/scripts/init-cdc-schema.sh b/docker/cdc-demo/scripts/init-cdc-schema.sh
new file mode 100644
index 00000000..2100d5cc
--- /dev/null
+++ b/docker/cdc-demo/scripts/init-cdc-schema.sh
@@ -0,0 +1,44 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Creates the sidecar_internal schema and CDC demo keyspace/table.
+set -euo pipefail
+
+: "${CASSANDRA_HOST:=cassandra}"  # fall back to the compose service name when unset or empty
+
+cqlsh "${CASSANDRA_HOST}" <<'CQL'
+CREATE KEYSPACE IF NOT EXISTS sidecar_internal
+  WITH replication = {'class': 'NetworkTopologyStrategy', 'datacenter1': 1};
+
+CREATE TABLE IF NOT EXISTS sidecar_internal.configs (
+  service text,
+  config  map<text, text>,
+  PRIMARY KEY (service)
+);
+
+CREATE KEYSPACE IF NOT EXISTS cdc_demo
+  WITH replication = {'class': 'NetworkTopologyStrategy', 'datacenter1': 1};
+
+CREATE TABLE IF NOT EXISTS cdc_demo.events (
+  id  uuid      PRIMARY KEY,
+  msg text,
+  ts  timestamp
+) WITH cdc = true;
+CQL
+
+printf '%s\n' "Schema initialised."
diff --git a/docker/cdc-demo/scripts/seed-cdc-configs.sh b/docker/cdc-demo/scripts/seed-cdc-configs.sh
new file mode 100644
index 00000000..5e497d9d
--- /dev/null
+++ b/docker/cdc-demo/scripts/seed-cdc-configs.sh
@@ -0,0 +1,81 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Seeds CDC and Kafka configuration into sidecar_internal.configs.
+# IF NOT EXISTS makes each insert idempotent across restarts.
+#
+# Environment variables:
+#   SERIALIZER_MODE       confluent (default) | bytearray
+#   SCHEMA_REGISTRY_URL   http://schema-registry:8081 (default)
+set -euo pipefail
+
+CASSANDRA_HOST=${CASSANDRA_HOST:-cassandra}
+KAFKA_BOOTSTRAP=${KAFKA_BOOTSTRAP_SERVERS:-kafka:9092}
+CDC_TOPIC=${CDC_TOPIC:-cdc-mutations}
+CDC_DATACENTER=${CDC_DATACENTER:-datacenter1}
+SERIALIZER_MODE=${SERIALIZER_MODE:-confluent}
+SCHEMA_REGISTRY_URL=${SCHEMA_REGISTRY_URL:-http://schema-registry:8081}
+
+echo "Seeding CDC configs (serializer-mode: ${SERIALIZER_MODE})..."
+
+cqlsh "${CASSANDRA_HOST}" <<CQL
+INSERT INTO sidecar_internal.configs (service, config)
+VALUES ('cdc', {
+  'cdc_enabled':              'true',
+  'topic':                    '${CDC_TOPIC}',
+  'jobid':                    'docker-demo-job',
+  'datacenter':               '${CDC_DATACENTER}',
+  'watermark_seconds':        '3600',
+  'micro_batch_delay_millis': '1000',
+  'max_commit_logs':          '4',
+  'persist_state':            'true',
+  'fail_kafka_errors':        'true',
+  'fail_kafka_too_large_errors': 'false'
+}) IF NOT EXISTS;
+CQL
+
+if [ "${SERIALIZER_MODE}" = "confluent" ]; then  # any other value takes the ByteArraySerializer branch below
+    cqlsh "${CASSANDRA_HOST}" <<CQL
+INSERT INTO sidecar_internal.configs (service, config)
+VALUES ('kafka', {
+  'bootstrap.servers':  '${KAFKA_BOOTSTRAP}',
+  'key.serializer':     'org.apache.kafka.common.serialization.StringSerializer',
+  'value.serializer':   'io.confluent.kafka.serializers.KafkaAvroSerializer',
+  'schema.registry.url': '${SCHEMA_REGISTRY_URL}',
+  'acks':               'all',
+  'retries':            '3',
+  'linger.ms':          '5',
+  'batch.size':         '16384'
+}) IF NOT EXISTS;
+CQL
+else
+    cqlsh "${CASSANDRA_HOST}" <<CQL
+INSERT INTO sidecar_internal.configs (service, config)
+VALUES ('kafka', {
+  'bootstrap.servers': '${KAFKA_BOOTSTRAP}',
+  'key.serializer':    'org.apache.kafka.common.serialization.StringSerializer',
+  'value.serializer':  'org.apache.kafka.common.serialization.ByteArraySerializer',
+  'acks':              'all',
+  'retries':           '3',
+  'linger.ms':         '5',
+  'batch.size':        '16384'
+}) IF NOT EXISTS;
+CQL
+fi
+
+echo "Configs seeded."
diff --git a/docker/cdc-demo/scripts/start.sh b/docker/cdc-demo/scripts/start.sh
new file mode 100755
index 00000000..46884447
--- /dev/null
+++ b/docker/cdc-demo/scripts/start.sh
@@ -0,0 +1,147 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Builds the sidecar and starts the CDC demo stack.
+#
+# Usage (from anywhere in the repo):
+#   ./scripts/start.sh                    # build + start in confluent mode (default)
+#   ./scripts/start.sh --bytearray        # build + start in bytearray mode
+#   ./scripts/start.sh --clean            # wipe all data volumes before starting
+#   ./scripts/start.sh --skip-build       # reuse existing cassandra-sidecar:dev image
+#   ./scripts/start.sh --clean --skip-build
+set -euo pipefail
+
+# ANSI color codes
+BOLD='\033[1m'
+GREEN='\033[0;32m'
+CYAN='\033[0;36m'
+YELLOW='\033[0;33m'
+UNDERLINE='\033[4m'
+RESET='\033[0m'
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+DEMO_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
+REPO_ROOT="$(cd "$SCRIPT_DIR/../../.." && pwd)"
+CLEAN=false
+SKIP_BUILD=false
+SERIALIZER_MODE=confluent
+
+usage() {
+    printf "Usage: %s [OPTIONS]\n\n" "$(basename "$0")"
+    printf "Builds the sidecar and starts the CDC demo stack.\n\n"
+    printf "Options:\n"
+    printf "  --confluent     Use Confluent Avro serializer (default)\n"
+    printf "  --bytearray     Use byte-array serializer\n"
+    printf "  --clean         Wipe all data volumes before starting\n"
+    printf "  --skip-build    Reuse existing cassandra-sidecar:dev image\n"
+    printf "  --help          Show this help message\n"
+}
+
+for arg in "$@"; do
+    case "$arg" in
+        --clean)       CLEAN=true ;;
+        --skip-build)  SKIP_BUILD=true ;;
+        --confluent)   SERIALIZER_MODE=confluent ;;
+        --bytearray)   SERIALIZER_MODE=bytearray ;;
+        --help)        usage; exit 0 ;;
+        *) echo "Unknown argument: $arg" >&2; exit 1 ;;
+    esac
+done
+
+# ── Stop existing stack ───────────────────────────────────────────────────────
+if $CLEAN; then
+    bash "$SCRIPT_DIR/stop.sh" --clean
+else
+    bash "$SCRIPT_DIR/stop.sh"
+fi
+
+# ── Build ─────────────────────────────────────────────────────────────────────
+if $SKIP_BUILD; then
+    if ! docker image inspect cassandra-sidecar:dev > /dev/null 2>&1; then
+        echo "ERROR: --skip-build specified but cassandra-sidecar:dev image not found." >&2
+        echo "       Run without --skip-build to build the image first." >&2
+        exit 1
+    fi
+    printf "${YELLOW}Skipping build — reusing existing cassandra-sidecar:dev image.${RESET}\n"
+else
+    printf "${BOLD}==> Building sidecar distribution (./gradlew installDist)...${RESET}\n"
+    "$REPO_ROOT/gradlew" -p "$REPO_ROOT" installDist \
+        -x test -x integrationTest -x containerTest \
+        --parallel --quiet
+
+    printf "${BOLD}==> Building sidecar Docker image...${RESET}\n"
+    DOCKER_BUILDKIT=1 docker build \
+        -f "$REPO_ROOT/docker/cdc-demo/Dockerfile.sidecar" \
+        -t cassandra-sidecar:dev \
+        "$REPO_ROOT"
+fi
+
+# ── Start stack ───────────────────────────────────────────────────────────────
+printf "${BOLD}==> Starting stack (serializer-mode: ${SERIALIZER_MODE})...${RESET}\n"
+cd "$DEMO_DIR"
+export SERIALIZER_MODE
+docker compose up -d
+
+# ── Wait for sidecar ─────────────────────────────────────────────────────────
+echo ""
+echo "Waiting for sidecar to be ready (follow progress: docker compose logs -f cassandra-init sidecar)..."
+until curl -sf http://localhost:9043/api/v1/__health > /dev/null 2>&1; do
+    sleep 5
+done
+
+echo "Sidecar is up. Waiting for CDC iterators to start..."
+CDC_TIMEOUT=360
+docker compose logs -f sidecar 2>&1 | grep -m 1 "CDC iterators started successfully" > /dev/null &
+LOG_PID=$!
+ELAPSED=0
+while kill -0 "$LOG_PID" 2>/dev/null; do
+    if [ "$ELAPSED" -ge "$CDC_TIMEOUT" ]; then
+        kill "$LOG_PID" 2>/dev/null || true
+        echo "Warning: timed out after ${CDC_TIMEOUT}s waiting for CDC iterators — check: docker compose logs sidecar"
+        break
+    fi
+    sleep 5
+    ELAPSED=$((ELAPSED + 5))
+done
+
+# ── Success banner ────────────────────────────────────────────────────────────
+echo ""
+printf "${GREEN}${BOLD}╔══════════════════════════════════════════════════════════╗${RESET}\n"
+printf "${GREEN}${BOLD}║        Setup complete. CDC pipeline is running.          ║${RESET}\n"
+printf "${GREEN}${BOLD}╚══════════════════════════════════════════════════════════╝${RESET}\n"
+echo ""
+printf "  ${BOLD}Serializer mode:${RESET} ${SERIALIZER_MODE}\n"
+echo ""
+printf "  ${BOLD}Step 1 — Insert a test mutation:${RESET}\n"
+printf "  ${CYAN}\$ docker exec -it cdc-demo-cassandra-1 cqlsh -e \"INSERT INTO cdc_demo.events (id, msg, ts) VALUES (uuid(), 'hello', toTimestamp(now()));\"${RESET}\n"
+echo ""
+if [ "$SERIALIZER_MODE" = "confluent" ]; then
+    printf "  ${BOLD}Step 2 — Inspect the registered Avro schema:${RESET}\n"
+    printf "  ${UNDERLINE}http://localhost:8080/ui/clusters/local/schemas/cdc-mutations-value${RESET}\n";
+    echo ""
+    printf "  ${BOLD}Step 3 — View decoded messages in Kafka UI:${RESET}\n"
+    printf "  ${UNDERLINE}http://localhost:8080/ui/clusters/local/all-topics/cdc-mutations/messages${RESET}\n";
+    printf "  ${CYAN}(Set Key Serde → String, Value Serde → SchemaRegistry to view decoded messages)${RESET}\n"
+else
+    printf "  ${BOLD}Step 2 — View mutations in Kafka UI:${RESET}\n"
+    printf "  ${UNDERLINE}http://localhost:8080/ui/clusters/local/all-topics/cdc-mutations/messages${RESET}\n";
+fi
+echo ""
+printf "  ${BOLD}To stop:${RESET}       ${CYAN}\$ ./scripts/stop.sh${RESET}\n"
+printf "  ${BOLD}To wipe data:${RESET}  ${CYAN}\$ ./scripts/stop.sh --clean${RESET}\n"
+echo ""
diff --git a/docker/cdc-demo/scripts/stop.sh b/docker/cdc-demo/scripts/stop.sh
new file mode 100755
index 00000000..af6ed269
--- /dev/null
+++ b/docker/cdc-demo/scripts/stop.sh
@@ -0,0 +1,52 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Stops the CDC demo stack.
+#
+# Usage (from anywhere in the repo):
+#   ./scripts/stop.sh           # stop containers, keep volumes (data preserved)
+#   ./scripts/stop.sh --clean   # stop containers AND delete volumes (full wipe)
+set -euo pipefail
+
+BOLD='\033[1m'
+YELLOW='\033[0;33m'
+GREEN='\033[0;32m'
+RESET='\033[0m'
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+DEMO_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
+CLEAN=false
+
+for arg in "$@"; do
+    case "$arg" in
+        --clean) CLEAN=true ;;
+        *) echo "Unknown argument: $arg" >&2; exit 1 ;;
+    esac
+done
+
+cd "$DEMO_DIR"
+
+if $CLEAN; then
+    printf "${YELLOW}${BOLD}Stopping stack and wiping all volumes...${RESET}\n"
+    docker compose down -v --remove-orphans
+    printf "${GREEN}Stack stopped. All data volumes removed.${RESET}\n"
+else
+    printf "${YELLOW}${BOLD}Stopping stack (volumes preserved)...${RESET}\n"
+    docker compose down --remove-orphans
+    printf "${GREEN}Stack stopped. Run with --clean to also remove data volumes.${RESET}\n"
+fi
diff --git a/server/build.gradle b/server/build.gradle
index ba71370c..b2e1c59b 100644
--- a/server/build.gradle
+++ b/server/build.gradle
@@ -159,6 +159,9 @@ dependencies {
     implementation "org.apache.kafka:kafka-clients:${project.kafkaClientVersion}"
     implementation "com.esotericsoftware:kryo-shaded:${kryoVersion}"
 
+    // Confluent Avro serializer — used when value.serializer=KafkaAvroSerializer (confluent mode)
+    implementation 'io.confluent:kafka-avro-serializer:7.6.0'
+
     // OSHI core library for fetching system information
     implementation("com.github.oshi:oshi-core:${oshiVersion}")
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]


Reply via email to