This is an automated email from the ASF dual-hosted git repository.
jyothsnakonisa pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/cassandra-sidecar.git
The following commit(s) were added to refs/heads/trunk by this push:
new 000a05ec CASSSIDECAR-419 : Add Docker compose setup for CDC (#330)
000a05ec is described below
commit 000a05ecc0610e1336a01811402593e2f977a629
Author: Jyothsna konisa <[email protected]>
AuthorDate: Tue May 26 15:36:26 2026 -0700
CASSSIDECAR-419 : Add Docker compose setup for CDC (#330)
Patch by Jyothsna Konisa; Reviewed by Josh McKenzie for CASSSIDECAR-419
---
.gitignore | 3 +
CHANGES.txt | 1 +
build.gradle | 2 +
docker/cdc-demo/Dockerfile.sidecar | 38 ++++
docker/cdc-demo/README.md | 290 ++++++++++++++++++++++++
docker/cdc-demo/conf/sidecar.yaml | 78 +++++++
docker/cdc-demo/docker-compose.yml | 186 +++++++++++++++
docker/cdc-demo/scripts/cassandra-entrypoint.sh | 49 ++++
docker/cdc-demo/scripts/init-cdc-schema.sh | 44 ++++
docker/cdc-demo/scripts/seed-cdc-configs.sh | 81 +++++++
docker/cdc-demo/scripts/start.sh | 147 ++++++++++++
docker/cdc-demo/scripts/stop.sh | 52 +++++
server/build.gradle | 3 +
13 files changed, 974 insertions(+)
diff --git a/.gitignore b/.gitignore
index 8bdfcd61..dd88239f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -92,6 +92,9 @@ agents
bin
conf
lib
+# Exception: docker demo config files are tracked
+!docker/cdc-demo/conf/
+!docker/cdc-demo/conf/**
# Local gradle cache
.gradle
diff --git a/CHANGES.txt b/CHANGES.txt
index 54ee2d2b..782c4104 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,5 +1,6 @@
0.4.0
-----
+ * Add Docker Compose setup for local CDC demo (Cassandra → Sidecar → Kafka)
(CASSSIDECAR-419)
* Scope all CDC dependencies exclusively to CdcModule (CASSSIDECAR-447)
* Add ConfigurationProvider interfaces for pluggable overlay storage
(CASSSIDECAR-424)
* Refactor OperationalJob to have data separate from execution logic
(CASSSIDECAR-460)
diff --git a/build.gradle b/build.gradle
index bf5ff20d..910a98e6 100644
--- a/build.gradle
+++ b/build.gradle
@@ -142,6 +142,8 @@ allprojects {
// for dtest jar
mavenLocal()
+ // Confluent Schema Registry artifacts (kafka-avro-serializer)
+ maven { url "https://packages.confluent.io/maven/" }
}
checkstyle {
diff --git a/docker/cdc-demo/Dockerfile.sidecar
b/docker/cdc-demo/Dockerfile.sidecar
new file mode 100644
index 00000000..3c5b86f2
--- /dev/null
+++ b/docker/cdc-demo/Dockerfile.sidecar
@@ -0,0 +1,38 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Single-stage build: copies the pre-built distribution assembled on the host
+# by start.sh (./gradlew installDist). This keeps Gradle off the critical path
+# of every docker build — the Gradle daemon on the host stays warm between runs.
+#
+# Usage (run from the repository root; called automatically by start.sh):
+#   ./gradlew installDist -x test
+#   docker build -f docker/cdc-demo/Dockerfile.sidecar -t cassandra-sidecar:dev .
+
+FROM eclipse-temurin:11-jre-jammy
+
+# Application home; the distribution's bin/, lib/ and conf/ end up under it.
+WORKDIR /app
+
+# Sidecar distribution produced on the host by ./gradlew installDist.
+COPY build/install/apache-cassandra-sidecar/ /app/
+
+# Default demo configuration baked into the image. docker-compose mounts
+# conf/sidecar.yaml over this path at runtime, so edits need no rebuild.
+COPY docker/cdc-demo/conf/sidecar.yaml /app/conf/sidecar.yaml
+
+# Port the sidecar listens on (documentation only; publishing is done by compose).
+EXPOSE 9043
+
+# Exec form: the launcher script runs directly, without a wrapping shell.
+ENTRYPOINT ["/app/bin/cassandra-sidecar"]
diff --git a/docker/cdc-demo/README.md b/docker/cdc-demo/README.md
new file mode 100644
index 00000000..ec356912
--- /dev/null
+++ b/docker/cdc-demo/README.md
@@ -0,0 +1,290 @@
+<!--
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+-->
+# CDC Demo — Docker Compose Setup
+
+End-to-end demo that boots Cassandra, Cassandra Sidecar, Kafka, and Confluent
+Schema Registry. Writes to a CDC-enabled Cassandra table are captured by the
+sidecar, serialized as Avro (with schemas registered in Schema Registry), and
+published to a Kafka topic.
+
+## Architecture
+
+```
+┌──────────────┐ cdc_raw/commitlog ┌──────────────────┐
+│ Cassandra │ ─────────────────────►│ Cassandra │──► Kafka topic
+│ (port 9042) │ (shared volume) │ Sidecar │ (cdc-mutations)
+└──────────────┘ │ (port 9043) │
+ └──────────────────┘
+ │ KafkaAvroSerializer
+ ▼
+ ┌──────────────────┐
+ │ Schema Registry │
+ │ (port 8081) │
+ └──────────────────┘
+
+ ┌──────────────────┐
+ │ Kafka UI │
+ │ (port 8080) │
+ └──────────────────┘
+```
+
+**Services:**
+| Service | Image | Role |
+|---|---|---|
+| `kafka` | `confluentinc/cp-kafka:7.6.0` | KRaft broker (no ZooKeeper) |
+| `schema-registry` | `confluentinc/cp-schema-registry:7.6.0` | Avro schema store |
+| `cassandra` | `cassandra:5.0` | CDC-enabled Cassandra node |
+| `cassandra-init` | `cassandra:5.0` | One-shot: seeds sidecar schema + configs |
+| `sidecar` | `cassandra-sidecar:dev` | Reads commit logs, publishes to Kafka |
+| `kafka-ui` | `ghcr.io/kafbat/kafka-ui:v1.5.0` | Browse topics + decoded Avro messages |
+
+## Prerequisites
+
+| Tool | Version | Notes |
+|---|---|---|
+| Docker | 24+ | |
+| Docker Compose | v2 (plugin) | |
+| Java | 11 | Required on host for `./gradlew installDist` |
+| Gradle | via wrapper | No installation needed — `./gradlew` is self-contained |
+
+## Exposed ports
+
+| Port | Service |
+|---|---|
+| `9042` | Cassandra CQL |
+| `9043` | Cassandra Sidecar |
+| `8080` | Kafka UI |
+| `8081` | Confluent Schema Registry |
+
+## Serializer modes
+
+| Mode | `value.serializer` | Schema storage |
+|---|---|---|
+| `confluent` *(default)* | `KafkaAvroSerializer` | Confluent Schema Registry (port 8081) |
+| `bytearray` | `ByteArraySerializer` | None — raw Avro bytes, no schema registry lookup |
+
+## Quick Start
+
+### Step 1 — Start the stack
+
+From `docker/cdc-demo/`, run the start script. It builds the sidecar
+distribution on the host, packages it into a Docker image, and starts all
+services:
+
+```bash
+cd docker/cdc-demo
+./scripts/start.sh
+```
+
+The script handles everything in order:
+1. Stops any existing stack
+2. Runs `./gradlew installDist` on the host
+3. Builds the `cassandra-sidecar:dev` Docker image
+4. Starts all services and waits for CDC iterators to be ready
+
+**Common flags:**
+
+```bash
+./scripts/start.sh --clean       # wipe all data volumes before starting
+./scripts/start.sh --skip-build  # reuse existing cassandra-sidecar:dev image (skip steps 2-3)
+./scripts/start.sh --bytearray   # use ByteArraySerializer instead of Confluent Avro
+```
+
+> **`--skip-build`** is useful when you've only changed a config file or script
+> and don't need to recompile Java. Requires a `cassandra-sidecar:dev` image
+> from a prior run.
+
+### Step 2 — Wait for CDC to be ready
+
+`start.sh` automatically waits until the sidecar is up and CDC iterators have
+started, then prints a **Setup complete** banner with next steps.
+
+To follow progress in another terminal:
+
+```bash
+docker compose logs -f cassandra-init sidecar
+```
+
+### Step 3 — Write mutations to the CDC-enabled table
+
+```bash
+docker exec -it cdc-demo-cassandra-1 cqlsh -e "
+ INSERT INTO cdc_demo.events (id, msg, ts)
+ VALUES (uuid(), 'hello from CDC', toTimestamp(now()));
+"
+```
+
+### Step 4 — View messages in Kafka UI
+
+Open the topic in the Kafka UI:
+
+```
+http://localhost:8080/ui/clusters/local/all-topics/cdc-mutations/messages
+```
+
+**Confluent mode (default):** kafbat is pre-configured with the Schema
Registry URL (`http://schema-registry:8081`).
+To see human-readable messages, set the serde dropdowns at the top of the
+Messages tab:
+
+| Field | Serde to select | Why |
+|---|---|---|
+| **Key Serde** | `String` | CDC keys are plain UTF-8 strings (`keyspace:table:pk`) |
+| **Value Serde** | `SchemaRegistry` | Values are Confluent Avro — kafbat fetches the schema by the embedded ID and renders the payload as JSON |
+
+Once set, each message value displays as a decoded JSON object matching the
+CDC-enabled table's schema, for example:
+
+```json
+{
+ "operationType": "INSERT",
+ "timestampMicros": 1746000000000000,
+ "sourceKeyspace": "cdc_demo",
+ "sourceTable": "events",
+ "isPartial": false,
+ "payload": {
+ "id": "550e8400-e29b-41d4-a716-446655440000",
+ "msg": "hello from CDC",
+ "ts": 1746000000000000
+ }
+}
+```
+
+**Bytearray mode:** values are raw Avro bytes with no schema registry lookup.
Set
+**Value Serde** to `Bytes` to inspect the raw payload.
+
+### Step 5 — Inspect the registered Avro schema
+
+The sidecar auto-registers one Avro schema per CDC-enabled table on first
publish.
+`KafkaAvroSerializer` uses the subject naming convention `{topic}-value`, so
for
+the `cdc-mutations` topic the subject is `cdc-mutations-value`.
+
+Open the Kafka UI and navigate to the **Schema Registry** tab to browse the
full
+Avro schema:
+
+```
+http://localhost:8080/ui/clusters/local/schemas/cdc-mutations-value
+```
+
+## Supported Cassandra Versions
+
+CDC is supported for **4.0, 4.1, 5.0, 5.1**. To use a different version:
+
+```bash
+CASSANDRA_VERSION=4.1 ./scripts/start.sh
+```
+
+The default is `5.0`. Note: the `cassandra:4.0` Docker image is `linux/amd64`
+only — on Apple Silicon it runs under Rosetta emulation and may be slow to
+start. Use `4.1` or later for ARM64 support.
+
+## Configuration
+
+`conf/sidecar.yaml` is volume-mounted into the sidecar container and can be
+edited without rebuilding the image. Restart the sidecar to pick up changes:
+
+```bash
+docker compose restart sidecar
+```
+
+CDC and Kafka properties are stored in Cassandra and seeded automatically on
+first boot by `scripts/seed-cdc-configs.sh`. To update them on a running
cluster:
+
+```bash
+docker exec -it cdc-demo-cassandra-1 cqlsh -e "
+ UPDATE sidecar_internal.configs
+ SET config = config + {'micro_batch_delay_millis': '500'}
+ WHERE service = 'cdc';
+"
+```
+
+To switch serializer mode on a running cluster, delete the existing kafka
config
+row and restart the stack:
+
+```bash
+docker exec -it cdc-demo-cassandra-1 cqlsh -e "DELETE FROM
sidecar_internal.configs WHERE service = 'kafka';"
+./scripts/start.sh --bytearray --skip-build
+```
+
+## Persistence
+
+All data is stored in named Docker volumes and survives `docker compose down`.
+
+| Volume | Contents |
+|---|---|
+| `cassandra-varlib` | Cassandra data, commitlog, cdc_raw |
+| `kafka-data` | Topic partitions + consumer offsets |
+
+## Stopping
+
+```bash
+./scripts/stop.sh # stop containers, keep volumes (data preserved)
+./scripts/stop.sh --clean # stop containers AND delete all data volumes
+```
+
+## Troubleshooting
+
+**Sidecar keeps restarting**
+
+The sidecar waits for `cassandra-init` to complete. Check its logs:
+
+```bash
+docker compose logs cassandra-init
+```
+
+**CDC events not arriving in Kafka**
+
+1. Verify configs were seeded:
+ ```bash
+ docker exec cdc-demo-cassandra-1 cqlsh -e "SELECT * FROM
sidecar_internal.configs;"
+ ```
+2. Check sidecar logs for `CDC iterators started successfully`
+3. Confirm CDC is enabled on the table:
+ ```bash
+ docker exec cdc-demo-cassandra-1 cqlsh -e "DESCRIBE TABLE cdc_demo.events;"
+ ```
+
+**Schema Registry connection errors (confluent mode)**
+
+Verify the registry is reachable and schemas are registered:
+
+```bash
+curl -s http://localhost:8081/subjects
+docker compose logs schema-registry
+```
+
+If `seed-cdc-configs.sh` ran before the schema registry was healthy, the kafka
+config row may be missing. Delete and re-run:
+
+```bash
+docker exec -it cdc-demo-cassandra-1 cqlsh -e "DELETE FROM
sidecar_internal.configs WHERE service = 'kafka';"
+docker compose run --rm cassandra-init
+```
+
+**JMX connection refused**
+
+Remote JMX is enabled by the `LOCAL_JMX=no` env var on the Cassandra service,
+which causes the stock Cassandra Docker entrypoint to set
+`jmxremote.local.only=false`. The `JVM_EXTRA_OPTS` env var additionally sets
+`-Djava.rmi.server.hostname=cassandra` so RMI binds to the right interface.
+Verify the flags are active:
+
+```bash
+docker exec cdc-demo-cassandra-1 ps aux | grep jmxremote
+```
diff --git a/docker/cdc-demo/conf/sidecar.yaml
b/docker/cdc-demo/conf/sidecar.yaml
new file mode 100644
index 00000000..3886eb7b
--- /dev/null
+++ b/docker/cdc-demo/conf/sidecar.yaml
@@ -0,0 +1,78 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+cassandra_instances:
+ - id: 1
+ host: cassandra
+ port: 9042
+ storage_dir: /var/lib/cassandra
+ cdc_dir: /var/lib/cassandra/cdc_raw
+ commitlog_dir: /var/lib/cassandra/commitlog
+ staging_dir: /var/lib/cassandra/sstable-staging
+ jmx_host: 127.0.0.1
+ jmx_port: 7199
+ jmx_ssl_enabled: false # demo only — enable TLS in production
+
+sidecar:
+ host: 0.0.0.0
+ port: 9043
+ cdc:
+ enabled: true
+ segment_hardlink_cache_expiry: 5m
+ table_schema_refresh_time: 5s
+ config_refresh_time: 5s
+ worker_pools:
+ service:
+ name: "sidecar-worker-pool"
+ size: 20
+ max_execution_time: 1m
+ internal:
+ name: "sidecar-internal-worker-pool"
+ size: 20
+ max_execution_time: 15m
+ jmx:
+ max_retries: 10
+ retry_delay: 3s
+ # Required: the sidecar creates the sidecar_internal keyspace and its
+ # configs/cdc_states tables.
+ schema:
+ is_enabled: true
+ keyspace: sidecar_internal
+ replication_strategy: NetworkTopologyStrategy
+ replication_factor: 1
+ lease_schema_ttl: 5m
+ coordination:
+ cluster_lease_claim:
+ electorate_membership_strategy:
MostReplicatedKeyspaceTokenZeroElectorateMembership
+ enabled: true
+ initial_delay: 1s
+ initial_delay_random_delta: 5s
+ execute_interval: 100s
+
+driver_parameters:
+ contact_points:
+ - "cassandra:9042"
+ num_connections: 6
+ local_dc: datacenter1
+
+healthcheck:
+ initial_delay: 0ms
+ execute_interval: 30s
+
+cluster_topology_monitor:
+ enabled: true
+ initial_delay: 0s
+ execute_interval: 1000ms
diff --git a/docker/cdc-demo/docker-compose.yml
b/docker/cdc-demo/docker-compose.yml
new file mode 100644
index 00000000..9f76f2b1
--- /dev/null
+++ b/docker/cdc-demo/docker-compose.yml
@@ -0,0 +1,186 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Cassandra CDC demo stack
+#
+# Startup order (each arrow is a depends_on edge):
+#   kafka ──► schema-registry ──► cassandra-init
+#   cassandra ──► cassandra-init (seeds schema + configs, then exits) ──► sidecar
+#   kafka ──► kafka-ui
+#
+# Wipe all data: docker compose down -v (or: ./scripts/stop.sh --clean)
+
+services:
+
+ # ── Kafka (KRaft — no ZooKeeper)
────────────────────────────────────────────
+ kafka:
+ image: confluentinc/cp-kafka:7.6.0
+ networks:
+ - demo-network
+ environment:
+ KAFKA_NODE_ID: 1
+ KAFKA_PROCESS_ROLES: broker,controller
+ KAFKA_LISTENERS: PLAINTEXT://0.0.0.0:9092,CONTROLLER://0.0.0.0:9093
+ KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:9092
+ KAFKA_LISTENER_SECURITY_PROTOCOL_MAP:
PLAINTEXT:PLAINTEXT,CONTROLLER:PLAINTEXT
+ KAFKA_INTER_BROKER_LISTENER_NAME: PLAINTEXT
+ KAFKA_CONTROLLER_LISTENER_NAMES: CONTROLLER
+ KAFKA_CONTROLLER_QUORUM_VOTERS: 1@kafka:9093
+ KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
+ KAFKA_AUTO_CREATE_TOPICS_ENABLE: "true"
+ # Must not change across restarts — required for volume persistence.
+ CLUSTER_ID: MkU3OEVBNTcwNTJENDM2Qk
+ volumes:
+ - kafka-data:/var/lib/kafka/data
+ healthcheck:
+ test: ["CMD", "kafka-topics", "--bootstrap-server", "kafka:9092",
"--list"]
+ interval: 10s
+ timeout: 10s
+ retries: 20
+
+ # ── Confluent Schema Registry
────────────────────────────────────────────────
+ # Stores and serves Avro schemas; the sidecar's KafkaAvroSerializer registers
+ # table schemas here on first publish. Exposed on host port 8081.
+ schema-registry:
+ image: confluentinc/cp-schema-registry:7.6.0
+ depends_on:
+ kafka:
+ condition: service_healthy
+ networks:
+ - demo-network
+ environment:
+ SCHEMA_REGISTRY_HOST_NAME: schema-registry
+ SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: kafka:9092
+ SCHEMA_REGISTRY_LISTENERS: http://0.0.0.0:8081
+ ports:
+ - "8081:8081"
+ healthcheck:
+ test: ["CMD-SHELL", "curl -sf http://localhost:8081/subjects || exit 1"]
+ interval: 10s
+ timeout: 10s
+ retries: 20
+
+ # ── Cassandra
───────────────────────────────────────────────────────────────
+ cassandra:
+ image: cassandra:${CASSANDRA_VERSION:-5.0}
+ # Patches CDC settings into cassandra.yaml, then delegates to the stock
entrypoint.
+ entrypoint: ["/bin/bash", "/scripts/cassandra-entrypoint.sh"]
+ command: ["cassandra", "-f"]
+ networks:
+ - demo-network
+ environment:
+ CASSANDRA_CLUSTER_NAME: "CDC Demo Cluster"
+ CASSANDRA_DC: datacenter1
+ CASSANDRA_RACK: rack1
+ CASSANDRA_ENDPOINT_SNITCH: GossipingPropertyFileSnitch
+ # Required: lets SidecarLoadBalancingPolicy match this node by hostname.
+ CASSANDRA_BROADCAST_RPC_ADDRESS: cassandra
+ # Enables remote JMX so the sidecar can connect.
+ LOCAL_JMX: "no"
+ JVM_EXTRA_OPTS: "-Djava.rmi.server.hostname=cassandra
-Dcom.sun.management.jmxremote.authenticate=false
-Dcom.sun.management.jmxremote.ssl=false"
+ MAX_HEAP_SIZE: "512M"
+ HEAP_NEWSIZE: "128M"
+ volumes:
+ - ./scripts:/scripts:ro
+ # commitlog/ and cdc_raw/ must share a filesystem for CDC hard-links.
+ - cassandra-varlib:/var/lib/cassandra
+ ports:
+ - "9042:9042"
+ # 9043 is published here because the sidecar shares this container's
+ # network namespace.
+ - "9043:9043"
+ healthcheck:
+ test: ["CMD-SHELL", "cqlsh -e 'SELECT now() FROM system.local'
2>/dev/null || exit 1"]
+ interval: 10s
+ timeout: 10s
+ retries: 20
+ start_period: 60s
+
+ # ── Cassandra init (one-shot)
───────────────────────────────────────────────
+ # Seeds sidecar_internal schema and CDC/Kafka configs after Cassandra and
+ # Schema Registry are both healthy.
+ cassandra-init:
+ image: cassandra:${CASSANDRA_VERSION:-5.0}
+ depends_on:
+ cassandra:
+ condition: service_healthy
+ schema-registry:
+ condition: service_healthy
+ networks:
+ - demo-network
+ entrypoint: ["/bin/bash", "-c",
+ "bash /scripts/init-cdc-schema.sh && bash /scripts/seed-cdc-configs.sh"]
+ environment:
+ CASSANDRA_HOST: cassandra
+ KAFKA_BOOTSTRAP_SERVERS: kafka:9092
+ CDC_TOPIC: cdc-mutations
+ CDC_DATACENTER: datacenter1
+ # Set by start.sh: confluent | bytearray
+ SERIALIZER_MODE: ${SERIALIZER_MODE:-confluent}
+ SCHEMA_REGISTRY_URL: http://schema-registry:8081
+ volumes:
+ - ./scripts:/scripts:ro
+ restart: "no"
+
+ # ── Kafka UI
─────────────────────────────────────────────────────────────────
+ # Browse topics and Avro-decoded messages at http://localhost:8080
+ # kafbat/kafka-ui is the actively maintained fork of provectuslabs/kafka-ui.
+ kafka-ui:
+ image: ghcr.io/kafbat/kafka-ui:v1.5.0
+ depends_on:
+ kafka:
+ condition: service_healthy
+ networks:
+ - demo-network
+ environment:
+ KAFKA_CLUSTERS_0_NAME: local
+ KAFKA_CLUSTERS_0_BOOTSTRAPSERVERS: kafka:9092
+ KAFKA_CLUSTERS_0_SCHEMAREGISTRY: http://schema-registry:8081
+ ports:
+ - "8080:8080"
+
+ # ── Sidecar
─────────────────────────────────────────────────────────────────
+ # Shares Cassandra's network namespace — sidecar is reachable at
cassandra:9043.
+ sidecar:
+ image: cassandra-sidecar:dev
+ build:
+ context: ../../
+ dockerfile: docker/cdc-demo/Dockerfile.sidecar
+ depends_on:
+ cassandra-init:
+ condition: service_completed_successfully
+ network_mode: "service:cassandra"
+ volumes:
+ # Overrides the baked-in config; edit without rebuilding the image.
+ - ./conf/sidecar.yaml:/app/conf/sidecar.yaml:ro
+ # Shared with Cassandra so the sidecar can read cdc_raw/ and commitlog/.
+ - cassandra-varlib:/var/lib/cassandra
+ healthcheck:
+ test: ["CMD-SHELL", "curl -sf http://localhost:9043/api/v1/__health ||
exit 1"]
+ interval: 10s
+ timeout: 5s
+ retries: 20
+ start_period: 30s
+
+volumes:
+ kafka-data:
+ # commitlog/ and cdc_raw/ must share a filesystem for CDC hard-links.
+ cassandra-varlib:
+
+networks:
+ demo-network:
+ driver: bridge
diff --git a/docker/cdc-demo/scripts/cassandra-entrypoint.sh
b/docker/cdc-demo/scripts/cassandra-entrypoint.sh
new file mode 100644
index 00000000..b10dbf21
--- /dev/null
+++ b/docker/cdc-demo/scripts/cassandra-entrypoint.sh
@@ -0,0 +1,49 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Patches CDC settings into the stock cassandra.yaml,
+# then hands off to the original Docker entrypoint.
+set -euo pipefail
+
+YAML="/etc/cassandra/cassandra.yaml"
+
+patch_yaml() {
+ local key="$1" value="$2"
+ if grep -q "^${key}:" "$YAML"; then
+ sed -i "s|^${key}:.*|${key}: ${value}|" "$YAML"
+ elif grep -q "^# *${key}:" "$YAML"; then
+ sed -i "s|^# *${key}:.*|${key}: ${value}|" "$YAML"
+ else
+ echo "${key}: ${value}" >> "$YAML"
+ fi
+}
+
+# commitlog and cdc_raw must share the same filesystem for CDC hard-links.
+patch_yaml "commitlog_directory" "/var/lib/cassandra/commitlog"
+patch_yaml "cdc_enabled" "true"
+patch_yaml "cdc_raw_directory" "/var/lib/cassandra/cdc_raw"
+patch_yaml "cdc_on_repair_enabled" "false"
+
+# Cassandra 4.x uses cdc_total_space_in_mb; 5.x uses cdc_total_space.
+if grep -q "cdc_total_space_in_mb" "$YAML"; then
+ patch_yaml "cdc_total_space_in_mb" "4096"
+else
+ patch_yaml "cdc_total_space" "4096MiB"
+fi
+
+exec /usr/local/bin/docker-entrypoint.sh "$@"
diff --git a/docker/cdc-demo/scripts/init-cdc-schema.sh
b/docker/cdc-demo/scripts/init-cdc-schema.sh
new file mode 100644
index 00000000..2100d5cc
--- /dev/null
+++ b/docker/cdc-demo/scripts/init-cdc-schema.sh
@@ -0,0 +1,44 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Creates the sidecar_internal schema and CDC demo keyspace/table.
+set -euo pipefail
+
+# Host passed to cqlsh; defaults to "cassandra" when unset or empty.
+: "${CASSANDRA_HOST:=cassandra}"
+
+# The quoted heredoc delimiter passes the CQL to cqlsh verbatim, with no shell
+# expansion. Every statement uses IF NOT EXISTS, so re-running is harmless.
+cqlsh "${CASSANDRA_HOST}" <<'CQL'
+CREATE KEYSPACE IF NOT EXISTS sidecar_internal
+ WITH replication = {'class': 'NetworkTopologyStrategy', 'datacenter1': 1};
+
+CREATE TABLE IF NOT EXISTS sidecar_internal.configs (
+ service text,
+ config map<text, text>,
+ PRIMARY KEY (service)
+);
+
+CREATE KEYSPACE IF NOT EXISTS cdc_demo
+ WITH replication = {'class': 'NetworkTopologyStrategy', 'datacenter1': 1};
+
+CREATE TABLE IF NOT EXISTS cdc_demo.events (
+ id uuid PRIMARY KEY,
+ msg text,
+ ts timestamp
+) WITH cdc = true;
+CQL
+
+printf '%s\n' "Schema initialised."
diff --git a/docker/cdc-demo/scripts/seed-cdc-configs.sh
b/docker/cdc-demo/scripts/seed-cdc-configs.sh
new file mode 100644
index 00000000..5e497d9d
--- /dev/null
+++ b/docker/cdc-demo/scripts/seed-cdc-configs.sh
@@ -0,0 +1,81 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Seeds CDC and Kafka configuration into sidecar_internal.configs.
+# IF NOT EXISTS makes each insert idempotent across restarts.
+#
+# Environment variables:
+# SERIALIZER_MODE confluent (default) | bytearray
+# SCHEMA_REGISTRY_URL http://schema-registry:8081 (default)
+set -euo pipefail
+
+CASSANDRA_HOST=${CASSANDRA_HOST:-cassandra}
+KAFKA_BOOTSTRAP=${KAFKA_BOOTSTRAP_SERVERS:-kafka:9092}
+CDC_TOPIC=${CDC_TOPIC:-cdc-mutations}
+CDC_DATACENTER=${CDC_DATACENTER:-datacenter1}
+SERIALIZER_MODE=${SERIALIZER_MODE:-confluent}
+SCHEMA_REGISTRY_URL=${SCHEMA_REGISTRY_URL:-http://schema-registry:8081}
+
+echo "Seeding CDC configs (serializer-mode: ${SERIALIZER_MODE})..."
+
+cqlsh "${CASSANDRA_HOST}" <<CQL
+INSERT INTO sidecar_internal.configs (service, config)
+VALUES ('cdc', {
+ 'cdc_enabled': 'true',
+ 'topic': '${CDC_TOPIC}',
+ 'jobid': 'docker-demo-job',
+ 'datacenter': '${CDC_DATACENTER}',
+ 'watermark_seconds': '3600',
+ 'micro_batch_delay_millis': '1000',
+ 'max_commit_logs': '4',
+ 'persist_state': 'true',
+ 'fail_kafka_errors': 'true',
+ 'fail_kafka_too_large_errors': 'false'
+}) IF NOT EXISTS;
+CQL
+
+if [ "${SERIALIZER_MODE}" = "confluent" ]; then
+ cqlsh "${CASSANDRA_HOST}" <<CQL
+INSERT INTO sidecar_internal.configs (service, config)
+VALUES ('kafka', {
+ 'bootstrap.servers': '${KAFKA_BOOTSTRAP}',
+ 'key.serializer':
'org.apache.kafka.common.serialization.StringSerializer',
+ 'value.serializer': 'io.confluent.kafka.serializers.KafkaAvroSerializer',
+ 'schema.registry.url': '${SCHEMA_REGISTRY_URL}',
+ 'acks': 'all',
+ 'retries': '3',
+ 'linger.ms': '5',
+ 'batch.size': '16384'
+}) IF NOT EXISTS;
+CQL
+else
+ cqlsh "${CASSANDRA_HOST}" <<CQL
+INSERT INTO sidecar_internal.configs (service, config)
+VALUES ('kafka', {
+ 'bootstrap.servers': '${KAFKA_BOOTSTRAP}',
+ 'key.serializer':
'org.apache.kafka.common.serialization.StringSerializer',
+ 'value.serializer':
'org.apache.kafka.common.serialization.ByteArraySerializer',
+ 'acks': 'all',
+ 'retries': '3',
+ 'linger.ms': '5',
+ 'batch.size': '16384'
+}) IF NOT EXISTS;
+CQL
+fi
+
+echo "Configs seeded."
diff --git a/docker/cdc-demo/scripts/start.sh b/docker/cdc-demo/scripts/start.sh
new file mode 100755
index 00000000..46884447
--- /dev/null
+++ b/docker/cdc-demo/scripts/start.sh
@@ -0,0 +1,147 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Builds the sidecar and starts the CDC demo stack.
+#
+# Usage (the script resolves its own location, so it can be invoked from any
+# directory; the paths below are relative to docker/cdc-demo):
+#   ./scripts/start.sh                        # build + start in confluent mode (default)
+#   ./scripts/start.sh --bytearray            # build + start in bytearray mode
+#   ./scripts/start.sh --clean                # wipe all data volumes before starting
+#   ./scripts/start.sh --skip-build           # reuse existing cassandra-sidecar:dev image
+#   ./scripts/start.sh --clean --skip-build
+set -euo pipefail
+
+# ANSI color codes
+BOLD='\033[1m'
+GREEN='\033[0;32m'
+CYAN='\033[0;36m'
+YELLOW='\033[0;33m'
+UNDERLINE='\033[4m'
+RESET='\033[0m'
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+DEMO_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
+REPO_ROOT="$(cd "$SCRIPT_DIR/../../.." && pwd)"
+CLEAN=false
+SKIP_BUILD=false
+SERIALIZER_MODE=confluent
+
+# Print the command-line help text for this script to stdout.
+usage() {
+ cat <<EOF
+Usage: $(basename "$0") [OPTIONS]
+
+Builds the sidecar and starts the CDC demo stack.
+
+Options:
+ --confluent Use Confluent Avro serializer (default)
+ --bytearray Use byte-array serializer
+ --clean Wipe all data volumes before starting
+ --skip-build Reuse existing cassandra-sidecar:dev image
+ --help Show this help message
+EOF
+}
+
+# Parse command-line flags. Flags may appear in any order; if both
+# --confluent and --bytearray are given, the last one wins.
+for arg in "$@"; do
+  case "$arg" in
+    --clean) CLEAN=true ;;
+    --skip-build) SKIP_BUILD=true ;;
+    --confluent) SERIALIZER_MODE=confluent ;;
+    --bytearray) SERIALIZER_MODE=bytearray ;;
+    -h|--help) usage; exit 0 ;;
+    *)
+      # Show the valid options alongside the error so the caller does not
+      # have to re-run with --help to find the right flag.
+      echo "Unknown argument: $arg" >&2
+      usage >&2
+      exit 1
+      ;;
+  esac
+done
+
+# ── Stop existing stack
───────────────────────────────────────────────────────
+if $CLEAN; then
+ bash "$SCRIPT_DIR/stop.sh" --clean
+else
+ bash "$SCRIPT_DIR/stop.sh"
+fi
+
+# ── Build ─────────────────────────────────────────────────────────────────────
+# With --skip-build the cassandra-sidecar:dev image must already exist locally;
+# otherwise build the distribution with Gradle and then the Docker image.
+if $SKIP_BUILD; then
+  # Fail early with a clear message instead of letting compose fail later.
+  if ! docker image inspect cassandra-sidecar:dev > /dev/null 2>&1; then
+    echo "ERROR: --skip-build specified but cassandra-sidecar:dev image not found." >&2
+    echo " Run without --skip-build to build the image first." >&2
+    exit 1
+  fi
+  printf "${YELLOW}Skipping build — reusing existing cassandra-sidecar:dev image.${RESET}\n"
+else
+  printf "${BOLD}==> Building sidecar distribution (./gradlew installDist)...${RESET}\n"
+  # -p points Gradle at the repo root so this works from any directory; the
+  # test tasks are excluded with -x.
+  "$REPO_ROOT/gradlew" -p "$REPO_ROOT" installDist \
+    -x test -x integrationTest -x containerTest \
+    --parallel --quiet
+
+  printf "${BOLD}==> Building sidecar Docker image...${RESET}\n"
+  # The build context is the repo root; the Dockerfile lives in the demo dir.
+  DOCKER_BUILDKIT=1 docker build \
+    -f "$REPO_ROOT/docker/cdc-demo/Dockerfile.sidecar" \
+    -t cassandra-sidecar:dev \
+    "$REPO_ROOT"
+fi
+
+# ── Start stack
───────────────────────────────────────────────────────────────
+printf "${BOLD}==> Starting stack (serializer-mode:
${SERIALIZER_MODE})...${RESET}\n"
+cd "$DEMO_DIR"
+export SERIALIZER_MODE
+docker compose up -d
+
+# ── Wait for sidecar ─────────────────────────────────────────────────────────
+echo ""
+echo "Waiting for sidecar to be ready (follow progress: docker compose logs -f
cassandra-init sidecar)..."
+until curl -sf http://localhost:9043/api/v1/__health > /dev/null 2>&1; do
+ sleep 5
+done
+
+echo "Sidecar is up. Waiting for CDC iterators to start..."
+CDC_TIMEOUT=360
+ELAPSED=0
+# Poll a snapshot of the sidecar log every 5 seconds until the start-up
+# message appears or CDC_TIMEOUT elapses. This replaces a backgrounded
+# `docker compose logs -f | grep -m 1` pipeline, which had two problems:
+#   * $! was grep's PID, so killing it on timeout left the log follower behind;
+#   * if the log stream ended early, grep exited without a match and the wait
+#     finished silently, as though the iterators had started.
+# grep's output is discarded with a redirect rather than -q so that grep reads
+# the whole stream: an early exit would SIGPIPE docker compose and, under
+# `set -o pipefail`, turn a successful match into a failed pipeline.
+until docker compose logs sidecar 2>&1 | grep "CDC iterators started successfully" > /dev/null; do
+  if [ "$ELAPSED" -ge "$CDC_TIMEOUT" ]; then
+    # Best effort: warn and continue rather than abort the script.
+    echo "Warning: timed out after ${CDC_TIMEOUT}s waiting for CDC iterators — check: docker compose logs sidecar"
+    break
+  fi
+  sleep 5
+  ELAPSED=$((ELAPSED + 5))
+done
+
+# ── Success banner
────────────────────────────────────────────────────────────
+# Prints the closing banner and next-step hints. NOTE: this section runs
+# unconditionally, including after the CDC-iterator wait above has timed out
+# with only a warning. The container name (cdc-demo-cassandra-1) and the topic
+# name in the URLs (cdc-mutations) are hard-coded here.
+echo ""
+printf
"${GREEN}${BOLD}╔══════════════════════════════════════════════════════════╗${RESET}\n"
+printf "${GREEN}${BOLD}║ Setup complete. CDC pipeline is running.
║${RESET}\n"
+printf
"${GREEN}${BOLD}╚══════════════════════════════════════════════════════════╝${RESET}\n"
+echo ""
+printf " ${BOLD}Serializer mode:${RESET} ${SERIALIZER_MODE}\n"
+echo ""
+printf " ${BOLD}Step 1 — Insert a test mutation:${RESET}\n"
+printf " ${CYAN}\$ docker exec -it cdc-demo-cassandra-1 cqlsh -e \"INSERT
INTO cdc_demo.events (id, msg, ts) VALUES (uuid(), 'hello',
toTimestamp(now()));\"${RESET}\n"
+echo ""
+# Confluent mode gets an extra schema-inspection step and a Serde hint;
+# bytearray mode only links to the topic's messages.
+if [ "$SERIALIZER_MODE" = "confluent" ]; then
+ printf " ${BOLD}Step 2 — Inspect the registered Avro schema:${RESET}\n"
+ printf "
${UNDERLINE}http://localhost:8080/ui/clusters/local/schemas/cdc-mutations-value${RESET}\n"
+ echo ""
+ printf " ${BOLD}Step 3 — View decoded messages in Kafka UI:${RESET}\n"
+ printf "
${UNDERLINE}http://localhost:8080/ui/clusters/local/all-topics/cdc-mutations/messages${RESET}\n"
+ printf " ${CYAN}(Set Key Serde → String, Value Serde → SchemaRegistry to
view decoded messages)${RESET}\n"
+else
+ printf " ${BOLD}Step 2 — View mutations in Kafka UI:${RESET}\n"
+ printf "
${UNDERLINE}http://localhost:8080/ui/clusters/local/all-topics/cdc-mutations/messages${RESET}\n"
+fi
+echo ""
+printf " ${BOLD}To stop:${RESET} ${CYAN}\$ ./scripts/stop.sh${RESET}\n"
+printf " ${BOLD}To wipe data:${RESET} ${CYAN}\$ ./scripts/stop.sh
--clean${RESET}\n"
+echo ""
diff --git a/docker/cdc-demo/scripts/stop.sh b/docker/cdc-demo/scripts/stop.sh
new file mode 100755
index 00000000..af6ed269
--- /dev/null
+++ b/docker/cdc-demo/scripts/stop.sh
@@ -0,0 +1,52 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Stops the CDC demo stack.
+#
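+# Usage (the script resolves its own location, so it can be invoked from any
+# directory; the paths below are relative to docker/cdc-demo):
+#   ./scripts/stop.sh            # stop containers, keep volumes (data preserved)
+#   ./scripts/stop.sh --clean    # stop containers AND delete volumes (full wipe)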
+set -euo pipefail
+
+# ANSI color codes; expanded by printf when used in its format string.
+BOLD='\033[1m'
+YELLOW='\033[0;33m'
+GREEN='\033[0;32m'
+RESET='\033[0m'
+
+# Absolute paths derived from this script's own location so the script does
+# not depend on the caller's working directory.
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+DEMO_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
+CLEAN=false
+
+# Print the command-line help text for this script to stdout.
+usage() {
+  printf "Usage: %s [OPTIONS]\n\n" "$(basename "$0")"
+  printf "Stops the CDC demo stack.\n\n"
+  printf "Options:\n"
+  printf "  --clean   Also delete all data volumes (full wipe)\n"
+  printf "  --help    Show this help message\n"
+}
+
+# Parse command-line flags; --help mirrors the option offered by start.sh.
+for arg in "$@"; do
+  case "$arg" in
+    --clean) CLEAN=true ;;
+    -h|--help) usage; exit 0 ;;
+    *) echo "Unknown argument: $arg" >&2; exit 1 ;;
+  esac
+done
+
+# Run compose from the demo directory.
+cd "$DEMO_DIR"
+
+# Both modes remove orphan containers; --clean additionally passes -v so the
+# volumes are deleted as well.
+if $CLEAN; then
+  printf "${YELLOW}${BOLD}Stopping stack and wiping all volumes...${RESET}\n"
+  docker compose down -v --remove-orphans
+  printf "${GREEN}Stack stopped. All data volumes removed.${RESET}\n"
+else
+  printf "${YELLOW}${BOLD}Stopping stack (volumes preserved)...${RESET}\n"
+  docker compose down --remove-orphans
+  printf "${GREEN}Stack stopped. Run with --clean to also remove data volumes.${RESET}\n"
+fi
diff --git a/server/build.gradle b/server/build.gradle
index ba71370c..b2e1c59b 100644
--- a/server/build.gradle
+++ b/server/build.gradle
@@ -159,6 +159,9 @@ dependencies {
implementation
"org.apache.kafka:kafka-clients:${project.kafkaClientVersion}"
implementation "com.esotericsoftware:kryo-shaded:${kryoVersion}"
+ // Confluent Avro serializer — used when
value.serializer=KafkaAvroSerializer (confluent mode)
+ implementation 'io.confluent:kafka-avro-serializer:7.6.0'
+
// OSHI core library for fetching system information
implementation("com.github.oshi:oshi-core:${oshiVersion}")
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]