This is an automated email from the ASF dual-hosted git repository.
palashc pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/phoenix-adapters.git
The following commit(s) were added to refs/heads/main by this push:
new 0bce23f PHOENIX-7868 : Docker setup for phoenix-adapters (#6)
0bce23f is described below
commit 0bce23f948a48180c64a7db290488e85053e9e61
Author: Palash Chauhan <[email protected]>
AuthorDate: Wed May 27 13:03:39 2026 -0700
PHOENIX-7868 : Docker setup for phoenix-adapters (#6)
Co-authored-by: Palash Chauhan
<[email protected]>
---
.dockerignore | 20 ++
README.md | 35 +++
docker/Dockerfile.hbase-phoenix | 63 +++++
docker/Dockerfile.phoenix-adapters | 97 ++++++++
docker/README.md | 335 +++++++++++++++++++++++++
docker/conf/hbase/hbase-env.sh | 19 ++
docker/conf/hbase/hbase-site.xml | 69 ++++++
docker/conf/phoenix-adapters/hbase-site.xml | 21 ++
docker/docker-compose.yml | 191 +++++++++++++++
docker/scripts/hbase-entrypoint.sh | 55 +++++
docker/scripts/phoenix-adapters-entrypoint.sh | 63 +++++
docker/scripts/smoke.sh | 336 ++++++++++++++++++++++++++
12 files changed, 1304 insertions(+)
diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..5e8720b
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,20 @@
+**/target/
+**/logs/
+**/*.log
+**/*.log.*
+**/dynamodb-local-metadata.json
+**/heap-dumps/
+
+**/*.tar.gz
+**/*.tar.bz2
+**/*.zip
+
+.idea/
+.vscode/
+.cursor/
+.DS_Store
+
+.git/
+.gitignore
+
+docker/README.md
diff --git a/README.md b/README.md
index f48d461..300dfbb 100644
--- a/README.md
+++ b/README.md
@@ -67,6 +67,41 @@ The Phoenix DynamoDB REST service is fully compatible with
AWS SDKs. You can con
port 8842 with zk-quorum localhost:2181.
Alternative to `-z <zk-quorum>` is env variable `ZOO_KEEPER_QUORUM`.
+#### One-shot Docker setup (recommended for first-time users)
+
+Skip steps 1-2 above with the bundled Docker cluster. From a fresh clone:
+
+**Prerequisites:** Docker Desktop running; `jq` and `curl` on `PATH`
+(`brew install jq` on macOS).
+
+```bash
+# 1. Bring up the full stack at the versions pinned in pom.xml and BLOCK
+# until every container reports healthy (REST is ~30-60s on cold start).
+# First time: ~8-12 min total -- most of that is Maven downloading
+# ~1.5 GB of dependencies into the BuildKit cache mount. Subsequent
+# runs reuse the cache and rebuild in seconds.
+docker compose -f docker/docker-compose.yml up -d --build --wait
+
+# 2. Validate it works end-to-end (CRUD + UpdateItem + BatchWriteItem + streams).
+bash docker/scripts/smoke.sh
+# -> "Result: 21 checks PASSED across 18 API calls"
+
+# 3. Use it. The DynamoDB-compatible endpoint is at http://localhost:8842 .
+# Point any AWS SDK at it (Java/Python/Node.js snippets in
+# phoenix-ddb-rest/README.md), or hit it with curl:
+curl -s -X POST http://localhost:8842/ \
+ -H 'Content-Type: application/x-amz-json-1.0' \
+ -H 'X-Amz-Target: DynamoDB_20120810.ListTables' -d '{}'
+
+# 4. Tear down when you're done.
+docker compose -f docker/docker-compose.yml down -v
+```
+
+See [`docker/README.md`](docker/README.md) for the full reference: port
+mappings, the developer inner loop for code changes, the smoke-test
+breakdown, troubleshooting, and how to run the REST server outside
+Docker against the dockerized cluster.
+
### Building Distribution Tarball
To build a distribution tarball that includes all components:
diff --git a/docker/Dockerfile.hbase-phoenix b/docker/Dockerfile.hbase-phoenix
new file mode 100644
index 0000000..d92e1aa
--- /dev/null
+++ b/docker/Dockerfile.hbase-phoenix
@@ -0,0 +1,63 @@
+# syntax=docker/dockerfile:1
+FROM eclipse-temurin:8-jdk-jammy
+
+ARG HBASE_VERSION=2.5.14
+ARG HBASE_FLAVOR=hadoop3
+ARG PHOENIX_HBASE_LINE=2.5
+ARG PHOENIX_VERSION=5.3.1
+
+ENV HBASE_VERSION=${HBASE_VERSION} \
+    HBASE_FLAVOR=${HBASE_FLAVOR} \
+    PHOENIX_HBASE_LINE=${PHOENIX_HBASE_LINE} \
+    PHOENIX_VERSION=${PHOENIX_VERSION} \
+    JAVA_HOME=/opt/java/openjdk \
+    HBASE_HOME=/opt/hbase \
+    HBASE_CONF_DIR=/opt/hbase/conf \
+    PHOENIX_HOME=/opt/phoenix \
+    HBASE_MANAGES_ZK=false \
+    PATH=/opt/hbase/bin:/opt/phoenix/bin:/opt/java/openjdk/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
+
+RUN set -eux; \
+    apt-get update; \
+    apt-get install -y --no-install-recommends \
+        bash curl ca-certificates netcat-openbsd procps tini less; \
+    rm -rf /var/lib/apt/lists/*
+
+RUN set -eux; \
+    mkdir -p "${HBASE_HOME}"; \
+    curl -fSL --retry 5 --retry-delay 5 \
+        "https://archive.apache.org/dist/hbase/${HBASE_VERSION}/hbase-${HBASE_VERSION}-${HBASE_FLAVOR}-bin.tar.gz" \
+        -o /tmp/hbase.tar.gz; \
+    tar -xzf /tmp/hbase.tar.gz -C "${HBASE_HOME}" --strip-components=1; \
+    rm /tmp/hbase.tar.gz; \
+    mkdir -p /var/log/hbase /var/run/hbase
+
+# phoenix-server JAR is copied into HBase's lib so the coprocessors and
+# the IndexedWALEditCodec are visible to both the master and every RS.
+RUN set -eux; \
+    mkdir -p "${PHOENIX_HOME}"; \
+    curl -fSL --retry 5 --retry-delay 5 \
+        "https://archive.apache.org/dist/phoenix/phoenix-${PHOENIX_VERSION}/phoenix-hbase-${PHOENIX_HBASE_LINE}-${PHOENIX_VERSION}-bin.tar.gz" \
+        -o /tmp/phoenix.tar.gz; \
+    tar -xzf /tmp/phoenix.tar.gz -C "${PHOENIX_HOME}" --strip-components=1; \
+    rm /tmp/phoenix.tar.gz; \
+    cp "${PHOENIX_HOME}/phoenix-server-hbase-${PHOENIX_HBASE_LINE}-${PHOENIX_VERSION}.jar" "${HBASE_HOME}/lib/"
+
+# Kept below the tarball downloads to preserve their (multi-hundred-MB) cache.
+# python3 is required by /opt/phoenix/bin/sqlline.py.
+RUN set -eux; \
+    apt-get update; \
+    apt-get install -y --no-install-recommends python3; \
+    rm -rf /var/lib/apt/lists/*; \
+    ln -sf /usr/bin/python3 /usr/local/bin/python
+
+COPY conf/hbase/hbase-site.xml ${HBASE_HOME}/conf/hbase-site.xml
+COPY conf/hbase/hbase-env.sh ${HBASE_HOME}/conf/hbase-env.sh
+
+COPY scripts/hbase-entrypoint.sh /usr/local/bin/entrypoint.sh
+RUN chmod +x /usr/local/bin/entrypoint.sh
+
+WORKDIR /opt
+
+ENTRYPOINT ["/usr/bin/tini", "--", "/usr/local/bin/entrypoint.sh"]
+CMD ["help"]
diff --git a/docker/Dockerfile.phoenix-adapters
b/docker/Dockerfile.phoenix-adapters
new file mode 100644
index 0000000..0c3df89
--- /dev/null
+++ b/docker/Dockerfile.phoenix-adapters
@@ -0,0 +1,97 @@
+# syntax=docker/dockerfile:1
+#
+# Build context: project root (the Maven reactor needs every module).
+#
+FROM maven:3.9-eclipse-temurin-8 AS builder
+
+WORKDIR /workspace
+
+# Copy poms first to maximise dep-layer cache hits on rebuild.
+COPY pom.xml ./
+COPY phoenix-ddb-utils/pom.xml phoenix-ddb-utils/pom.xml
+COPY phoenix-ddb-rest/pom.xml phoenix-ddb-rest/pom.xml
+COPY phoenix-ddb-assembly/pom.xml phoenix-ddb-assembly/pom.xml
+COPY coverage-report/pom.xml coverage-report/pom.xml
+
+# `|| true` because the cross-module reactor can't resolve siblings yet;
+# this step is only here to warm ~/.m2.
+RUN --mount=type=cache,target=/root/.m2 \
+    mvn -B -q -DskipTests \
+        -pl phoenix-ddb-utils,phoenix-ddb-rest,phoenix-ddb-assembly -am \
+        dependency:go-offline || true
+
+COPY phoenix-ddb-utils phoenix-ddb-utils
+COPY phoenix-ddb-rest phoenix-ddb-rest
+COPY phoenix-ddb-assembly phoenix-ddb-assembly
+COPY coverage-report coverage-report
+COPY bin bin
+COPY conf conf
+COPY README.md DDB_API_REFERENCE.md ./
+
+RUN --mount=type=cache,target=/root/.m2 \
+    mvn -B -DskipTests \
+        -pl phoenix-ddb-assembly -am \
+        clean package
+
+RUN set -eux; \
+    # If the assembly module ever ships an additional *-bin.tar.gz (e.g.
+    # with a classifier), fail loudly rather than silently picking one.
+    count=$(find phoenix-ddb-assembly/target -maxdepth 1 -type f -name 'phoenix-adapters-*-bin.tar.gz' | wc -l); \
+    if [ "$count" -ne 1 ]; then \
+        echo "Expected exactly one phoenix-adapters-*-bin.tar.gz, found $count:" >&2; \
+        find phoenix-ddb-assembly/target -maxdepth 1 -type f -name 'phoenix-adapters-*-bin.tar.gz' >&2; \
+        exit 1; \
+    fi; \
+    tarball=$(find phoenix-ddb-assembly/target -maxdepth 1 -type f -name 'phoenix-adapters-*-bin.tar.gz'); \
+    cp "$tarball" /tmp/phoenix-adapters-bin.tar.gz
+
+FROM eclipse-temurin:8-jdk-jammy
+
+ENV JAVA_HOME=/opt/java/openjdk \
+    PHOENIX_ADAPTERS_HOME=/opt/phoenix-adapters \
+    PHOENIX_ADAPTERS_CONF_DIR=/opt/phoenix-adapters/conf \
+    PHOENIX_ADAPTERS_LOG_DIR=/var/log/phoenix-adapters \
+    PHOENIX_ADAPTERS_PID_DIR=/var/run/phoenix-adapters \
+    PHOENIX_REST_PORT=8842 \
+    ZOO_KEEPER_QUORUM=zookeeper:2181 \
+    HBASE_MASTER_HOST=hbase-master \
+    HBASE_MASTER_PORT=16000
+
+RUN set -eux; \
+    apt-get update; \
+    apt-get install -y --no-install-recommends \
+        bash curl ca-certificates netcat-openbsd tini procps; \
+    rm -rf /var/lib/apt/lists/*; \
+    mkdir -p "${PHOENIX_ADAPTERS_LOG_DIR}" "${PHOENIX_ADAPTERS_PID_DIR}"
+
+COPY --from=builder /tmp/phoenix-adapters-bin.tar.gz /tmp/phoenix-adapters-bin.tar.gz
+
+RUN set -eux; \
+    mkdir -p "${PHOENIX_ADAPTERS_HOME}"; \
+    tar -xzf /tmp/phoenix-adapters-bin.tar.gz -C "${PHOENIX_ADAPTERS_HOME}" --strip-components=1; \
+    rm /tmp/phoenix-adapters-bin.tar.gz; \
+    chmod -R +x "${PHOENIX_ADAPTERS_HOME}/bin"; \
+    # The assembly ships a mix of hadoop-common 3.3.6 (declared in pom.xml)
+    # and hadoop-hdfs/yarn/mapreduce 3.4.x (transitive from phoenix-core-client
+    # via hbase-server:2.5.14-hadoop3). The 3.4.x jars register FileSystem
+    # impls that reference `WithErasureCoding`, a class only present in
+    # hadoop-common 3.4.x. When HBase returns a remote exception during
+    # bootstrap, the client's classloader tries to enumerate FileSystem
+    # impls, hits NoClassDefFoundError, and poisons the JVM. The REST
+    # server only talks to HBase via RPC and never opens HDFS directly,
+    # so we strip the 3.4.x hadoop client jars to break the cycle.
+    rm -f "${PHOENIX_ADAPTERS_HOME}/lib/hadoop-hdfs-"*.jar \
+        "${PHOENIX_ADAPTERS_HOME}/lib/hadoop-hdfs-client-"*.jar \
+        "${PHOENIX_ADAPTERS_HOME}/lib/hadoop-yarn-"*.jar \
+        "${PHOENIX_ADAPTERS_HOME}/lib/hadoop-mapreduce-client-"*.jar \
+        "${PHOENIX_ADAPTERS_HOME}/lib/hadoop-distcp-"*.jar
+
+# Client-side WAL codec / RPC controller must match the server cluster.
+COPY docker/conf/phoenix-adapters/hbase-site.xml ${PHOENIX_ADAPTERS_CONF_DIR}/hbase-site.xml
+
+COPY docker/scripts/phoenix-adapters-entrypoint.sh /usr/local/bin/entrypoint.sh
+RUN chmod +x /usr/local/bin/entrypoint.sh
+
+EXPOSE 8842
+
+ENTRYPOINT ["/usr/bin/tini", "--", "/usr/local/bin/entrypoint.sh"]
diff --git a/docker/README.md b/docker/README.md
new file mode 100644
index 0000000..5fd2352
--- /dev/null
+++ b/docker/README.md
@@ -0,0 +1,335 @@
+# Local Docker Cluster for Phoenix Adapters
+
+Brings up the full dependency stack (Hadoop / ZooKeeper / HBase / Phoenix)
+required to run **phoenix-adapters** on your laptop. Uses upstream images
+where they exist; custom only where they don't.
+
+| Component | Version | Image |
+| --- | --- | --- |
+| Apache ZooKeeper | 3.8.4 | [`library/zookeeper:3.8.4`](https://hub.docker.com/_/zookeeper) (Docker Official) |
+| Apache Hadoop (HDFS) | 3.3.6 | [`apache/hadoop:3.3.6`](https://hub.docker.com/r/apache/hadoop) (Apache convenience build) |
+| Apache HBase | 2.5.14-hadoop3 | `phoenix-adapters/hbase-phoenix:latest` (custom) |
+| Apache Phoenix | 5.3.1 (phoenix-hbase-2.5) | bundled into `phoenix-adapters/hbase-phoenix` |
+| Phoenix Adapters REST | this repo | `phoenix-adapters/rest:latest` (custom) |
+
+Versions are kept in lockstep with the top-level [`pom.xml`](../pom.xml).
+
+> **Apple Silicon.** `apache/hadoop:3.3.6` is amd64-only; the compose file
+> pins `platform: linux/amd64` so the NameNode/DataNode run under Rosetta
+> emulation. Slower than native, but functional.
+
+## Layout
+
+```
+docker/
+├── Dockerfile.hbase-phoenix # HBase 2.5.14 + Phoenix 5.3.1
+├── Dockerfile.phoenix-adapters # Multi-stage build of the REST server
+├── docker-compose.yml
+├── conf/
+│ ├── hbase/{hbase-site.xml,hbase-env.sh}
+│ └── phoenix-adapters/hbase-site.xml # Client-side overrides
+└── scripts/
+ ├── hbase-entrypoint.sh # hbase-master, hbase-regionserver
+ ├── phoenix-adapters-entrypoint.sh
+ └── smoke.sh # End-to-end DDB validation suite
+```
+
+ZooKeeper and Hadoop config lives entirely in `docker-compose.yml` as env
+vars that the upstream images template into XML.
+
+## Quick start
+
+**Prerequisites:** Docker Desktop running; `jq` and `curl` on `PATH`
+(`brew install jq` on macOS).
+
+From the **project root**:
+
+```bash
+# 1. Bring up the full stack (ZK + HDFS + HBase+Phoenix + REST) and BLOCK
+# until every service reports healthy (REST takes ~30-60s on a cold
+# start because Phoenix has to bootstrap SYSTEM.* tables).
+# First time: ~8-12 min -- most of that is Maven downloading ~1.5 GB
+# of dependencies into the BuildKit cache mount; subsequent runs reuse
+# the cache and rebuild in seconds.
+docker compose -f docker/docker-compose.yml up -d --build --wait
+
+# 2. Validate it works end-to-end (CRUD + UpdateItem + BatchWriteItem + streams).
+bash docker/scripts/smoke.sh
+# -> "Result: 21 checks PASSED across 18 API calls"
+
+# 3. Use it. The DynamoDB-compatible REST endpoint is at http://localhost:8842 .
+# Point any AWS SDK at it (Java/Python/Node.js snippets in
+# phoenix-ddb-rest/README.md), or hit it directly with curl:
+curl -s -X POST http://localhost:8842/ \
+ -H 'Content-Type: application/x-amz-json-1.0' \
+ -H 'X-Amz-Target: DynamoDB_20120810.ListTables' -d '{}'
+
+# 4. Tear down when you're done.
+docker compose -f docker/docker-compose.yml down # keep volumes
+docker compose -f docker/docker-compose.yml down -v # also wipe HDFS + ZK
+```
+
+### URLs
+
+| URL | Service |
+| --- | --- |
+| http://localhost:8842 | **Phoenix Adapters REST (DynamoDB-compatible)** |
+| http://localhost:9870 | HDFS NameNode UI |
+| http://localhost:9864 | HDFS DataNode UI |
+| http://localhost:16010 | HBase Master UI |
+| http://localhost:16030 | HBase RegionServer UI |
+
+Two host ports are remapped because their defaults often collide on dev
+machines (macOS AirPlay on 9000, a locally installed Kafka/ZK on 2181):
+
+| Service | Container | Host |
+| --- | --- | --- |
+| HDFS NameNode RPC | `namenode:9000` | `localhost:19000` |
+| ZooKeeper client | `zookeeper:2181` | `localhost:12181` |
+
+Inter-container traffic still uses the standard ports.
+
+### Bring up just the cluster (no REST)
+
+```bash
+docker compose -f docker/docker-compose.yml up -d --build --wait \
+ zookeeper namenode datanode hbase-master hbase-regionserver
+```
+
+## Validation suite
+
+`docker/scripts/smoke.sh` exercises every supported DynamoDB API against
+the running REST server and asserts the expected behaviour. It prints
+each request, response, and assertion as it runs.
+
+```bash
+docker compose -f docker/docker-compose.yml up -d --build --wait
+bash docker/scripts/smoke.sh
+```
+
+Exits `0` on full pass; exits non-zero on the first failed assertion and
+prints the offending response.
+
+| Step | API |
+| --- | --- |
+| 1 | `ListTables` (baseline) |
+| 2 | `CreateTable` (with `StreamSpecification` enabled, `NEW_AND_OLD_IMAGES`) |
+| 3 | `DescribeTable` |
+| 4 | `PutItem` (`id=a`) |
+| 5 | `UpdateItem` (`SET score, bonus`, `ReturnValues=ALL_NEW`) |
+| 6 | `GetItem` |
+| 7 | `PutItem` (`id=b`) |
+| 8 | `Scan` |
+| 9 | `Query` |
+| 10 | `DeleteItem` |
+| 11 | `Scan` (after delete) |
+| 12 | `BatchWriteItem` (mixed put + delete) |
+| 13 | `Scan` paginated (drains all pages) |
+| 14 | `ListStreams` |
+| 15 | `DescribeStream` (polls until `StreamStatus == ENABLED`) |
+| 16 | `GetShardIterator` (`TRIM_HORIZON`) |
+| 17 | `GetRecords` (drains all pages) |
+| 18 | `DeleteTable` |
+
+## Poking around the cluster
+
+HBase shell:
+
+```bash
+docker compose -f docker/docker-compose.yml exec hbase-master hbase shell
+```
+
+```text
+status
+list
+create 'demo', 'cf'
+put 'demo', 'r1', 'cf:c1', 'hello'
+scan 'demo'
+```
+
+Phoenix sqlline:
+
+```bash
+docker compose -f docker/docker-compose.yml exec hbase-master \
+ /opt/phoenix/bin/sqlline.py zookeeper:2181
+```
+
+```sql
+!tables
+CREATE TABLE IF NOT EXISTS t1 (id BIGINT PRIMARY KEY, name VARCHAR);
+UPSERT INTO t1 VALUES (1, 'phoenix-adapters');
+SELECT * FROM t1;
+```
+
+## Developer inner loop: code change → live endpoint
+
+```
+phoenix-ddb-rest/src/**.java
+ │ (1) edit on host
+ ▼
+docker compose ... up -d --build phoenix-adapters-rest
+ ├── stage 1: mvn package -DskipTests (BuildKit caches ~/.m2)
+ ├── stage 1 output: phoenix-ddb-assembly/target/*-bin.tar.gz
+ └── stage 2: temurin runtime extracts that tarball
+ │
+ ▼
+http://localhost:8842/ (new code, live)
+```
+
+The cluster (ZK + HDFS + HBase) keeps running across REST rebuilds, and
+HBase data persists across full `down`/`up` cycles.
+
+### The loop
+
+1. Edit code in `phoenix-ddb-rest/src/...` or `phoenix-ddb-utils/src/...`.
+2. *(Optional)* sanity-check the compile on the host:
+
+ ```bash
+ mvn -B -DskipTests -pl phoenix-ddb-rest -am package
+ ```
+
+3. Rebuild and recreate just the REST container:
+
+ ```bash
+ docker compose -f docker/docker-compose.yml up -d --build phoenix-adapters-rest
+ ```
+
+ No-dep-change rebuilds typically take 30-60 s on a warm cache.
+4. Watch logs:
+
+ ```bash
+ docker compose -f docker/docker-compose.yml logs -f phoenix-adapters-rest
+ ```
+5. Hit the endpoint and verify.
+
+### Quick reference
+
+| Task | Command |
+| --- | --- |
+| Rebuild REST + restart it | `docker compose -f docker/docker-compose.yml up -d --build phoenix-adapters-rest` |
+| Restart REST (no code change) | `docker compose -f docker/docker-compose.yml restart phoenix-adapters-rest` |
+| Tail REST logs | `docker compose -f docker/docker-compose.yml logs -f phoenix-adapters-rest` |
+| Tail HBase logs | `docker compose -f docker/docker-compose.yml logs -f hbase-master hbase-regionserver` |
+| HBase shell | `docker compose -f docker/docker-compose.yml exec hbase-master hbase shell` |
+| Phoenix sqlline | `docker compose -f docker/docker-compose.yml exec hbase-master /opt/phoenix/bin/sqlline.py zookeeper:2181` |
+| List containers | `docker compose -f docker/docker-compose.yml ps` |
+| Stop (keep data) | `docker compose -f docker/docker-compose.yml down` |
+| Stop + wipe data | `docker compose -f docker/docker-compose.yml down -v` |
+
+### Edge cases
+
+| Situation | What to do |
+| --- | --- |
+| Changed `conf/hbase/hbase-site.xml` or `hbase-env.sh` | `docker compose ... up -d --build hbase-master hbase-regionserver`. Existing tables survive. |
+| Bumped `hbase.version` / `phoenix.version` in `pom.xml` | Bump matching `ARG`s in `Dockerfile.hbase-phoenix`, then `--build hbase-master hbase-regionserver phoenix-adapters-rest`. Often pair with `down -v`. |
+| Added a Maven dep to `phoenix-ddb-rest/pom.xml` | `--build phoenix-adapters-rest`. New dep downloads once; cache warms after. |
+| Clean slate | `docker compose ... down -v` then `up -d --build`. |
+| Code doesn't seem picked up | You ran `restart` instead of `up --build`. `restart` does not rebuild. |
+| Stack left running for days / many smoke iterations | HBase + REST logs grow unbounded inside the containers. Run `down -v` periodically to reclaim disk. |
+
+### Pre-PR checklist
+
+```bash
+# 1. Host-side compile + unit tests (no cluster required).
+mvn -B clean install -DskipITs
+
+# 2. End-to-end validation: fresh stack + full DDB round-trip including streams.
+docker compose -f docker/docker-compose.yml down -v
+docker compose -f docker/docker-compose.yml up -d --build --wait
+bash docker/scripts/smoke.sh
+
+# 3. Tear it down.
+docker compose -f docker/docker-compose.yml down -v
+```
+
+If `smoke.sh` finishes with `Result: 21 checks PASSED across 18 API calls`,
+your change is wire-compatible end to end through Phoenix on dockerized
+HBase across CRUD, batch, and the change-stream chain.
+
+## Running the REST server outside Docker
+
+1. Bring up only the cluster services.
+2. Add cluster hostnames to `/etc/hosts` (HBase advertises hostnames over ZK):
+
+ ```
+ 127.0.0.1 zookeeper namenode datanode hbase-master hbase-regionserver
+ ```
+
+3. Start the REST server pointing at the dockerized ZooKeeper:
+
+ ```bash
+ mvn -DskipTests clean package
+ tar xzf phoenix-ddb-assembly/target/phoenix-adapters-*-bin.tar.gz -C /tmp
+ cd /tmp/phoenix-adapters-*
+ export JAVA_HOME=$(/usr/libexec/java_home -v 1.8) # macOS example
+ export PHOENIX_ADAPTERS_HOME=$(pwd)
+ bin/phoenix-adapters rest foreground_start -p 8842 -z localhost:12181
+ ```
+
+## Phoenix tuning baked into the image
+
+[`docker/conf/hbase/hbase-site.xml`](conf/hbase/hbase-site.xml) enables what
+Phoenix 5.x needs for secondary indexes, DDL events, and the multi-priority
+RPC controller:
+
+| Property | Value |
+| --- | --- |
+| `hbase.coprocessor.master.classes` | `…PhoenixMasterObserver` |
+| `hbase.coprocessor.regionserver.classes` | `…PhoenixRegionServerEndpoint` |
+| `hbase.regionserver.wal.codec` | `…IndexedWALEditCodec` |
+| `hbase.region.server.rpc.scheduler.factory.class` | `…PhoenixRpcSchedulerFactory` |
+| `hbase.rpc.controllerfactory.class` | `…ServerRpcControllerFactory` |
+| `phoenix.task.handling.interval.ms` | `1000` |
+| `phoenix.task.handling.initial.delay.ms` | `1` |
+
+`phoenix-server-hbase-2.5-5.3.1.jar` is copied into `${HBASE_HOME}/lib/` so
+the coprocessors and WAL codec are visible to master and every RegionServer.
+
+## Why upstream images for ZK + Hadoop but not HBase?
+
+| Component | Decision | Reason |
+| --- | --- | --- |
+| ZooKeeper 3.8.4 | Upstream `zookeeper:3.8.4` | Docker Official, exact version, multi-arch. |
+| Hadoop 3.3.6 | Upstream `apache/hadoop:3.3.6` | Apache convenience build at the exact version. amd64-only, runs under emulation on Apple Silicon. |
+| HBase 2.5.14-hadoop3 | Custom | No official Apache image; community images don't cover `2.5.14-hadoop3`. |
+| Phoenix 5.3.1 | Custom (layered on HBase) | No Phoenix image anywhere; server JAR must be on HBase's classpath. |
+
+## Troubleshooting
+
+* **NameNode unhealthy on first start.** First start formats the NameNode
+ via `ENSURE_NAMENODE_DIR`. Watch with `docker compose ... logs -f namenode`.
+* **HBase Master `RegionTooBusyException` / `NotServingRegionException`.** Wait ~30 s
+ after RegionServer comes up; Phoenix bootstraps `SYSTEM.*` tables on its
+ first connection and the REST server retries transparently.
+* **REST exits with `NoClassDefFoundError: org/apache/hadoop/fs/WithErasureCoding`.**
+ The phoenix-ddb-assembly tarball ships `hadoop-common:3.3.6` (from
+ `pom.xml`) alongside `hadoop-hdfs:3.4.x` / `hadoop-yarn:3.4.x`
+ (transitive from `phoenix-core-client`). The 3.4.x JARs register
+ FileSystem impls that need `WithErasureCoding`, which only exists in
+ hadoop-common 3.4+. When HBase returns a remote exception during
+ bootstrap, the client tries to enumerate FileSystem impls, hits
+ `NoClassDefFoundError`, and poisons the JVM. The REST image
+ `Dockerfile.phoenix-adapters` strips the 3.4.x `hadoop-hdfs*`,
+ `hadoop-yarn-*`, `hadoop-mapreduce-client-*`, and `hadoop-distcp-*`
+ jars after extracting the tarball — the REST server only talks to
+ HBase via RPC and never opens HDFS directly, so removing them is safe.
+ If this error reappears, check that those `rm -f` lines in
+ `Dockerfile.phoenix-adapters` weren't dropped.
+* **`Datanode denied communication with namenode`.** Cluster ID mismatch.
+ `docker compose down -v` and bring the stack back up.
+* **`platform mismatch` warnings on Apple Silicon.** Expected for the
+ Hadoop containers (amd64 image, emulated). No action needed.
+
+## Customising versions
+
+HBase / Phoenix versions are `ARG`s on `Dockerfile.hbase-phoenix`:
+
+```bash
+docker compose -f docker/docker-compose.yml build \
+ --build-arg HBASE_VERSION=2.5.13 \
+ --build-arg PHOENIX_VERSION=5.3.0 \
+ hbase-master
+```
+
+Hadoop and ZooKeeper versions are pinned by tag in `docker-compose.yml`.
+Keep all four in lockstep with `pom.xml`.
diff --git a/docker/conf/hbase/hbase-env.sh b/docker/conf/hbase/hbase-env.sh
new file mode 100644
index 0000000..e8d7c6d
--- /dev/null
+++ b/docker/conf/hbase/hbase-env.sh
@@ -0,0 +1,19 @@
+#!/usr/bin/env bash
+export JAVA_HOME=${JAVA_HOME:-/opt/java/openjdk}  # keep a JAVA_HOME already set in the environment
+export HBASE_MANAGES_ZK=false  # ZooKeeper runs as its own compose service
+export HBASE_LOG_DIR=/var/log/hbase  # directory is created in Dockerfile.hbase-phoenix
+export HBASE_PID_DIR=/var/run/hbase  # directory is created in Dockerfile.hbase-phoenix
+
+# Sized to fit the whole stack in ~4 GB of Docker memory.
+export HBASE_HEAPSIZE=1G
+export HBASE_OFFHEAPSIZE=256m
+
+# Strip JDK11-specific GC flags HBase ships with; we run on JDK8.
+# This intentionally REPLACES the upstream value (rather than appending),
+# so any future upstream flag drops out of the container -- add new flags
+# to this list directly instead of re-deriving from upstream's HBASE_OPTS.
+export HBASE_OPTS="-XX:+UseG1GC -XX:+UnlockExperimentalVMOptions"
+export HBASE_MASTER_OPTS="${HBASE_OPTS} -Xms256m"  # base flags plus the master's initial heap
+export HBASE_REGIONSERVER_OPTS="${HBASE_OPTS} -Xms512m"  # base flags plus the RegionServer's initial heap
+
+unset HBASE_JSHELL_ARGS  # NOTE(review): presumably a JDK 9+ (jshell) setting inherited from upstream -- confirm
diff --git a/docker/conf/hbase/hbase-site.xml b/docker/conf/hbase/hbase-site.xml
new file mode 100644
index 0000000..56c279c
--- /dev/null
+++ b/docker/conf/hbase/hbase-site.xml
@@ -0,0 +1,69 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<configuration>
+ <property>
+ <name>hbase.rootdir</name>
+ <value>hdfs://namenode:9000/hbase</value>
+ </property>
+ <property>
+ <name>hbase.cluster.distributed</name>
+ <value>true</value>
+ </property>
+ <property>
+ <name>hbase.zookeeper.quorum</name>
+ <value>zookeeper</value>
+ </property>
+ <property>
+ <name>hbase.zookeeper.property.clientPort</name>
+ <value>2181</value>
+ </property>
+ <!--
+ Dev-only: disables the HDFS hflush/hsync capability check so the
+ single-replica datanode in this stack can serve HBase WAL writes.
+ Do NOT copy this property into a production hbase-site.xml.
+ -->
+ <property>
+ <name>hbase.unsafe.stream.capability.enforce</name>
+ <value>false</value>
+ </property>
+ <property>
+ <name>hbase.wal.provider</name>
+ <value>filesystem</value>
+ </property>
+
+ <!-- Phoenix 5.x required configuration. -->
+ <property>
+ <name>hbase.coprocessor.master.classes</name>
+ <value>org.apache.phoenix.coprocessor.PhoenixMasterObserver</value>
+ </property>
+ <property>
+ <name>hbase.coprocessor.regionserver.classes</name>
+
<value>org.apache.phoenix.coprocessor.PhoenixRegionServerEndpoint</value>
+ </property>
+ <property>
+ <name>hbase.regionserver.wal.codec</name>
+
<value>org.apache.hadoop.hbase.regionserver.wal.IndexedWALEditCodec</value>
+ </property>
+ <property>
+ <name>hbase.region.server.rpc.scheduler.factory.class</name>
+ <value>org.apache.hadoop.hbase.ipc.PhoenixRpcSchedulerFactory</value>
+ </property>
+ <property>
+ <name>hbase.rpc.controllerfactory.class</name>
+
<value>org.apache.hadoop.hbase.ipc.controller.ServerRpcControllerFactory</value>
+ </property>
+ <!--
+ Phoenix default is 60000 ms; we cut it to 1000 ms so dev-cluster
+ background tasks (index state transitions, dropped-table GC, etc.)
+ run quickly enough to keep iteration tight, without the laptop
+ CPU cost of a ~10 ms poll loop.
+ -->
+ <property>
+ <name>phoenix.task.handling.interval.ms</name>
+ <value>1000</value>
+ </property>
+ <property>
+ <name>phoenix.task.handling.initial.delay.ms</name>
+ <value>1</value>
+ </property>
+</configuration>
diff --git a/docker/conf/phoenix-adapters/hbase-site.xml
b/docker/conf/phoenix-adapters/hbase-site.xml
new file mode 100644
index 0000000..0657edd
--- /dev/null
+++ b/docker/conf/phoenix-adapters/hbase-site.xml
@@ -0,0 +1,21 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!-- Client-side counterparts of the server settings: same WAL codec, but ClientRpcControllerFactory where the server uses ServerRpcControllerFactory. -->
+<configuration>
+ <property>
+ <name>hbase.regionserver.wal.codec</name>
+
<value>org.apache.hadoop.hbase.regionserver.wal.IndexedWALEditCodec</value>
+ </property>
+ <property>
+ <name>hbase.rpc.controllerfactory.class</name>
+
<value>org.apache.hadoop.hbase.ipc.controller.ClientRpcControllerFactory</value>
+ </property>
+ <property>
+ <name>phoenix.task.handling.interval.ms</name>
+ <value>1000</value>
+ </property>
+ <property>
+ <name>phoenix.task.handling.initial.delay.ms</name>
+ <value>1</value>
+ </property>
+</configuration>
diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml
new file mode 100644
index 0000000..329bef5
--- /dev/null
+++ b/docker/docker-compose.yml
@@ -0,0 +1,191 @@
+# Bring up from the project root:
+#   docker compose -f docker/docker-compose.yml up --build
+#
+# Cluster only (no REST):
+#   docker compose -f docker/docker-compose.yml up --build \
+#     zookeeper namenode datanode hbase-master hbase-regionserver
+
+name: phoenix-adapters
+
+# The container_name keys below pin each service to a fixed name (phx-*).
+# Combined with the fixed host ports (8842/9870/12181/etc.), this means
+# only one copy of the stack can run on a workstation at a time. That's
+# intentional given the host-port collisions; if you need parallel stacks,
+# drop the container_name keys AND change the host-port mappings.
+services:
+
+  zookeeper:
+    image: zookeeper:3.8.4
+    container_name: phx-zookeeper
+    hostname: zookeeper
+    environment:
+      ZOO_4LW_COMMANDS_WHITELIST: "srvr,ruok,mntr,conf"
+      ZOO_ADMINSERVER_ENABLED: "false"
+      ZOO_AUTOPURGE_PURGEINTERVAL: 24
+      ZOO_AUTOPURGE_SNAPRETAINCOUNT: 3
+    ports:
+      # Host port shifted off the default; 2181 is often busy on dev machines.
+      - "12181:2181"
+    volumes:
+      - zookeeper-data:/data
+      - zookeeper-datalog:/datalog
+    healthcheck:
+      test: ["CMD-SHELL", "echo ruok | nc -w 2 localhost 2181 | grep -q imok"]
+      interval: 5s
+      timeout: 5s
+      retries: 20
+    networks: [phoenix-net]
+
+  # apache/hadoop:3.3.6 is amd64-only; on Apple Silicon Docker emulates
+  # via Rosetta/qemu (slower but functional).
+  namenode:
+    image: apache/hadoop:3.3.6
+    platform: linux/amd64
+    container_name: phx-namenode
+    hostname: namenode
+    environment:
+      # Triggers a first-time `hdfs namenode -format` when this dir is empty.
+      ENSURE_NAMENODE_DIR: /data/namenode
+      # The apache/hadoop image templates *-SITE.XML files from these env vars.
+      CORE-SITE.XML_fs.defaultFS: "hdfs://namenode:9000"
+      HDFS-SITE.XML_dfs.replication: "1"
+      HDFS-SITE.XML_dfs.namenode.name.dir: "file:///data/namenode"
+      HDFS-SITE.XML_dfs.datanode.data.dir: "file:///data/datanode"
+      HDFS-SITE.XML_dfs.permissions.enabled: "false"
+      HDFS-SITE.XML_dfs.namenode.datanode.registration.ip-hostname-check: "false"
+      HDFS-SITE.XML_dfs.client.use.datanode.hostname: "true"
+      HDFS-SITE.XML_dfs.datanode.use.datanode.hostname: "true"
+    command: ["hdfs", "namenode"]
+    ports:
+      - "9870:9870"
+      # Host port shifted off 9000 (macOS AirPlay et al).
+      - "19000:9000"
+    volumes:
+      - namenode-data:/data
+    healthcheck:
+      # Hadoop binds to the hostname, not localhost.
+      test: ["CMD-SHELL", "nc -z namenode 9000 || exit 1"]
+      interval: 5s
+      timeout: 5s
+      retries: 30
+    networks: [phoenix-net]
+
+  datanode:
+    image: apache/hadoop:3.3.6
+    platform: linux/amd64
+    container_name: phx-datanode
+    hostname: datanode
+    depends_on:
+      namenode:
+        condition: service_healthy
+    environment:
+      WAITFOR: namenode:9000
+      CORE-SITE.XML_fs.defaultFS: "hdfs://namenode:9000"
+      HDFS-SITE.XML_dfs.replication: "1"
+      HDFS-SITE.XML_dfs.namenode.name.dir: "file:///data/namenode"
+      HDFS-SITE.XML_dfs.datanode.data.dir: "file:///data/datanode"
+      HDFS-SITE.XML_dfs.permissions.enabled: "false"
+      HDFS-SITE.XML_dfs.client.use.datanode.hostname: "true"
+      HDFS-SITE.XML_dfs.datanode.use.datanode.hostname: "true"
+    command: ["hdfs", "datanode"]
+    ports:
+      - "9864:9864"
+    volumes:
+      - datanode-data:/data
+    healthcheck:
+      test: ["CMD-SHELL", "nc -z datanode 9866 || exit 1"]
+      interval: 5s
+      timeout: 5s
+      retries: 30
+    networks: [phoenix-net]
+
+  hbase-master:
+    image: phoenix-adapters/hbase-phoenix:latest
+    build:
+      context: .
+      dockerfile: Dockerfile.hbase-phoenix
+    container_name: phx-hbase-master
+    hostname: hbase-master
+    command: ["hbase-master"]
+    depends_on:
+      zookeeper:
+        condition: service_healthy
+      namenode:
+        condition: service_healthy
+      datanode:
+        condition: service_healthy  # hbase.rootdir lives on HDFS; wait until the DataNode passes its healthcheck
+    ports:
+      - "16000:16000"
+      - "16010:16010"
+    healthcheck:
+      test: ["CMD-SHELL", "nc -z hbase-master 16000 || exit 1"]
+      interval: 10s
+      timeout: 5s
+      retries: 30
+    networks: [phoenix-net]
+
+  hbase-regionserver:
+    image: phoenix-adapters/hbase-phoenix:latest
+    build:
+      context: .
+      dockerfile: Dockerfile.hbase-phoenix
+    container_name: phx-hbase-regionserver
+    hostname: hbase-regionserver
+    command: ["hbase-regionserver"]
+    depends_on:
+      hbase-master:
+        condition: service_healthy
+    ports:
+      - "16020:16020"
+      - "16030:16030"
+    healthcheck:
+      test: ["CMD-SHELL", "nc -z hbase-regionserver 16020 || exit 1"]
+      interval: 5s
+      timeout: 5s
+      retries: 30
+    networks: [phoenix-net]
+
+  phoenix-adapters-rest:
+    image: phoenix-adapters/rest:latest
+    build:
+      context: ..
+      dockerfile: docker/Dockerfile.phoenix-adapters
+    container_name: phx-adapters-rest
+    hostname: phoenix-adapters-rest
+    depends_on:
+      hbase-master:
+        condition: service_healthy
+      hbase-regionserver:
+        condition: service_healthy
+    environment:
+      - ZOO_KEEPER_QUORUM=zookeeper:2181
+      - PHOENIX_REST_PORT=8842
+      - HBASE_MASTER_HOST=hbase-master
+      - HBASE_MASTER_PORT=16000
+    ports:
+      - "8842:8842"
+    # Probes the real API: only "healthy" once Phoenix has bootstrapped
+    # SYSTEM.* tables and Jetty is accepting POSTs.
+    healthcheck:
+      test:
+        - "CMD-SHELL"
+        - >-
+          curl -fs -m 3 -X POST http://localhost:8842/
+          -H 'Content-Type: application/x-amz-json-1.0'
+          -H 'X-Amz-Target: DynamoDB_20120810.ListTables'
+          -d '{}' || exit 1
+      interval: 5s
+      timeout: 5s
+      retries: 60
+      start_period: 30s
+    networks: [phoenix-net]
+
+volumes:
+  zookeeper-data:
+  zookeeper-datalog:
+  namenode-data:
+  datanode-data:
+
+networks:
+  phoenix-net:
+    driver: bridge
diff --git a/docker/scripts/hbase-entrypoint.sh
b/docker/scripts/hbase-entrypoint.sh
new file mode 100644
index 0000000..ed21d07
--- /dev/null
+++ b/docker/scripts/hbase-entrypoint.sh
@@ -0,0 +1,55 @@
+#!/usr/bin/env bash
+#
+# Usage: entrypoint.sh <role>
+# role := hbase-master | hbase-regionserver | bash | help
+#
+set -euo pipefail
+
+ROLE="${1:-help}"
+
+log() { echo "[hbase-entrypoint][$(date -u +%H:%M:%S)] $*"; }
+fail() { log "ERROR: $*"; exit 1; }
+
+# Block until a TCP connection to host:port succeeds.
+#   $1 - host to probe
+#   $2 - port to probe
+# Retries `nc -z` every 2 seconds; there is no upper bound on the wait.
+wait_for() {
+  local target_host="$1"
+  local target_port="$2"
+  log "Waiting for ${target_host}:${target_port} ..."
+  while ! nc -z "${target_host}" "${target_port}" 2>/dev/null; do
+    sleep 2
+  done
+  log "${target_host}:${target_port} is reachable."
+}
+
+case "${ROLE}" in
+ hbase-master)
+ wait_for "${ZOOKEEPER_HOST:-zookeeper}" "${ZOOKEEPER_PORT:-2181}"
+ wait_for "${NAMENODE_HOST:-namenode}" "${NAMENODE_PORT:-9000}"
+ exec "${HBASE_HOME}/bin/hbase" master start
+ ;;
+
+ hbase-regionserver)
+ wait_for "${ZOOKEEPER_HOST:-zookeeper}" "${ZOOKEEPER_PORT:-2181}"
+ wait_for "${HMASTER_HOST:-hbase-master}" "${HMASTER_PORT:-16000}"
+ exec "${HBASE_HOME}/bin/hbase" regionserver start
+ ;;
+
+ bash|shell)
+ exec /bin/bash
+ ;;
+
+ help|*)
+ cat <<EOF
+Usage: docker run ... phoenix-adapters/hbase-phoenix:latest <role>
+
+Roles:
+ hbase-master Run the HBase Master.
+ hbase-regionserver Run an HBase RegionServer.
+ bash Drop into a shell inside the image.
+
+Versions:
+ HBase ${HBASE_VERSION}-${HBASE_FLAVOR}
+ Phoenix ${PHOENIX_VERSION} (phoenix-hbase-${PHOENIX_HBASE_LINE})
+EOF
+ [[ "${ROLE}" == "help" ]] && exit 0
+ fail "Unknown role: ${ROLE}"
+ ;;
+esac
diff --git a/docker/scripts/phoenix-adapters-entrypoint.sh b/docker/scripts/phoenix-adapters-entrypoint.sh
new file mode 100644
index 0000000..f77b971
--- /dev/null
+++ b/docker/scripts/phoenix-adapters-entrypoint.sh
@@ -0,0 +1,63 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+log() { echo "[phoenix-adapters][$(date -u +%H:%M:%S)] $*"; }
+
+# Guard against accidental reintroduction of the 3.4.x hadoop client jars.
+# Dockerfile.phoenix-adapters strips them because they reference
+# org.apache.hadoop.fs.WithErasureCoding (only present in hadoop-common
+# 3.4+), which poisons the client JVM via FileSystem ServiceLoader the
+# first time HBase returns a remote exception. If anyone re-adds them,
+# fail fast with a clear pointer instead of dying mid-bootstrap.
+#
+# nullglob makes an unmatched pattern expand to nothing, so the array is
+# empty when no offending jar is present.
+shopt -s nullglob
+# "hadoop-hdfs-*.jar" also matches hadoop-hdfs-client-*.jar; a separate
+# client pattern would only report those jars twice.
+stray=( "${PHOENIX_ADAPTERS_HOME}/lib/hadoop-hdfs-"*.jar \
+        "${PHOENIX_ADAPTERS_HOME}/lib/hadoop-yarn-"*.jar \
+        "${PHOENIX_ADAPTERS_HOME}/lib/hadoop-mapreduce-client-"*.jar \
+        "${PHOENIX_ADAPTERS_HOME}/lib/hadoop-distcp-"*.jar )
+shopt -u nullglob
+if [[ ${#stray[@]} -gt 0 ]]; then
+  log "ERROR: assembly contains hadoop 3.4.x jars that must be stripped:"
+  # Report bare file names; the directory is the same for all of them.
+  for j in "${stray[@]}"; do log " - ${j##*/}"; done
+  log "See the 'rm -f hadoop-hdfs-*' block in docker/Dockerfile.phoenix-adapters."
+  exit 1
+fi
+
+# Block until a TCP connection to host:port succeeds.
+#   $1 - host to probe
+#   $2 - port to probe
+# Retries `nc -z` every 2 seconds; there is no upper bound on the wait.
+wait_for() {
+  local target_host="$1"
+  local target_port="$2"
+  log "Waiting for ${target_host}:${target_port} ..."
+  while ! nc -z "${target_host}" "${target_port}" 2>/dev/null; do
+    sleep 2
+  done
+  log "${target_host}:${target_port} is reachable."
+}
+
+zk_quorum="${ZOO_KEEPER_QUORUM:-zookeeper:2181}"
+zk_host="${zk_quorum%%:*}"
+zk_port="${zk_quorum##*:}"
+[[ "${zk_host}" == "${zk_port}" ]] && zk_port=2181
+
+wait_for "${zk_host}" "${zk_port}"
+wait_for "${HBASE_MASTER_HOST:-hbase-master}" "${HBASE_MASTER_PORT:-16000}"
+
+# Give the master a moment to finish initialising hbase:meta before the
+# first Phoenix connection bootstraps SYSTEM.* tables.
+sleep "${PHOENIX_BOOTSTRAP_SLEEP_SECONDS:-5}"
+
+log "Starting Phoenix Adapters REST on :${PHOENIX_REST_PORT} (ZK=${zk_quorum})"
+
+CLASSPATH="${PHOENIX_ADAPTERS_CONF_DIR}:${PHOENIX_ADAPTERS_HOME}/lib/*"
+
+exec "${JAVA_HOME}/bin/java" \
+ -Dproc_rest \
+ -XX:+UseG1GC \
+ -XX:OnOutOfMemoryError="kill -9 %p" \
+ -XX:+HeapDumpOnOutOfMemoryError \
+ -XX:HeapDumpPath="${PHOENIX_ADAPTERS_LOG_DIR}" \
+ -Dphoenix.adapters.log.dir="${PHOENIX_ADAPTERS_LOG_DIR}" \
+
-Dlog4j2.configurationFile="file:${PHOENIX_ADAPTERS_CONF_DIR}/log4j2.properties"
\
+ -cp "${CLASSPATH}" \
+ org.apache.phoenix.ddb.rest.RESTServer \
+ start \
+ -p "${PHOENIX_REST_PORT}" \
+ -z "${zk_quorum}"
diff --git a/docker/scripts/smoke.sh b/docker/scripts/smoke.sh
new file mode 100755
index 0000000..4962a57
--- /dev/null
+++ b/docker/scripts/smoke.sh
@@ -0,0 +1,336 @@
+#!/usr/bin/env bash
+#
+# Phoenix Adapters DynamoDB validation suite.
+#
+# Hits every supported API against the dockerized REST server and asserts
+# the expected behaviour. Prints each request, response, and assertion in
+# a readable format. Exits 0 on full pass; exits non-zero on the first
+# failed assertion (and dumps the offending response).
+#
+# Usage: docker/scripts/smoke.sh [label]
+#
+# Requires the cluster to already be up (see docker/README.md).
+# Requires: jq, curl.
+#
+set -euo pipefail
+
+URL="${PHX_URL:-http://localhost:8842}"
+LABEL="${1:-}"
+TBL="Smoke${LABEL}"
+CT='Content-Type: application/x-amz-json-1.0'
+TARGET='X-Amz-Target: DynamoDB_20120810'
+TOTAL=18
+
+if ! command -v jq >/dev/null 2>&1; then
+ echo "smoke.sh: jq is required but not on PATH" >&2
+ exit 2
+fi
+
+# ─── ANSI helpers ────────────────────────────────────────────────────────────
+B='\033[1m' # bold
+DIM='\033[2m' # dim
+CYAN='\033[1;36m'
+GREEN='\033[32m'
+RED='\033[31m'
+RESET='\033[0m'
+RULE='─────────────────────────────────────────────────────────────'
+BAR='━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━'
+
+STEP=0
+PASS=0
+
+# Print a cyan banner: a BAR line, one line per argument, and a closing BAR
+# line. The first argument is always printed, so at least one is expected.
+banner() {
+  local text
+  printf "\n${CYAN}%s${RESET}\n" "$BAR"
+  printf "${CYAN} %s${RESET}\n" "$1"
+  for text in "${@:2}"; do
+    printf "${CYAN} %s${RESET}\n" "$text"
+  done
+  printf "${CYAN}%s${RESET}\n" "$BAR"
+}
+
+# Start a new numbered step: bump the global STEP counter, then print
+# "[STEP/TOTAL] <title>" followed by a dim rule.
+#   $1 - step title
+step() {
+  local title="$1"
+  : "$(( STEP += 1 ))"
+  printf "\n${CYAN}[%2d/%2d]${RESET} ${B}%s${RESET}\n" "$STEP" "$TOTAL" "$title"
+  printf "${DIM}%s${RESET}\n" "$RULE"
+}
+
+# Print a dim "<label>:" heading followed by the payload.
+#   $1 - heading text
+#   $2 - payload; pretty-printed and indented through jq when jq can parse
+#        it, printed verbatim otherwise
+show_json() {
+  local heading="$1" payload="$2"
+  printf " ${DIM}%s:${RESET}\n" "$heading"
+  if ! jq . <<<"$payload" >/dev/null 2>&1; then
+    printf " %s\n" "$payload"
+    return
+  fi
+  jq . <<<"$payload" | sed 's/^/ /'
+}
+
+# Raw body of the most recent response, for the per-step assertions.
+LAST_RESP=""
+
+# Issue one DynamoDB-style call and display it.
+#   $1 - action name, appended to the X-Amz-Target header
+#   $2 - JSON request body
+# Prints request + response visually and stashes the raw JSON in LAST_RESP.
+# Aborts immediately if the response is a DDB error envelope (has __type),
+# so per-step assertions don't have to translate confusing "expected X got
+# null" failures back into the underlying Phoenix error.
+ddb() {
+  local action="$1" body="$2"
+  show_json "request " "$body"
+  # Must stay on one line: a break before "$body" would send an empty -d
+  # and run the body as a separate command.
+  LAST_RESP=$(curl -sS -X POST "$URL/" -H "$CT" -H "$TARGET.$action" -d "$body")
+  show_json "response" "$LAST_RESP"
+  # jq -e exits non-zero for a false result and for unparsable input, so
+  # only a well-formed error envelope takes this branch.
+  if printf '%s' "$LAST_RESP" | jq -e 'type == "object" and has("__type")' >/dev/null 2>&1; then
+    local err_type err_msg
+    err_type=$(printf '%s' "$LAST_RESP" | jq -r '.__type // "?"')
+    err_msg=$(printf '%s' "$LAST_RESP" | jq -r '.Message // .message // ""')
+    printf " ${RED}✗${RESET} %s returned error ${B}%s${RESET}: %s\n" \
+      "$action" "$err_type" "$err_msg" >&2
+    exit 1
+  fi
+}
+
+# Assert that two strings are identical.
+#   $1 - label shown in the report
+#   $2 - actual value
+#   $3 - expected value
+# On success prints a check mark and increments PASS; on mismatch prints
+# both values to stderr and exits the script with status 1.
+assert_eq() {
+  local label="$1" actual="$2" expected="$3"
+  if [[ "$actual" == "$expected" ]]; then
+    printf " ${GREEN}✓${RESET} %s ${B}==${RESET} %s\n" "$label" "$expected"
+    PASS=$((PASS + 1))
+  else
+    printf " ${RED}✗${RESET} %s ${B}expected${RESET} %s, ${B}got${RESET} %s\n" \
+      "$label" "$expected" "$actual" >&2
+    exit 1
+  fi
+}
+
+# Assert that a value is present.
+#   $1 - label shown in the report
+#   $2 - value; the empty string and the literal "null" count as missing
+# On success prints the value and increments PASS; otherwise reports to
+# stderr and exits the script with status 1.
+assert_nonempty() {
+  local what="$1" got="$2"
+  if [[ -z "$got" || "$got" == "null" ]]; then
+    printf " ${RED}✗${RESET} %s missing\n" "$what" >&2
+    exit 1
+  fi
+  printf " ${GREEN}✓${RESET} %s present (%s)\n" "$what" "$got"
+  PASS=$((PASS + 1))
+}
+
+assert_ge() {
+ local label="$1" actual="$2" threshold="$3"
+ # Coerce non-numeric (null, empty, "true", etc.) to 0 so the arithmetic
+ # comparison can't abort the script with "integer expression expected".
+ [[ "$actual" =~ ^-?[0-9]+$ ]] || actual=0
+ if (( actual >= threshold )); then
+ printf " ${GREEN}✓${RESET} %s ${B}>=${RESET} %s (got %s)\n" "$label"
"$threshold" "$actual"
+ PASS=$((PASS + 1))
+ else
+ printf " ${RED}✗${RESET} %s expected >= %s, got %s\n" "$label"
"$threshold" "$actual" >&2
+ exit 1
+ fi
+}
+
+banner "Phoenix Adapters DynamoDB Validation Suite" \
+  "Endpoint : $URL" \
+  "Table : $TBL"
+
+# ─── Confirm the REST server is up before exercising the API ────────────────
+# When the stack is launched with `docker compose up --wait` the
+# phoenix-adapters-rest healthcheck has already ensured readiness; this
+# check returns almost immediately in that case. Otherwise we probe
+# ListTables until it responds (cold-start bootstrap takes ~30-60s).
+#
+# Elapsed time comes from bash's SECONDS counter rather than the iteration
+# count: one iteration can take up to 4s (3s curl limit + 1s sleep), so
+# counting iterations would under-report the wait and overrun TIMEOUT.
+TIMEOUT=180
+SPIN=( '⠋' '⠙' '⠹' '⠸' '⠼' '⠴' '⠦' '⠧' '⠇' '⠏' )
+ready=false
+started=$SECONDS
+elapsed=0
+tick=0
+printf "\n"
+while (( elapsed < TIMEOUT )); do
+  if curl -fs -m 3 -X POST "$URL/" \
+      -H "$CT" -H "$TARGET.ListTables" -d '{}' >/dev/null 2>&1; then
+    elapsed=$(( SECONDS - started ))
+    # \r returns to column 0 and \033[K erases what the spinner left behind.
+    printf "\r\033[K${GREEN}✓${RESET} REST server is ready at %s (verified in %ds)\n" "$URL" "$elapsed"
+    ready=true
+    break
+  fi
+  elapsed=$(( SECONDS - started ))
+  printf "\r\033[K${DIM}%s${RESET} Confirming REST server is ready at %s ${DIM}(%ds elapsed)${RESET}" \
+    "${SPIN[$((tick % ${#SPIN[@]}))]}" "$URL" "$elapsed"
+  tick=$(( tick + 1 ))
+  sleep 1
+  elapsed=$(( SECONDS - started ))
+done
+if [[ "$ready" != true ]]; then
+  printf "\n${RED}✗ REST server did not become ready within %ds at %s${RESET}\n" "$TIMEOUT" "$URL" >&2
+  printf "${DIM}Last 30 lines of phx-adapters-rest:${RESET}\n" >&2
+  # Best effort: docker may be absent or the container may have another name.
+  docker logs phx-adapters-rest 2>&1 | tail -30 >&2 || true
+  exit 1
+fi
+
+###############################################################################
+# CRUD
+###############################################################################
+
+step "ListTables (baseline)"
+ddb ListTables '{}'
+
+step "CreateTable (streams enabled, NEW_AND_OLD_IMAGES)"
+ddb CreateTable "$(cat <<EOF
+{
+ "TableName": "$TBL",
+ "AttributeDefinitions": [{"AttributeName":"id","AttributeType":"S"}],
+ "KeySchema": [{"AttributeName":"id","KeyType":"HASH"}],
+ "BillingMode": "PAY_PER_REQUEST",
+ "StreamSpecification": {"StreamEnabled": true, "StreamViewType":
"NEW_AND_OLD_IMAGES"}
+}
+EOF
+)"
+assert_eq "TableStatus" "$(jq -r '.TableDescription.TableStatus'
<<<"$LAST_RESP")" "ACTIVE"
+
+step "DescribeTable"
+ddb DescribeTable "{\"TableName\":\"$TBL\"}"
+assert_eq "StreamSpecification.StreamEnabled" "$(jq -r
'.Table.StreamSpecification.StreamEnabled' <<<"$LAST_RESP")" "true"
+assert_eq "StreamSpecification.StreamViewType" "$(jq -r
'.Table.StreamSpecification.StreamViewType' <<<"$LAST_RESP")"
"NEW_AND_OLD_IMAGES"
+assert_nonempty "LatestStreamArn" "$(jq -r '.Table.LatestStreamArn // empty'
<<<"$LAST_RESP")"
+
+step "PutItem id=a (Alice, score=10)"
+ddb PutItem
"{\"TableName\":\"$TBL\",\"Item\":{\"id\":{\"S\":\"a\"},\"name\":{\"S\":\"Alice\"},\"score\":{\"N\":\"10\"}}}"
+
+step "UpdateItem id=a (SET score=20, bonus=5, ReturnValues=ALL_NEW)"
+ddb UpdateItem "$(cat <<EOF
+{
+ "TableName": "$TBL",
+ "Key": {"id": {"S": "a"}},
+ "UpdateExpression": "SET score = :s, bonus = :b",
+ "ExpressionAttributeValues": {":s": {"N":"20"}, ":b": {"N":"5"}},
+ "ReturnValues": "ALL_NEW"
+}
+EOF
+)"
+assert_eq "Attributes.score.N" "$(jq -r '.Attributes.score.N'
<<<"$LAST_RESP")" "20"
+assert_eq "Attributes.bonus.N" "$(jq -r '.Attributes.bonus.N'
<<<"$LAST_RESP")" "5"
+
+step "GetItem id=a"
+ddb GetItem "{\"TableName\":\"$TBL\",\"Key\":{\"id\":{\"S\":\"a\"}}}"
+assert_eq "Item.name.S" "$(jq -r '.Item.name.S' <<<"$LAST_RESP")" "Alice"
+assert_eq "Item.score.N" "$(jq -r '.Item.score.N' <<<"$LAST_RESP")" "20"
+assert_eq "Item.bonus.N" "$(jq -r '.Item.bonus.N' <<<"$LAST_RESP")" "5"
+
+step "PutItem id=b (Bob, score=7)"
+ddb PutItem
"{\"TableName\":\"$TBL\",\"Item\":{\"id\":{\"S\":\"b\"},\"name\":{\"S\":\"Bob\"},\"score\":{\"N\":\"7\"}}}"
+
+step "Scan"
+ddb Scan "{\"TableName\":\"$TBL\"}"
+assert_eq "Count" "$(jq -r '.Count' <<<"$LAST_RESP")" "2"
+
+step "Query id = 'a'"
+ddb Query "$(cat <<EOF
+{
+ "TableName": "$TBL",
+ "KeyConditionExpression": "id = :v",
+ "ExpressionAttributeValues": {":v": {"S": "a"}}
+}
+EOF
+)"
+assert_eq "Count" "$(jq -r '.Count' <<<"$LAST_RESP")" "1"
+assert_eq "Items[0].name.S" "$(jq -r '.Items[0].name.S' <<<"$LAST_RESP")"
"Alice"
+
+step "DeleteItem id=b"
+ddb DeleteItem "{\"TableName\":\"$TBL\",\"Key\":{\"id\":{\"S\":\"b\"}}}"
+
+step "Scan (after delete)"
+ddb Scan "{\"TableName\":\"$TBL\"}"
+assert_eq "Count" "$(jq -r '.Count' <<<"$LAST_RESP")" "1"
+
+step "BatchWriteItem (put id=c, id=d; delete id=a)"
+ddb BatchWriteItem "$(cat <<EOF
+{
+ "RequestItems": {
+ "$TBL": [
+ {"PutRequest": {"Item": {"id": {"S": "c"}, "name": {"S": "Carol"}}}},
+ {"PutRequest": {"Item": {"id": {"S": "d"}, "name": {"S": "Dan"}}}},
+ {"DeleteRequest": {"Key": {"id": {"S": "a"}}}}
+ ]
+ }
+}
+EOF
+)"
+assert_eq "UnprocessedItems (size)" "$(jq -r '.UnprocessedItems // {} |
length' <<<"$LAST_RESP")" "0"
+
+step "Scan (drain all pages after batch)"
+total=0; iter_key=""
+for page in $(seq 1 10); do
+ if [[ -z "$iter_key" ]]; then
+ ddb Scan "{\"TableName\":\"$TBL\"}"
+ else
+ ddb Scan "{\"TableName\":\"$TBL\",\"ExclusiveStartKey\":$iter_key}"
+ fi
+ n=$(jq -r '.Count // 0' <<<"$LAST_RESP")
+ total=$((total + n))
+ printf " ${DIM}page %d: %d item(s)${RESET}\n" "$page" "$n"
+ iter_key=$(jq -c '.LastEvaluatedKey // empty' <<<"$LAST_RESP")
+ [[ -z "$iter_key" ]] && break
+done
+assert_eq "total Items across all pages" "$total" "2"
+
+###############################################################################
+# Streams API
+###############################################################################
+
+step "ListStreams"
+ddb ListStreams "{\"TableName\":\"$TBL\"}"
+listed_arn=$(jq -r ".Streams[]? | select(.TableName == \"$TBL\") | .StreamArn"
<<<"$LAST_RESP" | head -n1)
+assert_nonempty "StreamArn for $TBL" "$listed_arn"
+
+step "DescribeStream (poll until StreamStatus==ENABLED, max 30s)"
+shard_id=""; status=""
+for attempt in $(seq 1 15); do
+ ddb DescribeStream "{\"StreamArn\":\"$listed_arn\"}"
+ status=$(jq -r '.StreamDescription.StreamStatus // empty' <<<"$LAST_RESP")
+ if [[ "$status" == "ENABLED" ]]; then
+ shard_id=$(jq -r '.StreamDescription.Shards[0].ShardId // empty'
<<<"$LAST_RESP")
+ break
+ fi
+ printf " ${DIM}attempt %d: status=%s${RESET}\n" "$attempt" "$status"
+ sleep 2
+done
+assert_eq "StreamDescription.StreamStatus" "$status" "ENABLED"
+assert_nonempty "StreamDescription.Shards[0].ShardId" "$shard_id"
+
+step "GetShardIterator (TRIM_HORIZON)"
+ddb GetShardIterator "$(cat <<EOF
+{
+ "StreamArn": "$listed_arn",
+ "ShardId": "$shard_id",
+ "ShardIteratorType": "TRIM_HORIZON"
+}
+EOF
+)"
+iter=$(jq -r '.ShardIterator // empty' <<<"$LAST_RESP")
+assert_nonempty "ShardIterator" "$iter"
+
+step "GetRecords (drain pages until empty)"
+total=0; pages=0; seen_keys=""; advanced=false
+while [[ -n "$iter" && "$iter" != "null" && $pages -lt 10 ]]; do
+ pages=$((pages + 1))
+ ddb GetRecords "{\"ShardIterator\":\"$iter\"}"
+ n=$(jq -r '.Records | length' <<<"$LAST_RESP")
+ keys=$(jq -r '.Records[]?.dynamodb.Keys.id.S' <<<"$LAST_RESP" | tr '\n'
',' | sed 's/,$//')
+ [[ -n "$keys" ]] && seen_keys="${seen_keys:+$seen_keys,}$keys"
+ total=$((total + n))
+ printf " ${DIM}page %d: %d record(s) keys=[%s]${RESET}\n" "$pages" "$n"
"$keys"
+ next=$(jq -r '.NextShardIterator // empty' <<<"$LAST_RESP")
+ # Iterator stuck-at-position guard: if NextShardIterator equals the
+ # current one across consecutive empty pages, the stream isn't actually
+ # being consumed, so further pages would just spin.
+ if [[ -z "$next" || "$next" == "null" || "$next" == "$iter" ]]; then
+ break
+ fi
+ advanced=true
+ iter="$next"
+ [[ $n -eq 0 ]] && break
+done
+printf " ${DIM}total records: %d keys=[%s]${RESET}\n" "$total" "$seen_keys"
+# Expect >= 4 mutations (PutItem-a, UpdateItem-a, PutItem-b, DeleteItem-b) plus
+# 3 from the batch (delete-a, put-c, put-d) -- 7 total in steady state.
+assert_ge "stream record count" "$total" "4"
+if $advanced; then
+ printf " ${GREEN}✓${RESET} ShardIterator advanced across pages\n"
+ PASS=$((PASS + 1))
+else
+ printf " ${RED}✗${RESET} ShardIterator never advanced; stream appears
stuck\n" >&2
+ exit 1
+fi
+
+step "DeleteTable (cleanup)"
+ddb DeleteTable "{\"TableName\":\"$TBL\"}"
+
+###############################################################################
+# Summary
+###############################################################################
+
+banner "Result: ${PASS} checks PASSED across ${TOTAL} API calls"