timoninmaxim commented on code in PR #311: URL: https://github.com/apache/ignite-extensions/pull/311#discussion_r2245232910
########## modules/cdc-ext/src/main/java/org/apache/ignite/cdc/postgresql/IgniteToPostgreSqlCdcApplier.java: ########## @@ -0,0 +1,670 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.ignite.cdc.postgresql; + +import java.math.BigDecimal; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.SQLException; +import java.sql.Statement; +import java.sql.Types; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Map; +import java.util.Set; +import java.util.UUID; +import javax.sql.DataSource; +import org.apache.ignite.IgniteException; +import org.apache.ignite.IgniteLogger; +import org.apache.ignite.binary.BinaryObject; +import org.apache.ignite.cache.CacheEntryVersion; +import org.apache.ignite.cache.QueryEntity; +import org.apache.ignite.cdc.CdcCacheEvent; +import org.apache.ignite.cdc.CdcEvent; +import org.apache.ignite.internal.util.typedef.F; + +import static org.apache.ignite.internal.processors.cache.GridCacheUtils.UNDEFINED_CACHE_ID; + +/** */ +public class IgniteToPostgreSqlCdcApplier { + /** */ + public static final String 
DFLT_SQL_TYPE = "OTHER"; + + /** */ + public static final Map<String, String> JAVA_TO_SQL_TYPES; + + /** */ + public static final Set<String> SQL_TYPES_WITH_PRECISION_ONLY; + + /** */ + public static final Set<String> SQL_TYPES_WITH_PRECISION_AND_SCALE; + + static { + Map<String, String> javaToSqlTypes = new HashMap<>(); + + javaToSqlTypes.put("java.lang.String", "VARCHAR"); + javaToSqlTypes.put("java.lang.Integer", "INT"); + javaToSqlTypes.put("int", "INT"); + javaToSqlTypes.put("java.lang.Long", "BIGINT"); + javaToSqlTypes.put("long", "BIGINT"); + javaToSqlTypes.put("java.lang.Boolean", "BOOLEAN"); + javaToSqlTypes.put("boolean", "BOOLEAN"); + javaToSqlTypes.put("java.lang.Double", "DOUBLE PRECISION"); + javaToSqlTypes.put("double", "DOUBLE PRECISION"); + javaToSqlTypes.put("java.lang.Float", "REAL"); + javaToSqlTypes.put("float", "REAL"); + javaToSqlTypes.put("java.math.BigDecimal", "DECIMAL"); + javaToSqlTypes.put("java.lang.Short", "SMALLINT"); + javaToSqlTypes.put("short", "SMALLINT"); + javaToSqlTypes.put("java.lang.Byte", "SMALLINT"); + javaToSqlTypes.put("byte", "SMALLINT"); + javaToSqlTypes.put("java.util.UUID", "UUID"); + javaToSqlTypes.put("[B", "BYTEA"); + javaToSqlTypes.put("java.lang.Object", "OTHER"); + + JAVA_TO_SQL_TYPES = Collections.unmodifiableMap(javaToSqlTypes); + + Set<String> sqlTypesWithPrecisionOnly = new HashSet<>(); + + sqlTypesWithPrecisionOnly.add("VARCHAR"); + sqlTypesWithPrecisionOnly.add("DOUBLE PRECISION"); + + SQL_TYPES_WITH_PRECISION_ONLY = Collections.unmodifiableSet(sqlTypesWithPrecisionOnly); + + Set<String> sqlTypesWithPrecisionAndScale = new HashSet<>(); + + sqlTypesWithPrecisionAndScale.add("DECIMAL"); + sqlTypesWithPrecisionAndScale.add("REAL"); + + SQL_TYPES_WITH_PRECISION_AND_SCALE = Collections.unmodifiableSet(sqlTypesWithPrecisionAndScale); + } + + /** */ + private static final boolean DFLT_AUTO_COMMIT = false; + + /** */ + private final DataSource dataSrc; + + /** */ + private final long batchSize; + + /** */ + 
private final IgniteLogger log; + + /** */ + private final Map<Integer, String> cacheIdToUpsertQry = new HashMap<>(); + + /** */ + private final Map<Integer, String> cacheIdToDeleteQry = new HashMap<>(); + + /** */ + private final Map<Integer, Set<String>> cacheIdToPrimaryKeys = new HashMap<>(); + + /** */ + private final Map<Integer, Set<String>> cacheIdToFields = new HashMap<>(); + + /** */ + private final Set<Object> curKeys = new HashSet<>(); + + /** + * @param dataSrc {@link DataSource} - connection pool to PostgreSql + * @param batchSize the number of CDC events to include in a single batch + * @param log the {@link IgniteLogger} instance used for logging CDC processing events + */ + public IgniteToPostgreSqlCdcApplier( + DataSource dataSrc, + long batchSize, + IgniteLogger log + ) { + this.dataSrc = dataSrc; + this.batchSize = batchSize; + this.log = log; + } + + /** + * @param evts an {@link Iterator} of {@link CdcEvent} objects to be applied + * @return the total number of events successfully batched and executed + */ + public long applyEvents(Iterator<CdcEvent> evts) { + try (Connection conn = dataSrc.getConnection()) { + conn.setAutoCommit(DFLT_AUTO_COMMIT); Review Comment: Use `false` here, and let's add a comment about the transaction behavior (setting auto-commit to `true` doesn't make every SQL query commit separately; the full batch is committed in any case. We decided to commit manually for more control). ########## modules/cdc-ext/src/main/java/org/apache/ignite/cdc/postgresql/IgniteToPostgreSqlCdcConsumer.java: ########## @@ -0,0 +1,251 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.ignite.cdc.postgresql; + +import java.util.Collection; +import java.util.Iterator; +import java.util.Set; +import java.util.stream.Collectors; +import javax.sql.DataSource; +import org.apache.ignite.IgniteLogger; +import org.apache.ignite.binary.BinaryType; +import org.apache.ignite.cdc.CdcCacheEvent; +import org.apache.ignite.cdc.CdcConsumer; +import org.apache.ignite.cdc.CdcEvent; +import org.apache.ignite.cdc.TypeMapping; +import org.apache.ignite.internal.processors.metric.MetricRegistryImpl; +import org.apache.ignite.internal.processors.metric.impl.AtomicLongMetric; +import org.apache.ignite.internal.util.typedef.F; +import org.apache.ignite.internal.util.typedef.internal.A; +import org.apache.ignite.internal.util.typedef.internal.CU; +import org.apache.ignite.metric.MetricRegistry; +import org.apache.ignite.resources.LoggerResource; + +/** + * This class represents a consumer component that replicates cache changes from Apache Ignite to PostgreSQL using + * Change Data Capture (CDC) mechanism. It applies events to PostgreSQL via batch-prepared SQL statements, ensuring + * efficient handling of large volumes of updates. 
+ * + * <p>Additionally, it provides methods for initializing connections, managing transactions, and performing atomic batches + * of writes.</p> + */ +public class IgniteToPostgreSqlCdcConsumer implements CdcConsumer { + /** */ + public static final String EVTS_SENT_CNT = "EventsCount"; + + /** */ + public static final String EVTS_SENT_CNT_DESC = "Count of events applied to PostgreSQL"; + + /** */ + public static final String LAST_EVT_SENT_TIME = "LastEventTime"; + + /** */ + public static final String LAST_EVT_SENT_TIME_DESC = "Timestamp of last applied event to PostgreSQL"; + + /** */ + private static final boolean DFLT_IS_ONLY_PRIMARY = true; + + /** */ + private static final long DFLT_BATCH_SIZE = 1024; + + /** */ + private static final boolean DFLT_CREATE_TABLES = false; + + /** */ + private DataSource dataSrc; + + /** Collection of cache names which will be replicated to PostgreSQL. */ + private Collection<String> caches; + + /** */ + private boolean onlyPrimary = DFLT_IS_ONLY_PRIMARY; + + /** */ + private long batchSize = DFLT_BATCH_SIZE; + + /** */ + private boolean createTables = DFLT_CREATE_TABLES; + + /** Log. */ + @LoggerResource + private IgniteLogger log; + + /** Cache IDs. */ + private Set<Integer> cachesIds; + + /** Applier instance responsible for applying individual CDC events to PostgreSQL. */ + private IgniteToPostgreSqlCdcApplier applier; + + /** Count of events applied to PostgreSQL. */ + private AtomicLongMetric evtsCnt; + + /** Timestamp of last applied batch to PostgreSQL. 
*/ + private AtomicLongMetric lastEvtTs; + + /** {@inheritDoc} */ + @Override public void start(MetricRegistry reg) { + A.notNull(dataSrc, "dataSource"); + A.notEmpty(caches, "caches"); + A.ensure(batchSize > 0, "batchSize"); + + cachesIds = caches.stream() + .map(CU::cacheId) + .collect(Collectors.toSet()); + + applier = new IgniteToPostgreSqlCdcApplier(dataSrc, batchSize, log); + + MetricRegistryImpl mreg = (MetricRegistryImpl)reg; + + this.evtsCnt = mreg.longMetric(EVTS_SENT_CNT, EVTS_SENT_CNT_DESC); + this.lastEvtTs = mreg.longMetric(LAST_EVT_SENT_TIME, LAST_EVT_SENT_TIME_DESC); + + if (log.isInfoEnabled()) + log.info("CDC Ignite to PostgreSQL start-up [cacheIds=" + cachesIds + ']'); + } + + /** {@inheritDoc} */ + @Override public boolean onEvents(Iterator<CdcEvent> events) { + Iterator<CdcEvent> filtered = F.iterator( + events, + F.identity(), + true, + evt -> !onlyPrimary || evt.primary(), + evt -> cachesIds.contains(evt.cacheId())); + + long evtsSent = applier.applyEvents(filtered); + + if (evtsSent > 0) { + evtsCnt.add(evtsSent); + lastEvtTs.value(System.currentTimeMillis()); + + if (log.isInfoEnabled()) + log.info("Events applied [evtsApplied=" + evtsCnt.value() + ']'); + } + + return true; + } + + /** {@inheritDoc} */ + @Override public void onTypes(Iterator<BinaryType> types) { + types.forEachRemaining(e -> { + // Just skip. Handle of cache events not supported. + }); + } + + /** {@inheritDoc} */ + @Override public void onMappings(Iterator<TypeMapping> mappings) { + mappings.forEachRemaining(e -> { + // Just skip. Handle of cache events not supported. 
+ }); + } + + /** {@inheritDoc} */ + @Override public void onCacheChange(Iterator<CdcCacheEvent> cacheEvents) { + Iterator<CdcCacheEvent> filtered = F.iterator( + cacheEvents, + F.identity(), + true, + evt -> cachesIds.contains(evt.cacheId())); + + long tablesCreated = applier.applyCacheEvents(filtered, createTables); + + if (tablesCreated > 0 && log.isInfoEnabled()) + log.info("Cache changes applied [tablesCreatedCnt=" + tablesCreated + ']'); + } + + /** {@inheritDoc} */ + @Override public void onCacheDestroy(Iterator<Integer> caches) { + caches.forEachRemaining(e -> { + // Just skip. Handle of cache events not supported. + }); + } + + /** {@inheritDoc} */ + @Override public void stop() { + + } + + /** + * Sets the datasource configuration for connecting to the PostgreSQL database. + * + * @param dataSrc Configured data source. + * @return {@code this} for chaining. + */ + public IgniteToPostgreSqlCdcConsumer setDataSource(DataSource dataSrc) { + this.dataSrc = dataSrc; + + return this; + } + + /** + * Sets cache names to replicate. + * + * @param caches Cache names. + * @return {@code this} for chaining. + */ + public IgniteToPostgreSqlCdcConsumer setCaches(Set<String> caches) { + this.caches = caches; Review Comment: Let's use `new HashSet()` here. The `caches` collection is provided by the user, so we can't guarantee that it won't be changed at any moment. ########## modules/cdc-ext/src/main/java/org/apache/ignite/cdc/postgresql/IgniteToPostgreSqlCdcConsumer.java: ########## @@ -0,0 +1,268 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.ignite.cdc.postgresql; + +import java.util.Collection; +import java.util.Iterator; +import java.util.Set; +import java.util.stream.Collectors; +import javax.sql.DataSource; +import org.apache.ignite.IgniteLogger; +import org.apache.ignite.binary.BinaryType; +import org.apache.ignite.cdc.CdcCacheEvent; +import org.apache.ignite.cdc.CdcConsumer; +import org.apache.ignite.cdc.CdcEvent; +import org.apache.ignite.cdc.TypeMapping; +import org.apache.ignite.internal.processors.metric.MetricRegistryImpl; +import org.apache.ignite.internal.processors.metric.impl.AtomicLongMetric; +import org.apache.ignite.internal.util.typedef.F; +import org.apache.ignite.internal.util.typedef.internal.A; +import org.apache.ignite.internal.util.typedef.internal.CU; +import org.apache.ignite.metric.MetricRegistry; +import org.apache.ignite.resources.LoggerResource; + +/** + * This class represents a consumer component that replicates cache changes from Apache Ignite to PostgreSQL using + * Change Data Capture (CDC) mechanism. It applies events to PostgreSQL via batch-prepared SQL statements, ensuring + * efficient handling of large volumes of updates. 
+ * + * <p>Additionally, it provides methods for initializing connections, managing transactions, and performing atomic batches + * of writes.</p> + */ +public class IgniteToPostgreSqlCdcConsumer implements CdcConsumer { + /** */ + public static final String EVTS_SENT_CNT = "EventsCount"; + + /** */ + public static final String EVTS_SENT_CNT_DESC = "Count of events applied to PostgreSQL"; + + /** */ + public static final String LAST_EVT_SENT_TIME = "LastEventTime"; + + /** */ + public static final String LAST_EVT_SENT_TIME_DESC = "Timestamp of last applied event to PostgreSQL"; + + /** */ + private static final boolean DFLT_IS_ONLY_PRIMARY = true; + + /** */ + private static final long DFLT_BATCH_SIZE = 1024; + + /** */ + private static final boolean DFLT_CREATE_TABLES = false; + + /** */ + private static final boolean DFLT_AUTO_COMMIT = false; + + /** */ + private DataSource dataSrc; + + /** Collection of cache names which will be replicated to PostgreSQL. */ + private Collection<String> caches; + + /** */ + private boolean onlyPrimary = DFLT_IS_ONLY_PRIMARY; + + /** */ + private long maxBatchSize = DFLT_BATCH_SIZE; + + /** */ + private boolean createTables = DFLT_CREATE_TABLES; + + /** */ + private boolean autoCommit = DFLT_AUTO_COMMIT; + + /** Log. */ + @LoggerResource + private IgniteLogger log; + + /** Cache IDs. */ + private Set<Integer> cachesIds; + + /** Applier instance responsible for applying individual CDC events to PostgreSQL. */ + private IgniteToPostgreSqlCdcApplier applier; + + /** Count of events applied to PostgreSQL. */ + private AtomicLongMetric evtsCnt; + + /** Timestamp of last applied batch to PostgreSQL. 
*/ + private AtomicLongMetric lastEvtTs; + + /** {@inheritDoc} */ + @Override public void start(MetricRegistry reg) { + A.notNull(dataSrc, "dataSource"); + A.notEmpty(caches, "caches"); + + cachesIds = caches.stream() + .map(CU::cacheId) + .collect(Collectors.toSet()); + + applier = new IgniteToPostgreSqlCdcApplier(dataSrc, autoCommit, maxBatchSize, log); + + MetricRegistryImpl mreg = (MetricRegistryImpl)reg; + + this.evtsCnt = mreg.longMetric(EVTS_SENT_CNT, EVTS_SENT_CNT_DESC); + this.lastEvtTs = mreg.longMetric(LAST_EVT_SENT_TIME, LAST_EVT_SENT_TIME_DESC); + + if (log.isInfoEnabled()) + log.info("CDC Ignite to PostgreSQL start-up [cacheIds=" + cachesIds + ']'); + } + + /** {@inheritDoc} */ + @Override public boolean onEvents(Iterator<CdcEvent> events) { + Iterator<CdcEvent> filtered = F.iterator( + events, + F.identity(), + true, + evt -> !onlyPrimary || evt.primary(), + evt -> cachesIds.contains(evt.cacheId())); + + long evtsSent = applier.applyEvents(filtered); + + if (evtsSent > 0) { + evtsCnt.add(evtsSent); + lastEvtTs.value(System.currentTimeMillis()); + + if (log.isInfoEnabled()) + log.info("Events applied [evtsApplied=" + evtsCnt.value() + ']'); + } + + return true; + } + + /** {@inheritDoc} */ + @Override public void onTypes(Iterator<BinaryType> types) { + types.forEachRemaining(e -> { + // Just skip. Handle of cache events not supported. + }); + } + + /** {@inheritDoc} */ + @Override public void onMappings(Iterator<TypeMapping> mappings) { + mappings.forEachRemaining(e -> { + // Just skip. Handle of cache events not supported. 
+ }); + } + + /** {@inheritDoc} */ + @Override public void onCacheChange(Iterator<CdcCacheEvent> cacheEvents) { + Iterator<CdcCacheEvent> filtered = F.iterator( + cacheEvents, + F.identity(), + true, + evt -> cachesIds.contains(evt.cacheId())); + + long tablesCreated = applier.applyCacheEvents(filtered, createTables); + + if (tablesCreated > 0 && log.isInfoEnabled()) + log.info("Cache changes applied [tablesCreatedCnt=" + tablesCreated + ']'); + } + + /** {@inheritDoc} */ + @Override public void onCacheDestroy(Iterator<Integer> caches) { + caches.forEachRemaining(e -> { + // Just skip. Handle of cache events not supported. + }); + } + + /** {@inheritDoc} */ + @Override public void stop() { + + } + + /** + * Sets the datasource configuration for connecting to the PostgreSQL database. + * + * @param dataSrc Configured data source. + * @return {@code this} for chaining. + */ + public IgniteToPostgreSqlCdcConsumer setDataSource(DataSource dataSrc) { + this.dataSrc = dataSrc; + + return this; + } + + /** + * Sets cache names to replicate. + * + * @param caches Cache names. + * @return {@code this} for chaining. + */ + public IgniteToPostgreSqlCdcConsumer setCaches(Set<String> caches) { + this.caches = caches; + + return this; + } + + /** + * Enables/disables filtering to accept only primary-node originated events. + * + * @param onlyPrimary True to restrict replication to primary events only. + * @return {@code this} for chaining. + */ + public IgniteToPostgreSqlCdcConsumer setOnlyPrimary(boolean onlyPrimary) { + this.onlyPrimary = onlyPrimary; + + return this; + } + + /** + * Sets the maximum batch size that will be submitted to PostgreSQL. + * <p> + * This setting controls how many statements are sent in a single {@link java.sql.PreparedStatement#executeBatch()} call. 
+ * <p> + * Commit behavior depends on the {@code autoCommit} setting: + * <ul> + * <li>If {@code autoCommit} is {@code true}, each batch will be committed immediately after submission.</li> Review Comment: We don't expose autoCommit to the user, so there is no need to document it. ########## modules/cdc-ext/src/main/java/org/apache/ignite/cdc/postgresql/IgniteToPostgreSqlCdcApplier.java: ########## @@ -0,0 +1,670 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.ignite.cdc.postgresql; + +import java.math.BigDecimal; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.SQLException; +import java.sql.Statement; +import java.sql.Types; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Map; +import java.util.Set; +import java.util.UUID; +import javax.sql.DataSource; +import org.apache.ignite.IgniteException; +import org.apache.ignite.IgniteLogger; +import org.apache.ignite.binary.BinaryObject; +import org.apache.ignite.cache.CacheEntryVersion; +import org.apache.ignite.cache.QueryEntity; +import org.apache.ignite.cdc.CdcCacheEvent; +import org.apache.ignite.cdc.CdcEvent; +import org.apache.ignite.internal.util.typedef.F; + +import static org.apache.ignite.internal.processors.cache.GridCacheUtils.UNDEFINED_CACHE_ID; + +/** */ +public class IgniteToPostgreSqlCdcApplier { + /** */ + public static final String DFLT_SQL_TYPE = "OTHER"; + + /** */ + public static final Map<String, String> JAVA_TO_SQL_TYPES; + + /** */ + public static final Set<String> SQL_TYPES_WITH_PRECISION_ONLY; + + /** */ + public static final Set<String> SQL_TYPES_WITH_PRECISION_AND_SCALE; + + static { + Map<String, String> javaToSqlTypes = new HashMap<>(); + + javaToSqlTypes.put("java.lang.String", "VARCHAR"); + javaToSqlTypes.put("java.lang.Integer", "INT"); + javaToSqlTypes.put("int", "INT"); + javaToSqlTypes.put("java.lang.Long", "BIGINT"); + javaToSqlTypes.put("long", "BIGINT"); + javaToSqlTypes.put("java.lang.Boolean", "BOOLEAN"); + javaToSqlTypes.put("boolean", "BOOLEAN"); + javaToSqlTypes.put("java.lang.Double", "DOUBLE PRECISION"); + javaToSqlTypes.put("double", "DOUBLE PRECISION"); + javaToSqlTypes.put("java.lang.Float", "REAL"); + javaToSqlTypes.put("float", "REAL"); + javaToSqlTypes.put("java.math.BigDecimal", "DECIMAL"); + 
javaToSqlTypes.put("java.lang.Short", "SMALLINT"); + javaToSqlTypes.put("short", "SMALLINT"); + javaToSqlTypes.put("java.lang.Byte", "SMALLINT"); + javaToSqlTypes.put("byte", "SMALLINT"); + javaToSqlTypes.put("java.util.UUID", "UUID"); + javaToSqlTypes.put("[B", "BYTEA"); + javaToSqlTypes.put("java.lang.Object", "OTHER"); + + JAVA_TO_SQL_TYPES = Collections.unmodifiableMap(javaToSqlTypes); + + Set<String> sqlTypesWithPrecisionOnly = new HashSet<>(); + + sqlTypesWithPrecisionOnly.add("VARCHAR"); + sqlTypesWithPrecisionOnly.add("DOUBLE PRECISION"); + + SQL_TYPES_WITH_PRECISION_ONLY = Collections.unmodifiableSet(sqlTypesWithPrecisionOnly); + + Set<String> sqlTypesWithPrecisionAndScale = new HashSet<>(); + + sqlTypesWithPrecisionAndScale.add("DECIMAL"); + sqlTypesWithPrecisionAndScale.add("REAL"); + + SQL_TYPES_WITH_PRECISION_AND_SCALE = Collections.unmodifiableSet(sqlTypesWithPrecisionAndScale); + } + + /** */ + private static final boolean DFLT_AUTO_COMMIT = false; + + /** */ + private final DataSource dataSrc; + + /** */ + private final long batchSize; + + /** */ + private final IgniteLogger log; + + /** */ + private final Map<Integer, String> cacheIdToUpsertQry = new HashMap<>(); + + /** */ + private final Map<Integer, String> cacheIdToDeleteQry = new HashMap<>(); + + /** */ + private final Map<Integer, Set<String>> cacheIdToPrimaryKeys = new HashMap<>(); + + /** */ + private final Map<Integer, Set<String>> cacheIdToFields = new HashMap<>(); + + /** */ + private final Set<Object> curKeys = new HashSet<>(); + + /** + * @param dataSrc {@link DataSource} - connection pool to PostgreSql + * @param batchSize the number of CDC events to include in a single batch + * @param log the {@link IgniteLogger} instance used for logging CDC processing events + */ + public IgniteToPostgreSqlCdcApplier( + DataSource dataSrc, + long batchSize, + IgniteLogger log + ) { + this.dataSrc = dataSrc; + this.batchSize = batchSize; + this.log = log; + } + + /** + * @param evts an {@link 
Iterator} of {@link CdcEvent} objects to be applied + * @return the total number of events successfully batched and executed + */ + public long applyEvents(Iterator<CdcEvent> evts) { + try (Connection conn = dataSrc.getConnection()) { + conn.setAutoCommit(DFLT_AUTO_COMMIT); + + long res = applyEvents(conn, evts); + + conn.commit(); + + return res; + } + catch (Throwable e) { + log.error(e.getMessage(), e); + + throw new IgniteException("CDC failure", e); + } + } + + /** + * @param conn connection to PostgreSql + * @param evts an {@link Iterator} of {@link CdcEvent} objects to be applied + * @return the total number of events successfully batched and executed + */ + private long applyEvents(Connection conn, Iterator<CdcEvent> evts) throws SQLException { + long evtsApplied = 0; + + int currCacheId = UNDEFINED_CACHE_ID; + boolean prevOpIsDelete = false; + + PreparedStatement curPrepStmt = null; + CdcEvent evt; + + while (evts.hasNext()) { + evt = evts.next(); + + if (log.isDebugEnabled()) + log.debug("Event received [evt=" + evt + ']'); + + if (currCacheId != evt.cacheId() || prevOpIsDelete ^ (evt.value() == null)) { + if (curPrepStmt != null) + evtsApplied += executeBatch(conn, curPrepStmt); + + currCacheId = evt.cacheId(); + prevOpIsDelete = evt.value() == null; + + curPrepStmt = prepareStatement(conn, evt); + } + + if (curKeys.size() >= batchSize || curKeys.contains(evt.key())) + evtsApplied += executeBatch(conn, curPrepStmt); + + addEvent(curPrepStmt, evt); + } + + if (!curKeys.isEmpty()) + evtsApplied += executeBatch(conn, curPrepStmt); + + return evtsApplied; + } + + /** + * @param conn connection to PostgreSql + * @param curPrepStmt {@link PreparedStatement} + * @return the total number of batches successfully executed. One CdcEvent - one batch. 
+ */ + private int executeBatch(Connection conn, PreparedStatement curPrepStmt) { + try { + curKeys.clear(); + + if (log.isDebugEnabled()) + log.debug("Applying batch " + curPrepStmt.toString()); + + if (!curPrepStmt.isClosed()) { + int batchSize = curPrepStmt.executeBatch().length; + + // It's better to use autoCommit = false and call commit() manually for improved performance and + // clearer transaction boundaries + conn.commit(); + + return batchSize; + } + + throw new IgniteException("Tried to execute on closed prepared statement!"); + } + catch (SQLException e) { + log.error(e.getMessage(), e); + + throw new IgniteException(e); + } + } + + /** + * @param conn connection to PostgreSql + * @param evt {@link CdcEvent} + * @return relevant {@link PreparedStatement} + */ + private PreparedStatement prepareStatement(Connection conn, CdcEvent evt) { + String sqlQry; + + if (evt.value() == null) + sqlQry = cacheIdToDeleteQry.get(evt.cacheId()); + else + sqlQry = cacheIdToUpsertQry.get(evt.cacheId()); + + if (sqlQry == null) + throw new IgniteException("No SQL query is found for cacheId=" + evt.cacheId()); + + if (log.isDebugEnabled()) + log.debug("Statement updated [cacheId=" + evt.cacheId() + ", sqlQry=" + sqlQry + ']'); + + try { + return conn.prepareStatement(sqlQry); + } + catch (SQLException e) { + log.error(e.getMessage(), e); + + throw new IgniteException(e); + } + } + + /** + * @param curPrepStmt current {@link PreparedStatement} + * @param evt {@link CdcEvent} + */ + private void addEvent(PreparedStatement curPrepStmt, CdcEvent evt) { + try { + Iterator<String> itFields = evt.value() == null ? + cacheIdToPrimaryKeys.get(evt.cacheId()).iterator() : + cacheIdToFields.get(evt.cacheId()).iterator(); + + String field; + + BinaryObject keyObj = (evt.key() instanceof BinaryObject) ? (BinaryObject)evt.key() : null; + BinaryObject valObj = (evt.value() instanceof BinaryObject) ? 
(BinaryObject)evt.value() : null; + + int idx = 1; + Object obj; + + while (itFields.hasNext()) { + field = itFields.next(); + + if (cacheIdToPrimaryKeys.get(evt.cacheId()).contains(field)) + obj = keyObj != null ? keyObj.field(field) : evt.key(); + else + obj = valObj != null ? valObj.field(field) : evt.value(); + + addObject(curPrepStmt, idx, obj); + + idx++; + } + + if (evt.value() != null) + curPrepStmt.setBytes(idx, encodeVersion(evt.version())); + + curKeys.add(evt.key()); + + curPrepStmt.addBatch(); + } + catch (Throwable e) { + log.error(e.getMessage(), e); + + throw new IgniteException(e); + } + } + + /** + * Sets a value in the PreparedStatement at the given index using the appropriate setter + * based on the runtime type of the object. + * @param curPrepStmt {@link PreparedStatement} + * @param idx value index in {@link PreparedStatement} + * @param obj value + */ + private void addObject(PreparedStatement curPrepStmt, int idx, Object obj) throws SQLException { + if (obj == null) { + curPrepStmt.setObject(idx, null); + + return; + } + + if (obj instanceof String) + curPrepStmt.setString(idx, (String)obj); + else if (obj instanceof Integer) + curPrepStmt.setInt(idx, (Integer)obj); + else if (obj instanceof Long) + curPrepStmt.setLong(idx, (Long)obj); + else if (obj instanceof Short) + curPrepStmt.setShort(idx, (Short)obj); + else if (obj instanceof Byte) + curPrepStmt.setByte(idx, (Byte)obj); + else if (obj instanceof Boolean) + curPrepStmt.setBoolean(idx, (Boolean)obj); + else if (obj instanceof Float) + curPrepStmt.setFloat(idx, (Float)obj); + else if (obj instanceof Double) + curPrepStmt.setDouble(idx, (Double)obj); + else if (obj instanceof BigDecimal) + curPrepStmt.setBigDecimal(idx, (BigDecimal)obj); + else if (obj instanceof UUID) + curPrepStmt.setObject(idx, obj, Types.OTHER); // PostgreSQL expects UUID as OTHER + else if (obj instanceof byte[]) + curPrepStmt.setBytes(idx, (byte[])obj); + else + curPrepStmt.setObject(idx, obj); + } + + /** + * 
@param evts an {@link Iterator} of {@link CdcCacheEvent} objects to apply + * @param createTables tables creation flag. If true - attempt to create tables will be made. + * @return Number of applied events. + */ + public long applyCacheEvents(Iterator<CdcCacheEvent> evts, boolean createTables) { + CdcCacheEvent evt; + QueryEntity entity; + + long cnt = 0; + + while (evts.hasNext()) { + evt = evts.next(); + + if (evt.queryEntities().size() != 1) + throw new IgniteException("There should be exactly 1 QueryEntity for cacheId: " + evt.cacheId()); + + entity = evt.queryEntities().iterator().next(); + + if (createTables) + createTableIfNotExists(entity); + + cacheIdToUpsertQry.put(evt.cacheId(), getUpsertSqlQry(entity)); + + cacheIdToDeleteQry.put(evt.cacheId(), getDeleteSqlQry(entity)); + + cacheIdToPrimaryKeys.put(evt.cacheId(), getPrimaryKeys(entity)); + + cacheIdToFields.put(evt.cacheId(), entity.getFields().keySet()); + + if (log.isInfoEnabled()) + log.info("Cache table created [tableName=" + entity.getTableName() + Review Comment: It logs even if `createTables = false` ########## modules/cdc-ext/src/main/java/org/apache/ignite/cdc/postgresql/IgniteToPostgreSqlCdcApplier.java: ########## @@ -0,0 +1,670 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.ignite.cdc.postgresql; + +import java.math.BigDecimal; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.SQLException; +import java.sql.Statement; +import java.sql.Types; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Map; +import java.util.Set; +import java.util.UUID; +import javax.sql.DataSource; +import org.apache.ignite.IgniteException; +import org.apache.ignite.IgniteLogger; +import org.apache.ignite.binary.BinaryObject; +import org.apache.ignite.cache.CacheEntryVersion; +import org.apache.ignite.cache.QueryEntity; +import org.apache.ignite.cdc.CdcCacheEvent; +import org.apache.ignite.cdc.CdcEvent; +import org.apache.ignite.internal.util.typedef.F; + +import static org.apache.ignite.internal.processors.cache.GridCacheUtils.UNDEFINED_CACHE_ID; + +/** */ +public class IgniteToPostgreSqlCdcApplier { + /** */ + public static final String DFLT_SQL_TYPE = "OTHER"; + + /** */ + public static final Map<String, String> JAVA_TO_SQL_TYPES; + + /** */ + public static final Set<String> SQL_TYPES_WITH_PRECISION_ONLY; + + /** */ + public static final Set<String> SQL_TYPES_WITH_PRECISION_AND_SCALE; + + static { + Map<String, String> javaToSqlTypes = new HashMap<>(); + + javaToSqlTypes.put("java.lang.String", "VARCHAR"); + javaToSqlTypes.put("java.lang.Integer", "INT"); + javaToSqlTypes.put("int", "INT"); + javaToSqlTypes.put("java.lang.Long", "BIGINT"); + javaToSqlTypes.put("long", "BIGINT"); + javaToSqlTypes.put("java.lang.Boolean", "BOOLEAN"); + javaToSqlTypes.put("boolean", "BOOLEAN"); + javaToSqlTypes.put("java.lang.Double", "DOUBLE PRECISION"); + javaToSqlTypes.put("double", "DOUBLE PRECISION"); + javaToSqlTypes.put("java.lang.Float", "REAL"); + 
javaToSqlTypes.put("float", "REAL"); + javaToSqlTypes.put("java.math.BigDecimal", "DECIMAL"); + javaToSqlTypes.put("java.lang.Short", "SMALLINT"); + javaToSqlTypes.put("short", "SMALLINT"); + javaToSqlTypes.put("java.lang.Byte", "SMALLINT"); + javaToSqlTypes.put("byte", "SMALLINT"); + javaToSqlTypes.put("java.util.UUID", "UUID"); + javaToSqlTypes.put("[B", "BYTEA"); + javaToSqlTypes.put("java.lang.Object", "OTHER"); + + JAVA_TO_SQL_TYPES = Collections.unmodifiableMap(javaToSqlTypes); + + Set<String> sqlTypesWithPrecisionOnly = new HashSet<>(); + + sqlTypesWithPrecisionOnly.add("VARCHAR"); + sqlTypesWithPrecisionOnly.add("DOUBLE PRECISION"); + + SQL_TYPES_WITH_PRECISION_ONLY = Collections.unmodifiableSet(sqlTypesWithPrecisionOnly); + + Set<String> sqlTypesWithPrecisionAndScale = new HashSet<>(); + + sqlTypesWithPrecisionAndScale.add("DECIMAL"); + sqlTypesWithPrecisionAndScale.add("REAL"); + + SQL_TYPES_WITH_PRECISION_AND_SCALE = Collections.unmodifiableSet(sqlTypesWithPrecisionAndScale); + } + + /** */ + private static final boolean DFLT_AUTO_COMMIT = false; + + /** */ + private final DataSource dataSrc; + + /** */ + private final long batchSize; + + /** */ + private final IgniteLogger log; + + /** */ + private final Map<Integer, String> cacheIdToUpsertQry = new HashMap<>(); + + /** */ + private final Map<Integer, String> cacheIdToDeleteQry = new HashMap<>(); + + /** */ + private final Map<Integer, Set<String>> cacheIdToPrimaryKeys = new HashMap<>(); + + /** */ + private final Map<Integer, Set<String>> cacheIdToFields = new HashMap<>(); + + /** */ + private final Set<Object> curKeys = new HashSet<>(); + + /** + * @param dataSrc {@link DataSource} - connection pool to PostgreSql + * @param batchSize the number of CDC events to include in a single batch + * @param log the {@link IgniteLogger} instance used for logging CDC processing events + */ + public IgniteToPostgreSqlCdcApplier( + DataSource dataSrc, + long batchSize, + IgniteLogger log + ) { + this.dataSrc = 
dataSrc; + this.batchSize = batchSize; + this.log = log; + } + + /** + * @param evts an {@link Iterator} of {@link CdcEvent} objects to be applied + * @return the total number of events successfully batched and executed + */ + public long applyEvents(Iterator<CdcEvent> evts) { + try (Connection conn = dataSrc.getConnection()) { + conn.setAutoCommit(DFLT_AUTO_COMMIT); + + long res = applyEvents(conn, evts); + + conn.commit(); + + return res; + } + catch (Throwable e) { + log.error(e.getMessage(), e); + + throw new IgniteException("CDC failure", e); + } + } + + /** + * @param conn connection to PostgreSql + * @param evts an {@link Iterator} of {@link CdcEvent} objects to be applied + * @return the total number of events successfully batched and executed + */ + private long applyEvents(Connection conn, Iterator<CdcEvent> evts) throws SQLException { Review Comment: never throws SQLException ########## modules/cdc-ext/src/main/java/org/apache/ignite/cdc/postgresql/IgniteToPostgreSqlCdcApplier.java: ########## @@ -0,0 +1,670 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.ignite.cdc.postgresql; + +import java.math.BigDecimal; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.SQLException; +import java.sql.Statement; +import java.sql.Types; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Map; +import java.util.Set; +import java.util.UUID; +import javax.sql.DataSource; +import org.apache.ignite.IgniteException; +import org.apache.ignite.IgniteLogger; +import org.apache.ignite.binary.BinaryObject; +import org.apache.ignite.cache.CacheEntryVersion; +import org.apache.ignite.cache.QueryEntity; +import org.apache.ignite.cdc.CdcCacheEvent; +import org.apache.ignite.cdc.CdcEvent; +import org.apache.ignite.internal.util.typedef.F; + +import static org.apache.ignite.internal.processors.cache.GridCacheUtils.UNDEFINED_CACHE_ID; + +/** */ +public class IgniteToPostgreSqlCdcApplier { + /** */ + public static final String DFLT_SQL_TYPE = "OTHER"; + + /** */ + public static final Map<String, String> JAVA_TO_SQL_TYPES; + + /** */ + public static final Set<String> SQL_TYPES_WITH_PRECISION_ONLY; + + /** */ + public static final Set<String> SQL_TYPES_WITH_PRECISION_AND_SCALE; + + static { + Map<String, String> javaToSqlTypes = new HashMap<>(); + + javaToSqlTypes.put("java.lang.String", "VARCHAR"); + javaToSqlTypes.put("java.lang.Integer", "INT"); + javaToSqlTypes.put("int", "INT"); + javaToSqlTypes.put("java.lang.Long", "BIGINT"); + javaToSqlTypes.put("long", "BIGINT"); + javaToSqlTypes.put("java.lang.Boolean", "BOOLEAN"); + javaToSqlTypes.put("boolean", "BOOLEAN"); + javaToSqlTypes.put("java.lang.Double", "DOUBLE PRECISION"); + javaToSqlTypes.put("double", "DOUBLE PRECISION"); + javaToSqlTypes.put("java.lang.Float", "REAL"); + javaToSqlTypes.put("float", "REAL"); + javaToSqlTypes.put("java.math.BigDecimal", "DECIMAL"); + 
javaToSqlTypes.put("java.lang.Short", "SMALLINT"); + javaToSqlTypes.put("short", "SMALLINT"); + javaToSqlTypes.put("java.lang.Byte", "SMALLINT"); + javaToSqlTypes.put("byte", "SMALLINT"); + javaToSqlTypes.put("java.util.UUID", "UUID"); + javaToSqlTypes.put("[B", "BYTEA"); + javaToSqlTypes.put("java.lang.Object", "OTHER"); + + JAVA_TO_SQL_TYPES = Collections.unmodifiableMap(javaToSqlTypes); + + Set<String> sqlTypesWithPrecisionOnly = new HashSet<>(); + + sqlTypesWithPrecisionOnly.add("VARCHAR"); + sqlTypesWithPrecisionOnly.add("DOUBLE PRECISION"); + + SQL_TYPES_WITH_PRECISION_ONLY = Collections.unmodifiableSet(sqlTypesWithPrecisionOnly); + + Set<String> sqlTypesWithPrecisionAndScale = new HashSet<>(); + + sqlTypesWithPrecisionAndScale.add("DECIMAL"); + sqlTypesWithPrecisionAndScale.add("REAL"); + + SQL_TYPES_WITH_PRECISION_AND_SCALE = Collections.unmodifiableSet(sqlTypesWithPrecisionAndScale); + } + + /** */ + private static final boolean DFLT_AUTO_COMMIT = false; + + /** */ + private final DataSource dataSrc; + + /** */ + private final long batchSize; + + /** */ + private final IgniteLogger log; + + /** */ + private final Map<Integer, String> cacheIdToUpsertQry = new HashMap<>(); + + /** */ + private final Map<Integer, String> cacheIdToDeleteQry = new HashMap<>(); + + /** */ + private final Map<Integer, Set<String>> cacheIdToPrimaryKeys = new HashMap<>(); + + /** */ + private final Map<Integer, Set<String>> cacheIdToFields = new HashMap<>(); + + /** */ + private final Set<Object> curKeys = new HashSet<>(); + + /** + * @param dataSrc {@link DataSource} - connection pool to PostgreSql + * @param batchSize the number of CDC events to include in a single batch + * @param log the {@link IgniteLogger} instance used for logging CDC processing events + */ + public IgniteToPostgreSqlCdcApplier( + DataSource dataSrc, + long batchSize, + IgniteLogger log + ) { + this.dataSrc = dataSrc; + this.batchSize = batchSize; + this.log = log; + } + + /** + * @param evts an {@link 
Iterator} of {@link CdcEvent} objects to be applied + * @return the total number of events successfully batched and executed + */ + public long applyEvents(Iterator<CdcEvent> evts) { + try (Connection conn = dataSrc.getConnection()) { + conn.setAutoCommit(DFLT_AUTO_COMMIT); + + long res = applyEvents(conn, evts); + + conn.commit(); Review Comment: Looks like we commit everything inside applyEvents ########## modules/cdc-ext/src/main/java/org/apache/ignite/cdc/postgresql/IgniteToPostgreSqlCdcApplier.java: ########## @@ -0,0 +1,670 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.ignite.cdc.postgresql; + +import java.math.BigDecimal; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.SQLException; +import java.sql.Statement; +import java.sql.Types; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Map; +import java.util.Set; +import java.util.UUID; +import javax.sql.DataSource; +import org.apache.ignite.IgniteException; +import org.apache.ignite.IgniteLogger; +import org.apache.ignite.binary.BinaryObject; +import org.apache.ignite.cache.CacheEntryVersion; +import org.apache.ignite.cache.QueryEntity; +import org.apache.ignite.cdc.CdcCacheEvent; +import org.apache.ignite.cdc.CdcEvent; +import org.apache.ignite.internal.util.typedef.F; + +import static org.apache.ignite.internal.processors.cache.GridCacheUtils.UNDEFINED_CACHE_ID; + +/** */ +public class IgniteToPostgreSqlCdcApplier { + /** */ + public static final String DFLT_SQL_TYPE = "OTHER"; + + /** */ + public static final Map<String, String> JAVA_TO_SQL_TYPES; + + /** */ + public static final Set<String> SQL_TYPES_WITH_PRECISION_ONLY; + + /** */ + public static final Set<String> SQL_TYPES_WITH_PRECISION_AND_SCALE; + + static { + Map<String, String> javaToSqlTypes = new HashMap<>(); + + javaToSqlTypes.put("java.lang.String", "VARCHAR"); + javaToSqlTypes.put("java.lang.Integer", "INT"); + javaToSqlTypes.put("int", "INT"); + javaToSqlTypes.put("java.lang.Long", "BIGINT"); + javaToSqlTypes.put("long", "BIGINT"); + javaToSqlTypes.put("java.lang.Boolean", "BOOLEAN"); + javaToSqlTypes.put("boolean", "BOOLEAN"); + javaToSqlTypes.put("java.lang.Double", "DOUBLE PRECISION"); + javaToSqlTypes.put("double", "DOUBLE PRECISION"); + javaToSqlTypes.put("java.lang.Float", "REAL"); + javaToSqlTypes.put("float", "REAL"); + javaToSqlTypes.put("java.math.BigDecimal", "DECIMAL"); + 
javaToSqlTypes.put("java.lang.Short", "SMALLINT"); + javaToSqlTypes.put("short", "SMALLINT"); + javaToSqlTypes.put("java.lang.Byte", "SMALLINT"); + javaToSqlTypes.put("byte", "SMALLINT"); + javaToSqlTypes.put("java.util.UUID", "UUID"); + javaToSqlTypes.put("[B", "BYTEA"); + javaToSqlTypes.put("java.lang.Object", "OTHER"); + + JAVA_TO_SQL_TYPES = Collections.unmodifiableMap(javaToSqlTypes); + + Set<String> sqlTypesWithPrecisionOnly = new HashSet<>(); + + sqlTypesWithPrecisionOnly.add("VARCHAR"); + sqlTypesWithPrecisionOnly.add("DOUBLE PRECISION"); + + SQL_TYPES_WITH_PRECISION_ONLY = Collections.unmodifiableSet(sqlTypesWithPrecisionOnly); + + Set<String> sqlTypesWithPrecisionAndScale = new HashSet<>(); + + sqlTypesWithPrecisionAndScale.add("DECIMAL"); + sqlTypesWithPrecisionAndScale.add("REAL"); + + SQL_TYPES_WITH_PRECISION_AND_SCALE = Collections.unmodifiableSet(sqlTypesWithPrecisionAndScale); + } + + /** */ + private static final boolean DFLT_AUTO_COMMIT = false; + + /** */ + private final DataSource dataSrc; + + /** */ + private final long batchSize; + + /** */ + private final IgniteLogger log; + + /** */ + private final Map<Integer, String> cacheIdToUpsertQry = new HashMap<>(); + + /** */ + private final Map<Integer, String> cacheIdToDeleteQry = new HashMap<>(); + + /** */ + private final Map<Integer, Set<String>> cacheIdToPrimaryKeys = new HashMap<>(); + + /** */ + private final Map<Integer, Set<String>> cacheIdToFields = new HashMap<>(); + + /** */ + private final Set<Object> curKeys = new HashSet<>(); + + /** + * @param dataSrc {@link DataSource} - connection pool to PostgreSql + * @param batchSize the number of CDC events to include in a single batch + * @param log the {@link IgniteLogger} instance used for logging CDC processing events + */ + public IgniteToPostgreSqlCdcApplier( + DataSource dataSrc, + long batchSize, + IgniteLogger log + ) { + this.dataSrc = dataSrc; + this.batchSize = batchSize; + this.log = log; + } + + /** + * @param evts an {@link 
Iterator} of {@link CdcEvent} objects to be applied + * @return the total number of events successfully batched and executed + */ + public long applyEvents(Iterator<CdcEvent> evts) { + try (Connection conn = dataSrc.getConnection()) { + conn.setAutoCommit(DFLT_AUTO_COMMIT); + + long res = applyEvents(conn, evts); + + conn.commit(); + + return res; + } + catch (Throwable e) { + log.error(e.getMessage(), e); Review Comment: Should we call `conn.rollback()` here? ########## modules/cdc-ext/src/test/java/org/apache/ignite/cdc/postgres/CdcPostgreSqlReplicationTest.java: ########## @@ -0,0 +1,699 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.ignite.cdc.postgres; + +import java.sql.ResultSet; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.EnumSet; +import java.util.HashSet; +import java.util.List; +import java.util.Objects; +import java.util.Set; +import java.util.function.Function; +import java.util.function.IntConsumer; +import java.util.function.Supplier; +import java.util.stream.Collectors; +import java.util.stream.IntStream; +import java.util.stream.Stream; +import io.zonky.test.db.postgres.embedded.EmbeddedPostgres; +import org.apache.ignite.IgniteCache; +import org.apache.ignite.IgniteCheckedException; +import org.apache.ignite.IgniteException; +import org.apache.ignite.cache.CacheAtomicityMode; +import org.apache.ignite.cache.QueryEntity; +import org.apache.ignite.cdc.postgresql.IgniteToPostgreSqlCdcConsumer; +import org.apache.ignite.cluster.ClusterState; +import org.apache.ignite.configuration.CacheConfiguration; +import org.apache.ignite.configuration.DataRegionConfiguration; +import org.apache.ignite.configuration.DataStorageConfiguration; +import org.apache.ignite.configuration.IgniteConfiguration; +import org.apache.ignite.internal.IgniteEx; +import org.apache.ignite.internal.IgniteInternalFuture; +import org.apache.ignite.internal.IgniteInterruptedCheckedException; +import org.apache.ignite.internal.util.typedef.internal.A; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import static org.apache.ignite.cache.CacheAtomicityMode.ATOMIC; +import static org.apache.ignite.cache.CacheAtomicityMode.TRANSACTIONAL; +import static org.apache.ignite.testframework.GridTestUtils.waitForCondition; + +/** */ +@RunWith(Parameterized.class) +public class CdcPostgreSqlReplicationTest extends CdcPostgreSqlReplicationAbstractTest { + /** */ + private static final int BACKUP = 0; + + /** */ + private static final String CACHE_MODE = 
"PARTITIONED"; + + /** */ + @Parameterized.Parameter() + public CacheAtomicityMode atomicity; + + /** */ + @Parameterized.Parameter(1) + public boolean createTables; + + /** @return Test parameters. */ + @Parameterized.Parameters(name = "atomicity={0}, createTables={1}") + public static Collection<?> parameters() { + List<Object[]> params = new ArrayList<>(); + + for (CacheAtomicityMode atomicity : EnumSet.of(ATOMIC, TRANSACTIONAL)) { + for (boolean createTables : new boolean[] {true, false}) + params.add(new Object[] {atomicity, createTables}); + } + + return params; + } + + /** */ + protected IgniteEx src; + + /** */ + protected EmbeddedPostgres postgres; + + /** {@inheritDoc} */ + @Override protected IgniteConfiguration getConfiguration(String igniteInstanceName) throws Exception { + IgniteConfiguration cfg = super.getConfiguration(igniteInstanceName); + + DataRegionConfiguration dataRegionConfiguration = new DataRegionConfiguration() + .setPersistenceEnabled(true) + .setCdcEnabled(true); + + DataStorageConfiguration dataStorageConfiguration = new DataStorageConfiguration() + .setWalForceArchiveTimeout(5_000) + .setDefaultDataRegionConfiguration(dataRegionConfiguration); + + cfg.setDataStorageConfiguration(dataStorageConfiguration); + cfg.setConsistentId(igniteInstanceName); + + return cfg; + } + + /** {@inheritDoc} */ + @Override protected IgniteToPostgreSqlCdcConsumer getCdcConsumerConfiguration() { + IgniteToPostgreSqlCdcConsumer cdcCfg = super.getCdcConsumerConfiguration(); + + cdcCfg.setCreateTables(createTables); + + return cdcCfg; + } + + /** {@inheritDoc} */ + @Override protected void beforeTest() throws Exception { + cleanPersistenceDir(); + + src = startGrid(0); + + src.cluster().state(ClusterState.ACTIVE); + + postgres = EmbeddedPostgres.builder().start(); + } + + /** {@inheritDoc} */ + @Override protected void afterTest() throws Exception { + stopAllGrids(); + + cleanPersistenceDir(); + + postgres.close(); + } + + /** */ + @Test + public void 
testSingleColumnKeyDataReplicationWithPrimaryFirst() throws Exception { + testSingleColumnKeyDataReplication(false); + } + + /** */ + @Test + public void testSingleColumnKeyDataReplicationWithPrimaryLast() throws Exception { + testSingleColumnKeyDataReplication(true); + } + + /** */ + public void testSingleColumnKeyDataReplication(boolean isPrimaryLast) throws Exception { + String[] tableFields; + + String insertQry = "INSERT INTO T1 VALUES(?, ?)"; + String updateQry; + + IntConsumer insert; + IntConsumer update; + + if (isPrimaryLast) { + tableFields = new String[] {"NAME VARCHAR(20)", "ID BIGINT PRIMARY KEY"}; + + updateQry = "MERGE INTO T1 (NAME, ID) VALUES (?, ?)"; + + insert = id -> executeOnIgnite(src, insertQry, "Name" + id, id); + update = id -> executeOnIgnite(src, updateQry, id + "Name", id); + } + else { + tableFields = new String[] {"ID BIGINT PRIMARY KEY", "NAME VARCHAR(20)"}; + + updateQry = "MERGE INTO T1 (ID, NAME) VALUES (?, ?)"; + + insert = id -> executeOnIgnite(src, insertQry, id, "Name" + id); + update = id -> executeOnIgnite(src, updateQry, id, id + "Name"); + } + + createTable("T1", tableFields, null, null, null); + + Supplier<Boolean> checkInsert = () -> checkSingleColumnKeyTable(id -> "Name" + id); + + Supplier<Boolean> checkUpdate = () -> checkSingleColumnKeyTable(id -> id + "Name"); + + testDataReplication("T1", insert, checkInsert, update, checkUpdate); + } + + /** */ + private boolean checkSingleColumnKeyTable(Function<Long, String> idToName) { + String qry = "SELECT ID, NAME FROM T1"; + + try (ResultSet res = selectOnPostgreSql(postgres, qry)) { + long cnt = 0; + + long id; + String curName; + + while (res.next()) { + id = res.getLong("ID"); + curName = res.getString("NAME"); + + if (!idToName.apply(id).equals(curName)) Review Comment: Do you have a check that all rows are different? Can we compare ID with expected value? -- This is an automated message from the Apache Git Service. 
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment.
To unsubscribe, e-mail: notifications-unsubscribe@ignite.apache.org
For queries about this service, please contact Infrastructure at: users@infra.apache.org