TheNeuralBit commented on a change in pull request #12827:
URL: https://github.com/apache/beam/pull/12827#discussion_r503552395
##########
File path:
sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/meta/provider/kafka/KafkaTestTable.java
##########
@@ -73,6 +77,10 @@ public void setNumberOfRecordsForRate(int
numberOfRecordsForRate) {
this.numberOfRecordsForRate = numberOfRecordsForRate;
}
+ private byte[] getRecordValueBytes(KafkaTestRecord record) {
+ return record.getValue().toByteArray();
+ }
Review comment:
nit: I think this is cleaner inlined; it doesn't look like it's used
anywhere else.
##########
File path:
sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/meta/provider/kafka/BeamKafkaTableTest.java
##########
@@ -0,0 +1,170 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.extensions.sql.meta.provider.kafka;
+
+import java.util.List;
+import org.apache.beam.sdk.extensions.sql.impl.BeamTableStatistics;
+import org.apache.beam.sdk.testing.PAssert;
+import org.apache.beam.sdk.testing.TestPipeline;
+import org.apache.beam.sdk.transforms.Create;
+import org.apache.beam.sdk.transforms.MapElements;
+import org.apache.beam.sdk.transforms.SimpleFunction;
+import org.apache.beam.sdk.values.KV;
+import org.apache.beam.sdk.values.PCollection;
+import org.apache.beam.sdk.values.Row;
+import
org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList;
+import org.junit.Assert;
+import org.junit.Rule;
+import org.junit.Test;
+
+/** Test utility for BeamKafkaTable implementations. */
+public abstract class BeamKafkaTableTest {
+ @Rule public TestPipeline pipeline = TestPipeline.create();
+
+ protected static final List<String> TOPICS = ImmutableList.of("topic1",
"topic2");
+
+ /** Returns proper implementation of KafkaTestTable for the tested format */
+ protected abstract KafkaTestTable getTestTable(int numberOfPartitions);
+
+ /** Returns proper implementation of BeamKafkaTable for the tested format */
+ protected abstract BeamKafkaTable getBeamKafkaTable();
+
+ /** Returns encoded payload for the tested format. */
+ protected abstract byte[] generateEncodedPayload(int i);
+
+ /** Provides a deterministic row from the given integer. */
+ protected abstract Row generateRow(int i);
+
+ @Test
+ public void testOrderedArrivalSinglePartitionRate() {
+ KafkaTestTable table = getTestTable(1);
+ for (int i = 0; i < 100; i++) {
+ table.addRecord(createKafkaTestRecord("k" + i, i, 500L * i));
+ }
+
+ BeamTableStatistics stats = table.getTableStatistics(null);
+ Assert.assertEquals(2d, stats.getRate(), 0.001);
+ }
+
+ @Test
+ public void testOrderedArrivalMultiplePartitionsRate() {
+ KafkaTestTable table = getTestTable(3);
+ for (int i = 0; i < 100; i++) {
+ table.addRecord(createKafkaTestRecord("k" + i, i, 500L * i));
+ }
+
+ BeamTableStatistics stats = table.getTableStatistics(null);
+ Assert.assertEquals(2d, stats.getRate(), 0.001);
+ }
+
+ @Test
+ public void testOnePartitionAheadRate() {
+ KafkaTestTable table = getTestTable(3);
+ for (int i = 0; i < 100; i++) {
+ table.addRecord(createKafkaTestRecord("1", i, 1000L * i));
+ table.addRecord(createKafkaTestRecord("2", i, 500L * i));
+ }
+
+ table.setNumberOfRecordsForRate(20);
+ BeamTableStatistics stats = table.getTableStatistics(null);
+ Assert.assertEquals(1d, stats.getRate(), 0.001);
+ }
+
+ @Test
+ public void testLateRecords() {
+ KafkaTestTable table = getTestTable(3);
+
+ table.addRecord(createKafkaTestRecord("1", 132, 1000L));
+ for (int i = 0; i < 98; i++) {
+ table.addRecord(createKafkaTestRecord("1", i, 500L));
+ }
+ table.addRecord(createKafkaTestRecord("1", 133, 2000L));
+
+ table.setNumberOfRecordsForRate(200);
+ BeamTableStatistics stats = table.getTableStatistics(null);
+ Assert.assertEquals(1d, stats.getRate(), 0.001);
+ }
+
+ @Test
+ public void testAllLate() {
+ KafkaTestTable table = getTestTable(3);
+
+ table.addRecord(createKafkaTestRecord("1", 132, 1000L));
+ for (int i = 0; i < 98; i++) {
+ table.addRecord(createKafkaTestRecord("1", i, 500L));
+ }
+
+ table.setNumberOfRecordsForRate(200);
+ BeamTableStatistics stats = table.getTableStatistics(null);
+ Assert.assertTrue(stats.isUnknown());
+ }
+
+ @Test
+ public void testEmptyPartitionsRate() {
+ KafkaTestTable table = getTestTable(3);
+ BeamTableStatistics stats = table.getTableStatistics(null);
+ Assert.assertTrue(stats.isUnknown());
+ }
+
+ @Test
+ public void allTheRecordsSameTimeRate() {
+ KafkaTestTable table = getTestTable(3);
+ for (int i = 0; i < 100; i++) {
+ table.addRecord(createKafkaTestRecord("key" + i, i, 1000L));
+ }
+ BeamTableStatistics stats = table.getTableStatistics(null);
+ Assert.assertTrue(stats.isUnknown());
+ }
Review comment:
The tests above this point are the only ones that use `KafkaTestTable`,
and they're only exercising the `getTableStatistics` method, which never
deserializes any records. So:
1. We shouldn't really need to repeat these tests for each payload format.
2. We don't need a separate `KafkaTestTableCSV` and `KafkaTestTableAvro`.
There could just be a single concrete `KafkaTestTable` that raises an error in
`getPTransformFor{Input,Output}`.
I'm fine if we don't worry about (1) for now, but I'd like to address (2)
for clarity.
##########
File path:
sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/meta/provider/kafka/BeamKafkaTableTest.java
##########
@@ -0,0 +1,170 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.extensions.sql.meta.provider.kafka;
+
+import java.util.List;
+import org.apache.beam.sdk.extensions.sql.impl.BeamTableStatistics;
+import org.apache.beam.sdk.testing.PAssert;
+import org.apache.beam.sdk.testing.TestPipeline;
+import org.apache.beam.sdk.transforms.Create;
+import org.apache.beam.sdk.transforms.MapElements;
+import org.apache.beam.sdk.transforms.SimpleFunction;
+import org.apache.beam.sdk.values.KV;
+import org.apache.beam.sdk.values.PCollection;
+import org.apache.beam.sdk.values.Row;
+import
org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList;
+import org.junit.Assert;
+import org.junit.Rule;
+import org.junit.Test;
+
+/** Test utility for BeamKafkaTable implementations. */
+public abstract class BeamKafkaTableTest {
+ @Rule public TestPipeline pipeline = TestPipeline.create();
+
+ protected static final List<String> TOPICS = ImmutableList.of("topic1",
"topic2");
+
+ /** Returns proper implementation of KafkaTestTable for the tested format */
+ protected abstract KafkaTestTable getTestTable(int numberOfPartitions);
+
+ /** Returns proper implementation of BeamKafkaTable for the tested format */
+ protected abstract BeamKafkaTable getBeamKafkaTable();
+
+ /** Returns encoded payload for the tested format. */
Review comment:
```suggestion
/** Returns encoded payload in the tested format corresponding to the row
in `generateRow(i)`. */
```
##########
File path:
sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/meta/provider/kafka/BeamKafkaTable.java
##########
@@ -91,12 +91,14 @@ public BeamKafkaTable updateConsumerProperties(Map<String,
Object> configUpdates
return PCollection.IsBounded.UNBOUNDED;
}
- public abstract PTransform<PCollection<KV<byte[], byte[]>>, PCollection<Row>>
+ protected abstract PTransform<PCollection<KV<byte[], byte[]>>,
PCollection<Row>>
getPTransformForInput();
- public abstract PTransform<PCollection<Row>, PCollection<KV<byte[], byte[]>>>
+ protected abstract PTransform<PCollection<Row>, PCollection<KV<byte[],
byte[]>>>
getPTransformForOutput();
+ protected abstract BeamKafkaTable getTable();
+
Review comment:
I don't think `BeamKafkaTable#getTable` is used; can we get rid of it?
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org