[GitHub] [beam] mosche commented on a change in pull request #15848: [BEAM-13835] An any-type implementation for readWithPartitions for JdbcIO

GitBox Mon, 07 Feb 2022 05:03:33 -0800


mosche commented on a change in pull request #15848:
URL: https://github.com/apache/beam/pull/15848#discussion_r800633013




##########
File path: 
sdks/java/io/jdbc/src/test/java/org/apache/beam/sdk/io/jdbc/JdbcUtilTest.java
##########
@@ -41,4 +53,126 @@ public void testGetPreparedStatementSetCaller() throws 
Exception {
     String expectedStmt = "INSERT INTO test_table(col1, col2, col3) VALUES(?, 
?, ?)";
     assertEquals(expectedStmt, generatedStmt);
   }
+
+  @Test
+  public void testStringPartitioningWithSingleKeyFn() {
+    JdbcReadWithPartitionsHelper<String> helper =
+        
JdbcUtil.JdbcReadWithPartitionsHelper.getPartitionsHelper(TypeDescriptors.strings());
+    List<KV<String, String>> expectedRanges =
+        Lists.<KV<String, String>>newArrayList(KV.of("a", "b"));
+    List<KV<String, String>> ranges = 
Lists.newArrayList(helper.calculateRanges("a", "a", 10L));
+    // It is not possible to generate any more than one range, because the 
lower and upper range are
+    // exactly the same.
+    // The range is "a" to the very next element after it, which would be 
"a"+1 -> "b".
+    // Because the query's filter statement is : WHERE column >= lowerBound 
AND column < upperBound.
+    assertEquals(1, ranges.size());
+    assertArrayEquals(expectedRanges.toArray(), ranges.toArray());
+  }
+
+  @Test
+  public void testStringPartitioningWithSingleKeyMultiletterFn() {
+    JdbcReadWithPartitionsHelper<String> helper =
+        
JdbcUtil.JdbcReadWithPartitionsHelper.getPartitionsHelper(TypeDescriptors.strings());
+    List<KV<String, String>> expectedRanges =
+        Lists.<KV<String, String>>newArrayList(KV.of("afar", "afas"));
+    List<KV<String, String>> ranges =
+        Lists.newArrayList(helper.calculateRanges("afar", "afar", 10L));
+    // It is not possible to generate any more than one range, because the 
lower and upper range are
+    // exactly the same.
+    // The range is "afar" to the very next element after it, which would be 
"afar"+1 -> "afas".
+    // Because the query's filter statement is : WHERE column >= lowerBound 
AND column < upperBound.
+    assertEquals(1, ranges.size());
+    assertArrayEquals(expectedRanges.toArray(), ranges.toArray());
+  }
+
+  @Test
+  public void testStringPartitioningWithMultiletter() {
+    JdbcReadWithPartitionsHelper<String> helper =
+        
JdbcUtil.JdbcReadWithPartitionsHelper.getPartitionsHelper(TypeDescriptors.strings());
+    List<KV<String, String>> ranges =
+        Lists.newArrayList(helper.calculateRanges("afarisade", 
"zfastaridoteaf", 10L));
+    // The upper bound is "zfastaridoteaf" to the very next element after it, 
which would
+    // be "zfastaridoteaf"+1 -> "zfastaridoteag".
+    // Because the query's filter statement is : WHERE column >= lowerBound 
AND column < upperBound.
+    assertEquals(13L, ranges.size());
+    assertThat(
+        ranges,
+        containsInAnyOrder(
+            KV.of("afarisade", "c"),
+            KV.of("c", "e"),
+            KV.of("e", "g"),
+            KV.of("g", "i"),
+            KV.of("i", "k"),
+            KV.of("k", "m"),
+            KV.of("m", "o"),
+            KV.of("o", "q"),
+            KV.of("q", "s"),
+            KV.of("s", "u"),
+            KV.of("u", "w"),
+            KV.of("w", "y"),
+            KV.of("y", "zfastaridoteag")));
+  }
+
+  @Test
+  public void testDatetimePartitioningWithSingleKey() {
+    JdbcReadWithPartitionsHelper<DateTime> helper =
+        JdbcUtil.JdbcReadWithPartitionsHelper.getPartitionsHelper(
+            TypeDescriptor.of(DateTime.class));
+    DateTime onlyPoint = DateTime.now();
+    List<KV<DateTime, DateTime>> expectedRanges =
+        Lists.newArrayList(KV.of(onlyPoint, onlyPoint.plusMillis(1)));
+    List<KV<DateTime, DateTime>> ranges =
+        Lists.newArrayList(helper.calculateRanges(onlyPoint, onlyPoint, 10L));
+    // It is not possible to generate any more than one range, because the 
lower and upper range are
+    // exactly the same.
+    // The range goes from the current DateTime to ONE MILLISECOND AFTER.
+    // Because the query's filter statement is : WHERE column >= lowerBound 
AND column < upperBound.
+    assertEquals(1, ranges.size());
+    assertArrayEquals(expectedRanges.toArray(), ranges.toArray());
+  }
+
+  @Test
+  public void testDatetimePartitioningWithMultiKey() {
+    JdbcReadWithPartitionsHelper<DateTime> helper =
+        JdbcUtil.JdbcReadWithPartitionsHelper.getPartitionsHelper(
+            TypeDescriptor.of(DateTime.class));
+    DateTime lastPoint = DateTime.now();
+    // At least 10ms in the past, or more.
+    DateTime firstPoint = lastPoint.minusMillis(10 + new 
Random().nextInt(Integer.MAX_VALUE));
+    List<KV<DateTime, DateTime>> ranges =
+        Lists.newArrayList(helper.calculateRanges(firstPoint, lastPoint, 10L));
+    // DateTime ranges are able to work out 10-11 ranges because they split in 
milliseconds, which is
+    // a very small granularity.
+    assertThat(Double.valueOf(ranges.size()), closeTo(10, 1));
+  }
+
+  @Test
+  public void testLongPartitioningWithSingleKey() {
+    JdbcReadWithPartitionsHelper<Long> helper =
+        
JdbcUtil.JdbcReadWithPartitionsHelper.getPartitionsHelper(TypeDescriptors.longs());
+    List<KV<Long, Long>> expectedRanges = Lists.newArrayList(KV.of(12L, 13L));
+    List<KV<Long, Long>> ranges = 
Lists.newArrayList(helper.calculateRanges(12L, 12L, 10L));
+    // It is not possible to generate any more than one range, because the 
lower and upper range are
+    // exactly the same.
+    // The range goes from the current Long element to ONE ELEMENT AFTER.
+    // Because the query's filter statement is : WHERE column >= lowerBound 
AND column < upperBound.
+    assertEquals(1, ranges.size());
+    assertArrayEquals(expectedRanges.toArray(), ranges.toArray());
+  }
+
+  @Test
+  public void testLongPartitioningNotEnoughRanges() {
+    JdbcReadWithPartitionsHelper<Long> helper =
+        
JdbcUtil.JdbcReadWithPartitionsHelper.getPartitionsHelper(TypeDescriptors.longs());
+    // The minimum stride is one, which is what causes this sort of 
partitioning.
+    List<KV<Long, Long>> expectedRanges =
+        Lists.newArrayList(KV.of(12L, 14L), KV.of(14L, 16L), KV.of(16L, 18L), 
KV.of(18L, 21L));
+    List<KV<Long, Long>> ranges = 
Lists.newArrayList(helper.calculateRanges(12L, 20L, 10L));
+    // It is not possible to generate any more than one range, because the 
lower and upper range are

Review comment:
       Please remove this comment; it is wrong for this case (this test expects four ranges, not one).




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

[GitHub] [beam] mosche commented on a change in pull request #15848: [BEAM-13835] An any-type implementation for readWithPartitions for JdbcIO

Reply via email to