gharris1727 commented on code in PR #17510:
URL: https://github.com/apache/kafka/pull/17510#discussion_r1924063426
##########
connect/api/src/main/java/org/apache/kafka/connect/data/Values.java:
##########
@@ -71,6 +71,8 @@ public class Values {
static final String ISO_8601_DATE_FORMAT_PATTERN = "yyyy-MM-dd";
static final String ISO_8601_TIME_FORMAT_PATTERN = "HH:mm:ss.SSS'Z'";
static final String ISO_8601_TIMESTAMP_FORMAT_PATTERN =
ISO_8601_DATE_FORMAT_PATTERN + "'T'" + ISO_8601_TIME_FORMAT_PATTERN;
+ private static BigDecimal TOO_BIG = new BigDecimal("1e1000000");
+ private static BigDecimal TOO_SMALL = new BigDecimal("1e-1000000");
Review Comment:
These constants are unused.
##########
connect/api/src/test/java/org/apache/kafka/connect/data/ValuesTest.java:
##########
@@ -1154,7 +1152,17 @@ public void
shouldParseFractionalPartsAsIntegerWhenNoFractionalPart() {
assertEquals(new SchemaAndValue(Schema.INT32_SCHEMA, 66000),
Values.parseString("66000.0"));
assertEquals(new SchemaAndValue(Schema.FLOAT32_SCHEMA, 66000.0008f),
Values.parseString("66000.0008"));
}
+ @Test
+ public void avoidCpuAndMemoryIssuesConvertingExtremeBigDecimals() {
+ long s = System.currentTimeMillis();
+ String veryBig = "1e+100000000"; // new BigDecimal().setScale(0,
RoundingMode.FLOOR) takes around two minutes and uses 3GB;
+ System.out.println("time taken" + System.currentTimeMillis() - s)
+ assertEquals(new SchemaAndValue(Schema.BYTES_SCHEMA, veryBig),
Values.parseString(veryBig));
+ String verySmall = "1e-100000000";
+ assertEquals(new SchemaAndValue(Schema.STRING_SCHEMA, verySmall),
Values.parseString(verySmall));
+ assertFalse(true);
Review Comment:
This will always fail.
##########
connect/api/src/test/java/org/apache/kafka/connect/data/ValuesTest.java:
##########
@@ -1154,7 +1152,17 @@ public void
shouldParseFractionalPartsAsIntegerWhenNoFractionalPart() {
assertEquals(new SchemaAndValue(Schema.INT32_SCHEMA, 66000),
Values.parseString("66000.0"));
assertEquals(new SchemaAndValue(Schema.FLOAT32_SCHEMA, 66000.0008f),
Values.parseString("66000.0008"));
}
+ @Test
+ public void avoidCpuAndMemoryIssuesConvertingExtremeBigDecimals() {
+ long s = System.currentTimeMillis();
+ String veryBig = "1e+100000000"; // new BigDecimal().setScale(0,
RoundingMode.FLOOR) takes around two minutes and uses 3GB;
+ System.out.println("time taken" + System.currentTimeMillis() - s)
Review Comment:
I think we should remove this debug print and add a `@Timeout` annotation to
this method so that the test fails if a performance regression occurs.
##########
connect/api/src/main/java/org/apache/kafka/connect/data/Values.java:
##########
@@ -1039,30 +1041,34 @@ private static SchemaAndValue parseAsNumber(String
token) {
return new SchemaAndValue(schema, decimal);
}
}
+
+ public static boolean isBigInteger(BigDecimal bd) {
Review Comment:
This uses tabs, which is disallowed by our linter.
##########
connect/api/src/test/java/org/apache/kafka/connect/data/ValuesTest.java:
##########
@@ -45,6 +41,8 @@
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.fail;
+import org.junit.jupiter.api.Test;
Review Comment:
If you run into problems with the imports, run `./gradlew spotlessApply`
##########
connect/api/src/test/java/org/apache/kafka/connect/data/ValuesTest.java:
##########
@@ -1154,7 +1152,17 @@ public void
shouldParseFractionalPartsAsIntegerWhenNoFractionalPart() {
assertEquals(new SchemaAndValue(Schema.INT32_SCHEMA, 66000),
Values.parseString("66000.0"));
assertEquals(new SchemaAndValue(Schema.FLOAT32_SCHEMA, 66000.0008f),
Values.parseString("66000.0008"));
}
+ @Test
+ public void avoidCpuAndMemoryIssuesConvertingExtremeBigDecimals() {
+ long s = System.currentTimeMillis();
+ String veryBig = "1e+100000000"; // new BigDecimal().setScale(0,
RoundingMode.FLOOR) takes around two minutes and uses 3GB;
+ System.out.println("time taken" + System.currentTimeMillis() - s)
+ assertEquals(new SchemaAndValue(Schema.BYTES_SCHEMA, veryBig),
Values.parseString(veryBig));
+ String verySmall = "1e-100000000";
+ assertEquals(new SchemaAndValue(Schema.STRING_SCHEMA, verySmall),
Values.parseString(verySmall));
Review Comment:
This actually gets parsed as a FLOAT32. Arguably that's a bug, since we're
losing precision, but it's the legacy behavior and we shouldn't change it here
— or at least that should be a separate change. For now, we should just update
this test to match the existing behavior.
##########
connect/api/src/main/java/org/apache/kafka/connect/data/Values.java:
##########
@@ -1039,30 +1041,34 @@ private static SchemaAndValue parseAsNumber(String
token) {
return new SchemaAndValue(schema, decimal);
}
}
+
+ public static boolean isBigInteger(BigDecimal bd) {
+ return bd.signum() == 0 || bd.scale() <= 0 ||
bd.stripTrailingZeros().scale() <= 0;
+ }
private static SchemaAndValue parseAsExactDecimal(BigDecimal decimal) {
+ BigDecimal abs = decimal.abs();
Review Comment:
This value is unused.
##########
connect/api/src/test/java/org/apache/kafka/connect/data/ValuesTest.java:
##########
@@ -1154,7 +1152,17 @@ public void
shouldParseFractionalPartsAsIntegerWhenNoFractionalPart() {
assertEquals(new SchemaAndValue(Schema.INT32_SCHEMA, 66000),
Values.parseString("66000.0"));
assertEquals(new SchemaAndValue(Schema.FLOAT32_SCHEMA, 66000.0008f),
Values.parseString("66000.0008"));
}
+ @Test
+ public void avoidCpuAndMemoryIssuesConvertingExtremeBigDecimals() {
+ long s = System.currentTimeMillis();
+ String veryBig = "1e+100000000"; // new BigDecimal().setScale(0,
RoundingMode.FLOOR) takes around two minutes and uses 3GB;
Review Comment:
I think this example still exhibits the regression: the bug remains because
the ceil operation takes an extremely long time on inputs of this magnitude.
##########
connect/api/src/main/java/org/apache/kafka/connect/data/Values.java:
##########
@@ -1039,30 +1041,34 @@ private static SchemaAndValue parseAsNumber(String
token) {
return new SchemaAndValue(schema, decimal);
}
}
+
+ public static boolean isBigInteger(BigDecimal bd) {
+ return bd.signum() == 0 || bd.scale() <= 0 ||
bd.stripTrailingZeros().scale() <= 0;
Review Comment:
This stripTrailingZeros covers the cases that the ceil/floor comparison was
supposed to cover, great find!
##########
connect/api/src/test/java/org/apache/kafka/connect/data/ValuesTest.java:
##########
@@ -1154,7 +1152,17 @@ public void
shouldParseFractionalPartsAsIntegerWhenNoFractionalPart() {
assertEquals(new SchemaAndValue(Schema.INT32_SCHEMA, 66000),
Values.parseString("66000.0"));
assertEquals(new SchemaAndValue(Schema.FLOAT32_SCHEMA, 66000.0008f),
Values.parseString("66000.0008"));
}
+ @Test
+ public void avoidCpuAndMemoryIssuesConvertingExtremeBigDecimals() {
+ long s = System.currentTimeMillis();
+ String veryBig = "1e+100000000"; // new BigDecimal().setScale(0,
RoundingMode.FLOOR) takes around two minutes and uses 3GB;
+ System.out.println("time taken" + System.currentTimeMillis() - s)
+ assertEquals(new SchemaAndValue(Schema.BYTES_SCHEMA, veryBig),
Values.parseString(veryBig));
Review Comment:
This shouldn't be a plain BYTES schema, it should be a connect `Decimal`
type. And veryBig needs to be parsed into a BigDecimal.
##########
connect/api/src/test/java/org/apache/kafka/connect/data/ValuesTest.java:
##########
@@ -1154,7 +1152,17 @@ public void
shouldParseFractionalPartsAsIntegerWhenNoFractionalPart() {
assertEquals(new SchemaAndValue(Schema.INT32_SCHEMA, 66000),
Values.parseString("66000.0"));
assertEquals(new SchemaAndValue(Schema.FLOAT32_SCHEMA, 66000.0008f),
Values.parseString("66000.0008"));
}
+ @Test
Review Comment:
nit: missing a newline above this test.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]