github-advanced-security[bot] commented on code in PR #16259:
URL: https://github.com/apache/druid/pull/16259#discussion_r1559307018


##########
extensions-core/parquet-extensions/src/test/java/org/apache/druid/data/input/parquet/CompatParquetReaderTest.java:
##########
@@ -437,4 +581,55 @@
                                 + "}";
     Assert.assertEquals(expectedJson, 
DEFAULT_JSON_WRITER.writeValueAsString(sampled.get(0).getRawValues()));
   }
+
+  private static class SeekableFileEntity extends FileEntity
+  {
+    // It is wrong to declare this variable at this level since it is tied to 
the seekable stream that might be created
+    // multiple times. However, it is convenient for this test to access this 
variable to test the seekable stream.
+    long pos = 0;
+    public SeekableFileEntity(File file)
+    {
+      super(file);
+    }
+
+    @Override
+    public boolean isSeekable()
+    {
+      return true;
+    }
+
+    @Override
+    public long getSize()
+    {
+      return getFile().length();
+    }
+
+    @Override
+    public SeekableInputStream openSeekable() throws IOException
+    {
+      return new SeekableInputStream()
+      {
+       // long pos = 0;
+        InputStream delegate = open();
+        @Override
+        public int read() throws IOException
+        {
+          return delegate.read();
+        }
+        @Override
+        public long getPos()
+        {
+          return pos;
+        }
+
+        @Override
+        public void seek(long newPos) throws IOException
+        {
+          delegate = open();
+          delegate.skip(newPos);

Review Comment:
   ## Ignored error status of call
   
   Method seek ignores exceptional return value of InputStream.skip.
   
   [Show more 
details](https://github.com/apache/druid/security/code-scanning/7251)



##########
processing/src/main/java/org/apache/druid/data/input/impl/RetryingInputStream.java:
##########
@@ -171,6 +174,47 @@
     throw new IOException(t);
   }
 
+  @Override
+  public long getPos() throws IOException
+  {
+    if (delegate == null) {
+      return startOffset;
+    }
+    return startOffset + delegate.getCount();
+  }
+
+  @Override
+  public void seek(long newPos) throws IOException
+  {
+    if (delegate == null) {
+      startOffset = newPos;
+      return;
+    }
+
+    // A less sophisticated version of 
https://github.com/apache/hadoop/blob/87fb97777745b2cefed6bef57490b84676d2343d/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInputStream.java#L364
+    // skip instead of re-opening the stream when possible
+    long diff = newPos - getPos();
+    if (diff > 0) {
+      // forward seek - this is where data can be skipped
+
+      int available = delegate.available();
+      if (diff < available) {
+        delegate.skip(diff);

Review Comment:
   ## Ignored error status of call
   
   Method seek ignores exceptional return value of CountingInputStream.skip.
   
   [Show more 
details](https://github.com/apache/druid/security/code-scanning/7252)



##########
extensions-core/parquet-extensions/src/test/java/org/apache/druid/data/input/parquet/CompatParquetReaderTest.java:
##########
@@ -308,6 +316,142 @@
     Assert.assertEquals(expectedJson, 
DEFAULT_JSON_WRITER.writeValueAsString(sampled.get(0).getRawValues()));
   }
 
+  @Test
+  public void testParquetThriftCompatProjectPushdown() throws IOException
+  {
+    /*
+      message ParquetSchema {
+        required boolean boolColumn;
+        required int32 byteColumn;
+        required int32 shortColumn;
+        required int32 intColumn;
+        required int64 longColumn;
+        required double doubleColumn;
+        required binary binaryColumn (UTF8);
+        required binary stringColumn (UTF8);
+        required binary enumColumn (ENUM);
+        optional boolean maybeBoolColumn;
+        optional int32 maybeByteColumn;
+        optional int32 maybeShortColumn;
+        optional int32 maybeIntColumn;
+        optional int64 maybeLongColumn;
+        optional double maybeDoubleColumn;
+        optional binary maybeBinaryColumn (UTF8);
+        optional binary maybeStringColumn (UTF8);
+        optional binary maybeEnumColumn (ENUM);
+        required group stringsColumn (LIST) {
+          repeated binary stringsColumn_tuple (UTF8);
+        }
+        required group intSetColumn (LIST) {
+          repeated int32 intSetColumn_tuple;
+        }
+        required group intToStringColumn (MAP) {
+          repeated group map (MAP_KEY_VALUE) {
+            required int32 key;
+            optional binary value (UTF8);
+          }
+        }
+        required group complexColumn (MAP) {
+          repeated group map (MAP_KEY_VALUE) {
+            required int32 key;
+            optional group value (LIST) {
+              repeated group value_tuple {
+                required group nestedIntsColumn (LIST) {
+                  repeated int32 nestedIntsColumn_tuple;
+                }
+                required binary nestedStringColumn (UTF8);
+              }
+            }
+          }
+        }
+      }
+     */
+    final String file = "example/compat/parquet-thrift-compat.snappy.parquet";
+    final SeekableFileEntity entity = new SeekableFileEntity(new File(file));
+    InputRowSchema schema = new InputRowSchema(
+        new TimestampSpec("timestamp", "auto", 
DateTimes.of("2018-09-01T00:00:00.000Z")),
+        new 
DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("longColumn", 
"stringsColumn", "intToStringColumn", "complexColumn"))),
+        ColumnsFilter.all()
+    );
+    List<JSONPathFieldSpec> flattenExpr = ImmutableList.of(
+        new JSONPathFieldSpec(JSONPathFieldType.PATH, "extractByLogicalMap", 
"$.intToStringColumn.1"),
+        new JSONPathFieldSpec(JSONPathFieldType.PATH, 
"extractByComplexLogicalMap", "$.complexColumn.1[0].nestedIntsColumn[1]")
+    );
+    JSONPathSpec flattenSpec = new JSONPathSpec(true, flattenExpr);
+    InputEntityReader reader = createReader(
+        entity,
+        schema,
+        flattenSpec,
+        false,
+        true
+    );
+
+    List<InputRow> rows = readAllRows(reader);
+
+    // Verify that Parquet library did indeed seek to a particular offset 
while reading the data
+    Assert.assertTrue(String.valueOf(entity.pos), entity.pos > 0);
+    Assert.assertEquals("2018-09-01T00:00:00.000Z", 
rows.get(0).getTimestamp().toString());
+    Assert.assertEquals(4, rows.get(0).getDimensions().size());
+    Assert.assertEquals("0", rows.get(0).getDimension("longColumn").get(0));
+    Assert.assertEquals("arr_0", 
rows.get(0).getDimension("stringsColumn").get(0));
+    Assert.assertEquals("arr_1", 
rows.get(0).getDimension("stringsColumn").get(1));
+    Assert.assertEquals("val_1", 
rows.get(0).getDimension("extractByLogicalMap").get(0));
+    Assert.assertEquals("1", 
rows.get(0).getDimension("extractByComplexLogicalMap").get(0));
+
+    entity.pos = 0;
+    reader = createReader(
+        entity,
+        schema,
+        flattenSpec,
+        false,
+        true
+    );
+    List<InputRowListPlusRawValues> sampled = sampleAllRows(reader);
+    final String expectedJson = "{\n"
+                                + "  \"intToStringColumn\" : {\n"
+                                + "    \"0\" : \"val_0\",\n"
+                                + "    \"1\" : \"val_1\",\n"
+                                + "    \"2\" : \"val_2\"\n"
+                                + "  },\n"
+                                + "  \"stringsColumn\" : [ \"arr_0\", 
\"arr_1\", \"arr_2\" ],\n"
+                                + "  \"longColumn\" : 0,\n"
+                                + "  \"complexColumn\" : {\n"
+                                + "    \"0\" : [ {\n"
+                                + "      \"nestedStringColumn\" : \"val_0\",\n"
+                                + "      \"nestedIntsColumn\" : [ 0, 1, 2 ]\n"
+                                + "    }, {\n"
+                                + "      \"nestedStringColumn\" : \"val_1\",\n"
+                                + "      \"nestedIntsColumn\" : [ 1, 2, 3 ]\n"
+                                + "    }, {\n"
+                                + "      \"nestedStringColumn\" : \"val_2\",\n"
+                                + "      \"nestedIntsColumn\" : [ 2, 3, 4 ]\n"
+                                + "    } ],\n"
+                                + "    \"1\" : [ {\n"
+                                + "      \"nestedStringColumn\" : \"val_0\",\n"
+                                + "      \"nestedIntsColumn\" : [ 0, 1, 2 ]\n"
+                                + "    }, {\n"
+                                + "      \"nestedStringColumn\" : \"val_1\",\n"
+                                + "      \"nestedIntsColumn\" : [ 1, 2, 3 ]\n"
+                                + "    }, {\n"
+                                + "      \"nestedStringColumn\" : \"val_2\",\n"
+                                + "      \"nestedIntsColumn\" : [ 2, 3, 4 ]\n"
+                                + "    } ],\n"
+                                + "    \"2\" : [ {\n"
+                                + "      \"nestedStringColumn\" : \"val_0\",\n"
+                                + "      \"nestedIntsColumn\" : [ 0, 1, 2 ]\n"
+                                + "    }, {\n"
+                                + "      \"nestedStringColumn\" : \"val_1\",\n"
+                                + "      \"nestedIntsColumn\" : [ 1, 2, 3 ]\n"
+                                + "    }, {\n"
+                                + "      \"nestedStringColumn\" : \"val_2\",\n"
+                                + "      \"nestedIntsColumn\" : [ 2, 3, 4 ]\n"
+                                + "    } ]\n"
+                                + "  }\n"
+                                + "}";
+    Assert.assertEquals(expectedJson, 
DEFAULT_JSON_WRITER.writeValueAsString(sampled.get(0).getRawValues()));
+    Assert.assertTrue(String.valueOf(entity.pos), entity.pos > 0);

Review Comment:
   ## Useless comparison test
   
   Test is always false.
   
   [Show more 
details](https://github.com/apache/druid/security/code-scanning/7254)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to