[
https://issues.apache.org/jira/browse/HADOOP-19389?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17914179#comment-17914179
]
ASF GitHub Bot commented on HADOOP-19389:
-----------------------------------------
cnauroth commented on code in PR #7291:
URL: https://github.com/apache/hadoop/pull/7291#discussion_r1920472703
##########
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestTextCommand.java:
##########
@@ -67,39 +75,194 @@ public void testDisplayForAvroFiles() throws Exception {
assertEquals(expectedOutput, output);
}
+ @Test
+ public void testEmptyAvroFile() throws Exception {
+ String output = readUsingTextCommand(AVRO_FILENAME,
+ generateEmptyAvroBinaryData());
+ assertTrue(output.isEmpty());
+ }
+
+ @Test(expected = NullPointerException.class)
+ public void testAvroFileInputStreamNullBuffer() throws Exception {
+ createFile(AVRO_FILENAME, generateWeatherAvroBinaryData());
+ URI uri = new URI(AVRO_FILENAME);
+ Configuration conf = new Configuration();
+ try (InputStream is = getInputStream(uri, conf)) {
+ is.read(null, 0, 10);
+ }
+ }
+
+ @Test(expected = IndexOutOfBoundsException.class)
+ public void testAvroFileInputStreamNegativePosition() throws Exception {
+ createFile(AVRO_FILENAME, generateWeatherAvroBinaryData());
+ URI uri = new URI(AVRO_FILENAME);
+ Configuration conf = new Configuration();
+ try (InputStream is = getInputStream(uri, conf)) {
+ is.read(new byte[10], -1, 10);
+ }
+ }
+
+ @Test(expected = IndexOutOfBoundsException.class)
+ public void testAvroFileInputStreamTooLong() throws Exception {
+ createFile(AVRO_FILENAME, generateWeatherAvroBinaryData());
+ URI uri = new URI(AVRO_FILENAME);
+ Configuration conf = new Configuration();
+ try (InputStream is = getInputStream(uri, conf)) {
+ is.read(new byte[10], 0, 11);
+ }
+ }
+
+ @Test
+ public void testAvroFileInputStreamZeroLengthRead() throws Exception {
+ createFile(AVRO_FILENAME, generateWeatherAvroBinaryData());
+ URI uri = new URI(AVRO_FILENAME);
+ Configuration conf = new Configuration();
+ try (InputStream is = getInputStream(uri, conf)) {
+ assertEquals(0, is.read(new byte[10], 0, 0));
Review Comment:
I'm behind the times! Changed the whole class to AssertJ.
##########
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestTextCommand.java:
##########
@@ -67,39 +75,194 @@ public void testDisplayForAvroFiles() throws Exception {
assertEquals(expectedOutput, output);
}
+ @Test
+ public void testEmptyAvroFile() throws Exception {
+ String output = readUsingTextCommand(AVRO_FILENAME,
+ generateEmptyAvroBinaryData());
+ assertTrue(output.isEmpty());
+ }
+
+ @Test(expected = NullPointerException.class)
+ public void testAvroFileInputStreamNullBuffer() throws Exception {
+ createFile(AVRO_FILENAME, generateWeatherAvroBinaryData());
+ URI uri = new URI(AVRO_FILENAME);
+ Configuration conf = new Configuration();
+ try (InputStream is = getInputStream(uri, conf)) {
+ is.read(null, 0, 10);
+ }
+ }
+
+ @Test(expected = IndexOutOfBoundsException.class)
+ public void testAvroFileInputStreamNegativePosition() throws Exception {
+ createFile(AVRO_FILENAME, generateWeatherAvroBinaryData());
+ URI uri = new URI(AVRO_FILENAME);
+ Configuration conf = new Configuration();
+ try (InputStream is = getInputStream(uri, conf)) {
+ is.read(new byte[10], -1, 10);
+ }
+ }
+
+ @Test(expected = IndexOutOfBoundsException.class)
+ public void testAvroFileInputStreamTooLong() throws Exception {
+ createFile(AVRO_FILENAME, generateWeatherAvroBinaryData());
+ URI uri = new URI(AVRO_FILENAME);
+ Configuration conf = new Configuration();
+ try (InputStream is = getInputStream(uri, conf)) {
+ is.read(new byte[10], 0, 11);
+ }
+ }
+
+ @Test
+ public void testAvroFileInputStreamZeroLengthRead() throws Exception {
+ createFile(AVRO_FILENAME, generateWeatherAvroBinaryData());
+ URI uri = new URI(AVRO_FILENAME);
+ Configuration conf = new Configuration();
+ try (InputStream is = getInputStream(uri, conf)) {
+ assertEquals(0, is.read(new byte[10], 0, 0));
+ }
+ }
+
+ @Test
+ public void testAvroFileInputStreamConsistentEOF() throws Exception {
+ createFile(AVRO_FILENAME, generateWeatherAvroBinaryData());
+ URI uri = new URI(AVRO_FILENAME);
+ Configuration conf = new Configuration();
+ try (InputStream is = getInputStream(uri, conf)) {
+ inputStreamToString(is);
+ assertEquals(-1, is.read());
+ assertEquals(-1, is.read(new byte[10], 0, 10));
+ }
+ }
+
+ @Test
+ public void testAvroFileInputStreamSingleAndMultiByteReads() throws
Exception {
+ createFile(AVRO_FILENAME, generateWeatherAvroBinaryData());
+ URI uri = new URI(AVRO_FILENAME);
+ Configuration conf = new Configuration();
+ try (InputStream is1 = getInputStream(uri, conf);
+ InputStream is2 = getInputStream(uri, conf)) {
+ String multiByteReads = inputStreamToString(is1);
+ String singleByteReads = inputStreamSingleByteReadsToString(is2);
+ assertEquals(multiByteReads, singleByteReads);
+ }
+ }
+
/**
* Tests that a zero-length file is displayed correctly.
*/
- @Test (timeout = 30000)
- public void testEmptyTextFil() throws Exception {
+ @Test
+ public void testEmptyTextFile() throws Exception {
byte[] emptyContents = { };
String output = readUsingTextCommand(TEXT_FILENAME, emptyContents);
- assertTrue("".equals(output));
+ assertTrue(output.isEmpty());
Review Comment:
Done.
> Optimize shell -text command I/O with multi-byte read.
> ------------------------------------------------------
>
> Key: HADOOP-19389
> URL: https://issues.apache.org/jira/browse/HADOOP-19389
> Project: Hadoop Common
> Issue Type: Improvement
> Components: command, fs, fs/azure, fs/gcs, fs/s3
> Affects Versions: 3.4.1
> Reporter: Chris Nauroth
> Assignee: Chris Nauroth
> Priority: Minor
> Labels: pull-request-available
>
> {{hadoop fs -text}} reads Avro files and sequence files by internally
> wrapping the stream in
> [{{AvroFileInputStream}}|https://github.com/apache/hadoop/blob/rel/release-3.4.1/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Display.java#L270]
> or
> [{{TextRecordInputStream}}|https://github.com/apache/hadoop/blob/rel/release-3.4.1/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Display.java#L217].
> These classes implement the required single-byte
> [{{read()}}|https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/io/InputStream.html#read()],
> but not the optional multi-byte buffered [{{read(byte[], int,
> int)}}|https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/io/InputStream.html#read(byte%5B%5D,int,int)].
> The default implementation in the JDK is a [loop over single-byte
> read|https://github.com/openjdk/jdk11u-dev/blob/a47c72fad455bfdf9053cb8e94c99e73965ab50d/src/java.base/share/classes/java/io/InputStream.java#L280],
> which causes sub-optimal I/O and method call overhead. We can optimize this
> by overriding the multi-byte read method.
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]