This is an automated email from the ASF dual-hosted git repository.

gabor pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/parquet-mr.git
commit a150f245f0783fbd7d52da04320d256c60087bdb Author: Gabor Szadovszky <[email protected]> AuthorDate: Wed Aug 22 14:36:12 2018 +0200 PARQUET-1399: Move parquet-mr related code from parquet-format --- parquet-avro/pom.xml | 4 +- parquet-common/pom.xml | 4 +- parquet-format-structures/pom.xml | 178 +++++++-------------- .../apache/parquet/format/InterningProtocol.java | 3 - .../main/java/org/apache/parquet/format/Util.java | 9 +- .../org/apache/parquet/format/event/Consumers.java | 12 +- .../format/event/EventBasedThriftReader.java | 15 +- .../apache/parquet/format/event/FieldConsumer.java | 6 +- .../apache/parquet/format/event/TypedConsumer.java | 15 +- parquet-hadoop/pom.xml | 4 +- parquet-pig/pom.xml | 4 +- parquet-protobuf/pom.xml | 11 ++ parquet-thrift/pom.xml | 11 ++ parquet-tools/pom.xml | 4 +- pom.xml | 8 + 15 files changed, 116 insertions(+), 172 deletions(-) diff --git a/parquet-avro/pom.xml b/parquet-avro/pom.xml index 3592121..bc3603f 100644 --- a/parquet-avro/pom.xml +++ b/parquet-avro/pom.xml @@ -45,8 +45,8 @@ </dependency> <dependency> <groupId>org.apache.parquet</groupId> - <artifactId>parquet-format</artifactId> - <version>${parquet.format.version}</version> + <artifactId>parquet-format-structures</artifactId> + <version>${project.version}</version> </dependency> <dependency> <groupId>org.apache.avro</groupId> diff --git a/parquet-common/pom.xml b/parquet-common/pom.xml index e7b2446..f9a60a9 100644 --- a/parquet-common/pom.xml +++ b/parquet-common/pom.xml @@ -38,8 +38,8 @@ <dependencies> <dependency> <groupId>org.apache.parquet</groupId> - <artifactId>parquet-format</artifactId> - <version>${parquet.format.version}</version> + <artifactId>parquet-format-structures</artifactId> + <version>${project.version}</version> </dependency> <dependency> diff --git a/parquet-format-structures/pom.xml b/parquet-format-structures/pom.xml index 0b0c114..e69cced 100644 --- a/parquet-format-structures/pom.xml +++ b/parquet-format-structures/pom.xml @@ -21,85 
+21,59 @@ <modelVersion>4.0.0</modelVersion> <parent> - <groupId>org.apache</groupId> - <artifactId>apache</artifactId> - <version>16</version> + <groupId>org.apache.parquet</groupId> + <artifactId>parquet</artifactId> + <relativePath>../pom.xml</relativePath> + <version>1.10.1-SNAPSHOT</version> </parent> - <groupId>org.apache.parquet</groupId> - <artifactId>parquet-format</artifactId> - <version>2.5.1-SNAPSHOT</version> + <artifactId>parquet-format-structures</artifactId> <packaging>jar</packaging> - <name>Apache Parquet Format</name> + <name>Apache Parquet Format Structures</name> <url>http://parquet.apache.org/</url> - <description>Parquet is a columnar storage format that supports nested data. This provides all generated metadata code.</description> - - <scm> - <connection>scm:git:[email protected]:apache/parquet-format.git</connection> - <url>scm:git:[email protected]:apache/parquet-format.git</url> - <developerConnection>scm:git:[email protected]:apache/parquet-format.git</developerConnection> - <tag>HEAD</tag> - </scm> - - <licenses> - <!-- This is also in the Apache parent POM, but adding it here includes it - in dependency-reduced-pom.xml so that it passes the rat check. 
--> - <license> - <name>The Apache Software License, Version 2.0</name> - <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url> - </license> - </licenses> - - <issueManagement> - <system>JIRA</system> - <url>https://issues.apache.org/jira/browse/PARQUET</url> - </issueManagement> - - <mailingLists> - <mailingList> - <name>Dev Mailing List</name> - <post>[email protected]</post> - <subscribe>[email protected]</subscribe> - <unsubscribe>[email protected]</unsubscribe> - </mailingList> - <mailingList> - <name>Commits Mailing List</name> - <post>[email protected]</post> - <subscribe>[email protected]</subscribe> - <unsubscribe>[email protected]</unsubscribe> - </mailingList> - </mailingLists> - - <developers> - <developer> - <name>Julien Le Dem</name> - <email>[email protected]</email> - </developer> - <developer> - <name>Nong Li</name> - <email>[email protected]</email> - </developer> - </developers> + <description>Parquet-mr related java classes to use the parquet-format thrift structures.</description> <properties> - <maven.compiler.source>1.6</maven.compiler.source> - <maven.compiler.target>1.6</maven.compiler.target> - <shade.prefix>shaded.parquet</shade.prefix> - <thrift.executable>thrift</thrift.executable> - <thrift.version>0.9.3</thrift.version> + <parquet.thrift.path>${project.build.directory}/parquet-format-thrift</parquet.thrift.path> </properties> <build> <plugins> + <!-- Getting the parquet-format thrift file --> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-dependency-plugin</artifactId> + <executions> + <execution> + <id>unpack</id> + <phase>generate-sources</phase> + <goals> + <goal>unpack</goal> + </goals> + <configuration> + <artifactItems> + <artifactItem> + <groupId>org.apache.parquet</groupId> + <artifactId>parquet-format</artifactId> + <version>${parquet.format.version}</version> + <type>jar</type> + </artifactItem> + </artifactItems> + <includes>parquet.thrift</includes> + 
<outputDirectory>${parquet.thrift.path}</outputDirectory> + </configuration> + </execution> + </executions> + </plugin> <!-- thrift --> <plugin> <groupId>org.apache.thrift.tools</groupId> <artifactId>maven-thrift-plugin</artifactId> <version>0.1.11</version> <configuration> - <thriftSourceRoot>src/main/thrift</thriftSourceRoot> - <thriftExecutable>${thrift.executable}</thriftExecutable> + <thriftSourceRoot>${parquet.thrift.path}</thriftSourceRoot> + <thriftExecutable>${format.thrift.executable}</thriftExecutable> </configuration> <executions> <execution> @@ -112,28 +86,8 @@ </executions> </plugin> <plugin> - <!-- Disable the source artifact from ASF parent --> - <artifactId>maven-assembly-plugin</artifactId> - <executions> - <execution> - <id>source-release-assembly</id> - <phase>none</phase> - </execution> - </executions> - </plugin> - <plugin> - <!-- Override source and target from the ASF parent --> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-compiler-plugin</artifactId> - <configuration> - <source>${maven.compiler.source}</source> - <target>${maven.compiler.target}</target> - </configuration> - </plugin> - <plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-shade-plugin</artifactId> - <version>2.2</version> <executions> <execution> <phase>package</phase> @@ -141,6 +95,7 @@ <goal>shade</goal> </goals> <configuration> + <keepDependenciesWithProvidedScope>true</keepDependenciesWithProvidedScope> <artifactSet> <includes> <include>org.apache.thrift:libthrift</include> @@ -167,60 +122,41 @@ </execution> </executions> </plugin> + <!-- Configure build/javadoc as well to support "mvn javadoc:javadoc" --> <plugin> - <groupId>org.apache.rat</groupId> - <artifactId>apache-rat-plugin</artifactId> - <version>0.12</version> - <executions> - <execution> - <phase>test</phase> - <goals> - <goal>check</goal> - </goals> - </execution> - </executions> + <groupId>org.apache.maven.plugins</groupId> + 
<artifactId>maven-javadoc-plugin</artifactId> <configuration> - <consoleOutput>true</consoleOutput> - <excludes> - <exclude>**/*.avro</exclude> - <exclude>**/*.avsc</exclude> - <exclude>**/*.avdl</exclude> - <exclude>**/*.iml</exclude> - <exclude>**/*.log</exclude> - <exclude>**/*.md.vm</exclude> - <exclude>**/.classpath</exclude> - <exclude>**/.project</exclude> - <exclude>**/.settings/**</exclude> - <exclude>**/build/**</exclude> - <exclude>**/target/**</exclude> - <exclude>.git/**</exclude> - <exclude>.idea/**</exclude> - <exclude>*/jdiff/*.xml</exclude> - <exclude>licenses/**</exclude> - <exclude>thrift-${thrift.version}/**</exclude> - <exclude>thrift-${thrift.version}.tar.gz</exclude> - </excludes> + <!-- We have to turn off the javadoc check because thrift generates improper comments --> + <additionalparam>-Xdoclint:none</additionalparam> </configuration> </plugin> </plugins> </build> + <reports> + <plugins> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-javadoc-plugin</artifactId> + <configuration> + <!-- We have to turn off the javadoc check because thrift generates improper comments --> + <additionalparam>-Xdoclint:none</additionalparam> + </configuration> + </plugin> + </plugins> + </reports> + <dependencies> <dependency> <groupId>org.slf4j</groupId> <artifactId>slf4j-api</artifactId> - <version>1.7.12</version> + <version>${slf4j.version}</version> </dependency> <dependency> <groupId>org.apache.thrift</groupId> <artifactId>libthrift</artifactId> - <version>${thrift.version}</version> - </dependency> - <dependency> - <groupId>junit</groupId> - <artifactId>junit</artifactId> - <version>4.10</version> - <scope>test</scope> + <version>${format.thrift.version}</version> </dependency> </dependencies> diff --git a/parquet-format-structures/src/main/java/org/apache/parquet/format/InterningProtocol.java b/parquet-format-structures/src/main/java/org/apache/parquet/format/InterningProtocol.java index 6297f1c..a405d4f 100644 --- 
a/parquet-format-structures/src/main/java/org/apache/parquet/format/InterningProtocol.java +++ b/parquet-format-structures/src/main/java/org/apache/parquet/format/InterningProtocol.java @@ -33,9 +33,6 @@ import org.apache.thrift.transport.TTransport; /** * TProtocol that interns the strings. - * - * @author Julien Le Dem - * */ public class InterningProtocol extends TProtocol { diff --git a/parquet-format-structures/src/main/java/org/apache/parquet/format/Util.java b/parquet-format-structures/src/main/java/org/apache/parquet/format/Util.java index 55d61ff..d09d007 100644 --- a/parquet-format-structures/src/main/java/org/apache/parquet/format/Util.java +++ b/parquet-format-structures/src/main/java/org/apache/parquet/format/Util.java @@ -51,9 +51,6 @@ import org.apache.parquet.format.event.TypedConsumer.StringConsumer; /** * Utility to read/write metadata * We use the TCompactProtocol to serialize metadata - * - * @author Julien Le Dem - * */ public class Util { @@ -93,7 +90,7 @@ public class Util { * @param from the stream to read the metadata from * @param skipRowGroups whether row groups should be skipped * @return the resulting metadata - * @throws IOException + * @throws IOException if any I/O error occurs during the reading */ public static FileMetaData readFileMetaData(InputStream from, boolean skipRowGroups) throws IOException { FileMetaData md = new FileMetaData(); @@ -108,8 +105,6 @@ public class Util { /** * To read metadata in a streaming fashion. 
* - * @author Julien Le Dem - * */ public static abstract class FileMetaDataConsumer { abstract public void setVersion(int version); @@ -123,8 +118,6 @@ public class Util { /** * Simple default consumer that sets the fields * - * @author Julien Le Dem - * */ public static final class DefaultFileMetaDataConsumer extends FileMetaDataConsumer { private final FileMetaData md; diff --git a/parquet-format-structures/src/main/java/org/apache/parquet/format/event/Consumers.java b/parquet-format-structures/src/main/java/org/apache/parquet/format/event/Consumers.java index ea61d63..ef87997 100644 --- a/parquet-format-structures/src/main/java/org/apache/parquet/format/event/Consumers.java +++ b/parquet-format-structures/src/main/java/org/apache/parquet/format/event/Consumers.java @@ -34,21 +34,16 @@ import org.apache.thrift.protocol.TProtocol; import org.apache.thrift.protocol.TProtocolUtil; import org.apache.parquet.format.event.Consumers.Consumer; -import org.apache.parquet.format.event.TypedConsumer.BoolConsumer; import org.apache.parquet.format.event.TypedConsumer.ListConsumer; import org.apache.parquet.format.event.TypedConsumer.StructConsumer; /** * Entry point for reading thrift in a streaming fashion - * - * @author Julien Le Dem - * */ public class Consumers { /** * To consume objects coming from a DelegatingFieldConsumer - * @author Julien Le Dem * * @param <T> the type of consumed objects */ @@ -59,10 +54,8 @@ public class Consumers { /** * Delegates reading the field to TypedConsumers. * There is one TypedConsumer per thrift type. - * use {@link DelegatingFieldConsumer#onField(TFieldIdEnum, BoolConsumer)} et al. to consume specific thrift fields. + * use {@link #onField(TFieldIdEnum, TypedConsumer)} et al. to consume specific thrift fields. 
* @see Consumers#fieldConsumer() - * @author Julien Le Dem - * */ public static class DelegatingFieldConsumer implements FieldConsumer { @@ -111,8 +104,9 @@ public class Consumers { /** * To consume a list of elements - * @param c the type of the list content + * @param c the class of the list content * @param consumer the consumer that will receive the list + * @param <T> the type of the list content * @return a ListConsumer that can be passed to the DelegatingFieldConsumer */ public static <T extends TBase<T,? extends TFieldIdEnum>> ListConsumer listOf(Class<T> c, final Consumer<List<T>> consumer) { diff --git a/parquet-format-structures/src/main/java/org/apache/parquet/format/event/EventBasedThriftReader.java b/parquet-format-structures/src/main/java/org/apache/parquet/format/event/EventBasedThriftReader.java index e88432f..2fb9cf6 100644 --- a/parquet-format-structures/src/main/java/org/apache/parquet/format/event/EventBasedThriftReader.java +++ b/parquet-format-structures/src/main/java/org/apache/parquet/format/event/EventBasedThriftReader.java @@ -32,9 +32,6 @@ import org.apache.parquet.format.event.TypedConsumer.SetConsumer; /** * Event based reader for Thrift - * - * @author Julien Le Dem - * */ public final class EventBasedThriftReader { @@ -50,7 +47,7 @@ public final class EventBasedThriftReader { /** * reads a Struct from the underlying protocol and passes the field events to the FieldConsumer * @param c the field consumer - * @throws TException + * @throws TException if any thrift related error occurs during the reading */ public void readStruct(FieldConsumer c) throws TException { protocol.readStructBegin(); @@ -61,7 +58,7 @@ public final class EventBasedThriftReader { /** * reads the content of a struct (fields) from the underlying protocol and passes the events to c * @param c the field consumer - * @throws TException + * @throws TException if any thrift related error occurs during the reading */ public void readStructContent(FieldConsumer c) throws 
TException { TField field; @@ -78,7 +75,7 @@ public final class EventBasedThriftReader { * reads the set content (elements) from the underlying protocol and passes the events to the set event consumer * @param eventConsumer the consumer * @param tSet the set descriptor - * @throws TException + * @throws TException if any thrift related error occurs during the reading */ public void readSetContent(SetConsumer eventConsumer, TSet tSet) throws TException { @@ -91,7 +88,7 @@ public final class EventBasedThriftReader { * reads the map content (key values) from the underlying protocol and passes the events to the map event consumer * @param eventConsumer the consumer * @param tMap the map descriptor - * @throws TException + * @throws TException if any thrift related error occurs during the reading */ public void readMapContent(MapConsumer eventConsumer, TMap tMap) throws TException { @@ -106,7 +103,7 @@ public final class EventBasedThriftReader { * @param keyConsumer the consumer for the key * @param valueType the type of the value * @param valueConsumer the consumer for the value - * @throws TException + * @throws TException if any thrift related error occurs during the reading */ public void readMapEntry(byte keyType, TypedConsumer keyConsumer, byte valueType, TypedConsumer valueConsumer) throws TException { @@ -118,7 +115,7 @@ public final class EventBasedThriftReader { * reads the list content (elements) from the underlying protocol and passes the events to the list event consumer * @param eventConsumer the consumer * @param tList the list descriptor - * @throws TException + * @throws TException if any thrift related error occurs during the reading */ public void readListContent(ListConsumer eventConsumer, TList tList) throws TException { diff --git a/parquet-format-structures/src/main/java/org/apache/parquet/format/event/FieldConsumer.java b/parquet-format-structures/src/main/java/org/apache/parquet/format/event/FieldConsumer.java index 2be4d6e..6656934 100644 --- 
a/parquet-format-structures/src/main/java/org/apache/parquet/format/event/FieldConsumer.java +++ b/parquet-format-structures/src/main/java/org/apache/parquet/format/event/FieldConsumer.java @@ -23,9 +23,6 @@ import org.apache.thrift.protocol.TProtocol; /** * To receive Thrift field events - * - * @author Julien Le Dem - * */ public interface FieldConsumer { @@ -35,8 +32,7 @@ public interface FieldConsumer { * @param eventBasedThriftReader the reader to delegate to further calls. * @param id the id of the field * @param type the type of the field - * @return the typed consumer to pass the value to - * @throws TException + * @throws TException if any thrift related error occurs during the reading */ public void consumeField(TProtocol protocol, EventBasedThriftReader eventBasedThriftReader, short id, byte type) throws TException; diff --git a/parquet-format-structures/src/main/java/org/apache/parquet/format/event/TypedConsumer.java b/parquet-format-structures/src/main/java/org/apache/parquet/format/event/TypedConsumer.java index 1d10ad6..734449f 100644 --- a/parquet-format-structures/src/main/java/org/apache/parquet/format/event/TypedConsumer.java +++ b/parquet-format-structures/src/main/java/org/apache/parquet/format/event/TypedConsumer.java @@ -38,9 +38,6 @@ import org.apache.thrift.protocol.TSet; /** * receive thrift events of a given type - * - * @author Julien Le Dem - * */ abstract public class TypedConsumer { @@ -117,7 +114,7 @@ abstract public class TypedConsumer { * reader.readStruct(fieldConsumer); * @param protocol the underlying protocol * @param reader the reader to delegate to - * @throws TException + * @throws TException if any thrift related error occurs during the reading */ abstract public void consumeStruct(TProtocol protocol, EventBasedThriftReader reader) throws TException; } @@ -136,7 +133,8 @@ abstract public class TypedConsumer { * can either delegate to the reader or read the element from the protocol * @param protocol the underlying protocol 
* @param reader the reader to delegate to - * @throws TException + * @param elemType the type of the element + * @throws TException if any thrift related error occurs during the reading */ abstract public void consumeElement(TProtocol protocol, EventBasedThriftReader reader, byte elemType) throws TException; } @@ -155,7 +153,8 @@ abstract public class TypedConsumer { * can either delegate to the reader or read the set from the protocol * @param protocol the underlying protocol * @param reader the reader to delegate to - * @throws TException + * @param elemType the type of the element + * @throws TException if any thrift related error occurs during the reading */ abstract public void consumeElement( TProtocol protocol, EventBasedThriftReader reader, @@ -177,7 +176,9 @@ abstract public class TypedConsumer { * can either delegate to the reader or read the map entry from the protocol * @param protocol the underlying protocol * @param reader the reader to delegate to - * @throws TException + * @param keyType the type of the key + * @param valueType the type of the value + * @throws TException if any thrift related error occurs during the reading */ abstract public void consumeEntry( TProtocol protocol, EventBasedThriftReader reader, diff --git a/parquet-hadoop/pom.xml b/parquet-hadoop/pom.xml index 98972a2..8d31f7d 100644 --- a/parquet-hadoop/pom.xml +++ b/parquet-hadoop/pom.xml @@ -43,8 +43,8 @@ </dependency> <dependency> <groupId>org.apache.parquet</groupId> - <artifactId>parquet-format</artifactId> - <version>${parquet.format.version}</version> + <artifactId>parquet-format-structures</artifactId> + <version>${project.version}</version> </dependency> <dependency> <groupId>org.apache.hadoop</groupId> diff --git a/parquet-pig/pom.xml b/parquet-pig/pom.xml index 3b7e570..0d3f202 100644 --- a/parquet-pig/pom.xml +++ b/parquet-pig/pom.xml @@ -48,8 +48,8 @@ </dependency> <dependency> <groupId>org.apache.parquet</groupId> - <artifactId>parquet-format</artifactId> - 
<version>${parquet.format.version}</version> + <artifactId>parquet-format-structures</artifactId> + <version>${project.version}</version> </dependency> <dependency> <groupId>org.apache.pig</groupId> diff --git a/parquet-protobuf/pom.xml b/parquet-protobuf/pom.xml index b6f4627..329046d 100644 --- a/parquet-protobuf/pom.xml +++ b/parquet-protobuf/pom.xml @@ -86,6 +86,17 @@ </dependency> </dependencies> + <dependencyManagement> + <dependencies> + <!-- com.twitter.elephantbird brings in an older version of libthrift so we force to use our own one --> + <dependency> + <groupId>org.apache.thrift</groupId> + <artifactId>libthrift</artifactId> + <version>${format.thrift.version}</version> + </dependency> + </dependencies> + </dependencyManagement> + <developers> <developer> <id>lukasnalezenec</id> diff --git a/parquet-thrift/pom.xml b/parquet-thrift/pom.xml index 51a6b9b..4340430 100644 --- a/parquet-thrift/pom.xml +++ b/parquet-thrift/pom.xml @@ -144,6 +144,17 @@ </dependencies> + <dependencyManagement> + <dependencies> + <!-- com.twitter.elephantbird brings in an older version of libthrift so we force to use our own one --> + <dependency> + <groupId>org.apache.thrift</groupId> + <artifactId>libthrift</artifactId> + <version>${thrift.version}</version> + </dependency> + </dependencies> + </dependencyManagement> + <build> <plugins> <plugin> diff --git a/parquet-tools/pom.xml b/parquet-tools/pom.xml index 566f8f1..32ee4d8 100644 --- a/parquet-tools/pom.xml +++ b/parquet-tools/pom.xml @@ -48,8 +48,8 @@ <dependencies> <dependency> <groupId>org.apache.parquet</groupId> - <artifactId>parquet-format</artifactId> - <version>${parquet.format.version}</version> + <artifactId>parquet-format-structures</artifactId> + <version>${project.version}</version> </dependency> <dependency> <groupId>org.apache.parquet</groupId> diff --git a/pom.xml b/pom.xml index 7b3f36f..4c9d79c 100644 --- a/pom.xml +++ b/pom.xml @@ -84,6 +84,7 @@ <parquet.format.version>2.4.0</parquet.format.version> 
<previous.version>1.7.0</previous.version> <thrift.executable>thrift</thrift.executable> + <format.thrift.executable>thrift</format.thrift.executable> <scala.version>2.10.6</scala.version> <!-- scala.binary.version is used for projects that fetch dependencies that are in scala --> <scala.binary.version>2.10</scala.binary.version> @@ -92,6 +93,7 @@ <pig.classifier>h2</pig.classifier> <thrift-maven-plugin.version>0.10.0</thrift-maven-plugin.version> <thrift.version>0.9.3</thrift.version> + <format.thrift.version>0.9.3</format.thrift.version> <fastutil.version>7.0.13</fastutil.version> <semver.api.version>0.9.33</semver.api.version> <slf4j.version>1.7.22</slf4j.version> @@ -117,6 +119,7 @@ <module>parquet-column</module> <module>parquet-common</module> <module>parquet-encoding</module> + <module>parquet-format-structures</module> <module>parquet-generator</module> <module>parquet-hadoop</module> <module>parquet-jackson</module> @@ -175,6 +178,11 @@ </reports> </reportSet> </reportSets> + <configuration> + <sourceFileExcludes> + <sourceFileExclude>**/generated-sources/**/*.java</sourceFileExclude> + </sourceFileExcludes> + </configuration> </plugin> <plugin> <groupId>org.codehaus.mojo</groupId>
