TAJO-849: Add Parquet storage to HCatalogStore. (jaehwa)
Project: http://git-wip-us.apache.org/repos/asf/tajo/repo Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/64106a32 Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/64106a32 Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/64106a32 Branch: refs/heads/window_function Commit: 64106a3223c882c260ba39edd53c85d1900b83a0 Parents: f781007 Author: blrunner <[email protected]> Authored: Mon Jun 2 16:14:37 2014 -0700 Committer: blrunner <[email protected]> Committed: Mon Jun 2 16:14:37 2014 -0700 ---------------------------------------------------------------------- CHANGES | 2 ++ .../tajo-catalog-drivers/tajo-hcatalog/pom.xml | 7 +++++ .../tajo/catalog/store/HCatalogStore.java | 9 +++++- .../apache/tajo/catalog/store/HCatalogUtil.java | 5 +++- .../tajo/catalog/store/TestHCatalogStore.java | 30 ++++++++++++++++++++ tajo-dist/pom.xml | 8 ++++++ 6 files changed, 59 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/tajo/blob/64106a32/CHANGES ---------------------------------------------------------------------- diff --git a/CHANGES b/CHANGES index 84a83e8..d2436e9 100644 --- a/CHANGES +++ b/CHANGES @@ -4,6 +4,8 @@ Release 0.9.0 - unreleased NEW FEATURES + TAJO-849: Add Parquet storage to HCatalogStore. (jaehwa) + TAJO-494: Extend TajoClient to run a query with a plan context serialized as the JSON form. (jihoon) http://git-wip-us.apache.org/repos/asf/tajo/blob/64106a32/tajo-catalog/tajo-catalog-drivers/tajo-hcatalog/pom.xml ---------------------------------------------------------------------- diff --git a/tajo-catalog/tajo-catalog-drivers/tajo-hcatalog/pom.xml b/tajo-catalog/tajo-catalog-drivers/tajo-hcatalog/pom.xml index a3cb99b..2c939d4 100644 --- a/tajo-catalog/tajo-catalog-drivers/tajo-hcatalog/pom.xml +++ b/tajo-catalog/tajo-catalog-drivers/tajo-hcatalog/pom.xml @@ -138,6 +138,8 @@ </activation> <properties> <hive.version>0.12.0</hive.version> + <parquet.version>1.4.2</parquet.version> + <parquet.format.version>2.0.0</parquet.format.version> </properties> <dependencies> <dependency> @@ -304,6 +306,11 @@ </exclusion> </exclusions> </dependency> + <dependency> + <groupId>com.twitter</groupId> + <artifactId>parquet-hive-bundle</artifactId> + <version>${parquet.version}</version> + </dependency> </dependencies> </profile> <profile> http://git-wip-us.apache.org/repos/asf/tajo/blob/64106a32/tajo-catalog/tajo-catalog-drivers/tajo-hcatalog/src/main/java/org/apache/tajo/catalog/store/HCatalogStore.java ---------------------------------------------------------------------- diff --git a/tajo-catalog/tajo-catalog-drivers/tajo-hcatalog/src/main/java/org/apache/tajo/catalog/store/HCatalogStore.java b/tajo-catalog/tajo-catalog-drivers/tajo-hcatalog/src/main/java/org/apache/tajo/catalog/store/HCatalogStore.java index 7924af1..3008ed9 100644 --- a/tajo-catalog/tajo-catalog-drivers/tajo-hcatalog/src/main/java/org/apache/tajo/catalog/store/HCatalogStore.java +++ b/tajo-catalog/tajo-catalog-drivers/tajo-hcatalog/src/main/java/org/apache/tajo/catalog/store/HCatalogStore.java @@ -537,7 +537,14 @@ public class HCatalogStore extends CatalogConstants implements CatalogStore { table.getParameters().remove(StorageConstants.SEQUENCEFILE_NULL); } } else { - throw new CatalogException(new NotImplementedException(tableDesc.getMeta().getStoreType().name())); + if (tableDesc.getMeta().getStoreType().equals(CatalogProtos.StoreType.PARQUET)) { + sd.setInputFormat(parquet.hive.DeprecatedParquetInputFormat.class.getName()); + sd.setOutputFormat(parquet.hive.DeprecatedParquetOutputFormat.class.getName()); + sd.getSerdeInfo().setSerializationLib(parquet.hive.serde.ParquetHiveSerDe.class.getName()); + } else { + throw new CatalogException(new NotImplementedException(tableDesc.getMeta().getStoreType + ().name())); + } } sd.setSortCols(new ArrayList<Order>()); http://git-wip-us.apache.org/repos/asf/tajo/blob/64106a32/tajo-catalog/tajo-catalog-drivers/tajo-hcatalog/src/main/java/org/apache/tajo/catalog/store/HCatalogUtil.java ---------------------------------------------------------------------- diff --git a/tajo-catalog/tajo-catalog-drivers/tajo-hcatalog/src/main/java/org/apache/tajo/catalog/store/HCatalogUtil.java b/tajo-catalog/tajo-catalog-drivers/tajo-hcatalog/src/main/java/org/apache/tajo/catalog/store/HCatalogUtil.java index 98aa7c5..9e60768 100644 --- a/tajo-catalog/tajo-catalog-drivers/tajo-hcatalog/src/main/java/org/apache/tajo/catalog/store/HCatalogUtil.java +++ b/tajo-catalog/tajo-catalog-drivers/tajo-hcatalog/src/main/java/org/apache/tajo/catalog/store/HCatalogUtil.java @@ -30,6 +30,7 @@ import org.apache.hcatalog.data.schema.HCatSchema; import org.apache.tajo.catalog.exception.CatalogException; import org.apache.tajo.catalog.proto.CatalogProtos; import org.apache.tajo.common.TajoDataTypes; +import parquet.hadoop.mapred.DeprecatedParquetOutputFormat; public class HCatalogUtil { protected final Log LOG = LogFactory.getLog(getClass()); @@ -128,7 +129,9 @@ public class HCatalogUtil { } else if(outputFormatClass.equals(HiveSequenceFileOutputFormat.class.getSimpleName())) { return CatalogProtos.StoreType.SEQUENCEFILE.name(); } else if(outputFormatClass.equals(RCFileOutputFormat.class.getSimpleName())) { - return CatalogProtos.StoreType.RCFILE.name(); + return CatalogProtos.StoreType.RCFILE.name(); + } else if(outputFormatClass.equals(DeprecatedParquetOutputFormat.class.getSimpleName())) { + return CatalogProtos.StoreType.PARQUET.name(); } else { throw new CatalogException("Not supported file output format. - file output format:" + fileFormat); } http://git-wip-us.apache.org/repos/asf/tajo/blob/64106a32/tajo-catalog/tajo-catalog-drivers/tajo-hcatalog/src/test/java/org/apache/tajo/catalog/store/TestHCatalogStore.java ---------------------------------------------------------------------- diff --git a/tajo-catalog/tajo-catalog-drivers/tajo-hcatalog/src/test/java/org/apache/tajo/catalog/store/TestHCatalogStore.java b/tajo-catalog/tajo-catalog-drivers/tajo-hcatalog/src/test/java/org/apache/tajo/catalog/store/TestHCatalogStore.java index 729184a..a507b08 100644 --- a/tajo-catalog/tajo-catalog-drivers/tajo-hcatalog/src/test/java/org/apache/tajo/catalog/store/TestHCatalogStore.java +++ b/tajo-catalog/tajo-catalog-drivers/tajo-hcatalog/src/test/java/org/apache/tajo/catalog/store/TestHCatalogStore.java @@ -369,4 +369,34 @@ public class TestHCatalogStore { store.dropTable(DB_NAME, REGION); } + + @Test + public void testTableUsingParquet() throws Exception { + TableMeta meta = new TableMeta(CatalogProtos.StoreType.PARQUET, new KeyValueSet()); + + org.apache.tajo.catalog.Schema schema = new org.apache.tajo.catalog.Schema(); + schema.addColumn("c_custkey", TajoDataTypes.Type.INT4); + schema.addColumn("c_name", TajoDataTypes.Type.TEXT); + schema.addColumn("c_address", TajoDataTypes.Type.TEXT); + schema.addColumn("c_nationkey", TajoDataTypes.Type.INT4); + schema.addColumn("c_phone", TajoDataTypes.Type.TEXT); + schema.addColumn("c_acctbal", TajoDataTypes.Type.FLOAT8); + schema.addColumn("c_mktsegment", TajoDataTypes.Type.TEXT); + schema.addColumn("c_comment", TajoDataTypes.Type.TEXT); + + TableDesc table = new TableDesc(CatalogUtil.buildFQName(DB_NAME, CUSTOMER), schema, meta, + new Path(warehousePath, new Path(DB_NAME, CUSTOMER))); + store.createTable(table.getProto()); + assertTrue(store.existTable(DB_NAME, CUSTOMER)); + + TableDesc table1 = new TableDesc(store.getTable(DB_NAME, CUSTOMER)); + assertEquals(table.getName(), table1.getName()); + assertEquals(table.getPath(), table1.getPath()); + assertEquals(table.getSchema().size(), table1.getSchema().size()); + for (int i = 0; i < table.getSchema().size(); i++) { + assertEquals(table.getSchema().getColumn(i).getSimpleName(), table1.getSchema().getColumn(i).getSimpleName()); + } + + store.dropTable(DB_NAME, CUSTOMER); + } } http://git-wip-us.apache.org/repos/asf/tajo/blob/64106a32/tajo-dist/pom.xml ---------------------------------------------------------------------- diff --git a/tajo-dist/pom.xml b/tajo-dist/pom.xml index c826c57..103bbac 100644 --- a/tajo-dist/pom.xml +++ b/tajo-dist/pom.xml @@ -120,6 +120,14 @@ run cp -r $ROOT/tajo-jdbc/target/tajo-jdbc-${project.version}.jar ./share/jdbc-dist run cp -r $ROOT/tajo-jdbc/target/lib/* ./share/jdbc-dist + if [ -f $ROOT/tajo-catalog/tajo-catalog-drivers/tajo-hcatalog/target/lib/parquet-hive-bundle-*.jar ] + then + run cp -r $ROOT/tajo-catalog/tajo-catalog-drivers/tajo-hcatalog/target/lib/parquet-hive-bundle-*.jar lib/ + echo + echo "Tajo installed parquet-hive-bundle library at: ${project.build.directory}/tajo-${project.version}" + echo + fi + echo echo "Tajo dist layout available at: ${project.build.directory}/tajo-${project.version}" echo
