This is an automated email from the ASF dual-hosted git repository.

ihuzenko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git

commit df48eb8167219b7c632e8f6d760e76242805d6af
Author: Arina Ielchiieva <[email protected]>
AuthorDate: Wed Jul 31 14:04:32 2019 +0300

    DRILL-7334: Update Iceberg Metastore Parquet write mode
    
    closes #1832
---
 metastore/iceberg-metastore/README.md                 | 19 ++++++++++---------
 metastore/iceberg-metastore/pom.xml                   |  2 +-
 .../metastore/iceberg/write/ParquetFileWriter.java    |  7 ++++---
 .../src/main/resources/drill-metastore-module.conf    |  9 ++++-----
 .../iceberg/write/TestParquetFileWriter.java          |  8 +++-----
 5 files changed, 22 insertions(+), 23 deletions(-)

diff --git a/metastore/iceberg-metastore/README.md b/metastore/iceberg-metastore/README.md
index cc46c2a..3db08d1 100644
--- a/metastore/iceberg-metastore/README.md
+++ b/metastore/iceberg-metastore/README.md
@@ -56,16 +56,18 @@ Assume Iceberg table location is `/drill/metastore/iceberg/tables`, metadata for
 
 ### Metadata Storage Format
 
-By default, Metadata will be stored in Parquet files. 
-Each parquet file will hold information for one partition.
+Iceberg tables support data storage in three formats: Parquet, Avro, and ORC.
+Drill metadata will be stored in Parquet files. This format was chosen over the others
+since it is column-oriented and efficient in terms of disk I/O
+when specific columns need to be queried.
+
+Each Parquet file will hold information for one partition.
 Partition keys will depend on Metastore component characteristics.
 For example, for the `tables` component, partition keys are
 storage plugin, workspace, table name and metadata key.
 
 Parquet file names will be based on `UUID` to ensure uniqueness.
-
-Iceberg also supports data storage in Avro and ORC files, writing metadata
-in these formats can be added later.
+If a collision somehow occurs, the modify operation in the Metastore will fail.
 
 ## Metastore Operations flow
 
@@ -73,8 +75,7 @@ Metastore main goal is to provide ability to read and modify metadata.
 
 ### Read
 
-Metastore data is read using `IcebergGenerics#read`. Iceberg will automatically determine
-format in which data is stored (three formats are supported Parquet, Avro, ORC).
+Metastore data is read using `IcebergGenerics#read`.
 Based on given filter and select columns list, data will be returned in 
 `org.apache.iceberg.data.Record` format which will be transformed 
 into the list of Metastore component units and returned to the caller.
@@ -86,7 +87,7 @@ partition keys can be included into filter expression.
 ### Add
 
 To add metadata to the Iceberg table, the caller provides a list of component units which
-will be written into Parquet files (current default format) and grouped by partition keys.
+will be written into Parquet files and grouped by partition keys.
 Each group will be written into a separate Parquet file
 and stored in the location inside of the Iceberg table based on component unit location keys.
 Note: partition keys must not be null.
@@ -107,7 +108,7 @@ Parquet files with metadata for this table will be stored in
 `[METASTORE_ROOT_DIRECTORY]/[COMPONENT_LOCATION]/dfs/tmp/nation` folder.
 
 If `dfs.tmp.nation` is un-partitioned, its metadata will be stored in two
-parquet files: one file with general table information, 
+Parquet files: one file with general table information, 
 another file with default segment information. 
 If `dfs.tmp.nation` is partitioned, it will also have one file with general
 information and `N` files with top-level segments information. 
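
For context, a minimal sketch of the `IcebergGenerics#read` API the README refers to, assuming an `org.apache.iceberg.Table` instance is in hand; the column names and filter value below are illustrative, not the exact Drill Metastore call site:

    import org.apache.iceberg.Table;
    import org.apache.iceberg.data.IcebergGenerics;
    import org.apache.iceberg.data.Record;
    import org.apache.iceberg.expressions.Expressions;
    import org.apache.iceberg.io.CloseableIterable;

    // Project two columns and filter on a partition key; Iceberg determines
    // the on-disk format of each data file automatically.
    CloseableIterable<Record> records = IcebergGenerics.read(table)
        .select("storagePlugin", "tableName")
        .where(Expressions.equal("storagePlugin", "dfs"))
        .build();
    records.forEach(record -> System.out.println(record.getField("tableName")));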
diff --git a/metastore/iceberg-metastore/pom.xml b/metastore/iceberg-metastore/pom.xml
index f8544fd..d935750 100644
--- a/metastore/iceberg-metastore/pom.xml
+++ b/metastore/iceberg-metastore/pom.xml
@@ -33,7 +33,7 @@
   <name>metastore/Drill Iceberg Metastore</name>
 
   <properties>
-    <iceberg.version>77a456a</iceberg.version>
+    <iceberg.version>08e0873</iceberg.version>
     <caffeine.version>2.7.0</caffeine.version>
   </properties>
 
diff --git a/metastore/iceberg-metastore/src/main/java/org/apache/drill/metastore/iceberg/write/ParquetFileWriter.java b/metastore/iceberg-metastore/src/main/java/org/apache/drill/metastore/iceberg/write/ParquetFileWriter.java
index 3c1604d..3286676 100644
--- a/metastore/iceberg-metastore/src/main/java/org/apache/drill/metastore/iceberg/write/ParquetFileWriter.java
+++ b/metastore/iceberg-metastore/src/main/java/org/apache/drill/metastore/iceberg/write/ParquetFileWriter.java
@@ -23,6 +23,7 @@ import org.apache.iceberg.FileFormat;
 import org.apache.iceberg.Table;
 import org.apache.iceberg.data.Record;
 import org.apache.iceberg.data.parquet.GenericParquetWriter;
+import org.apache.iceberg.exceptions.RuntimeIOException;
 import org.apache.iceberg.io.FileAppender;
 import org.apache.iceberg.io.OutputFile;
 import org.apache.iceberg.parquet.Parquet;
@@ -35,7 +36,7 @@ import java.util.Objects;
 /**
  * Parquet File Writer implementation. Creates Parquet file in given location and name
  * and '.parquet' extension and writes given data into it.
- * Note: if file already exists, it will be overwritten.
+ * Note: if the file already exists, the write operation will fail.
  */
 public class ParquetFileWriter implements FileWriter {
 
@@ -83,7 +84,7 @@ public class ParquetFileWriter implements FileWriter {
       fileAppender.close();
       // metrics are available only when file was written (i.e. close method was executed)
       return new File(outputFile, fileAppender.metrics());
-    } catch (IOException | ClassCastException e) {
+    } catch (IOException | ClassCastException | RuntimeIOException e) {
       if (fileAppender != null) {
         try {
           fileAppender.close();
@@ -91,7 +92,7 @@ public class ParquetFileWriter implements FileWriter {
           // write has failed anyway, ignore closing exception if any and throw initial one
         }
       }
-      throw new IcebergMetastoreException("Unable to write data into parquet file", e);
+      throw new IcebergMetastoreException(String.format("Unable to write data into parquet file [%s]", outputFile.location()), e);
     }
   }
 }
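
The `RuntimeIOException` added to the catch clause above is Iceberg's unchecked wrapper for I/O failures, which now surfaces when the target file already exists and no overwrite is requested. A rough sketch of the appender construction this writer builds on, assuming a schema and output file are already available (names here are illustrative, not the exact Drill code):

    import java.io.IOException;
    import org.apache.iceberg.Schema;
    import org.apache.iceberg.data.Record;
    import org.apache.iceberg.data.parquet.GenericParquetWriter;
    import org.apache.iceberg.io.FileAppender;
    import org.apache.iceberg.io.OutputFile;
    import org.apache.iceberg.parquet.Parquet;

    // Without an explicit overwrite() on the builder, writing to an existing
    // file fails instead of silently replacing it, which is the mode this commit adopts.
    static FileAppender<Record> newAppender(OutputFile outputFile, Schema schema) throws IOException {
      return Parquet.write(outputFile)
          .schema(schema)
          .createWriterFunc(GenericParquetWriter::buildWriter)
          .build();
    }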
diff --git a/metastore/iceberg-metastore/src/main/resources/drill-metastore-module.conf b/metastore/iceberg-metastore/src/main/resources/drill-metastore-module.conf
index be98a97..33fe795 100644
--- a/metastore/iceberg-metastore/src/main/resources/drill-metastore-module.conf
+++ b/metastore/iceberg-metastore/src/main/resources/drill-metastore-module.conf
@@ -43,11 +43,10 @@ drill.metastore.iceberg: {
   }
 
   components: {
-    // Common properties for all Iceberg tables from org.apache.iceberg.TableProperties can be specified
-    common.properties: {
-      write.format.default: "parquet",
-      write.metadata.compression-codec: "none"
-    },
+    // Common properties for all Iceberg tables from org.apache.iceberg.TableProperties can be specified
+    common.properties: {
+      write.metadata.compression-codec: "none"
+    },
 
     tables: {
       // Iceberg table location in Iceberg Metastore
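
Dropping `write.format.default: "parquet"` from the common properties is presumably safe because Parquet is already Iceberg's built-in default. A sketch of how the default write format resolves from table properties, using constants that exist in `org.apache.iceberg.TableProperties`:

    import java.util.Locale;
    import org.apache.iceberg.FileFormat;
    import org.apache.iceberg.Table;
    import org.apache.iceberg.TableProperties;

    // DEFAULT_FILE_FORMAT is "write.format.default";
    // DEFAULT_FILE_FORMAT_DEFAULT is "parquet", so leaving it unset still means Parquet.
    static FileFormat resolveFormat(Table table) {
      String name = table.properties().getOrDefault(
          TableProperties.DEFAULT_FILE_FORMAT,
          TableProperties.DEFAULT_FILE_FORMAT_DEFAULT);
      return FileFormat.valueOf(name.toUpperCase(Locale.ENGLISH));
    }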
diff --git a/metastore/iceberg-metastore/src/test/java/org/apache/drill/metastore/iceberg/write/TestParquetFileWriter.java b/metastore/iceberg-metastore/src/test/java/org/apache/drill/metastore/iceberg/write/TestParquetFileWriter.java
index 6943185..4a9211a 100644
--- a/metastore/iceberg-metastore/src/test/java/org/apache/drill/metastore/iceberg/write/TestParquetFileWriter.java
+++ b/metastore/iceberg-metastore/src/test/java/org/apache/drill/metastore/iceberg/write/TestParquetFileWriter.java
@@ -261,15 +261,13 @@ public class TestParquetFileWriter extends IcebergBaseTest {
     java.nio.file.Path file = Paths.get(new File(location, FileFormat.PARQUET.addExtension(fileName)).getPath());
     Files.write(file, Collections.singletonList("abc"));
 
-    org.apache.drill.metastore.iceberg.write.File result = new ParquetFileWriter(table)
+    thrown.expect(IcebergMetastoreException.class);
+
+    new ParquetFileWriter(table)
       .records(Collections.singletonList(record))
       .location(location)
       .name(fileName)
       .write();
-
-    List<Record> rows = readData(result.input(), schema);
-    assertEquals(1, rows.size());
-    assertEquals(1, rows.get(0).getField("int_field"));
   }
 
   private List<Record> readData(InputFile inputFile, Schema schema) throws IOException {
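
The `thrown.expect(IcebergMetastoreException.class)` call in the updated test presupposes a JUnit 4 `ExpectedException` rule, presumably declared in the test class or inherited from `IcebergBaseTest`, roughly like:

    import org.junit.Rule;
    import org.junit.rules.ExpectedException;

    @Rule
    public ExpectedException thrown = ExpectedException.none();

    // With the rule armed, the test now passes when write() throws
    // IcebergMetastoreException because the target Parquet file already exists.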
