[incubator-iceberg] branch master updated: Remove unused fields from DataFile (#914)

blue Sat, 11 Apr 2020 08:48:26 -0700

This is an automated email from the ASF dual-hosted git repository.

blue pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-iceberg.git



The following commit(s) were added to refs/heads/master by this push:
     new 3334afc  Remove unused fields from DataFile (#914)
3334afc is described below

commit 3334afc7a375547b35264d3d5f2cb2605630346f
Author: Ryan Blue <[email protected]>
AuthorDate: Sat Apr 11 08:47:22 2020 -0700

    Remove unused fields from DataFile (#914)
---
 api/src/main/java/org/apache/iceberg/DataFile.java | 12 ------
 .../test/java/org/apache/iceberg/TestHelpers.java  | 10 -----
 .../java/org/apache/iceberg/GenericDataFile.java   | 48 ++++++----------------
 .../java/org/apache/iceberg/ManifestEntry.java     | 26 +++---------
 site/docs/spec.md                                  |  4 +-
 5 files changed, 20 insertions(+), 80 deletions(-)

diff --git a/api/src/main/java/org/apache/iceberg/DataFile.java b/api/src/main/java/org/apache/iceberg/DataFile.java
index 456082e..90758a3 100644
--- a/api/src/main/java/org/apache/iceberg/DataFile.java
+++ b/api/src/main/java/org/apache/iceberg/DataFile.java
@@ -46,8 +46,6 @@ public interface DataFile {
         required(103, "record_count", LongType.get()),
         required(104, "file_size_in_bytes", LongType.get()),
         required(105, "block_size_in_bytes", LongType.get()),
-        optional(106, "file_ordinal", IntegerType.get()),
-        optional(107, "sort_columns", ListType.ofRequired(112, IntegerType.get())),
         optional(108, "column_sizes", MapType.ofRequired(117, 118,
             IntegerType.get(), LongType.get())),
         optional(109, "value_counts", MapType.ofRequired(119, 120,
@@ -90,16 +88,6 @@ public interface DataFile {
   long fileSizeInBytes();
 
   /**
-   * @return file ordinal if written in a global ordering, or null
-   */
-  Integer fileOrdinal();
-
-  /**
-   * @return list of columns the file records are sorted by, or null
-   */
-  List<Integer> sortColumns();
-
-  /**
    * @return if collected, map from column ID to the size of the column in bytes, null otherwise
    */
   Map<Integer, Long> columnSizes();
diff --git a/api/src/test/java/org/apache/iceberg/TestHelpers.java b/api/src/test/java/org/apache/iceberg/TestHelpers.java
index 5f50181..2fc71c4 100644
--- a/api/src/test/java/org/apache/iceberg/TestHelpers.java
+++ b/api/src/test/java/org/apache/iceberg/TestHelpers.java
@@ -319,16 +319,6 @@ public class TestHelpers {
     }
 
     @Override
-    public Integer fileOrdinal() {
-      return null;
-    }
-
-    @Override
-    public List<Integer> sortColumns() {
-      return null;
-    }
-
-    @Override
     public Map<Integer, Long> columnSizes() {
       return null;
     }
diff --git a/core/src/main/java/org/apache/iceberg/GenericDataFile.java b/core/src/main/java/org/apache/iceberg/GenericDataFile.java
index 03d1015..92d84f9 100644
--- a/core/src/main/java/org/apache/iceberg/GenericDataFile.java
+++ b/core/src/main/java/org/apache/iceberg/GenericDataFile.java
@@ -56,8 +56,6 @@ class GenericDataFile
   private long fileSizeInBytes = -1L;
 
   // optional fields
-  private Integer fileOrdinal = null; // boxed for nullability
-  private List<Integer> sortColumns = null;
   private Map<Integer, Long> columnSizes = null;
   private Map<Integer, Long> valueCounts = null;
   private Map<Integer, Long> nullValueCounts = null;
@@ -173,8 +171,6 @@ class GenericDataFile
     this.partitionType = toCopy.partitionType;
     this.recordCount = toCopy.recordCount;
     this.fileSizeInBytes = toCopy.fileSizeInBytes;
-    this.fileOrdinal = toCopy.fileOrdinal;
-    this.sortColumns = copy(toCopy.sortColumns);
     if (fullCopy) {
       // TODO: support lazy conversion to/from map
       this.columnSizes = copy(toCopy.columnSizes);
@@ -226,16 +222,6 @@ class GenericDataFile
   }
 
   @Override
-  public Integer fileOrdinal() {
-    return fileOrdinal;
-  }
-
-  @Override
-  public List<Integer> sortColumns() {
-    return sortColumns;
-  }
-
-  @Override
   public Map<Integer, Long> columnSizes() {
     return columnSizes;
   }
@@ -306,30 +292,24 @@ class GenericDataFile
       case 5:
         return;
       case 6:
-        this.fileOrdinal = (Integer) v;
-        return;
-      case 7:
-        this.sortColumns = (List<Integer>) v;
-        return;
-      case 8:
         this.columnSizes = (Map<Integer, Long>) v;
         return;
-      case 9:
+      case 7:
         this.valueCounts = (Map<Integer, Long>) v;
         return;
-      case 10:
+      case 8:
         this.nullValueCounts = (Map<Integer, Long>) v;
         return;
-      case 11:
+      case 9:
         this.lowerBounds = SerializableByteBufferMap.wrap((Map<Integer, ByteBuffer>) v);
         return;
-      case 12:
+      case 10:
         this.upperBounds = SerializableByteBufferMap.wrap((Map<Integer, ByteBuffer>) v);
         return;
-      case 13:
+      case 11:
         this.keyMetadata = ByteBuffers.toByteArray((ByteBuffer) v);
         return;
-      case 14:
+      case 12:
         this.splitOffsets = (List<Long>) v;
         return;
       default:
@@ -365,22 +345,18 @@ class GenericDataFile
         // to maintain compatibility, we need to return something.
         return DEFAULT_BLOCK_SIZE;
       case 6:
-        return fileOrdinal;
-      case 7:
-        return sortColumns;
-      case 8:
         return columnSizes;
-      case 9:
+      case 7:
         return valueCounts;
-      case 10:
+      case 8:
         return nullValueCounts;
-      case 11:
+      case 9:
         return lowerBounds;
-      case 12:
+      case 10:
         return upperBounds;
-      case 13:
+      case 11:
         return keyMetadata();
-      case 14:
+      case 12:
         return splitOffsets;
       default:
         throw new UnsupportedOperationException("Unknown field ordinal: " + pos);
diff --git a/core/src/main/java/org/apache/iceberg/ManifestEntry.java b/core/src/main/java/org/apache/iceberg/ManifestEntry.java
index 82eca81..d0f85d6 100644
--- a/core/src/main/java/org/apache/iceberg/ManifestEntry.java
+++ b/core/src/main/java/org/apache/iceberg/ManifestEntry.java
@@ -276,22 +276,18 @@ class ManifestEntry implements IndexedRecord, SpecificData.SchemaConstructable {
         case 5:
           return DEFAULT_BLOCK_SIZE;
         case 6:
-          return wrapped.fileOrdinal();
-        case 7:
-          return wrapped.sortColumns();
-        case 8:
           return wrapped.columnSizes();
-        case 9:
+        case 7:
           return wrapped.valueCounts();
-        case 10:
+        case 8:
           return wrapped.nullValueCounts();
-        case 11:
+        case 9:
           return wrapped.lowerBounds();
-        case 12:
+        case 10:
           return wrapped.upperBounds();
-        case 13:
+        case 11:
           return wrapped.keyMetadata();
-        case 14:
+        case 12:
           return wrapped.splitOffsets();
       }
       throw new IllegalArgumentException("Unknown field ordinal: " + pos);
@@ -333,16 +329,6 @@ class ManifestEntry implements IndexedRecord, SpecificData.SchemaConstructable {
     }
 
     @Override
-    public Integer fileOrdinal() {
-      return wrapped.fileOrdinal();
-    }
-
-    @Override
-    public List<Integer> sortColumns() {
-      return wrapped.sortColumns();
-    }
-
-    @Override
     public Map<Integer, Long> columnSizes() {
       return wrapped.columnSizes();
     }
diff --git a/site/docs/spec.md b/site/docs/spec.md
index f05f2ee..24b9360 100644
--- a/site/docs/spec.md
+++ b/site/docs/spec.md
@@ -222,8 +222,8 @@ The schema of a manifest file is a struct called `manifest_entry` with the follo
 | **`103  record_count`**           | `long`                                | 
Number of records in this file                                                  
                                                                                
                                     |
 | **`104  file_size_in_bytes`**     | `long`                                | 
Total file size in bytes                                                        
                                                                                
                                     |
 | ~~**`105 block_size_in_bytes`**~~ | `long`                                | 
**Deprecated. Always write a default value and do not read.**                   
                                                                                
                                     |
-| **`106  file_ordinal`**           | `optional int`                        | 
Ordinal of the file w.r.t files with the same partition tuple and snapshot id   
                                                                                
                                     |
-| **`107  sort_columns`**           | `optional list`                       | 
Columns the file is sorted by                                                   
                                                                                
                                     |
+| ~~**`106  file_ordinal`**~~       | `optional int`                        | 
**Deprecated. Do not use.**                                                     
                                                                                
                                     |
+| ~~**`107  sort_columns`**~~       | `optional list`                       | 
**Deprecated. Do not use.**                                                     
                                                                                
                                     |
 | **`108  column_sizes`**           | `optional map`                        | 
Map from column id to the total size on disk of all regions that store the 
column. Does not include bytes necessary to read other columns, like footers. 
Leave null for row-oriented formats (Avro). |
 | **`109  value_counts`**           | `optional map`                        | 
Map from column id to number of values in the column (including null values)    
                                                                                
                                     |
 | **`110  null_value_counts`**      | `optional map`                        | 
Map from column id to number of null values in the column                       
                                                                                
                                     |

[incubator-iceberg] branch master updated: Remove unused fields from DataFile (#914)

Reply via email to