This is an automated email from the ASF dual-hosted git repository. lesun pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-gobblin.git
The following commit(s) were added to refs/heads/master by this push: new 3bdad11 [GOBBLIN-1145] add path in serde props 3bdad11 is described below commit 3bdad1148d86d479f0a27172a75f8bccda15371f Author: Arjun <ab...@linkedin.com> AuthorDate: Fri May 8 17:31:47 2020 -0700 [GOBBLIN-1145] add path in serde props Closes #2982 from arjun4084346/distcpSdParams --- .../management/copy/hive/HiveCopyEntityHelper.java | 4 +++- .../management/copy/hive/HivePartitionFileSet.java | 2 ++ .../copy/hive/avro/HiveAvroCopyEntityHelper.java | 25 ++++++++++++---------- .../org/apache/gobblin/hive/HiveConstants.java | 1 + 4 files changed, 20 insertions(+), 12 deletions(-) diff --git a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/hive/HiveCopyEntityHelper.java b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/hive/HiveCopyEntityHelper.java index e440460..7fa22e0 100644 --- a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/hive/HiveCopyEntityHelper.java +++ b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/hive/HiveCopyEntityHelper.java @@ -28,6 +28,8 @@ import java.util.List; import java.util.Map; import org.apache.commons.lang3.StringUtils; + +import org.apache.gobblin.hive.HiveConstants; import org.apache.gobblin.util.filesystem.ModTimeDataFileVersionStrategy; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; @@ -486,10 +488,10 @@ public class HiveCopyEntityHelper { targetTable.getTTable().putToParameters(HiveDataset.REGISTERER, GOBBLIN_DISTCP); targetTable.getTTable().putToParameters(HiveDataset.REGISTRATION_GENERATION_TIME_MILLIS, Long.toString(this.startTime)); + targetTable.getTTable().getSd().getSerdeInfo().getParameters().put(HiveConstants.PATH, targetLocation.toString()); targetTable.getTTable().unsetCreateTime(); HiveAvroCopyEntityHelper.updateTableAttributesIfAvro(targetTable, this); - return targetTable; } catch 
(HiveException he) { throw new IOException(he); diff --git a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/hive/HivePartitionFileSet.java b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/hive/HivePartitionFileSet.java index 93ae40f..7a90b59 100644 --- a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/hive/HivePartitionFileSet.java +++ b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/hive/HivePartitionFileSet.java @@ -37,6 +37,7 @@ import org.apache.gobblin.data.management.copy.entities.PostPublishStep; import org.apache.gobblin.data.management.copy.entities.PrePublishStep; import org.apache.gobblin.dataset.DatasetDescriptor; import org.apache.gobblin.dataset.PartitionDescriptor; +import org.apache.gobblin.hive.HiveConstants; import org.apache.gobblin.hive.HiveRegisterStep; import org.apache.gobblin.hive.metastore.HiveMetaStoreUtils; import org.apache.gobblin.hive.spec.HiveSpec; @@ -185,6 +186,7 @@ public class HivePartitionFileSet extends HiveFileSet { targetPartition.getTPartition().putToParameters(HiveDataset.REGISTRATION_GENERATION_TIME_MILLIS, Long.toString(this.hiveCopyEntityHelper.getStartTime())); targetPartition.setLocation(targetLocation.toString()); + targetPartition.getTPartition().getSd().getSerdeInfo().getParameters().put(HiveConstants.PATH, targetLocation.toString()); targetPartition.getTPartition().unsetCreateTime(); return targetPartition; } catch (HiveException he) { diff --git a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/hive/avro/HiveAvroCopyEntityHelper.java b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/hive/avro/HiveAvroCopyEntityHelper.java index 5166b60..952439d 100644 --- a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/hive/avro/HiveAvroCopyEntityHelper.java +++ 
b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/hive/avro/HiveAvroCopyEntityHelper.java @@ -28,10 +28,14 @@ import lombok.extern.slf4j.Slf4j; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat; +import org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat; import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.metadata.Partition; +import org.apache.hadoop.hive.serde2.avro.AvroSerDe; import org.apache.gobblin.data.management.copy.hive.HiveCopyEntityHelper; +import org.apache.gobblin.hive.HiveConstants; import org.apache.gobblin.util.PathUtils; @@ -69,7 +73,6 @@ public class HiveAvroCopyEntityHelper { } /** - * * @param entity, name of the entity to be changed, e.g. hive table or partition * @param sd, StorageDescriptor of the entity */ @@ -96,16 +99,16 @@ public class HiveAvroCopyEntityHelper { /** * Tell whether a hive table is actually an Avro table - * @param targetTable - * @return - * @throws IOException + * @param table a hive {@link Table} + * @return true if the table is an Avro table */ - public static boolean isHiveTableAvroType(Table targetTable) throws IOException { - String serializationLib = targetTable.getTTable().getSd().getSerdeInfo().getSerializationLib(); - String inputFormat = targetTable.getTTable().getSd().getInputFormat(); - String outputFormat = targetTable.getTTable().getSd().getOutputFormat(); - - return inputFormat.endsWith("AvroContainerInputFormat") || outputFormat.endsWith("AvroContainerOutputFormat") - || serializationLib.endsWith("AvroSerDe"); + public static boolean isHiveTableAvroType(Table table) { + String serializationLib = table.getTTable().getSd().getSerdeInfo().getSerializationLib(); + String inputFormat = table.getTTable().getSd().getInputFormat(); + String outputFormat = table.getTTable().getSd().getOutputFormat(); + + return 
inputFormat.endsWith(AvroContainerInputFormat.class.getSimpleName()) + || outputFormat.endsWith(AvroContainerOutputFormat.class.getSimpleName()) + || serializationLib.endsWith(AvroSerDe.class.getSimpleName()); } } diff --git a/gobblin-hive-registration/src/main/java/org/apache/gobblin/hive/HiveConstants.java b/gobblin-hive-registration/src/main/java/org/apache/gobblin/hive/HiveConstants.java index 8b03e6f..3444cc8 100644 --- a/gobblin-hive-registration/src/main/java/org/apache/gobblin/hive/HiveConstants.java +++ b/gobblin-hive-registration/src/main/java/org/apache/gobblin/hive/HiveConstants.java @@ -47,6 +47,7 @@ public class HiveConstants { * Storage properties */ public static final String LOCATION = "location"; + public static final String PATH = "path"; public static final String INPUT_FORMAT = "input.format"; public static final String OUTPUT_FORMAT = "output.format"; public static final String COMPRESSED = "compressed";