This is an automated email from the ASF dual-hosted git repository.
lesun pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-gobblin.git
The following commit(s) were added to refs/heads/master by this push:
new 3bdad11 [GOBBLIN-1145] add path in serde props
3bdad11 is described below
commit 3bdad1148d86d479f0a27172a75f8bccda15371f
Author: Arjun <[email protected]>
AuthorDate: Fri May 8 17:31:47 2020 -0700
[GOBBLIN-1145] add path in serde props
Closes #2982 from arjun4084346/distcpSdParams
---
.../management/copy/hive/HiveCopyEntityHelper.java | 4 +++-
.../management/copy/hive/HivePartitionFileSet.java | 2 ++
.../copy/hive/avro/HiveAvroCopyEntityHelper.java | 25 ++++++++++++----------
.../org/apache/gobblin/hive/HiveConstants.java | 1 +
4 files changed, 20 insertions(+), 12 deletions(-)
diff --git
a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/hive/HiveCopyEntityHelper.java
b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/hive/HiveCopyEntityHelper.java
index e440460..7fa22e0 100644
---
a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/hive/HiveCopyEntityHelper.java
+++
b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/hive/HiveCopyEntityHelper.java
@@ -28,6 +28,8 @@ import java.util.List;
import java.util.Map;
import org.apache.commons.lang3.StringUtils;
+
+import org.apache.gobblin.hive.HiveConstants;
import org.apache.gobblin.util.filesystem.ModTimeDataFileVersionStrategy;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
@@ -486,10 +488,10 @@ public class HiveCopyEntityHelper {
targetTable.getTTable().putToParameters(HiveDataset.REGISTERER,
GOBBLIN_DISTCP);
targetTable.getTTable().putToParameters(HiveDataset.REGISTRATION_GENERATION_TIME_MILLIS,
Long.toString(this.startTime));
+
targetTable.getTTable().getSd().getSerdeInfo().getParameters().put(HiveConstants.PATH,
targetLocation.toString());
targetTable.getTTable().unsetCreateTime();
HiveAvroCopyEntityHelper.updateTableAttributesIfAvro(targetTable, this);
-
return targetTable;
} catch (HiveException he) {
throw new IOException(he);
diff --git
a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/hive/HivePartitionFileSet.java
b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/hive/HivePartitionFileSet.java
index 93ae40f..7a90b59 100644
---
a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/hive/HivePartitionFileSet.java
+++
b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/hive/HivePartitionFileSet.java
@@ -37,6 +37,7 @@ import
org.apache.gobblin.data.management.copy.entities.PostPublishStep;
import org.apache.gobblin.data.management.copy.entities.PrePublishStep;
import org.apache.gobblin.dataset.DatasetDescriptor;
import org.apache.gobblin.dataset.PartitionDescriptor;
+import org.apache.gobblin.hive.HiveConstants;
import org.apache.gobblin.hive.HiveRegisterStep;
import org.apache.gobblin.hive.metastore.HiveMetaStoreUtils;
import org.apache.gobblin.hive.spec.HiveSpec;
@@ -185,6 +186,7 @@ public class HivePartitionFileSet extends HiveFileSet {
targetPartition.getTPartition().putToParameters(HiveDataset.REGISTRATION_GENERATION_TIME_MILLIS,
Long.toString(this.hiveCopyEntityHelper.getStartTime()));
targetPartition.setLocation(targetLocation.toString());
+
targetPartition.getTPartition().getSd().getSerdeInfo().getParameters().put(HiveConstants.PATH,
targetLocation.toString());
targetPartition.getTPartition().unsetCreateTime();
return targetPartition;
} catch (HiveException he) {
diff --git
a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/hive/avro/HiveAvroCopyEntityHelper.java
b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/hive/avro/HiveAvroCopyEntityHelper.java
index 5166b60..952439d 100644
---
a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/hive/avro/HiveAvroCopyEntityHelper.java
+++
b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/hive/avro/HiveAvroCopyEntityHelper.java
@@ -28,10 +28,14 @@ import lombok.extern.slf4j.Slf4j;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
+import org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat;
+import org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.metadata.Partition;
+import org.apache.hadoop.hive.serde2.avro.AvroSerDe;
import org.apache.gobblin.data.management.copy.hive.HiveCopyEntityHelper;
+import org.apache.gobblin.hive.HiveConstants;
import org.apache.gobblin.util.PathUtils;
@@ -69,7 +73,6 @@ public class HiveAvroCopyEntityHelper {
}
/**
- *
* @param entity, name of the entity to be changed, e.g. hive table or
partition
* @param sd, StorageDescriptor of the entity
*/
@@ -96,16 +99,16 @@ public class HiveAvroCopyEntityHelper {
/**
* Tell whether a hive table is actually an Avro table
- * @param targetTable
- * @return
- * @throws IOException
+ * @param table a hive {@link Table}
+ * @return true if it is an Avro hive table
*/
- public static boolean isHiveTableAvroType(Table targetTable) throws
IOException {
- String serializationLib =
targetTable.getTTable().getSd().getSerdeInfo().getSerializationLib();
- String inputFormat = targetTable.getTTable().getSd().getInputFormat();
- String outputFormat = targetTable.getTTable().getSd().getOutputFormat();
-
- return inputFormat.endsWith("AvroContainerInputFormat") ||
outputFormat.endsWith("AvroContainerOutputFormat")
- || serializationLib.endsWith("AvroSerDe");
+ public static boolean isHiveTableAvroType(Table table) {
+ String serializationLib =
table.getTTable().getSd().getSerdeInfo().getSerializationLib();
+ String inputFormat = table.getTTable().getSd().getInputFormat();
+ String outputFormat = table.getTTable().getSd().getOutputFormat();
+
+ return inputFormat.endsWith(AvroContainerInputFormat.class.getSimpleName())
+ ||
outputFormat.endsWith(AvroContainerOutputFormat.class.getSimpleName())
+ || serializationLib.endsWith(AvroSerDe.class.getSimpleName());
}
}
diff --git
a/gobblin-hive-registration/src/main/java/org/apache/gobblin/hive/HiveConstants.java
b/gobblin-hive-registration/src/main/java/org/apache/gobblin/hive/HiveConstants.java
index 8b03e6f..3444cc8 100644
---
a/gobblin-hive-registration/src/main/java/org/apache/gobblin/hive/HiveConstants.java
+++
b/gobblin-hive-registration/src/main/java/org/apache/gobblin/hive/HiveConstants.java
@@ -47,6 +47,7 @@ public class HiveConstants {
* Storage properties
*/
public static final String LOCATION = "location";
+ public static final String PATH = "path";
public static final String INPUT_FORMAT = "input.format";
public static final String OUTPUT_FORMAT = "output.format";
public static final String COMPRESSED = "compressed";