This is an automated email from the ASF dual-hosted git repository.
zihanli58 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/gobblin.git
The following commit(s) were added to refs/heads/master by this push:
new af48b310c [GOBBLIN-1871] Fix bug that hiveMetadataWriter may make the
hive schema columns inconsistent with the Avro.schema.literal (#3734)
af48b310c is described below
commit af48b310c59593c48bc0491a0b6960d2d8a83e16
Author: Zihan Li <[email protected]>
AuthorDate: Wed Aug 9 12:15:48 2023 -0700
[GOBBLIN-1871] Fix bug that hiveMetadataWriter may make the hive schema
columns inconsistent with the Avro.schema.literal (#3734)
* address comments
* use connectionmanager when httpclient is not closeable
* [GOBBLIN-1871] Fix bug that hiveMetadataWriter may make the hive schema
columns inconsistent with the Avro.schema.literal
---------
Co-authored-by: Zihan Li <[email protected]>
---
.../java/org/apache/gobblin/hive/orc/HiveOrcSerDeManager.java | 2 +-
.../java/org/apache/gobblin/hive/writer/HiveMetadataWriter.java | 9 +++------
2 files changed, 4 insertions(+), 7 deletions(-)
diff --git
a/gobblin-hive-registration/src/main/java/org/apache/gobblin/hive/orc/HiveOrcSerDeManager.java
b/gobblin-hive-registration/src/main/java/org/apache/gobblin/hive/orc/HiveOrcSerDeManager.java
index 9a7376e9b..98adb4ca7 100644
---
a/gobblin-hive-registration/src/main/java/org/apache/gobblin/hive/orc/HiveOrcSerDeManager.java
+++
b/gobblin-hive-registration/src/main/java/org/apache/gobblin/hive/orc/HiveOrcSerDeManager.java
@@ -255,7 +255,6 @@ public class HiveOrcSerDeManager extends HiveSerDeManager {
private void addSchemaProperties(Path path, HiveRegistrationUnit hiveUnit)
throws IOException {
- Preconditions.checkArgument(this.fs.getFileStatus(path).isDirectory(),
path + " is not a directory.");
try (Timer.Context context =
metricContext.timer(HIVE_SPEC_SCHEMA_READING_TIMER).time()) {
addSchemaPropertiesHelper(path, hiveUnit);
}
@@ -281,6 +280,7 @@ public class HiveOrcSerDeManager extends HiveSerDeManager {
schema = TypeInfoUtils.getTypeInfoFromObjectInspector(
TypeDescriptionToObjectInspectorUtil.getObjectInspector(orcSchema));
} else {
+ Preconditions.checkArgument(this.fs.getFileStatus(path).isDirectory(),
path + " is not a directory.");
schema = getSchemaFromLatestFile(path, this.fs);
}
if (schema instanceof StructTypeInfo) {
diff --git
a/gobblin-hive-registration/src/main/java/org/apache/gobblin/hive/writer/HiveMetadataWriter.java
b/gobblin-hive-registration/src/main/java/org/apache/gobblin/hive/writer/HiveMetadataWriter.java
index 138ac3d94..e5ac2741c 100644
---
a/gobblin-hive-registration/src/main/java/org/apache/gobblin/hive/writer/HiveMetadataWriter.java
+++
b/gobblin-hive-registration/src/main/java/org/apache/gobblin/hive/writer/HiveMetadataWriter.java
@@ -456,12 +456,9 @@ public class HiveMetadataWriter implements MetadataWriter {
//Force to set the schema even there is no schema literal defined in the
spec
String latestSchema = latestSchemaMap.get(tableKey);
if (latestSchema != null) {
- String tableSchema =
spec.getTable().getSerDeProps().getProp(AvroSerdeUtils.AvroTableProperties.SCHEMA_LITERAL.getPropName());
- if (tableSchema == null || !tableSchema.equals(latestSchema)) {
- spec.getTable().getSerDeProps()
-
.setProp(AvroSerdeUtils.AvroTableProperties.SCHEMA_LITERAL.getPropName(),
latestSchemaMap.get(tableKey));
- HiveMetaStoreUtils.updateColumnsInfoIfNeeded(spec);
- }
+ spec.getTable().getSerDeProps()
+
.setProp(AvroSerdeUtils.AvroTableProperties.SCHEMA_LITERAL.getPropName(),
latestSchema);
+ HiveMetaStoreUtils.updateColumnsInfoIfNeeded(spec);
}
}