vikrambohra commented on code in PR #3632:
URL: https://github.com/apache/gobblin/pull/3632#discussion_r1103195419
##########
gobblin-hive-registration/src/main/java/org/apache/gobblin/hive/metastore/HiveMetaStoreUtils.java:
##########
@@ -256,6 +262,71 @@ public static SerDeInfo getSerDeInfo(HiveRegistrationUnit
unit) {
return si;
}
+ public static boolean containsNonOptionalUnionTypeColumn(Table t) {
+ return containsNonOptionalUnionTypeColumn(getHiveTable(t));
+ }
+
+ /**
+ * Util for detecting if a hive table has a non-optional union (aka complex
unions) column types. A non optional
+ * union is defined as a uniontype with n >= 2 non-null subtypes
+ *
+ * @param hiveTable Hive table
+ * @return if hive table contains non-optional uniontype columns
+ */
+ public static boolean containsNonOptionalUnionTypeColumn(HiveTable
hiveTable) {
+ if (hiveTable.getProps().contains("avro.schema.literal")) {
Review Comment:
Use AvroSerdeUtils.SCHEMA_LITERAL here instead of the hard-coded "avro.schema.literal" string.
##########
gobblin-hive-registration/src/main/java/org/apache/gobblin/hive/metastore/HiveMetaStoreUtils.java:
##########
@@ -256,6 +262,71 @@ public static SerDeInfo getSerDeInfo(HiveRegistrationUnit
unit) {
return si;
}
+ public static boolean containsNonOptionalUnionTypeColumn(Table t) {
+ return containsNonOptionalUnionTypeColumn(getHiveTable(t));
+ }
+
+ /**
+ * Util for detecting if a hive table has a non-optional union (aka complex
unions) column types. A non optional
+ * union is defined as a uniontype with n >= 2 non-null subtypes
+ *
+ * @param hiveTable Hive table
+ * @return if hive table contains non-optional uniontype columns
+ */
+ public static boolean containsNonOptionalUnionTypeColumn(HiveTable
hiveTable) {
+ if (hiveTable.getProps().contains("avro.schema.literal")) {
+ Schema.Parser parser = new Schema.Parser();
+ Schema schema =
parser.parse(hiveTable.getProps().getProp("avro.schema.literal"));
Review Comment:
Use AvroSerdeUtils.SCHEMA_LITERAL here instead of the hard-coded "avro.schema.literal" string.
##########
gobblin-hive-registration/src/main/java/org/apache/gobblin/hive/metastore/HiveMetaStoreUtils.java:
##########
@@ -256,6 +262,71 @@ public static SerDeInfo getSerDeInfo(HiveRegistrationUnit
unit) {
return si;
}
+ public static boolean containsNonOptionalUnionTypeColumn(Table t) {
+ return containsNonOptionalUnionTypeColumn(getHiveTable(t));
+ }
+
+ /**
+ * Util for detecting if a hive table has a non-optional union (aka complex
unions) column types. A non optional
+ * union is defined as a uniontype with n >= 2 non-null subtypes
+ *
+ * @param hiveTable Hive table
+ * @return if hive table contains non-optional uniontype columns
+ */
+ public static boolean containsNonOptionalUnionTypeColumn(HiveTable
hiveTable) {
+ if (hiveTable.getProps().contains("avro.schema.literal")) {
+ Schema.Parser parser = new Schema.Parser();
+ Schema schema =
parser.parse(hiveTable.getProps().getProp("avro.schema.literal"));
+ return isNonOptionalUnion(schema);
+ }
+
+ if (isNonAvroFormat(hiveTable)) {
+ return hiveTable.getColumns().stream()
+ .map(HiveRegistrationUnit.Column::getType)
+ .filter(type -> type.contains("uniontype"))
+ .map(type -> TypeDescription.fromString(type))
+ .anyMatch(type -> isNonOptionalUnion(type));
+ }
+
+ throw new RuntimeException("Avro based Hive tables without
\"avro.schema.literal\" are not supported");
+ }
+
+ private static boolean isNonOptionalUnion(Schema schema) {
+ switch (schema.getType()) {
+ case UNION:
+ Stream<Schema.Type> nonNullSubTypes = schema.getTypes().stream()
+ .map(Schema::getType).filter(t -> !t.equals(Schema.Type.NULL));
+ if (nonNullSubTypes.count() >= 2) {
+ return true;
+ }
+ return schema.getTypes().stream().anyMatch(s -> isNonOptionalUnion(s));
+ case MAP: // key is a string and doesn't need to be checked
+ return isNonOptionalUnion(schema.getValueType());
+ case ARRAY:
+ return isNonOptionalUnion(schema.getElementType());
+ case RECORD:
+ return
schema.getFields().stream().map(Schema.Field::schema).anyMatch(s ->
isNonOptionalUnion(s));
+ default:
+ return false;
+ }
+ }
+
+ private static boolean isNonOptionalUnion(TypeDescription description) {
Review Comment:
Useful method. It can be made public.
##########
gobblin-hive-registration/src/main/java/org/apache/gobblin/hive/metastore/HiveMetaStoreUtils.java:
##########
@@ -256,6 +262,71 @@ public static SerDeInfo getSerDeInfo(HiveRegistrationUnit
unit) {
return si;
}
+ public static boolean containsNonOptionalUnionTypeColumn(Table t) {
+ return containsNonOptionalUnionTypeColumn(getHiveTable(t));
+ }
+
+ /**
+ * Util for detecting if a hive table has a non-optional union (aka complex
unions) column types. A non optional
+ * union is defined as a uniontype with n >= 2 non-null subtypes
+ *
+ * @param hiveTable Hive table
+ * @return if hive table contains non-optional uniontype columns
+ */
+ public static boolean containsNonOptionalUnionTypeColumn(HiveTable
hiveTable) {
+ if (hiveTable.getProps().contains("avro.schema.literal")) {
+ Schema.Parser parser = new Schema.Parser();
+ Schema schema =
parser.parse(hiveTable.getProps().getProp("avro.schema.literal"));
+ return isNonOptionalUnion(schema);
+ }
+
+ if (isNonAvroFormat(hiveTable)) {
+ return hiveTable.getColumns().stream()
+ .map(HiveRegistrationUnit.Column::getType)
+ .filter(type -> type.contains("uniontype"))
+ .map(type -> TypeDescription.fromString(type))
+ .anyMatch(type -> isNonOptionalUnion(type));
+ }
+
+ throw new RuntimeException("Avro based Hive tables without
\"avro.schema.literal\" are not supported");
+ }
+
+ private static boolean isNonOptionalUnion(Schema schema) {
Review Comment:
Useful method. It can be made public.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]