Repository: incubator-gobblin Updated Branches: refs/heads/master ba44dd304 -> 368ff92e3
[GOBBLIN-313] Added option to explicitly set group name for destination and staging directories for Avro2Orc conversion Closes #2167 from aditya1105/HiveSource Project: http://git-wip-us.apache.org/repos/asf/incubator-gobblin/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-gobblin/commit/368ff92e Tree: http://git-wip-us.apache.org/repos/asf/incubator-gobblin/tree/368ff92e Diff: http://git-wip-us.apache.org/repos/asf/incubator-gobblin/diff/368ff92e Branch: refs/heads/master Commit: 368ff92e340a4cdfd33b7179729ecc7041338d8b Parents: ba44dd3 Author: adsharma <[email protected]> Authored: Mon Nov 13 02:00:07 2017 -0800 Committer: Abhishek Tiwari <[email protected]> Committed: Mon Nov 13 02:00:07 2017 -0800 ---------------------------------------------------------------------- .../converter/AbstractAvroToOrcConverter.java | 29 ++++++++++++++++++-- 1 file changed, 26 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/368ff92e/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/conversion/hive/converter/AbstractAvroToOrcConverter.java ---------------------------------------------------------------------- diff --git a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/conversion/hive/converter/AbstractAvroToOrcConverter.java b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/conversion/hive/converter/AbstractAvroToOrcConverter.java index b8591e7..ed42946 100644 --- a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/conversion/hive/converter/AbstractAvroToOrcConverter.java +++ b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/conversion/hive/converter/AbstractAvroToOrcConverter.java @@ -136,6 +136,10 @@ public abstract class AbstractAvroToOrcConverter extends Converter<Schema, Schem public static final String 
HIVE_DATASET_DESTINATION_SKIP_SETGROUP = "hive.dataset.destination.skip.setGroup"; public static final boolean DEFAULT_HIVE_DATASET_DESTINATION_SKIP_SETGROUP = false; + public static final String HIVE_DATASET_DESTINATION_GROUP_NAME = "hive.dataset.destination.groupName"; + public static final String HIVE_DATASET_STAGING_GROUP_NAME = "hive.dataset.staging.groupName"; + + /** * If set to true, a set format DDL will be separate from add partition DDL @@ -276,14 +280,33 @@ public abstract class AbstractAvroToOrcConverter extends Converter<Schema, Schem getConversionConfig().getDestinationDataPath()), sourceDataPermission)); } else { this.fs.setPermission(new Path(getConversionConfig().getDestinationDataPath()), sourceDataPermission); - // Set the same group as source + + // Explicitly set group name for destination location if specified otherwise preserve source group name + String destinationGroupName; + if (workUnit.contains(HIVE_DATASET_DESTINATION_GROUP_NAME)) { + destinationGroupName = workUnit.getProp(HIVE_DATASET_DESTINATION_GROUP_NAME); + } else { + destinationGroupName = sourceDataFileStatus.getGroup(); + } if (!workUnit.getPropAsBoolean(HIVE_DATASET_DESTINATION_SKIP_SETGROUP, DEFAULT_HIVE_DATASET_DESTINATION_SKIP_SETGROUP)) { - this.fs.setOwner(new Path(getConversionConfig().getDestinationDataPath()), null, - sourceDataFileStatus.getGroup()); + this.fs.setOwner(new Path(getConversionConfig().getDestinationDataPath()), null, destinationGroupName); } log.info(String.format("Created %s with permissions %s and group %s", new Path(getConversionConfig() .getDestinationDataPath()), sourceDataPermission, sourceDataFileStatus.getGroup())); + + // Explicitly set group name for staging directory if specified + if (workUnit.contains(HIVE_DATASET_STAGING_GROUP_NAME)) { + String stagingGroupName = workUnit.getProp(HIVE_DATASET_STAGING_GROUP_NAME); + log.info("Setting staging directory group name as " + stagingGroupName); + this.fs.mkdirs(new 
Path(getOrcStagingDataLocation(orcStagingTableName))); + this.fs.setOwner(new Path(getOrcStagingDataLocation(orcStagingTableName)), null, stagingGroupName); + + // Staging directory will be renamed to getOrcDataLocation() and hence its group name should match + // with the group name of the staging directory + this.fs.mkdirs(new Path(getOrcDataLocation())); + this.fs.setOwner(new Path(getOrcDataLocation()), null, stagingGroupName); + } } } catch (IOException e) { Throwables.propagate(e);
