Repository: incubator-gobblin Updated Branches: refs/heads/master 15ac4679b -> 725a0829d
gobblin-data-management cli + example configuration Project: http://git-wip-us.apache.org/repos/asf/incubator-gobblin/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-gobblin/commit/5a896d23 Tree: http://git-wip-us.apache.org/repos/asf/incubator-gobblin/tree/5a896d23 Diff: http://git-wip-us.apache.org/repos/asf/incubator-gobblin/diff/5a896d23 Branch: refs/heads/master Commit: 5a896d23aab7f037fd206b984caab7d39085f68b Parents: 0751146 Author: Michal Ferlinski <[email protected]> Authored: Wed May 17 10:28:45 2017 +0200 Committer: Michal Ferlinski <[email protected]> Committed: Mon May 22 08:19:08 2017 +0200 ---------------------------------------------------------------------- .../config-example/gobblin-retention-run.sh | 20 +++++ .../config-example/gobblin-retention.properties | 25 ++++++ .../_CONFIG_STORE/1.0/hive/db1/main.conf | 18 ++++ .../_CONFIG_STORE/1.0/hive/db2/main.conf | 18 ++++ .../_CONFIG_STORE/1.0/hive/db2/table1/main.conf | 18 ++++ .../_CONFIG_STORE/1.0/hive/includes.conf | 18 ++++ .../1.0/tags/retention/hive/main.conf | 38 +++++++++ .../1.0/tags/retention/timebased/main.conf | 31 +++++++ .../_CONFIG_STORE/store-metadata.conf | 18 ++++ .../runtime/retention/DatasetCleanerCli.java | 89 ++++++++++++++++++++ .../data-management/Gobblin-Retention.md | 3 + 11 files changed, 296 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/5a896d23/gobblin-data-management/config-example/gobblin-retention-run.sh ---------------------------------------------------------------------- diff --git a/gobblin-data-management/config-example/gobblin-retention-run.sh b/gobblin-data-management/config-example/gobblin-retention-run.sh new file mode 100755 index 0000000..72ce340 --- /dev/null +++ b/gobblin-data-management/config-example/gobblin-retention-run.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +$GOBBLIN_HOME/bin/gobblin cleaner -c gobblin-retention.properties http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/5a896d23/gobblin-data-management/config-example/gobblin-retention.properties ---------------------------------------------------------------------- diff --git a/gobblin-data-management/config-example/gobblin-retention.properties b/gobblin-data-management/config-example/gobblin-retention.properties new file mode 100644 index 0000000..0e7c805 --- /dev/null +++ b/gobblin-data-management/config-example/gobblin-retention.properties @@ -0,0 +1,25 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +gobblin.config.management.store.uri=simple-hdfs://cluster1/user/root/gobblin-config-management + +hive.dataset.database=db1,db2 +gobblin.retention.hive.shouldDeleteData=true + +gobblin.retention.tag=simple-hdfs://cluster1/user/root/gobblin-config-management/tags/retention/timebased,simple-hdfs://cluster1/user/root/gobblin-config-management/tags/retention/hive + +gobblin.retention.skip.trash=true http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/5a896d23/gobblin-data-management/config-example/hdfs-gobblin-config-store/user/root/gobblin-config-management/_CONFIG_STORE/1.0/hive/db1/main.conf ---------------------------------------------------------------------- diff --git a/gobblin-data-management/config-example/hdfs-gobblin-config-store/user/root/gobblin-config-management/_CONFIG_STORE/1.0/hive/db1/main.conf b/gobblin-data-management/config-example/hdfs-gobblin-config-store/user/root/gobblin-config-management/_CONFIG_STORE/1.0/hive/db1/main.conf new file mode 100644 index 0000000..7fb8d7f --- /dev/null +++ b/gobblin-data-management/config-example/hdfs-gobblin-config-store/user/root/gobblin-config-management/_CONFIG_STORE/1.0/hive/db1/main.conf @@ -0,0 +1,18 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +gobblin.retention.selection.timeBased.lookbackTime=30d http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/5a896d23/gobblin-data-management/config-example/hdfs-gobblin-config-store/user/root/gobblin-config-management/_CONFIG_STORE/1.0/hive/db2/main.conf ---------------------------------------------------------------------- diff --git a/gobblin-data-management/config-example/hdfs-gobblin-config-store/user/root/gobblin-config-management/_CONFIG_STORE/1.0/hive/db2/main.conf b/gobblin-data-management/config-example/hdfs-gobblin-config-store/user/root/gobblin-config-management/_CONFIG_STORE/1.0/hive/db2/main.conf new file mode 100644 index 0000000..35fa65d --- /dev/null +++ b/gobblin-data-management/config-example/hdfs-gobblin-config-store/user/root/gobblin-config-management/_CONFIG_STORE/1.0/hive/db2/main.conf @@ -0,0 +1,18 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +gobblin.retention.selection.timeBased.lookbackTime=32d http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/5a896d23/gobblin-data-management/config-example/hdfs-gobblin-config-store/user/root/gobblin-config-management/_CONFIG_STORE/1.0/hive/db2/table1/main.conf ---------------------------------------------------------------------- diff --git a/gobblin-data-management/config-example/hdfs-gobblin-config-store/user/root/gobblin-config-management/_CONFIG_STORE/1.0/hive/db2/table1/main.conf b/gobblin-data-management/config-example/hdfs-gobblin-config-store/user/root/gobblin-config-management/_CONFIG_STORE/1.0/hive/db2/table1/main.conf new file mode 100644 index 0000000..595ab06 --- /dev/null +++ b/gobblin-data-management/config-example/hdfs-gobblin-config-store/user/root/gobblin-config-management/_CONFIG_STORE/1.0/hive/db2/table1/main.conf @@ -0,0 +1,18 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +gobblin.retention.selection.timeBased.lookbackTime=15d http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/5a896d23/gobblin-data-management/config-example/hdfs-gobblin-config-store/user/root/gobblin-config-management/_CONFIG_STORE/1.0/hive/includes.conf ---------------------------------------------------------------------- diff --git a/gobblin-data-management/config-example/hdfs-gobblin-config-store/user/root/gobblin-config-management/_CONFIG_STORE/1.0/hive/includes.conf b/gobblin-data-management/config-example/hdfs-gobblin-config-store/user/root/gobblin-config-management/_CONFIG_STORE/1.0/hive/includes.conf new file mode 100644 index 0000000..1763dcf --- /dev/null +++ b/gobblin-data-management/config-example/hdfs-gobblin-config-store/user/root/gobblin-config-management/_CONFIG_STORE/1.0/hive/includes.conf @@ -0,0 +1,18 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +/tags/retention/hive http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/5a896d23/gobblin-data-management/config-example/hdfs-gobblin-config-store/user/root/gobblin-config-management/_CONFIG_STORE/1.0/tags/retention/hive/main.conf ---------------------------------------------------------------------- diff --git a/gobblin-data-management/config-example/hdfs-gobblin-config-store/user/root/gobblin-config-management/_CONFIG_STORE/1.0/tags/retention/hive/main.conf b/gobblin-data-management/config-example/hdfs-gobblin-config-store/user/root/gobblin-config-management/_CONFIG_STORE/1.0/tags/retention/hive/main.conf new file mode 100644 index 0000000..14fd2d5 --- /dev/null +++ b/gobblin-data-management/config-example/hdfs-gobblin-config-store/user/root/gobblin-config-management/_CONFIG_STORE/1.0/tags/retention/hive/main.conf @@ -0,0 +1,38 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +gobblin.retention : { + + is.blacklisted=false + + dataset : { + finder.class=gobblin.data.management.retention.dataset.finder.CleanableHiveDatasetFinder + } + + selection : { + policy.class=gobblin.data.management.policy.SelectBeforeTimeBasedPolicy + } + + version.finder.class=gobblin.data.management.version.finder.DatePartitionHiveVersionFinder + + hive { + partition { + key.name=partition_column_name + value.datetime.pattern=yyyy-MM-dd + } + } +} http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/5a896d23/gobblin-data-management/config-example/hdfs-gobblin-config-store/user/root/gobblin-config-management/_CONFIG_STORE/1.0/tags/retention/timebased/main.conf ---------------------------------------------------------------------- diff --git a/gobblin-data-management/config-example/hdfs-gobblin-config-store/user/root/gobblin-config-management/_CONFIG_STORE/1.0/tags/retention/timebased/main.conf b/gobblin-data-management/config-example/hdfs-gobblin-config-store/user/root/gobblin-config-management/_CONFIG_STORE/1.0/tags/retention/timebased/main.conf new file mode 100644 index 0000000..ad20ad2 --- /dev/null +++ b/gobblin-data-management/config-example/hdfs-gobblin-config-store/user/root/gobblin-config-management/_CONFIG_STORE/1.0/tags/retention/timebased/main.conf @@ -0,0 +1,31 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +gobblin.retention : { + + dataset : { + finder.class=gobblin.data.management.retention.profile.ManagedCleanableDatasetFinder + } + + selection : { + policy.class=gobblin.data.management.policy.SelectBeforeTimeBasedPolicy + } + + version : { + finder.class=gobblin.data.management.version.finder.GlobModTimeDatasetVersionFinder + } +} http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/5a896d23/gobblin-data-management/config-example/hdfs-gobblin-config-store/user/root/gobblin-config-management/_CONFIG_STORE/store-metadata.conf ---------------------------------------------------------------------- diff --git a/gobblin-data-management/config-example/hdfs-gobblin-config-store/user/root/gobblin-config-management/_CONFIG_STORE/store-metadata.conf b/gobblin-data-management/config-example/hdfs-gobblin-config-store/user/root/gobblin-config-management/_CONFIG_STORE/store-metadata.conf new file mode 100644 index 0000000..d4e32ac --- /dev/null +++ b/gobblin-data-management/config-example/hdfs-gobblin-config-store/user/root/gobblin-config-management/_CONFIG_STORE/store-metadata.conf @@ -0,0 +1,18 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +config.hdfs.store.version.current=1.0 http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/5a896d23/gobblin-data-management/src/main/java/gobblin/runtime/retention/DatasetCleanerCli.java ---------------------------------------------------------------------- diff --git a/gobblin-data-management/src/main/java/gobblin/runtime/retention/DatasetCleanerCli.java b/gobblin-data-management/src/main/java/gobblin/runtime/retention/DatasetCleanerCli.java new file mode 100644 index 0000000..f1ca40c --- /dev/null +++ b/gobblin-data-management/src/main/java/gobblin/runtime/retention/DatasetCleanerCli.java @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package gobblin.runtime.retention; + +import gobblin.annotation.Alias; +import gobblin.data.management.retention.DatasetCleaner; +import gobblin.runtime.cli.CliApplication; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.CommandLineParser; +import org.apache.commons.cli.DefaultParser; +import org.apache.commons.cli.HelpFormatter; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; + +import java.io.FileInputStream; +import java.io.IOException; +import java.util.Arrays; +import java.util.Properties; + + +@Alias(value = "cleaner", description = "Data retention utility") +public class DatasetCleanerCli implements CliApplication { + private static final Option CLEANER_CONFIG = + Option.builder("c").longOpt("config").hasArg().required().desc("DatasetCleaner configuration").build(); + + @Override + public void run(String[] args) { + try { + Properties properties = readProperties(parseConfigLocation(args)); + DatasetCleaner datasetCleaner = new DatasetCleaner(FileSystem.get(new Configuration()), properties); + datasetCleaner.clean(); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + private Properties readProperties(String fileLocation) { + try { + Properties prop = new Properties(); + FileInputStream input = new FileInputStream(fileLocation); + prop.load(input); + input.close(); + return prop; + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + private String parseConfigLocation(String[] args) { + Options options = new Options(); + options.addOption(CLEANER_CONFIG); + + CommandLine cli; + try { + CommandLineParser parser = new DefaultParser(); + cli = parser.parse(options, Arrays.copyOfRange(args, 1, args.length)); + } catch (ParseException pe) { + System.out.println("Command line parse exception: " + pe.getMessage()); + printUsage(options); + throw new RuntimeException(pe); + } + return cli.getOptionValue(CLEANER_CONFIG.getOpt()); + } + + private void printUsage(Options options) { + HelpFormatter formatter = new HelpFormatter(); + + String usage = "DatasetCleaner configuration "; + formatter.printHelp(usage, options); + } +} http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/5a896d23/gobblin-docs/data-management/Gobblin-Retention.md ---------------------------------------------------------------------- diff --git a/gobblin-docs/data-management/Gobblin-Retention.md b/gobblin-docs/data-management/Gobblin-Retention.md index e29b81f..7aa51c5 100644 --- a/gobblin-docs/data-management/Gobblin-Retention.md +++ b/gobblin-docs/data-management/Gobblin-Retention.md @@ -145,6 +145,9 @@ gobblin.retention : { } </pre> +### Examples +Browse the [gobblin-data-management/config-example](/gobblin-data-management/config-example) directory to see example configuration. + ## Supported Retention Configurations Below is a list of ready to use supported retention configurations. But users can always implement their own ```DatasetFinder```,```VersionFinder``` and ```VersionSelectionPolicy``` and plug it in.
