Changed package from gobblin to org.apache.gobblin in docs and pull files without quotes
Project: http://git-wip-us.apache.org/repos/asf/incubator-gobblin/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-gobblin/commit/fcfad779 Tree: http://git-wip-us.apache.org/repos/asf/incubator-gobblin/tree/fcfad779 Diff: http://git-wip-us.apache.org/repos/asf/incubator-gobblin/diff/fcfad779 Branch: refs/heads/master Commit: fcfad779545b5e2af5cdc0a04452f6ef821e7774 Parents: 2717a51 Author: Abhishek Tiwari <[email protected]> Authored: Sun Jul 30 19:24:56 2017 -0700 Committer: Abhishek Tiwari <[email protected]> Committed: Sun Jul 30 19:24:56 2017 -0700 ---------------------------------------------------------------------- conf/aws/application.conf | 4 +- conf/gobblin-mapreduce.properties | 2 +- conf/gobblin-standalone.properties | 2 +- conf/standalone/application.conf | 2 +- conf/yarn/application.conf | 2 +- .../GobblinAWSClusterLauncherTest.conf | 4 +- .../resources/GobblinClusterKillTestJob.conf | 8 ++-- .../resources/GobblinHelixJobLauncherTest.conf | 6 +-- .../src/test/resources/reference.conf | 2 +- .../1.0/tags/retention/hive/main.conf | 6 +-- .../1.0/tags/retention/timebased/main.conf | 6 +-- .../action/MultiAccessControlAction.java | 2 +- .../dataset/ConfigurableCleanableDataset.java | 4 +- .../main/resources/data-management.properties | 6 +-- .../testCombinePolicy/retention.job | 10 ++--- .../testCombinePolicy/selection.conf | 6 +-- .../daily-retention.job | 4 +- .../hive-retention.job | 6 +-- .../testHiveTimeBasedRetention/replacement.conf | 6 +-- .../testHiveTimeBasedRetention/selection.conf | 6 +-- .../hourly-retention.job | 4 +- .../daily-retention-with-accessControl.conf | 8 ++-- .../daily-hourly-retention.conf | 6 +-- .../testNewestKRetention/retention.conf | 6 +-- .../testNewestKRetention/selection.conf | 6 +-- .../testTimeBasedAccessControl/selection.conf | 6 +-- .../testTimeBasedRetention/retention.conf | 6 +-- .../testTimeBasedRetention/selection.conf | 6 +-- gobblin-docs/case-studies/Hive-Distcp.md | 12 +++--- .../case-studies/Kafka-HDFS-Ingestion.md | 16 ++++---- gobblin-docs/data-management/DistcpNgEvents.md | 4 +- .../data-management/Gobblin-Retention.md | 42 ++++++++++---------- gobblin-docs/sinks/AvroHdfsDataWriter.md | 2 +- gobblin-docs/sinks/ConsoleWriter.md | 2 +- gobblin-docs/sinks/Gobblin-JDBC-Writer.md | 12 +++--- gobblin-docs/sinks/Kafka.md | 4 +- gobblin-docs/sinks/SimpleBytesWriter.md | 2 +- gobblin-docs/user-guide/Compaction.md | 6 +-- .../Configuration-Properties-Glossary.md | 4 +- gobblin-docs/user-guide/Gobblin-on-Yarn.md | 4 +- gobblin-docs/user-guide/Gobblin-template.md | 10 ++--- .../user-guide/Working-with-the-ForkOperator.md | 2 +- .../src/main/resources/avro-eventhub.job | 8 ++-- .../src/main/resources/avro-to-mysql.pull | 8 ++-- .../src/main/resources/csvToAvro.conf | 2 +- .../src/main/resources/distcp-hive.pull | 14 +++---- .../src/main/resources/distcpFromS3.job | 18 ++++----- .../src/main/resources/distcpToS3.job | 18 ++++----- .../resources/hdfs-monthly-to-hdfs-daily.pull | 10 ++--- .../src/main/resources/hive-avro-to-orc.pull | 10 ++--- .../src/main/resources/kafka-console.pull | 12 +++--- .../src/main/resources/simplejson.pull | 2 +- .../main/resources/streaming-kafka-console.pull | 6 +-- .../main/resources/streaming-kafka-kafka.pull | 10 ++--- .../main/resources/streaming-test-kafka.pull | 12 +++--- .../src/main/resources/wikipedia-console.pull | 10 ++--- .../src/main/resources/wikipedia-kafka.pull | 14 +++---- .../src/main/resources/wikipedia-orc.pull | 2 +- .../src/main/resources/wikipedia.pull | 10 ++--- .../src/main/resources/wikipedia.template | 8 ++-- .../metrics/ConsoleEventReporterFactory.java | 2 +- .../gobblin/metrics/ConsoleReporterFactory.java | 2 +- .../resource/job-props/testKafkaIngest.pull | 12 +++--- .../gobblin-oozie-example-system.properties | 2 +- ...obblin-oozie-mr-example-sysconfig.properties | 2 +- .../main/resources/templates/distcp.template | 12 +++--- .../resources/templates/gobblin-kafka.template | 8 ++-- .../resources/templates/hello-world.template | 4 +- .../resources/templates/kafka-to-kafka.template | 8 ++-- .../apache/gobblin/runtime/TaskContextTest.java | 2 +- .../brokerTest/SimpleHelloWorldJob.jobconf | 4 +- .../instance/SimpleHelloWorldJob.jobconf | 4 +- .../instance/SimpleHelloWorldJob.template | 4 +- .../resources/mr-job-conf/GobblinMRTest.pull | 6 +-- .../resources/templates/distcp-ng-hive.template | 8 ++-- .../test/resources/templates/distcp-ng.template | 14 +++---- .../templates/performanceTest.template | 2 +- .../templates/textFileBasedSourceTest.template | 2 +- .../dataManagement/copy/job-props/copy.pull | 10 ++--- .../runtime_test/skip_workunits_test.properties | 2 +- .../resource/job-conf/GobblinTest1.pull | 2 +- .../resource/job-conf/GobblinTest2.pull | 2 +- .../resource/job-conf/GobblinTest3.pull | 2 +- .../resource/mr-job-conf/GobblinMRTest.pull | 2 +- gobblin-yarn/src/test/resources/reference.conf | 2 +- 85 files changed, 279 insertions(+), 279 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/fcfad779/conf/aws/application.conf ---------------------------------------------------------------------- diff --git a/conf/aws/application.conf b/conf/aws/application.conf index ab16438..be4e04c 100644 --- a/conf/aws/application.conf +++ b/conf/aws/application.conf @@ -126,7 +126,7 @@ writer.staging.dir=${gobblin.aws.work.dir}"/task-staging" writer.output.dir=${gobblin.aws.work.dir}"/task-output" ## Data publisher related configuration properties -data.publisher.type=gobblin.publisher.BaseDataPublisher +data.publisher.type=org.apache.gobblin.publisher.BaseDataPublisher data.publisher.final.dir=${gobblin.aws.work.dir}"/job-output" data.publisher.replace.final.dir=false @@ -150,7 +150,7 @@ task.status.reportintervalinms=1000 ## Enable metrics / events metrics.enabled=true -metrics.reporting.custom.builders=gobblin.metrics.ConsoleEventReporterFactory +metrics.reporting.custom.builders=org.apache.gobblin.metrics.ConsoleEventReporterFactory ## All Gobblin Jars and Configuration on S3 gobblin.aws.master.s3.conf.files="application.conf,log4j-aws.properties,quartz.properties" http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/fcfad779/conf/gobblin-mapreduce.properties ---------------------------------------------------------------------- diff --git a/conf/gobblin-mapreduce.properties b/conf/gobblin-mapreduce.properties index 04cc5b2..4c704c8 100644 --- a/conf/gobblin-mapreduce.properties +++ b/conf/gobblin-mapreduce.properties @@ -36,7 +36,7 @@ writer.staging.dir=${env:GOBBLIN_WORK_DIR}/task-staging writer.output.dir=${env:GOBBLIN_WORK_DIR}/task-output # Data publisher related configuration properties -data.publisher.type=gobblin.publisher.BaseDataPublisher +data.publisher.type=org.apache.gobblin.publisher.BaseDataPublisher data.publisher.final.dir=${env:GOBBLIN_WORK_DIR}/job-output data.publisher.replace.final.dir=false http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/fcfad779/conf/gobblin-standalone.properties ---------------------------------------------------------------------- diff --git a/conf/gobblin-standalone.properties b/conf/gobblin-standalone.properties index f050798..8b26fd8 100644 --- a/conf/gobblin-standalone.properties +++ b/conf/gobblin-standalone.properties @@ -36,7 +36,7 @@ writer.staging.dir=${env:GOBBLIN_WORK_DIR}/task-staging writer.output.dir=${env:GOBBLIN_WORK_DIR}/task-output # Data publisher related configuration properties -data.publisher.type=gobblin.publisher.BaseDataPublisher +data.publisher.type=org.apache.gobblin.publisher.BaseDataPublisher data.publisher.final.dir=${env:GOBBLIN_WORK_DIR}/job-output data.publisher.replace.final.dir=false http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/fcfad779/conf/standalone/application.conf ---------------------------------------------------------------------- diff --git a/conf/standalone/application.conf b/conf/standalone/application.conf index 9e90f1d..fa601dd 100644 --- a/conf/standalone/application.conf +++ b/conf/standalone/application.conf @@ -32,7 +32,7 @@ writer.staging.dir=${gobblin.cluster.work.dir}/task-staging writer.output.dir=${gobblin.cluster.work.dir}/task-output # Data publisher related configuration properties -data.publisher.type=gobblin.publisher.BaseDataPublisher +data.publisher.type=org.apache.gobblin.publisher.BaseDataPublisher data.publisher.final.dir=${gobblin.cluster.work.dir}/job-output data.publisher.replace.final.dir=false http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/fcfad779/conf/yarn/application.conf ---------------------------------------------------------------------- diff --git a/conf/yarn/application.conf b/conf/yarn/application.conf index 47023fa..ebf6187 100644 --- a/conf/yarn/application.conf +++ b/conf/yarn/application.conf @@ -43,7 +43,7 @@ writer.staging.dir=${gobblin.yarn.work.dir}/task-staging writer.output.dir=${gobblin.yarn.work.dir}/task-output # Data publisher related configuration properties -data.publisher.type=gobblin.publisher.BaseDataPublisher +data.publisher.type=org.apache.gobblin.publisher.BaseDataPublisher data.publisher.final.dir=${gobblin.yarn.work.dir}/job-output data.publisher.replace.final.dir=false http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/fcfad779/gobblin-aws/src/test/resources/GobblinAWSClusterLauncherTest.conf ---------------------------------------------------------------------- diff --git a/gobblin-aws/src/test/resources/GobblinAWSClusterLauncherTest.conf b/gobblin-aws/src/test/resources/GobblinAWSClusterLauncherTest.conf index 4e2024d..8a4904a 100644 --- a/gobblin-aws/src/test/resources/GobblinAWSClusterLauncherTest.conf +++ b/gobblin-aws/src/test/resources/GobblinAWSClusterLauncherTest.conf @@ -119,7 +119,7 @@ writer.staging.dir=${gobblin.aws.work.dir}"/task-staging" writer.output.dir=${gobblin.aws.work.dir}"/task-output" ## Data publisher related configuration properties -data.publisher.type=gobblin.publisher.BaseDataPublisher +data.publisher.type=org.apache.gobblin.publisher.BaseDataPublisher data.publisher.final.dir=${gobblin.aws.work.dir}"/job-output" data.publisher.replace.final.dir=false @@ -143,7 +143,7 @@ task.status.reportintervalinms=1000 ## Enable metrics / events metrics.enabled=true -metrics.reporting.custom.builders=gobblin.metrics.ConsoleEventReporterFactory +metrics.reporting.custom.builders=org.apache.gobblin.metrics.ConsoleEventReporterFactory ## All Gobblin Jars and Configuration on S3 gobblin.aws.master.s3.conf.files="application.conf,log4j-aws.properties,quartz.properties" http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/fcfad779/gobblin-cluster/src/test/resources/GobblinClusterKillTestJob.conf ---------------------------------------------------------------------- diff --git a/gobblin-cluster/src/test/resources/GobblinClusterKillTestJob.conf b/gobblin-cluster/src/test/resources/GobblinClusterKillTestJob.conf index 3071fb5..6d908e3 100644 --- a/gobblin-cluster/src/test/resources/GobblinClusterKillTestJob.conf +++ b/gobblin-cluster/src/test/resources/GobblinClusterKillTestJob.conf @@ -26,15 +26,15 @@ job.group=GobblinSamples job.description=The "Hello World" Gobblin job # Source, Converters, Writer, Publisher -source.class=gobblin.util.test.HelloWorldSource +source.class=org.apache.gobblin.util.test.HelloWorldSource writer.destination.type=HDFS -writer.builder.class=gobblin.writer.SimpleDataWriterBuilder +writer.builder.class=org.apache.gobblin.writer.SimpleDataWriterBuilder writer.output.format=txt #gobblin.writer.throttle_type=QPS #gobblin.writer.throttle_rate=1 -data.publisher.type=gobblin.publisher.BaseDataPublisher -converter.classes=gobblin.converter.string.StringToBytesConverter +data.publisher.type=org.apache.gobblin.publisher.BaseDataPublisher +converter.classes=org.apache.gobblin.converter.string.StringToBytesConverter data.publisher.final.dir=${gobblin.workDir}/job-output http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/fcfad779/gobblin-cluster/src/test/resources/GobblinHelixJobLauncherTest.conf ---------------------------------------------------------------------- diff --git a/gobblin-cluster/src/test/resources/GobblinHelixJobLauncherTest.conf b/gobblin-cluster/src/test/resources/GobblinHelixJobLauncherTest.conf index 8fe4cd2..6251c3d 100644 --- a/gobblin-cluster/src/test/resources/GobblinHelixJobLauncherTest.conf +++ b/gobblin-cluster/src/test/resources/GobblinHelixJobLauncherTest.conf @@ -25,9 +25,9 @@ gobblin.cluster.zk.connection.string="localhost:3084" # Gobblin job configuration properties job.name=GobblinHelixJobLauncherTest job.group=test -source.class=gobblin.example.simplejson.SimpleJsonSource -converter.classes=gobblin.example.simplejson.SimpleJsonConverter +source.class=org.apache.gobblin.example.simplejson.SimpleJsonSource +converter.classes=org.apache.gobblin.example.simplejson.SimpleJsonConverter writer.file.name="foo.avro" writer.file.path=avro -writer.builder.class=gobblin.writer.AvroDataWriterBuilder +writer.builder.class=org.apache.gobblin.writer.AvroDataWriterBuilder source.schema="{\"namespace\":\"example.avro\", \"type\":\"record\", \"name\":\"User\", \"fields\":[{\"name\":\"name\", \"type\":\"string\"}, {\"name\":\"favorite_number\", \"type\":\"int\"}, {\"name\":\"favorite_color\", \"type\":\"string\"}]}" http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/fcfad779/gobblin-cluster/src/test/resources/reference.conf ---------------------------------------------------------------------- diff --git a/gobblin-cluster/src/test/resources/reference.conf b/gobblin-cluster/src/test/resources/reference.conf index 7c01967..a7a4eab 100644 --- a/gobblin-cluster/src/test/resources/reference.conf +++ b/gobblin-cluster/src/test/resources/reference.conf @@ -29,7 +29,7 @@ writer.staging.dir=${gobblin.cluster.work.dir}/task-staging writer.output.dir=${gobblin.cluster.work.dir}/task-output # Data publisher related configuration properties -data.publisher.type=gobblin.publisher.BaseDataPublisher +data.publisher.type=org.apache.gobblin.publisher.BaseDataPublisher data.publisher.final.dir=${gobblin.cluster.work.dir}/job-output data.publisher.replace.final.dir=false http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/fcfad779/gobblin-data-management/config-example/hdfs-gobblin-config-store/user/root/gobblin-config-management/_CONFIG_STORE/1.0/tags/retention/hive/main.conf ---------------------------------------------------------------------- diff --git a/gobblin-data-management/config-example/hdfs-gobblin-config-store/user/root/gobblin-config-management/_CONFIG_STORE/1.0/tags/retention/hive/main.conf b/gobblin-data-management/config-example/hdfs-gobblin-config-store/user/root/gobblin-config-management/_CONFIG_STORE/1.0/tags/retention/hive/main.conf index 14fd2d5..c1f26f0 100644 --- a/gobblin-data-management/config-example/hdfs-gobblin-config-store/user/root/gobblin-config-management/_CONFIG_STORE/1.0/tags/retention/hive/main.conf +++ b/gobblin-data-management/config-example/hdfs-gobblin-config-store/user/root/gobblin-config-management/_CONFIG_STORE/1.0/tags/retention/hive/main.conf @@ -20,14 +20,14 @@ gobblin.retention : { is.blacklisted=false dataset : { - finder.class=gobblin.data.management.retention.dataset.finder.CleanableHiveDatasetFinder + finder.class=org.apache.gobblin.data.management.retention.dataset.finder.CleanableHiveDatasetFinder } selection : { - policy.class=gobblin.data.management.policy.SelectBeforeTimeBasedPolicy + policy.class=org.apache.gobblin.data.management.policy.SelectBeforeTimeBasedPolicy } - version.finder.class=gobblin.data.management.version.finder.DatePartitionHiveVersionFinder + version.finder.class=org.apache.gobblin.data.management.version.finder.DatePartitionHiveVersionFinder hive { partition { http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/fcfad779/gobblin-data-management/config-example/hdfs-gobblin-config-store/user/root/gobblin-config-management/_CONFIG_STORE/1.0/tags/retention/timebased/main.conf ---------------------------------------------------------------------- diff --git a/gobblin-data-management/config-example/hdfs-gobblin-config-store/user/root/gobblin-config-management/_CONFIG_STORE/1.0/tags/retention/timebased/main.conf b/gobblin-data-management/config-example/hdfs-gobblin-config-store/user/root/gobblin-config-management/_CONFIG_STORE/1.0/tags/retention/timebased/main.conf index ad20ad2..ae69465 100644 --- a/gobblin-data-management/config-example/hdfs-gobblin-config-store/user/root/gobblin-config-management/_CONFIG_STORE/1.0/tags/retention/timebased/main.conf +++ b/gobblin-data-management/config-example/hdfs-gobblin-config-store/user/root/gobblin-config-management/_CONFIG_STORE/1.0/tags/retention/timebased/main.conf @@ -18,14 +18,14 @@ gobblin.retention : { dataset : { - finder.class=gobblin.data.management.retention.profile.ManagedCleanableDatasetFinder + finder.class=org.apache.gobblin.data.management.retention.profile.ManagedCleanableDatasetFinder } selection : { - policy.class=gobblin.data.management.policy.SelectBeforeTimeBasedPolicy + policy.class=org.apache.gobblin.data.management.policy.SelectBeforeTimeBasedPolicy } version : { - finder.class=gobblin.data.management.version.finder.GlobModTimeDatasetVersionFinder + finder.class=org.apache.gobblin.data.management.version.finder.GlobModTimeDatasetVersionFinder } } http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/fcfad779/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/retention/action/MultiAccessControlAction.java ---------------------------------------------------------------------- diff --git a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/retention/action/MultiAccessControlAction.java b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/retention/action/MultiAccessControlAction.java index 4a7e1d3..35bd2e1 100644 --- a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/retention/action/MultiAccessControlAction.java +++ b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/retention/action/MultiAccessControlAction.java @@ -56,7 +56,7 @@ public class MultiAccessControlAction extends RetentionAction { * * restricted { * selection { - * policy.class=gobblin.data.management.policy.SelectBeforeTimeBasedPolicy + * policy.class=org.apache.gobblin.data.management.policy.SelectBeforeTimeBasedPolicy * timeBased.lookbackTime = 7d * } * mode : 750 http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/fcfad779/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/retention/dataset/ConfigurableCleanableDataset.java ---------------------------------------------------------------------- diff --git a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/retention/dataset/ConfigurableCleanableDataset.java b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/retention/dataset/ConfigurableCleanableDataset.java index 3a3a160..1c47727 100644 --- a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/retention/dataset/ConfigurableCleanableDataset.java +++ b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/retention/dataset/ConfigurableCleanableDataset.java @@ -85,7 +85,7 @@ public class ConfigurableCleanableDataset<T extends FileSystemDatasetVersion> * timeBased.lookbackTime = 5d * } * version : { - * finder.class=gobblin.data.management.version.finder.DateTimeDatasetVersionFinder + * finder.class=org.apache.gobblin.data.management.version.finder.DateTimeDatasetVersionFinder * pattern="hourly/*/" * } * }, @@ -95,7 +95,7 @@ public class ConfigurableCleanableDataset<T extends FileSystemDatasetVersion> * timeBased.lookbackTime = 20d * } * version : { - * finder.class=gobblin.data.management.version.finder.DateTimeDatasetVersionFinder + * finder.class=org.apache.gobblin.data.management.version.finder.DateTimeDatasetVersionFinder * pattern="daily/*/" * } * } http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/fcfad779/gobblin-data-management/src/main/resources/data-management.properties ---------------------------------------------------------------------- diff --git a/gobblin-data-management/src/main/resources/data-management.properties b/gobblin-data-management/src/main/resources/data-management.properties index 34d0eec..127abfb 100644 --- a/gobblin-data-management/src/main/resources/data-management.properties +++ b/gobblin-data-management/src/main/resources/data-management.properties @@ -46,7 +46,7 @@ gobblin.retention.delete.empty.directories=true # For tracking data # --------------------------------------- -gobblin.retention.dataset.profile.class=gobblin.data.management.retention.profile.TrackingDatasetProfile +gobblin.retention.dataset.profile.class=org.apache.gobblin.data.management.retention.profile.TrackingDatasetProfile # Glob pattern to look for datasets. gobblin.retention.dataset.pattern=/data/datasets/* # Datetime directory pattern. @@ -62,7 +62,7 @@ gobblin.retention.minutes.retained=1440 # For snapshot data # ---------------------------------------- -# gobblin.retention.dataset.profile.class=gobblin.data.management.retention.profile.SnapshotDatasetProfile +# gobblin.retention.dataset.profile.class=org.apache.gobblin.data.management.retention.profile.SnapshotDatasetProfile # Glob pattern to look for datasets. # gobblin.retention.dataset.pattern=/data/datasets/* @@ -80,6 +80,6 @@ gobblin.retention.minutes.retained=1440 # ========================================= # Class of trash snapshot cleanup policy. Default: TimeBasedSnapshotCleanupPolicy -gobblin.trash.snapshot.cleanup.policy.class=gobblin.data.management.trash.TimeBasedSnapshotCleanupPolicy +gobblin.trash.snapshot.cleanup.policy.class=org.apache.gobblin.data.management.trash.TimeBasedSnapshotCleanupPolicy # Trash snapshot retention in minuted. Default: 1440 / one day gobblin.trash.snapshot.retention.minutes=1440 \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/fcfad779/gobblin-data-management/src/test/resources/retentionIntegrationTest/testCombinePolicy/retention.job ---------------------------------------------------------------------- diff --git a/gobblin-data-management/src/test/resources/retentionIntegrationTest/testCombinePolicy/retention.job b/gobblin-data-management/src/test/resources/retentionIntegrationTest/testCombinePolicy/retention.job index 54c2e7e..942ca6d 100644 --- a/gobblin-data-management/src/test/resources/retentionIntegrationTest/testCombinePolicy/retention.job +++ b/gobblin-data-management/src/test/resources/retentionIntegrationTest/testCombinePolicy/retention.job @@ -7,10 +7,10 @@ gobblin.retention.watermark.regex=^([0-9]*)\- gobblin.retention.dataset.pattern=${testNameTempPath}/user/gobblin/snapshots/*/* gobblin.retention.dataset.blacklist= -gobblin.retention.dataset.profile.class=gobblin.data.management.retention.profile.GlobCleanableDatasetFinder -gobblin.retention.version.finder.class=gobblin.data.management.retention.version.finder.UnixTimestampVersionFinder +gobblin.retention.dataset.profile.class=org.apache.gobblin.data.management.retention.profile.GlobCleanableDatasetFinder +gobblin.retention.version.finder.class=org.apache.gobblin.data.management.retention.version.finder.UnixTimestampVersionFinder -gobblin.retention.retention.policy.class=gobblin.data.management.retention.policy.CombineRetentionPolicy -gobblin.retention.combine.retention.policy.class.1=gobblin.data.management.retention.policy.NewestKRetentionPolicy -gobblin.retention.combine.retention.policy.class.2=gobblin.data.management.retention.policy.TimeBasedRetentionPolicy +gobblin.retention.retention.policy.class=org.apache.gobblin.data.management.retention.policy.CombineRetentionPolicy +gobblin.retention.combine.retention.policy.class.1=org.apache.gobblin.data.management.retention.policy.NewestKRetentionPolicy +gobblin.retention.combine.retention.policy.class.2=org.apache.gobblin.data.management.retention.policy.TimeBasedRetentionPolicy gobblin.retention.combine.retention.policy.delete.sets.combine.operation=INTERSECT \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/fcfad779/gobblin-data-management/src/test/resources/retentionIntegrationTest/testCombinePolicy/selection.conf ---------------------------------------------------------------------- diff --git a/gobblin-data-management/src/test/resources/retentionIntegrationTest/testCombinePolicy/selection.conf b/gobblin-data-management/src/test/resources/retentionIntegrationTest/testCombinePolicy/selection.conf index bad65e5..8f521ae 100644 --- a/gobblin-data-management/src/test/resources/retentionIntegrationTest/testCombinePolicy/selection.conf +++ b/gobblin-data-management/src/test/resources/retentionIntegrationTest/testCombinePolicy/selection.conf @@ -3,11 +3,11 @@ gobblin.retention : { dataset : { # ${testNameTempPath} is resolved at runtime by the test pattern=${testNameTempPath}"/user/gobblin/snapshots/*/*" - finder.class=gobblin.data.management.retention.profile.ManagedCleanableDatasetFinder + finder.class=org.apache.gobblin.data.management.retention.profile.ManagedCleanableDatasetFinder } selection : { - policy.class=gobblin.data.management.policy.CombineSelectionPolicy + policy.class=org.apache.gobblin.data.management.policy.CombineSelectionPolicy combine.operation=INTERSECT combine.policy.classes=[ gobblin.data.management.policy.SelectBeforeTimeBasedPolicy, @@ -19,6 +19,6 @@ gobblin.retention : { } version : { - finder.class=gobblin.data.management.version.finder.GlobModTimeDatasetVersionFinder + finder.class=org.apache.gobblin.data.management.version.finder.GlobModTimeDatasetVersionFinder } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/fcfad779/gobblin-data-management/src/test/resources/retentionIntegrationTest/testDailyPatternRetention/daily-retention.job ---------------------------------------------------------------------- diff --git a/gobblin-data-management/src/test/resources/retentionIntegrationTest/testDailyPatternRetention/daily-retention.job b/gobblin-data-management/src/test/resources/retentionIntegrationTest/testDailyPatternRetention/daily-retention.job index 8dcdb0e..150fa1d 100644 --- a/gobblin-data-management/src/test/resources/retentionIntegrationTest/testDailyPatternRetention/daily-retention.job +++ b/gobblin-data-management/src/test/resources/retentionIntegrationTest/testDailyPatternRetention/daily-retention.job @@ -1,7 +1,7 @@ type=hadoopJava -job.class=gobblin.data.management.retention.DatasetCleanerJob +job.class=org.apache.gobblin.data.management.retention.DatasetCleanerJob -gobblin.retention.dataset.profile.class=gobblin.data.management.retention.profile.TrackingDatasetProfile +gobblin.retention.dataset.profile.class=org.apache.gobblin.data.management.retention.profile.TrackingDatasetProfile gobblin.retention.dataset.pattern=${testNameTempPath}/user/gobblin/data/trackingData/*/daily gobblin.retention.datetime.pattern=yyyy/MM/dd http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/fcfad779/gobblin-data-management/src/test/resources/retentionIntegrationTest/testHiveTimeBasedRetention/hive-retention.job ---------------------------------------------------------------------- diff --git a/gobblin-data-management/src/test/resources/retentionIntegrationTest/testHiveTimeBasedRetention/hive-retention.job b/gobblin-data-management/src/test/resources/retentionIntegrationTest/testHiveTimeBasedRetention/hive-retention.job index 0d7a40b..282cc27 100644 --- a/gobblin-data-management/src/test/resources/retentionIntegrationTest/testHiveTimeBasedRetention/hive-retention.job +++ b/gobblin-data-management/src/test/resources/retentionIntegrationTest/testHiveTimeBasedRetention/hive-retention.job @@ -1,10 +1,10 @@ hive.dataset.whitelist=hiveTestDb.testTable -gobblin.retention.dataset.finder.class=gobblin.data.management.retention.dataset.finder.CleanableHiveDatasetFinder +gobblin.retention.dataset.finder.class=org.apache.gobblin.data.management.retention.dataset.finder.CleanableHiveDatasetFinder -gobblin.retention.selection.policy.class=gobblin.data.management.policy.SelectBeforeTimeBasedPolicy +gobblin.retention.selection.policy.class=org.apache.gobblin.data.management.policy.SelectBeforeTimeBasedPolicy gobblin.retention.selection.timeBased.lookbackTime=7d -gobblin.retention.version.finder.class=gobblin.data.management.version.finder.DatePartitionHiveVersionFinder +gobblin.retention.version.finder.class=org.apache.gobblin.data.management.version.finder.DatePartitionHiveVersionFinder gobblin.retention.hive.partition.key.name=datepartition gobblin.retention.hive.partition.value.datetime.pattern=yyyy-MM-dd-HH http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/fcfad779/gobblin-data-management/src/test/resources/retentionIntegrationTest/testHiveTimeBasedRetention/replacement.conf ---------------------------------------------------------------------- diff --git a/gobblin-data-management/src/test/resources/retentionIntegrationTest/testHiveTimeBasedRetention/replacement.conf b/gobblin-data-management/src/test/resources/retentionIntegrationTest/testHiveTimeBasedRetention/replacement.conf index 9faf81b..99dae85 100644 --- a/gobblin-data-management/src/test/resources/retentionIntegrationTest/testHiveTimeBasedRetention/replacement.conf +++ b/gobblin-data-management/src/test/resources/retentionIntegrationTest/testHiveTimeBasedRetention/replacement.conf @@ -3,15 +3,15 @@ gobblin.retention : { is.blacklisted=false dataset : { - finder.class=gobblin.data.management.retention.dataset.finder.CleanableHiveDatasetFinder + finder.class=org.apache.gobblin.data.management.retention.dataset.finder.CleanableHiveDatasetFinder } selection : { - policy.class=gobblin.data.management.policy.SelectBeforeTimeBasedPolicy + policy.class=org.apache.gobblin.data.management.policy.SelectBeforeTimeBasedPolicy timeBased.lookbackTime=7d } - version.finder.class=gobblin.data.management.version.finder.DatePartitionHiveVersionFinder + version.finder.class=org.apache.gobblin.data.management.version.finder.DatePartitionHiveVersionFinder hive { partition { http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/fcfad779/gobblin-data-management/src/test/resources/retentionIntegrationTest/testHiveTimeBasedRetention/selection.conf ---------------------------------------------------------------------- diff --git a/gobblin-data-management/src/test/resources/retentionIntegrationTest/testHiveTimeBasedRetention/selection.conf b/gobblin-data-management/src/test/resources/retentionIntegrationTest/testHiveTimeBasedRetention/selection.conf index 7443fba..c3cd2e4 100644 --- a/gobblin-data-management/src/test/resources/retentionIntegrationTest/testHiveTimeBasedRetention/selection.conf +++ b/gobblin-data-management/src/test/resources/retentionIntegrationTest/testHiveTimeBasedRetention/selection.conf @@ -3,15 +3,15 @@ gobblin.retention : { is.blacklisted=false dataset : { - finder.class=gobblin.data.management.retention.dataset.finder.CleanableHiveDatasetFinder + finder.class=org.apache.gobblin.data.management.retention.dataset.finder.CleanableHiveDatasetFinder } selection : { - policy.class=gobblin.data.management.policy.SelectBeforeTimeBasedPolicy + policy.class=org.apache.gobblin.data.management.policy.SelectBeforeTimeBasedPolicy timeBased.lookbackTime=7d } - version.finder.class=gobblin.data.management.version.finder.DatePartitionHiveVersionFinder + version.finder.class=org.apache.gobblin.data.management.version.finder.DatePartitionHiveVersionFinder hive { partition { http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/fcfad779/gobblin-data-management/src/test/resources/retentionIntegrationTest/testHourlyPatternRetention/hourly-retention.job ---------------------------------------------------------------------- diff --git a/gobblin-data-management/src/test/resources/retentionIntegrationTest/testHourlyPatternRetention/hourly-retention.job b/gobblin-data-management/src/test/resources/retentionIntegrationTest/testHourlyPatternRetention/hourly-retention.job index 3043418..a81a0d1 100644 --- a/gobblin-data-management/src/test/resources/retentionIntegrationTest/testHourlyPatternRetention/hourly-retention.job +++ b/gobblin-data-management/src/test/resources/retentionIntegrationTest/testHourlyPatternRetention/hourly-retention.job @@ -1,7 +1,7 @@ type=hadoopJava -job.class=gobblin.data.management.retention.DatasetCleanerJob +job.class=org.apache.gobblin.data.management.retention.DatasetCleanerJob -gobblin.retention.dataset.profile.class=gobblin.data.management.retention.profile.TrackingDatasetProfile +gobblin.retention.dataset.profile.class=org.apache.gobblin.data.management.retention.profile.TrackingDatasetProfile gobblin.retention.dataset.pattern=${testNameTempPath}/user/gobblin/data/trackingData/*/hourly gobblin.retention.datetime.pattern=yyyy/MM/dd/HH http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/fcfad779/gobblin-data-management/src/test/resources/retentionIntegrationTest/testMultiVersionAccessControl/daily-retention-with-accessControl.conf ---------------------------------------------------------------------- diff --git a/gobblin-data-management/src/test/resources/retentionIntegrationTest/testMultiVersionAccessControl/daily-retention-with-accessControl.conf b/gobblin-data-management/src/test/resources/retentionIntegrationTest/testMultiVersionAccessControl/daily-retention-with-accessControl.conf index 81cd377..cb090f7 100644 --- a/gobblin-data-management/src/test/resources/retentionIntegrationTest/testMultiVersionAccessControl/daily-retention-with-accessControl.conf +++ b/gobblin-data-management/src/test/resources/retentionIntegrationTest/testMultiVersionAccessControl/daily-retention-with-accessControl.conf @@ -1,12 +1,12 @@ gobblin.retention : { - TimeBasedSelectionPolicy=gobblin.data.management.policy.SelectBeforeTimeBasedPolicy - DateTimeDatasetVersionFinder=gobblin.data.management.version.finder.DateTimeDatasetVersionFinder + TimeBasedSelectionPolicy=org.apache.gobblin.data.management.policy.SelectBeforeTimeBasedPolicy + DateTimeDatasetVersionFinder=org.apache.gobblin.data.management.version.finder.DateTimeDatasetVersionFinder # ${testNameTempPath} is resolved at runtime by the test dataset : { pattern=${testNameTempPath}"/user/gobblin/data/*" - finder.class=gobblin.data.management.retention.profile.ManagedCleanableDatasetFinder + finder.class=org.apache.gobblin.data.management.retention.profile.ManagedCleanableDatasetFinder partitions=[${gobblin.retention.daily}] } @@ -25,7 +25,7 @@ gobblin.retention : { restricted { selection { - policy.class=gobblin.data.management.policy.SelectBetweenTimeBasedPolicy + policy.class=org.apache.gobblin.data.management.policy.SelectBetweenTimeBasedPolicy timeBased.maxLookbackTime = 7d timeBased.minLookbackTime = 4d } http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/fcfad779/gobblin-data-management/src/test/resources/retentionIntegrationTest/testMultiVersionRetention/daily-hourly-retention.conf ---------------------------------------------------------------------- diff --git a/gobblin-data-management/src/test/resources/retentionIntegrationTest/testMultiVersionRetention/daily-hourly-retention.conf b/gobblin-data-management/src/test/resources/retentionIntegrationTest/testMultiVersionRetention/daily-hourly-retention.conf index 343394d..9169360 100644 --- a/gobblin-data-management/src/test/resources/retentionIntegrationTest/testMultiVersionRetention/daily-hourly-retention.conf +++ b/gobblin-data-management/src/test/resources/retentionIntegrationTest/testMultiVersionRetention/daily-hourly-retention.conf @@ -1,12 +1,12 @@ gobblin.retention : { - TimeBasedSelectionPolicy=gobblin.data.management.policy.SelectBeforeTimeBasedPolicy - DateTimeDatasetVersionFinder=gobblin.data.management.version.finder.DateTimeDatasetVersionFinder + TimeBasedSelectionPolicy=org.apache.gobblin.data.management.policy.SelectBeforeTimeBasedPolicy + DateTimeDatasetVersionFinder=org.apache.gobblin.data.management.version.finder.DateTimeDatasetVersionFinder # ${testNameTempPath} is resolved at runtime by the test dataset : { pattern=${testNameTempPath}"/user/gobblin/data/*" - finder.class=gobblin.data.management.retention.profile.ManagedCleanableDatasetFinder + finder.class=org.apache.gobblin.data.management.retention.profile.ManagedCleanableDatasetFinder partitions=[${gobblin.retention.hourly}, ${gobblin.retention.daily}] } http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/fcfad779/gobblin-data-management/src/test/resources/retentionIntegrationTest/testNewestKRetention/retention.conf ---------------------------------------------------------------------- diff --git a/gobblin-data-management/src/test/resources/retentionIntegrationTest/testNewestKRetention/retention.conf b/gobblin-data-management/src/test/resources/retentionIntegrationTest/testNewestKRetention/retention.conf index 7bbc4c1..d69c5f3 100644 --- a/gobblin-data-management/src/test/resources/retentionIntegrationTest/testNewestKRetention/retention.conf +++ b/gobblin-data-management/src/test/resources/retentionIntegrationTest/testNewestKRetention/retention.conf @@ -3,7 +3,7 @@ gobblin.retention : { dataset.pattern=${testNameTempPath}"/user/gobblin/*" newestK.versions.retained = 2 - dataset.finder.class=gobblin.data.management.retention.profile.ManagedCleanableDatasetFinder - retention.policy.class=gobblin.data.management.retention.policy.NewestKRetentionPolicy - version.finder.class=gobblin.data.management.retention.version.finder.GlobModTimeDatasetVersionFinder + dataset.finder.class=org.apache.gobblin.data.management.retention.profile.ManagedCleanableDatasetFinder + retention.policy.class=org.apache.gobblin.data.management.retention.policy.NewestKRetentionPolicy + version.finder.class=org.apache.gobblin.data.management.retention.version.finder.GlobModTimeDatasetVersionFinder } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/fcfad779/gobblin-data-management/src/test/resources/retentionIntegrationTest/testNewestKRetention/selection.conf ---------------------------------------------------------------------- diff --git a/gobblin-data-management/src/test/resources/retentionIntegrationTest/testNewestKRetention/selection.conf b/gobblin-data-management/src/test/resources/retentionIntegrationTest/testNewestKRetention/selection.conf index 2e4d01a..a6c306c 100644 --- a/gobblin-data-management/src/test/resources/retentionIntegrationTest/testNewestKRetention/selection.conf +++ b/gobblin-data-management/src/test/resources/retentionIntegrationTest/testNewestKRetention/selection.conf @@ -3,15 +3,15 @@ gobblin.retention : { dataset : { # ${testNameTempPath} is resolved at runtime by the test pattern=${testNameTempPath}"/user/gobblin/*" - finder.class=gobblin.data.management.retention.profile.ManagedCleanableDatasetFinder + finder.class=org.apache.gobblin.data.management.retention.profile.ManagedCleanableDatasetFinder } selection : { - policy.class=gobblin.data.management.policy.NewestKSelectionPolicy + policy.class=org.apache.gobblin.data.management.policy.NewestKSelectionPolicy newestK.versionsNotSelected=2 } version : { - finder.class=gobblin.data.management.version.finder.GlobModTimeDatasetVersionFinder + finder.class=org.apache.gobblin.data.management.version.finder.GlobModTimeDatasetVersionFinder } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/fcfad779/gobblin-data-management/src/test/resources/retentionIntegrationTest/testTimeBasedAccessControl/selection.conf ---------------------------------------------------------------------- diff --git a/gobblin-data-management/src/test/resources/retentionIntegrationTest/testTimeBasedAccessControl/selection.conf b/gobblin-data-management/src/test/resources/retentionIntegrationTest/testTimeBasedAccessControl/selection.conf index 9357a96..a4d465a 100644 --- a/gobblin-data-management/src/test/resources/retentionIntegrationTest/testTimeBasedAccessControl/selection.conf +++ b/gobblin-data-management/src/test/resources/retentionIntegrationTest/testTimeBasedAccessControl/selection.conf @@ -3,11 +3,11 @@ gobblin.retention : { dataset : { # ${testNameTempPath} is resolved at runtime by the test pattern=${testNameTempPath}"/user/gobblin/*" - finder.class=gobblin.data.management.retention.profile.ManagedCleanableDatasetFinder + finder.class=org.apache.gobblin.data.management.retention.profile.ManagedCleanableDatasetFinder } version : { - finder.class=gobblin.data.management.version.finder.GlobModTimeDatasetVersionFinder + finder.class=org.apache.gobblin.data.management.version.finder.GlobModTimeDatasetVersionFinder } accessControl { @@ -15,7 +15,7 @@ gobblin.retention : { restricted { selection { - policy.class=gobblin.data.management.policy.SelectBeforeTimeBasedPolicy + policy.class=org.apache.gobblin.data.management.policy.SelectBeforeTimeBasedPolicy timeBased.lookbackTime = 7d } mode : 777 http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/fcfad779/gobblin-data-management/src/test/resources/retentionIntegrationTest/testTimeBasedRetention/retention.conf ---------------------------------------------------------------------- diff --git a/gobblin-data-management/src/test/resources/retentionIntegrationTest/testTimeBasedRetention/retention.conf b/gobblin-data-management/src/test/resources/retentionIntegrationTest/testTimeBasedRetention/retention.conf index 949e133..0cdf7d1 100644 --- a/gobblin-data-management/src/test/resources/retentionIntegrationTest/testTimeBasedRetention/retention.conf +++ b/gobblin-data-management/src/test/resources/retentionIntegrationTest/testTimeBasedRetention/retention.conf @@ -3,7 +3,7 @@ gobblin.retention : { dataset.pattern=${testNameTempPath}"/user/gobblin/*" timebased.duration = P7D - dataset.finder.class=gobblin.data.management.retention.profile.ManagedCleanableDatasetFinder - retention.policy.class=gobblin.data.management.retention.policy.TimeBasedRetentionPolicy - version.finder.class=gobblin.data.management.retention.version.finder.GlobModTimeDatasetVersionFinder + dataset.finder.class=org.apache.gobblin.data.management.retention.profile.ManagedCleanableDatasetFinder + retention.policy.class=org.apache.gobblin.data.management.retention.policy.TimeBasedRetentionPolicy + version.finder.class=org.apache.gobblin.data.management.retention.version.finder.GlobModTimeDatasetVersionFinder } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/fcfad779/gobblin-data-management/src/test/resources/retentionIntegrationTest/testTimeBasedRetention/selection.conf ---------------------------------------------------------------------- diff --git a/gobblin-data-management/src/test/resources/retentionIntegrationTest/testTimeBasedRetention/selection.conf b/gobblin-data-management/src/test/resources/retentionIntegrationTest/testTimeBasedRetention/selection.conf index fc579d5..069c39d 100644 --- a/gobblin-data-management/src/test/resources/retentionIntegrationTest/testTimeBasedRetention/selection.conf +++ b/gobblin-data-management/src/test/resources/retentionIntegrationTest/testTimeBasedRetention/selection.conf @@ -3,15 +3,15 @@ gobblin.retention : { dataset : { # ${testNameTempPath} is resolved at runtime by the test pattern=${testNameTempPath}"/user/gobblin/*" - finder.class=gobblin.data.management.retention.profile.ManagedCleanableDatasetFinder + finder.class=org.apache.gobblin.data.management.retention.profile.ManagedCleanableDatasetFinder } selection : { - policy.class=gobblin.data.management.policy.SelectBeforeTimeBasedPolicy + policy.class=org.apache.gobblin.data.management.policy.SelectBeforeTimeBasedPolicy timeBased.lookbackTime=7d } version : { - finder.class=gobblin.data.management.version.finder.GlobModTimeDatasetVersionFinder + finder.class=org.apache.gobblin.data.management.version.finder.GlobModTimeDatasetVersionFinder } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/fcfad779/gobblin-docs/case-studies/Hive-Distcp.md ---------------------------------------------------------------------- diff --git a/gobblin-docs/case-studies/Hive-Distcp.md b/gobblin-docs/case-studies/Hive-Distcp.md index 271f2ff..3559ffb 100644 --- a/gobblin-docs/case-studies/Hive-Distcp.md +++ b/gobblin-docs/case-studies/Hive-Distcp.md @@ -18,12 +18,12 @@ job.name=SampleHiveDistcp job.group=HiveDistcp job.description=Sample job config for hive distcp -extract.namespace=gobblin.copy.tracking -gobblin.dataset.profile.class=gobblin.data.management.copy.hive.HiveDatasetFinder -data.publisher.type=gobblin.data.management.copy.publisher.CopyDataPublisher -source.class=gobblin.data.management.copy.CopySource -writer.builder.class=gobblin.data.management.copy.writer.FileAwareInputStreamDataWriterBuilder -converter.classes=gobblin.converter.IdentityConverter +extract.namespace=org.apache.gobblin.copy.tracking +gobblin.dataset.profile.class=org.apache.gobblin.data.management.copy.hive.HiveDatasetFinder +data.publisher.type=org.apache.gobblin.data.management.copy.publisher.CopyDataPublisher +source.class=org.apache.gobblin.data.management.copy.CopySource +writer.builder.class=org.apache.gobblin.data.management.copy.writer.FileAwareInputStreamDataWriterBuilder +converter.classes=org.apache.gobblin.converter.IdentityConverter hive.dataset.copy.target.table.prefixToBeReplaced= hive.dataset.copy.target.table.prefixReplacement= http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/fcfad779/gobblin-docs/case-studies/Kafka-HDFS-Ingestion.md ---------------------------------------------------------------------- diff --git a/gobblin-docs/case-studies/Kafka-HDFS-Ingestion.md b/gobblin-docs/case-studies/Kafka-HDFS-Ingestion.md index fb23950..3c9cd43 100644 --- a/gobblin-docs/case-studies/Kafka-HDFS-Ingestion.md +++ b/gobblin-docs/case-studies/Kafka-HDFS-Ingestion.md @@ -21,15 +21,15 @@ job.lock.enabled=false kafka.brokers=localhost:9092 -source.class=gobblin.source.extractor.extract.kafka.KafkaSimpleSource -extract.namespace=gobblin.extract.kafka +source.class=org.apache.gobblin.source.extractor.extract.kafka.KafkaSimpleSource +extract.namespace=org.apache.gobblin.extract.kafka -writer.builder.class=gobblin.writer.SimpleDataWriterBuilder +writer.builder.class=org.apache.gobblin.writer.SimpleDataWriterBuilder writer.file.path.type=tablename writer.destination.type=HDFS writer.output.format=txt -data.publisher.type=gobblin.publisher.BaseDataPublisher +data.publisher.type=org.apache.gobblin.publisher.BaseDataPublisher mr.job.max.mappers=1 @@ -68,15 +68,15 @@ job.lock.enabled=false kafka.brokers=localhost:9092 -source.class=gobblin.source.extractor.extract.kafka.KafkaSimpleSource -extract.namespace=gobblin.extract.kafka +source.class=org.apache.gobblin.source.extractor.extract.kafka.KafkaSimpleSource +extract.namespace=org.apache.gobblin.extract.kafka -writer.builder.class=gobblin.writer.SimpleDataWriterBuilder +writer.builder.class=org.apache.gobblin.writer.SimpleDataWriterBuilder writer.file.path.type=tablename writer.destination.type=HDFS writer.output.format=txt -data.publisher.type=gobblin.publisher.BaseDataPublisher +data.publisher.type=org.apache.gobblin.publisher.BaseDataPublisher mr.job.max.mappers=1 http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/fcfad779/gobblin-docs/data-management/DistcpNgEvents.md ---------------------------------------------------------------------- diff --git a/gobblin-docs/data-management/DistcpNgEvents.md b/gobblin-docs/data-management/DistcpNgEvents.md index 29bbaf6..122faa4 100644 --- a/gobblin-docs/data-management/DistcpNgEvents.md +++ b/gobblin-docs/data-management/DistcpNgEvents.md @@ -6,8 +6,8 @@ The following metadata attributes are shared across all events: - Standard execution metadata (TODO add link) -- `namespace=gobblin.copy.CopyDataPublisher` -- `metadata["class"]=gobblin.data.management.copy.publisher.CopyDataPublisher` +- `namespace=org.apache.gobblin.copy.CopyDataPublisher` +- `metadata["class"]=org.apache.gobblin.data.management.copy.publisher.CopyDataPublisher` Events by `name`: http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/fcfad779/gobblin-docs/data-management/Gobblin-Retention.md ---------------------------------------------------------------------- diff --git a/gobblin-docs/data-management/Gobblin-Retention.md b/gobblin-docs/data-management/Gobblin-Retention.md index 7aa51c5..0a3eb64 100644 --- a/gobblin-docs/data-management/Gobblin-Retention.md +++ b/gobblin-docs/data-management/Gobblin-Retention.md @@ -92,11 +92,11 @@ For maintainability and reusablity we define all the configs as tags and import gobblin.retention : { ##Alias - TimeBasedSelectionPolicy=gobblin.data.management.policy.SelectBeforeTimeBasedPolicy - DateTimeDatasetVersionFinder=gobblin.data.management.version.finder.DateTimeDatasetVersionFinder + TimeBasedSelectionPolicy=org.apache.gobblin.data.management.policy.SelectBeforeTimeBasedPolicy + DateTimeDatasetVersionFinder=org.apache.gobblin.data.management.version.finder.DateTimeDatasetVersionFinder dataset : { - finder.class=gobblin.data.management.retention.profile.ManagedCleanableDatasetFinder + finder.class=org.apache.gobblin.data.management.retention.profile.ManagedCleanableDatasetFinder partitions=[${gobblin.retention.daily} } @@ -159,16 +159,16 @@ gobblin.retention : { dataset : { pattern="/user/gobblin/*" - finder.class=gobblin.data.management.retention.profile.ManagedCleanableDatasetFinder + finder.class=org.apache.gobblin.data.management.retention.profile.ManagedCleanableDatasetFinder } selection : { - policy.class=gobblin.data.management.policy.SelectBeforeTimeBasedPolicy + policy.class=org.apache.gobblin.data.management.policy.SelectBeforeTimeBasedPolicy timeBased.lookbackTime=7d } version : { - finder.class=gobblin.data.management.version.finder.GlobModTimeDatasetVersionFinder + finder.class=org.apache.gobblin.data.management.version.finder.GlobModTimeDatasetVersionFinder } } </pre> @@ -181,16 +181,16 @@ gobblin.retention : { dataset : { pattern="/user/gobblin/*" - finder.class=gobblin.data.management.retention.profile.ManagedCleanableDatasetFinder + finder.class=org.apache.gobblin.data.management.retention.profile.ManagedCleanableDatasetFinder } selection : { - policy.class=gobblin.data.management.policy.NewestKSelectionPolicy + policy.class=org.apache.gobblin.data.management.policy.NewestKSelectionPolicy newestK.versionsNotSelected=2 } version : { - finder.class=gobblin.data.management.version.finder.GlobModTimeDatasetVersionFinder + finder.class=org.apache.gobblin.data.management.version.finder.GlobModTimeDatasetVersionFinder } } </pre> @@ -203,11 +203,11 @@ gobblin.retention : { dataset : { pattern="/user/gobblin/snapshots/*/*" - finder.class=gobblin.data.management.retention.profile.ManagedCleanableDatasetFinder + finder.class=org.apache.gobblin.data.management.retention.profile.ManagedCleanableDatasetFinder } selection : { - policy.class=gobblin.data.management.policy.CombineSelectionPolicy + policy.class=org.apache.gobblin.data.management.policy.CombineSelectionPolicy combine.operation=INTERSECT combine.policy.classes=[ gobblin.data.management.policy.SelectBeforeTimeBasedPolicy, @@ -219,7 +219,7 @@ gobblin.retention : { } version : { - finder.class=gobblin.data.management.version.finder.GlobModTimeDatasetVersionFinder + finder.class=org.apache.gobblin.data.management.version.finder.GlobModTimeDatasetVersionFinder } } </pre> @@ -230,12 +230,12 @@ This is mostly useful for retention management of datasets that have different k <pre> gobblin.retention : { - TimeBasedSelectionPolicy=gobblin.data.management.policy.SelectBeforeTimeBasedPolicy - DateTimeDatasetVersionFinder=gobblin.data.management.version.finder.DateTimeDatasetVersionFinder + TimeBasedSelectionPolicy=org.apache.gobblin.data.management.policy.SelectBeforeTimeBasedPolicy + DateTimeDatasetVersionFinder=org.apache.gobblin.data.management.version.finder.DateTimeDatasetVersionFinder dataset : { pattern="/user/gobblin/data/*" - finder.class=gobblin.data.management.retention.profile.ManagedCleanableDatasetFinder + finder.class=org.apache.gobblin.data.management.retention.profile.ManagedCleanableDatasetFinder partitions=[${gobblin.retention.hourly}, ${gobblin.retention.daily}] } @@ -277,17 +277,17 @@ gobblin.retention : { is.blacklisted=false dataset : { - finder.class=gobblin.data.management.retention.dataset.finder.CleanableHiveDatasetFinder + finder.class=org.apache.gobblin.data.management.retention.dataset.finder.CleanableHiveDatasetFinder } selection : { - policy.class=gobblin.data.management.policy.SelectBeforeTimeBasedPolicy + policy.class=org.apache.gobblin.data.management.policy.SelectBeforeTimeBasedPolicy ## Partitions older than 3 days will be deleted timeBased.lookbackTime=3d } - version.finder.class=gobblin.data.management.version.finder.DatePartitionHiveVersionFinder + version.finder.class=org.apache.gobblin.data.management.version.finder.DatePartitionHiveVersionFinder hive { partition { @@ -311,12 +311,12 @@ All the access control policies to apply are discovered through the key ```acces <pre> gobblin.retention : { - TimeBasedSelectionPolicy=gobblin.data.management.policy.SelectBeforeTimeBasedPolicy - DateTimeDatasetVersionFinder=gobblin.data.management.version.finder.DateTimeDatasetVersionFinder + TimeBasedSelectionPolicy=org.apache.gobblin.data.management.policy.SelectBeforeTimeBasedPolicy + DateTimeDatasetVersionFinder=org.apache.gobblin.data.management.version.finder.DateTimeDatasetVersionFinder dataset : { pattern="/user/gobblin/data/*" - finder.class=gobblin.data.management.retention.profile.ManagedCleanableDatasetFinder + finder.class=org.apache.gobblin.data.management.retention.profile.ManagedCleanableDatasetFinder partitions=[${gobblin.retention.hourly}, ${gobblin.retention.daily}] } http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/fcfad779/gobblin-docs/sinks/AvroHdfsDataWriter.md ---------------------------------------------------------------------- diff --git a/gobblin-docs/sinks/AvroHdfsDataWriter.md b/gobblin-docs/sinks/AvroHdfsDataWriter.md index 1e404e0..f74d6a4 100644 --- a/gobblin-docs/sinks/AvroHdfsDataWriter.md +++ b/gobblin-docs/sinks/AvroHdfsDataWriter.md @@ -6,7 +6,7 @@ Writes Avro records to Avro data files on Hadoop file systems. # Usage - writer.builder.class=gobblin.writer.AvroDataWriterBuilder + writer.builder.class=org.apache.gobblin.writer.AvroDataWriterBuilder writer.destination.type=HDFS For more info, see [`AvroHdfsDataWriter`](https://github.com/linkedin/gobblin/search?utf8=%E2%9C%93&q=AvroHdfsDataWriter) http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/fcfad779/gobblin-docs/sinks/ConsoleWriter.md ---------------------------------------------------------------------- diff --git a/gobblin-docs/sinks/ConsoleWriter.md b/gobblin-docs/sinks/ConsoleWriter.md index d7c6129..1edf64b 100644 --- a/gobblin-docs/sinks/ConsoleWriter.md +++ b/gobblin-docs/sinks/ConsoleWriter.md @@ -6,4 +6,4 @@ A simple implementation that writes records to Stdout. # Usage - writer.builder.class=gobblin.writer.ConsoleWriterBuilder \ No newline at end of file + writer.builder.class=org.apache.gobblin.writer.ConsoleWriterBuilder \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/fcfad779/gobblin-docs/sinks/Gobblin-JDBC-Writer.md ---------------------------------------------------------------------- diff --git a/gobblin-docs/sinks/Gobblin-JDBC-Writer.md b/gobblin-docs/sinks/Gobblin-JDBC-Writer.md index 0484573..63b2c57 100644 --- a/gobblin-docs/sinks/Gobblin-JDBC-Writer.md +++ b/gobblin-docs/sinks/Gobblin-JDBC-Writer.md @@ -142,13 +142,13 @@ The MySQL writer uses [buffered inserts](http://dev.mysql.com/doc/refman/5.0/en/ The sink configuration for MySQL in a Gobblin job is as follows: ``` writer.destination.type=MYSQL -writer.builder.class=gobblin.writer.JdbcWriterBuilder +writer.builder.class=org.apache.gobblin.writer.JdbcWriterBuilder -data.publisher.type=gobblin.publisher.JdbcPublisher +data.publisher.type=org.apache.gobblin.publisher.JdbcPublisher jdbc.publisher.url=jdbc:mysql://host:3306 jdbc.publisher.driver=com.mysql.jdbc.Driver -converter.classes=gobblin.converter.jdbc.AvroToJdbcEntryConverter +converter.classes=org.apache.gobblin.converter.jdbc.AvroToJdbcEntryConverter # If field name mapping is needed between the input Avro and the target table: converter.avro.jdbc.entry_fields_pairs={\"src_fn\":\"firstname\",\"src_ln\":\"lastname\"} ``` @@ -163,13 +163,13 @@ Gobblin submitter scripts. Teradata may use the FASTLOAD option during the inser The sink configuration for Teradata in a Gobblin job is as follows: ``` writer.destination.type=TERADATA -writer.builder.class=gobblin.writer.JdbcWriterBuilder +writer.builder.class=org.apache.gobblin.writer.JdbcWriterBuilder -data.publisher.type=gobblin.publisher.JdbcPublisher +data.publisher.type=org.apache.gobblin.publisher.JdbcPublisher jdbc.publisher.url=jdbc:teradata://host/TMODE=ANSI,CHARSET=UTF16,TYPE=FASTLOAD jdbc.publisher.driver=com.teradata.jdbc.TeraDriver -converter.classes=gobblin.converter.jdbc.AvroToJdbcEntryConverter +converter.classes=org.apache.gobblin.converter.jdbc.AvroToJdbcEntryConverter # If field name mapping is needed between the input Avro and the target table: converter.avro.jdbc.entry_fields_pairs={\"src_fn\":\"firstname\",\"src_ln\":\"lastname\"} ``` http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/fcfad779/gobblin-docs/sinks/Kafka.md ---------------------------------------------------------------------- diff --git a/gobblin-docs/sinks/Kafka.md b/gobblin-docs/sinks/Kafka.md index bcf3474..af47f4b 100644 --- a/gobblin-docs/sinks/Kafka.md +++ b/gobblin-docs/sinks/Kafka.md @@ -31,8 +31,8 @@ writer.kafka.producerConfig.key.serializer=io.confluent.kafka.serializers.KafkaA writer.kafka.producerConfig.schema.registry.url=http://localhost:8081 #Set this to the correct schema-reg url ##Use Local Schema Registry and serializers -#writer.kafka.producerConfig.value.serializer=gobblin.kafka.serialize.LiAvroSerializer -#writer.kafka.producerConfig.kafka.schemaRegistry.class=gobblin.kafka.schemareg.ConfigDrivenMd5SchemaRegistry +#writer.kafka.producerConfig.value.serializer=org.apache.gobblin.kafka.serialize.LiAvroSerializer +#writer.kafka.producerConfig.kafka.schemaRegistry.class=org.apache.gobblin.kafka.schemareg.ConfigDrivenMd5SchemaRegistry #writer.kafka.producerConfig.schemaRegistry.schema.name=WikipediaExample #writer.kafka.producerConfig.schemaRegistry.schema.value={"namespace": "example.wikipedia.avro","type": "record","name": "WikipediaArticle","fields": [{"name": "pageid", "type": ["double", "null"]},{"name": "title", "type": ["string", "null"]},{"name": "user", "type": ["string", "null"]},{"name": "anon", "type": ["string", "null"]},{"name": "userid", "type": ["double", "null"]},{"name": "timestamp", "type": ["string", "null"]},{"name": "size", "type": ["double", "null"]},{"name": "contentformat", "type": ["string", "null"]},{"name": "contentmodel", "type": ["string", "null"]},{"name": "content", "type": ["string", "null"]}]} ``` http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/fcfad779/gobblin-docs/sinks/SimpleBytesWriter.md ---------------------------------------------------------------------- diff --git a/gobblin-docs/sinks/SimpleBytesWriter.md b/gobblin-docs/sinks/SimpleBytesWriter.md index 750ded2..5582a85 100644 --- a/gobblin-docs/sinks/SimpleBytesWriter.md +++ b/gobblin-docs/sinks/SimpleBytesWriter.md @@ -6,7 +6,7 @@ A simple writer for byte arrays to a Hadoop file system file. The byte arrays ca # Usage - writer.builder.class=gobblin.writer.AvroDataWriterBuilder + writer.builder.class=org.apache.gobblin.writer.AvroDataWriterBuilder # Configuration http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/fcfad779/gobblin-docs/user-guide/Compaction.md ---------------------------------------------------------------------- diff --git a/gobblin-docs/user-guide/Compaction.md b/gobblin-docs/user-guide/Compaction.md index 7f5b81e..5784d39 100644 --- a/gobblin-docs/user-guide/Compaction.md +++ b/gobblin-docs/user-guide/Compaction.md @@ -37,7 +37,7 @@ Suppose we ingest data from a Kafka broker, and we would like to publish the dat In the above example use case, for hourly compaction, each dataset contains an hour's data in the `hourly_staging` folder, e.g., `/data/kafka_topics/PageViewEvent/hourly_staging/2015/10/29/08`; for daily compaction, each dataset contains 24 hourly folder of a day, e.g., `/data/kafka_topics/PageViewEvent/hourly/2015/10/29`. In hourly compaction, you may use the following config properties: ``` -compaction.datasets.finder=gobblin.compaction.dataset.TimeBasedSubDirDatasetsFinder +compaction.datasets.finder=org.apache.gobblin.compaction.dataset.TimeBasedSubDirDatasetsFinder compaction.input.dir=/data/kafka_topics compaction.dest.dir=/data/kafka_topics compaction.input.subdir=hourly_staging @@ -45,8 +45,8 @@ compaction.dest.subdir=hourly compaction.folder.pattern=YYYY/MM/dd compaction.timebased.max.time.ago=3h compaction.timebased.min.time.ago=1h -compaction.jobprops.creator.class=gobblin.compaction.mapreduce.MRCompactorTimeBasedJobPropCreator -compaction.job.runner.class=gobblin.compaction.mapreduce.avro.MRCompactorAvroKeyDedupJobRunner (if your data is Avro) +compaction.jobprops.creator.class=org.apache.gobblin.compaction.mapreduce.MRCompactorTimeBasedJobPropCreator +compaction.job.runner.class=org.apache.gobblin.compaction.mapreduce.avro.MRCompactorAvroKeyDedupJobRunner (if your data is Avro) ``` If your data format is not Avro, you can implement a different job runner class for deduplicating your data format. http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/fcfad779/gobblin-docs/user-guide/Configuration-Properties-Glossary.md ---------------------------------------------------------------------- diff --git a/gobblin-docs/user-guide/Configuration-Properties-Glossary.md b/gobblin-docs/user-guide/Configuration-Properties-Glossary.md index c59f349..e275ca2 100644 --- a/gobblin-docs/user-guide/Configuration-Properties-Glossary.md +++ b/gobblin-docs/user-guide/Configuration-Properties-Glossary.md @@ -1159,8 +1159,8 @@ No # JDBC Writer properties <a name="JdbcWriter-Properties"></a> Writer(and publisher) that writes to JDBC database. Please configure below two properties to use JDBC writer & publisher. -* writer.builder.class=gobblin.writer.JdbcWriterBuilder -* data.publisher.type=gobblin.publisher.JdbcPublisher +* writer.builder.class=org.apache.gobblin.writer.JdbcWriterBuilder +* data.publisher.type=org.apache.gobblin.publisher.JdbcPublisher #### jdbc.publisher.database_name ###### Description http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/fcfad779/gobblin-docs/user-guide/Gobblin-on-Yarn.md ---------------------------------------------------------------------- diff --git a/gobblin-docs/user-guide/Gobblin-on-Yarn.md b/gobblin-docs/user-guide/Gobblin-on-Yarn.md index b2dd4c2..194cf4a 100644 --- a/gobblin-docs/user-guide/Gobblin-on-Yarn.md +++ b/gobblin-docs/user-guide/Gobblin-on-Yarn.md @@ -213,7 +213,7 @@ writer.staging.dir=${gobblin.yarn.work.dir}/task-staging writer.output.dir=${gobblin.yarn.work.dir}/task-output # Data publisher related configuration properties -data.publisher.type=gobblin.publisher.BaseDataPublisher +data.publisher.type=org.apache.gobblin.publisher.BaseDataPublisher data.publisher.final.dir=${gobblin.yarn.work.dir}/job-output data.publisher.replace.final.dir=false @@ -225,7 +225,7 @@ qualitychecker.row.err.file=${gobblin.yarn.work.dir}/err # Use zookeeper for maintaining the job lock job.lock.enabled=true -job.lock.type=gobblin.runtime.locks.ZookeeperBasedJobLock +job.lock.type=org.apache.gobblin.runtime.locks.ZookeeperBasedJobLock # Directory where job locks are stored job.lock.dir=${gobblin.yarn.work.dir}/locks http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/fcfad779/gobblin-docs/user-guide/Gobblin-template.md ---------------------------------------------------------------------- diff --git a/gobblin-docs/user-guide/Gobblin-template.md b/gobblin-docs/user-guide/Gobblin-template.md index 6259d84..2e4d0b6 100644 --- a/gobblin-docs/user-guide/Gobblin-template.md +++ b/gobblin-docs/user-guide/Gobblin-template.md @@ -45,21 +45,21 @@ job.name=PullFromWikipedia job.group=Wikipedia job.description=A getting started example for Gobblin -source.class=gobblin.example.wikipedia.WikipediaSource +source.class=org.apache.gobblin.example.wikipedia.WikipediaSource source.revisions.cnt=5 wikipedia.api.rooturl=https://en.wikipedia.org/w/api.php?format=json&action=query&prop=revisions&rvprop=content|timestamp|user|userid|size wikipedia.avro.schema={"namespace": "example.wikipedia.avro","type": "record","name": "WikipediaArticle","fields": [{"name": "pageid", "type": ["double", "null"]},{"name": "title", "type": ["string", "null"]},{"name": "user", "type": ["string", "null"]},{"name": "anon", "type": ["string", "null"]},{"name": "userid", "type": ["double", "null"]},{"name": "timestamp", "type": ["string", "null"]},{"name": "size", "type": ["double", "null"]},{"name": "contentformat", "type": ["string", "null"]},{"name": "contentmodel", "type": ["string", "null"]},{"name": "content", "type": ["string", "null"]}]} -converter.classes=gobblin.example.wikipedia.WikipediaConverter +converter.classes=org.apache.gobblin.example.wikipedia.WikipediaConverter -extract.namespace=gobblin.example.wikipedia +extract.namespace=org.apache.gobblin.example.wikipedia writer.destination.type=HDFS writer.output.format=AVRO -writer.partitioner.class=gobblin.example.wikipedia.WikipediaPartitioner +writer.partitioner.class=org.apache.gobblin.example.wikipedia.WikipediaPartitioner -data.publisher.type=gobblin.publisher.BaseDataPublisher +data.publisher.type=org.apache.gobblin.publisher.BaseDataPublisher gobblin.template.required_attributes=source.page.titles http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/fcfad779/gobblin-docs/user-guide/Working-with-the-ForkOperator.md ---------------------------------------------------------------------- diff --git a/gobblin-docs/user-guide/Working-with-the-ForkOperator.md b/gobblin-docs/user-guide/Working-with-the-ForkOperator.md index 54e3a59..5e4c319 100644 --- a/gobblin-docs/user-guide/Working-with-the-ForkOperator.md +++ b/gobblin-docs/user-guide/Working-with-the-ForkOperator.md @@ -23,7 +23,7 @@ Using the ForkOperator The [`ForkOperator`](https://github.com/linkedin/gobblin/blob/master/gobblin-api/src/main/java/gobblin/fork/ForkOperator.java), like most other operators in a Gobblin task flow, is pluggable through the configuration, or more specifically , the configuration property `fork.operator.class` that points to a class that implements the `ForkOperator` interface. For instance: ``` -fork.operator.class=gobblin.fork.IdentityForkOperator +fork.operator.class=org.apache.gobblin.fork.IdentityForkOperator ``` By default, if no `ForkOperator` class is specified, internally Gobblin uses the default implementation [`IdentityForkOperator`](https://github.com/linkedin/gobblin/blob/master/gobblin-core/src/main/java/gobblin/fork/IdentityForkOperator.java) with a single forked branch (although it does supports multiple forked branches). The `IdentityForkOperator` simply unconditionally forwards the schema and ingested data records to all the forked branches, the number of which is specified through the configuration property `fork.branches` with a default value of 1. When an `IdentityForkOperator` instance is initialized, it will read the value of `fork.branches` and use that as the return value of `getBranches`. http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/fcfad779/gobblin-example/src/main/resources/avro-eventhub.job ---------------------------------------------------------------------- diff --git a/gobblin-example/src/main/resources/avro-eventhub.job b/gobblin-example/src/main/resources/avro-eventhub.job index 76ce3c0..87e2cb1 100644 --- a/gobblin-example/src/main/resources/avro-eventhub.job +++ b/gobblin-example/src/main/resources/avro-eventhub.job @@ -7,18 +7,18 @@ job.name=TestSourceToEventhub job.group=eventhubTest job.description=Pull from Test Source and write to Eventhub gobblin.workDir=gobblin -source.class=gobblin.source.extractor.hadoop.AvroFileSource -converter.classes=gobblin.converter.avro.AvroToJsonStringConverter +source.class=org.apache.gobblin.source.extractor.hadoop.AvroFileSource +converter.classes=org.apache.gobblin.converter.avro.AvroToJsonStringConverter source.filebased.fs.uri=file://localhost/ job.lock.dir=/tmp/gobblin-eventhub/locks extract.table.type=snapshot_append source.filebased.data.directory=/tmp/gobblin-eventhub/data -writer.builder.class=gobblin.eventhub.writer.EventhubDataWriterBuilder +writer.builder.class=org.apache.gobblin.eventhub.writer.EventhubDataWriterBuilder #config for eventhub -data.publisher.type=gobblin.publisher.NoopPublisher +data.publisher.type=org.apache.gobblin.publisher.NoopPublisher state.store.enabled=false task.data.root.dir=${gobblin.workDir} http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/fcfad779/gobblin-example/src/main/resources/avro-to-mysql.pull ---------------------------------------------------------------------- diff --git a/gobblin-example/src/main/resources/avro-to-mysql.pull b/gobblin-example/src/main/resources/avro-to-mysql.pull index aedf16a..a20350c 100644 --- a/gobblin-example/src/main/resources/avro-to-mysql.pull +++ b/gobblin-example/src/main/resources/avro-to-mysql.pull @@ -1,7 +1,7 @@ job.name=AVRO_TO_MySQL job.description=AVRO_TO_MySQL -source.class=gobblin.source.extractor.hadoop.AvroFileSource +source.class=org.apache.gobblin.source.extractor.hadoop.AvroFileSource source.filebased.data.directory=/path/to/avro/file/directory source.max.number.of.partitions=4 @@ -12,15 +12,15 @@ extract.table.name=dummy_name filebased.report.status.on.count=100 -converter.classes=gobblin.converter.jdbc.AvroToJdbcEntryConverter +converter.classes=org.apache.gobblin.converter.jdbc.AvroToJdbcEntryConverter #name pairs if name does not match between source and JDBC destination converter.avro.jdbc.entry_fields_pairs={"businessUnit":"business_unit", "geoRegion":"geo_region", "superRegion":"super_region", "subRegion":"sub_region"} -writer.builder.class=gobblin.writer.JdbcWriterBuilder +writer.builder.class=org.apache.gobblin.writer.JdbcWriterBuilder writer.destination.type=MYSQL writer.jdbc.batch_size=1000 -data.publisher.type=gobblin.publisher.JdbcPublisher +data.publisher.type=org.apache.gobblin.publisher.JdbcPublisher jdbc.publisher.driver=com.mysql.jdbc.Driver jdbc.publisher.database_name=database_name jdbc.publisher.encrypt_key_loc=/cryptokey/file/path http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/fcfad779/gobblin-example/src/main/resources/csvToAvro.conf ---------------------------------------------------------------------- diff --git a/gobblin-example/src/main/resources/csvToAvro.conf b/gobblin-example/src/main/resources/csvToAvro.conf index e318588..39bd692 100644 --- a/gobblin-example/src/main/resources/csvToAvro.conf +++ b/gobblin-example/src/main/resources/csvToAvro.conf @@ -5,7 +5,7 @@ converter.classes="org.apache.gobblin.converter.StringSchemaInjector,org.apache. writer.builder.class=org.apache.gobblin.writer.AvroDataWriterBuilder extract.table.name=CsvToAvro -extract.namespace=gobblin.example +extract.namespace=org.apache.gobblin.example extract.table.type=APPEND_ONLY gobblin.converter.schemaInjector.schema=SCHEMA http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/fcfad779/gobblin-example/src/main/resources/distcp-hive.pull ---------------------------------------------------------------------- diff --git a/gobblin-example/src/main/resources/distcp-hive.pull b/gobblin-example/src/main/resources/distcp-hive.pull index c7df51e..663faf2 100644 --- a/gobblin-example/src/main/resources/distcp-hive.pull +++ b/gobblin-example/src/main/resources/distcp-hive.pull @@ -25,7 +25,7 @@ job.description=Test Gobblin job for copy # target location for copy data.publisher.final.dir=/tmp/gobblin-copy -gobblin.dataset.profile.class=gobblin.data.management.copy.hive.HiveDatasetFinder +gobblin.dataset.profile.class=org.apache.gobblin.data.management.copy.hive.HiveDatasetFinder gobblin.dataset.pattern=${env:HOME}/gobblin-copy @@ -40,13 +40,13 @@ hive.dataset.copy.target.database=copy # ==================================================================== type=hadoopJava -job.class=gobblin.azkaban.AzkabanJobLauncher +job.class=org.apache.gobblin.azkaban.AzkabanJobLauncher -extract.namespace=gobblin.copy -data.publisher.type=gobblin.data.management.copy.publisher.CopyDataPublisher -source.class=gobblin.data.management.copy.CopySource -writer.builder.class=gobblin.data.management.copy.writer.FileAwareInputStreamDataWriterBuilder -converter.classes=gobblin.converter.IdentityConverter +extract.namespace=org.apache.gobblin.copy +data.publisher.type=org.apache.gobblin.data.management.copy.publisher.CopyDataPublisher +source.class=org.apache.gobblin.data.management.copy.CopySource +writer.builder.class=org.apache.gobblin.data.management.copy.writer.FileAwareInputStreamDataWriterBuilder +converter.classes=org.apache.gobblin.converter.IdentityConverter task.maxretries=0 workunit.retry.enabled=false http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/fcfad779/gobblin-example/src/main/resources/distcpFromS3.job ---------------------------------------------------------------------- diff --git a/gobblin-example/src/main/resources/distcpFromS3.job b/gobblin-example/src/main/resources/distcpFromS3.job index 430e28c..4fbc702 100644 --- a/gobblin-example/src/main/resources/distcpFromS3.job +++ b/gobblin-example/src/main/resources/distcpFromS3.job @@ -7,17 +7,17 @@ job.description=Gobblin job for copy to S3 -# target publishing location for copy +# target publishing location for copy # The folder containing result files that will show up in the s3 data.publisher.final.dir=<Full local(target) FS Path> -gobblin.dataset.profile.class=gobblin.data.management.copy.CopyableGlobDatasetFinder +gobblin.dataset.profile.class=org.apache.gobblin.data.management.copy.CopyableGlobDatasetFinder #e.g. s3a://gobblinoutput/ source.filebased.fs.uri=<Full Remote FS Path> gobblin.dataset.pattern=<Remote Dataset Pattern> -# For s3 to work, Need to also add hadoop-aws.jar as the dependency in the classpath. +# For s3 to work, Need to also add hadoop-aws.jar as the dependency in the classpath. fs.s3a.impl=org.apache.hadoop.fs.s3a.S3AFileSystem source.filebased.encrypted.fs.s3a.access.key=<Encrypted Access Key> source.filebased.encrypted.fs.s3a.secret.key=<Encrypted Secret key> @@ -38,14 +38,14 @@ encrypt.key.loc=<Local master password file location> # ==================================================================== type=hadoopJava -job.class=gobblin.azkaban.AzkabanJobLauncher +job.class=org.apache.gobblin.azkaban.AzkabanJobLauncher -extract.namespace=gobblin.copy +extract.namespace=org.apache.gobblin.copy -source.class=gobblin.data.management.copy.CopySource -converter.classes=gobblin.converter.IdentityConverter -writer.builder.class=gobblin.data.management.copy.writer.FileAwareInputStreamDataWriterBuilder -data.publisher.type=gobblin.data.management.copy.publisher.CopyDataPublisher +source.class=org.apache.gobblin.data.management.copy.CopySource +converter.classes=org.apache.gobblin.converter.IdentityConverter +writer.builder.class=org.apache.gobblin.data.management.copy.writer.FileAwareInputStreamDataWriterBuilder +data.publisher.type=org.apache.gobblin.data.management.copy.publisher.CopyDataPublisher distcp.persist.dir=/tmp/distcp-persist-dir task.maxretries=0 http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/fcfad779/gobblin-example/src/main/resources/distcpToS3.job ---------------------------------------------------------------------- diff --git a/gobblin-example/src/main/resources/distcpToS3.job b/gobblin-example/src/main/resources/distcpToS3.job index 4e391e8..d7e1454 100644 --- a/gobblin-example/src/main/resources/distcpToS3.job +++ b/gobblin-example/src/main/resources/distcpToS3.job @@ -10,12 +10,12 @@ job.description=Gobblin job for pushing data to S3 # target publishing location for copy data.publisher.final.dir=<testfolder> -gobblin.dataset.profile.class=gobblin.data.management.copy.CopyableGlobDatasetFinder +gobblin.dataset.profile.class=org.apache.gobblin.data.management.copy.CopyableGlobDatasetFinder gobblin.dataset.pattern=< Dataset pattern describing your to-be-copied data's location > -# For s3 to work, Need to also add hadoop-aws.jar as the dependency in the classpath. +# For s3 to work, Need to also add hadoop-aws.jar as the dependency in the classpath. # Attention that don't use any version later than **2.6.3** (Which means, pre-2015, very old) -# Or you will hit into some messy dependecies problem related to hadoop version and amazon dependencies. +# Or you will hit into some messy dependecies problem related to hadoop version and amazon dependencies. fs.s3a.impl=org.apache.hadoop.fs.s3a.S3AFileSystem writer.encrypted.fs.s3a.access.key=<Encrypted Access Key> writer.encrypted.fs.s3a.secret.key=<Encrypted Secret Key> @@ -35,14 +35,14 @@ encrypt.key.loc=<Local File Path for master password> # ==================================================================== type=hadoopJava -job.class=gobblin.azkaban.AzkabanJobLauncher +job.class=org.apache.gobblin.azkaban.AzkabanJobLauncher -extract.namespace=gobblin.copy +extract.namespace=org.apache.gobblin.copy -source.class=gobblin.data.management.copy.CopySource -converter.classes=gobblin.converter.IdentityConverter -writer.builder.class=gobblin.data.management.copy.writer.FileAwareInputStreamDataWriterBuilder -data.publisher.type=gobblin.data.management.copy.publisher.CopyDataPublisher +source.class=org.apache.gobblin.data.management.copy.CopySource +converter.classes=org.apache.gobblin.converter.IdentityConverter +writer.builder.class=org.apache.gobblin.data.management.copy.writer.FileAwareInputStreamDataWriterBuilder +data.publisher.type=org.apache.gobblin.data.management.copy.publisher.CopyDataPublisher distcp.persist.dir=/tmp/distcp-persist-dir task.maxretries=0 http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/fcfad779/gobblin-example/src/main/resources/hdfs-monthly-to-hdfs-daily.pull ---------------------------------------------------------------------- diff --git a/gobblin-example/src/main/resources/hdfs-monthly-to-hdfs-daily.pull b/gobblin-example/src/main/resources/hdfs-monthly-to-hdfs-daily.pull index ef5e71a..6bfc933 100644 --- a/gobblin-example/src/main/resources/hdfs-monthly-to-hdfs-daily.pull +++ b/gobblin-example/src/main/resources/hdfs-monthly-to-hdfs-daily.pull @@ -22,7 +22,7 @@ job.description=Pull monthly partitioned data from HDFS, write it back as daily source.filebased.data.directory=/data/monthly_partitioned/job_output/dataset source.filebased.fs.uri=hdfs://localhost:8020 -source.class=gobblin.source.DatePartitionedAvroFileSource +source.class=org.apache.gobblin.source.DatePartitionedAvroFileSource source.entity=dataset # Looking for data in /data/monthly_partitioned/job_output/dataset/monthly/2015/02... @@ -30,7 +30,7 @@ date.partitioned.source.min.watermark.value=2015/01 date.partitioned.source.partition.prefix=monthly date.partitioned.source.partition.pattern=yyyy/MM -extract.namespace=gobblin.example.partitioned +extract.namespace=org.apache.gobblin.example.partitioned extract.table.name=dataset extract.is.full=true extract.table.type=snapshot_only @@ -39,17 +39,17 @@ extract.table.type=snapshot_only # Wite to HDFS as daily partitioned writer.fs.uri=hdfs://localhost:8020 writer.codec.type=snappy -writer.builder.class=gobblin.writer.AvroDataWriterBuilder +writer.builder.class=org.apache.gobblin.writer.AvroDataWriterBuilder writer.buffer.size=4096 writer.file.path.type=TABLENAME -writer.partitioner.class=gobblin.writer.partitioner.TimeBasedAvroWriterPartitioner +writer.partitioner.class=org.apache.gobblin.writer.partitioner.TimeBasedAvroWriterPartitioner writer.partition.columns=change_dt writer.partition.pattern=yyyy/MM/dd writer.partition.prefix=daily writer.partition.timezone=Europe/Zurich -data.publisher.type=gobblin.publisher.TimePartitionedDataPublisher +data.publisher.type=org.apache.gobblin.publisher.TimePartitionedDataPublisher # Misc source.timezone=Europe/Zurich http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/fcfad779/gobblin-example/src/main/resources/hive-avro-to-orc.pull ---------------------------------------------------------------------- diff --git a/gobblin-example/src/main/resources/hive-avro-to-orc.pull b/gobblin-example/src/main/resources/hive-avro-to-orc.pull index 6008432..bd94cd9 100644 --- a/gobblin-example/src/main/resources/hive-avro-to-orc.pull +++ b/gobblin-example/src/main/resources/hive-avro-to-orc.pull @@ -32,13 +32,13 @@ hive.dataset.hive.metastore.uri=thrift://localhost:9083 hiveserver.connection.string=jdbc:hive2://localhost:10000 type=hadoopJava -job.class=gobblin.azkaban.AzkabanJobLauncher +job.class=org.apache.gobblin.azkaban.AzkabanJobLauncher job.lock.enabled=false -data.publisher.type=gobblin.data.management.convertion.hive.HiveConvertPublisher -source.class=gobblin.data.management.convertion.hive.HiveSource -writer.builder.class=gobblin.data.management.convertion.hive.HiveQueryWriterBuilder -converter.classes=gobblin.data.management.convertion.hive.HiveAvroToOrcConverter +data.publisher.type=org.apache.gobblin.data.management.convertion.hive.HiveConvertPublisher +source.class=org.apache.gobblin.data.management.convertion.hive.HiveSource +writer.builder.class=org.apache.gobblin.data.management.convertion.hive.HiveQueryWriterBuilder +converter.classes=org.apache.gobblin.data.management.convertion.hive.HiveAvroToOrcConverter task.maxretries=0 workunit.retry.enabled=false \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/fcfad779/gobblin-example/src/main/resources/kafka-console.pull ---------------------------------------------------------------------- diff --git a/gobblin-example/src/main/resources/kafka-console.pull b/gobblin-example/src/main/resources/kafka-console.pull index cbf23ea..ad0c465 100644 --- a/gobblin-example/src/main/resources/kafka-console.pull +++ b/gobblin-example/src/main/resources/kafka-console.pull @@ -19,13 +19,13 @@ job.name=PullWikipediaFromKafkaToConsole job.group=KafkaConsole job.description=Pull Wikipedia data from Kafka and write to Console -source.class=gobblin.source.extractor.extract.kafka.KafkaDeserializerSource +source.class=org.apache.gobblin.source.extractor.extract.kafka.KafkaDeserializerSource topic.whitelist=WikipediaExample #Use Local Schema Registry and serializers -#kafka.deserializer.type=gobblin.kafka.serialize.LiAvroDeserializer -#kafka.schemaRegistry.class=gobblin.kafka.schemareg.ConfigDrivenMd5SchemaRegistry +#kafka.deserializer.type=org.apache.gobblin.kafka.serialize.LiAvroDeserializer +#kafka.schemaRegistry.class=org.apache.gobblin.kafka.schemareg.ConfigDrivenMd5SchemaRegistry #schemaRegistry.schema.name=WikipediaExample #schemaRegistry.schema.value={"namespace": "example.wikipedia.avro","type": "record","name": "WikipediaArticle","fields": [{"name": "pageid", "type": ["double", "null"]},{"name": "title", "type": ["string", "null"]},{"name": "user", "type": ["string", "null"]},{"name": "anon", "type": ["string", "null"]},{"name": "userid", "type": ["double", "null"]},{"name": "timestamp", "type": ["string", "null"]},{"name": "size", "type": ["double", "null"]},{"name": "contentformat", "type": ["string", "null"]},{"name": "contentmodel", "type": ["string", "null"]},{"name": "content", "type": ["string", "null"]}]} @@ -33,14 +33,14 @@ topic.whitelist=WikipediaExample #Use Confluent Schema Registry and serializers kafka.deserializer.type=CONFLUENT_AVRO kafka.schema.registry.url=http://localhost:8081 -extract.namespace=gobblin.example.wikipedia +extract.namespace=org.apache.gobblin.example.wikipedia zookeeper.connect=127.0.0.1:2181 kafka.brokers=localhost:9092 mr.job.max.mappers=1 bootstrap.with.offset=earliest -writer.builder.class=gobblin.writer.ConsoleWriterBuilder +writer.builder.class=org.apache.gobblin.writer.ConsoleWriterBuilder -data.publisher.type=gobblin.publisher.NoopPublisher +data.publisher.type=org.apache.gobblin.publisher.NoopPublisher http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/fcfad779/gobblin-example/src/main/resources/simplejson.pull ---------------------------------------------------------------------- diff --git a/gobblin-example/src/main/resources/simplejson.pull b/gobblin-example/src/main/resources/simplejson.pull index 22b6186..de45192 100644 --- a/gobblin-example/src/main/resources/simplejson.pull +++ b/gobblin-example/src/main/resources/simplejson.pull @@ -25,7 +25,7 @@ job.description=A Gobblin job for demo purpose source.class=org.apache.gobblin.example.simplejson.SimpleJsonSource converter.classes=org.apache.gobblin.example.simplejson.SimpleJsonConverter -extract.namespace=gobblin.example.simplejson +extract.namespace=org.apache.gobblin.example.simplejson # source configuration properties # comma-separated list of file URIs (supporting different schemes, e.g., file://, ftp://, sftp://, http://, etc)
