Repository: incubator-gobblin Updated Branches: refs/heads/master 33d4fea4b -> 22a951f0a
http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/22a951f0/gobblin-service/src/test/resources/template_catalog/flowEdgeTemplate/flow.conf ---------------------------------------------------------------------- diff --git a/gobblin-service/src/test/resources/template_catalog/flowEdgeTemplate/flow.conf b/gobblin-service/src/test/resources/template_catalog/flowEdgeTemplate/flow.conf new file mode 100644 index 0000000..64d6921 --- /dev/null +++ b/gobblin-service/src/test/resources/template_catalog/flowEdgeTemplate/flow.conf @@ -0,0 +1,20 @@ +gobblin.flow.edge.input.dataset.descriptor.0.class=org.apache.gobblin.service.modules.dataset.FSDatasetDescriptor +gobblin.flow.edge.input.dataset.descriptor.0.platform=hdfs +gobblin.flow.edge.input.dataset.descriptor.0.path=/data/inbound/${team.name}/${dataset.name} +gobblin.flow.edge.input.dataset.descriptor.0.format=avro + +gobblin.flow.edge.output.dataset.descriptor.0.class=${gobblin.flow.edge.input.dataset.descriptor.0.class} +gobblin.flow.edge.output.dataset.descriptor.0.platform=${gobblin.flow.edge.input.dataset.descriptor.0.platform} +gobblin.flow.edge.output.dataset.descriptor.0.path=${gobblin.flow.edge.input.dataset.descriptor.0.path} +gobblin.flow.edge.output.dataset.descriptor.0.format=${gobblin.flow.edge.input.dataset.descriptor.0.format} + +gobblin.flow.edge.input.dataset.descriptor.1.class=org.apache.gobblin.service.modules.dataset.FSDatasetDescriptor +gobblin.flow.edge.input.dataset.descriptor.1.platform=hdfs +gobblin.flow.edge.input.dataset.descriptor.1.path=/data/outbound/${team.name}/${dataset.name} +gobblin.flow.edge.input.dataset.descriptor.1.format=avro + +gobblin.flow.edge.output.dataset.descriptor.1.class=${gobblin.flow.edge.input.dataset.descriptor.1.class} +gobblin.flow.edge.output.dataset.descriptor.1.platform=${gobblin.flow.edge.input.dataset.descriptor.1.platform} +gobblin.flow.edge.output.dataset.descriptor.1.path=${gobblin.flow.edge.input.dataset.descriptor.1.path} 
+gobblin.flow.edge.output.dataset.descriptor.1.format=${gobblin.flow.edge.input.dataset.descriptor.1.format} + http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/22a951f0/gobblin-service/src/test/resources/template_catalog/flowEdgeTemplate/jobs/job1.job ---------------------------------------------------------------------- diff --git a/gobblin-service/src/test/resources/template_catalog/flowEdgeTemplate/jobs/job1.job b/gobblin-service/src/test/resources/template_catalog/flowEdgeTemplate/jobs/job1.job new file mode 100644 index 0000000..0d4f7c3 --- /dev/null +++ b/gobblin-service/src/test/resources/template_catalog/flowEdgeTemplate/jobs/job1.job @@ -0,0 +1 @@ +gobblin.template.uri="resource:///template_catalog/templates/job1.template" http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/22a951f0/gobblin-service/src/test/resources/template_catalog/flowEdgeTemplate/jobs/job2.job ---------------------------------------------------------------------- diff --git a/gobblin-service/src/test/resources/template_catalog/flowEdgeTemplate/jobs/job2.job b/gobblin-service/src/test/resources/template_catalog/flowEdgeTemplate/jobs/job2.job new file mode 100644 index 0000000..c26ade4 --- /dev/null +++ b/gobblin-service/src/test/resources/template_catalog/flowEdgeTemplate/jobs/job2.job @@ -0,0 +1,3 @@ +gobblin.template.uri="resource:///template_catalog/templates/job2.template" + +dependencies=job1 http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/22a951f0/gobblin-service/src/test/resources/template_catalog/flowEdgeTemplate/jobs/job3.job ---------------------------------------------------------------------- diff --git a/gobblin-service/src/test/resources/template_catalog/flowEdgeTemplate/jobs/job3.job b/gobblin-service/src/test/resources/template_catalog/flowEdgeTemplate/jobs/job3.job new file mode 100644 index 0000000..cac20ed --- /dev/null +++ b/gobblin-service/src/test/resources/template_catalog/flowEdgeTemplate/jobs/job3.job @@ -0,0 +1,2 @@ 
+gobblin.template.uri="resource:///template_catalog/templates/job3.template" +dependencies=job1 http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/22a951f0/gobblin-service/src/test/resources/template_catalog/flowEdgeTemplate/jobs/job4.job ---------------------------------------------------------------------- diff --git a/gobblin-service/src/test/resources/template_catalog/flowEdgeTemplate/jobs/job4.job b/gobblin-service/src/test/resources/template_catalog/flowEdgeTemplate/jobs/job4.job new file mode 100644 index 0000000..9b86c77 --- /dev/null +++ b/gobblin-service/src/test/resources/template_catalog/flowEdgeTemplate/jobs/job4.job @@ -0,0 +1,2 @@ +gobblin.template.uri="resource:///template_catalog/templates/job4.template" +dependencies="job2,job3" http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/22a951f0/gobblin-service/src/test/resources/template_catalog/multihop/flowEdgeTemplates/hdfsConvertToJsonAndEncrypt/flow.conf ---------------------------------------------------------------------- diff --git a/gobblin-service/src/test/resources/template_catalog/multihop/flowEdgeTemplates/hdfsConvertToJsonAndEncrypt/flow.conf b/gobblin-service/src/test/resources/template_catalog/multihop/flowEdgeTemplates/hdfsConvertToJsonAndEncrypt/flow.conf new file mode 100644 index 0000000..0a53e5b --- /dev/null +++ b/gobblin-service/src/test/resources/template_catalog/multihop/flowEdgeTemplates/hdfsConvertToJsonAndEncrypt/flow.conf @@ -0,0 +1,18 @@ +gobblin.flow.edge.input.dataset.descriptor.0.class=org.apache.gobblin.service.modules.dataset.FSDatasetDescriptor +gobblin.flow.edge.input.dataset.descriptor.0.platform=hdfs +gobblin.flow.edge.input.dataset.descriptor.0.path=/data/out/${team.name}/${dataset.name} +gobblin.flow.edge.input.dataset.descriptor.0.format=avro +############################################################# +# Define input dataset to be uncompressed and unencrypted +############################################################# 
+gobblin.flow.edge.input.dataset.descriptor.0.codec=NONE +gobblin.flow.edge.input.dataset.descriptor.0.encrypt.algorithm=NONE + +gobblin.flow.edge.output.dataset.descriptor.0.class=${gobblin.flow.edge.input.dataset.descriptor.0.class} +gobblin.flow.edge.output.dataset.descriptor.0.platform=${gobblin.flow.edge.input.dataset.descriptor.0.platform} +gobblin.flow.edge.output.dataset.descriptor.0.path=/data/encrypted/${team.name}/${dataset.name} +gobblin.flow.edge.output.dataset.descriptor.0.format=json +gobblin.flow.edge.output.dataset.descriptor.0.codec=gzip +gobblin.flow.edge.output.dataset.descriptor.0.encrypt.algorithm=aes_rotating +gobblin.flow.edge.output.dataset.descriptor.0.encrypt.keystore_type=json +gobblin.flow.edge.output.dataset.descriptor.0.encrypt.keystore_encoding=base64 \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/22a951f0/gobblin-service/src/test/resources/template_catalog/multihop/flowEdgeTemplates/hdfsConvertToJsonAndEncrypt/jobs/hdfs-encrypt-avro-to-json.job ---------------------------------------------------------------------- diff --git a/gobblin-service/src/test/resources/template_catalog/multihop/flowEdgeTemplates/hdfsConvertToJsonAndEncrypt/jobs/hdfs-encrypt-avro-to-json.job b/gobblin-service/src/test/resources/template_catalog/multihop/flowEdgeTemplates/hdfsConvertToJsonAndEncrypt/jobs/hdfs-encrypt-avro-to-json.job new file mode 100644 index 0000000..cda75cf --- /dev/null +++ b/gobblin-service/src/test/resources/template_catalog/multihop/flowEdgeTemplates/hdfsConvertToJsonAndEncrypt/jobs/hdfs-encrypt-avro-to-json.job @@ -0,0 +1 @@ +gobblin.template.uri="resource:///template_catalog/multihop/jobTemplates/hdfs-convert-to-json-and-encrypt.template" http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/22a951f0/gobblin-service/src/test/resources/template_catalog/multihop/flowEdgeTemplates/hdfsToAdl/flow.conf ---------------------------------------------------------------------- diff --git 
a/gobblin-service/src/test/resources/template_catalog/multihop/flowEdgeTemplates/hdfsToAdl/flow.conf b/gobblin-service/src/test/resources/template_catalog/multihop/flowEdgeTemplates/hdfsToAdl/flow.conf new file mode 100644 index 0000000..2cbf420 --- /dev/null +++ b/gobblin-service/src/test/resources/template_catalog/multihop/flowEdgeTemplates/hdfsToAdl/flow.conf @@ -0,0 +1,18 @@ +gobblin.flow.edge.input.dataset.descriptor.0.class=org.apache.gobblin.service.modules.dataset.FSDatasetDescriptor +gobblin.flow.edge.input.dataset.descriptor.0.platform=hdfs +gobblin.flow.edge.input.dataset.descriptor.0.path=/data/encrypted/${team.name}/${dataset.name} +gobblin.flow.edge.input.dataset.descriptor.0.format=json +gobblin.flow.edge.input.dataset.descriptor.0.codec=gzip +gobblin.flow.edge.input.dataset.descriptor.0.encrypt.algorithm=aes_rotating +gobblin.flow.edge.input.dataset.descriptor.0.encrypt.keystore_type=json +gobblin.flow.edge.input.dataset.descriptor.0.encrypt.keystore_encoding=base64 + +gobblin.flow.edge.output.dataset.descriptor.0.class=org.apache.gobblin.service.modules.dataset.FSDatasetDescriptor +gobblin.flow.edge.output.dataset.descriptor.0.platform=adls +gobblin.flow.edge.output.dataset.descriptor.0.path=${gobblin.flow.edge.input.dataset.descriptor.0.path} +gobblin.flow.edge.output.dataset.descriptor.0.format=${gobblin.flow.edge.input.dataset.descriptor.0.format} +gobblin.flow.edge.output.dataset.descriptor.0.format=${gobblin.flow.edge.input.dataset.descriptor.0.format} +gobblin.flow.edge.output.dataset.descriptor.0.codec=${gobblin.flow.edge.input.dataset.descriptor.0.codec} +gobblin.flow.edge.output.dataset.descriptor.0.encrypt.algorithm=${gobblin.flow.edge.input.dataset.descriptor.0.encrypt.algorithm} +gobblin.flow.edge.output.dataset.descriptor.0.encrypt.keystore_type=${gobblin.flow.edge.input.dataset.descriptor.0.encrypt.keystore_type} 
+gobblin.flow.edge.output.dataset.descriptor.0.encrypt.keystore_encoding=${gobblin.flow.edge.input.dataset.descriptor.0.encrypt.keystore_encoding} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/22a951f0/gobblin-service/src/test/resources/template_catalog/multihop/flowEdgeTemplates/hdfsToAdl/jobs/distcp-hdfs-to-adl.job ---------------------------------------------------------------------- diff --git a/gobblin-service/src/test/resources/template_catalog/multihop/flowEdgeTemplates/hdfsToAdl/jobs/distcp-hdfs-to-adl.job b/gobblin-service/src/test/resources/template_catalog/multihop/flowEdgeTemplates/hdfsToAdl/jobs/distcp-hdfs-to-adl.job new file mode 100644 index 0000000..37d0d9c --- /dev/null +++ b/gobblin-service/src/test/resources/template_catalog/multihop/flowEdgeTemplates/hdfsToAdl/jobs/distcp-hdfs-to-adl.job @@ -0,0 +1 @@ +gobblin.template.uri="resource:///template_catalog/multihop/jobTemplates/distcp-push-hdfs-to-adl.template" \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/22a951f0/gobblin-service/src/test/resources/template_catalog/multihop/flowEdgeTemplates/hdfsToHdfs/flow.conf ---------------------------------------------------------------------- diff --git a/gobblin-service/src/test/resources/template_catalog/multihop/flowEdgeTemplates/hdfsToHdfs/flow.conf b/gobblin-service/src/test/resources/template_catalog/multihop/flowEdgeTemplates/hdfsToHdfs/flow.conf new file mode 100644 index 0000000..abac6b5 --- /dev/null +++ b/gobblin-service/src/test/resources/template_catalog/multihop/flowEdgeTemplates/hdfsToHdfs/flow.conf @@ -0,0 +1,15 @@ +gobblin.flow.edge.input.dataset.descriptor.0.class=org.apache.gobblin.service.modules.dataset.FSDatasetDescriptor +gobblin.flow.edge.input.dataset.descriptor.0.platform=hdfs +gobblin.flow.edge.input.dataset.descriptor.0.path=/data/out/${team.name}/${dataset.name} + 
+gobblin.flow.edge.output.dataset.descriptor.0.class=${gobblin.flow.edge.input.dataset.descriptor.0.class} +gobblin.flow.edge.output.dataset.descriptor.0.platform=${gobblin.flow.edge.input.dataset.descriptor.0.platform} +gobblin.flow.edge.output.dataset.descriptor.0.path=${gobblin.flow.edge.input.dataset.descriptor.0.path} + +gobblin.flow.edge.input.dataset.descriptor.1.class=org.apache.gobblin.service.modules.dataset.FSDatasetDescriptor +gobblin.flow.edge.input.dataset.descriptor.1.platform=hdfs +gobblin.flow.edge.input.dataset.descriptor.1.path=/data/encrypted/${team.name}/${dataset.name} + +gobblin.flow.edge.output.dataset.descriptor.1.class=${gobblin.flow.edge.input.dataset.descriptor.1.class} +gobblin.flow.edge.output.dataset.descriptor.1.platform=${gobblin.flow.edge.input.dataset.descriptor.1.platform} +gobblin.flow.edge.output.dataset.descriptor.1.path=${gobblin.flow.edge.input.dataset.descriptor.1.path} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/22a951f0/gobblin-service/src/test/resources/template_catalog/multihop/flowEdgeTemplates/hdfsToHdfs/jobs/distcp-hdfs-to-hdfs.job ---------------------------------------------------------------------- diff --git a/gobblin-service/src/test/resources/template_catalog/multihop/flowEdgeTemplates/hdfsToHdfs/jobs/distcp-hdfs-to-hdfs.job b/gobblin-service/src/test/resources/template_catalog/multihop/flowEdgeTemplates/hdfsToHdfs/jobs/distcp-hdfs-to-hdfs.job new file mode 100644 index 0000000..fe627c9 --- /dev/null +++ b/gobblin-service/src/test/resources/template_catalog/multihop/flowEdgeTemplates/hdfsToHdfs/jobs/distcp-hdfs-to-hdfs.job @@ -0,0 +1 @@ +gobblin.template.uri="resource:///template_catalog/multihop/jobTemplates/distcp.template" http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/22a951f0/gobblin-service/src/test/resources/template_catalog/multihop/flowEdgeTemplates/localToHdfs/flow.conf ---------------------------------------------------------------------- 
diff --git a/gobblin-service/src/test/resources/template_catalog/multihop/flowEdgeTemplates/localToHdfs/flow.conf b/gobblin-service/src/test/resources/template_catalog/multihop/flowEdgeTemplates/localToHdfs/flow.conf new file mode 100644 index 0000000..d0765e1 --- /dev/null +++ b/gobblin-service/src/test/resources/template_catalog/multihop/flowEdgeTemplates/localToHdfs/flow.conf @@ -0,0 +1,9 @@ +gobblin.flow.edge.input.dataset.descriptor.0.class=org.apache.gobblin.service.modules.dataset.FSDatasetDescriptor +gobblin.flow.edge.input.dataset.descriptor.0.platform=hdfs +gobblin.flow.edge.input.dataset.descriptor.0.path=/data/out/${team.name}/${dataset.name} +gobblin.flow.edge.input.dataset.descriptor.0.format=avro + +gobblin.flow.edge.output.dataset.descriptor.0.class=${gobblin.flow.edge.input.dataset.descriptor.0.class} +gobblin.flow.edge.output.dataset.descriptor.0.platform=${gobblin.flow.edge.input.dataset.descriptor.0.platform} +gobblin.flow.edge.output.dataset.descriptor.0.path=${gobblin.flow.edge.input.dataset.descriptor.0.path} +gobblin.flow.edge.output.dataset.descriptor.0.format=${gobblin.flow.edge.input.dataset.descriptor.0.format} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/22a951f0/gobblin-service/src/test/resources/template_catalog/multihop/flowEdgeTemplates/localToHdfs/jobs/distcp-local-to-hdfs.job ---------------------------------------------------------------------- diff --git a/gobblin-service/src/test/resources/template_catalog/multihop/flowEdgeTemplates/localToHdfs/jobs/distcp-local-to-hdfs.job b/gobblin-service/src/test/resources/template_catalog/multihop/flowEdgeTemplates/localToHdfs/jobs/distcp-local-to-hdfs.job new file mode 100644 index 0000000..fe627c9 --- /dev/null +++ b/gobblin-service/src/test/resources/template_catalog/multihop/flowEdgeTemplates/localToHdfs/jobs/distcp-local-to-hdfs.job @@ -0,0 +1 @@ +gobblin.template.uri="resource:///template_catalog/multihop/jobTemplates/distcp.template" 
http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/22a951f0/gobblin-service/src/test/resources/template_catalog/multihop/jobTemplates/distcp-push-hdfs-to-adl.template ---------------------------------------------------------------------- diff --git a/gobblin-service/src/test/resources/template_catalog/multihop/jobTemplates/distcp-push-hdfs-to-adl.template b/gobblin-service/src/test/resources/template_catalog/multihop/jobTemplates/distcp-push-hdfs-to-adl.template new file mode 100644 index 0000000..b92a2bf --- /dev/null +++ b/gobblin-service/src/test/resources/template_catalog/multihop/jobTemplates/distcp-push-hdfs-to-adl.template @@ -0,0 +1,65 @@ +# ==================================================================== +# Job configurations +# ==================================================================== +job.name=Distcp-HDFS-ADL + +#team.name and dataset.name to be supplied via flowConfig +from=/data/encrypted/${team.name}/${dataset.name} +to=/data/encrypted/${team.name}/${dataset.name} + +#Will delete files in target(ADL) if not exist in source +gobblin.copy.recursive.update=true +gobblin.copy.recursive.delete=true +gobblin.copy.recursive.deleteEmptyDirectories=true +gobblin.trash.skip.trash=true + +#Will make the job fail if there's any failure +gobblin.copy.abortOnSingleDatasetFailure=true + +#gobblin.copy.preserved.attributes=p + +#Job properties to be resolved from source and dest data node config. 
+fs.uri=${source.data.node.fs.uri} +source.filebased.fs.uri=${fs.uri} +state.store.fs.uri=${fs.uri} +target.filebased.fs.uri=${destination.data.node.fs.uri} +writer.fs.uri=${target.filebased.fs.uri} + +#ADL parameters +fs.AbstractFileSystem.adl.impl="org.apache.hadoop.fs.adl.Adl" +dfs.adls.oauth2.access.token.provider.type=ClientCredential +dfs.adls.oauth2.refresh.url="https://login.microsoftonline.com/67893-erty-1234-7678-123456/oauth2/token" +dfs.adls.oauth2.client.id=${adls.oauth2.client.id} +writer.encrypted.dfs.adls.oauth2.credential=${adls.ouath2.credential} + +encrypt.key.loc=/user/${user.to.proxy}/master.password +work.dir=/tmp/${user.to.proxy} +writer.user.to.proxy=${adls.user.to.proxy} + +# ==================================================================== +# Distcp configurations +# ==================================================================== +extract.namespace="gobblin.copy" + +gobblin.dataset.profile.class="org.apache.gobblin.data.management.copy.CopyableGlobDatasetFinder" + +# target location for copy +data.publisher.final.dir=${to} +gobblin.dataset.pattern=${from} + +data.publisher.type="org.apache.gobblin.data.management.copy.publisher.CopyDataPublisher" +source.class="org.apache.gobblin.data.management.copy.CopySource" +writer.builder.class="org.apache.gobblin.data.management.copy.writer.FileAwareInputStreamDataWriterBuilder" +converter.classes="org.apache.gobblin.converter.IdentityConverter" + +# ======================================= +# Job Parameters to be resolved using SpecExecutor properties +# ======================================= +type=${specExecInstance.job.type} + +job.jars="lib/*" +job.lock.enabled=false +job.class=${specExecInstance.job.launcher.class} + +# Gobblin Hadoop Parameters +launcher.type=${specExecInstance.job.launcher.type} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/22a951f0/gobblin-service/src/test/resources/template_catalog/multihop/jobTemplates/distcp.template 
---------------------------------------------------------------------- diff --git a/gobblin-service/src/test/resources/template_catalog/multihop/jobTemplates/distcp.template b/gobblin-service/src/test/resources/template_catalog/multihop/jobTemplates/distcp.template new file mode 100644 index 0000000..844dc92 --- /dev/null +++ b/gobblin-service/src/test/resources/template_catalog/multihop/jobTemplates/distcp.template @@ -0,0 +1,57 @@ +# ==================================================================== +# Job configurations +# ==================================================================== +job.name=Distcp-HDFS-HDFS + +# Source and destination paths to be obtained from flow config. +from=${gobblin.flow.edge.input.dataset.descriptor.path} +to=${gobblin.flow.edge.output.dataset.descriptor.path} + +#Will delete files in target if not exist in source +gobblin.copy.recursive.update=true +gobblin.copy.recursive.delete=true +gobblin.copy.recursive.deleteEmptyDirectories=true +gobblin.trash.skip.trash=true + +#Will make the job fail if there's any failure +gobblin.copy.abortOnSingleDatasetFailure=true + +#gobblin.copy.preserved.attributes=p + +#Job properties to be resolved from source and dest data node config. 
+fs.uri=${source.data.node.fs.uri} +source.filebased.fs.uri=${fs.uri} +state.store.fs.uri=${fs.uri} +target.filebased.fs.uri=${destination.data.node.fs.uri} +writer.fs.uri=${target.filebased.fs.uri} + +work.dir=/tmp/${user.to.proxy} +writer.user.to.proxy=${adls.user.to.proxy} + +# ==================================================================== +# Distcp configurations +# ==================================================================== +extract.namespace="gobblin.copy" + +gobblin.dataset.profile.class="org.apache.gobblin.data.management.copy.CopyableGlobDatasetFinder" + +# target location for copy +data.publisher.final.dir=${to} +gobblin.dataset.pattern=${from} + +data.publisher.type="org.apache.gobblin.data.management.copy.publisher.CopyDataPublisher" +source.class="org.apache.gobblin.data.management.copy.CopySource" +writer.builder.class="org.apache.gobblin.data.management.copy.writer.FileAwareInputStreamDataWriterBuilder" +converter.classes="org.apache.gobblin.converter.IdentityConverter" + +# ======================================= +# Job Parameters to be resolved using SpecExecutor properties +# ======================================= +type=${specExecInstance.job.type} + +job.jars="lib/*" +job.lock.enabled=false +job.class=${specExecInstance.job.launcher.class} + +# Gobblin Hadoop Parameters +launcher.type=${specExecInstance.job.launcher.type} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/22a951f0/gobblin-service/src/test/resources/template_catalog/multihop/jobTemplates/hdfs-convert-to-json-and-encrypt.template ---------------------------------------------------------------------- diff --git a/gobblin-service/src/test/resources/template_catalog/multihop/jobTemplates/hdfs-convert-to-json-and-encrypt.template b/gobblin-service/src/test/resources/template_catalog/multihop/jobTemplates/hdfs-convert-to-json-and-encrypt.template new file mode 100644 index 0000000..fcc78cd --- /dev/null +++ 
b/gobblin-service/src/test/resources/template_catalog/multihop/jobTemplates/hdfs-convert-to-json-and-encrypt.template @@ -0,0 +1,42 @@ +# ==================================================================== +# Job configurations (can be changed) +# ==================================================================== +job.name=convert-to-json-and-encrypt +job.description="Convert date partitioned avro files to json and encrypt" +from=/data/out/${team.name}/${dataset.name} +to=/data/encrypted/${team.name}/${dataset.name} + +# ==================================================================== +# Source, converter, writer and publisher configurations +# ==================================================================== + +source.class="org.apache.gobblin.source.DatePartitionedAvroFileSource" +date.partitioned.source.partition.pattern=yyyy-MM-dd +date.partitioned.source.min.watermark.value=2017-03-01 +source.filebased.data.directory=${from} +source.entity=avro + +converter.classes="org.apache.gobblin.converter.avro.AvroToJsonStringConverter,org.apache.gobblin.converter.string.StringToBytesConverter" + +writer.builder.class="org.apache.gobblin.writer.SimpleDataWriterBuilder" +writer.output.format=json +writer.codec.type=gzip +simple.writer.prepend.size=false +writer.partitioner.class="org.apache.gobblin.writer.partitioner.WorkUnitStateWriterPartitioner" +writer.partition.pattern=${date.partitioned.source.partition.pattern} + +writer.encrypt.algorithm=aes_rotating +writer.encrypt.keystore_type=json +writer.encrypt.keystore_path="hdfs://path/to/keystore/keystore.json" + +data.publisher.type="org.apache.gobblin.publisher.BaseDataPublisher" +data.publisher.appendExtractToFinalDir=false +data.publisher.metadata.output_file="metadata.json" +data.publisher.metadata.publish.writer=true + +data.publisher.final.dir=${to} + +task.maxretries=0 +workunit.retry.enabled=false + +qualitychecker.task.policies="org.apache.gobblin.policies.count.RowCountPolicy" \ No newline at end of file 
http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/22a951f0/gobblin-service/src/test/resources/template_catalog/templates/job1.template ---------------------------------------------------------------------- diff --git a/gobblin-service/src/test/resources/template_catalog/templates/job1.template b/gobblin-service/src/test/resources/template_catalog/templates/job1.template new file mode 100644 index 0000000..321e984 --- /dev/null +++ b/gobblin-service/src/test/resources/template_catalog/templates/job1.template @@ -0,0 +1,2 @@ +key11=val11 +key12=val12 \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/22a951f0/gobblin-service/src/test/resources/template_catalog/templates/job2.template ---------------------------------------------------------------------- diff --git a/gobblin-service/src/test/resources/template_catalog/templates/job2.template b/gobblin-service/src/test/resources/template_catalog/templates/job2.template new file mode 100644 index 0000000..5141d92 --- /dev/null +++ b/gobblin-service/src/test/resources/template_catalog/templates/job2.template @@ -0,0 +1,2 @@ +key21=val21 +key22=val22 \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/22a951f0/gobblin-service/src/test/resources/template_catalog/templates/job3.template ---------------------------------------------------------------------- diff --git a/gobblin-service/src/test/resources/template_catalog/templates/job3.template b/gobblin-service/src/test/resources/template_catalog/templates/job3.template new file mode 100644 index 0000000..c192cc4 --- /dev/null +++ b/gobblin-service/src/test/resources/template_catalog/templates/job3.template @@ -0,0 +1,2 @@ +key31=val31 +key32=val32 \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/22a951f0/gobblin-service/src/test/resources/template_catalog/templates/job4.template 
---------------------------------------------------------------------- diff --git a/gobblin-service/src/test/resources/template_catalog/templates/job4.template b/gobblin-service/src/test/resources/template_catalog/templates/job4.template new file mode 100644 index 0000000..a6a508e --- /dev/null +++ b/gobblin-service/src/test/resources/template_catalog/templates/job4.template @@ -0,0 +1,2 @@ +key41=val41 +key42=val42 \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/22a951f0/gobblin-service/src/test/resources/template_catalog/test-template/flow.conf ---------------------------------------------------------------------- diff --git a/gobblin-service/src/test/resources/template_catalog/test-template/flow.conf b/gobblin-service/src/test/resources/template_catalog/test-template/flow.conf index a13af7d..85e686b 100644 --- a/gobblin-service/src/test/resources/template_catalog/test-template/flow.conf +++ b/gobblin-service/src/test/resources/template_catalog/test-template/flow.conf @@ -1,16 +1,22 @@ -gobblin.flow.dataset.descriptor.input.0.class=org.apache.gobblin.service.modules.dataset.BaseHdfsDatasetDescriptor -gobblin.flow.dataset.descriptor.input.0.path=/data/inbound/<TEAM_NAME>/<DATASET_NAME> -gobblin.flow.dataset.descriptor.input.0.format=avro +team.name=test-team +dataset.name=test-dataset +gobblin.flow.edge.input.dataset.descriptor.0.class=org.apache.gobblin.service.modules.dataset.FSDatasetDescriptor +gobblin.flow.edge.input.dataset.descriptor.0.platform=hdfs +gobblin.flow.edge.input.dataset.descriptor.0.path=/data/inbound/${team.name}/${dataset.name} +gobblin.flow.edge.input.dataset.descriptor.0.format=avro -gobblin.flow.dataset.descriptor.output.0.class=${gobblin.flow.dataset.descriptor.input.0.class} -gobblin.flow.dataset.descriptor.output.0.path=${gobblin.flow.dataset.descriptor.input.0.path} -gobblin.flow.dataset.descriptor.output.0.format=${gobblin.flow.dataset.descriptor.input.0.format} 
+gobblin.flow.edge.output.dataset.descriptor.0.class=${gobblin.flow.edge.input.dataset.descriptor.0.class} +gobblin.flow.edge.output.dataset.descriptor.0.platform=${gobblin.flow.edge.input.dataset.descriptor.0.platform} +gobblin.flow.edge.output.dataset.descriptor.0.path=${gobblin.flow.edge.input.dataset.descriptor.0.path} +gobblin.flow.edge.output.dataset.descriptor.0.format=${gobblin.flow.edge.input.dataset.descriptor.0.format} -gobblin.flow.dataset.descriptor.input.1.class=org.apache.gobblin.service.modules.dataset.BaseHdfsDatasetDescriptor -gobblin.flow.dataset.descriptor.input.1.path=/data/outbound/<TEAM_NAME>/<DATASET_NAME> -gobblin.flow.dataset.descriptor.input.1.format=avro +gobblin.flow.edge.input.dataset.descriptor.1.class=org.apache.gobblin.service.modules.dataset.FSDatasetDescriptor +gobblin.flow.edge.input.dataset.descriptor.1.platform=hdfs +gobblin.flow.edge.input.dataset.descriptor.1.path=/data/outbound/${team.name}/${dataset.name} +gobblin.flow.edge.input.dataset.descriptor.1.format=avro -gobblin.flow.dataset.descriptor.output.1.class=${gobblin.flow.dataset.descriptor.input.1.class} -gobblin.flow.dataset.descriptor.output.1.path=${gobblin.flow.dataset.descriptor.input.1.path} -gobblin.flow.dataset.descriptor.output.1.format=${gobblin.flow.dataset.descriptor.input.1.format} +gobblin.flow.edge.output.dataset.descriptor.1.class=${gobblin.flow.edge.input.dataset.descriptor.1.class} +gobblin.flow.edge.output.dataset.descriptor.1.platform=${gobblin.flow.edge.input.dataset.descriptor.1.platform} +gobblin.flow.edge.output.dataset.descriptor.1.path=${gobblin.flow.edge.input.dataset.descriptor.1.path} +gobblin.flow.edge.output.dataset.descriptor.1.format=${gobblin.flow.edge.input.dataset.descriptor.1.format} http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/22a951f0/gobblin-service/src/test/resources/template_catalog/test-template/jobs/job1.conf ---------------------------------------------------------------------- diff --git 
a/gobblin-service/src/test/resources/template_catalog/test-template/jobs/job1.conf b/gobblin-service/src/test/resources/template_catalog/test-template/jobs/job1.conf deleted file mode 100644 index 4a59fcc..0000000 --- a/gobblin-service/src/test/resources/template_catalog/test-template/jobs/job1.conf +++ /dev/null @@ -1,2 +0,0 @@ -key11=val11 -key12=val12 http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/22a951f0/gobblin-service/src/test/resources/template_catalog/test-template/jobs/job1.job ---------------------------------------------------------------------- diff --git a/gobblin-service/src/test/resources/template_catalog/test-template/jobs/job1.job b/gobblin-service/src/test/resources/template_catalog/test-template/jobs/job1.job new file mode 100644 index 0000000..00e274e --- /dev/null +++ b/gobblin-service/src/test/resources/template_catalog/test-template/jobs/job1.job @@ -0,0 +1 @@ +gobblin.template.uri=resource:///template_catalog/templates/job1.template http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/22a951f0/gobblin-service/src/test/resources/template_catalog/test-template/jobs/job2.conf ---------------------------------------------------------------------- diff --git a/gobblin-service/src/test/resources/template_catalog/test-template/jobs/job2.conf b/gobblin-service/src/test/resources/template_catalog/test-template/jobs/job2.conf deleted file mode 100644 index f174940..0000000 --- a/gobblin-service/src/test/resources/template_catalog/test-template/jobs/job2.conf +++ /dev/null @@ -1,3 +0,0 @@ -dependencies=job1 -key21=val21 -key22=val22 \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/22a951f0/gobblin-service/src/test/resources/template_catalog/test-template/jobs/job2.job ---------------------------------------------------------------------- diff --git a/gobblin-service/src/test/resources/template_catalog/test-template/jobs/job2.job 
b/gobblin-service/src/test/resources/template_catalog/test-template/jobs/job2.job new file mode 100644 index 0000000..c4db05f --- /dev/null +++ b/gobblin-service/src/test/resources/template_catalog/test-template/jobs/job2.job @@ -0,0 +1,3 @@ +gobblin.template.uri=resource:///template_catalog/templates/job2.template + +dependencies=job1 http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/22a951f0/gobblin-service/src/test/resources/template_catalog/test-template/jobs/job3.conf ---------------------------------------------------------------------- diff --git a/gobblin-service/src/test/resources/template_catalog/test-template/jobs/job3.conf b/gobblin-service/src/test/resources/template_catalog/test-template/jobs/job3.conf deleted file mode 100644 index fda7f39..0000000 --- a/gobblin-service/src/test/resources/template_catalog/test-template/jobs/job3.conf +++ /dev/null @@ -1,3 +0,0 @@ -dependencies=job1 -key31=val31 -key32=val32 \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/22a951f0/gobblin-service/src/test/resources/template_catalog/test-template/jobs/job3.job ---------------------------------------------------------------------- diff --git a/gobblin-service/src/test/resources/template_catalog/test-template/jobs/job3.job b/gobblin-service/src/test/resources/template_catalog/test-template/jobs/job3.job new file mode 100644 index 0000000..59867b3 --- /dev/null +++ b/gobblin-service/src/test/resources/template_catalog/test-template/jobs/job3.job @@ -0,0 +1,2 @@ +gobblin.template.uri=resource:///template_catalog/templates/job3.template +dependencies=job1 http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/22a951f0/gobblin-service/src/test/resources/template_catalog/test-template/jobs/job4.conf ---------------------------------------------------------------------- diff --git a/gobblin-service/src/test/resources/template_catalog/test-template/jobs/job4.conf 
b/gobblin-service/src/test/resources/template_catalog/test-template/jobs/job4.conf deleted file mode 100644 index c5ef881..0000000 --- a/gobblin-service/src/test/resources/template_catalog/test-template/jobs/job4.conf +++ /dev/null @@ -1,3 +0,0 @@ -dependencies="job2,job3" -key41=val41 -key42=val42 \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/22a951f0/gobblin-service/src/test/resources/template_catalog/test-template/jobs/job4.job ---------------------------------------------------------------------- diff --git a/gobblin-service/src/test/resources/template_catalog/test-template/jobs/job4.job b/gobblin-service/src/test/resources/template_catalog/test-template/jobs/job4.job new file mode 100644 index 0000000..8fdc611 --- /dev/null +++ b/gobblin-service/src/test/resources/template_catalog/test-template/jobs/job4.job @@ -0,0 +1,2 @@ +gobblin.template.uri=resource:///template_catalog/templates/job4.template +dependencies=job2,job3
