Repository: incubator-gobblin Updated Branches: refs/heads/master 49e0f5b50 -> 149ebd441
[GOBBLIN-339] Example to illustrate how to build custom source and extractor in Gobblin. Closes #2195 from sv2000/master Project: http://git-wip-us.apache.org/repos/asf/incubator-gobblin/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-gobblin/commit/149ebd44 Tree: http://git-wip-us.apache.org/repos/asf/incubator-gobblin/tree/149ebd44 Diff: http://git-wip-us.apache.org/repos/asf/incubator-gobblin/diff/149ebd44 Branch: refs/heads/master Commit: 149ebd441ae5cd8765a643c5deaa668680d18737 Parents: 49e0f5b Author: suvasude <[email protected]> Authored: Tue Dec 12 01:11:08 2017 -0800 Committer: Abhishek Tiwari <[email protected]> Committed: Tue Dec 12 01:11:08 2017 -0800 ---------------------------------------------------------------------- .../hadoop/HadoopTextFileInputExtractor.java | 41 +++++++++++++++++ .../example/hadoop/HadoopTextFileSource.java | 47 ++++++++++++++++++++ 2 files changed, 88 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/149ebd44/gobblin-example/src/main/java/org/apache/gobblin/example/hadoop/HadoopTextFileInputExtractor.java ---------------------------------------------------------------------- diff --git a/gobblin-example/src/main/java/org/apache/gobblin/example/hadoop/HadoopTextFileInputExtractor.java b/gobblin-example/src/main/java/org/apache/gobblin/example/hadoop/HadoopTextFileInputExtractor.java new file mode 100644 index 0000000..91e0993 --- /dev/null +++ b/gobblin-example/src/main/java/org/apache/gobblin/example/hadoop/HadoopTextFileInputExtractor.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.gobblin.example.hadoop; +import java.io.IOException; +import org.apache.gobblin.source.extractor.hadoop.HadoopFileInputExtractor; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.RecordReader; + +/** + * An implementation of {@link org.apache.gobblin.source.extractor.hadoop.HadoopFileInputExtractor} to be used with + * {@link org.apache.gobblin.example.hadoop.HadoopTextFileSource}. + * + * @author Sudarshan Vasudevan + */ + +public class HadoopTextFileInputExtractor extends HadoopFileInputExtractor<String,String,LongWritable,Text> { + public HadoopTextFileInputExtractor(RecordReader<LongWritable,Text> recordReader, boolean readKeys) { + super(recordReader,readKeys); + } + + @Override + public String getSchema() throws IOException { + return ""; + } +} http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/149ebd44/gobblin-example/src/main/java/org/apache/gobblin/example/hadoop/HadoopTextFileSource.java ---------------------------------------------------------------------- diff --git a/gobblin-example/src/main/java/org/apache/gobblin/example/hadoop/HadoopTextFileSource.java b/gobblin-example/src/main/java/org/apache/gobblin/example/hadoop/HadoopTextFileSource.java new file mode 100644 index 0000000..d560857 --- /dev/null +++ b/gobblin-example/src/main/java/org/apache/gobblin/example/hadoop/HadoopTextFileSource.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.gobblin.example.hadoop; + +import org.apache.gobblin.configuration.WorkUnitState; +import org.apache.gobblin.source.extractor.hadoop.HadoopFileInputExtractor; +import org.apache.gobblin.source.extractor.hadoop.HadoopFileInputSource; +import org.apache.gobblin.source.extractor.hadoop.HadoopTextInputSource; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.RecordReader; +import org.apache.hadoop.mapreduce.lib.input.FileSplit; + +/** + * An implementation of {@link org.apache.gobblin.source.extractor.hadoop.HadoopTextInputSource} for reading + * data from Hadoop. + * + * <p> + * This source returns an {@link org.apache.gobblin.example.hadoop.HadoopTextFileInputExtractor} to + * pull the data from Hadoop. + * </p> + * + * @author Sudarshan Vasudevan + */ + +public class HadoopTextFileSource extends HadoopFileInputSource<String,String,LongWritable,Text> { + @Override + protected HadoopFileInputExtractor<String,String,LongWritable,Text> getExtractor(WorkUnitState workUnitState, RecordReader recordReader, + FileSplit fileSplit, boolean readKeys) { + return new HadoopTextFileInputExtractor(recordReader, readKeys); + } +}
