Repository: incubator-gobblin
Updated Branches:
  refs/heads/master 49e0f5b50 -> 149ebd441


[GOBBLIN-339] Example to illustrate how to build custom source and extractor in 
Gobblin.

Closes #2195 from sv2000/master


Project: http://git-wip-us.apache.org/repos/asf/incubator-gobblin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-gobblin/commit/149ebd44
Tree: http://git-wip-us.apache.org/repos/asf/incubator-gobblin/tree/149ebd44
Diff: http://git-wip-us.apache.org/repos/asf/incubator-gobblin/diff/149ebd44

Branch: refs/heads/master
Commit: 149ebd441ae5cd8765a643c5deaa668680d18737
Parents: 49e0f5b
Author: suvasude <[email protected]>
Authored: Tue Dec 12 01:11:08 2017 -0800
Committer: Abhishek Tiwari <[email protected]>
Committed: Tue Dec 12 01:11:08 2017 -0800

----------------------------------------------------------------------
 .../hadoop/HadoopTextFileInputExtractor.java    | 41 +++++++++++++++++
 .../example/hadoop/HadoopTextFileSource.java    | 47 ++++++++++++++++++++
 2 files changed, 88 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/149ebd44/gobblin-example/src/main/java/org/apache/gobblin/example/hadoop/HadoopTextFileInputExtractor.java
----------------------------------------------------------------------
diff --git a/gobblin-example/src/main/java/org/apache/gobblin/example/hadoop/HadoopTextFileInputExtractor.java b/gobblin-example/src/main/java/org/apache/gobblin/example/hadoop/HadoopTextFileInputExtractor.java
new file mode 100644
index 0000000..91e0993
--- /dev/null
+++ b/gobblin-example/src/main/java/org/apache/gobblin/example/hadoop/HadoopTextFileInputExtractor.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.gobblin.example.hadoop;
+import java.io.IOException;
+import org.apache.gobblin.source.extractor.hadoop.HadoopFileInputExtractor;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.RecordReader;
+
+/**
+ * An implementation of {@link 
org.apache.gobblin.source.extractor.hadoop.HadoopFileInputExtractor} to be used 
with
+ * {@link org.apache.gobblin.example.hadoop.HadoopTextFileSource}.
+ *
+ * @author Sudarshan Vasudevan
+ */
+
+public class HadoopTextFileInputExtractor extends 
HadoopFileInputExtractor<String,String,LongWritable,Text> {
+  public HadoopTextFileInputExtractor(RecordReader<LongWritable,Text> 
recordReader, boolean readKeys) {
+    super(recordReader,readKeys);
+  }
+
+  @Override
+  public String getSchema() throws IOException {
+    return "";
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/149ebd44/gobblin-example/src/main/java/org/apache/gobblin/example/hadoop/HadoopTextFileSource.java
----------------------------------------------------------------------
diff --git a/gobblin-example/src/main/java/org/apache/gobblin/example/hadoop/HadoopTextFileSource.java b/gobblin-example/src/main/java/org/apache/gobblin/example/hadoop/HadoopTextFileSource.java
new file mode 100644
index 0000000..d560857
--- /dev/null
+++ b/gobblin-example/src/main/java/org/apache/gobblin/example/hadoop/HadoopTextFileSource.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.gobblin.example.hadoop;
+
+import org.apache.gobblin.configuration.WorkUnitState;
+import org.apache.gobblin.source.extractor.hadoop.HadoopFileInputExtractor;
+import org.apache.gobblin.source.extractor.hadoop.HadoopFileInputSource;
+import org.apache.gobblin.source.extractor.hadoop.HadoopTextInputSource;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.lib.input.FileSplit;
+
+/**
+ * An implementation of {@link 
org.apache.gobblin.source.extractor.hadoop.HadoopTextInputSource} for reading
+ * data from Hadoop.
+ *
+ * <p>
+ *   This source returns an {@link 
org.apache.gobblin.example.hadoop.HadoopTextFileInputExtractor} to
+ *   pull the data from Hadoop.
+ * </p>
+ *
+ * @author Sudarshan Vasudevan
+ */
+
+public class HadoopTextFileSource extends 
HadoopFileInputSource<String,String,LongWritable,Text> {
+  @Override
+  protected HadoopFileInputExtractor<String,String,LongWritable,Text> 
getExtractor(WorkUnitState workUnitState, RecordReader recordReader,
+      FileSplit fileSplit, boolean readKeys) {
+    return new HadoopTextFileInputExtractor(recordReader, readKeys);
+  }
+}

Reply via email to