Hi everybody!

I'm new to Spark and I'm trying to do a simple read from HDFS with a custom InputFormat, but I'm running into this problem:
[INFO] --- exec-maven-plugin:1.2.1:java (default-cli) @ simple-project ---
log4j:WARN No appenders could be found for logger (akka.event.slf4j.Slf4jEventHandler).
log4j:WARN Please initialize the log4j system properly.
log4j:WARN See http://logging.apache.org/log4j/1.2/faq.html#noconfig for more info.
[WARNING]
java.lang.reflect.InvocationTargetException
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
        at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:606)
        at org.codehaus.mojo.exec.ExecJavaMojo$1.run(ExecJavaMojo.java:297)
        at java.lang.Thread.run(Thread.java:724)
Caused by: org.apache.spark.SparkException: Job aborted: Task 0.0:0 failed more than 0 times; aborting job java.io.IOException: File Data Format not coherent with configuration parameters
        at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:827)
        at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:825)
        at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:60)
        at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
        at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:825)
        at org.apache.spark.scheduler.DAGScheduler.processEvent(DAGScheduler.scala:440)
        at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$run(DAGScheduler.scala:502)
        at org.apache.spark.scheduler.DAGScheduler$$anon$1.run(DAGScheduler.scala:157)
[INFO] ------------------------------------------------------------------------
[INFO] BUILD FAILURE
[INFO] ------------------------------------------------------------------------
[INFO] Total time: 4.367s
[INFO] Finished at: Wed Jan 22 14:42:00 CET 2014
[INFO] Final Memory: 25M/439M
[INFO] ------------------------------------------------------------------------
My driver:

package spark.test;

/*** SimpleApp.java ***/

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;

import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class SimpleApp {
    public static void main(String[] args) throws IOException {
        String nameNodeFolder = "hdfs://localhost:8020/user/cjaber/";
        String logFile = nameNodeFolder + "input/CSVfile.csv"; // should be some file on HDFS

        // Local Spark context; the application jar is shipped so the custom
        // input format classes are available to the executors.
        JavaSparkContext sc = new JavaSparkContext("local", "Simple App",
                "$YOUR_SPARK_HOME", new String[]{"target/simple-project-1.0.jar"});

        // Hadoop Job is only used here to build the Configuration handed to Spark.
        Configuration conf = new Configuration();
        Job job = new Job(conf);
        job.setInputFormatClass(SignalInputFormat.class);

        // Read the file through the custom input format as (Text, SignalWritable) pairs.
        JavaPairRDD<Text, SignalWritable> data = sc.newAPIHadoopFile(logFile,
                SignalInputFormat.class, Text.class, SignalWritable.class,
                job.getConfiguration());

        System.out.println(data.count());
    }
}
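For context, SignalInputFormat and SignalWritable follow the standard Hadoop new-API FileInputFormat / RecordReader / Writable pattern. The outline below is only a simplified sketch of that shape; the field names and the line-parsing logic are placeholders, not my real implementation:

// Simplified sketch only: placeholder field names and parsing, not the real classes.
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.LineRecordReader;

public class SignalInputFormat extends FileInputFormat<Text, SignalWritable> {
    @Override
    public RecordReader<Text, SignalWritable> createRecordReader(InputSplit split,
            TaskAttemptContext context) {
        return new SignalRecordReader();
    }
}

class SignalWritable implements Writable {
    private double value; // placeholder field

    public void set(double value) { this.value = value; }

    @Override
    public void write(DataOutput out) throws IOException { out.writeDouble(value); }

    @Override
    public void readFields(DataInput in) throws IOException { value = in.readDouble(); }
}

class SignalRecordReader extends RecordReader<Text, SignalWritable> {
    // Delegate the raw line reading to Hadoop's LineRecordReader.
    private final LineRecordReader lineReader = new LineRecordReader();
    private final Text key = new Text();
    private final SignalWritable value = new SignalWritable();

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context)
            throws IOException, InterruptedException {
        lineReader.initialize(split, context);
    }

    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
        if (!lineReader.nextKeyValue()) {
            return false;
        }
        // Placeholder parsing: first CSV column is the key, second is a numeric value.
        String[] fields = lineReader.getCurrentValue().toString().split(",");
        if (fields.length < 2) {
            throw new IOException("unexpected line format: " + lineReader.getCurrentValue());
        }
        key.set(fields[0]);
        value.set(Double.parseDouble(fields[1]));
        return true;
    }

    @Override
    public Text getCurrentKey() { return key; }

    @Override
    public SignalWritable getCurrentValue() { return value; }

    @Override
    public float getProgress() throws IOException, InterruptedException {
        return lineReader.getProgress();
    }

    @Override
    public void close() throws IOException {
        lineReader.close();
    }
}

As far as I understand, an IOException thrown from nextKeyValue() fails the task, which seems to match the "File Data Format not coherent with configuration parameters" message wrapped in the SparkException above.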
Best regards,
Chadi