Hello everyone, I am trying to run a very simple example, but unfortunately I am stuck on the following exception:
Exception in thread "main" java.lang.IllegalArgumentException: java.net.URISyntaxException: Relative path in absolute URI: file: "absolute directory" I was wondering if anyone else got this exception when trying to run the examples from the Spark Git repo; the code I am trying to run is the following: //$example on$ import org.apache.spark.ml.Pipeline; import org.apache.spark.ml.PipelineModel; import org.apache.spark.ml.PipelineStage; import org.apache.spark.ml.evaluation.RegressionEvaluator; import org.apache.spark.ml.feature.VectorIndexer; import org.apache.spark.ml.feature.VectorIndexerModel; import org.apache.spark.ml.regression.RandomForestRegressionModel; import org.apache.spark.ml.regression.RandomForestRegressor; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.apache.spark.sql.SparkSession; //$example off$ public class JavaRandomForestRegressorExample { public static void main(String[] args) { System.setProperty("hadoop.home.dir", "C:\\winutils"); SparkSession spark = SparkSession .builder() .master("local[*]") .appName("JavaRandomForestRegressorExample") .getOrCreate(); // $example on$ // Load and parse the data file, converting it to a DataFrame. Dataset<Row> data = spark.read().format("libsvm").load("C:\\data\\sample_libsvm_data.txt"); // Automatically identify categorical features, and index them. // Set maxCategories so features with > 4 distinct values are treated as // continuous. VectorIndexerModel featureIndexer = new VectorIndexer().setInputCol("features").setOutputCol("indexedFeatures") .setMaxCategories(4).fit(data); // Split the data into training and test sets (30% held out for testing) Dataset<Row>[] splits = data.randomSplit(new double[] { 0.7, 0.3 }); Dataset<Row> trainingData = splits[0]; Dataset<Row> testData = splits[1]; // Train a RandomForest model. 
RandomForestRegressor rf = new RandomForestRegressor().setLabelCol("label").setFeaturesCol("indexedFeatures"); // Chain indexer and forest in a Pipeline Pipeline pipeline = new Pipeline().setStages(new PipelineStage[] { featureIndexer, rf }); // Train model. This also runs the indexer. PipelineModel model = pipeline.fit(trainingData); // Make predictions. Dataset<Row> predictions = model.transform(testData); // Select example rows to display. predictions.select("prediction", "label", "features").show(5); // Select (prediction, true label) and compute test error RegressionEvaluator evaluator = new RegressionEvaluator().setLabelCol("label").setPredictionCol("prediction") .setMetricName("rmse"); double rmse = evaluator.evaluate(predictions); System.out.println("Root Mean Squared Error (RMSE) on test data = " + rmse); RandomForestRegressionModel rfModel = (RandomForestRegressionModel) (model.stages()[1]); System.out.println("Learned regression forest model:\n" + rfModel.toDebugString()); // $example off$ spark.stop(); } } Thanks to everyone for reading/answering! Flavio -- View this message in context: http://apache-spark-user-list.1001560.n3.nabble.com/java-net-URISyntaxException-Relative-path-in-absolute-URI-tp27466.html Sent from the Apache Spark User List mailing list archive at Nabble.com. --------------------------------------------------------------------- To unsubscribe e-mail: user-unsubscr...@spark.apache.org