[13/17] incubator-sdap-mudrod git commit: SDAP-7 Change all package namespaces to org.apache.sdap

lewismc Tue, 19 Dec 2017 06:13:49 -0800

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/ssearch/ranking/DataGenerator.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/gov/nasa/jpl/mudrod/ssearch/ranking/DataGenerator.java b/core/src/main/java/gov/nasa/jpl/mudrod/ssearch/ranking/DataGenerator.java
deleted file mode 100644
index 598ad61..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/ssearch/ranking/DataGenerator.java
+++ /dev/null
@@ -1,313 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you 
- * may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at
- * 
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package gov.nasa.jpl.mudrod.ssearch.ranking;
-
-import au.com.bytecode.opencsv.CSVReader;
-import au.com.bytecode.opencsv.CSVWriter;
-
-import java.io.*;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-
-/**
- * SVMData is a program designed to create appropriate input data for the 
RankSVM
- * algorithm that involves Pairwise Classification.  Specifically, instead of 
working in
- * the space of query-document vectors, e.g. x1, x2, x3, we transform them 
into a new space
- * in which a pair of documents is represented as the difference between their 
feature vectors.
- */
-public class DataGenerator {
-  private static String mySourceDir;
-  private static String myResultDir;
-  private static boolean isMultFiles;
-
-  private static String[] myHeader;
-  private static List<List<String>> myMasterList = new 
ArrayList<List<String>>();
-
-  // HashMap used for comparing evaluation classes
-  public static final HashMap<String, Integer> map1 = new HashMap<String, 
Integer>();
-
-  static {
-    map1.put("Excellent", 7);
-    map1.put("Very good", 6);
-    map1.put("Good", 5);
-    map1.put("OK", 4);
-    map1.put("Bad", 3);
-    map1.put("Very bad", 2);
-    map1.put("Terrible", 1);
-  }
-
-  /**
-   * Constructor which takes in path containing one or multiple files to 
process.
-   * Also takes in argument specifying whether or not a single file needs to 
be processed,
-   * or multiple files need to be processed.
-   *
-   * @param sourceDir directory containing single file or multiple files to be 
processed
-   * @param resultDir output folder
-   * @param multFiles true if multiple files in directory need to be processed 
and false if
-   *                  only a single file needs to be processed
-   */
-  public DataGenerator(String sourceDir, String resultDir, boolean multFiles) {
-    mySourceDir = sourceDir;
-    myResultDir = resultDir;
-    isMultFiles = multFiles;
-  }
-
-  /**
-   * Responsible for invoking the processing of data file(s) and their 
subsequent storage
-   * into a user specified directory.
-   */
-  public void process() {
-    parseFile();
-    writeCSVfile(myMasterList);
-  }
-
-  /**
-   * Parses the original user-specified CSV file, storing the contents for 
future calculations
-   * and formatting.
-   */
-  public static void parseFile() {
-    String[][] dataArr = null;
-    try {
-      String sourceDir = mySourceDir;
-
-      if (isMultFiles == true) // Case where multiple files have to be 
processed
-      {
-        // Iterate over files in directory 
-        File directory = new File(sourceDir);
-        File[] directoryListing = directory.listFiles();
-
-        if (directoryListing != null) {
-          for (File child : directoryListing) {
-            CSVReader csvReader = new CSVReader(new FileReader(child));
-            List<String[]> list = csvReader.readAll();
-
-            // Store into 2D array by transforming array list to normal array
-            dataArr = new String[list.size()][];
-            dataArr = list.toArray(dataArr);
-
-            calculateVec(dataArr);
-
-            csvReader.close();
-          }
-          storeHead(dataArr); // Store the header
-        }
-      } else // Process only one file
-      {
-        File file = new File(sourceDir);
-
-        if (file != null) {
-          CSVReader csvReader = new CSVReader(new FileReader(file));
-          List<String[]> list = csvReader.readAll();
-
-          // Store into 2D array by transforming array list to normal array
-          dataArr = new String[list.size()][];
-          dataArr = list.toArray(dataArr);
-
-          storeHead(dataArr); // Store the header
-          calculateVec(dataArr);
-
-          csvReader.close();
-        }
-      }
-    } catch (FileNotFoundException e) {
-      e.printStackTrace();
-    } catch (IOException e) {
-      e.printStackTrace();
-    }
-  }
-
-  /**
-   * Performs the necessary vector calculations on each possible combination 
of vectors,
-   * also storing a value that indicates the evaluation.
-   *
-   * @param arr the parsed contents of the original CSV file
-   */
-  public static void calculateVec(String[][] arr) {
-    List<List<String>> listofLists = new ArrayList<List<String>>(); // Holds 
calculations 
-
-    int rowStart = 1;
-    for (int row = rowStart; row < arr.length; row++) // Start at row 1 
because row 0 is heading lol
-    {
-      for (int i = 1; i < arr.length - row; i++) {
-        List<String> colList = new ArrayList<String>(); // create vector to 
store all values inside of a column, which is stored inside 2D vector
-        for (int col = 0; col < arr[0].length - 1; col++) // Columns go until 
the next to last column
-        {
-          //System.out.println(col + " " + arr[row][col]);
-          // Extract double value from each cell
-          double x1 = Double.parseDouble(arr[row][col]);
-          double x2 = Double.parseDouble(arr[row + i][col]);
-
-          // Perform calculation for each cell
-          double result = x1 - x2;
-
-          // Convert this double value into string, and store inside array list
-          String strResult = Double.toString(result);
-          colList.add(strResult);
-        }
-
-        // Finally, add either 1, -1, or do not add row at all when 
encountering evaluation value
-        int addEvalNum = compareEvaluation(arr[row][arr[0].length - 1], arr[row + i][arr[0].length - 1]);
-        if (addEvalNum == 1) {
-          colList.add("1");
-          listofLists.add(colList); // Add this list to 2D list - row is 
finished now, move on
-        } else if (addEvalNum == -1) {
-          colList.add("-1");
-          listofLists.add(colList); // Add this list to 2D list - row is 
finished now, move on
-        }
-        // Else, they are equal, do not even add this row to 2D vector
-      }
-    }
-
-    // After all processing takes place, send to method that recreates data 
with equal # of 1's and -1's
-    List<List<String>> equalizedList = equalizeList(listofLists);
-    myMasterList.addAll(equalizedList);
-  }
-
-  /**
-   * Taking in two vector evaluation parameters, compares these two 
evaluations, returning a 1
-   * if the first evaluation is greater than the second, a -1 in the case the 
first evaluation is
-   * less than the second, and a 10 in the case that the two are equal, 
meaning this vector will
-   * not be used.
-   *
-   * @param eval1 evaluation from first vector
-   * @param eval2 evaluation from second vector
-   * @return 1 if first evaluation is greater than the second, -1 if first 
evaluation is less than the second, and
-   * 10 in the case that the two are equal
-   */
-  public static int compareEvaluation(String eval1, String eval2) {
-    int evalNum1 = map1.get(eval1);
-    int evalNum2 = map1.get(eval2);
-
-    if (evalNum1 > evalNum2) // ">" means it is more relevant - assign a 1
-    {
-      return 1;
-    } else if (evalNum1 < evalNum2) {
-      return -1;
-    } else {
-      return 10; // Return 10 if they are equal - signifies you cannot use the 
row
-    }
-  }
-
-  /**
-   * After vector calculations and new evaluation values are set, produces 
refined output data such that
-   * there is an equal or close to equal number of rows containing both "1" 
and "-1" as the new evaluation value.
-   *
-   * @param rawList originally calculated data from the input CSV file
-   * @return data that has an equal distribution of evaluation values
-   */
-  public static List<List<String>> equalizeList(List<List<String>> rawList) {
-    // Create two sets - one containing row index for +1 and the other for -1
-    List<Integer> pos1List = new ArrayList<Integer>();
-    List<Integer> neg1List = new ArrayList<Integer>();
-
-    for (int i = 0; i < rawList.size(); i++) // Iterate through all rows to 
get indexes
-    {
-      int evalNum = Integer.parseInt(rawList.get(i).get(rawList.get(0).size() - 1)); // Get 1 or -1 from original array list
-      if (evalNum == 1) {
-        pos1List.add(i); // Add row index that has 1
-      } else if (evalNum == -1) {
-        neg1List.add(i); // Add row index that has -1
-      }
-    }
-
-    int totPosCount = pos1List.size(); // Total # of 1's
-    int totNegCount = neg1List.size(); // Total # of -1's
-
-    if ((totPosCount - totNegCount) >= 1) // There are more 1's than -1's, 
equalize them
-    {
-      int indexOfPosList = 0; // Start getting indexes from the first index of 
positive index location list
-      while ((totPosCount - totNegCount) >= 1) // Keep going until we have 
acceptable amount of both +1 and -1
-      {
-        int pos1IndexVal = pos1List.get(indexOfPosList); // Get index from 
previously made list of indexes
-        for (int col = 0; col < rawList.get(0).size(); col++) // Go through 
elements of indexed row, negating it to transform to -1 row
-        {
-          double d = Double.parseDouble(rawList.get(pos1IndexVal).get(col)); 
// Transform to double first
-          d = d * -1; // Negate it
-          String negatedValue = Double.toString(d); // Change back to String
-          rawList.get(pos1IndexVal).set(col, negatedValue);// Put this value 
back into dat row
-        }
-
-        totPosCount--; // We changed a +1 row to a -1 row, decrement count of 
positives
-        totNegCount++; // Increment count of negatives
-        indexOfPosList++; // Get next +1 location in raw data
-      }
-    } else if ((totNegCount - totPosCount) > 1) // There are more -1's than 
1's, equalize them
-    {
-      int indexOfNegList = 0;
-      while ((totNegCount - totPosCount) > 1) // Keep going until we have 
acceptable amount of both +1 and -1
-      {
-        int neg1IndexVal = neg1List.get(indexOfNegList); // Get index from 
previously made list of indexes
-        for (int col = 0; col < rawList.get(0).size(); col++) // Go through 
elements of indexed row, negating it to transform to +1 row
-        {
-          double d = Double.parseDouble(rawList.get(neg1IndexVal).get(col)); 
// Transform to double first
-          d = d * -1; // Negate it
-          String negatedValue = Double.toString(d); // Change back to String
-          rawList.get(neg1IndexVal).set(col, negatedValue);// Put this value 
back into dat row
-        }
-
-        totNegCount--; // We changed a -1 row to a +1 row, decrement count of 
negatives now
-        totPosCount++; // Increment count of positives
-        indexOfNegList++; // Get next -1 location in raw data
-      }
-    } else {
-      // Do nothing - rows are within acceptable equality bounds of plus or 
minus 1
-    }
-
-    return rawList;
-  }
-
-  /**
-   * Retrieves the heading from a file to be processed so it can be written to 
the output file later.
-   *
-   * @param arr 2D array containing the parsed information from input file
-   */
-  public static void storeHead(String[][] arr) {
-    myHeader = new String[arr[0].length]; // Reside private variable
-
-    for (int col = 0; col < arr[0].length; col++) {
-      myHeader[col] = arr[0][col];
-    }
-  }
-
-  /**
-   * Writes newly calculated and equally distributed vector data to user 
specified CSV file.
-   *
-   * @param list finalized vector data to write to user specified output file
-   */
-  public static void writeCSVfile(List<List<String>> list) {
-    String outputFile = myResultDir;
-    boolean alreadyExists = new File(outputFile).exists();
-
-    try {
-      CSVWriter csvOutput = new CSVWriter(new FileWriter(outputFile), ','); // 
Create new instance of CSVWriter to write to file output
-
-      if (!alreadyExists) {
-        csvOutput.writeNext(myHeader); // Write the text headers first before 
data
-
-        for (int i = 0; i < list.size(); i++) // Iterate through all rows in 
2D array
-        {
-          String[] temp = new String[list.get(i).size()]; // Convert row array 
list in 2D array to regular string array
-          temp = list.get(i).toArray(temp);
-          csvOutput.writeNext(temp); // Write this array to the file
-        }
-      }
-
-      csvOutput.close(); // Close csvWriter
-    } catch (IOException e) {
-      e.printStackTrace();
-    }
-  }
-
-}


http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/ssearch/ranking/Evaluator.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/gov/nasa/jpl/mudrod/ssearch/ranking/Evaluator.java b/core/src/main/java/gov/nasa/jpl/mudrod/ssearch/ranking/Evaluator.java
deleted file mode 100644
index 8edb6ad..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/ssearch/ranking/Evaluator.java
+++ /dev/null
@@ -1,145 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you 
- * may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at
- * 
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package gov.nasa.jpl.mudrod.ssearch.ranking;
-
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.List;
-import java.util.stream.Collectors;
-import java.util.stream.IntStream;
-
-/**
- * Supports ability to evaluating ranking results
- */
-public class Evaluator {
-  /**
-   * Method of calculating NDCG score
-   *
-   * @param list a list of integer with each integer element indicating
-   *             the performance at its position
-   * @param K    the number of elements needed to be included in the 
calculation
-   * @return NDCG score
-   */
-  public double getNDCG(int[] list, int K) {
-    double dcg = this.getDCG(list, K);
-    double idcg = this.getIDCG(list, K);
-    double ndcg = 0.0;
-    if (idcg > 0.0) {
-      ndcg = dcg / idcg;
-    }
-    return ndcg;
-  }
-
-  /**
-   * Method of getting the precision of a list at position K
-   *
-   * @param list a list of integer with each integer element indicating
-   *             the performance at its position
-   * @param K    the number of elements needed to be included in the 
calculation
-   * @return precision at K
-   */
-  public double getPrecision(int[] list, int K) {
-    int size = list.length;
-    if (size == 0 || K == 0) {
-      return 0;
-    }
-
-    if (K > size) {
-      K = size;
-    }
-
-    int rel_doc_num = this.getRelevantDocNum(list, K);
-    double precision = (double) rel_doc_num / (double) K;
-    return precision;
-  }
-
-  /**
-   * Method of getting the number of relevant element in a ranking results
-   *
-   * @param list a list of integer with each integer element indicating
-   *             the performance at its position
-   * @param K    the number of elements needed to be included in the 
calculation
-   * @return the number of relevant element
-   */
-  private int getRelevantDocNum(int[] list, int K) {
-    int size = list.length;
-    if (size == 0 || K == 0) {
-      return 0;
-    }
-
-    if (K > size) {
-      K = size;
-    }
-
-    int rel_num = 0;
-    for (int i = 0; i < K; i++) {
-      if (list[i] > 3) { // 3 refers to "OK"
-        rel_num++;
-      }
-    }
-    return rel_num;
-  }
-
-  /**
-   * Method of calculating DCG score from a list of ranking results
-   *
-   * @param list a list of integer with each integer element indicating
-   *             the performance at its position
-   * @param K    the number of elements needed to be included in the 
calculation
-   * @return DCG score
-   */
-  private double getDCG(int[] list, int K) {
-    int size = list.length;
-    if (size == 0 || K == 0) {
-      return 0.0;
-    }
-
-    if (K > size) {
-      K = size;
-    }
-
-    double dcg = list[0];
-    for (int i = 1; i < K; i++) {
-      int rel = list[i];
-      int pos = i + 1;
-      double rel_log = Math.log(pos) / Math.log(2);
-      dcg += rel / rel_log;
-    }
-    return dcg;
-  }
-
-  /**
-   * Method of calculating ideal DCG score from a list of ranking results
-   *
-   * @param list a list of integer with each integer element indicating
-   *             the performance at its position
-   * @param K    the number of elements needed to be included in the 
calculation
-   * @return IDCG score
-   */
-  private double getIDCG(int[] list, int K) {
-    Comparator<Integer> comparator = new Comparator<Integer>() {
-      @Override
-      public int compare(Integer o1, Integer o2) {
-        return o2.compareTo(o1);
-      }
-    };
-    List<Integer> sortlist = IntStream.of(list).boxed().collect(Collectors.toList());
-    ;
-    Collections.sort(sortlist, comparator);
-    int[] sortedArr = sortlist.stream().mapToInt(i -> i).toArray();
-    double idcg = this.getDCG(sortedArr, K);
-    return idcg;
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/ssearch/ranking/Learner.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/gov/nasa/jpl/mudrod/ssearch/ranking/Learner.java b/core/src/main/java/gov/nasa/jpl/mudrod/ssearch/ranking/Learner.java
deleted file mode 100644
index d1c5199..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/ssearch/ranking/Learner.java
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you 
- * may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at
- * 
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package gov.nasa.jpl.mudrod.ssearch.ranking;
-
-import gov.nasa.jpl.mudrod.driver.SparkDriver;
-import org.apache.spark.SparkContext;
-import org.apache.spark.mllib.classification.SVMModel;
-import org.apache.spark.mllib.regression.LabeledPoint;
-
-import java.io.Serializable;
-
-/**
- * Supports the ability to importing classifier into memory
- */
-public class Learner implements Serializable {
-  /**
-   *
-   */
-  private static final long serialVersionUID = 1L;
-  private static final String SPARKSVM = "SparkSVM";
-  SVMModel model = null;
-  transient SparkContext sc = null;
-
-  /**
-   * Constructor to load in spark SVM classifier
-   *
-   * @param classifierName classifier type
-   * @param skd            an instance of spark driver
-   * @param svmSgdModel    path to a trained model
-   */
-  public Learner(String classifierName, SparkDriver skd, String svmSgdModel) {
-    if (classifierName.equals(SPARKSVM)) {
-      sc = skd.sc.sc();
-      sc.addFile(svmSgdModel, true);
-      model = SVMModel.load(sc, svmSgdModel);
-    }
-  }
-
-  /**
-   * Method of classifying instance
-   *
-   * @param p the instance that needs to be classified
-   * @return the class id
-   */
-  public double classify(LabeledPoint p) {
-    return model.predict(p.features());
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/ssearch/ranking/SparkFormatter.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/gov/nasa/jpl/mudrod/ssearch/ranking/SparkFormatter.java b/core/src/main/java/gov/nasa/jpl/mudrod/ssearch/ranking/SparkFormatter.java
deleted file mode 100644
index ba46d41..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/ssearch/ranking/SparkFormatter.java
+++ /dev/null
@@ -1,55 +0,0 @@
-package gov.nasa.jpl.mudrod.ssearch.ranking;
-
-import java.io.*;
-import java.text.DecimalFormat;
-
-public class SparkFormatter {
-  DecimalFormat NDForm = new DecimalFormat("#.###");
-
-  public SparkFormatter() {
-  }
-
-  public void toSparkSVMformat(String inputCSVFileName, String 
outputTXTFileName) {
-    File file = new File(outputTXTFileName);
-    if (file.exists()) {
-      file.delete();
-    }
-    try {
-      file.createNewFile();
-      FileWriter fw = new FileWriter(outputTXTFileName);
-      BufferedWriter bw = new BufferedWriter(fw);
-
-      BufferedReader br = new BufferedReader(new FileReader(inputCSVFileName));
-      br.readLine();
-      String line = br.readLine();
-      while (line != null) {
-        String[] list = line.split(",");
-        String output = "";
-        Double label = Double.parseDouble(list[list.length - 1].replace("\"", 
""));
-        if (label == -1.0) {
-          output = "0 ";
-        } else if (label == 1.0) {
-          output = "1 ";
-        }
-
-        for (int i = 0; i < list.length - 1; i++) {
-          int index = i + 1;
-          output += index + ":" + 
NDForm.format(Double.parseDouble(list[i].replace("\"", ""))) + " ";
-        }
-        bw.write(output + "\n");
-
-        line = br.readLine();
-      }
-      br.close();
-      bw.close();
-    } catch (IOException e) {
-      e.printStackTrace();
-    }
-  }
-
-  public static void main(String[] args) {
-    SparkFormatter sf = new SparkFormatter();
-    sf.toSparkSVMformat("C:/mudrodCoreTestData/rankingResults/inputDataForSVM.csv", "C:/mudrodCoreTestData/rankingResults/inputDataForSVM_spark.txt");
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/ssearch/ranking/SparkSVM.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/gov/nasa/jpl/mudrod/ssearch/ranking/SparkSVM.java b/core/src/main/java/gov/nasa/jpl/mudrod/ssearch/ranking/SparkSVM.java
deleted file mode 100644
index 1ddebf3..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/ssearch/ranking/SparkSVM.java
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you 
- * may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at
- * 
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package gov.nasa.jpl.mudrod.ssearch.ranking;
-
-import gov.nasa.jpl.mudrod.main.MudrodEngine;
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.api.java.JavaSparkContext;
-import org.apache.spark.mllib.classification.SVMModel;
-import org.apache.spark.mllib.classification.SVMWithSGD;
-import org.apache.spark.mllib.regression.LabeledPoint;
-import org.apache.spark.mllib.util.MLUtils;
-
-public class SparkSVM {
-
-  private SparkSVM() {
-    //public constructor
-  }
-
-  public static void main(String[] args) {
-    MudrodEngine me = new MudrodEngine();
-
-    JavaSparkContext jsc = me.startSparkDriver().sc;
-
-    String path = SparkSVM.class.getClassLoader().getResource("inputDataForSVM_spark.txt").toString();
-    JavaRDD<LabeledPoint> data = MLUtils.loadLibSVMFile(jsc.sc(), path).toJavaRDD();
-
-    // Run training algorithm to build the model.
-    int numIterations = 100;
-    final SVMModel model = SVMWithSGD.train(data.rdd(), numIterations);
-
-    // Save and load model
-    model.save(jsc.sc(), SparkSVM.class.getClassLoader().getResource("javaSVMWithSGDModel").toString());
-
-    jsc.sc().stop();
-
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/ssearch/ranking/TrainingImporter.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/gov/nasa/jpl/mudrod/ssearch/ranking/TrainingImporter.java b/core/src/main/java/gov/nasa/jpl/mudrod/ssearch/ranking/TrainingImporter.java
deleted file mode 100644
index ae48b55..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/ssearch/ranking/TrainingImporter.java
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you 
- * may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at
- * 
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package gov.nasa.jpl.mudrod.ssearch.ranking;
-
-import gov.nasa.jpl.mudrod.discoveryengine.MudrodAbstract;
-import gov.nasa.jpl.mudrod.driver.ESDriver;
-import gov.nasa.jpl.mudrod.driver.SparkDriver;
-import gov.nasa.jpl.mudrod.main.MudrodConstants;
-import org.elasticsearch.action.index.IndexRequest;
-import org.elasticsearch.common.xcontent.XContentBuilder;
-import org.elasticsearch.index.query.QueryBuilders;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileReader;
-import java.io.IOException;
-import java.util.Properties;
-
-import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
-
-/**
- * Supports the ability to importing training set into Elasticsearch
- */
-public class TrainingImporter extends MudrodAbstract {
-  /**
-   *
-   */
-  private static final long serialVersionUID = 1L;
-
-  public TrainingImporter(Properties props, ESDriver es, SparkDriver spark) {
-    super(props, es, spark);
-    es.deleteAllByQuery(props.getProperty(MudrodConstants.ES_INDEX_NAME), 
"trainingranking", QueryBuilders.matchAllQuery());
-    addMapping();
-  }
-
-  /**
-   * Method of adding mapping to traning set type
-   */
-  public void addMapping() {
-    XContentBuilder Mapping;
-    try {
-      Mapping = jsonBuilder().startObject().startObject("trainingranking").startObject("properties").startObject("query").field("type", "string").field("index", "not_analyzed").endObject()
-          .startObject("dataID").field("type", "string").field("index", "not_analyzed").endObject().startObject("label").field("type", "string").field("index", "not_analyzed").endObject().endObject()
-          .endObject().endObject();
-
-      es.getClient().admin().indices().preparePutMapping(props.getProperty("indexName")).setType("trainingranking").setSource(Mapping).execute().actionGet();
-    } catch (IOException e) {
-      e.printStackTrace();
-    }
-  }
-
-  /**
-   * Method of importing training set in to Elasticsearch
-   *
-   * @param dataFolder the path to the traing set
-   * @throws IOException IOException
-   */
-  public void importTrainingSet(String dataFolder) throws IOException {
-    es.createBulkProcessor();
-
-    File[] files = new File(dataFolder).listFiles();
-    for (File file : files) {
-      BufferedReader br = new BufferedReader(new 
FileReader(file.getAbsolutePath()));
-      br.readLine();
-      String line = br.readLine();
-      while (line != null) {
-        String[] list = line.split(",");
-        String query = file.getName().replace(".csv", "");
-        if (list.length > 0) {
-          IndexRequest ir = new IndexRequest(props.getProperty("indexName"), 
"trainingranking")
-              .source(jsonBuilder().startObject().field("query", query).field("dataID", list[0]).field("label", list[list.length - 1]).endObject());
-          es.getBulkProcessor().add(ir);
-        }
-        line = br.readLine();
-      }
-      br.close();
-    }
-    es.destroyBulkProcessor();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/ssearch/ranking/package-info.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/gov/nasa/jpl/mudrod/ssearch/ranking/package-info.java b/core/src/main/java/gov/nasa/jpl/mudrod/ssearch/ranking/package-info.java
deleted file mode 100644
index 205e7a7..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/ssearch/ranking/package-info.java
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you 
- * may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at
- * 
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/**
- * This package includes classes for importing training data, ML models,
- * generating input data for RankSVM, and evaluating ranking results
- */
-package gov.nasa.jpl.mudrod.ssearch.ranking;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/ssearch/structure/SResult.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/gov/nasa/jpl/mudrod/ssearch/structure/SResult.java b/core/src/main/java/gov/nasa/jpl/mudrod/ssearch/structure/SResult.java
deleted file mode 100644
index 33b6233..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/ssearch/structure/SResult.java
+++ /dev/null
@@ -1,183 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you 
- * may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at
- * 
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package gov.nasa.jpl.mudrod.ssearch.structure;
-
-import java.lang.reflect.Field;
-
-/**
- * Data structure class for search result
- */
-public class SResult {
-  public static final String rlist[] = { "term_score", "releaseDate_score", 
/*"versionNum_score",*/
-      "processingL_score", "allPop_score", "monthPop_score", 
"userPop_score"/*, "termAndv_score"*/ };
-  String shortName = null;
-  String longName = null;
-  String topic = null;
-  String description = null;
-  String relase_date = null;
-
-  public Double final_score = 0.0;
-  public Double term_score = 0.0;
-  public Double releaseDate_score = 0.0;
-  public Double versionNum_score = 0.0;
-  public Double processingL_score = 0.0;
-  public Double click_score = 0.0;
-  public Double allPop_score = 0.0;
-  public Double monthPop_score = 0.0;
-  public Double userPop_score = 0.0;
-  public Double termAndv_score = 0.0;
-  public Integer below = 0;
-
-  public Double Dataset_LongName_score = null;
-  public Double Dataset_Metadata_score = null;
-  public Double DatasetParameter_Term_score = null;
-  public Double DatasetSource_Source_LongName_score = null;
-  public Double DatasetSource_Sensor_LongName_score = null;
-
-  public String version = null;
-  public String processingLevel = null;
-  public String latency = null;
-  public String stopDateLong = null;
-  public String stopDateFormat = null;
-  public Double spatialR_Sat = null;
-  public Double spatialR_Grid = null;
-  public String temporalR = null;
-
-  public Double releaseDate = null;
-  public Double click = null;
-  public Double term = null;
-  public Double versionNum = null;
-  public Double processingL = null;
-  public Double allPop = null;
-  public Double monthPop = null;
-  public Double userPop = null;
-  public Double termAndv = null;
-
-  public Double Dataset_LongName = null;
-  public Double Dataset_Metadata = null;
-  public Double DatasetParameter_Term = null;
-  public Double DatasetSource_Source_LongName = null;
-  public Double DatasetSource_Sensor_LongName = null;
-
-  public Double prediction = 0.0;
-  public String label = null;
-
-  //add by quintinali
-  public String startDate;
-  public String endDate;
-  public String sensors;
-
-  /**
-   * @param shortName   short name of dataset
-   * @param longName    long name of dataset
-   * @param topic       topic of dataset
-   * @param description description of dataset
-   * @param date        release date of dataset
-   */
-  public SResult(String shortName, String longName, String topic, String 
description, String date) {
-    this.shortName = shortName;
-    this.longName = longName;
-    this.topic = topic;
-    this.description = description;
-    this.relase_date = date;
-  }
-
-  public SResult(SResult sr) {
-    for (int i = 0; i < rlist.length; i++) {
-      set(this, rlist[i], get(sr, rlist[i]));
-    }
-  }
-
-  /**
-   * Method of getting export header
-   *
-   * @param delimiter the delimiter used to separate strings
-   * @return header
-   */
-  public static String getHeader(String delimiter) {
-    String str = "";
-    for (int i = 0; i < rlist.length; i++) {
-      str += rlist[i] + delimiter;
-    }
-    str = str + "label" + "\n";
-    return "ShortName" + delimiter + "below" + delimiter + str;
-  }
-
-  /**
-   * Method of get a search results as string
-   *
-   * @param delimiter the delimiter used to separate strings
-   * @return search result as string
-   */
-  public String toString(String delimiter) {
-    String str = "";
-    for (int i = 0; i < rlist.length; i++) {
-      double score = get(this, rlist[i]);
-      str += score + delimiter;
-    }
-    str = str + label + "\n";
-    return shortName + delimiter + below + delimiter + str;
-  }
-
-  /**
-   * Generic setter method
-   *
-   * @param object     instance of SResult
-   * @param fieldName  field name that needs to be set on
-   * @param fieldValue field value that needs to be set to
-   * @return 1 means success, and 0 otherwise
-   */
-  public static boolean set(Object object, String fieldName, Object 
fieldValue) {
-    Class<?> clazz = object.getClass();
-    while (clazz != null) {
-      try {
-        Field field = clazz.getDeclaredField(fieldName);
-        field.setAccessible(true);
-        field.set(object, fieldValue);
-        return true;
-      } catch (NoSuchFieldException e) {
-        clazz = clazz.getSuperclass();
-      } catch (Exception e) {
-        throw new IllegalStateException(e);
-      }
-    }
-    return false;
-  }
-
-  /**
-   * Generic getter method
-   *
-   * @param object    instance of SResult
-   * @param fieldName field name of search result
-   * @param <V>       data type
-   * @return the value of the filed in the object
-   */
-  @SuppressWarnings("unchecked")
-  public static <V> V get(Object object, String fieldName) {
-    Class<?> clazz = object.getClass();
-    while (clazz != null) {
-      try {
-        Field field = clazz.getDeclaredField(fieldName);
-        field.setAccessible(true);
-        return (V) field.get(object);
-      } catch (NoSuchFieldException e) {
-        clazz = clazz.getSuperclass();
-      } catch (Exception e) {
-        throw new IllegalStateException(e);
-      }
-    }
-    return null;
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/ssearch/structure/package-info.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/gov/nasa/jpl/mudrod/ssearch/structure/package-info.java 
b/core/src/main/java/gov/nasa/jpl/mudrod/ssearch/structure/package-info.java
deleted file mode 100644
index a0f9ce5..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/ssearch/structure/package-info.java
+++ /dev/null
@@ -1,17 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you 
- * may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at
- * 
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/**
- * This package includes data structure needed for ranking process
- */
-package gov.nasa.jpl.mudrod.ssearch.structure;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/utils/ESTransportClient.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/gov/nasa/jpl/mudrod/utils/ESTransportClient.java 
b/core/src/main/java/gov/nasa/jpl/mudrod/utils/ESTransportClient.java
deleted file mode 100644
index 151ac8d..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/utils/ESTransportClient.java
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you 
- * may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at
- * 
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package gov.nasa.jpl.mudrod.utils;
-
-import org.elasticsearch.client.transport.TransportClient;
-import org.elasticsearch.common.settings.Settings;
-import org.elasticsearch.index.reindex.ReindexPlugin;
-import org.elasticsearch.percolator.PercolatorPlugin;
-import org.elasticsearch.plugins.Plugin;
-import org.elasticsearch.script.mustache.MustachePlugin;
-import org.elasticsearch.transport.Netty3Plugin;
-
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.Collections;
-
-/**
- * A builder to create an instance of {@link TransportClient} This class
- * pre-installs the {@link Netty3Plugin}, for the client. These plugins are all
- * elasticsearch core modules required.
- */
-@SuppressWarnings({ "unchecked", "varargs" })
-public class ESTransportClient extends TransportClient {
-
-  private static final Collection<Class<? extends Plugin>> 
PRE_INSTALLED_PLUGINS = Collections
-      .unmodifiableList(Arrays.asList(ReindexPlugin.class, 
PercolatorPlugin.class, MustachePlugin.class, Netty3Plugin.class));
-
-  @SafeVarargs
-  public ESTransportClient(Settings settings, Class<? extends Plugin>... 
plugins) {
-    this(settings, Arrays.asList(plugins));
-  }
-
-  public ESTransportClient(Settings settings, Collection<Class<? extends 
Plugin>> plugins) {
-    super(settings, Settings.EMPTY, addPlugins(plugins, 
PRE_INSTALLED_PLUGINS), null);
-
-  }
-
-  @Override
-  public void close() {
-    super.close();
-  }
-
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/utils/HttpRequest.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/gov/nasa/jpl/mudrod/utils/HttpRequest.java 
b/core/src/main/java/gov/nasa/jpl/mudrod/utils/HttpRequest.java
deleted file mode 100644
index be0a46d..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/utils/HttpRequest.java
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you
- * may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package gov.nasa.jpl.mudrod.utils;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.BufferedReader;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.net.HttpURLConnection;
-import java.net.URL;
-
-/**
- * ClassName: HttpRequest
- * Function: Http request tool.
- */
-public class HttpRequest {
-
-  private static final Logger LOG = LoggerFactory.getLogger(HttpRequest.class);
-
-  public HttpRequest() {
-  }
-
-  public String getRequest(String requestUrl) {
-    String line = null;
-    try {
-      URL url = new URL(requestUrl);
-
-      HttpURLConnection connection = (HttpURLConnection) url.openConnection();
-      connection.setDoOutput(true);
-
-      connection.setConnectTimeout(5000);
-      connection.setReadTimeout(5000);
-      int code = connection.getResponseCode();
-      if (code != HttpURLConnection.HTTP_OK) {
-        line = "{\"exception\":\"Service failed\"}";
-        LOG.info(line);
-      } else {
-        InputStream content = connection.getInputStream();
-        BufferedReader in = new BufferedReader(new InputStreamReader(content));
-        line = in.readLine();
-      }
-    } catch (Exception e) {
-      line = "{\"exception\":\"No service was found\"}";
-      LOG.error(line);
-    }
-    return line;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/utils/LabeledRowMatrix.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/gov/nasa/jpl/mudrod/utils/LabeledRowMatrix.java 
b/core/src/main/java/gov/nasa/jpl/mudrod/utils/LabeledRowMatrix.java
deleted file mode 100644
index d1d144b..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/utils/LabeledRowMatrix.java
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you
- * may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package gov.nasa.jpl.mudrod.utils;
-
-import org.apache.spark.mllib.linalg.distributed.RowMatrix;
-
-import java.util.List;
-
-/**
- * ClassName: LabeledRowMatrix
- * Function: LabeledRowMatrix strut.
- */
-public class LabeledRowMatrix {
-
-  // words: matrix row titles
-  public List<String> rowkeys;
-  // docs: matrix column titles
-  public List<String> colkeys;
-  // wordDocMatrix: a matrix in which each row is corresponding to a term and
-  // each column is a doc.
-  public RowMatrix rowMatrix;
-
-  public LabeledRowMatrix() {
-    // TODO Auto-generated constructor stub
-  }
-
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/utils/LinkageTriple.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/gov/nasa/jpl/mudrod/utils/LinkageTriple.java 
b/core/src/main/java/gov/nasa/jpl/mudrod/utils/LinkageTriple.java
deleted file mode 100644
index 90b1568..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/utils/LinkageTriple.java
+++ /dev/null
@@ -1,192 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you
- * may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package gov.nasa.jpl.mudrod.utils;
-
-import gov.nasa.jpl.mudrod.driver.ESDriver;
-import org.elasticsearch.action.index.IndexRequest;
-import org.elasticsearch.action.search.SearchRequestBuilder;
-import org.elasticsearch.action.search.SearchResponse;
-import org.elasticsearch.action.update.UpdateRequest;
-import org.elasticsearch.common.unit.TimeValue;
-import org.elasticsearch.common.xcontent.XContentBuilder;
-import org.elasticsearch.index.query.QueryBuilders;
-import org.elasticsearch.search.SearchHit;
-import org.elasticsearch.search.aggregations.AggregationBuilders;
-import org.elasticsearch.search.aggregations.bucket.terms.Terms;
-import org.elasticsearch.search.sort.SortOrder;
-
-import java.io.IOException;
-import java.io.Serializable;
-import java.text.DecimalFormat;
-import java.util.List;
-import java.util.Map;
-
-import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
-
-/**
- * ClassName: LinkageTriple Function: Vocabulary linkage operations
- */
-public class LinkageTriple implements Serializable {
-
-  /**
-   *
-   */
-  private static final long serialVersionUID = 1L;
-  // keyAId: ID of term A
-  public long keyAId;
-  // keyBId: ID of term B
-  public long keyBId;
-  // weight: The relationship between term A and Term B
-  public double weight;
-  // keyA: TermA
-  public String keyA;
-  // keyB: TermB
-  public String keyB;
-  // df: Format number
-  public static DecimalFormat df = new DecimalFormat("#.00");
-
-  public LinkageTriple() {
-    // TODO Auto-generated constructor stub
-  }
-
-  /**
-   * TODO Output linkage triples in string format.
-   *
-   * @see java.lang.Object#toString()
-   */
-  @Override
-  public String toString() {
-    return keyA + "," + keyB + ":" + weight;
-  }
-
-  public static void insertTriples(ESDriver es, List<LinkageTriple> triples, 
String index, String type) throws IOException {
-    LinkageTriple.insertTriples(es, triples, index, type, false, false);
-  }
-
-  public static void insertTriples(ESDriver es, List<LinkageTriple> triples, 
String index, String type, Boolean bTriple, boolean bSymmetry) throws 
IOException {
-    es.deleteType(index, type);
-    if (bTriple) {
-      LinkageTriple.addMapping(es, index, type);
-    }
-
-    if (triples == null) {
-      return;
-    }
-
-    es.createBulkProcessor();
-    int size = triples.size();
-    for (int i = 0; i < size; i++) {
-
-      XContentBuilder jsonBuilder = jsonBuilder().startObject();
-      if (bTriple) {
-
-        jsonBuilder.field("concept_A", triples.get(i).keyA);
-        jsonBuilder.field("concept_B", triples.get(i).keyB);
-
-      } else {
-        jsonBuilder.field("keywords", triples.get(i).keyA + "," + 
triples.get(i).keyB);
-      }
-
-      jsonBuilder.field("weight", 
Double.parseDouble(df.format(triples.get(i).weight)));
-      jsonBuilder.endObject();
-
-      IndexRequest ir = new IndexRequest(index, type).source(jsonBuilder);
-      es.getBulkProcessor().add(ir);
-
-      if (bTriple && bSymmetry) {
-        XContentBuilder symmetryJsonBuilder = jsonBuilder().startObject();
-        symmetryJsonBuilder.field("concept_A", triples.get(i).keyB);
-        symmetryJsonBuilder.field("concept_B", triples.get(i).keyA);
-
-        symmetryJsonBuilder.field("weight", 
Double.parseDouble(df.format(triples.get(i).weight)));
-
-        symmetryJsonBuilder.endObject();
-
-        IndexRequest symmetryir = new IndexRequest(index, 
type).source(symmetryJsonBuilder);
-        es.getBulkProcessor().add(symmetryir);
-      }
-    }
-    es.destroyBulkProcessor();
-  }
-
-  public static void addMapping(ESDriver es, String index, String type) {
-    XContentBuilder Mapping;
-    try {
-      Mapping = 
jsonBuilder().startObject().startObject(type).startObject("properties").startObject("concept_A").field("type",
 "string").field("index", "not_analyzed").endObject()
-          .startObject("concept_B").field("type", "string").field("index", 
"not_analyzed").endObject()
-
-          .endObject().endObject().endObject();
-
-      
es.getClient().admin().indices().preparePutMapping(index).setType(type).setSource(Mapping).execute().actionGet();
-    } catch (IOException e) {
-      e.printStackTrace();
-    }
-  }
-
-  public static void standardTriples(ESDriver es, String index, String type) 
throws IOException {
-    es.createBulkProcessor();
-
-    SearchResponse sr = 
es.getClient().prepareSearch(index).setTypes(type).setQuery(QueryBuilders.matchAllQuery()).setSize(0)
-        
.addAggregation(AggregationBuilders.terms("concepts").field("concept_A").size(0)).execute().actionGet();
-    Terms concepts = sr.getAggregations().get("concepts");
-
-    for (Terms.Bucket entry : concepts.getBuckets()) {
-      String concept = (String) entry.getKey();
-      double maxSim = LinkageTriple.getMaxSimilarity(es, index, type, concept);
-      if (maxSim == 1.0) {
-        continue;
-      }
-
-      SearchResponse scrollResp = 
es.getClient().prepareSearch(index).setTypes(type).setScroll(new 
TimeValue(60000)).setQuery(QueryBuilders.termQuery("concept_A", concept))
-          .addSort("weight", 
SortOrder.DESC).setSize(100).execute().actionGet();
-
-      while (true) {
-        for (SearchHit hit : scrollResp.getHits().getHits()) {
-          Map<String, Object> metadata = hit.getSource();
-          double sim = (double) metadata.get("weight");
-          double newSim = sim / maxSim;
-          UpdateRequest ur = es.generateUpdateRequest(index, type, 
hit.getId(), "weight", Double.parseDouble(df.format(newSim)));
-          es.getBulkProcessor().add(ur);
-        }
-
-        scrollResp = 
es.getClient().prepareSearchScroll(scrollResp.getScrollId()).setScroll(new 
TimeValue(600000)).execute().actionGet();
-        if (scrollResp.getHits().getHits().length == 0) {
-          break;
-        }
-      }
-    }
-
-    es.destroyBulkProcessor();
-  }
-
-  private static double getMaxSimilarity(ESDriver es, String index, String 
type, String concept) {
-
-    double maxSim = 1.0;
-    SearchRequestBuilder builder = 
es.getClient().prepareSearch(index).setTypes(type).setQuery(QueryBuilders.termQuery("concept_A",
 concept)).addSort("weight", SortOrder.DESC).setSize(1);
-
-    SearchResponse usrhis = builder.execute().actionGet();
-    SearchHit[] hits = usrhis.getHits().getHits();
-    if (hits.length == 1) {
-      SearchHit hit = hits[0];
-      Map<String, Object> result = hit.getSource();
-      maxSim = (double) result.get("weight");
-    }
-
-    if (maxSim == 0.0) {
-      maxSim = 1.0;
-    }
-
-    return maxSim;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/utils/MatrixUtil.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/gov/nasa/jpl/mudrod/utils/MatrixUtil.java 
b/core/src/main/java/gov/nasa/jpl/mudrod/utils/MatrixUtil.java
deleted file mode 100644
index 942f1e0..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/utils/MatrixUtil.java
+++ /dev/null
@@ -1,488 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you
- * may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package gov.nasa.jpl.mudrod.utils;
-
-import gov.nasa.jpl.mudrod.driver.SparkDriver;
-import org.apache.spark.api.java.JavaPairRDD;
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.api.java.JavaSparkContext;
-import org.apache.spark.api.java.Optional;
-import org.apache.spark.api.java.function.*;
-import org.apache.spark.mllib.feature.IDF;
-import org.apache.spark.mllib.feature.IDFModel;
-import org.apache.spark.mllib.linalg.*;
-import org.apache.spark.mllib.linalg.distributed.IndexedRow;
-import org.apache.spark.mllib.linalg.distributed.IndexedRowMatrix;
-import org.apache.spark.mllib.linalg.distributed.RowMatrix;
-import scala.Tuple2;
-
-import java.io.BufferedWriter;
-import java.io.File;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Iterator;
-import java.util.List;
-import java.util.stream.Stream;
-
-/**
- * Matrix utility tool
- */
-public class MatrixUtil {
-
-  private MatrixUtil() {
-  }
-
-  /**
-   * buildSVDMatrix: Generate SVD matrix from TF-IDF matrix. Please make sure
-   * the TF-IDF matrix has been already built from the original documents.
-   *
-   * @param tfidfMatrix,
-   *          each row is a term and each column is a document name and each
-   *          cell is the TF-IDF value of the term in the corresponding
-   *          document.
-   * @param dimension
-   *          Column number of the SVD matrix
-   * @return RowMatrix, each row is a term and each column is a dimension in 
the
-   *         feature space, each cell is value of the term in the corresponding
-   *         dimension.
-   */
-  public static RowMatrix buildSVDMatrix(RowMatrix tfidfMatrix, int dimension) 
{
-    int matrixCol = (int) tfidfMatrix.numCols();
-    if (matrixCol < dimension) {
-      dimension = matrixCol;
-    }
-
-    SingularValueDecomposition<RowMatrix, Matrix> svd = 
tfidfMatrix.computeSVD(dimension, true, 1.0E-9d);
-    RowMatrix u = svd.U();
-    Vector s = svd.s();
-    return u.multiply(Matrices.diag(s));
-  }
-
-  /**
-   * buildSVDMatrix: Generate SVD matrix from Vector RDD.
-   *
-   * @param vecRDD
-   *          vectors of terms in feature space
-   * @param dimension
-   *          Column number of the SVD matrix
-   * @return RowMatrix, each row is a term and each column is a dimension in 
the
-   *         feature space, each cell is value of the term in the corresponding
-   *         dimension.
-   */
-  public static RowMatrix buildSVDMatrix(JavaRDD<Vector> vecRDD, int 
dimension) {
-    RowMatrix tfidfMatrix = new RowMatrix(vecRDD.rdd());
-    SingularValueDecomposition<RowMatrix, Matrix> svd = 
tfidfMatrix.computeSVD(dimension, true, 1.0E-9d);
-    RowMatrix u = svd.U();
-    Vector s = svd.s();
-    return u.multiply(Matrices.diag(s));
-  }
-
-  /**
-   * Create TF-IDF matrix from word-doc matrix.
-   *
-   * @param wordDocMatrix,
-   *          each row is a term, each column is a document name and each cell
-   *          is number of the term in the corresponding document.
-   * @return RowMatrix, each row is a term and each column is a document name
-   *         and each cell is the TF-IDF value of the term in the corresponding
-   *         document.
-   */
-  public static RowMatrix createTFIDFMatrix(RowMatrix wordDocMatrix) {
-    JavaRDD<Vector> newcountRDD = wordDocMatrix.rows().toJavaRDD();
-    IDFModel idfModel = new IDF().fit(newcountRDD);
-    JavaRDD<Vector> idf = idfModel.transform(newcountRDD);
-    return new RowMatrix(idf.rdd());
-  }
-
-  /**
-   * Create matrix from doc-terms JavaPairRDD.
-   *
-   * @param uniqueDocRDD
-   *          doc-terms JavaPairRDD, in which each key is a doc name, and value
-   *          is term list extracted from that doc
-   * @return LabeledRowMatrix {@link LabeledRowMatrix}
-   */
-  public static LabeledRowMatrix createWordDocMatrix(JavaPairRDD<String, 
List<String>> uniqueDocRDD) {
-    // Index documents with unique IDs
-    JavaPairRDD<List<String>, Long> corpus = 
uniqueDocRDD.values().zipWithIndex();
-    // cal word-doc numbers
-    JavaPairRDD<Tuple2<String, Long>, Double> worddocNumRDD = 
corpus.flatMapToPair(new PairFlatMapFunction<Tuple2<List<String>, Long>, 
Tuple2<String, Long>, Double>() {
-      /**
-       *
-       */
-      private static final long serialVersionUID = 1L;
-
-      @Override
-      public Iterator<Tuple2<Tuple2<String, Long>, Double>> 
call(Tuple2<List<String>, Long> docwords) throws Exception {
-        List<Tuple2<Tuple2<String, Long>, Double>> pairs = new ArrayList<>();
-        List<String> words = docwords._1;
-        int n = words.size();
-        for (int i = 0; i < n; i++) {
-          Tuple2<String, Long> worddoc = new Tuple2<>(words.get(i), 
docwords._2);
-          pairs.add(new Tuple2<Tuple2<String, Long>, Double>(worddoc, 1.0));
-        }
-        return pairs.iterator();
-      }
-    }).reduceByKey(new Function2<Double, Double, Double>() {
-      /**
-       *
-       */
-      private static final long serialVersionUID = 1L;
-
-      @Override
-      public Double call(Double first, Double second) throws Exception {
-        return first + second;
-      }
-    });
-    // cal word doc-numbers
-    JavaPairRDD<String, Tuple2<List<Long>, List<Double>>> wordDocnumRDD = 
worddocNumRDD
-        .mapToPair(new PairFunction<Tuple2<Tuple2<String, Long>, Double>, 
String, Tuple2<List<Long>, List<Double>>>() {
-          /**
-           *
-           */
-          private static final long serialVersionUID = 1L;
-
-          @Override
-          public Tuple2<String, Tuple2<List<Long>, List<Double>>> 
call(Tuple2<Tuple2<String, Long>, Double> worddocNum) throws Exception {
-            List<Long> docs = new ArrayList<>();
-            docs.add(worddocNum._1._2);
-            List<Double> nums = new ArrayList<>();
-            nums.add(worddocNum._2);
-            Tuple2<List<Long>, List<Double>> docmums = new Tuple2<>(docs, 
nums);
-            return new Tuple2<>(worddocNum._1._1, docmums);
-          }
-        });
-    // trans to vector
-    final int corporsize = (int) uniqueDocRDD.keys().count();
-    JavaPairRDD<String, Vector> wordVectorRDD = wordDocnumRDD.reduceByKey(new 
Function2<Tuple2<List<Long>, List<Double>>, Tuple2<List<Long>, List<Double>>, 
Tuple2<List<Long>, List<Double>>>() {
-      /**
-       *
-       */
-      private static final long serialVersionUID = 1L;
-
-      @Override
-      public Tuple2<List<Long>, List<Double>> call(Tuple2<List<Long>, 
List<Double>> arg0, Tuple2<List<Long>, List<Double>> arg1) throws Exception {
-        arg0._1.addAll(arg1._1);
-        arg0._2.addAll(arg1._2);
-        return new Tuple2<>(arg0._1, arg0._2);
-      }
-    }).mapToPair(new PairFunction<Tuple2<String, Tuple2<List<Long>, 
List<Double>>>, String, Vector>() {
-      /**
-       *
-       */
-      private static final long serialVersionUID = 1L;
-
-      @Override
-      public Tuple2<String, Vector> call(Tuple2<String, Tuple2<List<Long>, 
List<Double>>> arg0) throws Exception {
-        int docsize = arg0._2._1.size();
-        int[] intArray = new int[docsize];
-        double[] doubleArray = new double[docsize];
-        for (int i = 0; i < docsize; i++) {
-          intArray[i] = arg0._2._1.get(i).intValue();
-          doubleArray[i] = arg0._2._2.get(i).intValue();
-        }
-        Vector sv = Vectors.sparse(corporsize, intArray, doubleArray);
-        return new Tuple2<>(arg0._1, sv);
-      }
-    });
-
-    RowMatrix wordDocMatrix = new RowMatrix(wordVectorRDD.values().rdd());
-
-    LabeledRowMatrix labeledRowMatrix = new LabeledRowMatrix();
-    labeledRowMatrix.rowMatrix = wordDocMatrix;
-    labeledRowMatrix.rowkeys = wordVectorRDD.keys().collect();
-    labeledRowMatrix.colkeys = uniqueDocRDD.keys().collect();
-    return labeledRowMatrix;
-  }
-
-  public static LabeledRowMatrix createDocWordMatrix(JavaPairRDD<String, 
List<String>> uniqueDocRDD, JavaSparkContext sc) {
-    // Index word with unique IDs
-    JavaPairRDD<String, Long> wordIDRDD = uniqueDocRDD.values().flatMap(new 
FlatMapFunction<List<String>, String>() {
-      /**
-       *
-       */
-      private static final long serialVersionUID = 1L;
-
-      @Override
-      public Iterator<String> call(List<String> arg0) throws Exception {
-        return arg0.iterator();
-      }
-    }).distinct().zipWithIndex();
-
-    //
-    JavaPairRDD<Tuple2<String, String>, Double> docwordNumRDD = 
uniqueDocRDD.flatMapToPair(new PairFlatMapFunction<Tuple2<String, 
List<String>>, Tuple2<String, String>, Double>() {
-
-      /**
-       *
-       */
-      private static final long serialVersionUID = 1L;
-
-      @Override
-      public Iterator<Tuple2<Tuple2<String, String>, Double>> 
call(Tuple2<String, List<String>> docwords) throws Exception {
-        List<Tuple2<Tuple2<String, String>, Double>> pairs = new ArrayList<>();
-        List<String> words = docwords._2;
-        int n = words.size();
-        for (int i = 0; i < n; i++) {
-          Tuple2<String, String> worddoc = new Tuple2<>(docwords._1, 
words.get(i));
-          pairs.add(new Tuple2<Tuple2<String, String>, Double>(worddoc, 1.0));
-        }
-        return pairs.iterator();
-      }
-    }).reduceByKey(new Function2<Double, Double, Double>() {
-      /**
-       *
-       */
-      private static final long serialVersionUID = 1L;
-
-      @Override
-      public Double call(Double first, Double second) throws Exception {
-        return first + second;
-      }
-    });
-
-    //
-    JavaPairRDD<String, Tuple2<String, Double>> wordDocnumRDD = 
docwordNumRDD.mapToPair(new PairFunction<Tuple2<Tuple2<String, String>, 
Double>, String, Tuple2<String, Double>>() {
-      /**
-       *
-       */
-      private static final long serialVersionUID = 1L;
-
-      @Override
-      public Tuple2<String, Tuple2<String, Double>> call(Tuple2<Tuple2<String, 
String>, Double> arg0) throws Exception {
-
-        Tuple2<String, Double> wordmums = new Tuple2<>(arg0._1._1, arg0._2);
-        return new Tuple2<>(arg0._1._2, wordmums);
-      }
-    });
-
-    //
-
-    JavaPairRDD<String, Tuple2<Tuple2<String, Double>, Optional<Long>>> 
testRDD = wordDocnumRDD.leftOuterJoin(wordIDRDD);
-
-    int wordsize = (int) wordIDRDD.count();
-    JavaPairRDD<String, Vector> docVectorRDD = testRDD.mapToPair(new 
PairFunction<Tuple2<String, Tuple2<Tuple2<String, Double>, Optional<Long>>>, 
String, Tuple2<List<Long>, List<Double>>>() {
-      /**
-       *
-       */
-      private static final long serialVersionUID = 1L;
-
-      @Override
-      public Tuple2<String, Tuple2<List<Long>, List<Double>>> 
call(Tuple2<String, Tuple2<Tuple2<String, Double>, Optional<Long>>> arg0) 
throws Exception {
-        Optional<Long> oid = arg0._2._2;
-        Long wordId = (long) 0;
-        if (oid.isPresent()) {
-          wordId = oid.get();
-        }
-
-        List<Long> word = new ArrayList<>();
-        word.add(wordId);
-
-        List<Double> count = new ArrayList<>();
-        count.add(arg0._2._1._2);
-
-        Tuple2<List<Long>, List<Double>> wordcount = new Tuple2<>(word, count);
-
-        return new Tuple2<>(arg0._2._1._1, wordcount);
-      }
-
-    }).reduceByKey(new Function2<Tuple2<List<Long>, List<Double>>, 
Tuple2<List<Long>, List<Double>>, Tuple2<List<Long>, List<Double>>>() {
-      /**
-       *
-       */
-      private static final long serialVersionUID = 1L;
-
-      @Override
-      public Tuple2<List<Long>, List<Double>> call(Tuple2<List<Long>, 
List<Double>> arg0, Tuple2<List<Long>, List<Double>> arg1) throws Exception {
-        arg0._1.addAll(arg1._1);
-        arg0._2.addAll(arg1._2);
-        return new Tuple2<>(arg0._1, arg0._2);
-      }
-    }).mapToPair(new PairFunction<Tuple2<String, Tuple2<List<Long>, 
List<Double>>>, String, Vector>() {
-      /**
-       *
-       */
-      private static final long serialVersionUID = 1L;
-
-      @Override
-      public Tuple2<String, Vector> call(Tuple2<String, Tuple2<List<Long>, 
List<Double>>> arg0) throws Exception {
-        int docsize = arg0._2._1.size();
-        int[] intArray = new int[docsize];
-        double[] doubleArray = new double[docsize];
-        for (int i = 0; i < docsize; i++) {
-          intArray[i] = arg0._2._1.get(i).intValue();
-          doubleArray[i] = arg0._2._2.get(i).intValue();
-        }
-        Vector sv = Vectors.sparse(wordsize, intArray, doubleArray);
-        return new Tuple2<>(arg0._1, sv);
-      }
-    });
-
-    RowMatrix docwordMatrix = new RowMatrix(docVectorRDD.values().rdd());
-
-    LabeledRowMatrix labeledRowMatrix = new LabeledRowMatrix();
-    labeledRowMatrix.rowMatrix = docwordMatrix;
-    labeledRowMatrix.rowkeys = docVectorRDD.keys().collect();
-    labeledRowMatrix.colkeys = wordIDRDD.keys().collect();
-
-    return labeledRowMatrix;
-  }
-
-  /**
-   * loadVectorFromCSV: Load term vector from csv file.
-   *
-   * @param spark
-   *          spark instance
-   * @param csvFileName
-   *          csv matrix file
-   * @param skipNum
-   *          the numbers of rows which should be skipped Ignore the top skip
-   *          number rows of the csv file
-   * @return JavaPairRDD, each key is a term, and value is the vector of the
-   *         term in feature space.
-   */
-  public static JavaPairRDD<String, Vector> loadVectorFromCSV(SparkDriver 
spark, String csvFileName, int skipNum) {
-    // skip the first line (header), important!
-    JavaRDD<String> importRDD = spark.sc.textFile(csvFileName);
-    JavaPairRDD<String, Long> importIdRDD = 
importRDD.zipWithIndex().filter(new Function<Tuple2<String, Long>, Boolean>() {
-      /** */
-      private static final long serialVersionUID = 1L;
-
-      @Override
-      public Boolean call(Tuple2<String, Long> v1) throws Exception {
-        if (v1._2 > (skipNum - 1)) {
-          return true;
-        }
-        return false;
-      }
-    });
-
-    if (importIdRDD.count() == 0) {
-      return null;
-    }
-
-    return importIdRDD.mapToPair(new PairFunction<Tuple2<String, Long>, 
String, Vector>() {
-      /** */
-      private static final long serialVersionUID = 1L;
-
-      @Override
-      public Tuple2<String, Vector> call(Tuple2<String, Long> t) throws 
Exception {
-        String[] fields = t._1.split(",");
-        String word = fields[0];
-        int fieldsize = fields.length;
-        int nStart = 1;
-        int nEnd = fieldsize - 1;
-        if (fieldsize < 2) {
-          nStart = 0;
-          nEnd = 0;
-        }
-        String[] numfields = Arrays.copyOfRange(fields, nStart, nEnd);
-
-        double[] nums = 
Stream.of(numfields).mapToDouble(Double::parseDouble).toArray();
-        Vector vec = Vectors.dense(nums);
-        return new Tuple2<>(word, vec);
-      }
-    });
-  }
-
-  /**
-   * Convert vectorRDD to indexed row matrix.
-   *
-   * @param vecs
-   *          Vector RDD
-   * @return IndexedRowMatrix
-   */
-  public static IndexedRowMatrix buildIndexRowMatrix(JavaRDD<Vector> vecs) {
-    JavaRDD<IndexedRow> indexrows = vecs.zipWithIndex().map(new 
Function<Tuple2<Vector, Long>, IndexedRow>() {
-      /**
-       *
-       */
-      private static final long serialVersionUID = 1L;
-
-      @Override
-      public IndexedRow call(Tuple2<Vector, Long> docId) {
-        return new IndexedRow(docId._2, docId._1);
-      }
-    });
-    return new IndexedRowMatrix(indexrows.rdd());
-  }
-
-  /**
-   * Transpose matrix
-   *
-   * @param indexedMatrix
-   *          spark indexed matrix
-   * @return rowmatrix, each row is corresponding to the column in the original
-   *         matrix and vice versa
-   */
-  public static RowMatrix transposeMatrix(IndexedRowMatrix indexedMatrix) {
-    return indexedMatrix.toCoordinateMatrix().transpose().toRowMatrix();
-  }
-
-  /**
-   * Output matrix to a CSV file.
-   *
-   * @param matrix
-   *          spark row matrix
-   * @param rowKeys
-   *          matrix row names
-   * @param colKeys
-   *          matrix coloum names
-   * @param fileName
-   *          csv file name
-   */
-  public static void exportToCSV(RowMatrix matrix, List<String> rowKeys, 
List<String> colKeys, String fileName) {
-
-    if (matrix.rows().isEmpty()) {
-      return;
-    }
-
-    int rownum = (int) matrix.numRows();
-    int colnum = (int) matrix.numCols();
-    List<Vector> rows = matrix.rows().toJavaRDD().collect();
-
-    File file = new File(fileName);
-    if (file.exists()) {
-      file.delete();
-    }
-    try {
-      file.createNewFile();
-      FileWriter fw = new FileWriter(file.getAbsoluteFile());
-      BufferedWriter bw = new BufferedWriter(fw);
-      String coltitle = " Num" + ",";
-      for (int j = 0; j < colnum; j++) {
-        coltitle += "\"" + colKeys.get(j) + "\",";
-      }
-      coltitle = coltitle.substring(0, coltitle.length() - 1);
-      bw.write(coltitle + "\n");
-
-      for (int i = 0; i < rownum; i++) {
-        double[] rowvlaue = rows.get(i).toArray();
-        String row = rowKeys.get(i) + ",";
-        for (int j = 0; j < colnum; j++) {
-          row += rowvlaue[j] + ",";
-        }
-        row = row.substring(0, row.length() - 1);
-        bw.write(row + "\n");
-      }
-
-      bw.close();
-
-    } catch (IOException e) {
-      e.printStackTrace();
-
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/utils/RDDUtil.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/gov/nasa/jpl/mudrod/utils/RDDUtil.java 
b/core/src/main/java/gov/nasa/jpl/mudrod/utils/RDDUtil.java
deleted file mode 100644
index 3fc45f4..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/utils/RDDUtil.java
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you
- * may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package gov.nasa.jpl.mudrod.utils;
-
-import org.apache.spark.api.java.JavaPairRDD;
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.api.java.function.FlatMapFunction;
-
-import java.util.Iterator;
-import java.util.List;
-
-/**
- * ClassName: RDDUtil Function: Mudrod Spark RDD common methods
- */
-public class RDDUtil {
-
-  public RDDUtil() {
-  }
-
-  /**
-   * getAllWordsInDoc: Extracted all unique terms from all docs.
-   *
-   * @param docwordRDD Pair RDD, each key is a doc, and value is term list 
extracted from
-   *                   that doc.
-   * @return unique term list
-   */
-  public static JavaRDD<String> getAllWordsInDoc(JavaPairRDD<String, 
List<String>> docwordRDD) {
-    JavaRDD<String> wordRDD = docwordRDD.values().flatMap(new 
FlatMapFunction<List<String>, String>() {
-      /**
-       *
-       */
-      private static final long serialVersionUID = 1L;
-
-      @Override
-      public Iterator<String> call(List<String> list) {
-        return list.iterator();
-      }
-    }).distinct();
-
-    return wordRDD;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/utils/SVDUtil.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/gov/nasa/jpl/mudrod/utils/SVDUtil.java 
b/core/src/main/java/gov/nasa/jpl/mudrod/utils/SVDUtil.java
deleted file mode 100644
index 1982996..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/utils/SVDUtil.java
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you
- * may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package gov.nasa.jpl.mudrod.utils;
-
-import gov.nasa.jpl.mudrod.discoveryengine.MudrodAbstract;
-import gov.nasa.jpl.mudrod.driver.ESDriver;
-import gov.nasa.jpl.mudrod.driver.SparkDriver;
-import org.apache.spark.api.java.JavaPairRDD;
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.mllib.linalg.Vector;
-import org.apache.spark.mllib.linalg.distributed.CoordinateMatrix;
-import org.apache.spark.mllib.linalg.distributed.RowMatrix;
-
-import java.io.IOException;
-import java.util.List;
-import java.util.Properties;
-
-/**
- * Singular value decomposition
- */
-public class SVDUtil extends MudrodAbstract {
-
-  /**
-   *
-   */
-  private static final long serialVersionUID = 1L;
-  // wordRDD: terms extracted from all documents
-  JavaRDD<String> wordRDD;
-  // svdMatrix: svd matrix
-  private RowMatrix svdMatrix;
-  // simMatrix: similarity matrix
-  private CoordinateMatrix simMatrix;
-
-  /**
-   * Creates a new instance of SVDUtil.
-   *
-   * @param config the Mudrod configuration
-   * @param es     the Elasticsearch drive
-   * @param spark  the spark driver
-   */
-  public SVDUtil(Properties config, ESDriver es, SparkDriver spark) {
-    super(config, es, spark);
-  }
-
-  /**
-   * Build SVD matrix from docment-terms pairs.
-   *
-   * @param docwordRDD    JavaPairRDD, key is short name of data set and 
values are terms in
-   *                      the corresponding data set
-   * @param svdDimension: Dimension of matrix after singular value 
decomposition
-   * @return row matrix
-   */
-  public RowMatrix buildSVDMatrix(JavaPairRDD<String, List<String>> 
docwordRDD, int svdDimension) {
-
-    RowMatrix svdMatrix = null;
-    LabeledRowMatrix wordDocMatrix = 
MatrixUtil.createWordDocMatrix(docwordRDD);
-    RowMatrix ifIdfMatrix = 
MatrixUtil.createTFIDFMatrix(wordDocMatrix.rowMatrix);
-    svdMatrix = MatrixUtil.buildSVDMatrix(ifIdfMatrix, svdDimension);
-    this.svdMatrix = svdMatrix;
-    this.wordRDD = RDDUtil.getAllWordsInDoc(docwordRDD);
-    return svdMatrix;
-  }
-
-  /**
-   * Build svd matrix from CSV file.
-   *
-   * @param tfidfCSVfile  tf-idf matrix csv file
-   * @param svdDimension: Dimension of matrix after singular value 
decomposition
-   * @return row matrix
-   */
-  public RowMatrix buildSVDMatrix(String tfidfCSVfile, int svdDimension) {
-    RowMatrix svdMatrix = null;
-    JavaPairRDD<String, Vector> tfidfRDD = MatrixUtil.loadVectorFromCSV(spark, 
tfidfCSVfile, 2);
-    JavaRDD<Vector> vectorRDD = tfidfRDD.values();
-
-    svdMatrix = MatrixUtil.buildSVDMatrix(vectorRDD, svdDimension);
-    this.svdMatrix = svdMatrix;
-
-    this.wordRDD = tfidfRDD.keys();
-
-    return svdMatrix;
-  }
-
-  /**
-   * Calculate similarity
-   */
-  public void calSimilarity() {
-    CoordinateMatrix simMatrix = 
SimilarityUtil.calculateSimilarityFromMatrix(svdMatrix);
-    this.simMatrix = simMatrix;
-  }
-
-  /**
-   * Insert linkage triples to elasticsearch
-   *
-   * @param index index name
-   * @param type  linkage triple name
-   */
-  public void insertLinkageToES(String index, String type) {
-    List<LinkageTriple> triples = SimilarityUtil.matrixToTriples(wordRDD, 
simMatrix);
-    try {
-      LinkageTriple.insertTriples(es, triples, index, type);
-    } catch (IOException e) {
-      e.printStackTrace();
-    }
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/utils/SimilarityUtil.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/gov/nasa/jpl/mudrod/utils/SimilarityUtil.java 
b/core/src/main/java/gov/nasa/jpl/mudrod/utils/SimilarityUtil.java
deleted file mode 100644
index 8ae9770..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/utils/SimilarityUtil.java
+++ /dev/null
@@ -1,277 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you
- * may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package gov.nasa.jpl.mudrod.utils;
-
-import org.apache.spark.api.java.JavaPairRDD;
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.api.java.Optional;
-import org.apache.spark.api.java.function.Function;
-import org.apache.spark.api.java.function.PairFunction;
-import org.apache.spark.mllib.linalg.Vector;
-import org.apache.spark.mllib.linalg.distributed.CoordinateMatrix;
-import org.apache.spark.mllib.linalg.distributed.IndexedRowMatrix;
-import org.apache.spark.mllib.linalg.distributed.MatrixEntry;
-import org.apache.spark.mllib.linalg.distributed.RowMatrix;
-import scala.Tuple2;
-
-import java.util.List;
-
-/**
- * Similarity and distrance calculation utilities
- */
-public class SimilarityUtil {
-
-  public static final int SIM_COSINE = 3;
-  public static final int SIM_HELLINGER = 2;
-  public static final int SIM_PEARSON = 1;
-  /**
-   * CalSimilarityFromMatrix: Calculate term similarity from matrix.
-   *
-   * @param svdMatrix. Each row is corresponding to a term, and each column is
-   *                   corresponding to a dimension of feature
-   * @return CoordinateMatrix, each row is corresponding to a term, and each
-   * column is also a term, the cell value is the similarity between the
-   * two terms
-   */
-  public static CoordinateMatrix calculateSimilarityFromMatrix(RowMatrix 
svdMatrix) {
-    JavaRDD<Vector> vecs = svdMatrix.rows().toJavaRDD();
-    return SimilarityUtil.calculateSimilarityFromVector(vecs);
-  }
-
-  /**
-   * CalSimilarityFromVector:Calculate term similarity from vector.
-   *
-   * @param vecs Each vector is corresponding to a term in the feature space.
-   * @return CoordinateMatrix, each row is corresponding to a term, and each
-   * column is also a term, the cell value is the similarity between the
-   * two terms
-   */
-  public static CoordinateMatrix calculateSimilarityFromVector(JavaRDD<Vector> 
vecs) {
-    IndexedRowMatrix indexedMatrix = MatrixUtil.buildIndexRowMatrix(vecs);
-    RowMatrix transposeMatrix = MatrixUtil.transposeMatrix(indexedMatrix);
-    return transposeMatrix.columnSimilarities();
-  }
-
-  /**
-   * Calculate term similarity from vector.
-   *
-   * @param importRDD the {@link org.apache.spark.api.java.JavaPairRDD}
-   *                  data structure containing the vectors.
-   * @param simType   the similarity calculation to execute e.g. 
-   * <ul>
-   * <li>{@link gov.nasa.jpl.mudrod.utils.SimilarityUtil#SIM_COSINE} - 3,</li>
-   * <li>{@link gov.nasa.jpl.mudrod.utils.SimilarityUtil#SIM_HELLINGER} - 
2,</li>
-   * <li>{@link gov.nasa.jpl.mudrod.utils.SimilarityUtil#SIM_PEARSON} - 1</li>
-   * </ul>
-   * @return a new {@link org.apache.spark.api.java.JavaPairRDD}
-   */
-  public static JavaRDD<LinkageTriple> 
calculateSimilarityFromVector(JavaPairRDD<String, Vector> importRDD, int 
simType) {
-    JavaRDD<Tuple2<String, Vector>> importRDD1 = importRDD.map(f -> new 
Tuple2<String, Vector>(f._1, f._2));
-    JavaPairRDD<Tuple2<String, Vector>, Tuple2<String, Vector>> cartesianRDD = 
importRDD1.cartesian(importRDD1);
-
-    return cartesianRDD.map(new Function<Tuple2<Tuple2<String, Vector>, 
Tuple2<String, Vector>>, LinkageTriple>() {
-
-      /**
-       *
-       */
-      private static final long serialVersionUID = 1L;
-
-      @Override
-      public LinkageTriple call(Tuple2<Tuple2<String, Vector>, Tuple2<String, 
Vector>> arg) {
-        String keyA = arg._1._1;
-        String keyB = arg._2._1;
-
-        if (keyA.equals(keyB)) {
-          return null;
-        }
-
-        Vector vecA = arg._1._2;
-        Vector vecB = arg._2._2;
-        Double weight = 0.0;
-
-        if (simType == SimilarityUtil.SIM_PEARSON) {
-          weight = SimilarityUtil.pearsonDistance(vecA, vecB);
-        } else if (simType == SimilarityUtil.SIM_HELLINGER) {
-          weight = SimilarityUtil.hellingerDistance(vecA, vecB);
-        }
-
-        LinkageTriple triple = new LinkageTriple();
-        triple.keyA = keyA;
-        triple.keyB = keyB;
-        triple.weight = weight;
-        return triple;
-      }
-    }).filter(new Function<LinkageTriple, Boolean>() {
-      /**
-       *
-       */
-      private static final long serialVersionUID = 1L;
-
-      @Override
-      public Boolean call(LinkageTriple arg0) throws Exception {
-        if (arg0 == null) {
-          return false;
-        }
-        return true;
-      }
-    });
-  }
-
-  /**
-   * MatrixtoTriples:Convert term similarity matrix to linkage triple list.
-   *
-   * @param keys      each key is a term
-   * @param simMatirx term similarity matrix, in which each row and column is 
a term and
-   *                  the cell value is the similarity between the two terms
-   * @return linkage triple list
-   */
-  public static List<LinkageTriple> matrixToTriples(JavaRDD<String> keys, 
CoordinateMatrix simMatirx) {
-    if (simMatirx.numCols() != keys.count()) {
-      return null;
-    }
-
-    // index words
-    JavaPairRDD<Long, String> keyIdRDD = 
JavaPairRDD.fromJavaRDD(keys.zipWithIndex().map(new Function<Tuple2<String, 
Long>, Tuple2<Long, String>>() {
-      /**
-       *
-       */
-      private static final long serialVersionUID = 1L;
-
-      @Override
-      public Tuple2<Long, String> call(Tuple2<String, Long> docId) {
-        return docId.swap();
-      }
-    }));
-
-    JavaPairRDD<Long, LinkageTriple> entriesRowRDD = 
simMatirx.entries().toJavaRDD().mapToPair(new PairFunction<MatrixEntry, Long, 
LinkageTriple>() {
-      /**
-       *
-       */
-      private static final long serialVersionUID = 1L;
-
-      @Override
-      public Tuple2<Long, LinkageTriple> call(MatrixEntry t) throws Exception {
-        LinkageTriple triple = new LinkageTriple();
-        triple.keyAId = t.i();
-        triple.keyBId = t.j();
-        triple.weight = t.value();
-        return new Tuple2<>(triple.keyAId, triple);
-      }
-    });
-
-    JavaPairRDD<Long, LinkageTriple> entriesColRDD = 
entriesRowRDD.leftOuterJoin(keyIdRDD).values().mapToPair(new 
PairFunction<Tuple2<LinkageTriple, Optional<String>>, Long, LinkageTriple>() {
-      /**
-       *
-       */
-      private static final long serialVersionUID = 1L;
-
-      @Override
-      public Tuple2<Long, LinkageTriple> call(Tuple2<LinkageTriple, 
Optional<String>> t) throws Exception {
-        LinkageTriple triple = t._1;
-        Optional<String> stra = t._2;
-        if (stra.isPresent()) {
-          triple.keyA = stra.get();
-        }
-        return new Tuple2<>(triple.keyBId, triple);
-      }
-    });
-
-    JavaRDD<LinkageTriple> tripleRDD = 
entriesColRDD.leftOuterJoin(keyIdRDD).values().map(new 
Function<Tuple2<LinkageTriple, Optional<String>>, LinkageTriple>() {
-      /**
-       *
-       */
-      private static final long serialVersionUID = 1L;
-
-      @Override
-      public LinkageTriple call(Tuple2<LinkageTriple, Optional<String>> t) 
throws Exception {
-        LinkageTriple triple = t._1;
-        Optional<String> strb = t._2;
-        if (strb.isPresent()) {
-          triple.keyB = strb.get();
-        }
-        return triple;
-      }
-    });
-    return tripleRDD.collect();
-  }
-
-  /**
-   * Calculate similarity (Hellinger Distance) between vectors
-   *
-   * @param vecA initial vector from which to calculate a similarity
-   * @param vecB second vector involved in similarity calculation
-   * @return similarity between two vectors
-   */
-  public static double hellingerDistance(Vector vecA, Vector vecB) {
-    double[] arrA = vecA.toArray();
-    double[] arrB = vecB.toArray();
-
-    double sim = 0.0;
-
-    int arrsize = arrA.length;
-    for (int i = 0; i < arrsize; i++) {
-      double a = arrA[i];
-      double b = arrB[i];
-      double sqrtDiff = Math.sqrt(a) - Math.sqrt(b);
-      sim += sqrtDiff * sqrtDiff;
-    }
-
-    sim = sim / Math.sqrt(2);
-
-    return sim;
-  }
-
-  /**
-   * Calculate similarity (Pearson Distance) between vectors
-   *
-   * @param vecA initial vector from which to calculate a similarity
-   * @param vecB second vector involved in similarity calculation
-   * @return similarity between two vectors
-   */
-  public static double pearsonDistance(Vector vecA, Vector vecB) {
-    double[] arrA = vecA.toArray();
-    double[] arrB = vecB.toArray();
-
-    int viewA = 0;
-    int viewB = 0;
-    int viewAB = 0;
-
-    int arrsize = arrA.length;
-    for (int i = 0; i < arrsize; i++) {
-      if (arrA[i] > 0) {
-        viewA++;
-      }
-
-      if (arrB[i] > 0) {
-        viewB++;
-      }
-
-      if (arrB[i] > 0 && arrA[i] > 0) {
-        viewAB++;
-      }
-    }
-    return viewAB / (Math.sqrt(viewA) * Math.sqrt(viewB));
-  }
-
-  /**
-   * calculate similarity between vectors
-   *
-   * @param vecA initial vector from which to calculate a similarity
-   * @param vecB second vector involved in similarity calculation
-   * @return similarity between two vectors
-   */
-  public static double cosineDistance(Vector vecA, Vector vecB) {
-    return 1;
-  }
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/utils/package-info.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/gov/nasa/jpl/mudrod/utils/package-info.java 
b/core/src/main/java/gov/nasa/jpl/mudrod/utils/package-info.java
deleted file mode 100644
index 3fcd95e..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/utils/package-info.java
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you 
- * may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at
- * 
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/**
- * This package includes utilities classes for calculating similarity and
- * parsing HTTP request
- */
-package gov.nasa.jpl.mudrod.utils;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/weblog/package-info.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/gov/nasa/jpl/mudrod/weblog/package-info.java 
b/core/src/main/java/gov/nasa/jpl/mudrod/weblog/package-info.java
deleted file mode 100644
index f4a8b86..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/weblog/package-info.java
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you 
- * may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at
- * 
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/**
- * This package includes web log pre-processing, processing, and data structure
- * classes.
- */
-package gov.nasa.jpl.mudrod.weblog;
\ No newline at end of file

[13/17] incubator-sdap-mudrod git commit: SDAP-7 Change all package namespaces to org.apache.sdap

Reply via email to